feat: add descriptions to topics to improve accuracy

Also upgraded to a more powerful model.
This commit is contained in:
2026-02-08 15:10:11 +00:00
parent b019885b2f
commit a9d63c7041
4 changed files with 27 additions and 13 deletions

View File

@@ -4,6 +4,7 @@ from server.stat_gen import StatGen
import pandas as pd
import traceback
import json
app = Flask(__name__)
@@ -13,7 +14,8 @@ CORS(app, resources={r"/*": {"origins": "http://localhost:5173"}})
# Global State
posts_df = pd.read_json('posts.jsonl', lines=True)
comments_df = pd.read_json('comments.jsonl', lines=True)
domain_topics = open("topic_buckets.txt").read().splitlines()
with open("topic_buckets.json", "r", encoding="utf-8") as f:
domain_topics = json.load(f)
stat_obj = StatGen(posts_df, comments_df, domain_topics)
@app.route('/upload', methods=['POST'])
@@ -28,15 +30,15 @@ def upload_data():
if post_file.filename == "" or comment_file.filename == "" or topic_file == "":
return jsonify({"error": "Empty filename"}), 400
if not post_file.filename.endswith('.jsonl') or not comment_file.filename.endswith('.jsonl') or not topic_file.endswith('.txt'):
return jsonify({"error": "Invalid file type. Only .jsonl and .txt files are allowed."}), 400
if not post_file.filename.endswith('.jsonl') or not comment_file.filename.endswith('.jsonl') or not topic_file.endswith('.json'):
return jsonify({"error": "Invalid file type. Only .jsonl and .json files are allowed."}), 400
try:
global stat_obj
posts_df = pd.read_json(post_file, lines=True)
comments_df = pd.read_json(comment_file, lines=True)
stat_obj = StatGen(posts_df, comments_df, topic_file.splitlines())
stat_obj = StatGen(posts_df, comments_df, json.load(topic_file))
return jsonify({"message": "File uploaded successfully", "event_count": len(stat_obj.df)}), 200
except ValueError as e:
return jsonify({"error": f"Failed to read JSONL file: {str(e)}"}), 400