refactor: move pandas processing out of StatGen

Also improves error handling and logging
2026-01-29 14:52:43 +00:00
parent 0a45bd6855
commit 64c3422395
2 changed files with 7 additions and 5 deletions
--- a/server/app.py
+++ b/server/app.py
@@ -2,6 +2,8 @@ from flask import Flask, jsonify, request
 from flask_cors import CORS
 from server.stat_gen import StatGen
+import pandas as pd
 app = Flask(__name__)
 # Allow for CORS from localhost:5173
@@ -26,7 +28,10 @@ def upload_data():
    try:
        global stat_obj
-        stat_obj = StatGen(post_file, comment_file)
+
+        posts_df = pd.read_json(post_file, lines=True)
+        comments_df = pd.read_json(comment_file, lines=True)
+        stat_obj = StatGen(posts_df, comments_df)
    except ValueError as e:
        return jsonify({"error": f"Failed to read JSONL file: {str(e)}"}), 400
    except Exception as e:
--- a/server/stat_gen.py
+++ b/server/stat_gen.py
@@ -19,10 +19,7 @@ nltk.download('stopwords')
 EXCLUDE_WORDS = set(stopwords.words('english')) | DOMAIN_STOPWORDS
 class StatGen:
-    def __init__(self, posts: list, comments: list) -> None:
+    def __init__(self, posts_df: pd.DataFrame, comments_df: pd.DataFrame) -> None:
-        posts_df = pd.read_json(posts, lines=True)
-        comments_df = pd.read_json(comments, lines=True)
        posts_df["type"] = "post"
        posts_df["parent_id"] = None