From 64c342239554a1d775a6f48ac2e26e8fbbda08b0 Mon Sep 17 00:00:00 2001 From: Dylan De Faoite Date: Thu, 29 Jan 2026 14:52:43 +0000 Subject: [PATCH] refactor: move pandas processing out of Stat Obj Also improves error handling and logging --- server/app.py | 7 ++++++- server/stat_gen.py | 5 +---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/server/app.py b/server/app.py index 46932e7..a5cb482 100644 --- a/server/app.py +++ b/server/app.py @@ -2,6 +2,8 @@ from flask import Flask, jsonify, request from flask_cors import CORS from server.stat_gen import StatGen +import pandas as pd + app = Flask(__name__) # Allow for CORS from localhost:5173 @@ -26,7 +28,10 @@ def upload_data(): try: global stat_obj - stat_obj = StatGen(post_file, comment_file) + + posts_df = pd.read_json(post_file, lines=True) + comments_df = pd.read_json(comment_file, lines=True) + stat_obj = StatGen(posts_df, comments_df) except ValueError as e: return jsonify({"error": f"Failed to read JSONL file: {str(e)}"}), 400 except Exception as e: diff --git a/server/stat_gen.py b/server/stat_gen.py index 526e510..851942b 100644 --- a/server/stat_gen.py +++ b/server/stat_gen.py @@ -19,10 +19,7 @@ nltk.download('stopwords') EXCLUDE_WORDS = set(stopwords.words('english')) | DOMAIN_STOPWORDS class StatGen: - def __init__(self, posts: list, comments: list) -> None: - posts_df = pd.read_json(posts, lines=True) - comments_df = pd.read_json(comments, lines=True) - + def __init__(self, posts_df: pd.DataFrame, comments_df: pd.DataFrame) -> None: posts_df["type"] = "post" posts_df["parent_id"] = None