refactor: move pandas JSONL parsing out of StatGen and into upload_data
Also improves error handling and logging
This commit is contained in:
@@ -2,6 +2,8 @@ from flask import Flask, jsonify, request
 from flask_cors import CORS
 from server.stat_gen import StatGen
 
+import pandas as pd
+
 app = Flask(__name__)
 
 # Allow for CORS from localhost:5173
@@ -26,7 +28,10 @@ def upload_data():
 
     try:
         global stat_obj
-        stat_obj = StatGen(post_file, comment_file)
+
+        posts_df = pd.read_json(post_file, lines=True)
+        comments_df = pd.read_json(comment_file, lines=True)
+        stat_obj = StatGen(posts_df, comments_df)
     except ValueError as e:
         return jsonify({"error": f"Failed to read JSONL file: {str(e)}"}), 400
     except Exception as e:
@@ -19,10 +19,7 @@ nltk.download('stopwords')
 EXCLUDE_WORDS = set(stopwords.words('english')) | DOMAIN_STOPWORDS
 
 class StatGen:
-    def __init__(self, posts: list, comments: list) -> None:
-        posts_df = pd.read_json(posts, lines=True)
-        comments_df = pd.read_json(comments, lines=True)
-
+    def __init__(self, posts_df: pd.DataFrame, comments_df: pd.DataFrame) -> None:
         posts_df["type"] = "post"
         posts_df["parent_id"] = None
 
Reference in New Issue
Block a user