From d608514dfac5512ed31229c0a80a336db087eebe Mon Sep 17 00:00:00 2001
From: Dylan De Faoite
Date: Sun, 1 Feb 2026 18:00:54 +0000
Subject: [PATCH] feat: user analysis endpoint

---
Review note: user_analysis() now adds missing "post"/"comment" count
columns before computing the ratios, so a posts-only or comments-only
dataset no longer raises KeyError. Hunk sizes and the diffstat are updated
to match; the post-image blob id on the server/stat_gen.py index line
predates this edit. Context-line indentation was reconstructed and should
be confirmed against the target files before applying.

 server/app.py      | 13 +++++++++++++
 server/stat_gen.py | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/server/app.py b/server/app.py
index 9af41bc..4a93b89 100644
--- a/server/app.py
+++ b/server/app.py
@@ -79,6 +79,19 @@ def get_time_analysis():
         print(traceback.format_exc())
         return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
 
+@app.route("/stats/user", methods=["GET"])
+def get_user_analysis():
+    if stat_obj is None:
+        return jsonify({"error": "No data uploaded"}), 400
+
+    try:
+        return jsonify(stat_obj.user_analysis()), 200
+    except ValueError as e:
+        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
+    except Exception as e:
+        print(traceback.format_exc())
+        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
+
 @app.route('/filter/search', methods=["POST"])
 def search_dataset():
     if stat_obj is None:
diff --git a/server/stat_gen.py b/server/stat_gen.py
index 4960b20..99c307d 100644
--- a/server/stat_gen.py
+++ b/server/stat_gen.py
@@ -125,6 +125,44 @@ class StatGen:
             "word_frequencies": word_frequencies.to_dict(orient='records')
         }
 
+    def user_analysis(self) -> dict:
+        """Summarise per-author activity.
+
+        Returns a dict with "top_users" (row count per author/source
+        pair, highest first) and "users" (per-author counts for each
+        "type" value plus comment ratios, lowest ratio first).
+        """
+        counts = (
+            self.df.groupby(["author", "source"])
+            .size()
+            .sort_values(ascending=False)
+        )
+
+        per_user = (
+            self.df.groupby(["author", "type"])
+            .size()
+            .unstack(fill_value=0)
+        )
+
+        # unstack() only creates columns for "type" values that occur,
+        # so a posts-only or comments-only dataset needs them added.
+        for kind in ("post", "comment"):
+            if kind not in per_user.columns:
+                per_user[kind] = 0
+
+        # A zero denominator is swapped for 1 to avoid dividing by zero.
+        per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(0, 1)
+        per_user["comment_share"] = per_user["comment"] / (per_user["post"] + per_user["comment"]).replace(0, 1)
+        per_user = per_user.sort_values("comment_post_ratio", ascending=True)
+
+        return {
+            "top_users": [
+                {"author": author, "source": source, "count": int(count)}
+                for (author, source), count in counts.items()
+            ],
+            "users": per_user.reset_index().to_dict(orient="records")
+        }
+
     def search(self, search_query: str) -> dict:
         self.df = self.df[
             self.df["content"].str.contains(search_query)