From d608514dfac5512ed31229c0a80a336db087eebe Mon Sep 17 00:00:00 2001
From: Dylan De Faoite <dylanseandefaoite@gmail.com>
Date: Sun, 1 Feb 2026 18:00:54 +0000
Subject: [PATCH] feat: user analysis endpoint

---
 server/app.py      | 13 +++++++++++++
 server/stat_gen.py | 25 +++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/server/app.py b/server/app.py
index 9af41bc..4a93b89 100644
--- a/server/app.py
+++ b/server/app.py
@@ -79,6 +79,19 @@ def get_time_analysis():
         print(traceback.format_exc())
         return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
     
+@app.route("/stats/user", methods=["GET"])
+def get_user_analysis():
+    """Return the per-user statistics of the uploaded dataset as JSON."""
+    if stat_obj is None:
+        return jsonify({"error": "No data uploaded"}), 400
+    try:
+        return jsonify(stat_obj.user_analysis()), 200
+    except ValueError as exc:
+        return jsonify({"error": f"Malformed or missing data: {str(exc)}"}), 400
+    except Exception as exc:
+        print(traceback.format_exc())
+        return jsonify({"error": f"An unexpected error occurred: {str(exc)}"}), 500
+    
 @app.route('/filter/search', methods=["POST"])
 def search_dataset():
     if stat_obj is None:
diff --git a/server/stat_gen.py b/server/stat_gen.py
index 4960b20..99c307d 100644
--- a/server/stat_gen.py
+++ b/server/stat_gen.py
@@ -125,6 +125,31 @@ class StatGen:
             "word_frequencies": word_frequencies.to_dict(orient='records')
         }
     
+    def user_analysis(self) -> dict:
+        """Summarise posting activity per author.
+
+        Returns "top_users" (row counts per author/source pair, largest first) and
+        "users" (per-author counts by type plus comment/post ratio and share).
+        """
+        counts = self.df.groupby(["author", "source"]).size().sort_values(ascending=False)
+        per_user = self.df.groupby(["author", "type"]).size().unstack(fill_value=0)
+        # unstack() only creates columns for types present; add the missing ones.
+        for kind in ("post", "comment"):
+            if kind not in per_user.columns:
+                per_user[kind] = 0
+        posts, comments = per_user["post"], per_user["comment"]
+        # A zero denominator is replaced with 1 so the division stays finite.
+        per_user["comment_post_ratio"] = comments / posts.replace(0, 1)
+        per_user["comment_share"] = comments / (posts + comments).replace(0, 1)
+        per_user = per_user.sort_values("comment_post_ratio", ascending=True)
+        return {
+            "top_users": [
+                {"author": author, "source": source, "count": int(count)}
+                for (author, source), count in counts.items()
+            ],
+            "users": per_user.reset_index().to_dict(orient="records")
+        }
+    
     def search(self, search_query: str) -> dict:
         self.df = self.df[
             self.df["content"].str.contains(search_query)