2026-03-18 20:30:19 +00:00
3 changed files with 65 additions and 82 deletions
--- a/server/analysis/interactional.py
+++ b/server/analysis/interactional.py
@@ -57,73 +57,6 @@ class InteractionAnalysis:

        return rows

-    def top_users(self, df: pd.DataFrame) -> list:
-        counts = df.groupby(["author", "source"]).size().sort_values(ascending=False)
-
-        top_users = [
-            {"author": author, "source": source, "count": int(count)}
-            for (author, source), count in counts.items()
-        ]
-
-        return top_users
-
-    def per_user_analysis(self, df: pd.DataFrame) -> dict:
-        per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0)
-
-        emotion_cols = [col for col in df.columns if col.startswith("emotion_")]
-
-        avg_emotions_by_author = {}
-        if emotion_cols:
-            avg_emotions = df.groupby("author")[emotion_cols].mean().fillna(0.0)
-            avg_emotions_by_author = {
-                author: {emotion: float(score) for emotion, score in row.items()}
-                for author, row in avg_emotions.iterrows()
-            }
-
-        # ensure columns always exist
-        for col in ("post", "comment"):
-            if col not in per_user.columns:
-                per_user[col] = 0
-
-        per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(
-            0, 1
-        )
-        per_user["comment_share"] = per_user["comment"] / (
-            per_user["post"] + per_user["comment"]
-        ).replace(0, 1)
-        per_user = per_user.sort_values("comment_post_ratio", ascending=True)
-        per_user_records = per_user.reset_index().to_dict(orient="records")
-
-        vocab_rows = self._vocab_richness_per_user(df)
-        vocab_by_author = {row["author"]: row for row in vocab_rows}
-
-        # merge vocab richness + per_user information
-        merged_users = []
-        for row in per_user_records:
-            author = row["author"]
-            merged_users.append(
-                {
-                    "author": author,
-                    "post": int(row.get("post", 0)),
-                    "comment": int(row.get("comment", 0)),
-                    "comment_post_ratio": float(row.get("comment_post_ratio", 0)),
-                    "comment_share": float(row.get("comment_share", 0)),
-                    "avg_emotions": avg_emotions_by_author.get(author, {}),
-                    "vocab": vocab_by_author.get(
-                        author,
-                        {
-                            "vocab_richness": 0,
-                            "avg_words_per_event": 0,
-                            "top_words": [],
-                        },
-                    ),
-                }
-            )
-
-        merged_users.sort(key=lambda u: u["comment_post_ratio"])
-
-        return merged_users
-
    def interaction_graph(self, df: pd.DataFrame):
        interactions = {a: {} for a in df["author"].dropna().unique()}

--- a/server/analysis/stat_gen.py
+++ b/server/analysis/stat_gen.py
@@ -39,7 +39,7 @@ class StatGen:
        self.linguistic_analysis = LinguisticAnalysis(EXCLUDE_WORDS)
        self.cultural_analysis = CulturalAnalysis()
        self.summary_analysis = SummaryAnalysis()
-        self.user_analysis = UserAnalysis(self.interaction_analysis)
+        self.user_analysis = UserAnalysis()

    ## Private Methods
    def _prepare_filtered_df(self, df: pd.DataFrame, filters: dict | None = None) -> pd.DataFrame:
@@ -111,7 +111,7 @@ class StatGen:

        return {
            "top_users": self.user_analysis.top_users(filtered_df),
-            "users": self.user_analysis.users(filtered_df)
+            "users": self.user_analysis.per_user_analysis(filtered_df)
        }

    def interactional(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
--- a/server/analysis/user.py
+++ b/server/analysis/user.py
@@ -1,20 +1,70 @@
 import pandas as pd

-from server.analysis.interactional import InteractionAnalysis
-

 class UserAnalysis:
-    def __init__(self, interaction_analysis: InteractionAnalysis):
-        self.interaction_analysis = interaction_analysis
-
    def top_users(self, df: pd.DataFrame) -> list:
-        return self.interaction_analysis.top_users(df)
+        """Return event counts per (author, source) pair, largest first."""

-    def users(self, df: pd.DataFrame) -> dict | list:
-        return self.interaction_analysis.per_user_analysis(df)
+        # size() counts rows per group; items() yields ((author, source), n)
+        sizes = df.groupby(["author", "source"]).size()
+        ranked = sizes.sort_values(ascending=False).items()
+        rows = [{"author": a, "source": s, "count": int(n)} for (a, s), n in ranked]

-    def user(self, df: pd.DataFrame) -> dict:
-        return {
-            "top_users": self.top_users(df),
-            "users": self.users(df),
+        return rows
+
+    def per_user_analysis(self, df: pd.DataFrame) -> list:
+        """Build one summary dict per author, ordered by comment/post ratio."""
+        per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0)
+        emotion_cols = [col for col in df.columns if col.startswith("emotion_")]
+
+        avg_emotions_by_author = {}
+        if emotion_cols:
+            avg_emotions = df.groupby("author")[emotion_cols].mean().fillna(0.0)
+            avg_emotions_by_author = {
+                author: {emotion: float(score) for emotion, score in row.items()}
+                for author, row in avg_emotions.iterrows()
            }
+
+        # the frame may hold only posts or only comments; both columns are needed
+        for col in ("post", "comment"):
+            if col not in per_user.columns:
+                per_user[col] = 0
+
+        # zero denominators become 1 so the ratios below never divide by zero
+        posts, comments = per_user["post"], per_user["comment"]
+        per_user["comment_post_ratio"] = comments / posts.replace(0, 1)
+        per_user["comment_share"] = comments / (posts + comments).replace(0, 1)
+        per_user = per_user.sort_values("comment_post_ratio", ascending=True)
+        per_user_records = per_user.reset_index().to_dict(orient="records")
+
+        # FIXME(review): this class, as shown in this diff, defines no
+        # _vocab_richness_per_user, so the call below raises AttributeError.
+        vocab_rows = self._vocab_richness_per_user(df)
+        vocab_by_author = {row["author"]: row for row in vocab_rows}
+
+        # merge vocab richness + per_user information
+        merged_users = []
+        for row in per_user_records:
+            author = row["author"]
+            merged_users.append(
+                {
+                    "author": author,
+                    "post": int(row.get("post", 0)),
+                    "comment": int(row.get("comment", 0)),
+                    "comment_post_ratio": float(row.get("comment_post_ratio", 0)),
+                    "comment_share": float(row.get("comment_share", 0)),
+                    "avg_emotions": avg_emotions_by_author.get(author, {}),
+                    "vocab": vocab_by_author.get(
+                        author,
+                        {
+                            "vocab_richness": 0,
+                            "avg_words_per_event": 0,
+                            "top_words": [],
+                        },
+                    ),
+                }
+            )
+
+        merged_users.sort(key=lambda u: u["comment_post_ratio"])
+
+        return merged_users