refactor(stats): move user stats out of interactional into users
This commit is contained in:
@@ -57,73 +57,6 @@ class InteractionAnalysis:
|
|||||||
|
|
||||||
return rows
|
return rows
|
||||||
|
|
||||||
def top_users(self, df: pd.DataFrame) -> list:
    """Rank (author, source) pairs by event count, most active first.

    Returns a list of ``{"author", "source", "count"}`` dicts ordered by
    descending count.
    """
    pair_counts = (
        df.groupby(["author", "source"])
        .size()
        .sort_values(ascending=False)
    )
    ranked = []
    for (author, source), n in pair_counts.items():
        ranked.append({"author": author, "source": source, "count": int(n)})
    return ranked
|
|
||||||
|
|
||||||
def per_user_analysis(self, df: pd.DataFrame) -> list:
    """Build per-author activity, emotion, and vocabulary statistics.

    Args:
        df: Event-level frame with at least ``author`` and ``type``
            columns (``type`` values are "post"/"comment"); optional
            ``emotion_*`` columns carry per-event emotion scores.

    Returns:
        A list of per-author dicts sorted by ascending comment/post
        ratio.  (Fixed: the previous ``-> dict`` annotation was wrong —
        the method has always returned a list.)
    """
    # Count events per (author, type); missing combinations become 0.
    per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0)

    # Average emotion scores per author, only when emotion columns exist.
    emotion_cols = [col for col in df.columns if col.startswith("emotion_")]
    avg_emotions_by_author = {}
    if emotion_cols:
        avg_emotions = df.groupby("author")[emotion_cols].mean().fillna(0.0)
        avg_emotions_by_author = {
            author: {emotion: float(score) for emotion, score in row.items()}
            for author, row in avg_emotions.iterrows()
        }

    # Ensure columns always exist even if the frame had no posts/comments.
    for col in ("post", "comment"):
        if col not in per_user.columns:
            per_user[col] = 0

    # Zero denominators are replaced by 1 so the ratios stay finite.
    per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(
        0, 1
    )
    per_user["comment_share"] = per_user["comment"] / (
        per_user["post"] + per_user["comment"]
    ).replace(0, 1)
    per_user = per_user.sort_values("comment_post_ratio", ascending=True)
    per_user_records = per_user.reset_index().to_dict(orient="records")

    vocab_rows = self._vocab_richness_per_user(df)
    vocab_by_author = {row["author"]: row for row in vocab_rows}

    # Merge vocab richness + per-user information into one record per author.
    merged_users = []
    for row in per_user_records:
        author = row["author"]
        merged_users.append(
            {
                "author": author,
                "post": int(row.get("post", 0)),
                "comment": int(row.get("comment", 0)),
                "comment_post_ratio": float(row.get("comment_post_ratio", 0)),
                "comment_share": float(row.get("comment_share", 0)),
                "avg_emotions": avg_emotions_by_author.get(author, {}),
                "vocab": vocab_by_author.get(
                    author,
                    # Default when the vocab helper reported nothing
                    # for this author.
                    {
                        "vocab_richness": 0,
                        "avg_words_per_event": 0,
                        "top_words": [],
                    },
                ),
            }
        )

    # Records already inherit per_user's ordering, so the former
    # redundant re-sort by the same key has been dropped.
    return merged_users
|
|
||||||
|
|
||||||
def interaction_graph(self, df: pd.DataFrame):
|
def interaction_graph(self, df: pd.DataFrame):
|
||||||
interactions = {a: {} for a in df["author"].dropna().unique()}
|
interactions = {a: {} for a in df["author"].dropna().unique()}
|
||||||
|
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ class StatGen:
|
|||||||
self.linguistic_analysis = LinguisticAnalysis(EXCLUDE_WORDS)
|
self.linguistic_analysis = LinguisticAnalysis(EXCLUDE_WORDS)
|
||||||
self.cultural_analysis = CulturalAnalysis()
|
self.cultural_analysis = CulturalAnalysis()
|
||||||
self.summary_analysis = SummaryAnalysis()
|
self.summary_analysis = SummaryAnalysis()
|
||||||
self.user_analysis = UserAnalysis(self.interaction_analysis)
|
self.user_analysis = UserAnalysis()
|
||||||
|
|
||||||
## Private Methods
|
## Private Methods
|
||||||
def _prepare_filtered_df(self, df: pd.DataFrame, filters: dict | None = None) -> pd.DataFrame:
|
def _prepare_filtered_df(self, df: pd.DataFrame, filters: dict | None = None) -> pd.DataFrame:
|
||||||
@@ -111,7 +111,7 @@ class StatGen:
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
"top_users": self.user_analysis.top_users(filtered_df),
|
"top_users": self.user_analysis.top_users(filtered_df),
|
||||||
"users": self.user_analysis.users(filtered_df)
|
"users": self.user_analysis.per_user_analysis(filtered_df)
|
||||||
}
|
}
|
||||||
|
|
||||||
def interactional(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
def interactional(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||||
|
|||||||
@@ -1,20 +1,70 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from server.analysis.interactional import InteractionAnalysis
|
|
||||||
|
|
||||||
|
|
||||||
class UserAnalysis:
|
class UserAnalysis:
|
||||||
def __init__(self, interaction_analysis: InteractionAnalysis):
|
|
||||||
self.interaction_analysis = interaction_analysis
|
|
||||||
|
|
||||||
def top_users(self, df: pd.DataFrame) -> list:
    """Return authors ranked by per-source activity, busiest first."""
    counts = df.groupby(["author", "source"]).size().sort_values(ascending=False)
    return [
        {"author": author, "source": source, "count": int(count)}
        for (author, source), count in counts.items()
    ]
|
||||||
return {
|
|
||||||
"top_users": self.top_users(df),
|
def per_user_analysis(self, df: pd.DataFrame) -> list:
    """Build per-author activity, emotion, and vocabulary statistics.

    Args:
        df: Event-level frame with at least ``author`` and ``type``
            columns (``type`` values are "post"/"comment"); optional
            ``emotion_*`` columns carry per-event emotion scores.

    Returns:
        A list of per-author dicts sorted by ascending comment/post
        ratio.  (Fixed: the previous ``-> dict`` annotation was wrong —
        the method has always returned a list.)
    """
    # Count events per (author, type); missing combinations become 0.
    per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0)

    # Average emotion scores per author, only when emotion columns exist.
    emotion_cols = [col for col in df.columns if col.startswith("emotion_")]
    avg_emotions_by_author = {}
    if emotion_cols:
        avg_emotions = df.groupby("author")[emotion_cols].mean().fillna(0.0)
        avg_emotions_by_author = {
            author: {emotion: float(score) for emotion, score in row.items()}
            for author, row in avg_emotions.iterrows()
        }

    # Ensure columns always exist even if the frame had no posts/comments.
    for col in ("post", "comment"):
        if col not in per_user.columns:
            per_user[col] = 0

    # Zero denominators are replaced by 1 so the ratios stay finite.
    per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(
        0, 1
    )
    per_user["comment_share"] = per_user["comment"] / (
        per_user["post"] + per_user["comment"]
    ).replace(0, 1)
    per_user = per_user.sort_values("comment_post_ratio", ascending=True)
    per_user_records = per_user.reset_index().to_dict(orient="records")

    vocab_rows = self._vocab_richness_per_user(df)
    vocab_by_author = {row["author"]: row for row in vocab_rows}

    # Merge vocab richness + per-user information into one record per author.
    merged_users = []
    for row in per_user_records:
        author = row["author"]
        merged_users.append(
            {
                "author": author,
                "post": int(row.get("post", 0)),
                "comment": int(row.get("comment", 0)),
                "comment_post_ratio": float(row.get("comment_post_ratio", 0)),
                "comment_share": float(row.get("comment_share", 0)),
                "avg_emotions": avg_emotions_by_author.get(author, {}),
                "vocab": vocab_by_author.get(
                    author,
                    # Default when the vocab helper reported nothing
                    # for this author.
                    {
                        "vocab_richness": 0,
                        "avg_words_per_event": 0,
                        "top_words": [],
                    },
                ),
            }
        )

    # Records already inherit per_user's ordering, so the former
    # redundant re-sort by the same key has been dropped.
    return merged_users
|
||||||
|
|||||||
Reference in New Issue
Block a user