feat(user): add dominant topic information to user data

2026-04-07 11:34:03 +01:00
parent 0c4dc02852
commit e903e1b738
3 changed files with 43 additions and 0 deletions
--- a/frontend/src/components/UserModal.tsx
+++ b/frontend/src/components/UserModal.tsx
@@ -88,6 +88,15 @@ export default function UserModal({
                  </div>
                </div>
              ) : null}
              {userData.dominant_topic ? (
                <div style={styles.topUserItem}>
                  <div style={styles.topUserName}>Most Common Topic</div>
                  <div style={styles.topUserMeta}>
                    {userData.dominant_topic.topic} ({userData.dominant_topic.count} events)
                  </div>
                </div>
              ) : null}
            </div>
          )}
        </DialogPanel>
--- a/frontend/src/types/ApiTypes.ts
+++ b/frontend/src/types/ApiTypes.ts
@@ -34,6 +34,11 @@ type Vocab = {
  top_words: FrequencyWord[];
 };
 /**
  * The single topic that occurs most often among one user's events,
  * as supplied in the optional `dominant_topic` field of `User`.
  */
 type DominantTopic = {
  /** Label of the topic. */
  topic: string;
  /** How many of the user's events carry this topic (rendered in the UI as "N events"). */
  count: number;
 };
 type User = {
  author: string;
  post: number;
@@ -41,6 +46,7 @@ type User = {
  comment_post_ratio: number;
  comment_share: number;
  avg_emotions?: Record<string, number>;
  dominant_topic?: DominantTopic | null;
  vocab?: Vocab | null;
 };
@@ -202,6 +208,7 @@ type FilterResponse = {
 export type {
  TopUser,
  DominantTopic,
  Vocab,
  User,
  InteractionGraph,
--- a/server/analysis/user.py
+++ b/server/analysis/user.py
@@ -71,6 +71,7 @@ class UserAnalysis:
        per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0)
        emotion_cols = [col for col in df.columns if col.startswith("emotion_")]
        dominant_topic_by_author = {}
        avg_emotions_by_author = {}
        if emotion_cols:
@@ -80,6 +81,31 @@ class UserAnalysis:
                for author, row in avg_emotions.iterrows()
            }
        # Build dominant_topic_by_author: for each author, the topic that appears
        # on the most rows, together with that row count. Skipped entirely when
        # the frame has no "topic" column, leaving the mapping empty.
        if "topic" in df.columns:
            # Ignore rows without a usable topic: null, empty string, or the
            # "Misc" label.
            topic_df = df[
                df["topic"].notna()
                & (df["topic"] != "")
                & (df["topic"] != "Misc")
            ]
            if not topic_df.empty:
                # Count rows per (author, topic), then order each author's topics
                # by count descending with the topic name ascending as the
                # tie-breaker. drop_duplicates keeps the first row per author by
                # default, which after this sort is that author's top topic.
                topic_counts = (
                    topic_df.groupby(["author", "topic"])
                    .size()
                    .reset_index(name="count")
                    .sort_values(
                        ["author", "count", "topic"],
                        ascending=[True, False, True],
                    )
                    .drop_duplicates(subset=["author"])
                )
                # Authors whose rows were all filtered out above get no entry here.
                dominant_topic_by_author = {
                    row["author"]: {
                        "topic": row["topic"],
                        # Cast to a plain Python int (presumably so the value
                        # serialises cleanly in the API response — confirm).
                        "count": int(row["count"]),
                    }
                    for _, row in topic_counts.iterrows()
                }
        # ensure columns always exist
        for col in ("post", "comment"):
            if col not in per_user.columns:
@@ -109,6 +135,7 @@ class UserAnalysis:
                    "comment_post_ratio": float(row.get("comment_post_ratio", 0)),
                    "comment_share": float(row.get("comment_share", 0)),
                    "avg_emotions": avg_emotions_by_author.get(author, {}),
                    "dominant_topic": dominant_topic_by_author.get(author),
                    "vocab": vocab_by_author.get(
                        author,
                        {