From e903e1b738c2b2a92f231ac8046d3f61279f788c Mon Sep 17 00:00:00 2001
From: Dylan De Faoite <dylanseandefaoite@gmail.com>
Date: Tue, 7 Apr 2026 11:34:03 +0100
Subject: [PATCH] feat(user): add dominant topic information to user data

---
 frontend/src/components/UserModal.tsx |  9 +++++++++
 frontend/src/types/ApiTypes.ts        |  7 +++++++
 server/analysis/user.py               | 27 +++++++++++++++++++++++++++
 3 files changed, 43 insertions(+)
diff --git a/frontend/src/components/UserModal.tsx b/frontend/src/components/UserModal.tsx
index 6321804..30ab6c9 100644
--- a/frontend/src/components/UserModal.tsx
+++ b/frontend/src/components/UserModal.tsx
@@ -88,6 +88,15 @@ export default function UserModal({
                   </div>
                 </div>
               ) : null}
+
+              {userData.dominant_topic ? (
+                <div style={styles.topUserItem}>
+                  <div style={styles.topUserName}>Most Common Topic</div>
+                  <div style={styles.topUserMeta}>
+                    {userData.dominant_topic.topic} ({userData.dominant_topic.count} events)
+                  </div>
+                </div>
+              ) : null}
             </div>
           )}
         </DialogPanel>
diff --git a/frontend/src/types/ApiTypes.ts b/frontend/src/types/ApiTypes.ts
index 9492119..3143c32 100644
--- a/frontend/src/types/ApiTypes.ts
+++ b/frontend/src/types/ApiTypes.ts
@@ -34,6 +34,11 @@ type Vocab = {
   top_words: FrequencyWord[];
 };
 
+type DominantTopic = {
+  topic: string; // topic label the server ranked highest for this user
+  count: number; // how many of the user's events carry that topic
+};
+
 type User = {
   author: string;
   post: number;
@@ -41,6 +46,7 @@ type User = {
   comment_post_ratio: number;
   comment_share: number;
   avg_emotions?: Record<string, number>;
+  dominant_topic?: DominantTopic | null;
   vocab?: Vocab | null;
 };
 
@@ -202,6 +208,7 @@ type FilterResponse = {
 
 export type {
   TopUser,
+  DominantTopic,
   Vocab,
   User,
   InteractionGraph,
diff --git a/server/analysis/user.py b/server/analysis/user.py
index f4837d3..2fc5c94 100644
--- a/server/analysis/user.py
+++ b/server/analysis/user.py
@@ -71,6 +71,7 @@ class UserAnalysis:
         per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0)
 
         emotion_cols = [col for col in df.columns if col.startswith("emotion_")]
+        dominant_topic_by_author = {}
 
         avg_emotions_by_author = {}
         if emotion_cols:
@@ -80,6 +81,31 @@ class UserAnalysis:
                 for author, row in avg_emotions.iterrows()
             }
 
+        if "topic" in df.columns:  # skipped when the frame has no topic column
+            topic_df = df[  # rows that carry a usable topic label
+                df["topic"].notna()
+                & (df["topic"] != "")
+                & (df["topic"] != "Misc")  # "Misc" never counts as dominant
+            ]
+            if not topic_df.empty:
+                topic_counts = (
+                    topic_df.groupby(["author", "topic"])
+                    .size()  # number of rows per (author, topic) pair
+                    .reset_index(name="count")
+                    .sort_values(
+                        ["author", "count", "topic"],
+                        ascending=[True, False, True],  # count desc; ties: topic asc
+                    )
+                    .drop_duplicates(subset=["author"])  # keeps first row per author
+                )
+                dominant_topic_by_author = {  # author -> {"topic", "count"}
+                    row["author"]: {
+                        "topic": row["topic"],
+                        "count": int(row["count"]),
+                    }
+                    for _, row in topic_counts.iterrows()
+                }
+
         # ensure columns always exist
         for col in ("post", "comment"):
             if col not in per_user.columns:
@@ -109,6 +135,7 @@ class UserAnalysis:
                     "comment_post_ratio": float(row.get("comment_post_ratio", 0)),
                     "comment_share": float(row.get("comment_share", 0)),
                     "avg_emotions": avg_emotions_by_author.get(author, {}),
+                    "dominant_topic": dominant_topic_by_author.get(author),
                     "vocab": vocab_by_author.get(
                         author,
                         {