From e903e1b738c2b2a92f231ac8046d3f61279f788c Mon Sep 17 00:00:00 2001 From: Dylan De Faoite Date: Tue, 7 Apr 2026 11:34:03 +0100 Subject: [PATCH] feat(user): add dominant topic information to user data --- frontend/src/components/UserModal.tsx | 9 +++++++++ frontend/src/types/ApiTypes.ts | 7 +++++++ server/analysis/user.py | 27 +++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/frontend/src/components/UserModal.tsx b/frontend/src/components/UserModal.tsx index 6321804..30ab6c9 100644 --- a/frontend/src/components/UserModal.tsx +++ b/frontend/src/components/UserModal.tsx @@ -88,6 +88,15 @@ export default function UserModal({ ) : null} + + {userData.dominant_topic ? ( +
+
Most Common Topic
+
+ {userData.dominant_topic.topic} ({userData.dominant_topic.count} events) +
+
+ ) : null} )} diff --git a/frontend/src/types/ApiTypes.ts b/frontend/src/types/ApiTypes.ts index 9492119..3143c32 100644 --- a/frontend/src/types/ApiTypes.ts +++ b/frontend/src/types/ApiTypes.ts @@ -34,6 +34,11 @@ type Vocab = { top_words: FrequencyWord[]; }; +type DominantTopic = { + topic: string; + count: number; +}; + type User = { author: string; post: number; @@ -41,6 +46,7 @@ type User = { comment_post_ratio: number; comment_share: number; avg_emotions?: Record; + dominant_topic?: DominantTopic | null; vocab?: Vocab | null; }; @@ -202,6 +208,7 @@ type FilterResponse = { export type { TopUser, + DominantTopic, Vocab, User, InteractionGraph, diff --git a/server/analysis/user.py b/server/analysis/user.py index f4837d3..2fc5c94 100644 --- a/server/analysis/user.py +++ b/server/analysis/user.py @@ -71,6 +71,7 @@ class UserAnalysis: per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0) emotion_cols = [col for col in df.columns if col.startswith("emotion_")] + dominant_topic_by_author = {} avg_emotions_by_author = {} if emotion_cols: @@ -80,6 +81,31 @@ class UserAnalysis: for author, row in avg_emotions.iterrows() } + if "topic" in df.columns: + topic_df = df[ + df["topic"].notna() + & (df["topic"] != "") + & (df["topic"] != "Misc") + ] + if not topic_df.empty: + topic_counts = ( + topic_df.groupby(["author", "topic"]) + .size() + .reset_index(name="count") + .sort_values( + ["author", "count", "topic"], + ascending=[True, False, True], + ) + .drop_duplicates(subset=["author"]) + ) + dominant_topic_by_author = { + row["author"]: { + "topic": row["topic"], + "count": int(row["count"]), + } + for _, row in topic_counts.iterrows() + } + # ensure columns always exist for col in ("post", "comment"): if col not in per_user.columns: @@ -109,6 +135,7 @@ class UserAnalysis: "comment_post_ratio": float(row.get("comment_post_ratio", 0)), "comment_share": float(row.get("comment_share", 0)), "avg_emotions": avg_emotions_by_author.get(author, {}), + "dominant_topic": dominant_topic_by_author.get(author), "vocab": vocab_by_author.get( author, {