feat(user): add dominant topic information to user data

This commit is contained in:
2026-04-07 11:34:03 +01:00
parent 0c4dc02852
commit e903e1b738
3 changed files with 43 additions and 0 deletions

View File

@@ -88,6 +88,15 @@ export default function UserModal({
</div> </div>
</div> </div>
) : null} ) : null}
{userData.dominant_topic ? (
<div style={styles.topUserItem}>
<div style={styles.topUserName}>Most Common Topic</div>
<div style={styles.topUserMeta}>
{userData.dominant_topic.topic} ({userData.dominant_topic.count} events)
</div>
</div>
) : null}
</div> </div>
)} )}
</DialogPanel> </DialogPanel>

View File

@@ -34,6 +34,11 @@ type Vocab = {
top_words: FrequencyWord[]; top_words: FrequencyWord[];
}; };
/**
 * The most common topic for a single user, together with how often it occurs.
 * Carried on `User.dominant_topic`, which may be absent or `null`.
 */
type DominantTopic = {
/** Label of the topic. */
topic: string;
/** Number of the user's events that carry this topic. */
count: number;
};
type User = { type User = {
author: string; author: string;
post: number; post: number;
@@ -41,6 +46,7 @@ type User = {
comment_post_ratio: number; comment_post_ratio: number;
comment_share: number; comment_share: number;
avg_emotions?: Record<string, number>; avg_emotions?: Record<string, number>;
dominant_topic?: DominantTopic | null;
vocab?: Vocab | null; vocab?: Vocab | null;
}; };
@@ -202,6 +208,7 @@ type FilterResponse = {
export type { export type {
TopUser, TopUser,
DominantTopic,
Vocab, Vocab,
User, User,
InteractionGraph, InteractionGraph,

View File

@@ -71,6 +71,7 @@ class UserAnalysis:
per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0) per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0)
emotion_cols = [col for col in df.columns if col.startswith("emotion_")] emotion_cols = [col for col in df.columns if col.startswith("emotion_")]
dominant_topic_by_author = {}
avg_emotions_by_author = {} avg_emotions_by_author = {}
if emotion_cols: if emotion_cols:
@@ -80,6 +81,31 @@ class UserAnalysis:
for author, row in avg_emotions.iterrows() for author, row in avg_emotions.iterrows()
} }
if "topic" in df.columns:
topic_df = df[
df["topic"].notna()
& (df["topic"] != "")
& (df["topic"] != "Misc")
]
if not topic_df.empty:
topic_counts = (
topic_df.groupby(["author", "topic"])
.size()
.reset_index(name="count")
.sort_values(
["author", "count", "topic"],
ascending=[True, False, True],
)
.drop_duplicates(subset=["author"])
)
dominant_topic_by_author = {
row["author"]: {
"topic": row["topic"],
"count": int(row["count"]),
}
for _, row in topic_counts.iterrows()
}
# ensure columns always exist # ensure columns always exist
for col in ("post", "comment"): for col in ("post", "comment"):
if col not in per_user.columns: if col not in per_user.columns:
@@ -109,6 +135,7 @@ class UserAnalysis:
"comment_post_ratio": float(row.get("comment_post_ratio", 0)), "comment_post_ratio": float(row.get("comment_post_ratio", 0)),
"comment_share": float(row.get("comment_share", 0)), "comment_share": float(row.get("comment_share", 0)),
"avg_emotions": avg_emotions_by_author.get(author, {}), "avg_emotions": avg_emotions_by_author.get(author, {}),
"dominant_topic": dominant_topic_by_author.get(author),
"vocab": vocab_by_author.get( "vocab": vocab_by_author.get(
author, author,
{ {