def top_interaction_pairs(self, df: pd.DataFrame, top_n=10):
    """Return the top_n heaviest directed interaction pairs.

    Flattens the interaction graph built from *df* into
    ((source, target), count) tuples and returns the top_n of them,
    ordered by descending count (ties keep graph insertion order,
    since Python's sort is stable).
    """
    adjacency = self.interaction_graph(df)

    # Flatten the nested {source: {target: count}} mapping into pairs.
    flattened = [
        ((source, target), weight)
        for source, targets in adjacency.items()
        for target, weight in targets.items()
    ]

    return sorted(flattened, key=lambda item: item[1], reverse=True)[:top_n]

def initiator_ratio(self, df: pd.DataFrame):
    """Fraction of messages that start a conversation.

    A message is an initiator when its ``reply_to`` is missing (NaN/None).
    Returns the ratio rounded to 2 decimal places, or 0 for an empty frame.
    """
    message_count = len(df)

    # Guard the division; an empty frame has no initiators by definition.
    if message_count == 0:
        return 0

    thread_starts = df["reply_to"].isna().sum()
    return round(thread_starts / message_count, 2)