def top_interaction_pairs(self, df: pd.DataFrame, top_n=10):
    """Return the top_n heaviest directed interaction pairs.

    Flattens the interaction graph built from *df* into
    ((source, target), count) tuples and returns the top_n of them,
    ordered by descending count (ties keep graph insertion order,
    since Python's sort is stable).
    """
    adjacency = self.interaction_graph(df)

    # Flatten the nested {source: {target: count}} mapping into pairs.
    flattened = [
        ((source, target), weight)
        for source, targets in adjacency.items()
        for target, weight in targets.items()
    ]

    return sorted(flattened, key=lambda item: item[1], reverse=True)[:top_n]

def initiator_ratio(self, df: pd.DataFrame):
    """Fraction of messages that start a conversation.

    A message is an initiator when its ``reply_to`` is missing (NaN/None).
    Returns the ratio rounded to 2 decimal places, or 0 for an empty frame.
    """
    message_count = len(df)

    # Guard the division; an empty frame has no initiators by definition.
    if message_count == 0:
        return 0

    thread_starts = df["reply_to"].isna().sum()
    return round(thread_starts / message_count, 2)