From 257eb80de7a2d58554a8fe06fe7544c16460f16a Mon Sep 17 00:00:00 2001
From: Dylan De Faoite <dylanseandefaoite@gmail.com>
Date: Mon, 23 Feb 2026 19:09:48 +0000
Subject: [PATCH] feat(api): add average thread length per emotion

---
 server/analysis/interactional.py | 62 +++++++++++++++++++++++++++++++-
 server/stat_gen.py               |  1 +
 2 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/server/analysis/interactional.py b/server/analysis/interactional.py
index 0d7785c..4ed4950 100644
--- a/server/analysis/interactional.py
+++ b/server/analysis/interactional.py
@@ -145,4 +145,64 @@ class InteractionAnalysis:
         if not depths:
             return 0
         
-        return round(sum(depths) / len(depths), 2)
\ No newline at end of file
+        return round(sum(depths) / len(depths), 2)
+    
+    def average_thread_length_by_emotion(self):
+        """Return the mean reply-chain length per dominant emotion.
+
+        A row's length is 1 plus the ``reply_to`` hops followed before a missing
+        or empty parent, or an already visited id, ends the walk. Its dominant
+        emotion is the highest ``emotion_*`` column (neutral and surprise are
+        skipped; NaN scores count as 0). Returns ``{column: mean}`` rounded to
+        two decimals, or ``{}`` when there are no emotion columns.
+        """
+        emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
+        emotion_cols = [
+            c for c in self.df.columns
+            if c.startswith("emotion_") and c not in emotion_exclusions
+        ]
+        if not emotion_cols:
+            return {}  # max() over zero columns would raise ValueError
+
+        id_to_reply = self.df.set_index("id")["reply_to"].to_dict()
+        length_cache = {}
+
+        def thread_length_from(start_id):
+            if start_id in length_cache:
+                return length_cache[start_id]
+
+            seen = set()
+            length = 1
+            current = start_id
+            while current not in seen:  # a revisited id means a cyclic chain
+                seen.add(current)
+                reply_to = id_to_reply.get(current)
+                # pd.isna covers None, NaN and pd.NA; "" also means no parent
+                if pd.isna(reply_to) or reply_to == "":
+                    break
+                length += 1
+                current = reply_to
+                if current in length_cache:
+                    # the cached total already counts the parent itself
+                    length += length_cache[current] - 1
+                    break
+
+            length_cache[start_id] = length
+            return length
+
+        # NaN scores count as 0 so max() can always compare them
+        scores = self.df[emotion_cols].fillna(0)
+        emotion_to_lengths = {}
+
+        # itertuples() keeps each column's dtype; iterrows() would upcast integer
+        # ids to float alongside the scores and corrupt large ids
+        rows = scores.itertuples(index=False, name=None)
+        for msg_id, row in zip(self.df["id"], rows):
+            # first column holding the row maximum, matching max() tie-breaking
+            dominant = emotion_cols[row.index(max(row))]
+            emotion_to_lengths.setdefault(dominant, []).append(
+                thread_length_from(msg_id))
+        return {
+            emotion: round(sum(lengths) / len(lengths), 2)
+            for emotion, lengths in emotion_to_lengths.items()
+        }
\ No newline at end of file
diff --git a/server/stat_gen.py b/server/stat_gen.py
index 209589b..6ac7159 100644
--- a/server/stat_gen.py
+++ b/server/stat_gen.py
@@ -97,6 +97,7 @@ class StatGen:
     def get_interactional_analysis(self) -> dict:
         return {
             "average_thread_depth": self.interaction_analysis.average_thread_depth(),
+            "average_thread_length_by_emotion": self.interaction_analysis.average_thread_length_by_emotion()  # mean reply-chain length per dominant emotion column
         }
     
     # detect community jargon