Compare commits
2 Commits
8372aa7278
...
71998c450e
| Author | SHA1 | Date | |
|---|---|---|---|
| 71998c450e | |||
| 2a00384a55 |
@@ -51,68 +51,23 @@ class InteractionAnalysis:
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
return round(sum(depths) / len(depths), 2)
|
return round(sum(depths) / len(depths), 2)
|
||||||
|
|
||||||
|
def top_interaction_pairs(self, df: pd.DataFrame, top_n=10):
    """Return the ``top_n`` most frequent interaction pairs.

    Builds the author interaction graph for ``df`` and flattens it into
    ``((a, b), count)`` tuples, sorted by count, most frequent first.
    """
    graph = self.interaction_graph(df)
    pairs = [
        ((a, b), count)
        for a, targets in graph.items()
        for b, count in targets.items()
    ]
    pairs.sort(key=lambda pair: pair[1], reverse=True)
    return pairs[:top_n]


def initiator_ratio(self, df: pd.DataFrame):
    """Fraction of messages that start a thread (``reply_to`` is missing).

    Returns 0 for an empty frame, otherwise the ratio rounded to 2 decimals.
    """
    starters = df["reply_to"].isna().sum()
    total = len(df)
    if total == 0:
        return 0
    return round(starters / total, 2)


def average_thread_length_by_emotion(self, df: pd.DataFrame):
    """Average reply-chain length per dominant emotion.

    For each message the reply chain is walked back to its root via the
    ``reply_to`` column, and the chain length is attributed to the message's
    dominant emotion — the ``emotion_*`` column with the highest score,
    excluding neutral/surprise. Returns ``{emotion: mean_length}`` with
    means rounded to 2 decimals.
    """
    emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
    emotion_cols = [
        c
        for c in df.columns
        if c.startswith("emotion_") and c not in emotion_exclusions
    ]
    # Robustness: with no usable emotion columns, max() below would raise
    # ValueError on an empty dict — there is nothing to aggregate anyway.
    if not emotion_cols:
        return {}

    id_to_reply = df.set_index("id")["reply_to"].to_dict()
    length_cache = {}

    def thread_length_from(start_id):
        # Walk reply_to links toward the thread root, memoizing lengths.
        if start_id in length_cache:
            return length_cache[start_id]

        seen = set()
        length = 1
        current = start_id

        while True:
            if current in seen:
                # Infinite loop shouldn't happen, but just in case.
                break
            seen.add(current)

            reply_to = id_to_reply.get(current)
            if (
                reply_to is None
                or (isinstance(reply_to, float) and pd.isna(reply_to))
                or reply_to == ""
            ):
                break  # reached a thread root

            length += 1
            current = reply_to

            if current in length_cache:
                # Splice in the memoized tail; the -1 avoids counting the
                # shared node twice.
                length += length_cache[current] - 1
                break

        length_cache[start_id] = length
        return length

    emotion_to_lengths = {}

    # Fill NaNs in emotion cols to avoid max() issues.
    emo_df = df[["id"] + emotion_cols].copy()
    emo_df[emotion_cols] = emo_df[emotion_cols].fillna(0)

    for _, row in emo_df.iterrows():
        msg_id = row["id"]
        length = thread_length_from(msg_id)

        emotions = {c: row[c] for c in emotion_cols}
        dominant = max(emotions, key=emotions.get)

        emotion_to_lengths.setdefault(dominant, []).append(length)

    return {
        emotion: round(sum(lengths) / len(lengths), 2)
        for emotion, lengths in emotion_to_lengths.items()
    }
|||||||
@@ -120,7 +120,8 @@ class StatGen:
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
"average_thread_depth": self.interaction_analysis.average_thread_depth(filtered_df),
|
"average_thread_depth": self.interaction_analysis.average_thread_depth(filtered_df),
|
||||||
"average_thread_length_by_emotion": self.interaction_analysis.average_thread_length_by_emotion(filtered_df),
|
"top_interaction_pairs": self.interaction_analysis.top_interaction_pairs(filtered_df, top_n=100),
|
||||||
|
"initiator_ratio": self.interaction_analysis.initiator_ratio(filtered_df),
|
||||||
"interaction_graph": self.interaction_analysis.interaction_graph(filtered_df)
|
"interaction_graph": self.interaction_analysis.interaction_graph(filtered_df)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ CREATE TABLE events (
|
|||||||
weekday VARCHAR(255) NOT NULL,
|
weekday VARCHAR(255) NOT NULL,
|
||||||
|
|
||||||
/* Posts Only */
|
/* Posts Only */
|
||||||
title VARCHAR(255),
|
title TEXT,
|
||||||
|
|
||||||
/* Comments Only*/
|
/* Comments Only*/
|
||||||
parent_id VARCHAR(255),
|
parent_id VARCHAR(255),
|
||||||
|
|||||||
Reference in New Issue
Block a user