feat(api): add average thread length per emotion

feat(api): add average thread depth
fix(api): broken analysis calls due to overlap in attribute and method names
2026-02-23 19:09:48 +00:00 · 2026-02-23 18:14:34 +00:00 · 2026-02-23 18:14:24 +00:00
3 changed files with 107 additions and 11 deletions
--- a/server/analysis/interactional.py
+++ b/server/analysis/interactional.py
@@ -124,3 +124,85 @@ class InteractionAnalysis:
            interactions[a][b] = interactions[a].get(b, 0) + 1
        return interactions
    def average_thread_depth(self):
        """Return the mean reply-chain depth over all rows of ``self.df``.

        A row's depth is the number of ``reply_to`` links followed, starting
        from the row's ``id``, until a missing (None/NaN/NA) or empty-string
        ``reply_to`` is reached. A row that is not a reply has depth 0.

        Returns:
            The mean depth rounded to 2 decimal places, or ``0`` when the
            frame has no rows.
        """
        id_to_reply = self.df.set_index("id")["reply_to"].to_dict()

        depths = []
        for start_id in self.df["id"]:
            depth = 0
            current_id = start_id
            # Cyclic reply chains (a -> b -> a) should not exist, but without
            # this guard they would spin forever; stop once an id is revisited.
            seen = set()
            while current_id not in seen:
                seen.add(current_id)
                reply_to = id_to_reply.get(current_id)
                # .get() yields None for ids that are not in the frame, and
                # pd.isna(None) is True, so a dangling parent ends the walk.
                if pd.isna(reply_to) or reply_to == "":
                    break
                depth += 1
                current_id = reply_to
            depths.append(depth)

        if not depths:
            return 0
        return round(sum(depths) / len(depths), 2)
    def average_thread_length_by_emotion(self):
        """Return the mean reply-chain length grouped by dominant emotion.

        For each row, the chain length is 1 (the row itself) plus the number
        of ``reply_to`` links followed until a missing or empty ``reply_to``
        is reached. Each row is attributed to the ``emotion_*`` column that
        holds its largest value (NaN is treated as 0; on ties the first such
        column in column order wins). ``emotion_neutral`` and
        ``emotion_surprise`` are never considered.

        Returns:
            dict mapping emotion column name -> mean chain length rounded to
            2 decimal places. Emotions that are never dominant are omitted.

        Raises:
            ValueError: if the frame has rows but no eligible ``emotion_*``
                column (raised by ``max()`` on an empty mapping).
        """
        emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
        emotion_cols = [
            c for c in self.df.columns
            if c.startswith("emotion_") and c not in emotion_exclusions
        ]

        id_to_reply = self.df.set_index("id")["reply_to"].to_dict()
        length_cache = {}

        def has_no_parent(reply_to):
            # pd.isna covers None, NaN and pd.NA alike. Testing pd.NA with
            # ``== ""`` yields pd.NA, whose truth value raises TypeError, so
            # the missing-value check must come first.
            return pd.isna(reply_to) or reply_to == ""

        def thread_length_from(start_id):
            if start_id in length_cache:
                return length_cache[start_id]

            seen = set()
            length = 1
            current = start_id

            while True:
                if current in seen:
                    # Cyclic chains shouldn't happen, but never loop forever.
                    break
                seen.add(current)

                reply_to = id_to_reply.get(current)
                if has_no_parent(reply_to):
                    break

                length += 1
                current = reply_to

                if current in length_cache:
                    # The parent's cached length already counts the parent,
                    # which was just added above, hence the -1.
                    length += length_cache[current] - 1
                    break

            length_cache[start_id] = length
            return length

        emotion_to_lengths = {}

        # Fill NaNs in the emotion columns so max() compares real numbers.
        emo_df = self.df[["id"] + emotion_cols].copy()
        emo_df[emotion_cols] = emo_df[emotion_cols].fillna(0)

        for _, row in emo_df.iterrows():
            length = thread_length_from(row["id"])

            emotions = {c: row[c] for c in emotion_cols}
            dominant = max(emotions, key=emotions.get)

            emotion_to_lengths.setdefault(dominant, []).append(length)

        return {
            emotion: round(sum(lengths) / len(lengths), 2)
            for emotion, lengths in emotion_to_lengths.items()
        }
--- a/server/app.py
+++ b/server/app.py
@@ -55,7 +55,7 @@ def word_frequencies():
        return jsonify({"error": "No data uploaded"}), 400
    try:
-        return jsonify(stat_obj.content_analysis()), 200
+        return jsonify(stat_obj.get_content_analysis()), 200
    except ValueError as e:
        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
    except Exception as e:
@@ -80,7 +80,7 @@ def get_time_analysis():
        return jsonify({"error": "No data uploaded"}), 400
    try:
-        return jsonify(stat_obj.time_analysis()), 200
+        return jsonify(stat_obj.get_time_analysis()), 200
    except ValueError as e:
        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
    except Exception as e:
@@ -93,7 +93,7 @@ def get_user_analysis():
        return jsonify({"error": "No data uploaded"}), 400
    try:
-        return jsonify(stat_obj.user_analysis()), 200
+        return jsonify(stat_obj.get_user_analysis()), 200
    except ValueError as e:
        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
    except Exception as e:
@@ -106,7 +106,20 @@ def get_cultural_analysis():
        return jsonify({"error": "No data uploaded"}), 400
    try:
-        return jsonify(stat_obj.cultural_analysis()), 200
+        return jsonify(stat_obj.get_cultural_analysis()), 200
    except ValueError as e:
        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route("/stats/interaction", methods=["GET"])
 def get_interaction_analysis():
    if stat_obj is None:
        return jsonify({"error": "No data uploaded"}), 400
    try:
        return jsonify(stat_obj.get_interactional_analysis()), 200
    except ValueError as e:
        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
    except Exception as e:
--- a/server/stat_gen.py
+++ b/server/stat_gen.py
@@ -66,14 +66,14 @@ class StatGen:
    # topics over time
    # emotions over time
-    def time_analysis(self) -> pd.DataFrame:
+    def get_time_analysis(self) -> pd.DataFrame:
        return {
            "events_per_day": self.temporal_analysis.posts_per_day(),
            "weekday_hour_heatmap": self.temporal_analysis.heatmap()
        }
    # average topic duration
-    def content_analysis(self) -> dict:
+    def get_content_analysis(self) -> dict:
        return {
            "word_frequencies": self.linguistic_analysis.word_frequencies(),
            "common_two_phrases": self.linguistic_analysis.ngrams(),
@@ -84,7 +84,7 @@ class StatGen:
    # average emotion per user
    # average chain length
-    def user_analysis(self) -> dict:
+    def get_user_analysis(self) -> dict:
        return {
            "top_users": self.interaction_analysis.top_users(),
            "users": self.interaction_analysis.per_user_analysis(),
@@ -94,14 +94,15 @@ class StatGen:
    # average / max thread depth
    # high engagement threads based on volume
-    def conversational_analysis(self) -> dict:
+    def get_interactional_analysis(self) -> dict:
        return {
-
+            "average_thread_depth": self.interaction_analysis.average_thread_depth(),
            "average_thread_length_by_emotion": self.interaction_analysis.average_thread_length_by_emotion()
        }
    # detect community jargon
    # in-group and out-group linguistic markers
-    def cultural_analysis(self) -> dict:
+    def get_cultural_analysis(self) -> dict:
        return {
            "identity_markers": self.linguistic_analysis.identity_markers()
        }
Author	SHA1	Message	Date
Dylan De Faoite	257eb80de7	feat(api): add average thread length per emotion	2026-02-23 19:09:48 +00:00
Dylan De Faoite	3a23b1f0c8	feat(api): add average thread depth	2026-02-23 18:14:34 +00:00
Dylan De Faoite	8c76476cd3	fix(api): broken analysis calls due to overlap in attribute and method names	2026-02-23 18:14:24 +00:00