From 3e78a54388e4bc3cd2d2637a443736d29910fb48 Mon Sep 17 00:00:00 2001 From: Dylan De Faoite Date: Wed, 18 Mar 2026 18:36:05 +0000 Subject: [PATCH] feat(stat): add conversation concentration metric Remove the old `initiator_ratio` metric, which wasn't working due to every event having a `reply_to` value. The new conversation concentration metric was suggested by AI, and turned out to be a surprisingly interesting one that gave useful insights. --- server/analysis/interactional.py | 26 ++++++++++++++++++++------ server/analysis/stat_gen.py | 4 ++-- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/server/analysis/interactional.py b/server/analysis/interactional.py index 7e0c081..e15940e 100644 --- a/server/analysis/interactional.py +++ b/server/analysis/interactional.py @@ -63,11 +63,25 @@ class InteractionAnalysis: pairs.sort(key=lambda x: x[1], reverse=True) return pairs[:top_n] - def initiator_ratio(self, df: pd.DataFrame): - starters = df["reply_to"].isna().sum() - total = len(df) + def conversation_concentration(self, df: pd.DataFrame) -> dict: + if "type" not in df.columns: + return {} - if total == 0: - return 0 + comments = df[df["type"] == "comment"] + if comments.empty: + return {} - return round(starters / total, 2) + author_counts = comments["author"].value_counts() + total_comments = len(comments) + total_authors = len(author_counts) + + top_10_pct_n = max(1, int(total_authors * 0.1)) + top_10_pct_share = round(author_counts.head(top_10_pct_n).sum() / total_comments, 4) + + return { + "total_commenting_authors": total_authors, + "top_10pct_author_count": top_10_pct_n, + "top_10pct_comment_share": float(top_10_pct_share), + "single_comment_authors": int((author_counts == 1).sum()), + "single_comment_author_ratio": float(round((author_counts == 1).sum() / total_authors, 4)), + } \ No newline at end of file diff --git a/server/analysis/stat_gen.py b/server/analysis/stat_gen.py index f5b328d..4368841 100644 --- a/server/analysis/stat_gen.py +++ b/server/analysis/stat_gen.py @@ -121,8 +121,8 
@@ class StatGen: return { "average_thread_depth": self.interaction_analysis.average_thread_depth(filtered_df), "top_interaction_pairs": self.interaction_analysis.top_interaction_pairs(filtered_df, top_n=100), - "initiator_ratio": self.interaction_analysis.initiator_ratio(filtered_df), - "interaction_graph": self.interaction_analysis.interaction_graph(filtered_df) + "interaction_graph": self.interaction_analysis.interaction_graph(filtered_df), + "conversation_concentration": self.interaction_analysis.conversation_concentration(filtered_df) } def cultural(self, df: pd.DataFrame, filters: dict | None = None) -> dict: