feat(stat): add conversation concentration metric

Remove the old `initiator_ratio` metric, which wasn't working because every event has a `reply_to` value (so `reply_to` was never null and the ratio always came out as 0).

The new conversation concentration metric was suggested by AI, and it turned out to be a surprisingly useful one that yielded interesting insights.
This commit is contained in:
2026-03-18 18:36:05 +00:00
parent 71998c450e
commit 3e78a54388
2 changed files with 22 additions and 8 deletions

View File

@@ -63,11 +63,25 @@ class InteractionAnalysis:
pairs.sort(key=lambda x: x[1], reverse=True)
return pairs[:top_n]
def initiator_ratio(self, df: pd.DataFrame):
starters = df["reply_to"].isna().sum()
total = len(df)
def conversation_concentration(self, df: pd.DataFrame) -> dict:
if "type" not in df.columns:
return {}
if total == 0:
return 0
comments = df[df["type"] == "comment"]
if comments.empty:
return {}
return round(starters / total, 2)
author_counts = comments["author"].value_counts()
total_comments = len(comments)
total_authors = len(author_counts)
top_10_pct_n = max(1, int(total_authors * 0.1))
top_10_pct_share = round(author_counts.head(top_10_pct_n).sum() / total_comments, 4)
return {
"total_commenting_authors": total_authors,
"top_10pct_author_count": top_10_pct_n,
"top_10pct_comment_share": float(top_10_pct_share),
"single_comment_authors": int((author_counts == 1).sum()),
"single_comment_author_ratio": float(round((author_counts == 1).sum() / total_authors, 4)),
}