feat(stat): add conversation concentration metric
Remove the old `initiator_ratio` metric, which wasn't working because every event has a `reply_to` value. The new conversation concentration metric was suggested by AI and turned out to be a surprisingly interesting one that gave useful insights.
This commit is contained in:
@@ -63,11 +63,25 @@ class InteractionAnalysis:
|
||||
pairs.sort(key=lambda x: x[1], reverse=True)
|
||||
return pairs[:top_n]
|
||||
|
||||
def initiator_ratio(self, df: pd.DataFrame):
|
||||
starters = df["reply_to"].isna().sum()
|
||||
total = len(df)
|
||||
def conversation_concentration(self, df: pd.DataFrame) -> dict:
|
||||
if "type" not in df.columns:
|
||||
return {}
|
||||
|
||||
if total == 0:
|
||||
return 0
|
||||
comments = df[df["type"] == "comment"]
|
||||
if comments.empty:
|
||||
return {}
|
||||
|
||||
return round(starters / total, 2)
|
||||
author_counts = comments["author"].value_counts()
|
||||
total_comments = len(comments)
|
||||
total_authors = len(author_counts)
|
||||
|
||||
top_10_pct_n = max(1, int(total_authors * 0.1))
|
||||
top_10_pct_share = round(author_counts.head(top_10_pct_n).sum() / total_comments, 4)
|
||||
|
||||
return {
|
||||
"total_commenting_authors": total_authors,
|
||||
"top_10pct_author_count": top_10_pct_n,
|
||||
"top_10pct_comment_share": float(top_10_pct_share),
|
||||
"single_comment_authors": int((author_counts == 1).sum()),
|
||||
"single_comment_author_ratio": float(round((author_counts == 1).sum() / total_authors, 4)),
|
||||
}
|
||||
Reference in New Issue
Block a user