Compare commits
2 Commits
8372aa7278
...
71998c450e
| Author | SHA1 | Date | |
|---|---|---|---|
| 71998c450e | |||
| 2a00384a55 |
@@ -51,68 +51,23 @@ class InteractionAnalysis:
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
return round(sum(depths) / len(depths), 2)
|
return round(sum(depths) / len(depths), 2)
|
||||||
|
|
||||||
|
def top_interaction_pairs(self, df: pd.DataFrame, top_n=10):
    """Return the ``top_n`` most frequent interaction pairs.

    Builds the author interaction graph for ``df`` and flattens it into
    ``((a, b), count)`` tuples, sorted by count, most frequent first.
    """
    graph = self.interaction_graph(df)
    pairs = [
        ((a, b), count)
        for a, targets in graph.items()
        for b, count in targets.items()
    ]
    pairs.sort(key=lambda pair: pair[1], reverse=True)
    return pairs[:top_n]


def initiator_ratio(self, df: pd.DataFrame):
    """Fraction of messages that start a thread (``reply_to`` is missing).

    Returns 0 for an empty frame, otherwise the ratio rounded to 2 decimals.
    """
    starters = df["reply_to"].isna().sum()
    total = len(df)
    if total == 0:
        return 0
    return round(starters / total, 2)


def average_thread_length_by_emotion(self, df: pd.DataFrame):
    """Average reply-chain length per dominant emotion.

    For each message the reply chain is walked back to its root via the
    ``reply_to`` column, and the chain length is attributed to the message's
    dominant emotion — the ``emotion_*`` column with the highest score,
    excluding neutral/surprise. Returns ``{emotion: mean_length}`` with
    means rounded to 2 decimals.
    """
    emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
    emotion_cols = [
        c
        for c in df.columns
        if c.startswith("emotion_") and c not in emotion_exclusions
    ]
    # Robustness: with no usable emotion columns, max() below would raise
    # ValueError on an empty dict — there is nothing to aggregate anyway.
    if not emotion_cols:
        return {}

    id_to_reply = df.set_index("id")["reply_to"].to_dict()
    length_cache = {}

    def thread_length_from(start_id):
        # Walk reply_to links toward the thread root, memoizing lengths.
        if start_id in length_cache:
            return length_cache[start_id]

        seen = set()
        length = 1
        current = start_id

        while True:
            if current in seen:
                # Infinite loop shouldn't happen, but just in case.
                break
            seen.add(current)

            reply_to = id_to_reply.get(current)
            if (
                reply_to is None
                or (isinstance(reply_to, float) and pd.isna(reply_to))
                or reply_to == ""
            ):
                break  # reached a thread root

            length += 1
            current = reply_to

            if current in length_cache:
                # Splice in the memoized tail; the -1 avoids counting the
                # shared node twice.
                length += length_cache[current] - 1
                break

        length_cache[start_id] = length
        return length

    emotion_to_lengths = {}

    # Fill NaNs in emotion cols to avoid max() issues.
    emo_df = df[["id"] + emotion_cols].copy()
    emo_df[emotion_cols] = emo_df[emotion_cols].fillna(0)

    for _, row in emo_df.iterrows():
        msg_id = row["id"]
        length = thread_length_from(msg_id)

        emotions = {c: row[c] for c in emotion_cols}
        dominant = max(emotions, key=emotions.get)

        emotion_to_lengths.setdefault(dominant, []).append(length)

    return {
        emotion: round(sum(lengths) / len(lengths), 2)
        for emotion, lengths in emotion_to_lengths.items()
    }
|||||||
@@ -120,7 +120,8 @@ class StatGen:
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
"average_thread_depth": self.interaction_analysis.average_thread_depth(filtered_df),
|
"average_thread_depth": self.interaction_analysis.average_thread_depth(filtered_df),
|
||||||
"average_thread_length_by_emotion": self.interaction_analysis.average_thread_length_by_emotion(filtered_df),
|
"top_interaction_pairs": self.interaction_analysis.top_interaction_pairs(filtered_df, top_n=100),
|
||||||
|
"initiator_ratio": self.interaction_analysis.initiator_ratio(filtered_df),
|
||||||
"interaction_graph": self.interaction_analysis.interaction_graph(filtered_df)
|
"interaction_graph": self.interaction_analysis.interaction_graph(filtered_df)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ CREATE TABLE events (
|
|||||||
weekday VARCHAR(255) NOT NULL,
|
weekday VARCHAR(255) NOT NULL,
|
||||||
|
|
||||||
/* Posts Only */
|
/* Posts Only */
|
||||||
title VARCHAR(255),
|
title TEXT,
|
||||||
|
|
||||||
/* Comments Only*/
|
/* Comments Only*/
|
||||||
parent_id VARCHAR(255),
|
parent_id VARCHAR(255),
|
||||||
|
|||||||
Reference in New Issue
Block a user