Finish off the links between frontend and backend #10
@@ -51,68 +51,23 @@ class InteractionAnalysis:
|
||||
return 0
|
||||
|
||||
return round(sum(depths) / len(depths), 2)
|
||||
|
||||
def top_interaction_pairs(self, df: pd.DataFrame, top_n=10):
    """Return the ``top_n`` most frequent (sender, recipient) pairs.

    Args:
        df: Message DataFrame understood by ``self.interaction_graph``.
        top_n: Maximum number of pairs to return (default 10).

    Returns:
        List of ``((a, b), count)`` tuples, sorted by descending count.
    """
    graph = self.interaction_graph(df)
    pairs = []
    for a, targets in graph.items():
        for b, count in targets.items():
            pairs.append(((a, b), count))
    pairs.sort(key=lambda x: x[1], reverse=True)
    return pairs[:top_n]


def average_thread_length_by_emotion(self, df: pd.DataFrame):
    """Average reply-chain length per dominant emotion.

    For each message, the dominant emotion is the ``emotion_*`` column
    with the highest score (neutral and surprise are excluded), and the
    thread length is the number of hops following ``reply_to`` back to
    the thread starter.

    Args:
        df: DataFrame with ``id``, ``reply_to`` and ``emotion_*`` columns.

    Returns:
        Dict mapping emotion column name -> mean thread length, rounded
        to 2 decimal places. Empty dict when no eligible emotion columns
        (or no rows) are present.
    """
    emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
    emotion_cols = [
        c
        for c in df.columns
        if c.startswith("emotion_") and c not in emotion_exclusions
    ]
    # Guard: max() over an empty emotions dict would raise ValueError.
    if not emotion_cols:
        return {}

    id_to_reply = df.set_index("id")["reply_to"].to_dict()
    length_cache = {}

    def thread_length_from(start_id):
        # Walk the reply chain back to its root, memoising lengths so
        # each long thread is only traversed once.
        if start_id in length_cache:
            return length_cache[start_id]

        seen = set()
        length = 1
        current = start_id

        while True:
            if current in seen:
                # infinite loop shouldn't happen, but just in case
                break
            seen.add(current)

            reply_to = id_to_reply.get(current)

            if (
                reply_to is None
                or (isinstance(reply_to, float) and pd.isna(reply_to))
                or reply_to == ""
            ):
                break

            length += 1
            current = reply_to

            if current in length_cache:
                # Splice in the cached remainder, minus the shared node.
                length += length_cache[current] - 1
                break

        length_cache[start_id] = length
        return length

    emotion_to_lengths = {}

    # Fill NaNs in emotion cols to avoid max() issues
    emo_df = df[["id"] + emotion_cols].copy()
    emo_df[emotion_cols] = emo_df[emotion_cols].fillna(0)

    for _, row in emo_df.iterrows():
        msg_id = row["id"]
        length = thread_length_from(msg_id)

        emotions = {c: row[c] for c in emotion_cols}
        dominant = max(emotions, key=emotions.get)

        emotion_to_lengths.setdefault(dominant, []).append(length)

    return {
        emotion: round(sum(lengths) / len(lengths), 2)
        for emotion, lengths in emotion_to_lengths.items()
    }


def initiator_ratio(self, df: pd.DataFrame):
    """Fraction of messages that start a thread (no ``reply_to``).

    Args:
        df: DataFrame with a ``reply_to`` column.

    Returns:
        ``starters / total`` rounded to 2 decimal places, or 0 for an
        empty frame.
    """
    starters = df["reply_to"].isna().sum()
    total = len(df)
    if total == 0:
        return 0
    return round(starters / total, 2)
||||
@@ -120,7 +120,8 @@ class StatGen:
|
||||
|
||||
return {
|
||||
"average_thread_depth": self.interaction_analysis.average_thread_depth(filtered_df),
|
||||
"average_thread_length_by_emotion": self.interaction_analysis.average_thread_length_by_emotion(filtered_df),
|
||||
"top_interaction_pairs": self.interaction_analysis.top_interaction_pairs(filtered_df, top_n=100),
|
||||
"initiator_ratio": self.interaction_analysis.initiator_ratio(filtered_df),
|
||||
"interaction_graph": self.interaction_analysis.interaction_graph(filtered_df)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user