Compare commits
3 Commits
397986dc89
...
257eb80de7
| Author | SHA1 | Date | |
|---|---|---|---|
| 257eb80de7 | |||
| 3a23b1f0c8 | |||
| 8c76476cd3 |
@@ -124,3 +124,85 @@ class InteractionAnalysis:
|
|||||||
interactions[a][b] = interactions[a].get(b, 0) + 1
|
interactions[a][b] = interactions[a].get(b, 0) + 1
|
||||||
|
|
||||||
return interactions
|
return interactions
|
||||||
|
|
||||||
|
def average_thread_depth(self):
    """Return the mean reply-chain depth over every message in ``self.df``.

    A message's depth is the number of ``reply_to`` hops followed from it
    until a message with a missing ``reply_to`` (NaN/None/NA or an empty
    string) is reached. A hop to a parent id that is absent from the frame
    is counted, and the walk ends there.

    Returns:
        ``0`` when the frame has no rows, otherwise the mean depth rounded
        to two decimal places.
    """
    id_to_reply = self.df.set_index("id")["reply_to"].to_dict()

    depths = []
    # Iterate the id column directly: iterrows() builds one Series per row
    # and upcasts mixed dtypes, which can turn integer ids into floats.
    for message_id in self.df["id"]:
        depth = 0
        current_id = message_id
        # Ids already on this chain. Malformed data containing a reply
        # cycle (including a self-reply) would otherwise never terminate.
        visited = {current_id}

        while True:
            reply_to = id_to_reply.get(current_id)
            if pd.isna(reply_to) or reply_to == "":
                break

            depth += 1
            if reply_to in visited:
                # The hop closes a cycle: count it once and stop.
                break
            visited.add(reply_to)
            current_id = reply_to

        depths.append(depth)

    if not depths:
        return 0

    return round(sum(depths) / len(depths), 2)
|
||||||
|
|
||||||
|
def average_thread_length_by_emotion(self):
    """Average reply-chain length of messages, grouped by dominant emotion.

    For every row of ``self.df`` the chain length is the number of messages
    visited when following ``reply_to`` links upwards, the message itself
    included. The row's dominant emotion is the ``emotion_*`` column with
    the largest value (missing values count as 0; on ties the first column
    in frame order wins). ``emotion_neutral`` and ``emotion_surprise`` are
    never considered.

    Returns:
        dict mapping an emotion column name to the mean chain length of the
        rows it dominates, rounded to two decimals. Empty when the frame
        has no rows or no eligible ``emotion_*`` column.
    """
    emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
    emotion_cols = [
        c for c in self.df.columns
        if c.startswith("emotion_") and c not in emotion_exclusions
    ]
    if not emotion_cols:
        # Nothing to rank: max() over an empty collection would raise.
        return {}

    id_to_reply = self.df.set_index("id")["reply_to"].to_dict()
    length_cache = {}

    def has_no_parent(reply_to):
        # pd.isna covers None, NaN and pd.NA. Comparing pd.NA with ""
        # first would yield pd.NA, whose truth value raises TypeError.
        return pd.isna(reply_to) or reply_to == ""

    def thread_length_from(start_id):
        # Walk up the reply chain, reusing any length already computed
        # for an ancestor.
        if start_id in length_cache:
            return length_cache[start_id]

        seen = set()
        length = 1
        current = start_id

        # Revisiting an id means the data contains a reply cycle; stop.
        while current not in seen:
            seen.add(current)

            reply_to = id_to_reply.get(current)
            if has_no_parent(reply_to):
                break

            length += 1
            current = reply_to

            if current in length_cache:
                # The cached value already counts `current` itself.
                length += length_cache[current] - 1
                break

        length_cache[start_id] = length
        return length

    # Treat missing emotion scores as 0 so max() never compares NaN.
    emo_df = self.df[["id"] + emotion_cols].copy()
    emo_df[emotion_cols] = emo_df[emotion_cols].fillna(0)

    emotion_to_lengths = {}
    for _, row in emo_df.iterrows():
        length = thread_length_from(row["id"])
        dominant = max(emotion_cols, key=lambda col: row[col])
        emotion_to_lengths.setdefault(dominant, []).append(length)

    return {
        emotion: round(sum(lengths) / len(lengths), 2)
        for emotion, lengths in emotion_to_lengths.items()
    }
|
||||||
@@ -55,7 +55,7 @@ def word_frequencies():
|
|||||||
return jsonify({"error": "No data uploaded"}), 400
|
return jsonify({"error": "No data uploaded"}), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return jsonify(stat_obj.content_analysis()), 200
|
return jsonify(stat_obj.get_content_analysis()), 200
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -80,7 +80,7 @@ def get_time_analysis():
|
|||||||
return jsonify({"error": "No data uploaded"}), 400
|
return jsonify({"error": "No data uploaded"}), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return jsonify(stat_obj.time_analysis()), 200
|
return jsonify(stat_obj.get_time_analysis()), 200
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -93,7 +93,7 @@ def get_user_analysis():
|
|||||||
return jsonify({"error": "No data uploaded"}), 400
|
return jsonify({"error": "No data uploaded"}), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return jsonify(stat_obj.user_analysis()), 200
|
return jsonify(stat_obj.get_user_analysis()), 200
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -106,7 +106,20 @@ def get_cultural_analysis():
|
|||||||
return jsonify({"error": "No data uploaded"}), 400
|
return jsonify({"error": "No data uploaded"}), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return jsonify(stat_obj.cultural_analysis()), 200
|
return jsonify(stat_obj.get_cultural_analysis()), 200
|
||||||
|
except ValueError as e:
|
||||||
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
||||||
|
except Exception as e:
|
||||||
|
print(traceback.format_exc())
|
||||||
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
||||||
|
|
||||||
|
@app.route("/stats/interaction", methods=["GET"])
|
||||||
|
def get_interaction_analysis():
|
||||||
|
if stat_obj is None:
|
||||||
|
return jsonify({"error": "No data uploaded"}), 400
|
||||||
|
|
||||||
|
try:
|
||||||
|
return jsonify(stat_obj.get_interactional_analysis()), 200
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -66,14 +66,14 @@ class StatGen:
|
|||||||
|
|
||||||
# topics over time
|
# topics over time
|
||||||
# emotions over time
|
# emotions over time
|
||||||
def get_time_analysis(self) -> dict:
    """Collect the temporal statistics into one dictionary.

    Returns:
        dict with two keys: ``"events_per_day"`` holds the result of
        ``self.temporal_analysis.posts_per_day()`` and
        ``"weekday_hour_heatmap"`` the result of
        ``self.temporal_analysis.heatmap()``.
    """
    # The annotation used to say pd.DataFrame, but a plain dict is returned.
    return {
        "events_per_day": self.temporal_analysis.posts_per_day(),
        "weekday_hour_heatmap": self.temporal_analysis.heatmap(),
    }
|
||||||
|
|
||||||
# average topic duration
|
# average topic duration
|
||||||
def content_analysis(self) -> dict:
|
def get_content_analysis(self) -> dict:
|
||||||
return {
|
return {
|
||||||
"word_frequencies": self.linguistic_analysis.word_frequencies(),
|
"word_frequencies": self.linguistic_analysis.word_frequencies(),
|
||||||
"common_two_phrases": self.linguistic_analysis.ngrams(),
|
"common_two_phrases": self.linguistic_analysis.ngrams(),
|
||||||
@@ -84,7 +84,7 @@ class StatGen:
|
|||||||
|
|
||||||
# average emotion per user
|
# average emotion per user
|
||||||
# average chain length
|
# average chain length
|
||||||
def user_analysis(self) -> dict:
|
def get_user_analysis(self) -> dict:
|
||||||
return {
|
return {
|
||||||
"top_users": self.interaction_analysis.top_users(),
|
"top_users": self.interaction_analysis.top_users(),
|
||||||
"users": self.interaction_analysis.per_user_analysis(),
|
"users": self.interaction_analysis.per_user_analysis(),
|
||||||
@@ -94,14 +94,15 @@ class StatGen:
|
|||||||
# average / max thread depth
|
# average / max thread depth
|
||||||
# high engagement threads based on volume
|
# high engagement threads based on volume
|
||||||
|
|
||||||
def get_interactional_analysis(self) -> dict:
    """Bundle the thread-level interaction statistics into one dictionary.

    Returns:
        dict with ``"average_thread_depth"`` and
        ``"average_thread_length_by_emotion"``, each holding the result of
        the same-named method on ``self.interaction_analysis``.
    """
    mean_depth = self.interaction_analysis.average_thread_depth()
    length_by_emotion = self.interaction_analysis.average_thread_length_by_emotion()
    return dict(
        average_thread_depth=mean_depth,
        average_thread_length_by_emotion=length_by_emotion,
    )
|
||||||
|
|
||||||
# detect community jargon
|
# detect community jargon
|
||||||
# in-group and out-group linguistic markers
|
# in-group and out-group linguistic markers
|
||||||
def get_cultural_analysis(self) -> dict:
    """Return the cultural statistics as a dictionary.

    Returns:
        dict with the single key ``"identity_markers"``, holding the result
        of ``self.linguistic_analysis.identity_markers()``.
    """
    markers = self.linguistic_analysis.identity_markers()
    return dict(identity_markers=markers)
|
||||||
|
|||||||
Reference in New Issue
Block a user