Compare commits

8 commits (c11b4bb85b ... main)

| Author | SHA1 | Date |
|---|---|---|
| | ce0aa6bc43 | |
| | e82ac8d73b | |
| | ccba6a5262 | |
| | 257eb80de7 | |
| | 3a23b1f0c8 | |
| | 8c76476cd3 | |
| | 397986dc89 | |
| | 04b7094036 | |
@@ -34,7 +34,7 @@ function ApiToGraphData(apiData: InteractionGraph) {
 }


-const InteractionStats = (props: { data: UserAnalysisResponse }) => {
+const UserStats = (props: { data: UserAnalysisResponse }) => {
   const graphData = ApiToGraphData(props.data.interaction_graph);

   return (
@@ -44,7 +44,7 @@ const InteractionStats = (props: { data: UserAnalysisResponse }) => {
         This graph visualizes interactions between users based on comments and replies.
         Nodes represent users, and edges represent interactions (e.g., comments or replies) between them.
       </p>
-      <div style={{ height: "600px", border: "1px solid #ccc", borderRadius: 8, marginTop: 16 }}>
+      <div>
         <ForceGraph3D
           graphData={graphData}
           nodeAutoColorBy="id"
@@ -58,4 +58,4 @@ const InteractionStats = (props: { data: UserAnalysisResponse }) => {
   );
 }

-export default InteractionStats;
+export default UserStats;
@@ -3,7 +3,7 @@ import axios from "axios";
 import StatsStyling from "../styles/stats_styling";
 import SummaryStats from "../components/SummaryStats";
 import EmotionalStats from "../components/EmotionalStats";
-import InteractionStats from "../components/InteractionStats";
+import InteractionStats from "../components/UserStats";

 import {
   type SummaryResponse,
server/analysis/cultural.py (new file, 154 lines)
@@ -0,0 +1,154 @@
+import pandas as pd
+import re
+
+from collections import Counter
+from typing import Any
+
+
+class CulturalAnalysis:
+    def __init__(self, df: pd.DataFrame, content_col: str = "content", topic_col: str = "topic"):
+        self.df = df
+        self.content_col = content_col
+        self.topic_col = topic_col
+
+    def get_identity_markers(self):
+        df = self.df.copy()
+        s = df[self.content_col].fillna("").astype(str).str.lower()
+
+        in_group_words = {"we", "us", "our", "ourselves"}
+        out_group_words = {"they", "them", "their", "themselves"}
+
+        emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
+        emotion_cols = [
+            c for c in df.columns
+            if c.startswith("emotion_") and c not in emotion_exclusions
+        ]
+
+        # Tokenize per row
+        tokens_per_row = s.apply(lambda txt: re.findall(r"\b[a-z]{2,}\b", txt))
+
+        total_tokens = int(tokens_per_row.map(len).sum())
+        in_hits = tokens_per_row.map(lambda toks: sum(t in in_group_words for t in toks)).astype(int)
+        out_hits = tokens_per_row.map(lambda toks: sum(t in out_group_words for t in toks)).astype(int)
+
+        in_count = int(in_hits.sum())
+        out_count = int(out_hits.sum())
+
+        in_mask = in_hits > out_hits
+        out_mask = out_hits > in_hits
+        tie_mask = ~(in_mask | out_mask)
+
+        result = {
+            "in_group_usage": in_count,
+            "out_group_usage": out_count,
+            "in_group_ratio": round(in_count / max(total_tokens, 1), 5),
+            "out_group_ratio": round(out_count / max(total_tokens, 1), 5),
+            "in_group_posts": int(in_mask.sum()),
+            "out_group_posts": int(out_mask.sum()),
+            "tie_posts": int(tie_mask.sum()),
+        }
+
+        if emotion_cols:
+            emo = df[emotion_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)
+
+            in_avg = emo.loc[in_mask].mean() if in_mask.any() else pd.Series(0.0, index=emotion_cols)
+            out_avg = emo.loc[out_mask].mean() if out_mask.any() else pd.Series(0.0, index=emotion_cols)
+
+            result["in_group_emotion_avg"] = in_avg.to_dict()
+            result["out_group_emotion_avg"] = out_avg.to_dict()
+
+        return result
+
+    def get_stance_markers(self) -> dict[str, Any]:
+        # Lowercase so sentence-initial markers ("Maybe", "Clearly") are counted too.
+        s = self.df[self.content_col].fillna("").astype(str).str.lower()
+
+        hedges = {
+            "maybe", "perhaps", "possibly", "probably", "likely", "seems", "seem",
+            "i think", "i feel", "i guess", "kind of", "sort of", "somewhat"
+        }
+        certainty = {
+            "definitely", "certainly", "clearly", "obviously", "undeniably", "always", "never"
+        }
+
+        deontic = {
+            "must", "should", "need", "needs", "have to", "has to", "ought", "required", "require"
+        }
+
+        permission = {"can", "allowed", "okay", "ok", "permitted"}
+
+        def count_phrases(text: str, phrases: set[str]) -> int:
+            # \b-delimited search handles single words and multi-word phrases alike,
+            # so no separate branch is needed for phrases containing spaces.
+            c = 0
+            for p in phrases:
+                c += len(re.findall(r"\b" + re.escape(p) + r"\b", text))
+            return c
+
+        hedge_counts = s.apply(lambda t: count_phrases(t, hedges))
+        certainty_counts = s.apply(lambda t: count_phrases(t, certainty))
+        deontic_counts = s.apply(lambda t: count_phrases(t, deontic))
+        perm_counts = s.apply(lambda t: count_phrases(t, permission))
+
+        token_counts = s.apply(lambda t: len(re.findall(r"\b[a-z]{2,}\b", t))).replace(0, 1)
+
+        return {
+            "hedge_total": int(hedge_counts.sum()),
+            "certainty_total": int(certainty_counts.sum()),
+            "deontic_total": int(deontic_counts.sum()),
+            "permission_total": int(perm_counts.sum()),
+            "hedge_per_1k_tokens": round(1000 * hedge_counts.sum() / token_counts.sum(), 3),
+            "certainty_per_1k_tokens": round(1000 * certainty_counts.sum() / token_counts.sum(), 3),
+            "deontic_per_1k_tokens": round(1000 * deontic_counts.sum() / token_counts.sum(), 3),
+            "permission_per_1k_tokens": round(1000 * perm_counts.sum() / token_counts.sum(), 3),
+        }
+
+    def get_avg_emotions_per_entity(self, top_n: int = 25, min_posts: int = 10) -> dict[str, Any]:
+        if "entities" not in self.df.columns:
+            return {"entity_emotion_avg": {}}
+
+        df = self.df
+        emotion_cols = [c for c in df.columns if c.startswith("emotion_")]
+
+        entity_counter = Counter()
+
+        for row in df["entities"].dropna():
+            if isinstance(row, list):
+                for ent in row:
+                    if isinstance(ent, dict):
+                        text = ent.get("text")
+                        if isinstance(text, str):
+                            text = text.strip()
+                            if len(text) >= 3:  # filter short junk
+                                entity_counter[text] += 1
+
+        top_entities = entity_counter.most_common(top_n)
+
+        entity_emotion_avg = {}
+
+        for entity_text, _ in top_entities:
+            mask = df["entities"].apply(
+                lambda ents: isinstance(ents, list) and
+                any(isinstance(e, dict) and e.get("text") == entity_text for e in ents)
+            )
+
+            post_count = int(mask.sum())
+
+            if post_count >= min_posts:
+                emo_means = (
+                    df.loc[mask, emotion_cols]
+                    .apply(pd.to_numeric, errors="coerce")
+                    .fillna(0.0)
+                    .mean()
+                    .to_dict()
+                )
+
+                entity_emotion_avg[entity_text] = {
+                    "post_count": post_count,
+                    "emotion_avg": emo_means
+                }
+
+        return {
+            "entity_emotion_avg": entity_emotion_avg
+        }
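A minimal usage sketch for the new class (the toy DataFrame and its values are illustrative; the column names and import path follow this diff):

    import pandas as pd
    from server.analysis.cultural import CulturalAnalysis

    df = pd.DataFrame({
        "content": ["We should stick together.", "They never listen to us."],
        "emotion_anger": [0.1, 0.7],
        "emotion_joy": [0.6, 0.1],
    })

    ca = CulturalAnalysis(df)
    print(ca.get_identity_markers())         # in/out-group counts, ratios, per-group emotion averages
    print(ca.get_stance_markers())           # "should" lands in deontic, "never" in certainty
    print(ca.get_avg_emotions_per_entity())  # {"entity_emotion_avg": {}} -- no "entities" column here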
@@ -5,14 +5,9 @@ class EmotionalAnalysis:
         self.df = df

     def avg_emotion_by_topic(self) -> dict:
-        emotion_exclusions = [
-            "emotion_neutral",
-            "emotion_surprise"
-        ]
-
         emotion_cols = [
             col for col in self.df.columns
-            if col.startswith("emotion_") and col not in emotion_exclusions
+            if col.startswith("emotion_")
         ]

         counts = (
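The exclusion list is not simply dropped here: as the NLP hunk further down shows, emotion_neutral and emotion_surprise are now removed from the DataFrame during preprocessing, so every downstream analysis sees them filtered out without repeating the list.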
@@ -124,3 +124,85 @@ class InteractionAnalysis:
                 interactions[a][b] = interactions[a].get(b, 0) + 1

         return interactions
+
+    def average_thread_depth(self):
+        depths = []
+        id_to_reply = self.df.set_index("id")["reply_to"].to_dict()
+        for _, row in self.df.iterrows():
+            depth = 0
+            current_id = row["id"]
+            seen = set()
+
+            while True:
+                # Guard against reply cycles, mirroring thread_length_from below.
+                if current_id in seen:
+                    break
+                seen.add(current_id)
+
+                reply_to = id_to_reply.get(current_id)
+                if pd.isna(reply_to) or reply_to == "":
+                    break
+
+                depth += 1
+                current_id = reply_to
+
+            depths.append(depth)
+
+        if not depths:
+            return 0
+
+        return round(sum(depths) / len(depths), 2)
+
+    def average_thread_length_by_emotion(self):
+        emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
+
+        emotion_cols = [
+            c for c in self.df.columns
+            if c.startswith("emotion_") and c not in emotion_exclusions
+        ]
+
+        id_to_reply = self.df.set_index("id")["reply_to"].to_dict()
+        length_cache = {}
+
+        def thread_length_from(start_id):
+            if start_id in length_cache:
+                return length_cache[start_id]
+
+            seen = set()
+            length = 1
+            current = start_id
+
+            while True:
+                if current in seen:
+                    # infinite loop shouldn't happen, but just in case
+                    break
+                seen.add(current)
+
+                reply_to = id_to_reply.get(current)
+
+                if reply_to is None or (isinstance(reply_to, float) and pd.isna(reply_to)) or reply_to == "":
+                    break
+
+                length += 1
+                current = reply_to
+
+                if current in length_cache:
+                    length += (length_cache[current] - 1)
+                    break
+
+            length_cache[start_id] = length
+            return length
+
+        emotion_to_lengths = {}
+
+        # Fill NaNs in emotion cols to avoid max() issues
+        emo_df = self.df[["id"] + emotion_cols].copy()
+        emo_df[emotion_cols] = emo_df[emotion_cols].fillna(0)
+
+        for _, row in emo_df.iterrows():
+            msg_id = row["id"]
+            length = thread_length_from(msg_id)
+
+            emotions = {c: row[c] for c in emotion_cols}
+            dominant = max(emotions, key=emotions.get)
+
+            emotion_to_lengths.setdefault(dominant, []).append(length)
+
+        return {
+            emotion: round(sum(lengths) / len(lengths), 2)
+            for emotion, lengths in emotion_to_lengths.items()
+        }
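A quick sanity check for the new thread metrics (toy frame; "id", "reply_to", and the emotion columns follow the code above, while the constructor's second argument is assumed from the StatGen change below, which passes EXCLUDE_WORDS):

    import pandas as pd
    from server.analysis.interactional import InteractionAnalysis

    df = pd.DataFrame({
        "id": ["a", "b", "c"],
        "reply_to": ["", "a", "b"],   # b replies to a, c replies to b; a is a root post
        "emotion_anger": [0.1, 0.9, 0.8],
        "emotion_joy": [0.9, 0.1, 0.2],
    })

    ia = InteractionAnalysis(df, set())
    print(ia.average_thread_depth())              # (0 + 1 + 2) / 3 -> 1.0
    print(ia.average_thread_length_by_emotion())  # joy: [1] -> 1.0; anger: [2, 3] -> 2.5

The length_cache memoization pays off on long chains: once a parent's chain length is known, every descendant's walk stops at the first cached ancestor instead of re-tracing to the root.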
@@ -9,6 +9,10 @@ class LinguisticAnalysis:
         self.df = df
         self.word_exclusions = word_exclusions

+    def _tokenize(self, text: str):
+        tokens = re.findall(r"\b[a-z]{3,}\b", text)
+        return [t for t in tokens if t not in self.word_exclusions]
+
     def _clean_text(self, text: str) -> str:
         text = re.sub(r"http\S+", "", text)  # remove URLs
         text = re.sub(r"www\S+", "", text)
@@ -200,6 +200,35 @@ class NLP:
             if column.startswith("emotion_") and column not in emotion_df.columns:
                 self.df[column] = 0.0

+        # drop neutral and surprise columns from df and normalize others to sum to 1
+        drop_cols = ["emotion_neutral", "emotion_surprise"]
+
+        existing_drop = [c for c in drop_cols if c in self.df.columns]
+        self.df.drop(columns=existing_drop, inplace=True)
+
+        remaining_emotion_cols = [
+            c for c in self.df.columns
+            if c.startswith("emotion_")
+        ]
+
+        if remaining_emotion_cols:
+            emotion_matrix = (
+                self.df[remaining_emotion_cols]
+                .apply(pd.to_numeric, errors="coerce")
+                .fillna(0.0)
+            )
+
+            row_sums = emotion_matrix.sum(axis=1)
+
+            # Avoid division by zero
+            row_sums = row_sums.replace(0, 1.0)
+
+            normalized = emotion_matrix.div(row_sums, axis=0)
+
+            self.df[remaining_emotion_cols] = normalized.values
+
+
     def add_topic_col(self, confidence_threshold: float = 0.3) -> None:
         titles = self.df[self.title_col].fillna("").astype(str)
         contents = self.df[self.content_col].fillna("").astype(str)
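The normalization step in isolation, for reference (toy values; all-zero rows stay at zero because their divisor is swapped to 1.0):

    import pandas as pd

    m = pd.DataFrame({"emotion_anger": [0.2, 0.0], "emotion_joy": [0.6, 0.0]})
    row_sums = m.sum(axis=1).replace(0, 1.0)
    print(m.div(row_sums, axis=0))
    # row 0 -> 0.25 / 0.75 (sums to 1); row 1 stays 0.0 / 0.0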
@@ -276,3 +305,5 @@ class NLP:
         self.df[col_name] = [
             d.get(label, 0) for d in entity_count_dicts
         ]
+
+
@@ -55,7 +55,7 @@ def word_frequencies():
         return jsonify({"error": "No data uploaded"}), 400

     try:
-        return jsonify(stat_obj.content_analysis()), 200
+        return jsonify(stat_obj.get_content_analysis()), 200
     except ValueError as e:
         return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
     except Exception as e:
@@ -80,7 +80,7 @@ def get_time_analysis():
         return jsonify({"error": "No data uploaded"}), 400

     try:
-        return jsonify(stat_obj.time_analysis()), 200
+        return jsonify(stat_obj.get_time_analysis()), 200
     except ValueError as e:
         return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
     except Exception as e:
@@ -93,7 +93,33 @@ def get_user_analysis():
         return jsonify({"error": "No data uploaded"}), 400

     try:
-        return jsonify(stat_obj.user_analysis()), 200
+        return jsonify(stat_obj.get_user_analysis()), 200
+    except ValueError as e:
+        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
+    except Exception as e:
+        print(traceback.format_exc())
+        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
+
+
+@app.route("/stats/cultural", methods=["GET"])
+def get_cultural_analysis():
+    if stat_obj is None:
+        return jsonify({"error": "No data uploaded"}), 400
+
+    try:
+        return jsonify(stat_obj.get_cultural_analysis()), 200
+    except ValueError as e:
+        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
+    except Exception as e:
+        print(traceback.format_exc())
+        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
+
+
+@app.route("/stats/interaction", methods=["GET"])
+def get_interaction_analysis():
+    if stat_obj is None:
+        return jsonify({"error": "No data uploaded"}), 400
+
+    try:
+        return jsonify(stat_obj.get_interactional_analysis()), 200
     except ValueError as e:
         return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
     except Exception as e:
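Exercising the new endpoints (the host and port are assumptions; the paths come from the routes above):

    import requests

    base = "http://localhost:5000"
    for path in ("/stats/cultural", "/stats/interaction"):
        r = requests.get(base + path)
        print(path, r.status_code)  # 400 with {"error": "No data uploaded"} until a dataset is loaded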
@@ -8,6 +8,7 @@ from server.analysis.temporal import TemporalAnalysis
 from server.analysis.emotional import EmotionalAnalysis
 from server.analysis.interactional import InteractionAnalysis
 from server.analysis.linguistic import LinguisticAnalysis
+from server.analysis.cultural import CulturalAnalysis

 DOMAIN_STOPWORDS = {
     "www", "https", "http",
@@ -46,6 +47,7 @@ class StatGen:
         self.emotional_analysis = EmotionalAnalysis(self.df)
         self.interaction_analysis = InteractionAnalysis(self.df, EXCLUDE_WORDS)
         self.linguistic_analysis = LinguisticAnalysis(self.df, EXCLUDE_WORDS)
+        self.cultural_analysis = CulturalAnalysis(self.df)

         self.original_df = self.df.copy(deep=True)
@@ -62,13 +64,18 @@ class StatGen:
         self.nlp.add_ner_cols()

     ## Public
-    def time_analysis(self) -> pd.DataFrame:
+
+    # topics over time
+    # emotions over time
+    def get_time_analysis(self) -> dict:
         return {
             "events_per_day": self.temporal_analysis.posts_per_day(),
             "weekday_hour_heatmap": self.temporal_analysis.heatmap()
         }

-    def content_analysis(self) -> dict:
+    # average topic duration
+    def get_content_analysis(self) -> dict:
         return {
             "word_frequencies": self.linguistic_analysis.word_frequencies(),
             "common_two_phrases": self.linguistic_analysis.ngrams(),
@@ -77,13 +84,32 @@ class StatGen:
             "reply_time_by_emotion": self.temporal_analysis.avg_reply_time_per_emotion()
         }

-    def user_analysis(self) -> dict:
+    # average emotion per user
+    # average chain length
+    def get_user_analysis(self) -> dict:
         return {
             "top_users": self.interaction_analysis.top_users(),
-            "users": self.interaction_analysis.per_user_analysis(),
+            "users": self.interaction_analysis.per_user_analysis()
+        }
+
+    # average / max thread depth
+    # high engagement threads based on volume
+    def get_interactional_analysis(self) -> dict:
+        return {
+            "average_thread_depth": self.interaction_analysis.average_thread_depth(),
+            "average_thread_length_by_emotion": self.interaction_analysis.average_thread_length_by_emotion(),
             "interaction_graph": self.interaction_analysis.interaction_graph()
         }

+    # detect community jargon
+    # in-group and out-group linguistic markers
+    def get_cultural_analysis(self) -> dict:
+        return {
+            "identity_markers": self.cultural_analysis.get_identity_markers(),
+            "stance_markers": self.cultural_analysis.get_stance_markers(),
+            "entity_salience": self.cultural_analysis.get_avg_emotions_per_entity()
+        }
+
     def summary(self) -> dict:
         total_posts = (self.df["type"] == "post").sum()
         total_comments = (self.df["type"] == "comment").sum()
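After this change, StatGen's public surface is uniformly get_-prefixed. Sketch of the mapping (StatGen construction is outside this diff, so `gen` here is assumed):

    payloads = {
        "time": gen.get_time_analysis(),
        "content": gen.get_content_analysis(),
        "user": gen.get_user_analysis(),
        "interaction": gen.get_interactional_analysis(),
        "cultural": gen.get_cultural_analysis(),
    }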