feat(api): add cultural analysis endpoint with identity markers
This commit is contained in:
40
server/analysis/cultural.py
Normal file
40
server/analysis/cultural.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import pandas as pd
|
||||
import re
|
||||
|
||||
class CulturalAnalysis:
|
||||
def __init__(self, df: pd.DataFrame):
|
||||
self.df = df
|
||||
|
||||
def get_identity_markers(self):
|
||||
df = self.df.copy()
|
||||
s = df["content"].fillna("").astype(str).str.lower()
|
||||
|
||||
in_group_words = {"we", "us", "our", "ourselves"}
|
||||
out_group_words = {"they", "them", "their", "themselves"}
|
||||
|
||||
emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
|
||||
emotion_cols = [c for c in df.columns if c.startswith("emotion_") and c not in emotion_exclusions]
|
||||
|
||||
# token counts per row
|
||||
tokens_per_row = s.apply(lambda txt: re.findall(r"\b[a-z]{2,}\b", txt))
|
||||
|
||||
total_tokens = int(tokens_per_row.map(len).sum())
|
||||
in_hits = tokens_per_row.map(lambda toks: sum(t in in_group_words for t in toks))
|
||||
out_hits = tokens_per_row.map(lambda toks: sum(t in out_group_words for t in toks))
|
||||
|
||||
in_count = int(in_hits.sum())
|
||||
out_count = int(out_hits.sum())
|
||||
|
||||
result = {
|
||||
"in_group_usage": in_count,
|
||||
"out_group_usage": out_count,
|
||||
"in_group_ratio": round(in_count / max(total_tokens, 1), 5),
|
||||
"out_group_ratio": round(out_count / max(total_tokens, 1), 5),
|
||||
}
|
||||
|
||||
if emotion_cols:
|
||||
emo = df[emotion_cols].fillna(0).astype(float)
|
||||
result["in_group_emotion_sums"] = emo[in_hits > out_hits].sum().to_dict()
|
||||
result["out_group_emotion_sums"] = emo[out_hits > in_hits].sum().to_dict()
|
||||
|
||||
return result
|
||||
Reference in New Issue
Block a user