feat(api): add cultural analysis endpoint with identity markers
This commit is contained in:
@@ -69,45 +69,4 @@ class LinguisticAnalysis:
|
||||
.sort_values("count", ascending=False)
|
||||
.head(limit)
|
||||
.to_dict(orient="records")
|
||||
)
|
||||
|
||||
def identity_markers(self):
    """Count in-group and out-group pronoun usage across all posts.

    Reads the ``content`` column of ``self.df``, treats missing values as
    empty strings, lower-cases the text, and tokenizes it into runs of two
    or more ASCII letters. Each token is checked against a fixed set of
    in-group pronouns (we/us/our/ourselves) and out-group pronouns
    (they/them/their/themselves).

    ``self.df`` is not modified.

    Returns:
        dict with four keys:
            ``in_group_usage``  - number of in-group tokens found.
            ``out_group_usage`` - number of out-group tokens found.
            ``in_group_ratio``  - in-group tokens / total tokens, rounded
                                  to 5 decimal places.
            ``out_group_ratio`` - out-group tokens / total tokens, rounded
                                  to 5 decimal places.
        Both ratios are 0.0 when no tokens are found.

    Raises:
        KeyError: if ``self.df`` has no ``content`` column.
    """
    in_group_words = {"we", "us", "our", "ourselves"}
    out_group_words = {"they", "them", "their", "themselves"}

    # Compiled once, outside the loop; only lower-case ASCII words of
    # length >= 2 count as tokens (text is lower-cased below).
    token_pattern = re.compile(r"\b[a-z]{2,}\b")

    # Work on a derived Series rather than a DataFrame copy: the chained
    # calls return new objects, so the caller's frame is left untouched.
    contents = self.df["content"].fillna("").astype(str).str.lower()

    in_count = 0
    out_count = 0
    total = 0

    # Iterate the column's values. Iterating a DataFrame directly yields
    # column labels, not rows, so the per-post text must come from the
    # Series itself.
    for text in contents:
        tokens = token_pattern.findall(text)
        total += len(tokens)
        in_count += sum(token in in_group_words for token in tokens)
        out_count += sum(token in out_group_words for token in tokens)

    # NOTE: per-group emotion aggregation is not part of this result; only
    # pronoun counts and ratios are reported.

    # Guard against division by zero when there are no tokens at all.
    denominator = max(total, 1)

    return {
        "in_group_usage": in_count,
        "out_group_usage": out_count,
        "in_group_ratio": round(in_count / denominator, 5),
        "out_group_ratio": round(out_count / denominator, 5),
    }
|
||||
)
|
||||
Reference in New Issue
Block a user