style: run python linter & prettifier on backend code
This commit is contained in:
@@ -15,7 +15,8 @@ class CulturalAnalysis:
|
||||
|
||||
emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
|
||||
emotion_cols = [
|
||||
c for c in df.columns
|
||||
c
|
||||
for c in df.columns
|
||||
if c.startswith("emotion_") and c not in emotion_exclusions
|
||||
]
|
||||
|
||||
@@ -40,7 +41,6 @@ class CulturalAnalysis:
|
||||
"out_group_usage": out_count,
|
||||
"in_group_ratio": round(in_count / max(total_tokens, 1), 5),
|
||||
"out_group_ratio": round(out_count / max(total_tokens, 1), 5),
|
||||
|
||||
"in_group_posts": int(in_mask.sum()),
|
||||
"out_group_posts": int(out_mask.sum()),
|
||||
"tie_posts": int(tie_mask.sum()),
|
||||
@@ -49,20 +49,34 @@ class CulturalAnalysis:
|
||||
if emotion_cols:
|
||||
emo = df[emotion_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)
|
||||
|
||||
in_avg = emo.loc[in_mask].mean() if in_mask.any() else pd.Series(0.0, index=emotion_cols)
|
||||
out_avg = emo.loc[out_mask].mean() if out_mask.any() else pd.Series(0.0, index=emotion_cols)
|
||||
in_avg = (
|
||||
emo.loc[in_mask].mean()
|
||||
if in_mask.any()
|
||||
else pd.Series(0.0, index=emotion_cols)
|
||||
)
|
||||
out_avg = (
|
||||
emo.loc[out_mask].mean()
|
||||
if out_mask.any()
|
||||
else pd.Series(0.0, index=emotion_cols)
|
||||
)
|
||||
|
||||
result["in_group_emotion_avg"] = in_avg.to_dict()
|
||||
result["out_group_emotion_avg"] = out_avg.to_dict()
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def get_stance_markers(self, df: pd.DataFrame) -> dict[str, Any]:
|
||||
s = df[self.content_col].fillna("").astype(str)
|
||||
|
||||
hedge_pattern = re.compile(r"\b(maybe|perhaps|possibly|probably|likely|seems|seem|i think|i feel|i guess|kind of|sort of|somewhat)\b")
|
||||
certainty_pattern = re.compile(r"\b(definitely|certainly|clearly|obviously|undeniably|always|never)\b")
|
||||
deontic_pattern = re.compile(r"\b(must|should|need|needs|have to|has to|ought|required|require)\b")
|
||||
hedge_pattern = re.compile(
|
||||
r"\b(maybe|perhaps|possibly|probably|likely|seems|seem|i think|i feel|i guess|kind of|sort of|somewhat)\b"
|
||||
)
|
||||
certainty_pattern = re.compile(
|
||||
r"\b(definitely|certainly|clearly|obviously|undeniably|always|never)\b"
|
||||
)
|
||||
deontic_pattern = re.compile(
|
||||
r"\b(must|should|need|needs|have to|has to|ought|required|require)\b"
|
||||
)
|
||||
permission_pattern = re.compile(r"\b(can|allowed|okay|ok|permitted)\b")
|
||||
|
||||
hedge_counts = s.str.count(hedge_pattern)
|
||||
@@ -70,20 +84,32 @@ class CulturalAnalysis:
|
||||
deontic_counts = s.str.count(deontic_pattern)
|
||||
perm_counts = s.str.count(permission_pattern)
|
||||
|
||||
token_counts = s.apply(lambda t: len(re.findall(r"\b[a-z]{2,}\b", t))).replace(0, 1)
|
||||
token_counts = s.apply(lambda t: len(re.findall(r"\b[a-z]{2,}\b", t))).replace(
|
||||
0, 1
|
||||
)
|
||||
|
||||
return {
|
||||
"hedge_total": int(hedge_counts.sum()),
|
||||
"certainty_total": int(certainty_counts.sum()),
|
||||
"deontic_total": int(deontic_counts.sum()),
|
||||
"permission_total": int(perm_counts.sum()),
|
||||
"hedge_per_1k_tokens": round(1000 * hedge_counts.sum() / token_counts.sum(), 3),
|
||||
"certainty_per_1k_tokens": round(1000 * certainty_counts.sum() / token_counts.sum(), 3),
|
||||
"deontic_per_1k_tokens": round(1000 * deontic_counts.sum() / token_counts.sum(), 3),
|
||||
"permission_per_1k_tokens": round(1000 * perm_counts.sum() / token_counts.sum(), 3),
|
||||
"hedge_per_1k_tokens": round(
|
||||
1000 * hedge_counts.sum() / token_counts.sum(), 3
|
||||
),
|
||||
"certainty_per_1k_tokens": round(
|
||||
1000 * certainty_counts.sum() / token_counts.sum(), 3
|
||||
),
|
||||
"deontic_per_1k_tokens": round(
|
||||
1000 * deontic_counts.sum() / token_counts.sum(), 3
|
||||
),
|
||||
"permission_per_1k_tokens": round(
|
||||
1000 * perm_counts.sum() / token_counts.sum(), 3
|
||||
),
|
||||
}
|
||||
|
||||
def get_avg_emotions_per_entity(self, df: pd.DataFrame, top_n: int = 25, min_posts: int = 10) -> dict[str, Any]:
|
||||
|
||||
def get_avg_emotions_per_entity(
|
||||
self, df: pd.DataFrame, top_n: int = 25, min_posts: int = 10
|
||||
) -> dict[str, Any]:
|
||||
if "ner_entities" not in df.columns:
|
||||
return {"entity_emotion_avg": {}}
|
||||
|
||||
@@ -92,9 +118,13 @@ class CulturalAnalysis:
|
||||
entity_df = df[["ner_entities"] + emotion_cols].explode("ner_entities")
|
||||
|
||||
entity_df["entity_text"] = entity_df["ner_entities"].apply(
|
||||
lambda e: e.get("text").strip()
|
||||
if isinstance(e, dict) and isinstance(e.get("text"), str) and len(e.get("text")) >= 3
|
||||
else None
|
||||
lambda e: (
|
||||
e.get("text").strip()
|
||||
if isinstance(e, dict)
|
||||
and isinstance(e.get("text"), str)
|
||||
and len(e.get("text")) >= 3
|
||||
else None
|
||||
)
|
||||
)
|
||||
|
||||
entity_df = entity_df.dropna(subset=["entity_text"])
|
||||
@@ -114,4 +144,4 @@ class CulturalAnalysis:
|
||||
"emotion_avg": emo_means,
|
||||
}
|
||||
|
||||
return {"entity_emotion_avg": entity_emotion_avg}
|
||||
return {"entity_emotion_avg": entity_emotion_avg}
|
||||
|
||||
Reference in New Issue
Block a user