feat(analysis): add emotional averages to stance markers

This commit is contained in:
2026-04-07 12:49:18 +01:00
parent addc1d4087
commit c6cae040f0
3 changed files with 68 additions and 21 deletions

View File

@@ -39,6 +39,21 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
return `${dominantLabel} (${(dominant[1] * 100).toFixed(1)}%)`; return `${dominantLabel} (${(dominant[1] * 100).toFixed(1)}%)`;
}; };
/**
 * Builds the sublabel text shown under a stance-marker card.
 *
 * The text always starts with a rate part: the per-1k value formatted to
 * one decimal place, or the generic "Word frequency" fallback when no
 * numeric rate was supplied. When `topEmotion` resolves the emotion
 * averages to something other than the "—" placeholder, that result is
 * appended as the average mood.
 *
 * @param per1kTokens - Marker occurrences per 1k words, if available.
 * @param emotionAvg - Emotion-name to average-score map, if available.
 * @returns The sublabel string for the card.
 */
const stanceSublabel = (
  per1kTokens: number | undefined,
  emotionAvg: Record<string, number> | undefined,
) => {
  // Start from the fallback and upgrade it only when a numeric rate exists.
  let rateText = "Word frequency";
  if (typeof per1kTokens === "number") {
    rateText = `${per1kTokens.toFixed(1)} per 1k words`;
  }

  // "—" is the placeholder this code treats as "no mood to show".
  const mood = topEmotion(emotionAvg);
  if (mood === "—") {
    return rateText;
  }
  return `${rateText} • Avg mood: ${mood}`;
};
return ( return (
<div style={styles.page}> <div style={styles.page}>
<div style={{ ...styles.container, ...styles.grid }}> <div style={{ ...styles.container, ...styles.grid }}>
@@ -107,41 +122,37 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
<Card <Card
label="Hedging Words" label="Hedging Words"
value={stance?.hedge_total?.toLocaleString() ?? "—"} value={stance?.hedge_total?.toLocaleString() ?? "—"}
sublabel={ sublabel={stanceSublabel(
typeof stance?.hedge_per_1k_tokens === "number" stance?.hedge_per_1k_tokens,
? `${stance.hedge_per_1k_tokens.toFixed(1)} per 1k words` stance?.hedge_emotion_avg,
: "Word frequency" )}
}
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
/> />
<Card <Card
label="Certainty Words" label="Certainty Words"
value={stance?.certainty_total?.toLocaleString() ?? "—"} value={stance?.certainty_total?.toLocaleString() ?? "—"}
sublabel={ sublabel={stanceSublabel(
typeof stance?.certainty_per_1k_tokens === "number" stance?.certainty_per_1k_tokens,
? `${stance.certainty_per_1k_tokens.toFixed(1)} per 1k words` stance?.certainty_emotion_avg,
: "Word frequency" )}
}
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
/> />
<Card <Card
label="Need/Should Words" label="Need/Should Words"
value={stance?.deontic_total?.toLocaleString() ?? "—"} value={stance?.deontic_total?.toLocaleString() ?? "—"}
sublabel={ sublabel={stanceSublabel(
typeof stance?.deontic_per_1k_tokens === "number" stance?.deontic_per_1k_tokens,
? `${stance.deontic_per_1k_tokens.toFixed(1)} per 1k words` stance?.deontic_emotion_avg,
: "Word frequency" )}
}
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
/> />
<Card <Card
label="Permission Words" label="Permission Words"
value={stance?.permission_total?.toLocaleString() ?? "—"} value={stance?.permission_total?.toLocaleString() ?? "—"}
sublabel={ sublabel={stanceSublabel(
typeof stance?.permission_per_1k_tokens === "number" stance?.permission_per_1k_tokens,
? `${stance.permission_per_1k_tokens.toFixed(1)} per 1k words` stance?.permission_emotion_avg,
: "Word frequency" )}
}
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
/> />

View File

@@ -168,6 +168,10 @@ type StanceMarkers = {
certainty_per_1k_tokens: number; certainty_per_1k_tokens: number;
deontic_per_1k_tokens: number; deontic_per_1k_tokens: number;
permission_per_1k_tokens: number; permission_per_1k_tokens: number;
hedge_emotion_avg?: Record<string, number>;
certainty_emotion_avg?: Record<string, number>;
deontic_emotion_avg?: Record<string, number>;
permission_emotion_avg?: Record<string, number>;
}; };
type EntityEmotionAggregate = { type EntityEmotionAggregate = {

View File

@@ -67,6 +67,12 @@ class CulturalAnalysis:
def get_stance_markers(self, df: pd.DataFrame) -> dict[str, Any]: def get_stance_markers(self, df: pd.DataFrame) -> dict[str, Any]:
s = df[self.content_col].fillna("").astype(str) s = df[self.content_col].fillna("").astype(str)
emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
emotion_cols = [
c
for c in df.columns
if c.startswith("emotion_") and c not in emotion_exclusions
]
hedge_pattern = re.compile( hedge_pattern = re.compile(
r"\b(maybe|perhaps|possibly|probably|likely|seems|seem|i think|i feel|i guess|kind of|sort of|somewhat)\b" r"\b(maybe|perhaps|possibly|probably|likely|seems|seem|i think|i feel|i guess|kind of|sort of|somewhat)\b"
@@ -88,7 +94,7 @@ class CulturalAnalysis:
0, 1 0, 1
) )
return { result = {
"hedge_total": int(hedge_counts.sum()), "hedge_total": int(hedge_counts.sum()),
"certainty_total": int(certainty_counts.sum()), "certainty_total": int(certainty_counts.sum()),
"deontic_total": int(deontic_counts.sum()), "deontic_total": int(deontic_counts.sum()),
@@ -107,6 +113,32 @@ class CulturalAnalysis:
), ),
} }
if emotion_cols:
emo = df[emotion_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)
result["hedge_emotion_avg"] = (
emo.loc[hedge_counts > 0].mean()
if (hedge_counts > 0).any()
else pd.Series(0.0, index=emotion_cols)
).to_dict()
result["certainty_emotion_avg"] = (
emo.loc[certainty_counts > 0].mean()
if (certainty_counts > 0).any()
else pd.Series(0.0, index=emotion_cols)
).to_dict()
result["deontic_emotion_avg"] = (
emo.loc[deontic_counts > 0].mean()
if (deontic_counts > 0).any()
else pd.Series(0.0, index=emotion_cols)
).to_dict()
result["permission_emotion_avg"] = (
emo.loc[perm_counts > 0].mean()
if (perm_counts > 0).any()
else pd.Series(0.0, index=emotion_cols)
).to_dict()
return result
def get_avg_emotions_per_entity( def get_avg_emotions_per_entity(
self, df: pd.DataFrame, top_n: int = 25, min_posts: int = 10 self, df: pd.DataFrame, top_n: int = 25, min_posts: int = 10
) -> dict[str, Any]: ) -> dict[str, Any]: