feat(analysis): add emotional averages to stance markers
This commit is contained in:
@@ -39,6 +39,21 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
|||||||
return `${dominantLabel} (${(dominant[1] * 100).toFixed(1)}%)`;
|
return `${dominantLabel} (${(dominant[1] * 100).toFixed(1)}%)`;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Builds the sublabel text for a stance-marker card.
 *
 * The first part is `"<rate> per 1k words"` (one decimal place) when
 * `per1kTokens` is a finite number, and the generic `"Word frequency"`
 * fallback otherwise. If `topEmotion(emotionAvg)` returns anything other
 * than the "—" placeholder, that label is appended as
 * `" • Avg mood: <label>"`.
 *
 * @param per1kTokens - Rate value to display; may be undefined.
 * @param emotionAvg - Mapping forwarded unchanged to `topEmotion`; may be undefined.
 * @returns The sublabel string to display.
 */
const stanceSublabel = (
  per1kTokens: number | undefined,
  emotionAvg: Record<string, number> | undefined,
): string => {
  // A bare `typeof === "number"` check also accepts NaN and ±Infinity, which
  // would be rendered as "NaN per 1k words"; require a finite value instead.
  const hasRate =
    typeof per1kTokens === "number" && Number.isFinite(per1kTokens);
  const rateLabel = hasRate
    ? `${per1kTokens.toFixed(1)} per 1k words`
    : "Word frequency";

  const emotionLabel = topEmotion(emotionAvg);

  // "—" is the value this function treats as "no emotion to show".
  if (emotionLabel === "—") {
    return rateLabel;
  }
  return `${rateLabel} • Avg mood: ${emotionLabel}`;
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div style={styles.page}>
|
<div style={styles.page}>
|
||||||
<div style={{ ...styles.container, ...styles.grid }}>
|
<div style={{ ...styles.container, ...styles.grid }}>
|
||||||
@@ -107,41 +122,37 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
|||||||
<Card
|
<Card
|
||||||
label="Hedging Words"
|
label="Hedging Words"
|
||||||
value={stance?.hedge_total?.toLocaleString() ?? "—"}
|
value={stance?.hedge_total?.toLocaleString() ?? "—"}
|
||||||
sublabel={
|
sublabel={stanceSublabel(
|
||||||
typeof stance?.hedge_per_1k_tokens === "number"
|
stance?.hedge_per_1k_tokens,
|
||||||
? `${stance.hedge_per_1k_tokens.toFixed(1)} per 1k words`
|
stance?.hedge_emotion_avg,
|
||||||
: "Word frequency"
|
)}
|
||||||
}
|
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Certainty Words"
|
label="Certainty Words"
|
||||||
value={stance?.certainty_total?.toLocaleString() ?? "—"}
|
value={stance?.certainty_total?.toLocaleString() ?? "—"}
|
||||||
sublabel={
|
sublabel={stanceSublabel(
|
||||||
typeof stance?.certainty_per_1k_tokens === "number"
|
stance?.certainty_per_1k_tokens,
|
||||||
? `${stance.certainty_per_1k_tokens.toFixed(1)} per 1k words`
|
stance?.certainty_emotion_avg,
|
||||||
: "Word frequency"
|
)}
|
||||||
}
|
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Need/Should Words"
|
label="Need/Should Words"
|
||||||
value={stance?.deontic_total?.toLocaleString() ?? "—"}
|
value={stance?.deontic_total?.toLocaleString() ?? "—"}
|
||||||
sublabel={
|
sublabel={stanceSublabel(
|
||||||
typeof stance?.deontic_per_1k_tokens === "number"
|
stance?.deontic_per_1k_tokens,
|
||||||
? `${stance.deontic_per_1k_tokens.toFixed(1)} per 1k words`
|
stance?.deontic_emotion_avg,
|
||||||
: "Word frequency"
|
)}
|
||||||
}
|
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Permission Words"
|
label="Permission Words"
|
||||||
value={stance?.permission_total?.toLocaleString() ?? "—"}
|
value={stance?.permission_total?.toLocaleString() ?? "—"}
|
||||||
sublabel={
|
sublabel={stanceSublabel(
|
||||||
typeof stance?.permission_per_1k_tokens === "number"
|
stance?.permission_per_1k_tokens,
|
||||||
? `${stance.permission_per_1k_tokens.toFixed(1)} per 1k words`
|
stance?.permission_emotion_avg,
|
||||||
: "Word frequency"
|
)}
|
||||||
}
|
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
|||||||
@@ -168,6 +168,10 @@ type StanceMarkers = {
|
|||||||
certainty_per_1k_tokens: number;
|
certainty_per_1k_tokens: number;
|
||||||
deontic_per_1k_tokens: number;
|
deontic_per_1k_tokens: number;
|
||||||
permission_per_1k_tokens: number;
|
permission_per_1k_tokens: number;
|
||||||
|
hedge_emotion_avg?: Record<string, number>;
|
||||||
|
certainty_emotion_avg?: Record<string, number>;
|
||||||
|
deontic_emotion_avg?: Record<string, number>;
|
||||||
|
permission_emotion_avg?: Record<string, number>;
|
||||||
};
|
};
|
||||||
|
|
||||||
type EntityEmotionAggregate = {
|
type EntityEmotionAggregate = {
|
||||||
|
|||||||
@@ -67,6 +67,12 @@ class CulturalAnalysis:
|
|||||||
|
|
||||||
def get_stance_markers(self, df: pd.DataFrame) -> dict[str, Any]:
|
def get_stance_markers(self, df: pd.DataFrame) -> dict[str, Any]:
|
||||||
s = df[self.content_col].fillna("").astype(str)
|
s = df[self.content_col].fillna("").astype(str)
|
||||||
|
emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
|
||||||
|
emotion_cols = [
|
||||||
|
c
|
||||||
|
for c in df.columns
|
||||||
|
if c.startswith("emotion_") and c not in emotion_exclusions
|
||||||
|
]
|
||||||
|
|
||||||
hedge_pattern = re.compile(
|
hedge_pattern = re.compile(
|
||||||
r"\b(maybe|perhaps|possibly|probably|likely|seems|seem|i think|i feel|i guess|kind of|sort of|somewhat)\b"
|
r"\b(maybe|perhaps|possibly|probably|likely|seems|seem|i think|i feel|i guess|kind of|sort of|somewhat)\b"
|
||||||
@@ -88,7 +94,7 @@ class CulturalAnalysis:
|
|||||||
0, 1
|
0, 1
|
||||||
)
|
)
|
||||||
|
|
||||||
return {
|
result = {
|
||||||
"hedge_total": int(hedge_counts.sum()),
|
"hedge_total": int(hedge_counts.sum()),
|
||||||
"certainty_total": int(certainty_counts.sum()),
|
"certainty_total": int(certainty_counts.sum()),
|
||||||
"deontic_total": int(deontic_counts.sum()),
|
"deontic_total": int(deontic_counts.sum()),
|
||||||
@@ -107,6 +113,32 @@ class CulturalAnalysis:
|
|||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if emotion_cols:
|
||||||
|
emo = df[emotion_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)
|
||||||
|
|
||||||
|
result["hedge_emotion_avg"] = (
|
||||||
|
emo.loc[hedge_counts > 0].mean()
|
||||||
|
if (hedge_counts > 0).any()
|
||||||
|
else pd.Series(0.0, index=emotion_cols)
|
||||||
|
).to_dict()
|
||||||
|
result["certainty_emotion_avg"] = (
|
||||||
|
emo.loc[certainty_counts > 0].mean()
|
||||||
|
if (certainty_counts > 0).any()
|
||||||
|
else pd.Series(0.0, index=emotion_cols)
|
||||||
|
).to_dict()
|
||||||
|
result["deontic_emotion_avg"] = (
|
||||||
|
emo.loc[deontic_counts > 0].mean()
|
||||||
|
if (deontic_counts > 0).any()
|
||||||
|
else pd.Series(0.0, index=emotion_cols)
|
||||||
|
).to_dict()
|
||||||
|
result["permission_emotion_avg"] = (
|
||||||
|
emo.loc[perm_counts > 0].mean()
|
||||||
|
if (perm_counts > 0).any()
|
||||||
|
else pd.Series(0.0, index=emotion_cols)
|
||||||
|
).to_dict()
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
def get_avg_emotions_per_entity(
|
def get_avg_emotions_per_entity(
|
||||||
self, df: pd.DataFrame, top_n: int = 25, min_posts: int = 10
|
self, df: pd.DataFrame, top_n: int = 25, min_posts: int = 10
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
|
|||||||
Reference in New Issue
Block a user