feat(analysis): add emotional averages to stance markers

2026-04-07 12:49:18 +01:00
parent addc1d4087
commit c6cae040f0
3 changed files with 68 additions and 21 deletions
--- a/frontend/src/components/CulturalStats.tsx
+++ b/frontend/src/components/CulturalStats.tsx
@@ -39,6 +39,21 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
    return `${dominantLabel} (${(dominant[1] * 100).toFixed(1)}%)`;
  };

+  /**
+   * Builds the sublabel text passed to a stance-marker card.
+   *
+   * The leading part is the per-1k rate formatted to one decimal place, or
+   * the generic "Word frequency" text when the rate is not a number. If
+   * `topEmotion` returns anything other than "—", that label is appended
+   * after a bullet as the average mood.
+   */
+  const stanceSublabel = (
+    per1kTokens: number | undefined,
+    emotionAvg: Record<string, number> | undefined,
+  ) => {
+    let frequencyText = "Word frequency";
+    if (typeof per1kTokens === "number") {
+      frequencyText = `${per1kTokens.toFixed(1)} per 1k words`;
+    }
+
+    const mood = topEmotion(emotionAvg);
+    if (mood === "—") {
+      // Nothing to append: show the rate text on its own.
+      return frequencyText;
+    }
+
+    return `${frequencyText} • Avg mood: ${mood}`;
+  };
+
  return (
    <div style={styles.page}>
      <div style={{ ...styles.container, ...styles.grid }}>
@@ -107,41 +122,37 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
        <Card
          label="Hedging Words"
          value={stance?.hedge_total?.toLocaleString() ?? "—"}
-          sublabel={
-            typeof stance?.hedge_per_1k_tokens === "number"
-              ? `${stance.hedge_per_1k_tokens.toFixed(1)} per 1k words`
-              : "Word frequency"
-          }
+          sublabel={stanceSublabel(
+            stance?.hedge_per_1k_tokens,
+            stance?.hedge_emotion_avg,
+          )}
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Certainty Words"
          value={stance?.certainty_total?.toLocaleString() ?? "—"}
-          sublabel={
-            typeof stance?.certainty_per_1k_tokens === "number"
-              ? `${stance.certainty_per_1k_tokens.toFixed(1)} per 1k words`
-              : "Word frequency"
-          }
+          sublabel={stanceSublabel(
+            stance?.certainty_per_1k_tokens,
+            stance?.certainty_emotion_avg,
+          )}
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Need/Should Words"
          value={stance?.deontic_total?.toLocaleString() ?? "—"}
-          sublabel={
-            typeof stance?.deontic_per_1k_tokens === "number"
-              ? `${stance.deontic_per_1k_tokens.toFixed(1)} per 1k words`
-              : "Word frequency"
-          }
+          sublabel={stanceSublabel(
+            stance?.deontic_per_1k_tokens,
+            stance?.deontic_emotion_avg,
+          )}
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Permission Words"
          value={stance?.permission_total?.toLocaleString() ?? "—"}
-          sublabel={
-            typeof stance?.permission_per_1k_tokens === "number"
-              ? `${stance.permission_per_1k_tokens.toFixed(1)} per 1k words`
-              : "Word frequency"
-          }
+          sublabel={stanceSublabel(
+            stance?.permission_per_1k_tokens,
+            stance?.permission_emotion_avg,
+          )}
          style={{ gridColumn: "span 3" }}
        />

--- a/frontend/src/types/ApiTypes.ts
+++ b/frontend/src/types/ApiTypes.ts
@@ -168,6 +168,10 @@ type StanceMarkers = {
  certainty_per_1k_tokens: number;
  deontic_per_1k_tokens: number;
  permission_per_1k_tokens: number;
+  hedge_emotion_avg?: Record<string, number>;
+  certainty_emotion_avg?: Record<string, number>;
+  deontic_emotion_avg?: Record<string, number>;
+  permission_emotion_avg?: Record<string, number>;
 };

 type EntityEmotionAggregate = {
--- a/server/analysis/cultural.py
+++ b/server/analysis/cultural.py
@@ -67,6 +67,12 @@ class CulturalAnalysis:

    def get_stance_markers(self, df: pd.DataFrame) -> dict[str, Any]:
        s = df[self.content_col].fillna("").astype(str)
+        emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
+        emotion_cols = [
+            c
+            for c in df.columns
+            if c.startswith("emotion_") and c not in emotion_exclusions
+        ]

        hedge_pattern = re.compile(
            r"\b(maybe|perhaps|possibly|probably|likely|seems|seem|i think|i feel|i guess|kind of|sort of|somewhat)\b"
@@ -88,7 +94,7 @@ class CulturalAnalysis:
            0, 1
        )

-        return {
+        result = {
            "hedge_total": int(hedge_counts.sum()),
            "certainty_total": int(certainty_counts.sum()),
            "deontic_total": int(deontic_counts.sum()),
@@ -107,6 +113,32 @@ class CulturalAnalysis:
            ),
        }

+        if emotion_cols:
+            emo = df[emotion_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)
+
+            result["hedge_emotion_avg"] = (
+                emo.loc[hedge_counts > 0].mean()
+                if (hedge_counts > 0).any()
+                else pd.Series(0.0, index=emotion_cols)
+            ).to_dict()
+            result["certainty_emotion_avg"] = (
+                emo.loc[certainty_counts > 0].mean()
+                if (certainty_counts > 0).any()
+                else pd.Series(0.0, index=emotion_cols)
+            ).to_dict()
+            result["deontic_emotion_avg"] = (
+                emo.loc[deontic_counts > 0].mean()
+                if (deontic_counts > 0).any()
+                else pd.Series(0.0, index=emotion_cols)
+            ).to_dict()
+            result["permission_emotion_avg"] = (
+                emo.loc[perm_counts > 0].mean()
+                if (perm_counts > 0).any()
+                else pd.Series(0.0, index=emotion_cols)
+            ).to_dict()
+
+        return result
+
    def get_avg_emotions_per_entity(
        self, df: pd.DataFrame, top_n: int = 25, min_posts: int = 10
    ) -> dict[str, Any]: