feat(analysis): add emotional averages to stance markers

2026-04-07 12:49:18 +01:00
parent addc1d4087
commit c6cae040f0
3 changed files with 68 additions and 21 deletions
--- a/frontend/src/components/CulturalStats.tsx
+++ b/frontend/src/components/CulturalStats.tsx
@@ -39,6 +39,21 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
    return `${dominantLabel} (${(dominant[1] * 100).toFixed(1)}%)`;
  };
  /**
   * Builds the sublabel text for a stance-marker card.
   *
   * The first part is the per-1k rate formatted to one decimal place, or the
   * generic "Word frequency" text when no numeric rate is supplied. If
   * `topEmotion` yields anything other than the "—" placeholder for the given
   * averages, that label is appended as the average mood.
   *
   * @param per1kTokens Occurrences per 1k tokens, if available.
   * @param emotionAvg Emotion averages keyed by emotion name, if available.
   * @returns The sublabel string to display.
   */
  const stanceSublabel = (
    per1kTokens: number | undefined,
    emotionAvg: Record<string, number> | undefined,
  ) => {
    let frequencyText = "Word frequency";
    if (typeof per1kTokens === "number") {
      frequencyText = `${per1kTokens.toFixed(1)} per 1k words`;
    }

    const mood = topEmotion(emotionAvg);
    if (mood === "—") {
      // No usable emotion label: show the rate on its own.
      return frequencyText;
    }
    return `${frequencyText} • Avg mood: ${mood}`;
  };
  return (
    <div style={styles.page}>
      <div style={{ ...styles.container, ...styles.grid }}>
@@ -107,41 +122,37 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
        <Card
          label="Hedging Words"
          value={stance?.hedge_total?.toLocaleString() ?? "—"}
-          sublabel={
+          sublabel={stanceSublabel(
-            typeof stance?.hedge_per_1k_tokens === "number"
+            stance?.hedge_per_1k_tokens,
-              ? `${stance.hedge_per_1k_tokens.toFixed(1)} per 1k words`
+            stance?.hedge_emotion_avg,
-              : "Word frequency"
+          )}
          }
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Certainty Words"
          value={stance?.certainty_total?.toLocaleString() ?? "—"}
-          sublabel={
+          sublabel={stanceSublabel(
-            typeof stance?.certainty_per_1k_tokens === "number"
+            stance?.certainty_per_1k_tokens,
-              ? `${stance.certainty_per_1k_tokens.toFixed(1)} per 1k words`
+            stance?.certainty_emotion_avg,
-              : "Word frequency"
+          )}
          }
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Need/Should Words"
          value={stance?.deontic_total?.toLocaleString() ?? "—"}
-          sublabel={
+          sublabel={stanceSublabel(
-            typeof stance?.deontic_per_1k_tokens === "number"
+            stance?.deontic_per_1k_tokens,
-              ? `${stance.deontic_per_1k_tokens.toFixed(1)} per 1k words`
+            stance?.deontic_emotion_avg,
-              : "Word frequency"
+          )}
          }
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Permission Words"
          value={stance?.permission_total?.toLocaleString() ?? "—"}
-          sublabel={
+          sublabel={stanceSublabel(
-            typeof stance?.permission_per_1k_tokens === "number"
+            stance?.permission_per_1k_tokens,
-              ? `${stance.permission_per_1k_tokens.toFixed(1)} per 1k words`
+            stance?.permission_emotion_avg,
-              : "Word frequency"
+          )}
          }
          style={{ gridColumn: "span 3" }}
        />
--- a/frontend/src/types/ApiTypes.ts
+++ b/frontend/src/types/ApiTypes.ts
@@ -168,6 +168,10 @@ type StanceMarkers = {
  certainty_per_1k_tokens: number;
  deontic_per_1k_tokens: number;
  permission_per_1k_tokens: number;
  hedge_emotion_avg?: Record<string, number>;
  certainty_emotion_avg?: Record<string, number>;
  deontic_emotion_avg?: Record<string, number>;
  permission_emotion_avg?: Record<string, number>;
 };
 type EntityEmotionAggregate = {
--- a/server/analysis/cultural.py
+++ b/server/analysis/cultural.py
@@ -67,6 +67,12 @@ class CulturalAnalysis:
    def get_stance_markers(self, df: pd.DataFrame) -> dict[str, Any]:
        s = df[self.content_col].fillna("").astype(str)
        emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
        emotion_cols = [
            c
            for c in df.columns
            if c.startswith("emotion_") and c not in emotion_exclusions
        ]
        hedge_pattern = re.compile(
            r"\b(maybe|perhaps|possibly|probably|likely|seems|seem|i think|i feel|i guess|kind of|sort of|somewhat)\b"
@@ -88,7 +94,7 @@ class CulturalAnalysis:
            0, 1
        )
-        return {
+        result = {
            "hedge_total": int(hedge_counts.sum()),
            "certainty_total": int(certainty_counts.sum()),
            "deontic_total": int(deontic_counts.sum()),
@@ -107,6 +113,32 @@ class CulturalAnalysis:
            ),
        }
        if emotion_cols:
            emo = df[emotion_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)
            result["hedge_emotion_avg"] = (
                emo.loc[hedge_counts > 0].mean()
                if (hedge_counts > 0).any()
                else pd.Series(0.0, index=emotion_cols)
            ).to_dict()
            result["certainty_emotion_avg"] = (
                emo.loc[certainty_counts > 0].mean()
                if (certainty_counts > 0).any()
                else pd.Series(0.0, index=emotion_cols)
            ).to_dict()
            result["deontic_emotion_avg"] = (
                emo.loc[deontic_counts > 0].mean()
                if (deontic_counts > 0).any()
                else pd.Series(0.0, index=emotion_cols)
            ).to_dict()
            result["permission_emotion_avg"] = (
                emo.loc[perm_counts > 0].mean()
                if (perm_counts > 0).any()
                else pd.Series(0.0, index=emotion_cols)
            ).to_dict()
        return result
    def get_avg_emotions_per_entity(
        self, df: pd.DataFrame, top_n: int = 25, min_posts: int = 10
    ) -> dict[str, Any]: