Merge pull request 'Finish off the links between frontend and backend' (#10 ) from feat/add-frontend-pages into main

Reviewed-on: #10
feat(frontend): reword CulturalStats to improve understandability
2026-03-18 20:30:19 +00:00 · 2026-03-18 19:23:35 +00:00 · 2026-03-18 19:12:25 +00:00 · 2026-03-18 19:12:11 +00:00 · 2026-03-18 19:11:18 +00:00 · 2026-03-18 19:02:11 +00:00
21 changed files with 1364 additions and 406 deletions
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -2,7 +2,7 @@
 <html lang="en">
  <head>
    <meta charset="UTF-8" />
-    <link rel="icon" type="image/svg+xml" href="/vite.svg" />
+    <link rel="icon" type="image/png" href="/icon.png" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>frontend</title>
  </head>
--- a/frontend/public/icon.png
+++ b/frontend/public/icon.png
--- a/frontend/src/components/CulturalStats.tsx
+++ b/frontend/src/components/CulturalStats.tsx
@@ -0,0 +1,158 @@
+import Card from "./Card";
+import StatsStyling from "../styles/stats_styling";
+import type { CulturalAnalysisResponse } from "../types/ApiTypes";
+
+// Local alias for the shared style object imported from ../styles/stats_styling.
+const styles = StatsStyling;
+
+/** Props accepted by the CulturalStats component. */
+type CulturalStatsProps = {
+  /** Analysis payload to render, typed as CulturalAnalysisResponse from ../types/ApiTypes. */
+  data: CulturalAnalysisResponse;
+};
+
+/**
+ * Picks the highest-scoring emotion from an emotion-average map and formats it
+ * as `label (xx.x%)`, where the percentage is the score multiplied by 100.
+ *
+ * @param emotionAvg - Map of emotion key to average score; may be undefined.
+ * @returns The formatted dominant emotion, or "—" when the map is empty or missing.
+ */
+const topEmotion = (emotionAvg: Record<string, number> | undefined) => {
+  let dominant: [string, number] | null = null;
+  // Strict ">" keeps the first entry on ties, matching a stable descending sort.
+  for (const entry of Object.entries(emotionAvg ?? {})) {
+    if (dominant === null || entry[1] > dominant[1]) {
+      dominant = entry;
+    }
+  }
+
+  if (dominant === null) {
+    return "—";
+  }
+
+  // Strip only a leading "emotion_" prefix so the rest of the key is left untouched.
+  const dominantLabel = dominant[0].replace(/^emotion_/, "");
+  return `${dominantLabel} (${(dominant[1] * 100).toFixed(1)}%)`;
+};
+
+/**
+ * Renders the cultural analysis view: in-group/out-group word and post counts,
+ * their shares, stance-marker totals, the dominant emotion for each group, and
+ * the 20 entities with the highest post count.
+ *
+ * Any figure that is absent from `data` is shown as "—" rather than as zero.
+ *
+ * @param data - Cultural analysis payload to display.
+ */
+const CulturalStats = ({ data }: CulturalStatsProps) => {
+  const identity = data.identity_markers;
+  const stance = data.stance_markers;
+  // Keep missing counts as null so the cards show "—" instead of a misleading 0.
+  const inGroupWords = identity?.in_group_usage ?? null;
+  const outGroupWords = identity?.out_group_usage ?? null;
+  const totalGroupWords = inGroupWords === null && outGroupWords === null
+    ? null
+    : (inGroupWords ?? 0) + (outGroupWords ?? 0);
+  const inGroupWordRate = typeof identity?.in_group_ratio === "number"
+    ? identity.in_group_ratio * 100
+    : null;
+  const outGroupWordRate = typeof identity?.out_group_ratio === "number"
+    ? identity.out_group_ratio * 100
+    : null;
+  const rawEntities = data.avg_emotion_per_entity?.entity_emotion_avg ?? {};
+  // Most-mentioned entities first; capped at 20 to keep the list readable.
+  const entities = Object.entries(rawEntities)
+    .sort((a, b) => (b[1].post_count - a[1].post_count))
+    .slice(0, 20);
+
+  return (
+    <div style={styles.page}>
+      <div style={{ ...styles.container, ...styles.grid }}>
+        <div style={{ ...styles.card, gridColumn: "span 12" }}>
+          <h2 style={styles.sectionTitle}>Community Framing Overview</h2>
+          <p style={styles.sectionSubtitle}>Simple view of how often people use "us" words vs "them" words, and the tone around that language.</p>
+        </div>
+
+        <Card
+          label="In-Group Words"
+          value={inGroupWords?.toLocaleString() ?? "—"}
+          sublabel="Times we/us/our appears"
+          style={{ gridColumn: "span 3" }}
+        />
+        <Card
+          label="Out-Group Words"
+          value={outGroupWords?.toLocaleString() ?? "—"}
+          sublabel="Times they/them/their appears"
+          style={{ gridColumn: "span 3" }}
+        />
+        <Card
+          label="In-Group Posts"
+          value={identity?.in_group_posts?.toLocaleString() ?? "—"}
+          sublabel='Posts leaning toward "us" language'
+          style={{ gridColumn: "span 3" }}
+        />
+        <Card
+          label="Out-Group Posts"
+          value={identity?.out_group_posts?.toLocaleString() ?? "—"}
+          sublabel='Posts leaning toward "them" language'
+          style={{ gridColumn: "span 3" }}
+        />
+
+        <Card
+          label="Balanced Posts"
+          value={identity?.tie_posts?.toLocaleString() ?? "—"}
+          sublabel="Posts with equal us/them signals"
+          style={{ gridColumn: "span 3" }}
+        />
+        <Card
+          label="Total Group Words"
+          value={totalGroupWords?.toLocaleString() ?? "—"}
+          sublabel="In-group + out-group words"
+          style={{ gridColumn: "span 3" }}
+        />
+        <Card
+          label="In-Group Share"
+          value={inGroupWordRate === null ? "—" : `${inGroupWordRate.toFixed(2)}%`}
+          sublabel="Share of all words"
+          style={{ gridColumn: "span 3" }}
+        />
+        <Card
+          label="Out-Group Share"
+          value={outGroupWordRate === null ? "—" : `${outGroupWordRate.toFixed(2)}%`}
+          sublabel="Share of all words"
+          style={{ gridColumn: "span 3" }}
+        />
+
+        <Card
+          label="Hedging Words"
+          value={stance?.hedge_total?.toLocaleString() ?? "—"}
+          sublabel={typeof stance?.hedge_per_1k_tokens === "number" ? `${stance.hedge_per_1k_tokens.toFixed(1)} per 1k words` : "Word frequency"}
+          style={{ gridColumn: "span 3" }}
+        />
+        <Card
+          label="Certainty Words"
+          value={stance?.certainty_total?.toLocaleString() ?? "—"}
+          sublabel={typeof stance?.certainty_per_1k_tokens === "number" ? `${stance.certainty_per_1k_tokens.toFixed(1)} per 1k words` : "Word frequency"}
+          style={{ gridColumn: "span 3" }}
+        />
+        <Card
+          label="Need/Should Words"
+          value={stance?.deontic_total?.toLocaleString() ?? "—"}
+          sublabel={typeof stance?.deontic_per_1k_tokens === "number" ? `${stance.deontic_per_1k_tokens.toFixed(1)} per 1k words` : "Word frequency"}
+          style={{ gridColumn: "span 3" }}
+        />
+        <Card
+          label="Permission Words"
+          value={stance?.permission_total?.toLocaleString() ?? "—"}
+          sublabel={typeof stance?.permission_per_1k_tokens === "number" ? `${stance.permission_per_1k_tokens.toFixed(1)} per 1k words` : "Word frequency"}
+          style={{ gridColumn: "span 3" }}
+        />
+
+        <div style={{ ...styles.card, gridColumn: "span 6" }}>
+          <h2 style={styles.sectionTitle}>Mood in "Us" Posts</h2>
+          <p style={styles.sectionSubtitle}>Most likely emotion when in-group wording is stronger.</p>
+          <div style={styles.topUserName}>{topEmotion(identity?.in_group_emotion_avg)}</div>
+        </div>
+
+        <div style={{ ...styles.card, gridColumn: "span 6" }}>
+          <h2 style={styles.sectionTitle}>Mood in "Them" Posts</h2>
+          <p style={styles.sectionSubtitle}>Most likely emotion when out-group wording is stronger.</p>
+          <div style={styles.topUserName}>{topEmotion(identity?.out_group_emotion_avg)}</div>
+        </div>
+
+        <div style={{ ...styles.card, gridColumn: "span 12" }}>
+          <h2 style={styles.sectionTitle}>Entity Mood Snapshot</h2>
+          <p style={styles.sectionSubtitle}>Most mentioned entities and the mood that appears most with each.</p>
+          {!entities.length ? (
+            <div style={styles.topUserMeta}>No entity-level cultural data available.</div>
+          ) : (
+            <div style={{ ...styles.topUsersList, maxHeight: 420, overflowY: "auto" }}>
+              {entities.map(([entity, aggregate]) => (
+                <div key={entity} style={styles.topUserItem}>
+                  <div style={styles.topUserName}>{entity}</div>
+                  <div style={styles.topUserMeta}>
+                    {aggregate.post_count.toLocaleString()} posts • Likely mood: {topEmotion(aggregate.emotion_avg)}
+                  </div>
+                </div>
+              ))}
+            </div>
+          )}
+        </div>
+      </div>
+    </div>
+  );
+};
+
+export default CulturalStats;
--- a/frontend/src/components/EmotionalStats.tsx
+++ b/frontend/src/components/EmotionalStats.tsx
@@ -9,6 +9,9 @@ type EmotionalStatsProps = {

 const EmotionalStats = ({contentData}: EmotionalStatsProps) => {
  const rows = contentData.average_emotion_by_topic ?? [];
+  const overallEmotionAverage = contentData.overall_emotion_average ?? [];
+  const dominantEmotionDistribution = contentData.dominant_emotion_distribution ?? [];
+  const emotionBySource = contentData.emotion_by_source ?? [];
  const lowSampleThreshold = 20;
  const stableSampleThreshold = 50;
  const emotionKeys = rows.length
@@ -64,41 +67,106 @@ const EmotionalStats = ({contentData}: EmotionalStatsProps) => {
  return (
    <div style={styles.page}>
      <div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
-        <h2 style={styles.sectionTitle}>Average Emotion by Topic</h2>
-        <p style={styles.sectionSubtitle}>Read confidence together with sample size. Topics with fewer than {lowSampleThreshold} events are usually noisy and less reliable.</p>
+        <h2 style={styles.sectionTitle}>Topic Mood Overview</h2>
+        <p style={styles.sectionSubtitle}>Use the strength score together with post count. Topics with fewer than {lowSampleThreshold} events are often noisy.</p>
        <div style={styles.emotionalSummaryRow}>
          <span><strong style={{ color: "#24292f" }}>Topics:</strong> {strongestPerTopic.length}</span>
-          <span><strong style={{ color: "#24292f" }}>Median Sample:</strong> {medianSampleSize} events</span>
-          <span><strong style={{ color: "#24292f" }}>Low Sample (&lt;{lowSampleThreshold}):</strong> {lowSampleTopics}</span>
-          <span><strong style={{ color: "#24292f" }}>Stable Sample ({stableSampleThreshold}+):</strong> {stableSampleTopics}</span>
+          <span><strong style={{ color: "#24292f" }}>Median Posts:</strong> {medianSampleSize}</span>
+          <span><strong style={{ color: "#24292f" }}>Small Topics (&lt;{lowSampleThreshold}):</strong> {lowSampleTopics}</span>
+          <span><strong style={{ color: "#24292f" }}>Stable Topics ({stableSampleThreshold}+):</strong> {stableSampleTopics}</span>
        </div>
        <p style={{ ...styles.sectionSubtitle, marginTop: 10, marginBottom: 0 }}>
-          Confidence reflects how strongly one emotion leads within a topic, not model accuracy. Use larger samples for stronger conclusions.
+          Strength means how far the top emotion is ahead in that topic. It does not mean model accuracy.
        </p>
      </div>

      <div style={{ ...styles.container, ...styles.grid }}>
+        <div style={{ ...styles.card, gridColumn: "span 4" }}>
+          <h2 style={styles.sectionTitle}>Mood Averages</h2>
+          <p style={styles.sectionSubtitle}>Average score for each emotion.</p>
+          {!overallEmotionAverage.length ? (
+            <div style={styles.topUserMeta}>No overall emotion averages available.</div>
+          ) : (
+            <div style={{ ...styles.topUsersList, maxHeight: 260, overflowY: "auto" }}>
+              {[...overallEmotionAverage]
+                .sort((a, b) => b.score - a.score)
+                .map((row) => (
+                  <div key={row.emotion} style={styles.topUserItem}>
+                    <div style={styles.topUserName}>{formatEmotion(row.emotion)}</div>
+                    <div style={styles.topUserMeta}>{row.score.toFixed(3)}</div>
+                  </div>
+                ))}
+            </div>
+          )}
+        </div>
+
+        <div style={{ ...styles.card, gridColumn: "span 4" }}>
+          <h2 style={styles.sectionTitle}>Mood Split</h2>
+          <p style={styles.sectionSubtitle}>How often each emotion is dominant.</p>
+          {!dominantEmotionDistribution.length ? (
+            <div style={styles.topUserMeta}>No dominant-emotion split available.</div>
+          ) : (
+            <div style={{ ...styles.topUsersList, maxHeight: 260, overflowY: "auto" }}>
+              {[...dominantEmotionDistribution]
+                .sort((a, b) => b.ratio - a.ratio)
+                .map((row) => (
+                  <div key={row.emotion} style={styles.topUserItem}>
+                    <div style={styles.topUserName}>{formatEmotion(row.emotion)}</div>
+                    <div style={styles.topUserMeta}>{(row.ratio * 100).toFixed(1)}% • {row.count.toLocaleString()} events</div>
+                  </div>
+                ))}
+            </div>
+          )}
+        </div>
+
+        <div style={{ ...styles.card, gridColumn: "span 4" }}>
+          <h2 style={styles.sectionTitle}>Mood by Source</h2>
+          <p style={styles.sectionSubtitle}>Leading emotion in each source.</p>
+          {!emotionBySource.length ? (
+            <div style={styles.topUserMeta}>No source emotion profile available.</div>
+          ) : (
+            <div style={{ ...styles.topUsersList, maxHeight: 260, overflowY: "auto" }}>
+              {[...emotionBySource]
+                .sort((a, b) => b.event_count - a.event_count)
+                .map((row) => (
+                  <div key={row.source} style={styles.topUserItem}>
+                    <div style={styles.topUserName}>{row.source}</div>
+                    <div style={styles.topUserMeta}>
+                      {formatEmotion(row.dominant_emotion)} • {row.dominant_score.toFixed(3)} • {row.event_count.toLocaleString()} events
+                    </div>
+                  </div>
+                ))}
+            </div>
+          )}
+        </div>
+
+        <div style={{ ...styles.card, gridColumn: "span 12" }}>
+          <h2 style={styles.sectionTitle}>Topic Snapshots</h2>
+          <p style={styles.sectionSubtitle}>Per-topic mood with strength and post count.</p>
+          <div style={{ ...styles.grid, marginTop: 10 }}>
            {strongestPerTopic.map((topic) => (
-          <div key={topic.topic} style={{ ...styles.card, gridColumn: "span 4" }}>
+              <div key={topic.topic} style={{ ...styles.cardBase, gridColumn: "span 4" }}>
                <h3 style={{ ...styles.sectionTitle, marginBottom: 6 }}>{topic.topic}</h3>
                <div style={styles.emotionalTopicLabel}>
-              Top Emotion
+                  Likely Mood
                </div>
                <div style={styles.emotionalTopicValue}>
                  {formatEmotion(topic.emotion)}
                </div>
                <div style={styles.emotionalMetricRow}>
-              <span>Confidence</span>
+                  <span>Strength</span>
                  <span style={styles.emotionalMetricValue}>{topic.value.toFixed(3)}</span>
                </div>
                <div style={styles.emotionalMetricRowCompact}>
-              <span>Sample Size</span>
-              <span style={styles.emotionalMetricValue}>{topic.count} events</span>
+                  <span>Posts in Topic</span>
+                  <span style={styles.emotionalMetricValue}>{topic.count}</span>
                </div>
              </div>
            ))}
          </div>
        </div>
+      </div>
+    </div>
  );
 }

--- a/frontend/src/components/InteractionalStats.tsx
+++ b/frontend/src/components/InteractionalStats.tsx
@@ -0,0 +1,208 @@
+import Card from "./Card";
+import StatsStyling from "../styles/stats_styling";
+import type { InteractionAnalysisResponse } from "../types/ApiTypes";
+import {
+  ResponsiveContainer,
+  BarChart,
+  Bar,
+  XAxis,
+  YAxis,
+  CartesianGrid,
+  Tooltip,
+  PieChart,
+  Pie,
+  Cell,
+  Legend,
+} from "recharts";
+
+// Local alias for the shared style object imported from ../styles/stats_styling.
+const styles = StatsStyling;
+
+/** Props accepted by the InteractionalStats component. */
+type InteractionalStatsProps = {
+  /** Analysis payload to render, typed as InteractionAnalysisResponse from ../types/ApiTypes. */
+  data: InteractionAnalysisResponse;
+};
+
+// Slice colours for the concentration donut: top 10% authors first, then everyone else.
+const PIE_COLORS = ["#2b6777", "#c8d8e4"];
+
+/**
+ * Renders the interaction analysis view: reply-graph size cards, comment
+ * concentration cards, a bar chart of the top 8 reply pairs, a donut of the
+ * top-10% comment share, and a list of up to 20 reply pairs.
+ *
+ * Figures that are absent from `data` are shown as "—", and each chart or list
+ * falls back to a short message when it has nothing to draw.
+ *
+ * @param data - Interaction analysis payload to display.
+ */
+const InteractionalStats = ({ data }: InteractionalStatsProps) => {
+  const graph = data.interaction_graph ?? {};
+  // Count every user in the graph, including users who only appear as reply targets.
+  const graphUsers = new Set<string>();
+  for (const [source, targets] of Object.entries(graph)) {
+    graphUsers.add(source);
+    for (const target of Object.keys(targets)) {
+      graphUsers.add(target);
+    }
+  }
+  const userCount = graphUsers.size;
+  const edges = Object.values(graph).flatMap((targets) => Object.values(targets));
+  const edgeCount = edges.length;
+  const interactionVolume = edges.reduce((sum, value) => sum + value, 0);
+  const concentration = data.conversation_concentration;
+  const topTenCommentShare = typeof concentration?.top_10pct_comment_share === "number"
+    ? concentration.top_10pct_comment_share
+    : null;
+  const topTenAuthorCount = typeof concentration?.top_10pct_author_count === "number"
+    ? concentration.top_10pct_author_count
+    : null;
+  const totalCommentingAuthors = typeof concentration?.total_commenting_authors === "number"
+    ? concentration.total_commenting_authors
+    : null;
+  const singleCommentAuthorRatio = typeof concentration?.single_comment_author_ratio === "number"
+    ? concentration.single_comment_author_ratio
+    : null;
+  const singleCommentAuthors = typeof concentration?.single_comment_authors === "number"
+    ? concentration.single_comment_authors
+    : null;
+
+  // Keep only well-formed [[source, target], count] tuples before rendering them.
+  const topPairs = (data.top_interaction_pairs ?? [])
+    .filter((item): item is [[string, string], number] => {
+      if (!Array.isArray(item) || item.length !== 2) {
+        return false;
+      }
+
+      const pair = item[0];
+      const count = item[1];
+
+      return Array.isArray(pair)
+        && pair.length === 2
+        && typeof pair[0] === "string"
+        && typeof pair[1] === "string"
+        && typeof count === "number";
+    })
+    .slice(0, 20);
+
+  // The chart shows the first 8 pairs; `rank` labels the axis, `pair` labels the tooltip.
+  const topPairChartData = topPairs.slice(0, 8).map(([[source, target], value], index) => ({
+    pair: `${source} -> ${target}`,
+    replies: value,
+    rank: index + 1,
+  }));
+
+  const topTenSharePercent = topTenCommentShare === null
+    ? null
+    : topTenCommentShare * 100;
+  // Clamp at 0 so a share above 100% cannot produce a negative slice.
+  const nonTopTenSharePercent = topTenSharePercent === null
+    ? null
+    : Math.max(0, 100 - topTenSharePercent);
+
+  let concentrationPieData: { name: string; value: number }[] = [];
+  if (topTenSharePercent !== null && nonTopTenSharePercent !== null) {
+    concentrationPieData = [
+      { name: "Top 10% authors", value: topTenSharePercent },
+      { name: "Other authors", value: nonTopTenSharePercent },
+    ];
+  }
+
+  return (
+    <div style={styles.page}>
+      <div style={{ ...styles.container, ...styles.grid }}>
+        <div style={{ ...styles.card, gridColumn: "span 12" }}>
+          <h2 style={styles.sectionTitle}>Conversation Overview</h2>
+          <p style={styles.sectionSubtitle}>Who talks to who, and how concentrated the replies are.</p>
+        </div>
+
+        <Card
+          label="Average Reply Depth"
+          value={typeof data.average_thread_depth === "number" ? data.average_thread_depth.toFixed(2) : "—"}
+          sublabel="How deep reply chains usually go"
+          style={{ gridColumn: "span 3" }}
+        />
+        <Card
+          label="Users in Network"
+          value={userCount.toLocaleString()}
+          sublabel="Users in the reply graph"
+          style={{ gridColumn: "span 3" }}
+        />
+        <Card
+          label="User-to-User Links"
+          value={edgeCount.toLocaleString()}
+          sublabel="Unique reply directions"
+          style={{ gridColumn: "span 3" }}
+        />
+        <Card
+          label="Total Replies"
+          value={interactionVolume.toLocaleString()}
+          sublabel="All reply links combined"
+          style={{ gridColumn: "span 3" }}
+        />
+        <Card
+          label="Concentrated Replies"
+          value={topTenSharePercent === null ? "—" : `${topTenSharePercent.toFixed(1)}%`}
+          sublabel={topTenAuthorCount === null || totalCommentingAuthors === null
+            ? "Reply share from the top 10% commenters"
+            : `${topTenAuthorCount.toLocaleString()} of ${totalCommentingAuthors.toLocaleString()} authors`}
+          style={{ gridColumn: "span 6" }}
+        />
+        <Card
+          label="Single-Comment Authors"
+          value={singleCommentAuthorRatio === null ? "—" : `${(singleCommentAuthorRatio * 100).toFixed(1)}%`}
+          sublabel={singleCommentAuthors === null
+            ? "Authors who commented exactly once"
+            : `${singleCommentAuthors.toLocaleString()} authors commented exactly once`}
+          style={{ gridColumn: "span 6" }}
+        />
+
+        <div style={{ ...styles.card, gridColumn: "span 12" }}>
+          <h2 style={styles.sectionTitle}>Conversation Visuals</h2>
+          <p style={styles.sectionSubtitle}>Main reply links and concentration split.</p>
+
+          <div style={{ ...styles.grid, marginTop: 12 }}>
+            <div style={{ ...styles.cardBase, gridColumn: "span 6" }}>
+              <h3 style={{ ...styles.sectionTitle, fontSize: "1rem" }}>Top Interaction Pairs</h3>
+              {!topPairChartData.length ? (
+                <div style={styles.topUserMeta}>No interaction pair data available.</div>
+              ) : (
+                <div style={{ width: "100%", height: 300 }}>
+                  <ResponsiveContainer>
+                    <BarChart data={topPairChartData} layout="vertical" margin={{ top: 8, right: 16, left: 16, bottom: 8 }}>
+                      <CartesianGrid strokeDasharray="3 3" stroke="#d9e2ec" />
+                      <XAxis type="number" allowDecimals={false} />
+                      <YAxis
+                        type="category"
+                        dataKey="rank"
+                        tickFormatter={(value) => `#${value}`}
+                        width={36}
+                      />
+                      {/* Show the "source -> target" pair in the tooltip instead of the bare rank. */}
+                      <Tooltip
+                        labelFormatter={(label, payload) => payload?.[0]?.payload?.pair ?? `#${label}`}
+                      />
+                      <Bar dataKey="replies" fill="#2b6777" radius={[0, 6, 6, 0]} />
+                    </BarChart>
+                  </ResponsiveContainer>
+                </div>
+              )}
+            </div>
+
+            <div style={{ ...styles.cardBase, gridColumn: "span 6" }}>
+              <h3 style={{ ...styles.sectionTitle, fontSize: "1rem" }}>Top 10% vs Other Comment Share</h3>
+              {!concentrationPieData.length ? (
+                <div style={styles.topUserMeta}>No comment concentration data available.</div>
+              ) : (
+                <div style={{ width: "100%", height: 300 }}>
+                  <ResponsiveContainer>
+                    <PieChart>
+                      <Pie
+                        data={concentrationPieData}
+                        dataKey="value"
+                        nameKey="name"
+                        innerRadius={56}
+                        outerRadius={88}
+                        paddingAngle={2}
+                      >
+                        {concentrationPieData.map((entry, index) => (
+                          <Cell key={`${entry.name}-${index}`} fill={PIE_COLORS[index % PIE_COLORS.length]} />
+                        ))}
+                      </Pie>
+                      <Tooltip />
+                      <Legend verticalAlign="bottom" height={36} />
+                    </PieChart>
+                  </ResponsiveContainer>
+                </div>
+              )}
+            </div>
+          </div>
+        </div>
+
+        <div style={{ ...styles.card, gridColumn: "span 12" }}>
+          <h2 style={styles.sectionTitle}>Frequent Reply Paths</h2>
+          <p style={styles.sectionSubtitle}>Most common user-to-user reply paths.</p>
+          {!topPairs.length ? (
+            <div style={styles.topUserMeta}>No interaction pair data available.</div>
+          ) : (
+            <div style={{ ...styles.topUsersList, maxHeight: 420, overflowY: "auto" }}>
+              {topPairs.map(([[source, target], value], index) => (
+                <div key={`${source}->${target}-${index}`} style={styles.topUserItem}>
+                  <div style={styles.topUserName}>{source} -&gt; {target}</div>
+                  <div style={styles.topUserMeta}>{value.toLocaleString()} replies</div>
+                </div>
+              ))}
+            </div>
+          )}
+        </div>
+      </div>
+    </div>
+  );
+};
+
+export default InteractionalStats;
--- a/frontend/src/components/LinguisticStats.tsx
+++ b/frontend/src/components/LinguisticStats.tsx
@@ -0,0 +1,91 @@
+import Card from "./Card";
+import StatsStyling from "../styles/stats_styling";
+import type { LinguisticAnalysisResponse } from "../types/ApiTypes";
+
+// Local alias for the shared style object imported from ../styles/stats_styling.
+const styles = StatsStyling;
+
+/** Props accepted by the LinguisticStats component. */
+type LinguisticStatsProps = {
+  /** Analysis payload to render, typed as LinguisticAnalysisResponse from ../types/ApiTypes. */
+  data: LinguisticAnalysisResponse;
+};
+
+/**
+ * Renders the linguistic analysis view: lexical-diversity cards plus lists of
+ * the first 20 words, 10 bigrams and 10 trigrams from the payload.
+ *
+ * The lists are shown in the order the arrays arrive; nothing is re-sorted
+ * here. A missing figure is shown as "—" and an empty list is replaced by a
+ * short message.
+ *
+ * @param data - Linguistic analysis payload to display.
+ */
+const LinguisticStats = ({ data }: LinguisticStatsProps) => {
+  const lexical = data.lexical_diversity;
+  const words = data.word_frequencies ?? [];
+  const bigrams = data.common_two_phrases ?? [];
+  const trigrams = data.common_three_phrases ?? [];
+
+  const topWords = words.slice(0, 20);
+  const topBigrams = bigrams.slice(0, 10);
+  const topTrigrams = trigrams.slice(0, 10);
+
+  return (
+    <div style={styles.page}>
+      <div style={{ ...styles.container, ...styles.grid }}>
+        <div style={{ ...styles.card, gridColumn: "span 12" }}>
+          <h2 style={styles.sectionTitle}>Language Overview</h2>
+          <p style={styles.sectionSubtitle}>Quick read on how broad and repetitive the wording is.</p>
+        </div>
+
+        <Card
+          label="Total Words"
+          value={lexical?.total_tokens?.toLocaleString() ?? "—"}
+          sublabel="Words after basic filtering"
+          style={{ gridColumn: "span 4" }}
+        />
+        <Card
+          label="Unique Words"
+          value={lexical?.unique_tokens?.toLocaleString() ?? "—"}
+          sublabel="Different words used"
+          style={{ gridColumn: "span 4" }}
+        />
+        <Card
+          label="Vocabulary Variety"
+          value={typeof lexical?.ttr === "number" ? lexical.ttr.toFixed(4) : "—"}
+          sublabel="Higher means less repetition"
+          style={{ gridColumn: "span 4" }}
+        />
+
+        <div style={{ ...styles.card, gridColumn: "span 4" }}>
+          <h2 style={styles.sectionTitle}>Top Words</h2>
+          <p style={styles.sectionSubtitle}>Most used single words.</p>
+          {!topWords.length ? (
+            <div style={styles.topUserMeta}>No word frequency data available.</div>
+          ) : (
+            <div style={{ ...styles.topUsersList, maxHeight: 360, overflowY: "auto" }}>
+              {topWords.map((item) => (
+                <div key={item.word} style={styles.topUserItem}>
+                  <div style={styles.topUserName}>{item.word}</div>
+                  <div style={styles.topUserMeta}>{item.count.toLocaleString()} uses</div>
+                </div>
+              ))}
+            </div>
+          )}
+        </div>
+
+        <div style={{ ...styles.card, gridColumn: "span 4" }}>
+          <h2 style={styles.sectionTitle}>Top Bigrams</h2>
+          <p style={styles.sectionSubtitle}>Most used 2-word phrases.</p>
+          {!topBigrams.length ? (
+            <div style={styles.topUserMeta}>No 2-word phrase data available.</div>
+          ) : (
+            <div style={{ ...styles.topUsersList, maxHeight: 360, overflowY: "auto" }}>
+              {topBigrams.map((item) => (
+                <div key={item.ngram} style={styles.topUserItem}>
+                  <div style={styles.topUserName}>{item.ngram}</div>
+                  <div style={styles.topUserMeta}>{item.count.toLocaleString()} uses</div>
+                </div>
+              ))}
+            </div>
+          )}
+        </div>
+
+        <div style={{ ...styles.card, gridColumn: "span 4" }}>
+          <h2 style={styles.sectionTitle}>Top Trigrams</h2>
+          <p style={styles.sectionSubtitle}>Most used 3-word phrases.</p>
+          {!topTrigrams.length ? (
+            <div style={styles.topUserMeta}>No 3-word phrase data available.</div>
+          ) : (
+            <div style={{ ...styles.topUsersList, maxHeight: 360, overflowY: "auto" }}>
+              {topTrigrams.map((item) => (
+                <div key={item.ngram} style={styles.topUserItem}>
+                  <div style={styles.topUserName}>{item.ngram}</div>
+                  <div style={styles.topUserMeta}>{item.count.toLocaleString()} uses</div>
+                </div>
+              ))}
+            </div>
+          )}
+        </div>
+      </div>
+    </div>
+  );
+};
+
+export default LinguisticStats;
--- a/frontend/src/components/SummaryStats.tsx
+++ b/frontend/src/components/SummaryStats.tsx
@@ -58,15 +58,13 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
    const [selectedUser, setSelectedUser] = useState<string | null>(null);
    const selectedUserData: User | null = userData?.users.find((u) => u.author === selectedUser) ?? null;

-    console.log(summary)
-
    return (
    <div style={styles.page}>

        {/* main grid*/}
        <div style={{ ...styles.container, ...styles.grid}}>
            <Card
-            label="Total Events"
+            label="Total Activity"
            value={summary?.total_events ?? "—"}
            sublabel="Posts + comments"
            style={{
@@ -74,15 +72,15 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
            }}
            />
            <Card
-            label="Unique Users"
+            label="Active People"
            value={summary?.unique_users ?? "—"}
-            sublabel="Distinct authors"
+            sublabel="Distinct users"
            style={{
                gridColumn: "span 4"
            }}
            />
            <Card
-            label="Posts / Comments"
+            label="Posts vs Comments"
            value={
                summary
                ? `${summary.total_posts} / ${summary.total_comments}`
@@ -108,13 +106,13 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
            />

            <Card
-            label="Lurker Ratio"
+            label="One-Time Users"
            value={
                typeof summary?.lurker_ratio === "number"
                ? `${Math.round(summary.lurker_ratio * 100)}%`
                : "—"
            }
-            sublabel="Users with only 1 event"
+            sublabel="Users with only one event"
            style={{
                gridColumn: "span 4"
            }}
@@ -136,12 +134,12 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr

        {/* events per day */}
        <div style={{ ...styles.card, gridColumn: "span 5" }}>
-            <h2 style={styles.sectionTitle}>Events per Day</h2>
-            <p style={styles.sectionSubtitle}>Trend of activity over time</p>
+            <h2 style={styles.sectionTitle}>Activity Over Time</h2>
+            <p style={styles.sectionSubtitle}>How much posting happened each day.</p>

            <div style={styles.chartWrapper}>
            <ResponsiveContainer width="100%" height="100%">
-                <LineChart data={timeData?.events_per_day.filter((d) => new Date(d.date) >= new Date('2026-01-10'))}>
+                <LineChart data={timeData?.events_per_day ?? []}>
                <CartesianGrid strokeDasharray="3 3" />
                <XAxis dataKey="date" />
                <YAxis />
@@ -154,8 +152,8 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr

        {/* Word Cloud */}
        <div style={{ ...styles.card, gridColumn: "span 4" }}>
-            <h2 style={styles.sectionTitle}>Word Cloud</h2>
-            <p style={styles.sectionSubtitle}>Most common terms across events</p>
+            <h2 style={styles.sectionTitle}>Common Words</h2>
+            <p style={styles.sectionSubtitle}>Frequently used words across the dataset.</p>

            <div style={styles.chartWrapper}>
            <ReactWordcloud
@@ -174,8 +172,8 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
        <div style={{...styles.card, ...styles.scrollArea, gridColumn: "span 3",
        }}
        >
-            <h2 style={styles.sectionTitle}>Top Users</h2>
-            <p style={styles.sectionSubtitle}>Most active authors</p>
+            <h2 style={styles.sectionTitle}>Most Active Users</h2>
+            <p style={styles.sectionSubtitle}>Who posted the most events.</p>

            <div style={styles.topUsersList}>
            {userData?.top_users.slice(0, 100).map((item) => (
@@ -195,8 +193,8 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr

        {/* Heatmap */}
        <div style={{ ...styles.card, gridColumn: "span 12" }}>
-            <h2 style={styles.sectionTitle}>Heatmap</h2>
-            <p style={styles.sectionSubtitle}>Activity density across time</p>
+            <h2 style={styles.sectionTitle}>Weekly Activity Pattern</h2>
+            <p style={styles.sectionSubtitle}>When activity tends to happen by weekday and hour.</p>

            <div style={styles.heatmapWrapper}>
            <ActivityHeatmap data={timeData?.weekday_hour_heatmap ?? []} />
--- a/frontend/src/components/UserModal.tsx
+++ b/frontend/src/components/UserModal.tsx
@@ -12,6 +12,9 @@ type Props = {
 };

 export default function UserModal({ open, onClose, userData, username }: Props) {
+  const dominantEmotionEntry = Object.entries(userData?.avg_emotions ?? {})
+    .sort((a, b) => b[1] - a[1])[0];
+
  return (
    <Dialog open={open} onClose={onClose} style={styles.modalRoot}>
      <div style={styles.modalBackdrop} />
@@ -66,6 +69,15 @@ export default function UserModal({ open, onClose, userData, username }: Props)
                  </div>
                </div>
              ) : null}
+
+              {dominantEmotionEntry ? (
+                <div style={styles.topUserItem}>
+                  <div style={styles.topUserName}>Dominant Avg Emotion</div>
+                  <div style={styles.topUserMeta}>
+                    {dominantEmotionEntry[0].replace("emotion_", "")} ({dominantEmotionEntry[1].toFixed(3)})
+                  </div>
+                </div>
+              ) : null}
            </div>
          )}
        </DialogPanel>
--- a/frontend/src/components/UserStats.tsx
+++ b/frontend/src/components/UserStats.tsx
@@ -87,15 +87,15 @@ const UserStats = (props: { data: UserAnalysisResponse }) => {
            style={{ gridColumn: "span 3" }}
          />
          <Card
-            label="Interactions"
+            label="Replies"
            value={totalInteractions.toLocaleString()}
-            sublabel="Filtered links (2+ interactions)"
+            sublabel="Links with at least 2 replies"
            style={{ gridColumn: "span 3" }}
          />
          <Card
-            label="Average Intensity"
+            label="Replies per Connected User"
            value={avgInteractionsPerConnectedUser.toFixed(1)}
-            sublabel="Interactions per connected user"
+            sublabel="Average from visible graph links"
            style={{ gridColumn: "span 3" }}
          />
          <Card
@@ -106,13 +106,13 @@ const UserStats = (props: { data: UserAnalysisResponse }) => {
          />

          <Card
-            label="Strongest Connection"
+            label="Strongest User Link"
            value={strongestLink ? `${strongestLink.source} -> ${strongestLink.target}` : "—"}
-            sublabel={strongestLink ? `${strongestLink.value.toLocaleString()} interactions` : "No graph edges after filtering"}
+            sublabel={strongestLink ? `${strongestLink.value.toLocaleString()} replies` : "No graph links after filtering"}
            style={{ gridColumn: "span 6" }}
          />
          <Card
-            label="Most Reply-Driven User"
+            label="Most Comment-Heavy User"
            value={highlyInteractiveUser?.author ?? "—"}
            sublabel={
              highlyInteractiveUser
@@ -125,7 +125,7 @@ const UserStats = (props: { data: UserAnalysisResponse }) => {
          <div style={{ ...styles.card, gridColumn: "span 12" }}>
            <h2 style={styles.sectionTitle}>User Interaction Graph</h2>
            <p style={styles.sectionSubtitle}>
-              Nodes represent users and links represent conversation interactions.
+              Each node is a user, and each link shows replies between them.
            </p>
            <div ref={graphContainerRef} style={{ width: "100%", height: graphSize.height }}>
              <ForceGraph3D
--- a/frontend/src/pages/AutoScrape.tsx
+++ b/frontend/src/pages/AutoScrape.tsx
@@ -191,6 +191,9 @@ const AutoScrapePage = () => {
            <p style={styles.sectionHeaderSubtitle}>
              Select sources and scrape settings, then queue processing automatically.
            </p>
+            <p style={{ ...styles.subtleBodyText, marginTop: 6, color: "#9a6700" }}>
+              Warning: Scraping more than 250 posts from any single site can take hours due to rate limits.
+            </p>
          </div>
          <button
            type="button"
--- a/frontend/src/pages/Stats.tsx
+++ b/frontend/src/pages/Stats.tsx
@@ -5,26 +5,42 @@ import StatsStyling from "../styles/stats_styling";
 import SummaryStats from "../components/SummaryStats";
 import EmotionalStats from "../components/EmotionalStats";
 import UserStats from "../components/UserStats";
+import LinguisticStats from "../components/LinguisticStats";
+import InteractionalStats from "../components/InteractionalStats";
+import CulturalStats from "../components/CulturalStats";

 import { 
  type SummaryResponse, 
  type UserAnalysisResponse, 
  type TimeAnalysisResponse,
-  type ContentAnalysisResponse
+  type ContentAnalysisResponse,
+  type UserEndpointResponse,
+  type LinguisticAnalysisResponse,
+  type EmotionalAnalysisResponse,
+  type InteractionAnalysisResponse,
+  type CulturalAnalysisResponse
 } from '../types/ApiTypes'

 const API_BASE_URL = import.meta.env.VITE_BACKEND_URL
 const styles = StatsStyling;
+const DELETED_USERS = ["[deleted]"];
+
+/**
+ * Reports whether an author name denotes a deleted account.
+ *
+ * The value is trimmed and lower-cased before it is looked up in
+ * `DELETED_USERS`; `null` and `undefined` are treated as the empty string.
+ */
+const isDeletedUser = (value: string | null | undefined) => {
+  const normalisedName = (value ?? "").trim().toLowerCase();
+  return DELETED_USERS.includes(normalisedName);
+};

 const StatPage = () => {
  const { datasetId: routeDatasetId } = useParams<{ datasetId: string }>();
  const [error, setError] = useState('');
  const [loading, setLoading] = useState(false);
-  const [activeView, setActiveView] = useState<"summary" | "emotional" | "user">("summary");
+  const [activeView, setActiveView] = useState<"summary" | "emotional" | "user" | "linguistic" | "interactional" | "cultural">("summary");

  const [userData, setUserData] = useState<UserAnalysisResponse | null>(null);
  const [timeData, setTimeData] = useState<TimeAnalysisResponse | null>(null);
  const [contentData, setContentData] = useState<ContentAnalysisResponse | null>(null);
+  const [linguisticData, setLinguisticData] = useState<LinguisticAnalysisResponse | null>(null);
+  const [interactionData, setInteractionData] = useState<InteractionAnalysisResponse | null>(null);
+  const [culturalData, setCulturalData] = useState<CulturalAnalysisResponse | null>(null);
  const [summary, setSummary] = useState<SummaryResponse | null>(null);


@@ -83,15 +99,23 @@ const StatPage = () => {
    setLoading(true);

    Promise.all([
-      axios.get<TimeAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/time`, {
+      axios.get<TimeAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/temporal`, {
        params,
        headers: authHeaders,
      }),
-      axios.get<UserAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/user`, {
+      axios.get<UserEndpointResponse>(`${API_BASE_URL}/dataset/${datasetId}/user`, {
        params,
        headers: authHeaders,
      }),
-      axios.get<ContentAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/content`, {
+      axios.get<LinguisticAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/linguistic`, {
+        params,
+        headers: authHeaders,
+      }),
+      axios.get<EmotionalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/emotional`, {
+        params,
+        headers: authHeaders,
+      }),
+      axios.get<InteractionAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/interactional`, {
        params,
        headers: authHeaders,
      }),
@@ -99,12 +123,87 @@ const StatPage = () => {
        params,
        headers: authHeaders,
      }),
+      axios.get<CulturalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/cultural`, {
+        params,
+        headers: authHeaders,
+      }),
    ]) 
-      .then(([timeRes, userRes, contentRes, summaryRes]) => {
-        setUserData(userRes.data || null);
+      .then(([timeRes, userRes, linguisticRes, emotionalRes, interactionRes, summaryRes, culturalRes]) => {
+        const usersList = userRes.data.users ?? [];
+        const topUsersList = userRes.data.top_users ?? [];
+        const interactionGraphRaw = interactionRes.data?.interaction_graph ?? {};
+        const topPairsRaw = interactionRes.data?.top_interaction_pairs ?? [];
+
+        const filteredUsers: typeof usersList = [];
+        for (const user of usersList) {
+          if (isDeletedUser(user.author)) continue;
+          filteredUsers.push(user);
+        }
+
+        const filteredTopUsers: typeof topUsersList = [];
+        for (const user of topUsersList) {
+          if (isDeletedUser(user.author)) continue;
+          filteredTopUsers.push(user);
+        }
+
+        const filteredInteractionGraph: Record<string, Record<string, number>> = {};
+        for (const [source, targets] of Object.entries(interactionGraphRaw)) {
+          if (isDeletedUser(source)) {
+            continue;
+          }
+
+          const nextTargets: Record<string, number> = {};
+          for (const [target, count] of Object.entries(targets)) {
+            if (isDeletedUser(target)) {
+              continue;
+            }
+            nextTargets[target] = count;
+          }
+
+          filteredInteractionGraph[source] = nextTargets;
+        }
+
+        const filteredTopInteractionPairs: typeof topPairsRaw = [];
+        for (const pairEntry of topPairsRaw) {
+          const pair = pairEntry[0];
+          const source = pair[0];
+          const target = pair[1];
+          if (isDeletedUser(source) || isDeletedUser(target)) {
+            continue;
+          }
+          filteredTopInteractionPairs.push(pairEntry);
+        }
+
+        const combinedUserData: UserAnalysisResponse = {
+          ...userRes.data,
+          users: filteredUsers,
+          top_users: filteredTopUsers,
+          interaction_graph: filteredInteractionGraph,
+        };
+
+        const combinedContentData: ContentAnalysisResponse = {
+          ...linguisticRes.data,
+          ...emotionalRes.data,
+        };
+
+        const filteredInteractionData: InteractionAnalysisResponse = {
+          ...interactionRes.data,
+          interaction_graph: filteredInteractionGraph,
+          top_interaction_pairs: filteredTopInteractionPairs,
+        };
+
+        const filteredSummary: SummaryResponse = {
+          ...summaryRes.data,
+          unique_users: filteredUsers.length,
+        };
+
+        setUserData(combinedUserData);
        setTimeData(timeRes.data || null);
-        setContentData(contentRes.data || null);
-        setSummary(summaryRes.data || null);
+        setContentData(combinedContentData);
+        setLinguisticData(linguisticRes.data || null);
+        setInteractionData(filteredInteractionData || null);
+        setCulturalData(culturalRes.data || null);
+        setSummary(filteredSummary || null);
      })
      .catch((e) => setError("Failed to load statistics: " + String(e)))
      .finally(() => setLoading(false));
@@ -198,7 +297,7 @@ return (
          <div style={styles.dashboardMeta}>Dataset #{datasetId ?? "-"}</div>
        </div>

-    <div style={{ ...styles.container, ...styles.tabsRow }}>
+    <div style={{ ...styles.container, ...styles.tabsRow, justifyContent: "center" }}>
      <button
        onClick={() => setActiveView("summary")}
        style={activeView === "summary" ? styles.buttonPrimary : styles.buttonSecondary}
@@ -218,6 +317,24 @@ return (
      >
        Users
      </button>
+      <button
+        onClick={() => setActiveView("linguistic")}
+        style={activeView === "linguistic" ? styles.buttonPrimary : styles.buttonSecondary}
+      >
+        Linguistic
+      </button>
+      <button
+        onClick={() => setActiveView("interactional")}
+        style={activeView === "interactional" ? styles.buttonPrimary : styles.buttonSecondary}
+      >
+        Interactional
+      </button>
+      <button
+        onClick={() => setActiveView("cultural")}
+        style={activeView === "cultural" ? styles.buttonPrimary : styles.buttonSecondary}
+      >
+        Cultural
+      </button>
    </div>

    {activeView === "summary" && (
@@ -243,6 +360,36 @@ return (
      <UserStats data={userData} />
    )}

+    {activeView === "linguistic" && linguisticData && (
+      <LinguisticStats data={linguisticData} />
+    )}
+
+    {activeView === "linguistic" && !linguisticData && (
+      <div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
+        No linguistic data available.
+      </div>
+    )}
+
+    {activeView === "interactional" && interactionData && (
+      <InteractionalStats data={interactionData} />
+    )}
+
+    {activeView === "interactional" && !interactionData && (
+      <div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
+        No interactional data available.
+      </div>
+    )}
+
+    {activeView === "cultural" && culturalData && (
+      <CulturalStats data={culturalData} />
+    )}
+
+    {activeView === "cultural" && !culturalData && (
+      <div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
+        No cultural data available.
+      </div>
+    )}
+
  </div>
 );
 }
--- a/frontend/src/types/ApiTypes.ts
+++ b/frontend/src/types/ApiTypes.ts
@@ -1,14 +1,28 @@
-// User Responses
-type TopUser = { 
-    author: string; 
-    source: string; 
-    count: number 
-};
-
+// Shared types
 type FrequencyWord = {
  word: string;
  count: number;
-}
+};
+
+/**
+ * An n-gram string paired with its count. Element type of the
+ * `common_two_phrases` and `common_three_phrases` lists.
+ */
+type NGram = {
+  count: number;
+  ngram: string;
+};
+
+/**
+ * One numeric score per `emotion_*` field. Intersected into
+ * `AverageEmotionByTopic` to describe a topic's emotion values.
+ */
+type Emotion = {
+  emotion_anger: number;
+  emotion_disgust: number;
+  emotion_fear: number;
+  emotion_joy: number;
+  emotion_sadness: number;
+};
+
+// User 
+/** Element type of the `top_users` lists: an author, a source and a count. */
+type TopUser = {
+  author: string;
+  source: string;
+  count: number;
+};

 type Vocab = {
  author: string;
@@ -26,60 +40,145 @@ type User = {
  comment: number;
  comment_post_ratio: number;
  comment_share: number;
+  avg_emotions?: Record<string, number>;
  vocab?: Vocab | null;
 };

 type InteractionGraph = Record<string, Record<string, number>>;

+/**
+ * Response body type that Stats.tsx requests from `/dataset/{id}/user`.
+ *
+ * Unlike `UserAnalysisResponse` it carries no `interaction_graph`; Stats.tsx
+ * fills that in from the interactional response when it builds the combined
+ * user data.
+ */
+type UserEndpointResponse = {
+  top_users: TopUser[];
+  users: User[];
+};
+
 type UserAnalysisResponse = {
  top_users: TopUser[];
  users: User[];
  interaction_graph: InteractionGraph;
 };

-// Time Analysis
+// Time 
 type EventsPerDay = {
  date: Date;
  count: number;
-}
+};

 type HeatmapCell = {
  date: Date;
  hour: number;
  count: number;
-}
+};

 type TimeAnalysisResponse = {
  events_per_day: EventsPerDay[];
  weekday_hour_heatmap: HeatmapCell[];
-}
-
-// Content Analysis
-type Emotion = {
-  emotion_anger: number;
-  emotion_disgust: number;
-  emotion_fear: number;
-  emotion_joy: number;
-  emotion_sadness: number;
 };

-type NGram = {
-    count: number;
-    ngram: string;
-}
-
+// Content (combines emotional and linguistic)
 type AverageEmotionByTopic = Emotion & {
  n: number;
  topic: string;
+  [key: string]: string | number;
 };

+/**
+ * One emotion and its score.
+ *
+ * Appears to mirror the records built by `EmotionalAnalysis.overall_emotion_average`
+ * on the server, where `emotion` is the column name with the `emotion_` prefix
+ * removed and `score` is that column's mean (confirm against the endpoint).
+ */
+type OverallEmotionAverage = {
+  emotion: string;
+  score: number;
+};
+
+/**
+ * How often one emotion was the dominant one.
+ *
+ * Appears to mirror the records built by
+ * `EmotionalAnalysis.dominant_emotion_distribution` on the server, where
+ * `count` is the number of rows in which the emotion scored highest and
+ * `ratio` is `count` divided by the number of rows, rounded to 4 decimals
+ * (confirm against the endpoint).
+ */
+type DominantEmotionDistribution = {
+  emotion: string;
+  count: number;
+  ratio: number;
+};
+
+/**
+ * The dominant emotion for one data source.
+ *
+ * Appears to mirror the records built by `EmotionalAnalysis.emotion_by_source`
+ * on the server: `dominant_emotion` is the emotion with the highest mean for
+ * the source (without the `emotion_` prefix), `dominant_score` is that mean
+ * rounded to 4 decimals, and `event_count` is the number of rows for the
+ * source (confirm against the endpoint).
+ */
+type EmotionBySource = {
+  source: string;
+  dominant_emotion: string;
+  dominant_score: number;
+  event_count: number;
+};

 type ContentAnalysisResponse = {
  word_frequencies: FrequencyWord[];
  average_emotion_by_topic: AverageEmotionByTopic[];
  common_three_phrases: NGram[];
  common_two_phrases: NGram[];
-}
+  overall_emotion_average?: OverallEmotionAverage[];
+  dominant_emotion_distribution?: DominantEmotionDistribution[];
+  emotion_by_source?: EmotionBySource[];
+};
+
+// Linguistic
+/**
+ * Response body type that Stats.tsx requests from `/dataset/{id}/linguistic`.
+ *
+ * `lexical_diversity` is optional; the server-side
+ * `LinguisticAnalysis.lexical_diversity` returns the keys `total_tokens`,
+ * `unique_tokens` and `ttr`.
+ */
+type LinguisticAnalysisResponse = {
+  word_frequencies: FrequencyWord[];
+  common_two_phrases: NGram[];
+  common_three_phrases: NGram[];
+  lexical_diversity?: Record<string, number>;
+};
+
+// Emotional
+/**
+ * Response body type that Stats.tsx requests from `/dataset/{id}/emotional`.
+ *
+ * Stats.tsx spreads this together with the linguistic response to build a
+ * `ContentAnalysisResponse`. Every field except `average_emotion_by_topic`
+ * is optional.
+ */
+type EmotionalAnalysisResponse = {
+  average_emotion_by_topic: AverageEmotionByTopic[];
+  overall_emotion_average?: OverallEmotionAverage[];
+  dominant_emotion_distribution?: DominantEmotionDistribution[];
+  emotion_by_source?: EmotionBySource[];
+};
+
+// Interactional 
+/**
+ * How concentrated commenting is among authors.
+ *
+ * Field names match the dictionary returned by the server-side
+ * `InteractionAnalysis.conversation_concentration`, which computes the
+ * top-10% author count as `max(1, int(total_authors * 0.1))` and rounds both
+ * share/ratio values to 4 decimals.
+ *
+ * NOTE(review): that server method returns an empty object when the data has
+ * no `type` column or no comments, so none of these fields would be present
+ * in that case — confirm consumers guard for it.
+ */
+type ConversationConcentration = {
+  total_commenting_authors: number; // distinct authors with at least one comment
+  top_10pct_author_count: number; // size of the "top 10%" author group (at least 1)
+  top_10pct_comment_share: number; // fraction of all comments written by that group
+  single_comment_authors: number; // authors with exactly one comment
+  single_comment_author_ratio: number; // single_comment_authors / total_commenting_authors
+};
+
+/**
+ * Response body type that Stats.tsx requests from `/dataset/{id}/interactional`.
+ *
+ * Each `top_interaction_pairs` entry is `[[source, target], count]`, which is
+ * how Stats.tsx reads it when filtering out deleted users. Only
+ * `interaction_graph` is required.
+ */
+type InteractionAnalysisResponse = {
+  average_thread_depth?: number;
+  top_interaction_pairs?: [[string, string], number][];
+  conversation_concentration?: ConversationConcentration;
+  interaction_graph: InteractionGraph;
+};
+
+// Cultural
+/**
+ * In-group / out-group statistics of the cultural analysis.
+ *
+ * CulturalStats multiplies `in_group_ratio` and `out_group_ratio` by 100 for
+ * display and sums `in_group_usage` and `out_group_usage` into a word total.
+ * The meaning of the remaining fields is not visible from the frontend code;
+ * presumably the `*_posts` fields are post counts — verify against the
+ * server-side cultural analysis.
+ */
+type IdentityMarkers = {
+  in_group_usage: number;
+  out_group_usage: number;
+  in_group_ratio: number;
+  out_group_ratio: number;
+  in_group_posts: number;
+  out_group_posts: number;
+  tie_posts: number;
+  in_group_emotion_avg?: Record<string, number>;
+  out_group_emotion_avg?: Record<string, number>;
+};
+
+/**
+ * Stance-marker statistics of the cultural analysis: a total and a
+ * per-1k-token value for each of the hedge, certainty, deontic and
+ * permission categories.
+ *
+ * NOTE(review): the exact definition of each category lives in the
+ * server-side cultural analysis, which is not shown here.
+ */
+type StanceMarkers = {
+  hedge_total: number;
+  certainty_total: number;
+  deontic_total: number;
+  permission_total: number;
+  hedge_per_1k_tokens: number;
+  certainty_per_1k_tokens: number;
+  deontic_per_1k_tokens: number;
+  permission_per_1k_tokens: number;
+};
+
+/**
+ * Per-entity aggregate: a post count plus a record of emotion values.
+ * CulturalStats sorts entities by `post_count`, highest first.
+ */
+type EntityEmotionAggregate = {
+  post_count: number;
+  emotion_avg: Record<string, number>;
+};
+
+/**
+ * Wrapper around the per-entity aggregates. CulturalStats iterates
+ * `entity_emotion_avg` with `Object.entries`; the keys are presumably entity
+ * names — verify against the server.
+ */
+type AverageEmotionPerEntity = {
+  entity_emotion_avg: Record<string, EntityEmotionAggregate>;
+};
+
+/**
+ * Response body type that Stats.tsx requests from `/dataset/{id}/cultural`.
+ * Every section is optional, so consumers must handle missing sections.
+ */
+type CulturalAnalysisResponse = {
+  identity_markers?: IdentityMarkers;
+  stance_markers?: StanceMarkers;
+  avg_emotion_per_entity?: AverageEmotionPerEntity;
+};

 // Summary 
 type SummaryResponse = {
@@ -96,22 +195,35 @@ type SummaryResponse = {
  sources: string[];
 };

-// Filtering Response
+// Filter 
 type FilterResponse = {
-    rows: number
+  rows: number;
  data: any;
-}
+};

 export type {
  TopUser,
  Vocab,
  User,
  InteractionGraph,
+  ConversationConcentration,
  UserAnalysisResponse,
+  UserEndpointResponse,
  FrequencyWord,
  AverageEmotionByTopic,
+  OverallEmotionAverage,
+  DominantEmotionDistribution,
+  EmotionBySource,
  SummaryResponse,
  TimeAnalysisResponse,
  ContentAnalysisResponse,
-    FilterResponse
-}
+  LinguisticAnalysisResponse,
+  EmotionalAnalysisResponse,
+  InteractionAnalysisResponse,
+  IdentityMarkers,
+  StanceMarkers,
+  EntityEmotionAggregate,
+  AverageEmotionPerEntity,
+  CulturalAnalysisResponse,
+  FilterResponse,
+};
--- a/server/analysis/emotional.py
+++ b/server/analysis/emotional.py
@@ -1,33 +1,86 @@
 import pandas as pd

+
 class EmotionalAnalysis:
-    def avg_emotion_by_topic(self, df: pd.DataFrame) -> dict:
-        emotion_cols = [
-            col for col in df.columns
-            if col.startswith("emotion_")
-        ]
+    def _emotion_cols(self, df: pd.DataFrame) -> list[str]:
+        return [col for col in df.columns if col.startswith("emotion_")]
+
+    def avg_emotion_by_topic(self, df: pd.DataFrame) -> list[dict]:
+        emotion_cols = self._emotion_cols(df)
+
+        if not emotion_cols:
+            return []

        counts = (
-            df[
-                (df["topic"] != "Misc")
-            ]
-            .groupby("topic")
-            .size()
-            .rename("n")
+            df[(df["topic"] != "Misc")].groupby("topic").size().reset_index(name="n")
        )

        avg_emotion_by_topic = (
-            df[
-                (df["topic"] != "Misc")
-            ]
+            df[(df["topic"] != "Misc")]
            .groupby("topic")[emotion_cols]
            .mean()
            .reset_index()
        )

-        avg_emotion_by_topic = avg_emotion_by_topic.merge(
-            counts,
-            on="topic"
+        avg_emotion_by_topic = avg_emotion_by_topic.merge(counts, on="topic")
+
+        return avg_emotion_by_topic.to_dict(orient="records")
+
+    def overall_emotion_average(self, df: pd.DataFrame) -> list[dict]:
+        emotion_cols = self._emotion_cols(df)
+
+        if not emotion_cols:
+            return []
+
+        means = df[emotion_cols].mean()
+        return [
+            {
+                "emotion": col.replace("emotion_", ""),
+                "score": float(means[col]),
+            }
+            for col in emotion_cols
+        ]
+
+    def dominant_emotion_distribution(self, df: pd.DataFrame) -> list[dict]:
+        emotion_cols = self._emotion_cols(df)
+
+        if not emotion_cols or df.empty:
+            return []
+
+        dominant_per_row = df[emotion_cols].idxmax(axis=1)
+        counts = dominant_per_row.value_counts()
+        total = max(len(dominant_per_row), 1)
+
+        return [
+            {
+                "emotion": col.replace("emotion_", ""),
+                "count": int(count),
+                "ratio": round(float(count / total), 4),
+            }
+            for col, count in counts.items()
+        ]
+
+    def emotion_by_source(self, df: pd.DataFrame) -> list[dict]:
+        emotion_cols = self._emotion_cols(df)
+
+        if not emotion_cols or "source" not in df.columns or df.empty:
+            return []
+
+        source_counts = df.groupby("source").size()
+        source_means = df.groupby("source")[emotion_cols].mean().reset_index()
+        rows = source_means.to_dict(orient="records")
+        output = []
+
+        for row in rows:
+            source = row["source"]
+            dominant_col = max(emotion_cols, key=lambda col: float(row.get(col, 0)))
+            output.append(
+                {
+                    "source": str(source),
+                    "dominant_emotion": dominant_col.replace("emotion_", ""),
+                    "dominant_score": round(float(row.get(dominant_col, 0)), 4),
+                    "event_count": int(source_counts.get(source, 0)),
+                }
            )

-        return avg_emotion_by_topic.to_dict(orient='records')
+        return output
--- a/server/analysis/interactional.py
+++ b/server/analysis/interactional.py
@@ -1,9 +1,6 @@
 import pandas as pd
 import re

-from collections import Counter
-
-
 class InteractionAnalysis:
    def __init__(self, word_exclusions: set[str]):
        self.word_exclusions = word_exclusions
@@ -12,118 +9,6 @@ class InteractionAnalysis:
        tokens = re.findall(r"\b[a-z]{3,}\b", text)
        return [t for t in tokens if t not in self.word_exclusions]

-    def _vocab_richness_per_user(
-        self, df: pd.DataFrame, min_words: int = 20, top_most_used_words: int = 100
-    ) -> list:
-        df = df.copy()
-        df["content"] = df["content"].fillna("").astype(str).str.lower()
-        df["tokens"] = df["content"].apply(self._tokenize)
-
-        rows = []
-        for author, group in df.groupby("author"):
-            all_tokens = [t for tokens in group["tokens"] for t in tokens]
-
-            total_words = len(all_tokens)
-            unique_words = len(set(all_tokens))
-            events = len(group)
-
-            # Min amount of words for a user, any less than this might give weird results
-            if total_words < min_words:
-                continue
-
-            # 100% = they never reused a word (excluding stop words)
-            vocab_richness = unique_words / total_words
-            avg_words = total_words / max(events, 1)
-
-            counts = Counter(all_tokens)
-            top_words = [
-                {"word": w, "count": int(c)}
-                for w, c in counts.most_common(top_most_used_words)
-            ]
-
-            rows.append(
-                {
-                    "author": author,
-                    "events": int(events),
-                    "total_words": int(total_words),
-                    "unique_words": int(unique_words),
-                    "vocab_richness": round(vocab_richness, 3),
-                    "avg_words_per_event": round(avg_words, 2),
-                    "top_words": top_words,
-                }
-            )
-
-        rows = sorted(rows, key=lambda x: x["vocab_richness"], reverse=True)
-
-        return rows
-
-    def top_users(self, df: pd.DataFrame) -> list:
-        counts = df.groupby(["author", "source"]).size().sort_values(ascending=False)
-
-        top_users = [
-            {"author": author, "source": source, "count": int(count)}
-            for (author, source), count in counts.items()
-        ]
-
-        return top_users
-
-    def per_user_analysis(self, df: pd.DataFrame) -> dict:
-        per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0)
-
-        emotion_cols = [col for col in df.columns if col.startswith("emotion_")]
-
-        avg_emotions_by_author = {}
-        if emotion_cols:
-            avg_emotions = df.groupby("author")[emotion_cols].mean().fillna(0.0)
-            avg_emotions_by_author = {
-                author: {emotion: float(score) for emotion, score in row.items()}
-                for author, row in avg_emotions.iterrows()
-            }
-
-        # ensure columns always exist
-        for col in ("post", "comment"):
-            if col not in per_user.columns:
-                per_user[col] = 0
-
-        per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(
-            0, 1
-        )
-        per_user["comment_share"] = per_user["comment"] / (
-            per_user["post"] + per_user["comment"]
-        ).replace(0, 1)
-        per_user = per_user.sort_values("comment_post_ratio", ascending=True)
-        per_user_records = per_user.reset_index().to_dict(orient="records")
-
-        vocab_rows = self._vocab_richness_per_user(df)
-        vocab_by_author = {row["author"]: row for row in vocab_rows}
-
-        # merge vocab richness + per_user information
-        merged_users = []
-        for row in per_user_records:
-            author = row["author"]
-            merged_users.append(
-                {
-                    "author": author,
-                    "post": int(row.get("post", 0)),
-                    "comment": int(row.get("comment", 0)),
-                    "comment_post_ratio": float(row.get("comment_post_ratio", 0)),
-                    "comment_share": float(row.get("comment_share", 0)),
-                    "avg_emotions": avg_emotions_by_author.get(author, {}),
-                    "vocab": vocab_by_author.get(
-                        author,
-                        {
-                            "vocab_richness": 0,
-                            "avg_words_per_event": 0,
-                            "top_words": [],
-                        },
-                    ),
-                }
-            )
-
-        merged_users.sort(key=lambda u: u["comment_post_ratio"])
-
-        return merged_users
-
    def interaction_graph(self, df: pd.DataFrame):
        interactions = {a: {} for a in df["author"].dropna().unique()}

@@ -167,67 +52,36 @@ class InteractionAnalysis:

        return round(sum(depths) / len(depths), 2)
    
-    def average_thread_length_by_emotion(self, df: pd.DataFrame):
-        emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
+    def top_interaction_pairs(self, df: pd.DataFrame, top_n=10):
+        graph = self.interaction_graph(df)
+        pairs = []

-        emotion_cols = [
-            c
-            for c in df.columns
-            if c.startswith("emotion_") and c not in emotion_exclusions
-        ]
+        for a, targets in graph.items():
+            for b, count in targets.items():
+                pairs.append(((a, b), count))

-        id_to_reply = df.set_index("id")["reply_to"].to_dict()
-        length_cache = {}
+        pairs.sort(key=lambda x: x[1], reverse=True)
+        return pairs[:top_n]
    
-        def thread_length_from(start_id):
-            if start_id in length_cache:
-                return length_cache[start_id]
+    def conversation_concentration(self, df: pd.DataFrame) -> dict:
+        if "type" not in df.columns:
+            return {}

-            seen = set()
-            length = 1
-            current = start_id
+        comments = df[df["type"] == "comment"]
+        if comments.empty:
+            return {}

-            while True:
-                if current in seen:
-                    # infinite loop shouldn't happen, but just in case
-                    break
-                seen.add(current)
+        author_counts = comments["author"].value_counts()
+        total_comments = len(comments)
+        total_authors = len(author_counts)

-                reply_to = id_to_reply.get(current)
-
-                if (
-                    reply_to is None
-                    or (isinstance(reply_to, float) and pd.isna(reply_to))
-                    or reply_to == ""
-                ):
-                    break
-
-                length += 1
-                current = reply_to
-
-                if current in length_cache:
-                    length += length_cache[current] - 1
-                    break
-
-            length_cache[start_id] = length
-            return length
-
-        emotion_to_lengths = {}
-
-        # Fill NaNs in emotion cols to avoid max() issues
-        emo_df = df[["id"] + emotion_cols].copy()
-        emo_df[emotion_cols] = emo_df[emotion_cols].fillna(0)
-
-        for _, row in emo_df.iterrows():
-            msg_id = row["id"]
-            length = thread_length_from(msg_id)
-
-            emotions = {c: row[c] for c in emotion_cols}
-            dominant = max(emotions, key=emotions.get)
-
-            emotion_to_lengths.setdefault(dominant, []).append(length)
+        top_10_pct_n = max(1, int(total_authors * 0.1))
+        top_10_pct_share = round(author_counts.head(top_10_pct_n).sum() / total_comments, 4)

        return {
-            emotion: round(sum(lengths) / len(lengths), 2)
-            for emotion, lengths in emotion_to_lengths.items()
+            "total_commenting_authors": total_authors,
+            "top_10pct_author_count": top_10_pct_n,
+            "top_10pct_comment_share": float(top_10_pct_share),
+            "single_comment_authors": int((author_counts == 1).sum()),
+            "single_comment_author_ratio": float(round((author_counts == 1).sum() / total_authors, 4)),
        }
--- a/server/analysis/linguistic.py
+++ b/server/analysis/linguistic.py
@@ -61,3 +61,19 @@ class LinguisticAnalysis:
            .head(limit)
            .to_dict(orient="records")
        )
+
+    def lexical_diversity(self, df: pd.DataFrame) -> dict:
+        tokens = (
+            df["content"].fillna("").astype(str).str.lower()
+            .str.findall(r"\b[a-z]{2,}\b")
+            .explode()
+        )
+        tokens = tokens[~tokens.isin(self.word_exclusions)]
+        total = max(len(tokens), 1)
+        unique = int(tokens.nunique())
+
+        return {
+            "total_tokens": total,
+            "unique_tokens": unique,
+            "ttr": round(unique / total, 4),
+        }
--- a/server/analysis/stat_gen.py
+++ b/server/analysis/stat_gen.py
@@ -6,7 +6,9 @@ from server.analysis.cultural import CulturalAnalysis
 from server.analysis.emotional import EmotionalAnalysis
 from server.analysis.interactional import InteractionAnalysis
 from server.analysis.linguistic import LinguisticAnalysis
+from server.analysis.summary import SummaryAnalysis
 from server.analysis.temporal import TemporalAnalysis
+from server.analysis.user import UserAnalysis

 DOMAIN_STOPWORDS = {
    "www",
@@ -36,12 +38,11 @@ class StatGen:
        self.interaction_analysis = InteractionAnalysis(EXCLUDE_WORDS)
        self.linguistic_analysis = LinguisticAnalysis(EXCLUDE_WORDS)
        self.cultural_analysis = CulturalAnalysis()
+        self.summary_analysis = SummaryAnalysis()
+        self.user_analysis = UserAnalysis(EXCLUDE_WORDS)

    ## Private Methods
-    def _prepare_filtered_df(self, 
-                             df: pd.DataFrame, 
-                             filters: dict | None = None
-                             ) -> pd.DataFrame:
+    def _prepare_filtered_df(self, df: pd.DataFrame, filters: dict | None = None) -> pd.DataFrame:
        filters = filters or {}
        filtered_df = df.copy()

@@ -51,10 +52,9 @@ class StatGen:
        data_source_filter = filters.get("data_sources", None)

        if search_query:
-            mask = (
-                filtered_df["content"].str.contains(search_query, case=False, na=False)
-                | filtered_df["author"].str.contains(search_query, case=False, na=False)
-            )
+            mask = filtered_df["content"].str.contains(
+                search_query, case=False, na=False
+            ) | filtered_df["author"].str.contains(search_query, case=False, na=False)

            # Only include title if the column exists
            if "title" in filtered_df.columns:
@@ -76,10 +76,10 @@ class StatGen:
        return filtered_df

    ## Public Methods
-    def filter_dataset(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
+    def filter_dataset(self, df: pd.DataFrame, filters: dict | None = None) -> list[dict]:
        return self._prepare_filtered_df(df, filters).to_dict(orient="records")

-    def get_time_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
+    def temporal(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
        filtered_df = self._prepare_filtered_df(df, filters)

        return {
@@ -87,84 +87,54 @@ class StatGen:
            "weekday_hour_heatmap": self.temporal_analysis.heatmap(filtered_df),
        }

-    def get_content_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
+    def linguistic(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
        filtered_df = self._prepare_filtered_df(df, filters)

        return {
            "word_frequencies": self.linguistic_analysis.word_frequencies(filtered_df),
            "common_two_phrases": self.linguistic_analysis.ngrams(filtered_df),
            "common_three_phrases": self.linguistic_analysis.ngrams(filtered_df, n=3),
-            "average_emotion_by_topic": self.emotional_analysis.avg_emotion_by_topic(
-                filtered_df
-            )
+            "lexical_diversity": self.linguistic_analysis.lexical_diversity(filtered_df)
        }

-    def get_user_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
+    def emotional(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
        filtered_df = self._prepare_filtered_df(df, filters)

        return {
-            "top_users": self.interaction_analysis.top_users(filtered_df),
-            "users": self.interaction_analysis.per_user_analysis(filtered_df),
-            "interaction_graph": self.interaction_analysis.interaction_graph(filtered_df)
+            "average_emotion_by_topic": self.emotional_analysis.avg_emotion_by_topic(filtered_df),
+            "overall_emotion_average": self.emotional_analysis.overall_emotion_average(filtered_df),
+            "dominant_emotion_distribution": self.emotional_analysis.dominant_emotion_distribution(filtered_df),
+            "emotion_by_source": self.emotional_analysis.emotion_by_source(filtered_df)
        }

-    def get_interactional_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
+    def user(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
        filtered_df = self._prepare_filtered_df(df, filters)

        return {
-            "average_thread_depth": self.interaction_analysis.average_thread_depth(
-                filtered_df
-            ),
-            "average_thread_length_by_emotion": self.interaction_analysis.average_thread_length_by_emotion(
-                filtered_df
-            ),
+            "top_users": self.user_analysis.top_users(filtered_df),
+            "users": self.user_analysis.per_user_analysis(filtered_df)
        }

-    def get_cultural_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
+    def interactional(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
        filtered_df = self._prepare_filtered_df(df, filters)

        return {
-            "identity_markers": self.cultural_analysis.get_identity_markers(
-                filtered_df
-            ),
+            "average_thread_depth": self.interaction_analysis.average_thread_depth(filtered_df),
+            "top_interaction_pairs": self.interaction_analysis.top_interaction_pairs(filtered_df, top_n=100),
+            "interaction_graph": self.interaction_analysis.interaction_graph(filtered_df),
+            "conversation_concentration": self.interaction_analysis.conversation_concentration(filtered_df)
+        }
+
+    def cultural(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
+        filtered_df = self._prepare_filtered_df(df, filters)
+
+        return {
+            "identity_markers": self.cultural_analysis.get_identity_markers(filtered_df),
            "stance_markers": self.cultural_analysis.get_stance_markers(filtered_df),
-            "entity_salience": self.cultural_analysis.get_avg_emotions_per_entity(
-                filtered_df
-            ),
+            "avg_emotion_per_entity": self.cultural_analysis.get_avg_emotions_per_entity(filtered_df)
        }

    def summary(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
        filtered_df = self._prepare_filtered_df(df, filters)

-        total_posts = (filtered_df["type"] == "post").sum()
-        total_comments = (filtered_df["type"] == "comment").sum()
-        events_per_user = filtered_df.groupby("author").size()
-
-        if filtered_df.empty:
-            return {
-                "total_events": 0,
-                "total_posts": 0,
-                "total_comments": 0,
-                "unique_users": 0,
-                "comments_per_post": 0,
-                "lurker_ratio": 0,
-                "time_range": {
-                    "start": None,
-                    "end": None,
-                },
-                "sources": [],
-            }
-
-        return {
-            "total_events": int(len(filtered_df)),
-            "total_posts": int(total_posts),
-            "total_comments": int(total_comments),
-            "unique_users": int(events_per_user.count()),
-            "comments_per_post": round(total_comments / max(total_posts, 1), 2),
-            "lurker_ratio": round((events_per_user == 1).mean(), 2),
-            "time_range": {
-                "start": int(filtered_df["dt"].min().timestamp()),
-                "end": int(filtered_df["dt"].max().timestamp()),
-            },
-            "sources": filtered_df["source"].dropna().unique().tolist(),
-        }
+        return self.summary_analysis.summary(filtered_df)
--- a/server/analysis/summary.py
+++ b/server/analysis/summary.py
@@ -0,0 +1,64 @@
+import pandas as pd
+
+
+class SummaryAnalysis:
+    """Headline statistics for an events DataFrame.
+
+    Every method works on the frame it is handed (the caller is responsible
+    for filtering) and reads only the ``type``, ``author``, ``dt`` and
+    ``source`` columns.
+    """
+
+    def total_events(self, df: pd.DataFrame) -> int:
+        """Return the number of rows in ``df``."""
+        return int(len(df))
+
+    def total_posts(self, df: pd.DataFrame) -> int:
+        """Return how many rows have ``type == "post"``."""
+        return int((df["type"] == "post").sum())
+
+    def total_comments(self, df: pd.DataFrame) -> int:
+        """Return how many rows have ``type == "comment"``."""
+        return int((df["type"] == "comment").sum())
+
+    def unique_users(self, df: pd.DataFrame) -> int:
+        """Return the number of distinct non-null ``author`` values."""
+        return int(df["author"].nunique())
+
+    def comments_per_post(self, total_comments: int, total_posts: int) -> float:
+        """Return comments divided by posts, rounded to two decimals.
+
+        The divisor is clamped to 1 so a dataset without posts does not
+        divide by zero.
+        """
+        return round(total_comments / max(total_posts, 1), 2)
+
+    def lurker_ratio(self, df: pd.DataFrame) -> float:
+        """Return the share of authors with exactly one event (two decimals).
+
+        Returns 0.0 when no row carries an author: the group-by result is
+        then empty and its mean would be NaN, which is not valid JSON.
+        """
+        events_per_user = df.groupby("author").size()
+        if events_per_user.empty:
+            return 0.0
+        return round(float((events_per_user == 1).mean()), 2)
+
+    def time_range(self, df: pd.DataFrame) -> dict:
+        """Return the earliest and latest ``dt`` as integer epoch seconds.
+
+        A bound is ``None`` when ``dt`` holds no valid timestamp, because
+        ``NaT`` cannot be converted with ``.timestamp()``.
+        """
+        earliest = df["dt"].min()
+        latest = df["dt"].max()
+        return {
+            "start": None if pd.isna(earliest) else int(earliest.timestamp()),
+            "end": None if pd.isna(latest) else int(latest.timestamp()),
+        }
+
+    def sources(self, df: pd.DataFrame) -> list:
+        """Return the distinct non-null ``source`` values in first-seen order."""
+        return df["source"].dropna().unique().tolist()
+
+    def empty_summary(self) -> dict:
+        """Return the summary payload used for an empty DataFrame."""
+        return {
+            "total_events": 0,
+            "total_posts": 0,
+            "total_comments": 0,
+            "unique_users": 0,
+            "comments_per_post": 0,
+            "lurker_ratio": 0,
+            "time_range": {
+                "start": None,
+                "end": None,
+            },
+            "sources": [],
+        }
+
+    def summary(self, df: pd.DataFrame) -> dict:
+        """Assemble every headline statistic for ``df`` into one dict.
+
+        An empty frame short-circuits to ``empty_summary()`` so none of the
+        aggregations run on zero rows.
+        """
+        if df.empty:
+            return self.empty_summary()
+
+        # Computed once because comments_per_post needs both counts.
+        total_posts = self.total_posts(df)
+        total_comments = self.total_comments(df)
+
+        return {
+            "total_events": self.total_events(df),
+            "total_posts": total_posts,
+            "total_comments": total_comments,
+            "unique_users": self.unique_users(df),
+            "comments_per_post": self.comments_per_post(total_comments, total_posts),
+            "lurker_ratio": self.lurker_ratio(df),
+            "time_range": self.time_range(df),
+            "sources": self.sources(df),
+        }
--- a/server/analysis/user.py
+++ b/server/analysis/user.py
@@ -0,0 +1,124 @@
+import pandas as pd
+import re
+
+from collections import Counter
+
+class UserAnalysis:
+    """Per-author activity, emotion and vocabulary statistics."""
+
+    def __init__(self, word_exclusions: set[str]):
+        """Store the words that ``_tokenize`` removes from every token list."""
+        self.word_exclusions = word_exclusions
+
+    def _tokenize(self, text: str) -> list[str]:
+        """Return runs of three or more ``a-z`` letters that are not excluded.
+
+        The pattern matches lowercase letters only, so text must be
+        lowercased before it is passed in.
+        """
+        tokens = re.findall(r"\b[a-z]{3,}\b", text)
+        return [t for t in tokens if t not in self.word_exclusions]
+
+    def _vocab_richness_per_user(
+        self, df: pd.DataFrame, min_words: int = 20, top_most_used_words: int = 100
+    ) -> list:
+        """Return one vocabulary record per author, richest vocabulary first.
+
+        Args:
+            df: events frame with ``author`` and ``content`` columns.
+            min_words: authors with fewer tokens than this are left out.
+            top_most_used_words: maximum length of each ``top_words`` list.
+
+        Returns:
+            A list of dicts sorted by ``vocab_richness`` in descending order.
+        """
+        # Work on a copy so the caller's frame keeps its original content.
+        df = df.copy()
+        df["content"] = df["content"].fillna("").astype(str).str.lower()
+        df["tokens"] = df["content"].apply(self._tokenize)
+
+        rows = []
+        for author, group in df.groupby("author"):
+            all_tokens = [t for tokens in group["tokens"] for t in tokens]
+
+            total_words = len(all_tokens)
+            unique_words = len(set(all_tokens))
+            events = len(group)
+
+            # Min amount of words for a user, any less than this might give weird results
+            if total_words < min_words:
+                continue
+
+            # 100% = they never reused a word (excluding stop words)
+            vocab_richness = unique_words / total_words
+            avg_words = total_words / max(events, 1)
+
+            counts = Counter(all_tokens)
+            top_words = [
+                {"word": w, "count": int(c)}
+                for w, c in counts.most_common(top_most_used_words)
+            ]
+
+            rows.append(
+                {
+                    "author": author,
+                    "events": int(events),
+                    "total_words": int(total_words),
+                    "unique_words": int(unique_words),
+                    "vocab_richness": round(vocab_richness, 3),
+                    "avg_words_per_event": round(avg_words, 2),
+                    "top_words": top_words,
+                }
+            )
+
+        return sorted(rows, key=lambda x: x["vocab_richness"], reverse=True)
+
+    def top_users(self, df: pd.DataFrame) -> list:
+        """Return event counts per ``(author, source)`` pair, largest first."""
+        counts = df.groupby(["author", "source"]).size().sort_values(ascending=False)
+
+        return [
+            {"author": author, "source": source, "count": int(count)}
+            for (author, source), count in counts.items()
+        ]
+
+    def per_user_analysis(self, df: pd.DataFrame) -> list[dict]:
+        """Return one record per author, ordered by ascending comment/post ratio.
+
+        Each record holds the author's post and comment counts, two ratios,
+        the mean of every ``emotion_*`` column and a ``vocab`` sub-record.
+        Authors below the vocabulary word threshold get a zeroed placeholder
+        that carries only ``vocab_richness``, ``avg_words_per_event`` and
+        ``top_words``.
+        """
+        per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0)
+
+        emotion_cols = [col for col in df.columns if col.startswith("emotion_")]
+
+        avg_emotions_by_author = {}
+        if emotion_cols:
+            avg_emotions = df.groupby("author")[emotion_cols].mean().fillna(0.0)
+            avg_emotions_by_author = {
+                author: {emotion: float(score) for emotion, score in row.items()}
+                for author, row in avg_emotions.iterrows()
+            }
+
+        # ensure columns always exist
+        for col in ("post", "comment"):
+            if col not in per_user.columns:
+                per_user[col] = 0
+
+        # A zero divisor is replaced by 1 so both ratios stay finite.
+        per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(
+            0, 1
+        )
+        per_user["comment_share"] = per_user["comment"] / (
+            per_user["post"] + per_user["comment"]
+        ).replace(0, 1)
+
+        # This ordering carries through to the returned list; no second sort is needed.
+        per_user = per_user.sort_values("comment_post_ratio", ascending=True)
+        per_user_records = per_user.reset_index().to_dict(orient="records")
+
+        vocab_rows = self._vocab_richness_per_user(df)
+        vocab_by_author = {row["author"]: row for row in vocab_rows}
+
+        # merge vocab richness + per_user information
+        merged_users = []
+        for row in per_user_records:
+            author = row["author"]
+            merged_users.append(
+                {
+                    "author": author,
+                    "post": int(row.get("post", 0)),
+                    "comment": int(row.get("comment", 0)),
+                    "comment_post_ratio": float(row.get("comment_post_ratio", 0)),
+                    "comment_share": float(row.get("comment_share", 0)),
+                    "avg_emotions": avg_emotions_by_author.get(author, {}),
+                    "vocab": vocab_by_author.get(
+                        author,
+                        {
+                            "vocab_richness": 0,
+                            "avg_words_per_event": 0,
+                            "top_words": [],
+                        },
+                    ),
+                }
+            )
+
+        return merged_users
--- a/server/app.py
+++ b/server/app.py
@@ -186,7 +186,7 @@ def scrape_data():
        dataset_manager.set_dataset_status(
            dataset_id,
            "fetching",
-            f"Data is being fetched from {', '.join(source['name'] for source in source_configs)}"
+            f"Data is being fetched from {', '.join(source['name'] for source in source_configs)}",
        )

        fetch_and_process_dataset.delay(
@@ -198,12 +198,14 @@ def scrape_data():
        print(traceback.format_exc())
        return jsonify({"error": "Failed to queue dataset processing"}), 500

-
-    return jsonify({
+    return jsonify(
+        {
            "message": "Dataset queued for processing",
            "dataset_id": dataset_id,
-        "status": "processing"
-    }), 202
+            "status": "processing",
+        }
+    ), 202
+

@app.route("/datasets/upload", methods=["POST"])
@jwt_required()
@@ -233,7 +235,9 @@ def upload_data():

        posts_df = pd.read_json(post_file, lines=True, convert_dates=False)
        topics = json.load(topic_file)
-        dataset_id = dataset_manager.save_dataset_info(current_user, dataset_name, topics)
+        dataset_id = dataset_manager.save_dataset_info(
+            current_user, dataset_name, topics
+        )

        process_dataset.delay(dataset_id, posts_df.to_dict(orient="records"), topics)

@@ -249,6 +253,7 @@ def upload_data():
    except Exception as e:
        return jsonify({"error": f"An unexpected error occurred"}), 500

+
@app.route("/dataset/<int:dataset_id>", methods=["GET"])
@jwt_required()
 def get_dataset(dataset_id):
@@ -256,7 +261,9 @@ def get_dataset(dataset_id):
        user_id = int(get_jwt_identity())

        if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
-            raise NotAuthorisedException("This user is not authorised to access this dataset")
+            raise NotAuthorisedException(
+                "This user is not authorised to access this dataset"
+            )

        dataset_info = dataset_manager.get_dataset_info(dataset_id)
        included_cols = {"id", "name", "created_at"}
@@ -270,6 +277,7 @@ def get_dataset(dataset_id):
        print(traceback.format_exc())
        return jsonify({"error": "An unexpected error occured"}), 500

+
@app.route("/dataset/<int:dataset_id>", methods=["PATCH"])
@jwt_required()
 def update_dataset(dataset_id):
@@ -277,7 +285,9 @@ def update_dataset(dataset_id):
        user_id = int(get_jwt_identity())

        if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
-            raise NotAuthorisedException("This user is not authorised to access this dataset")
+            raise NotAuthorisedException(
+                "This user is not authorised to access this dataset"
+            )

        body = request.get_json()
        new_name = body.get("name")
@@ -286,7 +296,9 @@ def update_dataset(dataset_id):
            return jsonify({"error": "A valid name must be provided"}), 400

        dataset_manager.update_dataset_name(dataset_id, new_name.strip())
-        return jsonify({"message": f"Dataset {dataset_id} renamed to '{new_name.strip()}'"}), 200
+        return jsonify(
+            {"message": f"Dataset {dataset_id} renamed to '{new_name.strip()}'"}
+        ), 200
    except NotAuthorisedException:
        return jsonify({"error": "User is not authorised to access this content"}), 403
    except NonExistentDatasetException:
@@ -295,6 +307,7 @@ def update_dataset(dataset_id):
        print(traceback.format_exc())
        return jsonify({"error": "An unexpected error occurred"}), 500

+
@app.route("/dataset/<int:dataset_id>", methods=["DELETE"])
@jwt_required()
 def delete_dataset(dataset_id):
@@ -302,11 +315,17 @@ def delete_dataset(dataset_id):
        user_id = int(get_jwt_identity())

        if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
-            raise NotAuthorisedException("This user is not authorised to access this dataset")
+            raise NotAuthorisedException(
+                "This user is not authorised to access this dataset"
+            )

        dataset_manager.delete_dataset_info(dataset_id)
        dataset_manager.delete_dataset_content(dataset_id)
-        return jsonify({"message": f"Dataset {dataset_id} metadata and content successfully deleted"}), 200
+        return jsonify(
+            {
+                "message": f"Dataset {dataset_id} metadata and content successfully deleted"
+            }
+        ), 200
    except NotAuthorisedException:
        return jsonify({"error": "User is not authorised to access this content"}), 403
    except NonExistentDatasetException:
@@ -315,6 +334,7 @@ def delete_dataset(dataset_id):
        print(traceback.format_exc())
        return jsonify({"error": "An unexpected error occured"}), 500

+
@app.route("/dataset/<int:dataset_id>/status", methods=["GET"])
@jwt_required()
 def get_dataset_status(dataset_id):
@@ -322,7 +342,9 @@ def get_dataset_status(dataset_id):
        user_id = int(get_jwt_identity())

        if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
-            raise NotAuthorisedException("This user is not authorised to access this dataset")
+            raise NotAuthorisedException(
+                "This user is not authorised to access this dataset"
+            )

        dataset_status = dataset_manager.get_dataset_status(dataset_id)
        return jsonify(dataset_status), 200
@@ -334,17 +356,44 @@ def get_dataset_status(dataset_id):
        print(traceback.format_exc())
        return jsonify({"error": "An unexpected error occured"}), 500

-@app.route("/dataset/<int:dataset_id>/content", methods=["GET"])
+
+@app.route("/dataset/<int:dataset_id>/linguistic", methods=["GET"])
@jwt_required()
-def content_endpoint(dataset_id):
+def get_linguistic_analysis(dataset_id):
    try:
        user_id = int(get_jwt_identity())
        if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
-            raise NotAuthorisedException("This user is not authorised to access this dataset")
+            raise NotAuthorisedException(
+                "This user is not authorised to access this dataset"
+            )

        dataset_content = dataset_manager.get_dataset_content(dataset_id)
        filters = get_request_filters()
-        return jsonify(stat_gen.get_content_analysis(dataset_content, filters)), 200
+        return jsonify(stat_gen.linguistic(dataset_content, filters)), 200
+    except NotAuthorisedException:
+        return jsonify({"error": "User is not authorised to access this content"}), 403
+    except NonExistentDatasetException:
+        return jsonify({"error": "Dataset does not exist"}), 404
+    except ValueError as e:
+        return jsonify({"error": f"Malformed or missing data"}), 400
+    except Exception as e:
+        print(traceback.format_exc())
+        return jsonify({"error": f"An unexpected error occurred"}), 500
+
+
+@app.route("/dataset/<int:dataset_id>/emotional", methods=["GET"])
+@jwt_required()
+def get_emotional_analysis(dataset_id):
+    try:
+        user_id = int(get_jwt_identity())
+        if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
+            raise NotAuthorisedException(
+                "This user is not authorised to access this dataset"
+            )
+
+        dataset_content = dataset_manager.get_dataset_content(dataset_id)
+        filters = get_request_filters()
+        return jsonify(stat_gen.emotional(dataset_content, filters)), 200
    except NotAuthorisedException:
        return jsonify({"error": "User is not authorised to access this content"}), 403
    except NonExistentDatasetException:
@@ -362,7 +411,9 @@ def get_summary(dataset_id):
    try:
        user_id = int(get_jwt_identity())
        if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
-            raise NotAuthorisedException("This user is not authorised to access this dataset")
+            raise NotAuthorisedException(
+                "This user is not authorised to access this dataset"
+            )

        dataset_content = dataset_manager.get_dataset_content(dataset_id)
        filters = get_request_filters()
@@ -378,17 +429,19 @@ def get_summary(dataset_id):
        return jsonify({"error": f"An unexpected error occurred"}), 500


-@app.route("/dataset/<int:dataset_id>/time", methods=["GET"])
+@app.route("/dataset/<int:dataset_id>/temporal", methods=["GET"])
@jwt_required()
-def get_time_analysis(dataset_id):
+def get_temporal_analysis(dataset_id):
    try:
        user_id = int(get_jwt_identity())
        if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
-            raise NotAuthorisedException("This user is not authorised to access this dataset")
+            raise NotAuthorisedException(
+                "This user is not authorised to access this dataset"
+            )

        dataset_content = dataset_manager.get_dataset_content(dataset_id)
        filters = get_request_filters()
-        return jsonify(stat_gen.get_time_analysis(dataset_content, filters)), 200
+        return jsonify(stat_gen.temporal(dataset_content, filters)), 200
    except NotAuthorisedException:
        return jsonify({"error": "User is not authorised to access this content"}), 403
    except NonExistentDatasetException:
@@ -406,11 +459,13 @@ def get_user_analysis(dataset_id):
    try:
        user_id = int(get_jwt_identity())
        if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
-            raise NotAuthorisedException("This user is not authorised to access this dataset")
+            raise NotAuthorisedException(
+                "This user is not authorised to access this dataset"
+            )

        dataset_content = dataset_manager.get_dataset_content(dataset_id)
        filters = get_request_filters()
-        return jsonify(stat_gen.get_user_analysis(dataset_content, filters)), 200
+        return jsonify(stat_gen.user(dataset_content, filters)), 200
    except NotAuthorisedException:
        return jsonify({"error": "User is not authorised to access this content"}), 403
    except NonExistentDatasetException:
@@ -428,11 +483,13 @@ def get_cultural_analysis(dataset_id):
    try:
        user_id = int(get_jwt_identity())
        if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
-            raise NotAuthorisedException("This user is not authorised to access this dataset")
+            raise NotAuthorisedException(
+                "This user is not authorised to access this dataset"
+            )

        dataset_content = dataset_manager.get_dataset_content(dataset_id)
        filters = get_request_filters()
-        return jsonify(stat_gen.get_cultural_analysis(dataset_content, filters)), 200
+        return jsonify(stat_gen.cultural(dataset_content, filters)), 200
    except NotAuthorisedException:
        return jsonify({"error": "User is not authorised to access this content"}), 403
    except NonExistentDatasetException:
@@ -444,17 +501,19 @@ def get_cultural_analysis(dataset_id):
        return jsonify({"error": f"An unexpected error occurred"}), 500


-@app.route("/dataset/<int:dataset_id>/interaction", methods=["GET"])
+@app.route("/dataset/<int:dataset_id>/interactional", methods=["GET"])
@jwt_required()
 def get_interaction_analysis(dataset_id):
    try:
        user_id = int(get_jwt_identity())
        if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
-            raise NotAuthorisedException("This user is not authorised to access this dataset")
+            raise NotAuthorisedException(
+                "This user is not authorised to access this dataset"
+            )

        dataset_content = dataset_manager.get_dataset_content(dataset_id)
        filters = get_request_filters()
-        return jsonify(stat_gen.get_interactional_analysis(dataset_content, filters)), 200
+        return jsonify(stat_gen.interactional(dataset_content, filters)), 200
    except NotAuthorisedException:
        return jsonify({"error": "User is not authorised to access this content"}), 403
    except NonExistentDatasetException:
@@ -465,6 +524,27 @@ def get_interaction_analysis(dataset_id):
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred"}), 500
    
+@app.route("/dataset/<int:dataset_id>/all", methods=["GET"])
+@jwt_required()
+def get_full_dataset(dataset_id: int):
+    """Return every stored row of a dataset as a JSON array of records.
+
+    Responds 403 when the JWT identity is not authorised for the dataset,
+    404 when the dataset does not exist, 400 on a ``ValueError`` and 500 on
+    any other exception (the traceback is printed).
+
+    NOTE(review): request filters are not applied here and the whole dataset
+    is serialised in a single response.
+    """
+    try:
+        user_id = int(get_jwt_identity())
+        if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
+            raise NotAuthorisedException(
+                "This user is not authorised to access this dataset"
+            )
+
+        dataset_content = dataset_manager.get_dataset_content(dataset_id)
+        return jsonify(dataset_content.to_dict(orient="records")), 200
+    except NotAuthorisedException:
+        return jsonify({"error": "User is not authorised to access this content"}), 403
+    except NonExistentDatasetException:
+        return jsonify({"error": "Dataset does not exist"}), 404
+    except ValueError:
+        return jsonify({"error": "Malformed or missing data"}), 400
+    except Exception:
+        print(traceback.format_exc())
+        return jsonify({"error": "An unexpected error occurred"}), 500

 if __name__ == "__main__":
    app.run(debug=True)
--- a/server/core/datasets.py
+++ b/server/core/datasets.py
@@ -101,7 +101,7 @@ class DatasetManager:
                row["source"],
                row.get("topic"),
                row.get("topic_confidence"),
-                Json(row["ner_entities"]) if row.get("ner_entities") else None,
+                Json(row["entities"]) if row.get("entities") is not None else None,
                row.get("emotion_anger"),
                row.get("emotion_disgust"),
                row.get("emotion_fear"),
--- a/server/db/schema.sql
+++ b/server/db/schema.sql
@@ -43,7 +43,7 @@ CREATE TABLE events (
    weekday VARCHAR(255) NOT NULL,

    /* Posts Only */
-    title VARCHAR(255),
+    title TEXT,

    /* Comments Only*/
    parent_id VARCHAR(255),
Author	SHA1	Message	Date
Dylan De Faoite	acc591ff1e	Merge pull request 'Finish off the links between frontend and backend' (#10 ) from feat/add-frontend-pages into main Reviewed-on: #10	2026-03-18 20:30:19 +00:00
Dylan De Faoite	e054997bb1	feat(frontend): reword CulturalStats to improve understandability	2026-03-18 19:23:35 +00:00
Dylan De Faoite	e5414befa7	feat(frontend): add dominant emotion display to UserModal	2026-03-18 19:12:25 +00:00
Dylan De Faoite	86926898ce	feat(frontend): improve labels to be more understandable	2026-03-18 19:12:11 +00:00
Dylan De Faoite	b1177540a1	feat(frontend): enhance EmotionalStats component with detailed mood analysis	2026-03-18 19:11:18 +00:00
Dylan De Faoite	f604fcc531	feat(frontend): add warning message for scraping limits	2026-03-18 19:02:11 +00:00
Dylan De Faoite	b7aec2b0ea	feat(frontend): add favicon Credit goes to `srip` on flaticon for the image.	2026-03-18 19:00:31 +00:00
Dylan De Faoite	1446dd176d	feat(frontend): center page selection	2026-03-18 18:53:14 +00:00
Dylan De Faoite	c215024ef2	feat(frontend): add deleted user filter Reddit often contains "[Deleted]" when a user is banned or deletes their post/comment. Keeping the backend faithful to the original dataset is important so the filtering is being done on the frontend.	2026-03-18 18:50:51 +00:00
Dylan De Faoite	17ef42e548	feat!(frontend): add cultural, interactional and linguistic stat pages	2026-03-18 18:43:49 +00:00
Dylan De Faoite	7e4a91bb5e	style(frontend): style api types to be in order of the endpoint	2026-03-18 18:40:39 +00:00
Dylan De Faoite	436549641f	chore(frontend): add api types for new backend data	2026-03-18 18:37:39 +00:00
Dylan De Faoite	3e78a54388	feat(stat): add conversation concentration metric Remove old `initiator_ratio` metric which wasn't working due every event having a `reply_to` value. This metric was suggested by AI, and is a surprisingly interesting one that gave interesting insights.	2026-03-18 18:36:09 +00:00
Dylan De Faoite	71998c450e	fix(db): change title type to text Occasionally a Reddit post would have a long title, and would break in the schema.	2026-03-17 19:49:03 +00:00
Dylan De Faoite	2a00384a55	feat(interaction): add top interaction pairs and initiator ratio methods	2026-03-17 19:03:56 +00:00
Dylan De Faoite	8372aa7278	feat(api): add endpoint to view entire dataset	2026-03-17 13:36:41 +00:00
Dylan De Faoite	7b5a939271	fix(stats): missing private methods in User obj	2026-03-17 13:36:10 +00:00
Dylan De Faoite	2fa1dff4b7	feat(stat): add lexical diversity stat	2026-03-17 13:27:49 +00:00
Dylan De Faoite	31fb275ee3	fix(db): incorrect NER column being inserted	2026-03-17 12:53:30 +00:00
Dylan De Faoite	8a0f6e71e8	chore(api): rename cultural entity emotion endpoint	2026-03-17 12:31:53 +00:00
Dylan De Faoite	9093059d05	refactor(stats): move user stats out of interactional into users	2026-03-17 12:23:03 +00:00
Dylan De Faoite	8a13444b16	chore(frontend): add new API types	2026-03-16 16:46:07 +00:00
Dylan De Faoite	3468fdc2ea	feat(api): add new user and linguistic endpoints	2026-03-16 16:45:11 +00:00
Dylan De Faoite	09a4f9036f	refactor(stats): add summary and user stat classes for consistency	2026-03-16 16:43:24 +00:00
Dylan De Faoite	97fccd073b	feat(emotional): add average emotion & dominant emotion stats	2026-03-16 16:41:28 +00:00