Finish off the links between frontend and backend #10

Merged
dylan merged 24 commits from feat/add-frontend-pages into main 2026-03-18 20:30:19 +00:00
21 changed files with 1364 additions and 406 deletions

View File

@@ -2,7 +2,7 @@
<html lang="en"> <html lang="en">
<head> <head>
<meta charset="UTF-8" /> <meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/vite.svg" /> <link rel="icon" type="image/png" href="/icon.png" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>frontend</title> <title>frontend</title>
</head> </head>

BIN
frontend/public/icon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

View File

@@ -0,0 +1,158 @@
import Card from "./Card";
import StatsStyling from "../styles/stats_styling";
import type { CulturalAnalysisResponse } from "../types/ApiTypes";
const styles = StatsStyling;

// Props for the cultural-analysis dashboard section.
type CulturalStatsProps = {
  data: CulturalAnalysisResponse;
};

// Dashboard panel summarising "us vs them" (identity) language, stance-marker
// word counts, and the dominant emotion around the most-mentioned entities.
const CulturalStats = ({ data }: CulturalStatsProps) => {
  const identity = data.identity_markers;
  const stance = data.stance_markers;
  // Raw counts of in-group (we/us/our) and out-group (they/them/their) words;
  // default to 0 so the totals below stay numeric when markers are missing.
  const inGroupWords = identity?.in_group_usage ?? 0;
  const outGroupWords = identity?.out_group_usage ?? 0;
  const totalGroupWords = inGroupWords + outGroupWords;
  // Ratios arrive as fractions of all words; convert to percentages,
  // or null when the backend did not supply a numeric ratio.
  const inGroupWordRate = typeof identity?.in_group_ratio === "number"
    ? identity.in_group_ratio * 100
    : null;
  const outGroupWordRate = typeof identity?.out_group_ratio === "number"
    ? identity.out_group_ratio * 100
    : null;
  // Top 20 entities, most-mentioned (highest post_count) first.
  const rawEntities = data.avg_emotion_per_entity?.entity_emotion_avg ?? {};
  const entities = Object.entries(rawEntities)
    .sort((a, b) => (b[1].post_count - a[1].post_count))
    .slice(0, 20);
  // Formats the highest-scoring emotion in an average map as "label (xx.x%)".
  // Returns an em dash when no emotion data is present. Keys are expected to
  // be prefixed "emotion_" (stripped for display).
  const topEmotion = (emotionAvg: Record<string, number> | undefined) => {
    const entries = Object.entries(emotionAvg ?? {});
    if (!entries.length) {
      return "—";
    }
    entries.sort((a, b) => b[1] - a[1]);
    const dominant = entries[0] ?? ["emotion_unknown", 0];
    const dominantLabel = dominant[0].replace("emotion_", "");
    return `${dominantLabel} (${(dominant[1] * 100).toFixed(1)}%)`;
  };
  return (
    <div style={styles.page}>
      <div style={{ ...styles.container, ...styles.grid }}>
        <div style={{ ...styles.card, gridColumn: "span 12" }}>
          <h2 style={styles.sectionTitle}>Community Framing Overview</h2>
          <p style={styles.sectionSubtitle}>Simple view of how often people use "us" words vs "them" words, and the tone around that language.</p>
        </div>
        <Card
          label="In-Group Words"
          value={inGroupWords.toLocaleString()}
          sublabel="Times we/us/our appears"
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Out-Group Words"
          value={outGroupWords.toLocaleString()}
          sublabel="Times they/them/their appears"
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="In-Group Posts"
          value={identity?.in_group_posts?.toLocaleString() ?? "—"}
          sublabel='Posts leaning toward "us" language'
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Out-Group Posts"
          value={identity?.out_group_posts?.toLocaleString() ?? "—"}
          sublabel='Posts leaning toward "them" language'
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Balanced Posts"
          value={identity?.tie_posts?.toLocaleString() ?? "—"}
          sublabel="Posts with equal us/them signals"
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Total Group Words"
          value={totalGroupWords.toLocaleString()}
          sublabel="In-group + out-group words"
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="In-Group Share"
          value={inGroupWordRate === null ? "—" : `${inGroupWordRate.toFixed(2)}%`}
          sublabel="Share of all words"
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Out-Group Share"
          value={outGroupWordRate === null ? "—" : `${outGroupWordRate.toFixed(2)}%`}
          sublabel="Share of all words"
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Hedging Words"
          value={stance?.hedge_total?.toLocaleString() ?? "—"}
          sublabel={typeof stance?.hedge_per_1k_tokens === "number" ? `${stance.hedge_per_1k_tokens.toFixed(1)} per 1k words` : "Word frequency"}
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Certainty Words"
          value={stance?.certainty_total?.toLocaleString() ?? "—"}
          sublabel={typeof stance?.certainty_per_1k_tokens === "number" ? `${stance.certainty_per_1k_tokens.toFixed(1)} per 1k words` : "Word frequency"}
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Need/Should Words"
          value={stance?.deontic_total?.toLocaleString() ?? "—"}
          sublabel={typeof stance?.deontic_per_1k_tokens === "number" ? `${stance.deontic_per_1k_tokens.toFixed(1)} per 1k words` : "Word frequency"}
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Permission Words"
          value={stance?.permission_total?.toLocaleString() ?? "—"}
          sublabel={typeof stance?.permission_per_1k_tokens === "number" ? `${stance.permission_per_1k_tokens.toFixed(1)} per 1k words` : "Word frequency"}
          style={{ gridColumn: "span 3" }}
        />
        <div style={{ ...styles.card, gridColumn: "span 6" }}>
          <h2 style={styles.sectionTitle}>Mood in "Us" Posts</h2>
          <p style={styles.sectionSubtitle}>Most likely emotion when in-group wording is stronger.</p>
          <div style={styles.topUserName}>{topEmotion(identity?.in_group_emotion_avg)}</div>
        </div>
        <div style={{ ...styles.card, gridColumn: "span 6" }}>
          <h2 style={styles.sectionTitle}>Mood in "Them" Posts</h2>
          <p style={styles.sectionSubtitle}>Most likely emotion when out-group wording is stronger.</p>
          <div style={styles.topUserName}>{topEmotion(identity?.out_group_emotion_avg)}</div>
        </div>
        <div style={{ ...styles.card, gridColumn: "span 12" }}>
          <h2 style={styles.sectionTitle}>Entity Mood Snapshot</h2>
          <p style={styles.sectionSubtitle}>Most mentioned entities and the mood that appears most with each.</p>
          {!entities.length ? (
            <div style={styles.topUserMeta}>No entity-level cultural data available.</div>
          ) : (
            <div style={{ ...styles.topUsersList, maxHeight: 420, overflowY: "auto" }}>
              {entities.map(([entity, aggregate]) => (
                <div key={entity} style={styles.topUserItem}>
                  <div style={styles.topUserName}>{entity}</div>
                  <div style={styles.topUserMeta}>
                    {aggregate.post_count.toLocaleString()} posts Likely mood: {topEmotion(aggregate.emotion_avg)}
                  </div>
                </div>
              ))}
            </div>
          )}
        </div>
      </div>
    </div>
  );
};

export default CulturalStats;

View File

@@ -9,6 +9,9 @@ type EmotionalStatsProps = {
const EmotionalStats = ({contentData}: EmotionalStatsProps) => { const EmotionalStats = ({contentData}: EmotionalStatsProps) => {
const rows = contentData.average_emotion_by_topic ?? []; const rows = contentData.average_emotion_by_topic ?? [];
const overallEmotionAverage = contentData.overall_emotion_average ?? [];
const dominantEmotionDistribution = contentData.dominant_emotion_distribution ?? [];
const emotionBySource = contentData.emotion_by_source ?? [];
const lowSampleThreshold = 20; const lowSampleThreshold = 20;
const stableSampleThreshold = 50; const stableSampleThreshold = 50;
const emotionKeys = rows.length const emotionKeys = rows.length
@@ -64,41 +67,106 @@ const EmotionalStats = ({contentData}: EmotionalStatsProps) => {
return ( return (
<div style={styles.page}> <div style={styles.page}>
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}> <div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
<h2 style={styles.sectionTitle}>Average Emotion by Topic</h2> <h2 style={styles.sectionTitle}>Topic Mood Overview</h2>
<p style={styles.sectionSubtitle}>Read confidence together with sample size. Topics with fewer than {lowSampleThreshold} events are usually noisy and less reliable.</p> <p style={styles.sectionSubtitle}>Use the strength score together with post count. Topics with fewer than {lowSampleThreshold} events are often noisy.</p>
<div style={styles.emotionalSummaryRow}> <div style={styles.emotionalSummaryRow}>
<span><strong style={{ color: "#24292f" }}>Topics:</strong> {strongestPerTopic.length}</span> <span><strong style={{ color: "#24292f" }}>Topics:</strong> {strongestPerTopic.length}</span>
<span><strong style={{ color: "#24292f" }}>Median Sample:</strong> {medianSampleSize} events</span> <span><strong style={{ color: "#24292f" }}>Median Posts:</strong> {medianSampleSize}</span>
<span><strong style={{ color: "#24292f" }}>Low Sample (&lt;{lowSampleThreshold}):</strong> {lowSampleTopics}</span> <span><strong style={{ color: "#24292f" }}>Small Topics (&lt;{lowSampleThreshold}):</strong> {lowSampleTopics}</span>
<span><strong style={{ color: "#24292f" }}>Stable Sample ({stableSampleThreshold}+):</strong> {stableSampleTopics}</span> <span><strong style={{ color: "#24292f" }}>Stable Topics ({stableSampleThreshold}+):</strong> {stableSampleTopics}</span>
</div> </div>
<p style={{ ...styles.sectionSubtitle, marginTop: 10, marginBottom: 0 }}> <p style={{ ...styles.sectionSubtitle, marginTop: 10, marginBottom: 0 }}>
Confidence reflects how strongly one emotion leads within a topic, not model accuracy. Use larger samples for stronger conclusions. Strength means how far the top emotion is ahead in that topic. It does not mean model accuracy.
</p> </p>
</div> </div>
<div style={{ ...styles.container, ...styles.grid }}> <div style={{ ...styles.container, ...styles.grid }}>
<div style={{ ...styles.card, gridColumn: "span 4" }}>
<h2 style={styles.sectionTitle}>Mood Averages</h2>
<p style={styles.sectionSubtitle}>Average score for each emotion.</p>
{!overallEmotionAverage.length ? (
<div style={styles.topUserMeta}>No overall emotion averages available.</div>
) : (
<div style={{ ...styles.topUsersList, maxHeight: 260, overflowY: "auto" }}>
{[...overallEmotionAverage]
.sort((a, b) => b.score - a.score)
.map((row) => (
<div key={row.emotion} style={styles.topUserItem}>
<div style={styles.topUserName}>{formatEmotion(row.emotion)}</div>
<div style={styles.topUserMeta}>{row.score.toFixed(3)}</div>
</div>
))}
</div>
)}
</div>
<div style={{ ...styles.card, gridColumn: "span 4" }}>
<h2 style={styles.sectionTitle}>Mood Split</h2>
<p style={styles.sectionSubtitle}>How often each emotion is dominant.</p>
{!dominantEmotionDistribution.length ? (
<div style={styles.topUserMeta}>No dominant-emotion split available.</div>
) : (
<div style={{ ...styles.topUsersList, maxHeight: 260, overflowY: "auto" }}>
{[...dominantEmotionDistribution]
.sort((a, b) => b.ratio - a.ratio)
.map((row) => (
<div key={row.emotion} style={styles.topUserItem}>
<div style={styles.topUserName}>{formatEmotion(row.emotion)}</div>
<div style={styles.topUserMeta}>{(row.ratio * 100).toFixed(1)}% {row.count.toLocaleString()} events</div>
</div>
))}
</div>
)}
</div>
<div style={{ ...styles.card, gridColumn: "span 4" }}>
<h2 style={styles.sectionTitle}>Mood by Source</h2>
<p style={styles.sectionSubtitle}>Leading emotion in each source.</p>
{!emotionBySource.length ? (
<div style={styles.topUserMeta}>No source emotion profile available.</div>
) : (
<div style={{ ...styles.topUsersList, maxHeight: 260, overflowY: "auto" }}>
{[...emotionBySource]
.sort((a, b) => b.event_count - a.event_count)
.map((row) => (
<div key={row.source} style={styles.topUserItem}>
<div style={styles.topUserName}>{row.source}</div>
<div style={styles.topUserMeta}>
{formatEmotion(row.dominant_emotion)} {row.dominant_score.toFixed(3)} {row.event_count.toLocaleString()} events
</div>
</div>
))}
</div>
)}
</div>
<div style={{ ...styles.card, gridColumn: "span 12" }}>
<h2 style={styles.sectionTitle}>Topic Snapshots</h2>
<p style={styles.sectionSubtitle}>Per-topic mood with strength and post count.</p>
<div style={{ ...styles.grid, marginTop: 10 }}>
{strongestPerTopic.map((topic) => ( {strongestPerTopic.map((topic) => (
<div key={topic.topic} style={{ ...styles.card, gridColumn: "span 4" }}> <div key={topic.topic} style={{ ...styles.cardBase, gridColumn: "span 4" }}>
<h3 style={{ ...styles.sectionTitle, marginBottom: 6 }}>{topic.topic}</h3> <h3 style={{ ...styles.sectionTitle, marginBottom: 6 }}>{topic.topic}</h3>
<div style={styles.emotionalTopicLabel}> <div style={styles.emotionalTopicLabel}>
Top Emotion Likely Mood
</div> </div>
<div style={styles.emotionalTopicValue}> <div style={styles.emotionalTopicValue}>
{formatEmotion(topic.emotion)} {formatEmotion(topic.emotion)}
</div> </div>
<div style={styles.emotionalMetricRow}> <div style={styles.emotionalMetricRow}>
<span>Confidence</span> <span>Strength</span>
<span style={styles.emotionalMetricValue}>{topic.value.toFixed(3)}</span> <span style={styles.emotionalMetricValue}>{topic.value.toFixed(3)}</span>
</div> </div>
<div style={styles.emotionalMetricRowCompact}> <div style={styles.emotionalMetricRowCompact}>
<span>Sample Size</span> <span>Posts in Topic</span>
<span style={styles.emotionalMetricValue}>{topic.count} events</span> <span style={styles.emotionalMetricValue}>{topic.count}</span>
</div> </div>
</div> </div>
))} ))}
</div> </div>
</div> </div>
</div>
</div>
); );
} }

View File

@@ -0,0 +1,208 @@
import Card from "./Card";
import StatsStyling from "../styles/stats_styling";
import type { InteractionAnalysisResponse } from "../types/ApiTypes";
import {
ResponsiveContainer,
BarChart,
Bar,
XAxis,
YAxis,
CartesianGrid,
Tooltip,
PieChart,
Pie,
Cell,
Legend,
} from "recharts";
const styles = StatsStyling;
type InteractionalStatsProps = {
data: InteractionAnalysisResponse;
};
const InteractionalStats = ({ data }: InteractionalStatsProps) => {
const graph = data.interaction_graph ?? {};
const userCount = Object.keys(graph).length;
const edges = Object.values(graph).flatMap((targets) => Object.values(targets));
const edgeCount = edges.length;
const interactionVolume = edges.reduce((sum, value) => sum + value, 0);
const concentration = data.conversation_concentration;
const topTenCommentShare = typeof concentration?.top_10pct_comment_share === "number"
? concentration?.top_10pct_comment_share
: null;
const topTenAuthorCount = typeof concentration?.top_10pct_author_count === "number"
? concentration.top_10pct_author_count
: null;
const totalCommentingAuthors = typeof concentration?.total_commenting_authors === "number"
? concentration.total_commenting_authors
: null;
const singleCommentAuthorRatio = typeof concentration?.single_comment_author_ratio === "number"
? concentration.single_comment_author_ratio
: null;
const singleCommentAuthors = typeof concentration?.single_comment_authors === "number"
? concentration.single_comment_authors
: null;
const topPairs = (data.top_interaction_pairs ?? [])
.filter((item): item is [[string, string], number] => {
if (!Array.isArray(item) || item.length !== 2) {
return false;
}
const pair = item[0];
const count = item[1];
return Array.isArray(pair)
&& pair.length === 2
&& typeof pair[0] === "string"
&& typeof pair[1] === "string"
&& typeof count === "number";
})
.slice(0, 20);
const topPairChartData = topPairs.slice(0, 8).map(([[source, target], value], index) => ({
pair: `${source} -> ${target}`,
replies: value,
rank: index + 1,
}));
const topTenSharePercent = topTenCommentShare === null
? null
: topTenCommentShare * 100;
const nonTopTenSharePercent = topTenSharePercent === null
? null
: Math.max(0, 100 - topTenSharePercent);
let concentrationPieData: { name: string; value: number }[] = [];
if (topTenSharePercent !== null && nonTopTenSharePercent !== null) {
concentrationPieData = [
{ name: "Top 10% authors", value: topTenSharePercent },
{ name: "Other authors", value: nonTopTenSharePercent },
];
}
const PIE_COLORS = ["#2b6777", "#c8d8e4"];
return (
<div style={styles.page}>
<div style={{ ...styles.container, ...styles.grid }}>
<div style={{ ...styles.card, gridColumn: "span 12" }}>
<h2 style={styles.sectionTitle}>Conversation Overview</h2>
<p style={styles.sectionSubtitle}>Who talks to who, and how concentrated the replies are.</p>
</div>
<Card
label="Average Reply Depth"
value={typeof data.average_thread_depth === "number" ? data.average_thread_depth.toFixed(2) : "—"}
sublabel="How deep reply chains usually go"
style={{ gridColumn: "span 3" }}
/>
<Card
label="Users in Network"
value={userCount.toLocaleString()}
sublabel="Users in the reply graph"
style={{ gridColumn: "span 3" }}
/>
<Card
label="User-to-User Links"
value={edgeCount.toLocaleString()}
sublabel="Unique reply directions"
style={{ gridColumn: "span 3" }}
/>
<Card
label="Total Replies"
value={interactionVolume.toLocaleString()}
sublabel="All reply links combined"
style={{ gridColumn: "span 3" }}
/>
<Card
label="Concentrated Replies"
value={topTenSharePercent === null ? "-" : `${topTenSharePercent.toFixed(1)}%`}
sublabel={topTenAuthorCount === null || totalCommentingAuthors === null
? "Reply share from the top 10% commenters"
: `${topTenAuthorCount.toLocaleString()} of ${totalCommentingAuthors.toLocaleString()} authors`}
style={{ gridColumn: "span 6" }}
/>
<Card
label="Single-Comment Authors"
value={singleCommentAuthorRatio === null ? "-" : `${(singleCommentAuthorRatio * 100).toFixed(1)}%`}
sublabel={singleCommentAuthors === null
? "Authors who commented exactly once"
: `${singleCommentAuthors.toLocaleString()} authors commented exactly once`}
style={{ gridColumn: "span 6" }}
/>
<div style={{ ...styles.card, gridColumn: "span 12" }}>
<h2 style={styles.sectionTitle}>Conversation Visuals</h2>
<p style={styles.sectionSubtitle}>Main reply links and concentration split.</p>
<div style={{ ...styles.grid, marginTop: 12 }}>
<div style={{ ...styles.cardBase, gridColumn: "span 6" }}>
<h3 style={{ ...styles.sectionTitle, fontSize: "1rem" }}>Top Interaction Pairs</h3>
<div style={{ width: "100%", height: 300 }}>
<ResponsiveContainer>
<BarChart data={topPairChartData} layout="vertical" margin={{ top: 8, right: 16, left: 16, bottom: 8 }}>
<CartesianGrid strokeDasharray="3 3" stroke="#d9e2ec" />
<XAxis type="number" allowDecimals={false} />
<YAxis
type="category"
dataKey="rank"
tickFormatter={(value) => `#${value}`}
width={36}
/>
<Tooltip />
<Bar dataKey="replies" fill="#2b6777" radius={[0, 6, 6, 0]} />
</BarChart>
</ResponsiveContainer>
</div>
</div>
<div style={{ ...styles.cardBase, gridColumn: "span 6" }}>
<h3 style={{ ...styles.sectionTitle, fontSize: "1rem" }}>Top 10% vs Other Comment Share</h3>
<div style={{ width: "100%", height: 300 }}>
<ResponsiveContainer>
<PieChart>
<Pie
data={concentrationPieData}
dataKey="value"
nameKey="name"
innerRadius={56}
outerRadius={88}
paddingAngle={2}
>
{concentrationPieData.map((entry, index) => (
<Cell key={`${entry.name}-${index}`} fill={PIE_COLORS[index % PIE_COLORS.length]} />
))}
</Pie>
<Tooltip />
<Legend verticalAlign="bottom" height={36} />
</PieChart>
</ResponsiveContainer>
</div>
</div>
</div>
</div>
<div style={{ ...styles.card, gridColumn: "span 12" }}>
<h2 style={styles.sectionTitle}>Frequent Reply Paths</h2>
<p style={styles.sectionSubtitle}>Most common user-to-user reply paths.</p>
{!topPairs.length ? (
<div style={styles.topUserMeta}>No interaction pair data available.</div>
) : (
<div style={{ ...styles.topUsersList, maxHeight: 420, overflowY: "auto" }}>
{topPairs.map(([[source, target], value], index) => (
<div key={`${source}->${target}-${index}`} style={styles.topUserItem}>
<div style={styles.topUserName}>{source} -&gt; {target}</div>
<div style={styles.topUserMeta}>{value.toLocaleString()} replies</div>
</div>
))}
</div>
)}
</div>
</div>
</div>
);
};
export default InteractionalStats;

View File

@@ -0,0 +1,91 @@
import Card from "./Card";
import StatsStyling from "../styles/stats_styling";
import type { LinguisticAnalysisResponse } from "../types/ApiTypes";
const styles = StatsStyling;
type LinguisticStatsProps = {
data: LinguisticAnalysisResponse;
};
const LinguisticStats = ({ data }: LinguisticStatsProps) => {
const lexical = data.lexical_diversity;
const words = data.word_frequencies ?? [];
const bigrams = data.common_two_phrases ?? [];
const trigrams = data.common_three_phrases ?? [];
const topWords = words.slice(0, 20);
const topBigrams = bigrams.slice(0, 10);
const topTrigrams = trigrams.slice(0, 10);
return (
<div style={styles.page}>
<div style={{ ...styles.container, ...styles.grid }}>
<div style={{ ...styles.card, gridColumn: "span 12" }}>
<h2 style={styles.sectionTitle}>Language Overview</h2>
<p style={styles.sectionSubtitle}>Quick read on how broad and repetitive the wording is.</p>
</div>
<Card
label="Total Words"
value={lexical?.total_tokens?.toLocaleString() ?? "—"}
sublabel="Words after basic filtering"
style={{ gridColumn: "span 4" }}
/>
<Card
label="Unique Words"
value={lexical?.unique_tokens?.toLocaleString() ?? "—"}
sublabel="Different words used"
style={{ gridColumn: "span 4" }}
/>
<Card
label="Vocabulary Variety"
value={typeof lexical?.ttr === "number" ? lexical.ttr.toFixed(4) : "—"}
sublabel="Higher means less repetition"
style={{ gridColumn: "span 4" }}
/>
<div style={{ ...styles.card, gridColumn: "span 4" }}>
<h2 style={styles.sectionTitle}>Top Words</h2>
<p style={styles.sectionSubtitle}>Most used single words.</p>
<div style={{ ...styles.topUsersList, maxHeight: 360, overflowY: "auto" }}>
{topWords.map((item) => (
<div key={item.word} style={styles.topUserItem}>
<div style={styles.topUserName}>{item.word}</div>
<div style={styles.topUserMeta}>{item.count.toLocaleString()} uses</div>
</div>
))}
</div>
</div>
<div style={{ ...styles.card, gridColumn: "span 4" }}>
<h2 style={styles.sectionTitle}>Top Bigrams</h2>
<p style={styles.sectionSubtitle}>Most used 2-word phrases.</p>
<div style={{ ...styles.topUsersList, maxHeight: 360, overflowY: "auto" }}>
{topBigrams.map((item) => (
<div key={item.ngram} style={styles.topUserItem}>
<div style={styles.topUserName}>{item.ngram}</div>
<div style={styles.topUserMeta}>{item.count.toLocaleString()} uses</div>
</div>
))}
</div>
</div>
<div style={{ ...styles.card, gridColumn: "span 4" }}>
<h2 style={styles.sectionTitle}>Top Trigrams</h2>
<p style={styles.sectionSubtitle}>Most used 3-word phrases.</p>
<div style={{ ...styles.topUsersList, maxHeight: 360, overflowY: "auto" }}>
{topTrigrams.map((item) => (
<div key={item.ngram} style={styles.topUserItem}>
<div style={styles.topUserName}>{item.ngram}</div>
<div style={styles.topUserMeta}>{item.count.toLocaleString()} uses</div>
</div>
))}
</div>
</div>
</div>
</div>
);
};
export default LinguisticStats;

View File

@@ -58,15 +58,13 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
const [selectedUser, setSelectedUser] = useState<string | null>(null); const [selectedUser, setSelectedUser] = useState<string | null>(null);
const selectedUserData: User | null = userData?.users.find((u) => u.author === selectedUser) ?? null; const selectedUserData: User | null = userData?.users.find((u) => u.author === selectedUser) ?? null;
console.log(summary)
return ( return (
<div style={styles.page}> <div style={styles.page}>
{/* main grid*/} {/* main grid*/}
<div style={{ ...styles.container, ...styles.grid}}> <div style={{ ...styles.container, ...styles.grid}}>
<Card <Card
label="Total Events" label="Total Activity"
value={summary?.total_events ?? "—"} value={summary?.total_events ?? "—"}
sublabel="Posts + comments" sublabel="Posts + comments"
style={{ style={{
@@ -74,15 +72,15 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
}} }}
/> />
<Card <Card
label="Unique Users" label="Active People"
value={summary?.unique_users ?? "—"} value={summary?.unique_users ?? "—"}
sublabel="Distinct authors" sublabel="Distinct users"
style={{ style={{
gridColumn: "span 4" gridColumn: "span 4"
}} }}
/> />
<Card <Card
label="Posts / Comments" label="Posts vs Comments"
value={ value={
summary summary
? `${summary.total_posts} / ${summary.total_comments}` ? `${summary.total_posts} / ${summary.total_comments}`
@@ -108,13 +106,13 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
/> />
<Card <Card
label="Lurker Ratio" label="One-Time Users"
value={ value={
typeof summary?.lurker_ratio === "number" typeof summary?.lurker_ratio === "number"
? `${Math.round(summary.lurker_ratio * 100)}%` ? `${Math.round(summary.lurker_ratio * 100)}%`
: "—" : "—"
} }
sublabel="Users with only 1 event" sublabel="Users with only one event"
style={{ style={{
gridColumn: "span 4" gridColumn: "span 4"
}} }}
@@ -136,12 +134,12 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
{/* events per day */} {/* events per day */}
<div style={{ ...styles.card, gridColumn: "span 5" }}> <div style={{ ...styles.card, gridColumn: "span 5" }}>
<h2 style={styles.sectionTitle}>Events per Day</h2> <h2 style={styles.sectionTitle}>Activity Over Time</h2>
<p style={styles.sectionSubtitle}>Trend of activity over time</p> <p style={styles.sectionSubtitle}>How much posting happened each day.</p>
<div style={styles.chartWrapper}> <div style={styles.chartWrapper}>
<ResponsiveContainer width="100%" height="100%"> <ResponsiveContainer width="100%" height="100%">
<LineChart data={timeData?.events_per_day.filter((d) => new Date(d.date) >= new Date('2026-01-10'))}> <LineChart data={timeData?.events_per_day ?? []}>
<CartesianGrid strokeDasharray="3 3" /> <CartesianGrid strokeDasharray="3 3" />
<XAxis dataKey="date" /> <XAxis dataKey="date" />
<YAxis /> <YAxis />
@@ -154,8 +152,8 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
{/* Word Cloud */} {/* Word Cloud */}
<div style={{ ...styles.card, gridColumn: "span 4" }}> <div style={{ ...styles.card, gridColumn: "span 4" }}>
<h2 style={styles.sectionTitle}>Word Cloud</h2> <h2 style={styles.sectionTitle}>Common Words</h2>
<p style={styles.sectionSubtitle}>Most common terms across events</p> <p style={styles.sectionSubtitle}>Frequently used words across the dataset.</p>
<div style={styles.chartWrapper}> <div style={styles.chartWrapper}>
<ReactWordcloud <ReactWordcloud
@@ -174,8 +172,8 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
<div style={{...styles.card, ...styles.scrollArea, gridColumn: "span 3", <div style={{...styles.card, ...styles.scrollArea, gridColumn: "span 3",
}} }}
> >
<h2 style={styles.sectionTitle}>Top Users</h2> <h2 style={styles.sectionTitle}>Most Active Users</h2>
<p style={styles.sectionSubtitle}>Most active authors</p> <p style={styles.sectionSubtitle}>Who posted the most events.</p>
<div style={styles.topUsersList}> <div style={styles.topUsersList}>
{userData?.top_users.slice(0, 100).map((item) => ( {userData?.top_users.slice(0, 100).map((item) => (
@@ -195,8 +193,8 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
{/* Heatmap */} {/* Heatmap */}
<div style={{ ...styles.card, gridColumn: "span 12" }}> <div style={{ ...styles.card, gridColumn: "span 12" }}>
<h2 style={styles.sectionTitle}>Heatmap</h2> <h2 style={styles.sectionTitle}>Weekly Activity Pattern</h2>
<p style={styles.sectionSubtitle}>Activity density across time</p> <p style={styles.sectionSubtitle}>When activity tends to happen by weekday and hour.</p>
<div style={styles.heatmapWrapper}> <div style={styles.heatmapWrapper}>
<ActivityHeatmap data={timeData?.weekday_hour_heatmap ?? []} /> <ActivityHeatmap data={timeData?.weekday_hour_heatmap ?? []} />

View File

@@ -12,6 +12,9 @@ type Props = {
}; };
export default function UserModal({ open, onClose, userData, username }: Props) { export default function UserModal({ open, onClose, userData, username }: Props) {
const dominantEmotionEntry = Object.entries(userData?.avg_emotions ?? {})
.sort((a, b) => b[1] - a[1])[0];
return ( return (
<Dialog open={open} onClose={onClose} style={styles.modalRoot}> <Dialog open={open} onClose={onClose} style={styles.modalRoot}>
<div style={styles.modalBackdrop} /> <div style={styles.modalBackdrop} />
@@ -66,6 +69,15 @@ export default function UserModal({ open, onClose, userData, username }: Props)
</div> </div>
</div> </div>
) : null} ) : null}
{dominantEmotionEntry ? (
<div style={styles.topUserItem}>
<div style={styles.topUserName}>Dominant Avg Emotion</div>
<div style={styles.topUserMeta}>
{dominantEmotionEntry[0].replace("emotion_", "")} ({dominantEmotionEntry[1].toFixed(3)})
</div>
</div>
) : null}
</div> </div>
)} )}
</DialogPanel> </DialogPanel>

View File

@@ -87,15 +87,15 @@ const UserStats = (props: { data: UserAnalysisResponse }) => {
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
/> />
<Card <Card
label="Interactions" label="Replies"
value={totalInteractions.toLocaleString()} value={totalInteractions.toLocaleString()}
sublabel="Filtered links (2+ interactions)" sublabel="Links with at least 2 replies"
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
/> />
<Card <Card
label="Average Intensity" label="Replies per Connected User"
value={avgInteractionsPerConnectedUser.toFixed(1)} value={avgInteractionsPerConnectedUser.toFixed(1)}
sublabel="Interactions per connected user" sublabel="Average from visible graph links"
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
/> />
<Card <Card
@@ -106,13 +106,13 @@ const UserStats = (props: { data: UserAnalysisResponse }) => {
/> />
<Card <Card
label="Strongest Connection" label="Strongest User Link"
value={strongestLink ? `${strongestLink.source} -> ${strongestLink.target}` : "—"} value={strongestLink ? `${strongestLink.source} -> ${strongestLink.target}` : "—"}
sublabel={strongestLink ? `${strongestLink.value.toLocaleString()} interactions` : "No graph edges after filtering"} sublabel={strongestLink ? `${strongestLink.value.toLocaleString()} replies` : "No graph links after filtering"}
style={{ gridColumn: "span 6" }} style={{ gridColumn: "span 6" }}
/> />
<Card <Card
label="Most Reply-Driven User" label="Most Comment-Heavy User"
value={highlyInteractiveUser?.author ?? "—"} value={highlyInteractiveUser?.author ?? "—"}
sublabel={ sublabel={
highlyInteractiveUser highlyInteractiveUser
@@ -125,7 +125,7 @@ const UserStats = (props: { data: UserAnalysisResponse }) => {
<div style={{ ...styles.card, gridColumn: "span 12" }}> <div style={{ ...styles.card, gridColumn: "span 12" }}>
<h2 style={styles.sectionTitle}>User Interaction Graph</h2> <h2 style={styles.sectionTitle}>User Interaction Graph</h2>
<p style={styles.sectionSubtitle}> <p style={styles.sectionSubtitle}>
Nodes represent users and links represent conversation interactions. Each node is a user, and each link shows replies between them.
</p> </p>
<div ref={graphContainerRef} style={{ width: "100%", height: graphSize.height }}> <div ref={graphContainerRef} style={{ width: "100%", height: graphSize.height }}>
<ForceGraph3D <ForceGraph3D

View File

@@ -191,6 +191,9 @@ const AutoScrapePage = () => {
<p style={styles.sectionHeaderSubtitle}> <p style={styles.sectionHeaderSubtitle}>
Select sources and scrape settings, then queue processing automatically. Select sources and scrape settings, then queue processing automatically.
</p> </p>
<p style={{ ...styles.subtleBodyText, marginTop: 6, color: "#9a6700" }}>
Warning: Scraping more than 250 posts from any single site can take hours due to rate limits.
</p>
</div> </div>
<button <button
type="button" type="button"

View File

@@ -5,26 +5,42 @@ import StatsStyling from "../styles/stats_styling";
import SummaryStats from "../components/SummaryStats"; import SummaryStats from "../components/SummaryStats";
import EmotionalStats from "../components/EmotionalStats"; import EmotionalStats from "../components/EmotionalStats";
import UserStats from "../components/UserStats"; import UserStats from "../components/UserStats";
import LinguisticStats from "../components/LinguisticStats";
import InteractionalStats from "../components/InteractionalStats";
import CulturalStats from "../components/CulturalStats";
import { import {
type SummaryResponse, type SummaryResponse,
type UserAnalysisResponse, type UserAnalysisResponse,
type TimeAnalysisResponse, type TimeAnalysisResponse,
type ContentAnalysisResponse type ContentAnalysisResponse,
type UserEndpointResponse,
type LinguisticAnalysisResponse,
type EmotionalAnalysisResponse,
type InteractionAnalysisResponse,
type CulturalAnalysisResponse
} from '../types/ApiTypes' } from '../types/ApiTypes'
const API_BASE_URL = import.meta.env.VITE_BACKEND_URL const API_BASE_URL = import.meta.env.VITE_BACKEND_URL
const styles = StatsStyling; const styles = StatsStyling;
const DELETED_USERS = ["[deleted]"];
const isDeletedUser = (value: string | null | undefined) => (
DELETED_USERS.includes((value ?? "").trim().toLowerCase())
);
const StatPage = () => { const StatPage = () => {
const { datasetId: routeDatasetId } = useParams<{ datasetId: string }>(); const { datasetId: routeDatasetId } = useParams<{ datasetId: string }>();
const [error, setError] = useState(''); const [error, setError] = useState('');
const [loading, setLoading] = useState(false); const [loading, setLoading] = useState(false);
const [activeView, setActiveView] = useState<"summary" | "emotional" | "user">("summary"); const [activeView, setActiveView] = useState<"summary" | "emotional" | "user" | "linguistic" | "interactional" | "cultural">("summary");
const [userData, setUserData] = useState<UserAnalysisResponse | null>(null); const [userData, setUserData] = useState<UserAnalysisResponse | null>(null);
const [timeData, setTimeData] = useState<TimeAnalysisResponse | null>(null); const [timeData, setTimeData] = useState<TimeAnalysisResponse | null>(null);
const [contentData, setContentData] = useState<ContentAnalysisResponse | null>(null); const [contentData, setContentData] = useState<ContentAnalysisResponse | null>(null);
const [linguisticData, setLinguisticData] = useState<LinguisticAnalysisResponse | null>(null);
const [interactionData, setInteractionData] = useState<InteractionAnalysisResponse | null>(null);
const [culturalData, setCulturalData] = useState<CulturalAnalysisResponse | null>(null);
const [summary, setSummary] = useState<SummaryResponse | null>(null); const [summary, setSummary] = useState<SummaryResponse | null>(null);
@@ -83,15 +99,23 @@ const StatPage = () => {
setLoading(true); setLoading(true);
Promise.all([ Promise.all([
axios.get<TimeAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/time`, { axios.get<TimeAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/temporal`, {
params, params,
headers: authHeaders, headers: authHeaders,
}), }),
axios.get<UserAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/user`, { axios.get<UserEndpointResponse>(`${API_BASE_URL}/dataset/${datasetId}/user`, {
params, params,
headers: authHeaders, headers: authHeaders,
}), }),
axios.get<ContentAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/content`, { axios.get<LinguisticAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/linguistic`, {
params,
headers: authHeaders,
}),
axios.get<EmotionalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/emotional`, {
params,
headers: authHeaders,
}),
axios.get<InteractionAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/interactional`, {
params, params,
headers: authHeaders, headers: authHeaders,
}), }),
@@ -99,12 +123,87 @@ const StatPage = () => {
params, params,
headers: authHeaders, headers: authHeaders,
}), }),
axios.get<CulturalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/cultural`, {
params,
headers: authHeaders,
}),
]) ])
.then(([timeRes, userRes, contentRes, summaryRes]) => { .then(([timeRes, userRes, linguisticRes, emotionalRes, interactionRes, summaryRes, culturalRes]) => {
setUserData(userRes.data || null); const usersList = userRes.data.users ?? [];
const topUsersList = userRes.data.top_users ?? [];
const interactionGraphRaw = interactionRes.data?.interaction_graph ?? {};
const topPairsRaw = interactionRes.data?.top_interaction_pairs ?? [];
const filteredUsers: typeof usersList = [];
for (const user of usersList) {
if (isDeletedUser(user.author)) continue;
filteredUsers.push(user);
}
const filteredTopUsers: typeof topUsersList = [];
for (const user of topUsersList) {
if (isDeletedUser(user.author)) continue;
filteredTopUsers.push(user);
}
const filteredInteractionGraph: Record<string, Record<string, number>> = {};
for (const [source, targets] of Object.entries(interactionGraphRaw)) {
if (isDeletedUser(source)) {
continue;
}
const nextTargets: Record<string, number> = {};
for (const [target, count] of Object.entries(targets)) {
if (isDeletedUser(target)) {
continue;
}
nextTargets[target] = count;
}
filteredInteractionGraph[source] = nextTargets;
}
const filteredTopInteractionPairs: typeof topPairsRaw = [];
for (const pairEntry of topPairsRaw) {
const pair = pairEntry[0];
const source = pair[0];
const target = pair[1];
if (isDeletedUser(source) || isDeletedUser(target)) {
continue;
}
filteredTopInteractionPairs.push(pairEntry);
}
const combinedUserData: UserAnalysisResponse = {
...userRes.data,
users: filteredUsers,
top_users: filteredTopUsers,
interaction_graph: filteredInteractionGraph,
};
const combinedContentData: ContentAnalysisResponse = {
...linguisticRes.data,
...emotionalRes.data,
};
const filteredInteractionData: InteractionAnalysisResponse = {
...interactionRes.data,
interaction_graph: filteredInteractionGraph,
top_interaction_pairs: filteredTopInteractionPairs,
};
const filteredSummary: SummaryResponse = {
...summaryRes.data,
unique_users: filteredUsers.length,
};
setUserData(combinedUserData);
setTimeData(timeRes.data || null); setTimeData(timeRes.data || null);
setContentData(contentRes.data || null); setContentData(combinedContentData);
setSummary(summaryRes.data || null); setLinguisticData(linguisticRes.data || null);
setInteractionData(filteredInteractionData || null);
setCulturalData(culturalRes.data || null);
setSummary(filteredSummary || null);
}) })
.catch((e) => setError("Failed to load statistics: " + String(e))) .catch((e) => setError("Failed to load statistics: " + String(e)))
.finally(() => setLoading(false)); .finally(() => setLoading(false));
@@ -198,7 +297,7 @@ return (
<div style={styles.dashboardMeta}>Dataset #{datasetId ?? "-"}</div> <div style={styles.dashboardMeta}>Dataset #{datasetId ?? "-"}</div>
</div> </div>
<div style={{ ...styles.container, ...styles.tabsRow }}> <div style={{ ...styles.container, ...styles.tabsRow, justifyContent: "center" }}>
<button <button
onClick={() => setActiveView("summary")} onClick={() => setActiveView("summary")}
style={activeView === "summary" ? styles.buttonPrimary : styles.buttonSecondary} style={activeView === "summary" ? styles.buttonPrimary : styles.buttonSecondary}
@@ -218,6 +317,24 @@ return (
> >
Users Users
</button> </button>
<button
onClick={() => setActiveView("linguistic")}
style={activeView === "linguistic" ? styles.buttonPrimary : styles.buttonSecondary}
>
Linguistic
</button>
<button
onClick={() => setActiveView("interactional")}
style={activeView === "interactional" ? styles.buttonPrimary : styles.buttonSecondary}
>
Interactional
</button>
<button
onClick={() => setActiveView("cultural")}
style={activeView === "cultural" ? styles.buttonPrimary : styles.buttonSecondary}
>
Cultural
</button>
</div> </div>
{activeView === "summary" && ( {activeView === "summary" && (
@@ -243,6 +360,36 @@ return (
<UserStats data={userData} /> <UserStats data={userData} />
)} )}
{activeView === "linguistic" && linguisticData && (
<LinguisticStats data={linguisticData} />
)}
{activeView === "linguistic" && !linguisticData && (
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
No linguistic data available.
</div>
)}
{activeView === "interactional" && interactionData && (
<InteractionalStats data={interactionData} />
)}
{activeView === "interactional" && !interactionData && (
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
No interactional data available.
</div>
)}
{activeView === "cultural" && culturalData && (
<CulturalStats data={culturalData} />
)}
{activeView === "cultural" && !culturalData && (
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
No cultural data available.
</div>
)}
</div> </div>
); );
} }

View File

@@ -1,14 +1,28 @@
// User Responses // Shared types
type TopUser = {
author: string;
source: string;
count: number
};
type FrequencyWord = { type FrequencyWord = {
word: string; word: string;
count: number; count: number;
} };
type NGram = {
count: number;
ngram: string;
};
type Emotion = {
emotion_anger: number;
emotion_disgust: number;
emotion_fear: number;
emotion_joy: number;
emotion_sadness: number;
};
// User
type TopUser = {
author: string;
source: string;
count: number;
};
type Vocab = { type Vocab = {
author: string; author: string;
@@ -26,60 +40,145 @@ type User = {
comment: number; comment: number;
comment_post_ratio: number; comment_post_ratio: number;
comment_share: number; comment_share: number;
avg_emotions?: Record<string, number>;
vocab?: Vocab | null; vocab?: Vocab | null;
}; };
type InteractionGraph = Record<string, Record<string, number>>; type InteractionGraph = Record<string, Record<string, number>>;
type UserEndpointResponse = {
top_users: TopUser[];
users: User[];
};
type UserAnalysisResponse = { type UserAnalysisResponse = {
top_users: TopUser[]; top_users: TopUser[];
users: User[]; users: User[];
interaction_graph: InteractionGraph; interaction_graph: InteractionGraph;
}; };
// Time Analysis // Time
type EventsPerDay = { type EventsPerDay = {
date: Date; date: Date;
count: number; count: number;
} };
type HeatmapCell = { type HeatmapCell = {
date: Date; date: Date;
hour: number; hour: number;
count: number; count: number;
} };
type TimeAnalysisResponse = { type TimeAnalysisResponse = {
events_per_day: EventsPerDay[]; events_per_day: EventsPerDay[];
weekday_hour_heatmap: HeatmapCell[]; weekday_hour_heatmap: HeatmapCell[];
}
// Content Analysis
type Emotion = {
emotion_anger: number;
emotion_disgust: number;
emotion_fear: number;
emotion_joy: number;
emotion_sadness: number;
}; };
type NGram = { // Content (combines emotional and linguistic)
count: number;
ngram: string;
}
type AverageEmotionByTopic = Emotion & { type AverageEmotionByTopic = Emotion & {
n: number; n: number;
topic: string; topic: string;
[key: string]: string | number;
}; };
type OverallEmotionAverage = {
emotion: string;
score: number;
};
type DominantEmotionDistribution = {
emotion: string;
count: number;
ratio: number;
};
type EmotionBySource = {
source: string;
dominant_emotion: string;
dominant_score: number;
event_count: number;
};
type ContentAnalysisResponse = { type ContentAnalysisResponse = {
word_frequencies: FrequencyWord[]; word_frequencies: FrequencyWord[];
average_emotion_by_topic: AverageEmotionByTopic[]; average_emotion_by_topic: AverageEmotionByTopic[];
common_three_phrases: NGram[]; common_three_phrases: NGram[];
common_two_phrases: NGram[]; common_two_phrases: NGram[];
} overall_emotion_average?: OverallEmotionAverage[];
dominant_emotion_distribution?: DominantEmotionDistribution[];
emotion_by_source?: EmotionBySource[];
};
// Linguistic
type LinguisticAnalysisResponse = {
word_frequencies: FrequencyWord[];
common_two_phrases: NGram[];
common_three_phrases: NGram[];
lexical_diversity?: Record<string, number>;
};
// Emotional
type EmotionalAnalysisResponse = {
average_emotion_by_topic: AverageEmotionByTopic[];
overall_emotion_average?: OverallEmotionAverage[];
dominant_emotion_distribution?: DominantEmotionDistribution[];
emotion_by_source?: EmotionBySource[];
};
// Interactional
type ConversationConcentration = {
total_commenting_authors: number;
top_10pct_author_count: number;
top_10pct_comment_share: number;
single_comment_authors: number;
single_comment_author_ratio: number;
};
type InteractionAnalysisResponse = {
average_thread_depth?: number;
top_interaction_pairs?: [[string, string], number][];
conversation_concentration?: ConversationConcentration;
interaction_graph: InteractionGraph;
};
// Cultural
type IdentityMarkers = {
in_group_usage: number;
out_group_usage: number;
in_group_ratio: number;
out_group_ratio: number;
in_group_posts: number;
out_group_posts: number;
tie_posts: number;
in_group_emotion_avg?: Record<string, number>;
out_group_emotion_avg?: Record<string, number>;
};
type StanceMarkers = {
hedge_total: number;
certainty_total: number;
deontic_total: number;
permission_total: number;
hedge_per_1k_tokens: number;
certainty_per_1k_tokens: number;
deontic_per_1k_tokens: number;
permission_per_1k_tokens: number;
};
type EntityEmotionAggregate = {
post_count: number;
emotion_avg: Record<string, number>;
};
type AverageEmotionPerEntity = {
entity_emotion_avg: Record<string, EntityEmotionAggregate>;
};
type CulturalAnalysisResponse = {
identity_markers?: IdentityMarkers;
stance_markers?: StanceMarkers;
avg_emotion_per_entity?: AverageEmotionPerEntity;
};
// Summary // Summary
type SummaryResponse = { type SummaryResponse = {
@@ -96,22 +195,35 @@ type SummaryResponse = {
sources: string[]; sources: string[];
}; };
// Filtering Response // Filter
type FilterResponse = { type FilterResponse = {
rows: number rows: number;
data: any; data: any;
} };
export type { export type {
TopUser, TopUser,
Vocab, Vocab,
User, User,
InteractionGraph, InteractionGraph,
ConversationConcentration,
UserAnalysisResponse, UserAnalysisResponse,
UserEndpointResponse,
FrequencyWord, FrequencyWord,
AverageEmotionByTopic, AverageEmotionByTopic,
OverallEmotionAverage,
DominantEmotionDistribution,
EmotionBySource,
SummaryResponse, SummaryResponse,
TimeAnalysisResponse, TimeAnalysisResponse,
ContentAnalysisResponse, ContentAnalysisResponse,
FilterResponse LinguisticAnalysisResponse,
} EmotionalAnalysisResponse,
InteractionAnalysisResponse,
IdentityMarkers,
StanceMarkers,
EntityEmotionAggregate,
AverageEmotionPerEntity,
CulturalAnalysisResponse,
FilterResponse,
};

View File

@@ -1,33 +1,86 @@
import pandas as pd import pandas as pd
class EmotionalAnalysis: class EmotionalAnalysis:
def avg_emotion_by_topic(self, df: pd.DataFrame) -> dict: def _emotion_cols(self, df: pd.DataFrame) -> list[str]:
emotion_cols = [ return [col for col in df.columns if col.startswith("emotion_")]
col for col in df.columns
if col.startswith("emotion_") def avg_emotion_by_topic(self, df: pd.DataFrame) -> list[dict]:
] emotion_cols = self._emotion_cols(df)
if not emotion_cols:
return []
counts = ( counts = (
df[ df[(df["topic"] != "Misc")].groupby("topic").size().reset_index(name="n")
(df["topic"] != "Misc")
]
.groupby("topic")
.size()
.rename("n")
) )
avg_emotion_by_topic = ( avg_emotion_by_topic = (
df[ df[(df["topic"] != "Misc")]
(df["topic"] != "Misc")
]
.groupby("topic")[emotion_cols] .groupby("topic")[emotion_cols]
.mean() .mean()
.reset_index() .reset_index()
) )
avg_emotion_by_topic = avg_emotion_by_topic.merge( avg_emotion_by_topic = avg_emotion_by_topic.merge(counts, on="topic")
counts,
on="topic" return avg_emotion_by_topic.to_dict(orient="records")
def overall_emotion_average(self, df: pd.DataFrame) -> list[dict]:
emotion_cols = self._emotion_cols(df)
if not emotion_cols:
return []
means = df[emotion_cols].mean()
return [
{
"emotion": col.replace("emotion_", ""),
"score": float(means[col]),
}
for col in emotion_cols
]
def dominant_emotion_distribution(self, df: pd.DataFrame) -> list[dict]:
emotion_cols = self._emotion_cols(df)
if not emotion_cols or df.empty:
return []
dominant_per_row = df[emotion_cols].idxmax(axis=1)
counts = dominant_per_row.value_counts()
total = max(len(dominant_per_row), 1)
return [
{
"emotion": col.replace("emotion_", ""),
"count": int(count),
"ratio": round(float(count / total), 4),
}
for col, count in counts.items()
]
def emotion_by_source(self, df: pd.DataFrame) -> list[dict]:
emotion_cols = self._emotion_cols(df)
if not emotion_cols or "source" not in df.columns or df.empty:
return []
source_counts = df.groupby("source").size()
source_means = df.groupby("source")[emotion_cols].mean().reset_index()
rows = source_means.to_dict(orient="records")
output = []
for row in rows:
source = row["source"]
dominant_col = max(emotion_cols, key=lambda col: float(row.get(col, 0)))
output.append(
{
"source": str(source),
"dominant_emotion": dominant_col.replace("emotion_", ""),
"dominant_score": round(float(row.get(dominant_col, 0)), 4),
"event_count": int(source_counts.get(source, 0)),
}
) )
return avg_emotion_by_topic.to_dict(orient='records') return output

View File

@@ -1,9 +1,6 @@
import pandas as pd import pandas as pd
import re import re
from collections import Counter
class InteractionAnalysis: class InteractionAnalysis:
def __init__(self, word_exclusions: set[str]): def __init__(self, word_exclusions: set[str]):
self.word_exclusions = word_exclusions self.word_exclusions = word_exclusions
@@ -12,118 +9,6 @@ class InteractionAnalysis:
tokens = re.findall(r"\b[a-z]{3,}\b", text) tokens = re.findall(r"\b[a-z]{3,}\b", text)
return [t for t in tokens if t not in self.word_exclusions] return [t for t in tokens if t not in self.word_exclusions]
def _vocab_richness_per_user(
self, df: pd.DataFrame, min_words: int = 20, top_most_used_words: int = 100
) -> list:
df = df.copy()
df["content"] = df["content"].fillna("").astype(str).str.lower()
df["tokens"] = df["content"].apply(self._tokenize)
rows = []
for author, group in df.groupby("author"):
all_tokens = [t for tokens in group["tokens"] for t in tokens]
total_words = len(all_tokens)
unique_words = len(set(all_tokens))
events = len(group)
# Min amount of words for a user, any less than this might give weird results
if total_words < min_words:
continue
# 100% = they never reused a word (excluding stop words)
vocab_richness = unique_words / total_words
avg_words = total_words / max(events, 1)
counts = Counter(all_tokens)
top_words = [
{"word": w, "count": int(c)}
for w, c in counts.most_common(top_most_used_words)
]
rows.append(
{
"author": author,
"events": int(events),
"total_words": int(total_words),
"unique_words": int(unique_words),
"vocab_richness": round(vocab_richness, 3),
"avg_words_per_event": round(avg_words, 2),
"top_words": top_words,
}
)
rows = sorted(rows, key=lambda x: x["vocab_richness"], reverse=True)
return rows
def top_users(self, df: pd.DataFrame) -> list:
counts = df.groupby(["author", "source"]).size().sort_values(ascending=False)
top_users = [
{"author": author, "source": source, "count": int(count)}
for (author, source), count in counts.items()
]
return top_users
def per_user_analysis(self, df: pd.DataFrame) -> dict:
per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0)
emotion_cols = [col for col in df.columns if col.startswith("emotion_")]
avg_emotions_by_author = {}
if emotion_cols:
avg_emotions = df.groupby("author")[emotion_cols].mean().fillna(0.0)
avg_emotions_by_author = {
author: {emotion: float(score) for emotion, score in row.items()}
for author, row in avg_emotions.iterrows()
}
# ensure columns always exist
for col in ("post", "comment"):
if col not in per_user.columns:
per_user[col] = 0
per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(
0, 1
)
per_user["comment_share"] = per_user["comment"] / (
per_user["post"] + per_user["comment"]
).replace(0, 1)
per_user = per_user.sort_values("comment_post_ratio", ascending=True)
per_user_records = per_user.reset_index().to_dict(orient="records")
vocab_rows = self._vocab_richness_per_user(df)
vocab_by_author = {row["author"]: row for row in vocab_rows}
# merge vocab richness + per_user information
merged_users = []
for row in per_user_records:
author = row["author"]
merged_users.append(
{
"author": author,
"post": int(row.get("post", 0)),
"comment": int(row.get("comment", 0)),
"comment_post_ratio": float(row.get("comment_post_ratio", 0)),
"comment_share": float(row.get("comment_share", 0)),
"avg_emotions": avg_emotions_by_author.get(author, {}),
"vocab": vocab_by_author.get(
author,
{
"vocab_richness": 0,
"avg_words_per_event": 0,
"top_words": [],
},
),
}
)
merged_users.sort(key=lambda u: u["comment_post_ratio"])
return merged_users
def interaction_graph(self, df: pd.DataFrame): def interaction_graph(self, df: pd.DataFrame):
interactions = {a: {} for a in df["author"].dropna().unique()} interactions = {a: {} for a in df["author"].dropna().unique()}
@@ -167,67 +52,36 @@ class InteractionAnalysis:
return round(sum(depths) / len(depths), 2) return round(sum(depths) / len(depths), 2)
def average_thread_length_by_emotion(self, df: pd.DataFrame): def top_interaction_pairs(self, df: pd.DataFrame, top_n=10):
emotion_exclusions = {"emotion_neutral", "emotion_surprise"} graph = self.interaction_graph(df)
pairs = []
emotion_cols = [ for a, targets in graph.items():
c for b, count in targets.items():
for c in df.columns pairs.append(((a, b), count))
if c.startswith("emotion_") and c not in emotion_exclusions
]
id_to_reply = df.set_index("id")["reply_to"].to_dict() pairs.sort(key=lambda x: x[1], reverse=True)
length_cache = {} return pairs[:top_n]
def thread_length_from(start_id): def conversation_concentration(self, df: pd.DataFrame) -> dict:
if start_id in length_cache: if "type" not in df.columns:
return length_cache[start_id] return {}
seen = set() comments = df[df["type"] == "comment"]
length = 1 if comments.empty:
current = start_id return {}
while True: author_counts = comments["author"].value_counts()
if current in seen: total_comments = len(comments)
# infinite loop shouldn't happen, but just in case total_authors = len(author_counts)
break
seen.add(current)
reply_to = id_to_reply.get(current) top_10_pct_n = max(1, int(total_authors * 0.1))
top_10_pct_share = round(author_counts.head(top_10_pct_n).sum() / total_comments, 4)
if (
reply_to is None
or (isinstance(reply_to, float) and pd.isna(reply_to))
or reply_to == ""
):
break
length += 1
current = reply_to
if current in length_cache:
length += length_cache[current] - 1
break
length_cache[start_id] = length
return length
emotion_to_lengths = {}
# Fill NaNs in emotion cols to avoid max() issues
emo_df = df[["id"] + emotion_cols].copy()
emo_df[emotion_cols] = emo_df[emotion_cols].fillna(0)
for _, row in emo_df.iterrows():
msg_id = row["id"]
length = thread_length_from(msg_id)
emotions = {c: row[c] for c in emotion_cols}
dominant = max(emotions, key=emotions.get)
emotion_to_lengths.setdefault(dominant, []).append(length)
return { return {
emotion: round(sum(lengths) / len(lengths), 2) "total_commenting_authors": total_authors,
for emotion, lengths in emotion_to_lengths.items() "top_10pct_author_count": top_10_pct_n,
"top_10pct_comment_share": float(top_10_pct_share),
"single_comment_authors": int((author_counts == 1).sum()),
"single_comment_author_ratio": float(round((author_counts == 1).sum() / total_authors, 4)),
} }

View File

@@ -61,3 +61,19 @@ class LinguisticAnalysis:
.head(limit) .head(limit)
.to_dict(orient="records") .to_dict(orient="records")
) )
def lexical_diversity(self, df: pd.DataFrame) -> dict:
tokens = (
df["content"].fillna("").astype(str).str.lower()
.str.findall(r"\b[a-z]{2,}\b")
.explode()
)
tokens = tokens[~tokens.isin(self.word_exclusions)]
total = max(len(tokens), 1)
unique = int(tokens.nunique())
return {
"total_tokens": total,
"unique_tokens": unique,
"ttr": round(unique / total, 4),
}

View File

@@ -6,7 +6,9 @@ from server.analysis.cultural import CulturalAnalysis
from server.analysis.emotional import EmotionalAnalysis from server.analysis.emotional import EmotionalAnalysis
from server.analysis.interactional import InteractionAnalysis from server.analysis.interactional import InteractionAnalysis
from server.analysis.linguistic import LinguisticAnalysis from server.analysis.linguistic import LinguisticAnalysis
from server.analysis.summary import SummaryAnalysis
from server.analysis.temporal import TemporalAnalysis from server.analysis.temporal import TemporalAnalysis
from server.analysis.user import UserAnalysis
DOMAIN_STOPWORDS = { DOMAIN_STOPWORDS = {
"www", "www",
@@ -36,12 +38,11 @@ class StatGen:
self.interaction_analysis = InteractionAnalysis(EXCLUDE_WORDS) self.interaction_analysis = InteractionAnalysis(EXCLUDE_WORDS)
self.linguistic_analysis = LinguisticAnalysis(EXCLUDE_WORDS) self.linguistic_analysis = LinguisticAnalysis(EXCLUDE_WORDS)
self.cultural_analysis = CulturalAnalysis() self.cultural_analysis = CulturalAnalysis()
self.summary_analysis = SummaryAnalysis()
self.user_analysis = UserAnalysis(EXCLUDE_WORDS)
## Private Methods ## Private Methods
def _prepare_filtered_df(self, def _prepare_filtered_df(self, df: pd.DataFrame, filters: dict | None = None) -> pd.DataFrame:
df: pd.DataFrame,
filters: dict | None = None
) -> pd.DataFrame:
filters = filters or {} filters = filters or {}
filtered_df = df.copy() filtered_df = df.copy()
@@ -51,10 +52,9 @@ class StatGen:
data_source_filter = filters.get("data_sources", None) data_source_filter = filters.get("data_sources", None)
if search_query: if search_query:
mask = ( mask = filtered_df["content"].str.contains(
filtered_df["content"].str.contains(search_query, case=False, na=False) search_query, case=False, na=False
| filtered_df["author"].str.contains(search_query, case=False, na=False) ) | filtered_df["author"].str.contains(search_query, case=False, na=False)
)
# Only include title if the column exists # Only include title if the column exists
if "title" in filtered_df.columns: if "title" in filtered_df.columns:
@@ -76,10 +76,10 @@ class StatGen:
return filtered_df return filtered_df
## Public Methods ## Public Methods
def filter_dataset(self, df: pd.DataFrame, filters: dict | None = None) -> dict: def filter_dataset(self, df: pd.DataFrame, filters: dict | None = None) -> list[dict]:
return self._prepare_filtered_df(df, filters).to_dict(orient="records") return self._prepare_filtered_df(df, filters).to_dict(orient="records")
def get_time_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict: def temporal(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
filtered_df = self._prepare_filtered_df(df, filters) filtered_df = self._prepare_filtered_df(df, filters)
return { return {
@@ -87,84 +87,54 @@ class StatGen:
"weekday_hour_heatmap": self.temporal_analysis.heatmap(filtered_df), "weekday_hour_heatmap": self.temporal_analysis.heatmap(filtered_df),
} }
def get_content_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict: def linguistic(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
filtered_df = self._prepare_filtered_df(df, filters) filtered_df = self._prepare_filtered_df(df, filters)
return { return {
"word_frequencies": self.linguistic_analysis.word_frequencies(filtered_df), "word_frequencies": self.linguistic_analysis.word_frequencies(filtered_df),
"common_two_phrases": self.linguistic_analysis.ngrams(filtered_df), "common_two_phrases": self.linguistic_analysis.ngrams(filtered_df),
"common_three_phrases": self.linguistic_analysis.ngrams(filtered_df, n=3), "common_three_phrases": self.linguistic_analysis.ngrams(filtered_df, n=3),
"average_emotion_by_topic": self.emotional_analysis.avg_emotion_by_topic( "lexical_diversity": self.linguistic_analysis.lexical_diversity(filtered_df)
filtered_df
)
} }
def get_user_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict: def emotional(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
filtered_df = self._prepare_filtered_df(df, filters) filtered_df = self._prepare_filtered_df(df, filters)
return { return {
"top_users": self.interaction_analysis.top_users(filtered_df), "average_emotion_by_topic": self.emotional_analysis.avg_emotion_by_topic(filtered_df),
"users": self.interaction_analysis.per_user_analysis(filtered_df), "overall_emotion_average": self.emotional_analysis.overall_emotion_average(filtered_df),
"interaction_graph": self.interaction_analysis.interaction_graph(filtered_df) "dominant_emotion_distribution": self.emotional_analysis.dominant_emotion_distribution(filtered_df),
"emotion_by_source": self.emotional_analysis.emotion_by_source(filtered_df)
} }
def get_interactional_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict: def user(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
filtered_df = self._prepare_filtered_df(df, filters) filtered_df = self._prepare_filtered_df(df, filters)
return { return {
"average_thread_depth": self.interaction_analysis.average_thread_depth( "top_users": self.user_analysis.top_users(filtered_df),
filtered_df "users": self.user_analysis.per_user_analysis(filtered_df)
),
"average_thread_length_by_emotion": self.interaction_analysis.average_thread_length_by_emotion(
filtered_df
),
} }
def get_cultural_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict: def interactional(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
filtered_df = self._prepare_filtered_df(df, filters) filtered_df = self._prepare_filtered_df(df, filters)
return { return {
"identity_markers": self.cultural_analysis.get_identity_markers( "average_thread_depth": self.interaction_analysis.average_thread_depth(filtered_df),
filtered_df "top_interaction_pairs": self.interaction_analysis.top_interaction_pairs(filtered_df, top_n=100),
), "interaction_graph": self.interaction_analysis.interaction_graph(filtered_df),
"conversation_concentration": self.interaction_analysis.conversation_concentration(filtered_df)
}
def cultural(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
filtered_df = self._prepare_filtered_df(df, filters)
return {
"identity_markers": self.cultural_analysis.get_identity_markers(filtered_df),
"stance_markers": self.cultural_analysis.get_stance_markers(filtered_df), "stance_markers": self.cultural_analysis.get_stance_markers(filtered_df),
"entity_salience": self.cultural_analysis.get_avg_emotions_per_entity( "avg_emotion_per_entity": self.cultural_analysis.get_avg_emotions_per_entity(filtered_df)
filtered_df
),
} }
def summary(self, df: pd.DataFrame, filters: dict | None = None) -> dict: def summary(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
filtered_df = self._prepare_filtered_df(df, filters) filtered_df = self._prepare_filtered_df(df, filters)
total_posts = (filtered_df["type"] == "post").sum() return self.summary_analysis.summary(filtered_df)
total_comments = (filtered_df["type"] == "comment").sum()
events_per_user = filtered_df.groupby("author").size()
if filtered_df.empty:
return {
"total_events": 0,
"total_posts": 0,
"total_comments": 0,
"unique_users": 0,
"comments_per_post": 0,
"lurker_ratio": 0,
"time_range": {
"start": None,
"end": None,
},
"sources": [],
}
return {
"total_events": int(len(filtered_df)),
"total_posts": int(total_posts),
"total_comments": int(total_comments),
"unique_users": int(events_per_user.count()),
"comments_per_post": round(total_comments / max(total_posts, 1), 2),
"lurker_ratio": round((events_per_user == 1).mean(), 2),
"time_range": {
"start": int(filtered_df["dt"].min().timestamp()),
"end": int(filtered_df["dt"].max().timestamp()),
},
"sources": filtered_df["source"].dropna().unique().tolist(),
}

View File

@@ -0,0 +1,64 @@
import pandas as pd
class SummaryAnalysis:
def total_events(self, df: pd.DataFrame) -> int:
return int(len(df))
def total_posts(self, df: pd.DataFrame) -> int:
return int(len(df[df["type"] == "post"]))
def total_comments(self, df: pd.DataFrame) -> int:
return int(len(df[df["type"] == "comment"]))
def unique_users(self, df: pd.DataFrame) -> int:
return int(len(df["author"].dropna().unique()))
def comments_per_post(self, total_comments: int, total_posts: int) -> float:
return round(total_comments / max(total_posts, 1), 2)
def lurker_ratio(self, df: pd.DataFrame) -> float:
events_per_user = df.groupby("author").size()
return round((events_per_user == 1).mean(), 2)
def time_range(self, df: pd.DataFrame) -> dict:
return {
"start": int(df["dt"].min().timestamp()),
"end": int(df["dt"].max().timestamp()),
}
def sources(self, df: pd.DataFrame) -> list:
return df["source"].dropna().unique().tolist()
def empty_summary(self) -> dict:
return {
"total_events": 0,
"total_posts": 0,
"total_comments": 0,
"unique_users": 0,
"comments_per_post": 0,
"lurker_ratio": 0,
"time_range": {
"start": None,
"end": None,
},
"sources": [],
}
def summary(self, df: pd.DataFrame) -> dict:
if df.empty:
return self.empty_summary()
total_posts = self.total_posts(df)
total_comments = self.total_comments(df)
return {
"total_events": self.total_events(df),
"total_posts": total_posts,
"total_comments": total_comments,
"unique_users": self.unique_users(df),
"comments_per_post": self.comments_per_post(total_comments, total_posts),
"lurker_ratio": self.lurker_ratio(df),
"time_range": self.time_range(df),
"sources": self.sources(df),
}

124
server/analysis/user.py Normal file
View File

@@ -0,0 +1,124 @@
import pandas as pd
import re
from collections import Counter
class UserAnalysis:
    """Per-user activity, emotion, and vocabulary statistics for an event DataFrame."""

    def __init__(self, word_exclusions: set[str]):
        # Stop words (assumed already lowercase) ignored by all vocabulary stats.
        self.word_exclusions = word_exclusions

    def _tokenize(self, text: str) -> list[str]:
        """Alphabetic tokens of 3+ chars, minus excluded words.

        Assumes ``text`` has already been lowercased by the caller.
        """
        tokens = re.findall(r"\b[a-z]{3,}\b", text)
        return [t for t in tokens if t not in self.word_exclusions]

    def _vocab_richness_per_user(
        self, df: pd.DataFrame, min_words: int = 20, top_most_used_words: int = 100
    ) -> list:
        """Vocabulary stats per author, sorted by vocab richness (descending).

        Authors with fewer than ``min_words`` usable tokens are skipped,
        since tiny samples produce unstable richness ratios.
        """
        df = df.copy()
        df["content"] = df["content"].fillna("").astype(str).str.lower()
        df["tokens"] = df["content"].apply(self._tokenize)
        rows = []
        for author, group in df.groupby("author"):
            all_tokens = [t for tokens in group["tokens"] for t in tokens]
            total_words = len(all_tokens)
            unique_words = len(set(all_tokens))
            events = len(group)
            # Min amount of words for a user, any less might give weird results.
            if total_words < min_words:
                continue
            # 1.0 = the author never reused a word (excluding stop words).
            vocab_richness = unique_words / total_words
            avg_words = total_words / max(events, 1)
            counts = Counter(all_tokens)
            top_words = [
                {"word": w, "count": int(c)}
                for w, c in counts.most_common(top_most_used_words)
            ]
            rows.append(
                {
                    "author": author,
                    "events": int(events),
                    "total_words": int(total_words),
                    "unique_words": int(unique_words),
                    "vocab_richness": round(vocab_richness, 3),
                    "avg_words_per_event": round(avg_words, 2),
                    "top_words": top_words,
                }
            )
        rows.sort(key=lambda x: x["vocab_richness"], reverse=True)
        return rows

    def top_users(self, df: pd.DataFrame) -> list:
        """Authors ranked by event count, split by source (descending)."""
        counts = df.groupby(["author", "source"]).size().sort_values(ascending=False)
        top_users = [
            {"author": author, "source": source, "count": int(count)}
            for (author, source), count in counts.items()
        ]
        return top_users

    def per_user_analysis(self, df: pd.DataFrame) -> list:
        """Merge per-author activity counts, emotion averages, and vocab stats.

        Returns a list of per-author dicts sorted by comment/post ratio
        (ascending). Emotion averages are included only if the DataFrame
        carries ``emotion_*`` columns.
        """
        per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0)
        emotion_cols = [col for col in df.columns if col.startswith("emotion_")]
        avg_emotions_by_author = {}
        if emotion_cols:
            avg_emotions = df.groupby("author")[emotion_cols].mean().fillna(0.0)
            avg_emotions_by_author = {
                author: {emotion: float(score) for emotion, score in row.items()}
                for author, row in avg_emotions.iterrows()
            }
        # Ensure both activity columns exist even if the dataset lacks one type.
        for col in ("post", "comment"):
            if col not in per_user.columns:
                per_user[col] = 0
        # replace(0, 1) avoids division by zero for authors with no posts.
        per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(
            0, 1
        )
        per_user["comment_share"] = per_user["comment"] / (
            per_user["post"] + per_user["comment"]
        ).replace(0, 1)
        per_user = per_user.sort_values("comment_post_ratio", ascending=True)
        per_user_records = per_user.reset_index().to_dict(orient="records")
        vocab_rows = self._vocab_richness_per_user(df)
        vocab_by_author = {row["author"]: row for row in vocab_rows}
        # Merge activity, emotion, and vocabulary views into one record per author.
        merged_users = []
        for row in per_user_records:
            author = row["author"]
            merged_users.append(
                {
                    "author": author,
                    "post": int(row.get("post", 0)),
                    "comment": int(row.get("comment", 0)),
                    "comment_post_ratio": float(row.get("comment_post_ratio", 0)),
                    "comment_share": float(row.get("comment_share", 0)),
                    "avg_emotions": avg_emotions_by_author.get(author, {}),
                    # Fallback mirrors the schema produced by
                    # _vocab_richness_per_user for authors below the
                    # min-word threshold, so consumers see uniform keys.
                    "vocab": vocab_by_author.get(
                        author,
                        {
                            "events": 0,
                            "total_words": 0,
                            "unique_words": 0,
                            "vocab_richness": 0,
                            "avg_words_per_event": 0,
                            "top_words": [],
                        },
                    ),
                }
            )
        merged_users.sort(key=lambda u: u["comment_post_ratio"])
        return merged_users

View File

@@ -186,7 +186,7 @@ def scrape_data():
dataset_manager.set_dataset_status( dataset_manager.set_dataset_status(
dataset_id, dataset_id,
"fetching", "fetching",
f"Data is being fetched from {', '.join(source['name'] for source in source_configs)}" f"Data is being fetched from {', '.join(source['name'] for source in source_configs)}",
) )
fetch_and_process_dataset.delay( fetch_and_process_dataset.delay(
@@ -198,12 +198,14 @@ def scrape_data():
print(traceback.format_exc()) print(traceback.format_exc())
return jsonify({"error": "Failed to queue dataset processing"}), 500 return jsonify({"error": "Failed to queue dataset processing"}), 500
return jsonify(
return jsonify({ {
"message": "Dataset queued for processing", "message": "Dataset queued for processing",
"dataset_id": dataset_id, "dataset_id": dataset_id,
"status": "processing" "status": "processing",
}), 202 }
), 202
@app.route("/datasets/upload", methods=["POST"]) @app.route("/datasets/upload", methods=["POST"])
@jwt_required() @jwt_required()
@@ -233,7 +235,9 @@ def upload_data():
posts_df = pd.read_json(post_file, lines=True, convert_dates=False) posts_df = pd.read_json(post_file, lines=True, convert_dates=False)
topics = json.load(topic_file) topics = json.load(topic_file)
dataset_id = dataset_manager.save_dataset_info(current_user, dataset_name, topics) dataset_id = dataset_manager.save_dataset_info(
current_user, dataset_name, topics
)
process_dataset.delay(dataset_id, posts_df.to_dict(orient="records"), topics) process_dataset.delay(dataset_id, posts_df.to_dict(orient="records"), topics)
@@ -249,6 +253,7 @@ def upload_data():
except Exception as e: except Exception as e:
return jsonify({"error": f"An unexpected error occurred"}), 500 return jsonify({"error": f"An unexpected error occurred"}), 500
@app.route("/dataset/<int:dataset_id>", methods=["GET"]) @app.route("/dataset/<int:dataset_id>", methods=["GET"])
@jwt_required() @jwt_required()
def get_dataset(dataset_id): def get_dataset(dataset_id):
@@ -256,7 +261,9 @@ def get_dataset(dataset_id):
user_id = int(get_jwt_identity()) user_id = int(get_jwt_identity())
if not dataset_manager.authorize_user_dataset(dataset_id, user_id): if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
raise NotAuthorisedException("This user is not authorised to access this dataset") raise NotAuthorisedException(
"This user is not authorised to access this dataset"
)
dataset_info = dataset_manager.get_dataset_info(dataset_id) dataset_info = dataset_manager.get_dataset_info(dataset_id)
included_cols = {"id", "name", "created_at"} included_cols = {"id", "name", "created_at"}
@@ -270,6 +277,7 @@ def get_dataset(dataset_id):
print(traceback.format_exc()) print(traceback.format_exc())
return jsonify({"error": "An unexpected error occured"}), 500 return jsonify({"error": "An unexpected error occured"}), 500
@app.route("/dataset/<int:dataset_id>", methods=["PATCH"]) @app.route("/dataset/<int:dataset_id>", methods=["PATCH"])
@jwt_required() @jwt_required()
def update_dataset(dataset_id): def update_dataset(dataset_id):
@@ -277,7 +285,9 @@ def update_dataset(dataset_id):
user_id = int(get_jwt_identity()) user_id = int(get_jwt_identity())
if not dataset_manager.authorize_user_dataset(dataset_id, user_id): if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
raise NotAuthorisedException("This user is not authorised to access this dataset") raise NotAuthorisedException(
"This user is not authorised to access this dataset"
)
body = request.get_json() body = request.get_json()
new_name = body.get("name") new_name = body.get("name")
@@ -286,7 +296,9 @@ def update_dataset(dataset_id):
return jsonify({"error": "A valid name must be provided"}), 400 return jsonify({"error": "A valid name must be provided"}), 400
dataset_manager.update_dataset_name(dataset_id, new_name.strip()) dataset_manager.update_dataset_name(dataset_id, new_name.strip())
return jsonify({"message": f"Dataset {dataset_id} renamed to '{new_name.strip()}'"}), 200 return jsonify(
{"message": f"Dataset {dataset_id} renamed to '{new_name.strip()}'"}
), 200
except NotAuthorisedException: except NotAuthorisedException:
return jsonify({"error": "User is not authorised to access this content"}), 403 return jsonify({"error": "User is not authorised to access this content"}), 403
except NonExistentDatasetException: except NonExistentDatasetException:
@@ -295,6 +307,7 @@ def update_dataset(dataset_id):
print(traceback.format_exc()) print(traceback.format_exc())
return jsonify({"error": "An unexpected error occurred"}), 500 return jsonify({"error": "An unexpected error occurred"}), 500
@app.route("/dataset/<int:dataset_id>", methods=["DELETE"]) @app.route("/dataset/<int:dataset_id>", methods=["DELETE"])
@jwt_required() @jwt_required()
def delete_dataset(dataset_id): def delete_dataset(dataset_id):
@@ -302,11 +315,17 @@ def delete_dataset(dataset_id):
user_id = int(get_jwt_identity()) user_id = int(get_jwt_identity())
if not dataset_manager.authorize_user_dataset(dataset_id, user_id): if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
raise NotAuthorisedException("This user is not authorised to access this dataset") raise NotAuthorisedException(
"This user is not authorised to access this dataset"
)
dataset_manager.delete_dataset_info(dataset_id) dataset_manager.delete_dataset_info(dataset_id)
dataset_manager.delete_dataset_content(dataset_id) dataset_manager.delete_dataset_content(dataset_id)
return jsonify({"message": f"Dataset {dataset_id} metadata and content successfully deleted"}), 200 return jsonify(
{
"message": f"Dataset {dataset_id} metadata and content successfully deleted"
}
), 200
except NotAuthorisedException: except NotAuthorisedException:
return jsonify({"error": "User is not authorised to access this content"}), 403 return jsonify({"error": "User is not authorised to access this content"}), 403
except NonExistentDatasetException: except NonExistentDatasetException:
@@ -315,6 +334,7 @@ def delete_dataset(dataset_id):
print(traceback.format_exc()) print(traceback.format_exc())
return jsonify({"error": "An unexpected error occured"}), 500 return jsonify({"error": "An unexpected error occured"}), 500
@app.route("/dataset/<int:dataset_id>/status", methods=["GET"]) @app.route("/dataset/<int:dataset_id>/status", methods=["GET"])
@jwt_required() @jwt_required()
def get_dataset_status(dataset_id): def get_dataset_status(dataset_id):
@@ -322,7 +342,9 @@ def get_dataset_status(dataset_id):
user_id = int(get_jwt_identity()) user_id = int(get_jwt_identity())
if not dataset_manager.authorize_user_dataset(dataset_id, user_id): if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
raise NotAuthorisedException("This user is not authorised to access this dataset") raise NotAuthorisedException(
"This user is not authorised to access this dataset"
)
dataset_status = dataset_manager.get_dataset_status(dataset_id) dataset_status = dataset_manager.get_dataset_status(dataset_id)
return jsonify(dataset_status), 200 return jsonify(dataset_status), 200
@@ -334,17 +356,44 @@ def get_dataset_status(dataset_id):
print(traceback.format_exc()) print(traceback.format_exc())
return jsonify({"error": "An unexpected error occured"}), 500 return jsonify({"error": "An unexpected error occured"}), 500
@app.route("/dataset/<int:dataset_id>/content", methods=["GET"])
@app.route("/dataset/<int:dataset_id>/linguistic", methods=["GET"])
@jwt_required() @jwt_required()
def content_endpoint(dataset_id): def get_linguistic_analysis(dataset_id):
try: try:
user_id = int(get_jwt_identity()) user_id = int(get_jwt_identity())
if not dataset_manager.authorize_user_dataset(dataset_id, user_id): if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
raise NotAuthorisedException("This user is not authorised to access this dataset") raise NotAuthorisedException(
"This user is not authorised to access this dataset"
)
dataset_content = dataset_manager.get_dataset_content(dataset_id) dataset_content = dataset_manager.get_dataset_content(dataset_id)
filters = get_request_filters() filters = get_request_filters()
return jsonify(stat_gen.get_content_analysis(dataset_content, filters)), 200 return jsonify(stat_gen.linguistic(dataset_content, filters)), 200
except NotAuthorisedException:
return jsonify({"error": "User is not authorised to access this content"}), 403
except NonExistentDatasetException:
return jsonify({"error": "Dataset does not exist"}), 404
except ValueError as e:
return jsonify({"error": f"Malformed or missing data"}), 400
except Exception as e:
print(traceback.format_exc())
return jsonify({"error": f"An unexpected error occurred"}), 500
@app.route("/dataset/<int:dataset_id>/emotional", methods=["GET"])
@jwt_required()
def get_emotional_analysis(dataset_id):
try:
user_id = int(get_jwt_identity())
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
raise NotAuthorisedException(
"This user is not authorised to access this dataset"
)
dataset_content = dataset_manager.get_dataset_content(dataset_id)
filters = get_request_filters()
return jsonify(stat_gen.emotional(dataset_content, filters)), 200
except NotAuthorisedException: except NotAuthorisedException:
return jsonify({"error": "User is not authorised to access this content"}), 403 return jsonify({"error": "User is not authorised to access this content"}), 403
except NonExistentDatasetException: except NonExistentDatasetException:
@@ -362,7 +411,9 @@ def get_summary(dataset_id):
try: try:
user_id = int(get_jwt_identity()) user_id = int(get_jwt_identity())
if not dataset_manager.authorize_user_dataset(dataset_id, user_id): if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
raise NotAuthorisedException("This user is not authorised to access this dataset") raise NotAuthorisedException(
"This user is not authorised to access this dataset"
)
dataset_content = dataset_manager.get_dataset_content(dataset_id) dataset_content = dataset_manager.get_dataset_content(dataset_id)
filters = get_request_filters() filters = get_request_filters()
@@ -378,17 +429,19 @@ def get_summary(dataset_id):
return jsonify({"error": f"An unexpected error occurred"}), 500 return jsonify({"error": f"An unexpected error occurred"}), 500
@app.route("/dataset/<int:dataset_id>/time", methods=["GET"]) @app.route("/dataset/<int:dataset_id>/temporal", methods=["GET"])
@jwt_required() @jwt_required()
def get_time_analysis(dataset_id): def get_temporal_analysis(dataset_id):
try: try:
user_id = int(get_jwt_identity()) user_id = int(get_jwt_identity())
if not dataset_manager.authorize_user_dataset(dataset_id, user_id): if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
raise NotAuthorisedException("This user is not authorised to access this dataset") raise NotAuthorisedException(
"This user is not authorised to access this dataset"
)
dataset_content = dataset_manager.get_dataset_content(dataset_id) dataset_content = dataset_manager.get_dataset_content(dataset_id)
filters = get_request_filters() filters = get_request_filters()
return jsonify(stat_gen.get_time_analysis(dataset_content, filters)), 200 return jsonify(stat_gen.temporal(dataset_content, filters)), 200
except NotAuthorisedException: except NotAuthorisedException:
return jsonify({"error": "User is not authorised to access this content"}), 403 return jsonify({"error": "User is not authorised to access this content"}), 403
except NonExistentDatasetException: except NonExistentDatasetException:
@@ -406,11 +459,13 @@ def get_user_analysis(dataset_id):
try: try:
user_id = int(get_jwt_identity()) user_id = int(get_jwt_identity())
if not dataset_manager.authorize_user_dataset(dataset_id, user_id): if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
raise NotAuthorisedException("This user is not authorised to access this dataset") raise NotAuthorisedException(
"This user is not authorised to access this dataset"
)
dataset_content = dataset_manager.get_dataset_content(dataset_id) dataset_content = dataset_manager.get_dataset_content(dataset_id)
filters = get_request_filters() filters = get_request_filters()
return jsonify(stat_gen.get_user_analysis(dataset_content, filters)), 200 return jsonify(stat_gen.user(dataset_content, filters)), 200
except NotAuthorisedException: except NotAuthorisedException:
return jsonify({"error": "User is not authorised to access this content"}), 403 return jsonify({"error": "User is not authorised to access this content"}), 403
except NonExistentDatasetException: except NonExistentDatasetException:
@@ -428,11 +483,13 @@ def get_cultural_analysis(dataset_id):
try: try:
user_id = int(get_jwt_identity()) user_id = int(get_jwt_identity())
if not dataset_manager.authorize_user_dataset(dataset_id, user_id): if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
raise NotAuthorisedException("This user is not authorised to access this dataset") raise NotAuthorisedException(
"This user is not authorised to access this dataset"
)
dataset_content = dataset_manager.get_dataset_content(dataset_id) dataset_content = dataset_manager.get_dataset_content(dataset_id)
filters = get_request_filters() filters = get_request_filters()
return jsonify(stat_gen.get_cultural_analysis(dataset_content, filters)), 200 return jsonify(stat_gen.cultural(dataset_content, filters)), 200
except NotAuthorisedException: except NotAuthorisedException:
return jsonify({"error": "User is not authorised to access this content"}), 403 return jsonify({"error": "User is not authorised to access this content"}), 403
except NonExistentDatasetException: except NonExistentDatasetException:
@@ -444,17 +501,19 @@ def get_cultural_analysis(dataset_id):
return jsonify({"error": f"An unexpected error occurred"}), 500 return jsonify({"error": f"An unexpected error occurred"}), 500
@app.route("/dataset/<int:dataset_id>/interaction", methods=["GET"]) @app.route("/dataset/<int:dataset_id>/interactional", methods=["GET"])
@jwt_required() @jwt_required()
def get_interaction_analysis(dataset_id): def get_interaction_analysis(dataset_id):
try: try:
user_id = int(get_jwt_identity()) user_id = int(get_jwt_identity())
if not dataset_manager.authorize_user_dataset(dataset_id, user_id): if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
raise NotAuthorisedException("This user is not authorised to access this dataset") raise NotAuthorisedException(
"This user is not authorised to access this dataset"
)
dataset_content = dataset_manager.get_dataset_content(dataset_id) dataset_content = dataset_manager.get_dataset_content(dataset_id)
filters = get_request_filters() filters = get_request_filters()
return jsonify(stat_gen.get_interactional_analysis(dataset_content, filters)), 200 return jsonify(stat_gen.interactional(dataset_content, filters)), 200
except NotAuthorisedException: except NotAuthorisedException:
return jsonify({"error": "User is not authorised to access this content"}), 403 return jsonify({"error": "User is not authorised to access this content"}), 403
except NonExistentDatasetException: except NonExistentDatasetException:
@@ -465,6 +524,27 @@ def get_interaction_analysis(dataset_id):
print(traceback.format_exc()) print(traceback.format_exc())
return jsonify({"error": f"An unexpected error occurred"}), 500 return jsonify({"error": f"An unexpected error occurred"}), 500
@app.route("/dataset/<int:dataset_id>/all", methods=["GET"])
@jwt_required()
def get_full_dataset(dataset_id: int):
try:
user_id = int(get_jwt_identity())
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
raise NotAuthorisedException(
"This user is not authorised to access this dataset"
)
dataset_content = dataset_manager.get_dataset_content(dataset_id)
return jsonify(dataset_content.to_dict(orient="records")), 200
except NotAuthorisedException:
return jsonify({"error": "User is not authorised to access this content"}), 403
except NonExistentDatasetException:
return jsonify({"error": "Dataset does not exist"}), 404
except ValueError as e:
return jsonify({"error": f"Malformed or missing data"}), 400
except Exception as e:
print(traceback.format_exc())
return jsonify({"error": f"An unexpected error occurred"}), 500
if __name__ == "__main__": if __name__ == "__main__":
app.run(debug=True) app.run(debug=True)

View File

@@ -101,7 +101,7 @@ class DatasetManager:
row["source"], row["source"],
row.get("topic"), row.get("topic"),
row.get("topic_confidence"), row.get("topic_confidence"),
Json(row["ner_entities"]) if row.get("ner_entities") else None, Json(row["entities"]) if row.get("entities") is not None else None,
row.get("emotion_anger"), row.get("emotion_anger"),
row.get("emotion_disgust"), row.get("emotion_disgust"),
row.get("emotion_fear"), row.get("emotion_fear"),

View File

@@ -43,7 +43,7 @@ CREATE TABLE events (
weekday VARCHAR(255) NOT NULL, weekday VARCHAR(255) NOT NULL,
/* Posts Only */ /* Posts Only */
title VARCHAR(255), title TEXT,
/* Comments Only*/ /* Comments Only*/
parent_id VARCHAR(255), parent_id VARCHAR(255),