feat(frontend): implement corpus explorer

This allows you to view the posts & comments associated with a specific aggregate.
This commit is contained in:
2026-04-01 00:04:25 +01:00
parent 1dde5f7b08
commit b270ed03ae
11 changed files with 1064 additions and 179 deletions

View File

@@ -0,0 +1,175 @@
import { Dialog, DialogPanel, DialogTitle } from "@headlessui/react";
import StatsStyling from "../styles/stats_styling";
import type { DatasetRecord } from "../utils/corpusExplorer";
const styles = StatsStyling;
/**
 * Normalises an arbitrary value into display-ready text.
 *
 * Yields the trimmed string, or "" when the value is not a string, is blank,
 * or is one of the placeholder spellings "nan" / "null" / "undefined"
 * (matched case-insensitively).
 */
const cleanText = (value: unknown) => {
  const text = typeof value === "string" ? value.trim() : "";
  const placeholders = ["nan", "null", "undefined"];
  return placeholders.includes(text.toLowerCase()) ? "" : text;
};
/** Runs `value` through `cleanText` and substitutes `fallback` when nothing usable remains. */
const displayText = (value: unknown, fallback: string) =>
  cleanText(value) || fallback;
/** Props accepted by the `CorpusExplorer` dialog. */
type CorpusExplorerProps = {
/** Forwarded to the dialog's `open` prop. */
open: boolean;
/** Passed to the dialog's `onClose` and wired to the "Close" button. */
onClose: () => void;
/** Text rendered as the dialog title. */
title: string;
/** Subtitle text; rendered just before the loading / record-count label. */
description: string;
/** Records listed in the dialog; their count is shown in the subtitle when not loading. */
records: DatasetRecord[];
/** While true, loading text is shown and the record list is hidden. */
loading: boolean;
/** When non-empty, rendered as a message and the record list is hidden. */
error: string;
/** Shown when not loading, there is no error, and `records` is empty. */
emptyMessage: string;
};
/**
 * Builds the timestamp label shown for a record.
 *
 * Sources are tried in order:
 *  1. `dt` - a non-empty string that `Date` can parse, shown via `toLocaleString()`;
 *  2. `date` - a non-empty string, shown verbatim;
 *  3. `timestamp` - a number multiplied by 1000 before being handed to `Date`
 *     (i.e. treated as seconds), shown via `toLocaleString()`.
 *
 * @param record - The dataset record to describe.
 * @returns The formatted label, or "Unknown time" when no source yields a valid date.
 */
const formatRecordDate = (record: DatasetRecord) => {
  if (typeof record.dt === "string" && record.dt) {
    const parsed = new Date(record.dt);
    if (!Number.isNaN(parsed.getTime())) {
      return parsed.toLocaleString();
    }
  }

  if (typeof record.date === "string" && record.date) {
    return record.date;
  }

  if (typeof record.timestamp === "number") {
    const fromEpoch = new Date(record.timestamp * 1000);
    // NaN, Infinity and out-of-range numbers produce an invalid Date, which
    // would otherwise be rendered as the literal text "Invalid Date".
    if (!Number.isNaN(fromEpoch.getTime())) {
      return fromEpoch.toLocaleString();
    }
  }

  return "Unknown time";
};
/**
 * Derives the React list key for a record.
 *
 * A record's own `id` is used as-is. When it is missing, the key falls back to
 * `post_id`, then `author`, then the literal "record" - always suffixed with
 * the list index so that fallback keys stay unique within one rendered list.
 *
 * NOTE(review): the suffix on the `post_id` fallback assumes several records
 * can share one `post_id` (e.g. comments under the same post) - confirm
 * against the `DatasetRecord` schema.
 *
 * @param record - The record being rendered.
 * @param index - The record's position in the rendered list.
 * @returns A string key.
 */
const getRecordKey = (record: DatasetRecord, index: number) => {
  if (record.id != null) {
    return String(record.id);
  }
  const base = record.post_id ?? record.author ?? "record";
  return `${base}-${index}`;
};
/**
 * Picks the heading for a record card.
 *
 * Records whose `type` is "comment" get no heading (""). Other records use
 * their cleaned `title`; failing that, their cleaned `content` cut to at most
 * 120 characters (117 plus "..."); failing that, "Untitled record".
 */
const getRecordTitle = (record: DatasetRecord) => {
  if (record.type === "comment") return "";

  const heading = cleanText(record.title);
  if (heading) return heading;

  const body = cleanText(record.content);
  if (body === "") return "Untitled record";

  return body.length <= 120 ? body : `${body.slice(0, 117)}...`;
};
/**
 * Produces the body preview for a record card: the cleaned `content`, cut to
 * at most 320 characters (317 plus "..."), or "No content available." when
 * there is no usable content.
 */
const getRecordExcerpt = (record: DatasetRecord) => {
  const body = cleanText(record.content);
  if (body === "") return "No content available.";
  return body.length <= 320 ? body : `${body.slice(0, 317)}...`;
};
/**
 * Modal dialog that lists dataset records.
 *
 * The header shows the title, the description followed by either a loading
 * label or the record count, and a "Close" button. Below the header:
 *  - a non-empty `error` is rendered as a message;
 *  - while `loading`, a "Preparing corpus slice..." placeholder is shown;
 *  - when idle (not loading, no error) with no records, `emptyMessage` is shown;
 *  - when idle with records, each record is rendered as a card with an optional
 *    heading, a metadata line, an optional topic label and a content excerpt.
 */
const CorpusExplorer = ({
  open,
  onClose,
  title,
  description,
  records,
  loading,
  error,
  emptyMessage,
}: CorpusExplorerProps) => {
  const idle = !loading && !error;
  const hasRecords = records.length > 0;
  const countLabel = loading
    ? "Loading records..."
    : `${records.length.toLocaleString()} records.`;

  return (
    <Dialog open={open} onClose={onClose} style={styles.modalRoot}>
      <div style={styles.modalBackdrop} />
      <div style={styles.modalContainer}>
        <DialogPanel
          style={{
            ...styles.card,
            ...styles.modalPanel,
            width: "min(960px, 96vw)",
            maxHeight: "88vh",
            display: "flex",
            flexDirection: "column",
            gap: 12,
          }}
        >
          <div style={styles.headerBar}>
            <div>
              <DialogTitle style={styles.sectionTitle}>{title}</DialogTitle>
              <p style={styles.sectionSubtitle}>
                {description} {countLabel}
              </p>
            </div>
            <button onClick={onClose} style={styles.buttonSecondary}>
              Close
            </button>
          </div>

          {error ? <p style={styles.sectionSubtitle}>{error}</p> : null}

          {idle && !hasRecords ? (
            <p style={styles.sectionSubtitle}>{emptyMessage}</p>
          ) : null}

          {loading ? (
            <div style={styles.topUserMeta}>Preparing corpus slice...</div>
          ) : null}

          {idle && hasRecords ? (
            <div
              style={{
                ...styles.topUsersList,
                overflowY: "auto",
                paddingRight: 4,
              }}
            >
              {records.map((record, index) => {
                // Derive per-record display values once instead of inline in the markup.
                const heading = getRecordTitle(record);
                const topic = cleanText(record.topic);

                return (
                  <div key={getRecordKey(record, index)} style={styles.topUserItem}>
                    <div style={{ ...styles.headerBar, alignItems: "flex-start" }}>
                      <div>
                        {heading ? (
                          <div style={styles.topUserName}>{heading}</div>
                        ) : null}
                        <div style={styles.topUserMeta}>
                          {displayText(record.author, "Unknown author")} {displayText(record.source, "Unknown source")} {displayText(record.type, "record")} {formatRecordDate(record)}
                        </div>
                      </div>
                      <div style={styles.topUserMeta}>
                        {topic ? `Topic: ${topic}` : ""}
                      </div>
                    </div>
                    <div style={{ ...styles.topUserMeta, marginTop: 8, whiteSpace: "pre-wrap" }}>
                      {getRecordExcerpt(record)}
                    </div>
                  </div>
                );
              })}
            </div>
          ) : null}
        </DialogPanel>
      </div>
    </Dialog>
  );
};
export default CorpusExplorer;

View File

@@ -1,14 +1,34 @@
import Card from "./Card"; import Card from "./Card";
import StatsStyling from "../styles/stats_styling"; import StatsStyling from "../styles/stats_styling";
import type { CulturalAnalysisResponse } from "../types/ApiTypes"; import type { CulturalAnalysisResponse } from "../types/ApiTypes";
import {
buildCertaintySpec,
buildDeonticSpec,
buildEntitySpec,
buildHedgeSpec,
buildIdentityBucketSpec,
buildPermissionSpec,
getExplorerButtonStyle,
type CorpusExplorerSpec,
} from "../utils/corpusExplorer";
const styles = StatsStyling; const styles = StatsStyling;
type CulturalStatsProps = { type CulturalStatsProps = {
data: CulturalAnalysisResponse; data: CulturalAnalysisResponse;
onExplore: (spec: CorpusExplorerSpec) => void;
}; };
const CulturalStats = ({ data }: CulturalStatsProps) => { const renderExploreButton = (onClick: () => void) => (
<button
onClick={onClick}
style={{ ...styles.buttonSecondary, ...getExplorerButtonStyle() }}
>
Explore
</button>
);
const CulturalStats = ({ data, onExplore }: CulturalStatsProps) => {
const identity = data.identity_markers; const identity = data.identity_markers;
const stance = data.stance_markers; const stance = data.stance_markers;
const inGroupWords = identity?.in_group_usage ?? 0; const inGroupWords = identity?.in_group_usage ?? 0;
@@ -30,7 +50,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
const topEmotion = (emotionAvg: Record<string, number> | undefined) => { const topEmotion = (emotionAvg: Record<string, number> | undefined) => {
const entries = Object.entries(emotionAvg ?? {}); const entries = Object.entries(emotionAvg ?? {});
if (!entries.length) { if (!entries.length) {
return ""; return "-";
} }
entries.sort((a, b) => b[1] - a[1]); entries.sort((a, b) => b[1] - a[1]);
@@ -64,21 +84,30 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
/> />
<Card <Card
label="In-Group Posts" label="In-Group Posts"
value={identity?.in_group_posts?.toLocaleString() ?? ""} value={identity?.in_group_posts?.toLocaleString() ?? "-"}
sublabel='Posts leaning toward "us" language' sublabel='Posts leaning toward "us" language'
rightSlot={renderExploreButton(() =>
onExplore(buildIdentityBucketSpec("in")),
)}
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
/> />
<Card <Card
label="Out-Group Posts" label="Out-Group Posts"
value={identity?.out_group_posts?.toLocaleString() ?? ""} value={identity?.out_group_posts?.toLocaleString() ?? "-"}
sublabel='Posts leaning toward "them" language' sublabel='Posts leaning toward "them" language'
rightSlot={renderExploreButton(() =>
onExplore(buildIdentityBucketSpec("out")),
)}
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
/> />
<Card <Card
label="Balanced Posts" label="Balanced Posts"
value={identity?.tie_posts?.toLocaleString() ?? ""} value={identity?.tie_posts?.toLocaleString() ?? "-"}
sublabel="Posts with equal us/them signals" sublabel="Posts with equal us/them signals"
rightSlot={renderExploreButton(() =>
onExplore(buildIdentityBucketSpec("tie")),
)}
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
/> />
<Card <Card
@@ -90,7 +119,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
<Card <Card
label="In-Group Share" label="In-Group Share"
value={ value={
inGroupWordRate === null ? "" : `${inGroupWordRate.toFixed(2)}%` inGroupWordRate === null ? "-" : `${inGroupWordRate.toFixed(2)}%`
} }
sublabel="Share of all words" sublabel="Share of all words"
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
@@ -98,7 +127,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
<Card <Card
label="Out-Group Share" label="Out-Group Share"
value={ value={
outGroupWordRate === null ? "" : `${outGroupWordRate.toFixed(2)}%` outGroupWordRate === null ? "-" : `${outGroupWordRate.toFixed(2)}%`
} }
sublabel="Share of all words" sublabel="Share of all words"
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
@@ -106,42 +135,46 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
<Card <Card
label="Hedging Words" label="Hedging Words"
value={stance?.hedge_total?.toLocaleString() ?? ""} value={stance?.hedge_total?.toLocaleString() ?? "-"}
sublabel={ sublabel={
typeof stance?.hedge_per_1k_tokens === "number" typeof stance?.hedge_per_1k_tokens === "number"
? `${stance.hedge_per_1k_tokens.toFixed(1)} per 1k words` ? `${stance.hedge_per_1k_tokens.toFixed(1)} per 1k words`
: "Word frequency" : "Word frequency"
} }
rightSlot={renderExploreButton(() => onExplore(buildHedgeSpec()))}
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
/> />
<Card <Card
label="Certainty Words" label="Certainty Words"
value={stance?.certainty_total?.toLocaleString() ?? ""} value={stance?.certainty_total?.toLocaleString() ?? "-"}
sublabel={ sublabel={
typeof stance?.certainty_per_1k_tokens === "number" typeof stance?.certainty_per_1k_tokens === "number"
? `${stance.certainty_per_1k_tokens.toFixed(1)} per 1k words` ? `${stance.certainty_per_1k_tokens.toFixed(1)} per 1k words`
: "Word frequency" : "Word frequency"
} }
rightSlot={renderExploreButton(() => onExplore(buildCertaintySpec()))}
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
/> />
<Card <Card
label="Need/Should Words" label="Need/Should Words"
value={stance?.deontic_total?.toLocaleString() ?? ""} value={stance?.deontic_total?.toLocaleString() ?? "-"}
sublabel={ sublabel={
typeof stance?.deontic_per_1k_tokens === "number" typeof stance?.deontic_per_1k_tokens === "number"
? `${stance.deontic_per_1k_tokens.toFixed(1)} per 1k words` ? `${stance.deontic_per_1k_tokens.toFixed(1)} per 1k words`
: "Word frequency" : "Word frequency"
} }
rightSlot={renderExploreButton(() => onExplore(buildDeonticSpec()))}
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
/> />
<Card <Card
label="Permission Words" label="Permission Words"
value={stance?.permission_total?.toLocaleString() ?? ""} value={stance?.permission_total?.toLocaleString() ?? "-"}
sublabel={ sublabel={
typeof stance?.permission_per_1k_tokens === "number" typeof stance?.permission_per_1k_tokens === "number"
? `${stance.permission_per_1k_tokens.toFixed(1)} per 1k words` ? `${stance.permission_per_1k_tokens.toFixed(1)} per 1k words`
: "Word frequency" : "Word frequency"
} }
rightSlot={renderExploreButton(() => onExplore(buildPermissionSpec()))}
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
/> />
@@ -150,8 +183,14 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
<p style={styles.sectionSubtitle}> <p style={styles.sectionSubtitle}>
Most likely emotion when in-group wording is stronger. Most likely emotion when in-group wording is stronger.
</p> </p>
<div style={styles.topUserName}> <div style={styles.topUserName}>{topEmotion(identity?.in_group_emotion_avg)}</div>
{topEmotion(identity?.in_group_emotion_avg)} <div style={{ marginTop: 12 }}>
<button
onClick={() => onExplore(buildIdentityBucketSpec("in"))}
style={styles.buttonSecondary}
>
Explore records
</button>
</div> </div>
</div> </div>
@@ -160,8 +199,14 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
<p style={styles.sectionSubtitle}> <p style={styles.sectionSubtitle}>
Most likely emotion when out-group wording is stronger. Most likely emotion when out-group wording is stronger.
</p> </p>
<div style={styles.topUserName}> <div style={styles.topUserName}>{topEmotion(identity?.out_group_emotion_avg)}</div>
{topEmotion(identity?.out_group_emotion_avg)} <div style={{ marginTop: 12 }}>
<button
onClick={() => onExplore(buildIdentityBucketSpec("out"))}
style={styles.buttonSecondary}
>
Explore records
</button>
</div> </div>
</div> </div>
@@ -171,9 +216,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
Most mentioned entities and the mood that appears most with each. Most mentioned entities and the mood that appears most with each.
</p> </p>
{!entities.length ? ( {!entities.length ? (
<div style={styles.topUserMeta}> <div style={styles.topUserMeta}>No entity-level cultural data available.</div>
No entity-level cultural data available.
</div>
) : ( ) : (
<div <div
style={{ style={{
@@ -183,7 +226,11 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
}} }}
> >
{entities.map(([entity, aggregate]) => ( {entities.map(([entity, aggregate]) => (
<div key={entity} style={styles.topUserItem}> <div
key={entity}
style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => onExplore(buildEntitySpec(entity))}
>
<div style={styles.topUserName}>{entity}</div> <div style={styles.topUserName}>{entity}</div>
<div style={styles.topUserMeta}> <div style={styles.topUserMeta}>
{aggregate.post_count.toLocaleString()} posts Likely mood:{" "} {aggregate.post_count.toLocaleString()} posts Likely mood:{" "}

View File

@@ -1,13 +1,20 @@
import type { EmotionalAnalysisResponse } from "../types/ApiTypes"; import type { EmotionalAnalysisResponse } from "../types/ApiTypes";
import StatsStyling from "../styles/stats_styling"; import StatsStyling from "../styles/stats_styling";
import {
buildDominantEmotionSpec,
buildSourceSpec,
buildTopicSpec,
type CorpusExplorerSpec,
} from "../utils/corpusExplorer";
const styles = StatsStyling; const styles = StatsStyling;
type EmotionalStatsProps = { type EmotionalStatsProps = {
emotionalData: EmotionalAnalysisResponse; emotionalData: EmotionalAnalysisResponse;
onExplore: (spec: CorpusExplorerSpec) => void;
}; };
const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => { const EmotionalStats = ({ emotionalData, onExplore }: EmotionalStatsProps) => {
const rows = emotionalData.average_emotion_by_topic ?? []; const rows = emotionalData.average_emotion_by_topic ?? [];
const overallEmotionAverage = emotionalData.overall_emotion_average ?? []; const overallEmotionAverage = emotionalData.overall_emotion_average ?? [];
const dominantEmotionDistribution = const dominantEmotionDistribution =
@@ -126,7 +133,11 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
{[...overallEmotionAverage] {[...overallEmotionAverage]
.sort((a, b) => b.score - a.score) .sort((a, b) => b.score - a.score)
.map((row) => ( .map((row) => (
<div key={row.emotion} style={styles.topUserItem}> <div
key={row.emotion}
style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => onExplore(buildDominantEmotionSpec(row.emotion))}
>
<div style={styles.topUserName}> <div style={styles.topUserName}>
{formatEmotion(row.emotion)} {formatEmotion(row.emotion)}
</div> </div>
@@ -157,7 +168,11 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
{[...dominantEmotionDistribution] {[...dominantEmotionDistribution]
.sort((a, b) => b.ratio - a.ratio) .sort((a, b) => b.ratio - a.ratio)
.map((row) => ( .map((row) => (
<div key={row.emotion} style={styles.topUserItem}> <div
key={row.emotion}
style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => onExplore(buildDominantEmotionSpec(row.emotion))}
>
<div style={styles.topUserName}> <div style={styles.topUserName}>
{formatEmotion(row.emotion)} {formatEmotion(row.emotion)}
</div> </div>
@@ -189,7 +204,11 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
{[...emotionBySource] {[...emotionBySource]
.sort((a, b) => b.event_count - a.event_count) .sort((a, b) => b.event_count - a.event_count)
.map((row) => ( .map((row) => (
<div key={row.source} style={styles.topUserItem}> <div
key={row.source}
style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => onExplore(buildSourceSpec(row.source))}
>
<div style={styles.topUserName}>{row.source}</div> <div style={styles.topUserName}>{row.source}</div>
<div style={styles.topUserMeta}> <div style={styles.topUserMeta}>
{formatEmotion(row.dominant_emotion)} {" "} {formatEmotion(row.dominant_emotion)} {" "}
@@ -211,7 +230,8 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
{strongestPerTopic.map((topic) => ( {strongestPerTopic.map((topic) => (
<div <div
key={topic.topic} key={topic.topic}
style={{ ...styles.cardBase, gridColumn: "span 4" }} style={{ ...styles.cardBase, gridColumn: "span 4", cursor: "pointer" }}
onClick={() => onExplore(buildTopicSpec(topic.topic))}
> >
<h3 style={{ ...styles.sectionTitle, marginBottom: 6 }}> <h3 style={{ ...styles.sectionTitle, marginBottom: 6 }}>
{topic.topic} {topic.topic}

View File

@@ -1,14 +1,20 @@
import Card from "./Card"; import Card from "./Card";
import StatsStyling from "../styles/stats_styling"; import StatsStyling from "../styles/stats_styling";
import type { LinguisticAnalysisResponse } from "../types/ApiTypes"; import type { LinguisticAnalysisResponse } from "../types/ApiTypes";
import {
buildNgramSpec,
buildWordSpec,
type CorpusExplorerSpec,
} from "../utils/corpusExplorer";
const styles = StatsStyling; const styles = StatsStyling;
type LinguisticStatsProps = { type LinguisticStatsProps = {
data: LinguisticAnalysisResponse; data: LinguisticAnalysisResponse;
onExplore: (spec: CorpusExplorerSpec) => void;
}; };
const LinguisticStats = ({ data }: LinguisticStatsProps) => { const LinguisticStats = ({ data, onExplore }: LinguisticStatsProps) => {
const lexical = data.lexical_diversity; const lexical = data.lexical_diversity;
const words = data.word_frequencies ?? []; const words = data.word_frequencies ?? [];
const bigrams = data.common_two_phrases ?? []; const bigrams = data.common_two_phrases ?? [];
@@ -60,7 +66,11 @@ const LinguisticStats = ({ data }: LinguisticStatsProps) => {
}} }}
> >
{topWords.map((item) => ( {topWords.map((item) => (
<div key={item.word} style={styles.topUserItem}> <div
key={item.word}
style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => onExplore(buildWordSpec(item.word))}
>
<div style={styles.topUserName}>{item.word}</div> <div style={styles.topUserName}>{item.word}</div>
<div style={styles.topUserMeta}> <div style={styles.topUserMeta}>
{item.count.toLocaleString()} uses {item.count.toLocaleString()} uses
@@ -81,7 +91,11 @@ const LinguisticStats = ({ data }: LinguisticStatsProps) => {
}} }}
> >
{topBigrams.map((item) => ( {topBigrams.map((item) => (
<div key={item.ngram} style={styles.topUserItem}> <div
key={item.ngram}
style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => onExplore(buildNgramSpec(item.ngram))}
>
<div style={styles.topUserName}>{item.ngram}</div> <div style={styles.topUserName}>{item.ngram}</div>
<div style={styles.topUserMeta}> <div style={styles.topUserMeta}>
{item.count.toLocaleString()} uses {item.count.toLocaleString()} uses
@@ -102,7 +116,11 @@ const LinguisticStats = ({ data }: LinguisticStatsProps) => {
}} }}
> >
{topTrigrams.map((item) => ( {topTrigrams.map((item) => (
<div key={item.ngram} style={styles.topUserItem}> <div
key={item.ngram}
style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => onExplore(buildNgramSpec(item.ngram))}
>
<div style={styles.topUserName}>{item.ngram}</div> <div style={styles.topUserName}>{item.ngram}</div>
<div style={styles.topUserMeta}> <div style={styles.topUserMeta}>
{item.count.toLocaleString()} uses {item.count.toLocaleString()} uses

View File

@@ -1,4 +1,4 @@
import { memo, useMemo, useState } from "react"; import { memo, useMemo } from "react";
import { import {
LineChart, LineChart,
Line, Line,
@@ -13,7 +13,6 @@ import ActivityHeatmap from "../stats/ActivityHeatmap";
import { ReactWordcloud } from "@cp949/react-wordcloud"; import { ReactWordcloud } from "@cp949/react-wordcloud";
import StatsStyling from "../styles/stats_styling"; import StatsStyling from "../styles/stats_styling";
import Card from "../components/Card"; import Card from "../components/Card";
import UserModal from "../components/UserModal";
import { import {
type SummaryResponse, type SummaryResponse,
@@ -21,8 +20,15 @@ import {
type UserEndpointResponse, type UserEndpointResponse,
type TimeAnalysisResponse, type TimeAnalysisResponse,
type LinguisticAnalysisResponse, type LinguisticAnalysisResponse,
type User,
} from "../types/ApiTypes"; } from "../types/ApiTypes";
import {
buildAllRecordsSpec,
buildDateBucketSpec,
buildOneTimeUsersSpec,
buildUserSpec,
getExplorerButtonStyle,
type CorpusExplorerSpec,
} from "../utils/corpusExplorer";
const styles = StatsStyling; const styles = StatsStyling;
const MAX_WORDCLOUD_WORDS = 250; const MAX_WORDCLOUD_WORDS = 250;
@@ -39,6 +45,7 @@ type SummaryStatsProps = {
timeData: TimeAnalysisResponse | null; timeData: TimeAnalysisResponse | null;
linguisticData: LinguisticAnalysisResponse | null; linguisticData: LinguisticAnalysisResponse | null;
summary: SummaryResponse | null; summary: SummaryResponse | null;
onExplore: (spec: CorpusExplorerSpec) => void;
}; };
type WordCloudPanelProps = { type WordCloudPanelProps = {
@@ -60,7 +67,7 @@ function formatDateRange(startUnix: number, endUnix: number) {
day: "2-digit", day: "2-digit",
}); });
return `${fmt(start)} ${fmt(end)}`; return `${fmt(start)} -> ${fmt(end)}`;
} }
function convertFrequencyData(data: FrequencyWord[]) { function convertFrequencyData(data: FrequencyWord[]) {
@@ -70,25 +77,22 @@ function convertFrequencyData(data: FrequencyWord[]) {
})); }));
} }
const renderExploreButton = (onClick: () => void) => (
<button
onClick={onClick}
style={{ ...styles.buttonSecondary, ...getExplorerButtonStyle() }}
>
Explore
</button>
);
const SummaryStats = ({ const SummaryStats = ({
userData, userData,
timeData, timeData,
linguisticData, linguisticData,
summary, summary,
onExplore,
}: SummaryStatsProps) => { }: SummaryStatsProps) => {
const [selectedUser, setSelectedUser] = useState<string | null>(null);
const usersByAuthor = useMemo(() => {
const nextMap = new Map<string, User>();
for (const user of userData?.users ?? []) {
nextMap.set(user.author, user);
}
return nextMap;
}, [userData?.users]);
const selectedUserData: User | null = selectedUser
? usersByAuthor.get(selectedUser) ?? null
: null;
const wordCloudWords = useMemo( const wordCloudWords = useMemo(
() => () =>
convertFrequencyData( convertFrequencyData(
@@ -104,49 +108,41 @@ const SummaryStats = ({
return ( return (
<div style={styles.page}> <div style={styles.page}>
{/* main grid*/}
<div style={{ ...styles.container, ...styles.grid }}> <div style={{ ...styles.container, ...styles.grid }}>
<Card <Card
label="Total Activity" label="Total Activity"
value={summary?.total_events ?? ""} value={summary?.total_events ?? "-"}
sublabel="Posts + comments" sublabel="Posts + comments"
style={{ rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
gridColumn: "span 4", style={{ gridColumn: "span 4" }}
}}
/> />
<Card <Card
label="Active People" label="Active People"
value={summary?.unique_users ?? ""} value={summary?.unique_users ?? "-"}
sublabel="Distinct users" sublabel="Distinct users"
style={{ rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
gridColumn: "span 4", style={{ gridColumn: "span 4" }}
}}
/> />
<Card <Card
label="Posts vs Comments" label="Posts vs Comments"
value={ value={
summary ? `${summary.total_posts} / ${summary.total_comments}` : "" summary ? `${summary.total_posts} / ${summary.total_comments}` : "-"
} }
sublabel={`Comments per post: ${summary?.comments_per_post ?? ""}`} sublabel={`Comments per post: ${summary?.comments_per_post ?? "-"}`}
style={{ rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
gridColumn: "span 4", style={{ gridColumn: "span 4" }}
}}
/> />
<Card <Card
label="Time Range" label="Time Range"
value={ value={
summary?.time_range summary?.time_range
? formatDateRange( ? formatDateRange(summary.time_range.start, summary.time_range.end)
summary.time_range.start, : "-"
summary.time_range.end,
)
: "—"
} }
sublabel="Based on dataset timestamps" sublabel="Based on dataset timestamps"
style={{ rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
gridColumn: "span 4", style={{ gridColumn: "span 4" }}
}}
/> />
<Card <Card
@@ -154,38 +150,44 @@ const SummaryStats = ({
value={ value={
typeof summary?.lurker_ratio === "number" typeof summary?.lurker_ratio === "number"
? `${Math.round(summary.lurker_ratio * 100)}%` ? `${Math.round(summary.lurker_ratio * 100)}%`
: "" : "-"
} }
sublabel="Users with only one event" sublabel="Users with only one event"
style={{ rightSlot={renderExploreButton(() => onExplore(buildOneTimeUsersSpec()))}
gridColumn: "span 4", style={{ gridColumn: "span 4" }}
}}
/> />
<Card <Card
label="Sources" label="Sources"
value={summary?.sources?.length ?? ""} value={summary?.sources?.length ?? "-"}
sublabel={ sublabel={
summary?.sources?.length summary?.sources?.length
? summary.sources.slice(0, 3).join(", ") + ? summary.sources.slice(0, 3).join(", ") +
(summary.sources.length > 3 ? "" : "") (summary.sources.length > 3 ? "..." : "")
: "" : "-"
} }
style={{ rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
gridColumn: "span 4", style={{ gridColumn: "span 4" }}
}}
/> />
{/* events per day */}
<div style={{ ...styles.card, gridColumn: "span 5" }}> <div style={{ ...styles.card, gridColumn: "span 5" }}>
<h2 style={styles.sectionTitle}>Activity Over Time</h2> <h2 style={styles.sectionTitle}>Activity Over Time</h2>
<p style={styles.sectionSubtitle}> <p style={styles.sectionSubtitle}>How much posting happened each day.</p>
How much posting happened each day.
</p>
<div style={styles.chartWrapper}> <div style={styles.chartWrapper}>
<ResponsiveContainer width="100%" height="100%"> <ResponsiveContainer width="100%" height="100%">
<LineChart data={timeData?.events_per_day ?? []}> <LineChart
data={timeData?.events_per_day ?? []}
onClick={(state: unknown) => {
const payload = (state as { activePayload?: Array<{ payload?: { date?: string } }> })
?.activePayload?.[0]?.payload as
| { date?: string }
| undefined;
if (payload?.date) {
onExplore(buildDateBucketSpec(String(payload.date)));
}
}}
>
<CartesianGrid strokeDasharray="3 3" /> <CartesianGrid strokeDasharray="3 3" />
<XAxis dataKey="date" /> <XAxis dataKey="date" />
<YAxis /> <YAxis />
@@ -201,7 +203,6 @@ const SummaryStats = ({
</div> </div>
</div> </div>
{/* Word Cloud */}
<div style={{ ...styles.card, gridColumn: "span 4" }}> <div style={{ ...styles.card, gridColumn: "span 4" }}>
<h2 style={styles.sectionTitle}>Common Words</h2> <h2 style={styles.sectionTitle}>Common Words</h2>
<p style={styles.sectionSubtitle}> <p style={styles.sectionSubtitle}>
@@ -213,7 +214,6 @@ const SummaryStats = ({
</div> </div>
</div> </div>
{/* Top Users */}
<div <div
style={{ ...styles.card, ...styles.scrollArea, gridColumn: "span 3" }} style={{ ...styles.card, ...styles.scrollArea, gridColumn: "span 3" }}
> >
@@ -225,7 +225,7 @@ const SummaryStats = ({
<div <div
key={`${item.author}-${item.source}`} key={`${item.author}-${item.source}`}
style={{ ...styles.topUserItem, cursor: "pointer" }} style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => setSelectedUser(item.author)} onClick={() => onExplore(buildUserSpec(item.author))}
> >
<div style={styles.topUserName}>{item.author}</div> <div style={styles.topUserName}>{item.author}</div>
<div style={styles.topUserMeta}> <div style={styles.topUserMeta}>
@@ -236,7 +236,6 @@ const SummaryStats = ({
</div> </div>
</div> </div>
{/* Heatmap */}
<div style={{ ...styles.card, gridColumn: "span 12" }}> <div style={{ ...styles.card, gridColumn: "span 12" }}>
<h2 style={styles.sectionTitle}>Weekly Activity Pattern</h2> <h2 style={styles.sectionTitle}>Weekly Activity Pattern</h2>
<p style={styles.sectionSubtitle}> <p style={styles.sectionSubtitle}>
@@ -248,13 +247,6 @@ const SummaryStats = ({
</div> </div>
</div> </div>
</div> </div>
<UserModal
open={!!selectedUser}
onClose={() => setSelectedUser(null)}
username={selectedUser ?? ""}
userData={selectedUserData}
/>
</div> </div>
); );
}; };

View File

@@ -5,6 +5,12 @@ import { type TopUser, type InteractionGraph } from "../types/ApiTypes";
import StatsStyling from "../styles/stats_styling"; import StatsStyling from "../styles/stats_styling";
import Card from "./Card"; import Card from "./Card";
import {
buildReplyPairSpec,
toText,
buildUserSpec,
type CorpusExplorerSpec,
} from "../utils/corpusExplorer";
const styles = StatsStyling; const styles = StatsStyling;
@@ -39,6 +45,7 @@ type UserStatsProps = {
interactionGraph: InteractionGraph; interactionGraph: InteractionGraph;
totalUsers: number; totalUsers: number;
mostCommentHeavyUser: { author: string; commentShare: number } | null; mostCommentHeavyUser: { author: string; commentShare: number } | null;
onExplore: (spec: CorpusExplorerSpec) => void;
}; };
const UserStats = ({ const UserStats = ({
@@ -46,6 +53,7 @@ const UserStats = ({
interactionGraph, interactionGraph,
totalUsers, totalUsers,
mostCommentHeavyUser, mostCommentHeavyUser,
onExplore,
}: UserStatsProps) => { }: UserStatsProps) => {
const graphData = useMemo( const graphData = useMemo(
() => ApiToGraphData(interactionGraph), () => ApiToGraphData(interactionGraph),
@@ -87,9 +95,9 @@ const UserStats = ({
null, null,
); );
const mostActiveUser = topUsers.find( const mostActiveUser = topUsers.find((u) => u.author !== "[deleted]");
(u) => u.author !== "[deleted]", const strongestLinkSource = strongestLink ? toText(strongestLink.source) : "";
); const strongestLinkTarget = strongestLink ? toText(strongestLink.target) : "";
return ( return (
<div style={styles.page}> <div style={styles.page}>
@@ -114,37 +122,69 @@ const UserStats = ({
/> />
<Card <Card
label="Most Active User" label="Most Active User"
value={mostActiveUser?.author ?? ""} value={mostActiveUser?.author ?? "-"}
sublabel={ sublabel={
mostActiveUser mostActiveUser
? `${mostActiveUser.count.toLocaleString()} events` ? `${mostActiveUser.count.toLocaleString()} events`
: "No user activity found" : "No user activity found"
} }
rightSlot={
mostActiveUser ? (
<button
onClick={() => onExplore(buildUserSpec(mostActiveUser.author))}
style={styles.buttonSecondary}
>
Explore
</button>
) : null
}
style={{ gridColumn: "span 3" }} style={{ gridColumn: "span 3" }}
/> />
<Card <Card
label="Strongest User Link" label="Strongest User Link"
value={ value={
strongestLink strongestLinkSource && strongestLinkTarget
? `${strongestLink.source} -> ${strongestLink.target}` ? `${strongestLinkSource} -> ${strongestLinkTarget}`
: "" : "-"
} }
sublabel={ sublabel={
strongestLink strongestLink
? `${strongestLink.value.toLocaleString()} replies` ? `${strongestLink.value.toLocaleString()} replies`
: "No graph links after filtering" : "No graph links after filtering"
} }
rightSlot={
strongestLinkSource && strongestLinkTarget ? (
<button
onClick={() =>
onExplore(buildReplyPairSpec(strongestLinkSource, strongestLinkTarget))
}
style={styles.buttonSecondary}
>
Explore
</button>
) : null
}
style={{ gridColumn: "span 6" }} style={{ gridColumn: "span 6" }}
/> />
<Card <Card
label="Most Comment-Heavy User" label="Most Comment-Heavy User"
value={mostCommentHeavyUser?.author ?? ""} value={mostCommentHeavyUser?.author ?? "-"}
sublabel={ sublabel={
mostCommentHeavyUser mostCommentHeavyUser
? `${Math.round(mostCommentHeavyUser.commentShare * 100)}% comments` ? `${Math.round(mostCommentHeavyUser.commentShare * 100)}% comments`
: "No user distribution available" : "No user distribution available"
} }
rightSlot={
mostCommentHeavyUser ? (
<button
onClick={() => onExplore(buildUserSpec(mostCommentHeavyUser.author))}
style={styles.buttonSecondary}
>
Explore
</button>
) : null
}
style={{ gridColumn: "span 6" }} style={{ gridColumn: "span 6" }}
/> />
@@ -166,6 +206,19 @@ const UserStats = ({
linkDirectionalParticleSpeed={0.004} linkDirectionalParticleSpeed={0.004}
linkWidth={(link) => Math.sqrt(Number(link.value))} linkWidth={(link) => Math.sqrt(Number(link.value))}
nodeLabel={(node) => `${node.id}`} nodeLabel={(node) => `${node.id}`}
onNodeClick={(node) => {
const userId = toText(node.id);
if (userId) {
onExplore(buildUserSpec(userId));
}
}}
onLinkClick={(link) => {
const source = toText(link.source);
const target = toText(link.target);
if (source && target) {
onExplore(buildReplyPairSpec(source, target));
}
}}
/> />
</div> </div>
</div> </div>

View File

@@ -22,12 +22,10 @@ const DatasetEditPage = () => {
const [isSaving, setIsSaving] = useState(false); const [isSaving, setIsSaving] = useState(false);
const [isDeleting, setIsDeleting] = useState(false); const [isDeleting, setIsDeleting] = useState(false);
const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false); const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false);
const [hasError, setHasError] = useState(false);
const [datasetName, setDatasetName] = useState(""); const [datasetName, setDatasetName] = useState("");
useEffect(() => { useEffect(() => {
if (!Number.isInteger(parsedDatasetId) || parsedDatasetId <= 0) { if (!Number.isInteger(parsedDatasetId) || parsedDatasetId <= 0) {
setHasError(true);
setStatusMessage("Invalid dataset id."); setStatusMessage("Invalid dataset id.");
setLoading(false); setLoading(false);
return; return;
@@ -35,7 +33,6 @@ const DatasetEditPage = () => {
const token = localStorage.getItem("access_token"); const token = localStorage.getItem("access_token");
if (!token) { if (!token) {
setHasError(true);
setStatusMessage("You must be signed in to edit datasets."); setStatusMessage("You must be signed in to edit datasets.");
setLoading(false); setLoading(false);
return; return;
@@ -49,7 +46,6 @@ const DatasetEditPage = () => {
setDatasetName(response.data.name || ""); setDatasetName(response.data.name || "");
}) })
.catch((error: unknown) => { .catch((error: unknown) => {
setHasError(true);
if (axios.isAxiosError(error)) { if (axios.isAxiosError(error)) {
setStatusMessage( setStatusMessage(
String(error.response?.data?.error || error.message), String(error.response?.data?.error || error.message),
@@ -68,21 +64,18 @@ const DatasetEditPage = () => {
const trimmedName = datasetName.trim(); const trimmedName = datasetName.trim();
if (!trimmedName) { if (!trimmedName) {
setHasError(true);
setStatusMessage("Please enter a valid dataset name."); setStatusMessage("Please enter a valid dataset name.");
return; return;
} }
const token = localStorage.getItem("access_token"); const token = localStorage.getItem("access_token");
if (!token) { if (!token) {
setHasError(true);
setStatusMessage("You must be signed in to save changes."); setStatusMessage("You must be signed in to save changes.");
return; return;
} }
try { try {
setIsSaving(true); setIsSaving(true);
setHasError(false);
setStatusMessage(""); setStatusMessage("");
await axios.patch( await axios.patch(
@@ -93,7 +86,6 @@ const DatasetEditPage = () => {
navigate("/datasets", { replace: true }); navigate("/datasets", { replace: true });
} catch (error: unknown) { } catch (error: unknown) {
setHasError(true);
if (axios.isAxiosError(error)) { if (axios.isAxiosError(error)) {
setStatusMessage( setStatusMessage(
String( String(
@@ -111,7 +103,6 @@ const DatasetEditPage = () => {
const deleteDataset = async () => { const deleteDataset = async () => {
const deleteToken = localStorage.getItem("access_token"); const deleteToken = localStorage.getItem("access_token");
if (!deleteToken) { if (!deleteToken) {
setHasError(true);
setStatusMessage("You must be signed in to delete datasets."); setStatusMessage("You must be signed in to delete datasets.");
setIsDeleteModalOpen(false); setIsDeleteModalOpen(false);
return; return;
@@ -119,7 +110,6 @@ const DatasetEditPage = () => {
try { try {
setIsDeleting(true); setIsDeleting(true);
setHasError(false);
setStatusMessage(""); setStatusMessage("");
await axios.delete(`${API_BASE_URL}/dataset/${parsedDatasetId}`, { await axios.delete(`${API_BASE_URL}/dataset/${parsedDatasetId}`, {
@@ -129,7 +119,6 @@ const DatasetEditPage = () => {
setIsDeleteModalOpen(false); setIsDeleteModalOpen(false);
navigate("/datasets", { replace: true }); navigate("/datasets", { replace: true });
} catch (error: unknown) { } catch (error: unknown) {
setHasError(true);
if (axios.isAxiosError(error)) { if (axios.isAxiosError(error)) {
setStatusMessage( setStatusMessage(
String( String(

View File

@@ -1,4 +1,4 @@
import { useEffect, useState, useRef } from "react"; import { useEffect, useRef, useState } from "react";
import axios from "axios"; import axios from "axios";
import { useParams } from "react-router-dom"; import { useParams } from "react-router-dom";
import StatsStyling from "../styles/stats_styling"; import StatsStyling from "../styles/stats_styling";
@@ -8,6 +8,7 @@ import UserStats from "../components/UserStats";
import LinguisticStats from "../components/LinguisticStats"; import LinguisticStats from "../components/LinguisticStats";
import InteractionalStats from "../components/InteractionalStats"; import InteractionalStats from "../components/InteractionalStats";
import CulturalStats from "../components/CulturalStats"; import CulturalStats from "../components/CulturalStats";
import CorpusExplorer from "../components/CorpusExplorer";
import { import {
type SummaryResponse, type SummaryResponse,
@@ -19,10 +20,15 @@ import {
type InteractionAnalysisResponse, type InteractionAnalysisResponse,
type CulturalAnalysisResponse, type CulturalAnalysisResponse,
} from "../types/ApiTypes"; } from "../types/ApiTypes";
import {
buildExplorerContext,
type CorpusExplorerSpec,
type DatasetRecord,
} from "../utils/corpusExplorer";
const API_BASE_URL = import.meta.env.VITE_BACKEND_URL; const API_BASE_URL = import.meta.env.VITE_BACKEND_URL;
const styles = StatsStyling; const styles = StatsStyling;
const DELETED_USERS = ["[deleted]"]; const DELETED_USERS = ["[deleted]", "automoderator"];
const isDeletedUser = (value: string | null | undefined) => const isDeletedUser = (value: string | null | undefined) =>
DELETED_USERS.includes((value ?? "").trim().toLowerCase()); DELETED_USERS.includes((value ?? "").trim().toLowerCase());
@@ -40,6 +46,97 @@ type UserStatsMeta = {
mostCommentHeavyUser: { author: string; commentShare: number } | null; mostCommentHeavyUser: { author: string; commentShare: number } | null;
}; };
/**
 * State backing the corpus explorer dialog.
 *
 * Every field is forwarded one-to-one to the matching `CorpusExplorer` prop
 * when the page renders.
 */
type ExplorerState = {
  // Whether the dialog is currently shown.
  open: boolean;
  // Heading and sub-heading taken from the spec that opened the dialog.
  title: string;
  description: string;
  // Text to show when the spec matched no records.
  emptyMessage: string;
  // Records that satisfied the spec's matcher.
  records: DatasetRecord[];
  // True while the record fetch is in flight.
  loading: boolean;
  // Failure message for the last load attempt; empty string when there is none.
  error: string;
};
/**
 * Closed, empty explorer state. Used as the initial `useState` value and to
 * reset the dialog from the page's effect.
 */
const EMPTY_EXPLORER_STATE: ExplorerState = {
  open: false,
  title: "Corpus Explorer",
  description: "",
  emptyMessage: "No records found.",
  records: [],
  loading: false,
  error: "",
};
/**
 * Coerces whatever the corpus endpoint returned into a flat record array.
 *
 * Accepted shapes, checked in this order:
 *  1. a JSON string containing any of the shapes below (parsed, then retried);
 *  2. `{ error: string }` - rethrown as an `Error` carrying that message;
 *  3. a bare array;
 *  4. an envelope with an array under `data`, `records`, `rows` or `result`;
 *  5. an object with exactly one property whose value is an array;
 *  6. an object whose values are all objects (the values are returned).
 *
 * @param payload Raw response body.
 * @returns The records found in the payload.
 * @throws Error when the payload is a non-JSON string, carries an `error`
 *   message, or matches none of the shapes above.
 */
const normalizeRecordPayload = (payload: unknown): DatasetRecord[] => {
  if (typeof payload === "string") {
    let parsed: unknown;
    // Only the parse itself is guarded. Errors raised while normalising the
    // parsed value (e.g. the server's own `error` message) must reach the
    // caller unchanged instead of being relabelled as a JSON failure.
    try {
      parsed = JSON.parse(payload);
    } catch {
      throw new Error("Corpus endpoint returned a non-JSON string payload.");
    }
    return normalizeRecordPayload(parsed);
  }

  if (payload !== null && typeof payload === "object" && !Array.isArray(payload)) {
    const reported = (payload as { error?: unknown }).error;
    if ("error" in payload && typeof reported === "string") {
      throw new Error(reported);
    }
  }

  if (Array.isArray(payload)) {
    return payload as DatasetRecord[];
  }

  if (payload !== null && typeof payload === "object") {
    const envelope = payload as Record<string, unknown>;

    // Well-known wrapper keys, tried in priority order.
    for (const key of ["data", "records", "rows", "result"]) {
      const candidate = envelope[key];
      if (key in envelope && Array.isArray(candidate)) {
        return candidate as DatasetRecord[];
      }
    }

    const values = Object.values(envelope);
    if (values.length === 1 && Array.isArray(values[0])) {
      return values[0] as DatasetRecord[];
    }
    // Object keyed by index/id: every value is itself an object.
    if (values.every((value) => value !== null && typeof value === "object")) {
      return values as DatasetRecord[];
    }
  }

  throw new Error("Corpus endpoint returned an unexpected payload.");
};
const StatPage = () => { const StatPage = () => {
const { datasetId: routeDatasetId } = useParams<{ datasetId: string }>(); const { datasetId: routeDatasetId } = useParams<{ datasetId: string }>();
const [error, setError] = useState(""); const [error, setError] = useState("");
@@ -61,6 +158,12 @@ const StatPage = () => {
totalUsers: 0, totalUsers: 0,
mostCommentHeavyUser: null, mostCommentHeavyUser: null,
}); });
const [appliedFilters, setAppliedFilters] = useState<Record<string, string>>({});
const [allRecords, setAllRecords] = useState<DatasetRecord[] | null>(null);
const [allRecordsKey, setAllRecordsKey] = useState("");
const [explorerState, setExplorerState] = useState<ExplorerState>(
EMPTY_EXPLORER_STATE,
);
const searchInputRef = useRef<HTMLInputElement>(null); const searchInputRef = useRef<HTMLInputElement>(null);
const beforeDateRef = useRef<HTMLInputElement>(null); const beforeDateRef = useRef<HTMLInputElement>(null);
@@ -104,6 +207,82 @@ const StatPage = () => {
}; };
}; };
// Produces a cache key for a filter set that does not depend on the order in
// which the filter properties were inserted.
const getFilterKey = (params: Record<string, string>) => {
  const orderedEntries = Object.entries(params);
  orderedEntries.sort((left, right) => left[0].localeCompare(right[0]));
  return JSON.stringify(orderedEntries);
};
// Returns the records for the currently applied filters, fetching them from
// `/dataset/<id>/all` only when the cached copy was loaded for a different
// filter key (or nothing is cached yet).
// Throws when the dataset id or the auth headers are missing.
const ensureFilteredRecords = async () => {
  if (!datasetId) {
    throw new Error("Missing dataset id.");
  }

  const authHeaders = getAuthHeaders();
  if (!authHeaders) {
    throw new Error("You must be signed in to load corpus records.");
  }

  const filterKey = getFilterKey(appliedFilters);
  const cacheMatchesFilters = allRecordsKey === filterKey;
  if (allRecords && cacheMatchesFilters) {
    return allRecords;
  }

  const endpoint = `${API_BASE_URL}/dataset/${datasetId}/all`;
  const { data } = await axios.get<unknown>(endpoint, {
    params: appliedFilters,
    headers: authHeaders,
  });

  // Normalise first so a malformed payload never replaces the cache.
  const fetched = normalizeRecordPayload(data);
  setAllRecords(fetched);
  setAllRecordsKey(filterKey);
  return fetched;
};
// Opens the explorer dialog for `spec`: shows a loading state immediately,
// then loads the filtered corpus, keeps the records accepted by the spec's
// matcher, orders them by descending date string and publishes the result.
// Any failure is surfaced in the dialog's `error` field instead of thrown.
// NOTE(review): overlapping calls are not sequenced here - whichever request
// settles last determines the final dialog state.
const openExplorer = async (spec: CorpusExplorerSpec) => {
  // Fields shared by the loading, success and failure states.
  const panel = {
    open: true,
    title: spec.title,
    description: spec.description,
    emptyMessage: spec.emptyMessage ?? "No matching records found.",
  };

  setExplorerState({ ...panel, records: [], loading: true, error: "" });

  try {
    const records = await ensureFilteredRecords();
    const context = buildExplorerContext(records);
    const sortKey = (record: DatasetRecord) =>
      String(record.dt ?? record.date ?? record.timestamp ?? "");

    // `filter` yields a fresh array, so sorting it leaves the cache untouched.
    const matched = records.filter((record) => spec.matcher(record, context));
    matched.sort((first, second) => sortKey(second).localeCompare(sortKey(first)));

    setExplorerState({ ...panel, records: matched, loading: false, error: "" });
  } catch (e) {
    setExplorerState({
      ...panel,
      records: [],
      loading: false,
      error: `Failed to load corpus records: ${String(e)}`,
    });
  }
};
const getStats = (params: Record<string, string> = {}) => { const getStats = (params: Record<string, string> = {}) => {
if (!datasetId) { if (!datasetId) {
setError("Missing dataset id. Open /dataset/<id>/stats."); setError("Missing dataset id. Open /dataset/<id>/stats.");
@@ -118,22 +297,20 @@ const StatPage = () => {
setError(""); setError("");
setLoading(true); setLoading(true);
setAppliedFilters(params);
setAllRecords(null);
setAllRecordsKey("");
setExplorerState((current) => ({ ...current, open: false }));
Promise.all([ Promise.all([
axios.get<TimeAnalysisResponse>( axios.get<TimeAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/temporal`, {
`${API_BASE_URL}/dataset/${datasetId}/temporal`, params,
{ headers: authHeaders,
params, }),
headers: authHeaders, axios.get<UserEndpointResponse>(`${API_BASE_URL}/dataset/${datasetId}/user`, {
}, params,
), headers: authHeaders,
axios.get<UserEndpointResponse>( }),
`${API_BASE_URL}/dataset/${datasetId}/user`,
{
params,
headers: authHeaders,
},
),
axios.get<LinguisticAnalysisResponse>( axios.get<LinguisticAnalysisResponse>(
`${API_BASE_URL}/dataset/${datasetId}/linguistic`, `${API_BASE_URL}/dataset/${datasetId}/linguistic`,
{ {
@@ -141,13 +318,10 @@ const StatPage = () => {
headers: authHeaders, headers: authHeaders,
}, },
), ),
axios.get<EmotionalAnalysisResponse>( axios.get<EmotionalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/emotional`, {
`${API_BASE_URL}/dataset/${datasetId}/emotional`, params,
{ headers: authHeaders,
params, }),
headers: authHeaders,
},
),
axios.get<InteractionAnalysisResponse>( axios.get<InteractionAnalysisResponse>(
`${API_BASE_URL}/dataset/${datasetId}/interactional`, `${API_BASE_URL}/dataset/${datasetId}/interactional`,
{ {
@@ -155,20 +329,14 @@ const StatPage = () => {
headers: authHeaders, headers: authHeaders,
}, },
), ),
axios.get<SummaryResponse>( axios.get<SummaryResponse>(`${API_BASE_URL}/dataset/${datasetId}/summary`, {
`${API_BASE_URL}/dataset/${datasetId}/summary`, params,
{ headers: authHeaders,
params, }),
headers: authHeaders, axios.get<CulturalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/cultural`, {
}, params,
), headers: authHeaders,
axios.get<CulturalAnalysisResponse>( }),
`${API_BASE_URL}/dataset/${datasetId}/cultural`,
{
params,
headers: authHeaders,
},
),
]) ])
.then( .then(
([ ([
@@ -182,8 +350,7 @@ const StatPage = () => {
]) => { ]) => {
const usersList = userRes.data.users ?? []; const usersList = userRes.data.users ?? [];
const topUsersList = userRes.data.top_users ?? []; const topUsersList = userRes.data.top_users ?? [];
const interactionGraphRaw = const interactionGraphRaw = interactionRes.data?.interaction_graph ?? {};
interactionRes.data?.interaction_graph ?? {};
const topPairsRaw = interactionRes.data?.top_interaction_pairs ?? []; const topPairsRaw = interactionRes.data?.top_interaction_pairs ?? [];
const filteredUsers: typeof usersList = []; const filteredUsers: typeof usersList = [];
@@ -194,18 +361,14 @@ const StatPage = () => {
const filteredTopUsers: typeof topUsersList = []; const filteredTopUsers: typeof topUsersList = [];
for (const user of topUsersList) { for (const user of topUsersList) {
if (isDeletedUser(user.author)) continue; if (isDeletedUser(user.author)) continue;
filteredTopUsers.push(user); filteredTopUsers.push(user);
} }
let mostCommentHeavyUser: UserStatsMeta["mostCommentHeavyUser"] = let mostCommentHeavyUser: UserStatsMeta["mostCommentHeavyUser"] = null;
null;
for (const user of filteredUsers) { for (const user of filteredUsers) {
const currentShare = user.comment_share ?? 0; const currentShare = user.comment_share ?? 0;
if ( if (!mostCommentHeavyUser || currentShare > mostCommentHeavyUser.commentShare) {
!mostCommentHeavyUser ||
currentShare > mostCommentHeavyUser.commentShare
) {
mostCommentHeavyUser = { mostCommentHeavyUser = {
author: user.author, author: user.author,
commentShare: currentShare, commentShare: currentShare,
@@ -221,8 +384,7 @@ const StatPage = () => {
} }
} }
const filteredInteractionGraph: Record<string, Record<string, number>> = const filteredInteractionGraph: Record<string, Record<string, number>> = {};
{};
for (const [source, targets] of Object.entries(interactionGraphRaw)) { for (const [source, targets] of Object.entries(interactionGraphRaw)) {
if (isDeletedUser(source)) { if (isDeletedUser(source)) {
continue; continue;
@@ -279,7 +441,7 @@ const StatPage = () => {
setSummary(filteredSummary || null); setSummary(filteredSummary || null);
}, },
) )
.catch((e) => setError("Failed to load statistics: " + String(e))) .catch((e) => setError(`Failed to load statistics: ${String(e)}`))
.finally(() => setLoading(false)); .finally(() => setLoading(false));
}; };
@@ -302,6 +464,9 @@ const StatPage = () => {
useEffect(() => { useEffect(() => {
setError(""); setError("");
setAllRecords(null);
setAllRecordsKey("");
setExplorerState(EMPTY_EXPLORER_STATE);
if (!datasetId) { if (!datasetId) {
setError("Missing dataset id. Open /dataset/<id>/stats."); setError("Missing dataset id. Open /dataset/<id>/stats.");
return; return;
@@ -398,9 +563,7 @@ const StatPage = () => {
<button <button
onClick={() => setActiveView("summary")} onClick={() => setActiveView("summary")}
style={ style={
activeView === "summary" activeView === "summary" ? styles.buttonPrimary : styles.buttonSecondary
? styles.buttonPrimary
: styles.buttonSecondary
} }
> >
Summary Summary
@@ -418,11 +581,7 @@ const StatPage = () => {
<button <button
onClick={() => setActiveView("user")} onClick={() => setActiveView("user")}
style={ style={activeView === "user" ? styles.buttonPrimary : styles.buttonSecondary}
activeView === "user"
? styles.buttonPrimary
: styles.buttonSecondary
}
> >
Users Users
</button> </button>
@@ -449,9 +608,7 @@ const StatPage = () => {
<button <button
onClick={() => setActiveView("cultural")} onClick={() => setActiveView("cultural")}
style={ style={
activeView === "cultural" activeView === "cultural" ? styles.buttonPrimary : styles.buttonSecondary
? styles.buttonPrimary
: styles.buttonSecondary
} }
> >
Cultural Cultural
@@ -464,11 +621,12 @@ const StatPage = () => {
timeData={timeData} timeData={timeData}
linguisticData={linguisticData} linguisticData={linguisticData}
summary={summary} summary={summary}
onExplore={openExplorer}
/> />
)} )}
{activeView === "emotional" && emotionalData && ( {activeView === "emotional" && emotionalData && (
<EmotionalStats emotionalData={emotionalData} /> <EmotionalStats emotionalData={emotionalData} onExplore={openExplorer} />
)} )}
{activeView === "emotional" && !emotionalData && ( {activeView === "emotional" && !emotionalData && (
@@ -483,6 +641,7 @@ const StatPage = () => {
interactionGraph={interactionData.interaction_graph} interactionGraph={interactionData.interaction_graph}
totalUsers={userStatsMeta.totalUsers} totalUsers={userStatsMeta.totalUsers}
mostCommentHeavyUser={userStatsMeta.mostCommentHeavyUser} mostCommentHeavyUser={userStatsMeta.mostCommentHeavyUser}
onExplore={openExplorer}
/> />
)} )}
@@ -493,7 +652,7 @@ const StatPage = () => {
)} )}
{activeView === "linguistic" && linguisticData && ( {activeView === "linguistic" && linguisticData && (
<LinguisticStats data={linguisticData} /> <LinguisticStats data={linguisticData} onExplore={openExplorer} />
)} )}
{activeView === "linguistic" && !linguisticData && ( {activeView === "linguistic" && !linguisticData && (
@@ -503,7 +662,7 @@ const StatPage = () => {
)} )}
{activeView === "interactional" && interactionData && ( {activeView === "interactional" && interactionData && (
<InteractionalStats data={interactionData} /> <InteractionalStats data={interactionData} onExplore={openExplorer} />
)} )}
{activeView === "interactional" && !interactionData && ( {activeView === "interactional" && !interactionData && (
@@ -513,7 +672,7 @@ const StatPage = () => {
)} )}
{activeView === "cultural" && culturalData && ( {activeView === "cultural" && culturalData && (
<CulturalStats data={culturalData} /> <CulturalStats data={culturalData} onExplore={openExplorer} />
)} )}
{activeView === "cultural" && !culturalData && ( {activeView === "cultural" && !culturalData && (
@@ -521,6 +680,17 @@ const StatPage = () => {
No cultural data available. No cultural data available.
</div> </div>
)} )}
<CorpusExplorer
open={explorerState.open}
onClose={() => setExplorerState((current) => ({ ...current, open: false }))}
title={explorerState.title}
description={explorerState.description}
records={explorerState.records}
loading={explorerState.loading}
error={explorerState.error}
emptyMessage={explorerState.emptyMessage}
/>
</div> </div>
); );
}; };

View File

@@ -0,0 +1,405 @@
import type { CSSProperties } from "react";
/**
 * One item of a record's `ner_entities` list. Only `text` is read in this
 * module (by the entity spec); any other keys are carried through untyped.
 */
type EntityRecord = {
  text?: string;
  [key: string]: unknown;
};
/**
 * A single corpus row as consumed by the explorer. Every known field is
 * optional and unknown extra columns are allowed via the index signature.
 */
type DatasetRecord = {
  id?: string | number;
  // Key used when building the post-id -> author lookup.
  post_id?: string | number | null;
  parent_id?: string | number | null;
  // Compared after trimming and lower-casing by the user / reply-pair specs.
  author?: string | null;
  title?: string | null;
  content?: string | null;
  // Numeric values are multiplied by 1000 before building a Date in this
  // module, i.e. they are treated as epoch seconds.
  timestamp?: string | number | null;
  date?: string | null;
  dt?: string | null;
  hour?: number | null;
  weekday?: string | null;
  // Resolved through the post-id -> author lookup by the reply-pair spec.
  reply_to?: string | number | null;
  source?: string | null;
  topic?: string | null;
  topic_confidence?: number | null;
  // "comment" is the only value this module tests for.
  type?: string | null;
  ner_entities?: EntityRecord[] | null;
  // Per-emotion scores; the largest one decides the dominant emotion.
  emotion_anger?: number | null;
  emotion_disgust?: number | null;
  emotion_fear?: number | null;
  emotion_joy?: number | null;
  emotion_sadness?: number | null;
  [key: string]: unknown;
};
/**
 * Lookup tables derived once from the full record list and handed to every
 * matcher call (see `buildExplorerContext`).
 */
type CorpusExplorerContext = {
  // `String(post_id)` -> trimmed author of a record carrying that post_id.
  authorByPostId: Map<string, string>;
  // Trimmed author -> number of records by that author.
  authorEventCounts: Map<string, number>;
  // Trimmed author -> number of records with `type === "comment"`.
  authorCommentCounts: Map<string, number>;
};
/**
 * Describes one explorer view: the dialog texts plus the predicate that
 * decides which records belong to it.
 */
type CorpusExplorerSpec = {
  title: string;
  description: string;
  // Optional; consumers supply their own fallback when it is absent.
  emptyMessage?: string;
  // Returns true for records that should be listed.
  matcher: (record: DatasetRecord, context: CorpusExplorerContext) => boolean;
};
// Pronoun patterns used to bucket a record as in-group / out-group.
// They carry the `g` flag, so calling `.test()` on them directly would be
// stateful; in this module they are only passed to `countMatches`, which
// builds a fresh RegExp from `.source`.
const IN_GROUP_PATTERN = /\b(we|us|our|ourselves)\b/gi;
const OUT_GROUP_PATTERN = /\b(they|them|their|themselves)\b/gi;
// Stance-marker patterns (case-insensitive, non-global) handed to
// `buildPatternSpec` by the hedge / certainty / deontic / permission specs.
const HEDGE_PATTERN = /\b(maybe|perhaps|possibly|probably|likely|seems|seem|i think|i feel|i guess|kind of|sort of|somewhat)\b/i;
const CERTAINTY_PATTERN = /\b(definitely|certainly|clearly|obviously|undeniably|always|never)\b/i;
const DEONTIC_PATTERN = /\b(must|should|need|needs|have to|has to|ought|required|require)\b/i;
const PERMISSION_PATTERN = /\b(can|allowed|okay|ok|permitted)\b/i;
// Record columns compared when picking a dominant emotion; on equal scores
// the earlier entry wins.
const EMOTION_KEYS = [
  "emotion_anger",
  "emotion_disgust",
  "emotion_fear",
  "emotion_joy",
  "emotion_sadness",
] as const;
// Compact padding / font-size overrides returned by `getExplorerButtonStyle`.
const shrinkButtonStyle: CSSProperties = {
  padding: "4px 8px",
  fontSize: 12,
};
/**
 * Best-effort string form of a loosely typed value.
 *
 * Strings pass through, numbers and booleans are stringified, and objects
 * exposing a string/number `id` property yield that id. Everything else
 * (null, undefined, functions, id-less objects, ...) becomes "".
 */
const toText = (value: unknown) => {
  switch (typeof value) {
    case "string":
      return value;
    case "number":
    case "boolean":
      return String(value);
    case "object": {
      if (value === null || !("id" in value)) {
        return "";
      }
      const id = (value as { id?: unknown }).id;
      return typeof id === "string" || typeof id === "number" ? String(id) : "";
    }
    default:
      return "";
  }
};

/** `toText` followed by trimming and lower-casing, for loose comparisons. */
const normalize = (value: unknown) => {
  const text = toText(value);
  return text.trim().toLowerCase();
};
/** Title and content joined by a single space, with outer whitespace trimmed. */
const getRecordText = (record: DatasetRecord) => {
  const title = record.title ?? "";
  const content = record.content ?? "";
  return [title, content].join(" ").trim();
};
/** Escapes every RegExp metacharacter in `value` so it matches literally. */
const escapeRegExp = (value: string) =>
  value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

/**
 * Builds a case-insensitive pattern that matches `phrase` as a whole phrase,
 * tolerating any run of whitespace between its words.
 *
 * The phrase is delimited with "no word character on either side" lookarounds
 * rather than `\b`. For phrases that start and end with a word character the
 * two are equivalent, but `\b` can never match next to a phrase edge that is
 * itself a non-word character (e.g. "c++" or "#tag"), which made such phrases
 * unmatchable.
 *
 * @param phrase Word or n-gram to look for.
 * @returns The compiled pattern, or `null` when the phrase is blank.
 */
const buildPhrasePattern = (phrase: string) => {
  const tokens = phrase
    .toLowerCase()
    .trim()
    .split(/\s+/)
    .filter(Boolean)
    .map(escapeRegExp);

  if (!tokens.length) {
    return null;
  }

  return new RegExp(`(?<!\\w)${tokens.join("\\s+")}(?!\\w)`, "i");
};
/**
 * Counts every case-insensitive occurrence of `pattern` in `text`.
 * Only the pattern's source is reused; its own flags are replaced by "gi".
 */
const countMatches = (pattern: RegExp, text: string) => {
  const everyOccurrence = new RegExp(pattern.source, "gi");
  const hits = text.match(everyOccurrence);
  return hits === null ? 0 : hits.length;
};
// Converts epoch seconds to a `YYYY-MM-DD` UTC day, or "" when the value is
// not finite or falls outside the range a Date can represent.
const epochSecondsToDay = (seconds: number) => {
  if (!Number.isFinite(seconds)) {
    return "";
  }
  const instant = new Date(seconds * 1000);
  // `toISOString` throws a RangeError on an invalid Date, so check first.
  return Number.isNaN(instant.getTime()) ? "" : instant.toISOString().slice(0, 10);
};

/**
 * Returns the day bucket (first ten characters, `YYYY-MM-DD` for ISO-style
 * values) a record belongs to.
 *
 * Sources are tried in order: `date` string, `dt` string, numeric
 * `timestamp` (epoch seconds), numeric-looking `timestamp` string.
 *
 * @param record Record to bucket.
 * @returns The bucket, or "" when no usable date information is present.
 *   Malformed timestamps yield "" rather than throwing, so a single bad row
 *   cannot abort a filter pass over the whole corpus.
 */
const getDateBucket = (record: DatasetRecord) => {
  if (typeof record.date === "string" && record.date) {
    return record.date.slice(0, 10);
  }

  if (typeof record.dt === "string" && record.dt) {
    return record.dt.slice(0, 10);
  }

  const { timestamp } = record;
  if (typeof timestamp === "number") {
    return epochSecondsToDay(timestamp);
  }

  if (typeof timestamp === "string" && timestamp) {
    return epochSecondsToDay(Number(timestamp));
  }

  return "";
};
/**
 * Name (without the `emotion_` prefix) of the highest-scoring emotion column.
 * Earlier columns win ties; a record with no usable scores yields "".
 */
const getDominantEmotion = (record: DatasetRecord) => {
  const strongest = EMOTION_KEYS.reduce(
    (best, key) => {
      // Missing scores rank below every real number.
      const score = Number(record[key] ?? Number.NEGATIVE_INFINITY);
      return score > best.score ? { key, score } : best;
    },
    { key: "", score: Number.NEGATIVE_INFINITY },
  );

  return strongest.key.replace("emotion_", "");
};
/** True when the record's lower-cased text contains `phrase` as a whole phrase. */
const matchesPhrase = (record: DatasetRecord, phrase: string) => {
  const pattern = buildPhrasePattern(phrase);
  // A blank phrase produces no pattern and therefore never matches.
  return pattern ? pattern.test(getRecordText(record).toLowerCase()) : false;
};
/**
 * Classifies a record by comparing its in-group and out-group pronoun counts:
 * "in" when in-group pronouns dominate, "out" when out-group ones do, "tie"
 * when the counts are equal (including both zero).
 */
const recordIdentityBucket = (record: DatasetRecord) => {
  const text = getRecordText(record).toLowerCase();
  const balance =
    countMatches(IN_GROUP_PATTERN, text) - countMatches(OUT_GROUP_PATTERN, text);

  if (balance === 0) {
    return "tie";
  }
  return balance > 0 ? "in" : "out";
};
/** Tallies how many records each (trimmed, non-empty) author has. */
const createAuthorEventCounts = (records: DatasetRecord[]) => {
  const tally = new Map<string, number>();

  records.forEach((entry) => {
    const name = toText(entry.author).trim();
    if (name) {
      tally.set(name, (tally.get(name) ?? 0) + 1);
    }
  });

  return tally;
};
/** Tallies, per (trimmed, non-empty) author, the records typed "comment". */
const createAuthorCommentCounts = (records: DatasetRecord[]) => {
  const tally = new Map<string, number>();

  records.forEach((entry) => {
    const name = toText(entry.author).trim();
    if (name && entry.type === "comment") {
      tally.set(name, (tally.get(name) ?? 0) + 1);
    }
  });

  return tally;
};
/**
 * Maps `String(post_id)` to the trimmed author of a record carrying that
 * post_id. When several records share a post_id the last one in `records`
 * wins.
 * NOTE(review): if comments carry their parent post's `post_id`, a comment
 * author would overwrite the post author here - confirm against the dataset
 * schema.
 */
const createAuthorByPostId = (records: DatasetRecord[]) => {
  const authors = new Map<string, string>();

  records.forEach((entry) => {
    const key = entry.post_id;
    const name = toText(entry.author).trim();
    if (key !== null && key !== undefined && name) {
      authors.set(String(key), name);
    }
  });

  return authors;
};
/** Derives all matcher lookup tables from the record list in one place. */
const buildExplorerContext = (records: DatasetRecord[]): CorpusExplorerContext => {
  const authorByPostId = createAuthorByPostId(records);
  const authorEventCounts = createAuthorEventCounts(records);
  const authorCommentCounts = createAuthorCommentCounts(records);

  return { authorByPostId, authorEventCounts, authorCommentCounts };
};
/** Spec that lists every record in the current filtered dataset. */
const buildAllRecordsSpec = (): CorpusExplorerSpec => {
  const acceptEverything = () => true;

  return {
    title: "Corpus Explorer",
    description: "All records in the current filtered dataset.",
    emptyMessage: "No records match the current filters.",
    matcher: acceptEverything,
  };
};
/** Spec listing records whose author equals `author`, ignoring case and outer whitespace. */
const buildUserSpec = (author: string): CorpusExplorerSpec => {
  const wanted = normalize(author);

  return {
    title: `User: ${author}`,
    description: `All records authored by ${author}.`,
    emptyMessage: `No records found for ${author}.`,
    matcher: (record) => wanted === normalize(record.author),
  };
};
/** Spec listing records whose `topic` equals `topic`, ignoring case and outer whitespace. */
const buildTopicSpec = (topic: string): CorpusExplorerSpec => {
  const wanted = normalize(topic);

  return {
    title: `Topic: ${topic}`,
    description: `Records assigned to the ${topic} topic bucket.`,
    emptyMessage: `No records found in the ${topic} topic bucket.`,
    matcher: (record) => wanted === normalize(record.topic),
  };
};
/** Spec listing records whose day bucket (see `getDateBucket`) equals `date` exactly. */
const buildDateBucketSpec = (date: string): CorpusExplorerSpec => {
  const isInBucket = (record: DatasetRecord) => date === getDateBucket(record);

  return {
    title: `Date Bucket: ${date}`,
    description: `Records from the ${date} activity bucket.`,
    emptyMessage: `No records found on ${date}.`,
    matcher: isInBucket,
  };
};
/**
 * Spec listing records whose text contains `word` as a whole word.
 *
 * @param word Word to look for; a blank word matches nothing.
 */
const buildWordSpec = (word: string): CorpusExplorerSpec => {
  // Compile the pattern once per spec; the matcher runs for every record in
  // the corpus and previously rebuilt the same RegExp on each call.
  const pattern = buildPhrasePattern(word);

  return {
    title: `Word: ${word}`,
    description: `Records containing the word ${word}.`,
    emptyMessage: `No records mention ${word}.`,
    matcher: (record) =>
      pattern !== null && pattern.test(getRecordText(record).toLowerCase()),
  };
};
/**
 * Spec listing records whose text contains the phrase `ngram`, with any run
 * of whitespace allowed between its words.
 *
 * @param ngram Phrase to look for; a blank phrase matches nothing.
 */
const buildNgramSpec = (ngram: string): CorpusExplorerSpec => {
  // Compile the pattern once per spec; the matcher runs for every record in
  // the corpus and previously rebuilt the same RegExp on each call.
  const pattern = buildPhrasePattern(ngram);

  return {
    title: `N-gram: ${ngram}`,
    description: `Records containing the phrase ${ngram}.`,
    emptyMessage: `No records contain the phrase ${ngram}.`,
    matcher: (record) =>
      pattern !== null && pattern.test(getRecordText(record).toLowerCase()),
  };
};
/**
 * Spec listing records that mention `entity`, either as the `text` of one of
 * their NER items (case-insensitive, trimmed) or as a phrase in their
 * title/content.
 *
 * @param entity Entity name; a blank name matches nothing.
 */
const buildEntitySpec = (entity: string): CorpusExplorerSpec => {
  // Both lookups are invariant across records, so compute them once.
  const target = normalize(entity);
  const pattern = buildPhrasePattern(entity);

  return {
    title: `Entity: ${entity}`,
    description: `Records mentioning the ${entity} entity.`,
    emptyMessage: `No records found for the ${entity} entity.`,
    matcher: (record) => {
      const entities = Array.isArray(record.ner_entities) ? record.ner_entities : [];
      // Without the blank guard, an empty entity name would equal the
      // normalised text of any NER item lacking `text` and match the record.
      if (target !== "" && entities.some((item) => normalize(item?.text) === target)) {
        return true;
      }
      return pattern !== null && pattern.test(getRecordText(record).toLowerCase());
    },
  };
};
/** Spec listing records whose `source` equals `source`, ignoring case and outer whitespace. */
const buildSourceSpec = (source: string): CorpusExplorerSpec => {
  const wanted = normalize(source);

  return {
    title: `Source: ${source}`,
    description: `Records from the ${source} source.`,
    emptyMessage: `No records found for ${source}.`,
    matcher: (record) => wanted === normalize(record.source),
  };
};
/** Spec listing records whose highest emotion score is `emotion` (compared lower-cased and trimmed). */
const buildDominantEmotionSpec = (emotion: string): CorpusExplorerSpec => {
  const wanted = normalize(emotion);

  return {
    title: `Dominant Emotion: ${emotion}`,
    description: `Records where ${emotion} is the strongest emotion score.`,
    emptyMessage: `No records found with dominant emotion ${emotion}.`,
    matcher: (record) => wanted === getDominantEmotion(record),
  };
};
/**
 * Spec listing records written by `source` whose `reply_to` resolves, through
 * the context's post-id lookup, to a record authored by `target`.
 * Author comparisons ignore case and outer whitespace.
 */
const buildReplyPairSpec = (source: string, target: string): CorpusExplorerSpec => {
  const replier = normalize(source);
  const recipient = normalize(target);

  return {
    title: `Reply Path: ${source} -> ${target}`,
    description: `Reply records authored by ${source} in response to ${target}.`,
    emptyMessage: `No reply records found for ${source} -> ${target}.`,
    matcher: (record, context) => {
      const replyTo = record.reply_to;
      const hasReplyTarget = replyTo !== null && replyTo !== undefined && replyTo !== "";
      if (!hasReplyTarget || normalize(record.author) !== replier) {
        return false;
      }

      // An unknown id resolves to undefined, which normalises to "".
      const repliedAuthor = context.authorByPostId.get(String(replyTo));
      return normalize(repliedAuthor) === recipient;
    },
  };
};
/** Spec listing records whose author has exactly one record in the corpus. */
const buildOneTimeUsersSpec = (): CorpusExplorerSpec => ({
  title: "One-Time Users",
  description: "Records written by authors who appear exactly once in the filtered corpus.",
  emptyMessage: "No one-time-user records found.",
  matcher: (record, context) => {
    const name = toText(record.author).trim();
    if (!name) {
      return false;
    }
    return context.authorEventCounts.get(name) === 1;
  },
});
/**
 * Spec listing comment records written by the `topAuthorCount` authors with
 * the most comments. Authors with equal counts keep the insertion order of
 * the context's comment-count map.
 *
 * @param topAuthorCount How many of the highest-ranked commenters to include.
 */
const buildTopCommentersSpec = (topAuthorCount: number): CorpusExplorerSpec => {
  // The ranking depends only on the context, so it is computed once per
  // context object instead of being re-sorted for every record.
  const rankingByContext = new WeakMap<CorpusExplorerContext, Set<string>>();

  const topAuthorsFor = (context: CorpusExplorerContext) => {
    let ranked = rankingByContext.get(context);
    if (!ranked) {
      ranked = new Set(
        Array.from(context.authorCommentCounts.entries())
          .sort((a, b) => b[1] - a[1])
          .slice(0, topAuthorCount)
          .map(([author]) => author),
      );
      rankingByContext.set(context, ranked);
    }
    return ranked;
  };

  return {
    title: "Top Commenters",
    description: `Comment records from the top ${topAuthorCount} commenters in the filtered corpus.`,
    emptyMessage: "No top-commenter records found.",
    matcher: (record, context) => {
      if (record.type !== "comment") {
        return false;
      }
      return topAuthorsFor(context).has(toText(record.author).trim());
    },
  };
};
/** Spec listing comment records whose author wrote exactly one comment. */
const buildSingleCommentAuthorsSpec = (): CorpusExplorerSpec => ({
  title: "Single-Comment Authors",
  description: "Comment records from authors who commented exactly once.",
  emptyMessage: "No single-comment-author records found.",
  matcher: (record, context) => {
    if (record.type !== "comment") {
      return false;
    }
    const name = toText(record.author).trim();
    return name !== "" && context.authorCommentCounts.get(name) === 1;
  },
});
/** Spec listing records that fall into the given in-group / out-group / tie bucket. */
const buildIdentityBucketSpec = (bucket: "in" | "out" | "tie"): CorpusExplorerSpec => {
  const headings = {
    in: "In-Group Posts",
    out: "Out-Group Posts",
    tie: "Balanced Posts",
  } as const;

  const heading = headings[bucket];
  const lowered = heading.toLowerCase();

  return {
    title: heading,
    description: `Records in the ${lowered} cultural bucket.`,
    emptyMessage: `No records found for ${lowered}.`,
    matcher: (record) => bucket === recordIdentityBucket(record),
  };
};
/**
 * Generic spec listing records whose title/content matches `pattern`.
 *
 * @param title Dialog title; also lower-cased into the empty-state message.
 * @param description Dialog description.
 * @param pattern Pattern tested against each record's text.
 */
const buildPatternSpec = (
  title: string,
  description: string,
  pattern: RegExp,
): CorpusExplorerSpec => {
  // `.test()` on a global or sticky RegExp resumes from `lastIndex`, so reusing
  // one across records would skip matches. Strip those flags up front; other
  // patterns are used as-is.
  const probe =
    pattern.global || pattern.sticky
      ? new RegExp(pattern.source, pattern.flags.replace(/[gy]/g, ""))
      : pattern;

  return {
    title,
    description,
    emptyMessage: `No records found for ${title.toLowerCase()}.`,
    matcher: (record) => probe.test(getRecordText(record)),
  };
};
/** Spec for records containing hedging language. */
const buildHedgeSpec = () => {
  const title = "Hedging Words";
  const description = "Records containing hedging language.";
  return buildPatternSpec(title, description, HEDGE_PATTERN);
};

/** Spec for records containing certainty language. */
const buildCertaintySpec = () => {
  const title = "Certainty Words";
  const description = "Records containing certainty language.";
  return buildPatternSpec(title, description, CERTAINTY_PATTERN);
};

/** Spec for records containing deontic (need/should) language. */
const buildDeonticSpec = () => {
  const title = "Need/Should Words";
  const description = "Records containing deontic language.";
  return buildPatternSpec(title, description, DEONTIC_PATTERN);
};

/** Spec for records containing permission language. */
const buildPermissionSpec = () => {
  const title = "Permission Words";
  const description = "Records containing permission language.";
  return buildPatternSpec(title, description, PERMISSION_PATTERN);
};
/** Returns the shared compact style object for explorer buttons (same instance on every call). */
const getExplorerButtonStyle = () => {
  return shrinkButtonStyle;
};
export type { DatasetRecord, CorpusExplorerContext, CorpusExplorerSpec };
export {
buildAllRecordsSpec,
buildCertaintySpec,
buildDateBucketSpec,
buildDeonticSpec,
buildDominantEmotionSpec,
buildEntitySpec,
buildExplorerContext,
buildHedgeSpec,
buildIdentityBucketSpec,
buildNgramSpec,
buildOneTimeUsersSpec,
buildPermissionSpec,
buildReplyPairSpec,
buildSingleCommentAuthorsSpec,
buildSourceSpec,
buildTopicSpec,
buildTopCommentersSpec,
buildUserSpec,
buildWordSpec,
getDateBucket,
getExplorerButtonStyle,
toText,
};

View File

@@ -1,4 +1,5 @@
import nltk import nltk
import json
import pandas as pd import pandas as pd
from nltk.corpus import stopwords from nltk.corpus import stopwords
@@ -27,6 +28,8 @@ DOMAIN_STOPWORDS = {
"one", "one",
} }
EXCLUDED_AUTHORS = {"[deleted]", "automoderator"}
nltk.download("stopwords") nltk.download("stopwords")
EXCLUDE_WORDS = set(stopwords.words("english")) | DOMAIN_STOPWORDS EXCLUDE_WORDS = set(stopwords.words("english")) | DOMAIN_STOPWORDS
@@ -46,6 +49,12 @@ class StatGen:
filters = filters or {} filters = filters or {}
filtered_df = df.copy() filtered_df = df.copy()
if "author" in filtered_df.columns:
normalized_authors = (
filtered_df["author"].fillna("").astype(str).str.strip().str.lower()
)
filtered_df = filtered_df[~normalized_authors.isin(EXCLUDED_AUTHORS)]
search_query = filters.get("search_query", None) search_query = filters.get("search_query", None)
start_date_filter = filters.get("start_date", None) start_date_filter = filters.get("start_date", None)
end_date_filter = filters.get("end_date", None) end_date_filter = filters.get("end_date", None)
@@ -75,9 +84,15 @@ class StatGen:
return filtered_df return filtered_df
def _json_ready_records(self, df: pd.DataFrame) -> list[dict]:
return json.loads(
df.to_json(orient="records", date_format="iso", date_unit="s")
)
## Public Methods ## Public Methods
def filter_dataset(self, df: pd.DataFrame, filters: dict | None = None) -> list[dict]: def filter_dataset(self, df: pd.DataFrame, filters: dict | None = None) -> list[dict]:
return self._prepare_filtered_df(df, filters).to_dict(orient="records") filtered_df = self._prepare_filtered_df(df, filters)
return self._json_ready_records(filtered_df)
def temporal(self, df: pd.DataFrame, filters: dict | None = None) -> dict: def temporal(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
filtered_df = self._prepare_filtered_df(df, filters) filtered_df = self._prepare_filtered_df(df, filters)

View File

@@ -591,7 +591,8 @@ def get_full_dataset(dataset_id: int):
) )
dataset_content = dataset_manager.get_dataset_content(dataset_id) dataset_content = dataset_manager.get_dataset_content(dataset_id)
return jsonify(dataset_content.to_dict(orient="records")), 200 filters = get_request_filters()
return jsonify(stat_gen.filter_dataset(dataset_content, filters)), 200
except NotAuthorisedException: except NotAuthorisedException:
return jsonify({"error": "User is not authorised to access this content"}), 403 return jsonify({"error": "User is not authorised to access this content"}), 403
except NonExistentDatasetException: except NonExistentDatasetException: