feat(frontend): implement corpus explorer

This allows you to view the posts & comments associated with a specific aggregate.
This commit is contained in:
2026-04-01 00:04:25 +01:00
parent 1dde5f7b08
commit b270ed03ae
11 changed files with 1064 additions and 179 deletions

View File

@@ -0,0 +1,175 @@
import { Dialog, DialogPanel, DialogTitle } from "@headlessui/react";
import StatsStyling from "../styles/stats_styling";
import type { DatasetRecord } from "../utils/corpusExplorer";
const styles = StatsStyling;
/**
 * Normalises an arbitrary value into display-ready text.
 *
 * Non-string values, blank strings and the placeholder tokens "nan", "null"
 * and "undefined" (compared case-insensitively after trimming) all collapse
 * to the empty string. Anything else is returned trimmed.
 */
const cleanText = (value: unknown) => {
  const candidate = typeof value === "string" ? value.trim() : "";
  const placeholders = ["nan", "null", "undefined"];
  return placeholders.includes(candidate.toLowerCase()) ? "" : candidate;
};
/**
 * Returns the cleaned text for `value`, or `fallback` when cleaning leaves
 * nothing to show.
 */
const displayText = (value: unknown, fallback: string) => {
  const text = cleanText(value);
  return text === "" ? fallback : text;
};
/** Props accepted by the CorpusExplorer modal. */
type CorpusExplorerProps = {
/** Whether the dialog is shown; forwarded to the Dialog `open` prop. */
open: boolean;
/** Called by the dialog's own close handling and by the "Close" button. */
onClose: () => void;
/** Heading rendered in the dialog title. */
title: string;
/** Text rendered in the subtitle, ahead of the loading note / record count. */
description: string;
/** Records to list; only rendered when not loading and no error is set. */
records: DatasetRecord[];
/** While true, the subtitle and body show loading placeholders. */
loading: boolean;
/** Error text; a non-empty value is rendered and suppresses the record list. */
error: string;
/** Shown when not loading, no error is set and `records` is empty. */
emptyMessage: string;
};
/**
 * Picks the best available time field of a record and formats it for display.
 *
 * Preference order:
 *   1. `dt`        - a string parseable by `Date`, shown via `toLocaleString()`
 *   2. `date`      - a non-empty string, shown verbatim
 *   3. `timestamp` - a number treated as epoch seconds, shown via `toLocaleString()`
 *
 * @returns the formatted time, or "Unknown time" when no field yields a valid date.
 */
const formatRecordDate = (record: DatasetRecord) => {
  if (typeof record.dt === "string" && record.dt) {
    const parsed = new Date(record.dt);
    // An unparseable string produces an invalid Date; fall through to the next field.
    if (!Number.isNaN(parsed.getTime())) {
      return parsed.toLocaleString();
    }
  }

  if (typeof record.date === "string" && record.date) {
    return record.date;
  }

  if (typeof record.timestamp === "number") {
    const fromEpoch = new Date(record.timestamp * 1000);
    // NaN, Infinity or out-of-range values would otherwise render "Invalid Date".
    if (!Number.isNaN(fromEpoch.getTime())) {
      return fromEpoch.toLocaleString();
    }
  }

  return "Unknown time";
};
/**
 * Builds the React list key for a record.
 *
 * The list position is always appended so the key stays unique even when
 * several records carry the same `id` or `post_id`.
 * NOTE(review): presumably comments reference their parent via `post_id`,
 * which would make the un-suffixed fallback collide - confirm against the schema.
 */
const getRecordKey = (record: DatasetRecord, index: number) =>
  `${String(record.id ?? record.post_id ?? record.author ?? "record")}-${index}`;
/**
 * Chooses the heading shown for a record.
 *
 * Comments get no heading (empty string). Other records use their cleaned
 * `title`; failing that, their cleaned `content` cut to at most 120
 * characters (117 plus "..."); failing that, "Untitled record".
 */
const getRecordTitle = (record: DatasetRecord) => {
  if (record.type === "comment") {
    return "";
  }

  const explicitTitle = cleanText(record.title);
  if (explicitTitle) {
    return explicitTitle;
  }

  const body = cleanText(record.content);
  if (body === "") {
    return "Untitled record";
  }
  if (body.length <= 120) {
    return body;
  }
  return `${body.slice(0, 117)}...`;
};
/**
 * Returns the body text shown under a record: the cleaned `content`, cut to
 * at most 320 characters (317 plus "..."), or a placeholder sentence when the
 * record has no usable content.
 */
const getRecordExcerpt = (record: DatasetRecord) => {
  const body = cleanText(record.content);
  if (body.length === 0) {
    return "No content available.";
  }
  return body.length <= 320 ? body : `${body.slice(0, 317)}...`;
};
/**
 * Modal dialog listing the corpus records behind a selected aggregate.
 *
 * The header always shows the title, the description and either a loading note
 * or the record count. Below the header it renders, as applicable: the error
 * text, the empty message (not loading, no error, no records), a loading
 * placeholder, and the scrollable record list (not loading, no error, at least
 * one record).
 */
const CorpusExplorer = ({
  open,
  onClose,
  title,
  description,
  records,
  loading,
  error,
  emptyMessage,
}: CorpusExplorerProps) => (
  <Dialog open={open} onClose={onClose} style={styles.modalRoot}>
    <div style={styles.modalBackdrop} />
    <div style={styles.modalContainer}>
      <DialogPanel
        style={{
          ...styles.card,
          ...styles.modalPanel,
          width: "min(960px, 96vw)",
          maxHeight: "88vh",
          display: "flex",
          flexDirection: "column",
          gap: 12,
        }}
      >
        <div style={styles.headerBar}>
          <div>
            <DialogTitle style={styles.sectionTitle}>{title}</DialogTitle>
            <p style={styles.sectionSubtitle}>
              {description} {loading ? "Loading records..." : `${records.length.toLocaleString()} records.`}
            </p>
          </div>
          {/* Explicit type so the button can never act as a form submit. */}
          <button type="button" onClick={onClose} style={styles.buttonSecondary}>
            Close
          </button>
        </div>
        {error ? <p style={styles.sectionSubtitle}>{error}</p> : null}
        {!loading && !error && !records.length ? (
          <p style={styles.sectionSubtitle}>{emptyMessage}</p>
        ) : null}
        {loading ? (
          <div style={styles.topUserMeta}>Preparing corpus slice...</div>
        ) : null}
        {!loading && !error && records.length ? (
          <div
            style={{
              ...styles.topUsersList,
              overflowY: "auto",
              paddingRight: 4,
            }}
          >
            {records.map((record, index) => {
              // Derive each display value once per record instead of once per use.
              const recordTitle = getRecordTitle(record);
              const recordTopic = cleanText(record.topic);
              return (
                <div key={getRecordKey(record, index)} style={styles.topUserItem}>
                  <div style={{ ...styles.headerBar, alignItems: "flex-start" }}>
                    <div>
                      {recordTitle ? (
                        <div style={styles.topUserName}>{recordTitle}</div>
                      ) : null}
                      <div style={styles.topUserMeta}>
                        {displayText(record.author, "Unknown author")} {displayText(record.source, "Unknown source")} {displayText(record.type, "record")} {formatRecordDate(record)}
                      </div>
                    </div>
                    <div style={styles.topUserMeta}>
                      {recordTopic ? `Topic: ${recordTopic}` : ""}
                    </div>
                  </div>
                  <div style={{ ...styles.topUserMeta, marginTop: 8, whiteSpace: "pre-wrap" }}>
                    {getRecordExcerpt(record)}
                  </div>
                </div>
              );
            })}
          </div>
        ) : null}
      </DialogPanel>
    </div>
  </Dialog>
);
export default CorpusExplorer;

View File

@@ -1,14 +1,34 @@
import Card from "./Card";
import StatsStyling from "../styles/stats_styling";
import type { CulturalAnalysisResponse } from "../types/ApiTypes";
import {
buildCertaintySpec,
buildDeonticSpec,
buildEntitySpec,
buildHedgeSpec,
buildIdentityBucketSpec,
buildPermissionSpec,
getExplorerButtonStyle,
type CorpusExplorerSpec,
} from "../utils/corpusExplorer";
const styles = StatsStyling;
type CulturalStatsProps = {
data: CulturalAnalysisResponse;
onExplore: (spec: CorpusExplorerSpec) => void;
};
const CulturalStats = ({ data }: CulturalStatsProps) => {
/**
 * Builds the small "Explore" button passed to Card `rightSlot` props below.
 * Its style is `styles.buttonSecondary` overlaid with `getExplorerButtonStyle()`.
 *
 * @param onClick handler invoked when the button is pressed
 */
const renderExploreButton = (onClick: () => void) => (
<button
onClick={onClick}
style={{ ...styles.buttonSecondary, ...getExplorerButtonStyle() }}
>
Explore
</button>
);
const CulturalStats = ({ data, onExplore }: CulturalStatsProps) => {
const identity = data.identity_markers;
const stance = data.stance_markers;
const inGroupWords = identity?.in_group_usage ?? 0;
@@ -30,7 +50,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
const topEmotion = (emotionAvg: Record<string, number> | undefined) => {
const entries = Object.entries(emotionAvg ?? {});
if (!entries.length) {
return "";
return "-";
}
entries.sort((a, b) => b[1] - a[1]);
@@ -64,21 +84,30 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
/>
<Card
label="In-Group Posts"
value={identity?.in_group_posts?.toLocaleString() ?? ""}
value={identity?.in_group_posts?.toLocaleString() ?? "-"}
sublabel='Posts leaning toward "us" language'
rightSlot={renderExploreButton(() =>
onExplore(buildIdentityBucketSpec("in")),
)}
style={{ gridColumn: "span 3" }}
/>
<Card
label="Out-Group Posts"
value={identity?.out_group_posts?.toLocaleString() ?? ""}
value={identity?.out_group_posts?.toLocaleString() ?? "-"}
sublabel='Posts leaning toward "them" language'
rightSlot={renderExploreButton(() =>
onExplore(buildIdentityBucketSpec("out")),
)}
style={{ gridColumn: "span 3" }}
/>
<Card
label="Balanced Posts"
value={identity?.tie_posts?.toLocaleString() ?? ""}
value={identity?.tie_posts?.toLocaleString() ?? "-"}
sublabel="Posts with equal us/them signals"
rightSlot={renderExploreButton(() =>
onExplore(buildIdentityBucketSpec("tie")),
)}
style={{ gridColumn: "span 3" }}
/>
<Card
@@ -90,7 +119,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
<Card
label="In-Group Share"
value={
inGroupWordRate === null ? "" : `${inGroupWordRate.toFixed(2)}%`
inGroupWordRate === null ? "-" : `${inGroupWordRate.toFixed(2)}%`
}
sublabel="Share of all words"
style={{ gridColumn: "span 3" }}
@@ -98,7 +127,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
<Card
label="Out-Group Share"
value={
outGroupWordRate === null ? "" : `${outGroupWordRate.toFixed(2)}%`
outGroupWordRate === null ? "-" : `${outGroupWordRate.toFixed(2)}%`
}
sublabel="Share of all words"
style={{ gridColumn: "span 3" }}
@@ -106,42 +135,46 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
<Card
label="Hedging Words"
value={stance?.hedge_total?.toLocaleString() ?? ""}
value={stance?.hedge_total?.toLocaleString() ?? "-"}
sublabel={
typeof stance?.hedge_per_1k_tokens === "number"
? `${stance.hedge_per_1k_tokens.toFixed(1)} per 1k words`
: "Word frequency"
}
rightSlot={renderExploreButton(() => onExplore(buildHedgeSpec()))}
style={{ gridColumn: "span 3" }}
/>
<Card
label="Certainty Words"
value={stance?.certainty_total?.toLocaleString() ?? ""}
value={stance?.certainty_total?.toLocaleString() ?? "-"}
sublabel={
typeof stance?.certainty_per_1k_tokens === "number"
? `${stance.certainty_per_1k_tokens.toFixed(1)} per 1k words`
: "Word frequency"
}
rightSlot={renderExploreButton(() => onExplore(buildCertaintySpec()))}
style={{ gridColumn: "span 3" }}
/>
<Card
label="Need/Should Words"
value={stance?.deontic_total?.toLocaleString() ?? ""}
value={stance?.deontic_total?.toLocaleString() ?? "-"}
sublabel={
typeof stance?.deontic_per_1k_tokens === "number"
? `${stance.deontic_per_1k_tokens.toFixed(1)} per 1k words`
: "Word frequency"
}
rightSlot={renderExploreButton(() => onExplore(buildDeonticSpec()))}
style={{ gridColumn: "span 3" }}
/>
<Card
label="Permission Words"
value={stance?.permission_total?.toLocaleString() ?? ""}
value={stance?.permission_total?.toLocaleString() ?? "-"}
sublabel={
typeof stance?.permission_per_1k_tokens === "number"
? `${stance.permission_per_1k_tokens.toFixed(1)} per 1k words`
: "Word frequency"
}
rightSlot={renderExploreButton(() => onExplore(buildPermissionSpec()))}
style={{ gridColumn: "span 3" }}
/>
@@ -150,8 +183,14 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
<p style={styles.sectionSubtitle}>
Most likely emotion when in-group wording is stronger.
</p>
<div style={styles.topUserName}>
{topEmotion(identity?.in_group_emotion_avg)}
<div style={styles.topUserName}>{topEmotion(identity?.in_group_emotion_avg)}</div>
<div style={{ marginTop: 12 }}>
<button
onClick={() => onExplore(buildIdentityBucketSpec("in"))}
style={styles.buttonSecondary}
>
Explore records
</button>
</div>
</div>
@@ -160,8 +199,14 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
<p style={styles.sectionSubtitle}>
Most likely emotion when out-group wording is stronger.
</p>
<div style={styles.topUserName}>
{topEmotion(identity?.out_group_emotion_avg)}
<div style={styles.topUserName}>{topEmotion(identity?.out_group_emotion_avg)}</div>
<div style={{ marginTop: 12 }}>
<button
onClick={() => onExplore(buildIdentityBucketSpec("out"))}
style={styles.buttonSecondary}
>
Explore records
</button>
</div>
</div>
@@ -171,9 +216,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
Most mentioned entities and the mood that appears most with each.
</p>
{!entities.length ? (
<div style={styles.topUserMeta}>
No entity-level cultural data available.
</div>
<div style={styles.topUserMeta}>No entity-level cultural data available.</div>
) : (
<div
style={{
@@ -183,7 +226,11 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
}}
>
{entities.map(([entity, aggregate]) => (
<div key={entity} style={styles.topUserItem}>
<div
key={entity}
style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => onExplore(buildEntitySpec(entity))}
>
<div style={styles.topUserName}>{entity}</div>
<div style={styles.topUserMeta}>
{aggregate.post_count.toLocaleString()} posts Likely mood:{" "}

View File

@@ -1,13 +1,20 @@
import type { EmotionalAnalysisResponse } from "../types/ApiTypes";
import StatsStyling from "../styles/stats_styling";
import {
buildDominantEmotionSpec,
buildSourceSpec,
buildTopicSpec,
type CorpusExplorerSpec,
} from "../utils/corpusExplorer";
const styles = StatsStyling;
type EmotionalStatsProps = {
emotionalData: EmotionalAnalysisResponse;
onExplore: (spec: CorpusExplorerSpec) => void;
};
const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
const EmotionalStats = ({ emotionalData, onExplore }: EmotionalStatsProps) => {
const rows = emotionalData.average_emotion_by_topic ?? [];
const overallEmotionAverage = emotionalData.overall_emotion_average ?? [];
const dominantEmotionDistribution =
@@ -126,7 +133,11 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
{[...overallEmotionAverage]
.sort((a, b) => b.score - a.score)
.map((row) => (
<div key={row.emotion} style={styles.topUserItem}>
<div
key={row.emotion}
style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => onExplore(buildDominantEmotionSpec(row.emotion))}
>
<div style={styles.topUserName}>
{formatEmotion(row.emotion)}
</div>
@@ -157,7 +168,11 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
{[...dominantEmotionDistribution]
.sort((a, b) => b.ratio - a.ratio)
.map((row) => (
<div key={row.emotion} style={styles.topUserItem}>
<div
key={row.emotion}
style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => onExplore(buildDominantEmotionSpec(row.emotion))}
>
<div style={styles.topUserName}>
{formatEmotion(row.emotion)}
</div>
@@ -189,7 +204,11 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
{[...emotionBySource]
.sort((a, b) => b.event_count - a.event_count)
.map((row) => (
<div key={row.source} style={styles.topUserItem}>
<div
key={row.source}
style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => onExplore(buildSourceSpec(row.source))}
>
<div style={styles.topUserName}>{row.source}</div>
<div style={styles.topUserMeta}>
{formatEmotion(row.dominant_emotion)} {" "}
@@ -211,7 +230,8 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
{strongestPerTopic.map((topic) => (
<div
key={topic.topic}
style={{ ...styles.cardBase, gridColumn: "span 4" }}
style={{ ...styles.cardBase, gridColumn: "span 4", cursor: "pointer" }}
onClick={() => onExplore(buildTopicSpec(topic.topic))}
>
<h3 style={{ ...styles.sectionTitle, marginBottom: 6 }}>
{topic.topic}

View File

@@ -1,14 +1,20 @@
import Card from "./Card";
import StatsStyling from "../styles/stats_styling";
import type { LinguisticAnalysisResponse } from "../types/ApiTypes";
import {
buildNgramSpec,
buildWordSpec,
type CorpusExplorerSpec,
} from "../utils/corpusExplorer";
const styles = StatsStyling;
type LinguisticStatsProps = {
data: LinguisticAnalysisResponse;
onExplore: (spec: CorpusExplorerSpec) => void;
};
const LinguisticStats = ({ data }: LinguisticStatsProps) => {
const LinguisticStats = ({ data, onExplore }: LinguisticStatsProps) => {
const lexical = data.lexical_diversity;
const words = data.word_frequencies ?? [];
const bigrams = data.common_two_phrases ?? [];
@@ -60,7 +66,11 @@ const LinguisticStats = ({ data }: LinguisticStatsProps) => {
}}
>
{topWords.map((item) => (
<div key={item.word} style={styles.topUserItem}>
<div
key={item.word}
style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => onExplore(buildWordSpec(item.word))}
>
<div style={styles.topUserName}>{item.word}</div>
<div style={styles.topUserMeta}>
{item.count.toLocaleString()} uses
@@ -81,7 +91,11 @@ const LinguisticStats = ({ data }: LinguisticStatsProps) => {
}}
>
{topBigrams.map((item) => (
<div key={item.ngram} style={styles.topUserItem}>
<div
key={item.ngram}
style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => onExplore(buildNgramSpec(item.ngram))}
>
<div style={styles.topUserName}>{item.ngram}</div>
<div style={styles.topUserMeta}>
{item.count.toLocaleString()} uses
@@ -102,7 +116,11 @@ const LinguisticStats = ({ data }: LinguisticStatsProps) => {
}}
>
{topTrigrams.map((item) => (
<div key={item.ngram} style={styles.topUserItem}>
<div
key={item.ngram}
style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => onExplore(buildNgramSpec(item.ngram))}
>
<div style={styles.topUserName}>{item.ngram}</div>
<div style={styles.topUserMeta}>
{item.count.toLocaleString()} uses

View File

@@ -1,4 +1,4 @@
import { memo, useMemo, useState } from "react";
import { memo, useMemo } from "react";
import {
LineChart,
Line,
@@ -13,7 +13,6 @@ import ActivityHeatmap from "../stats/ActivityHeatmap";
import { ReactWordcloud } from "@cp949/react-wordcloud";
import StatsStyling from "../styles/stats_styling";
import Card from "../components/Card";
import UserModal from "../components/UserModal";
import {
type SummaryResponse,
@@ -21,8 +20,15 @@ import {
type UserEndpointResponse,
type TimeAnalysisResponse,
type LinguisticAnalysisResponse,
type User,
} from "../types/ApiTypes";
import {
buildAllRecordsSpec,
buildDateBucketSpec,
buildOneTimeUsersSpec,
buildUserSpec,
getExplorerButtonStyle,
type CorpusExplorerSpec,
} from "../utils/corpusExplorer";
const styles = StatsStyling;
const MAX_WORDCLOUD_WORDS = 250;
@@ -39,6 +45,7 @@ type SummaryStatsProps = {
timeData: TimeAnalysisResponse | null;
linguisticData: LinguisticAnalysisResponse | null;
summary: SummaryResponse | null;
onExplore: (spec: CorpusExplorerSpec) => void;
};
type WordCloudPanelProps = {
@@ -60,7 +67,7 @@ function formatDateRange(startUnix: number, endUnix: number) {
day: "2-digit",
});
return `${fmt(start)} ${fmt(end)}`;
return `${fmt(start)} -> ${fmt(end)}`;
}
function convertFrequencyData(data: FrequencyWord[]) {
@@ -70,25 +77,22 @@ function convertFrequencyData(data: FrequencyWord[]) {
}));
}
/**
 * Builds the small "Explore" button passed to the summary Cards' `rightSlot`.
 * Its style is `styles.buttonSecondary` overlaid with `getExplorerButtonStyle()`.
 *
 * @param onClick handler invoked when the button is pressed
 */
const renderExploreButton = (onClick: () => void) => (
<button
onClick={onClick}
style={{ ...styles.buttonSecondary, ...getExplorerButtonStyle() }}
>
Explore
</button>
);
const SummaryStats = ({
userData,
timeData,
linguisticData,
summary,
onExplore,
}: SummaryStatsProps) => {
const [selectedUser, setSelectedUser] = useState<string | null>(null);
const usersByAuthor = useMemo(() => {
const nextMap = new Map<string, User>();
for (const user of userData?.users ?? []) {
nextMap.set(user.author, user);
}
return nextMap;
}, [userData?.users]);
const selectedUserData: User | null = selectedUser
? usersByAuthor.get(selectedUser) ?? null
: null;
const wordCloudWords = useMemo(
() =>
convertFrequencyData(
@@ -104,49 +108,41 @@ const SummaryStats = ({
return (
<div style={styles.page}>
{/* main grid*/}
<div style={{ ...styles.container, ...styles.grid }}>
<Card
label="Total Activity"
value={summary?.total_events ?? ""}
value={summary?.total_events ?? "-"}
sublabel="Posts + comments"
style={{
gridColumn: "span 4",
}}
rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
style={{ gridColumn: "span 4" }}
/>
<Card
label="Active People"
value={summary?.unique_users ?? ""}
value={summary?.unique_users ?? "-"}
sublabel="Distinct users"
style={{
gridColumn: "span 4",
}}
rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
style={{ gridColumn: "span 4" }}
/>
<Card
label="Posts vs Comments"
value={
summary ? `${summary.total_posts} / ${summary.total_comments}` : ""
summary ? `${summary.total_posts} / ${summary.total_comments}` : "-"
}
sublabel={`Comments per post: ${summary?.comments_per_post ?? ""}`}
style={{
gridColumn: "span 4",
}}
sublabel={`Comments per post: ${summary?.comments_per_post ?? "-"}`}
rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
style={{ gridColumn: "span 4" }}
/>
<Card
label="Time Range"
value={
summary?.time_range
? formatDateRange(
summary.time_range.start,
summary.time_range.end,
)
: "—"
? formatDateRange(summary.time_range.start, summary.time_range.end)
: "-"
}
sublabel="Based on dataset timestamps"
style={{
gridColumn: "span 4",
}}
rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
style={{ gridColumn: "span 4" }}
/>
<Card
@@ -154,38 +150,44 @@ const SummaryStats = ({
value={
typeof summary?.lurker_ratio === "number"
? `${Math.round(summary.lurker_ratio * 100)}%`
: ""
: "-"
}
sublabel="Users with only one event"
style={{
gridColumn: "span 4",
}}
rightSlot={renderExploreButton(() => onExplore(buildOneTimeUsersSpec()))}
style={{ gridColumn: "span 4" }}
/>
<Card
label="Sources"
value={summary?.sources?.length ?? ""}
value={summary?.sources?.length ?? "-"}
sublabel={
summary?.sources?.length
? summary.sources.slice(0, 3).join(", ") +
(summary.sources.length > 3 ? "" : "")
: ""
(summary.sources.length > 3 ? "..." : "")
: "-"
}
style={{
gridColumn: "span 4",
}}
rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
style={{ gridColumn: "span 4" }}
/>
{/* events per day */}
<div style={{ ...styles.card, gridColumn: "span 5" }}>
<h2 style={styles.sectionTitle}>Activity Over Time</h2>
<p style={styles.sectionSubtitle}>
How much posting happened each day.
</p>
<p style={styles.sectionSubtitle}>How much posting happened each day.</p>
<div style={styles.chartWrapper}>
<ResponsiveContainer width="100%" height="100%">
<LineChart data={timeData?.events_per_day ?? []}>
<LineChart
data={timeData?.events_per_day ?? []}
onClick={(state: unknown) => {
const payload = (state as { activePayload?: Array<{ payload?: { date?: string } }> })
?.activePayload?.[0]?.payload as
| { date?: string }
| undefined;
if (payload?.date) {
onExplore(buildDateBucketSpec(String(payload.date)));
}
}}
>
<CartesianGrid strokeDasharray="3 3" />
<XAxis dataKey="date" />
<YAxis />
@@ -201,7 +203,6 @@ const SummaryStats = ({
</div>
</div>
{/* Word Cloud */}
<div style={{ ...styles.card, gridColumn: "span 4" }}>
<h2 style={styles.sectionTitle}>Common Words</h2>
<p style={styles.sectionSubtitle}>
@@ -213,7 +214,6 @@ const SummaryStats = ({
</div>
</div>
{/* Top Users */}
<div
style={{ ...styles.card, ...styles.scrollArea, gridColumn: "span 3" }}
>
@@ -225,7 +225,7 @@ const SummaryStats = ({
<div
key={`${item.author}-${item.source}`}
style={{ ...styles.topUserItem, cursor: "pointer" }}
onClick={() => setSelectedUser(item.author)}
onClick={() => onExplore(buildUserSpec(item.author))}
>
<div style={styles.topUserName}>{item.author}</div>
<div style={styles.topUserMeta}>
@@ -236,7 +236,6 @@ const SummaryStats = ({
</div>
</div>
{/* Heatmap */}
<div style={{ ...styles.card, gridColumn: "span 12" }}>
<h2 style={styles.sectionTitle}>Weekly Activity Pattern</h2>
<p style={styles.sectionSubtitle}>
@@ -248,13 +247,6 @@ const SummaryStats = ({
</div>
</div>
</div>
<UserModal
open={!!selectedUser}
onClose={() => setSelectedUser(null)}
username={selectedUser ?? ""}
userData={selectedUserData}
/>
</div>
);
};

View File

@@ -5,6 +5,12 @@ import { type TopUser, type InteractionGraph } from "../types/ApiTypes";
import StatsStyling from "../styles/stats_styling";
import Card from "./Card";
import {
buildReplyPairSpec,
toText,
buildUserSpec,
type CorpusExplorerSpec,
} from "../utils/corpusExplorer";
const styles = StatsStyling;
@@ -39,6 +45,7 @@ type UserStatsProps = {
interactionGraph: InteractionGraph;
totalUsers: number;
mostCommentHeavyUser: { author: string; commentShare: number } | null;
onExplore: (spec: CorpusExplorerSpec) => void;
};
const UserStats = ({
@@ -46,6 +53,7 @@ const UserStats = ({
interactionGraph,
totalUsers,
mostCommentHeavyUser,
onExplore,
}: UserStatsProps) => {
const graphData = useMemo(
() => ApiToGraphData(interactionGraph),
@@ -87,9 +95,9 @@ const UserStats = ({
null,
);
const mostActiveUser = topUsers.find(
(u) => u.author !== "[deleted]",
);
const mostActiveUser = topUsers.find((u) => u.author !== "[deleted]");
const strongestLinkSource = strongestLink ? toText(strongestLink.source) : "";
const strongestLinkTarget = strongestLink ? toText(strongestLink.target) : "";
return (
<div style={styles.page}>
@@ -114,37 +122,69 @@ const UserStats = ({
/>
<Card
label="Most Active User"
value={mostActiveUser?.author ?? ""}
value={mostActiveUser?.author ?? "-"}
sublabel={
mostActiveUser
? `${mostActiveUser.count.toLocaleString()} events`
: "No user activity found"
}
rightSlot={
mostActiveUser ? (
<button
onClick={() => onExplore(buildUserSpec(mostActiveUser.author))}
style={styles.buttonSecondary}
>
Explore
</button>
) : null
}
style={{ gridColumn: "span 3" }}
/>
<Card
label="Strongest User Link"
value={
strongestLink
? `${strongestLink.source} -> ${strongestLink.target}`
: ""
strongestLinkSource && strongestLinkTarget
? `${strongestLinkSource} -> ${strongestLinkTarget}`
: "-"
}
sublabel={
strongestLink
? `${strongestLink.value.toLocaleString()} replies`
: "No graph links after filtering"
}
rightSlot={
strongestLinkSource && strongestLinkTarget ? (
<button
onClick={() =>
onExplore(buildReplyPairSpec(strongestLinkSource, strongestLinkTarget))
}
style={styles.buttonSecondary}
>
Explore
</button>
) : null
}
style={{ gridColumn: "span 6" }}
/>
<Card
label="Most Comment-Heavy User"
value={mostCommentHeavyUser?.author ?? ""}
value={mostCommentHeavyUser?.author ?? "-"}
sublabel={
mostCommentHeavyUser
? `${Math.round(mostCommentHeavyUser.commentShare * 100)}% comments`
: "No user distribution available"
}
rightSlot={
mostCommentHeavyUser ? (
<button
onClick={() => onExplore(buildUserSpec(mostCommentHeavyUser.author))}
style={styles.buttonSecondary}
>
Explore
</button>
) : null
}
style={{ gridColumn: "span 6" }}
/>
@@ -166,6 +206,19 @@ const UserStats = ({
linkDirectionalParticleSpeed={0.004}
linkWidth={(link) => Math.sqrt(Number(link.value))}
nodeLabel={(node) => `${node.id}`}
onNodeClick={(node) => {
const userId = toText(node.id);
if (userId) {
onExplore(buildUserSpec(userId));
}
}}
onLinkClick={(link) => {
const source = toText(link.source);
const target = toText(link.target);
if (source && target) {
onExplore(buildReplyPairSpec(source, target));
}
}}
/>
</div>
</div>

View File

@@ -22,12 +22,10 @@ const DatasetEditPage = () => {
const [isSaving, setIsSaving] = useState(false);
const [isDeleting, setIsDeleting] = useState(false);
const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false);
const [hasError, setHasError] = useState(false);
const [datasetName, setDatasetName] = useState("");
useEffect(() => {
if (!Number.isInteger(parsedDatasetId) || parsedDatasetId <= 0) {
setHasError(true);
setStatusMessage("Invalid dataset id.");
setLoading(false);
return;
@@ -35,7 +33,6 @@ const DatasetEditPage = () => {
const token = localStorage.getItem("access_token");
if (!token) {
setHasError(true);
setStatusMessage("You must be signed in to edit datasets.");
setLoading(false);
return;
@@ -49,7 +46,6 @@ const DatasetEditPage = () => {
setDatasetName(response.data.name || "");
})
.catch((error: unknown) => {
setHasError(true);
if (axios.isAxiosError(error)) {
setStatusMessage(
String(error.response?.data?.error || error.message),
@@ -68,21 +64,18 @@ const DatasetEditPage = () => {
const trimmedName = datasetName.trim();
if (!trimmedName) {
setHasError(true);
setStatusMessage("Please enter a valid dataset name.");
return;
}
const token = localStorage.getItem("access_token");
if (!token) {
setHasError(true);
setStatusMessage("You must be signed in to save changes.");
return;
}
try {
setIsSaving(true);
setHasError(false);
setStatusMessage("");
await axios.patch(
@@ -93,7 +86,6 @@ const DatasetEditPage = () => {
navigate("/datasets", { replace: true });
} catch (error: unknown) {
setHasError(true);
if (axios.isAxiosError(error)) {
setStatusMessage(
String(
@@ -111,7 +103,6 @@ const DatasetEditPage = () => {
const deleteDataset = async () => {
const deleteToken = localStorage.getItem("access_token");
if (!deleteToken) {
setHasError(true);
setStatusMessage("You must be signed in to delete datasets.");
setIsDeleteModalOpen(false);
return;
@@ -119,7 +110,6 @@ const DatasetEditPage = () => {
try {
setIsDeleting(true);
setHasError(false);
setStatusMessage("");
await axios.delete(`${API_BASE_URL}/dataset/${parsedDatasetId}`, {
@@ -129,7 +119,6 @@ const DatasetEditPage = () => {
setIsDeleteModalOpen(false);
navigate("/datasets", { replace: true });
} catch (error: unknown) {
setHasError(true);
if (axios.isAxiosError(error)) {
setStatusMessage(
String(

View File

@@ -1,4 +1,4 @@
import { useEffect, useState, useRef } from "react";
import { useEffect, useRef, useState } from "react";
import axios from "axios";
import { useParams } from "react-router-dom";
import StatsStyling from "../styles/stats_styling";
@@ -8,6 +8,7 @@ import UserStats from "../components/UserStats";
import LinguisticStats from "../components/LinguisticStats";
import InteractionalStats from "../components/InteractionalStats";
import CulturalStats from "../components/CulturalStats";
import CorpusExplorer from "../components/CorpusExplorer";
import {
type SummaryResponse,
@@ -19,10 +20,15 @@ import {
type InteractionAnalysisResponse,
type CulturalAnalysisResponse,
} from "../types/ApiTypes";
import {
buildExplorerContext,
type CorpusExplorerSpec,
type DatasetRecord,
} from "../utils/corpusExplorer";
const API_BASE_URL = import.meta.env.VITE_BACKEND_URL;
const styles = StatsStyling;
const DELETED_USERS = ["[deleted]"];
const DELETED_USERS = ["[deleted]", "automoderator"];
const isDeletedUser = (value: string | null | undefined) =>
DELETED_USERS.includes((value ?? "").trim().toLowerCase());
@@ -40,6 +46,97 @@ type UserStatsMeta = {
mostCommentHeavyUser: { author: string; commentShare: number } | null;
};
/**
 * State backing the corpus explorer on the stats page.
 * NOTE(review): the fields line up with the CorpusExplorer props; presumably
 * the state is passed straight through to that component - confirm at the render site.
 */
type ExplorerState = {
/** True once an explorer has been opened; reset to false when stats are reloaded. */
open: boolean;
/** Title taken from the spec that opened the explorer. */
title: string;
/** Description taken from the spec that opened the explorer. */
description: string;
/** Text for the no-results case; the spec's message or a generic default. */
emptyMessage: string;
/** Records matched by the spec; empty while loading or after a failure. */
records: DatasetRecord[];
/** True while the records are being fetched and filtered. */
loading: boolean;
/** Failure message, or "" when there is no error. */
error: string;
};
/** Closed, empty explorer; used as the initial value of the explorer state. */
const EMPTY_EXPLORER_STATE: ExplorerState = {
open: false,
title: "Corpus Explorer",
description: "",
emptyMessage: "No records found.",
records: [],
loading: false,
error: "",
};
/**
 * Coerces whatever the corpus endpoint returned into a flat record array.
 *
 * Accepted shapes, checked in this order:
 *   - a JSON string containing any of the shapes below
 *   - an object with a string `error` field (rethrown as an Error)
 *   - a bare array
 *   - an object wrapping the array under `data`, `records`, `rows` or `result`
 *   - an object whose single value is an array
 *   - an object whose values are all objects (the values become the records)
 *
 * @throws Error with the server-provided message when the payload carries `error`
 * @throws Error when a string payload is not valid JSON
 * @throws Error when the payload matches none of the accepted shapes
 */
const normalizeRecordPayload = (payload: unknown): DatasetRecord[] => {
  if (typeof payload === "string") {
    let parsed: unknown;
    try {
      parsed = JSON.parse(payload);
    } catch {
      throw new Error("Corpus endpoint returned a non-JSON string payload.");
    }
    // Recurse outside the try block: errors raised for the parsed payload
    // (server error messages, unexpected shapes) must not be relabelled as
    // a JSON parsing failure.
    return normalizeRecordPayload(parsed);
  }

  if (!payload || typeof payload !== "object") {
    throw new Error("Corpus endpoint returned an unexpected payload.");
  }

  const envelope = payload as Record<string, unknown>;

  if (typeof envelope.error === "string") {
    throw new Error(envelope.error);
  }

  if (Array.isArray(payload)) {
    return payload as DatasetRecord[];
  }

  // Well-known wrapper keys, in priority order.
  const wrapperKeys = ["data", "records", "rows", "result"];
  for (const key of wrapperKeys) {
    const wrapped = envelope[key];
    if (Array.isArray(wrapped)) {
      return wrapped as DatasetRecord[];
    }
  }

  const values = Object.values(envelope);
  // A single unknown key holding an array is treated as another wrapper.
  if (values.length === 1 && Array.isArray(values[0])) {
    return values[0] as DatasetRecord[];
  }
  // An object of objects is treated as records keyed by some identifier.
  if (values.every((value) => value && typeof value === "object")) {
    return values as DatasetRecord[];
  }

  throw new Error("Corpus endpoint returned an unexpected payload.");
};
const StatPage = () => {
const { datasetId: routeDatasetId } = useParams<{ datasetId: string }>();
const [error, setError] = useState("");
@@ -61,6 +158,12 @@ const StatPage = () => {
totalUsers: 0,
mostCommentHeavyUser: null,
});
const [appliedFilters, setAppliedFilters] = useState<Record<string, string>>({});
const [allRecords, setAllRecords] = useState<DatasetRecord[] | null>(null);
const [allRecordsKey, setAllRecordsKey] = useState("");
const [explorerState, setExplorerState] = useState<ExplorerState>(
EMPTY_EXPLORER_STATE,
);
const searchInputRef = useRef<HTMLInputElement>(null);
const beforeDateRef = useRef<HTMLInputElement>(null);
@@ -104,6 +207,82 @@ const StatPage = () => {
};
};
/**
 * Serialises a filter map into a string that does not depend on key insertion
 * order, so two maps with the same entries yield the same key.
 */
const getFilterKey = (params: Record<string, string>) => {
  const orderedEntries = Object.entries(params);
  orderedEntries.sort((left, right) => left[0].localeCompare(right[0]));
  return JSON.stringify(orderedEntries);
};
/**
 * Returns every record of the dataset under the currently applied filters.
 *
 * Records already held in state for the same filter key are reused; otherwise
 * they are fetched from the dataset's `/all` endpoint, normalised, stored in
 * state together with their filter key, and returned.
 *
 * @throws Error when the dataset id or auth headers are missing, when the
 *         request fails, or when the payload cannot be normalised
 */
const ensureFilteredRecords = async () => {
  if (!datasetId) {
    throw new Error("Missing dataset id.");
  }

  const authHeaders = getAuthHeaders();
  if (!authHeaders) {
    throw new Error("You must be signed in to load corpus records.");
  }

  const filterKey = getFilterKey(appliedFilters);
  const cachedForTheseFilters = allRecords && allRecordsKey === filterKey;
  if (cachedForTheseFilters) {
    return allRecords;
  }

  const endpoint = `${API_BASE_URL}/dataset/${datasetId}/all`;
  const { data } = await axios.get<unknown>(endpoint, {
    params: appliedFilters,
    headers: authHeaders,
  });

  const fetchedRecords = normalizeRecordPayload(data);
  setAllRecords(fetchedRecords);
  setAllRecordsKey(filterKey);
  return fetchedRecords;
};
/**
 * Opens the corpus explorer for `spec`.
 *
 * The explorer is shown immediately in its loading state. Once the filtered
 * records are available they are narrowed with `spec.matcher`, sorted newest
 * first by the string form of `dt`, `date` or `timestamp`, and displayed. Any
 * failure is shown as an error message inside the explorer.
 *
 * NOTE(review): nothing here guards against an earlier, slower call finishing
 * after a newer one (or after the stats were reloaded) and overwriting its
 * state; fixing that needs a request token kept outside this function.
 */
const openExplorer = async (spec: CorpusExplorerSpec) => {
  // Fields that stay the same across the loading, success and failure states.
  const baseState = {
    open: true,
    title: spec.title,
    description: spec.description,
    emptyMessage: spec.emptyMessage ?? "No matching records found.",
  };

  setExplorerState({ ...baseState, records: [], loading: true, error: "" });

  try {
    const records = await ensureFilteredRecords();
    const context = buildExplorerContext(records);
    const sortKey = (record: DatasetRecord) =>
      String(record.dt ?? record.date ?? record.timestamp ?? "");

    // `filter` returns a fresh array, so sorting never reorders the cached records.
    const matched = records
      .filter((record) => spec.matcher(record, context))
      .sort((a, b) => sortKey(b).localeCompare(sortKey(a)));

    setExplorerState({ ...baseState, records: matched, loading: false, error: "" });
  } catch (e: unknown) {
    // Use the bare message: String(error) would prepend "Error: " to the text.
    const reason = e instanceof Error ? e.message : String(e);
    setExplorerState({
      ...baseState,
      records: [],
      loading: false,
      error: `Failed to load corpus records: ${reason}`,
    });
  }
};
const getStats = (params: Record<string, string> = {}) => {
if (!datasetId) {
setError("Missing dataset id. Open /dataset/<id>/stats.");
@@ -118,22 +297,20 @@ const StatPage = () => {
setError("");
setLoading(true);
setAppliedFilters(params);
setAllRecords(null);
setAllRecordsKey("");
setExplorerState((current) => ({ ...current, open: false }));
Promise.all([
axios.get<TimeAnalysisResponse>(
`${API_BASE_URL}/dataset/${datasetId}/temporal`,
{
params,
headers: authHeaders,
},
),
axios.get<UserEndpointResponse>(
`${API_BASE_URL}/dataset/${datasetId}/user`,
{
params,
headers: authHeaders,
},
),
axios.get<TimeAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/temporal`, {
params,
headers: authHeaders,
}),
axios.get<UserEndpointResponse>(`${API_BASE_URL}/dataset/${datasetId}/user`, {
params,
headers: authHeaders,
}),
axios.get<LinguisticAnalysisResponse>(
`${API_BASE_URL}/dataset/${datasetId}/linguistic`,
{
@@ -141,13 +318,10 @@ const StatPage = () => {
headers: authHeaders,
},
),
axios.get<EmotionalAnalysisResponse>(
`${API_BASE_URL}/dataset/${datasetId}/emotional`,
{
params,
headers: authHeaders,
},
),
axios.get<EmotionalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/emotional`, {
params,
headers: authHeaders,
}),
axios.get<InteractionAnalysisResponse>(
`${API_BASE_URL}/dataset/${datasetId}/interactional`,
{
@@ -155,20 +329,14 @@ const StatPage = () => {
headers: authHeaders,
},
),
axios.get<SummaryResponse>(
`${API_BASE_URL}/dataset/${datasetId}/summary`,
{
params,
headers: authHeaders,
},
),
axios.get<CulturalAnalysisResponse>(
`${API_BASE_URL}/dataset/${datasetId}/cultural`,
{
params,
headers: authHeaders,
},
),
axios.get<SummaryResponse>(`${API_BASE_URL}/dataset/${datasetId}/summary`, {
params,
headers: authHeaders,
}),
axios.get<CulturalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/cultural`, {
params,
headers: authHeaders,
}),
])
.then(
([
@@ -182,8 +350,7 @@ const StatPage = () => {
]) => {
const usersList = userRes.data.users ?? [];
const topUsersList = userRes.data.top_users ?? [];
const interactionGraphRaw =
interactionRes.data?.interaction_graph ?? {};
const interactionGraphRaw = interactionRes.data?.interaction_graph ?? {};
const topPairsRaw = interactionRes.data?.top_interaction_pairs ?? [];
const filteredUsers: typeof usersList = [];
@@ -194,18 +361,14 @@ const StatPage = () => {
const filteredTopUsers: typeof topUsersList = [];
for (const user of topUsersList) {
if (isDeletedUser(user.author)) continue;
filteredTopUsers.push(user);
if (isDeletedUser(user.author)) continue;
filteredTopUsers.push(user);
}
let mostCommentHeavyUser: UserStatsMeta["mostCommentHeavyUser"] =
null;
let mostCommentHeavyUser: UserStatsMeta["mostCommentHeavyUser"] = null;
for (const user of filteredUsers) {
const currentShare = user.comment_share ?? 0;
if (
!mostCommentHeavyUser ||
currentShare > mostCommentHeavyUser.commentShare
) {
if (!mostCommentHeavyUser || currentShare > mostCommentHeavyUser.commentShare) {
mostCommentHeavyUser = {
author: user.author,
commentShare: currentShare,
@@ -221,8 +384,7 @@ const StatPage = () => {
}
}
const filteredInteractionGraph: Record<string, Record<string, number>> =
{};
const filteredInteractionGraph: Record<string, Record<string, number>> = {};
for (const [source, targets] of Object.entries(interactionGraphRaw)) {
if (isDeletedUser(source)) {
continue;
@@ -279,7 +441,7 @@ const StatPage = () => {
setSummary(filteredSummary || null);
},
)
.catch((e) => setError("Failed to load statistics: " + String(e)))
.catch((e) => setError(`Failed to load statistics: ${String(e)}`))
.finally(() => setLoading(false));
};
@@ -302,6 +464,9 @@ const StatPage = () => {
useEffect(() => {
setError("");
setAllRecords(null);
setAllRecordsKey("");
setExplorerState(EMPTY_EXPLORER_STATE);
if (!datasetId) {
setError("Missing dataset id. Open /dataset/<id>/stats.");
return;
@@ -398,9 +563,7 @@ const StatPage = () => {
<button
onClick={() => setActiveView("summary")}
style={
activeView === "summary"
? styles.buttonPrimary
: styles.buttonSecondary
activeView === "summary" ? styles.buttonPrimary : styles.buttonSecondary
}
>
Summary
@@ -418,11 +581,7 @@ const StatPage = () => {
<button
onClick={() => setActiveView("user")}
style={
activeView === "user"
? styles.buttonPrimary
: styles.buttonSecondary
}
style={activeView === "user" ? styles.buttonPrimary : styles.buttonSecondary}
>
Users
</button>
@@ -449,9 +608,7 @@ const StatPage = () => {
<button
onClick={() => setActiveView("cultural")}
style={
activeView === "cultural"
? styles.buttonPrimary
: styles.buttonSecondary
activeView === "cultural" ? styles.buttonPrimary : styles.buttonSecondary
}
>
Cultural
@@ -464,11 +621,12 @@ const StatPage = () => {
timeData={timeData}
linguisticData={linguisticData}
summary={summary}
onExplore={openExplorer}
/>
)}
{activeView === "emotional" && emotionalData && (
<EmotionalStats emotionalData={emotionalData} />
<EmotionalStats emotionalData={emotionalData} onExplore={openExplorer} />
)}
{activeView === "emotional" && !emotionalData && (
@@ -483,6 +641,7 @@ const StatPage = () => {
interactionGraph={interactionData.interaction_graph}
totalUsers={userStatsMeta.totalUsers}
mostCommentHeavyUser={userStatsMeta.mostCommentHeavyUser}
onExplore={openExplorer}
/>
)}
@@ -493,7 +652,7 @@ const StatPage = () => {
)}
{activeView === "linguistic" && linguisticData && (
<LinguisticStats data={linguisticData} />
<LinguisticStats data={linguisticData} onExplore={openExplorer} />
)}
{activeView === "linguistic" && !linguisticData && (
@@ -503,7 +662,7 @@ const StatPage = () => {
)}
{activeView === "interactional" && interactionData && (
<InteractionalStats data={interactionData} />
<InteractionalStats data={interactionData} onExplore={openExplorer} />
)}
{activeView === "interactional" && !interactionData && (
@@ -513,7 +672,7 @@ const StatPage = () => {
)}
{activeView === "cultural" && culturalData && (
<CulturalStats data={culturalData} />
<CulturalStats data={culturalData} onExplore={openExplorer} />
)}
{activeView === "cultural" && !culturalData && (
@@ -521,6 +680,17 @@ const StatPage = () => {
No cultural data available.
</div>
)}
<CorpusExplorer
open={explorerState.open}
onClose={() => setExplorerState((current) => ({ ...current, open: false }))}
title={explorerState.title}
description={explorerState.description}
records={explorerState.records}
loading={explorerState.loading}
error={explorerState.error}
emptyMessage={explorerState.emptyMessage}
/>
</div>
);
};

View File

@@ -0,0 +1,405 @@
import type { CSSProperties } from "react";
/**
 * A single named-entity annotation attached to a record.
 * Only `text` is read in this module (by the entity spec matcher); any other
 * properties are passed through untyped.
 */
type EntityRecord = {
text?: string;
[key: string]: unknown;
};
/**
 * One corpus record as consumed by the explorer. Every field is optional and
 * most may be `null`; the index signature lets additional, unlisted fields
 * through as `unknown`.
 */
type DatasetRecord = {
id?: string | number;
// Used as the lookup key when mapping a record to its author (createAuthorByPostId).
post_id?: string | number | null;
parent_id?: string | number | null;
author?: string | null;
title?: string | null;
content?: string | null;
// Numeric values are multiplied by 1000 before being turned into a Date (getDateBucket).
timestamp?: string | number | null;
// For `date` and `dt`, the first 10 characters are used as the date bucket (getDateBucket).
date?: string | null;
dt?: string | null;
hour?: number | null;
weekday?: string | null;
// Looked up in the post-id -> author map to resolve who was replied to (buildReplyPairSpec).
reply_to?: string | number | null;
source?: string | null;
topic?: string | null;
topic_confidence?: number | null;
// Compared against the literal "comment" by the comment-based matchers.
type?: string | null;
ner_entities?: EntityRecord[] | null;
// Per-emotion scores; the largest one determines the dominant emotion (getDominantEmotion).
emotion_anger?: number | null;
emotion_disgust?: number | null;
emotion_fear?: number | null;
emotion_joy?: number | null;
emotion_sadness?: number | null;
[key: string]: unknown;
};
/**
 * Lookup tables derived once from the full record list (see
 * buildExplorerContext) and handed to every matcher call.
 */
type CorpusExplorerContext = {
// String(post_id) -> trimmed author of a record carrying that post_id.
authorByPostId: Map<string, string>;
// Trimmed author -> number of records by that author.
authorEventCounts: Map<string, number>;
// Trimmed author -> number of records by that author whose type is "comment".
authorCommentCounts: Map<string, number>;
};
/**
 * Describes one explorer view: the texts to display and the predicate that
 * selects which records belong to it.
 */
type CorpusExplorerSpec = {
title: string;
description: string;
// Optional text for the empty state; callers may supply their own default when omitted.
emptyMessage?: string;
// Returns true when `record` should be listed; `context` holds corpus-wide lookups.
matcher: (record: DatasetRecord, context: CorpusExplorerContext) => boolean;
};
// First-person-plural pronouns. Declared with the `g` flag; within this module it is
// only passed to countMatches, which builds a fresh RegExp from `.source`.
const IN_GROUP_PATTERN = /\b(we|us|our|ourselves)\b/gi;
// Third-person-plural pronouns; used the same way as IN_GROUP_PATTERN.
const OUT_GROUP_PATTERN = /\b(they|them|their|themselves)\b/gi;
// The four patterns below have no `g` flag, so repeated `.test()` calls carry no lastIndex state.
// Hedging words and phrases.
const HEDGE_PATTERN = /\b(maybe|perhaps|possibly|probably|likely|seems|seem|i think|i feel|i guess|kind of|sort of|somewhat)\b/i;
// Certainty words.
const CERTAINTY_PATTERN = /\b(definitely|certainly|clearly|obviously|undeniably|always|never)\b/i;
// Obligation ("need/should") words and phrases.
const DEONTIC_PATTERN = /\b(must|should|need|needs|have to|has to|ought|required|require)\b/i;
// Permission words.
const PERMISSION_PATTERN = /\b(can|allowed|okay|ok|permitted)\b/i;
// Record fields holding per-emotion scores, in the order getDominantEmotion scans them.
const EMOTION_KEYS = [
"emotion_anger",
"emotion_disgust",
"emotion_fear",
"emotion_joy",
"emotion_sadness",
] as const;
// Compact style object returned by getExplorerButtonStyle.
const shrinkButtonStyle: CSSProperties = {
padding: "4px 8px",
fontSize: 12,
};
/**
 * Coerces a loosely typed value to display text.
 *
 * Strings are returned unchanged, numbers and booleans are stringified, and an
 * object exposing a string or numeric `id` yields that id as text. Anything
 * else (including `null` and `undefined`) becomes the empty string.
 */
const toText = (value: unknown) => {
  switch (typeof value) {
    case "string":
      return value;
    case "number":
    case "boolean":
      return String(value);
    case "object": {
      if (value === null || !("id" in value)) {
        return "";
      }
      const candidate = (value as { id?: unknown }).id;
      const usable = typeof candidate === "string" || typeof candidate === "number";
      return usable ? String(candidate) : "";
    }
    default:
      return "";
  }
};
/** Converts a value to text (via toText), then trims and lower-cases it for comparisons. */
const normalize = (value: unknown) => {
  const text = toText(value);
  return text.trim().toLowerCase();
};
/**
 * Returns the searchable text of a record: its title and content separated by
 * a single space, with missing parts treated as empty and outer whitespace
 * trimmed.
 */
const getRecordText = (record: DatasetRecord) => {
  const heading = record.title ?? "";
  const body = record.content ?? "";
  return `${heading} ${body}`.trim();
};
/** Escapes every regular-expression metacharacter in `value` so it matches literally. */
const escapeRegExp = (value: string) =>
  value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

/**
 * Builds a case-insensitive pattern that matches `phrase` as a whole token
 * sequence.
 *
 * The phrase is lower-cased, split on whitespace and each token is escaped;
 * tokens may be separated by any run of whitespace in the searched text.
 *
 * The match must not be directly preceded or followed by a letter, digit or
 * underscore. Lookarounds are used instead of `\b` because `\b` only fires
 * between a word and a non-word character: a phrase that starts or ends with
 * punctuation (e.g. "U.S." or "c++") could never match in ordinary text, and
 * `\b` treats non-ASCII letters as non-word characters.
 *
 * @param phrase Word or phrase to search for.
 * @returns The compiled pattern, or `null` when the phrase contains no tokens.
 */
const buildPhrasePattern = (phrase: string) => {
  const tokens = phrase
    .toLowerCase()
    .trim()
    .split(/\s+/)
    .filter(Boolean)
    .map(escapeRegExp);

  if (!tokens.length) {
    return null;
  }

  const notAfterWordChar = "(?<![\\p{L}\\p{N}_])";
  const notBeforeWordChar = "(?![\\p{L}\\p{N}_])";
  return new RegExp(
    `${notAfterWordChar}${tokens.join("\\s+")}${notBeforeWordChar}`,
    "iu",
  );
};
/**
 * Counts the non-overlapping occurrences of `pattern` in `text`.
 *
 * Only the pattern's source is reused; the search always runs globally and
 * case-insensitively, regardless of the flags on `pattern`.
 */
const countMatches = (pattern: RegExp, text: string) => {
  const globalPattern = new RegExp(pattern.source, "gi");
  const hits = text.match(globalPattern);
  return hits === null ? 0 : hits.length;
};
/**
 * Converts a timestamp that is multiplied by 1000 to obtain milliseconds into
 * a `YYYY-MM-DD` string taken from the ISO (UTC) representation.
 *
 * Returns "" when the value does not map to a valid Date; calling
 * `toISOString()` on an invalid Date would throw a RangeError.
 */
const epochSecondsToDateBucket = (seconds: number) => {
  const parsed = new Date(seconds * 1000);
  return Number.isNaN(parsed.getTime()) ? "" : parsed.toISOString().slice(0, 10);
};

/**
 * Returns the day bucket of a record, or "" when none can be derived.
 *
 * Sources are tried in order: the first 10 characters of `date`, the first 10
 * characters of `dt`, then `timestamp` (a number, or a numeric string).
 * Timestamps that are NaN, infinite, out of the representable Date range, or
 * blank strings yield "" instead of throwing or silently mapping to
 * 1970-01-01.
 *
 * @param record Record whose date bucket is wanted.
 * @returns The date bucket string, or "".
 */
const getDateBucket = (record: DatasetRecord) => {
  if (typeof record.date === "string" && record.date) {
    return record.date.slice(0, 10);
  }

  if (typeof record.dt === "string" && record.dt) {
    return record.dt.slice(0, 10);
  }

  const { timestamp } = record;

  if (typeof timestamp === "number") {
    return epochSecondsToDateBucket(timestamp);
  }

  // Number("   ") is 0, so blank strings must be rejected before converting.
  if (typeof timestamp === "string" && timestamp.trim()) {
    const numeric = Number(timestamp);
    if (Number.isFinite(numeric)) {
      return epochSecondsToDateBucket(numeric);
    }
  }

  return "";
};
/**
 * Returns the name (without the `emotion_` prefix) of the emotion with the
 * highest score on the record.
 *
 * Missing or null scores count as negative infinity, ties keep the emotion
 * that comes first in EMOTION_KEYS, and "" is returned when no score beats
 * negative infinity.
 */
const getDominantEmotion = (record: DatasetRecord) => {
  const winner = EMOTION_KEYS.reduce<{ key: string; score: number }>(
    (best, key) => {
      const score = Number(record[key] ?? Number.NEGATIVE_INFINITY);
      return score > best.score ? { key, score } : best;
    },
    { key: "", score: Number.NEGATIVE_INFINITY },
  );

  return winner.key.replace("emotion_", "");
};
/**
 * Tells whether the record's lower-cased text contains `phrase` according to
 * the pattern produced by buildPhrasePattern. A phrase for which no pattern is
 * produced never matches.
 */
const matchesPhrase = (record: DatasetRecord, phrase: string) => {
  const phrasePattern = buildPhrasePattern(phrase);
  if (phrasePattern === null) {
    return false;
  }

  const haystack = getRecordText(record).toLowerCase();
  return phrasePattern.test(haystack);
};
/**
 * Classifies a record by comparing how often in-group and out-group pronouns
 * occur in its text: "in" when in-group hits dominate, "out" when out-group
 * hits dominate, and "tie" when the counts are equal (including both zero).
 */
const recordIdentityBucket = (record: DatasetRecord) => {
  const haystack = getRecordText(record).toLowerCase();
  const balance =
    countMatches(IN_GROUP_PATTERN, haystack) - countMatches(OUT_GROUP_PATTERN, haystack);

  if (balance === 0) {
    return "tie";
  }
  return balance > 0 ? "in" : "out";
};
/**
 * Tallies how many records each author has. Authors are keyed by their
 * trimmed text form; records without an author are ignored.
 */
const createAuthorEventCounts = (records: DatasetRecord[]) =>
  records.reduce((tally, entry) => {
    const name = toText(entry.author).trim();
    if (name) {
      tally.set(name, (tally.get(name) ?? 0) + 1);
    }
    return tally;
  }, new Map<string, number>());
/**
 * Tallies how many records of type "comment" each author has. Authors are
 * keyed by their trimmed text form; records without an author are ignored.
 */
const createAuthorCommentCounts = (records: DatasetRecord[]) => {
  const tally = new Map<string, number>();

  records.forEach((entry) => {
    if (entry.type !== "comment") {
      return;
    }
    const name = toText(entry.author).trim();
    if (name) {
      tally.set(name, (tally.get(name) ?? 0) + 1);
    }
  });

  return tally;
};
/**
 * Maps `String(post_id)` to the trimmed author of a record carrying that
 * post_id. Records lacking a post_id or an author are skipped; when several
 * records share a post_id, the last one in `records` wins.
 *
 * NOTE(review): buildReplyPairSpec looks up `reply_to` values in this map. If
 * `reply_to` refers to a record's `id` rather than its `post_id`, or if
 * comments share the `post_id` of their parent post, the resolved author may
 * not be the one intended; confirm against the backend schema.
 */
const createAuthorByPostId = (records: DatasetRecord[]) => {
  const lookup = new Map<string, string>();

  records.forEach(({ post_id: postId, author }) => {
    const name = toText(author).trim();
    if (postId != null && name) {
      lookup.set(String(postId), name);
    }
  });

  return lookup;
};
/**
 * Derives the corpus-wide lookup tables that matchers receive as their second
 * argument from the full list of records.
 */
const buildExplorerContext = (records: DatasetRecord[]): CorpusExplorerContext => {
  const authorByPostId = createAuthorByPostId(records);
  const authorEventCounts = createAuthorEventCounts(records);
  const authorCommentCounts = createAuthorCommentCounts(records);

  return { authorByPostId, authorEventCounts, authorCommentCounts };
};
/** Spec that lists every record of the current filtered dataset. */
const buildAllRecordsSpec = (): CorpusExplorerSpec => {
  const acceptEverything = () => true;

  return {
    title: "Corpus Explorer",
    description: "All records in the current filtered dataset.",
    emptyMessage: "No records match the current filters.",
    matcher: acceptEverything,
  };
};
/** Spec selecting records whose author equals `author`, ignoring case and outer whitespace. */
const buildUserSpec = (author: string): CorpusExplorerSpec => {
  const wantedAuthor = normalize(author);

  return {
    title: `User: ${author}`,
    description: `All records authored by ${author}.`,
    emptyMessage: `No records found for ${author}.`,
    matcher: (record) => wantedAuthor === normalize(record.author),
  };
};
/** Spec selecting records whose topic equals `topic`, ignoring case and outer whitespace. */
const buildTopicSpec = (topic: string): CorpusExplorerSpec => {
  const wantedTopic = normalize(topic);

  return {
    title: `Topic: ${topic}`,
    description: `Records assigned to the ${topic} topic bucket.`,
    emptyMessage: `No records found in the ${topic} topic bucket.`,
    matcher: (record) => wantedTopic === normalize(record.topic),
  };
};
/** Spec selecting records whose date bucket (see getDateBucket) is exactly `date`. */
const buildDateBucketSpec = (date: string): CorpusExplorerSpec => {
  const isInBucket = (record: DatasetRecord) => date === getDateBucket(record);

  return {
    title: `Date Bucket: ${date}`,
    description: `Records from the ${date} activity bucket.`,
    emptyMessage: `No records found on ${date}.`,
    matcher: isInBucket,
  };
};
/** Spec selecting records whose text contains `word` (see matchesPhrase). */
const buildWordSpec = (word: string): CorpusExplorerSpec => {
  const mentionsWord = (record: DatasetRecord) => matchesPhrase(record, word);

  return {
    title: `Word: ${word}`,
    description: `Records containing the word ${word}.`,
    emptyMessage: `No records mention ${word}.`,
    matcher: mentionsWord,
  };
};
/** Spec selecting records whose text contains the phrase `ngram` (see matchesPhrase). */
const buildNgramSpec = (ngram: string): CorpusExplorerSpec => {
  const containsNgram = (record: DatasetRecord) => matchesPhrase(record, ngram);

  return {
    title: `N-gram: ${ngram}`,
    description: `Records containing the phrase ${ngram}.`,
    emptyMessage: `No records contain the phrase ${ngram}.`,
    matcher: containsNgram,
  };
};
/**
 * Spec selecting records that mention `entity`: either one of the record's
 * `ner_entities` has a matching `text` (ignoring case and outer whitespace),
 * or the record's text contains the entity as a phrase.
 */
const buildEntitySpec = (entity: string): CorpusExplorerSpec => {
  const wantedEntity = normalize(entity);

  return {
    title: `Entity: ${entity}`,
    description: `Records mentioning the ${entity} entity.`,
    emptyMessage: `No records found for the ${entity} entity.`,
    matcher: (record) => {
      const tagged = Array.isArray(record.ner_entities) ? record.ner_entities : [];
      if (tagged.some((item) => normalize(item?.text) === wantedEntity)) {
        return true;
      }
      // Fall back to a plain text search when no annotation matches.
      return matchesPhrase(record, entity);
    },
  };
};
/** Spec selecting records whose source equals `source`, ignoring case and outer whitespace. */
const buildSourceSpec = (source: string): CorpusExplorerSpec => {
  const wantedSource = normalize(source);

  return {
    title: `Source: ${source}`,
    description: `Records from the ${source} source.`,
    emptyMessage: `No records found for ${source}.`,
    matcher: (record) => wantedSource === normalize(record.source),
  };
};
/**
 * Spec selecting records whose dominant emotion (see getDominantEmotion)
 * equals `emotion` once the latter is trimmed and lower-cased.
 */
const buildDominantEmotionSpec = (emotion: string): CorpusExplorerSpec => {
  const wantedEmotion = normalize(emotion);

  return {
    title: `Dominant Emotion: ${emotion}`,
    description: `Records where ${emotion} is the strongest emotion score.`,
    emptyMessage: `No records found with dominant emotion ${emotion}.`,
    matcher: (record) => wantedEmotion === getDominantEmotion(record),
  };
};
/**
 * Spec selecting replies written by `source` to `target`.
 *
 * A record qualifies when its author matches `source`, it has a non-empty
 * `reply_to`, and the author registered for that value in
 * `context.authorByPostId` matches `target`. Author comparisons ignore case
 * and outer whitespace.
 */
const buildReplyPairSpec = (source: string, target: string): CorpusExplorerSpec => {
  const wantedSource = normalize(source);
  const wantedTarget = normalize(target);

  return {
    title: `Reply Path: ${source} -> ${target}`,
    description: `Reply records authored by ${source} in response to ${target}.`,
    emptyMessage: `No reply records found for ${source} -> ${target}.`,
    matcher: (record, context) => {
      const replyTo = record.reply_to;
      const hasReplyTarget = replyTo !== null && replyTo !== undefined && replyTo !== "";
      if (!hasReplyTarget || normalize(record.author) !== wantedSource) {
        return false;
      }

      const repliedAuthor = context.authorByPostId.get(String(replyTo));
      return normalize(repliedAuthor) === wantedTarget;
    },
  };
};
/** Spec selecting records whose author has exactly one record in the corpus. */
const buildOneTimeUsersSpec = (): CorpusExplorerSpec => ({
  title: "One-Time Users",
  description: "Records written by authors who appear exactly once in the filtered corpus.",
  emptyMessage: "No one-time-user records found.",
  matcher: (record, context) => {
    const name = toText(record.author).trim();
    if (!name) {
      return false;
    }
    return context.authorEventCounts.get(name) === 1;
  },
});
/**
 * Spec selecting comment records written by the `topAuthorCount` authors with
 * the most comments.
 *
 * The ranking depends only on the context, not on the record being tested, so
 * it is computed once per context object and reused for every record instead
 * of re-sorting all commenters on each matcher call. The WeakMap lets a cached
 * ranking be collected together with its context. This assumes a context is
 * not mutated after its first use, which holds for contexts produced by
 * buildExplorerContext in this module.
 *
 * @param topAuthorCount Number of highest-ranked commenters to include.
 */
const buildTopCommentersSpec = (topAuthorCount: number): CorpusExplorerSpec => {
  const topAuthorsByContext = new WeakMap<CorpusExplorerContext, Set<string>>();

  const getTopAuthors = (context: CorpusExplorerContext) => {
    let topAuthors = topAuthorsByContext.get(context);
    if (!topAuthors) {
      topAuthors = new Set(
        Array.from(context.authorCommentCounts.entries())
          .sort((a, b) => b[1] - a[1])
          .slice(0, topAuthorCount)
          .map(([author]) => author),
      );
      topAuthorsByContext.set(context, topAuthors);
    }
    return topAuthors;
  };

  return {
    title: "Top Commenters",
    description: `Comment records from the top ${topAuthorCount} commenters in the filtered corpus.`,
    emptyMessage: "No top-commenter records found.",
    matcher: (record, context) => {
      if (record.type !== "comment") {
        return false;
      }
      return getTopAuthors(context).has(toText(record.author).trim());
    },
  };
};
/** Spec selecting comment records whose author has exactly one comment in the corpus. */
const buildSingleCommentAuthorsSpec = (): CorpusExplorerSpec => ({
  title: "Single-Comment Authors",
  description: "Comment records from authors who commented exactly once.",
  emptyMessage: "No single-comment-author records found.",
  matcher: (record, context) => {
    if (record.type !== "comment") {
      return false;
    }

    const name = toText(record.author).trim();
    if (!name) {
      return false;
    }

    return context.authorCommentCounts.get(name) === 1;
  },
});
/**
 * Spec selecting records that fall into the given identity bucket as computed
 * by recordIdentityBucket ("in", "out" or "tie").
 */
const buildIdentityBucketSpec = (bucket: "in" | "out" | "tie"): CorpusExplorerSpec => {
  const headingByBucket = {
    in: "In-Group Posts",
    out: "Out-Group Posts",
    tie: "Balanced Posts",
  } as const;

  const heading = headingByBucket[bucket];
  const lowerHeading = heading.toLowerCase();

  return {
    title: heading,
    description: `Records in the ${lowerHeading} cultural bucket.`,
    emptyMessage: `No records found for ${lowerHeading}.`,
    matcher: (record) => bucket === recordIdentityBucket(record),
  };
};
/**
 * Generic spec selecting records whose text (title plus content) is matched by
 * `pattern`. The empty-state message is derived from the lower-cased title.
 */
const buildPatternSpec = (
  title: string,
  description: string,
  pattern: RegExp,
): CorpusExplorerSpec => {
  const textMatchesPattern = (record: DatasetRecord) => {
    const haystack = getRecordText(record);
    return pattern.test(haystack);
  };

  return {
    title,
    description,
    emptyMessage: `No records found for ${title.toLowerCase()}.`,
    matcher: textMatchesPattern,
  };
};
/** Spec selecting records whose text matches HEDGE_PATTERN. */
const buildHedgeSpec = () => {
  const title = "Hedging Words";
  const description = "Records containing hedging language.";
  return buildPatternSpec(title, description, HEDGE_PATTERN);
};
/** Spec selecting records whose text matches CERTAINTY_PATTERN. */
const buildCertaintySpec = () => {
  const title = "Certainty Words";
  const description = "Records containing certainty language.";
  return buildPatternSpec(title, description, CERTAINTY_PATTERN);
};
/** Spec selecting records whose text matches DEONTIC_PATTERN. */
const buildDeonticSpec = () => {
  const title = "Need/Should Words";
  const description = "Records containing deontic language.";
  return buildPatternSpec(title, description, DEONTIC_PATTERN);
};
/** Spec selecting records whose text matches PERMISSION_PATTERN. */
const buildPermissionSpec = () => {
  const title = "Permission Words";
  const description = "Records containing permission language.";
  return buildPatternSpec(title, description, PERMISSION_PATTERN);
};
/** Returns the shared compact style object used for explorer buttons. */
const getExplorerButtonStyle = () => {
  return shrinkButtonStyle;
};
export type { DatasetRecord, CorpusExplorerContext, CorpusExplorerSpec };
export {
buildAllRecordsSpec,
buildCertaintySpec,
buildDateBucketSpec,
buildDeonticSpec,
buildDominantEmotionSpec,
buildEntitySpec,
buildExplorerContext,
buildHedgeSpec,
buildIdentityBucketSpec,
buildNgramSpec,
buildOneTimeUsersSpec,
buildPermissionSpec,
buildReplyPairSpec,
buildSingleCommentAuthorsSpec,
buildSourceSpec,
buildTopicSpec,
buildTopCommentersSpec,
buildUserSpec,
buildWordSpec,
getDateBucket,
getExplorerButtonStyle,
toText,
};