diff --git a/frontend/src/components/SummaryStats.tsx b/frontend/src/components/SummaryStats.tsx
index d53d6da..9597fca 100644
--- a/frontend/src/components/SummaryStats.tsx
+++ b/frontend/src/components/SummaryStats.tsx
@@ -26,12 +26,12 @@ import {
buildDateBucketSpec,
buildOneTimeUsersSpec,
buildUserSpec,
- getExplorerButtonStyle,
type CorpusExplorerSpec,
} from "../utils/corpusExplorer";
const styles = StatsStyling;
const MAX_WORDCLOUD_WORDS = 250;
+// Compact inline style object: tight padding and font size 12.
+// NOTE(review): presumably consumed by renderExploreButton below — confirm.
+const exploreButtonStyle = {
+  padding: "4px 8px",
+  fontSize: 12,
+};
const WORDCLOUD_OPTIONS = {
rotations: 2,
@@ -80,7 +80,7 @@ function convertFrequencyData(data: FrequencyWord[]) {
const renderExploreButton = (onClick: () => void) => (
diff --git a/frontend/src/components/UserStats.tsx b/frontend/src/components/UserStats.tsx
index 50d96f3..fc2a57e 100644
--- a/frontend/src/components/UserStats.tsx
+++ b/frontend/src/components/UserStats.tsx
@@ -20,7 +20,7 @@ type GraphLink = {
value: number;
};
-function ApiToGraphData(apiData: InteractionGraph) {
+function toGraphData(apiData: InteractionGraph) {
const links: GraphLink[] = [];
const connectedNodeIds = new Set();
@@ -56,7 +56,7 @@ const UserStats = ({
onExplore,
}: UserStatsProps) => {
const graphData = useMemo(
- () => ApiToGraphData(interactionGraph),
+ () => toGraphData(interactionGraph),
[interactionGraph],
);
const graphContainerRef = useRef(null);
diff --git a/frontend/src/pages/Stats.tsx b/frontend/src/pages/Stats.tsx
index 0651a30..a14e733 100644
--- a/frontend/src/pages/Stats.tsx
+++ b/frontend/src/pages/Stats.tsx
@@ -66,6 +66,26 @@ const EMPTY_EXPLORER_STATE: ExplorerState = {
error: "",
};
+/**
+ * Builds the explorer panel state for a given spec.
+ *
+ * The result is always open, copies the title and description from the spec,
+ * falls back to a generic empty message when the spec provides none, and
+ * starts with no records, not loading, and no error. Fields supplied in
+ * `patch` are spread last and therefore override these defaults.
+ *
+ * @param spec - Explorer spec supplying title, description and empty message.
+ * @param patch - Optional overrides, e.g. `{ loading: true }`.
+ * @returns A complete explorer state object.
+ */
+const createExplorerState = (
+  spec: CorpusExplorerSpec,
+  patch: Partial<ExplorerState> = {},
+): ExplorerState => ({
+  open: true,
+  title: spec.title,
+  description: spec.description,
+  emptyMessage: spec.emptyMessage ?? "No matching records found.",
+  records: [],
+  loading: false,
+  error: "",
+  ...patch,
+});
+
+/**
+ * Sort comparator that orders records by their date key, descending.
+ *
+ * A record's key is the first non-nullish of `dt`, `date`, `timestamp`
+ * (empty string when none is set), coerced to a string. Keys are compared
+ * with `localeCompare`, so this is a string ordering rather than a numeric
+ * or Date-based one.
+ */
+const compareRecordsByNewest = (a: DatasetRecord, b: DatasetRecord) => {
+  const sortKey = (record: DatasetRecord) =>
+    String(record.dt ?? record.date ?? record.timestamp ?? "");
+
+  const keyA = sortKey(a);
+  const keyB = sortKey(b);
+  // Comparing b against a (not a against b) yields descending order.
+  return keyB.localeCompare(keyA);
+};
+
const parseJsonLikePayload = (value: string): unknown => {
const normalized = value
.replace(/\uFEFF/g, "")
@@ -86,16 +106,23 @@ const parseJsonLikePayload = (value: string): unknown => {
return JSON.parse(normalized);
};
+/**
+ * Attempts to parse `value` as a JSON-like payload and normalize it into
+ * records. Returns null instead of throwing when either step fails.
+ */
+const tryParseRecords = (value: string) => {
+  try {
+    const parsed = parseJsonLikePayload(value);
+    return normalizeRecordPayload(parsed);
+  } catch {
+    // Best-effort: a null result lets the caller try its next fallback format.
+    return null;
+  }
+};
+
const parseRecordStringPayload = (payload: string): DatasetRecord[] | null => {
const trimmed = payload.trim();
if (!trimmed) {
return [];
}
- try {
- return normalizeRecordPayload(parseJsonLikePayload(trimmed));
- } catch {
- // Continue with additional fallback formats below.
+ const direct = tryParseRecords(trimmed);
+ if (direct) {
+ return direct;
}
const ndjsonLines = trimmed
@@ -106,29 +133,24 @@ const parseRecordStringPayload = (payload: string): DatasetRecord[] | null => {
try {
return ndjsonLines.map((line) => parseJsonLikePayload(line)) as DatasetRecord[];
} catch {
- // Continue with wrapped JSON extraction.
}
}
const bracketStart = trimmed.indexOf("[");
const bracketEnd = trimmed.lastIndexOf("]");
if (bracketStart !== -1 && bracketEnd > bracketStart) {
- const candidate = trimmed.slice(bracketStart, bracketEnd + 1);
- try {
- return normalizeRecordPayload(parseJsonLikePayload(candidate));
- } catch {
- // Continue with object extraction.
+ const parsed = tryParseRecords(trimmed.slice(bracketStart, bracketEnd + 1));
+ if (parsed) {
+ return parsed;
}
}
const braceStart = trimmed.indexOf("{");
const braceEnd = trimmed.lastIndexOf("}");
if (braceStart !== -1 && braceEnd > braceStart) {
- const candidate = trimmed.slice(braceStart, braceEnd + 1);
- try {
- return normalizeRecordPayload(parseJsonLikePayload(candidate));
- } catch {
- return null;
+ const parsed = tryParseRecords(trimmed.slice(braceStart, braceEnd + 1));
+ if (parsed) {
+ return parsed;
}
}
@@ -316,45 +338,22 @@ const StatPage = () => {
};
const openExplorer = async (spec: CorpusExplorerSpec) => {
- setExplorerState({
- open: true,
- title: spec.title,
- description: spec.description,
- emptyMessage: spec.emptyMessage ?? "No matching records found.",
- records: [],
- loading: true,
- error: "",
- });
+ setExplorerState(createExplorerState(spec, { loading: true }));
try {
const records = await ensureFilteredRecords();
const context = buildExplorerContext(records);
- const matched = records.filter((record) => spec.matcher(record, context));
- matched.sort((a, b) => {
- const aValue = String(a.dt ?? a.date ?? a.timestamp ?? "");
- const bValue = String(b.dt ?? b.date ?? b.timestamp ?? "");
- return bValue.localeCompare(aValue);
- });
+ const matched = records
+ .filter((record) => spec.matcher(record, context))
+ .sort(compareRecordsByNewest);
- setExplorerState({
- open: true,
- title: spec.title,
- description: spec.description,
- emptyMessage: spec.emptyMessage ?? "No matching records found.",
- records: matched,
- loading: false,
- error: "",
- });
+ setExplorerState(createExplorerState(spec, { records: matched }));
} catch (e) {
- setExplorerState({
- open: true,
- title: spec.title,
- description: spec.description,
- emptyMessage: spec.emptyMessage ?? "No matching records found.",
- records: [],
- loading: false,
- error: `Failed to load corpus records: ${String(e)}`,
- });
+ setExplorerState(
+ createExplorerState(spec, {
+ error: `Failed to load corpus records: ${String(e)}`,
+ }),
+ );
}
};
diff --git a/frontend/src/utils/corpusExplorer.ts b/frontend/src/utils/corpusExplorer.ts
index e1ddb70..24801e3 100644
--- a/frontend/src/utils/corpusExplorer.ts
+++ b/frontend/src/utils/corpusExplorer.ts
@@ -1,5 +1,3 @@
-import type { CSSProperties } from "react";
-
type EntityRecord = {
text?: string;
[key: string]: unknown;
@@ -58,11 +56,6 @@ const EMOTION_KEYS = [
"emotion_sadness",
] as const;
-const shrinkButtonStyle: CSSProperties = {
- padding: "4px 8px",
- fontSize: 12,
-};
-
const toText = (value: unknown) => {
if (typeof value === "string") {
return value;
@@ -83,6 +76,7 @@ const toText = (value: unknown) => {
};
const normalize = (value: unknown) => toText(value).trim().toLowerCase();
+// Author of a record as text, with surrounding whitespace trimmed.
+const getAuthor = (record: DatasetRecord) => {
+  const author = toText(record.author);
+  return author.trim();
+};
const getRecordText = (record: DatasetRecord) =>
`${record.title ?? ""} ${record.content ?? ""}`.trim();
@@ -152,11 +146,11 @@ const matchesPhrase = (record: DatasetRecord, phrase: string) => {
return false;
}
- return pattern.test(getRecordText(record).toLowerCase());
+ return pattern.test(getRecordText(record));
};
const recordIdentityBucket = (record: DatasetRecord) => {
- const text = getRecordText(record).toLowerCase();
+ const text = getRecordText(record);
const inHits = countMatches(IN_GROUP_PATTERN, text);
const outHits = countMatches(OUT_GROUP_PATTERN, text);
@@ -171,48 +165,30 @@ const recordIdentityBucket = (record: DatasetRecord) => {
return "tie";
};
-const createAuthorEventCounts = (records: DatasetRecord[]) => {
- const counts = new Map();
+const buildExplorerContext = (records: DatasetRecord[]): CorpusExplorerContext => {
+ const authorByPostId = new Map();
+ const authorEventCounts = new Map();
+ const authorCommentCounts = new Map();
+
for (const record of records) {
- const author = toText(record.author).trim();
+ const author = getAuthor(record);
if (!author) {
continue;
}
- counts.set(author, (counts.get(author) ?? 0) + 1);
- }
- return counts;
-};
-const createAuthorCommentCounts = (records: DatasetRecord[]) => {
- const counts = new Map();
- for (const record of records) {
- const author = toText(record.author).trim();
- if (!author || record.type !== "comment") {
- continue;
+ authorEventCounts.set(author, (authorEventCounts.get(author) ?? 0) + 1);
+
+ if (record.type === "comment") {
+ authorCommentCounts.set(author, (authorCommentCounts.get(author) ?? 0) + 1);
}
- counts.set(author, (counts.get(author) ?? 0) + 1);
- }
- return counts;
-};
-const createAuthorByPostId = (records: DatasetRecord[]) => {
- const map = new Map();
- for (const record of records) {
- const postId = record.post_id;
- const author = toText(record.author).trim();
- if (postId === null || postId === undefined || !author) {
- continue;
+ if (record.post_id !== null && record.post_id !== undefined) {
+ authorByPostId.set(String(record.post_id), author);
}
- map.set(String(postId), author);
}
- return map;
-};
-const buildExplorerContext = (records: DatasetRecord[]): CorpusExplorerContext => ({
- authorByPostId: createAuthorByPostId(records),
- authorEventCounts: createAuthorEventCounts(records),
- authorCommentCounts: createAuthorCommentCounts(records),
-});
+ return { authorByPostId, authorEventCounts, authorCommentCounts };
+};
const buildAllRecordsSpec = (): CorpusExplorerSpec => ({
title: "Corpus Explorer",
@@ -221,19 +197,27 @@ const buildAllRecordsSpec = (): CorpusExplorerSpec => ({
matcher: () => true,
});
-const buildUserSpec = (author: string): CorpusExplorerSpec => ({
- title: `User: ${author}`,
- description: `All records authored by ${author}.`,
- emptyMessage: `No records found for ${author}.`,
- matcher: (record) => normalize(record.author) === normalize(author),
-});
+const buildUserSpec = (author: string): CorpusExplorerSpec => {
+ const target = normalize(author);
-const buildTopicSpec = (topic: string): CorpusExplorerSpec => ({
- title: `Topic: ${topic}`,
- description: `Records assigned to the ${topic} topic bucket.`,
- emptyMessage: `No records found in the ${topic} topic bucket.`,
- matcher: (record) => normalize(record.topic) === normalize(topic),
-});
+ return {
+ title: `User: ${author}`,
+ description: `All records authored by ${author}.`,
+ emptyMessage: `No records found for ${author}.`,
+ matcher: (record) => normalize(record.author) === target,
+ };
+};
+
+/**
+ * Explorer spec selecting records whose `topic` equals the given topic
+ * after normalization (text form, trimmed and lower-cased).
+ */
+const buildTopicSpec = (topic: string): CorpusExplorerSpec => {
+  // Normalize the requested topic once, outside the per-record matcher.
+  const wantedTopic = normalize(topic);
+
+  const spec: CorpusExplorerSpec = {
+    title: `Topic: ${topic}`,
+    description: `Records assigned to the ${topic} topic bucket.`,
+    emptyMessage: `No records found in the ${topic} topic bucket.`,
+    matcher: (record) => {
+      const recordTopic = normalize(record.topic);
+      return recordTopic === wantedTopic;
+    },
+  };
+  return spec;
+};
const buildDateBucketSpec = (date: string): CorpusExplorerSpec => ({
title: `Date Bucket: ${date}`,
@@ -256,88 +240,75 @@ const buildNgramSpec = (ngram: string): CorpusExplorerSpec => ({
matcher: (record) => matchesPhrase(record, ngram),
});
-const buildEntitySpec = (entity: string): CorpusExplorerSpec => ({
- title: `Entity: ${entity}`,
- description: `Records mentioning the ${entity} entity.`,
- emptyMessage: `No records found for the ${entity} entity.`,
- matcher: (record) => {
- const target = normalize(entity);
- const entities = Array.isArray(record.ner_entities) ? record.ner_entities : [];
- return entities.some((item) => normalize(item?.text) === target) || matchesPhrase(record, entity);
- },
-});
+const buildEntitySpec = (entity: string): CorpusExplorerSpec => {
+ const target = normalize(entity);
-const buildSourceSpec = (source: string): CorpusExplorerSpec => ({
- title: `Source: ${source}`,
- description: `Records from the ${source} source.`,
- emptyMessage: `No records found for ${source}.`,
- matcher: (record) => normalize(record.source) === normalize(source),
-});
+ return {
+ title: `Entity: ${entity}`,
+ description: `Records mentioning the ${entity} entity.`,
+ emptyMessage: `No records found for the ${entity} entity.`,
+ matcher: (record) => {
+ const entities = Array.isArray(record.ner_entities) ? record.ner_entities : [];
+ return entities.some((item) => normalize(item?.text) === target) || matchesPhrase(record, entity);
+ },
+ };
+};
-const buildDominantEmotionSpec = (emotion: string): CorpusExplorerSpec => ({
- title: `Dominant Emotion: ${emotion}`,
- description: `Records where ${emotion} is the strongest emotion score.`,
- emptyMessage: `No records found with dominant emotion ${emotion}.`,
- matcher: (record) => getDominantEmotion(record) === normalize(emotion),
-});
+const buildSourceSpec = (source: string): CorpusExplorerSpec => {
+ const target = normalize(source);
-const buildReplyPairSpec = (source: string, target: string): CorpusExplorerSpec => ({
- title: `Reply Path: ${source} -> ${target}`,
- description: `Reply records authored by ${source} in response to ${target}.`,
- emptyMessage: `No reply records found for ${source} -> ${target}.`,
- matcher: (record, context) => {
- if (normalize(record.author) !== normalize(source)) {
- return false;
- }
+ return {
+ title: `Source: ${source}`,
+ description: `Records from the ${source} source.`,
+ emptyMessage: `No records found for ${source}.`,
+ matcher: (record) => normalize(record.source) === target,
+ };
+};
- const replyTo = record.reply_to;
- if (replyTo === null || replyTo === undefined || replyTo === "") {
- return false;
- }
+const buildDominantEmotionSpec = (emotion: string): CorpusExplorerSpec => {
+ const target = normalize(emotion);
- const replyTarget = context.authorByPostId.get(String(replyTo));
- return normalize(replyTarget) === normalize(target);
- },
-});
+ return {
+ title: `Dominant Emotion: ${emotion}`,
+ description: `Records where ${emotion} is the strongest emotion score.`,
+ emptyMessage: `No records found with dominant emotion ${emotion}.`,
+ matcher: (record) => getDominantEmotion(record) === target,
+ };
+};
+
+/**
+ * Explorer spec for reply records written by `source` in response to `target`.
+ *
+ * A record matches when its normalized author equals `source`, it carries a
+ * non-empty `reply_to`, and the author stored for that id in
+ * `context.authorByPostId` normalizes to `target`.
+ */
+const buildReplyPairSpec = (source: string, target: string): CorpusExplorerSpec => {
+  // Normalize both names once, outside the per-record matcher.
+  const wantedAuthor = normalize(source);
+  const wantedRecipient = normalize(target);
+
+  return {
+    title: `Reply Path: ${source} -> ${target}`,
+    description: `Reply records authored by ${source} in response to ${target}.`,
+    emptyMessage: `No reply records found for ${source} -> ${target}.`,
+    matcher: (record, context) => {
+      const writtenBySource = normalize(record.author) === wantedAuthor;
+      if (!writtenBySource) {
+        return false;
+      }
+
+      const parentId = record.reply_to;
+      const hasParent = !(parentId === null || parentId === undefined || parentId === "");
+      if (!hasParent) {
+        return false;
+      }
+
+      // Look up who authored the record being replied to.
+      const parentAuthor = context.authorByPostId.get(String(parentId));
+      return normalize(parentAuthor) === wantedRecipient;
+    },
+  };
+};
const buildOneTimeUsersSpec = (): CorpusExplorerSpec => ({
title: "One-Time Users",
description: "Records written by authors who appear exactly once in the filtered corpus.",
emptyMessage: "No one-time-user records found.",
matcher: (record, context) => {
- const author = toText(record.author).trim();
+ const author = getAuthor(record);
return !!author && context.authorEventCounts.get(author) === 1;
},
});
-const buildTopCommentersSpec = (topAuthorCount: number): CorpusExplorerSpec => ({
- title: "Top Commenters",
- description: `Comment records from the top ${topAuthorCount} commenters in the filtered corpus.`,
- emptyMessage: "No top-commenter records found.",
- matcher: (record, context) => {
- if (record.type !== "comment") {
- return false;
- }
-
- const rankedAuthors = Array.from(context.authorCommentCounts.entries())
- .sort((a, b) => b[1] - a[1])
- .slice(0, topAuthorCount)
- .map(([author]) => author);
-
- return rankedAuthors.includes(toText(record.author).trim());
- },
-});
-
-const buildSingleCommentAuthorsSpec = (): CorpusExplorerSpec => ({
- title: "Single-Comment Authors",
- description: "Comment records from authors who commented exactly once.",
- emptyMessage: "No single-comment-author records found.",
- matcher: (record, context) => {
- const author = toText(record.author).trim();
- return record.type === "comment" && !!author && context.authorCommentCounts.get(author) === 1;
- },
-});
-
const buildIdentityBucketSpec = (bucket: "in" | "out" | "tie"): CorpusExplorerSpec => {
const labels = {
in: "In-Group Posts",
@@ -376,9 +347,7 @@ const buildDeonticSpec = () =>
const buildPermissionSpec = () =>
buildPatternSpec("Permission Words", "Records containing permission language.", PERMISSION_PATTERN);
-const getExplorerButtonStyle = () => shrinkButtonStyle;
-
-export type { DatasetRecord, CorpusExplorerContext, CorpusExplorerSpec };
+export type { DatasetRecord, CorpusExplorerSpec };
export {
buildAllRecordsSpec,
buildCertaintySpec,
@@ -393,13 +362,10 @@ export {
buildOneTimeUsersSpec,
buildPermissionSpec,
buildReplyPairSpec,
- buildSingleCommentAuthorsSpec,
buildSourceSpec,
buildTopicSpec,
- buildTopCommentersSpec,
buildUserSpec,
buildWordSpec,
getDateBucket,
- getExplorerButtonStyle,
toText,
};