refactor: streamline CorpusExplorer components

This commit is contained in:
2026-04-13 17:06:46 +01:00
parent bc356848ef
commit c11434344a
6 changed files with 147 additions and 202 deletions

View File

@@ -1,4 +1,4 @@
import { useEffect, useMemo, useState } from "react";
import { useEffect, useState } from "react";
import { Dialog, DialogPanel, DialogTitle } from "@headlessui/react";
import StatsStyling from "../styles/stats_styling";
@@ -103,11 +103,6 @@ const CorpusExplorer = ({
}
}, [open, title, records.length]);
const visibleRecords = useMemo(
() => records.slice(0, visibleCount),
[records, visibleCount],
);
const hasMoreRecords = visibleCount < records.length;
return (
@@ -158,7 +153,7 @@ const CorpusExplorer = ({
paddingRight: 4,
}}
>
{visibleRecords.map((record, index) => {
{records.slice(0, visibleCount).map((record, index) => {
const recordKey = getRecordKey(record, index);
const titleText = getRecordTitle(record);
const content = cleanText(record.content);

View File

@@ -8,11 +8,11 @@ import {
buildHedgeSpec,
buildIdentityBucketSpec,
buildPermissionSpec,
getExplorerButtonStyle,
type CorpusExplorerSpec,
} from "../utils/corpusExplorer";
const styles = StatsStyling;
const exploreButtonStyle = { padding: "4px 8px", fontSize: 12 };
type CulturalStatsProps = {
data: CulturalAnalysisResponse;
@@ -22,7 +22,7 @@ type CulturalStatsProps = {
const renderExploreButton = (onClick: () => void) => (
<button
onClick={onClick}
style={{ ...styles.buttonSecondary, ...getExplorerButtonStyle() }}
style={{ ...styles.buttonSecondary, ...exploreButtonStyle }}
>
Explore
</button>
@@ -59,21 +59,6 @@ const CulturalStats = ({ data, onExplore }: CulturalStatsProps) => {
return `${dominantLabel} (${(dominant[1] * 100).toFixed(1)}%)`;
};
/**
 * Builds the sub-label shown for a stance metric.
 *
 * The first part is the usage rate formatted to one decimal place, or the
 * generic "Word frequency" text when no numeric rate is supplied. The mood
 * suffix is appended only when `topEmotion` returns something other than the
 * "—" placeholder.
 */
const stanceSublabel = (
  per1kTokens: number | undefined,
  emotionAvg: Record<string, number> | undefined,
) => {
  let rateLabel = "Word frequency";
  if (typeof per1kTokens === "number") {
    rateLabel = `${per1kTokens.toFixed(1)} per 1k words`;
  }

  const mood = topEmotion(emotionAvg);
  if (mood === "—") {
    return rateLabel;
  }
  return `${rateLabel} • Avg mood: ${mood}`;
};
return (
<div style={styles.page}>
<div style={{ ...styles.container, ...styles.grid }}>

View File

@@ -26,12 +26,12 @@ import {
buildDateBucketSpec,
buildOneTimeUsersSpec,
buildUserSpec,
getExplorerButtonStyle,
type CorpusExplorerSpec,
} from "../utils/corpusExplorer";
const styles = StatsStyling;
const MAX_WORDCLOUD_WORDS = 250;
const exploreButtonStyle = { padding: "4px 8px", fontSize: 12 };
const WORDCLOUD_OPTIONS = {
rotations: 2,
@@ -80,7 +80,7 @@ function convertFrequencyData(data: FrequencyWord[]) {
const renderExploreButton = (onClick: () => void) => (
<button
onClick={onClick}
style={{ ...styles.buttonSecondary, ...getExplorerButtonStyle() }}
style={{ ...styles.buttonSecondary, ...exploreButtonStyle }}
>
Explore
</button>

View File

@@ -20,7 +20,7 @@ type GraphLink = {
value: number;
};
function ApiToGraphData(apiData: InteractionGraph) {
function toGraphData(apiData: InteractionGraph) {
const links: GraphLink[] = [];
const connectedNodeIds = new Set<string>();
@@ -56,7 +56,7 @@ const UserStats = ({
onExplore,
}: UserStatsProps) => {
const graphData = useMemo(
() => ApiToGraphData(interactionGraph),
() => toGraphData(interactionGraph),
[interactionGraph],
);
const graphContainerRef = useRef<HTMLDivElement | null>(null);

View File

@@ -66,6 +66,26 @@ const EMPTY_EXPLORER_STATE: ExplorerState = {
error: "",
};
/**
 * Creates the state for an open explorer dialog described by `spec`.
 *
 * The base state is open, not loading, has no records and no error, and takes
 * its title/description from the spec. A missing `emptyMessage` falls back to
 * "No matching records found.". Any field present in `patch` overrides the
 * corresponding base field.
 *
 * @param spec  Explorer spec supplying title, description and empty message.
 * @param patch Optional overrides applied on top of the base state.
 * @returns The merged explorer state.
 */
const createExplorerState = (
  spec: CorpusExplorerSpec,
  patch: Partial<ExplorerState> = {},
): ExplorerState => {
  const { title, description, emptyMessage } = spec;

  const baseState: ExplorerState = {
    open: true,
    title,
    description,
    emptyMessage: emptyMessage ?? "No matching records found.",
    records: [],
    loading: false,
    error: "",
  };

  // Patch is spread last so callers can override any base field.
  return { ...baseState, ...patch };
};
/**
 * Sort comparator that orders records newest-first.
 *
 * Each record's sort key is the first non-nullish value among `dt`, `date`
 * and `timestamp`; a record with none of them uses the empty string.
 *
 * When both keys are finite numbers they are compared numerically, so keys
 * with a different number of digits (e.g. 999 vs 1000) are ordered correctly
 * instead of lexicographically. In every other case both keys are stringified
 * and compared with `localeCompare`, which is the previous behaviour.
 *
 * @returns A negative number when `a` is newer than `b`, a positive number
 *          when `b` is newer, and 0 when the keys are equal.
 */
const compareRecordsByNewest = (a: DatasetRecord, b: DatasetRecord) => {
  const aKey: unknown = a.dt ?? a.date ?? a.timestamp ?? "";
  const bKey: unknown = b.dt ?? b.date ?? b.timestamp ?? "";

  if (
    typeof aKey === "number" &&
    typeof bKey === "number" &&
    Number.isFinite(aKey) &&
    Number.isFinite(bKey)
  ) {
    // Numeric keys: subtract so the larger (newer) value sorts first.
    return bKey - aKey;
  }

  return String(bKey).localeCompare(String(aKey));
};
const parseJsonLikePayload = (value: string): unknown => {
const normalized = value
.replace(/\uFEFF/g, "")
@@ -86,16 +106,23 @@ const parseJsonLikePayload = (value: string): unknown => {
return JSON.parse(normalized);
};
/**
 * Attempts to parse `value` with `parseJsonLikePayload` and normalize the
 * result with `normalizeRecordPayload`.
 *
 * Any exception thrown by either step is swallowed and reported as `null`, so
 * callers can fall through to the next candidate format.
 */
const tryParseRecords = (value: string) => {
  try {
    const parsed = parseJsonLikePayload(value);
    return normalizeRecordPayload(parsed);
  } catch {
    // Not parseable in this form; signal failure without throwing.
    return null;
  }
};
const parseRecordStringPayload = (payload: string): DatasetRecord[] | null => {
const trimmed = payload.trim();
if (!trimmed) {
return [];
}
try {
return normalizeRecordPayload(parseJsonLikePayload(trimmed));
} catch {
// Continue with additional fallback formats below.
const direct = tryParseRecords(trimmed);
if (direct) {
return direct;
}
const ndjsonLines = trimmed
@@ -106,29 +133,24 @@ const parseRecordStringPayload = (payload: string): DatasetRecord[] | null => {
try {
return ndjsonLines.map((line) => parseJsonLikePayload(line)) as DatasetRecord[];
} catch {
// Continue with wrapped JSON extraction.
}
}
const bracketStart = trimmed.indexOf("[");
const bracketEnd = trimmed.lastIndexOf("]");
if (bracketStart !== -1 && bracketEnd > bracketStart) {
const candidate = trimmed.slice(bracketStart, bracketEnd + 1);
try {
return normalizeRecordPayload(parseJsonLikePayload(candidate));
} catch {
// Continue with object extraction.
const parsed = tryParseRecords(trimmed.slice(bracketStart, bracketEnd + 1));
if (parsed) {
return parsed;
}
}
const braceStart = trimmed.indexOf("{");
const braceEnd = trimmed.lastIndexOf("}");
if (braceStart !== -1 && braceEnd > braceStart) {
const candidate = trimmed.slice(braceStart, braceEnd + 1);
try {
return normalizeRecordPayload(parseJsonLikePayload(candidate));
} catch {
return null;
const parsed = tryParseRecords(trimmed.slice(braceStart, braceEnd + 1));
if (parsed) {
return parsed;
}
}
@@ -316,45 +338,22 @@ const StatPage = () => {
};
const openExplorer = async (spec: CorpusExplorerSpec) => {
setExplorerState({
open: true,
title: spec.title,
description: spec.description,
emptyMessage: spec.emptyMessage ?? "No matching records found.",
records: [],
loading: true,
error: "",
});
setExplorerState(createExplorerState(spec, { loading: true }));
try {
const records = await ensureFilteredRecords();
const context = buildExplorerContext(records);
const matched = records.filter((record) => spec.matcher(record, context));
matched.sort((a, b) => {
const aValue = String(a.dt ?? a.date ?? a.timestamp ?? "");
const bValue = String(b.dt ?? b.date ?? b.timestamp ?? "");
return bValue.localeCompare(aValue);
});
const matched = records
.filter((record) => spec.matcher(record, context))
.sort(compareRecordsByNewest);
setExplorerState({
open: true,
title: spec.title,
description: spec.description,
emptyMessage: spec.emptyMessage ?? "No matching records found.",
records: matched,
loading: false,
error: "",
});
setExplorerState(createExplorerState(spec, { records: matched }));
} catch (e) {
setExplorerState({
open: true,
title: spec.title,
description: spec.description,
emptyMessage: spec.emptyMessage ?? "No matching records found.",
records: [],
loading: false,
error: `Failed to load corpus records: ${String(e)}`,
});
setExplorerState(
createExplorerState(spec, {
error: `Failed to load corpus records: ${String(e)}`,
}),
);
}
};

View File

@@ -1,5 +1,3 @@
import type { CSSProperties } from "react";
type EntityRecord = {
text?: string;
[key: string]: unknown;
@@ -58,11 +56,6 @@ const EMOTION_KEYS = [
"emotion_sadness",
] as const;
const shrinkButtonStyle: CSSProperties = {
padding: "4px 8px",
fontSize: 12,
};
const toText = (value: unknown) => {
if (typeof value === "string") {
return value;
@@ -83,6 +76,7 @@ const toText = (value: unknown) => {
};
// Comparison form of a value: converted with toText, then trimmed and lower-cased.
const normalize = (value: unknown) => toText(value).trim().toLowerCase();
// Author of a record as trimmed text (via toText); letter case is left unchanged.
const getAuthor = (record: DatasetRecord) => toText(record.author).trim();
/**
 * Returns a record's title and content joined by a single space, with
 * leading/trailing whitespace removed. A null or undefined title or content
 * contributes an empty string.
 */
const getRecordText = (record: DatasetRecord) => {
  const title = record.title ?? "";
  const content = record.content ?? "";
  return `${title} ${content}`.trim();
};
@@ -152,11 +146,11 @@ const matchesPhrase = (record: DatasetRecord, phrase: string) => {
return false;
}
return pattern.test(getRecordText(record).toLowerCase());
return pattern.test(getRecordText(record));
};
const recordIdentityBucket = (record: DatasetRecord) => {
const text = getRecordText(record).toLowerCase();
const text = getRecordText(record);
const inHits = countMatches(IN_GROUP_PATTERN, text);
const outHits = countMatches(OUT_GROUP_PATTERN, text);
@@ -171,48 +165,30 @@ const recordIdentityBucket = (record: DatasetRecord) => {
return "tie";
};
const createAuthorEventCounts = (records: DatasetRecord[]) => {
const counts = new Map<string, number>();
const buildExplorerContext = (records: DatasetRecord[]): CorpusExplorerContext => {
const authorByPostId = new Map<string, string>();
const authorEventCounts = new Map<string, number>();
const authorCommentCounts = new Map<string, number>();
for (const record of records) {
const author = toText(record.author).trim();
const author = getAuthor(record);
if (!author) {
continue;
}
counts.set(author, (counts.get(author) ?? 0) + 1);
}
return counts;
};
const createAuthorCommentCounts = (records: DatasetRecord[]) => {
const counts = new Map<string, number>();
for (const record of records) {
const author = toText(record.author).trim();
if (!author || record.type !== "comment") {
continue;
authorEventCounts.set(author, (authorEventCounts.get(author) ?? 0) + 1);
if (record.type === "comment") {
authorCommentCounts.set(author, (authorCommentCounts.get(author) ?? 0) + 1);
}
counts.set(author, (counts.get(author) ?? 0) + 1);
}
return counts;
};
const createAuthorByPostId = (records: DatasetRecord[]) => {
const map = new Map<string, string>();
for (const record of records) {
const postId = record.post_id;
const author = toText(record.author).trim();
if (postId === null || postId === undefined || !author) {
continue;
if (record.post_id !== null && record.post_id !== undefined) {
authorByPostId.set(String(record.post_id), author);
}
map.set(String(postId), author);
}
return map;
};
const buildExplorerContext = (records: DatasetRecord[]): CorpusExplorerContext => ({
authorByPostId: createAuthorByPostId(records),
authorEventCounts: createAuthorEventCounts(records),
authorCommentCounts: createAuthorCommentCounts(records),
});
return { authorByPostId, authorEventCounts, authorCommentCounts };
};
const buildAllRecordsSpec = (): CorpusExplorerSpec => ({
title: "Corpus Explorer",
@@ -221,19 +197,27 @@ const buildAllRecordsSpec = (): CorpusExplorerSpec => ({
matcher: () => true,
});
const buildUserSpec = (author: string): CorpusExplorerSpec => ({
title: `User: ${author}`,
description: `All records authored by ${author}.`,
emptyMessage: `No records found for ${author}.`,
matcher: (record) => normalize(record.author) === normalize(author),
});
const buildUserSpec = (author: string): CorpusExplorerSpec => {
const target = normalize(author);
const buildTopicSpec = (topic: string): CorpusExplorerSpec => ({
title: `Topic: ${topic}`,
description: `Records assigned to the ${topic} topic bucket.`,
emptyMessage: `No records found in the ${topic} topic bucket.`,
matcher: (record) => normalize(record.topic) === normalize(topic),
});
return {
title: `User: ${author}`,
description: `All records authored by ${author}.`,
emptyMessage: `No records found for ${author}.`,
matcher: (record) => normalize(record.author) === target,
};
};
/**
 * Builds an explorer spec that selects records whose `topic` equals the given
 * topic after normalization. The topic is normalized once when the spec is
 * built rather than on every matcher call.
 */
const buildTopicSpec = (topic: string): CorpusExplorerSpec => {
  const wantedTopic = normalize(topic);

  const spec: CorpusExplorerSpec = {
    title: `Topic: ${topic}`,
    description: `Records assigned to the ${topic} topic bucket.`,
    emptyMessage: `No records found in the ${topic} topic bucket.`,
    matcher: (record) => {
      const recordTopic = normalize(record.topic);
      return recordTopic === wantedTopic;
    },
  };

  return spec;
};
const buildDateBucketSpec = (date: string): CorpusExplorerSpec => ({
title: `Date Bucket: ${date}`,
@@ -256,88 +240,75 @@ const buildNgramSpec = (ngram: string): CorpusExplorerSpec => ({
matcher: (record) => matchesPhrase(record, ngram),
});
const buildEntitySpec = (entity: string): CorpusExplorerSpec => ({
title: `Entity: ${entity}`,
description: `Records mentioning the ${entity} entity.`,
emptyMessage: `No records found for the ${entity} entity.`,
matcher: (record) => {
const target = normalize(entity);
const entities = Array.isArray(record.ner_entities) ? record.ner_entities : [];
return entities.some((item) => normalize(item?.text) === target) || matchesPhrase(record, entity);
},
});
const buildEntitySpec = (entity: string): CorpusExplorerSpec => {
const target = normalize(entity);
const buildSourceSpec = (source: string): CorpusExplorerSpec => ({
title: `Source: ${source}`,
description: `Records from the ${source} source.`,
emptyMessage: `No records found for ${source}.`,
matcher: (record) => normalize(record.source) === normalize(source),
});
return {
title: `Entity: ${entity}`,
description: `Records mentioning the ${entity} entity.`,
emptyMessage: `No records found for the ${entity} entity.`,
matcher: (record) => {
const entities = Array.isArray(record.ner_entities) ? record.ner_entities : [];
return entities.some((item) => normalize(item?.text) === target) || matchesPhrase(record, entity);
},
};
};
const buildDominantEmotionSpec = (emotion: string): CorpusExplorerSpec => ({
title: `Dominant Emotion: ${emotion}`,
description: `Records where ${emotion} is the strongest emotion score.`,
emptyMessage: `No records found with dominant emotion ${emotion}.`,
matcher: (record) => getDominantEmotion(record) === normalize(emotion),
});
const buildSourceSpec = (source: string): CorpusExplorerSpec => {
const target = normalize(source);
const buildReplyPairSpec = (source: string, target: string): CorpusExplorerSpec => ({
title: `Reply Path: ${source} -> ${target}`,
description: `Reply records authored by ${source} in response to ${target}.`,
emptyMessage: `No reply records found for ${source} -> ${target}.`,
matcher: (record, context) => {
if (normalize(record.author) !== normalize(source)) {
return false;
}
return {
title: `Source: ${source}`,
description: `Records from the ${source} source.`,
emptyMessage: `No records found for ${source}.`,
matcher: (record) => normalize(record.source) === target,
};
};
const replyTo = record.reply_to;
if (replyTo === null || replyTo === undefined || replyTo === "") {
return false;
}
const buildDominantEmotionSpec = (emotion: string): CorpusExplorerSpec => {
const target = normalize(emotion);
const replyTarget = context.authorByPostId.get(String(replyTo));
return normalize(replyTarget) === normalize(target);
},
});
return {
title: `Dominant Emotion: ${emotion}`,
description: `Records where ${emotion} is the strongest emotion score.`,
emptyMessage: `No records found with dominant emotion ${emotion}.`,
matcher: (record) => getDominantEmotion(record) === target,
};
};
/**
 * Builds an explorer spec that selects reply records written by `source` in
 * response to `target`.
 *
 * A record matches when its normalized author equals the normalized source,
 * it has a non-empty `reply_to`, and the author registered for that id in
 * `context.authorByPostId` normalizes to the target. Both names are
 * normalized once when the spec is built.
 */
const buildReplyPairSpec = (source: string, target: string): CorpusExplorerSpec => {
  const wantedAuthor = normalize(source);
  const wantedParentAuthor = normalize(target);

  return {
    title: `Reply Path: ${source} -> ${target}`,
    description: `Reply records authored by ${source} in response to ${target}.`,
    emptyMessage: `No reply records found for ${source} -> ${target}.`,
    matcher: (record, context) => {
      const isFromSource = normalize(record.author) === wantedAuthor;
      if (!isFromSource) {
        return false;
      }

      // Records without a usable reply_to are not replies.
      const parentId = record.reply_to;
      if (parentId == null || parentId === "") {
        return false;
      }

      const parentAuthor = context.authorByPostId.get(String(parentId));
      return normalize(parentAuthor) === wantedParentAuthor;
    },
  };
};
const buildOneTimeUsersSpec = (): CorpusExplorerSpec => ({
title: "One-Time Users",
description: "Records written by authors who appear exactly once in the filtered corpus.",
emptyMessage: "No one-time-user records found.",
matcher: (record, context) => {
const author = toText(record.author).trim();
const author = getAuthor(record);
return !!author && context.authorEventCounts.get(author) === 1;
},
});
/**
 * Builds an explorer spec that selects comment records written by the
 * `topAuthorCount` authors with the highest comment counts in
 * `context.authorCommentCounts`.
 *
 * The ranking depends only on the context, so it is computed once per context
 * object and cached in this spec's closure as a Set. Previously the counts
 * were re-sorted and linearly searched for every record passed to the matcher.
 * The cache is keyed on context identity; it assumes a context's counts are
 * not mutated after the first matcher call.
 *
 * @param topAuthorCount Number of top-ranked commenters to include.
 */
const buildTopCommentersSpec = (topAuthorCount: number): CorpusExplorerSpec => {
  // Context object the cached ranking was computed for (undefined = none yet).
  let rankedFor: unknown;
  let topAuthors = new Set<string>();

  return {
    title: "Top Commenters",
    description: `Comment records from the top ${topAuthorCount} commenters in the filtered corpus.`,
    emptyMessage: "No top-commenter records found.",
    matcher: (record, context) => {
      if (record.type !== "comment") {
        return false;
      }

      if (rankedFor !== context) {
        const ranked = Array.from(context.authorCommentCounts.entries())
          .sort((a, b) => b[1] - a[1])
          .slice(0, topAuthorCount)
          .map(([author]) => author);
        topAuthors = new Set(ranked);
        rankedFor = context;
      }

      return topAuthors.has(toText(record.author).trim());
    },
  };
};
/**
 * Builds an explorer spec that selects comment records whose author has a
 * comment count of exactly one in `context.authorCommentCounts`. Records
 * without an author, or that are not comments, never match.
 */
const buildSingleCommentAuthorsSpec = (): CorpusExplorerSpec => {
  const spec: CorpusExplorerSpec = {
    title: "Single-Comment Authors",
    description: "Comment records from authors who commented exactly once.",
    emptyMessage: "No single-comment-author records found.",
    matcher: (record, context) => {
      const author = toText(record.author).trim();
      if (record.type !== "comment" || author === "") {
        return false;
      }
      return context.authorCommentCounts.get(author) === 1;
    },
  };

  return spec;
};
const buildIdentityBucketSpec = (bucket: "in" | "out" | "tie"): CorpusExplorerSpec => {
const labels = {
in: "In-Group Posts",
@@ -376,9 +347,7 @@ const buildDeonticSpec = () =>
const buildPermissionSpec = () =>
buildPatternSpec("Permission Words", "Records containing permission language.", PERMISSION_PATTERN);
const getExplorerButtonStyle = () => shrinkButtonStyle;
export type { DatasetRecord, CorpusExplorerContext, CorpusExplorerSpec };
export type { DatasetRecord, CorpusExplorerSpec };
export {
buildAllRecordsSpec,
buildCertaintySpec,
@@ -393,13 +362,10 @@ export {
buildOneTimeUsersSpec,
buildPermissionSpec,
buildReplyPairSpec,
buildSingleCommentAuthorsSpec,
buildSourceSpec,
buildTopicSpec,
buildTopCommentersSpec,
buildUserSpec,
buildWordSpec,
getDateBucket,
getExplorerButtonStyle,
toText,
};