From c11434344a93b137fa67c461d302b0db6903fadf Mon Sep 17 00:00:00 2001 From: Dylan De Faoite Date: Mon, 13 Apr 2026 17:06:46 +0100 Subject: [PATCH] refactor: streamline CorpusExplorer components --- frontend/src/components/CorpusExplorer.tsx | 9 +- frontend/src/components/CulturalStats.tsx | 19 +- frontend/src/components/SummaryStats.tsx | 4 +- frontend/src/components/UserStats.tsx | 4 +- frontend/src/pages/Stats.tsx | 95 +++++---- frontend/src/utils/corpusExplorer.ts | 218 +++++++++------------ 6 files changed, 147 insertions(+), 202 deletions(-) diff --git a/frontend/src/components/CorpusExplorer.tsx b/frontend/src/components/CorpusExplorer.tsx index b80bda8..6c8c781 100644 --- a/frontend/src/components/CorpusExplorer.tsx +++ b/frontend/src/components/CorpusExplorer.tsx @@ -1,4 +1,4 @@ -import { useEffect, useMemo, useState } from "react"; +import { useEffect, useState } from "react"; import { Dialog, DialogPanel, DialogTitle } from "@headlessui/react"; import StatsStyling from "../styles/stats_styling"; @@ -103,11 +103,6 @@ const CorpusExplorer = ({ } }, [open, title, records.length]); - const visibleRecords = useMemo( - () => records.slice(0, visibleCount), - [records, visibleCount], - ); - const hasMoreRecords = visibleCount < records.length; return ( @@ -158,7 +153,7 @@ const CorpusExplorer = ({ paddingRight: 4, }} > - {visibleRecords.map((record, index) => { + {records.slice(0, visibleCount).map((record, index) => { const recordKey = getRecordKey(record, index); const titleText = getRecordTitle(record); const content = cleanText(record.content); diff --git a/frontend/src/components/CulturalStats.tsx b/frontend/src/components/CulturalStats.tsx index d489168..38b7ee6 100644 --- a/frontend/src/components/CulturalStats.tsx +++ b/frontend/src/components/CulturalStats.tsx @@ -8,11 +8,11 @@ import { buildHedgeSpec, buildIdentityBucketSpec, buildPermissionSpec, - getExplorerButtonStyle, type CorpusExplorerSpec, } from "../utils/corpusExplorer"; const styles = StatsStyling; +const exploreButtonStyle = { padding: "4px 8px", fontSize: 12 }; type CulturalStatsProps = { data: CulturalAnalysisResponse; @@ -22,7 +22,7 @@ type CulturalStatsProps = { const renderExploreButton = (onClick: () => void) => ( @@ -59,21 +59,6 @@ const CulturalStats = ({ data, onExplore }: CulturalStatsProps) => { return `${dominantLabel} (${(dominant[1] * 100).toFixed(1)}%)`; }; - const stanceSublabel = ( - per1kTokens: number | undefined, - emotionAvg: Record | undefined, - ) => { - const rateLabel = - typeof per1kTokens === "number" - ? `${per1kTokens.toFixed(1)} per 1k words` - : "Word frequency"; - const emotionLabel = topEmotion(emotionAvg); - - return emotionLabel === "—" - ? rateLabel - : `${rateLabel} • Avg mood: ${emotionLabel}`; - }; - return (
diff --git a/frontend/src/components/SummaryStats.tsx b/frontend/src/components/SummaryStats.tsx index d53d6da..9597fca 100644 --- a/frontend/src/components/SummaryStats.tsx +++ b/frontend/src/components/SummaryStats.tsx @@ -26,12 +26,12 @@ import { buildDateBucketSpec, buildOneTimeUsersSpec, buildUserSpec, - getExplorerButtonStyle, type CorpusExplorerSpec, } from "../utils/corpusExplorer"; const styles = StatsStyling; const MAX_WORDCLOUD_WORDS = 250; +const exploreButtonStyle = { padding: "4px 8px", fontSize: 12 }; const WORDCLOUD_OPTIONS = { rotations: 2, @@ -80,7 +80,7 @@ function convertFrequencyData(data: FrequencyWord[]) { const renderExploreButton = (onClick: () => void) => ( diff --git a/frontend/src/components/UserStats.tsx b/frontend/src/components/UserStats.tsx index 50d96f3..fc2a57e 100644 --- a/frontend/src/components/UserStats.tsx +++ b/frontend/src/components/UserStats.tsx @@ -20,7 +20,7 @@ type GraphLink = { value: number; }; -function ApiToGraphData(apiData: InteractionGraph) { +function toGraphData(apiData: InteractionGraph) { const links: GraphLink[] = []; const connectedNodeIds = new Set(); @@ -56,7 +56,7 @@ const UserStats = ({ onExplore, }: UserStatsProps) => { const graphData = useMemo( - () => ApiToGraphData(interactionGraph), + () => toGraphData(interactionGraph), [interactionGraph], ); const graphContainerRef = useRef(null); diff --git a/frontend/src/pages/Stats.tsx b/frontend/src/pages/Stats.tsx index 0651a30..a14e733 100644 --- a/frontend/src/pages/Stats.tsx +++ b/frontend/src/pages/Stats.tsx @@ -66,6 +66,26 @@ const EMPTY_EXPLORER_STATE: ExplorerState = { error: "", }; +const createExplorerState = ( + spec: CorpusExplorerSpec, + patch: Partial = {}, +): ExplorerState => ({ + open: true, + title: spec.title, + description: spec.description, + emptyMessage: spec.emptyMessage ?? "No matching records found.", + records: [], + loading: false, + error: "", + ...patch, +}); + +const compareRecordsByNewest = (a: DatasetRecord, b: DatasetRecord) => { + const aValue = String(a.dt ?? a.date ?? a.timestamp ?? ""); + const bValue = String(b.dt ?? b.date ?? b.timestamp ?? ""); + return bValue.localeCompare(aValue); +}; + const parseJsonLikePayload = (value: string): unknown => { const normalized = value .replace(/\uFEFF/g, "") @@ -86,16 +106,23 @@ const parseJsonLikePayload = (value: string): unknown => { return JSON.parse(normalized); }; +const tryParseRecords = (value: string) => { + try { + return normalizeRecordPayload(parseJsonLikePayload(value)); + } catch { + return null; + } +}; + const parseRecordStringPayload = (payload: string): DatasetRecord[] | null => { const trimmed = payload.trim(); if (!trimmed) { return []; } - try { - return normalizeRecordPayload(parseJsonLikePayload(trimmed)); - } catch { - // Continue with additional fallback formats below. + const direct = tryParseRecords(trimmed); + if (direct) { + return direct; } const ndjsonLines = trimmed @@ -106,29 +133,24 @@ const parseRecordStringPayload = (payload: string): DatasetRecord[] | null => { try { return ndjsonLines.map((line) => parseJsonLikePayload(line)) as DatasetRecord[]; } catch { - // Continue with wrapped JSON extraction. } } const bracketStart = trimmed.indexOf("["); const bracketEnd = trimmed.lastIndexOf("]"); if (bracketStart !== -1 && bracketEnd > bracketStart) { - const candidate = trimmed.slice(bracketStart, bracketEnd + 1); - try { - return normalizeRecordPayload(parseJsonLikePayload(candidate)); - } catch { - // Continue with object extraction. + const parsed = tryParseRecords(trimmed.slice(bracketStart, bracketEnd + 1)); + if (parsed) { + return parsed; } } const braceStart = trimmed.indexOf("{"); const braceEnd = trimmed.lastIndexOf("}"); if (braceStart !== -1 && braceEnd > braceStart) { - const candidate = trimmed.slice(braceStart, braceEnd + 1); - try { - return normalizeRecordPayload(parseJsonLikePayload(candidate)); - } catch { - return null; + const parsed = tryParseRecords(trimmed.slice(braceStart, braceEnd + 1)); + if (parsed) { + return parsed; } } @@ -316,45 +338,22 @@ const StatPage = () => { }; const openExplorer = async (spec: CorpusExplorerSpec) => { - setExplorerState({ - open: true, - title: spec.title, - description: spec.description, - emptyMessage: spec.emptyMessage ?? "No matching records found.", - records: [], - loading: true, - error: "", - }); + setExplorerState(createExplorerState(spec, { loading: true })); try { const records = await ensureFilteredRecords(); const context = buildExplorerContext(records); - const matched = records.filter((record) => spec.matcher(record, context)); - matched.sort((a, b) => { - const aValue = String(a.dt ?? a.date ?? a.timestamp ?? ""); - const bValue = String(b.dt ?? b.date ?? b.timestamp ?? ""); - return bValue.localeCompare(aValue); - }); + const matched = records + .filter((record) => spec.matcher(record, context)) + .sort(compareRecordsByNewest); - setExplorerState({ - open: true, - title: spec.title, - description: spec.description, - emptyMessage: spec.emptyMessage ?? "No matching records found.", - records: matched, - loading: false, - error: "", - }); + setExplorerState(createExplorerState(spec, { records: matched })); } catch (e) { - setExplorerState({ - open: true, - title: spec.title, - description: spec.description, - emptyMessage: spec.emptyMessage ?? "No matching records found.", - records: [], - loading: false, - error: `Failed to load corpus records: ${String(e)}`, - }); + setExplorerState( + createExplorerState(spec, { + error: `Failed to load corpus records: ${String(e)}`, + }), + ); } }; diff --git a/frontend/src/utils/corpusExplorer.ts b/frontend/src/utils/corpusExplorer.ts index e1ddb70..24801e3 100644 --- a/frontend/src/utils/corpusExplorer.ts +++ b/frontend/src/utils/corpusExplorer.ts @@ -1,5 +1,3 @@ -import type { CSSProperties } from "react"; - type EntityRecord = { text?: string; [key: string]: unknown; @@ -58,11 +56,6 @@ const EMOTION_KEYS = [ "emotion_sadness", ] as const; -const shrinkButtonStyle: CSSProperties = { - padding: "4px 8px", - fontSize: 12, -}; - const toText = (value: unknown) => { if (typeof value === "string") { return value; @@ -83,6 +76,7 @@ const toText = (value: unknown) => { }; const normalize = (value: unknown) => toText(value).trim().toLowerCase(); +const getAuthor = (record: DatasetRecord) => toText(record.author).trim(); const getRecordText = (record: DatasetRecord) => `${record.title ?? ""} ${record.content ?? ""}`.trim(); @@ -152,11 +146,11 @@ const matchesPhrase = (record: DatasetRecord, phrase: string) => { return false; } - return pattern.test(getRecordText(record).toLowerCase()); + return pattern.test(getRecordText(record)); }; const recordIdentityBucket = (record: DatasetRecord) => { - const text = getRecordText(record).toLowerCase(); + const text = getRecordText(record); const inHits = countMatches(IN_GROUP_PATTERN, text); const outHits = countMatches(OUT_GROUP_PATTERN, text); @@ -171,48 +165,30 @@ const recordIdentityBucket = (record: DatasetRecord) => { return "tie"; }; -const createAuthorEventCounts = (records: DatasetRecord[]) => { - const counts = new Map(); +const buildExplorerContext = (records: DatasetRecord[]): CorpusExplorerContext => { + const authorByPostId = new Map(); + const authorEventCounts = new Map(); + const authorCommentCounts = new Map(); + for (const record of records) { - const author = toText(record.author).trim(); + const author = getAuthor(record); if (!author) { continue; } - counts.set(author, (counts.get(author) ?? 0) + 1); - } - return counts; -}; -const createAuthorCommentCounts = (records: DatasetRecord[]) => { - const counts = new Map(); - for (const record of records) { - const author = toText(record.author).trim(); - if (!author || record.type !== "comment") { - continue; + authorEventCounts.set(author, (authorEventCounts.get(author) ?? 0) + 1); + + if (record.type === "comment") { + authorCommentCounts.set(author, (authorCommentCounts.get(author) ?? 0) + 1); } - counts.set(author, (counts.get(author) ?? 0) + 1); - } - return counts; -}; -const createAuthorByPostId = (records: DatasetRecord[]) => { - const map = new Map(); - for (const record of records) { - const postId = record.post_id; - const author = toText(record.author).trim(); - if (postId === null || postId === undefined || !author) { - continue; + if (record.post_id !== null && record.post_id !== undefined) { + authorByPostId.set(String(record.post_id), author); } - map.set(String(postId), author); } - return map; -}; -const buildExplorerContext = (records: DatasetRecord[]): CorpusExplorerContext => ({ - authorByPostId: createAuthorByPostId(records), - authorEventCounts: createAuthorEventCounts(records), - authorCommentCounts: createAuthorCommentCounts(records), -}); + return { authorByPostId, authorEventCounts, authorCommentCounts }; +}; const buildAllRecordsSpec = (): CorpusExplorerSpec => ({ title: "Corpus Explorer", @@ -221,19 +197,27 @@ const buildAllRecordsSpec = (): CorpusExplorerSpec => ({ matcher: () => true, }); -const buildUserSpec = (author: string): CorpusExplorerSpec => ({ - title: `User: ${author}`, - description: `All records authored by ${author}.`, - emptyMessage: `No records found for ${author}.`, - matcher: (record) => normalize(record.author) === normalize(author), -}); +const buildUserSpec = (author: string): CorpusExplorerSpec => { + const target = normalize(author); -const buildTopicSpec = (topic: string): CorpusExplorerSpec => ({ - title: `Topic: ${topic}`, - description: `Records assigned to the ${topic} topic bucket.`, - emptyMessage: `No records found in the ${topic} topic bucket.`, - matcher: (record) => normalize(record.topic) === normalize(topic), -}); + return { + title: `User: ${author}`, + description: `All records authored by ${author}.`, + emptyMessage: `No records found for ${author}.`, + matcher: (record) => normalize(record.author) === target, + }; +}; + +const buildTopicSpec = (topic: string): CorpusExplorerSpec => { + const target = normalize(topic); + + return { + title: `Topic: ${topic}`, + description: `Records assigned to the ${topic} topic bucket.`, + emptyMessage: `No records found in the ${topic} topic bucket.`, + matcher: (record) => normalize(record.topic) === target, + }; +}; const buildDateBucketSpec = (date: string): CorpusExplorerSpec => ({ title: `Date Bucket: ${date}`, @@ -256,88 +240,75 @@ const buildNgramSpec = (ngram: string): CorpusExplorerSpec => ({ matcher: (record) => matchesPhrase(record, ngram), }); -const buildEntitySpec = (entity: string): CorpusExplorerSpec => ({ - title: `Entity: ${entity}`, - description: `Records mentioning the ${entity} entity.`, - emptyMessage: `No records found for the ${entity} entity.`, - matcher: (record) => { - const target = normalize(entity); - const entities = Array.isArray(record.ner_entities) ? record.ner_entities : []; - return entities.some((item) => normalize(item?.text) === target) || matchesPhrase(record, entity); - }, -}); +const buildEntitySpec = (entity: string): CorpusExplorerSpec => { + const target = normalize(entity); -const buildSourceSpec = (source: string): CorpusExplorerSpec => ({ - title: `Source: ${source}`, - description: `Records from the ${source} source.`, - emptyMessage: `No records found for ${source}.`, - matcher: (record) => normalize(record.source) === normalize(source), -}); + return { + title: `Entity: ${entity}`, + description: `Records mentioning the ${entity} entity.`, + emptyMessage: `No records found for the ${entity} entity.`, + matcher: (record) => { + const entities = Array.isArray(record.ner_entities) ? record.ner_entities : []; + return entities.some((item) => normalize(item?.text) === target) || matchesPhrase(record, entity); + }, + }; +}; -const buildDominantEmotionSpec = (emotion: string): CorpusExplorerSpec => ({ - title: `Dominant Emotion: ${emotion}`, - description: `Records where ${emotion} is the strongest emotion score.`, - emptyMessage: `No records found with dominant emotion ${emotion}.`, - matcher: (record) => getDominantEmotion(record) === normalize(emotion), -}); +const buildSourceSpec = (source: string): CorpusExplorerSpec => { + const target = normalize(source); -const buildReplyPairSpec = (source: string, target: string): CorpusExplorerSpec => ({ - title: `Reply Path: ${source} -> ${target}`, - description: `Reply records authored by ${source} in response to ${target}.`, - emptyMessage: `No reply records found for ${source} -> ${target}.`, - matcher: (record, context) => { - if (normalize(record.author) !== normalize(source)) { - return false; - } + return { + title: `Source: ${source}`, + description: `Records from the ${source} source.`, + emptyMessage: `No records found for ${source}.`, + matcher: (record) => normalize(record.source) === target, + }; +}; - const replyTo = record.reply_to; - if (replyTo === null || replyTo === undefined || replyTo === "") { - return false; - } +const buildDominantEmotionSpec = (emotion: string): CorpusExplorerSpec => { + const target = normalize(emotion); - const replyTarget = context.authorByPostId.get(String(replyTo)); - return normalize(replyTarget) === normalize(target); - }, -}); + return { + title: `Dominant Emotion: ${emotion}`, + description: `Records where ${emotion} is the strongest emotion score.`, + emptyMessage: `No records found with dominant emotion ${emotion}.`, + matcher: (record) => getDominantEmotion(record) === target, + }; +}; + +const buildReplyPairSpec = (source: string, target: string): CorpusExplorerSpec => { + const sourceName = normalize(source); + const targetName = normalize(target); + + return { + title: `Reply Path: ${source} -> ${target}`, + description: `Reply records authored by ${source} in response to ${target}.`, + emptyMessage: `No reply records found for ${source} -> ${target}.`, + matcher: (record, context) => { + if (normalize(record.author) !== sourceName) { + return false; + } + + const replyTo = record.reply_to; + if (replyTo === null || replyTo === undefined || replyTo === "") { + return false; + } + + return normalize(context.authorByPostId.get(String(replyTo))) === targetName; + }, + }; +}; const buildOneTimeUsersSpec = (): CorpusExplorerSpec => ({ title: "One-Time Users", description: "Records written by authors who appear exactly once in the filtered corpus.", emptyMessage: "No one-time-user records found.", matcher: (record, context) => { - const author = toText(record.author).trim(); + const author = getAuthor(record); return !!author && context.authorEventCounts.get(author) === 1; }, }); -const buildTopCommentersSpec = (topAuthorCount: number): CorpusExplorerSpec => ({ - title: "Top Commenters", - description: `Comment records from the top ${topAuthorCount} commenters in the filtered corpus.`, - emptyMessage: "No top-commenter records found.", - matcher: (record, context) => { - if (record.type !== "comment") { - return false; - } - - const rankedAuthors = Array.from(context.authorCommentCounts.entries()) - .sort((a, b) => b[1] - a[1]) - .slice(0, topAuthorCount) - .map(([author]) => author); - - return rankedAuthors.includes(toText(record.author).trim()); - }, -}); - -const buildSingleCommentAuthorsSpec = (): CorpusExplorerSpec => ({ - title: "Single-Comment Authors", - description: "Comment records from authors who commented exactly once.", - emptyMessage: "No single-comment-author records found.", - matcher: (record, context) => { - const author = toText(record.author).trim(); - return record.type === "comment" && !!author && context.authorCommentCounts.get(author) === 1; - }, -}); - const buildIdentityBucketSpec = (bucket: "in" | "out" | "tie"): CorpusExplorerSpec => { const labels = { in: "In-Group Posts", @@ -376,9 +347,7 @@ const buildDeonticSpec = () => const buildPermissionSpec = () => buildPatternSpec("Permission Words", "Records containing permission language.", PERMISSION_PATTERN); -const getExplorerButtonStyle = () => shrinkButtonStyle; - -export type { DatasetRecord, CorpusExplorerContext, CorpusExplorerSpec }; +export type { DatasetRecord, CorpusExplorerSpec }; export { buildAllRecordsSpec, buildCertaintySpec, @@ -393,13 +362,10 @@ export { buildOneTimeUsersSpec, buildPermissionSpec, buildReplyPairSpec, - buildSingleCommentAuthorsSpec, buildSourceSpec, buildTopicSpec, - buildTopCommentersSpec, buildUserSpec, buildWordSpec, getDateBucket, - getExplorerButtonStyle, toText, };