Corpus Explorer Feature #11
@@ -1,4 +1,4 @@
|
|||||||
import { useEffect, useMemo, useState } from "react";
|
import { useEffect, useState } from "react";
|
||||||
import { Dialog, DialogPanel, DialogTitle } from "@headlessui/react";
|
import { Dialog, DialogPanel, DialogTitle } from "@headlessui/react";
|
||||||
|
|
||||||
import StatsStyling from "../styles/stats_styling";
|
import StatsStyling from "../styles/stats_styling";
|
||||||
@@ -103,11 +103,6 @@ const CorpusExplorer = ({
|
|||||||
}
|
}
|
||||||
}, [open, title, records.length]);
|
}, [open, title, records.length]);
|
||||||
|
|
||||||
const visibleRecords = useMemo(
|
|
||||||
() => records.slice(0, visibleCount),
|
|
||||||
[records, visibleCount],
|
|
||||||
);
|
|
||||||
|
|
||||||
const hasMoreRecords = visibleCount < records.length;
|
const hasMoreRecords = visibleCount < records.length;
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@@ -158,7 +153,7 @@ const CorpusExplorer = ({
|
|||||||
paddingRight: 4,
|
paddingRight: 4,
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
{visibleRecords.map((record, index) => {
|
{records.slice(0, visibleCount).map((record, index) => {
|
||||||
const recordKey = getRecordKey(record, index);
|
const recordKey = getRecordKey(record, index);
|
||||||
const titleText = getRecordTitle(record);
|
const titleText = getRecordTitle(record);
|
||||||
const content = cleanText(record.content);
|
const content = cleanText(record.content);
|
||||||
|
|||||||
@@ -8,11 +8,11 @@ import {
|
|||||||
buildHedgeSpec,
|
buildHedgeSpec,
|
||||||
buildIdentityBucketSpec,
|
buildIdentityBucketSpec,
|
||||||
buildPermissionSpec,
|
buildPermissionSpec,
|
||||||
getExplorerButtonStyle,
|
|
||||||
type CorpusExplorerSpec,
|
type CorpusExplorerSpec,
|
||||||
} from "../utils/corpusExplorer";
|
} from "../utils/corpusExplorer";
|
||||||
|
|
||||||
const styles = StatsStyling;
|
const styles = StatsStyling;
|
||||||
|
const exploreButtonStyle = { padding: "4px 8px", fontSize: 12 };
|
||||||
|
|
||||||
type CulturalStatsProps = {
|
type CulturalStatsProps = {
|
||||||
data: CulturalAnalysisResponse;
|
data: CulturalAnalysisResponse;
|
||||||
@@ -22,7 +22,7 @@ type CulturalStatsProps = {
|
|||||||
const renderExploreButton = (onClick: () => void) => (
|
const renderExploreButton = (onClick: () => void) => (
|
||||||
<button
|
<button
|
||||||
onClick={onClick}
|
onClick={onClick}
|
||||||
style={{ ...styles.buttonSecondary, ...getExplorerButtonStyle() }}
|
style={{ ...styles.buttonSecondary, ...exploreButtonStyle }}
|
||||||
>
|
>
|
||||||
Explore
|
Explore
|
||||||
</button>
|
</button>
|
||||||
@@ -59,21 +59,6 @@ const CulturalStats = ({ data, onExplore }: CulturalStatsProps) => {
|
|||||||
return `${dominantLabel} (${(dominant[1] * 100).toFixed(1)}%)`;
|
return `${dominantLabel} (${(dominant[1] * 100).toFixed(1)}%)`;
|
||||||
};
|
};
|
||||||
|
|
||||||
const stanceSublabel = (
|
|
||||||
per1kTokens: number | undefined,
|
|
||||||
emotionAvg: Record<string, number> | undefined,
|
|
||||||
) => {
|
|
||||||
const rateLabel =
|
|
||||||
typeof per1kTokens === "number"
|
|
||||||
? `${per1kTokens.toFixed(1)} per 1k words`
|
|
||||||
: "Word frequency";
|
|
||||||
const emotionLabel = topEmotion(emotionAvg);
|
|
||||||
|
|
||||||
return emotionLabel === "—"
|
|
||||||
? rateLabel
|
|
||||||
: `${rateLabel} • Avg mood: ${emotionLabel}`;
|
|
||||||
};
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div style={styles.page}>
|
<div style={styles.page}>
|
||||||
<div style={{ ...styles.container, ...styles.grid }}>
|
<div style={{ ...styles.container, ...styles.grid }}>
|
||||||
|
|||||||
@@ -26,12 +26,12 @@ import {
|
|||||||
buildDateBucketSpec,
|
buildDateBucketSpec,
|
||||||
buildOneTimeUsersSpec,
|
buildOneTimeUsersSpec,
|
||||||
buildUserSpec,
|
buildUserSpec,
|
||||||
getExplorerButtonStyle,
|
|
||||||
type CorpusExplorerSpec,
|
type CorpusExplorerSpec,
|
||||||
} from "../utils/corpusExplorer";
|
} from "../utils/corpusExplorer";
|
||||||
|
|
||||||
const styles = StatsStyling;
|
const styles = StatsStyling;
|
||||||
const MAX_WORDCLOUD_WORDS = 250;
|
const MAX_WORDCLOUD_WORDS = 250;
|
||||||
|
const exploreButtonStyle = { padding: "4px 8px", fontSize: 12 };
|
||||||
|
|
||||||
const WORDCLOUD_OPTIONS = {
|
const WORDCLOUD_OPTIONS = {
|
||||||
rotations: 2,
|
rotations: 2,
|
||||||
@@ -80,7 +80,7 @@ function convertFrequencyData(data: FrequencyWord[]) {
|
|||||||
const renderExploreButton = (onClick: () => void) => (
|
const renderExploreButton = (onClick: () => void) => (
|
||||||
<button
|
<button
|
||||||
onClick={onClick}
|
onClick={onClick}
|
||||||
style={{ ...styles.buttonSecondary, ...getExplorerButtonStyle() }}
|
style={{ ...styles.buttonSecondary, ...exploreButtonStyle }}
|
||||||
>
|
>
|
||||||
Explore
|
Explore
|
||||||
</button>
|
</button>
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ type GraphLink = {
|
|||||||
value: number;
|
value: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
function ApiToGraphData(apiData: InteractionGraph) {
|
function toGraphData(apiData: InteractionGraph) {
|
||||||
const links: GraphLink[] = [];
|
const links: GraphLink[] = [];
|
||||||
const connectedNodeIds = new Set<string>();
|
const connectedNodeIds = new Set<string>();
|
||||||
|
|
||||||
@@ -56,7 +56,7 @@ const UserStats = ({
|
|||||||
onExplore,
|
onExplore,
|
||||||
}: UserStatsProps) => {
|
}: UserStatsProps) => {
|
||||||
const graphData = useMemo(
|
const graphData = useMemo(
|
||||||
() => ApiToGraphData(interactionGraph),
|
() => toGraphData(interactionGraph),
|
||||||
[interactionGraph],
|
[interactionGraph],
|
||||||
);
|
);
|
||||||
const graphContainerRef = useRef<HTMLDivElement | null>(null);
|
const graphContainerRef = useRef<HTMLDivElement | null>(null);
|
||||||
|
|||||||
@@ -66,6 +66,26 @@ const EMPTY_EXPLORER_STATE: ExplorerState = {
|
|||||||
error: "",
|
error: "",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const createExplorerState = (
|
||||||
|
spec: CorpusExplorerSpec,
|
||||||
|
patch: Partial<ExplorerState> = {},
|
||||||
|
): ExplorerState => ({
|
||||||
|
open: true,
|
||||||
|
title: spec.title,
|
||||||
|
description: spec.description,
|
||||||
|
emptyMessage: spec.emptyMessage ?? "No matching records found.",
|
||||||
|
records: [],
|
||||||
|
loading: false,
|
||||||
|
error: "",
|
||||||
|
...patch,
|
||||||
|
});
|
||||||
|
|
||||||
|
const compareRecordsByNewest = (a: DatasetRecord, b: DatasetRecord) => {
|
||||||
|
const aValue = String(a.dt ?? a.date ?? a.timestamp ?? "");
|
||||||
|
const bValue = String(b.dt ?? b.date ?? b.timestamp ?? "");
|
||||||
|
return bValue.localeCompare(aValue);
|
||||||
|
};
|
||||||
|
|
||||||
const parseJsonLikePayload = (value: string): unknown => {
|
const parseJsonLikePayload = (value: string): unknown => {
|
||||||
const normalized = value
|
const normalized = value
|
||||||
.replace(/\uFEFF/g, "")
|
.replace(/\uFEFF/g, "")
|
||||||
@@ -86,16 +106,23 @@ const parseJsonLikePayload = (value: string): unknown => {
|
|||||||
return JSON.parse(normalized);
|
return JSON.parse(normalized);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const tryParseRecords = (value: string) => {
|
||||||
|
try {
|
||||||
|
return normalizeRecordPayload(parseJsonLikePayload(value));
|
||||||
|
} catch {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
const parseRecordStringPayload = (payload: string): DatasetRecord[] | null => {
|
const parseRecordStringPayload = (payload: string): DatasetRecord[] | null => {
|
||||||
const trimmed = payload.trim();
|
const trimmed = payload.trim();
|
||||||
if (!trimmed) {
|
if (!trimmed) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
const direct = tryParseRecords(trimmed);
|
||||||
return normalizeRecordPayload(parseJsonLikePayload(trimmed));
|
if (direct) {
|
||||||
} catch {
|
return direct;
|
||||||
// Continue with additional fallback formats below.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const ndjsonLines = trimmed
|
const ndjsonLines = trimmed
|
||||||
@@ -106,29 +133,24 @@ const parseRecordStringPayload = (payload: string): DatasetRecord[] | null => {
|
|||||||
try {
|
try {
|
||||||
return ndjsonLines.map((line) => parseJsonLikePayload(line)) as DatasetRecord[];
|
return ndjsonLines.map((line) => parseJsonLikePayload(line)) as DatasetRecord[];
|
||||||
} catch {
|
} catch {
|
||||||
// Continue with wrapped JSON extraction.
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const bracketStart = trimmed.indexOf("[");
|
const bracketStart = trimmed.indexOf("[");
|
||||||
const bracketEnd = trimmed.lastIndexOf("]");
|
const bracketEnd = trimmed.lastIndexOf("]");
|
||||||
if (bracketStart !== -1 && bracketEnd > bracketStart) {
|
if (bracketStart !== -1 && bracketEnd > bracketStart) {
|
||||||
const candidate = trimmed.slice(bracketStart, bracketEnd + 1);
|
const parsed = tryParseRecords(trimmed.slice(bracketStart, bracketEnd + 1));
|
||||||
try {
|
if (parsed) {
|
||||||
return normalizeRecordPayload(parseJsonLikePayload(candidate));
|
return parsed;
|
||||||
} catch {
|
|
||||||
// Continue with object extraction.
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const braceStart = trimmed.indexOf("{");
|
const braceStart = trimmed.indexOf("{");
|
||||||
const braceEnd = trimmed.lastIndexOf("}");
|
const braceEnd = trimmed.lastIndexOf("}");
|
||||||
if (braceStart !== -1 && braceEnd > braceStart) {
|
if (braceStart !== -1 && braceEnd > braceStart) {
|
||||||
const candidate = trimmed.slice(braceStart, braceEnd + 1);
|
const parsed = tryParseRecords(trimmed.slice(braceStart, braceEnd + 1));
|
||||||
try {
|
if (parsed) {
|
||||||
return normalizeRecordPayload(parseJsonLikePayload(candidate));
|
return parsed;
|
||||||
} catch {
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -316,45 +338,22 @@ const StatPage = () => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const openExplorer = async (spec: CorpusExplorerSpec) => {
|
const openExplorer = async (spec: CorpusExplorerSpec) => {
|
||||||
setExplorerState({
|
setExplorerState(createExplorerState(spec, { loading: true }));
|
||||||
open: true,
|
|
||||||
title: spec.title,
|
|
||||||
description: spec.description,
|
|
||||||
emptyMessage: spec.emptyMessage ?? "No matching records found.",
|
|
||||||
records: [],
|
|
||||||
loading: true,
|
|
||||||
error: "",
|
|
||||||
});
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const records = await ensureFilteredRecords();
|
const records = await ensureFilteredRecords();
|
||||||
const context = buildExplorerContext(records);
|
const context = buildExplorerContext(records);
|
||||||
const matched = records.filter((record) => spec.matcher(record, context));
|
const matched = records
|
||||||
matched.sort((a, b) => {
|
.filter((record) => spec.matcher(record, context))
|
||||||
const aValue = String(a.dt ?? a.date ?? a.timestamp ?? "");
|
.sort(compareRecordsByNewest);
|
||||||
const bValue = String(b.dt ?? b.date ?? b.timestamp ?? "");
|
|
||||||
return bValue.localeCompare(aValue);
|
|
||||||
});
|
|
||||||
|
|
||||||
setExplorerState({
|
setExplorerState(createExplorerState(spec, { records: matched }));
|
||||||
open: true,
|
|
||||||
title: spec.title,
|
|
||||||
description: spec.description,
|
|
||||||
emptyMessage: spec.emptyMessage ?? "No matching records found.",
|
|
||||||
records: matched,
|
|
||||||
loading: false,
|
|
||||||
error: "",
|
|
||||||
});
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
setExplorerState({
|
setExplorerState(
|
||||||
open: true,
|
createExplorerState(spec, {
|
||||||
title: spec.title,
|
error: `Failed to load corpus records: ${String(e)}`,
|
||||||
description: spec.description,
|
}),
|
||||||
emptyMessage: spec.emptyMessage ?? "No matching records found.",
|
);
|
||||||
records: [],
|
|
||||||
loading: false,
|
|
||||||
error: `Failed to load corpus records: ${String(e)}`,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
import type { CSSProperties } from "react";
|
|
||||||
|
|
||||||
type EntityRecord = {
|
type EntityRecord = {
|
||||||
text?: string;
|
text?: string;
|
||||||
[key: string]: unknown;
|
[key: string]: unknown;
|
||||||
@@ -58,11 +56,6 @@ const EMOTION_KEYS = [
|
|||||||
"emotion_sadness",
|
"emotion_sadness",
|
||||||
] as const;
|
] as const;
|
||||||
|
|
||||||
const shrinkButtonStyle: CSSProperties = {
|
|
||||||
padding: "4px 8px",
|
|
||||||
fontSize: 12,
|
|
||||||
};
|
|
||||||
|
|
||||||
const toText = (value: unknown) => {
|
const toText = (value: unknown) => {
|
||||||
if (typeof value === "string") {
|
if (typeof value === "string") {
|
||||||
return value;
|
return value;
|
||||||
@@ -83,6 +76,7 @@ const toText = (value: unknown) => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const normalize = (value: unknown) => toText(value).trim().toLowerCase();
|
const normalize = (value: unknown) => toText(value).trim().toLowerCase();
|
||||||
|
const getAuthor = (record: DatasetRecord) => toText(record.author).trim();
|
||||||
|
|
||||||
const getRecordText = (record: DatasetRecord) =>
|
const getRecordText = (record: DatasetRecord) =>
|
||||||
`${record.title ?? ""} ${record.content ?? ""}`.trim();
|
`${record.title ?? ""} ${record.content ?? ""}`.trim();
|
||||||
@@ -152,11 +146,11 @@ const matchesPhrase = (record: DatasetRecord, phrase: string) => {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return pattern.test(getRecordText(record).toLowerCase());
|
return pattern.test(getRecordText(record));
|
||||||
};
|
};
|
||||||
|
|
||||||
const recordIdentityBucket = (record: DatasetRecord) => {
|
const recordIdentityBucket = (record: DatasetRecord) => {
|
||||||
const text = getRecordText(record).toLowerCase();
|
const text = getRecordText(record);
|
||||||
const inHits = countMatches(IN_GROUP_PATTERN, text);
|
const inHits = countMatches(IN_GROUP_PATTERN, text);
|
||||||
const outHits = countMatches(OUT_GROUP_PATTERN, text);
|
const outHits = countMatches(OUT_GROUP_PATTERN, text);
|
||||||
|
|
||||||
@@ -171,48 +165,30 @@ const recordIdentityBucket = (record: DatasetRecord) => {
|
|||||||
return "tie";
|
return "tie";
|
||||||
};
|
};
|
||||||
|
|
||||||
const createAuthorEventCounts = (records: DatasetRecord[]) => {
|
const buildExplorerContext = (records: DatasetRecord[]): CorpusExplorerContext => {
|
||||||
const counts = new Map<string, number>();
|
const authorByPostId = new Map<string, string>();
|
||||||
|
const authorEventCounts = new Map<string, number>();
|
||||||
|
const authorCommentCounts = new Map<string, number>();
|
||||||
|
|
||||||
for (const record of records) {
|
for (const record of records) {
|
||||||
const author = toText(record.author).trim();
|
const author = getAuthor(record);
|
||||||
if (!author) {
|
if (!author) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
counts.set(author, (counts.get(author) ?? 0) + 1);
|
|
||||||
}
|
|
||||||
return counts;
|
|
||||||
};
|
|
||||||
|
|
||||||
const createAuthorCommentCounts = (records: DatasetRecord[]) => {
|
authorEventCounts.set(author, (authorEventCounts.get(author) ?? 0) + 1);
|
||||||
const counts = new Map<string, number>();
|
|
||||||
for (const record of records) {
|
if (record.type === "comment") {
|
||||||
const author = toText(record.author).trim();
|
authorCommentCounts.set(author, (authorCommentCounts.get(author) ?? 0) + 1);
|
||||||
if (!author || record.type !== "comment") {
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
counts.set(author, (counts.get(author) ?? 0) + 1);
|
|
||||||
}
|
|
||||||
return counts;
|
|
||||||
};
|
|
||||||
|
|
||||||
const createAuthorByPostId = (records: DatasetRecord[]) => {
|
if (record.post_id !== null && record.post_id !== undefined) {
|
||||||
const map = new Map<string, string>();
|
authorByPostId.set(String(record.post_id), author);
|
||||||
for (const record of records) {
|
|
||||||
const postId = record.post_id;
|
|
||||||
const author = toText(record.author).trim();
|
|
||||||
if (postId === null || postId === undefined || !author) {
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
map.set(String(postId), author);
|
|
||||||
}
|
}
|
||||||
return map;
|
|
||||||
};
|
|
||||||
|
|
||||||
const buildExplorerContext = (records: DatasetRecord[]): CorpusExplorerContext => ({
|
return { authorByPostId, authorEventCounts, authorCommentCounts };
|
||||||
authorByPostId: createAuthorByPostId(records),
|
};
|
||||||
authorEventCounts: createAuthorEventCounts(records),
|
|
||||||
authorCommentCounts: createAuthorCommentCounts(records),
|
|
||||||
});
|
|
||||||
|
|
||||||
const buildAllRecordsSpec = (): CorpusExplorerSpec => ({
|
const buildAllRecordsSpec = (): CorpusExplorerSpec => ({
|
||||||
title: "Corpus Explorer",
|
title: "Corpus Explorer",
|
||||||
@@ -221,19 +197,27 @@ const buildAllRecordsSpec = (): CorpusExplorerSpec => ({
|
|||||||
matcher: () => true,
|
matcher: () => true,
|
||||||
});
|
});
|
||||||
|
|
||||||
const buildUserSpec = (author: string): CorpusExplorerSpec => ({
|
const buildUserSpec = (author: string): CorpusExplorerSpec => {
|
||||||
title: `User: ${author}`,
|
const target = normalize(author);
|
||||||
description: `All records authored by ${author}.`,
|
|
||||||
emptyMessage: `No records found for ${author}.`,
|
|
||||||
matcher: (record) => normalize(record.author) === normalize(author),
|
|
||||||
});
|
|
||||||
|
|
||||||
const buildTopicSpec = (topic: string): CorpusExplorerSpec => ({
|
return {
|
||||||
title: `Topic: ${topic}`,
|
title: `User: ${author}`,
|
||||||
description: `Records assigned to the ${topic} topic bucket.`,
|
description: `All records authored by ${author}.`,
|
||||||
emptyMessage: `No records found in the ${topic} topic bucket.`,
|
emptyMessage: `No records found for ${author}.`,
|
||||||
matcher: (record) => normalize(record.topic) === normalize(topic),
|
matcher: (record) => normalize(record.author) === target,
|
||||||
});
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
const buildTopicSpec = (topic: string): CorpusExplorerSpec => {
|
||||||
|
const target = normalize(topic);
|
||||||
|
|
||||||
|
return {
|
||||||
|
title: `Topic: ${topic}`,
|
||||||
|
description: `Records assigned to the ${topic} topic bucket.`,
|
||||||
|
emptyMessage: `No records found in the ${topic} topic bucket.`,
|
||||||
|
matcher: (record) => normalize(record.topic) === target,
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
const buildDateBucketSpec = (date: string): CorpusExplorerSpec => ({
|
const buildDateBucketSpec = (date: string): CorpusExplorerSpec => ({
|
||||||
title: `Date Bucket: ${date}`,
|
title: `Date Bucket: ${date}`,
|
||||||
@@ -256,88 +240,75 @@ const buildNgramSpec = (ngram: string): CorpusExplorerSpec => ({
|
|||||||
matcher: (record) => matchesPhrase(record, ngram),
|
matcher: (record) => matchesPhrase(record, ngram),
|
||||||
});
|
});
|
||||||
|
|
||||||
const buildEntitySpec = (entity: string): CorpusExplorerSpec => ({
|
const buildEntitySpec = (entity: string): CorpusExplorerSpec => {
|
||||||
title: `Entity: ${entity}`,
|
const target = normalize(entity);
|
||||||
description: `Records mentioning the ${entity} entity.`,
|
|
||||||
emptyMessage: `No records found for the ${entity} entity.`,
|
|
||||||
matcher: (record) => {
|
|
||||||
const target = normalize(entity);
|
|
||||||
const entities = Array.isArray(record.ner_entities) ? record.ner_entities : [];
|
|
||||||
return entities.some((item) => normalize(item?.text) === target) || matchesPhrase(record, entity);
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
const buildSourceSpec = (source: string): CorpusExplorerSpec => ({
|
return {
|
||||||
title: `Source: ${source}`,
|
title: `Entity: ${entity}`,
|
||||||
description: `Records from the ${source} source.`,
|
description: `Records mentioning the ${entity} entity.`,
|
||||||
emptyMessage: `No records found for ${source}.`,
|
emptyMessage: `No records found for the ${entity} entity.`,
|
||||||
matcher: (record) => normalize(record.source) === normalize(source),
|
matcher: (record) => {
|
||||||
});
|
const entities = Array.isArray(record.ner_entities) ? record.ner_entities : [];
|
||||||
|
return entities.some((item) => normalize(item?.text) === target) || matchesPhrase(record, entity);
|
||||||
|
},
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
const buildDominantEmotionSpec = (emotion: string): CorpusExplorerSpec => ({
|
const buildSourceSpec = (source: string): CorpusExplorerSpec => {
|
||||||
title: `Dominant Emotion: ${emotion}`,
|
const target = normalize(source);
|
||||||
description: `Records where ${emotion} is the strongest emotion score.`,
|
|
||||||
emptyMessage: `No records found with dominant emotion ${emotion}.`,
|
|
||||||
matcher: (record) => getDominantEmotion(record) === normalize(emotion),
|
|
||||||
});
|
|
||||||
|
|
||||||
const buildReplyPairSpec = (source: string, target: string): CorpusExplorerSpec => ({
|
return {
|
||||||
title: `Reply Path: ${source} -> ${target}`,
|
title: `Source: ${source}`,
|
||||||
description: `Reply records authored by ${source} in response to ${target}.`,
|
description: `Records from the ${source} source.`,
|
||||||
emptyMessage: `No reply records found for ${source} -> ${target}.`,
|
emptyMessage: `No records found for ${source}.`,
|
||||||
matcher: (record, context) => {
|
matcher: (record) => normalize(record.source) === target,
|
||||||
if (normalize(record.author) !== normalize(source)) {
|
};
|
||||||
return false;
|
};
|
||||||
}
|
|
||||||
|
|
||||||
const replyTo = record.reply_to;
|
const buildDominantEmotionSpec = (emotion: string): CorpusExplorerSpec => {
|
||||||
if (replyTo === null || replyTo === undefined || replyTo === "") {
|
const target = normalize(emotion);
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
const replyTarget = context.authorByPostId.get(String(replyTo));
|
return {
|
||||||
return normalize(replyTarget) === normalize(target);
|
title: `Dominant Emotion: ${emotion}`,
|
||||||
},
|
description: `Records where ${emotion} is the strongest emotion score.`,
|
||||||
});
|
emptyMessage: `No records found with dominant emotion ${emotion}.`,
|
||||||
|
matcher: (record) => getDominantEmotion(record) === target,
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
const buildReplyPairSpec = (source: string, target: string): CorpusExplorerSpec => {
|
||||||
|
const sourceName = normalize(source);
|
||||||
|
const targetName = normalize(target);
|
||||||
|
|
||||||
|
return {
|
||||||
|
title: `Reply Path: ${source} -> ${target}`,
|
||||||
|
description: `Reply records authored by ${source} in response to ${target}.`,
|
||||||
|
emptyMessage: `No reply records found for ${source} -> ${target}.`,
|
||||||
|
matcher: (record, context) => {
|
||||||
|
if (normalize(record.author) !== sourceName) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const replyTo = record.reply_to;
|
||||||
|
if (replyTo === null || replyTo === undefined || replyTo === "") {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return normalize(context.authorByPostId.get(String(replyTo))) === targetName;
|
||||||
|
},
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
const buildOneTimeUsersSpec = (): CorpusExplorerSpec => ({
|
const buildOneTimeUsersSpec = (): CorpusExplorerSpec => ({
|
||||||
title: "One-Time Users",
|
title: "One-Time Users",
|
||||||
description: "Records written by authors who appear exactly once in the filtered corpus.",
|
description: "Records written by authors who appear exactly once in the filtered corpus.",
|
||||||
emptyMessage: "No one-time-user records found.",
|
emptyMessage: "No one-time-user records found.",
|
||||||
matcher: (record, context) => {
|
matcher: (record, context) => {
|
||||||
const author = toText(record.author).trim();
|
const author = getAuthor(record);
|
||||||
return !!author && context.authorEventCounts.get(author) === 1;
|
return !!author && context.authorEventCounts.get(author) === 1;
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
const buildTopCommentersSpec = (topAuthorCount: number): CorpusExplorerSpec => ({
|
|
||||||
title: "Top Commenters",
|
|
||||||
description: `Comment records from the top ${topAuthorCount} commenters in the filtered corpus.`,
|
|
||||||
emptyMessage: "No top-commenter records found.",
|
|
||||||
matcher: (record, context) => {
|
|
||||||
if (record.type !== "comment") {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
const rankedAuthors = Array.from(context.authorCommentCounts.entries())
|
|
||||||
.sort((a, b) => b[1] - a[1])
|
|
||||||
.slice(0, topAuthorCount)
|
|
||||||
.map(([author]) => author);
|
|
||||||
|
|
||||||
return rankedAuthors.includes(toText(record.author).trim());
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
const buildSingleCommentAuthorsSpec = (): CorpusExplorerSpec => ({
|
|
||||||
title: "Single-Comment Authors",
|
|
||||||
description: "Comment records from authors who commented exactly once.",
|
|
||||||
emptyMessage: "No single-comment-author records found.",
|
|
||||||
matcher: (record, context) => {
|
|
||||||
const author = toText(record.author).trim();
|
|
||||||
return record.type === "comment" && !!author && context.authorCommentCounts.get(author) === 1;
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
const buildIdentityBucketSpec = (bucket: "in" | "out" | "tie"): CorpusExplorerSpec => {
|
const buildIdentityBucketSpec = (bucket: "in" | "out" | "tie"): CorpusExplorerSpec => {
|
||||||
const labels = {
|
const labels = {
|
||||||
in: "In-Group Posts",
|
in: "In-Group Posts",
|
||||||
@@ -376,9 +347,7 @@ const buildDeonticSpec = () =>
|
|||||||
const buildPermissionSpec = () =>
|
const buildPermissionSpec = () =>
|
||||||
buildPatternSpec("Permission Words", "Records containing permission language.", PERMISSION_PATTERN);
|
buildPatternSpec("Permission Words", "Records containing permission language.", PERMISSION_PATTERN);
|
||||||
|
|
||||||
const getExplorerButtonStyle = () => shrinkButtonStyle;
|
export type { DatasetRecord, CorpusExplorerSpec };
|
||||||
|
|
||||||
export type { DatasetRecord, CorpusExplorerContext, CorpusExplorerSpec };
|
|
||||||
export {
|
export {
|
||||||
buildAllRecordsSpec,
|
buildAllRecordsSpec,
|
||||||
buildCertaintySpec,
|
buildCertaintySpec,
|
||||||
@@ -393,13 +362,10 @@ export {
|
|||||||
buildOneTimeUsersSpec,
|
buildOneTimeUsersSpec,
|
||||||
buildPermissionSpec,
|
buildPermissionSpec,
|
||||||
buildReplyPairSpec,
|
buildReplyPairSpec,
|
||||||
buildSingleCommentAuthorsSpec,
|
|
||||||
buildSourceSpec,
|
buildSourceSpec,
|
||||||
buildTopicSpec,
|
buildTopicSpec,
|
||||||
buildTopCommentersSpec,
|
|
||||||
buildUserSpec,
|
buildUserSpec,
|
||||||
buildWordSpec,
|
buildWordSpec,
|
||||||
getDateBucket,
|
getDateBucket,
|
||||||
getExplorerButtonStyle,
|
|
||||||
toText,
|
toText,
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user