Corpus Explorer Feature #11
175
frontend/src/components/CorpusExplorer.tsx
Normal file
175
frontend/src/components/CorpusExplorer.tsx
Normal file
@@ -0,0 +1,175 @@
|
||||
import { Dialog, DialogPanel, DialogTitle } from "@headlessui/react";
|
||||
|
||||
import StatsStyling from "../styles/stats_styling";
|
||||
import type { DatasetRecord } from "../utils/corpusExplorer";
|
||||
|
||||
// Local alias for the imported stats style map; referenced below as `styles.*` for inline styles.
const styles = StatsStyling;
|
||||
|
||||
/**
 * Normalises an arbitrary value into display-ready text.
 *
 * @param value - Any value; only strings can produce a non-empty result.
 * @returns The trimmed string, or `""` when the value is not a string, is
 *   blank after trimming, or is one of the placeholder tokens `nan`, `null`
 *   or `undefined` (compared case-insensitively).
 */
const cleanText = (value: unknown): string => {
  if (typeof value !== "string") {
    return "";
  }

  const trimmed = value.trim();
  if (!trimmed) {
    return "";
  }

  // Placeholder tokens are treated as "no value".
  // NOTE(review): presumably these are stringified missing values coming from
  // the data pipeline — confirm against the producer of the records.
  const lowered = trimmed.toLowerCase();
  if (lowered === "nan" || lowered === "null" || lowered === "undefined") {
    return "";
  }

  return trimmed;
};
|
||||
|
||||
/**
 * Returns the cleaned text for `value`, or `fallback` when cleaning yields
 * an empty string.
 *
 * @param value - Raw value to show; passed through `cleanText`.
 * @param fallback - Text used when `cleanText(value)` is empty.
 * @returns The cleaned value or the fallback.
 */
const displayText = (value: unknown, fallback: string): string =>
  // `||` is intentional: cleanText signals "no value" with an empty string.
  cleanText(value) || fallback;
|
||||
|
||||
/** Props accepted by the `CorpusExplorer` dialog. */
type CorpusExplorerProps = {
  /** Whether the dialog is shown; forwarded to the `Dialog` `open` prop. */
  open: boolean;
  /** Called when the dialog requests closing and when the Close button is clicked. */
  onClose: () => void;
  /** Heading rendered in the dialog title. */
  title: string;
  /** Subtitle text; rendered before the loading note or the record count. */
  description: string;
  /** Records listed in the dialog body. */
  records: DatasetRecord[];
  /** While true, a loading note is shown and the record list is hidden. */
  loading: boolean;
  /** Error text; when non-empty it is displayed and the record list and empty message are hidden. */
  error: string;
  /** Shown when not loading, there is no error, and `records` is empty. */
  emptyMessage: string;
};
|
||||
|
||||
/**
 * Produces a human-readable time label for a record.
 *
 * Sources are tried in order:
 * 1. `record.dt` — a non-empty string that `Date` can parse, formatted with
 *    `toLocaleString()`.
 * 2. `record.date` — a non-empty string, returned verbatim.
 * 3. `record.timestamp` — a number interpreted as seconds since the epoch
 *    (it is multiplied by 1000 before being handed to `Date`).
 *
 * @param record - The record to describe.
 * @returns The formatted time, or `"Unknown time"` when no source yields a
 *   valid value.
 */
const formatRecordDate = (record: DatasetRecord): string => {
  if (typeof record.dt === "string" && record.dt) {
    const parsed = new Date(record.dt);
    // An unparseable string yields an invalid Date (NaN time); fall through.
    if (!Number.isNaN(parsed.getTime())) {
      return parsed.toLocaleString();
    }
  }

  if (typeof record.date === "string" && record.date) {
    return record.date;
  }

  if (typeof record.timestamp === "number") {
    const fromEpoch = new Date(record.timestamp * 1000);
    // NaN, Infinity or out-of-range values would otherwise render as
    // "Invalid Date"; treat them as missing instead.
    if (!Number.isNaN(fromEpoch.getTime())) {
      return fromEpoch.toLocaleString();
    }
  }

  return "Unknown time";
};
|
||||
|
||||
/**
 * Builds the React list key for a record.
 *
 * Prefers `record.id`, then `record.post_id`; when both are null/undefined it
 * falls back to `<author or "record">-<index>`.
 *
 * NOTE(review): only the fallback includes the index, so key uniqueness for
 * the first two branches relies on `id` / `post_id` being unique within the
 * rendered list — confirm against the data.
 *
 * @param record - The record being rendered.
 * @param index - Position of the record in the rendered list.
 * @returns The key as a string.
 */
const getRecordKey = (record: DatasetRecord, index: number): string =>
  String(record.id ?? record.post_id ?? `${record.author ?? "record"}-${index}`);
|
||||
|
||||
/**
 * Chooses the heading shown for a record.
 *
 * - Records whose `type` is `"comment"` get no heading (`""`).
 * - Otherwise the cleaned `title` is used when present.
 * - Otherwise the cleaned `content` is used, shortened to at most 120 UTF-16
 *   code units (ending in `...`) when longer.
 * - With neither title nor content, `"Untitled record"` is returned.
 *
 * @param record - The record being rendered.
 * @returns The heading text, or `""` for comments.
 */
const getRecordTitle = (record: DatasetRecord): string => {
  if (record.type === "comment") {
    return "";
  }

  const title = cleanText(record.title);
  if (title) {
    return title;
  }

  const content = cleanText(record.content);
  if (!content) {
    return "Untitled record";
  }

  if (content.length <= 120) {
    return content;
  }

  // Keep 117 code units plus "..." — but never end on a lone high surrogate,
  // which would split an astral character (e.g. an emoji) and render as U+FFFD.
  let end = 117;
  const lastUnit = content.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    end -= 1;
  }

  return `${content.slice(0, end)}...`;
};
|
||||
|
||||
/**
 * Builds the body excerpt shown for a record.
 *
 * @param record - The record being rendered.
 * @returns The cleaned `content`, shortened to at most 320 UTF-16 code units
 *   (ending in `...`) when longer, or `"No content available."` when the
 *   cleaned content is empty.
 */
const getRecordExcerpt = (record: DatasetRecord): string => {
  const content = cleanText(record.content);
  if (!content) {
    return "No content available.";
  }

  if (content.length <= 320) {
    return content;
  }

  // Keep 317 code units plus "..." — but never end on a lone high surrogate,
  // which would split an astral character (e.g. an emoji) and render as U+FFFD.
  let end = 317;
  const lastUnit = content.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    end -= 1;
  }

  return `${content.slice(0, end)}...`;
};
|
||||
|
||||
/**
 * Modal dialog that lists a set of corpus records.
 *
 * Rendering rules, all driven by props:
 * - The header always shows `title`, `description`, and either a loading note
 *   or the record count, plus a Close button wired to `onClose`.
 * - A non-empty `error` is shown as a message.
 * - `emptyMessage` is shown when not loading, no error, and no records.
 * - A loading placeholder is shown while `loading` is true.
 * - The record list is shown only when not loading, no error, and at least
 *   one record exists.
 */
const CorpusExplorer = ({
  open,
  onClose,
  title,
  description,
  records,
  loading,
  error,
  emptyMessage,
}: CorpusExplorerProps) => (
  <Dialog open={open} onClose={onClose} style={styles.modalRoot}>
    <div style={styles.modalBackdrop} />

    <div style={styles.modalContainer}>
      <DialogPanel
        style={{
          ...styles.card,
          ...styles.modalPanel,
          width: "min(960px, 96vw)",
          maxHeight: "88vh",
          display: "flex",
          flexDirection: "column",
          gap: 12,
        }}
      >
        <div style={styles.headerBar}>
          <div>
            <DialogTitle style={styles.sectionTitle}>{title}</DialogTitle>
            <p style={styles.sectionSubtitle}>
              {description} {loading ? "Loading records..." : `${records.length.toLocaleString()} records.`}
            </p>
          </div>

          {/* Explicit type so the button can never act as a form submit. */}
          <button type="button" onClick={onClose} style={styles.buttonSecondary}>
            Close
          </button>
        </div>

        {error ? <p style={styles.sectionSubtitle}>{error}</p> : null}

        {!loading && !error && !records.length ? (
          <p style={styles.sectionSubtitle}>{emptyMessage}</p>
        ) : null}

        {loading ? (
          <div style={styles.topUserMeta}>Preparing corpus slice...</div>
        ) : null}

        {!loading && !error && records.length ? (
          <div
            style={{
              ...styles.topUsersList,
              overflowY: "auto",
              paddingRight: 4,
            }}
          >
            {records.map((record, index) => {
              // Derive per-record display values once instead of once for the
              // condition and again for the rendered text.
              const recordTitle = getRecordTitle(record);
              const topic = cleanText(record.topic);

              return (
                <div key={getRecordKey(record, index)} style={styles.topUserItem}>
                  <div style={{ ...styles.headerBar, alignItems: "flex-start" }}>
                    <div>
                      {recordTitle ? (
                        <div style={styles.topUserName}>{recordTitle}</div>
                      ) : null}
                      <div style={styles.topUserMeta}>
                        {displayText(record.author, "Unknown author")} • {displayText(record.source, "Unknown source")} • {displayText(record.type, "record")} • {formatRecordDate(record)}
                      </div>
                    </div>
                    {/* The container is kept even without a topic so the header row keeps two children. */}
                    <div style={styles.topUserMeta}>
                      {topic ? `Topic: ${topic}` : ""}
                    </div>
                  </div>

                  <div style={{ ...styles.topUserMeta, marginTop: 8, whiteSpace: "pre-wrap" }}>
                    {getRecordExcerpt(record)}
                  </div>
                </div>
              );
            })}
          </div>
        ) : null}
      </DialogPanel>
    </div>
  </Dialog>
);
|
||||
|
||||
export default CorpusExplorer;
|
||||
@@ -1,14 +1,34 @@
|
||||
import Card from "./Card";
|
||||
import StatsStyling from "../styles/stats_styling";
|
||||
import type { CulturalAnalysisResponse } from "../types/ApiTypes";
|
||||
import {
|
||||
buildCertaintySpec,
|
||||
buildDeonticSpec,
|
||||
buildEntitySpec,
|
||||
buildHedgeSpec,
|
||||
buildIdentityBucketSpec,
|
||||
buildPermissionSpec,
|
||||
getExplorerButtonStyle,
|
||||
type CorpusExplorerSpec,
|
||||
} from "../utils/corpusExplorer";
|
||||
|
||||
const styles = StatsStyling;
|
||||
|
||||
/** Props accepted by the `CulturalStats` component. */
type CulturalStatsProps = {
  /** Cultural analysis payload whose markers are rendered by the component. */
  data: CulturalAnalysisResponse;
  /** Called with an explorer spec when the user asks to explore the records behind a statistic. */
  onExplore: (spec: CorpusExplorerSpec) => void;
};
|
||||
|
||||
const CulturalStats = ({ data }: CulturalStatsProps) => {
|
||||
/**
 * Renders the small "Explore" button used in card slots.
 *
 * @param onClick - Handler invoked when the button is clicked.
 * @returns The button element, styled with the secondary button style merged
 *   with `getExplorerButtonStyle()`.
 */
const renderExploreButton = (onClick: () => void) => (
  <button
    // Explicit type so the button can never act as a form submit.
    type="button"
    onClick={onClick}
    style={{ ...styles.buttonSecondary, ...getExplorerButtonStyle() }}
  >
    Explore
  </button>
);
|
||||
|
||||
const CulturalStats = ({ data, onExplore }: CulturalStatsProps) => {
|
||||
const identity = data.identity_markers;
|
||||
const stance = data.stance_markers;
|
||||
const inGroupWords = identity?.in_group_usage ?? 0;
|
||||
@@ -30,7 +50,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
||||
const topEmotion = (emotionAvg: Record<string, number> | undefined) => {
|
||||
const entries = Object.entries(emotionAvg ?? {});
|
||||
if (!entries.length) {
|
||||
return "—";
|
||||
return "-";
|
||||
}
|
||||
|
||||
entries.sort((a, b) => b[1] - a[1]);
|
||||
@@ -64,21 +84,30 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
||||
/>
|
||||
<Card
|
||||
label="In-Group Posts"
|
||||
value={identity?.in_group_posts?.toLocaleString() ?? "—"}
|
||||
value={identity?.in_group_posts?.toLocaleString() ?? "-"}
|
||||
sublabel='Posts leaning toward "us" language'
|
||||
rightSlot={renderExploreButton(() =>
|
||||
onExplore(buildIdentityBucketSpec("in")),
|
||||
)}
|
||||
style={{ gridColumn: "span 3" }}
|
||||
/>
|
||||
<Card
|
||||
label="Out-Group Posts"
|
||||
value={identity?.out_group_posts?.toLocaleString() ?? "—"}
|
||||
value={identity?.out_group_posts?.toLocaleString() ?? "-"}
|
||||
sublabel='Posts leaning toward "them" language'
|
||||
rightSlot={renderExploreButton(() =>
|
||||
onExplore(buildIdentityBucketSpec("out")),
|
||||
)}
|
||||
style={{ gridColumn: "span 3" }}
|
||||
/>
|
||||
|
||||
<Card
|
||||
label="Balanced Posts"
|
||||
value={identity?.tie_posts?.toLocaleString() ?? "—"}
|
||||
value={identity?.tie_posts?.toLocaleString() ?? "-"}
|
||||
sublabel="Posts with equal us/them signals"
|
||||
rightSlot={renderExploreButton(() =>
|
||||
onExplore(buildIdentityBucketSpec("tie")),
|
||||
)}
|
||||
style={{ gridColumn: "span 3" }}
|
||||
/>
|
||||
<Card
|
||||
@@ -90,7 +119,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
||||
<Card
|
||||
label="In-Group Share"
|
||||
value={
|
||||
inGroupWordRate === null ? "—" : `${inGroupWordRate.toFixed(2)}%`
|
||||
inGroupWordRate === null ? "-" : `${inGroupWordRate.toFixed(2)}%`
|
||||
}
|
||||
sublabel="Share of all words"
|
||||
style={{ gridColumn: "span 3" }}
|
||||
@@ -98,7 +127,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
||||
<Card
|
||||
label="Out-Group Share"
|
||||
value={
|
||||
outGroupWordRate === null ? "—" : `${outGroupWordRate.toFixed(2)}%`
|
||||
outGroupWordRate === null ? "-" : `${outGroupWordRate.toFixed(2)}%`
|
||||
}
|
||||
sublabel="Share of all words"
|
||||
style={{ gridColumn: "span 3" }}
|
||||
@@ -106,42 +135,46 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
||||
|
||||
<Card
|
||||
label="Hedging Words"
|
||||
value={stance?.hedge_total?.toLocaleString() ?? "—"}
|
||||
value={stance?.hedge_total?.toLocaleString() ?? "-"}
|
||||
sublabel={
|
||||
typeof stance?.hedge_per_1k_tokens === "number"
|
||||
? `${stance.hedge_per_1k_tokens.toFixed(1)} per 1k words`
|
||||
: "Word frequency"
|
||||
}
|
||||
rightSlot={renderExploreButton(() => onExplore(buildHedgeSpec()))}
|
||||
style={{ gridColumn: "span 3" }}
|
||||
/>
|
||||
<Card
|
||||
label="Certainty Words"
|
||||
value={stance?.certainty_total?.toLocaleString() ?? "—"}
|
||||
value={stance?.certainty_total?.toLocaleString() ?? "-"}
|
||||
sublabel={
|
||||
typeof stance?.certainty_per_1k_tokens === "number"
|
||||
? `${stance.certainty_per_1k_tokens.toFixed(1)} per 1k words`
|
||||
: "Word frequency"
|
||||
}
|
||||
rightSlot={renderExploreButton(() => onExplore(buildCertaintySpec()))}
|
||||
style={{ gridColumn: "span 3" }}
|
||||
/>
|
||||
<Card
|
||||
label="Need/Should Words"
|
||||
value={stance?.deontic_total?.toLocaleString() ?? "—"}
|
||||
value={stance?.deontic_total?.toLocaleString() ?? "-"}
|
||||
sublabel={
|
||||
typeof stance?.deontic_per_1k_tokens === "number"
|
||||
? `${stance.deontic_per_1k_tokens.toFixed(1)} per 1k words`
|
||||
: "Word frequency"
|
||||
}
|
||||
rightSlot={renderExploreButton(() => onExplore(buildDeonticSpec()))}
|
||||
style={{ gridColumn: "span 3" }}
|
||||
/>
|
||||
<Card
|
||||
label="Permission Words"
|
||||
value={stance?.permission_total?.toLocaleString() ?? "—"}
|
||||
value={stance?.permission_total?.toLocaleString() ?? "-"}
|
||||
sublabel={
|
||||
typeof stance?.permission_per_1k_tokens === "number"
|
||||
? `${stance.permission_per_1k_tokens.toFixed(1)} per 1k words`
|
||||
: "Word frequency"
|
||||
}
|
||||
rightSlot={renderExploreButton(() => onExplore(buildPermissionSpec()))}
|
||||
style={{ gridColumn: "span 3" }}
|
||||
/>
|
||||
|
||||
@@ -150,8 +183,14 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
||||
<p style={styles.sectionSubtitle}>
|
||||
Most likely emotion when in-group wording is stronger.
|
||||
</p>
|
||||
<div style={styles.topUserName}>
|
||||
{topEmotion(identity?.in_group_emotion_avg)}
|
||||
<div style={styles.topUserName}>{topEmotion(identity?.in_group_emotion_avg)}</div>
|
||||
<div style={{ marginTop: 12 }}>
|
||||
<button
|
||||
onClick={() => onExplore(buildIdentityBucketSpec("in"))}
|
||||
style={styles.buttonSecondary}
|
||||
>
|
||||
Explore records
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -160,8 +199,14 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
||||
<p style={styles.sectionSubtitle}>
|
||||
Most likely emotion when out-group wording is stronger.
|
||||
</p>
|
||||
<div style={styles.topUserName}>
|
||||
{topEmotion(identity?.out_group_emotion_avg)}
|
||||
<div style={styles.topUserName}>{topEmotion(identity?.out_group_emotion_avg)}</div>
|
||||
<div style={{ marginTop: 12 }}>
|
||||
<button
|
||||
onClick={() => onExplore(buildIdentityBucketSpec("out"))}
|
||||
style={styles.buttonSecondary}
|
||||
>
|
||||
Explore records
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -171,9 +216,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
||||
Most mentioned entities and the mood that appears most with each.
|
||||
</p>
|
||||
{!entities.length ? (
|
||||
<div style={styles.topUserMeta}>
|
||||
No entity-level cultural data available.
|
||||
</div>
|
||||
<div style={styles.topUserMeta}>No entity-level cultural data available.</div>
|
||||
) : (
|
||||
<div
|
||||
style={{
|
||||
@@ -183,7 +226,11 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
||||
}}
|
||||
>
|
||||
{entities.map(([entity, aggregate]) => (
|
||||
<div key={entity} style={styles.topUserItem}>
|
||||
<div
|
||||
key={entity}
|
||||
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||
onClick={() => onExplore(buildEntitySpec(entity))}
|
||||
>
|
||||
<div style={styles.topUserName}>{entity}</div>
|
||||
<div style={styles.topUserMeta}>
|
||||
{aggregate.post_count.toLocaleString()} posts • Likely mood:{" "}
|
||||
|
||||
@@ -1,13 +1,20 @@
|
||||
import type { EmotionalAnalysisResponse } from "../types/ApiTypes";
|
||||
import StatsStyling from "../styles/stats_styling";
|
||||
import {
|
||||
buildDominantEmotionSpec,
|
||||
buildSourceSpec,
|
||||
buildTopicSpec,
|
||||
type CorpusExplorerSpec,
|
||||
} from "../utils/corpusExplorer";
|
||||
|
||||
const styles = StatsStyling;
|
||||
|
||||
/** Props accepted by the `EmotionalStats` component. */
type EmotionalStatsProps = {
  /** Emotional analysis payload rendered by the component. */
  emotionalData: EmotionalAnalysisResponse;
  /** Called with an explorer spec when the user clicks an item to explore its records. */
  onExplore: (spec: CorpusExplorerSpec) => void;
};
|
||||
|
||||
const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
|
||||
const EmotionalStats = ({ emotionalData, onExplore }: EmotionalStatsProps) => {
|
||||
const rows = emotionalData.average_emotion_by_topic ?? [];
|
||||
const overallEmotionAverage = emotionalData.overall_emotion_average ?? [];
|
||||
const dominantEmotionDistribution =
|
||||
@@ -126,7 +133,11 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
|
||||
{[...overallEmotionAverage]
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.map((row) => (
|
||||
<div key={row.emotion} style={styles.topUserItem}>
|
||||
<div
|
||||
key={row.emotion}
|
||||
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||
onClick={() => onExplore(buildDominantEmotionSpec(row.emotion))}
|
||||
>
|
||||
<div style={styles.topUserName}>
|
||||
{formatEmotion(row.emotion)}
|
||||
</div>
|
||||
@@ -157,7 +168,11 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
|
||||
{[...dominantEmotionDistribution]
|
||||
.sort((a, b) => b.ratio - a.ratio)
|
||||
.map((row) => (
|
||||
<div key={row.emotion} style={styles.topUserItem}>
|
||||
<div
|
||||
key={row.emotion}
|
||||
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||
onClick={() => onExplore(buildDominantEmotionSpec(row.emotion))}
|
||||
>
|
||||
<div style={styles.topUserName}>
|
||||
{formatEmotion(row.emotion)}
|
||||
</div>
|
||||
@@ -189,7 +204,11 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
|
||||
{[...emotionBySource]
|
||||
.sort((a, b) => b.event_count - a.event_count)
|
||||
.map((row) => (
|
||||
<div key={row.source} style={styles.topUserItem}>
|
||||
<div
|
||||
key={row.source}
|
||||
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||
onClick={() => onExplore(buildSourceSpec(row.source))}
|
||||
>
|
||||
<div style={styles.topUserName}>{row.source}</div>
|
||||
<div style={styles.topUserMeta}>
|
||||
{formatEmotion(row.dominant_emotion)} •{" "}
|
||||
@@ -211,7 +230,8 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
|
||||
{strongestPerTopic.map((topic) => (
|
||||
<div
|
||||
key={topic.topic}
|
||||
style={{ ...styles.cardBase, gridColumn: "span 4" }}
|
||||
style={{ ...styles.cardBase, gridColumn: "span 4", cursor: "pointer" }}
|
||||
onClick={() => onExplore(buildTopicSpec(topic.topic))}
|
||||
>
|
||||
<h3 style={{ ...styles.sectionTitle, marginBottom: 6 }}>
|
||||
{topic.topic}
|
||||
|
||||
@@ -1,14 +1,20 @@
|
||||
import Card from "./Card";
|
||||
import StatsStyling from "../styles/stats_styling";
|
||||
import type { LinguisticAnalysisResponse } from "../types/ApiTypes";
|
||||
import {
|
||||
buildNgramSpec,
|
||||
buildWordSpec,
|
||||
type CorpusExplorerSpec,
|
||||
} from "../utils/corpusExplorer";
|
||||
|
||||
const styles = StatsStyling;
|
||||
|
||||
/** Props accepted by the `LinguisticStats` component. */
type LinguisticStatsProps = {
  /** Linguistic analysis payload rendered by the component. */
  data: LinguisticAnalysisResponse;
  /** Called with an explorer spec when the user clicks a word or phrase to explore its records. */
  onExplore: (spec: CorpusExplorerSpec) => void;
};
|
||||
|
||||
const LinguisticStats = ({ data }: LinguisticStatsProps) => {
|
||||
const LinguisticStats = ({ data, onExplore }: LinguisticStatsProps) => {
|
||||
const lexical = data.lexical_diversity;
|
||||
const words = data.word_frequencies ?? [];
|
||||
const bigrams = data.common_two_phrases ?? [];
|
||||
@@ -60,7 +66,11 @@ const LinguisticStats = ({ data }: LinguisticStatsProps) => {
|
||||
}}
|
||||
>
|
||||
{topWords.map((item) => (
|
||||
<div key={item.word} style={styles.topUserItem}>
|
||||
<div
|
||||
key={item.word}
|
||||
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||
onClick={() => onExplore(buildWordSpec(item.word))}
|
||||
>
|
||||
<div style={styles.topUserName}>{item.word}</div>
|
||||
<div style={styles.topUserMeta}>
|
||||
{item.count.toLocaleString()} uses
|
||||
@@ -81,7 +91,11 @@ const LinguisticStats = ({ data }: LinguisticStatsProps) => {
|
||||
}}
|
||||
>
|
||||
{topBigrams.map((item) => (
|
||||
<div key={item.ngram} style={styles.topUserItem}>
|
||||
<div
|
||||
key={item.ngram}
|
||||
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||
onClick={() => onExplore(buildNgramSpec(item.ngram))}
|
||||
>
|
||||
<div style={styles.topUserName}>{item.ngram}</div>
|
||||
<div style={styles.topUserMeta}>
|
||||
{item.count.toLocaleString()} uses
|
||||
@@ -102,7 +116,11 @@ const LinguisticStats = ({ data }: LinguisticStatsProps) => {
|
||||
}}
|
||||
>
|
||||
{topTrigrams.map((item) => (
|
||||
<div key={item.ngram} style={styles.topUserItem}>
|
||||
<div
|
||||
key={item.ngram}
|
||||
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||
onClick={() => onExplore(buildNgramSpec(item.ngram))}
|
||||
>
|
||||
<div style={styles.topUserName}>{item.ngram}</div>
|
||||
<div style={styles.topUserMeta}>
|
||||
{item.count.toLocaleString()} uses
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { memo, useMemo, useState } from "react";
|
||||
import { memo, useMemo } from "react";
|
||||
import {
|
||||
LineChart,
|
||||
Line,
|
||||
@@ -13,7 +13,6 @@ import ActivityHeatmap from "../stats/ActivityHeatmap";
|
||||
import { ReactWordcloud } from "@cp949/react-wordcloud";
|
||||
import StatsStyling from "../styles/stats_styling";
|
||||
import Card from "../components/Card";
|
||||
import UserModal from "../components/UserModal";
|
||||
|
||||
import {
|
||||
type SummaryResponse,
|
||||
@@ -21,8 +20,15 @@ import {
|
||||
type UserEndpointResponse,
|
||||
type TimeAnalysisResponse,
|
||||
type LinguisticAnalysisResponse,
|
||||
type User,
|
||||
} from "../types/ApiTypes";
|
||||
import {
|
||||
buildAllRecordsSpec,
|
||||
buildDateBucketSpec,
|
||||
buildOneTimeUsersSpec,
|
||||
buildUserSpec,
|
||||
getExplorerButtonStyle,
|
||||
type CorpusExplorerSpec,
|
||||
} from "../utils/corpusExplorer";
|
||||
|
||||
const styles = StatsStyling;
|
||||
const MAX_WORDCLOUD_WORDS = 250;
|
||||
@@ -39,6 +45,7 @@ type SummaryStatsProps = {
|
||||
timeData: TimeAnalysisResponse | null;
|
||||
linguisticData: LinguisticAnalysisResponse | null;
|
||||
summary: SummaryResponse | null;
|
||||
onExplore: (spec: CorpusExplorerSpec) => void;
|
||||
};
|
||||
|
||||
type WordCloudPanelProps = {
|
||||
@@ -60,7 +67,7 @@ function formatDateRange(startUnix: number, endUnix: number) {
|
||||
day: "2-digit",
|
||||
});
|
||||
|
||||
return `${fmt(start)} → ${fmt(end)}`;
|
||||
return `${fmt(start)} -> ${fmt(end)}`;
|
||||
}
|
||||
|
||||
function convertFrequencyData(data: FrequencyWord[]) {
|
||||
@@ -70,25 +77,22 @@ function convertFrequencyData(data: FrequencyWord[]) {
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Renders the small "Explore" button used in card slots.
 *
 * @param onClick - Handler invoked when the button is clicked.
 * @returns The button element, styled with the secondary button style merged
 *   with `getExplorerButtonStyle()`.
 */
const renderExploreButton = (onClick: () => void) => (
  <button
    // Explicit type so the button can never act as a form submit.
    type="button"
    onClick={onClick}
    style={{ ...styles.buttonSecondary, ...getExplorerButtonStyle() }}
  >
    Explore
  </button>
);
|
||||
|
||||
const SummaryStats = ({
|
||||
userData,
|
||||
timeData,
|
||||
linguisticData,
|
||||
summary,
|
||||
onExplore,
|
||||
}: SummaryStatsProps) => {
|
||||
const [selectedUser, setSelectedUser] = useState<string | null>(null);
|
||||
const usersByAuthor = useMemo(() => {
|
||||
const nextMap = new Map<string, User>();
|
||||
for (const user of userData?.users ?? []) {
|
||||
nextMap.set(user.author, user);
|
||||
}
|
||||
return nextMap;
|
||||
}, [userData?.users]);
|
||||
|
||||
const selectedUserData: User | null = selectedUser
|
||||
? usersByAuthor.get(selectedUser) ?? null
|
||||
: null;
|
||||
|
||||
const wordCloudWords = useMemo(
|
||||
() =>
|
||||
convertFrequencyData(
|
||||
@@ -104,49 +108,41 @@ const SummaryStats = ({
|
||||
|
||||
return (
|
||||
<div style={styles.page}>
|
||||
{/* main grid*/}
|
||||
<div style={{ ...styles.container, ...styles.grid }}>
|
||||
<Card
|
||||
label="Total Activity"
|
||||
value={summary?.total_events ?? "—"}
|
||||
value={summary?.total_events ?? "-"}
|
||||
sublabel="Posts + comments"
|
||||
style={{
|
||||
gridColumn: "span 4",
|
||||
}}
|
||||
rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
|
||||
style={{ gridColumn: "span 4" }}
|
||||
/>
|
||||
<Card
|
||||
label="Active People"
|
||||
value={summary?.unique_users ?? "—"}
|
||||
value={summary?.unique_users ?? "-"}
|
||||
sublabel="Distinct users"
|
||||
style={{
|
||||
gridColumn: "span 4",
|
||||
}}
|
||||
rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
|
||||
style={{ gridColumn: "span 4" }}
|
||||
/>
|
||||
<Card
|
||||
label="Posts vs Comments"
|
||||
value={
|
||||
summary ? `${summary.total_posts} / ${summary.total_comments}` : "—"
|
||||
summary ? `${summary.total_posts} / ${summary.total_comments}` : "-"
|
||||
}
|
||||
sublabel={`Comments per post: ${summary?.comments_per_post ?? "—"}`}
|
||||
style={{
|
||||
gridColumn: "span 4",
|
||||
}}
|
||||
sublabel={`Comments per post: ${summary?.comments_per_post ?? "-"}`}
|
||||
rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
|
||||
style={{ gridColumn: "span 4" }}
|
||||
/>
|
||||
|
||||
<Card
|
||||
label="Time Range"
|
||||
value={
|
||||
summary?.time_range
|
||||
? formatDateRange(
|
||||
summary.time_range.start,
|
||||
summary.time_range.end,
|
||||
)
|
||||
: "—"
|
||||
? formatDateRange(summary.time_range.start, summary.time_range.end)
|
||||
: "-"
|
||||
}
|
||||
sublabel="Based on dataset timestamps"
|
||||
style={{
|
||||
gridColumn: "span 4",
|
||||
}}
|
||||
rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
|
||||
style={{ gridColumn: "span 4" }}
|
||||
/>
|
||||
|
||||
<Card
|
||||
@@ -154,38 +150,44 @@ const SummaryStats = ({
|
||||
value={
|
||||
typeof summary?.lurker_ratio === "number"
|
||||
? `${Math.round(summary.lurker_ratio * 100)}%`
|
||||
: "—"
|
||||
: "-"
|
||||
}
|
||||
sublabel="Users with only one event"
|
||||
style={{
|
||||
gridColumn: "span 4",
|
||||
}}
|
||||
rightSlot={renderExploreButton(() => onExplore(buildOneTimeUsersSpec()))}
|
||||
style={{ gridColumn: "span 4" }}
|
||||
/>
|
||||
|
||||
<Card
|
||||
label="Sources"
|
||||
value={summary?.sources?.length ?? "—"}
|
||||
value={summary?.sources?.length ?? "-"}
|
||||
sublabel={
|
||||
summary?.sources?.length
|
||||
? summary.sources.slice(0, 3).join(", ") +
|
||||
(summary.sources.length > 3 ? "…" : "")
|
||||
: "—"
|
||||
(summary.sources.length > 3 ? "..." : "")
|
||||
: "-"
|
||||
}
|
||||
style={{
|
||||
gridColumn: "span 4",
|
||||
}}
|
||||
rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
|
||||
style={{ gridColumn: "span 4" }}
|
||||
/>
|
||||
|
||||
{/* events per day */}
|
||||
<div style={{ ...styles.card, gridColumn: "span 5" }}>
|
||||
<h2 style={styles.sectionTitle}>Activity Over Time</h2>
|
||||
<p style={styles.sectionSubtitle}>
|
||||
How much posting happened each day.
|
||||
</p>
|
||||
<p style={styles.sectionSubtitle}>How much posting happened each day.</p>
|
||||
|
||||
<div style={styles.chartWrapper}>
|
||||
<ResponsiveContainer width="100%" height="100%">
|
||||
<LineChart data={timeData?.events_per_day ?? []}>
|
||||
<LineChart
|
||||
data={timeData?.events_per_day ?? []}
|
||||
onClick={(state: unknown) => {
|
||||
const payload = (state as { activePayload?: Array<{ payload?: { date?: string } }> })
|
||||
?.activePayload?.[0]?.payload as
|
||||
| { date?: string }
|
||||
| undefined;
|
||||
if (payload?.date) {
|
||||
onExplore(buildDateBucketSpec(String(payload.date)));
|
||||
}
|
||||
}}
|
||||
>
|
||||
<CartesianGrid strokeDasharray="3 3" />
|
||||
<XAxis dataKey="date" />
|
||||
<YAxis />
|
||||
@@ -201,7 +203,6 @@ const SummaryStats = ({
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Word Cloud */}
|
||||
<div style={{ ...styles.card, gridColumn: "span 4" }}>
|
||||
<h2 style={styles.sectionTitle}>Common Words</h2>
|
||||
<p style={styles.sectionSubtitle}>
|
||||
@@ -213,7 +214,6 @@ const SummaryStats = ({
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Top Users */}
|
||||
<div
|
||||
style={{ ...styles.card, ...styles.scrollArea, gridColumn: "span 3" }}
|
||||
>
|
||||
@@ -225,7 +225,7 @@ const SummaryStats = ({
|
||||
<div
|
||||
key={`${item.author}-${item.source}`}
|
||||
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||
onClick={() => setSelectedUser(item.author)}
|
||||
onClick={() => onExplore(buildUserSpec(item.author))}
|
||||
>
|
||||
<div style={styles.topUserName}>{item.author}</div>
|
||||
<div style={styles.topUserMeta}>
|
||||
@@ -236,7 +236,6 @@ const SummaryStats = ({
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Heatmap */}
|
||||
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
||||
<h2 style={styles.sectionTitle}>Weekly Activity Pattern</h2>
|
||||
<p style={styles.sectionSubtitle}>
|
||||
@@ -248,13 +247,6 @@ const SummaryStats = ({
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<UserModal
|
||||
open={!!selectedUser}
|
||||
onClose={() => setSelectedUser(null)}
|
||||
username={selectedUser ?? ""}
|
||||
userData={selectedUserData}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -5,6 +5,12 @@ import { type TopUser, type InteractionGraph } from "../types/ApiTypes";
|
||||
|
||||
import StatsStyling from "../styles/stats_styling";
|
||||
import Card from "./Card";
|
||||
import {
|
||||
buildReplyPairSpec,
|
||||
toText,
|
||||
buildUserSpec,
|
||||
type CorpusExplorerSpec,
|
||||
} from "../utils/corpusExplorer";
|
||||
|
||||
const styles = StatsStyling;
|
||||
|
||||
@@ -39,6 +45,7 @@ type UserStatsProps = {
|
||||
interactionGraph: InteractionGraph;
|
||||
totalUsers: number;
|
||||
mostCommentHeavyUser: { author: string; commentShare: number } | null;
|
||||
onExplore: (spec: CorpusExplorerSpec) => void;
|
||||
};
|
||||
|
||||
const UserStats = ({
|
||||
@@ -46,6 +53,7 @@ const UserStats = ({
|
||||
interactionGraph,
|
||||
totalUsers,
|
||||
mostCommentHeavyUser,
|
||||
onExplore,
|
||||
}: UserStatsProps) => {
|
||||
const graphData = useMemo(
|
||||
() => ApiToGraphData(interactionGraph),
|
||||
@@ -87,9 +95,9 @@ const UserStats = ({
|
||||
null,
|
||||
);
|
||||
|
||||
const mostActiveUser = topUsers.find(
|
||||
(u) => u.author !== "[deleted]",
|
||||
);
|
||||
const mostActiveUser = topUsers.find((u) => u.author !== "[deleted]");
|
||||
const strongestLinkSource = strongestLink ? toText(strongestLink.source) : "";
|
||||
const strongestLinkTarget = strongestLink ? toText(strongestLink.target) : "";
|
||||
|
||||
return (
|
||||
<div style={styles.page}>
|
||||
@@ -114,37 +122,69 @@ const UserStats = ({
|
||||
/>
|
||||
<Card
|
||||
label="Most Active User"
|
||||
value={mostActiveUser?.author ?? "—"}
|
||||
value={mostActiveUser?.author ?? "-"}
|
||||
sublabel={
|
||||
mostActiveUser
|
||||
? `${mostActiveUser.count.toLocaleString()} events`
|
||||
: "No user activity found"
|
||||
}
|
||||
rightSlot={
|
||||
mostActiveUser ? (
|
||||
<button
|
||||
onClick={() => onExplore(buildUserSpec(mostActiveUser.author))}
|
||||
style={styles.buttonSecondary}
|
||||
>
|
||||
Explore
|
||||
</button>
|
||||
) : null
|
||||
}
|
||||
style={{ gridColumn: "span 3" }}
|
||||
/>
|
||||
|
||||
<Card
|
||||
label="Strongest User Link"
|
||||
value={
|
||||
strongestLink
|
||||
? `${strongestLink.source} -> ${strongestLink.target}`
|
||||
: "—"
|
||||
strongestLinkSource && strongestLinkTarget
|
||||
? `${strongestLinkSource} -> ${strongestLinkTarget}`
|
||||
: "-"
|
||||
}
|
||||
sublabel={
|
||||
strongestLink
|
||||
? `${strongestLink.value.toLocaleString()} replies`
|
||||
: "No graph links after filtering"
|
||||
}
|
||||
rightSlot={
|
||||
strongestLinkSource && strongestLinkTarget ? (
|
||||
<button
|
||||
onClick={() =>
|
||||
onExplore(buildReplyPairSpec(strongestLinkSource, strongestLinkTarget))
|
||||
}
|
||||
style={styles.buttonSecondary}
|
||||
>
|
||||
Explore
|
||||
</button>
|
||||
) : null
|
||||
}
|
||||
style={{ gridColumn: "span 6" }}
|
||||
/>
|
||||
<Card
|
||||
label="Most Comment-Heavy User"
|
||||
value={mostCommentHeavyUser?.author ?? "—"}
|
||||
value={mostCommentHeavyUser?.author ?? "-"}
|
||||
sublabel={
|
||||
mostCommentHeavyUser
|
||||
? `${Math.round(mostCommentHeavyUser.commentShare * 100)}% comments`
|
||||
: "No user distribution available"
|
||||
}
|
||||
rightSlot={
|
||||
mostCommentHeavyUser ? (
|
||||
<button
|
||||
onClick={() => onExplore(buildUserSpec(mostCommentHeavyUser.author))}
|
||||
style={styles.buttonSecondary}
|
||||
>
|
||||
Explore
|
||||
</button>
|
||||
) : null
|
||||
}
|
||||
style={{ gridColumn: "span 6" }}
|
||||
/>
|
||||
|
||||
@@ -166,6 +206,19 @@ const UserStats = ({
|
||||
linkDirectionalParticleSpeed={0.004}
|
||||
linkWidth={(link) => Math.sqrt(Number(link.value))}
|
||||
nodeLabel={(node) => `${node.id}`}
|
||||
onNodeClick={(node) => {
|
||||
const userId = toText(node.id);
|
||||
if (userId) {
|
||||
onExplore(buildUserSpec(userId));
|
||||
}
|
||||
}}
|
||||
onLinkClick={(link) => {
|
||||
const source = toText(link.source);
|
||||
const target = toText(link.target);
|
||||
if (source && target) {
|
||||
onExplore(buildReplyPairSpec(source, target));
|
||||
}
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -22,12 +22,10 @@ const DatasetEditPage = () => {
|
||||
const [isSaving, setIsSaving] = useState(false);
|
||||
const [isDeleting, setIsDeleting] = useState(false);
|
||||
const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false);
|
||||
const [hasError, setHasError] = useState(false);
|
||||
|
||||
const [datasetName, setDatasetName] = useState("");
|
||||
useEffect(() => {
|
||||
if (!Number.isInteger(parsedDatasetId) || parsedDatasetId <= 0) {
|
||||
setHasError(true);
|
||||
setStatusMessage("Invalid dataset id.");
|
||||
setLoading(false);
|
||||
return;
|
||||
@@ -35,7 +33,6 @@ const DatasetEditPage = () => {
|
||||
|
||||
const token = localStorage.getItem("access_token");
|
||||
if (!token) {
|
||||
setHasError(true);
|
||||
setStatusMessage("You must be signed in to edit datasets.");
|
||||
setLoading(false);
|
||||
return;
|
||||
@@ -49,7 +46,6 @@ const DatasetEditPage = () => {
|
||||
setDatasetName(response.data.name || "");
|
||||
})
|
||||
.catch((error: unknown) => {
|
||||
setHasError(true);
|
||||
if (axios.isAxiosError(error)) {
|
||||
setStatusMessage(
|
||||
String(error.response?.data?.error || error.message),
|
||||
@@ -68,21 +64,18 @@ const DatasetEditPage = () => {
|
||||
|
||||
const trimmedName = datasetName.trim();
|
||||
if (!trimmedName) {
|
||||
setHasError(true);
|
||||
setStatusMessage("Please enter a valid dataset name.");
|
||||
return;
|
||||
}
|
||||
|
||||
const token = localStorage.getItem("access_token");
|
||||
if (!token) {
|
||||
setHasError(true);
|
||||
setStatusMessage("You must be signed in to save changes.");
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
setIsSaving(true);
|
||||
setHasError(false);
|
||||
setStatusMessage("");
|
||||
|
||||
await axios.patch(
|
||||
@@ -93,7 +86,6 @@ const DatasetEditPage = () => {
|
||||
|
||||
navigate("/datasets", { replace: true });
|
||||
} catch (error: unknown) {
|
||||
setHasError(true);
|
||||
if (axios.isAxiosError(error)) {
|
||||
setStatusMessage(
|
||||
String(
|
||||
@@ -111,7 +103,6 @@ const DatasetEditPage = () => {
|
||||
const deleteDataset = async () => {
|
||||
const deleteToken = localStorage.getItem("access_token");
|
||||
if (!deleteToken) {
|
||||
setHasError(true);
|
||||
setStatusMessage("You must be signed in to delete datasets.");
|
||||
setIsDeleteModalOpen(false);
|
||||
return;
|
||||
@@ -119,7 +110,6 @@ const DatasetEditPage = () => {
|
||||
|
||||
try {
|
||||
setIsDeleting(true);
|
||||
setHasError(false);
|
||||
setStatusMessage("");
|
||||
|
||||
await axios.delete(`${API_BASE_URL}/dataset/${parsedDatasetId}`, {
|
||||
@@ -129,7 +119,6 @@ const DatasetEditPage = () => {
|
||||
setIsDeleteModalOpen(false);
|
||||
navigate("/datasets", { replace: true });
|
||||
} catch (error: unknown) {
|
||||
setHasError(true);
|
||||
if (axios.isAxiosError(error)) {
|
||||
setStatusMessage(
|
||||
String(
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { useEffect, useState, useRef } from "react";
|
||||
import { useEffect, useRef, useState } from "react";
|
||||
import axios from "axios";
|
||||
import { useParams } from "react-router-dom";
|
||||
import StatsStyling from "../styles/stats_styling";
|
||||
@@ -8,6 +8,7 @@ import UserStats from "../components/UserStats";
|
||||
import LinguisticStats from "../components/LinguisticStats";
|
||||
import InteractionalStats from "../components/InteractionalStats";
|
||||
import CulturalStats from "../components/CulturalStats";
|
||||
import CorpusExplorer from "../components/CorpusExplorer";
|
||||
|
||||
import {
|
||||
type SummaryResponse,
|
||||
@@ -19,10 +20,15 @@ import {
|
||||
type InteractionAnalysisResponse,
|
||||
type CulturalAnalysisResponse,
|
||||
} from "../types/ApiTypes";
|
||||
import {
|
||||
buildExplorerContext,
|
||||
type CorpusExplorerSpec,
|
||||
type DatasetRecord,
|
||||
} from "../utils/corpusExplorer";
|
||||
|
||||
const API_BASE_URL = import.meta.env.VITE_BACKEND_URL;
|
||||
const styles = StatsStyling;
|
||||
const DELETED_USERS = ["[deleted]"];
|
||||
const DELETED_USERS = ["[deleted]", "automoderator"];
|
||||
|
||||
const isDeletedUser = (value: string | null | undefined) =>
|
||||
DELETED_USERS.includes((value ?? "").trim().toLowerCase());
|
||||
@@ -40,6 +46,97 @@ type UserStatsMeta = {
|
||||
mostCommentHeavyUser: { author: string; commentShare: number } | null;
|
||||
};
|
||||
|
||||
type ExplorerState = {
|
||||
open: boolean;
|
||||
title: string;
|
||||
description: string;
|
||||
emptyMessage: string;
|
||||
records: DatasetRecord[];
|
||||
loading: boolean;
|
||||
error: string;
|
||||
};
|
||||
|
||||
const EMPTY_EXPLORER_STATE: ExplorerState = {
|
||||
open: false,
|
||||
title: "Corpus Explorer",
|
||||
description: "",
|
||||
emptyMessage: "No records found.",
|
||||
records: [],
|
||||
loading: false,
|
||||
error: "",
|
||||
};
|
||||
|
||||
// Property names under which the record array may be wrapped, in lookup order.
const RECORD_WRAPPER_KEYS = ["data", "records", "rows", "result"] as const;

/**
 * Coerces the response shapes the corpus endpoint may produce into a flat list
 * of dataset records.
 *
 * Shapes are checked in this order:
 *  1. a JSON string wrapping any of the shapes below;
 *  2. an object with a string `error` property - rethrown as an `Error`;
 *  3. a bare array;
 *  4. an object holding an array under `data`, `records`, `rows` or `result`;
 *  5. an object with exactly one property whose value is an array;
 *  6. an object whose values are all objects (an empty object yields `[]`).
 *
 * @param payload - Raw response body.
 * @returns The extracted array; individual elements are not validated.
 * @throws Error when the payload is an unparsable string, carries an `error`
 *   message, or matches none of the shapes above.
 */
const normalizeRecordPayload = (payload: unknown): DatasetRecord[] => {
  if (typeof payload === "string") {
    let parsed: unknown;
    try {
      parsed = JSON.parse(payload);
    } catch {
      throw new Error("Corpus endpoint returned a non-JSON string payload.");
    }
    // Recurse outside the try block: errors raised while normalising the
    // parsed value (e.g. a server-side `error` message) must reach the caller
    // unchanged instead of being relabelled as a JSON parsing failure.
    return normalizeRecordPayload(parsed);
  }

  if (!payload || typeof payload !== "object") {
    throw new Error("Corpus endpoint returned an unexpected payload.");
  }

  const envelope = payload as Record<string, unknown>;

  if (typeof envelope.error === "string") {
    throw new Error(envelope.error);
  }

  if (Array.isArray(payload)) {
    return payload as DatasetRecord[];
  }

  for (const key of RECORD_WRAPPER_KEYS) {
    const candidate = envelope[key];
    if (Array.isArray(candidate)) {
      return candidate as DatasetRecord[];
    }
  }

  const values = Object.values(envelope);
  if (values.length === 1 && Array.isArray(values[0])) {
    return values[0] as DatasetRecord[];
  }
  if (values.every((value) => value && typeof value === "object")) {
    return values as DatasetRecord[];
  }

  throw new Error("Corpus endpoint returned an unexpected payload.");
};
|
||||
|
||||
const StatPage = () => {
|
||||
const { datasetId: routeDatasetId } = useParams<{ datasetId: string }>();
|
||||
const [error, setError] = useState("");
|
||||
@@ -61,6 +158,12 @@ const StatPage = () => {
|
||||
totalUsers: 0,
|
||||
mostCommentHeavyUser: null,
|
||||
});
|
||||
const [appliedFilters, setAppliedFilters] = useState<Record<string, string>>({});
|
||||
const [allRecords, setAllRecords] = useState<DatasetRecord[] | null>(null);
|
||||
const [allRecordsKey, setAllRecordsKey] = useState("");
|
||||
const [explorerState, setExplorerState] = useState<ExplorerState>(
|
||||
EMPTY_EXPLORER_STATE,
|
||||
);
|
||||
|
||||
const searchInputRef = useRef<HTMLInputElement>(null);
|
||||
const beforeDateRef = useRef<HTMLInputElement>(null);
|
||||
@@ -104,6 +207,82 @@ const StatPage = () => {
|
||||
};
|
||||
};
|
||||
|
||||
// Serialises a filter object into a string that does not depend on property
// order: entries are sorted by key before being JSON-encoded.
const getFilterKey = (params: Record<string, string>) => {
  const orderedEntries = Object.entries(params);
  orderedEntries.sort((left, right) => left[0].localeCompare(right[0]));
  return JSON.stringify(orderedEntries);
};
|
||||
|
||||
// Resolves the record list for the currently applied filters. The cached list
// is returned when its key matches the current filter key; otherwise the
// `/all` endpoint is queried, the payload normalised, and both the list and
// its key are stored in component state.
// Throws when the dataset id or the auth headers are missing, and propagates
// request or normalisation failures.
const ensureFilteredRecords = async () => {
  if (!datasetId) {
    throw new Error("Missing dataset id.");
  }

  const authHeaders = getAuthHeaders();
  if (!authHeaders) {
    throw new Error("You must be signed in to load corpus records.");
  }

  const filterKey = getFilterKey(appliedFilters);
  const cacheMatchesFilters = allRecordsKey === filterKey;
  if (cacheMatchesFilters && allRecords) {
    return allRecords;
  }

  const endpoint = `${API_BASE_URL}/dataset/${datasetId}/all`;
  const { data } = await axios.get<unknown>(endpoint, {
    params: appliedFilters,
    headers: authHeaders,
  });

  const freshRecords = normalizeRecordPayload(data);
  setAllRecords(freshRecords);
  setAllRecordsKey(filterKey);
  return freshRecords;
};
|
||||
|
||||
// Opens the explorer dialog for `spec`: shows a loading state first, then
// either the matching records (sorted descending by the string form of
// dt / date / timestamp) or an error message if loading fails.
const openExplorer = async (spec: CorpusExplorerSpec) => {
  // Fields shared by the loading, success and failure states.
  const dialogBase = {
    open: true,
    title: spec.title,
    description: spec.description,
    emptyMessage: spec.emptyMessage ?? "No matching records found.",
  };

  setExplorerState({ ...dialogBase, records: [], loading: true, error: "" });

  try {
    const records = await ensureFilteredRecords();
    const context = buildExplorerContext(records);
    const sortValue = (record: DatasetRecord) =>
      String(record.dt ?? record.date ?? record.timestamp ?? "");

    // filter() yields a fresh array, so the in-place sort leaves `records` intact.
    const matched = records
      .filter((record) => spec.matcher(record, context))
      .sort((a, b) => sortValue(b).localeCompare(sortValue(a)));

    setExplorerState({ ...dialogBase, records: matched, loading: false, error: "" });
  } catch (e) {
    setExplorerState({
      ...dialogBase,
      records: [],
      loading: false,
      error: `Failed to load corpus records: ${String(e)}`,
    });
  }
};
|
||||
|
||||
const getStats = (params: Record<string, string> = {}) => {
|
||||
if (!datasetId) {
|
||||
setError("Missing dataset id. Open /dataset/<id>/stats.");
|
||||
@@ -118,22 +297,20 @@ const StatPage = () => {
|
||||
|
||||
setError("");
|
||||
setLoading(true);
|
||||
setAppliedFilters(params);
|
||||
setAllRecords(null);
|
||||
setAllRecordsKey("");
|
||||
setExplorerState((current) => ({ ...current, open: false }));
|
||||
|
||||
Promise.all([
|
||||
axios.get<TimeAnalysisResponse>(
|
||||
`${API_BASE_URL}/dataset/${datasetId}/temporal`,
|
||||
{
|
||||
params,
|
||||
headers: authHeaders,
|
||||
},
|
||||
),
|
||||
axios.get<UserEndpointResponse>(
|
||||
`${API_BASE_URL}/dataset/${datasetId}/user`,
|
||||
{
|
||||
params,
|
||||
headers: authHeaders,
|
||||
},
|
||||
),
|
||||
axios.get<TimeAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/temporal`, {
|
||||
params,
|
||||
headers: authHeaders,
|
||||
}),
|
||||
axios.get<UserEndpointResponse>(`${API_BASE_URL}/dataset/${datasetId}/user`, {
|
||||
params,
|
||||
headers: authHeaders,
|
||||
}),
|
||||
axios.get<LinguisticAnalysisResponse>(
|
||||
`${API_BASE_URL}/dataset/${datasetId}/linguistic`,
|
||||
{
|
||||
@@ -141,13 +318,10 @@ const StatPage = () => {
|
||||
headers: authHeaders,
|
||||
},
|
||||
),
|
||||
axios.get<EmotionalAnalysisResponse>(
|
||||
`${API_BASE_URL}/dataset/${datasetId}/emotional`,
|
||||
{
|
||||
params,
|
||||
headers: authHeaders,
|
||||
},
|
||||
),
|
||||
axios.get<EmotionalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/emotional`, {
|
||||
params,
|
||||
headers: authHeaders,
|
||||
}),
|
||||
axios.get<InteractionAnalysisResponse>(
|
||||
`${API_BASE_URL}/dataset/${datasetId}/interactional`,
|
||||
{
|
||||
@@ -155,20 +329,14 @@ const StatPage = () => {
|
||||
headers: authHeaders,
|
||||
},
|
||||
),
|
||||
axios.get<SummaryResponse>(
|
||||
`${API_BASE_URL}/dataset/${datasetId}/summary`,
|
||||
{
|
||||
params,
|
||||
headers: authHeaders,
|
||||
},
|
||||
),
|
||||
axios.get<CulturalAnalysisResponse>(
|
||||
`${API_BASE_URL}/dataset/${datasetId}/cultural`,
|
||||
{
|
||||
params,
|
||||
headers: authHeaders,
|
||||
},
|
||||
),
|
||||
axios.get<SummaryResponse>(`${API_BASE_URL}/dataset/${datasetId}/summary`, {
|
||||
params,
|
||||
headers: authHeaders,
|
||||
}),
|
||||
axios.get<CulturalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/cultural`, {
|
||||
params,
|
||||
headers: authHeaders,
|
||||
}),
|
||||
])
|
||||
.then(
|
||||
([
|
||||
@@ -182,8 +350,7 @@ const StatPage = () => {
|
||||
]) => {
|
||||
const usersList = userRes.data.users ?? [];
|
||||
const topUsersList = userRes.data.top_users ?? [];
|
||||
const interactionGraphRaw =
|
||||
interactionRes.data?.interaction_graph ?? {};
|
||||
const interactionGraphRaw = interactionRes.data?.interaction_graph ?? {};
|
||||
const topPairsRaw = interactionRes.data?.top_interaction_pairs ?? [];
|
||||
|
||||
const filteredUsers: typeof usersList = [];
|
||||
@@ -194,18 +361,14 @@ const StatPage = () => {
|
||||
|
||||
const filteredTopUsers: typeof topUsersList = [];
|
||||
for (const user of topUsersList) {
|
||||
if (isDeletedUser(user.author)) continue;
|
||||
filteredTopUsers.push(user);
|
||||
if (isDeletedUser(user.author)) continue;
|
||||
filteredTopUsers.push(user);
|
||||
}
|
||||
|
||||
let mostCommentHeavyUser: UserStatsMeta["mostCommentHeavyUser"] =
|
||||
null;
|
||||
let mostCommentHeavyUser: UserStatsMeta["mostCommentHeavyUser"] = null;
|
||||
for (const user of filteredUsers) {
|
||||
const currentShare = user.comment_share ?? 0;
|
||||
if (
|
||||
!mostCommentHeavyUser ||
|
||||
currentShare > mostCommentHeavyUser.commentShare
|
||||
) {
|
||||
if (!mostCommentHeavyUser || currentShare > mostCommentHeavyUser.commentShare) {
|
||||
mostCommentHeavyUser = {
|
||||
author: user.author,
|
||||
commentShare: currentShare,
|
||||
@@ -221,8 +384,7 @@ const StatPage = () => {
|
||||
}
|
||||
}
|
||||
|
||||
const filteredInteractionGraph: Record<string, Record<string, number>> =
|
||||
{};
|
||||
const filteredInteractionGraph: Record<string, Record<string, number>> = {};
|
||||
for (const [source, targets] of Object.entries(interactionGraphRaw)) {
|
||||
if (isDeletedUser(source)) {
|
||||
continue;
|
||||
@@ -279,7 +441,7 @@ const StatPage = () => {
|
||||
setSummary(filteredSummary || null);
|
||||
},
|
||||
)
|
||||
.catch((e) => setError("Failed to load statistics: " + String(e)))
|
||||
.catch((e) => setError(`Failed to load statistics: ${String(e)}`))
|
||||
.finally(() => setLoading(false));
|
||||
};
|
||||
|
||||
@@ -302,6 +464,9 @@ const StatPage = () => {
|
||||
|
||||
useEffect(() => {
|
||||
setError("");
|
||||
setAllRecords(null);
|
||||
setAllRecordsKey("");
|
||||
setExplorerState(EMPTY_EXPLORER_STATE);
|
||||
if (!datasetId) {
|
||||
setError("Missing dataset id. Open /dataset/<id>/stats.");
|
||||
return;
|
||||
@@ -398,9 +563,7 @@ const StatPage = () => {
|
||||
<button
|
||||
onClick={() => setActiveView("summary")}
|
||||
style={
|
||||
activeView === "summary"
|
||||
? styles.buttonPrimary
|
||||
: styles.buttonSecondary
|
||||
activeView === "summary" ? styles.buttonPrimary : styles.buttonSecondary
|
||||
}
|
||||
>
|
||||
Summary
|
||||
@@ -418,11 +581,7 @@ const StatPage = () => {
|
||||
|
||||
<button
|
||||
onClick={() => setActiveView("user")}
|
||||
style={
|
||||
activeView === "user"
|
||||
? styles.buttonPrimary
|
||||
: styles.buttonSecondary
|
||||
}
|
||||
style={activeView === "user" ? styles.buttonPrimary : styles.buttonSecondary}
|
||||
>
|
||||
Users
|
||||
</button>
|
||||
@@ -449,9 +608,7 @@ const StatPage = () => {
|
||||
<button
|
||||
onClick={() => setActiveView("cultural")}
|
||||
style={
|
||||
activeView === "cultural"
|
||||
? styles.buttonPrimary
|
||||
: styles.buttonSecondary
|
||||
activeView === "cultural" ? styles.buttonPrimary : styles.buttonSecondary
|
||||
}
|
||||
>
|
||||
Cultural
|
||||
@@ -464,11 +621,12 @@ const StatPage = () => {
|
||||
timeData={timeData}
|
||||
linguisticData={linguisticData}
|
||||
summary={summary}
|
||||
onExplore={openExplorer}
|
||||
/>
|
||||
)}
|
||||
|
||||
{activeView === "emotional" && emotionalData && (
|
||||
<EmotionalStats emotionalData={emotionalData} />
|
||||
<EmotionalStats emotionalData={emotionalData} onExplore={openExplorer} />
|
||||
)}
|
||||
|
||||
{activeView === "emotional" && !emotionalData && (
|
||||
@@ -483,6 +641,7 @@ const StatPage = () => {
|
||||
interactionGraph={interactionData.interaction_graph}
|
||||
totalUsers={userStatsMeta.totalUsers}
|
||||
mostCommentHeavyUser={userStatsMeta.mostCommentHeavyUser}
|
||||
onExplore={openExplorer}
|
||||
/>
|
||||
)}
|
||||
|
||||
@@ -493,7 +652,7 @@ const StatPage = () => {
|
||||
)}
|
||||
|
||||
{activeView === "linguistic" && linguisticData && (
|
||||
<LinguisticStats data={linguisticData} />
|
||||
<LinguisticStats data={linguisticData} onExplore={openExplorer} />
|
||||
)}
|
||||
|
||||
{activeView === "linguistic" && !linguisticData && (
|
||||
@@ -503,7 +662,7 @@ const StatPage = () => {
|
||||
)}
|
||||
|
||||
{activeView === "interactional" && interactionData && (
|
||||
<InteractionalStats data={interactionData} />
|
||||
<InteractionalStats data={interactionData} onExplore={openExplorer} />
|
||||
)}
|
||||
|
||||
{activeView === "interactional" && !interactionData && (
|
||||
@@ -513,7 +672,7 @@ const StatPage = () => {
|
||||
)}
|
||||
|
||||
{activeView === "cultural" && culturalData && (
|
||||
<CulturalStats data={culturalData} />
|
||||
<CulturalStats data={culturalData} onExplore={openExplorer} />
|
||||
)}
|
||||
|
||||
{activeView === "cultural" && !culturalData && (
|
||||
@@ -521,6 +680,17 @@ const StatPage = () => {
|
||||
No cultural data available.
|
||||
</div>
|
||||
)}
|
||||
|
||||
<CorpusExplorer
|
||||
open={explorerState.open}
|
||||
onClose={() => setExplorerState((current) => ({ ...current, open: false }))}
|
||||
title={explorerState.title}
|
||||
description={explorerState.description}
|
||||
records={explorerState.records}
|
||||
loading={explorerState.loading}
|
||||
error={explorerState.error}
|
||||
emptyMessage={explorerState.emptyMessage}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
405
frontend/src/utils/corpusExplorer.ts
Normal file
405
frontend/src/utils/corpusExplorer.ts
Normal file
@@ -0,0 +1,405 @@
|
||||
import type { CSSProperties } from "react";
|
||||
|
||||
type EntityRecord = {
|
||||
text?: string;
|
||||
[key: string]: unknown;
|
||||
};
|
||||
|
||||
type DatasetRecord = {
|
||||
id?: string | number;
|
||||
post_id?: string | number | null;
|
||||
parent_id?: string | number | null;
|
||||
author?: string | null;
|
||||
title?: string | null;
|
||||
content?: string | null;
|
||||
timestamp?: string | number | null;
|
||||
date?: string | null;
|
||||
dt?: string | null;
|
||||
hour?: number | null;
|
||||
weekday?: string | null;
|
||||
reply_to?: string | number | null;
|
||||
source?: string | null;
|
||||
topic?: string | null;
|
||||
topic_confidence?: number | null;
|
||||
type?: string | null;
|
||||
ner_entities?: EntityRecord[] | null;
|
||||
emotion_anger?: number | null;
|
||||
emotion_disgust?: number | null;
|
||||
emotion_fear?: number | null;
|
||||
emotion_joy?: number | null;
|
||||
emotion_sadness?: number | null;
|
||||
[key: string]: unknown;
|
||||
};
|
||||
|
||||
type CorpusExplorerContext = {
|
||||
authorByPostId: Map<string, string>;
|
||||
authorEventCounts: Map<string, number>;
|
||||
authorCommentCounts: Map<string, number>;
|
||||
};
|
||||
|
||||
type CorpusExplorerSpec = {
|
||||
title: string;
|
||||
description: string;
|
||||
emptyMessage?: string;
|
||||
matcher: (record: DatasetRecord, context: CorpusExplorerContext) => boolean;
|
||||
};
|
||||
|
||||
const IN_GROUP_PATTERN = /\b(we|us|our|ourselves)\b/gi;
|
||||
const OUT_GROUP_PATTERN = /\b(they|them|their|themselves)\b/gi;
|
||||
const HEDGE_PATTERN = /\b(maybe|perhaps|possibly|probably|likely|seems|seem|i think|i feel|i guess|kind of|sort of|somewhat)\b/i;
|
||||
const CERTAINTY_PATTERN = /\b(definitely|certainly|clearly|obviously|undeniably|always|never)\b/i;
|
||||
const DEONTIC_PATTERN = /\b(must|should|need|needs|have to|has to|ought|required|require)\b/i;
|
||||
const PERMISSION_PATTERN = /\b(can|allowed|okay|ok|permitted)\b/i;
|
||||
const EMOTION_KEYS = [
|
||||
"emotion_anger",
|
||||
"emotion_disgust",
|
||||
"emotion_fear",
|
||||
"emotion_joy",
|
||||
"emotion_sadness",
|
||||
] as const;
|
||||
|
||||
const shrinkButtonStyle: CSSProperties = {
|
||||
padding: "4px 8px",
|
||||
fontSize: 12,
|
||||
};
|
||||
|
||||
/**
 * Converts a loosely typed value to text.
 *
 * Strings are returned unchanged; numbers and booleans are stringified; an
 * object is represented by its `id` property when that is a string or number.
 * Everything else becomes the empty string.
 */
const toText = (value: unknown) => {
  switch (typeof value) {
    case "string":
      return value;
    case "number":
    case "boolean":
      return String(value);
    case "object": {
      if (value !== null && "id" in value) {
        const candidate = (value as { id?: unknown }).id;
        if (typeof candidate === "string" || typeof candidate === "number") {
          return String(candidate);
        }
      }
      return "";
    }
    default:
      return "";
  }
};

/** Text form of `value` (see `toText`), trimmed and lower-cased for comparisons. */
const normalize = (value: unknown) => {
  const text = toText(value);
  return text.trim().toLowerCase();
};
|
||||
|
||||
/** Joins a record's title and content with a single space and trims the result. */
const getRecordText = (record: DatasetRecord) => {
  const heading = record.title ?? "";
  const body = record.content ?? "";
  return `${heading} ${body}`.trim();
};
|
||||
|
||||
/** Escapes every regular-expression metacharacter in `value` so it matches literally. */
const escapeRegExp = (value: string) =>
  value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

/**
 * Builds a case-insensitive pattern matching `phrase` as a whole phrase.
 *
 * The phrase is lower-cased and split on whitespace; consecutive tokens may be
 * separated by any run of whitespace in the searched text.
 *
 * The phrase must not be directly preceded or followed by a word character.
 * Look-around assertions are used instead of `\b` because `\b` requires a
 * word character on one side of the boundary, which makes phrases that begin
 * or end with punctuation (e.g. "c++", "u.s.") unmatchable.
 *
 * @param phrase - Phrase to search for.
 * @returns The compiled pattern, or `null` when the phrase has no tokens.
 */
const buildPhrasePattern = (phrase: string) => {
  const tokens = phrase
    .toLowerCase()
    .trim()
    .split(/\s+/)
    .filter(Boolean)
    .map(escapeRegExp);

  if (!tokens.length) {
    return null;
  }

  return new RegExp(`(?<!\\w)${tokens.join("\\s+")}(?!\\w)`, "i");
};
|
||||
|
||||
/**
 * Counts the matches of `pattern` in `text`. The pattern is recompiled from
 * its source with the `g` and `i` flags, so the flags on the given RegExp are
 * ignored.
 */
const countMatches = (pattern: RegExp, text: string) => {
  const globalPattern = new RegExp(pattern.source, "gi");
  const hits = text.match(globalPattern);
  return hits === null ? 0 : hits.length;
};
|
||||
|
||||
/**
 * Formats a timestamp (multiplied by 1000 before being handed to `Date`) as
 * the first ten characters of its ISO string, or returns "" when the value is
 * not finite or lies outside the range `Date` can represent.
 * `Date.prototype.toISOString` throws a RangeError for an invalid date, so
 * validity is checked first.
 */
const epochSecondsToDay = (seconds: number) => {
  if (!Number.isFinite(seconds)) {
    return "";
  }
  const date = new Date(seconds * 1000);
  return Number.isNaN(date.getTime()) ? "" : date.toISOString().slice(0, 10);
};

/**
 * Derives the day bucket (first ten characters of a date string) for a record.
 *
 * Sources are tried in order: a non-empty string `date`, a non-empty string
 * `dt`, a numeric `timestamp`, then a non-empty string `timestamp` parsed as a
 * number.
 *
 * @param record - Record to bucket.
 * @returns The bucket string, or "" when no usable date information exists.
 *   Malformed timestamps yield "" rather than throwing.
 */
const getDateBucket = (record: DatasetRecord) => {
  if (typeof record.date === "string" && record.date) {
    return record.date.slice(0, 10);
  }

  if (typeof record.dt === "string" && record.dt) {
    return record.dt.slice(0, 10);
  }

  const { timestamp } = record;

  if (typeof timestamp === "number") {
    return epochSecondsToDay(timestamp);
  }

  if (typeof timestamp === "string" && timestamp) {
    return epochSecondsToDay(Number(timestamp));
  }

  return "";
};
|
||||
|
||||
/**
 * Returns the name (without the "emotion_" prefix) of the emotion field with
 * the highest numeric value on `record`. Missing or null fields count as
 * negative infinity; ties keep the earlier key in EMOTION_KEYS. Returns ""
 * when no field beats negative infinity.
 */
const getDominantEmotion = (record: DatasetRecord) => {
  let winner = "";
  let highScore = Number.NEGATIVE_INFINITY;

  EMOTION_KEYS.forEach((emotionKey) => {
    const raw = record[emotionKey];
    const score = Number(raw ?? Number.NEGATIVE_INFINITY);
    if (score > highScore) {
      highScore = score;
      winner = emotionKey;
    }
  });

  return winner.replace("emotion_", "");
};
|
||||
|
||||
/**
 * Tests whether the record's lower-cased title/content text contains `phrase`
 * according to the pattern built by `buildPhrasePattern`. A phrase that yields
 * no pattern never matches.
 */
const matchesPhrase = (record: DatasetRecord, phrase: string) => {
  const pattern = buildPhrasePattern(phrase);
  return pattern !== null && pattern.test(getRecordText(record).toLowerCase());
};
|
||||
|
||||
/**
 * Classifies a record by comparing how often IN_GROUP_PATTERN and
 * OUT_GROUP_PATTERN match its lower-cased text: "in" when in-group matches
 * are more numerous, "out" when out-group matches are, "tie" when equal.
 */
const recordIdentityBucket = (record: DatasetRecord) => {
  const lowered = getRecordText(record).toLowerCase();
  const balance =
    countMatches(IN_GROUP_PATTERN, lowered) - countMatches(OUT_GROUP_PATTERN, lowered);

  if (balance === 0) {
    return "tie";
  }
  return balance > 0 ? "in" : "out";
};
|
||||
|
||||
/** Counts records per trimmed author name; records without an author are ignored. */
const createAuthorEventCounts = (records: DatasetRecord[]) =>
  records.reduce((tally, record) => {
    const name = toText(record.author).trim();
    if (name) {
      tally.set(name, (tally.get(name) ?? 0) + 1);
    }
    return tally;
  }, new Map<string, number>());
|
||||
|
||||
/**
 * Counts records whose `type` is "comment" per trimmed author name; records
 * without an author are ignored.
 */
const createAuthorCommentCounts = (records: DatasetRecord[]) => {
  const tally = new Map<string, number>();

  records
    .filter((record) => record.type === "comment")
    .forEach((comment) => {
      const name = toText(comment.author).trim();
      if (name.length > 0) {
        tally.set(name, 1 + (tally.get(name) ?? 0));
      }
    });

  return tally;
};
|
||||
|
||||
/**
 * Maps the string form of each record's `post_id` to its trimmed author.
 * Records with a null/undefined `post_id` or without an author are skipped;
 * when several records share a `post_id`, the last one wins.
 */
const createAuthorByPostId = (records: DatasetRecord[]) => {
  const lookup = new Map<string, string>();

  records.forEach(({ post_id: postId, author }) => {
    const name = toText(author).trim();
    if (postId != null && name) {
      lookup.set(String(postId), name);
    }
  });

  return lookup;
};
|
||||
|
||||
/** Precomputes the per-corpus lookup tables that spec matchers receive as context. */
const buildExplorerContext = (records: DatasetRecord[]): CorpusExplorerContext => {
  const authorByPostId = createAuthorByPostId(records);
  const authorEventCounts = createAuthorEventCounts(records);
  const authorCommentCounts = createAuthorCommentCounts(records);

  return { authorByPostId, authorEventCounts, authorCommentCounts };
};
|
||||
|
||||
/** Spec whose matcher accepts every record. */
const buildAllRecordsSpec = (): CorpusExplorerSpec => {
  const acceptEverything = () => true;

  return {
    title: "Corpus Explorer",
    description: "All records in the current filtered dataset.",
    emptyMessage: "No records match the current filters.",
    matcher: acceptEverything,
  };
};
|
||||
|
||||
/** Spec selecting records whose normalised author equals the normalised `author`. */
const buildUserSpec = (author: string): CorpusExplorerSpec => {
  const wantedAuthor = normalize(author);
  const isByAuthor = (record: DatasetRecord) => normalize(record.author) === wantedAuthor;

  return {
    title: `User: ${author}`,
    description: `All records authored by ${author}.`,
    emptyMessage: `No records found for ${author}.`,
    matcher: isByAuthor,
  };
};
|
||||
|
||||
/** Spec selecting records whose normalised `topic` equals the normalised `topic` argument. */
const buildTopicSpec = (topic: string): CorpusExplorerSpec => {
  const wantedTopic = normalize(topic);
  const isInTopic = (record: DatasetRecord) => normalize(record.topic) === wantedTopic;

  return {
    title: `Topic: ${topic}`,
    description: `Records assigned to the ${topic} topic bucket.`,
    emptyMessage: `No records found in the ${topic} topic bucket.`,
    matcher: isInTopic,
  };
};
|
||||
|
||||
/** Spec selecting records whose `getDateBucket` value is exactly `date`. */
const buildDateBucketSpec = (date: string): CorpusExplorerSpec => {
  const fallsOnDate = (record: DatasetRecord) => getDateBucket(record) === date;

  return {
    title: `Date Bucket: ${date}`,
    description: `Records from the ${date} activity bucket.`,
    emptyMessage: `No records found on ${date}.`,
    matcher: fallsOnDate,
  };
};
|
||||
|
||||
/** Spec selecting records whose text contains `word` (see `matchesPhrase`). */
const buildWordSpec = (word: string): CorpusExplorerSpec => {
  const containsWord = (record: DatasetRecord) => matchesPhrase(record, word);

  return {
    title: `Word: ${word}`,
    description: `Records containing the word ${word}.`,
    emptyMessage: `No records mention ${word}.`,
    matcher: containsWord,
  };
};
|
||||
|
||||
/** Spec selecting records whose text contains the phrase `ngram` (see `matchesPhrase`). */
const buildNgramSpec = (ngram: string): CorpusExplorerSpec => {
  const containsPhrase = (record: DatasetRecord) => matchesPhrase(record, ngram);

  return {
    title: `N-gram: ${ngram}`,
    description: `Records containing the phrase ${ngram}.`,
    emptyMessage: `No records contain the phrase ${ngram}.`,
    matcher: containsPhrase,
  };
};
|
||||
|
||||
/**
 * Spec selecting records that mention `entity`: either one of the record's
 * `ner_entities` has a normalised `text` equal to the normalised entity, or
 * the record text contains the entity as a phrase.
 */
const buildEntitySpec = (entity: string): CorpusExplorerSpec => {
  const wantedEntity = normalize(entity);

  const mentionsEntity = (record: DatasetRecord) => {
    if (Array.isArray(record.ner_entities)) {
      for (const tagged of record.ner_entities) {
        if (normalize(tagged?.text) === wantedEntity) {
          return true;
        }
      }
    }
    // No tagged entity matched: fall back to a plain text search.
    return matchesPhrase(record, entity);
  };

  return {
    title: `Entity: ${entity}`,
    description: `Records mentioning the ${entity} entity.`,
    emptyMessage: `No records found for the ${entity} entity.`,
    matcher: mentionsEntity,
  };
};
|
||||
|
||||
/** Spec selecting records whose normalised `source` equals the normalised `source` argument. */
const buildSourceSpec = (source: string): CorpusExplorerSpec => {
  const wantedSource = normalize(source);
  const isFromSource = (record: DatasetRecord) => normalize(record.source) === wantedSource;

  return {
    title: `Source: ${source}`,
    description: `Records from the ${source} source.`,
    emptyMessage: `No records found for ${source}.`,
    matcher: isFromSource,
  };
};
|
||||
|
||||
/** Spec selecting records whose dominant emotion equals the normalised `emotion`. */
const buildDominantEmotionSpec = (emotion: string): CorpusExplorerSpec => {
  const wantedEmotion = normalize(emotion);
  const hasDominantEmotion = (record: DatasetRecord) =>
    getDominantEmotion(record) === wantedEmotion;

  return {
    title: `Dominant Emotion: ${emotion}`,
    description: `Records where ${emotion} is the strongest emotion score.`,
    emptyMessage: `No records found with dominant emotion ${emotion}.`,
    matcher: hasDominantEmotion,
  };
};
|
||||
|
||||
/**
 * Spec selecting records authored by `source` whose `reply_to` value resolves,
 * through `context.authorByPostId`, to the author `target`. Author names are
 * compared in normalised form; records with a null, undefined or empty
 * `reply_to` never match.
 */
const buildReplyPairSpec = (source: string, target: string): CorpusExplorerSpec => {
  const wantedAuthor = normalize(source);
  const wantedTarget = normalize(target);

  return {
    title: `Reply Path: ${source} -> ${target}`,
    description: `Reply records authored by ${source} in response to ${target}.`,
    emptyMessage: `No reply records found for ${source} -> ${target}.`,
    matcher: (record, context) => {
      const replyTo = record.reply_to;
      const hasParent = replyTo !== null && replyTo !== undefined && replyTo !== "";
      if (!hasParent || normalize(record.author) !== wantedAuthor) {
        return false;
      }

      const parentAuthor = context.authorByPostId.get(String(replyTo));
      return normalize(parentAuthor) === wantedTarget;
    },
  };
};
|
||||
|
||||
/** Spec selecting records whose author has exactly one entry in `context.authorEventCounts`. */
const buildOneTimeUsersSpec = (): CorpusExplorerSpec => {
  const title = "One-Time Users";
  const description =
    "Records written by authors who appear exactly once in the filtered corpus.";
  const emptyMessage = "No one-time-user records found.";

  return {
    title,
    description,
    emptyMessage,
    matcher: (record, { authorEventCounts }) => {
      const author = toText(record.author).trim();
      return author !== "" && authorEventCounts.get(author) === 1;
    },
  };
};
|
||||
|
||||
/**
 * Spec selecting comment records written by the `topAuthorCount` authors with
 * the most comments according to `context.authorCommentCounts`.
 *
 * The ranking is computed once per context object and cached, instead of being
 * re-sorted for every record handed to the matcher. The cache is keyed weakly
 * by the context, so it assumes a context's counts are not mutated after the
 * first match.
 *
 * @param topAuthorCount - Number of highest-ranked commenters to include.
 */
const buildTopCommentersSpec = (topAuthorCount: number): CorpusExplorerSpec => {
  const rankingByContext = new WeakMap<CorpusExplorerContext, Set<string>>();

  // Returns the cached set of top commenters for `context`, building it on first use.
  const topAuthorsFor = (context: CorpusExplorerContext) => {
    let ranked = rankingByContext.get(context);
    if (!ranked) {
      ranked = new Set(
        Array.from(context.authorCommentCounts.entries())
          .sort((a, b) => b[1] - a[1])
          .slice(0, topAuthorCount)
          .map(([author]) => author),
      );
      rankingByContext.set(context, ranked);
    }
    return ranked;
  };

  return {
    title: "Top Commenters",
    description: `Comment records from the top ${topAuthorCount} commenters in the filtered corpus.`,
    emptyMessage: "No top-commenter records found.",
    matcher: (record, context) => {
      if (record.type !== "comment") {
        return false;
      }
      return topAuthorsFor(context).has(toText(record.author).trim());
    },
  };
};
|
||||
|
||||
/**
 * Spec selecting comment records whose author has exactly one entry in
 * `context.authorCommentCounts`.
 */
const buildSingleCommentAuthorsSpec = (): CorpusExplorerSpec => {
  const title = "Single-Comment Authors";
  const description = "Comment records from authors who commented exactly once.";
  const emptyMessage = "No single-comment-author records found.";

  return {
    title,
    description,
    emptyMessage,
    matcher: (record, context) => {
      if (record.type !== "comment") {
        return false;
      }
      const author = toText(record.author).trim();
      return author.length > 0 && context.authorCommentCounts.get(author) === 1;
    },
  };
};
|
||||
|
||||
/**
 * Spec selecting records whose `recordIdentityBucket` classification equals
 * `bucket`. Title and messages are derived from a fixed label per bucket.
 */
const buildIdentityBucketSpec = (bucket: "in" | "out" | "tie"): CorpusExplorerSpec => {
  const headings: Record<"in" | "out" | "tie", string> = {
    in: "In-Group Posts",
    out: "Out-Group Posts",
    tie: "Balanced Posts",
  };
  const heading = headings[bucket];
  const loweredHeading = heading.toLowerCase();
  const isInBucket = (record: DatasetRecord) => recordIdentityBucket(record) === bucket;

  return {
    title: heading,
    description: `Records in the ${loweredHeading} cultural bucket.`,
    emptyMessage: `No records found for ${loweredHeading}.`,
    matcher: isInBucket,
  };
};
|
||||
|
||||
/**
 * Generic spec selecting records whose title/content text matches `pattern`.
 *
 * @param title - Dialog title; its lower-cased form is used in the empty message.
 * @param description - Dialog description.
 * @param pattern - Pattern tested against each record's text. Global or sticky
 *   patterns are supported: their `lastIndex` is reset before every test.
 */
const buildPatternSpec = (
  title: string,
  description: string,
  pattern: RegExp,
): CorpusExplorerSpec => ({
  title,
  description,
  emptyMessage: `No records found for ${title.toLowerCase()}.`,
  matcher: (record) => {
    // RegExp#test resumes from lastIndex on patterns with the g or y flag, so
    // without this reset the result for one record would depend on the
    // previous record that was tested.
    pattern.lastIndex = 0;
    return pattern.test(getRecordText(record));
  },
});
|
||||
|
||||
/** Spec for records matching HEDGE_PATTERN. */
const buildHedgeSpec = () => {
  const title = "Hedging Words";
  const description = "Records containing hedging language.";
  return buildPatternSpec(title, description, HEDGE_PATTERN);
};

/** Spec for records matching CERTAINTY_PATTERN. */
const buildCertaintySpec = () => {
  const title = "Certainty Words";
  const description = "Records containing certainty language.";
  return buildPatternSpec(title, description, CERTAINTY_PATTERN);
};

/** Spec for records matching DEONTIC_PATTERN. */
const buildDeonticSpec = () => {
  const title = "Need/Should Words";
  const description = "Records containing deontic language.";
  return buildPatternSpec(title, description, DEONTIC_PATTERN);
};

/** Spec for records matching PERMISSION_PATTERN. */
const buildPermissionSpec = () => {
  const title = "Permission Words";
  const description = "Records containing permission language.";
  return buildPatternSpec(title, description, PERMISSION_PATTERN);
};
|
||||
|
||||
/** Returns the shared compact button style object (the same instance on every call). */
const getExplorerButtonStyle = () => {
  return shrinkButtonStyle;
};
|
||||
|
||||
export type { DatasetRecord, CorpusExplorerContext, CorpusExplorerSpec };
|
||||
export {
|
||||
buildAllRecordsSpec,
|
||||
buildCertaintySpec,
|
||||
buildDateBucketSpec,
|
||||
buildDeonticSpec,
|
||||
buildDominantEmotionSpec,
|
||||
buildEntitySpec,
|
||||
buildExplorerContext,
|
||||
buildHedgeSpec,
|
||||
buildIdentityBucketSpec,
|
||||
buildNgramSpec,
|
||||
buildOneTimeUsersSpec,
|
||||
buildPermissionSpec,
|
||||
buildReplyPairSpec,
|
||||
buildSingleCommentAuthorsSpec,
|
||||
buildSourceSpec,
|
||||
buildTopicSpec,
|
||||
buildTopCommentersSpec,
|
||||
buildUserSpec,
|
||||
buildWordSpec,
|
||||
getDateBucket,
|
||||
getExplorerButtonStyle,
|
||||
toText,
|
||||
};
|
||||
@@ -1,4 +1,5 @@
|
||||
import nltk
|
||||
import json
|
||||
import pandas as pd
|
||||
from nltk.corpus import stopwords
|
||||
|
||||
@@ -27,6 +28,8 @@ DOMAIN_STOPWORDS = {
|
||||
"one",
|
||||
}
|
||||
|
||||
EXCLUDED_AUTHORS = {"[deleted]", "automoderator"}
|
||||
|
||||
nltk.download("stopwords")
|
||||
EXCLUDE_WORDS = set(stopwords.words("english")) | DOMAIN_STOPWORDS
|
||||
|
||||
@@ -46,6 +49,12 @@ class StatGen:
|
||||
filters = filters or {}
|
||||
filtered_df = df.copy()
|
||||
|
||||
if "author" in filtered_df.columns:
|
||||
normalized_authors = (
|
||||
filtered_df["author"].fillna("").astype(str).str.strip().str.lower()
|
||||
)
|
||||
filtered_df = filtered_df[~normalized_authors.isin(EXCLUDED_AUTHORS)]
|
||||
|
||||
search_query = filters.get("search_query", None)
|
||||
start_date_filter = filters.get("start_date", None)
|
||||
end_date_filter = filters.get("end_date", None)
|
||||
@@ -75,9 +84,15 @@ class StatGen:
|
||||
|
||||
return filtered_df
|
||||
|
||||
def _json_ready_records(self, df: pd.DataFrame) -> list[dict]:
|
||||
return json.loads(
|
||||
df.to_json(orient="records", date_format="iso", date_unit="s")
|
||||
)
|
||||
|
||||
## Public Methods
|
||||
def filter_dataset(self, df: pd.DataFrame, filters: dict | None = None) -> list[dict]:
|
||||
return self._prepare_filtered_df(df, filters).to_dict(orient="records")
|
||||
filtered_df = self._prepare_filtered_df(df, filters)
|
||||
return self._json_ready_records(filtered_df)
|
||||
|
||||
def temporal(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||
filtered_df = self._prepare_filtered_df(df, filters)
|
||||
|
||||
@@ -591,7 +591,8 @@ def get_full_dataset(dataset_id: int):
|
||||
)
|
||||
|
||||
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||
return jsonify(dataset_content.to_dict(orient="records")), 200
|
||||
filters = get_request_filters()
|
||||
return jsonify(stat_gen.filter_dataset(dataset_content, filters)), 200
|
||||
except NotAuthorisedException:
|
||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||
except NonExistentDatasetException:
|
||||
|
||||
Reference in New Issue
Block a user