feat(frontend): implement corpus explorer
This allows you to view the posts & comments associated with a specific aggregate.
This commit is contained in:
175
frontend/src/components/CorpusExplorer.tsx
Normal file
175
frontend/src/components/CorpusExplorer.tsx
Normal file
@@ -0,0 +1,175 @@
|
|||||||
|
import { Dialog, DialogPanel, DialogTitle } from "@headlessui/react";
|
||||||
|
|
||||||
|
import StatsStyling from "../styles/stats_styling";
|
||||||
|
import type { DatasetRecord } from "../utils/corpusExplorer";
|
||||||
|
|
||||||
|
const styles = StatsStyling;
|
||||||
|
|
||||||
|
/**
 * Normalises an arbitrary record field into display-ready text.
 *
 * @param value - Any value read from a record; only strings are kept.
 * @returns The trimmed string, or "" when the value is not a string, is
 *   blank after trimming, or is one of the placeholder tokens "nan", "null"
 *   or "undefined" (compared case-insensitively).
 */
const cleanText = (value: unknown): string => {
  if (typeof value !== "string") {
    return "";
  }

  const trimmed = value.trim();
  if (!trimmed) {
    return "";
  }

  // Placeholder tokens are treated as "no value" rather than shown verbatim.
  const lowered = trimmed.toLowerCase();
  if (lowered === "nan" || lowered === "null" || lowered === "undefined") {
    return "";
  }

  return trimmed;
};
|
||||||
|
|
||||||
|
/**
 * Returns the cleaned text for `value`, or `fallback` when cleaning yields
 * an empty string (non-string, blank, or placeholder input).
 *
 * @param value - Raw record field.
 * @param fallback - Text to show when there is nothing usable in `value`.
 */
const displayText = (value: unknown, fallback: string): string =>
  cleanText(value) || fallback;
|
||||||
|
|
||||||
|
/** Props accepted by the CorpusExplorer dialog. */
type CorpusExplorerProps = {
  /** Whether the dialog is currently shown. */
  open: boolean;
  /** Invoked when the dialog requests to close (also wired to the Close button). */
  onClose: () => void;
  /** Heading rendered as the dialog title. */
  title: string;
  /** Text rendered in the subtitle, ahead of the loading/record-count text. */
  description: string;
  /** Records listed in the dialog body. */
  records: DatasetRecord[];
  /** While true, a loading message is shown instead of the record list. */
  loading: boolean;
  /** Error text to display; an empty string means there is no error. */
  error: string;
  /** Shown when not loading, there is no error, and `records` is empty. */
  emptyMessage: string;
};
|
||||||
|
|
||||||
|
/**
 * Builds the human-readable time label for a record.
 *
 * Sources are tried in order:
 *  1. `record.dt` — a string that parses to a valid Date, shown via `toLocaleString()`;
 *  2. `record.date` — any non-empty string, shown as-is;
 *  3. `record.timestamp` — a number, multiplied by 1000 before being handed to
 *     `Date` (presumably Unix seconds — confirm against the API).
 *
 * @returns The formatted label, or "Unknown time" when no source is usable.
 */
const formatRecordDate = (record: DatasetRecord): string => {
  if (typeof record.dt === "string" && record.dt) {
    const parsed = new Date(record.dt);
    if (!Number.isNaN(parsed.getTime())) {
      return parsed.toLocaleString();
    }
  }

  if (typeof record.date === "string" && record.date) {
    return record.date;
  }

  if (typeof record.timestamp === "number") {
    const fromEpoch = new Date(record.timestamp * 1000);
    // NaN, +/-Infinity and out-of-range values all yield an invalid Date;
    // fall through instead of rendering the literal "Invalid Date".
    if (!Number.isNaN(fromEpoch.getTime())) {
      return fromEpoch.toLocaleString();
    }
  }

  return "Unknown time";
};
|
||||||
|
|
||||||
|
/**
 * Derives the React list key for a record.
 *
 * The key combines the first available identifier (`id`, then `post_id`,
 * then `author`, then the literal "record") with the record's position in
 * the list. The index is always appended because nothing visible here
 * guarantees that `id` / `post_id` are unique across the rendered records
 * (NOTE(review): e.g. several records could share one `post_id` — confirm),
 * and React requires sibling keys to be unique.
 *
 * @param record - The record being rendered.
 * @param index - Position of the record in the rendered list.
 */
const getRecordKey = (record: DatasetRecord, index: number): string => {
  const identifier = record.id ?? record.post_id ?? record.author ?? "record";
  return `${String(identifier)}-${index}`;
};
|
||||||
|
|
||||||
|
/**
 * Shortens `text` to at most `maxLength` UTF-16 code units, ending with "..."
 * when anything was cut. If the cut would land between the two halves of a
 * surrogate pair (e.g. inside an emoji), the dangling high surrogate is
 * dropped as well so no broken character is left before the ellipsis.
 */
const truncateText = (text: string, maxLength: number): string => {
  if (text.length <= maxLength) {
    return text;
  }

  let end = maxLength - 3; // leave room for the three-character ellipsis
  const lastKeptUnit = text.charCodeAt(end - 1);
  if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) {
    end -= 1;
  }

  return `${text.slice(0, end)}...`;
};

/**
 * Picks the heading shown for a record.
 *
 * Comments (`record.type === "comment"`) get no heading. Otherwise the
 * cleaned `title` is used; failing that, the cleaned `content` truncated to
 * 120 characters; failing that, "Untitled record".
 */
const getRecordTitle = (record: DatasetRecord): string => {
  if (record.type === "comment") {
    return "";
  }

  const title = cleanText(record.title);
  if (title) {
    return title;
  }

  const content = cleanText(record.content);
  return content ? truncateText(content, 120) : "Untitled record";
};

/**
 * Returns the body preview for a record: the cleaned `content` truncated to
 * 320 characters, or "No content available." when there is none.
 */
const getRecordExcerpt = (record: DatasetRecord): string => {
  const content = cleanText(record.content);
  return content ? truncateText(content, 320) : "No content available.";
};
|
||||||
|
|
||||||
|
/**
 * Modal dialog that lists the records behind an aggregate.
 *
 * The header shows the title, description and either a loading note or the
 * record count. The body shows, depending on state: the error text, the
 * empty-state message, a loading placeholder, or the scrollable record list.
 * Each list entry shows an optional title, a meta line
 * (author, source, type, time), an optional topic and a content excerpt.
 */
const CorpusExplorer = ({
  open,
  onClose,
  title,
  description,
  records,
  loading,
  error,
  emptyMessage,
}: CorpusExplorerProps) => (
  <Dialog open={open} onClose={onClose} style={styles.modalRoot}>
    <div style={styles.modalBackdrop} />

    <div style={styles.modalContainer}>
      <DialogPanel
        style={{
          ...styles.card,
          ...styles.modalPanel,
          width: "min(960px, 96vw)",
          maxHeight: "88vh",
          display: "flex",
          flexDirection: "column",
          gap: 12,
        }}
      >
        <div style={styles.headerBar}>
          <div>
            <DialogTitle style={styles.sectionTitle}>{title}</DialogTitle>
            <p style={styles.sectionSubtitle}>
              {description} {loading ? "Loading records..." : `${records.length.toLocaleString()} records.`}
            </p>
          </div>

          {/* Explicit type so the button can never act as a form submit. */}
          <button type="button" onClick={onClose} style={styles.buttonSecondary}>
            Close
          </button>
        </div>

        {error ? (
          <p role="alert" style={styles.sectionSubtitle}>
            {error}
          </p>
        ) : null}

        {!loading && !error && !records.length ? (
          <p style={styles.sectionSubtitle}>{emptyMessage}</p>
        ) : null}

        {loading ? (
          <div style={styles.topUserMeta}>Preparing corpus slice...</div>
        ) : null}

        {!loading && !error && records.length ? (
          <div
            style={{
              ...styles.topUsersList,
              overflowY: "auto",
              paddingRight: 4,
            }}
          >
            {records.map((record, index) => {
              // Derive once per record instead of re-running the helpers
              // for both the condition and the rendered value.
              const recordTitle = getRecordTitle(record);
              const topic = cleanText(record.topic);

              return (
                <div key={getRecordKey(record, index)} style={styles.topUserItem}>
                  <div style={{ ...styles.headerBar, alignItems: "flex-start" }}>
                    <div>
                      {recordTitle ? (
                        <div style={styles.topUserName}>{recordTitle}</div>
                      ) : null}
                      <div style={styles.topUserMeta}>
                        {displayText(record.author, "Unknown author")} • {displayText(record.source, "Unknown source")} • {displayText(record.type, "record")} • {formatRecordDate(record)}
                      </div>
                    </div>
                    <div style={styles.topUserMeta}>
                      {topic ? `Topic: ${topic}` : ""}
                    </div>
                  </div>

                  <div style={{ ...styles.topUserMeta, marginTop: 8, whiteSpace: "pre-wrap" }}>
                    {getRecordExcerpt(record)}
                  </div>
                </div>
              );
            })}
          </div>
        ) : null}
      </DialogPanel>
    </div>
  </Dialog>
);
|
||||||
|
|
||||||
|
export default CorpusExplorer;
|
||||||
@@ -1,14 +1,34 @@
|
|||||||
import Card from "./Card";
|
import Card from "./Card";
|
||||||
import StatsStyling from "../styles/stats_styling";
|
import StatsStyling from "../styles/stats_styling";
|
||||||
import type { CulturalAnalysisResponse } from "../types/ApiTypes";
|
import type { CulturalAnalysisResponse } from "../types/ApiTypes";
|
||||||
|
import {
|
||||||
|
buildCertaintySpec,
|
||||||
|
buildDeonticSpec,
|
||||||
|
buildEntitySpec,
|
||||||
|
buildHedgeSpec,
|
||||||
|
buildIdentityBucketSpec,
|
||||||
|
buildPermissionSpec,
|
||||||
|
getExplorerButtonStyle,
|
||||||
|
type CorpusExplorerSpec,
|
||||||
|
} from "../utils/corpusExplorer";
|
||||||
|
|
||||||
const styles = StatsStyling;
|
const styles = StatsStyling;
|
||||||
|
|
||||||
type CulturalStatsProps = {
|
type CulturalStatsProps = {
|
||||||
data: CulturalAnalysisResponse;
|
data: CulturalAnalysisResponse;
|
||||||
|
onExplore: (spec: CorpusExplorerSpec) => void;
|
||||||
};
|
};
|
||||||
|
|
||||||
const CulturalStats = ({ data }: CulturalStatsProps) => {
|
/**
 * Renders the secondary-styled "Explore" button, merging the shared
 * secondary button style with the explorer-specific overrides.
 *
 * @param onClick - Handler run when the button is pressed.
 */
const renderExploreButton = (onClick: () => void) => (
  <button
    type="button"
    onClick={onClick}
    style={{ ...styles.buttonSecondary, ...getExplorerButtonStyle() }}
  >
    Explore
  </button>
);
|
||||||
|
|
||||||
|
const CulturalStats = ({ data, onExplore }: CulturalStatsProps) => {
|
||||||
const identity = data.identity_markers;
|
const identity = data.identity_markers;
|
||||||
const stance = data.stance_markers;
|
const stance = data.stance_markers;
|
||||||
const inGroupWords = identity?.in_group_usage ?? 0;
|
const inGroupWords = identity?.in_group_usage ?? 0;
|
||||||
@@ -30,7 +50,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
|||||||
const topEmotion = (emotionAvg: Record<string, number> | undefined) => {
|
const topEmotion = (emotionAvg: Record<string, number> | undefined) => {
|
||||||
const entries = Object.entries(emotionAvg ?? {});
|
const entries = Object.entries(emotionAvg ?? {});
|
||||||
if (!entries.length) {
|
if (!entries.length) {
|
||||||
return "—";
|
return "-";
|
||||||
}
|
}
|
||||||
|
|
||||||
entries.sort((a, b) => b[1] - a[1]);
|
entries.sort((a, b) => b[1] - a[1]);
|
||||||
@@ -64,21 +84,30 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
|||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="In-Group Posts"
|
label="In-Group Posts"
|
||||||
value={identity?.in_group_posts?.toLocaleString() ?? "—"}
|
value={identity?.in_group_posts?.toLocaleString() ?? "-"}
|
||||||
sublabel='Posts leaning toward "us" language'
|
sublabel='Posts leaning toward "us" language'
|
||||||
|
rightSlot={renderExploreButton(() =>
|
||||||
|
onExplore(buildIdentityBucketSpec("in")),
|
||||||
|
)}
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Out-Group Posts"
|
label="Out-Group Posts"
|
||||||
value={identity?.out_group_posts?.toLocaleString() ?? "—"}
|
value={identity?.out_group_posts?.toLocaleString() ?? "-"}
|
||||||
sublabel='Posts leaning toward "them" language'
|
sublabel='Posts leaning toward "them" language'
|
||||||
|
rightSlot={renderExploreButton(() =>
|
||||||
|
onExplore(buildIdentityBucketSpec("out")),
|
||||||
|
)}
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<Card
|
<Card
|
||||||
label="Balanced Posts"
|
label="Balanced Posts"
|
||||||
value={identity?.tie_posts?.toLocaleString() ?? "—"}
|
value={identity?.tie_posts?.toLocaleString() ?? "-"}
|
||||||
sublabel="Posts with equal us/them signals"
|
sublabel="Posts with equal us/them signals"
|
||||||
|
rightSlot={renderExploreButton(() =>
|
||||||
|
onExplore(buildIdentityBucketSpec("tie")),
|
||||||
|
)}
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
@@ -90,7 +119,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
|||||||
<Card
|
<Card
|
||||||
label="In-Group Share"
|
label="In-Group Share"
|
||||||
value={
|
value={
|
||||||
inGroupWordRate === null ? "—" : `${inGroupWordRate.toFixed(2)}%`
|
inGroupWordRate === null ? "-" : `${inGroupWordRate.toFixed(2)}%`
|
||||||
}
|
}
|
||||||
sublabel="Share of all words"
|
sublabel="Share of all words"
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
@@ -98,7 +127,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
|||||||
<Card
|
<Card
|
||||||
label="Out-Group Share"
|
label="Out-Group Share"
|
||||||
value={
|
value={
|
||||||
outGroupWordRate === null ? "—" : `${outGroupWordRate.toFixed(2)}%`
|
outGroupWordRate === null ? "-" : `${outGroupWordRate.toFixed(2)}%`
|
||||||
}
|
}
|
||||||
sublabel="Share of all words"
|
sublabel="Share of all words"
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
@@ -106,42 +135,46 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
|||||||
|
|
||||||
<Card
|
<Card
|
||||||
label="Hedging Words"
|
label="Hedging Words"
|
||||||
value={stance?.hedge_total?.toLocaleString() ?? "—"}
|
value={stance?.hedge_total?.toLocaleString() ?? "-"}
|
||||||
sublabel={
|
sublabel={
|
||||||
typeof stance?.hedge_per_1k_tokens === "number"
|
typeof stance?.hedge_per_1k_tokens === "number"
|
||||||
? `${stance.hedge_per_1k_tokens.toFixed(1)} per 1k words`
|
? `${stance.hedge_per_1k_tokens.toFixed(1)} per 1k words`
|
||||||
: "Word frequency"
|
: "Word frequency"
|
||||||
}
|
}
|
||||||
|
rightSlot={renderExploreButton(() => onExplore(buildHedgeSpec()))}
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Certainty Words"
|
label="Certainty Words"
|
||||||
value={stance?.certainty_total?.toLocaleString() ?? "—"}
|
value={stance?.certainty_total?.toLocaleString() ?? "-"}
|
||||||
sublabel={
|
sublabel={
|
||||||
typeof stance?.certainty_per_1k_tokens === "number"
|
typeof stance?.certainty_per_1k_tokens === "number"
|
||||||
? `${stance.certainty_per_1k_tokens.toFixed(1)} per 1k words`
|
? `${stance.certainty_per_1k_tokens.toFixed(1)} per 1k words`
|
||||||
: "Word frequency"
|
: "Word frequency"
|
||||||
}
|
}
|
||||||
|
rightSlot={renderExploreButton(() => onExplore(buildCertaintySpec()))}
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Need/Should Words"
|
label="Need/Should Words"
|
||||||
value={stance?.deontic_total?.toLocaleString() ?? "—"}
|
value={stance?.deontic_total?.toLocaleString() ?? "-"}
|
||||||
sublabel={
|
sublabel={
|
||||||
typeof stance?.deontic_per_1k_tokens === "number"
|
typeof stance?.deontic_per_1k_tokens === "number"
|
||||||
? `${stance.deontic_per_1k_tokens.toFixed(1)} per 1k words`
|
? `${stance.deontic_per_1k_tokens.toFixed(1)} per 1k words`
|
||||||
: "Word frequency"
|
: "Word frequency"
|
||||||
}
|
}
|
||||||
|
rightSlot={renderExploreButton(() => onExplore(buildDeonticSpec()))}
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Permission Words"
|
label="Permission Words"
|
||||||
value={stance?.permission_total?.toLocaleString() ?? "—"}
|
value={stance?.permission_total?.toLocaleString() ?? "-"}
|
||||||
sublabel={
|
sublabel={
|
||||||
typeof stance?.permission_per_1k_tokens === "number"
|
typeof stance?.permission_per_1k_tokens === "number"
|
||||||
? `${stance.permission_per_1k_tokens.toFixed(1)} per 1k words`
|
? `${stance.permission_per_1k_tokens.toFixed(1)} per 1k words`
|
||||||
: "Word frequency"
|
: "Word frequency"
|
||||||
}
|
}
|
||||||
|
rightSlot={renderExploreButton(() => onExplore(buildPermissionSpec()))}
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
@@ -150,8 +183,14 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
|||||||
<p style={styles.sectionSubtitle}>
|
<p style={styles.sectionSubtitle}>
|
||||||
Most likely emotion when in-group wording is stronger.
|
Most likely emotion when in-group wording is stronger.
|
||||||
</p>
|
</p>
|
||||||
<div style={styles.topUserName}>
|
<div style={styles.topUserName}>{topEmotion(identity?.in_group_emotion_avg)}</div>
|
||||||
{topEmotion(identity?.in_group_emotion_avg)}
|
<div style={{ marginTop: 12 }}>
|
||||||
|
<button
|
||||||
|
onClick={() => onExplore(buildIdentityBucketSpec("in"))}
|
||||||
|
style={styles.buttonSecondary}
|
||||||
|
>
|
||||||
|
Explore records
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -160,8 +199,14 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
|||||||
<p style={styles.sectionSubtitle}>
|
<p style={styles.sectionSubtitle}>
|
||||||
Most likely emotion when out-group wording is stronger.
|
Most likely emotion when out-group wording is stronger.
|
||||||
</p>
|
</p>
|
||||||
<div style={styles.topUserName}>
|
<div style={styles.topUserName}>{topEmotion(identity?.out_group_emotion_avg)}</div>
|
||||||
{topEmotion(identity?.out_group_emotion_avg)}
|
<div style={{ marginTop: 12 }}>
|
||||||
|
<button
|
||||||
|
onClick={() => onExplore(buildIdentityBucketSpec("out"))}
|
||||||
|
style={styles.buttonSecondary}
|
||||||
|
>
|
||||||
|
Explore records
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -171,9 +216,7 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
|||||||
Most mentioned entities and the mood that appears most with each.
|
Most mentioned entities and the mood that appears most with each.
|
||||||
</p>
|
</p>
|
||||||
{!entities.length ? (
|
{!entities.length ? (
|
||||||
<div style={styles.topUserMeta}>
|
<div style={styles.topUserMeta}>No entity-level cultural data available.</div>
|
||||||
No entity-level cultural data available.
|
|
||||||
</div>
|
|
||||||
) : (
|
) : (
|
||||||
<div
|
<div
|
||||||
style={{
|
style={{
|
||||||
@@ -183,7 +226,11 @@ const CulturalStats = ({ data }: CulturalStatsProps) => {
|
|||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
{entities.map(([entity, aggregate]) => (
|
{entities.map(([entity, aggregate]) => (
|
||||||
<div key={entity} style={styles.topUserItem}>
|
<div
|
||||||
|
key={entity}
|
||||||
|
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||||
|
onClick={() => onExplore(buildEntitySpec(entity))}
|
||||||
|
>
|
||||||
<div style={styles.topUserName}>{entity}</div>
|
<div style={styles.topUserName}>{entity}</div>
|
||||||
<div style={styles.topUserMeta}>
|
<div style={styles.topUserMeta}>
|
||||||
{aggregate.post_count.toLocaleString()} posts • Likely mood:{" "}
|
{aggregate.post_count.toLocaleString()} posts • Likely mood:{" "}
|
||||||
|
|||||||
@@ -1,13 +1,20 @@
|
|||||||
import type { EmotionalAnalysisResponse } from "../types/ApiTypes";
|
import type { EmotionalAnalysisResponse } from "../types/ApiTypes";
|
||||||
import StatsStyling from "../styles/stats_styling";
|
import StatsStyling from "../styles/stats_styling";
|
||||||
|
import {
|
||||||
|
buildDominantEmotionSpec,
|
||||||
|
buildSourceSpec,
|
||||||
|
buildTopicSpec,
|
||||||
|
type CorpusExplorerSpec,
|
||||||
|
} from "../utils/corpusExplorer";
|
||||||
|
|
||||||
const styles = StatsStyling;
|
const styles = StatsStyling;
|
||||||
|
|
||||||
type EmotionalStatsProps = {
|
type EmotionalStatsProps = {
|
||||||
emotionalData: EmotionalAnalysisResponse;
|
emotionalData: EmotionalAnalysisResponse;
|
||||||
|
onExplore: (spec: CorpusExplorerSpec) => void;
|
||||||
};
|
};
|
||||||
|
|
||||||
const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
|
const EmotionalStats = ({ emotionalData, onExplore }: EmotionalStatsProps) => {
|
||||||
const rows = emotionalData.average_emotion_by_topic ?? [];
|
const rows = emotionalData.average_emotion_by_topic ?? [];
|
||||||
const overallEmotionAverage = emotionalData.overall_emotion_average ?? [];
|
const overallEmotionAverage = emotionalData.overall_emotion_average ?? [];
|
||||||
const dominantEmotionDistribution =
|
const dominantEmotionDistribution =
|
||||||
@@ -126,7 +133,11 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
|
|||||||
{[...overallEmotionAverage]
|
{[...overallEmotionAverage]
|
||||||
.sort((a, b) => b.score - a.score)
|
.sort((a, b) => b.score - a.score)
|
||||||
.map((row) => (
|
.map((row) => (
|
||||||
<div key={row.emotion} style={styles.topUserItem}>
|
<div
|
||||||
|
key={row.emotion}
|
||||||
|
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||||
|
onClick={() => onExplore(buildDominantEmotionSpec(row.emotion))}
|
||||||
|
>
|
||||||
<div style={styles.topUserName}>
|
<div style={styles.topUserName}>
|
||||||
{formatEmotion(row.emotion)}
|
{formatEmotion(row.emotion)}
|
||||||
</div>
|
</div>
|
||||||
@@ -157,7 +168,11 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
|
|||||||
{[...dominantEmotionDistribution]
|
{[...dominantEmotionDistribution]
|
||||||
.sort((a, b) => b.ratio - a.ratio)
|
.sort((a, b) => b.ratio - a.ratio)
|
||||||
.map((row) => (
|
.map((row) => (
|
||||||
<div key={row.emotion} style={styles.topUserItem}>
|
<div
|
||||||
|
key={row.emotion}
|
||||||
|
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||||
|
onClick={() => onExplore(buildDominantEmotionSpec(row.emotion))}
|
||||||
|
>
|
||||||
<div style={styles.topUserName}>
|
<div style={styles.topUserName}>
|
||||||
{formatEmotion(row.emotion)}
|
{formatEmotion(row.emotion)}
|
||||||
</div>
|
</div>
|
||||||
@@ -189,7 +204,11 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
|
|||||||
{[...emotionBySource]
|
{[...emotionBySource]
|
||||||
.sort((a, b) => b.event_count - a.event_count)
|
.sort((a, b) => b.event_count - a.event_count)
|
||||||
.map((row) => (
|
.map((row) => (
|
||||||
<div key={row.source} style={styles.topUserItem}>
|
<div
|
||||||
|
key={row.source}
|
||||||
|
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||||
|
onClick={() => onExplore(buildSourceSpec(row.source))}
|
||||||
|
>
|
||||||
<div style={styles.topUserName}>{row.source}</div>
|
<div style={styles.topUserName}>{row.source}</div>
|
||||||
<div style={styles.topUserMeta}>
|
<div style={styles.topUserMeta}>
|
||||||
{formatEmotion(row.dominant_emotion)} •{" "}
|
{formatEmotion(row.dominant_emotion)} •{" "}
|
||||||
@@ -211,7 +230,8 @@ const EmotionalStats = ({ emotionalData }: EmotionalStatsProps) => {
|
|||||||
{strongestPerTopic.map((topic) => (
|
{strongestPerTopic.map((topic) => (
|
||||||
<div
|
<div
|
||||||
key={topic.topic}
|
key={topic.topic}
|
||||||
style={{ ...styles.cardBase, gridColumn: "span 4" }}
|
style={{ ...styles.cardBase, gridColumn: "span 4", cursor: "pointer" }}
|
||||||
|
onClick={() => onExplore(buildTopicSpec(topic.topic))}
|
||||||
>
|
>
|
||||||
<h3 style={{ ...styles.sectionTitle, marginBottom: 6 }}>
|
<h3 style={{ ...styles.sectionTitle, marginBottom: 6 }}>
|
||||||
{topic.topic}
|
{topic.topic}
|
||||||
|
|||||||
@@ -1,14 +1,20 @@
|
|||||||
import Card from "./Card";
|
import Card from "./Card";
|
||||||
import StatsStyling from "../styles/stats_styling";
|
import StatsStyling from "../styles/stats_styling";
|
||||||
import type { LinguisticAnalysisResponse } from "../types/ApiTypes";
|
import type { LinguisticAnalysisResponse } from "../types/ApiTypes";
|
||||||
|
import {
|
||||||
|
buildNgramSpec,
|
||||||
|
buildWordSpec,
|
||||||
|
type CorpusExplorerSpec,
|
||||||
|
} from "../utils/corpusExplorer";
|
||||||
|
|
||||||
const styles = StatsStyling;
|
const styles = StatsStyling;
|
||||||
|
|
||||||
type LinguisticStatsProps = {
|
type LinguisticStatsProps = {
|
||||||
data: LinguisticAnalysisResponse;
|
data: LinguisticAnalysisResponse;
|
||||||
|
onExplore: (spec: CorpusExplorerSpec) => void;
|
||||||
};
|
};
|
||||||
|
|
||||||
const LinguisticStats = ({ data }: LinguisticStatsProps) => {
|
const LinguisticStats = ({ data, onExplore }: LinguisticStatsProps) => {
|
||||||
const lexical = data.lexical_diversity;
|
const lexical = data.lexical_diversity;
|
||||||
const words = data.word_frequencies ?? [];
|
const words = data.word_frequencies ?? [];
|
||||||
const bigrams = data.common_two_phrases ?? [];
|
const bigrams = data.common_two_phrases ?? [];
|
||||||
@@ -60,7 +66,11 @@ const LinguisticStats = ({ data }: LinguisticStatsProps) => {
|
|||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
{topWords.map((item) => (
|
{topWords.map((item) => (
|
||||||
<div key={item.word} style={styles.topUserItem}>
|
<div
|
||||||
|
key={item.word}
|
||||||
|
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||||
|
onClick={() => onExplore(buildWordSpec(item.word))}
|
||||||
|
>
|
||||||
<div style={styles.topUserName}>{item.word}</div>
|
<div style={styles.topUserName}>{item.word}</div>
|
||||||
<div style={styles.topUserMeta}>
|
<div style={styles.topUserMeta}>
|
||||||
{item.count.toLocaleString()} uses
|
{item.count.toLocaleString()} uses
|
||||||
@@ -81,7 +91,11 @@ const LinguisticStats = ({ data }: LinguisticStatsProps) => {
|
|||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
{topBigrams.map((item) => (
|
{topBigrams.map((item) => (
|
||||||
<div key={item.ngram} style={styles.topUserItem}>
|
<div
|
||||||
|
key={item.ngram}
|
||||||
|
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||||
|
onClick={() => onExplore(buildNgramSpec(item.ngram))}
|
||||||
|
>
|
||||||
<div style={styles.topUserName}>{item.ngram}</div>
|
<div style={styles.topUserName}>{item.ngram}</div>
|
||||||
<div style={styles.topUserMeta}>
|
<div style={styles.topUserMeta}>
|
||||||
{item.count.toLocaleString()} uses
|
{item.count.toLocaleString()} uses
|
||||||
@@ -102,7 +116,11 @@ const LinguisticStats = ({ data }: LinguisticStatsProps) => {
|
|||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
{topTrigrams.map((item) => (
|
{topTrigrams.map((item) => (
|
||||||
<div key={item.ngram} style={styles.topUserItem}>
|
<div
|
||||||
|
key={item.ngram}
|
||||||
|
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||||
|
onClick={() => onExplore(buildNgramSpec(item.ngram))}
|
||||||
|
>
|
||||||
<div style={styles.topUserName}>{item.ngram}</div>
|
<div style={styles.topUserName}>{item.ngram}</div>
|
||||||
<div style={styles.topUserMeta}>
|
<div style={styles.topUserMeta}>
|
||||||
{item.count.toLocaleString()} uses
|
{item.count.toLocaleString()} uses
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { memo, useMemo, useState } from "react";
|
import { memo, useMemo } from "react";
|
||||||
import {
|
import {
|
||||||
LineChart,
|
LineChart,
|
||||||
Line,
|
Line,
|
||||||
@@ -13,7 +13,6 @@ import ActivityHeatmap from "../stats/ActivityHeatmap";
|
|||||||
import { ReactWordcloud } from "@cp949/react-wordcloud";
|
import { ReactWordcloud } from "@cp949/react-wordcloud";
|
||||||
import StatsStyling from "../styles/stats_styling";
|
import StatsStyling from "../styles/stats_styling";
|
||||||
import Card from "../components/Card";
|
import Card from "../components/Card";
|
||||||
import UserModal from "../components/UserModal";
|
|
||||||
|
|
||||||
import {
|
import {
|
||||||
type SummaryResponse,
|
type SummaryResponse,
|
||||||
@@ -21,8 +20,15 @@ import {
|
|||||||
type UserEndpointResponse,
|
type UserEndpointResponse,
|
||||||
type TimeAnalysisResponse,
|
type TimeAnalysisResponse,
|
||||||
type LinguisticAnalysisResponse,
|
type LinguisticAnalysisResponse,
|
||||||
type User,
|
|
||||||
} from "../types/ApiTypes";
|
} from "../types/ApiTypes";
|
||||||
|
import {
|
||||||
|
buildAllRecordsSpec,
|
||||||
|
buildDateBucketSpec,
|
||||||
|
buildOneTimeUsersSpec,
|
||||||
|
buildUserSpec,
|
||||||
|
getExplorerButtonStyle,
|
||||||
|
type CorpusExplorerSpec,
|
||||||
|
} from "../utils/corpusExplorer";
|
||||||
|
|
||||||
const styles = StatsStyling;
|
const styles = StatsStyling;
|
||||||
const MAX_WORDCLOUD_WORDS = 250;
|
const MAX_WORDCLOUD_WORDS = 250;
|
||||||
@@ -39,6 +45,7 @@ type SummaryStatsProps = {
|
|||||||
timeData: TimeAnalysisResponse | null;
|
timeData: TimeAnalysisResponse | null;
|
||||||
linguisticData: LinguisticAnalysisResponse | null;
|
linguisticData: LinguisticAnalysisResponse | null;
|
||||||
summary: SummaryResponse | null;
|
summary: SummaryResponse | null;
|
||||||
|
onExplore: (spec: CorpusExplorerSpec) => void;
|
||||||
};
|
};
|
||||||
|
|
||||||
type WordCloudPanelProps = {
|
type WordCloudPanelProps = {
|
||||||
@@ -60,7 +67,7 @@ function formatDateRange(startUnix: number, endUnix: number) {
|
|||||||
day: "2-digit",
|
day: "2-digit",
|
||||||
});
|
});
|
||||||
|
|
||||||
return `${fmt(start)} → ${fmt(end)}`;
|
return `${fmt(start)} -> ${fmt(end)}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
function convertFrequencyData(data: FrequencyWord[]) {
|
function convertFrequencyData(data: FrequencyWord[]) {
|
||||||
@@ -70,25 +77,22 @@ function convertFrequencyData(data: FrequencyWord[]) {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Renders the secondary-styled "Explore" button, merging the shared
 * secondary button style with the explorer-specific overrides.
 *
 * @param onClick - Handler run when the button is pressed.
 */
const renderExploreButton = (onClick: () => void) => (
  <button
    type="button"
    onClick={onClick}
    style={{ ...styles.buttonSecondary, ...getExplorerButtonStyle() }}
  >
    Explore
  </button>
);
|
||||||
|
|
||||||
const SummaryStats = ({
|
const SummaryStats = ({
|
||||||
userData,
|
userData,
|
||||||
timeData,
|
timeData,
|
||||||
linguisticData,
|
linguisticData,
|
||||||
summary,
|
summary,
|
||||||
|
onExplore,
|
||||||
}: SummaryStatsProps) => {
|
}: SummaryStatsProps) => {
|
||||||
const [selectedUser, setSelectedUser] = useState<string | null>(null);
|
|
||||||
const usersByAuthor = useMemo(() => {
|
|
||||||
const nextMap = new Map<string, User>();
|
|
||||||
for (const user of userData?.users ?? []) {
|
|
||||||
nextMap.set(user.author, user);
|
|
||||||
}
|
|
||||||
return nextMap;
|
|
||||||
}, [userData?.users]);
|
|
||||||
|
|
||||||
const selectedUserData: User | null = selectedUser
|
|
||||||
? usersByAuthor.get(selectedUser) ?? null
|
|
||||||
: null;
|
|
||||||
|
|
||||||
const wordCloudWords = useMemo(
|
const wordCloudWords = useMemo(
|
||||||
() =>
|
() =>
|
||||||
convertFrequencyData(
|
convertFrequencyData(
|
||||||
@@ -104,49 +108,41 @@ const SummaryStats = ({
|
|||||||
|
|
||||||
return (
|
return (
|
||||||
<div style={styles.page}>
|
<div style={styles.page}>
|
||||||
{/* main grid*/}
|
|
||||||
<div style={{ ...styles.container, ...styles.grid }}>
|
<div style={{ ...styles.container, ...styles.grid }}>
|
||||||
<Card
|
<Card
|
||||||
label="Total Activity"
|
label="Total Activity"
|
||||||
value={summary?.total_events ?? "—"}
|
value={summary?.total_events ?? "-"}
|
||||||
sublabel="Posts + comments"
|
sublabel="Posts + comments"
|
||||||
style={{
|
rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
|
||||||
gridColumn: "span 4",
|
style={{ gridColumn: "span 4" }}
|
||||||
}}
|
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Active People"
|
label="Active People"
|
||||||
value={summary?.unique_users ?? "—"}
|
value={summary?.unique_users ?? "-"}
|
||||||
sublabel="Distinct users"
|
sublabel="Distinct users"
|
||||||
style={{
|
rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
|
||||||
gridColumn: "span 4",
|
style={{ gridColumn: "span 4" }}
|
||||||
}}
|
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Posts vs Comments"
|
label="Posts vs Comments"
|
||||||
value={
|
value={
|
||||||
summary ? `${summary.total_posts} / ${summary.total_comments}` : "—"
|
summary ? `${summary.total_posts} / ${summary.total_comments}` : "-"
|
||||||
}
|
}
|
||||||
sublabel={`Comments per post: ${summary?.comments_per_post ?? "—"}`}
|
sublabel={`Comments per post: ${summary?.comments_per_post ?? "-"}`}
|
||||||
style={{
|
rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
|
||||||
gridColumn: "span 4",
|
style={{ gridColumn: "span 4" }}
|
||||||
}}
|
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<Card
|
<Card
|
||||||
label="Time Range"
|
label="Time Range"
|
||||||
value={
|
value={
|
||||||
summary?.time_range
|
summary?.time_range
|
||||||
? formatDateRange(
|
? formatDateRange(summary.time_range.start, summary.time_range.end)
|
||||||
summary.time_range.start,
|
: "-"
|
||||||
summary.time_range.end,
|
|
||||||
)
|
|
||||||
: "—"
|
|
||||||
}
|
}
|
||||||
sublabel="Based on dataset timestamps"
|
sublabel="Based on dataset timestamps"
|
||||||
style={{
|
rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
|
||||||
gridColumn: "span 4",
|
style={{ gridColumn: "span 4" }}
|
||||||
}}
|
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<Card
|
<Card
|
||||||
@@ -154,38 +150,44 @@ const SummaryStats = ({
|
|||||||
value={
|
value={
|
||||||
typeof summary?.lurker_ratio === "number"
|
typeof summary?.lurker_ratio === "number"
|
||||||
? `${Math.round(summary.lurker_ratio * 100)}%`
|
? `${Math.round(summary.lurker_ratio * 100)}%`
|
||||||
: "—"
|
: "-"
|
||||||
}
|
}
|
||||||
sublabel="Users with only one event"
|
sublabel="Users with only one event"
|
||||||
style={{
|
rightSlot={renderExploreButton(() => onExplore(buildOneTimeUsersSpec()))}
|
||||||
gridColumn: "span 4",
|
style={{ gridColumn: "span 4" }}
|
||||||
}}
|
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<Card
|
<Card
|
||||||
label="Sources"
|
label="Sources"
|
||||||
value={summary?.sources?.length ?? "—"}
|
value={summary?.sources?.length ?? "-"}
|
||||||
sublabel={
|
sublabel={
|
||||||
summary?.sources?.length
|
summary?.sources?.length
|
||||||
? summary.sources.slice(0, 3).join(", ") +
|
? summary.sources.slice(0, 3).join(", ") +
|
||||||
(summary.sources.length > 3 ? "…" : "")
|
(summary.sources.length > 3 ? "..." : "")
|
||||||
: "—"
|
: "-"
|
||||||
}
|
}
|
||||||
style={{
|
rightSlot={renderExploreButton(() => onExplore(buildAllRecordsSpec()))}
|
||||||
gridColumn: "span 4",
|
style={{ gridColumn: "span 4" }}
|
||||||
}}
|
|
||||||
/>
|
/>
|
||||||
|
|
||||||
{/* events per day */}
|
|
||||||
<div style={{ ...styles.card, gridColumn: "span 5" }}>
|
<div style={{ ...styles.card, gridColumn: "span 5" }}>
|
||||||
<h2 style={styles.sectionTitle}>Activity Over Time</h2>
|
<h2 style={styles.sectionTitle}>Activity Over Time</h2>
|
||||||
<p style={styles.sectionSubtitle}>
|
<p style={styles.sectionSubtitle}>How much posting happened each day.</p>
|
||||||
How much posting happened each day.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<div style={styles.chartWrapper}>
|
<div style={styles.chartWrapper}>
|
||||||
<ResponsiveContainer width="100%" height="100%">
|
<ResponsiveContainer width="100%" height="100%">
|
||||||
<LineChart data={timeData?.events_per_day ?? []}>
|
<LineChart
|
||||||
|
data={timeData?.events_per_day ?? []}
|
||||||
|
onClick={(state: unknown) => {
|
||||||
|
const payload = (state as { activePayload?: Array<{ payload?: { date?: string } }> })
|
||||||
|
?.activePayload?.[0]?.payload as
|
||||||
|
| { date?: string }
|
||||||
|
| undefined;
|
||||||
|
if (payload?.date) {
|
||||||
|
onExplore(buildDateBucketSpec(String(payload.date)));
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
>
|
||||||
<CartesianGrid strokeDasharray="3 3" />
|
<CartesianGrid strokeDasharray="3 3" />
|
||||||
<XAxis dataKey="date" />
|
<XAxis dataKey="date" />
|
||||||
<YAxis />
|
<YAxis />
|
||||||
@@ -201,7 +203,6 @@ const SummaryStats = ({
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Word Cloud */}
|
|
||||||
<div style={{ ...styles.card, gridColumn: "span 4" }}>
|
<div style={{ ...styles.card, gridColumn: "span 4" }}>
|
||||||
<h2 style={styles.sectionTitle}>Common Words</h2>
|
<h2 style={styles.sectionTitle}>Common Words</h2>
|
||||||
<p style={styles.sectionSubtitle}>
|
<p style={styles.sectionSubtitle}>
|
||||||
@@ -213,7 +214,6 @@ const SummaryStats = ({
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Top Users */}
|
|
||||||
<div
|
<div
|
||||||
style={{ ...styles.card, ...styles.scrollArea, gridColumn: "span 3" }}
|
style={{ ...styles.card, ...styles.scrollArea, gridColumn: "span 3" }}
|
||||||
>
|
>
|
||||||
@@ -225,7 +225,7 @@ const SummaryStats = ({
|
|||||||
<div
|
<div
|
||||||
key={`${item.author}-${item.source}`}
|
key={`${item.author}-${item.source}`}
|
||||||
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
style={{ ...styles.topUserItem, cursor: "pointer" }}
|
||||||
onClick={() => setSelectedUser(item.author)}
|
onClick={() => onExplore(buildUserSpec(item.author))}
|
||||||
>
|
>
|
||||||
<div style={styles.topUserName}>{item.author}</div>
|
<div style={styles.topUserName}>{item.author}</div>
|
||||||
<div style={styles.topUserMeta}>
|
<div style={styles.topUserMeta}>
|
||||||
@@ -236,7 +236,6 @@ const SummaryStats = ({
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Heatmap */}
|
|
||||||
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
||||||
<h2 style={styles.sectionTitle}>Weekly Activity Pattern</h2>
|
<h2 style={styles.sectionTitle}>Weekly Activity Pattern</h2>
|
||||||
<p style={styles.sectionSubtitle}>
|
<p style={styles.sectionSubtitle}>
|
||||||
@@ -248,13 +247,6 @@ const SummaryStats = ({
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<UserModal
|
|
||||||
open={!!selectedUser}
|
|
||||||
onClose={() => setSelectedUser(null)}
|
|
||||||
username={selectedUser ?? ""}
|
|
||||||
userData={selectedUserData}
|
|
||||||
/>
|
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -5,6 +5,12 @@ import { type TopUser, type InteractionGraph } from "../types/ApiTypes";
|
|||||||
|
|
||||||
import StatsStyling from "../styles/stats_styling";
|
import StatsStyling from "../styles/stats_styling";
|
||||||
import Card from "./Card";
|
import Card from "./Card";
|
||||||
|
import {
|
||||||
|
buildReplyPairSpec,
|
||||||
|
toText,
|
||||||
|
buildUserSpec,
|
||||||
|
type CorpusExplorerSpec,
|
||||||
|
} from "../utils/corpusExplorer";
|
||||||
|
|
||||||
const styles = StatsStyling;
|
const styles = StatsStyling;
|
||||||
|
|
||||||
@@ -39,6 +45,7 @@ type UserStatsProps = {
|
|||||||
interactionGraph: InteractionGraph;
|
interactionGraph: InteractionGraph;
|
||||||
totalUsers: number;
|
totalUsers: number;
|
||||||
mostCommentHeavyUser: { author: string; commentShare: number } | null;
|
mostCommentHeavyUser: { author: string; commentShare: number } | null;
|
||||||
|
onExplore: (spec: CorpusExplorerSpec) => void;
|
||||||
};
|
};
|
||||||
|
|
||||||
const UserStats = ({
|
const UserStats = ({
|
||||||
@@ -46,6 +53,7 @@ const UserStats = ({
|
|||||||
interactionGraph,
|
interactionGraph,
|
||||||
totalUsers,
|
totalUsers,
|
||||||
mostCommentHeavyUser,
|
mostCommentHeavyUser,
|
||||||
|
onExplore,
|
||||||
}: UserStatsProps) => {
|
}: UserStatsProps) => {
|
||||||
const graphData = useMemo(
|
const graphData = useMemo(
|
||||||
() => ApiToGraphData(interactionGraph),
|
() => ApiToGraphData(interactionGraph),
|
||||||
@@ -87,9 +95,9 @@ const UserStats = ({
|
|||||||
null,
|
null,
|
||||||
);
|
);
|
||||||
|
|
||||||
const mostActiveUser = topUsers.find(
|
const mostActiveUser = topUsers.find((u) => u.author !== "[deleted]");
|
||||||
(u) => u.author !== "[deleted]",
|
const strongestLinkSource = strongestLink ? toText(strongestLink.source) : "";
|
||||||
);
|
const strongestLinkTarget = strongestLink ? toText(strongestLink.target) : "";
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div style={styles.page}>
|
<div style={styles.page}>
|
||||||
@@ -114,37 +122,69 @@ const UserStats = ({
|
|||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Most Active User"
|
label="Most Active User"
|
||||||
value={mostActiveUser?.author ?? "—"}
|
value={mostActiveUser?.author ?? "-"}
|
||||||
sublabel={
|
sublabel={
|
||||||
mostActiveUser
|
mostActiveUser
|
||||||
? `${mostActiveUser.count.toLocaleString()} events`
|
? `${mostActiveUser.count.toLocaleString()} events`
|
||||||
: "No user activity found"
|
: "No user activity found"
|
||||||
}
|
}
|
||||||
|
rightSlot={
|
||||||
|
mostActiveUser ? (
|
||||||
|
<button
|
||||||
|
onClick={() => onExplore(buildUserSpec(mostActiveUser.author))}
|
||||||
|
style={styles.buttonSecondary}
|
||||||
|
>
|
||||||
|
Explore
|
||||||
|
</button>
|
||||||
|
) : null
|
||||||
|
}
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<Card
|
<Card
|
||||||
label="Strongest User Link"
|
label="Strongest User Link"
|
||||||
value={
|
value={
|
||||||
strongestLink
|
strongestLinkSource && strongestLinkTarget
|
||||||
? `${strongestLink.source} -> ${strongestLink.target}`
|
? `${strongestLinkSource} -> ${strongestLinkTarget}`
|
||||||
: "—"
|
: "-"
|
||||||
}
|
}
|
||||||
sublabel={
|
sublabel={
|
||||||
strongestLink
|
strongestLink
|
||||||
? `${strongestLink.value.toLocaleString()} replies`
|
? `${strongestLink.value.toLocaleString()} replies`
|
||||||
: "No graph links after filtering"
|
: "No graph links after filtering"
|
||||||
}
|
}
|
||||||
|
rightSlot={
|
||||||
|
strongestLinkSource && strongestLinkTarget ? (
|
||||||
|
<button
|
||||||
|
onClick={() =>
|
||||||
|
onExplore(buildReplyPairSpec(strongestLinkSource, strongestLinkTarget))
|
||||||
|
}
|
||||||
|
style={styles.buttonSecondary}
|
||||||
|
>
|
||||||
|
Explore
|
||||||
|
</button>
|
||||||
|
) : null
|
||||||
|
}
|
||||||
style={{ gridColumn: "span 6" }}
|
style={{ gridColumn: "span 6" }}
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Most Comment-Heavy User"
|
label="Most Comment-Heavy User"
|
||||||
value={mostCommentHeavyUser?.author ?? "—"}
|
value={mostCommentHeavyUser?.author ?? "-"}
|
||||||
sublabel={
|
sublabel={
|
||||||
mostCommentHeavyUser
|
mostCommentHeavyUser
|
||||||
? `${Math.round(mostCommentHeavyUser.commentShare * 100)}% comments`
|
? `${Math.round(mostCommentHeavyUser.commentShare * 100)}% comments`
|
||||||
: "No user distribution available"
|
: "No user distribution available"
|
||||||
}
|
}
|
||||||
|
rightSlot={
|
||||||
|
mostCommentHeavyUser ? (
|
||||||
|
<button
|
||||||
|
onClick={() => onExplore(buildUserSpec(mostCommentHeavyUser.author))}
|
||||||
|
style={styles.buttonSecondary}
|
||||||
|
>
|
||||||
|
Explore
|
||||||
|
</button>
|
||||||
|
) : null
|
||||||
|
}
|
||||||
style={{ gridColumn: "span 6" }}
|
style={{ gridColumn: "span 6" }}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
@@ -166,6 +206,19 @@ const UserStats = ({
|
|||||||
linkDirectionalParticleSpeed={0.004}
|
linkDirectionalParticleSpeed={0.004}
|
||||||
linkWidth={(link) => Math.sqrt(Number(link.value))}
|
linkWidth={(link) => Math.sqrt(Number(link.value))}
|
||||||
nodeLabel={(node) => `${node.id}`}
|
nodeLabel={(node) => `${node.id}`}
|
||||||
|
onNodeClick={(node) => {
|
||||||
|
const userId = toText(node.id);
|
||||||
|
if (userId) {
|
||||||
|
onExplore(buildUserSpec(userId));
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
onLinkClick={(link) => {
|
||||||
|
const source = toText(link.source);
|
||||||
|
const target = toText(link.target);
|
||||||
|
if (source && target) {
|
||||||
|
onExplore(buildReplyPairSpec(source, target));
|
||||||
|
}
|
||||||
|
}}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -22,12 +22,10 @@ const DatasetEditPage = () => {
|
|||||||
const [isSaving, setIsSaving] = useState(false);
|
const [isSaving, setIsSaving] = useState(false);
|
||||||
const [isDeleting, setIsDeleting] = useState(false);
|
const [isDeleting, setIsDeleting] = useState(false);
|
||||||
const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false);
|
const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false);
|
||||||
const [hasError, setHasError] = useState(false);
|
|
||||||
|
|
||||||
const [datasetName, setDatasetName] = useState("");
|
const [datasetName, setDatasetName] = useState("");
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!Number.isInteger(parsedDatasetId) || parsedDatasetId <= 0) {
|
if (!Number.isInteger(parsedDatasetId) || parsedDatasetId <= 0) {
|
||||||
setHasError(true);
|
|
||||||
setStatusMessage("Invalid dataset id.");
|
setStatusMessage("Invalid dataset id.");
|
||||||
setLoading(false);
|
setLoading(false);
|
||||||
return;
|
return;
|
||||||
@@ -35,7 +33,6 @@ const DatasetEditPage = () => {
|
|||||||
|
|
||||||
const token = localStorage.getItem("access_token");
|
const token = localStorage.getItem("access_token");
|
||||||
if (!token) {
|
if (!token) {
|
||||||
setHasError(true);
|
|
||||||
setStatusMessage("You must be signed in to edit datasets.");
|
setStatusMessage("You must be signed in to edit datasets.");
|
||||||
setLoading(false);
|
setLoading(false);
|
||||||
return;
|
return;
|
||||||
@@ -49,7 +46,6 @@ const DatasetEditPage = () => {
|
|||||||
setDatasetName(response.data.name || "");
|
setDatasetName(response.data.name || "");
|
||||||
})
|
})
|
||||||
.catch((error: unknown) => {
|
.catch((error: unknown) => {
|
||||||
setHasError(true);
|
|
||||||
if (axios.isAxiosError(error)) {
|
if (axios.isAxiosError(error)) {
|
||||||
setStatusMessage(
|
setStatusMessage(
|
||||||
String(error.response?.data?.error || error.message),
|
String(error.response?.data?.error || error.message),
|
||||||
@@ -68,21 +64,18 @@ const DatasetEditPage = () => {
|
|||||||
|
|
||||||
const trimmedName = datasetName.trim();
|
const trimmedName = datasetName.trim();
|
||||||
if (!trimmedName) {
|
if (!trimmedName) {
|
||||||
setHasError(true);
|
|
||||||
setStatusMessage("Please enter a valid dataset name.");
|
setStatusMessage("Please enter a valid dataset name.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const token = localStorage.getItem("access_token");
|
const token = localStorage.getItem("access_token");
|
||||||
if (!token) {
|
if (!token) {
|
||||||
setHasError(true);
|
|
||||||
setStatusMessage("You must be signed in to save changes.");
|
setStatusMessage("You must be signed in to save changes.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
setIsSaving(true);
|
setIsSaving(true);
|
||||||
setHasError(false);
|
|
||||||
setStatusMessage("");
|
setStatusMessage("");
|
||||||
|
|
||||||
await axios.patch(
|
await axios.patch(
|
||||||
@@ -93,7 +86,6 @@ const DatasetEditPage = () => {
|
|||||||
|
|
||||||
navigate("/datasets", { replace: true });
|
navigate("/datasets", { replace: true });
|
||||||
} catch (error: unknown) {
|
} catch (error: unknown) {
|
||||||
setHasError(true);
|
|
||||||
if (axios.isAxiosError(error)) {
|
if (axios.isAxiosError(error)) {
|
||||||
setStatusMessage(
|
setStatusMessage(
|
||||||
String(
|
String(
|
||||||
@@ -111,7 +103,6 @@ const DatasetEditPage = () => {
|
|||||||
const deleteDataset = async () => {
|
const deleteDataset = async () => {
|
||||||
const deleteToken = localStorage.getItem("access_token");
|
const deleteToken = localStorage.getItem("access_token");
|
||||||
if (!deleteToken) {
|
if (!deleteToken) {
|
||||||
setHasError(true);
|
|
||||||
setStatusMessage("You must be signed in to delete datasets.");
|
setStatusMessage("You must be signed in to delete datasets.");
|
||||||
setIsDeleteModalOpen(false);
|
setIsDeleteModalOpen(false);
|
||||||
return;
|
return;
|
||||||
@@ -119,7 +110,6 @@ const DatasetEditPage = () => {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
setIsDeleting(true);
|
setIsDeleting(true);
|
||||||
setHasError(false);
|
|
||||||
setStatusMessage("");
|
setStatusMessage("");
|
||||||
|
|
||||||
await axios.delete(`${API_BASE_URL}/dataset/${parsedDatasetId}`, {
|
await axios.delete(`${API_BASE_URL}/dataset/${parsedDatasetId}`, {
|
||||||
@@ -129,7 +119,6 @@ const DatasetEditPage = () => {
|
|||||||
setIsDeleteModalOpen(false);
|
setIsDeleteModalOpen(false);
|
||||||
navigate("/datasets", { replace: true });
|
navigate("/datasets", { replace: true });
|
||||||
} catch (error: unknown) {
|
} catch (error: unknown) {
|
||||||
setHasError(true);
|
|
||||||
if (axios.isAxiosError(error)) {
|
if (axios.isAxiosError(error)) {
|
||||||
setStatusMessage(
|
setStatusMessage(
|
||||||
String(
|
String(
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { useEffect, useState, useRef } from "react";
|
import { useEffect, useRef, useState } from "react";
|
||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
import { useParams } from "react-router-dom";
|
import { useParams } from "react-router-dom";
|
||||||
import StatsStyling from "../styles/stats_styling";
|
import StatsStyling from "../styles/stats_styling";
|
||||||
@@ -8,6 +8,7 @@ import UserStats from "../components/UserStats";
|
|||||||
import LinguisticStats from "../components/LinguisticStats";
|
import LinguisticStats from "../components/LinguisticStats";
|
||||||
import InteractionalStats from "../components/InteractionalStats";
|
import InteractionalStats from "../components/InteractionalStats";
|
||||||
import CulturalStats from "../components/CulturalStats";
|
import CulturalStats from "../components/CulturalStats";
|
||||||
|
import CorpusExplorer from "../components/CorpusExplorer";
|
||||||
|
|
||||||
import {
|
import {
|
||||||
type SummaryResponse,
|
type SummaryResponse,
|
||||||
@@ -19,10 +20,15 @@ import {
|
|||||||
type InteractionAnalysisResponse,
|
type InteractionAnalysisResponse,
|
||||||
type CulturalAnalysisResponse,
|
type CulturalAnalysisResponse,
|
||||||
} from "../types/ApiTypes";
|
} from "../types/ApiTypes";
|
||||||
|
import {
|
||||||
|
buildExplorerContext,
|
||||||
|
type CorpusExplorerSpec,
|
||||||
|
type DatasetRecord,
|
||||||
|
} from "../utils/corpusExplorer";
|
||||||
|
|
||||||
const API_BASE_URL = import.meta.env.VITE_BACKEND_URL;
|
const API_BASE_URL = import.meta.env.VITE_BACKEND_URL;
|
||||||
const styles = StatsStyling;
|
const styles = StatsStyling;
|
||||||
const DELETED_USERS = ["[deleted]"];
|
const DELETED_USERS = ["[deleted]", "automoderator"];
|
||||||
|
|
||||||
const isDeletedUser = (value: string | null | undefined) =>
|
const isDeletedUser = (value: string | null | undefined) =>
|
||||||
DELETED_USERS.includes((value ?? "").trim().toLowerCase());
|
DELETED_USERS.includes((value ?? "").trim().toLowerCase());
|
||||||
@@ -40,6 +46,97 @@ type UserStatsMeta = {
|
|||||||
mostCommentHeavyUser: { author: string; commentShare: number } | null;
|
mostCommentHeavyUser: { author: string; commentShare: number } | null;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * View state for the corpus explorer dialog.
 *
 * Every field is forwarded to the `CorpusExplorer` component as the prop of
 * the same name.
 */
type ExplorerState = {
  // Visibility and request status.
  open: boolean;
  loading: boolean;
  error: string;

  // Text shown for the current selection.
  title: string;
  description: string;
  emptyMessage: string;

  // Records to list once loading has finished.
  records: DatasetRecord[];
};

/** Closed, empty explorer; used as the initial state and when resetting. */
const EMPTY_EXPLORER_STATE: ExplorerState = {
  open: false,
  title: "Corpus Explorer",
  description: "",
  emptyMessage: "No records found.",
  records: [],
  loading: false,
  error: "",
};
|
||||||
|
|
||||||
|
/**
 * Coerces whatever the corpus endpoint returned into a flat record array.
 *
 * Accepted shapes, checked in this order:
 *  - a JSON string containing any of the shapes below;
 *  - an object with a string `error` field (rethrown as an `Error`);
 *  - a bare array of records;
 *  - an object wrapping the array under `data`, `records`, `rows` or `result`;
 *  - an object with exactly one property whose value is an array;
 *  - an object whose values are all objects (treated as the records).
 *
 * @param payload Raw response body.
 * @returns The record array found in the payload.
 * @throws Error if a string payload is not valid JSON, if the payload carries
 *   a server-side `error` message, or if no record array can be located.
 */
const normalizeRecordPayload = (payload: unknown): DatasetRecord[] => {
  if (typeof payload === "string") {
    let parsed: unknown;
    try {
      parsed = JSON.parse(payload);
    } catch {
      throw new Error("Corpus endpoint returned a non-JSON string payload.");
    }
    // Normalise outside the try block: errors raised for the *decoded*
    // payload (for example a server `error` envelope) must reach the caller
    // unchanged instead of being reported as a JSON parsing failure.
    return normalizeRecordPayload(parsed);
  }

  if (Array.isArray(payload)) {
    return payload as DatasetRecord[];
  }

  if (!payload || typeof payload !== "object") {
    throw new Error("Corpus endpoint returned an unexpected payload.");
  }

  const envelope = payload as Record<string, unknown>;

  if (typeof envelope.error === "string") {
    throw new Error(envelope.error);
  }

  // Well-known wrapper keys, in priority order.
  const wrapperKeys = ["data", "records", "rows", "result"] as const;
  for (const key of wrapperKeys) {
    const candidate = envelope[key];
    if (Array.isArray(candidate)) {
      return candidate as DatasetRecord[];
    }
  }

  const values = Object.values(envelope);

  // A single unknown wrapper key around the array.
  if (values.length === 1 && Array.isArray(values[0])) {
    return values[0] as DatasetRecord[];
  }

  // An object whose values are themselves the records.
  if (values.every((value) => value && typeof value === "object")) {
    return values as DatasetRecord[];
  }

  throw new Error("Corpus endpoint returned an unexpected payload.");
};
|
||||||
|
|
||||||
const StatPage = () => {
|
const StatPage = () => {
|
||||||
const { datasetId: routeDatasetId } = useParams<{ datasetId: string }>();
|
const { datasetId: routeDatasetId } = useParams<{ datasetId: string }>();
|
||||||
const [error, setError] = useState("");
|
const [error, setError] = useState("");
|
||||||
@@ -61,6 +158,12 @@ const StatPage = () => {
|
|||||||
totalUsers: 0,
|
totalUsers: 0,
|
||||||
mostCommentHeavyUser: null,
|
mostCommentHeavyUser: null,
|
||||||
});
|
});
|
||||||
|
const [appliedFilters, setAppliedFilters] = useState<Record<string, string>>({});
|
||||||
|
const [allRecords, setAllRecords] = useState<DatasetRecord[] | null>(null);
|
||||||
|
const [allRecordsKey, setAllRecordsKey] = useState("");
|
||||||
|
const [explorerState, setExplorerState] = useState<ExplorerState>(
|
||||||
|
EMPTY_EXPLORER_STATE,
|
||||||
|
);
|
||||||
|
|
||||||
const searchInputRef = useRef<HTMLInputElement>(null);
|
const searchInputRef = useRef<HTMLInputElement>(null);
|
||||||
const beforeDateRef = useRef<HTMLInputElement>(null);
|
const beforeDateRef = useRef<HTMLInputElement>(null);
|
||||||
@@ -104,6 +207,82 @@ const StatPage = () => {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Serialises a filter set into a string key that does not depend on the
 * order in which the filters were inserted.
 */
const getFilterKey = (params: Record<string, string>) => {
  const sortedEntries = Object.entries(params).sort((left, right) =>
    left[0].localeCompare(right[0]),
  );
  return JSON.stringify(sortedEntries);
};
|
||||||
|
|
||||||
|
/**
 * Resolves the records matching the currently applied filters.
 *
 * Reuses the records held in state when they were fetched for the same
 * filter key; otherwise requests `/dataset/<id>/all`, normalises the
 * response and stores it in state together with its key.
 *
 * @throws Error when the dataset id or auth headers are missing, or when the
 *   response cannot be normalised into a record array.
 */
const ensureFilteredRecords = async () => {
  if (!datasetId) {
    throw new Error("Missing dataset id.");
  }

  const headers = getAuthHeaders();
  if (!headers) {
    throw new Error("You must be signed in to load corpus records.");
  }

  const cacheKey = getFilterKey(appliedFilters);
  if (allRecordsKey === cacheKey && allRecords) {
    return allRecords;
  }

  const url = `${API_BASE_URL}/dataset/${datasetId}/all`;
  const { data } = await axios.get<unknown>(url, {
    params: appliedFilters,
    headers,
  });

  const fetched = normalizeRecordPayload(data);
  setAllRecords(fetched);
  setAllRecordsKey(cacheKey);
  return fetched;
};
|
||||||
|
|
||||||
|
/**
 * Opens the corpus explorer for `spec` and fills it with the matching records.
 *
 * The dialog is shown immediately in a loading state. Once the filtered
 * records are available they are narrowed with `spec.matcher`, ordered newest
 * first and written to the explorer state; failures are surfaced through the
 * state's `error` field instead of being thrown.
 *
 * @param spec Title, description, optional empty message and record matcher
 *   describing what to show.
 */
const openExplorer = async (spec: CorpusExplorerSpec) => {
  setExplorerState({
    open: true,
    title: spec.title,
    description: spec.description,
    emptyMessage: spec.emptyMessage ?? "No matching records found.",
    records: [],
    loading: true,
    error: "",
  });

  // Apply the outcome on top of the *current* state rather than overwriting
  // it: this keeps the dialog closed if the user dismissed it while the
  // request was in flight, and drops the outcome entirely when the explorer
  // has since been reset or pointed at a different selection.
  const settle = (outcome: Pick<ExplorerState, "records" | "error">) => {
    setExplorerState((current) =>
      current.title === spec.title && current.description === spec.description
        ? { ...current, ...outcome, loading: false }
        : current,
    );
  };

  // First available of dt, date, timestamp; empty string when none is set.
  const sortKey = (record: DatasetRecord) =>
    record.dt ?? record.date ?? record.timestamp ?? "";

  try {
    const records = await ensureFilteredRecords();
    const context = buildExplorerContext(records);
    const matched = records.filter((record) => spec.matcher(record, context));

    // Newest first. Two numeric keys are compared as numbers, because
    // comparing them as strings would place "999" after "1000".
    matched.sort((a, b) => {
      const aKey = sortKey(a);
      const bKey = sortKey(b);
      if (typeof aKey === "number" && typeof bKey === "number") {
        return bKey - aKey;
      }
      return String(bKey).localeCompare(String(aKey));
    });

    settle({ records: matched, error: "" });
  } catch (e) {
    settle({
      records: [],
      error: `Failed to load corpus records: ${String(e)}`,
    });
  }
};
|
||||||
|
|
||||||
const getStats = (params: Record<string, string> = {}) => {
|
const getStats = (params: Record<string, string> = {}) => {
|
||||||
if (!datasetId) {
|
if (!datasetId) {
|
||||||
setError("Missing dataset id. Open /dataset/<id>/stats.");
|
setError("Missing dataset id. Open /dataset/<id>/stats.");
|
||||||
@@ -118,22 +297,20 @@ const StatPage = () => {
|
|||||||
|
|
||||||
setError("");
|
setError("");
|
||||||
setLoading(true);
|
setLoading(true);
|
||||||
|
setAppliedFilters(params);
|
||||||
|
setAllRecords(null);
|
||||||
|
setAllRecordsKey("");
|
||||||
|
setExplorerState((current) => ({ ...current, open: false }));
|
||||||
|
|
||||||
Promise.all([
|
Promise.all([
|
||||||
axios.get<TimeAnalysisResponse>(
|
axios.get<TimeAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/temporal`, {
|
||||||
`${API_BASE_URL}/dataset/${datasetId}/temporal`,
|
|
||||||
{
|
|
||||||
params,
|
params,
|
||||||
headers: authHeaders,
|
headers: authHeaders,
|
||||||
},
|
}),
|
||||||
),
|
axios.get<UserEndpointResponse>(`${API_BASE_URL}/dataset/${datasetId}/user`, {
|
||||||
axios.get<UserEndpointResponse>(
|
|
||||||
`${API_BASE_URL}/dataset/${datasetId}/user`,
|
|
||||||
{
|
|
||||||
params,
|
params,
|
||||||
headers: authHeaders,
|
headers: authHeaders,
|
||||||
},
|
}),
|
||||||
),
|
|
||||||
axios.get<LinguisticAnalysisResponse>(
|
axios.get<LinguisticAnalysisResponse>(
|
||||||
`${API_BASE_URL}/dataset/${datasetId}/linguistic`,
|
`${API_BASE_URL}/dataset/${datasetId}/linguistic`,
|
||||||
{
|
{
|
||||||
@@ -141,13 +318,10 @@ const StatPage = () => {
|
|||||||
headers: authHeaders,
|
headers: authHeaders,
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
axios.get<EmotionalAnalysisResponse>(
|
axios.get<EmotionalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/emotional`, {
|
||||||
`${API_BASE_URL}/dataset/${datasetId}/emotional`,
|
|
||||||
{
|
|
||||||
params,
|
params,
|
||||||
headers: authHeaders,
|
headers: authHeaders,
|
||||||
},
|
}),
|
||||||
),
|
|
||||||
axios.get<InteractionAnalysisResponse>(
|
axios.get<InteractionAnalysisResponse>(
|
||||||
`${API_BASE_URL}/dataset/${datasetId}/interactional`,
|
`${API_BASE_URL}/dataset/${datasetId}/interactional`,
|
||||||
{
|
{
|
||||||
@@ -155,20 +329,14 @@ const StatPage = () => {
|
|||||||
headers: authHeaders,
|
headers: authHeaders,
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
axios.get<SummaryResponse>(
|
axios.get<SummaryResponse>(`${API_BASE_URL}/dataset/${datasetId}/summary`, {
|
||||||
`${API_BASE_URL}/dataset/${datasetId}/summary`,
|
|
||||||
{
|
|
||||||
params,
|
params,
|
||||||
headers: authHeaders,
|
headers: authHeaders,
|
||||||
},
|
}),
|
||||||
),
|
axios.get<CulturalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/cultural`, {
|
||||||
axios.get<CulturalAnalysisResponse>(
|
|
||||||
`${API_BASE_URL}/dataset/${datasetId}/cultural`,
|
|
||||||
{
|
|
||||||
params,
|
params,
|
||||||
headers: authHeaders,
|
headers: authHeaders,
|
||||||
},
|
}),
|
||||||
),
|
|
||||||
])
|
])
|
||||||
.then(
|
.then(
|
||||||
([
|
([
|
||||||
@@ -182,8 +350,7 @@ const StatPage = () => {
|
|||||||
]) => {
|
]) => {
|
||||||
const usersList = userRes.data.users ?? [];
|
const usersList = userRes.data.users ?? [];
|
||||||
const topUsersList = userRes.data.top_users ?? [];
|
const topUsersList = userRes.data.top_users ?? [];
|
||||||
const interactionGraphRaw =
|
const interactionGraphRaw = interactionRes.data?.interaction_graph ?? {};
|
||||||
interactionRes.data?.interaction_graph ?? {};
|
|
||||||
const topPairsRaw = interactionRes.data?.top_interaction_pairs ?? [];
|
const topPairsRaw = interactionRes.data?.top_interaction_pairs ?? [];
|
||||||
|
|
||||||
const filteredUsers: typeof usersList = [];
|
const filteredUsers: typeof usersList = [];
|
||||||
@@ -198,14 +365,10 @@ const StatPage = () => {
|
|||||||
filteredTopUsers.push(user);
|
filteredTopUsers.push(user);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mostCommentHeavyUser: UserStatsMeta["mostCommentHeavyUser"] =
|
let mostCommentHeavyUser: UserStatsMeta["mostCommentHeavyUser"] = null;
|
||||||
null;
|
|
||||||
for (const user of filteredUsers) {
|
for (const user of filteredUsers) {
|
||||||
const currentShare = user.comment_share ?? 0;
|
const currentShare = user.comment_share ?? 0;
|
||||||
if (
|
if (!mostCommentHeavyUser || currentShare > mostCommentHeavyUser.commentShare) {
|
||||||
!mostCommentHeavyUser ||
|
|
||||||
currentShare > mostCommentHeavyUser.commentShare
|
|
||||||
) {
|
|
||||||
mostCommentHeavyUser = {
|
mostCommentHeavyUser = {
|
||||||
author: user.author,
|
author: user.author,
|
||||||
commentShare: currentShare,
|
commentShare: currentShare,
|
||||||
@@ -221,8 +384,7 @@ const StatPage = () => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const filteredInteractionGraph: Record<string, Record<string, number>> =
|
const filteredInteractionGraph: Record<string, Record<string, number>> = {};
|
||||||
{};
|
|
||||||
for (const [source, targets] of Object.entries(interactionGraphRaw)) {
|
for (const [source, targets] of Object.entries(interactionGraphRaw)) {
|
||||||
if (isDeletedUser(source)) {
|
if (isDeletedUser(source)) {
|
||||||
continue;
|
continue;
|
||||||
@@ -279,7 +441,7 @@ const StatPage = () => {
|
|||||||
setSummary(filteredSummary || null);
|
setSummary(filteredSummary || null);
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.catch((e) => setError("Failed to load statistics: " + String(e)))
|
.catch((e) => setError(`Failed to load statistics: ${String(e)}`))
|
||||||
.finally(() => setLoading(false));
|
.finally(() => setLoading(false));
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -302,6 +464,9 @@ const StatPage = () => {
|
|||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
setError("");
|
setError("");
|
||||||
|
setAllRecords(null);
|
||||||
|
setAllRecordsKey("");
|
||||||
|
setExplorerState(EMPTY_EXPLORER_STATE);
|
||||||
if (!datasetId) {
|
if (!datasetId) {
|
||||||
setError("Missing dataset id. Open /dataset/<id>/stats.");
|
setError("Missing dataset id. Open /dataset/<id>/stats.");
|
||||||
return;
|
return;
|
||||||
@@ -398,9 +563,7 @@ const StatPage = () => {
|
|||||||
<button
|
<button
|
||||||
onClick={() => setActiveView("summary")}
|
onClick={() => setActiveView("summary")}
|
||||||
style={
|
style={
|
||||||
activeView === "summary"
|
activeView === "summary" ? styles.buttonPrimary : styles.buttonSecondary
|
||||||
? styles.buttonPrimary
|
|
||||||
: styles.buttonSecondary
|
|
||||||
}
|
}
|
||||||
>
|
>
|
||||||
Summary
|
Summary
|
||||||
@@ -418,11 +581,7 @@ const StatPage = () => {
|
|||||||
|
|
||||||
<button
|
<button
|
||||||
onClick={() => setActiveView("user")}
|
onClick={() => setActiveView("user")}
|
||||||
style={
|
style={activeView === "user" ? styles.buttonPrimary : styles.buttonSecondary}
|
||||||
activeView === "user"
|
|
||||||
? styles.buttonPrimary
|
|
||||||
: styles.buttonSecondary
|
|
||||||
}
|
|
||||||
>
|
>
|
||||||
Users
|
Users
|
||||||
</button>
|
</button>
|
||||||
@@ -449,9 +608,7 @@ const StatPage = () => {
|
|||||||
<button
|
<button
|
||||||
onClick={() => setActiveView("cultural")}
|
onClick={() => setActiveView("cultural")}
|
||||||
style={
|
style={
|
||||||
activeView === "cultural"
|
activeView === "cultural" ? styles.buttonPrimary : styles.buttonSecondary
|
||||||
? styles.buttonPrimary
|
|
||||||
: styles.buttonSecondary
|
|
||||||
}
|
}
|
||||||
>
|
>
|
||||||
Cultural
|
Cultural
|
||||||
@@ -464,11 +621,12 @@ const StatPage = () => {
|
|||||||
timeData={timeData}
|
timeData={timeData}
|
||||||
linguisticData={linguisticData}
|
linguisticData={linguisticData}
|
||||||
summary={summary}
|
summary={summary}
|
||||||
|
onExplore={openExplorer}
|
||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{activeView === "emotional" && emotionalData && (
|
{activeView === "emotional" && emotionalData && (
|
||||||
<EmotionalStats emotionalData={emotionalData} />
|
<EmotionalStats emotionalData={emotionalData} onExplore={openExplorer} />
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{activeView === "emotional" && !emotionalData && (
|
{activeView === "emotional" && !emotionalData && (
|
||||||
@@ -483,6 +641,7 @@ const StatPage = () => {
|
|||||||
interactionGraph={interactionData.interaction_graph}
|
interactionGraph={interactionData.interaction_graph}
|
||||||
totalUsers={userStatsMeta.totalUsers}
|
totalUsers={userStatsMeta.totalUsers}
|
||||||
mostCommentHeavyUser={userStatsMeta.mostCommentHeavyUser}
|
mostCommentHeavyUser={userStatsMeta.mostCommentHeavyUser}
|
||||||
|
onExplore={openExplorer}
|
||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
@@ -493,7 +652,7 @@ const StatPage = () => {
|
|||||||
)}
|
)}
|
||||||
|
|
||||||
{activeView === "linguistic" && linguisticData && (
|
{activeView === "linguistic" && linguisticData && (
|
||||||
<LinguisticStats data={linguisticData} />
|
<LinguisticStats data={linguisticData} onExplore={openExplorer} />
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{activeView === "linguistic" && !linguisticData && (
|
{activeView === "linguistic" && !linguisticData && (
|
||||||
@@ -503,7 +662,7 @@ const StatPage = () => {
|
|||||||
)}
|
)}
|
||||||
|
|
||||||
{activeView === "interactional" && interactionData && (
|
{activeView === "interactional" && interactionData && (
|
||||||
<InteractionalStats data={interactionData} />
|
<InteractionalStats data={interactionData} onExplore={openExplorer} />
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{activeView === "interactional" && !interactionData && (
|
{activeView === "interactional" && !interactionData && (
|
||||||
@@ -513,7 +672,7 @@ const StatPage = () => {
|
|||||||
)}
|
)}
|
||||||
|
|
||||||
{activeView === "cultural" && culturalData && (
|
{activeView === "cultural" && culturalData && (
|
||||||
<CulturalStats data={culturalData} />
|
<CulturalStats data={culturalData} onExplore={openExplorer} />
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{activeView === "cultural" && !culturalData && (
|
{activeView === "cultural" && !culturalData && (
|
||||||
@@ -521,6 +680,17 @@ const StatPage = () => {
|
|||||||
No cultural data available.
|
No cultural data available.
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
<CorpusExplorer
|
||||||
|
open={explorerState.open}
|
||||||
|
onClose={() => setExplorerState((current) => ({ ...current, open: false }))}
|
||||||
|
title={explorerState.title}
|
||||||
|
description={explorerState.description}
|
||||||
|
records={explorerState.records}
|
||||||
|
loading={explorerState.loading}
|
||||||
|
error={explorerState.error}
|
||||||
|
emptyMessage={explorerState.emptyMessage}
|
||||||
|
/>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|||||||
405
frontend/src/utils/corpusExplorer.ts
Normal file
405
frontend/src/utils/corpusExplorer.ts
Normal file
@@ -0,0 +1,405 @@
|
|||||||
|
import type { CSSProperties } from "react";
|
||||||
|
|
||||||
|
/** A named entity attached to a record; this module only reads `text`. */
type EntityRecord = {
  text?: string;
  [key: string]: unknown;
};

/**
 * One row of the dataset as consumed by the corpus explorer.
 *
 * Every known column is optional and most are nullable; additional columns
 * are tolerated through the index signature.
 */
type DatasetRecord = {
  id?: string | number;
  post_id?: string | number | null;
  parent_id?: string | number | null;
  author?: string | null;
  title?: string | null;
  content?: string | null;
  /** Interpreted as epoch seconds by `getDateBucket` (numeric or numeric string). */
  timestamp?: string | number | null;
  date?: string | null;
  dt?: string | null;
  hour?: number | null;
  weekday?: string | null;
  reply_to?: string | number | null;
  source?: string | null;
  topic?: string | null;
  topic_confidence?: number | null;
  /** Compared against the literal "comment" by the commenter-related specs. */
  type?: string | null;
  ner_entities?: EntityRecord[] | null;
  emotion_anger?: number | null;
  emotion_disgust?: number | null;
  emotion_fear?: number | null;
  emotion_joy?: number | null;
  emotion_sadness?: number | null;
  [key: string]: unknown;
};

/** Lookup tables derived once from the full record list and shared by matchers. */
type CorpusExplorerContext = {
  /** `post_id` (stringified) -> trimmed author name. */
  authorByPostId: Map<string, string>;
  /** Trimmed author name -> number of records by that author. */
  authorEventCounts: Map<string, number>;
  /** Trimmed author name -> number of records of type "comment" by that author. */
  authorCommentCounts: Map<string, number>;
};

/** Describes one explorer view: its labels and the predicate selecting its records. */
type CorpusExplorerSpec = {
  title: string;
  description: string;
  emptyMessage?: string;
  matcher: (record: DatasetRecord, context: CorpusExplorerContext) => boolean;
};
|
||||||
|
|
||||||
|
/** First-person plural pronouns, counted as in-group references. */
const IN_GROUP_PATTERN = /\b(we|us|our|ourselves)\b/gi;

/** Third-person plural pronouns, counted as out-group references. */
const OUT_GROUP_PATTERN = /\b(they|them|their|themselves)\b/gi;

/** Words and short phrases treated as hedging language. */
const HEDGE_PATTERN = /\b(maybe|perhaps|possibly|probably|likely|seems|seem|i think|i feel|i guess|kind of|sort of|somewhat)\b/i;

/** Words treated as certainty language. */
const CERTAINTY_PATTERN = /\b(definitely|certainly|clearly|obviously|undeniably|always|never)\b/i;

/** Words and short phrases treated as deontic (obligation) language. */
const DEONTIC_PATTERN = /\b(must|should|need|needs|have to|has to|ought|required|require)\b/i;

/** Words treated as permission language. */
const PERMISSION_PATTERN = /\b(can|allowed|okay|ok|permitted)\b/i;

/** Record columns holding per-emotion scores, in the order they are compared. */
const EMOTION_KEYS = [
  "emotion_anger",
  "emotion_disgust",
  "emotion_fear",
  "emotion_joy",
  "emotion_sadness",
] as const;
|
||||||
|
|
||||||
|
/** Compact inline style shared by the explorer trigger buttons. */
const shrinkButtonStyle: CSSProperties = { padding: "4px 8px", fontSize: 12 };
|
||||||
|
|
||||||
|
/**
 * Converts a loosely typed value to display text.
 *
 * - strings are returned unchanged;
 * - numbers and booleans are stringified;
 * - objects carrying a string or numeric `id` yield that id as text;
 * - everything else (null, undefined, other objects, functions, ...) yields "".
 */
const toText = (value: unknown) => {
  switch (typeof value) {
    case "string":
      return value;
    case "number":
    case "boolean":
      return String(value);
    case "object": {
      if (value === null || !("id" in value)) {
        return "";
      }
      const candidate = (value as { id?: unknown }).id;
      return typeof candidate === "string" || typeof candidate === "number"
        ? String(candidate)
        : "";
    }
    default:
      return "";
  }
};
|
||||||
|
|
||||||
|
/** Text form of `value` (via `toText`), trimmed and lower-cased for comparisons. */
const normalize = (value: unknown) => {
  const text = toText(value);
  return text.trim().toLowerCase();
};
|
||||||
|
|
||||||
|
/**
 * Searchable text of a record: title and content joined by a single space,
 * with missing parts treated as empty and the result trimmed.
 */
const getRecordText = (record: DatasetRecord) => {
  const heading = record.title ?? "";
  const body = record.content ?? "";
  return [heading, body].join(" ").trim();
};
|
||||||
|
|
||||||
|
/** Escapes every RegExp metacharacter in `value` so that it matches literally. */
const escapeRegExp = (value: string) =>
  value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

/**
 * Builds a case-insensitive pattern that finds `phrase` as a whole phrase.
 *
 * The phrase is lower-cased, split on whitespace, and its tokens are joined
 * with `\s+`, so any run of whitespace in the text matches a gap in the phrase.
 *
 * @param phrase Word or multi-word phrase to look for.
 * @returns The pattern, or `null` when the phrase contains no tokens.
 *          Intended for `.test()`; a match may include one leading delimiter
 *          character.
 */
const buildPhrasePattern = (phrase: string) => {
  const tokens = phrase
    .toLowerCase()
    .trim()
    .split(/\s+/)
    .filter(Boolean)
    .map(escapeRegExp);

  if (!tokens.length) {
    return null;
  }

  // `\b` only fires between a word and a non-word character, so a phrase that
  // starts or ends with punctuation ("c++", "#tag") could never match next to
  // whitespace or the string edges. Requiring "no word character on either
  // side" is equivalent for ordinary words and also handles those phrases.
  // A lookbehind is avoided on purpose to keep older browsers working.
  return new RegExp(`(?:^|\\W)${tokens.join("\\s+")}(?!\\w)`, "i");
};
|
||||||
|
|
||||||
|
/**
 * Counts the occurrences of `pattern` in `text`.
 *
 * The pattern is recompiled from its source with the `g` and `i` flags, so the
 * caller's flags and `lastIndex` are neither used nor modified.
 */
const countMatches = (pattern: RegExp, text: string) => {
  const everyOccurrence = new RegExp(pattern.source, "gi");
  const hits = text.match(everyOccurrence);
  return hits === null ? 0 : hits.length;
};
|
||||||
|
|
||||||
|
/**
 * Returns the day bucket of a record as its first ten characters of date text
 * (`YYYY-MM-DD` for ISO-style values), or "" when no usable date is present.
 *
 * Sources are tried in order:
 * 1. `record.date` (non-empty string) - first 10 characters;
 * 2. `record.dt` (non-empty string) - first 10 characters;
 * 3. `record.timestamp` as epoch seconds, given as a number or numeric string,
 *    converted to a UTC ISO date.
 *
 * Timestamps that do not produce a valid `Date` (NaN, Infinity, values outside
 * the representable range, non-numeric strings) yield "" instead of letting
 * `toISOString()` throw a RangeError.
 */
const getDateBucket = (record: DatasetRecord) => {
  if (typeof record.date === "string" && record.date) {
    return record.date.slice(0, 10);
  }

  if (typeof record.dt === "string" && record.dt) {
    return record.dt.slice(0, 10);
  }

  const raw = record.timestamp;
  let epochSeconds = Number.NaN;
  if (typeof raw === "number") {
    epochSeconds = raw;
  } else if (typeof raw === "string" && raw) {
    epochSeconds = Number(raw);
  }

  // An invalid Date reports NaN from getTime(); toISOString() would throw on it.
  const parsed = new Date(epochSeconds * 1000);
  return Number.isNaN(parsed.getTime()) ? "" : parsed.toISOString().slice(0, 10);
};
|
||||||
|
|
||||||
|
/**
 * Name of the emotion with the highest score on the record, without the
 * `emotion_` prefix.
 *
 * Missing scores count as negative infinity and ties keep the earliest key in
 * `EMOTION_KEYS`. Returns "" when no score compares greater than negative
 * infinity (for example when the record has no emotion scores at all).
 */
const getDominantEmotion = (record: DatasetRecord) => {
  const strongest = EMOTION_KEYS.reduce<{ key: string; score: number }>(
    (best, key) => {
      const score = Number(record[key] ?? Number.NEGATIVE_INFINITY);
      return score > best.score ? { key, score } : best;
    },
    { key: "", score: Number.NEGATIVE_INFINITY },
  );

  return strongest.key.replace("emotion_", "");
};
|
||||||
|
|
||||||
|
/**
 * Whether the record's text (title + content, lower-cased) contains `phrase`
 * according to `buildPhrasePattern`. An empty phrase never matches.
 */
const matchesPhrase = (record: DatasetRecord, phrase: string) => {
  const phrasePattern = buildPhrasePattern(phrase);
  return phrasePattern !== null && phrasePattern.test(getRecordText(record).toLowerCase());
};
|
||||||
|
|
||||||
|
/**
 * Classifies a record by comparing its in-group and out-group pronoun counts:
 * "in" when in-group pronouns dominate, "out" when out-group pronouns
 * dominate, and "tie" when the counts are equal (including both zero).
 */
const recordIdentityBucket = (record: DatasetRecord) => {
  const haystack = getRecordText(record).toLowerCase();
  const balance =
    countMatches(IN_GROUP_PATTERN, haystack) - countMatches(OUT_GROUP_PATTERN, haystack);

  if (balance === 0) {
    return "tie";
  }

  return balance > 0 ? "in" : "out";
};
|
||||||
|
|
||||||
|
/**
 * Tallies how many records each author wrote.
 * Author names are trimmed (case is preserved); records without an author are ignored.
 */
const createAuthorEventCounts = (records: DatasetRecord[]) => {
  const tally = new Map<string, number>();

  records.forEach((entry) => {
    const name = toText(entry.author).trim();
    if (name) {
      tally.set(name, (tally.get(name) ?? 0) + 1);
    }
  });

  return tally;
};
|
||||||
|
|
||||||
|
/**
 * Tallies how many records of type "comment" each author wrote.
 * Author names are trimmed (case is preserved); records without an author are ignored.
 */
const createAuthorCommentCounts = (records: DatasetRecord[]) => {
  const tally = new Map<string, number>();

  records.forEach((entry) => {
    const name = toText(entry.author).trim();
    if (name && entry.type === "comment") {
      tally.set(name, (tally.get(name) ?? 0) + 1);
    }
  });

  return tally;
};
|
||||||
|
|
||||||
|
/**
 * Maps each record's `post_id` (stringified) to its trimmed author name.
 *
 * Records lacking a `post_id` or an author are skipped. When several records
 * share a `post_id`, the last one in `records` wins.
 */
const createAuthorByPostId = (records: DatasetRecord[]) => {
  const lookup = new Map<string, string>();

  records.forEach(({ post_id: postId, author }) => {
    const name = toText(author).trim();
    if (postId != null && name) {
      lookup.set(String(postId), name);
    }
  });

  return lookup;
};
|
||||||
|
|
||||||
|
/** Derives all lookup tables needed by the spec matchers from the record list. */
const buildExplorerContext = (records: DatasetRecord[]): CorpusExplorerContext => {
  const authorByPostId = createAuthorByPostId(records);
  const authorEventCounts = createAuthorEventCounts(records);
  const authorCommentCounts = createAuthorCommentCounts(records);

  return { authorByPostId, authorEventCounts, authorCommentCounts };
};
|
||||||
|
|
||||||
|
/** Spec that shows every record of the current dataset. */
const buildAllRecordsSpec = (): CorpusExplorerSpec => {
  const acceptEverything = () => true;

  return {
    title: "Corpus Explorer",
    description: "All records in the current filtered dataset.",
    emptyMessage: "No records match the current filters.",
    matcher: acceptEverything,
  };
};
|
||||||
|
|
||||||
|
/** Spec selecting records whose author equals `author` (trimmed, case-insensitive). */
const buildUserSpec = (author: string): CorpusExplorerSpec => {
  const writtenByAuthor: CorpusExplorerSpec["matcher"] = (record) => {
    const recordAuthor = normalize(record.author);
    return recordAuthor === normalize(author);
  };

  return {
    title: `User: ${author}`,
    description: `All records authored by ${author}.`,
    emptyMessage: `No records found for ${author}.`,
    matcher: writtenByAuthor,
  };
};
|
||||||
|
|
||||||
|
/** Spec selecting records whose `topic` equals `topic` (trimmed, case-insensitive). */
const buildTopicSpec = (topic: string): CorpusExplorerSpec => {
  const inTopic: CorpusExplorerSpec["matcher"] = (record) => {
    const recordTopic = normalize(record.topic);
    return recordTopic === normalize(topic);
  };

  return {
    title: `Topic: ${topic}`,
    description: `Records assigned to the ${topic} topic bucket.`,
    emptyMessage: `No records found in the ${topic} topic bucket.`,
    matcher: inTopic,
  };
};
|
||||||
|
|
||||||
|
/** Spec selecting records whose day bucket (see `getDateBucket`) equals `date` exactly. */
const buildDateBucketSpec = (date: string): CorpusExplorerSpec => {
  const onThatDay: CorpusExplorerSpec["matcher"] = (record) => {
    const bucket = getDateBucket(record);
    return bucket === date;
  };

  return {
    title: `Date Bucket: ${date}`,
    description: `Records from the ${date} activity bucket.`,
    emptyMessage: `No records found on ${date}.`,
    matcher: onThatDay,
  };
};
|
||||||
|
|
||||||
|
/** Spec selecting records whose text contains `word` as a whole word. */
const buildWordSpec = (word: string): CorpusExplorerSpec => {
  const mentionsWord: CorpusExplorerSpec["matcher"] = (record) => {
    return matchesPhrase(record, word);
  };

  return {
    title: `Word: ${word}`,
    description: `Records containing the word ${word}.`,
    emptyMessage: `No records mention ${word}.`,
    matcher: mentionsWord,
  };
};
|
||||||
|
|
||||||
|
/** Spec selecting records whose text contains the multi-word phrase `ngram`. */
const buildNgramSpec = (ngram: string): CorpusExplorerSpec => {
  const containsPhrase: CorpusExplorerSpec["matcher"] = (record) => {
    return matchesPhrase(record, ngram);
  };

  return {
    title: `N-gram: ${ngram}`,
    description: `Records containing the phrase ${ngram}.`,
    emptyMessage: `No records contain the phrase ${ngram}.`,
    matcher: containsPhrase,
  };
};
|
||||||
|
|
||||||
|
/**
 * Spec selecting records that mention `entity`: either one of the record's
 * `ner_entities` has matching text (trimmed, case-insensitive), or the
 * record's text contains the entity as a phrase.
 */
const buildEntitySpec = (entity: string): CorpusExplorerSpec => {
  const mentionsEntity: CorpusExplorerSpec["matcher"] = (record) => {
    const wanted = normalize(entity);
    const tagged = Array.isArray(record.ner_entities) ? record.ner_entities : [];

    for (const item of tagged) {
      if (normalize(item?.text) === wanted) {
        return true;
      }
    }

    // No tagged entity matched; fall back to a plain text search.
    return matchesPhrase(record, entity);
  };

  return {
    title: `Entity: ${entity}`,
    description: `Records mentioning the ${entity} entity.`,
    emptyMessage: `No records found for the ${entity} entity.`,
    matcher: mentionsEntity,
  };
};
|
||||||
|
|
||||||
|
/** Spec selecting records whose `source` equals `source` (trimmed, case-insensitive). */
const buildSourceSpec = (source: string): CorpusExplorerSpec => {
  const fromSource: CorpusExplorerSpec["matcher"] = (record) => {
    const recordSource = normalize(record.source);
    return recordSource === normalize(source);
  };

  return {
    title: `Source: ${source}`,
    description: `Records from the ${source} source.`,
    emptyMessage: `No records found for ${source}.`,
    matcher: fromSource,
  };
};
|
||||||
|
|
||||||
|
/**
 * Spec selecting records whose highest emotion score belongs to `emotion`
 * (given without the `emotion_` prefix; compared trimmed and lower-cased).
 */
const buildDominantEmotionSpec = (emotion: string): CorpusExplorerSpec => {
  const dominatedByEmotion: CorpusExplorerSpec["matcher"] = (record) => {
    const strongest = getDominantEmotion(record);
    return strongest === normalize(emotion);
  };

  return {
    title: `Dominant Emotion: ${emotion}`,
    description: `Records where ${emotion} is the strongest emotion score.`,
    emptyMessage: `No records found with dominant emotion ${emotion}.`,
    matcher: dominatedByEmotion,
  };
};
|
||||||
|
|
||||||
|
/**
 * Spec selecting replies written by `source` to `target`.
 *
 * A record qualifies when its author equals `source`, it has a non-empty
 * `reply_to`, and `context.authorByPostId` resolves that `reply_to` to
 * `target`. All name comparisons are trimmed and case-insensitive.
 */
const buildReplyPairSpec = (source: string, target: string): CorpusExplorerSpec => {
  const isReplyOnPath: CorpusExplorerSpec["matcher"] = (record, context) => {
    const replyTo = record.reply_to;
    const hasReplyTarget = replyTo !== null && replyTo !== undefined && replyTo !== "";

    if (!hasReplyTarget || normalize(record.author) !== normalize(source)) {
      return false;
    }

    const repliedToAuthor = context.authorByPostId.get(String(replyTo));
    return normalize(repliedToAuthor) === normalize(target);
  };

  return {
    title: `Reply Path: ${source} -> ${target}`,
    description: `Reply records authored by ${source} in response to ${target}.`,
    emptyMessage: `No reply records found for ${source} -> ${target}.`,
    matcher: isReplyOnPath,
  };
};
|
||||||
|
|
||||||
|
/** Spec selecting records whose author has exactly one record in the context counts. */
const buildOneTimeUsersSpec = (): CorpusExplorerSpec => {
  const byOneTimeAuthor: CorpusExplorerSpec["matcher"] = (record, context) => {
    const name = toText(record.author).trim();
    if (name === "") {
      return false;
    }
    return context.authorEventCounts.get(name) === 1;
  };

  return {
    title: "One-Time Users",
    description: "Records written by authors who appear exactly once in the filtered corpus.",
    emptyMessage: "No one-time-user records found.",
    matcher: byOneTimeAuthor,
  };
};
|
||||||
|
|
||||||
|
/**
 * Spec selecting comment records written by the `topAuthorCount` authors with
 * the most comments.
 *
 * Authors are ranked by `context.authorCommentCounts` in descending order;
 * ties keep the map's insertion order because `Array.prototype.sort` is stable.
 *
 * The ranking depends only on the context, so it is computed once per context
 * object and cached, instead of re-sorting every author for every record.
 * The cache assumes a context's counts are not mutated after first use.
 *
 * @param topAuthorCount Number of top-ranked commenters to include.
 */
const buildTopCommentersSpec = (topAuthorCount: number): CorpusExplorerSpec => {
  // Weakly keyed so cached rankings disappear together with their context.
  const rankingByContext = new WeakMap<CorpusExplorerContext, Set<string>>();

  const topAuthorsFor = (context: CorpusExplorerContext) => {
    let topAuthors = rankingByContext.get(context);
    if (!topAuthors) {
      topAuthors = new Set(
        Array.from(context.authorCommentCounts.entries())
          .sort((a, b) => b[1] - a[1])
          .slice(0, topAuthorCount)
          .map(([author]) => author),
      );
      rankingByContext.set(context, topAuthors);
    }
    return topAuthors;
  };

  return {
    title: "Top Commenters",
    description: `Comment records from the top ${topAuthorCount} commenters in the filtered corpus.`,
    emptyMessage: "No top-commenter records found.",
    matcher: (record, context) => {
      if (record.type !== "comment") {
        return false;
      }

      return topAuthorsFor(context).has(toText(record.author).trim());
    },
  };
};
|
||||||
|
|
||||||
|
/** Spec selecting comment records whose author has exactly one comment in the context counts. */
const buildSingleCommentAuthorsSpec = (): CorpusExplorerSpec => {
  const bySingleCommentAuthor: CorpusExplorerSpec["matcher"] = (record, context) => {
    if (record.type !== "comment") {
      return false;
    }

    const name = toText(record.author).trim();
    return name !== "" && context.authorCommentCounts.get(name) === 1;
  };

  return {
    title: "Single-Comment Authors",
    description: "Comment records from authors who commented exactly once.",
    emptyMessage: "No single-comment-author records found.",
    matcher: bySingleCommentAuthor,
  };
};
|
||||||
|
|
||||||
|
/**
 * Spec selecting records that `recordIdentityBucket` places in `bucket`
 * ("in", "out" or "tie").
 */
const buildIdentityBucketSpec = (bucket: "in" | "out" | "tie"): CorpusExplorerSpec => {
  const heading = {
    in: "In-Group Posts",
    out: "Out-Group Posts",
    tie: "Balanced Posts",
  }[bucket];
  const loweredHeading = heading.toLowerCase();

  return {
    title: heading,
    description: `Records in the ${loweredHeading} cultural bucket.`,
    emptyMessage: `No records found for ${loweredHeading}.`,
    matcher: (record) => {
      const assigned = recordIdentityBucket(record);
      return assigned === bucket;
    },
  };
};
|
||||||
|
|
||||||
|
/**
 * Generic spec selecting records whose text (title + content) matches `pattern`.
 *
 * @param title Title of the explorer view; also used, lower-cased, in the empty message.
 * @param description Description of the explorer view.
 * @param pattern Pattern tested against each record's text.
 */
const buildPatternSpec = (
  title: string,
  description: string,
  pattern: RegExp,
): CorpusExplorerSpec => ({
  title,
  description,
  emptyMessage: `No records found for ${title.toLowerCase()}.`,
  matcher: (record) => {
    // `test` resumes from `lastIndex` on global/sticky patterns, which would
    // make the result for one record depend on the previous record. Start
    // every check from the beginning of the text.
    pattern.lastIndex = 0;
    return pattern.test(getRecordText(record));
  },
});
|
||||||
|
|
||||||
|
/** Spec selecting records that contain hedging language (`HEDGE_PATTERN`). */
const buildHedgeSpec = (): CorpusExplorerSpec => {
  return buildPatternSpec(
    "Hedging Words",
    "Records containing hedging language.",
    HEDGE_PATTERN,
  );
};
|
||||||
|
|
||||||
|
/** Spec selecting records that contain certainty language (`CERTAINTY_PATTERN`). */
const buildCertaintySpec = (): CorpusExplorerSpec => {
  return buildPatternSpec(
    "Certainty Words",
    "Records containing certainty language.",
    CERTAINTY_PATTERN,
  );
};
|
||||||
|
|
||||||
|
/** Spec selecting records that contain deontic language (`DEONTIC_PATTERN`). */
const buildDeonticSpec = (): CorpusExplorerSpec => {
  return buildPatternSpec(
    "Need/Should Words",
    "Records containing deontic language.",
    DEONTIC_PATTERN,
  );
};
|
||||||
|
|
||||||
|
/** Spec selecting records that contain permission language (`PERMISSION_PATTERN`). */
const buildPermissionSpec = (): CorpusExplorerSpec => {
  return buildPatternSpec(
    "Permission Words",
    "Records containing permission language.",
    PERMISSION_PATTERN,
  );
};
|
||||||
|
|
||||||
|
/** Returns the shared compact button style object (the same instance on every call). */
const getExplorerButtonStyle = () => {
  return shrinkButtonStyle;
};
|
||||||
|
|
||||||
|
export type { DatasetRecord, CorpusExplorerContext, CorpusExplorerSpec };
|
||||||
|
export {
|
||||||
|
buildAllRecordsSpec,
|
||||||
|
buildCertaintySpec,
|
||||||
|
buildDateBucketSpec,
|
||||||
|
buildDeonticSpec,
|
||||||
|
buildDominantEmotionSpec,
|
||||||
|
buildEntitySpec,
|
||||||
|
buildExplorerContext,
|
||||||
|
buildHedgeSpec,
|
||||||
|
buildIdentityBucketSpec,
|
||||||
|
buildNgramSpec,
|
||||||
|
buildOneTimeUsersSpec,
|
||||||
|
buildPermissionSpec,
|
||||||
|
buildReplyPairSpec,
|
||||||
|
buildSingleCommentAuthorsSpec,
|
||||||
|
buildSourceSpec,
|
||||||
|
buildTopicSpec,
|
||||||
|
buildTopCommentersSpec,
|
||||||
|
buildUserSpec,
|
||||||
|
buildWordSpec,
|
||||||
|
getDateBucket,
|
||||||
|
getExplorerButtonStyle,
|
||||||
|
toText,
|
||||||
|
};
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
import nltk
|
import nltk
|
||||||
|
import json
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from nltk.corpus import stopwords
|
from nltk.corpus import stopwords
|
||||||
|
|
||||||
@@ -27,6 +28,8 @@ DOMAIN_STOPWORDS = {
|
|||||||
"one",
|
"one",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EXCLUDED_AUTHORS = {"[deleted]", "automoderator"}
|
||||||
|
|
||||||
nltk.download("stopwords")
|
nltk.download("stopwords")
|
||||||
EXCLUDE_WORDS = set(stopwords.words("english")) | DOMAIN_STOPWORDS
|
EXCLUDE_WORDS = set(stopwords.words("english")) | DOMAIN_STOPWORDS
|
||||||
|
|
||||||
@@ -46,6 +49,12 @@ class StatGen:
|
|||||||
filters = filters or {}
|
filters = filters or {}
|
||||||
filtered_df = df.copy()
|
filtered_df = df.copy()
|
||||||
|
|
||||||
|
if "author" in filtered_df.columns:
|
||||||
|
normalized_authors = (
|
||||||
|
filtered_df["author"].fillna("").astype(str).str.strip().str.lower()
|
||||||
|
)
|
||||||
|
filtered_df = filtered_df[~normalized_authors.isin(EXCLUDED_AUTHORS)]
|
||||||
|
|
||||||
search_query = filters.get("search_query", None)
|
search_query = filters.get("search_query", None)
|
||||||
start_date_filter = filters.get("start_date", None)
|
start_date_filter = filters.get("start_date", None)
|
||||||
end_date_filter = filters.get("end_date", None)
|
end_date_filter = filters.get("end_date", None)
|
||||||
@@ -75,9 +84,15 @@ class StatGen:
|
|||||||
|
|
||||||
return filtered_df
|
return filtered_df
|
||||||
|
|
||||||
|
def _json_ready_records(self, df: pd.DataFrame) -> list[dict]:
|
||||||
|
return json.loads(
|
||||||
|
df.to_json(orient="records", date_format="iso", date_unit="s")
|
||||||
|
)
|
||||||
|
|
||||||
## Public Methods
|
## Public Methods
|
||||||
def filter_dataset(self, df: pd.DataFrame, filters: dict | None = None) -> list[dict]:
|
def filter_dataset(self, df: pd.DataFrame, filters: dict | None = None) -> list[dict]:
|
||||||
return self._prepare_filtered_df(df, filters).to_dict(orient="records")
|
filtered_df = self._prepare_filtered_df(df, filters)
|
||||||
|
return self._json_ready_records(filtered_df)
|
||||||
|
|
||||||
def temporal(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
def temporal(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||||
filtered_df = self._prepare_filtered_df(df, filters)
|
filtered_df = self._prepare_filtered_df(df, filters)
|
||||||
|
|||||||
@@ -591,7 +591,8 @@ def get_full_dataset(dataset_id: int):
|
|||||||
)
|
)
|
||||||
|
|
||||||
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||||
return jsonify(dataset_content.to_dict(orient="records")), 200
|
filters = get_request_filters()
|
||||||
|
return jsonify(stat_gen.filter_dataset(dataset_content, filters)), 200
|
||||||
except NotAuthorisedException:
|
except NotAuthorisedException:
|
||||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||||
except NonExistentDatasetException:
|
except NonExistentDatasetException:
|
||||||
|
|||||||
Reference in New Issue
Block a user