Compare commits
25 Commits
94befb61c5
...
acc591ff1e
| Author | SHA1 | Date | |
|---|---|---|---|
| acc591ff1e | |||
| e054997bb1 | |||
| e5414befa7 | |||
| 86926898ce | |||
| b1177540a1 | |||
| f604fcc531 | |||
| b7aec2b0ea | |||
| 1446dd176d | |||
| c215024ef2 | |||
| 17ef42e548 | |||
| 7e4a91bb5e | |||
| 436549641f | |||
| 3e78a54388 | |||
| 71998c450e | |||
| 2a00384a55 | |||
| 8372aa7278 | |||
| 7b5a939271 | |||
| 2fa1dff4b7 | |||
| 31fb275ee3 | |||
| 8a0f6e71e8 | |||
| 9093059d05 | |||
| 8a13444b16 | |||
| 3468fdc2ea | |||
| 09a4f9036f | |||
| 97fccd073b |
@@ -2,7 +2,7 @@
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
||||
<link rel="icon" type="image/png" href="/icon.png" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>frontend</title>
|
||||
</head>
|
||||
|
||||
BIN
frontend/public/icon.png
Normal file
BIN
frontend/public/icon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 19 KiB |
158
frontend/src/components/CulturalStats.tsx
Normal file
158
frontend/src/components/CulturalStats.tsx
Normal file
@@ -0,0 +1,158 @@
|
||||
import Card from "./Card";
|
||||
import StatsStyling from "../styles/stats_styling";
|
||||
import type { CulturalAnalysisResponse } from "../types/ApiTypes";
|
||||
|
||||
const styles = StatsStyling;

type CulturalStatsProps = {
  data: CulturalAnalysisResponse;
};

/** Text shown in place of a metric the response does not provide. */
const EMPTY_VALUE = "—";

/** Upper bound on how many entities the entity mood list renders. */
const MAX_ENTITIES = 20;

/** Renders a count with locale separators, or the placeholder when absent. */
const formatCount = (value: number | null | undefined): string =>
  value?.toLocaleString() ?? EMPTY_VALUE;

/** Renders a 0..1 ratio as a percentage with two decimals, or the placeholder. */
const formatShare = (ratio: number | null | undefined): string =>
  typeof ratio === "number" ? `${(ratio * 100).toFixed(2)}%` : EMPTY_VALUE;

/** Builds the "per 1k words" sublabel, falling back to a generic caption. */
const formatRate = (rate: number | null | undefined): string =>
  typeof rate === "number" ? `${rate.toFixed(1)} per 1k words` : "Word frequency";

/**
 * Describes the highest-scoring emotion in an average-emotion map.
 *
 * @param emotionAvg Map of emotion key to average score; may be undefined.
 * @returns `label (xx.x%)` for the top entry, with the first "emotion_"
 *   occurrence stripped from the key, or "—" when there are no entries.
 */
const topEmotion = (emotionAvg: Record<string, number> | undefined): string => {
  let leader: [string, number] | undefined;

  // Strict ">" keeps the earliest entry on ties, matching a stable descending sort.
  for (const candidate of Object.entries(emotionAvg ?? {})) {
    if (leader === undefined || candidate[1] > leader[1]) {
      leader = candidate;
    }
  }

  if (leader === undefined) {
    return EMPTY_VALUE;
  }

  const [emotionKey, score] = leader;
  const readableName = emotionKey.replace("emotion_", "");
  return `${readableName} (${(score * 100).toFixed(1)}%)`;
};

/**
 * Cultural analysis panel: in-group / out-group wording counts, stance-marker
 * totals, the leading emotion for "us" and "them" posts, and a list of the
 * most-mentioned entities with their leading emotion.
 */
const CulturalStats = ({ data }: CulturalStatsProps) => {
  const identity = data.identity_markers;
  const stance = data.stance_markers;

  // Word counts default to 0 so the combined total is always a number.
  const inGroupWords = identity?.in_group_usage ?? 0;
  const outGroupWords = identity?.out_group_usage ?? 0;

  // Entities ordered by post count (descending), capped at MAX_ENTITIES.
  const entities = Object.entries(data.avg_emotion_per_entity?.entity_emotion_avg ?? {})
    .sort(([, left], [, right]) => right.post_count - left.post_count)
    .slice(0, MAX_ENTITIES);

  // Every summary tile shares the same width, so they are described as data.
  const summaryCards = [
    { label: "In-Group Words", value: inGroupWords.toLocaleString(), sublabel: "Times we/us/our appears" },
    { label: "Out-Group Words", value: outGroupWords.toLocaleString(), sublabel: "Times they/them/their appears" },
    { label: "In-Group Posts", value: formatCount(identity?.in_group_posts), sublabel: 'Posts leaning toward "us" language' },
    { label: "Out-Group Posts", value: formatCount(identity?.out_group_posts), sublabel: 'Posts leaning toward "them" language' },
    { label: "Balanced Posts", value: formatCount(identity?.tie_posts), sublabel: "Posts with equal us/them signals" },
    { label: "Total Group Words", value: (inGroupWords + outGroupWords).toLocaleString(), sublabel: "In-group + out-group words" },
    { label: "In-Group Share", value: formatShare(identity?.in_group_ratio), sublabel: "Share of all words" },
    { label: "Out-Group Share", value: formatShare(identity?.out_group_ratio), sublabel: "Share of all words" },
    { label: "Hedging Words", value: formatCount(stance?.hedge_total), sublabel: formatRate(stance?.hedge_per_1k_tokens) },
    { label: "Certainty Words", value: formatCount(stance?.certainty_total), sublabel: formatRate(stance?.certainty_per_1k_tokens) },
    { label: "Need/Should Words", value: formatCount(stance?.deontic_total), sublabel: formatRate(stance?.deontic_per_1k_tokens) },
    { label: "Permission Words", value: formatCount(stance?.permission_total), sublabel: formatRate(stance?.permission_per_1k_tokens) },
  ];

  return (
    <div style={styles.page}>
      <div style={{ ...styles.container, ...styles.grid }}>
        <div style={{ ...styles.card, gridColumn: "span 12" }}>
          <h2 style={styles.sectionTitle}>Community Framing Overview</h2>
          <p style={styles.sectionSubtitle}>Simple view of how often people use "us" words vs "them" words, and the tone around that language.</p>
        </div>

        {summaryCards.map((card) => (
          <Card
            key={card.label}
            label={card.label}
            value={card.value}
            sublabel={card.sublabel}
            style={{ gridColumn: "span 3" }}
          />
        ))}

        <div style={{ ...styles.card, gridColumn: "span 6" }}>
          <h2 style={styles.sectionTitle}>Mood in "Us" Posts</h2>
          <p style={styles.sectionSubtitle}>Most likely emotion when in-group wording is stronger.</p>
          <div style={styles.topUserName}>{topEmotion(identity?.in_group_emotion_avg)}</div>
        </div>

        <div style={{ ...styles.card, gridColumn: "span 6" }}>
          <h2 style={styles.sectionTitle}>Mood in "Them" Posts</h2>
          <p style={styles.sectionSubtitle}>Most likely emotion when out-group wording is stronger.</p>
          <div style={styles.topUserName}>{topEmotion(identity?.out_group_emotion_avg)}</div>
        </div>

        <div style={{ ...styles.card, gridColumn: "span 12" }}>
          <h2 style={styles.sectionTitle}>Entity Mood Snapshot</h2>
          <p style={styles.sectionSubtitle}>Most mentioned entities and the mood that appears most with each.</p>
          {entities.length === 0 ? (
            <div style={styles.topUserMeta}>No entity-level cultural data available.</div>
          ) : (
            <div style={{ ...styles.topUsersList, maxHeight: 420, overflowY: "auto" }}>
              {entities.map(([entity, aggregate]) => (
                <div key={entity} style={styles.topUserItem}>
                  <div style={styles.topUserName}>{entity}</div>
                  <div style={styles.topUserMeta}>
                    {aggregate.post_count.toLocaleString()} posts • Likely mood: {topEmotion(aggregate.emotion_avg)}
                  </div>
                </div>
              ))}
            </div>
          )}
        </div>
      </div>
    </div>
  );
};

export default CulturalStats;
|
||||
@@ -9,6 +9,9 @@ type EmotionalStatsProps = {
|
||||
|
||||
const EmotionalStats = ({contentData}: EmotionalStatsProps) => {
|
||||
const rows = contentData.average_emotion_by_topic ?? [];
|
||||
const overallEmotionAverage = contentData.overall_emotion_average ?? [];
|
||||
const dominantEmotionDistribution = contentData.dominant_emotion_distribution ?? [];
|
||||
const emotionBySource = contentData.emotion_by_source ?? [];
|
||||
const lowSampleThreshold = 20;
|
||||
const stableSampleThreshold = 50;
|
||||
const emotionKeys = rows.length
|
||||
@@ -64,39 +67,104 @@ const EmotionalStats = ({contentData}: EmotionalStatsProps) => {
|
||||
return (
|
||||
<div style={styles.page}>
|
||||
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
|
||||
<h2 style={styles.sectionTitle}>Average Emotion by Topic</h2>
|
||||
<p style={styles.sectionSubtitle}>Read confidence together with sample size. Topics with fewer than {lowSampleThreshold} events are usually noisy and less reliable.</p>
|
||||
<h2 style={styles.sectionTitle}>Topic Mood Overview</h2>
|
||||
<p style={styles.sectionSubtitle}>Use the strength score together with post count. Topics with fewer than {lowSampleThreshold} events are often noisy.</p>
|
||||
<div style={styles.emotionalSummaryRow}>
|
||||
<span><strong style={{ color: "#24292f" }}>Topics:</strong> {strongestPerTopic.length}</span>
|
||||
<span><strong style={{ color: "#24292f" }}>Median Sample:</strong> {medianSampleSize} events</span>
|
||||
<span><strong style={{ color: "#24292f" }}>Low Sample (<{lowSampleThreshold}):</strong> {lowSampleTopics}</span>
|
||||
<span><strong style={{ color: "#24292f" }}>Stable Sample ({stableSampleThreshold}+):</strong> {stableSampleTopics}</span>
|
||||
<span><strong style={{ color: "#24292f" }}>Median Posts:</strong> {medianSampleSize}</span>
|
||||
<span><strong style={{ color: "#24292f" }}>Small Topics (<{lowSampleThreshold}):</strong> {lowSampleTopics}</span>
|
||||
<span><strong style={{ color: "#24292f" }}>Stable Topics ({stableSampleThreshold}+):</strong> {stableSampleTopics}</span>
|
||||
</div>
|
||||
<p style={{ ...styles.sectionSubtitle, marginTop: 10, marginBottom: 0 }}>
|
||||
Confidence reflects how strongly one emotion leads within a topic, not model accuracy. Use larger samples for stronger conclusions.
|
||||
Strength means how far the top emotion is ahead in that topic. It does not mean model accuracy.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div style={{ ...styles.container, ...styles.grid }}>
|
||||
{strongestPerTopic.map((topic) => (
|
||||
<div key={topic.topic} style={{ ...styles.card, gridColumn: "span 4" }}>
|
||||
<h3 style={{ ...styles.sectionTitle, marginBottom: 6 }}>{topic.topic}</h3>
|
||||
<div style={styles.emotionalTopicLabel}>
|
||||
Top Emotion
|
||||
<div style={{ ...styles.card, gridColumn: "span 4" }}>
|
||||
<h2 style={styles.sectionTitle}>Mood Averages</h2>
|
||||
<p style={styles.sectionSubtitle}>Average score for each emotion.</p>
|
||||
{!overallEmotionAverage.length ? (
|
||||
<div style={styles.topUserMeta}>No overall emotion averages available.</div>
|
||||
) : (
|
||||
<div style={{ ...styles.topUsersList, maxHeight: 260, overflowY: "auto" }}>
|
||||
{[...overallEmotionAverage]
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.map((row) => (
|
||||
<div key={row.emotion} style={styles.topUserItem}>
|
||||
<div style={styles.topUserName}>{formatEmotion(row.emotion)}</div>
|
||||
<div style={styles.topUserMeta}>{row.score.toFixed(3)}</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
<div style={styles.emotionalTopicValue}>
|
||||
{formatEmotion(topic.emotion)}
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div style={{ ...styles.card, gridColumn: "span 4" }}>
|
||||
<h2 style={styles.sectionTitle}>Mood Split</h2>
|
||||
<p style={styles.sectionSubtitle}>How often each emotion is dominant.</p>
|
||||
{!dominantEmotionDistribution.length ? (
|
||||
<div style={styles.topUserMeta}>No dominant-emotion split available.</div>
|
||||
) : (
|
||||
<div style={{ ...styles.topUsersList, maxHeight: 260, overflowY: "auto" }}>
|
||||
{[...dominantEmotionDistribution]
|
||||
.sort((a, b) => b.ratio - a.ratio)
|
||||
.map((row) => (
|
||||
<div key={row.emotion} style={styles.topUserItem}>
|
||||
<div style={styles.topUserName}>{formatEmotion(row.emotion)}</div>
|
||||
<div style={styles.topUserMeta}>{(row.ratio * 100).toFixed(1)}% • {row.count.toLocaleString()} events</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
<div style={styles.emotionalMetricRow}>
|
||||
<span>Confidence</span>
|
||||
<span style={styles.emotionalMetricValue}>{topic.value.toFixed(3)}</span>
|
||||
</div>
|
||||
<div style={styles.emotionalMetricRowCompact}>
|
||||
<span>Sample Size</span>
|
||||
<span style={styles.emotionalMetricValue}>{topic.count} events</span>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div style={{ ...styles.card, gridColumn: "span 4" }}>
|
||||
<h2 style={styles.sectionTitle}>Mood by Source</h2>
|
||||
<p style={styles.sectionSubtitle}>Leading emotion in each source.</p>
|
||||
{!emotionBySource.length ? (
|
||||
<div style={styles.topUserMeta}>No source emotion profile available.</div>
|
||||
) : (
|
||||
<div style={{ ...styles.topUsersList, maxHeight: 260, overflowY: "auto" }}>
|
||||
{[...emotionBySource]
|
||||
.sort((a, b) => b.event_count - a.event_count)
|
||||
.map((row) => (
|
||||
<div key={row.source} style={styles.topUserItem}>
|
||||
<div style={styles.topUserName}>{row.source}</div>
|
||||
<div style={styles.topUserMeta}>
|
||||
{formatEmotion(row.dominant_emotion)} • {row.dominant_score.toFixed(3)} • {row.event_count.toLocaleString()} events
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
||||
<h2 style={styles.sectionTitle}>Topic Snapshots</h2>
|
||||
<p style={styles.sectionSubtitle}>Per-topic mood with strength and post count.</p>
|
||||
<div style={{ ...styles.grid, marginTop: 10 }}>
|
||||
{strongestPerTopic.map((topic) => (
|
||||
<div key={topic.topic} style={{ ...styles.cardBase, gridColumn: "span 4" }}>
|
||||
<h3 style={{ ...styles.sectionTitle, marginBottom: 6 }}>{topic.topic}</h3>
|
||||
<div style={styles.emotionalTopicLabel}>
|
||||
Likely Mood
|
||||
</div>
|
||||
<div style={styles.emotionalTopicValue}>
|
||||
{formatEmotion(topic.emotion)}
|
||||
</div>
|
||||
<div style={styles.emotionalMetricRow}>
|
||||
<span>Strength</span>
|
||||
<span style={styles.emotionalMetricValue}>{topic.value.toFixed(3)}</span>
|
||||
</div>
|
||||
<div style={styles.emotionalMetricRowCompact}>
|
||||
<span>Posts in Topic</span>
|
||||
<span style={styles.emotionalMetricValue}>{topic.count}</span>
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
|
||||
208
frontend/src/components/InteractionalStats.tsx
Normal file
208
frontend/src/components/InteractionalStats.tsx
Normal file
@@ -0,0 +1,208 @@
|
||||
import Card from "./Card";
|
||||
import StatsStyling from "../styles/stats_styling";
|
||||
import type { InteractionAnalysisResponse } from "../types/ApiTypes";
|
||||
import {
|
||||
ResponsiveContainer,
|
||||
BarChart,
|
||||
Bar,
|
||||
XAxis,
|
||||
YAxis,
|
||||
CartesianGrid,
|
||||
Tooltip,
|
||||
PieChart,
|
||||
Pie,
|
||||
Cell,
|
||||
Legend,
|
||||
} from "recharts";
|
||||
|
||||
const styles = StatsStyling;

type InteractionalStatsProps = {
  data: InteractionAnalysisResponse;
};

/** A validated `[[source, target], replyCount]` entry. */
type InteractionPair = [[string, string], number];

/** Text shown in place of a metric the response does not provide. */
const EMPTY_VALUE = "—";

/** Slice colours for the concentration pie: top-10% authors first, everyone else second. */
const PIE_COLORS = ["#2b6777", "#c8d8e4"];

/** How many pairs the "Frequent Reply Paths" list shows. */
const MAX_LISTED_PAIRS = 20;

/** How many of the listed pairs are also drawn in the bar chart. */
const MAX_CHARTED_PAIRS = 8;

/** Returns the value when it is a number, otherwise null. */
const numberOrNull = (value: unknown): number | null =>
  typeof value === "number" ? value : null;

/** Rounds to one decimal so chart tooltips do not show long float tails. */
const roundToTenth = (value: number): number => Number(value.toFixed(1));

/** Runtime guard: true only for a `[[string, string], number]` shaped entry. */
const isInteractionPair = (item: unknown): item is InteractionPair => {
  if (!Array.isArray(item) || item.length !== 2) {
    return false;
  }

  const [pair, count] = item;
  return Array.isArray(pair)
    && pair.length === 2
    && typeof pair[0] === "string"
    && typeof pair[1] === "string"
    && typeof count === "number";
};

/**
 * Interaction analysis panel: reply-graph size cards, comment-concentration
 * cards, a bar chart of the top reply pairs, a pie chart of the top-10% comment
 * share, and a list of the most frequent reply paths.
 *
 * Every section falls back to a placeholder or message when its data is
 * missing, so a partial response never renders a blank chart.
 */
const InteractionalStats = ({ data }: InteractionalStatsProps) => {
  const graph = data.interaction_graph ?? {};
  const userCount = Object.keys(graph).length;
  // One edge per (source, target) entry; its value is added into the reply total.
  const edges = Object.values(graph).flatMap((targets) => Object.values(targets));
  const edgeCount = edges.length;
  const interactionVolume = edges.reduce((sum, value) => sum + value, 0);

  const concentration = data.conversation_concentration;
  const topTenCommentShare = numberOrNull(concentration?.top_10pct_comment_share);
  const topTenAuthorCount = numberOrNull(concentration?.top_10pct_author_count);
  const totalCommentingAuthors = numberOrNull(concentration?.total_commenting_authors);
  const singleCommentAuthorRatio = numberOrNull(concentration?.single_comment_author_ratio);
  const singleCommentAuthors = numberOrNull(concentration?.single_comment_authors);

  // Malformed entries are dropped before the cap is applied.
  const topPairs = (data.top_interaction_pairs ?? [])
    .filter(isInteractionPair)
    .slice(0, MAX_LISTED_PAIRS);

  const topPairChartData = topPairs
    .slice(0, MAX_CHARTED_PAIRS)
    .map(([[source, target], value], index) => ({
      pair: `${source} -> ${target}`,
      replies: value,
      rank: index + 1,
    }));

  const topTenSharePercent = topTenCommentShare === null ? null : topTenCommentShare * 100;

  // Clamp the remainder at 0 in case the reported share exceeds 100%.
  const concentrationPieData: { name: string; value: number }[] = topTenSharePercent === null
    ? []
    : [
        { name: "Top 10% authors", value: roundToTenth(topTenSharePercent) },
        { name: "Other authors", value: roundToTenth(Math.max(0, 100 - topTenSharePercent)) },
      ];

  return (
    <div style={styles.page}>
      <div style={{ ...styles.container, ...styles.grid }}>
        <div style={{ ...styles.card, gridColumn: "span 12" }}>
          <h2 style={styles.sectionTitle}>Conversation Overview</h2>
          <p style={styles.sectionSubtitle}>Who talks to who, and how concentrated the replies are.</p>
        </div>

        <Card
          label="Average Reply Depth"
          value={typeof data.average_thread_depth === "number" ? data.average_thread_depth.toFixed(2) : EMPTY_VALUE}
          sublabel="How deep reply chains usually go"
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Users in Network"
          value={userCount.toLocaleString()}
          sublabel="Users in the reply graph"
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="User-to-User Links"
          value={edgeCount.toLocaleString()}
          sublabel="Unique reply directions"
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Total Replies"
          value={interactionVolume.toLocaleString()}
          sublabel="All reply links combined"
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Concentrated Replies"
          value={topTenSharePercent === null ? EMPTY_VALUE : `${topTenSharePercent.toFixed(1)}%`}
          sublabel={topTenAuthorCount === null || totalCommentingAuthors === null
            ? "Reply share from the top 10% commenters"
            : `${topTenAuthorCount.toLocaleString()} of ${totalCommentingAuthors.toLocaleString()} authors`}
          style={{ gridColumn: "span 6" }}
        />
        <Card
          label="Single-Comment Authors"
          value={singleCommentAuthorRatio === null ? EMPTY_VALUE : `${(singleCommentAuthorRatio * 100).toFixed(1)}%`}
          sublabel={singleCommentAuthors === null
            ? "Authors who commented exactly once"
            : `${singleCommentAuthors.toLocaleString()} authors commented exactly once`}
          style={{ gridColumn: "span 6" }}
        />

        <div style={{ ...styles.card, gridColumn: "span 12" }}>
          <h2 style={styles.sectionTitle}>Conversation Visuals</h2>
          <p style={styles.sectionSubtitle}>Main reply links and concentration split.</p>

          <div style={{ ...styles.grid, marginTop: 12 }}>
            <div style={{ ...styles.cardBase, gridColumn: "span 6" }}>
              <h3 style={{ ...styles.sectionTitle, fontSize: "1rem" }}>Top Interaction Pairs</h3>
              {topPairChartData.length === 0 ? (
                <div style={styles.topUserMeta}>No interaction pair data available.</div>
              ) : (
                <div style={{ width: "100%", height: 300 }}>
                  <ResponsiveContainer>
                    <BarChart data={topPairChartData} layout="vertical" margin={{ top: 8, right: 16, left: 16, bottom: 8 }}>
                      <CartesianGrid strokeDasharray="3 3" stroke="#d9e2ec" />
                      <XAxis type="number" allowDecimals={false} />
                      <YAxis
                        type="category"
                        dataKey="rank"
                        tickFormatter={(value) => `#${value}`}
                        width={36}
                      />
                      {/* The axis only has room for "#n", so the tooltip names the actual pair. */}
                      <Tooltip labelFormatter={(_label, payload) => payload?.[0]?.payload?.pair ?? ""} />
                      <Bar dataKey="replies" fill="#2b6777" radius={[0, 6, 6, 0]} />
                    </BarChart>
                  </ResponsiveContainer>
                </div>
              )}
            </div>

            <div style={{ ...styles.cardBase, gridColumn: "span 6" }}>
              <h3 style={{ ...styles.sectionTitle, fontSize: "1rem" }}>Top 10% vs Other Comment Share</h3>
              {concentrationPieData.length === 0 ? (
                <div style={styles.topUserMeta}>No comment concentration data available.</div>
              ) : (
                <div style={{ width: "100%", height: 300 }}>
                  <ResponsiveContainer>
                    <PieChart>
                      <Pie
                        data={concentrationPieData}
                        dataKey="value"
                        nameKey="name"
                        innerRadius={56}
                        outerRadius={88}
                        paddingAngle={2}
                      >
                        {concentrationPieData.map((entry, index) => (
                          <Cell key={`${entry.name}-${index}`} fill={PIE_COLORS[index % PIE_COLORS.length]} />
                        ))}
                      </Pie>
                      <Tooltip />
                      <Legend verticalAlign="bottom" height={36} />
                    </PieChart>
                  </ResponsiveContainer>
                </div>
              )}
            </div>
          </div>
        </div>

        <div style={{ ...styles.card, gridColumn: "span 12" }}>
          <h2 style={styles.sectionTitle}>Frequent Reply Paths</h2>
          <p style={styles.sectionSubtitle}>Most common user-to-user reply paths.</p>
          {topPairs.length === 0 ? (
            <div style={styles.topUserMeta}>No interaction pair data available.</div>
          ) : (
            <div style={{ ...styles.topUsersList, maxHeight: 420, overflowY: "auto" }}>
              {topPairs.map(([[source, target], value], index) => (
                <div key={`${source}->${target}-${index}`} style={styles.topUserItem}>
                  {/* Rendered from a string expression: a bare ">" is not valid JSX text. */}
                  <div style={styles.topUserName}>{`${source} -> ${target}`}</div>
                  <div style={styles.topUserMeta}>{value.toLocaleString()} replies</div>
                </div>
              ))}
            </div>
          )}
        </div>
      </div>
    </div>
  );
};

export default InteractionalStats;
|
||||
91
frontend/src/components/LinguisticStats.tsx
Normal file
91
frontend/src/components/LinguisticStats.tsx
Normal file
@@ -0,0 +1,91 @@
|
||||
import Card from "./Card";
|
||||
import StatsStyling from "../styles/stats_styling";
|
||||
import type { LinguisticAnalysisResponse } from "../types/ApiTypes";
|
||||
|
||||
const styles = StatsStyling;

type LinguisticStatsProps = {
  data: LinguisticAnalysisResponse;
};

/** Text shown in place of a metric the response does not provide. */
const EMPTY_VALUE = "—";

/** How many single words the "Top Words" list shows. */
const MAX_WORDS = 20;

/** How many phrases each n-gram list shows. */
const MAX_PHRASES = 10;

type RankedListProps = {
  title: string;
  subtitle: string;
  emptyMessage: string;
  rows: { label: string; count: number }[];
};

/**
 * One scrollable "term + use count" card. Shows `emptyMessage` instead of an
 * empty box when there are no rows.
 */
const RankedList = ({ title, subtitle, emptyMessage, rows }: RankedListProps) => (
  <div style={{ ...styles.card, gridColumn: "span 4" }}>
    <h2 style={styles.sectionTitle}>{title}</h2>
    <p style={styles.sectionSubtitle}>{subtitle}</p>
    {rows.length === 0 ? (
      <div style={styles.topUserMeta}>{emptyMessage}</div>
    ) : (
      <div style={{ ...styles.topUsersList, maxHeight: 360, overflowY: "auto" }}>
        {rows.map((row) => (
          <div key={row.label} style={styles.topUserItem}>
            <div style={styles.topUserName}>{row.label}</div>
            <div style={styles.topUserMeta}>{row.count.toLocaleString()} uses</div>
          </div>
        ))}
      </div>
    )}
  </div>
);

/**
 * Linguistic analysis panel: lexical-diversity cards (total tokens, unique
 * tokens, type-token ratio) followed by the top words, bigrams and trigrams.
 * Missing metrics render as "—"; missing lists render a short message.
 */
const LinguisticStats = ({ data }: LinguisticStatsProps) => {
  const lexical = data.lexical_diversity;

  // Lists are cut to their display limit in the order the response provides.
  const topWords = (data.word_frequencies ?? [])
    .slice(0, MAX_WORDS)
    .map((item) => ({ label: item.word, count: item.count }));
  const topBigrams = (data.common_two_phrases ?? [])
    .slice(0, MAX_PHRASES)
    .map((item) => ({ label: item.ngram, count: item.count }));
  const topTrigrams = (data.common_three_phrases ?? [])
    .slice(0, MAX_PHRASES)
    .map((item) => ({ label: item.ngram, count: item.count }));

  return (
    <div style={styles.page}>
      <div style={{ ...styles.container, ...styles.grid }}>
        <div style={{ ...styles.card, gridColumn: "span 12" }}>
          <h2 style={styles.sectionTitle}>Language Overview</h2>
          <p style={styles.sectionSubtitle}>Quick read on how broad and repetitive the wording is.</p>
        </div>

        <Card
          label="Total Words"
          value={lexical?.total_tokens?.toLocaleString() ?? EMPTY_VALUE}
          sublabel="Words after basic filtering"
          style={{ gridColumn: "span 4" }}
        />
        <Card
          label="Unique Words"
          value={lexical?.unique_tokens?.toLocaleString() ?? EMPTY_VALUE}
          sublabel="Different words used"
          style={{ gridColumn: "span 4" }}
        />
        <Card
          label="Vocabulary Variety"
          value={typeof lexical?.ttr === "number" ? lexical.ttr.toFixed(4) : EMPTY_VALUE}
          sublabel="Higher means less repetition"
          style={{ gridColumn: "span 4" }}
        />

        <RankedList
          title="Top Words"
          subtitle="Most used single words."
          emptyMessage="No word frequency data available."
          rows={topWords}
        />
        <RankedList
          title="Top Bigrams"
          subtitle="Most used 2-word phrases."
          emptyMessage="No 2-word phrase data available."
          rows={topBigrams}
        />
        <RankedList
          title="Top Trigrams"
          subtitle="Most used 3-word phrases."
          emptyMessage="No 3-word phrase data available."
          rows={topTrigrams}
        />
      </div>
    </div>
  );
};

export default LinguisticStats;
|
||||
@@ -58,15 +58,13 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
|
||||
const [selectedUser, setSelectedUser] = useState<string | null>(null);
|
||||
const selectedUserData: User | null = userData?.users.find((u) => u.author === selectedUser) ?? null;
|
||||
|
||||
console.log(summary)
|
||||
|
||||
return (
|
||||
<div style={styles.page}>
|
||||
|
||||
{/* main grid*/}
|
||||
<div style={{ ...styles.container, ...styles.grid}}>
|
||||
<Card
|
||||
label="Total Events"
|
||||
label="Total Activity"
|
||||
value={summary?.total_events ?? "—"}
|
||||
sublabel="Posts + comments"
|
||||
style={{
|
||||
@@ -74,15 +72,15 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
|
||||
}}
|
||||
/>
|
||||
<Card
|
||||
label="Unique Users"
|
||||
label="Active People"
|
||||
value={summary?.unique_users ?? "—"}
|
||||
sublabel="Distinct authors"
|
||||
sublabel="Distinct users"
|
||||
style={{
|
||||
gridColumn: "span 4"
|
||||
}}
|
||||
/>
|
||||
<Card
|
||||
label="Posts / Comments"
|
||||
label="Posts vs Comments"
|
||||
value={
|
||||
summary
|
||||
? `${summary.total_posts} / ${summary.total_comments}`
|
||||
@@ -108,13 +106,13 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
|
||||
/>
|
||||
|
||||
<Card
|
||||
label="Lurker Ratio"
|
||||
label="One-Time Users"
|
||||
value={
|
||||
typeof summary?.lurker_ratio === "number"
|
||||
? `${Math.round(summary.lurker_ratio * 100)}%`
|
||||
: "—"
|
||||
}
|
||||
sublabel="Users with only 1 event"
|
||||
sublabel="Users with only one event"
|
||||
style={{
|
||||
gridColumn: "span 4"
|
||||
}}
|
||||
@@ -136,12 +134,12 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
|
||||
|
||||
{/* events per day */}
|
||||
<div style={{ ...styles.card, gridColumn: "span 5" }}>
|
||||
<h2 style={styles.sectionTitle}>Events per Day</h2>
|
||||
<p style={styles.sectionSubtitle}>Trend of activity over time</p>
|
||||
<h2 style={styles.sectionTitle}>Activity Over Time</h2>
|
||||
<p style={styles.sectionSubtitle}>How much posting happened each day.</p>
|
||||
|
||||
<div style={styles.chartWrapper}>
|
||||
<ResponsiveContainer width="100%" height="100%">
|
||||
<LineChart data={timeData?.events_per_day.filter((d) => new Date(d.date) >= new Date('2026-01-10'))}>
|
||||
<LineChart data={timeData?.events_per_day ?? []}>
|
||||
<CartesianGrid strokeDasharray="3 3" />
|
||||
<XAxis dataKey="date" />
|
||||
<YAxis />
|
||||
@@ -154,8 +152,8 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
|
||||
|
||||
{/* Word Cloud */}
|
||||
<div style={{ ...styles.card, gridColumn: "span 4" }}>
|
||||
<h2 style={styles.sectionTitle}>Word Cloud</h2>
|
||||
<p style={styles.sectionSubtitle}>Most common terms across events</p>
|
||||
<h2 style={styles.sectionTitle}>Common Words</h2>
|
||||
<p style={styles.sectionSubtitle}>Frequently used words across the dataset.</p>
|
||||
|
||||
<div style={styles.chartWrapper}>
|
||||
<ReactWordcloud
|
||||
@@ -174,8 +172,8 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
|
||||
<div style={{...styles.card, ...styles.scrollArea, gridColumn: "span 3",
|
||||
}}
|
||||
>
|
||||
<h2 style={styles.sectionTitle}>Top Users</h2>
|
||||
<p style={styles.sectionSubtitle}>Most active authors</p>
|
||||
<h2 style={styles.sectionTitle}>Most Active Users</h2>
|
||||
<p style={styles.sectionSubtitle}>Who posted the most events.</p>
|
||||
|
||||
<div style={styles.topUsersList}>
|
||||
{userData?.top_users.slice(0, 100).map((item) => (
|
||||
@@ -195,8 +193,8 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
|
||||
|
||||
{/* Heatmap */}
|
||||
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
||||
<h2 style={styles.sectionTitle}>Heatmap</h2>
|
||||
<p style={styles.sectionSubtitle}>Activity density across time</p>
|
||||
<h2 style={styles.sectionTitle}>Weekly Activity Pattern</h2>
|
||||
<p style={styles.sectionSubtitle}>When activity tends to happen by weekday and hour.</p>
|
||||
|
||||
<div style={styles.heatmapWrapper}>
|
||||
<ActivityHeatmap data={timeData?.weekday_hour_heatmap ?? []} />
|
||||
|
||||
@@ -12,6 +12,9 @@ type Props = {
|
||||
};
|
||||
|
||||
export default function UserModal({ open, onClose, userData, username }: Props) {
|
||||
const dominantEmotionEntry = Object.entries(userData?.avg_emotions ?? {})
|
||||
.sort((a, b) => b[1] - a[1])[0];
|
||||
|
||||
return (
|
||||
<Dialog open={open} onClose={onClose} style={styles.modalRoot}>
|
||||
<div style={styles.modalBackdrop} />
|
||||
@@ -66,6 +69,15 @@ export default function UserModal({ open, onClose, userData, username }: Props)
|
||||
</div>
|
||||
</div>
|
||||
) : null}
|
||||
|
||||
{dominantEmotionEntry ? (
|
||||
<div style={styles.topUserItem}>
|
||||
<div style={styles.topUserName}>Dominant Avg Emotion</div>
|
||||
<div style={styles.topUserMeta}>
|
||||
{dominantEmotionEntry[0].replace("emotion_", "")} ({dominantEmotionEntry[1].toFixed(3)})
|
||||
</div>
|
||||
</div>
|
||||
) : null}
|
||||
</div>
|
||||
)}
|
||||
</DialogPanel>
|
||||
|
||||
@@ -87,15 +87,15 @@ const UserStats = (props: { data: UserAnalysisResponse }) => {
|
||||
style={{ gridColumn: "span 3" }}
|
||||
/>
|
||||
<Card
|
||||
label="Interactions"
|
||||
label="Replies"
|
||||
value={totalInteractions.toLocaleString()}
|
||||
sublabel="Filtered links (2+ interactions)"
|
||||
sublabel="Links with at least 2 replies"
|
||||
style={{ gridColumn: "span 3" }}
|
||||
/>
|
||||
<Card
|
||||
label="Average Intensity"
|
||||
label="Replies per Connected User"
|
||||
value={avgInteractionsPerConnectedUser.toFixed(1)}
|
||||
sublabel="Interactions per connected user"
|
||||
sublabel="Average from visible graph links"
|
||||
style={{ gridColumn: "span 3" }}
|
||||
/>
|
||||
<Card
|
||||
@@ -106,13 +106,13 @@ const UserStats = (props: { data: UserAnalysisResponse }) => {
|
||||
/>
|
||||
|
||||
<Card
|
||||
label="Strongest Connection"
|
||||
label="Strongest User Link"
|
||||
value={strongestLink ? `${strongestLink.source} -> ${strongestLink.target}` : "—"}
|
||||
sublabel={strongestLink ? `${strongestLink.value.toLocaleString()} interactions` : "No graph edges after filtering"}
|
||||
sublabel={strongestLink ? `${strongestLink.value.toLocaleString()} replies` : "No graph links after filtering"}
|
||||
style={{ gridColumn: "span 6" }}
|
||||
/>
|
||||
<Card
|
||||
label="Most Reply-Driven User"
|
||||
label="Most Comment-Heavy User"
|
||||
value={highlyInteractiveUser?.author ?? "—"}
|
||||
sublabel={
|
||||
highlyInteractiveUser
|
||||
@@ -125,7 +125,7 @@ const UserStats = (props: { data: UserAnalysisResponse }) => {
|
||||
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
||||
<h2 style={styles.sectionTitle}>User Interaction Graph</h2>
|
||||
<p style={styles.sectionSubtitle}>
|
||||
Nodes represent users and links represent conversation interactions.
|
||||
Each node is a user, and each link shows replies between them.
|
||||
</p>
|
||||
<div ref={graphContainerRef} style={{ width: "100%", height: graphSize.height }}>
|
||||
<ForceGraph3D
|
||||
|
||||
@@ -191,6 +191,9 @@ const AutoScrapePage = () => {
|
||||
<p style={styles.sectionHeaderSubtitle}>
|
||||
Select sources and scrape settings, then queue processing automatically.
|
||||
</p>
|
||||
<p style={{ ...styles.subtleBodyText, marginTop: 6, color: "#9a6700" }}>
|
||||
Warning: Scraping more than 250 posts from any single site can take hours due to rate limits.
|
||||
</p>
|
||||
</div>
|
||||
<button
|
||||
type="button"
|
||||
|
||||
@@ -5,26 +5,42 @@ import StatsStyling from "../styles/stats_styling";
|
||||
import SummaryStats from "../components/SummaryStats";
|
||||
import EmotionalStats from "../components/EmotionalStats";
|
||||
import UserStats from "../components/UserStats";
|
||||
import LinguisticStats from "../components/LinguisticStats";
|
||||
import InteractionalStats from "../components/InteractionalStats";
|
||||
import CulturalStats from "../components/CulturalStats";
|
||||
|
||||
import {
|
||||
type SummaryResponse,
|
||||
type UserAnalysisResponse,
|
||||
type TimeAnalysisResponse,
|
||||
type ContentAnalysisResponse
|
||||
type ContentAnalysisResponse,
|
||||
type UserEndpointResponse,
|
||||
type LinguisticAnalysisResponse,
|
||||
type EmotionalAnalysisResponse,
|
||||
type InteractionAnalysisResponse,
|
||||
type CulturalAnalysisResponse
|
||||
} from '../types/ApiTypes'
|
||||
|
||||
const API_BASE_URL = import.meta.env.VITE_BACKEND_URL
|
||||
const styles = StatsStyling;
|
||||
/** Lower-cased author names that stand in for a deleted account. */
const DELETED_USERS = ["[deleted]"];

/**
 * Reports whether an author name is one of the deleted-account placeholders.
 *
 * The comparison ignores surrounding whitespace and letter case; `null` and
 * `undefined` are treated as the empty string and therefore never match.
 */
const isDeletedUser = (value: string | null | undefined) => {
  const normalised = (value ?? "").trim().toLowerCase();
  return DELETED_USERS.includes(normalised);
};
|
||||
|
||||
const StatPage = () => {
|
||||
const { datasetId: routeDatasetId } = useParams<{ datasetId: string }>();
|
||||
const [error, setError] = useState('');
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [activeView, setActiveView] = useState<"summary" | "emotional" | "user">("summary");
|
||||
const [activeView, setActiveView] = useState<"summary" | "emotional" | "user" | "linguistic" | "interactional" | "cultural">("summary");
|
||||
|
||||
const [userData, setUserData] = useState<UserAnalysisResponse | null>(null);
|
||||
const [timeData, setTimeData] = useState<TimeAnalysisResponse | null>(null);
|
||||
const [contentData, setContentData] = useState<ContentAnalysisResponse | null>(null);
|
||||
const [linguisticData, setLinguisticData] = useState<LinguisticAnalysisResponse | null>(null);
|
||||
const [interactionData, setInteractionData] = useState<InteractionAnalysisResponse | null>(null);
|
||||
const [culturalData, setCulturalData] = useState<CulturalAnalysisResponse | null>(null);
|
||||
const [summary, setSummary] = useState<SummaryResponse | null>(null);
|
||||
|
||||
|
||||
@@ -83,15 +99,23 @@ const StatPage = () => {
|
||||
setLoading(true);
|
||||
|
||||
Promise.all([
|
||||
axios.get<TimeAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/time`, {
|
||||
axios.get<TimeAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/temporal`, {
|
||||
params,
|
||||
headers: authHeaders,
|
||||
}),
|
||||
axios.get<UserAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/user`, {
|
||||
axios.get<UserEndpointResponse>(`${API_BASE_URL}/dataset/${datasetId}/user`, {
|
||||
params,
|
||||
headers: authHeaders,
|
||||
}),
|
||||
axios.get<ContentAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/content`, {
|
||||
axios.get<LinguisticAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/linguistic`, {
|
||||
params,
|
||||
headers: authHeaders,
|
||||
}),
|
||||
axios.get<EmotionalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/emotional`, {
|
||||
params,
|
||||
headers: authHeaders,
|
||||
}),
|
||||
axios.get<InteractionAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/interactional`, {
|
||||
params,
|
||||
headers: authHeaders,
|
||||
}),
|
||||
@@ -99,12 +123,87 @@ const StatPage = () => {
|
||||
params,
|
||||
headers: authHeaders,
|
||||
}),
|
||||
axios.get<CulturalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/cultural`, {
|
||||
params,
|
||||
headers: authHeaders,
|
||||
}),
|
||||
])
|
||||
.then(([timeRes, userRes, contentRes, summaryRes]) => {
|
||||
setUserData(userRes.data || null);
|
||||
.then(([timeRes, userRes, linguisticRes, emotionalRes, interactionRes, summaryRes, culturalRes]) => {
|
||||
const usersList = userRes.data.users ?? [];
|
||||
const topUsersList = userRes.data.top_users ?? [];
|
||||
const interactionGraphRaw = interactionRes.data?.interaction_graph ?? {};
|
||||
const topPairsRaw = interactionRes.data?.top_interaction_pairs ?? [];
|
||||
|
||||
const filteredUsers: typeof usersList = [];
|
||||
for (const user of usersList) {
|
||||
if (isDeletedUser(user.author)) continue;
|
||||
filteredUsers.push(user);
|
||||
}
|
||||
|
||||
const filteredTopUsers: typeof topUsersList = [];
|
||||
for (const user of topUsersList) {
|
||||
if (isDeletedUser(user.author)) continue;
|
||||
filteredTopUsers.push(user);
|
||||
}
|
||||
|
||||
const filteredInteractionGraph: Record<string, Record<string, number>> = {};
|
||||
for (const [source, targets] of Object.entries(interactionGraphRaw)) {
|
||||
if (isDeletedUser(source)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const nextTargets: Record<string, number> = {};
|
||||
for (const [target, count] of Object.entries(targets)) {
|
||||
if (isDeletedUser(target)) {
|
||||
continue;
|
||||
}
|
||||
nextTargets[target] = count;
|
||||
}
|
||||
|
||||
filteredInteractionGraph[source] = nextTargets;
|
||||
}
|
||||
|
||||
const filteredTopInteractionPairs: typeof topPairsRaw = [];
|
||||
for (const pairEntry of topPairsRaw) {
|
||||
const pair = pairEntry[0];
|
||||
const source = pair[0];
|
||||
const target = pair[1];
|
||||
if (isDeletedUser(source) || isDeletedUser(target)) {
|
||||
continue;
|
||||
}
|
||||
filteredTopInteractionPairs.push(pairEntry);
|
||||
}
|
||||
|
||||
const combinedUserData: UserAnalysisResponse = {
|
||||
...userRes.data,
|
||||
users: filteredUsers,
|
||||
top_users: filteredTopUsers,
|
||||
interaction_graph: filteredInteractionGraph,
|
||||
};
|
||||
|
||||
const combinedContentData: ContentAnalysisResponse = {
|
||||
...linguisticRes.data,
|
||||
...emotionalRes.data,
|
||||
};
|
||||
|
||||
const filteredInteractionData: InteractionAnalysisResponse = {
|
||||
...interactionRes.data,
|
||||
interaction_graph: filteredInteractionGraph,
|
||||
top_interaction_pairs: filteredTopInteractionPairs,
|
||||
};
|
||||
|
||||
const filteredSummary: SummaryResponse = {
|
||||
...summaryRes.data,
|
||||
unique_users: filteredUsers.length,
|
||||
};
|
||||
|
||||
setUserData(combinedUserData);
|
||||
setTimeData(timeRes.data || null);
|
||||
setContentData(contentRes.data || null);
|
||||
setSummary(summaryRes.data || null);
|
||||
setContentData(combinedContentData);
|
||||
setLinguisticData(linguisticRes.data || null);
|
||||
setInteractionData(filteredInteractionData || null);
|
||||
setCulturalData(culturalRes.data || null);
|
||||
setSummary(filteredSummary || null);
|
||||
})
|
||||
.catch((e) => setError("Failed to load statistics: " + String(e)))
|
||||
.finally(() => setLoading(false));
|
||||
@@ -198,7 +297,7 @@ return (
|
||||
<div style={styles.dashboardMeta}>Dataset #{datasetId ?? "-"}</div>
|
||||
</div>
|
||||
|
||||
<div style={{ ...styles.container, ...styles.tabsRow }}>
|
||||
<div style={{ ...styles.container, ...styles.tabsRow, justifyContent: "center" }}>
|
||||
<button
|
||||
onClick={() => setActiveView("summary")}
|
||||
style={activeView === "summary" ? styles.buttonPrimary : styles.buttonSecondary}
|
||||
@@ -218,6 +317,24 @@ return (
|
||||
>
|
||||
Users
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setActiveView("linguistic")}
|
||||
style={activeView === "linguistic" ? styles.buttonPrimary : styles.buttonSecondary}
|
||||
>
|
||||
Linguistic
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setActiveView("interactional")}
|
||||
style={activeView === "interactional" ? styles.buttonPrimary : styles.buttonSecondary}
|
||||
>
|
||||
Interactional
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setActiveView("cultural")}
|
||||
style={activeView === "cultural" ? styles.buttonPrimary : styles.buttonSecondary}
|
||||
>
|
||||
Cultural
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{activeView === "summary" && (
|
||||
@@ -243,6 +360,36 @@ return (
|
||||
<UserStats data={userData} />
|
||||
)}
|
||||
|
||||
{activeView === "linguistic" && linguisticData && (
|
||||
<LinguisticStats data={linguisticData} />
|
||||
)}
|
||||
|
||||
{activeView === "linguistic" && !linguisticData && (
|
||||
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
|
||||
No linguistic data available.
|
||||
</div>
|
||||
)}
|
||||
|
||||
{activeView === "interactional" && interactionData && (
|
||||
<InteractionalStats data={interactionData} />
|
||||
)}
|
||||
|
||||
{activeView === "interactional" && !interactionData && (
|
||||
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
|
||||
No interactional data available.
|
||||
</div>
|
||||
)}
|
||||
|
||||
{activeView === "cultural" && culturalData && (
|
||||
<CulturalStats data={culturalData} />
|
||||
)}
|
||||
|
||||
{activeView === "cultural" && !culturalData && (
|
||||
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
|
||||
No cultural data available.
|
||||
</div>
|
||||
)}
|
||||
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,14 +1,28 @@
|
||||
// User Responses
|
||||
type TopUser = {
|
||||
author: string;
|
||||
source: string;
|
||||
count: number
|
||||
// Shared types
|
||||
type FrequencyWord = {
|
||||
word: string;
|
||||
count: number;
|
||||
};
|
||||
|
||||
type FrequencyWord = {
|
||||
word: string;
|
||||
count: number;
|
||||
}
|
||||
type NGram = {
|
||||
count: number;
|
||||
ngram: string;
|
||||
};
|
||||
|
||||
type Emotion = {
|
||||
emotion_anger: number;
|
||||
emotion_disgust: number;
|
||||
emotion_fear: number;
|
||||
emotion_joy: number;
|
||||
emotion_sadness: number;
|
||||
};
|
||||
|
||||
// User
|
||||
type TopUser = {
|
||||
author: string;
|
||||
source: string;
|
||||
count: number;
|
||||
};
|
||||
|
||||
type Vocab = {
|
||||
author: string;
|
||||
@@ -26,60 +40,145 @@ type User = {
|
||||
comment: number;
|
||||
comment_post_ratio: number;
|
||||
comment_share: number;
|
||||
avg_emotions?: Record<string, number>;
|
||||
vocab?: Vocab | null;
|
||||
};
|
||||
|
||||
type InteractionGraph = Record<string, Record<string, number>>;
|
||||
|
||||
/**
 * Raw payload of the `/dataset/{id}/user` endpoint: the ranked author list and
 * the per-author breakdown. It carries no interaction graph.
 */
type UserEndpointResponse = {
|
||||
top_users: TopUser[];
|
||||
users: User[];
|
||||
};
|
||||
|
||||
/**
 * User data in the shape the user view consumes: the user endpoint's lists
 * plus an `interaction_graph` (source author -> target author -> count).
 */
type UserAnalysisResponse = {
|
||||
top_users: TopUser[];
|
||||
users: User[];
|
||||
interaction_graph: InteractionGraph;
|
||||
};
|
||||
|
||||
// Time Analysis
|
||||
// Time
|
||||
type EventsPerDay = {
|
||||
date: Date;
|
||||
count: number;
|
||||
}
|
||||
|
||||
type HeatmapCell = {
|
||||
date: Date;
|
||||
hour: number;
|
||||
count: number;
|
||||
}
|
||||
|
||||
type TimeAnalysisResponse = {
|
||||
events_per_day: EventsPerDay[];
|
||||
weekday_hour_heatmap: HeatmapCell[];
|
||||
}
|
||||
|
||||
// Content Analysis
|
||||
type Emotion = {
|
||||
emotion_anger: number;
|
||||
emotion_disgust: number;
|
||||
emotion_fear: number;
|
||||
emotion_joy: number;
|
||||
emotion_sadness: number;
|
||||
date: Date;
|
||||
count: number;
|
||||
};
|
||||
|
||||
type NGram = {
|
||||
count: number;
|
||||
ngram: string;
|
||||
}
|
||||
type HeatmapCell = {
|
||||
date: Date;
|
||||
hour: number;
|
||||
count: number;
|
||||
};
|
||||
|
||||
type TimeAnalysisResponse = {
|
||||
events_per_day: EventsPerDay[];
|
||||
weekday_hour_heatmap: HeatmapCell[];
|
||||
};
|
||||
|
||||
// Content (combines emotional and linguistic)
|
||||
type AverageEmotionByTopic = Emotion & {
|
||||
n: number;
|
||||
topic: string;
|
||||
[key: string]: string | number;
|
||||
};
|
||||
|
||||
/**
 * One emotion's mean score over the filtered dataset.
 * `emotion` is the backend column name with the `emotion_` prefix removed.
 */
type OverallEmotionAverage = {
|
||||
emotion: string;
|
||||
score: number;
|
||||
};
|
||||
|
||||
/**
 * How often one emotion is the highest-scoring emotion of an event.
 * `count` is the number of such events; `ratio` is that count as a fraction
 * of the events considered, rounded to 4 decimals by the backend.
 */
type DominantEmotionDistribution = {
|
||||
emotion: string;
|
||||
count: number;
|
||||
ratio: number;
|
||||
};
|
||||
|
||||
/**
 * Per-source emotion summary: the emotion with the highest mean score for the
 * source (`emotion_` prefix removed), that mean rounded to 4 decimals, and the
 * number of events from the source.
 */
type EmotionBySource = {
|
||||
source: string;
|
||||
dominant_emotion: string;
|
||||
dominant_score: number;
|
||||
event_count: number;
|
||||
};
|
||||
|
||||
type ContentAnalysisResponse = {
|
||||
word_frequencies: FrequencyWord[];
|
||||
average_emotion_by_topic: AverageEmotionByTopic[];
|
||||
common_three_phrases: NGram[];
|
||||
common_two_phrases: NGram[];
|
||||
}
|
||||
word_frequencies: FrequencyWord[];
|
||||
average_emotion_by_topic: AverageEmotionByTopic[];
|
||||
common_three_phrases: NGram[];
|
||||
common_two_phrases: NGram[];
|
||||
overall_emotion_average?: OverallEmotionAverage[];
|
||||
dominant_emotion_distribution?: DominantEmotionDistribution[];
|
||||
emotion_by_source?: EmotionBySource[];
|
||||
};
|
||||
|
||||
// Linguistic
|
||||
/**
 * Payload of the `/dataset/{id}/linguistic` endpoint.
 * `lexical_diversity`, when present, holds the keys `total_tokens`,
 * `unique_tokens` and `ttr` (unique / total, rounded to 4 decimals).
 */
type LinguisticAnalysisResponse = {
|
||||
word_frequencies: FrequencyWord[];
|
||||
common_two_phrases: NGram[];
|
||||
common_three_phrases: NGram[];
|
||||
lexical_diversity?: Record<string, number>;
|
||||
};
|
||||
|
||||
// Emotional
|
||||
/**
 * Payload of the `/dataset/{id}/emotional` endpoint: per-topic emotion means
 * plus dataset-wide, dominant-emotion and per-source summaries.
 */
type EmotionalAnalysisResponse = {
|
||||
average_emotion_by_topic: AverageEmotionByTopic[];
|
||||
overall_emotion_average?: OverallEmotionAverage[];
|
||||
dominant_emotion_distribution?: DominantEmotionDistribution[];
|
||||
emotion_by_source?: EmotionBySource[];
|
||||
};
|
||||
|
||||
// Interactional
|
||||
/**
 * How concentrated commenting is among authors (comments only).
 * - `total_commenting_authors`: distinct authors with at least one comment.
 * - `top_10pct_author_count`: size of the most-active 10% of those authors (at least 1).
 * - `top_10pct_comment_share`: fraction of all comments written by that group (4 decimals).
 * - `single_comment_authors` / `single_comment_author_ratio`: authors with exactly
 *   one comment, as a count and as a fraction of commenting authors (4 decimals).
 */
type ConversationConcentration = {
|
||||
total_commenting_authors: number;
|
||||
top_10pct_author_count: number;
|
||||
top_10pct_comment_share: number;
|
||||
single_comment_authors: number;
|
||||
single_comment_author_ratio: number;
|
||||
};
|
||||
|
||||
/**
 * Payload of the `/dataset/{id}/interactional` endpoint.
 * Each `top_interaction_pairs` entry is `[[source, target], count]`; the
 * backend sorts them by count, highest first.
 */
type InteractionAnalysisResponse = {
|
||||
average_thread_depth?: number;
|
||||
top_interaction_pairs?: [[string, string], number][];
|
||||
conversation_concentration?: ConversationConcentration;
|
||||
interaction_graph: InteractionGraph;
|
||||
};
|
||||
|
||||
// Cultural
|
||||
/**
 * In-group / out-group identity-marker statistics from the cultural analysis.
 * The cultural view treats `*_usage` as word counts and multiplies `*_ratio`
 * by 100 to show a percentage.
 * NOTE(review): `in_group_posts`, `out_group_posts` and `tie_posts` presumably
 * count posts by which marker group dominates; confirm against the backend.
 */
type IdentityMarkers = {
|
||||
in_group_usage: number;
|
||||
out_group_usage: number;
|
||||
in_group_ratio: number;
|
||||
out_group_ratio: number;
|
||||
in_group_posts: number;
|
||||
out_group_posts: number;
|
||||
tie_posts: number;
|
||||
in_group_emotion_avg?: Record<string, number>;
|
||||
out_group_emotion_avg?: Record<string, number>;
|
||||
};
|
||||
|
||||
/**
 * Stance-marker statistics for four categories (hedge, certainty, deontic,
 * permission), each as a `*_total` and a `*_per_1k_tokens` value.
 * NOTE(review): presumably raw occurrence counts and rates per 1,000 tokens;
 * confirm against the backend cultural analysis.
 */
type StanceMarkers = {
|
||||
hedge_total: number;
|
||||
certainty_total: number;
|
||||
deontic_total: number;
|
||||
permission_total: number;
|
||||
hedge_per_1k_tokens: number;
|
||||
certainty_per_1k_tokens: number;
|
||||
deontic_per_1k_tokens: number;
|
||||
permission_per_1k_tokens: number;
|
||||
};
|
||||
|
||||
/**
 * Emotion aggregate for a single entity: a post count and a map of emotion
 * name to average score.
 * NOTE(review): presumably `post_count` is the number of posts mentioning the
 * entity; confirm against the backend.
 */
type EntityEmotionAggregate = {
|
||||
post_count: number;
|
||||
emotion_avg: Record<string, number>;
|
||||
};
|
||||
|
||||
/** Wrapper object mapping an entity key to its emotion aggregate. */
type AverageEmotionPerEntity = {
|
||||
entity_emotion_avg: Record<string, EntityEmotionAggregate>;
|
||||
};
|
||||
|
||||
/**
 * Payload of the `/dataset/{id}/cultural` endpoint. Every section is optional,
 * so consumers must handle a missing section.
 */
type CulturalAnalysisResponse = {
|
||||
identity_markers?: IdentityMarkers;
|
||||
stance_markers?: StanceMarkers;
|
||||
avg_emotion_per_entity?: AverageEmotionPerEntity;
|
||||
};
|
||||
|
||||
// Summary
|
||||
type SummaryResponse = {
|
||||
@@ -96,22 +195,35 @@ type SummaryResponse = {
|
||||
sources: string[];
|
||||
};
|
||||
|
||||
// Filtering Response
|
||||
// Filter
|
||||
type FilterResponse = {
|
||||
rows: number
|
||||
data: any;
|
||||
}
|
||||
rows: number;
|
||||
data: any;
|
||||
};
|
||||
|
||||
export type {
|
||||
TopUser,
|
||||
Vocab,
|
||||
User,
|
||||
InteractionGraph,
|
||||
UserAnalysisResponse,
|
||||
FrequencyWord,
|
||||
AverageEmotionByTopic,
|
||||
SummaryResponse,
|
||||
TimeAnalysisResponse,
|
||||
ContentAnalysisResponse,
|
||||
FilterResponse
|
||||
}
|
||||
TopUser,
|
||||
Vocab,
|
||||
User,
|
||||
InteractionGraph,
|
||||
ConversationConcentration,
|
||||
UserAnalysisResponse,
|
||||
UserEndpointResponse,
|
||||
FrequencyWord,
|
||||
AverageEmotionByTopic,
|
||||
OverallEmotionAverage,
|
||||
DominantEmotionDistribution,
|
||||
EmotionBySource,
|
||||
SummaryResponse,
|
||||
TimeAnalysisResponse,
|
||||
ContentAnalysisResponse,
|
||||
LinguisticAnalysisResponse,
|
||||
EmotionalAnalysisResponse,
|
||||
InteractionAnalysisResponse,
|
||||
IdentityMarkers,
|
||||
StanceMarkers,
|
||||
EntityEmotionAggregate,
|
||||
AverageEmotionPerEntity,
|
||||
CulturalAnalysisResponse,
|
||||
FilterResponse,
|
||||
};
|
||||
|
||||
@@ -1,33 +1,86 @@
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class EmotionalAnalysis:
|
||||
def avg_emotion_by_topic(self, df: pd.DataFrame) -> dict:
|
||||
emotion_cols = [
|
||||
col for col in df.columns
|
||||
if col.startswith("emotion_")
|
||||
]
|
||||
def _emotion_cols(self, df: pd.DataFrame) -> list[str]:
|
||||
return [col for col in df.columns if col.startswith("emotion_")]
|
||||
|
||||
def avg_emotion_by_topic(self, df: pd.DataFrame) -> list[dict]:
|
||||
emotion_cols = self._emotion_cols(df)
|
||||
|
||||
if not emotion_cols:
|
||||
return []
|
||||
|
||||
counts = (
|
||||
df[
|
||||
(df["topic"] != "Misc")
|
||||
]
|
||||
.groupby("topic")
|
||||
.size()
|
||||
.rename("n")
|
||||
df[(df["topic"] != "Misc")].groupby("topic").size().reset_index(name="n")
|
||||
)
|
||||
|
||||
avg_emotion_by_topic = (
|
||||
df[
|
||||
(df["topic"] != "Misc")
|
||||
]
|
||||
df[(df["topic"] != "Misc")]
|
||||
.groupby("topic")[emotion_cols]
|
||||
.mean()
|
||||
.reset_index()
|
||||
)
|
||||
|
||||
avg_emotion_by_topic = avg_emotion_by_topic.merge(
|
||||
counts,
|
||||
on="topic"
|
||||
)
|
||||
avg_emotion_by_topic = avg_emotion_by_topic.merge(counts, on="topic")
|
||||
|
||||
return avg_emotion_by_topic.to_dict(orient='records')
|
||||
return avg_emotion_by_topic.to_dict(orient="records")
|
||||
|
||||
def overall_emotion_average(self, df: pd.DataFrame) -> list[dict]:
    """Return the dataset-wide mean score of every emotion column.

    Args:
        df: Event frame; columns whose name starts with ``emotion_`` are used.

    Returns:
        One ``{"emotion": <name without the "emotion_" prefix>, "score": <float>}``
        record per emotion column, in column order. An empty list is returned
        when the frame has no emotion columns or no rows.
    """
    emotion_cols = self._emotion_cols(df)

    # Same guard as the sibling emotion summaries: nothing to average.
    if not emotion_cols or df.empty:
        return []

    # A column with no scores at all averages to NaN, which does not serialise
    # to valid JSON; report it as 0.0 instead.
    means = df[emotion_cols].mean().fillna(0.0)

    return [
        {
            "emotion": col.replace("emotion_", ""),
            "score": float(means[col]),
        }
        for col in emotion_cols
    ]
|
||||
|
||||
def dominant_emotion_distribution(self, df: pd.DataFrame) -> list[dict]:
    """Count how often each emotion is the highest-scoring emotion of a row.

    Args:
        df: Event frame; columns whose name starts with ``emotion_`` are used.

    Returns:
        Records ``{"emotion", "count", "ratio"}`` ordered by descending count.
        ``ratio`` is the count divided by the number of rows that carry at
        least one emotion score, rounded to 4 decimals. An empty list is
        returned when there is nothing to score.
    """
    emotion_cols = self._emotion_cols(df)

    if not emotion_cols or df.empty:
        return []

    # Rows without any emotion score have no dominant emotion. Feeding them to
    # idxmax is deprecated (it will raise in future pandas), and they would
    # otherwise inflate the denominator so the ratios no longer sum to 1.
    scored = df[emotion_cols].dropna(how="all")
    if scored.empty:
        return []

    dominant_per_row = scored.idxmax(axis=1)
    counts = dominant_per_row.value_counts()
    total = len(dominant_per_row)

    return [
        {
            "emotion": col.replace("emotion_", ""),
            "count": int(count),
            "ratio": round(float(count / total), 4),
        }
        for col, count in counts.items()
    ]
|
||||
|
||||
def emotion_by_source(self, df: pd.DataFrame) -> list[dict]:
    """Summarise the dominant emotion of every source.

    Args:
        df: Event frame with a ``source`` column and ``emotion_*`` columns.

    Returns:
        One record per source (in sorted source order) with the keys
        ``source``, ``dominant_emotion`` (prefix removed), ``dominant_score``
        (mean score, rounded to 4 decimals) and ``event_count``. An empty list
        is returned when the frame is empty or lacks the required columns.
    """
    emotion_cols = self._emotion_cols(df)

    if not emotion_cols or "source" not in df.columns or df.empty:
        return []

    grouped = df.groupby("source")
    source_counts = grouped.size()
    # An emotion with no scores for a source averages to NaN. NaN compares
    # false against everything, so it could win max() below and would leak
    # into the JSON response; treat it as 0.0 instead.
    source_means = grouped[emotion_cols].mean().fillna(0.0)

    output = []
    for source, means in source_means.iterrows():
        dominant_col = max(emotion_cols, key=lambda col: float(means[col]))
        output.append(
            {
                "source": str(source),
                "dominant_emotion": dominant_col.replace("emotion_", ""),
                "dominant_score": round(float(means[dominant_col]), 4),
                "event_count": int(source_counts.get(source, 0)),
            }
        )

    return output
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
import pandas as pd
|
||||
import re
|
||||
|
||||
from collections import Counter
|
||||
|
||||
|
||||
class InteractionAnalysis:
|
||||
def __init__(self, word_exclusions: set[str]):
|
||||
self.word_exclusions = word_exclusions
|
||||
@@ -12,118 +9,6 @@ class InteractionAnalysis:
|
||||
tokens = re.findall(r"\b[a-z]{3,}\b", text)
|
||||
return [t for t in tokens if t not in self.word_exclusions]
|
||||
|
||||
def _vocab_richness_per_user(
|
||||
self, df: pd.DataFrame, min_words: int = 20, top_most_used_words: int = 100
|
||||
) -> list:
|
||||
df = df.copy()
|
||||
df["content"] = df["content"].fillna("").astype(str).str.lower()
|
||||
df["tokens"] = df["content"].apply(self._tokenize)
|
||||
|
||||
rows = []
|
||||
for author, group in df.groupby("author"):
|
||||
all_tokens = [t for tokens in group["tokens"] for t in tokens]
|
||||
|
||||
total_words = len(all_tokens)
|
||||
unique_words = len(set(all_tokens))
|
||||
events = len(group)
|
||||
|
||||
# Min amount of words for a user, any less than this might give weird results
|
||||
if total_words < min_words:
|
||||
continue
|
||||
|
||||
# 100% = they never reused a word (excluding stop words)
|
||||
vocab_richness = unique_words / total_words
|
||||
avg_words = total_words / max(events, 1)
|
||||
|
||||
counts = Counter(all_tokens)
|
||||
top_words = [
|
||||
{"word": w, "count": int(c)}
|
||||
for w, c in counts.most_common(top_most_used_words)
|
||||
]
|
||||
|
||||
rows.append(
|
||||
{
|
||||
"author": author,
|
||||
"events": int(events),
|
||||
"total_words": int(total_words),
|
||||
"unique_words": int(unique_words),
|
||||
"vocab_richness": round(vocab_richness, 3),
|
||||
"avg_words_per_event": round(avg_words, 2),
|
||||
"top_words": top_words,
|
||||
}
|
||||
)
|
||||
|
||||
rows = sorted(rows, key=lambda x: x["vocab_richness"], reverse=True)
|
||||
|
||||
return rows
|
||||
|
||||
def top_users(self, df: pd.DataFrame) -> list:
|
||||
counts = df.groupby(["author", "source"]).size().sort_values(ascending=False)
|
||||
|
||||
top_users = [
|
||||
{"author": author, "source": source, "count": int(count)}
|
||||
for (author, source), count in counts.items()
|
||||
]
|
||||
|
||||
return top_users
|
||||
|
||||
def per_user_analysis(self, df: pd.DataFrame) -> dict:
|
||||
per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0)
|
||||
|
||||
emotion_cols = [col for col in df.columns if col.startswith("emotion_")]
|
||||
|
||||
avg_emotions_by_author = {}
|
||||
if emotion_cols:
|
||||
avg_emotions = df.groupby("author")[emotion_cols].mean().fillna(0.0)
|
||||
avg_emotions_by_author = {
|
||||
author: {emotion: float(score) for emotion, score in row.items()}
|
||||
for author, row in avg_emotions.iterrows()
|
||||
}
|
||||
|
||||
# ensure columns always exist
|
||||
for col in ("post", "comment"):
|
||||
if col not in per_user.columns:
|
||||
per_user[col] = 0
|
||||
|
||||
per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(
|
||||
0, 1
|
||||
)
|
||||
per_user["comment_share"] = per_user["comment"] / (
|
||||
per_user["post"] + per_user["comment"]
|
||||
).replace(0, 1)
|
||||
per_user = per_user.sort_values("comment_post_ratio", ascending=True)
|
||||
per_user_records = per_user.reset_index().to_dict(orient="records")
|
||||
|
||||
vocab_rows = self._vocab_richness_per_user(df)
|
||||
vocab_by_author = {row["author"]: row for row in vocab_rows}
|
||||
|
||||
# merge vocab richness + per_user information
|
||||
merged_users = []
|
||||
for row in per_user_records:
|
||||
author = row["author"]
|
||||
merged_users.append(
|
||||
{
|
||||
"author": author,
|
||||
"post": int(row.get("post", 0)),
|
||||
"comment": int(row.get("comment", 0)),
|
||||
"comment_post_ratio": float(row.get("comment_post_ratio", 0)),
|
||||
"comment_share": float(row.get("comment_share", 0)),
|
||||
"avg_emotions": avg_emotions_by_author.get(author, {}),
|
||||
"vocab": vocab_by_author.get(
|
||||
author,
|
||||
{
|
||||
"vocab_richness": 0,
|
||||
"avg_words_per_event": 0,
|
||||
"top_words": [],
|
||||
},
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
merged_users.sort(key=lambda u: u["comment_post_ratio"])
|
||||
|
||||
return merged_users
|
||||
|
||||
def interaction_graph(self, df: pd.DataFrame):
|
||||
interactions = {a: {} for a in df["author"].dropna().unique()}
|
||||
|
||||
@@ -167,67 +52,36 @@ class InteractionAnalysis:
|
||||
|
||||
return round(sum(depths) / len(depths), 2)
|
||||
|
||||
def average_thread_length_by_emotion(self, df: pd.DataFrame):
|
||||
emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
|
||||
def top_interaction_pairs(self, df: pd.DataFrame, top_n=10):
|
||||
graph = self.interaction_graph(df)
|
||||
pairs = []
|
||||
|
||||
emotion_cols = [
|
||||
c
|
||||
for c in df.columns
|
||||
if c.startswith("emotion_") and c not in emotion_exclusions
|
||||
]
|
||||
for a, targets in graph.items():
|
||||
for b, count in targets.items():
|
||||
pairs.append(((a, b), count))
|
||||
|
||||
id_to_reply = df.set_index("id")["reply_to"].to_dict()
|
||||
length_cache = {}
|
||||
pairs.sort(key=lambda x: x[1], reverse=True)
|
||||
return pairs[:top_n]
|
||||
|
||||
def thread_length_from(start_id):
|
||||
if start_id in length_cache:
|
||||
return length_cache[start_id]
|
||||
def conversation_concentration(self, df: pd.DataFrame) -> dict:
|
||||
if "type" not in df.columns:
|
||||
return {}
|
||||
|
||||
seen = set()
|
||||
length = 1
|
||||
current = start_id
|
||||
comments = df[df["type"] == "comment"]
|
||||
if comments.empty:
|
||||
return {}
|
||||
|
||||
while True:
|
||||
if current in seen:
|
||||
# infinite loop shouldn't happen, but just in case
|
||||
break
|
||||
seen.add(current)
|
||||
author_counts = comments["author"].value_counts()
|
||||
total_comments = len(comments)
|
||||
total_authors = len(author_counts)
|
||||
|
||||
reply_to = id_to_reply.get(current)
|
||||
|
||||
if (
|
||||
reply_to is None
|
||||
or (isinstance(reply_to, float) and pd.isna(reply_to))
|
||||
or reply_to == ""
|
||||
):
|
||||
break
|
||||
|
||||
length += 1
|
||||
current = reply_to
|
||||
|
||||
if current in length_cache:
|
||||
length += length_cache[current] - 1
|
||||
break
|
||||
|
||||
length_cache[start_id] = length
|
||||
return length
|
||||
|
||||
emotion_to_lengths = {}
|
||||
|
||||
# Fill NaNs in emotion cols to avoid max() issues
|
||||
emo_df = df[["id"] + emotion_cols].copy()
|
||||
emo_df[emotion_cols] = emo_df[emotion_cols].fillna(0)
|
||||
|
||||
for _, row in emo_df.iterrows():
|
||||
msg_id = row["id"]
|
||||
length = thread_length_from(msg_id)
|
||||
|
||||
emotions = {c: row[c] for c in emotion_cols}
|
||||
dominant = max(emotions, key=emotions.get)
|
||||
|
||||
emotion_to_lengths.setdefault(dominant, []).append(length)
|
||||
top_10_pct_n = max(1, int(total_authors * 0.1))
|
||||
top_10_pct_share = round(author_counts.head(top_10_pct_n).sum() / total_comments, 4)
|
||||
|
||||
return {
|
||||
emotion: round(sum(lengths) / len(lengths), 2)
|
||||
for emotion, lengths in emotion_to_lengths.items()
|
||||
"total_commenting_authors": total_authors,
|
||||
"top_10pct_author_count": top_10_pct_n,
|
||||
"top_10pct_comment_share": float(top_10_pct_share),
|
||||
"single_comment_authors": int((author_counts == 1).sum()),
|
||||
"single_comment_author_ratio": float(round((author_counts == 1).sum() / total_authors, 4)),
|
||||
}
|
||||
@@ -61,3 +61,19 @@ class LinguisticAnalysis:
|
||||
.head(limit)
|
||||
.to_dict(orient="records")
|
||||
)
|
||||
|
||||
def lexical_diversity(self, df: pd.DataFrame) -> dict:
    """Compute the type-token ratio of the ``content`` column.

    Tokens are lower-cased runs of two or more ASCII letters; tokens listed in
    ``self.word_exclusions`` are ignored.

    Args:
        df: Event frame with a ``content`` column (missing values allowed).

    Returns:
        ``{"total_tokens", "unique_tokens", "ttr"}`` where ``ttr`` is
        unique / total rounded to 4 decimals, or 0.0 when there are no tokens.
    """
    tokens = (
        df["content"].fillna("").astype(str).str.lower()
        .str.findall(r"\b[a-z]{2,}\b")
        .explode()
        # explode() emits NaN for rows that produced no tokens; without this
        # they would be counted as tokens and deflate the ratio.
        .dropna()
    )
    tokens = tokens[~tokens.isin(self.word_exclusions)]
    total = int(len(tokens))
    unique = int(tokens.nunique())

    return {
        "total_tokens": total,
        "unique_tokens": unique,
        # Guard the division instead of reporting a phantom token.
        "ttr": round(unique / total, 4) if total else 0.0,
    }
|
||||
|
||||
@@ -6,7 +6,9 @@ from server.analysis.cultural import CulturalAnalysis
|
||||
from server.analysis.emotional import EmotionalAnalysis
|
||||
from server.analysis.interactional import InteractionAnalysis
|
||||
from server.analysis.linguistic import LinguisticAnalysis
|
||||
from server.analysis.summary import SummaryAnalysis
|
||||
from server.analysis.temporal import TemporalAnalysis
|
||||
from server.analysis.user import UserAnalysis
|
||||
|
||||
DOMAIN_STOPWORDS = {
|
||||
"www",
|
||||
@@ -36,12 +38,11 @@ class StatGen:
|
||||
self.interaction_analysis = InteractionAnalysis(EXCLUDE_WORDS)
|
||||
self.linguistic_analysis = LinguisticAnalysis(EXCLUDE_WORDS)
|
||||
self.cultural_analysis = CulturalAnalysis()
|
||||
self.summary_analysis = SummaryAnalysis()
|
||||
self.user_analysis = UserAnalysis(EXCLUDE_WORDS)
|
||||
|
||||
## Private Methods
|
||||
def _prepare_filtered_df(self,
|
||||
df: pd.DataFrame,
|
||||
filters: dict | None = None
|
||||
) -> pd.DataFrame:
|
||||
def _prepare_filtered_df(self, df: pd.DataFrame, filters: dict | None = None) -> pd.DataFrame:
|
||||
filters = filters or {}
|
||||
filtered_df = df.copy()
|
||||
|
||||
@@ -51,10 +52,9 @@ class StatGen:
|
||||
data_source_filter = filters.get("data_sources", None)
|
||||
|
||||
if search_query:
|
||||
mask = (
|
||||
filtered_df["content"].str.contains(search_query, case=False, na=False)
|
||||
| filtered_df["author"].str.contains(search_query, case=False, na=False)
|
||||
)
|
||||
mask = filtered_df["content"].str.contains(
|
||||
search_query, case=False, na=False
|
||||
) | filtered_df["author"].str.contains(search_query, case=False, na=False)
|
||||
|
||||
# Only include title if the column exists
|
||||
if "title" in filtered_df.columns:
|
||||
@@ -76,10 +76,10 @@ class StatGen:
|
||||
return filtered_df
|
||||
|
||||
## Public Methods
|
||||
def filter_dataset(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||
def filter_dataset(self, df: pd.DataFrame, filters: dict | None = None) -> list[dict]:
|
||||
return self._prepare_filtered_df(df, filters).to_dict(orient="records")
|
||||
|
||||
def get_time_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||
def temporal(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||
filtered_df = self._prepare_filtered_df(df, filters)
|
||||
|
||||
return {
|
||||
@@ -87,84 +87,54 @@ class StatGen:
|
||||
"weekday_hour_heatmap": self.temporal_analysis.heatmap(filtered_df),
|
||||
}
|
||||
|
||||
def get_content_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||
def linguistic(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||
filtered_df = self._prepare_filtered_df(df, filters)
|
||||
|
||||
return {
|
||||
"word_frequencies": self.linguistic_analysis.word_frequencies(filtered_df),
|
||||
"common_two_phrases": self.linguistic_analysis.ngrams(filtered_df),
|
||||
"common_three_phrases": self.linguistic_analysis.ngrams(filtered_df, n=3),
|
||||
"average_emotion_by_topic": self.emotional_analysis.avg_emotion_by_topic(
|
||||
filtered_df
|
||||
)
|
||||
"lexical_diversity": self.linguistic_analysis.lexical_diversity(filtered_df)
|
||||
}
|
||||
|
||||
def get_user_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||
def emotional(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||
filtered_df = self._prepare_filtered_df(df, filters)
|
||||
|
||||
return {
|
||||
"top_users": self.interaction_analysis.top_users(filtered_df),
|
||||
"users": self.interaction_analysis.per_user_analysis(filtered_df),
|
||||
"interaction_graph": self.interaction_analysis.interaction_graph(filtered_df)
|
||||
"average_emotion_by_topic": self.emotional_analysis.avg_emotion_by_topic(filtered_df),
|
||||
"overall_emotion_average": self.emotional_analysis.overall_emotion_average(filtered_df),
|
||||
"dominant_emotion_distribution": self.emotional_analysis.dominant_emotion_distribution(filtered_df),
|
||||
"emotion_by_source": self.emotional_analysis.emotion_by_source(filtered_df)
|
||||
}
|
||||
|
||||
def get_interactional_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||
def user(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||
filtered_df = self._prepare_filtered_df(df, filters)
|
||||
|
||||
return {
|
||||
"average_thread_depth": self.interaction_analysis.average_thread_depth(
|
||||
filtered_df
|
||||
),
|
||||
"average_thread_length_by_emotion": self.interaction_analysis.average_thread_length_by_emotion(
|
||||
filtered_df
|
||||
),
|
||||
"top_users": self.user_analysis.top_users(filtered_df),
|
||||
"users": self.user_analysis.per_user_analysis(filtered_df)
|
||||
}
|
||||
|
||||
def get_cultural_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||
def interactional(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||
filtered_df = self._prepare_filtered_df(df, filters)
|
||||
|
||||
return {
|
||||
"identity_markers": self.cultural_analysis.get_identity_markers(
|
||||
filtered_df
|
||||
),
|
||||
"average_thread_depth": self.interaction_analysis.average_thread_depth(filtered_df),
|
||||
"top_interaction_pairs": self.interaction_analysis.top_interaction_pairs(filtered_df, top_n=100),
|
||||
"interaction_graph": self.interaction_analysis.interaction_graph(filtered_df),
|
||||
"conversation_concentration": self.interaction_analysis.conversation_concentration(filtered_df)
|
||||
}
|
||||
|
||||
def cultural(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||
filtered_df = self._prepare_filtered_df(df, filters)
|
||||
|
||||
return {
|
||||
"identity_markers": self.cultural_analysis.get_identity_markers(filtered_df),
|
||||
"stance_markers": self.cultural_analysis.get_stance_markers(filtered_df),
|
||||
"entity_salience": self.cultural_analysis.get_avg_emotions_per_entity(
|
||||
filtered_df
|
||||
),
|
||||
"avg_emotion_per_entity": self.cultural_analysis.get_avg_emotions_per_entity(filtered_df)
|
||||
}
|
||||
|
||||
def summary(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||
filtered_df = self._prepare_filtered_df(df, filters)
|
||||
|
||||
total_posts = (filtered_df["type"] == "post").sum()
|
||||
total_comments = (filtered_df["type"] == "comment").sum()
|
||||
events_per_user = filtered_df.groupby("author").size()
|
||||
|
||||
if filtered_df.empty:
|
||||
return {
|
||||
"total_events": 0,
|
||||
"total_posts": 0,
|
||||
"total_comments": 0,
|
||||
"unique_users": 0,
|
||||
"comments_per_post": 0,
|
||||
"lurker_ratio": 0,
|
||||
"time_range": {
|
||||
"start": None,
|
||||
"end": None,
|
||||
},
|
||||
"sources": [],
|
||||
}
|
||||
|
||||
return {
|
||||
"total_events": int(len(filtered_df)),
|
||||
"total_posts": int(total_posts),
|
||||
"total_comments": int(total_comments),
|
||||
"unique_users": int(events_per_user.count()),
|
||||
"comments_per_post": round(total_comments / max(total_posts, 1), 2),
|
||||
"lurker_ratio": round((events_per_user == 1).mean(), 2),
|
||||
"time_range": {
|
||||
"start": int(filtered_df["dt"].min().timestamp()),
|
||||
"end": int(filtered_df["dt"].max().timestamp()),
|
||||
},
|
||||
"sources": filtered_df["source"].dropna().unique().tolist(),
|
||||
}
|
||||
return self.summary_analysis.summary(filtered_df)
|
||||
|
||||
64
server/analysis/summary.py
Normal file
64
server/analysis/summary.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class SummaryAnalysis:
    """Headline statistics for a DataFrame of events.

    The methods read the ``type``, ``author``, ``dt`` and ``source`` columns.
    Every value returned is a plain Python type so the result can be
    serialised to JSON without NaN/NaT or numpy scalars leaking through.
    """

    def total_events(self, df: pd.DataFrame) -> int:
        """Return the number of rows in ``df``."""
        return int(len(df))

    def total_posts(self, df: pd.DataFrame) -> int:
        """Return how many rows have ``type == "post"``."""
        return int((df["type"] == "post").sum())

    def total_comments(self, df: pd.DataFrame) -> int:
        """Return how many rows have ``type == "comment"``."""
        return int((df["type"] == "comment").sum())

    def unique_users(self, df: pd.DataFrame) -> int:
        """Return the number of distinct non-null authors."""
        return int(df["author"].nunique())

    def comments_per_post(self, total_comments: int, total_posts: int) -> float:
        """Return comments divided by posts, rounded to 2 decimals.

        A post count of zero is treated as one so the division never fails.
        """
        return round(total_comments / max(total_posts, 1), 2)

    def lurker_ratio(self, df: pd.DataFrame) -> float:
        """Return the share of authors that appear exactly once, rounded to 2 decimals.

        Rows with a null author are ignored by the grouping. When no author is
        left, 0.0 is returned instead of NaN (NaN is not valid JSON).
        """
        events_per_user = df.groupby("author").size()
        if events_per_user.empty:
            return 0.0
        return float(round((events_per_user == 1).mean(), 2))

    def time_range(self, df: pd.DataFrame) -> dict:
        """Return the earliest and latest ``dt`` as integer epoch seconds.

        If the column holds no valid datetime (min/max are NaT) both bounds are
        ``None``, the same shape ``empty_summary`` uses.
        """
        start = df["dt"].min()
        end = df["dt"].max()
        if pd.isna(start) or pd.isna(end):
            return {"start": None, "end": None}
        return {
            "start": int(start.timestamp()),
            "end": int(end.timestamp()),
        }

    def sources(self, df: pd.DataFrame) -> list:
        """Return the distinct non-null ``source`` values in first-seen order."""
        return df["source"].dropna().unique().tolist()

    def empty_summary(self) -> dict:
        """Return the summary payload used when there are no events at all."""
        return {
            "total_events": 0,
            "total_posts": 0,
            "total_comments": 0,
            "unique_users": 0,
            "comments_per_post": 0,
            "lurker_ratio": 0,
            "time_range": {
                "start": None,
                "end": None,
            },
            "sources": [],
        }

    def summary(self, df: pd.DataFrame) -> dict:
        """Assemble every headline statistic for ``df`` into one dict."""
        if df.empty:
            return self.empty_summary()

        # Computed once because comments_per_post needs both counts.
        total_posts = self.total_posts(df)
        total_comments = self.total_comments(df)

        return {
            "total_events": self.total_events(df),
            "total_posts": total_posts,
            "total_comments": total_comments,
            "unique_users": self.unique_users(df),
            "comments_per_post": self.comments_per_post(total_comments, total_posts),
            "lurker_ratio": self.lurker_ratio(df),
            "time_range": self.time_range(df),
            "sources": self.sources(df),
        }
|
||||
124
server/analysis/user.py
Normal file
124
server/analysis/user.py
Normal file
@@ -0,0 +1,124 @@
|
||||
import pandas as pd
|
||||
import re
|
||||
|
||||
from collections import Counter
|
||||
|
||||
class UserAnalysis:
    """Per-author statistics: activity counts, comment/post balance, average
    emotion scores and vocabulary richness."""

    # Lowercase ASCII words of three or more letters. Compiled once instead of
    # on every call; callers are expected to lowercase the text beforehand.
    _TOKEN_PATTERN = re.compile(r"\b[a-z]{3,}\b")

    def __init__(self, word_exclusions: set[str]):
        """Store the set of words (e.g. stop words) to drop while tokenizing."""
        self.word_exclusions = word_exclusions

    def _tokenize(self, text: str) -> list[str]:
        """Split ``text`` into word tokens, dropping excluded words."""
        return [
            token
            for token in self._TOKEN_PATTERN.findall(text)
            if token not in self.word_exclusions
        ]

    def _vocab_richness_per_user(
        self, df: pd.DataFrame, min_words: int = 20, top_most_used_words: int = 100
    ) -> list:
        """Compute vocabulary statistics for each author.

        Args:
            df: events with ``author`` and ``content`` columns.
            min_words: authors with fewer tokens than this are left out.
            top_most_used_words: how many of the author's most frequent words
                to include.

        Returns:
            One dict per qualifying author, sorted by ``vocab_richness``
            descending.
        """
        df = df.copy()  # never mutate the caller's frame
        df["content"] = df["content"].fillna("").astype(str).str.lower()
        df["tokens"] = df["content"].apply(self._tokenize)

        rows = []
        for author, group in df.groupby("author"):
            all_tokens = [t for tokens in group["tokens"] for t in tokens]

            total_words = len(all_tokens)
            unique_words = len(set(all_tokens))
            events = len(group)

            # Min amount of words for a user, any less than this might give weird results
            if total_words < min_words:
                continue

            # 100% = they never reused a word (excluding stop words)
            vocab_richness = unique_words / total_words
            avg_words = total_words / max(events, 1)

            counts = Counter(all_tokens)
            top_words = [
                {"word": w, "count": int(c)}
                for w, c in counts.most_common(top_most_used_words)
            ]

            rows.append(
                {
                    "author": author,
                    "events": int(events),
                    "total_words": int(total_words),
                    "unique_words": int(unique_words),
                    "vocab_richness": round(vocab_richness, 3),
                    "avg_words_per_event": round(avg_words, 2),
                    "top_words": top_words,
                }
            )

        rows.sort(key=lambda x: x["vocab_richness"], reverse=True)
        return rows

    def top_users(self, df: pd.DataFrame) -> list:
        """Return event counts per (author, source) pair, largest first."""
        counts = df.groupby(["author", "source"]).size().sort_values(ascending=False)

        return [
            {"author": author, "source": source, "count": int(count)}
            for (author, source), count in counts.items()
        ]

    def per_user_analysis(self, df: pd.DataFrame) -> list[dict]:
        """Build one record per author, sorted by ``comment_post_ratio`` ascending.

        Each record holds the author's post and comment counts, the
        comment/post ratio, the comment share, the mean of every ``emotion_*``
        column and the vocabulary statistics. Authors without vocabulary
        statistics get a zeroed placeholder.
        """
        per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0)

        emotion_cols = [col for col in df.columns if col.startswith("emotion_")]

        avg_emotions_by_author = {}
        if emotion_cols:
            avg_emotions = df.groupby("author")[emotion_cols].mean().fillna(0.0)
            avg_emotions_by_author = {
                author: {emotion: float(score) for emotion, score in row.items()}
                for author, row in avg_emotions.iterrows()
            }

        # ensure columns always exist
        for col in ("post", "comment"):
            if col not in per_user.columns:
                per_user[col] = 0

        # A zero denominator is replaced by 1 so the divisions never produce inf/NaN.
        per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(
            0, 1
        )
        per_user["comment_share"] = per_user["comment"] / (
            per_user["post"] + per_user["comment"]
        ).replace(0, 1)

        # The records inherit this ordering, so no second sort is needed below.
        per_user = per_user.sort_values("comment_post_ratio", ascending=True)
        per_user_records = per_user.reset_index().to_dict(orient="records")

        vocab_rows = self._vocab_richness_per_user(df)
        vocab_by_author = {row["author"]: row for row in vocab_rows}

        # merge vocab richness + per_user information
        merged_users = []
        for row in per_user_records:
            author = row["author"]
            merged_users.append(
                {
                    "author": author,
                    "post": int(row.get("post", 0)),
                    "comment": int(row.get("comment", 0)),
                    "comment_post_ratio": float(row.get("comment_post_ratio", 0)),
                    "comment_share": float(row.get("comment_share", 0)),
                    "avg_emotions": avg_emotions_by_author.get(author, {}),
                    "vocab": vocab_by_author.get(
                        author,
                        {
                            "vocab_richness": 0,
                            "avg_words_per_event": 0,
                            "top_words": [],
                        },
                    ),
                }
            )

        return merged_users
|
||||
138
server/app.py
138
server/app.py
@@ -186,7 +186,7 @@ def scrape_data():
|
||||
dataset_manager.set_dataset_status(
|
||||
dataset_id,
|
||||
"fetching",
|
||||
f"Data is being fetched from {', '.join(source['name'] for source in source_configs)}"
|
||||
f"Data is being fetched from {', '.join(source['name'] for source in source_configs)}",
|
||||
)
|
||||
|
||||
fetch_and_process_dataset.delay(
|
||||
@@ -198,12 +198,14 @@ def scrape_data():
|
||||
print(traceback.format_exc())
|
||||
return jsonify({"error": "Failed to queue dataset processing"}), 500
|
||||
|
||||
return jsonify(
|
||||
{
|
||||
"message": "Dataset queued for processing",
|
||||
"dataset_id": dataset_id,
|
||||
"status": "processing",
|
||||
}
|
||||
), 202
|
||||
|
||||
return jsonify({
|
||||
"message": "Dataset queued for processing",
|
||||
"dataset_id": dataset_id,
|
||||
"status": "processing"
|
||||
}), 202
|
||||
|
||||
@app.route("/datasets/upload", methods=["POST"])
|
||||
@jwt_required()
|
||||
@@ -233,7 +235,9 @@ def upload_data():
|
||||
|
||||
posts_df = pd.read_json(post_file, lines=True, convert_dates=False)
|
||||
topics = json.load(topic_file)
|
||||
dataset_id = dataset_manager.save_dataset_info(current_user, dataset_name, topics)
|
||||
dataset_id = dataset_manager.save_dataset_info(
|
||||
current_user, dataset_name, topics
|
||||
)
|
||||
|
||||
process_dataset.delay(dataset_id, posts_df.to_dict(orient="records"), topics)
|
||||
|
||||
@@ -249,6 +253,7 @@ def upload_data():
|
||||
except Exception as e:
|
||||
return jsonify({"error": f"An unexpected error occurred"}), 500
|
||||
|
||||
|
||||
@app.route("/dataset/<int:dataset_id>", methods=["GET"])
|
||||
@jwt_required()
|
||||
def get_dataset(dataset_id):
|
||||
@@ -256,7 +261,9 @@ def get_dataset(dataset_id):
|
||||
user_id = int(get_jwt_identity())
|
||||
|
||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
||||
raise NotAuthorisedException(
|
||||
"This user is not authorised to access this dataset"
|
||||
)
|
||||
|
||||
dataset_info = dataset_manager.get_dataset_info(dataset_id)
|
||||
included_cols = {"id", "name", "created_at"}
|
||||
@@ -270,6 +277,7 @@ def get_dataset(dataset_id):
|
||||
print(traceback.format_exc())
|
||||
return jsonify({"error": "An unexpected error occured"}), 500
|
||||
|
||||
|
||||
@app.route("/dataset/<int:dataset_id>", methods=["PATCH"])
|
||||
@jwt_required()
|
||||
def update_dataset(dataset_id):
|
||||
@@ -277,7 +285,9 @@ def update_dataset(dataset_id):
|
||||
user_id = int(get_jwt_identity())
|
||||
|
||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
||||
raise NotAuthorisedException(
|
||||
"This user is not authorised to access this dataset"
|
||||
)
|
||||
|
||||
body = request.get_json()
|
||||
new_name = body.get("name")
|
||||
@@ -286,7 +296,9 @@ def update_dataset(dataset_id):
|
||||
return jsonify({"error": "A valid name must be provided"}), 400
|
||||
|
||||
dataset_manager.update_dataset_name(dataset_id, new_name.strip())
|
||||
return jsonify({"message": f"Dataset {dataset_id} renamed to '{new_name.strip()}'"}), 200
|
||||
return jsonify(
|
||||
{"message": f"Dataset {dataset_id} renamed to '{new_name.strip()}'"}
|
||||
), 200
|
||||
except NotAuthorisedException:
|
||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||
except NonExistentDatasetException:
|
||||
@@ -295,6 +307,7 @@ def update_dataset(dataset_id):
|
||||
print(traceback.format_exc())
|
||||
return jsonify({"error": "An unexpected error occurred"}), 500
|
||||
|
||||
|
||||
@app.route("/dataset/<int:dataset_id>", methods=["DELETE"])
|
||||
@jwt_required()
|
||||
def delete_dataset(dataset_id):
|
||||
@@ -302,11 +315,17 @@ def delete_dataset(dataset_id):
|
||||
user_id = int(get_jwt_identity())
|
||||
|
||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
||||
raise NotAuthorisedException(
|
||||
"This user is not authorised to access this dataset"
|
||||
)
|
||||
|
||||
dataset_manager.delete_dataset_info(dataset_id)
|
||||
dataset_manager.delete_dataset_content(dataset_id)
|
||||
return jsonify({"message": f"Dataset {dataset_id} metadata and content successfully deleted"}), 200
|
||||
return jsonify(
|
||||
{
|
||||
"message": f"Dataset {dataset_id} metadata and content successfully deleted"
|
||||
}
|
||||
), 200
|
||||
except NotAuthorisedException:
|
||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||
except NonExistentDatasetException:
|
||||
@@ -315,6 +334,7 @@ def delete_dataset(dataset_id):
|
||||
print(traceback.format_exc())
|
||||
return jsonify({"error": "An unexpected error occured"}), 500
|
||||
|
||||
|
||||
@app.route("/dataset/<int:dataset_id>/status", methods=["GET"])
|
||||
@jwt_required()
|
||||
def get_dataset_status(dataset_id):
|
||||
@@ -322,7 +342,9 @@ def get_dataset_status(dataset_id):
|
||||
user_id = int(get_jwt_identity())
|
||||
|
||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
||||
raise NotAuthorisedException(
|
||||
"This user is not authorised to access this dataset"
|
||||
)
|
||||
|
||||
dataset_status = dataset_manager.get_dataset_status(dataset_id)
|
||||
return jsonify(dataset_status), 200
|
||||
@@ -334,17 +356,44 @@ def get_dataset_status(dataset_id):
|
||||
print(traceback.format_exc())
|
||||
return jsonify({"error": "An unexpected error occured"}), 500
|
||||
|
||||
@app.route("/dataset/<int:dataset_id>/content", methods=["GET"])
|
||||
|
||||
@app.route("/dataset/<int:dataset_id>/linguistic", methods=["GET"])
|
||||
@jwt_required()
|
||||
def content_endpoint(dataset_id):
|
||||
def get_linguistic_analysis(dataset_id):
|
||||
try:
|
||||
user_id = int(get_jwt_identity())
|
||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
||||
raise NotAuthorisedException(
|
||||
"This user is not authorised to access this dataset"
|
||||
)
|
||||
|
||||
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||
filters = get_request_filters()
|
||||
return jsonify(stat_gen.get_content_analysis(dataset_content, filters)), 200
|
||||
return jsonify(stat_gen.linguistic(dataset_content, filters)), 200
|
||||
except NotAuthorisedException:
|
||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||
except NonExistentDatasetException:
|
||||
return jsonify({"error": "Dataset does not exist"}), 404
|
||||
except ValueError as e:
|
||||
return jsonify({"error": f"Malformed or missing data"}), 400
|
||||
except Exception as e:
|
||||
print(traceback.format_exc())
|
||||
return jsonify({"error": f"An unexpected error occurred"}), 500
|
||||
|
||||
|
||||
@app.route("/dataset/<int:dataset_id>/emotional", methods=["GET"])
|
||||
@jwt_required()
|
||||
def get_emotional_analysis(dataset_id):
|
||||
try:
|
||||
user_id = int(get_jwt_identity())
|
||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||
raise NotAuthorisedException(
|
||||
"This user is not authorised to access this dataset"
|
||||
)
|
||||
|
||||
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||
filters = get_request_filters()
|
||||
return jsonify(stat_gen.emotional(dataset_content, filters)), 200
|
||||
except NotAuthorisedException:
|
||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||
except NonExistentDatasetException:
|
||||
@@ -362,7 +411,9 @@ def get_summary(dataset_id):
|
||||
try:
|
||||
user_id = int(get_jwt_identity())
|
||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
||||
raise NotAuthorisedException(
|
||||
"This user is not authorised to access this dataset"
|
||||
)
|
||||
|
||||
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||
filters = get_request_filters()
|
||||
@@ -378,17 +429,19 @@ def get_summary(dataset_id):
|
||||
return jsonify({"error": f"An unexpected error occurred"}), 500
|
||||
|
||||
|
||||
@app.route("/dataset/<int:dataset_id>/time", methods=["GET"])
|
||||
@app.route("/dataset/<int:dataset_id>/temporal", methods=["GET"])
|
||||
@jwt_required()
|
||||
def get_time_analysis(dataset_id):
|
||||
def get_temporal_analysis(dataset_id):
|
||||
try:
|
||||
user_id = int(get_jwt_identity())
|
||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
||||
raise NotAuthorisedException(
|
||||
"This user is not authorised to access this dataset"
|
||||
)
|
||||
|
||||
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||
filters = get_request_filters()
|
||||
return jsonify(stat_gen.get_time_analysis(dataset_content, filters)), 200
|
||||
return jsonify(stat_gen.temporal(dataset_content, filters)), 200
|
||||
except NotAuthorisedException:
|
||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||
except NonExistentDatasetException:
|
||||
@@ -406,11 +459,13 @@ def get_user_analysis(dataset_id):
|
||||
try:
|
||||
user_id = int(get_jwt_identity())
|
||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
||||
raise NotAuthorisedException(
|
||||
"This user is not authorised to access this dataset"
|
||||
)
|
||||
|
||||
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||
filters = get_request_filters()
|
||||
return jsonify(stat_gen.get_user_analysis(dataset_content, filters)), 200
|
||||
return jsonify(stat_gen.user(dataset_content, filters)), 200
|
||||
except NotAuthorisedException:
|
||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||
except NonExistentDatasetException:
|
||||
@@ -428,11 +483,13 @@ def get_cultural_analysis(dataset_id):
|
||||
try:
|
||||
user_id = int(get_jwt_identity())
|
||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
||||
raise NotAuthorisedException(
|
||||
"This user is not authorised to access this dataset"
|
||||
)
|
||||
|
||||
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||
filters = get_request_filters()
|
||||
return jsonify(stat_gen.get_cultural_analysis(dataset_content, filters)), 200
|
||||
return jsonify(stat_gen.cultural(dataset_content, filters)), 200
|
||||
except NotAuthorisedException:
|
||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||
except NonExistentDatasetException:
|
||||
@@ -444,17 +501,19 @@ def get_cultural_analysis(dataset_id):
|
||||
return jsonify({"error": f"An unexpected error occurred"}), 500
|
||||
|
||||
|
||||
@app.route("/dataset/<int:dataset_id>/interaction", methods=["GET"])
|
||||
@app.route("/dataset/<int:dataset_id>/interactional", methods=["GET"])
|
||||
@jwt_required()
|
||||
def get_interaction_analysis(dataset_id):
|
||||
try:
|
||||
user_id = int(get_jwt_identity())
|
||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
||||
raise NotAuthorisedException(
|
||||
"This user is not authorised to access this dataset"
|
||||
)
|
||||
|
||||
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||
filters = get_request_filters()
|
||||
return jsonify(stat_gen.get_interactional_analysis(dataset_content, filters)), 200
|
||||
return jsonify(stat_gen.interactional(dataset_content, filters)), 200
|
||||
except NotAuthorisedException:
|
||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||
except NonExistentDatasetException:
|
||||
@@ -465,6 +524,27 @@ def get_interaction_analysis(dataset_id):
|
||||
print(traceback.format_exc())
|
||||
return jsonify({"error": f"An unexpected error occurred"}), 500
|
||||
|
||||
@app.route("/dataset/<int:dataset_id>/all", methods=["GET"])
@jwt_required()
def get_full_dataset(dataset_id: int):
    """Return every stored row of a dataset as a JSON list of records.

    No request filters are applied here. Responses:
        200 - list of row dicts
        403 - the authenticated user may not access this dataset
        404 - the dataset does not exist
        400 - a ValueError was raised while handling the request
        500 - any other failure (the traceback is printed)
    """
    try:
        user_id = int(get_jwt_identity())
        if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
            raise NotAuthorisedException(
                "This user is not authorised to access this dataset"
            )

        dataset_content = dataset_manager.get_dataset_content(dataset_id)
        return jsonify(dataset_content.to_dict(orient="records")), 200
    except NotAuthorisedException:
        return jsonify({"error": "User is not authorised to access this content"}), 403
    except NonExistentDatasetException:
        return jsonify({"error": "Dataset does not exist"}), 404
    except ValueError:
        return jsonify({"error": "Malformed or missing data"}), 400
    except Exception:
        # Log the details server-side; the client only gets a generic message.
        print(traceback.format_exc())
        return jsonify({"error": "An unexpected error occurred"}), 500
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(debug=True)
|
||||
|
||||
@@ -101,7 +101,7 @@ class DatasetManager:
|
||||
row["source"],
|
||||
row.get("topic"),
|
||||
row.get("topic_confidence"),
|
||||
Json(row["ner_entities"]) if row.get("ner_entities") else None,
|
||||
Json(row["entities"]) if row.get("entities") is not None else None,
|
||||
row.get("emotion_anger"),
|
||||
row.get("emotion_disgust"),
|
||||
row.get("emotion_fear"),
|
||||
|
||||
@@ -43,7 +43,7 @@ CREATE TABLE events (
|
||||
weekday VARCHAR(255) NOT NULL,
|
||||
|
||||
/* Posts Only */
|
||||
title VARCHAR(255),
|
||||
title TEXT,
|
||||
|
||||
/* Comments Only*/
|
||||
parent_id VARCHAR(255),
|
||||
|
||||
Reference in New Issue
Block a user