Compare commits
25 Commits
94befb61c5
...
acc591ff1e
| Author | SHA1 | Date | |
|---|---|---|---|
| acc591ff1e | |||
| e054997bb1 | |||
| e5414befa7 | |||
| 86926898ce | |||
| b1177540a1 | |||
| f604fcc531 | |||
| b7aec2b0ea | |||
| 1446dd176d | |||
| c215024ef2 | |||
| 17ef42e548 | |||
| 7e4a91bb5e | |||
| 436549641f | |||
| 3e78a54388 | |||
| 71998c450e | |||
| 2a00384a55 | |||
| 8372aa7278 | |||
| 7b5a939271 | |||
| 2fa1dff4b7 | |||
| 31fb275ee3 | |||
| 8a0f6e71e8 | |||
| 9093059d05 | |||
| 8a13444b16 | |||
| 3468fdc2ea | |||
| 09a4f9036f | |||
| 97fccd073b |
@@ -2,7 +2,7 @@
|
|||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8" />
|
<meta charset="UTF-8" />
|
||||||
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
<link rel="icon" type="image/png" href="/icon.png" />
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||||
<title>frontend</title>
|
<title>frontend</title>
|
||||||
</head>
|
</head>
|
||||||
|
|||||||
BIN
frontend/public/icon.png
Normal file
BIN
frontend/public/icon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 19 KiB |
158
frontend/src/components/CulturalStats.tsx
Normal file
158
frontend/src/components/CulturalStats.tsx
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
import Card from "./Card";
|
||||||
|
import StatsStyling from "../styles/stats_styling";
|
||||||
|
import type { CulturalAnalysisResponse } from "../types/ApiTypes";
|
||||||
|
|
||||||
|
const styles = StatsStyling;
|
||||||
|
|
||||||
|
type CulturalStatsProps = {
|
||||||
|
data: CulturalAnalysisResponse;
|
||||||
|
};
|
||||||
|
|
||||||
|
const CulturalStats = ({ data }: CulturalStatsProps) => {
|
||||||
|
const identity = data.identity_markers;
|
||||||
|
const stance = data.stance_markers;
|
||||||
|
const inGroupWords = identity?.in_group_usage ?? 0;
|
||||||
|
const outGroupWords = identity?.out_group_usage ?? 0;
|
||||||
|
const totalGroupWords = inGroupWords + outGroupWords;
|
||||||
|
const inGroupWordRate = typeof identity?.in_group_ratio === "number"
|
||||||
|
? identity.in_group_ratio * 100
|
||||||
|
: null;
|
||||||
|
const outGroupWordRate = typeof identity?.out_group_ratio === "number"
|
||||||
|
? identity.out_group_ratio * 100
|
||||||
|
: null;
|
||||||
|
const rawEntities = data.avg_emotion_per_entity?.entity_emotion_avg ?? {};
|
||||||
|
const entities = Object.entries(rawEntities)
|
||||||
|
.sort((a, b) => (b[1].post_count - a[1].post_count))
|
||||||
|
.slice(0, 20);
|
||||||
|
|
||||||
|
const topEmotion = (emotionAvg: Record<string, number> | undefined) => {
|
||||||
|
const entries = Object.entries(emotionAvg ?? {});
|
||||||
|
if (!entries.length) {
|
||||||
|
return "—";
|
||||||
|
}
|
||||||
|
|
||||||
|
entries.sort((a, b) => b[1] - a[1]);
|
||||||
|
const dominant = entries[0] ?? ["emotion_unknown", 0];
|
||||||
|
const dominantLabel = dominant[0].replace("emotion_", "");
|
||||||
|
return `${dominantLabel} (${(dominant[1] * 100).toFixed(1)}%)`;
|
||||||
|
};
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div style={styles.page}>
|
||||||
|
<div style={{ ...styles.container, ...styles.grid }}>
|
||||||
|
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
||||||
|
<h2 style={styles.sectionTitle}>Community Framing Overview</h2>
|
||||||
|
<p style={styles.sectionSubtitle}>Simple view of how often people use "us" words vs "them" words, and the tone around that language.</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<Card
|
||||||
|
label="In-Group Words"
|
||||||
|
value={inGroupWords.toLocaleString()}
|
||||||
|
sublabel="Times we/us/our appears"
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="Out-Group Words"
|
||||||
|
value={outGroupWords.toLocaleString()}
|
||||||
|
sublabel="Times they/them/their appears"
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="In-Group Posts"
|
||||||
|
value={identity?.in_group_posts?.toLocaleString() ?? "—"}
|
||||||
|
sublabel='Posts leaning toward "us" language'
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="Out-Group Posts"
|
||||||
|
value={identity?.out_group_posts?.toLocaleString() ?? "—"}
|
||||||
|
sublabel='Posts leaning toward "them" language'
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<Card
|
||||||
|
label="Balanced Posts"
|
||||||
|
value={identity?.tie_posts?.toLocaleString() ?? "—"}
|
||||||
|
sublabel="Posts with equal us/them signals"
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="Total Group Words"
|
||||||
|
value={totalGroupWords.toLocaleString()}
|
||||||
|
sublabel="In-group + out-group words"
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="In-Group Share"
|
||||||
|
value={inGroupWordRate === null ? "—" : `${inGroupWordRate.toFixed(2)}%`}
|
||||||
|
sublabel="Share of all words"
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="Out-Group Share"
|
||||||
|
value={outGroupWordRate === null ? "—" : `${outGroupWordRate.toFixed(2)}%`}
|
||||||
|
sublabel="Share of all words"
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<Card
|
||||||
|
label="Hedging Words"
|
||||||
|
value={stance?.hedge_total?.toLocaleString() ?? "—"}
|
||||||
|
sublabel={typeof stance?.hedge_per_1k_tokens === "number" ? `${stance.hedge_per_1k_tokens.toFixed(1)} per 1k words` : "Word frequency"}
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="Certainty Words"
|
||||||
|
value={stance?.certainty_total?.toLocaleString() ?? "—"}
|
||||||
|
sublabel={typeof stance?.certainty_per_1k_tokens === "number" ? `${stance.certainty_per_1k_tokens.toFixed(1)} per 1k words` : "Word frequency"}
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="Need/Should Words"
|
||||||
|
value={stance?.deontic_total?.toLocaleString() ?? "—"}
|
||||||
|
sublabel={typeof stance?.deontic_per_1k_tokens === "number" ? `${stance.deontic_per_1k_tokens.toFixed(1)} per 1k words` : "Word frequency"}
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="Permission Words"
|
||||||
|
value={stance?.permission_total?.toLocaleString() ?? "—"}
|
||||||
|
sublabel={typeof stance?.permission_per_1k_tokens === "number" ? `${stance.permission_per_1k_tokens.toFixed(1)} per 1k words` : "Word frequency"}
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<div style={{ ...styles.card, gridColumn: "span 6" }}>
|
||||||
|
<h2 style={styles.sectionTitle}>Mood in "Us" Posts</h2>
|
||||||
|
<p style={styles.sectionSubtitle}>Most likely emotion when in-group wording is stronger.</p>
|
||||||
|
<div style={styles.topUserName}>{topEmotion(identity?.in_group_emotion_avg)}</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style={{ ...styles.card, gridColumn: "span 6" }}>
|
||||||
|
<h2 style={styles.sectionTitle}>Mood in "Them" Posts</h2>
|
||||||
|
<p style={styles.sectionSubtitle}>Most likely emotion when out-group wording is stronger.</p>
|
||||||
|
<div style={styles.topUserName}>{topEmotion(identity?.out_group_emotion_avg)}</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
||||||
|
<h2 style={styles.sectionTitle}>Entity Mood Snapshot</h2>
|
||||||
|
<p style={styles.sectionSubtitle}>Most mentioned entities and the mood that appears most with each.</p>
|
||||||
|
{!entities.length ? (
|
||||||
|
<div style={styles.topUserMeta}>No entity-level cultural data available.</div>
|
||||||
|
) : (
|
||||||
|
<div style={{ ...styles.topUsersList, maxHeight: 420, overflowY: "auto" }}>
|
||||||
|
{entities.map(([entity, aggregate]) => (
|
||||||
|
<div key={entity} style={styles.topUserItem}>
|
||||||
|
<div style={styles.topUserName}>{entity}</div>
|
||||||
|
<div style={styles.topUserMeta}>
|
||||||
|
{aggregate.post_count.toLocaleString()} posts • Likely mood: {topEmotion(aggregate.emotion_avg)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
|
export default CulturalStats;
|
||||||
@@ -9,6 +9,9 @@ type EmotionalStatsProps = {
|
|||||||
|
|
||||||
const EmotionalStats = ({contentData}: EmotionalStatsProps) => {
|
const EmotionalStats = ({contentData}: EmotionalStatsProps) => {
|
||||||
const rows = contentData.average_emotion_by_topic ?? [];
|
const rows = contentData.average_emotion_by_topic ?? [];
|
||||||
|
const overallEmotionAverage = contentData.overall_emotion_average ?? [];
|
||||||
|
const dominantEmotionDistribution = contentData.dominant_emotion_distribution ?? [];
|
||||||
|
const emotionBySource = contentData.emotion_by_source ?? [];
|
||||||
const lowSampleThreshold = 20;
|
const lowSampleThreshold = 20;
|
||||||
const stableSampleThreshold = 50;
|
const stableSampleThreshold = 50;
|
||||||
const emotionKeys = rows.length
|
const emotionKeys = rows.length
|
||||||
@@ -64,39 +67,104 @@ const EmotionalStats = ({contentData}: EmotionalStatsProps) => {
|
|||||||
return (
|
return (
|
||||||
<div style={styles.page}>
|
<div style={styles.page}>
|
||||||
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
|
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
|
||||||
<h2 style={styles.sectionTitle}>Average Emotion by Topic</h2>
|
<h2 style={styles.sectionTitle}>Topic Mood Overview</h2>
|
||||||
<p style={styles.sectionSubtitle}>Read confidence together with sample size. Topics with fewer than {lowSampleThreshold} events are usually noisy and less reliable.</p>
|
<p style={styles.sectionSubtitle}>Use the strength score together with post count. Topics with fewer than {lowSampleThreshold} events are often noisy.</p>
|
||||||
<div style={styles.emotionalSummaryRow}>
|
<div style={styles.emotionalSummaryRow}>
|
||||||
<span><strong style={{ color: "#24292f" }}>Topics:</strong> {strongestPerTopic.length}</span>
|
<span><strong style={{ color: "#24292f" }}>Topics:</strong> {strongestPerTopic.length}</span>
|
||||||
<span><strong style={{ color: "#24292f" }}>Median Sample:</strong> {medianSampleSize} events</span>
|
<span><strong style={{ color: "#24292f" }}>Median Posts:</strong> {medianSampleSize}</span>
|
||||||
<span><strong style={{ color: "#24292f" }}>Low Sample (<{lowSampleThreshold}):</strong> {lowSampleTopics}</span>
|
<span><strong style={{ color: "#24292f" }}>Small Topics (<{lowSampleThreshold}):</strong> {lowSampleTopics}</span>
|
||||||
<span><strong style={{ color: "#24292f" }}>Stable Sample ({stableSampleThreshold}+):</strong> {stableSampleTopics}</span>
|
<span><strong style={{ color: "#24292f" }}>Stable Topics ({stableSampleThreshold}+):</strong> {stableSampleTopics}</span>
|
||||||
</div>
|
</div>
|
||||||
<p style={{ ...styles.sectionSubtitle, marginTop: 10, marginBottom: 0 }}>
|
<p style={{ ...styles.sectionSubtitle, marginTop: 10, marginBottom: 0 }}>
|
||||||
Confidence reflects how strongly one emotion leads within a topic, not model accuracy. Use larger samples for stronger conclusions.
|
Strength means how far the top emotion is ahead in that topic. It does not mean model accuracy.
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div style={{ ...styles.container, ...styles.grid }}>
|
<div style={{ ...styles.container, ...styles.grid }}>
|
||||||
{strongestPerTopic.map((topic) => (
|
<div style={{ ...styles.card, gridColumn: "span 4" }}>
|
||||||
<div key={topic.topic} style={{ ...styles.card, gridColumn: "span 4" }}>
|
<h2 style={styles.sectionTitle}>Mood Averages</h2>
|
||||||
<h3 style={{ ...styles.sectionTitle, marginBottom: 6 }}>{topic.topic}</h3>
|
<p style={styles.sectionSubtitle}>Average score for each emotion.</p>
|
||||||
<div style={styles.emotionalTopicLabel}>
|
{!overallEmotionAverage.length ? (
|
||||||
Top Emotion
|
<div style={styles.topUserMeta}>No overall emotion averages available.</div>
|
||||||
|
) : (
|
||||||
|
<div style={{ ...styles.topUsersList, maxHeight: 260, overflowY: "auto" }}>
|
||||||
|
{[...overallEmotionAverage]
|
||||||
|
.sort((a, b) => b.score - a.score)
|
||||||
|
.map((row) => (
|
||||||
|
<div key={row.emotion} style={styles.topUserItem}>
|
||||||
|
<div style={styles.topUserName}>{formatEmotion(row.emotion)}</div>
|
||||||
|
<div style={styles.topUserMeta}>{row.score.toFixed(3)}</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
</div>
|
</div>
|
||||||
<div style={styles.emotionalTopicValue}>
|
)}
|
||||||
{formatEmotion(topic.emotion)}
|
</div>
|
||||||
|
|
||||||
|
<div style={{ ...styles.card, gridColumn: "span 4" }}>
|
||||||
|
<h2 style={styles.sectionTitle}>Mood Split</h2>
|
||||||
|
<p style={styles.sectionSubtitle}>How often each emotion is dominant.</p>
|
||||||
|
{!dominantEmotionDistribution.length ? (
|
||||||
|
<div style={styles.topUserMeta}>No dominant-emotion split available.</div>
|
||||||
|
) : (
|
||||||
|
<div style={{ ...styles.topUsersList, maxHeight: 260, overflowY: "auto" }}>
|
||||||
|
{[...dominantEmotionDistribution]
|
||||||
|
.sort((a, b) => b.ratio - a.ratio)
|
||||||
|
.map((row) => (
|
||||||
|
<div key={row.emotion} style={styles.topUserItem}>
|
||||||
|
<div style={styles.topUserName}>{formatEmotion(row.emotion)}</div>
|
||||||
|
<div style={styles.topUserMeta}>{(row.ratio * 100).toFixed(1)}% • {row.count.toLocaleString()} events</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
</div>
|
</div>
|
||||||
<div style={styles.emotionalMetricRow}>
|
)}
|
||||||
<span>Confidence</span>
|
</div>
|
||||||
<span style={styles.emotionalMetricValue}>{topic.value.toFixed(3)}</span>
|
|
||||||
</div>
|
<div style={{ ...styles.card, gridColumn: "span 4" }}>
|
||||||
<div style={styles.emotionalMetricRowCompact}>
|
<h2 style={styles.sectionTitle}>Mood by Source</h2>
|
||||||
<span>Sample Size</span>
|
<p style={styles.sectionSubtitle}>Leading emotion in each source.</p>
|
||||||
<span style={styles.emotionalMetricValue}>{topic.count} events</span>
|
{!emotionBySource.length ? (
|
||||||
|
<div style={styles.topUserMeta}>No source emotion profile available.</div>
|
||||||
|
) : (
|
||||||
|
<div style={{ ...styles.topUsersList, maxHeight: 260, overflowY: "auto" }}>
|
||||||
|
{[...emotionBySource]
|
||||||
|
.sort((a, b) => b.event_count - a.event_count)
|
||||||
|
.map((row) => (
|
||||||
|
<div key={row.source} style={styles.topUserItem}>
|
||||||
|
<div style={styles.topUserName}>{row.source}</div>
|
||||||
|
<div style={styles.topUserMeta}>
|
||||||
|
{formatEmotion(row.dominant_emotion)} • {row.dominant_score.toFixed(3)} • {row.event_count.toLocaleString()} events
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
</div>
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
||||||
|
<h2 style={styles.sectionTitle}>Topic Snapshots</h2>
|
||||||
|
<p style={styles.sectionSubtitle}>Per-topic mood with strength and post count.</p>
|
||||||
|
<div style={{ ...styles.grid, marginTop: 10 }}>
|
||||||
|
{strongestPerTopic.map((topic) => (
|
||||||
|
<div key={topic.topic} style={{ ...styles.cardBase, gridColumn: "span 4" }}>
|
||||||
|
<h3 style={{ ...styles.sectionTitle, marginBottom: 6 }}>{topic.topic}</h3>
|
||||||
|
<div style={styles.emotionalTopicLabel}>
|
||||||
|
Likely Mood
|
||||||
|
</div>
|
||||||
|
<div style={styles.emotionalTopicValue}>
|
||||||
|
{formatEmotion(topic.emotion)}
|
||||||
|
</div>
|
||||||
|
<div style={styles.emotionalMetricRow}>
|
||||||
|
<span>Strength</span>
|
||||||
|
<span style={styles.emotionalMetricValue}>{topic.value.toFixed(3)}</span>
|
||||||
|
</div>
|
||||||
|
<div style={styles.emotionalMetricRowCompact}>
|
||||||
|
<span>Posts in Topic</span>
|
||||||
|
<span style={styles.emotionalMetricValue}>{topic.count}</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
</div>
|
</div>
|
||||||
))}
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
|
|||||||
208
frontend/src/components/InteractionalStats.tsx
Normal file
208
frontend/src/components/InteractionalStats.tsx
Normal file
@@ -0,0 +1,208 @@
|
|||||||
|
import Card from "./Card";
|
||||||
|
import StatsStyling from "../styles/stats_styling";
|
||||||
|
import type { InteractionAnalysisResponse } from "../types/ApiTypes";
|
||||||
|
import {
|
||||||
|
ResponsiveContainer,
|
||||||
|
BarChart,
|
||||||
|
Bar,
|
||||||
|
XAxis,
|
||||||
|
YAxis,
|
||||||
|
CartesianGrid,
|
||||||
|
Tooltip,
|
||||||
|
PieChart,
|
||||||
|
Pie,
|
||||||
|
Cell,
|
||||||
|
Legend,
|
||||||
|
} from "recharts";
|
||||||
|
|
||||||
|
const styles = StatsStyling;
|
||||||
|
|
||||||
|
type InteractionalStatsProps = {
|
||||||
|
data: InteractionAnalysisResponse;
|
||||||
|
};
|
||||||
|
|
||||||
|
const InteractionalStats = ({ data }: InteractionalStatsProps) => {
|
||||||
|
const graph = data.interaction_graph ?? {};
|
||||||
|
const userCount = Object.keys(graph).length;
|
||||||
|
const edges = Object.values(graph).flatMap((targets) => Object.values(targets));
|
||||||
|
const edgeCount = edges.length;
|
||||||
|
const interactionVolume = edges.reduce((sum, value) => sum + value, 0);
|
||||||
|
const concentration = data.conversation_concentration;
|
||||||
|
const topTenCommentShare = typeof concentration?.top_10pct_comment_share === "number"
|
||||||
|
? concentration?.top_10pct_comment_share
|
||||||
|
: null;
|
||||||
|
const topTenAuthorCount = typeof concentration?.top_10pct_author_count === "number"
|
||||||
|
? concentration.top_10pct_author_count
|
||||||
|
: null;
|
||||||
|
const totalCommentingAuthors = typeof concentration?.total_commenting_authors === "number"
|
||||||
|
? concentration.total_commenting_authors
|
||||||
|
: null;
|
||||||
|
const singleCommentAuthorRatio = typeof concentration?.single_comment_author_ratio === "number"
|
||||||
|
? concentration.single_comment_author_ratio
|
||||||
|
: null;
|
||||||
|
const singleCommentAuthors = typeof concentration?.single_comment_authors === "number"
|
||||||
|
? concentration.single_comment_authors
|
||||||
|
: null;
|
||||||
|
|
||||||
|
const topPairs = (data.top_interaction_pairs ?? [])
|
||||||
|
.filter((item): item is [[string, string], number] => {
|
||||||
|
if (!Array.isArray(item) || item.length !== 2) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const pair = item[0];
|
||||||
|
const count = item[1];
|
||||||
|
|
||||||
|
return Array.isArray(pair)
|
||||||
|
&& pair.length === 2
|
||||||
|
&& typeof pair[0] === "string"
|
||||||
|
&& typeof pair[1] === "string"
|
||||||
|
&& typeof count === "number";
|
||||||
|
})
|
||||||
|
.slice(0, 20);
|
||||||
|
|
||||||
|
const topPairChartData = topPairs.slice(0, 8).map(([[source, target], value], index) => ({
|
||||||
|
pair: `${source} -> ${target}`,
|
||||||
|
replies: value,
|
||||||
|
rank: index + 1,
|
||||||
|
}));
|
||||||
|
|
||||||
|
const topTenSharePercent = topTenCommentShare === null
|
||||||
|
? null
|
||||||
|
: topTenCommentShare * 100;
|
||||||
|
const nonTopTenSharePercent = topTenSharePercent === null
|
||||||
|
? null
|
||||||
|
: Math.max(0, 100 - topTenSharePercent);
|
||||||
|
|
||||||
|
let concentrationPieData: { name: string; value: number }[] = [];
|
||||||
|
if (topTenSharePercent !== null && nonTopTenSharePercent !== null) {
|
||||||
|
concentrationPieData = [
|
||||||
|
{ name: "Top 10% authors", value: topTenSharePercent },
|
||||||
|
{ name: "Other authors", value: nonTopTenSharePercent },
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
const PIE_COLORS = ["#2b6777", "#c8d8e4"];
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div style={styles.page}>
|
||||||
|
<div style={{ ...styles.container, ...styles.grid }}>
|
||||||
|
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
||||||
|
<h2 style={styles.sectionTitle}>Conversation Overview</h2>
|
||||||
|
<p style={styles.sectionSubtitle}>Who talks to who, and how concentrated the replies are.</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<Card
|
||||||
|
label="Average Reply Depth"
|
||||||
|
value={typeof data.average_thread_depth === "number" ? data.average_thread_depth.toFixed(2) : "—"}
|
||||||
|
sublabel="How deep reply chains usually go"
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="Users in Network"
|
||||||
|
value={userCount.toLocaleString()}
|
||||||
|
sublabel="Users in the reply graph"
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="User-to-User Links"
|
||||||
|
value={edgeCount.toLocaleString()}
|
||||||
|
sublabel="Unique reply directions"
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="Total Replies"
|
||||||
|
value={interactionVolume.toLocaleString()}
|
||||||
|
sublabel="All reply links combined"
|
||||||
|
style={{ gridColumn: "span 3" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="Concentrated Replies"
|
||||||
|
value={topTenSharePercent === null ? "-" : `${topTenSharePercent.toFixed(1)}%`}
|
||||||
|
sublabel={topTenAuthorCount === null || totalCommentingAuthors === null
|
||||||
|
? "Reply share from the top 10% commenters"
|
||||||
|
: `${topTenAuthorCount.toLocaleString()} of ${totalCommentingAuthors.toLocaleString()} authors`}
|
||||||
|
style={{ gridColumn: "span 6" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="Single-Comment Authors"
|
||||||
|
value={singleCommentAuthorRatio === null ? "-" : `${(singleCommentAuthorRatio * 100).toFixed(1)}%`}
|
||||||
|
sublabel={singleCommentAuthors === null
|
||||||
|
? "Authors who commented exactly once"
|
||||||
|
: `${singleCommentAuthors.toLocaleString()} authors commented exactly once`}
|
||||||
|
style={{ gridColumn: "span 6" }}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
||||||
|
<h2 style={styles.sectionTitle}>Conversation Visuals</h2>
|
||||||
|
<p style={styles.sectionSubtitle}>Main reply links and concentration split.</p>
|
||||||
|
|
||||||
|
<div style={{ ...styles.grid, marginTop: 12 }}>
|
||||||
|
<div style={{ ...styles.cardBase, gridColumn: "span 6" }}>
|
||||||
|
<h3 style={{ ...styles.sectionTitle, fontSize: "1rem" }}>Top Interaction Pairs</h3>
|
||||||
|
<div style={{ width: "100%", height: 300 }}>
|
||||||
|
<ResponsiveContainer>
|
||||||
|
<BarChart data={topPairChartData} layout="vertical" margin={{ top: 8, right: 16, left: 16, bottom: 8 }}>
|
||||||
|
<CartesianGrid strokeDasharray="3 3" stroke="#d9e2ec" />
|
||||||
|
<XAxis type="number" allowDecimals={false} />
|
||||||
|
<YAxis
|
||||||
|
type="category"
|
||||||
|
dataKey="rank"
|
||||||
|
tickFormatter={(value) => `#${value}`}
|
||||||
|
width={36}
|
||||||
|
/>
|
||||||
|
<Tooltip />
|
||||||
|
<Bar dataKey="replies" fill="#2b6777" radius={[0, 6, 6, 0]} />
|
||||||
|
</BarChart>
|
||||||
|
</ResponsiveContainer>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style={{ ...styles.cardBase, gridColumn: "span 6" }}>
|
||||||
|
<h3 style={{ ...styles.sectionTitle, fontSize: "1rem" }}>Top 10% vs Other Comment Share</h3>
|
||||||
|
<div style={{ width: "100%", height: 300 }}>
|
||||||
|
<ResponsiveContainer>
|
||||||
|
<PieChart>
|
||||||
|
<Pie
|
||||||
|
data={concentrationPieData}
|
||||||
|
dataKey="value"
|
||||||
|
nameKey="name"
|
||||||
|
innerRadius={56}
|
||||||
|
outerRadius={88}
|
||||||
|
paddingAngle={2}
|
||||||
|
>
|
||||||
|
{concentrationPieData.map((entry, index) => (
|
||||||
|
<Cell key={`${entry.name}-${index}`} fill={PIE_COLORS[index % PIE_COLORS.length]} />
|
||||||
|
))}
|
||||||
|
</Pie>
|
||||||
|
<Tooltip />
|
||||||
|
<Legend verticalAlign="bottom" height={36} />
|
||||||
|
</PieChart>
|
||||||
|
</ResponsiveContainer>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
||||||
|
<h2 style={styles.sectionTitle}>Frequent Reply Paths</h2>
|
||||||
|
<p style={styles.sectionSubtitle}>Most common user-to-user reply paths.</p>
|
||||||
|
{!topPairs.length ? (
|
||||||
|
<div style={styles.topUserMeta}>No interaction pair data available.</div>
|
||||||
|
) : (
|
||||||
|
<div style={{ ...styles.topUsersList, maxHeight: 420, overflowY: "auto" }}>
|
||||||
|
{topPairs.map(([[source, target], value], index) => (
|
||||||
|
<div key={`${source}->${target}-${index}`} style={styles.topUserItem}>
|
||||||
|
<div style={styles.topUserName}>{source} -> {target}</div>
|
||||||
|
<div style={styles.topUserMeta}>{value.toLocaleString()} replies</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
|
export default InteractionalStats;
|
||||||
91
frontend/src/components/LinguisticStats.tsx
Normal file
91
frontend/src/components/LinguisticStats.tsx
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
import Card from "./Card";
|
||||||
|
import StatsStyling from "../styles/stats_styling";
|
||||||
|
import type { LinguisticAnalysisResponse } from "../types/ApiTypes";
|
||||||
|
|
||||||
|
const styles = StatsStyling;
|
||||||
|
|
||||||
|
type LinguisticStatsProps = {
|
||||||
|
data: LinguisticAnalysisResponse;
|
||||||
|
};
|
||||||
|
|
||||||
|
const LinguisticStats = ({ data }: LinguisticStatsProps) => {
|
||||||
|
const lexical = data.lexical_diversity;
|
||||||
|
const words = data.word_frequencies ?? [];
|
||||||
|
const bigrams = data.common_two_phrases ?? [];
|
||||||
|
const trigrams = data.common_three_phrases ?? [];
|
||||||
|
|
||||||
|
const topWords = words.slice(0, 20);
|
||||||
|
const topBigrams = bigrams.slice(0, 10);
|
||||||
|
const topTrigrams = trigrams.slice(0, 10);
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div style={styles.page}>
|
||||||
|
<div style={{ ...styles.container, ...styles.grid }}>
|
||||||
|
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
||||||
|
<h2 style={styles.sectionTitle}>Language Overview</h2>
|
||||||
|
<p style={styles.sectionSubtitle}>Quick read on how broad and repetitive the wording is.</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<Card
|
||||||
|
label="Total Words"
|
||||||
|
value={lexical?.total_tokens?.toLocaleString() ?? "—"}
|
||||||
|
sublabel="Words after basic filtering"
|
||||||
|
style={{ gridColumn: "span 4" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="Unique Words"
|
||||||
|
value={lexical?.unique_tokens?.toLocaleString() ?? "—"}
|
||||||
|
sublabel="Different words used"
|
||||||
|
style={{ gridColumn: "span 4" }}
|
||||||
|
/>
|
||||||
|
<Card
|
||||||
|
label="Vocabulary Variety"
|
||||||
|
value={typeof lexical?.ttr === "number" ? lexical.ttr.toFixed(4) : "—"}
|
||||||
|
sublabel="Higher means less repetition"
|
||||||
|
style={{ gridColumn: "span 4" }}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<div style={{ ...styles.card, gridColumn: "span 4" }}>
|
||||||
|
<h2 style={styles.sectionTitle}>Top Words</h2>
|
||||||
|
<p style={styles.sectionSubtitle}>Most used single words.</p>
|
||||||
|
<div style={{ ...styles.topUsersList, maxHeight: 360, overflowY: "auto" }}>
|
||||||
|
{topWords.map((item) => (
|
||||||
|
<div key={item.word} style={styles.topUserItem}>
|
||||||
|
<div style={styles.topUserName}>{item.word}</div>
|
||||||
|
<div style={styles.topUserMeta}>{item.count.toLocaleString()} uses</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style={{ ...styles.card, gridColumn: "span 4" }}>
|
||||||
|
<h2 style={styles.sectionTitle}>Top Bigrams</h2>
|
||||||
|
<p style={styles.sectionSubtitle}>Most used 2-word phrases.</p>
|
||||||
|
<div style={{ ...styles.topUsersList, maxHeight: 360, overflowY: "auto" }}>
|
||||||
|
{topBigrams.map((item) => (
|
||||||
|
<div key={item.ngram} style={styles.topUserItem}>
|
||||||
|
<div style={styles.topUserName}>{item.ngram}</div>
|
||||||
|
<div style={styles.topUserMeta}>{item.count.toLocaleString()} uses</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style={{ ...styles.card, gridColumn: "span 4" }}>
|
||||||
|
<h2 style={styles.sectionTitle}>Top Trigrams</h2>
|
||||||
|
<p style={styles.sectionSubtitle}>Most used 3-word phrases.</p>
|
||||||
|
<div style={{ ...styles.topUsersList, maxHeight: 360, overflowY: "auto" }}>
|
||||||
|
{topTrigrams.map((item) => (
|
||||||
|
<div key={item.ngram} style={styles.topUserItem}>
|
||||||
|
<div style={styles.topUserName}>{item.ngram}</div>
|
||||||
|
<div style={styles.topUserMeta}>{item.count.toLocaleString()} uses</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
|
export default LinguisticStats;
|
||||||
@@ -58,15 +58,13 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
|
|||||||
const [selectedUser, setSelectedUser] = useState<string | null>(null);
|
const [selectedUser, setSelectedUser] = useState<string | null>(null);
|
||||||
const selectedUserData: User | null = userData?.users.find((u) => u.author === selectedUser) ?? null;
|
const selectedUserData: User | null = userData?.users.find((u) => u.author === selectedUser) ?? null;
|
||||||
|
|
||||||
console.log(summary)
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div style={styles.page}>
|
<div style={styles.page}>
|
||||||
|
|
||||||
{/* main grid*/}
|
{/* main grid*/}
|
||||||
<div style={{ ...styles.container, ...styles.grid}}>
|
<div style={{ ...styles.container, ...styles.grid}}>
|
||||||
<Card
|
<Card
|
||||||
label="Total Events"
|
label="Total Activity"
|
||||||
value={summary?.total_events ?? "—"}
|
value={summary?.total_events ?? "—"}
|
||||||
sublabel="Posts + comments"
|
sublabel="Posts + comments"
|
||||||
style={{
|
style={{
|
||||||
@@ -74,15 +72,15 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
|
|||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Unique Users"
|
label="Active People"
|
||||||
value={summary?.unique_users ?? "—"}
|
value={summary?.unique_users ?? "—"}
|
||||||
sublabel="Distinct authors"
|
sublabel="Distinct users"
|
||||||
style={{
|
style={{
|
||||||
gridColumn: "span 4"
|
gridColumn: "span 4"
|
||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Posts / Comments"
|
label="Posts vs Comments"
|
||||||
value={
|
value={
|
||||||
summary
|
summary
|
||||||
? `${summary.total_posts} / ${summary.total_comments}`
|
? `${summary.total_posts} / ${summary.total_comments}`
|
||||||
@@ -108,13 +106,13 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
|
|||||||
/>
|
/>
|
||||||
|
|
||||||
<Card
|
<Card
|
||||||
label="Lurker Ratio"
|
label="One-Time Users"
|
||||||
value={
|
value={
|
||||||
typeof summary?.lurker_ratio === "number"
|
typeof summary?.lurker_ratio === "number"
|
||||||
? `${Math.round(summary.lurker_ratio * 100)}%`
|
? `${Math.round(summary.lurker_ratio * 100)}%`
|
||||||
: "—"
|
: "—"
|
||||||
}
|
}
|
||||||
sublabel="Users with only 1 event"
|
sublabel="Users with only one event"
|
||||||
style={{
|
style={{
|
||||||
gridColumn: "span 4"
|
gridColumn: "span 4"
|
||||||
}}
|
}}
|
||||||
@@ -136,12 +134,12 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
|
|||||||
|
|
||||||
{/* events per day */}
|
{/* events per day */}
|
||||||
<div style={{ ...styles.card, gridColumn: "span 5" }}>
|
<div style={{ ...styles.card, gridColumn: "span 5" }}>
|
||||||
<h2 style={styles.sectionTitle}>Events per Day</h2>
|
<h2 style={styles.sectionTitle}>Activity Over Time</h2>
|
||||||
<p style={styles.sectionSubtitle}>Trend of activity over time</p>
|
<p style={styles.sectionSubtitle}>How much posting happened each day.</p>
|
||||||
|
|
||||||
<div style={styles.chartWrapper}>
|
<div style={styles.chartWrapper}>
|
||||||
<ResponsiveContainer width="100%" height="100%">
|
<ResponsiveContainer width="100%" height="100%">
|
||||||
<LineChart data={timeData?.events_per_day.filter((d) => new Date(d.date) >= new Date('2026-01-10'))}>
|
<LineChart data={timeData?.events_per_day ?? []}>
|
||||||
<CartesianGrid strokeDasharray="3 3" />
|
<CartesianGrid strokeDasharray="3 3" />
|
||||||
<XAxis dataKey="date" />
|
<XAxis dataKey="date" />
|
||||||
<YAxis />
|
<YAxis />
|
||||||
@@ -154,8 +152,8 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
|
|||||||
|
|
||||||
{/* Word Cloud */}
|
{/* Word Cloud */}
|
||||||
<div style={{ ...styles.card, gridColumn: "span 4" }}>
|
<div style={{ ...styles.card, gridColumn: "span 4" }}>
|
||||||
<h2 style={styles.sectionTitle}>Word Cloud</h2>
|
<h2 style={styles.sectionTitle}>Common Words</h2>
|
||||||
<p style={styles.sectionSubtitle}>Most common terms across events</p>
|
<p style={styles.sectionSubtitle}>Frequently used words across the dataset.</p>
|
||||||
|
|
||||||
<div style={styles.chartWrapper}>
|
<div style={styles.chartWrapper}>
|
||||||
<ReactWordcloud
|
<ReactWordcloud
|
||||||
@@ -174,8 +172,8 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
|
|||||||
<div style={{...styles.card, ...styles.scrollArea, gridColumn: "span 3",
|
<div style={{...styles.card, ...styles.scrollArea, gridColumn: "span 3",
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
<h2 style={styles.sectionTitle}>Top Users</h2>
|
<h2 style={styles.sectionTitle}>Most Active Users</h2>
|
||||||
<p style={styles.sectionSubtitle}>Most active authors</p>
|
<p style={styles.sectionSubtitle}>Who posted the most events.</p>
|
||||||
|
|
||||||
<div style={styles.topUsersList}>
|
<div style={styles.topUsersList}>
|
||||||
{userData?.top_users.slice(0, 100).map((item) => (
|
{userData?.top_users.slice(0, 100).map((item) => (
|
||||||
@@ -195,8 +193,8 @@ const SummaryStats = ({userData, timeData, contentData, summary}: SummaryStatsPr
|
|||||||
|
|
||||||
{/* Heatmap */}
|
{/* Heatmap */}
|
||||||
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
||||||
<h2 style={styles.sectionTitle}>Heatmap</h2>
|
<h2 style={styles.sectionTitle}>Weekly Activity Pattern</h2>
|
||||||
<p style={styles.sectionSubtitle}>Activity density across time</p>
|
<p style={styles.sectionSubtitle}>When activity tends to happen by weekday and hour.</p>
|
||||||
|
|
||||||
<div style={styles.heatmapWrapper}>
|
<div style={styles.heatmapWrapper}>
|
||||||
<ActivityHeatmap data={timeData?.weekday_hour_heatmap ?? []} />
|
<ActivityHeatmap data={timeData?.weekday_hour_heatmap ?? []} />
|
||||||
|
|||||||
@@ -12,6 +12,9 @@ type Props = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
export default function UserModal({ open, onClose, userData, username }: Props) {
|
export default function UserModal({ open, onClose, userData, username }: Props) {
|
||||||
|
const dominantEmotionEntry = Object.entries(userData?.avg_emotions ?? {})
|
||||||
|
.sort((a, b) => b[1] - a[1])[0];
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Dialog open={open} onClose={onClose} style={styles.modalRoot}>
|
<Dialog open={open} onClose={onClose} style={styles.modalRoot}>
|
||||||
<div style={styles.modalBackdrop} />
|
<div style={styles.modalBackdrop} />
|
||||||
@@ -66,6 +69,15 @@ export default function UserModal({ open, onClose, userData, username }: Props)
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
) : null}
|
) : null}
|
||||||
|
|
||||||
|
{dominantEmotionEntry ? (
|
||||||
|
<div style={styles.topUserItem}>
|
||||||
|
<div style={styles.topUserName}>Dominant Avg Emotion</div>
|
||||||
|
<div style={styles.topUserMeta}>
|
||||||
|
{dominantEmotionEntry[0].replace("emotion_", "")} ({dominantEmotionEntry[1].toFixed(3)})
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
) : null}
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</DialogPanel>
|
</DialogPanel>
|
||||||
|
|||||||
@@ -87,15 +87,15 @@ const UserStats = (props: { data: UserAnalysisResponse }) => {
|
|||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Interactions"
|
label="Replies"
|
||||||
value={totalInteractions.toLocaleString()}
|
value={totalInteractions.toLocaleString()}
|
||||||
sublabel="Filtered links (2+ interactions)"
|
sublabel="Links with at least 2 replies"
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Average Intensity"
|
label="Replies per Connected User"
|
||||||
value={avgInteractionsPerConnectedUser.toFixed(1)}
|
value={avgInteractionsPerConnectedUser.toFixed(1)}
|
||||||
sublabel="Interactions per connected user"
|
sublabel="Average from visible graph links"
|
||||||
style={{ gridColumn: "span 3" }}
|
style={{ gridColumn: "span 3" }}
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
@@ -106,13 +106,13 @@ const UserStats = (props: { data: UserAnalysisResponse }) => {
|
|||||||
/>
|
/>
|
||||||
|
|
||||||
<Card
|
<Card
|
||||||
label="Strongest Connection"
|
label="Strongest User Link"
|
||||||
value={strongestLink ? `${strongestLink.source} -> ${strongestLink.target}` : "—"}
|
value={strongestLink ? `${strongestLink.source} -> ${strongestLink.target}` : "—"}
|
||||||
sublabel={strongestLink ? `${strongestLink.value.toLocaleString()} interactions` : "No graph edges after filtering"}
|
sublabel={strongestLink ? `${strongestLink.value.toLocaleString()} replies` : "No graph links after filtering"}
|
||||||
style={{ gridColumn: "span 6" }}
|
style={{ gridColumn: "span 6" }}
|
||||||
/>
|
/>
|
||||||
<Card
|
<Card
|
||||||
label="Most Reply-Driven User"
|
label="Most Comment-Heavy User"
|
||||||
value={highlyInteractiveUser?.author ?? "—"}
|
value={highlyInteractiveUser?.author ?? "—"}
|
||||||
sublabel={
|
sublabel={
|
||||||
highlyInteractiveUser
|
highlyInteractiveUser
|
||||||
@@ -125,7 +125,7 @@ const UserStats = (props: { data: UserAnalysisResponse }) => {
|
|||||||
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
<div style={{ ...styles.card, gridColumn: "span 12" }}>
|
||||||
<h2 style={styles.sectionTitle}>User Interaction Graph</h2>
|
<h2 style={styles.sectionTitle}>User Interaction Graph</h2>
|
||||||
<p style={styles.sectionSubtitle}>
|
<p style={styles.sectionSubtitle}>
|
||||||
Nodes represent users and links represent conversation interactions.
|
Each node is a user, and each link shows replies between them.
|
||||||
</p>
|
</p>
|
||||||
<div ref={graphContainerRef} style={{ width: "100%", height: graphSize.height }}>
|
<div ref={graphContainerRef} style={{ width: "100%", height: graphSize.height }}>
|
||||||
<ForceGraph3D
|
<ForceGraph3D
|
||||||
|
|||||||
@@ -191,6 +191,9 @@ const AutoScrapePage = () => {
|
|||||||
<p style={styles.sectionHeaderSubtitle}>
|
<p style={styles.sectionHeaderSubtitle}>
|
||||||
Select sources and scrape settings, then queue processing automatically.
|
Select sources and scrape settings, then queue processing automatically.
|
||||||
</p>
|
</p>
|
||||||
|
<p style={{ ...styles.subtleBodyText, marginTop: 6, color: "#9a6700" }}>
|
||||||
|
Warning: Scraping more than 250 posts from any single site can take hours due to rate limits.
|
||||||
|
</p>
|
||||||
</div>
|
</div>
|
||||||
<button
|
<button
|
||||||
type="button"
|
type="button"
|
||||||
|
|||||||
@@ -5,26 +5,42 @@ import StatsStyling from "../styles/stats_styling";
|
|||||||
import SummaryStats from "../components/SummaryStats";
|
import SummaryStats from "../components/SummaryStats";
|
||||||
import EmotionalStats from "../components/EmotionalStats";
|
import EmotionalStats from "../components/EmotionalStats";
|
||||||
import UserStats from "../components/UserStats";
|
import UserStats from "../components/UserStats";
|
||||||
|
import LinguisticStats from "../components/LinguisticStats";
|
||||||
|
import InteractionalStats from "../components/InteractionalStats";
|
||||||
|
import CulturalStats from "../components/CulturalStats";
|
||||||
|
|
||||||
import {
|
import {
|
||||||
type SummaryResponse,
|
type SummaryResponse,
|
||||||
type UserAnalysisResponse,
|
type UserAnalysisResponse,
|
||||||
type TimeAnalysisResponse,
|
type TimeAnalysisResponse,
|
||||||
type ContentAnalysisResponse
|
type ContentAnalysisResponse,
|
||||||
|
type UserEndpointResponse,
|
||||||
|
type LinguisticAnalysisResponse,
|
||||||
|
type EmotionalAnalysisResponse,
|
||||||
|
type InteractionAnalysisResponse,
|
||||||
|
type CulturalAnalysisResponse
|
||||||
} from '../types/ApiTypes'
|
} from '../types/ApiTypes'
|
||||||
|
|
||||||
const API_BASE_URL = import.meta.env.VITE_BACKEND_URL
|
const API_BASE_URL = import.meta.env.VITE_BACKEND_URL
|
||||||
const styles = StatsStyling;
|
const styles = StatsStyling;
|
||||||
|
const DELETED_USERS = ["[deleted]"];
|
||||||
|
|
||||||
|
const isDeletedUser = (value: string | null | undefined) => (
|
||||||
|
DELETED_USERS.includes((value ?? "").trim().toLowerCase())
|
||||||
|
);
|
||||||
|
|
||||||
const StatPage = () => {
|
const StatPage = () => {
|
||||||
const { datasetId: routeDatasetId } = useParams<{ datasetId: string }>();
|
const { datasetId: routeDatasetId } = useParams<{ datasetId: string }>();
|
||||||
const [error, setError] = useState('');
|
const [error, setError] = useState('');
|
||||||
const [loading, setLoading] = useState(false);
|
const [loading, setLoading] = useState(false);
|
||||||
const [activeView, setActiveView] = useState<"summary" | "emotional" | "user">("summary");
|
const [activeView, setActiveView] = useState<"summary" | "emotional" | "user" | "linguistic" | "interactional" | "cultural">("summary");
|
||||||
|
|
||||||
const [userData, setUserData] = useState<UserAnalysisResponse | null>(null);
|
const [userData, setUserData] = useState<UserAnalysisResponse | null>(null);
|
||||||
const [timeData, setTimeData] = useState<TimeAnalysisResponse | null>(null);
|
const [timeData, setTimeData] = useState<TimeAnalysisResponse | null>(null);
|
||||||
const [contentData, setContentData] = useState<ContentAnalysisResponse | null>(null);
|
const [contentData, setContentData] = useState<ContentAnalysisResponse | null>(null);
|
||||||
|
const [linguisticData, setLinguisticData] = useState<LinguisticAnalysisResponse | null>(null);
|
||||||
|
const [interactionData, setInteractionData] = useState<InteractionAnalysisResponse | null>(null);
|
||||||
|
const [culturalData, setCulturalData] = useState<CulturalAnalysisResponse | null>(null);
|
||||||
const [summary, setSummary] = useState<SummaryResponse | null>(null);
|
const [summary, setSummary] = useState<SummaryResponse | null>(null);
|
||||||
|
|
||||||
|
|
||||||
@@ -83,15 +99,23 @@ const StatPage = () => {
|
|||||||
setLoading(true);
|
setLoading(true);
|
||||||
|
|
||||||
Promise.all([
|
Promise.all([
|
||||||
axios.get<TimeAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/time`, {
|
axios.get<TimeAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/temporal`, {
|
||||||
params,
|
params,
|
||||||
headers: authHeaders,
|
headers: authHeaders,
|
||||||
}),
|
}),
|
||||||
axios.get<UserAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/user`, {
|
axios.get<UserEndpointResponse>(`${API_BASE_URL}/dataset/${datasetId}/user`, {
|
||||||
params,
|
params,
|
||||||
headers: authHeaders,
|
headers: authHeaders,
|
||||||
}),
|
}),
|
||||||
axios.get<ContentAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/content`, {
|
axios.get<LinguisticAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/linguistic`, {
|
||||||
|
params,
|
||||||
|
headers: authHeaders,
|
||||||
|
}),
|
||||||
|
axios.get<EmotionalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/emotional`, {
|
||||||
|
params,
|
||||||
|
headers: authHeaders,
|
||||||
|
}),
|
||||||
|
axios.get<InteractionAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/interactional`, {
|
||||||
params,
|
params,
|
||||||
headers: authHeaders,
|
headers: authHeaders,
|
||||||
}),
|
}),
|
||||||
@@ -99,12 +123,87 @@ const StatPage = () => {
|
|||||||
params,
|
params,
|
||||||
headers: authHeaders,
|
headers: authHeaders,
|
||||||
}),
|
}),
|
||||||
|
axios.get<CulturalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/cultural`, {
|
||||||
|
params,
|
||||||
|
headers: authHeaders,
|
||||||
|
}),
|
||||||
])
|
])
|
||||||
.then(([timeRes, userRes, contentRes, summaryRes]) => {
|
.then(([timeRes, userRes, linguisticRes, emotionalRes, interactionRes, summaryRes, culturalRes]) => {
|
||||||
setUserData(userRes.data || null);
|
const usersList = userRes.data.users ?? [];
|
||||||
|
const topUsersList = userRes.data.top_users ?? [];
|
||||||
|
const interactionGraphRaw = interactionRes.data?.interaction_graph ?? {};
|
||||||
|
const topPairsRaw = interactionRes.data?.top_interaction_pairs ?? [];
|
||||||
|
|
||||||
|
const filteredUsers: typeof usersList = [];
|
||||||
|
for (const user of usersList) {
|
||||||
|
if (isDeletedUser(user.author)) continue;
|
||||||
|
filteredUsers.push(user);
|
||||||
|
}
|
||||||
|
|
||||||
|
const filteredTopUsers: typeof topUsersList = [];
|
||||||
|
for (const user of topUsersList) {
|
||||||
|
if (isDeletedUser(user.author)) continue;
|
||||||
|
filteredTopUsers.push(user);
|
||||||
|
}
|
||||||
|
|
||||||
|
const filteredInteractionGraph: Record<string, Record<string, number>> = {};
|
||||||
|
for (const [source, targets] of Object.entries(interactionGraphRaw)) {
|
||||||
|
if (isDeletedUser(source)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const nextTargets: Record<string, number> = {};
|
||||||
|
for (const [target, count] of Object.entries(targets)) {
|
||||||
|
if (isDeletedUser(target)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
nextTargets[target] = count;
|
||||||
|
}
|
||||||
|
|
||||||
|
filteredInteractionGraph[source] = nextTargets;
|
||||||
|
}
|
||||||
|
|
||||||
|
const filteredTopInteractionPairs: typeof topPairsRaw = [];
|
||||||
|
for (const pairEntry of topPairsRaw) {
|
||||||
|
const pair = pairEntry[0];
|
||||||
|
const source = pair[0];
|
||||||
|
const target = pair[1];
|
||||||
|
if (isDeletedUser(source) || isDeletedUser(target)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
filteredTopInteractionPairs.push(pairEntry);
|
||||||
|
}
|
||||||
|
|
||||||
|
const combinedUserData: UserAnalysisResponse = {
|
||||||
|
...userRes.data,
|
||||||
|
users: filteredUsers,
|
||||||
|
top_users: filteredTopUsers,
|
||||||
|
interaction_graph: filteredInteractionGraph,
|
||||||
|
};
|
||||||
|
|
||||||
|
const combinedContentData: ContentAnalysisResponse = {
|
||||||
|
...linguisticRes.data,
|
||||||
|
...emotionalRes.data,
|
||||||
|
};
|
||||||
|
|
||||||
|
const filteredInteractionData: InteractionAnalysisResponse = {
|
||||||
|
...interactionRes.data,
|
||||||
|
interaction_graph: filteredInteractionGraph,
|
||||||
|
top_interaction_pairs: filteredTopInteractionPairs,
|
||||||
|
};
|
||||||
|
|
||||||
|
const filteredSummary: SummaryResponse = {
|
||||||
|
...summaryRes.data,
|
||||||
|
unique_users: filteredUsers.length,
|
||||||
|
};
|
||||||
|
|
||||||
|
setUserData(combinedUserData);
|
||||||
setTimeData(timeRes.data || null);
|
setTimeData(timeRes.data || null);
|
||||||
setContentData(contentRes.data || null);
|
setContentData(combinedContentData);
|
||||||
setSummary(summaryRes.data || null);
|
setLinguisticData(linguisticRes.data || null);
|
||||||
|
setInteractionData(filteredInteractionData || null);
|
||||||
|
setCulturalData(culturalRes.data || null);
|
||||||
|
setSummary(filteredSummary || null);
|
||||||
})
|
})
|
||||||
.catch((e) => setError("Failed to load statistics: " + String(e)))
|
.catch((e) => setError("Failed to load statistics: " + String(e)))
|
||||||
.finally(() => setLoading(false));
|
.finally(() => setLoading(false));
|
||||||
@@ -198,7 +297,7 @@ return (
|
|||||||
<div style={styles.dashboardMeta}>Dataset #{datasetId ?? "-"}</div>
|
<div style={styles.dashboardMeta}>Dataset #{datasetId ?? "-"}</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div style={{ ...styles.container, ...styles.tabsRow }}>
|
<div style={{ ...styles.container, ...styles.tabsRow, justifyContent: "center" }}>
|
||||||
<button
|
<button
|
||||||
onClick={() => setActiveView("summary")}
|
onClick={() => setActiveView("summary")}
|
||||||
style={activeView === "summary" ? styles.buttonPrimary : styles.buttonSecondary}
|
style={activeView === "summary" ? styles.buttonPrimary : styles.buttonSecondary}
|
||||||
@@ -218,6 +317,24 @@ return (
|
|||||||
>
|
>
|
||||||
Users
|
Users
|
||||||
</button>
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={() => setActiveView("linguistic")}
|
||||||
|
style={activeView === "linguistic" ? styles.buttonPrimary : styles.buttonSecondary}
|
||||||
|
>
|
||||||
|
Linguistic
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={() => setActiveView("interactional")}
|
||||||
|
style={activeView === "interactional" ? styles.buttonPrimary : styles.buttonSecondary}
|
||||||
|
>
|
||||||
|
Interactional
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={() => setActiveView("cultural")}
|
||||||
|
style={activeView === "cultural" ? styles.buttonPrimary : styles.buttonSecondary}
|
||||||
|
>
|
||||||
|
Cultural
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{activeView === "summary" && (
|
{activeView === "summary" && (
|
||||||
@@ -243,6 +360,36 @@ return (
|
|||||||
<UserStats data={userData} />
|
<UserStats data={userData} />
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{activeView === "linguistic" && linguisticData && (
|
||||||
|
<LinguisticStats data={linguisticData} />
|
||||||
|
)}
|
||||||
|
|
||||||
|
{activeView === "linguistic" && !linguisticData && (
|
||||||
|
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
|
||||||
|
No linguistic data available.
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{activeView === "interactional" && interactionData && (
|
||||||
|
<InteractionalStats data={interactionData} />
|
||||||
|
)}
|
||||||
|
|
||||||
|
{activeView === "interactional" && !interactionData && (
|
||||||
|
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
|
||||||
|
No interactional data available.
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{activeView === "cultural" && culturalData && (
|
||||||
|
<CulturalStats data={culturalData} />
|
||||||
|
)}
|
||||||
|
|
||||||
|
{activeView === "cultural" && !culturalData && (
|
||||||
|
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
|
||||||
|
No cultural data available.
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,14 +1,28 @@
|
|||||||
// User Responses
|
// Shared types
|
||||||
type TopUser = {
|
type FrequencyWord = {
|
||||||
author: string;
|
word: string;
|
||||||
source: string;
|
count: number;
|
||||||
count: number
|
|
||||||
};
|
};
|
||||||
|
|
||||||
type FrequencyWord = {
|
type NGram = {
|
||||||
word: string;
|
count: number;
|
||||||
count: number;
|
ngram: string;
|
||||||
}
|
};
|
||||||
|
|
||||||
|
type Emotion = {
|
||||||
|
emotion_anger: number;
|
||||||
|
emotion_disgust: number;
|
||||||
|
emotion_fear: number;
|
||||||
|
emotion_joy: number;
|
||||||
|
emotion_sadness: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
// User
|
||||||
|
type TopUser = {
|
||||||
|
author: string;
|
||||||
|
source: string;
|
||||||
|
count: number;
|
||||||
|
};
|
||||||
|
|
||||||
type Vocab = {
|
type Vocab = {
|
||||||
author: string;
|
author: string;
|
||||||
@@ -26,60 +40,145 @@ type User = {
|
|||||||
comment: number;
|
comment: number;
|
||||||
comment_post_ratio: number;
|
comment_post_ratio: number;
|
||||||
comment_share: number;
|
comment_share: number;
|
||||||
|
avg_emotions?: Record<string, number>;
|
||||||
vocab?: Vocab | null;
|
vocab?: Vocab | null;
|
||||||
};
|
};
|
||||||
|
|
||||||
type InteractionGraph = Record<string, Record<string, number>>;
|
type InteractionGraph = Record<string, Record<string, number>>;
|
||||||
|
|
||||||
|
type UserEndpointResponse = {
|
||||||
|
top_users: TopUser[];
|
||||||
|
users: User[];
|
||||||
|
};
|
||||||
|
|
||||||
type UserAnalysisResponse = {
|
type UserAnalysisResponse = {
|
||||||
top_users: TopUser[];
|
top_users: TopUser[];
|
||||||
users: User[];
|
users: User[];
|
||||||
interaction_graph: InteractionGraph;
|
interaction_graph: InteractionGraph;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Time Analysis
|
// Time
|
||||||
type EventsPerDay = {
|
type EventsPerDay = {
|
||||||
date: Date;
|
date: Date;
|
||||||
count: number;
|
count: number;
|
||||||
}
|
|
||||||
|
|
||||||
type HeatmapCell = {
|
|
||||||
date: Date;
|
|
||||||
hour: number;
|
|
||||||
count: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
type TimeAnalysisResponse = {
|
|
||||||
events_per_day: EventsPerDay[];
|
|
||||||
weekday_hour_heatmap: HeatmapCell[];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Content Analysis
|
|
||||||
type Emotion = {
|
|
||||||
emotion_anger: number;
|
|
||||||
emotion_disgust: number;
|
|
||||||
emotion_fear: number;
|
|
||||||
emotion_joy: number;
|
|
||||||
emotion_sadness: number;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
type NGram = {
|
type HeatmapCell = {
|
||||||
count: number;
|
date: Date;
|
||||||
ngram: string;
|
hour: number;
|
||||||
}
|
count: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
type TimeAnalysisResponse = {
|
||||||
|
events_per_day: EventsPerDay[];
|
||||||
|
weekday_hour_heatmap: HeatmapCell[];
|
||||||
|
};
|
||||||
|
|
||||||
|
// Content (combines emotional and linguistic)
|
||||||
type AverageEmotionByTopic = Emotion & {
|
type AverageEmotionByTopic = Emotion & {
|
||||||
n: number;
|
n: number;
|
||||||
topic: string;
|
topic: string;
|
||||||
|
[key: string]: string | number;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
type OverallEmotionAverage = {
|
||||||
|
emotion: string;
|
||||||
|
score: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
type DominantEmotionDistribution = {
|
||||||
|
emotion: string;
|
||||||
|
count: number;
|
||||||
|
ratio: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
type EmotionBySource = {
|
||||||
|
source: string;
|
||||||
|
dominant_emotion: string;
|
||||||
|
dominant_score: number;
|
||||||
|
event_count: number;
|
||||||
|
};
|
||||||
|
|
||||||
type ContentAnalysisResponse = {
|
type ContentAnalysisResponse = {
|
||||||
word_frequencies: FrequencyWord[];
|
word_frequencies: FrequencyWord[];
|
||||||
average_emotion_by_topic: AverageEmotionByTopic[];
|
average_emotion_by_topic: AverageEmotionByTopic[];
|
||||||
common_three_phrases: NGram[];
|
common_three_phrases: NGram[];
|
||||||
common_two_phrases: NGram[];
|
common_two_phrases: NGram[];
|
||||||
}
|
overall_emotion_average?: OverallEmotionAverage[];
|
||||||
|
dominant_emotion_distribution?: DominantEmotionDistribution[];
|
||||||
|
emotion_by_source?: EmotionBySource[];
|
||||||
|
};
|
||||||
|
|
||||||
|
// Linguistic
|
||||||
|
type LinguisticAnalysisResponse = {
|
||||||
|
word_frequencies: FrequencyWord[];
|
||||||
|
common_two_phrases: NGram[];
|
||||||
|
common_three_phrases: NGram[];
|
||||||
|
lexical_diversity?: Record<string, number>;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Emotional
|
||||||
|
type EmotionalAnalysisResponse = {
|
||||||
|
average_emotion_by_topic: AverageEmotionByTopic[];
|
||||||
|
overall_emotion_average?: OverallEmotionAverage[];
|
||||||
|
dominant_emotion_distribution?: DominantEmotionDistribution[];
|
||||||
|
emotion_by_source?: EmotionBySource[];
|
||||||
|
};
|
||||||
|
|
||||||
|
// Interactional
|
||||||
|
type ConversationConcentration = {
|
||||||
|
total_commenting_authors: number;
|
||||||
|
top_10pct_author_count: number;
|
||||||
|
top_10pct_comment_share: number;
|
||||||
|
single_comment_authors: number;
|
||||||
|
single_comment_author_ratio: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
type InteractionAnalysisResponse = {
|
||||||
|
average_thread_depth?: number;
|
||||||
|
top_interaction_pairs?: [[string, string], number][];
|
||||||
|
conversation_concentration?: ConversationConcentration;
|
||||||
|
interaction_graph: InteractionGraph;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Cultural
|
||||||
|
type IdentityMarkers = {
|
||||||
|
in_group_usage: number;
|
||||||
|
out_group_usage: number;
|
||||||
|
in_group_ratio: number;
|
||||||
|
out_group_ratio: number;
|
||||||
|
in_group_posts: number;
|
||||||
|
out_group_posts: number;
|
||||||
|
tie_posts: number;
|
||||||
|
in_group_emotion_avg?: Record<string, number>;
|
||||||
|
out_group_emotion_avg?: Record<string, number>;
|
||||||
|
};
|
||||||
|
|
||||||
|
type StanceMarkers = {
|
||||||
|
hedge_total: number;
|
||||||
|
certainty_total: number;
|
||||||
|
deontic_total: number;
|
||||||
|
permission_total: number;
|
||||||
|
hedge_per_1k_tokens: number;
|
||||||
|
certainty_per_1k_tokens: number;
|
||||||
|
deontic_per_1k_tokens: number;
|
||||||
|
permission_per_1k_tokens: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
type EntityEmotionAggregate = {
|
||||||
|
post_count: number;
|
||||||
|
emotion_avg: Record<string, number>;
|
||||||
|
};
|
||||||
|
|
||||||
|
type AverageEmotionPerEntity = {
|
||||||
|
entity_emotion_avg: Record<string, EntityEmotionAggregate>;
|
||||||
|
};
|
||||||
|
|
||||||
|
type CulturalAnalysisResponse = {
|
||||||
|
identity_markers?: IdentityMarkers;
|
||||||
|
stance_markers?: StanceMarkers;
|
||||||
|
avg_emotion_per_entity?: AverageEmotionPerEntity;
|
||||||
|
};
|
||||||
|
|
||||||
// Summary
|
// Summary
|
||||||
type SummaryResponse = {
|
type SummaryResponse = {
|
||||||
@@ -96,22 +195,35 @@ type SummaryResponse = {
|
|||||||
sources: string[];
|
sources: string[];
|
||||||
};
|
};
|
||||||
|
|
||||||
// Filtering Response
|
// Filter
|
||||||
type FilterResponse = {
|
type FilterResponse = {
|
||||||
rows: number
|
rows: number;
|
||||||
data: any;
|
data: any;
|
||||||
}
|
};
|
||||||
|
|
||||||
export type {
|
export type {
|
||||||
TopUser,
|
TopUser,
|
||||||
Vocab,
|
Vocab,
|
||||||
User,
|
User,
|
||||||
InteractionGraph,
|
InteractionGraph,
|
||||||
UserAnalysisResponse,
|
ConversationConcentration,
|
||||||
FrequencyWord,
|
UserAnalysisResponse,
|
||||||
AverageEmotionByTopic,
|
UserEndpointResponse,
|
||||||
SummaryResponse,
|
FrequencyWord,
|
||||||
TimeAnalysisResponse,
|
AverageEmotionByTopic,
|
||||||
ContentAnalysisResponse,
|
OverallEmotionAverage,
|
||||||
FilterResponse
|
DominantEmotionDistribution,
|
||||||
}
|
EmotionBySource,
|
||||||
|
SummaryResponse,
|
||||||
|
TimeAnalysisResponse,
|
||||||
|
ContentAnalysisResponse,
|
||||||
|
LinguisticAnalysisResponse,
|
||||||
|
EmotionalAnalysisResponse,
|
||||||
|
InteractionAnalysisResponse,
|
||||||
|
IdentityMarkers,
|
||||||
|
StanceMarkers,
|
||||||
|
EntityEmotionAggregate,
|
||||||
|
AverageEmotionPerEntity,
|
||||||
|
CulturalAnalysisResponse,
|
||||||
|
FilterResponse,
|
||||||
|
};
|
||||||
|
|||||||
@@ -1,33 +1,86 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
class EmotionalAnalysis:
|
class EmotionalAnalysis:
|
||||||
def avg_emotion_by_topic(self, df: pd.DataFrame) -> dict:
|
def _emotion_cols(self, df: pd.DataFrame) -> list[str]:
|
||||||
emotion_cols = [
|
return [col for col in df.columns if col.startswith("emotion_")]
|
||||||
col for col in df.columns
|
|
||||||
if col.startswith("emotion_")
|
def avg_emotion_by_topic(self, df: pd.DataFrame) -> list[dict]:
|
||||||
]
|
emotion_cols = self._emotion_cols(df)
|
||||||
|
|
||||||
|
if not emotion_cols:
|
||||||
|
return []
|
||||||
|
|
||||||
counts = (
|
counts = (
|
||||||
df[
|
df[(df["topic"] != "Misc")].groupby("topic").size().reset_index(name="n")
|
||||||
(df["topic"] != "Misc")
|
|
||||||
]
|
|
||||||
.groupby("topic")
|
|
||||||
.size()
|
|
||||||
.rename("n")
|
|
||||||
)
|
)
|
||||||
|
|
||||||
avg_emotion_by_topic = (
|
avg_emotion_by_topic = (
|
||||||
df[
|
df[(df["topic"] != "Misc")]
|
||||||
(df["topic"] != "Misc")
|
|
||||||
]
|
|
||||||
.groupby("topic")[emotion_cols]
|
.groupby("topic")[emotion_cols]
|
||||||
.mean()
|
.mean()
|
||||||
.reset_index()
|
.reset_index()
|
||||||
)
|
)
|
||||||
|
|
||||||
avg_emotion_by_topic = avg_emotion_by_topic.merge(
|
avg_emotion_by_topic = avg_emotion_by_topic.merge(counts, on="topic")
|
||||||
counts,
|
|
||||||
on="topic"
|
|
||||||
)
|
|
||||||
|
|
||||||
return avg_emotion_by_topic.to_dict(orient='records')
|
return avg_emotion_by_topic.to_dict(orient="records")
|
||||||
|
|
||||||
|
def overall_emotion_average(self, df: pd.DataFrame) -> list[dict]:
|
||||||
|
emotion_cols = self._emotion_cols(df)
|
||||||
|
|
||||||
|
if not emotion_cols:
|
||||||
|
return []
|
||||||
|
|
||||||
|
means = df[emotion_cols].mean()
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"emotion": col.replace("emotion_", ""),
|
||||||
|
"score": float(means[col]),
|
||||||
|
}
|
||||||
|
for col in emotion_cols
|
||||||
|
]
|
||||||
|
|
||||||
|
def dominant_emotion_distribution(self, df: pd.DataFrame) -> list[dict]:
|
||||||
|
emotion_cols = self._emotion_cols(df)
|
||||||
|
|
||||||
|
if not emotion_cols or df.empty:
|
||||||
|
return []
|
||||||
|
|
||||||
|
dominant_per_row = df[emotion_cols].idxmax(axis=1)
|
||||||
|
counts = dominant_per_row.value_counts()
|
||||||
|
total = max(len(dominant_per_row), 1)
|
||||||
|
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"emotion": col.replace("emotion_", ""),
|
||||||
|
"count": int(count),
|
||||||
|
"ratio": round(float(count / total), 4),
|
||||||
|
}
|
||||||
|
for col, count in counts.items()
|
||||||
|
]
|
||||||
|
|
||||||
|
def emotion_by_source(self, df: pd.DataFrame) -> list[dict]:
|
||||||
|
emotion_cols = self._emotion_cols(df)
|
||||||
|
|
||||||
|
if not emotion_cols or "source" not in df.columns or df.empty:
|
||||||
|
return []
|
||||||
|
|
||||||
|
source_counts = df.groupby("source").size()
|
||||||
|
source_means = df.groupby("source")[emotion_cols].mean().reset_index()
|
||||||
|
rows = source_means.to_dict(orient="records")
|
||||||
|
output = []
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
source = row["source"]
|
||||||
|
dominant_col = max(emotion_cols, key=lambda col: float(row.get(col, 0)))
|
||||||
|
output.append(
|
||||||
|
{
|
||||||
|
"source": str(source),
|
||||||
|
"dominant_emotion": dominant_col.replace("emotion_", ""),
|
||||||
|
"dominant_score": round(float(row.get(dominant_col, 0)), 4),
|
||||||
|
"event_count": int(source_counts.get(source, 0)),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return output
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from collections import Counter
|
|
||||||
|
|
||||||
|
|
||||||
class InteractionAnalysis:
|
class InteractionAnalysis:
|
||||||
def __init__(self, word_exclusions: set[str]):
|
def __init__(self, word_exclusions: set[str]):
|
||||||
self.word_exclusions = word_exclusions
|
self.word_exclusions = word_exclusions
|
||||||
@@ -12,118 +9,6 @@ class InteractionAnalysis:
|
|||||||
tokens = re.findall(r"\b[a-z]{3,}\b", text)
|
tokens = re.findall(r"\b[a-z]{3,}\b", text)
|
||||||
return [t for t in tokens if t not in self.word_exclusions]
|
return [t for t in tokens if t not in self.word_exclusions]
|
||||||
|
|
||||||
def _vocab_richness_per_user(
|
|
||||||
self, df: pd.DataFrame, min_words: int = 20, top_most_used_words: int = 100
|
|
||||||
) -> list:
|
|
||||||
df = df.copy()
|
|
||||||
df["content"] = df["content"].fillna("").astype(str).str.lower()
|
|
||||||
df["tokens"] = df["content"].apply(self._tokenize)
|
|
||||||
|
|
||||||
rows = []
|
|
||||||
for author, group in df.groupby("author"):
|
|
||||||
all_tokens = [t for tokens in group["tokens"] for t in tokens]
|
|
||||||
|
|
||||||
total_words = len(all_tokens)
|
|
||||||
unique_words = len(set(all_tokens))
|
|
||||||
events = len(group)
|
|
||||||
|
|
||||||
# Min amount of words for a user, any less than this might give weird results
|
|
||||||
if total_words < min_words:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 100% = they never reused a word (excluding stop words)
|
|
||||||
vocab_richness = unique_words / total_words
|
|
||||||
avg_words = total_words / max(events, 1)
|
|
||||||
|
|
||||||
counts = Counter(all_tokens)
|
|
||||||
top_words = [
|
|
||||||
{"word": w, "count": int(c)}
|
|
||||||
for w, c in counts.most_common(top_most_used_words)
|
|
||||||
]
|
|
||||||
|
|
||||||
rows.append(
|
|
||||||
{
|
|
||||||
"author": author,
|
|
||||||
"events": int(events),
|
|
||||||
"total_words": int(total_words),
|
|
||||||
"unique_words": int(unique_words),
|
|
||||||
"vocab_richness": round(vocab_richness, 3),
|
|
||||||
"avg_words_per_event": round(avg_words, 2),
|
|
||||||
"top_words": top_words,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
rows = sorted(rows, key=lambda x: x["vocab_richness"], reverse=True)
|
|
||||||
|
|
||||||
return rows
|
|
||||||
|
|
||||||
def top_users(self, df: pd.DataFrame) -> list:
|
|
||||||
counts = df.groupby(["author", "source"]).size().sort_values(ascending=False)
|
|
||||||
|
|
||||||
top_users = [
|
|
||||||
{"author": author, "source": source, "count": int(count)}
|
|
||||||
for (author, source), count in counts.items()
|
|
||||||
]
|
|
||||||
|
|
||||||
return top_users
|
|
||||||
|
|
||||||
def per_user_analysis(self, df: pd.DataFrame) -> dict:
|
|
||||||
per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0)
|
|
||||||
|
|
||||||
emotion_cols = [col for col in df.columns if col.startswith("emotion_")]
|
|
||||||
|
|
||||||
avg_emotions_by_author = {}
|
|
||||||
if emotion_cols:
|
|
||||||
avg_emotions = df.groupby("author")[emotion_cols].mean().fillna(0.0)
|
|
||||||
avg_emotions_by_author = {
|
|
||||||
author: {emotion: float(score) for emotion, score in row.items()}
|
|
||||||
for author, row in avg_emotions.iterrows()
|
|
||||||
}
|
|
||||||
|
|
||||||
# ensure columns always exist
|
|
||||||
for col in ("post", "comment"):
|
|
||||||
if col not in per_user.columns:
|
|
||||||
per_user[col] = 0
|
|
||||||
|
|
||||||
per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(
|
|
||||||
0, 1
|
|
||||||
)
|
|
||||||
per_user["comment_share"] = per_user["comment"] / (
|
|
||||||
per_user["post"] + per_user["comment"]
|
|
||||||
).replace(0, 1)
|
|
||||||
per_user = per_user.sort_values("comment_post_ratio", ascending=True)
|
|
||||||
per_user_records = per_user.reset_index().to_dict(orient="records")
|
|
||||||
|
|
||||||
vocab_rows = self._vocab_richness_per_user(df)
|
|
||||||
vocab_by_author = {row["author"]: row for row in vocab_rows}
|
|
||||||
|
|
||||||
# merge vocab richness + per_user information
|
|
||||||
merged_users = []
|
|
||||||
for row in per_user_records:
|
|
||||||
author = row["author"]
|
|
||||||
merged_users.append(
|
|
||||||
{
|
|
||||||
"author": author,
|
|
||||||
"post": int(row.get("post", 0)),
|
|
||||||
"comment": int(row.get("comment", 0)),
|
|
||||||
"comment_post_ratio": float(row.get("comment_post_ratio", 0)),
|
|
||||||
"comment_share": float(row.get("comment_share", 0)),
|
|
||||||
"avg_emotions": avg_emotions_by_author.get(author, {}),
|
|
||||||
"vocab": vocab_by_author.get(
|
|
||||||
author,
|
|
||||||
{
|
|
||||||
"vocab_richness": 0,
|
|
||||||
"avg_words_per_event": 0,
|
|
||||||
"top_words": [],
|
|
||||||
},
|
|
||||||
),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
merged_users.sort(key=lambda u: u["comment_post_ratio"])
|
|
||||||
|
|
||||||
return merged_users
|
|
||||||
|
|
||||||
def interaction_graph(self, df: pd.DataFrame):
|
def interaction_graph(self, df: pd.DataFrame):
|
||||||
interactions = {a: {} for a in df["author"].dropna().unique()}
|
interactions = {a: {} for a in df["author"].dropna().unique()}
|
||||||
|
|
||||||
@@ -167,67 +52,36 @@ class InteractionAnalysis:
|
|||||||
|
|
||||||
return round(sum(depths) / len(depths), 2)
|
return round(sum(depths) / len(depths), 2)
|
||||||
|
|
||||||
def average_thread_length_by_emotion(self, df: pd.DataFrame):
|
def top_interaction_pairs(self, df: pd.DataFrame, top_n=10):
|
||||||
emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
|
graph = self.interaction_graph(df)
|
||||||
|
pairs = []
|
||||||
|
|
||||||
emotion_cols = [
|
for a, targets in graph.items():
|
||||||
c
|
for b, count in targets.items():
|
||||||
for c in df.columns
|
pairs.append(((a, b), count))
|
||||||
if c.startswith("emotion_") and c not in emotion_exclusions
|
|
||||||
]
|
|
||||||
|
|
||||||
id_to_reply = df.set_index("id")["reply_to"].to_dict()
|
pairs.sort(key=lambda x: x[1], reverse=True)
|
||||||
length_cache = {}
|
return pairs[:top_n]
|
||||||
|
|
||||||
def thread_length_from(start_id):
|
def conversation_concentration(self, df: pd.DataFrame) -> dict:
|
||||||
if start_id in length_cache:
|
if "type" not in df.columns:
|
||||||
return length_cache[start_id]
|
return {}
|
||||||
|
|
||||||
seen = set()
|
comments = df[df["type"] == "comment"]
|
||||||
length = 1
|
if comments.empty:
|
||||||
current = start_id
|
return {}
|
||||||
|
|
||||||
while True:
|
author_counts = comments["author"].value_counts()
|
||||||
if current in seen:
|
total_comments = len(comments)
|
||||||
# infinite loop shouldn't happen, but just in case
|
total_authors = len(author_counts)
|
||||||
break
|
|
||||||
seen.add(current)
|
|
||||||
|
|
||||||
reply_to = id_to_reply.get(current)
|
top_10_pct_n = max(1, int(total_authors * 0.1))
|
||||||
|
top_10_pct_share = round(author_counts.head(top_10_pct_n).sum() / total_comments, 4)
|
||||||
if (
|
|
||||||
reply_to is None
|
|
||||||
or (isinstance(reply_to, float) and pd.isna(reply_to))
|
|
||||||
or reply_to == ""
|
|
||||||
):
|
|
||||||
break
|
|
||||||
|
|
||||||
length += 1
|
|
||||||
current = reply_to
|
|
||||||
|
|
||||||
if current in length_cache:
|
|
||||||
length += length_cache[current] - 1
|
|
||||||
break
|
|
||||||
|
|
||||||
length_cache[start_id] = length
|
|
||||||
return length
|
|
||||||
|
|
||||||
emotion_to_lengths = {}
|
|
||||||
|
|
||||||
# Fill NaNs in emotion cols to avoid max() issues
|
|
||||||
emo_df = df[["id"] + emotion_cols].copy()
|
|
||||||
emo_df[emotion_cols] = emo_df[emotion_cols].fillna(0)
|
|
||||||
|
|
||||||
for _, row in emo_df.iterrows():
|
|
||||||
msg_id = row["id"]
|
|
||||||
length = thread_length_from(msg_id)
|
|
||||||
|
|
||||||
emotions = {c: row[c] for c in emotion_cols}
|
|
||||||
dominant = max(emotions, key=emotions.get)
|
|
||||||
|
|
||||||
emotion_to_lengths.setdefault(dominant, []).append(length)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
emotion: round(sum(lengths) / len(lengths), 2)
|
"total_commenting_authors": total_authors,
|
||||||
for emotion, lengths in emotion_to_lengths.items()
|
"top_10pct_author_count": top_10_pct_n,
|
||||||
|
"top_10pct_comment_share": float(top_10_pct_share),
|
||||||
|
"single_comment_authors": int((author_counts == 1).sum()),
|
||||||
|
"single_comment_author_ratio": float(round((author_counts == 1).sum() / total_authors, 4)),
|
||||||
}
|
}
|
||||||
@@ -61,3 +61,19 @@ class LinguisticAnalysis:
|
|||||||
.head(limit)
|
.head(limit)
|
||||||
.to_dict(orient="records")
|
.to_dict(orient="records")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def lexical_diversity(self, df: pd.DataFrame) -> dict:
|
||||||
|
tokens = (
|
||||||
|
df["content"].fillna("").astype(str).str.lower()
|
||||||
|
.str.findall(r"\b[a-z]{2,}\b")
|
||||||
|
.explode()
|
||||||
|
)
|
||||||
|
tokens = tokens[~tokens.isin(self.word_exclusions)]
|
||||||
|
total = max(len(tokens), 1)
|
||||||
|
unique = int(tokens.nunique())
|
||||||
|
|
||||||
|
return {
|
||||||
|
"total_tokens": total,
|
||||||
|
"unique_tokens": unique,
|
||||||
|
"ttr": round(unique / total, 4),
|
||||||
|
}
|
||||||
|
|||||||
@@ -6,7 +6,9 @@ from server.analysis.cultural import CulturalAnalysis
|
|||||||
from server.analysis.emotional import EmotionalAnalysis
|
from server.analysis.emotional import EmotionalAnalysis
|
||||||
from server.analysis.interactional import InteractionAnalysis
|
from server.analysis.interactional import InteractionAnalysis
|
||||||
from server.analysis.linguistic import LinguisticAnalysis
|
from server.analysis.linguistic import LinguisticAnalysis
|
||||||
|
from server.analysis.summary import SummaryAnalysis
|
||||||
from server.analysis.temporal import TemporalAnalysis
|
from server.analysis.temporal import TemporalAnalysis
|
||||||
|
from server.analysis.user import UserAnalysis
|
||||||
|
|
||||||
DOMAIN_STOPWORDS = {
|
DOMAIN_STOPWORDS = {
|
||||||
"www",
|
"www",
|
||||||
@@ -36,12 +38,11 @@ class StatGen:
|
|||||||
self.interaction_analysis = InteractionAnalysis(EXCLUDE_WORDS)
|
self.interaction_analysis = InteractionAnalysis(EXCLUDE_WORDS)
|
||||||
self.linguistic_analysis = LinguisticAnalysis(EXCLUDE_WORDS)
|
self.linguistic_analysis = LinguisticAnalysis(EXCLUDE_WORDS)
|
||||||
self.cultural_analysis = CulturalAnalysis()
|
self.cultural_analysis = CulturalAnalysis()
|
||||||
|
self.summary_analysis = SummaryAnalysis()
|
||||||
|
self.user_analysis = UserAnalysis(EXCLUDE_WORDS)
|
||||||
|
|
||||||
## Private Methods
|
## Private Methods
|
||||||
def _prepare_filtered_df(self,
|
def _prepare_filtered_df(self, df: pd.DataFrame, filters: dict | None = None) -> pd.DataFrame:
|
||||||
df: pd.DataFrame,
|
|
||||||
filters: dict | None = None
|
|
||||||
) -> pd.DataFrame:
|
|
||||||
filters = filters or {}
|
filters = filters or {}
|
||||||
filtered_df = df.copy()
|
filtered_df = df.copy()
|
||||||
|
|
||||||
@@ -51,10 +52,9 @@ class StatGen:
|
|||||||
data_source_filter = filters.get("data_sources", None)
|
data_source_filter = filters.get("data_sources", None)
|
||||||
|
|
||||||
if search_query:
|
if search_query:
|
||||||
mask = (
|
mask = filtered_df["content"].str.contains(
|
||||||
filtered_df["content"].str.contains(search_query, case=False, na=False)
|
search_query, case=False, na=False
|
||||||
| filtered_df["author"].str.contains(search_query, case=False, na=False)
|
) | filtered_df["author"].str.contains(search_query, case=False, na=False)
|
||||||
)
|
|
||||||
|
|
||||||
# Only include title if the column exists
|
# Only include title if the column exists
|
||||||
if "title" in filtered_df.columns:
|
if "title" in filtered_df.columns:
|
||||||
@@ -76,10 +76,10 @@ class StatGen:
|
|||||||
return filtered_df
|
return filtered_df
|
||||||
|
|
||||||
## Public Methods
|
## Public Methods
|
||||||
def filter_dataset(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
def filter_dataset(self, df: pd.DataFrame, filters: dict | None = None) -> list[dict]:
|
||||||
return self._prepare_filtered_df(df, filters).to_dict(orient="records")
|
return self._prepare_filtered_df(df, filters).to_dict(orient="records")
|
||||||
|
|
||||||
def get_time_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
def temporal(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||||
filtered_df = self._prepare_filtered_df(df, filters)
|
filtered_df = self._prepare_filtered_df(df, filters)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -87,84 +87,54 @@ class StatGen:
|
|||||||
"weekday_hour_heatmap": self.temporal_analysis.heatmap(filtered_df),
|
"weekday_hour_heatmap": self.temporal_analysis.heatmap(filtered_df),
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_content_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
def linguistic(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||||
filtered_df = self._prepare_filtered_df(df, filters)
|
filtered_df = self._prepare_filtered_df(df, filters)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"word_frequencies": self.linguistic_analysis.word_frequencies(filtered_df),
|
"word_frequencies": self.linguistic_analysis.word_frequencies(filtered_df),
|
||||||
"common_two_phrases": self.linguistic_analysis.ngrams(filtered_df),
|
"common_two_phrases": self.linguistic_analysis.ngrams(filtered_df),
|
||||||
"common_three_phrases": self.linguistic_analysis.ngrams(filtered_df, n=3),
|
"common_three_phrases": self.linguistic_analysis.ngrams(filtered_df, n=3),
|
||||||
"average_emotion_by_topic": self.emotional_analysis.avg_emotion_by_topic(
|
"lexical_diversity": self.linguistic_analysis.lexical_diversity(filtered_df)
|
||||||
filtered_df
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_user_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
def emotional(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||||
filtered_df = self._prepare_filtered_df(df, filters)
|
filtered_df = self._prepare_filtered_df(df, filters)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"top_users": self.interaction_analysis.top_users(filtered_df),
|
"average_emotion_by_topic": self.emotional_analysis.avg_emotion_by_topic(filtered_df),
|
||||||
"users": self.interaction_analysis.per_user_analysis(filtered_df),
|
"overall_emotion_average": self.emotional_analysis.overall_emotion_average(filtered_df),
|
||||||
"interaction_graph": self.interaction_analysis.interaction_graph(filtered_df)
|
"dominant_emotion_distribution": self.emotional_analysis.dominant_emotion_distribution(filtered_df),
|
||||||
|
"emotion_by_source": self.emotional_analysis.emotion_by_source(filtered_df)
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_interactional_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
def user(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||||
filtered_df = self._prepare_filtered_df(df, filters)
|
filtered_df = self._prepare_filtered_df(df, filters)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"average_thread_depth": self.interaction_analysis.average_thread_depth(
|
"top_users": self.user_analysis.top_users(filtered_df),
|
||||||
filtered_df
|
"users": self.user_analysis.per_user_analysis(filtered_df)
|
||||||
),
|
|
||||||
"average_thread_length_by_emotion": self.interaction_analysis.average_thread_length_by_emotion(
|
|
||||||
filtered_df
|
|
||||||
),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_cultural_analysis(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
def interactional(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||||
filtered_df = self._prepare_filtered_df(df, filters)
|
filtered_df = self._prepare_filtered_df(df, filters)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"identity_markers": self.cultural_analysis.get_identity_markers(
|
"average_thread_depth": self.interaction_analysis.average_thread_depth(filtered_df),
|
||||||
filtered_df
|
"top_interaction_pairs": self.interaction_analysis.top_interaction_pairs(filtered_df, top_n=100),
|
||||||
),
|
"interaction_graph": self.interaction_analysis.interaction_graph(filtered_df),
|
||||||
|
"conversation_concentration": self.interaction_analysis.conversation_concentration(filtered_df)
|
||||||
|
}
|
||||||
|
|
||||||
|
def cultural(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||||
|
filtered_df = self._prepare_filtered_df(df, filters)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"identity_markers": self.cultural_analysis.get_identity_markers(filtered_df),
|
||||||
"stance_markers": self.cultural_analysis.get_stance_markers(filtered_df),
|
"stance_markers": self.cultural_analysis.get_stance_markers(filtered_df),
|
||||||
"entity_salience": self.cultural_analysis.get_avg_emotions_per_entity(
|
"avg_emotion_per_entity": self.cultural_analysis.get_avg_emotions_per_entity(filtered_df)
|
||||||
filtered_df
|
|
||||||
),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def summary(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
def summary(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||||
filtered_df = self._prepare_filtered_df(df, filters)
|
filtered_df = self._prepare_filtered_df(df, filters)
|
||||||
|
|
||||||
total_posts = (filtered_df["type"] == "post").sum()
|
return self.summary_analysis.summary(filtered_df)
|
||||||
total_comments = (filtered_df["type"] == "comment").sum()
|
|
||||||
events_per_user = filtered_df.groupby("author").size()
|
|
||||||
|
|
||||||
if filtered_df.empty:
|
|
||||||
return {
|
|
||||||
"total_events": 0,
|
|
||||||
"total_posts": 0,
|
|
||||||
"total_comments": 0,
|
|
||||||
"unique_users": 0,
|
|
||||||
"comments_per_post": 0,
|
|
||||||
"lurker_ratio": 0,
|
|
||||||
"time_range": {
|
|
||||||
"start": None,
|
|
||||||
"end": None,
|
|
||||||
},
|
|
||||||
"sources": [],
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
"total_events": int(len(filtered_df)),
|
|
||||||
"total_posts": int(total_posts),
|
|
||||||
"total_comments": int(total_comments),
|
|
||||||
"unique_users": int(events_per_user.count()),
|
|
||||||
"comments_per_post": round(total_comments / max(total_posts, 1), 2),
|
|
||||||
"lurker_ratio": round((events_per_user == 1).mean(), 2),
|
|
||||||
"time_range": {
|
|
||||||
"start": int(filtered_df["dt"].min().timestamp()),
|
|
||||||
"end": int(filtered_df["dt"].max().timestamp()),
|
|
||||||
},
|
|
||||||
"sources": filtered_df["source"].dropna().unique().tolist(),
|
|
||||||
}
|
|
||||||
|
|||||||
64
server/analysis/summary.py
Normal file
64
server/analysis/summary.py
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
class SummaryAnalysis:
    """High-level dataset summary: event counts, engagement ratios, and time span.

    Expects a DataFrame with ``type`` ('post'/'comment'), ``author``,
    ``source``, and a datetime ``dt`` column.
    """

    def total_events(self, df: pd.DataFrame) -> int:
        """Total number of rows (posts and comments combined)."""
        return int(df.shape[0])

    def total_posts(self, df: pd.DataFrame) -> int:
        """Number of rows whose type is 'post'."""
        return int((df["type"] == "post").sum())

    def total_comments(self, df: pd.DataFrame) -> int:
        """Number of rows whose type is 'comment'."""
        return int((df["type"] == "comment").sum())

    def unique_users(self, df: pd.DataFrame) -> int:
        """Count of distinct non-null authors."""
        return int(df["author"].nunique())

    def comments_per_post(self, total_comments: int, total_posts: int) -> float:
        """Average comments per post; the divisor is floored at 1 to avoid /0."""
        return round(total_comments / max(total_posts, 1), 2)

    def lurker_ratio(self, df: pd.DataFrame) -> float:
        """Fraction of authors who produced exactly one event."""
        per_author = df.groupby("author").size()
        return round((per_author == 1).mean(), 2)

    def time_range(self, df: pd.DataFrame) -> dict:
        """Earliest and latest event times as unix timestamps."""
        return {
            "start": int(df["dt"].min().timestamp()),
            "end": int(df["dt"].max().timestamp()),
        }

    def sources(self, df: pd.DataFrame) -> list:
        """Distinct non-null data sources, in order of first appearance."""
        return df["source"].dropna().unique().tolist()

    def empty_summary(self) -> dict:
        """Zeroed summary returned for an empty dataset."""
        return {
            "total_events": 0,
            "total_posts": 0,
            "total_comments": 0,
            "unique_users": 0,
            "comments_per_post": 0,
            "lurker_ratio": 0,
            "time_range": {
                "start": None,
                "end": None,
            },
            "sources": [],
        }

    def summary(self, df: pd.DataFrame) -> dict:
        """Assemble the full summary dict, short-circuiting on empty input."""
        if df.empty:
            return self.empty_summary()

        posts = self.total_posts(df)
        comments = self.total_comments(df)

        return {
            "total_events": self.total_events(df),
            "total_posts": posts,
            "total_comments": comments,
            "unique_users": self.unique_users(df),
            "comments_per_post": self.comments_per_post(comments, posts),
            "lurker_ratio": self.lurker_ratio(df),
            "time_range": self.time_range(df),
            "sources": self.sources(df),
        }
|
||||||
124
server/analysis/user.py
Normal file
124
server/analysis/user.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import re
|
||||||
|
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
class UserAnalysis:
    """Per-user activity, emotion, and vocabulary statistics.

    Expects DataFrames with ``author``, ``type`` ('post'/'comment'),
    ``content``, ``source``, and optional ``emotion_*`` score columns.
    """

    def __init__(self, word_exclusions: set[str]):
        # Stop/domain words ignored when computing vocabulary statistics.
        self.word_exclusions = word_exclusions

    def _tokenize(self, text: str):
        """Lowercase word tokens of 3+ letters, minus the exclusion set."""
        tokens = re.findall(r"\b[a-z]{3,}\b", text)
        return [t for t in tokens if t not in self.word_exclusions]

    def _vocab_richness_per_user(
        self, df: pd.DataFrame, min_words: int = 20, top_most_used_words: int = 100
    ) -> list:
        """Vocabulary stats per author, sorted by richness (unique/total), descending.

        Authors with fewer than ``min_words`` tokens are omitted, since tiny
        samples produce misleading richness values. Each row carries the
        author's top ``top_most_used_words`` words with counts.
        """
        df = df.copy()
        df["content"] = df["content"].fillna("").astype(str).str.lower()
        df["tokens"] = df["content"].apply(self._tokenize)

        rows = []
        for author, group in df.groupby("author"):
            all_tokens = [t for tokens in group["tokens"] for t in tokens]

            total_words = len(all_tokens)
            unique_words = len(set(all_tokens))
            events = len(group)

            # Min amount of words for a user, any less than this might give weird results
            if total_words < min_words:
                continue

            # 100% = they never reused a word (excluding stop words)
            vocab_richness = unique_words / total_words
            avg_words = total_words / max(events, 1)

            counts = Counter(all_tokens)
            top_words = [
                {"word": w, "count": int(c)}
                for w, c in counts.most_common(top_most_used_words)
            ]

            rows.append(
                {
                    "author": author,
                    "events": int(events),
                    "total_words": int(total_words),
                    "unique_words": int(unique_words),
                    "vocab_richness": round(vocab_richness, 3),
                    "avg_words_per_event": round(avg_words, 2),
                    "top_words": top_words,
                }
            )

        rows = sorted(rows, key=lambda x: x["vocab_richness"], reverse=True)

        return rows

    def top_users(self, df: pd.DataFrame) -> list:
        """(author, source) pairs ranked by event count, most active first."""
        counts = df.groupby(["author", "source"]).size().sort_values(ascending=False)

        top_users = [
            {"author": author, "source": source, "count": int(count)}
            for (author, source), count in counts.items()
        ]

        return top_users

    # NOTE: was annotated "-> dict" but the method builds and returns
    # merged_users, a list of per-author records — annotation corrected.
    def per_user_analysis(self, df: pd.DataFrame) -> list:
        """Per-author records combining post/comment counts, average emotion
        scores, and vocabulary stats, sorted by comment/post ratio ascending.

        Authors excluded from the vocab pass (too few words) get zeroed
        vocab defaults so every record has the same shape.
        """
        per_user = df.groupby(["author", "type"]).size().unstack(fill_value=0)

        emotion_cols = [col for col in df.columns if col.startswith("emotion_")]

        avg_emotions_by_author = {}
        if emotion_cols:
            avg_emotions = df.groupby("author")[emotion_cols].mean().fillna(0.0)
            avg_emotions_by_author = {
                author: {emotion: float(score) for emotion, score in row.items()}
                for author, row in avg_emotions.iterrows()
            }

        # ensure columns always exist
        for col in ("post", "comment"):
            if col not in per_user.columns:
                per_user[col] = 0

        # Zero post counts are treated as 1 so the ratio stays finite.
        per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(
            0, 1
        )
        per_user["comment_share"] = per_user["comment"] / (
            per_user["post"] + per_user["comment"]
        ).replace(0, 1)
        per_user = per_user.sort_values("comment_post_ratio", ascending=True)
        per_user_records = per_user.reset_index().to_dict(orient="records")

        vocab_rows = self._vocab_richness_per_user(df)
        vocab_by_author = {row["author"]: row for row in vocab_rows}

        # merge vocab richness + per_user information
        merged_users = []
        for row in per_user_records:
            author = row["author"]
            merged_users.append(
                {
                    "author": author,
                    "post": int(row.get("post", 0)),
                    "comment": int(row.get("comment", 0)),
                    "comment_post_ratio": float(row.get("comment_post_ratio", 0)),
                    "comment_share": float(row.get("comment_share", 0)),
                    "avg_emotions": avg_emotions_by_author.get(author, {}),
                    "vocab": vocab_by_author.get(
                        author,
                        {
                            "vocab_richness": 0,
                            "avg_words_per_event": 0,
                            "top_words": [],
                        },
                    ),
                }
            )

        merged_users.sort(key=lambda u: u["comment_post_ratio"])

        return merged_users
|
||||||
138
server/app.py
138
server/app.py
@@ -186,7 +186,7 @@ def scrape_data():
|
|||||||
dataset_manager.set_dataset_status(
|
dataset_manager.set_dataset_status(
|
||||||
dataset_id,
|
dataset_id,
|
||||||
"fetching",
|
"fetching",
|
||||||
f"Data is being fetched from {', '.join(source['name'] for source in source_configs)}"
|
f"Data is being fetched from {', '.join(source['name'] for source in source_configs)}",
|
||||||
)
|
)
|
||||||
|
|
||||||
fetch_and_process_dataset.delay(
|
fetch_and_process_dataset.delay(
|
||||||
@@ -198,12 +198,14 @@ def scrape_data():
|
|||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
return jsonify({"error": "Failed to queue dataset processing"}), 500
|
return jsonify({"error": "Failed to queue dataset processing"}), 500
|
||||||
|
|
||||||
|
return jsonify(
|
||||||
|
{
|
||||||
|
"message": "Dataset queued for processing",
|
||||||
|
"dataset_id": dataset_id,
|
||||||
|
"status": "processing",
|
||||||
|
}
|
||||||
|
), 202
|
||||||
|
|
||||||
return jsonify({
|
|
||||||
"message": "Dataset queued for processing",
|
|
||||||
"dataset_id": dataset_id,
|
|
||||||
"status": "processing"
|
|
||||||
}), 202
|
|
||||||
|
|
||||||
@app.route("/datasets/upload", methods=["POST"])
|
@app.route("/datasets/upload", methods=["POST"])
|
||||||
@jwt_required()
|
@jwt_required()
|
||||||
@@ -233,7 +235,9 @@ def upload_data():
|
|||||||
|
|
||||||
posts_df = pd.read_json(post_file, lines=True, convert_dates=False)
|
posts_df = pd.read_json(post_file, lines=True, convert_dates=False)
|
||||||
topics = json.load(topic_file)
|
topics = json.load(topic_file)
|
||||||
dataset_id = dataset_manager.save_dataset_info(current_user, dataset_name, topics)
|
dataset_id = dataset_manager.save_dataset_info(
|
||||||
|
current_user, dataset_name, topics
|
||||||
|
)
|
||||||
|
|
||||||
process_dataset.delay(dataset_id, posts_df.to_dict(orient="records"), topics)
|
process_dataset.delay(dataset_id, posts_df.to_dict(orient="records"), topics)
|
||||||
|
|
||||||
@@ -249,6 +253,7 @@ def upload_data():
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
return jsonify({"error": f"An unexpected error occurred"}), 500
|
return jsonify({"error": f"An unexpected error occurred"}), 500
|
||||||
|
|
||||||
|
|
||||||
@app.route("/dataset/<int:dataset_id>", methods=["GET"])
|
@app.route("/dataset/<int:dataset_id>", methods=["GET"])
|
||||||
@jwt_required()
|
@jwt_required()
|
||||||
def get_dataset(dataset_id):
|
def get_dataset(dataset_id):
|
||||||
@@ -256,7 +261,9 @@ def get_dataset(dataset_id):
|
|||||||
user_id = int(get_jwt_identity())
|
user_id = int(get_jwt_identity())
|
||||||
|
|
||||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
raise NotAuthorisedException(
|
||||||
|
"This user is not authorised to access this dataset"
|
||||||
|
)
|
||||||
|
|
||||||
dataset_info = dataset_manager.get_dataset_info(dataset_id)
|
dataset_info = dataset_manager.get_dataset_info(dataset_id)
|
||||||
included_cols = {"id", "name", "created_at"}
|
included_cols = {"id", "name", "created_at"}
|
||||||
@@ -270,6 +277,7 @@ def get_dataset(dataset_id):
|
|||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
return jsonify({"error": "An unexpected error occured"}), 500
|
return jsonify({"error": "An unexpected error occured"}), 500
|
||||||
|
|
||||||
|
|
||||||
@app.route("/dataset/<int:dataset_id>", methods=["PATCH"])
|
@app.route("/dataset/<int:dataset_id>", methods=["PATCH"])
|
||||||
@jwt_required()
|
@jwt_required()
|
||||||
def update_dataset(dataset_id):
|
def update_dataset(dataset_id):
|
||||||
@@ -277,7 +285,9 @@ def update_dataset(dataset_id):
|
|||||||
user_id = int(get_jwt_identity())
|
user_id = int(get_jwt_identity())
|
||||||
|
|
||||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
raise NotAuthorisedException(
|
||||||
|
"This user is not authorised to access this dataset"
|
||||||
|
)
|
||||||
|
|
||||||
body = request.get_json()
|
body = request.get_json()
|
||||||
new_name = body.get("name")
|
new_name = body.get("name")
|
||||||
@@ -286,7 +296,9 @@ def update_dataset(dataset_id):
|
|||||||
return jsonify({"error": "A valid name must be provided"}), 400
|
return jsonify({"error": "A valid name must be provided"}), 400
|
||||||
|
|
||||||
dataset_manager.update_dataset_name(dataset_id, new_name.strip())
|
dataset_manager.update_dataset_name(dataset_id, new_name.strip())
|
||||||
return jsonify({"message": f"Dataset {dataset_id} renamed to '{new_name.strip()}'"}), 200
|
return jsonify(
|
||||||
|
{"message": f"Dataset {dataset_id} renamed to '{new_name.strip()}'"}
|
||||||
|
), 200
|
||||||
except NotAuthorisedException:
|
except NotAuthorisedException:
|
||||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||||
except NonExistentDatasetException:
|
except NonExistentDatasetException:
|
||||||
@@ -295,6 +307,7 @@ def update_dataset(dataset_id):
|
|||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
return jsonify({"error": "An unexpected error occurred"}), 500
|
return jsonify({"error": "An unexpected error occurred"}), 500
|
||||||
|
|
||||||
|
|
||||||
@app.route("/dataset/<int:dataset_id>", methods=["DELETE"])
|
@app.route("/dataset/<int:dataset_id>", methods=["DELETE"])
|
||||||
@jwt_required()
|
@jwt_required()
|
||||||
def delete_dataset(dataset_id):
|
def delete_dataset(dataset_id):
|
||||||
@@ -302,11 +315,17 @@ def delete_dataset(dataset_id):
|
|||||||
user_id = int(get_jwt_identity())
|
user_id = int(get_jwt_identity())
|
||||||
|
|
||||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
raise NotAuthorisedException(
|
||||||
|
"This user is not authorised to access this dataset"
|
||||||
|
)
|
||||||
|
|
||||||
dataset_manager.delete_dataset_info(dataset_id)
|
dataset_manager.delete_dataset_info(dataset_id)
|
||||||
dataset_manager.delete_dataset_content(dataset_id)
|
dataset_manager.delete_dataset_content(dataset_id)
|
||||||
return jsonify({"message": f"Dataset {dataset_id} metadata and content successfully deleted"}), 200
|
return jsonify(
|
||||||
|
{
|
||||||
|
"message": f"Dataset {dataset_id} metadata and content successfully deleted"
|
||||||
|
}
|
||||||
|
), 200
|
||||||
except NotAuthorisedException:
|
except NotAuthorisedException:
|
||||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||||
except NonExistentDatasetException:
|
except NonExistentDatasetException:
|
||||||
@@ -315,6 +334,7 @@ def delete_dataset(dataset_id):
|
|||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
return jsonify({"error": "An unexpected error occured"}), 500
|
return jsonify({"error": "An unexpected error occured"}), 500
|
||||||
|
|
||||||
|
|
||||||
@app.route("/dataset/<int:dataset_id>/status", methods=["GET"])
|
@app.route("/dataset/<int:dataset_id>/status", methods=["GET"])
|
||||||
@jwt_required()
|
@jwt_required()
|
||||||
def get_dataset_status(dataset_id):
|
def get_dataset_status(dataset_id):
|
||||||
@@ -322,7 +342,9 @@ def get_dataset_status(dataset_id):
|
|||||||
user_id = int(get_jwt_identity())
|
user_id = int(get_jwt_identity())
|
||||||
|
|
||||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
raise NotAuthorisedException(
|
||||||
|
"This user is not authorised to access this dataset"
|
||||||
|
)
|
||||||
|
|
||||||
dataset_status = dataset_manager.get_dataset_status(dataset_id)
|
dataset_status = dataset_manager.get_dataset_status(dataset_id)
|
||||||
return jsonify(dataset_status), 200
|
return jsonify(dataset_status), 200
|
||||||
@@ -334,17 +356,44 @@ def get_dataset_status(dataset_id):
|
|||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
return jsonify({"error": "An unexpected error occured"}), 500
|
return jsonify({"error": "An unexpected error occured"}), 500
|
||||||
|
|
||||||
@app.route("/dataset/<int:dataset_id>/content", methods=["GET"])
|
|
||||||
|
@app.route("/dataset/<int:dataset_id>/linguistic", methods=["GET"])
|
||||||
@jwt_required()
|
@jwt_required()
|
||||||
def content_endpoint(dataset_id):
|
def get_linguistic_analysis(dataset_id):
|
||||||
try:
|
try:
|
||||||
user_id = int(get_jwt_identity())
|
user_id = int(get_jwt_identity())
|
||||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
raise NotAuthorisedException(
|
||||||
|
"This user is not authorised to access this dataset"
|
||||||
|
)
|
||||||
|
|
||||||
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||||
filters = get_request_filters()
|
filters = get_request_filters()
|
||||||
return jsonify(stat_gen.get_content_analysis(dataset_content, filters)), 200
|
return jsonify(stat_gen.linguistic(dataset_content, filters)), 200
|
||||||
|
except NotAuthorisedException:
|
||||||
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||||
|
except NonExistentDatasetException:
|
||||||
|
return jsonify({"error": "Dataset does not exist"}), 404
|
||||||
|
except ValueError as e:
|
||||||
|
return jsonify({"error": f"Malformed or missing data"}), 400
|
||||||
|
except Exception as e:
|
||||||
|
print(traceback.format_exc())
|
||||||
|
return jsonify({"error": f"An unexpected error occurred"}), 500
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/dataset/<int:dataset_id>/emotional", methods=["GET"])
|
||||||
|
@jwt_required()
|
||||||
|
def get_emotional_analysis(dataset_id):
|
||||||
|
try:
|
||||||
|
user_id = int(get_jwt_identity())
|
||||||
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||||
|
raise NotAuthorisedException(
|
||||||
|
"This user is not authorised to access this dataset"
|
||||||
|
)
|
||||||
|
|
||||||
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||||
|
filters = get_request_filters()
|
||||||
|
return jsonify(stat_gen.emotional(dataset_content, filters)), 200
|
||||||
except NotAuthorisedException:
|
except NotAuthorisedException:
|
||||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||||
except NonExistentDatasetException:
|
except NonExistentDatasetException:
|
||||||
@@ -362,7 +411,9 @@ def get_summary(dataset_id):
|
|||||||
try:
|
try:
|
||||||
user_id = int(get_jwt_identity())
|
user_id = int(get_jwt_identity())
|
||||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
raise NotAuthorisedException(
|
||||||
|
"This user is not authorised to access this dataset"
|
||||||
|
)
|
||||||
|
|
||||||
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||||
filters = get_request_filters()
|
filters = get_request_filters()
|
||||||
@@ -378,17 +429,19 @@ def get_summary(dataset_id):
|
|||||||
return jsonify({"error": f"An unexpected error occurred"}), 500
|
return jsonify({"error": f"An unexpected error occurred"}), 500
|
||||||
|
|
||||||
|
|
||||||
@app.route("/dataset/<int:dataset_id>/time", methods=["GET"])
|
@app.route("/dataset/<int:dataset_id>/temporal", methods=["GET"])
|
||||||
@jwt_required()
|
@jwt_required()
|
||||||
def get_time_analysis(dataset_id):
|
def get_temporal_analysis(dataset_id):
|
||||||
try:
|
try:
|
||||||
user_id = int(get_jwt_identity())
|
user_id = int(get_jwt_identity())
|
||||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
raise NotAuthorisedException(
|
||||||
|
"This user is not authorised to access this dataset"
|
||||||
|
)
|
||||||
|
|
||||||
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||||
filters = get_request_filters()
|
filters = get_request_filters()
|
||||||
return jsonify(stat_gen.get_time_analysis(dataset_content, filters)), 200
|
return jsonify(stat_gen.temporal(dataset_content, filters)), 200
|
||||||
except NotAuthorisedException:
|
except NotAuthorisedException:
|
||||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||||
except NonExistentDatasetException:
|
except NonExistentDatasetException:
|
||||||
@@ -406,11 +459,13 @@ def get_user_analysis(dataset_id):
|
|||||||
try:
|
try:
|
||||||
user_id = int(get_jwt_identity())
|
user_id = int(get_jwt_identity())
|
||||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
raise NotAuthorisedException(
|
||||||
|
"This user is not authorised to access this dataset"
|
||||||
|
)
|
||||||
|
|
||||||
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||||
filters = get_request_filters()
|
filters = get_request_filters()
|
||||||
return jsonify(stat_gen.get_user_analysis(dataset_content, filters)), 200
|
return jsonify(stat_gen.user(dataset_content, filters)), 200
|
||||||
except NotAuthorisedException:
|
except NotAuthorisedException:
|
||||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||||
except NonExistentDatasetException:
|
except NonExistentDatasetException:
|
||||||
@@ -428,11 +483,13 @@ def get_cultural_analysis(dataset_id):
|
|||||||
try:
|
try:
|
||||||
user_id = int(get_jwt_identity())
|
user_id = int(get_jwt_identity())
|
||||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
raise NotAuthorisedException(
|
||||||
|
"This user is not authorised to access this dataset"
|
||||||
|
)
|
||||||
|
|
||||||
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||||
filters = get_request_filters()
|
filters = get_request_filters()
|
||||||
return jsonify(stat_gen.get_cultural_analysis(dataset_content, filters)), 200
|
return jsonify(stat_gen.cultural(dataset_content, filters)), 200
|
||||||
except NotAuthorisedException:
|
except NotAuthorisedException:
|
||||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||||
except NonExistentDatasetException:
|
except NonExistentDatasetException:
|
||||||
@@ -444,17 +501,19 @@ def get_cultural_analysis(dataset_id):
|
|||||||
return jsonify({"error": f"An unexpected error occurred"}), 500
|
return jsonify({"error": f"An unexpected error occurred"}), 500
|
||||||
|
|
||||||
|
|
||||||
@app.route("/dataset/<int:dataset_id>/interaction", methods=["GET"])
|
@app.route("/dataset/<int:dataset_id>/interactional", methods=["GET"])
|
||||||
@jwt_required()
|
@jwt_required()
|
||||||
def get_interaction_analysis(dataset_id):
|
def get_interaction_analysis(dataset_id):
|
||||||
try:
|
try:
|
||||||
user_id = int(get_jwt_identity())
|
user_id = int(get_jwt_identity())
|
||||||
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||||
raise NotAuthorisedException("This user is not authorised to access this dataset")
|
raise NotAuthorisedException(
|
||||||
|
"This user is not authorised to access this dataset"
|
||||||
|
)
|
||||||
|
|
||||||
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||||
filters = get_request_filters()
|
filters = get_request_filters()
|
||||||
return jsonify(stat_gen.get_interactional_analysis(dataset_content, filters)), 200
|
return jsonify(stat_gen.interactional(dataset_content, filters)), 200
|
||||||
except NotAuthorisedException:
|
except NotAuthorisedException:
|
||||||
return jsonify({"error": "User is not authorised to access this content"}), 403
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||||
except NonExistentDatasetException:
|
except NonExistentDatasetException:
|
||||||
@@ -465,6 +524,27 @@ def get_interaction_analysis(dataset_id):
|
|||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
return jsonify({"error": f"An unexpected error occurred"}), 500
|
return jsonify({"error": f"An unexpected error occurred"}), 500
|
||||||
|
|
||||||
|
@app.route("/dataset/<int:dataset_id>/all", methods=["GET"])
|
||||||
|
@jwt_required()
|
||||||
|
def get_full_dataset(dataset_id: int):
|
||||||
|
try:
|
||||||
|
user_id = int(get_jwt_identity())
|
||||||
|
if not dataset_manager.authorize_user_dataset(dataset_id, user_id):
|
||||||
|
raise NotAuthorisedException(
|
||||||
|
"This user is not authorised to access this dataset"
|
||||||
|
)
|
||||||
|
|
||||||
|
dataset_content = dataset_manager.get_dataset_content(dataset_id)
|
||||||
|
return jsonify(dataset_content.to_dict(orient="records")), 200
|
||||||
|
except NotAuthorisedException:
|
||||||
|
return jsonify({"error": "User is not authorised to access this content"}), 403
|
||||||
|
except NonExistentDatasetException:
|
||||||
|
return jsonify({"error": "Dataset does not exist"}), 404
|
||||||
|
except ValueError as e:
|
||||||
|
return jsonify({"error": f"Malformed or missing data"}), 400
|
||||||
|
except Exception as e:
|
||||||
|
print(traceback.format_exc())
|
||||||
|
return jsonify({"error": f"An unexpected error occurred"}), 500
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
app.run(debug=True)
|
app.run(debug=True)
|
||||||
|
|||||||
@@ -101,7 +101,7 @@ class DatasetManager:
|
|||||||
row["source"],
|
row["source"],
|
||||||
row.get("topic"),
|
row.get("topic"),
|
||||||
row.get("topic_confidence"),
|
row.get("topic_confidence"),
|
||||||
Json(row["ner_entities"]) if row.get("ner_entities") else None,
|
Json(row["entities"]) if row.get("entities") is not None else None,
|
||||||
row.get("emotion_anger"),
|
row.get("emotion_anger"),
|
||||||
row.get("emotion_disgust"),
|
row.get("emotion_disgust"),
|
||||||
row.get("emotion_fear"),
|
row.get("emotion_fear"),
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ CREATE TABLE events (
|
|||||||
weekday VARCHAR(255) NOT NULL,
|
weekday VARCHAR(255) NOT NULL,
|
||||||
|
|
||||||
/* Posts Only */
|
/* Posts Only */
|
||||||
title VARCHAR(255),
|
title TEXT,
|
||||||
|
|
||||||
/* Comments Only*/
|
/* Comments Only*/
|
||||||
parent_id VARCHAR(255),
|
parent_id VARCHAR(255),
|
||||||
|
|||||||
Reference in New Issue
Block a user