Compare commits

..

4 Commits

Author SHA1 Message Date
17ef42e548 feat!(frontend): add cultural, interactional and linguistic stat pages 2026-03-18 18:43:49 +00:00
7e4a91bb5e style(frontend): style api types to be in order of the endpoint 2026-03-18 18:40:39 +00:00
436549641f chore(frontend): add api types for new backend data 2026-03-18 18:37:39 +00:00
3e78a54388 feat(stat): add conversation concentration metric
Remove the old `initiator_ratio` metric, which wasn't working due to every event having a `reply_to` value.

This metric was suggested by AI, and it turned out to be a surprisingly useful one that gave interesting insights.
2026-03-18 18:36:09 +00:00
7 changed files with 668 additions and 81 deletions

View File

@@ -0,0 +1,119 @@
import Card from "./Card";
import StatsStyling from "../styles/stats_styling";
import type { CulturalAnalysisResponse } from "../types/ApiTypes";
const styles = StatsStyling;

type CulturalStatsProps = {
  data: CulturalAnalysisResponse;
};

// Shown wherever the API response did not supply a usable value.
const PLACEHOLDER = "—";

// Upper bound on the number of rows rendered in the entity list.
const MAX_ENTITIES = 20;

/** Formats a count with locale separators, or returns the placeholder when it is not a number. */
const formatCount = (value: number | null | undefined): string =>
  typeof value === "number" ? value.toLocaleString() : PLACEHOLDER;

/** Builds the "per 1k tokens" sublabel, falling back to a generic caption when the rate is missing. */
const formatRate = (rate: number | null | undefined): string =>
  typeof rate === "number" ? `${rate.toFixed(3)} per 1k tokens` : "Marker frequency";

/**
 * Returns the highest-scoring emotion as "<label> (<score>)", with the
 * "emotion_" prefix stripped from the label.
 *
 * Entries whose score is not a finite number are ignored so that a single
 * null/NaN value in the response cannot throw from `toFixed`. When no usable
 * entry exists the placeholder is returned. Ties keep the first entry in
 * insertion order.
 */
const topEmotion = (emotionAvg: Record<string, number> | null | undefined): string => {
  let bestLabel: string | null = null;
  let bestScore = Number.NEGATIVE_INFINITY;

  for (const [label, score] of Object.entries(emotionAvg ?? {})) {
    if (typeof score === "number" && Number.isFinite(score) && score > bestScore) {
      bestLabel = label;
      bestScore = score;
    }
  }

  if (bestLabel === null) {
    return PLACEHOLDER;
  }
  return `${bestLabel.replace("emotion_", "")} (${bestScore.toFixed(3)})`;
};

/**
 * Cultural analysis view: identity-marker and stance-marker summary cards,
 * the dominant emotion for in-group / out-group framing, and a list of the
 * most frequently mentioned entities with their dominant emotion.
 *
 * Every section of `data` is optional; missing values render as a placeholder.
 */
const CulturalStats = ({ data }: CulturalStatsProps) => {
  const identity = data.identity_markers;
  const stance = data.stance_markers;

  // Most-mentioned entities first; a missing post_count sorts as zero.
  const entities = Object.entries(data.avg_emotion_per_entity?.entity_emotion_avg ?? {})
    .sort(([, a], [, b]) => (b.post_count ?? 0) - (a.post_count ?? 0))
    .slice(0, MAX_ENTITIES);

  // One entry per summary card, in display order (identity markers, then stance markers).
  const summaryCards = [
    {
      label: "In-Group Usage",
      value: formatCount(identity?.in_group_usage),
      sublabel: "we/us/our references",
    },
    {
      label: "Out-Group Usage",
      value: formatCount(identity?.out_group_usage),
      sublabel: "they/them/their references",
    },
    {
      label: "In-Group Posts",
      value: formatCount(identity?.in_group_posts),
      sublabel: "Posts with stronger in-group language",
    },
    {
      label: "Out-Group Posts",
      value: formatCount(identity?.out_group_posts),
      sublabel: "Posts with stronger out-group language",
    },
    {
      label: "Hedge Markers",
      value: formatCount(stance?.hedge_total),
      sublabel: formatRate(stance?.hedge_per_1k_tokens),
    },
    {
      label: "Certainty Markers",
      value: formatCount(stance?.certainty_total),
      sublabel: formatRate(stance?.certainty_per_1k_tokens),
    },
    {
      label: "Deontic Markers",
      value: formatCount(stance?.deontic_total),
      sublabel: formatRate(stance?.deontic_per_1k_tokens),
    },
    {
      label: "Permission Markers",
      value: formatCount(stance?.permission_total),
      sublabel: formatRate(stance?.permission_per_1k_tokens),
    },
  ];

  return (
    <div style={styles.page}>
      <div style={{ ...styles.container, ...styles.grid }}>
        {summaryCards.map((card) => (
          <Card
            key={card.label}
            label={card.label}
            value={card.value}
            sublabel={card.sublabel}
            style={{ gridColumn: "span 3" }}
          />
        ))}

        <div style={{ ...styles.card, gridColumn: "span 6" }}>
          <h2 style={styles.sectionTitle}>In-Group Emotion Profile</h2>
          <p style={styles.sectionSubtitle}>Dominant average emotion where in-group framing is stronger.</p>
          <div style={styles.topUserName}>{topEmotion(identity?.in_group_emotion_avg)}</div>
        </div>

        <div style={{ ...styles.card, gridColumn: "span 6" }}>
          <h2 style={styles.sectionTitle}>Out-Group Emotion Profile</h2>
          <p style={styles.sectionSubtitle}>Dominant average emotion where out-group framing is stronger.</p>
          <div style={styles.topUserName}>{topEmotion(identity?.out_group_emotion_avg)}</div>
        </div>

        <div style={{ ...styles.card, gridColumn: "span 12" }}>
          <h2 style={styles.sectionTitle}>Entity Emotion Averages</h2>
          <p style={styles.sectionSubtitle}>Most frequent entities and their dominant average emotion signature.</p>
          {entities.length === 0 ? (
            <div style={styles.topUserMeta}>No entity-level cultural data available.</div>
          ) : (
            <div style={{ ...styles.topUsersList, maxHeight: 420, overflowY: "auto" }}>
              {entities.map(([entity, aggregate]) => (
                <div key={entity} style={styles.topUserItem}>
                  <div style={styles.topUserName}>{entity}</div>
                  <div style={styles.topUserMeta}>
                    {/* Separator keeps the count and the emotion from running together. */}
                    {formatCount(aggregate.post_count)} posts · Dominant emotion: {topEmotion(aggregate.emotion_avg)}
                  </div>
                </div>
              ))}
            </div>
          )}
        </div>
      </div>
    </div>
  );
};

export default CulturalStats;

View File

@@ -0,0 +1,198 @@
import Card from "./Card";
import StatsStyling from "../styles/stats_styling";
import type { InteractionAnalysisResponse } from "../types/ApiTypes";
import {
ResponsiveContainer,
BarChart,
Bar,
XAxis,
YAxis,
CartesianGrid,
Tooltip,
PieChart,
Pie,
Cell,
Legend,
} from "recharts";
const styles = StatsStyling;

type InteractionalStatsProps = {
  data: InteractionAnalysisResponse;
};

// A directed reply path ([source, target]) together with its reply count.
type InteractionPair = [[string, string], number];

// Slice colours for the concentration pie: top 10% authors, then everyone else.
const PIE_COLORS = ["#2b6777", "#c8d8e4"];

// Row limits for the pair list and for the bar chart respectively.
const MAX_LISTED_PAIRS = 20;
const MAX_CHARTED_PAIRS = 8;

/** Returns `value` when it is a number, otherwise null. */
const numberOrNull = (value: unknown): number | null =>
  typeof value === "number" ? value : null;

/** Runtime check that an item has the [[source, target], count] shape before it is rendered. */
const isInteractionPair = (item: unknown): item is InteractionPair => {
  if (!Array.isArray(item) || item.length !== 2) {
    return false;
  }
  const [pair, count] = item;
  return Array.isArray(pair)
    && pair.length === 2
    && typeof pair[0] === "string"
    && typeof pair[1] === "string"
    && typeof count === "number";
};

/**
 * Interaction analysis view: network summary cards derived from the
 * interaction graph, conversation-concentration cards, a bar chart of the
 * strongest reply pairs, a pie chart of comment share, and the pair list.
 *
 * Optional sections of `data` that are absent render as "-" in cards and as
 * a short message in place of the corresponding chart or list.
 */
const InteractionalStats = ({ data }: InteractionalStatsProps) => {
  const graph = data.interaction_graph ?? {};
  const userCount = Object.keys(graph).length;
  // Flatten the nested source -> target -> weight map into a list of edge weights.
  const edgeWeights = Object.values(graph).flatMap((targets) => Object.values(targets ?? {}));
  const edgeCount = edgeWeights.length;
  const interactionVolume = edgeWeights.reduce((sum, weight) => sum + weight, 0);

  const concentration = data.conversation_concentration;
  const topTenCommentShare = numberOrNull(concentration?.top_10pct_comment_share);
  const topTenAuthorCount = numberOrNull(concentration?.top_10pct_author_count);
  const totalCommentingAuthors = numberOrNull(concentration?.total_commenting_authors);
  const singleCommentAuthorRatio = numberOrNull(concentration?.single_comment_author_ratio);

  const topPairs = (data.top_interaction_pairs ?? [])
    .filter(isInteractionPair)
    .slice(0, MAX_LISTED_PAIRS);

  const topPairChartData = topPairs
    .slice(0, MAX_CHARTED_PAIRS)
    .map(([[source, target], replies], index) => ({
      pair: `${source} -> ${target}`,
      replies,
      rank: index + 1,
    }));

  // The share arrives as a fraction; the cards and the pie work in percent.
  const topTenSharePercent = topTenCommentShare === null ? null : topTenCommentShare * 100;
  const concentrationPieData: { name: string; value: number }[] = topTenSharePercent === null
    ? []
    : [
      { name: "Top 10% authors", value: topTenSharePercent },
      { name: "Other authors", value: Math.max(0, 100 - topTenSharePercent) },
    ];

  return (
    <div style={styles.page}>
      <div style={{ ...styles.container, ...styles.grid }}>
        <Card
          label="Avg Thread Depth"
          value={typeof data.average_thread_depth === "number" ? data.average_thread_depth.toFixed(2) : "—"}
          sublabel="Depth from reply chains"
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Network Users"
          value={userCount.toLocaleString()}
          sublabel="Authors in interaction graph"
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Unique Links"
          value={edgeCount.toLocaleString()}
          sublabel="Directed source-target pairs"
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Interaction Volume"
          value={interactionVolume.toLocaleString()}
          sublabel="Sum of link weights"
          style={{ gridColumn: "span 3" }}
        />
        <Card
          label="Top 10% Comment Share"
          value={topTenSharePercent === null ? "-" : `${topTenSharePercent.toFixed(1)}%`}
          sublabel={topTenAuthorCount === null || totalCommentingAuthors === null
            ? "Comment volume held by top commenters"
            : `${topTenAuthorCount.toLocaleString()} of ${totalCommentingAuthors.toLocaleString()} authors`}
          style={{ gridColumn: "span 6" }}
        />
        <Card
          label="Single-Comment Authors"
          value={singleCommentAuthorRatio === null ? "-" : `${(singleCommentAuthorRatio * 100).toFixed(1)}%`}
          sublabel="Authors who commented exactly once"
          style={{ gridColumn: "span 6" }}
        />

        <div style={{ ...styles.card, gridColumn: "span 12" }}>
          <h2 style={styles.sectionTitle}>Interaction Visuals</h2>
          <p style={styles.sectionSubtitle}>Quick charts for interaction direction and conversation concentration.</p>
          <div style={{ ...styles.grid, marginTop: 12 }}>
            <div style={{ ...styles.cardBase, gridColumn: "span 6" }}>
              <h3 style={{ ...styles.sectionTitle, fontSize: "1rem" }}>Top Interaction Pairs</h3>
              {topPairChartData.length === 0 ? (
                <div style={styles.topUserMeta}>No interaction pair data available.</div>
              ) : (
                <div style={{ width: "100%", height: 300 }}>
                  <ResponsiveContainer>
                    <BarChart data={topPairChartData} layout="vertical" margin={{ top: 8, right: 16, left: 16, bottom: 8 }}>
                      <CartesianGrid strokeDasharray="3 3" stroke="#d9e2ec" />
                      <XAxis type="number" allowDecimals={false} />
                      <YAxis
                        type="category"
                        dataKey="rank"
                        tickFormatter={(value) => `#${value}`}
                        width={36}
                      />
                      {/* The axis only has room for the rank, so the tooltip header names the actual pair. */}
                      <Tooltip
                        labelFormatter={(label, payload) => payload?.[0]?.payload?.pair ?? `#${label}`}
                      />
                      <Bar dataKey="replies" fill="#2b6777" radius={[0, 6, 6, 0]} />
                    </BarChart>
                  </ResponsiveContainer>
                </div>
              )}
            </div>

            <div style={{ ...styles.cardBase, gridColumn: "span 6" }}>
              <h3 style={{ ...styles.sectionTitle, fontSize: "1rem" }}>Top 10% vs Other Comment Share</h3>
              {concentrationPieData.length === 0 ? (
                <div style={styles.topUserMeta}>No conversation concentration data available.</div>
              ) : (
                <div style={{ width: "100%", height: 300 }}>
                  <ResponsiveContainer>
                    <PieChart>
                      <Pie
                        data={concentrationPieData}
                        dataKey="value"
                        nameKey="name"
                        innerRadius={56}
                        outerRadius={88}
                        paddingAngle={2}
                      >
                        {concentrationPieData.map((entry, index) => (
                          <Cell key={`${entry.name}-${index}`} fill={PIE_COLORS[index % PIE_COLORS.length]} />
                        ))}
                      </Pie>
                      <Tooltip />
                      <Legend verticalAlign="bottom" height={36} />
                    </PieChart>
                  </ResponsiveContainer>
                </div>
              )}
            </div>
          </div>
        </div>

        <div style={{ ...styles.card, gridColumn: "span 12" }}>
          <h2 style={styles.sectionTitle}>Top Interaction Pairs</h2>
          <p style={styles.sectionSubtitle}>Most frequent directed reply paths between users.</p>
          {topPairs.length === 0 ? (
            <div style={styles.topUserMeta}>No interaction pair data available.</div>
          ) : (
            <div style={{ ...styles.topUsersList, maxHeight: 420, overflowY: "auto" }}>
              {topPairs.map(([[source, target], replies], index) => (
                <div key={`${source}->${target}-${index}`} style={styles.topUserItem}>
                  <div style={styles.topUserName}>{source} -&gt; {target}</div>
                  <div style={styles.topUserMeta}>{replies.toLocaleString()} replies</div>
                </div>
              ))}
            </div>
          )}
        </div>
      </div>
    </div>
  );
};

export default InteractionalStats;

View File

@@ -0,0 +1,86 @@
import Card from "./Card";
import StatsStyling from "../styles/stats_styling";
import type { LinguisticAnalysisResponse } from "../types/ApiTypes";
const styles = StatsStyling;

type LinguisticStatsProps = {
  data: LinguisticAnalysisResponse;
};

// Row limits for the word list and for each n-gram list.
const MAX_WORDS = 20;
const MAX_NGRAMS = 10;

type FrequencyListProps = {
  title: string;
  subtitle: string;
  emptyMessage: string;
  items: { label: string; count: number }[];
};

/**
 * One titled, scrollable card listing labels with their usage counts.
 * Renders `emptyMessage` instead of an empty scroll area when there are no items.
 */
const FrequencyList = ({ title, subtitle, emptyMessage, items }: FrequencyListProps) => (
  <div style={{ ...styles.card, gridColumn: "span 4" }}>
    <h2 style={styles.sectionTitle}>{title}</h2>
    <p style={styles.sectionSubtitle}>{subtitle}</p>
    {items.length === 0 ? (
      <div style={styles.topUserMeta}>{emptyMessage}</div>
    ) : (
      <div style={{ ...styles.topUsersList, maxHeight: 360, overflowY: "auto" }}>
        {items.map((item) => (
          <div key={item.label} style={styles.topUserItem}>
            <div style={styles.topUserName}>{item.label}</div>
            <div style={styles.topUserMeta}>{item.count.toLocaleString()} uses</div>
          </div>
        ))}
      </div>
    )}
  </div>
);

/**
 * Linguistic analysis view: lexical-diversity summary cards followed by the
 * most frequent words, bigrams and trigrams.
 *
 * Missing lists are treated as empty and missing lexical values render as "—".
 */
const LinguisticStats = ({ data }: LinguisticStatsProps) => {
  const lexical = data.lexical_diversity;

  // Normalise words and n-grams to one { label, count } shape so a single list component serves all three.
  const topWords = (data.word_frequencies ?? [])
    .slice(0, MAX_WORDS)
    .map(({ word, count }) => ({ label: word, count }));
  const topBigrams = (data.common_two_phrases ?? [])
    .slice(0, MAX_NGRAMS)
    .map(({ ngram, count }) => ({ label: ngram, count }));
  const topTrigrams = (data.common_three_phrases ?? [])
    .slice(0, MAX_NGRAMS)
    .map(({ ngram, count }) => ({ label: ngram, count }));

  return (
    <div style={styles.page}>
      <div style={{ ...styles.container, ...styles.grid }}>
        <Card
          label="Total Tokens"
          value={lexical?.total_tokens?.toLocaleString() ?? "—"}
          sublabel="After token filtering"
          style={{ gridColumn: "span 4" }}
        />
        <Card
          label="Unique Tokens"
          value={lexical?.unique_tokens?.toLocaleString() ?? "—"}
          sublabel="Distinct vocabulary items"
          style={{ gridColumn: "span 4" }}
        />
        <Card
          label="Type-Token Ratio"
          value={typeof lexical?.ttr === "number" ? lexical.ttr.toFixed(4) : "—"}
          sublabel="Vocabulary richness proxy"
          style={{ gridColumn: "span 4" }}
        />

        <FrequencyList
          title="Top Words"
          subtitle="Most frequent filtered terms."
          emptyMessage="No word frequency data available."
          items={topWords}
        />
        <FrequencyList
          title="Top Bigrams"
          subtitle="Most frequent 2-word phrases."
          emptyMessage="No bigram data available."
          items={topBigrams}
        />
        <FrequencyList
          title="Top Trigrams"
          subtitle="Most frequent 3-word phrases."
          emptyMessage="No trigram data available."
          items={topTrigrams}
        />
      </div>
    </div>
  );
};

export default LinguisticStats;

View File

@@ -5,12 +5,20 @@ import StatsStyling from "../styles/stats_styling";
import SummaryStats from "../components/SummaryStats"; import SummaryStats from "../components/SummaryStats";
import EmotionalStats from "../components/EmotionalStats"; import EmotionalStats from "../components/EmotionalStats";
import UserStats from "../components/UserStats"; import UserStats from "../components/UserStats";
import LinguisticStats from "../components/LinguisticStats";
import InteractionalStats from "../components/InteractionalStats";
import CulturalStats from "../components/CulturalStats";
import { import {
type SummaryResponse, type SummaryResponse,
type UserAnalysisResponse, type UserAnalysisResponse,
type TimeAnalysisResponse, type TimeAnalysisResponse,
type ContentAnalysisResponse type ContentAnalysisResponse,
type UserEndpointResponse,
type LinguisticAnalysisResponse,
type EmotionalAnalysisResponse,
type InteractionAnalysisResponse,
type CulturalAnalysisResponse
} from '../types/ApiTypes' } from '../types/ApiTypes'
const API_BASE_URL = import.meta.env.VITE_BACKEND_URL const API_BASE_URL = import.meta.env.VITE_BACKEND_URL
@@ -20,11 +28,14 @@ const StatPage = () => {
const { datasetId: routeDatasetId } = useParams<{ datasetId: string }>(); const { datasetId: routeDatasetId } = useParams<{ datasetId: string }>();
const [error, setError] = useState(''); const [error, setError] = useState('');
const [loading, setLoading] = useState(false); const [loading, setLoading] = useState(false);
const [activeView, setActiveView] = useState<"summary" | "emotional" | "user">("summary"); const [activeView, setActiveView] = useState<"summary" | "emotional" | "user" | "linguistic" | "interactional" | "cultural">("summary");
const [userData, setUserData] = useState<UserAnalysisResponse | null>(null); const [userData, setUserData] = useState<UserAnalysisResponse | null>(null);
const [timeData, setTimeData] = useState<TimeAnalysisResponse | null>(null); const [timeData, setTimeData] = useState<TimeAnalysisResponse | null>(null);
const [contentData, setContentData] = useState<ContentAnalysisResponse | null>(null); const [contentData, setContentData] = useState<ContentAnalysisResponse | null>(null);
const [linguisticData, setLinguisticData] = useState<LinguisticAnalysisResponse | null>(null);
const [interactionData, setInteractionData] = useState<InteractionAnalysisResponse | null>(null);
const [culturalData, setCulturalData] = useState<CulturalAnalysisResponse | null>(null);
const [summary, setSummary] = useState<SummaryResponse | null>(null); const [summary, setSummary] = useState<SummaryResponse | null>(null);
@@ -83,15 +94,23 @@ const StatPage = () => {
setLoading(true); setLoading(true);
Promise.all([ Promise.all([
axios.get<TimeAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/time`, { axios.get<TimeAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/temporal`, {
params, params,
headers: authHeaders, headers: authHeaders,
}), }),
axios.get<UserAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/user`, { axios.get<UserEndpointResponse>(`${API_BASE_URL}/dataset/${datasetId}/user`, {
params, params,
headers: authHeaders, headers: authHeaders,
}), }),
axios.get<ContentAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/content`, { axios.get<LinguisticAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/linguistic`, {
params,
headers: authHeaders,
}),
axios.get<EmotionalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/emotional`, {
params,
headers: authHeaders,
}),
axios.get<InteractionAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/interactional`, {
params, params,
headers: authHeaders, headers: authHeaders,
}), }),
@@ -99,11 +118,28 @@ const StatPage = () => {
params, params,
headers: authHeaders, headers: authHeaders,
}), }),
axios.get<CulturalAnalysisResponse>(`${API_BASE_URL}/dataset/${datasetId}/cultural`, {
params,
headers: authHeaders,
}),
]) ])
.then(([timeRes, userRes, contentRes, summaryRes]) => { .then(([timeRes, userRes, linguisticRes, emotionalRes, interactionRes, summaryRes, culturalRes]) => {
setUserData(userRes.data || null); const combinedUserData: UserAnalysisResponse = {
...userRes.data,
interaction_graph: interactionRes.data?.interaction_graph ?? {},
};
const combinedContentData: ContentAnalysisResponse = {
...linguisticRes.data,
...emotionalRes.data,
};
setUserData(combinedUserData);
setTimeData(timeRes.data || null); setTimeData(timeRes.data || null);
setContentData(contentRes.data || null); setContentData(combinedContentData);
setLinguisticData(linguisticRes.data || null);
setInteractionData(interactionRes.data || null);
setCulturalData(culturalRes.data || null);
setSummary(summaryRes.data || null); setSummary(summaryRes.data || null);
}) })
.catch((e) => setError("Failed to load statistics: " + String(e))) .catch((e) => setError("Failed to load statistics: " + String(e)))
@@ -218,6 +254,24 @@ return (
> >
Users Users
</button> </button>
<button
onClick={() => setActiveView("linguistic")}
style={activeView === "linguistic" ? styles.buttonPrimary : styles.buttonSecondary}
>
Linguistic
</button>
<button
onClick={() => setActiveView("interactional")}
style={activeView === "interactional" ? styles.buttonPrimary : styles.buttonSecondary}
>
Interactional
</button>
<button
onClick={() => setActiveView("cultural")}
style={activeView === "cultural" ? styles.buttonPrimary : styles.buttonSecondary}
>
Cultural
</button>
</div> </div>
{activeView === "summary" && ( {activeView === "summary" && (
@@ -243,6 +297,36 @@ return (
<UserStats data={userData} /> <UserStats data={userData} />
)} )}
{activeView === "linguistic" && linguisticData && (
<LinguisticStats data={linguisticData} />
)}
{activeView === "linguistic" && !linguisticData && (
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
No linguistic data available.
</div>
)}
{activeView === "interactional" && interactionData && (
<InteractionalStats data={interactionData} />
)}
{activeView === "interactional" && !interactionData && (
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
No interactional data available.
</div>
)}
{activeView === "cultural" && culturalData && (
<CulturalStats data={culturalData} />
)}
{activeView === "cultural" && !culturalData && (
<div style={{ ...styles.container, ...styles.card, marginTop: 16 }}>
No cultural data available.
</div>
)}
</div> </div>
); );
} }

View File

@@ -1,14 +1,28 @@
// User Responses // Shared types
type TopUser = {
author: string;
source: string;
count: number
};
type FrequencyWord = { type FrequencyWord = {
word: string; word: string;
count: number; count: number;
} };
type NGram = {
count: number;
ngram: string;
};
type Emotion = {
emotion_anger: number;
emotion_disgust: number;
emotion_fear: number;
emotion_joy: number;
emotion_sadness: number;
};
// User
type TopUser = {
author: string;
source: string;
count: number;
};
type Vocab = { type Vocab = {
author: string; author: string;
@@ -31,43 +45,35 @@ type User = {
type InteractionGraph = Record<string, Record<string, number>>; type InteractionGraph = Record<string, Record<string, number>>;
type UserEndpointResponse = {
top_users: TopUser[];
users: User[];
};
type UserAnalysisResponse = { type UserAnalysisResponse = {
top_users: TopUser[]; top_users: TopUser[];
users: User[]; users: User[];
interaction_graph: InteractionGraph; interaction_graph: InteractionGraph;
}; };
// Time Analysis // Time
type EventsPerDay = { type EventsPerDay = {
date: Date; date: Date;
count: number; count: number;
} };
type HeatmapCell = { type HeatmapCell = {
date: Date; date: Date;
hour: number; hour: number;
count: number; count: number;
} };
type TimeAnalysisResponse = { type TimeAnalysisResponse = {
events_per_day: EventsPerDay[]; events_per_day: EventsPerDay[];
weekday_hour_heatmap: HeatmapCell[]; weekday_hour_heatmap: HeatmapCell[];
}
// Content Analysis
type Emotion = {
emotion_anger: number;
emotion_disgust: number;
emotion_fear: number;
emotion_joy: number;
emotion_sadness: number;
}; };
type NGram = { // Content (combines emotional and linguistic)
count: number;
ngram: string;
}
type AverageEmotionByTopic = Emotion & { type AverageEmotionByTopic = Emotion & {
n: number; n: number;
topic: string; topic: string;
@@ -92,7 +98,6 @@ type EmotionBySource = {
event_count: number; event_count: number;
}; };
type ContentAnalysisResponse = { type ContentAnalysisResponse = {
word_frequencies: FrequencyWord[]; word_frequencies: FrequencyWord[];
average_emotion_by_topic: AverageEmotionByTopic[]; average_emotion_by_topic: AverageEmotionByTopic[];
@@ -101,7 +106,78 @@ type ContentAnalysisResponse = {
overall_emotion_average?: OverallEmotionAverage[]; overall_emotion_average?: OverallEmotionAverage[];
dominant_emotion_distribution?: DominantEmotionDistribution[]; dominant_emotion_distribution?: DominantEmotionDistribution[];
emotion_by_source?: EmotionBySource[]; emotion_by_source?: EmotionBySource[];
} };
// Linguistic
type LinguisticAnalysisResponse = {
word_frequencies: FrequencyWord[];
common_two_phrases: NGram[];
common_three_phrases: NGram[];
lexical_diversity?: Record<string, number>;
};
// Emotional
type EmotionalAnalysisResponse = {
average_emotion_by_topic: AverageEmotionByTopic[];
overall_emotion_average?: OverallEmotionAverage[];
dominant_emotion_distribution?: DominantEmotionDistribution[];
emotion_by_source?: EmotionBySource[];
};
// Interactional
type ConversationConcentration = {
total_commenting_authors: number;
top_10pct_author_count: number;
top_10pct_comment_share: number;
single_comment_authors: number;
single_comment_author_ratio: number;
};
type InteractionAnalysisResponse = {
average_thread_depth?: number;
top_interaction_pairs?: [[string, string], number][];
conversation_concentration?: ConversationConcentration;
interaction_graph: InteractionGraph;
};
// Cultural
type IdentityMarkers = {
in_group_usage: number;
out_group_usage: number;
in_group_ratio: number;
out_group_ratio: number;
in_group_posts: number;
out_group_posts: number;
tie_posts: number;
in_group_emotion_avg?: Record<string, number>;
out_group_emotion_avg?: Record<string, number>;
};
type StanceMarkers = {
hedge_total: number;
certainty_total: number;
deontic_total: number;
permission_total: number;
hedge_per_1k_tokens: number;
certainty_per_1k_tokens: number;
deontic_per_1k_tokens: number;
permission_per_1k_tokens: number;
};
type EntityEmotionAggregate = {
post_count: number;
emotion_avg: Record<string, number>;
};
type AverageEmotionPerEntity = {
entity_emotion_avg: Record<string, EntityEmotionAggregate>;
};
type CulturalAnalysisResponse = {
identity_markers?: IdentityMarkers;
stance_markers?: StanceMarkers;
avg_emotion_per_entity?: AverageEmotionPerEntity;
};
// Summary // Summary
type SummaryResponse = { type SummaryResponse = {
@@ -118,18 +194,20 @@ type SummaryResponse = {
sources: string[]; sources: string[];
}; };
// Filtering Response // Filter
type FilterResponse = { type FilterResponse = {
rows: number rows: number;
data: any; data: any;
} };
export type { export type {
TopUser, TopUser,
Vocab, Vocab,
User, User,
InteractionGraph, InteractionGraph,
ConversationConcentration,
UserAnalysisResponse, UserAnalysisResponse,
UserEndpointResponse,
FrequencyWord, FrequencyWord,
AverageEmotionByTopic, AverageEmotionByTopic,
OverallEmotionAverage, OverallEmotionAverage,
@@ -138,5 +216,13 @@ export type {
SummaryResponse, SummaryResponse,
TimeAnalysisResponse, TimeAnalysisResponse,
ContentAnalysisResponse, ContentAnalysisResponse,
FilterResponse LinguisticAnalysisResponse,
} EmotionalAnalysisResponse,
InteractionAnalysisResponse,
IdentityMarkers,
StanceMarkers,
EntityEmotionAggregate,
AverageEmotionPerEntity,
CulturalAnalysisResponse,
FilterResponse,
};

View File

@@ -63,11 +63,25 @@ class InteractionAnalysis:
pairs.sort(key=lambda x: x[1], reverse=True) pairs.sort(key=lambda x: x[1], reverse=True)
return pairs[:top_n] return pairs[:top_n]
def initiator_ratio(self, df: pd.DataFrame): def conversation_concentration(self, df: pd.DataFrame) -> dict:
starters = df["reply_to"].isna().sum() if "type" not in df.columns:
total = len(df) return {}
if total == 0: comments = df[df["type"] == "comment"]
return 0 if comments.empty:
return {}
return round(starters / total, 2) author_counts = comments["author"].value_counts()
total_comments = len(comments)
total_authors = len(author_counts)
top_10_pct_n = max(1, int(total_authors * 0.1))
top_10_pct_share = round(author_counts.head(top_10_pct_n).sum() / total_comments, 4)
return {
"total_commenting_authors": total_authors,
"top_10pct_author_count": top_10_pct_n,
"top_10pct_comment_share": float(top_10_pct_share),
"single_comment_authors": int((author_counts == 1).sum()),
"single_comment_author_ratio": float(round((author_counts == 1).sum() / total_authors, 4)),
}

View File

@@ -121,8 +121,8 @@ class StatGen:
return { return {
"average_thread_depth": self.interaction_analysis.average_thread_depth(filtered_df), "average_thread_depth": self.interaction_analysis.average_thread_depth(filtered_df),
"top_interaction_pairs": self.interaction_analysis.top_interaction_pairs(filtered_df, top_n=100), "top_interaction_pairs": self.interaction_analysis.top_interaction_pairs(filtered_df, top_n=100),
"initiator_ratio": self.interaction_analysis.initiator_ratio(filtered_df), "interaction_graph": self.interaction_analysis.interaction_graph(filtered_df),
"interaction_graph": self.interaction_analysis.interaction_graph(filtered_df) "conversation_concentration": self.interaction_analysis.conversation_concentration(filtered_df)
} }
def cultural(self, df: pd.DataFrame, filters: dict | None = None) -> dict: def cultural(self, df: pd.DataFrame, filters: dict | None = None) -> dict: