refactor: extract emotional analysis out of stat_gen

This commit is contained in:
2026-02-17 17:40:29 +00:00
parent 70b34036db
commit 83010aee55
2 changed files with 44 additions and 34 deletions

View File

@@ -0,0 +1,41 @@
import pandas as pd
class EmotionalAnalysis:
    """Emotion aggregations computed over a DataFrame.

    The methods read a ``topic`` column and every column whose name starts
    with ``emotion_`` from the frame supplied at construction time.
    """

    # Emotion columns that are left out of the per-topic averages.
    _EXCLUDED_EMOTION_COLS = ("emotion_neutral", "emotion_surprise")

    def __init__(self, df: pd.DataFrame):
        """Store the frame to analyse (kept by reference, not copied)."""
        self.df = df

    def avg_emotion_by_topic(self) -> list[dict]:
        """Return the mean of each emotion column per topic.

        Rows whose ``topic`` equals ``"Misc"`` are ignored, and the
        ``emotion_neutral`` / ``emotion_surprise`` columns are not averaged.

        Returns:
            One record per remaining topic, ordered as produced by
            ``groupby("topic")``. Each record holds ``topic``, the mean of
            every included ``emotion_*`` column, and ``n`` -- the number of
            rows that belong to that topic.
        """
        emotion_cols = [
            col for col in self.df.columns
            if col.startswith("emotion_")
            and col not in self._EXCLUDED_EMOTION_COLS
        ]

        # Filter and group once; the means and the row counts are both
        # derived from the same grouped object, so they share one index
        # and can be combined without a merge.
        by_topic = self.df[self.df["topic"] != "Misc"].groupby("topic")

        averages = by_topic[emotion_cols].mean()
        averages["n"] = by_topic.size()

        return averages.reset_index().to_dict(orient="records")

View File

@@ -7,6 +7,7 @@ from nltk.corpus import stopwords
from collections import Counter from collections import Counter
from server.nlp import NLP from server.nlp import NLP
from server.analysis.temporal import TemporalAnalysis from server.analysis.temporal import TemporalAnalysis
from server.analysis.emotional import EmotionalAnalysis
DOMAIN_STOPWORDS = { DOMAIN_STOPWORDS = {
"www", "https", "http", "www", "https", "http",
@@ -41,6 +42,7 @@ class StatGen:
self._add_extra_cols(self.df) self._add_extra_cols(self.df)
self.temporal_analysis = TemporalAnalysis(self.df) self.temporal_analysis = TemporalAnalysis(self.df)
self.emotional_analysis = EmotionalAnalysis(self.df)
self.original_df = self.df.copy(deep=True) self.original_df = self.df.copy(deep=True)
@@ -173,42 +175,9 @@ class StatGen:
.reset_index(drop=True) .reset_index(drop=True)
) )
emotion_exclusions = [
"emotion_neutral",
"emotion_surprise"
]
emotion_cols = [
col for col in self.df.columns
if col.startswith("emotion_") and col not in emotion_exclusions
]
counts = (
self.df[
(self.df["topic"] != "Misc")
]
.groupby("topic")
.size()
.rename("n")
)
avg_emotion_by_topic = (
self.df[
(self.df["topic"] != "Misc")
]
.groupby("topic")[emotion_cols]
.mean()
.reset_index()
)
avg_emotion_by_topic = avg_emotion_by_topic.merge(
counts,
on="topic"
)
return { return {
"word_frequencies": word_frequencies.to_dict(orient='records'), "word_frequencies": word_frequencies.to_dict(orient='records'),
"average_emotion_by_topic": avg_emotion_by_topic.to_dict(orient='records'), "average_emotion_by_topic": self.emotional_analysis.avg_emotion_by_topic(),
"reply_time_by_emotion": self.temporal_analysis.avg_reply_time_per_emotion() "reply_time_by_emotion": self.temporal_analysis.avg_reply_time_per_emotion()
} }