feat(emotional): add average emotion & dominant emotion stats
This commit is contained in:
@@ -1,33 +1,86 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
class EmotionalAnalysis:
    """Aggregate per-row emotion scores stored in ``emotion_*`` columns.

    Every public method takes a DataFrame, looks up the columns whose name
    starts with ``"emotion_"`` and returns a list of plain dicts. When the
    columns a method needs are absent, it returns an empty list.
    """

    _PREFIX = "emotion_"

    def _emotion_cols(self, df: pd.DataFrame) -> list[str]:
        """Return the names of all columns that start with ``emotion_``."""
        return [col for col in df.columns if col.startswith(self._PREFIX)]

    def avg_emotion_by_topic(self, df: pd.DataFrame) -> list[dict]:
        """Return the mean of each emotion column per topic, plus row counts.

        Rows whose ``topic`` equals ``"Misc"`` are excluded. Each record holds
        ``topic``, one key per emotion column (the mean) and ``n`` (the number
        of rows in that topic).

        Returns an empty list when there are no emotion columns or no
        ``topic`` column.
        """
        emotion_cols = self._emotion_cols(df)

        # Guard the grouping column the same way emotion_by_source guards
        # "source", instead of failing with a KeyError.
        if not emotion_cols or "topic" not in df.columns:
            return []

        # Filter and group once; both aggregates share the same groups.
        grouped = df[df["topic"] != "Misc"].groupby("topic")
        counts = grouped.size().reset_index(name="n")
        averages = grouped[emotion_cols].mean().reset_index()

        return averages.merge(counts, on="topic").to_dict(orient="records")

    def overall_emotion_average(self, df: pd.DataFrame) -> list[dict]:
        """Return the mean of every emotion column over the whole frame.

        Each record holds ``emotion`` (column name without the ``emotion_``
        prefix) and ``score`` (the column mean as a float).

        Returns an empty list when there are no emotion columns or no rows.
        """
        emotion_cols = self._emotion_cols(df)

        # An empty frame would only produce NaN means, so return nothing,
        # matching the other aggregations in this class.
        if not emotion_cols or df.empty:
            return []

        means = df[emotion_cols].mean()
        return [
            {
                # removeprefix strips only the leading marker; str.replace
                # would also eat "emotion_" occurring later in the name.
                "emotion": col.removeprefix(self._PREFIX),
                "score": float(score),
            }
            for col, score in means.items()
        ]

    def dominant_emotion_distribution(self, df: pd.DataFrame) -> list[dict]:
        """Count how often each emotion is the highest-scoring one in a row.

        Each record holds ``emotion`` (column name without the prefix),
        ``count`` (rows where that emotion has the highest score) and
        ``ratio`` (``count`` divided by the total number of rows in ``df``,
        rounded to 4 decimals). Rows without any emotion score are not
        counted for any emotion but still contribute to the total.

        Returns an empty list when there are no emotion columns, no rows, or
        no row with at least one score.
        """
        emotion_cols = self._emotion_cols(df)

        if not emotion_cols or df.empty:
            return []

        # idxmax(axis=1) is deprecated / raises for rows that are entirely
        # NaN, so those rows are removed before picking the dominant column.
        scored = df[emotion_cols].dropna(how="all")
        if scored.empty:
            return []

        counts = scored.idxmax(axis=1).value_counts()
        total = len(df)  # non-zero: the empty frame was handled above

        return [
            {
                "emotion": col.removeprefix(self._PREFIX),
                "count": int(count),
                "ratio": round(float(count / total), 4),
            }
            for col, count in counts.items()
        ]

    def emotion_by_source(self, df: pd.DataFrame) -> list[dict]:
        """Return the emotion with the highest mean score for each source.

        Each record holds ``source`` (as ``str``), ``dominant_emotion``
        (column name without the prefix), ``dominant_score`` (that column's
        mean for the source, rounded to 4 decimals) and ``event_count`` (the
        number of rows for the source).

        Returns an empty list when there are no emotion columns, no
        ``source`` column, or no rows.
        """
        emotion_cols = self._emotion_cols(df)

        if not emotion_cols or "source" not in df.columns or df.empty:
            return []

        grouped = df.groupby("source")
        source_counts = grouped.size()
        source_means = grouped[emotion_cols].mean()

        if source_means.empty:
            return []

        # A NaN mean must never win the comparison: rank missing means below
        # every real score. When all means of a source are NaN the first
        # emotion column is reported, with a NaN score.
        dominant_cols = source_means.fillna(float("-inf")).idxmax(axis=1)

        return [
            {
                "source": str(source),
                "dominant_emotion": col.removeprefix(self._PREFIX),
                "dominant_score": round(float(source_means.at[source, col]), 4),
                "event_count": int(source_counts[source]),
            }
            for source, col in dominant_cols.items()
        ]
|
||||||
|
|||||||
Reference in New Issue
Block a user