refactor(stats): add summary and user stat classes for consistency
This commit is contained in:
64
server/analysis/summary.py
Normal file
64
server/analysis/summary.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class SummaryAnalysis:
|
||||
def total_events(self, df: pd.DataFrame) -> int:
|
||||
return int(len(df))
|
||||
|
||||
def total_posts(self, df: pd.DataFrame) -> int:
|
||||
return int(len(df[df["type"] == "post"]))
|
||||
|
||||
def total_comments(self, df: pd.DataFrame) -> int:
|
||||
return int(len(df[df["type"] == "comment"]))
|
||||
|
||||
def unique_users(self, df: pd.DataFrame) -> int:
|
||||
return int(len(df["author"].dropna().unique()))
|
||||
|
||||
def comments_per_post(self, total_comments: int, total_posts: int) -> float:
|
||||
return round(total_comments / max(total_posts, 1), 2)
|
||||
|
||||
def lurker_ratio(self, df: pd.DataFrame) -> float:
|
||||
events_per_user = df.groupby("author").size()
|
||||
return round((events_per_user == 1).mean(), 2)
|
||||
|
||||
def time_range(self, df: pd.DataFrame) -> dict:
|
||||
return {
|
||||
"start": int(df["dt"].min().timestamp()),
|
||||
"end": int(df["dt"].max().timestamp()),
|
||||
}
|
||||
|
||||
def sources(self, df: pd.DataFrame) -> list:
|
||||
return df["source"].dropna().unique().tolist()
|
||||
|
||||
def empty_summary(self) -> dict:
|
||||
return {
|
||||
"total_events": 0,
|
||||
"total_posts": 0,
|
||||
"total_comments": 0,
|
||||
"unique_users": 0,
|
||||
"comments_per_post": 0,
|
||||
"lurker_ratio": 0,
|
||||
"time_range": {
|
||||
"start": None,
|
||||
"end": None,
|
||||
},
|
||||
"sources": [],
|
||||
}
|
||||
|
||||
def summary(self, df: pd.DataFrame) -> dict:
|
||||
if df.empty:
|
||||
return self.empty_summary()
|
||||
|
||||
total_posts = self.total_posts(df)
|
||||
total_comments = self.total_comments(df)
|
||||
|
||||
return {
|
||||
"total_events": self.total_events(df),
|
||||
"total_posts": total_posts,
|
||||
"total_comments": total_comments,
|
||||
"unique_users": self.unique_users(df),
|
||||
"comments_per_post": self.comments_per_post(total_comments, total_posts),
|
||||
"lurker_ratio": self.lurker_ratio(df),
|
||||
"time_range": self.time_range(df),
|
||||
"sources": self.sources(df),
|
||||
}
|
||||
Reference in New Issue
Block a user