65 lines
2.0 KiB
Python
65 lines
2.0 KiB
Python
import pandas as pd
|
|
|
|
|
|
class SummaryAnalysis:
    """Headline statistics for a DataFrame of events.

    The methods read these columns of the frame they are given:

    * ``type``   -- compared against the literals ``"post"`` and ``"comment"``.
    * ``author`` -- missing values are ignored when counting users.
    * ``dt``     -- values must expose ``.timestamp()`` (e.g. ``pd.Timestamp``).
    * ``source`` -- missing values are ignored when listing sources.
    """

    def total_events(self, df: pd.DataFrame) -> int:
        """Return the number of rows in *df*."""
        return int(len(df))

    def total_posts(self, df: pd.DataFrame) -> int:
        """Return how many rows have ``type == "post"``."""
        # Summing the boolean mask avoids materialising a filtered copy.
        return int((df["type"] == "post").sum())

    def total_comments(self, df: pd.DataFrame) -> int:
        """Return how many rows have ``type == "comment"``."""
        return int((df["type"] == "comment").sum())

    def unique_users(self, df: pd.DataFrame) -> int:
        """Return the number of distinct non-null ``author`` values."""
        # nunique() drops missing values by default.
        return int(df["author"].nunique())

    def comments_per_post(self, total_comments: int, total_posts: int) -> float:
        """Return comments divided by posts, rounded to two decimals.

        The divisor is clamped to at least 1, so with zero posts the result
        is simply ``total_comments`` rather than a division error.
        """
        return round(total_comments / max(total_posts, 1), 2)

    def lurker_ratio(self, df: pd.DataFrame) -> float:
        """Return the share of authors that appear in exactly one row.

        Rows with a missing ``author`` are excluded by ``groupby``. When no
        author remains the ratio is reported as ``0.0`` instead of NaN.
        The result is rounded to two decimals.
        """
        events_per_author = df.groupby("author").size()
        if events_per_author.empty:
            # mean() of an empty Series is NaN; report "no lurkers" instead.
            return 0.0
        # Round the pandas result first (unchanged rounding), then convert
        # to a built-in float so callers never receive a NumPy scalar.
        return float(round((events_per_author == 1).mean(), 2))

    def time_range(self, df: pd.DataFrame) -> dict:
        """Return ``{"start": ..., "end": ...}`` from the ``dt`` column.

        Each bound is the min/max of ``dt`` as whole epoch seconds, or
        ``None`` when that bound is missing (for example, every ``dt`` is
        NaT).
        """
        return {
            "start": self._epoch_seconds(df["dt"].min()),
            "end": self._epoch_seconds(df["dt"].max()),
        }

    @staticmethod
    def _epoch_seconds(moment):
        """Convert *moment* to integer epoch seconds, or ``None`` if missing."""
        if pd.isna(moment):
            # NaT.timestamp() raises ValueError, so handle it explicitly.
            return None
        return int(moment.timestamp())

    def sources(self, df: pd.DataFrame) -> list:
        """Return the distinct non-null ``source`` values in first-seen order."""
        return df["source"].dropna().unique().tolist()

    def empty_summary(self) -> dict:
        """Return the summary used when there are no events at all."""
        return {
            "total_events": 0,
            "total_posts": 0,
            "total_comments": 0,
            "unique_users": 0,
            "comments_per_post": 0,
            "lurker_ratio": 0,
            "time_range": {
                "start": None,
                "end": None,
            },
            "sources": [],
        }

    def summary(self, df: pd.DataFrame) -> dict:
        """Return every statistic for *df* in a single dictionary.

        An empty frame short-circuits to :meth:`empty_summary`, so a frame
        without the expected columns is never indexed.
        """
        if df.empty:
            return self.empty_summary()

        # Computed once because comments_per_post needs both counts.
        total_posts = self.total_posts(df)
        total_comments = self.total_comments(df)

        return {
            "total_events": self.total_events(df),
            "total_posts": total_posts,
            "total_comments": total_comments,
            "unique_users": self.unique_users(df),
            "comments_per_post": self.comments_per_post(total_comments, total_posts),
            "lurker_ratio": self.lurker_ratio(df),
            "time_range": self.time_range(df),
            "sources": self.sources(df),
        }