feat: user analysis endpoint

This commit is contained in:
2026-02-01 18:00:54 +00:00
parent c9aff27ef0
commit d608514dfa
2 changed files with 38 additions and 0 deletions

View File

@@ -79,6 +79,19 @@ def get_time_analysis():
print(traceback.format_exc()) print(traceback.format_exc())
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500 return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route("/stats/user", methods=["GET"])
def get_user_analysis():
    """Serve the per-user analysis of the uploaded dataset as JSON.

    Responds with HTTP 400 when no dataset has been uploaded or when the
    analysis raises ``ValueError``; any other exception has its traceback
    printed and is reported as HTTP 500.
    """
    if stat_obj is None:
        return jsonify({"error": "No data uploaded"}), 400

    try:
        # Serialisation stays inside the ``try`` so that a failure while
        # building the JSON body is reported like any other error.
        response = jsonify(stat_obj.user_analysis())
    except ValueError as e:
        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
    else:
        return response, 200
@app.route('/filter/search', methods=["POST"]) @app.route('/filter/search', methods=["POST"])
def search_dataset(): def search_dataset():
if stat_obj is None: if stat_obj is None:

View File

@@ -125,6 +125,31 @@ class StatGen:
"word_frequencies": word_frequencies.to_dict(orient='records') "word_frequencies": word_frequencies.to_dict(orient='records')
} }
def user_analysis(self) -> dict:
    """Summarise activity per author.

    Reads the ``author``, ``source`` and ``type`` columns of ``self.df``.

    Returns:
        A dict with two keys:

        * ``"top_users"`` - one ``{"author", "source", "count"}`` entry per
          (author, source) pair, ordered by row count, largest first.
        * ``"users"`` - one record per author holding the row count of each
          ``type`` value plus ``comment_post_ratio`` and ``comment_share``,
          ordered by ascending ``comment_post_ratio``.
    """
    counts = (
        self.df.groupby(["author", "source"])
        .size()
        .sort_values(ascending=False)
    )

    per_user = (
        self.df.groupby(["author", "type"])
        .size()
        .unstack(fill_value=0)
    )

    # ``unstack`` only creates columns for the types that actually occur, so
    # a dataset containing no posts (or no comments) would otherwise raise
    # KeyError on the lookups below.
    for kind in ("post", "comment"):
        if kind not in per_user.columns:
            per_user[kind] = 0

    posts = per_user["post"]
    comments = per_user["comment"]

    # A zero denominator is replaced by 1 to avoid division by zero; an
    # author with no posts therefore gets a ratio equal to their comment count.
    per_user["comment_post_ratio"] = comments / posts.replace(0, 1)
    per_user["comment_share"] = comments / (posts + comments).replace(0, 1)
    per_user = per_user.sort_values("comment_post_ratio", ascending=True)

    return {
        "top_users": [
            {"author": author, "source": source, "count": int(count)}
            for (author, source), count in counts.items()
        ],
        "users": per_user.reset_index().to_dict(orient="records"),
    }
def search(self, search_query: str) -> dict: def search(self, search_query: str) -> dict:
self.df = self.df[ self.df = self.df[
self.df["content"].str.contains(search_query) self.df["content"].str.contains(search_query)