feat: add time filter

This commit is contained in:
2026-01-31 20:16:49 +00:00
parent ee65b894dd
commit 68350f3a88
2 changed files with 61 additions and 18 deletions

View File

@@ -55,19 +55,6 @@ def word_frequencies():
print(traceback.format_exc()) print(traceback.format_exc())
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500 return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
# NOTE(review): pre-commit version of the search route, removed by this commit
# in favour of /filter/search; kept here only as diff context.
@app.route('/stats/search', methods=["POST"])
def search_dataset():
    # Guard: an upload must have populated the module-level stat_obj first.
    if stat_obj is None:
        return jsonify({"error": "No data uploaded"}), 400
    # silent=True: a malformed/absent JSON body yields None instead of raising.
    data = request.get_json(silent=True) or {}
    if "query" not in data:
        # NOTE(review): returns a raw DataFrame, which is not a valid Flask
        # response — requests hitting this branch error out server-side
        # instead of receiving a 400. Fixed in the replacement route.
        return stat_obj.df
    query = data["query"]
    return jsonify(stat_obj.filter_events(query).to_dict(orient='records')), 200
@app.route('/stats/summary', methods=["GET"]) @app.route('/stats/summary', methods=["GET"])
def get_summary(): def get_summary():
if stat_obj is None: if stat_obj is None:
@@ -93,7 +80,44 @@ def get_time_analysis():
print(traceback.format_exc()) print(traceback.format_exc())
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500 return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route('/filter/search', methods=["POST"])
def search_dataset():
    """Filter the loaded dataset to events matching a search query.

    Expects a JSON body with a "query" key. Returns the summary dict
    produced by StatGen.search() (row count plus matching records).
    """
    if stat_obj is None:
        return jsonify({"error": "No data uploaded"}), 400
    # silent=True: a malformed/absent JSON body yields None instead of raising.
    data = request.get_json(silent=True) or {}
    if "query" not in data:
        # Bug fix: previously returned the raw DataFrame, which is not a valid
        # Flask response object — respond with a JSON error and 400 instead.
        return jsonify({"error": "Please include a search query"}), 400
    filtered_df = stat_obj.search(data["query"])
    return jsonify(filtered_df), 200
@app.route('/filter/time', methods=["POST"])
def filter_time():
    """Restrict the loaded dataset to events between two datetimes.

    Expects a JSON body with "start" and "end" keys; both are parsed as
    UTC datetimes. Returns the filtered summary from StatGen.set_time_range().
    """
    if stat_obj is None:
        return jsonify({"error": "No data uploaded"}), 400
    payload = request.get_json(silent=True)
    if not payload:
        return jsonify({"error": "Invalid or missing JSON body"}), 400
    # Both bounds are required; reject the request if either is absent.
    if not ("start" in payload and "end" in payload):
        return jsonify({"error": "Please include both start and end dates"}), 400
    try:
        bounds = [pd.to_datetime(payload[key], utc=True) for key in ("start", "end")]
    except Exception:
        return jsonify({"error": "Invalid datetime format"}), 400
    result = stat_obj.set_time_range(bounds[0], bounds[1])
    return jsonify(result), 200
@app.route('/filter/reset', methods=["GET"])
def reset_dataset(): def reset_dataset():
if stat_obj is None: if stat_obj is None:
return jsonify({"error": "No data uploaded"}), 400 return jsonify({"error": "No data uploaded"}), 400

View File

@@ -1,6 +1,7 @@
import pandas as pd import pandas as pd
import re import re
import nltk import nltk
import datetime
from nltk.corpus import stopwords from nltk.corpus import stopwords
from collections import Counter from collections import Counter
@@ -27,9 +28,10 @@ class StatGen:
comments_df["parent_id"] = comments_df.get("post_id") comments_df["parent_id"] = comments_df.get("post_id")
self.df = pd.concat([posts_df, comments_df]) self.df = pd.concat([posts_df, comments_df])
self.original_df = self.df.copy(deep=True)
self._add_date_cols(self.df) self._add_date_cols(self.df)
self.original_df = self.df.copy(deep=True)
## Private Methods ## Private Methods
def _add_date_cols(self, df: pd.DataFrame) -> None: def _add_date_cols(self, df: pd.DataFrame) -> None:
df['date'] = pd.to_datetime(df['timestamp'], unit='s').dt.date df['date'] = pd.to_datetime(df['timestamp'], unit='s').dt.date
@@ -123,9 +125,26 @@ class StatGen:
"word_frequencies": word_frequencies.to_dict(orient='records') "word_frequencies": word_frequencies.to_dict(orient='records')
} }
def search(self, search_query: str) -> dict:
    """Narrow self.df to rows whose "content" contains search_query.

    Mutates self.df in place (the filter is cumulative until
    reset_dataset() is called) and returns a summary dict with the
    surviving row count and records.

    Bug fixes vs. the original:
    - return annotation said pd.DataFrame but a dict is returned;
    - regex=False: the query is matched as a literal substring, so user
      input containing regex metacharacters (e.g. "(") cannot raise re.error;
    - na=False: rows with missing content are excluded instead of putting
      NaN in the boolean mask, which breaks DataFrame indexing.
    """
    mask = self.df["content"].str.contains(search_query, regex=False, na=False)
    self.df = self.df[mask]
    return {
        "rows": len(self.df),
        "data": self.df.to_dict(orient="records"),
    }
def set_time_range(self, start: datetime.datetime, end: datetime.datetime):
    """Restrict self.df to events whose "dt" falls within [start, end].

    Mutates self.df in place and returns a summary dict with the
    surviving row count and records. Bounds are inclusive on both ends.

    NOTE(review): assumes self.df has a "dt" datetime column comparable
    with the (UTC-aware) bounds the caller passes — confirm it is created
    tz-aware upstream.
    """
    in_range = (self.df["dt"] >= start) & (self.df["dt"] <= end)
    self.df = self.df[in_range]
    return {
        "rows": len(self.df),
        "data": self.df.to_dict(orient="records"),
    }
def reset_dataset(self) -> None: def reset_dataset(self) -> None:
self.df = self.original_df.copy(deep=True) self.df = self.original_df.copy(deep=True)