feat: add time filter

This commit is contained in:
2026-01-31 20:16:49 +00:00
parent ee65b894dd
commit 68350f3a88
2 changed files with 61 additions and 18 deletions

View File

@@ -55,19 +55,6 @@ def word_frequencies():
print(traceback.format_exc())
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route('/stats/search', methods=["POST"])
def search_dataset():
    """Filter the uploaded dataset by a search query and return the rows.

    Expects a JSON body of the form ``{"query": "<pattern>"}``. The query is
    handed to ``stat_obj.filter_events`` and the resulting frame is returned
    as a JSON list of row records. When the body is missing, unparsable, not
    a JSON object, or has no ``"query"`` key, the current dataset is returned
    unfiltered in the same record-list shape.

    Returns:
        A ``(response, status)`` tuple: the JSON record list with 200, or a
        JSON error object with 400 when no data has been uploaded.
    """
    if stat_obj is None:
        return jsonify({"error": "No data uploaded"}), 400

    # silent=True yields None for a missing/invalid body; treat it as "no query".
    data = request.get_json(silent=True) or {}

    if not isinstance(data, dict) or "query" not in data:
        # A DataFrame is not a valid Flask return value, so serialise it the
        # same way the filtered result below is serialised.
        return jsonify(stat_obj.df.to_dict(orient='records')), 200

    query = data["query"]
    return jsonify(stat_obj.filter_events(query).to_dict(orient='records')), 200
@app.route('/stats/summary', methods=["GET"])
def get_summary():
if stat_obj is None:
@@ -92,8 +79,45 @@ def get_time_analysis():
except Exception as e:
print(traceback.format_exc())
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route('/filter/search', methods=["POST"])
def search_dataset():
    """Filter the uploaded dataset by a search query.

    Expects a JSON body of the form ``{"query": "<pattern>"}``. The query is
    passed to ``stat_obj.search`` and whatever that returns is sent back as
    JSON. When the body is missing, unparsable, not a JSON object, or has no
    ``"query"`` key, the current dataset is returned unfiltered as
    ``{"rows": <count>, "data": [<row records>]}``.

    Returns:
        A ``(response, status)`` tuple. 200 with the JSON payload on success;
        400 with a JSON error object when no data has been uploaded, when the
        query is not a string, or when the query is not a valid pattern.
    """
    import re  # local import: only needed to recognise invalid patterns

    if stat_obj is None:
        return jsonify({"error": "No data uploaded"}), 400

    # silent=True yields None for a missing/invalid body; treat it as "no query".
    data = request.get_json(silent=True) or {}

    if not isinstance(data, dict) or "query" not in data:
        # A DataFrame is not a valid Flask return value; serialise it instead.
        unfiltered = {
            "rows": len(stat_obj.df),
            "data": stat_obj.df.to_dict(orient="records"),
        }
        return jsonify(unfiltered), 200

    query = data["query"]
    if not isinstance(query, str):
        return jsonify({"error": "Query must be a string"}), 400

    try:
        filtered_df = stat_obj.search(query)
    except re.error:
        # The query is matched as a regular expression, so input such as "c++"
        # or "(" is a client error rather than a server fault.
        return jsonify({"error": "Invalid search pattern"}), 400

    return jsonify(filtered_df), 200
@app.route('/filter/time', methods=["POST"])
def filter_time():
    """Restrict the uploaded dataset to a time window.

    Expects a JSON object with ``"start"`` and ``"end"`` values that
    ``pd.to_datetime`` can parse into single timestamps. Both bounds are
    parsed with ``utc=True`` and passed to ``stat_obj.set_time_range``; the
    value it returns is sent back as JSON.

    Returns:
        A ``(response, status)`` tuple. 200 with the JSON payload on success;
        400 with a JSON error object when no data has been uploaded, the body
        is missing or not a JSON object, either bound is absent, or either
        bound does not parse to a single valid timestamp.
    """
    if stat_obj is None:
        return jsonify({"error": "No data uploaded"}), 400

    data = request.get_json(silent=True)
    # Reject empty bodies and non-object JSON (lists, strings, numbers).
    if not data or not isinstance(data, dict):
        return jsonify({"error": "Invalid or missing JSON body"}), 400

    if "start" not in data or "end" not in data:
        return jsonify({"error": "Please include both start and end dates"}), 400

    try:
        start = pd.to_datetime(data["start"], utc=True)
        end = pd.to_datetime(data["end"], utc=True)
    except Exception:
        # Any parsing failure on client-supplied values is a bad request.
        return jsonify({"error": "Invalid datetime format"}), 400

    # to_datetime does not raise for null or list inputs: it can hand back
    # None/NaT or a DatetimeIndex. Only a real scalar Timestamp is a usable
    # bound, so anything else is rejected before the dataset is filtered.
    if not isinstance(start, pd.Timestamp) or not isinstance(end, pd.Timestamp):
        return jsonify({"error": "Invalid datetime format"}), 400

    filtered_df = stat_obj.set_time_range(start, end)
    return jsonify(filtered_df), 200
@app.route('/filter/reset', methods=["GET"])
def reset_dataset():
if stat_obj is None:
return jsonify({"error": "No data uploaded"}), 400

View File

@@ -1,6 +1,7 @@
import pandas as pd
import re
import nltk
import datetime
from nltk.corpus import stopwords
from collections import Counter
@@ -27,9 +28,10 @@ class StatGen:
comments_df["parent_id"] = comments_df.get("post_id")
self.df = pd.concat([posts_df, comments_df])
self.original_df = self.df.copy(deep=True)
self._add_date_cols(self.df)
self.original_df = self.df.copy(deep=True)
## Private Methods
def _add_date_cols(self, df: pd.DataFrame) -> None:
df['date'] = pd.to_datetime(df['timestamp'], unit='s').dt.date
@@ -123,9 +125,26 @@ class StatGen:
"word_frequencies": word_frequencies.to_dict(orient='records')
}
def filter_events(self, search_query: str) -> pd.DataFrame:
    """Keep only the rows whose ``content`` matches ``search_query``.

    The match uses ``Series.str.contains`` with its default pattern
    semantics. Rows whose ``content`` is missing are treated as non-matching.
    The filter is applied in place: ``self.df`` is replaced by the narrowed
    frame.

    Args:
        search_query: Pattern to look for in the ``content`` column.

    Returns:
        The narrowed frame, which is also the new ``self.df``.
    """
    # na=False: without it, missing content puts NaN in the mask, and a
    # mask containing NaN cannot be used to index the frame.
    matches = self.df["content"].str.contains(search_query, na=False)
    self.df = self.df[matches]
    return self.df
def search(self, search_query: str) -> dict:
    """Narrow the dataset to rows whose ``content`` matches ``search_query``.

    The match uses ``Series.str.contains`` with its default pattern
    semantics. Rows whose ``content`` is missing are treated as non-matching.
    The filter is applied in place: ``self.df`` is replaced by the narrowed
    frame.

    Args:
        search_query: Pattern to look for in the ``content`` column.

    Returns:
        A dict with ``"rows"`` (number of remaining rows) and ``"data"``
        (the remaining rows as a list of per-row dicts).
    """
    # na=False: without it, missing content puts NaN in the mask, and a
    # mask containing NaN cannot be used to index the frame.
    matches = self.df["content"].str.contains(search_query, na=False)
    self.df = self.df[matches]
    return {
        "rows": len(self.df),
        "data": self.df.to_dict(orient="records"),
    }
def set_time_range(self, start: datetime.datetime, end: datetime.datetime) -> dict:
    """Narrow the dataset to rows whose ``dt`` lies within ``[start, end]``.

    Both bounds are inclusive. The filter is applied in place: ``self.df`` is
    replaced by the narrowed frame.

    The bounds are aligned with the timezone-awareness of the ``dt`` column
    before comparing, because pandas refuses to order tz-naive values against
    tz-aware ones. This only happens when ``dt`` has a datetime dtype;
    otherwise the bounds are compared as given.

    Args:
        start: Lower bound of the window (tz-aware or tz-naive).
        end: Upper bound of the window (tz-aware or tz-naive).

    Returns:
        A dict with ``"rows"`` (number of remaining rows) and ``"data"``
        (the remaining rows as a list of per-row dicts).
    """
    stamps = self.df["dt"]
    # tz-aware datetime dtypes expose ``.tz``; plain datetime64 dtypes do not.
    column_tz = getattr(stamps.dtype, "tz", None)

    def _align(bound):
        """Return ``bound`` as a Timestamp comparable with the ``dt`` column."""
        ts = pd.Timestamp(bound)
        if column_tz is None:
            if ts.tzinfo is None:
                return ts
            # NOTE(review): assumes a tz-naive ``dt`` column holds UTC
            # wall-clock values - confirm against where ``dt`` is built.
            return ts.tz_convert("UTC").tz_localize(None)
        if ts.tzinfo is None:
            # A naive bound is interpreted in the column's own timezone.
            return ts.tz_localize(column_tz)
        return ts.tz_convert(column_tz)

    if pd.api.types.is_datetime64_any_dtype(stamps):
        lower, upper = _align(start), _align(end)
    else:
        lower, upper = start, end

    self.df = self.df[(stamps >= lower) & (stamps <= upper)]
    return {
        "rows": len(self.df),
        "data": self.df.to_dict(orient="records"),
    }
def reset_dataset(self) -> None:
self.df = self.original_df.copy(deep=True)