feat: add time filter
This commit is contained in:
@@ -55,19 +55,6 @@ def word_frequencies():
|
|||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
||||||
|
|
||||||
@app.route('/stats/search', methods=["POST"])
def search_dataset():
    """Filter the uploaded dataset by a text query and return the matching rows.

    Reads an optional JSON body. When it carries a ``"query"`` key the working
    dataset is narrowed through ``stat_obj.filter_events``; otherwise the
    dataset is returned as it currently stands. Both paths answer with a JSON
    list of row records.

    Returns:
        A ``(response, status)`` pair: 400 when no dataset has been uploaded,
        otherwise 200 with the rows.
    """
    if stat_obj is None:
        return jsonify({"error": "No data uploaded"}), 400

    # silent=True yields None for a missing or malformed body; treat it as
    # "no query supplied".
    data = request.get_json(silent=True) or {}

    if "query" not in data:
        # A bare DataFrame is not a valid Flask return value, so serialise it
        # the same way as the filtered result below.
        return jsonify(stat_obj.df.to_dict(orient='records')), 200

    query = data["query"]
    return jsonify(stat_obj.filter_events(query).to_dict(orient='records')), 200
|
|
||||||
|
|
||||||
@app.route('/stats/summary', methods=["GET"])
|
@app.route('/stats/summary', methods=["GET"])
|
||||||
def get_summary():
|
def get_summary():
|
||||||
if stat_obj is None:
|
if stat_obj is None:
|
||||||
@@ -93,7 +80,44 @@ def get_time_analysis():
|
|||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
||||||
|
|
||||||
@app.route('/reset', methods=["GET"])
|
@app.route('/filter/search', methods=["POST"])
def search_dataset():
    """Narrow the uploaded dataset to rows matching a text query.

    Reads an optional JSON body. When it carries a string ``"query"`` the
    working dataset is filtered through ``stat_obj.search``; when the key is
    absent the current dataset is returned unfiltered. Both success paths use
    the same ``{"rows": ..., "data": [...]}`` payload.

    Returns:
        A ``(response, status)`` pair:
        400 if no dataset has been uploaded or ``query`` is not a string,
        500 with a JSON error if filtering fails unexpectedly,
        200 with the payload otherwise.
    """
    if stat_obj is None:
        return jsonify({"error": "No data uploaded"}), 400

    # silent=True yields None for a missing or malformed body; treat it as
    # "no query supplied".
    data = request.get_json(silent=True) or {}

    try:
        if "query" not in data:
            # A bare DataFrame is not a valid Flask return value; answer with
            # the same payload shape the filtered path produces.
            current = stat_obj.df
            payload = {
                "rows": len(current),
                "data": current.to_dict(orient="records"),
            }
            return jsonify(payload), 200

        query = data["query"]
        if not isinstance(query, str):
            return jsonify({"error": "query must be a string"}), 400

        result = stat_obj.search(query)
        return jsonify(result), 200
    except Exception as e:
        # Same top-level boundary handling as the other routes in this file.
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
||||||
|
|
||||||
|
@app.route('/filter/time', methods=["POST"])
def filter_time():
    """Restrict the uploaded dataset to an inclusive time window.

    Expects a JSON body with ``"start"`` and ``"end"`` values that
    ``pd.to_datetime`` can parse; both are interpreted as UTC. The window is
    applied through ``stat_obj.set_time_range`` and its result is returned as
    JSON.

    Returns:
        A ``(response, status)`` pair: 400 when no dataset has been uploaded,
        the body is missing/invalid, a bound is missing or unparseable, or the
        window is reversed; 200 with the filtered payload otherwise.
    """
    if stat_obj is None:
        return jsonify({"error": "No data uploaded"}), 400

    data = request.get_json(silent=True)
    if not data:
        return jsonify({"error": "Invalid or missing JSON body"}), 400

    if "start" not in data or "end" not in data:
        return jsonify({"error": "Please include both start and end dates"}), 400

    try:
        start = pd.to_datetime(data["start"], utc=True)
        end = pd.to_datetime(data["end"], utc=True)
    except (ValueError, TypeError, OverflowError):
        return jsonify({"error": "Invalid datetime format"}), 400

    # to_datetime does not raise for every bad input: null parses to NaT and a
    # list parses to a DatetimeIndex. Only a real scalar Timestamp is usable
    # as a bound.
    if not isinstance(start, pd.Timestamp) or not isinstance(end, pd.Timestamp):
        return jsonify({"error": "Invalid datetime format"}), 400

    # set_time_range replaces the working dataset, so a reversed window would
    # silently empty it; reject it up front instead.
    if start > end:
        return jsonify({"error": "start must not be after end"}), 400

    result = stat_obj.set_time_range(start, end)
    return jsonify(result), 200
|
||||||
|
|
||||||
|
@app.route('/filter/reset', methods=["GET"])
|
||||||
def reset_dataset():
|
def reset_dataset():
|
||||||
if stat_obj is None:
|
if stat_obj is None:
|
||||||
return jsonify({"error": "No data uploaded"}), 400
|
return jsonify({"error": "No data uploaded"}), 400
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import re
|
import re
|
||||||
import nltk
|
import nltk
|
||||||
|
import datetime
|
||||||
|
|
||||||
from nltk.corpus import stopwords
|
from nltk.corpus import stopwords
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
@@ -27,9 +28,10 @@ class StatGen:
|
|||||||
comments_df["parent_id"] = comments_df.get("post_id")
|
comments_df["parent_id"] = comments_df.get("post_id")
|
||||||
|
|
||||||
self.df = pd.concat([posts_df, comments_df])
|
self.df = pd.concat([posts_df, comments_df])
|
||||||
self.original_df = self.df.copy(deep=True)
|
|
||||||
self._add_date_cols(self.df)
|
self._add_date_cols(self.df)
|
||||||
|
|
||||||
|
self.original_df = self.df.copy(deep=True)
|
||||||
|
|
||||||
## Private Methods
|
## Private Methods
|
||||||
def _add_date_cols(self, df: pd.DataFrame) -> None:
|
def _add_date_cols(self, df: pd.DataFrame) -> None:
|
||||||
df['date'] = pd.to_datetime(df['timestamp'], unit='s').dt.date
|
df['date'] = pd.to_datetime(df['timestamp'], unit='s').dt.date
|
||||||
@@ -123,9 +125,26 @@ class StatGen:
|
|||||||
"word_frequencies": word_frequencies.to_dict(orient='records')
|
"word_frequencies": word_frequencies.to_dict(orient='records')
|
||||||
}
|
}
|
||||||
|
|
||||||
def search(self, search_query: str) -> dict:
    """Narrow the working dataset to rows whose content matches a query.

    The filter is applied to ``self.df`` in place of the previous working
    frame, so successive calls narrow the result further.

    Args:
        search_query: Pattern looked up in the ``content`` column. It is
            passed to ``Series.str.contains`` with that method's default
            pattern handling.

    Returns:
        A dict with ``"rows"`` (number of remaining rows) and ``"data"``
        (the remaining rows as a list of records).
    """
    # na=False makes rows with missing content count as non-matches; without
    # it the mask contains NA and boolean indexing raises.
    matches = self.df["content"].str.contains(search_query, na=False)
    self.df = self.df[matches]

    return {
        "rows": len(self.df),
        "data": self.df.to_dict(orient="records"),
    }
|
||||||
|
|
||||||
|
def set_time_range(self, start: datetime.datetime, end: datetime.datetime) -> dict:
    """Narrow the working dataset to rows whose ``dt`` lies in a time window.

    The window is inclusive on both ends and replaces ``self.df``, so
    successive calls narrow the result further.

    Args:
        start: Lower bound of the window.
        end: Upper bound of the window.

    Returns:
        A dict with ``"rows"`` (number of remaining rows) and ``"data"``
        (the remaining rows as a list of records).
    """
    stamps = self.df["dt"]

    # Comparing tz-aware bounds with a tz-naive datetime column (or the
    # reverse) raises TypeError, so bring the bounds onto the column's
    # footing first. Non-datetime columns are compared as given.
    if pd.api.types.is_datetime64_any_dtype(stamps):
        column_tz = getattr(stamps.dtype, "tz", None)
        aligned = []
        for bound in (start, end):
            bound = pd.Timestamp(bound)
            if column_tz is None and bound.tzinfo is not None:
                # NOTE(review): assumes naive column values are UTC
                # wall-clock times - confirm where "dt" is built.
                bound = bound.tz_convert("UTC").tz_localize(None)
            elif column_tz is not None and bound.tzinfo is None:
                # NOTE(review): assumes a naive bound is expressed in the
                # column's own timezone.
                bound = bound.tz_localize(column_tz)
            aligned.append(bound)
        start, end = aligned

    self.df = self.df[(stamps >= start) & (stamps <= end)]

    return {
        "rows": len(self.df),
        "data": self.df.to_dict(orient="records"),
    }
|
||||||
|
|
||||||
def reset_dataset(self) -> None:
|
def reset_dataset(self) -> None:
|
||||||
self.df = self.original_df.copy(deep=True)
|
self.df = self.original_df.copy(deep=True)
|
||||||
|
|||||||
Reference in New Issue
Block a user