Review notes (free text ahead of the first "diff --git" line is skipped by git apply and patch).
Line structure was rebuilt from a whitespace-collapsed copy of this patch; blank-line
placement and indentation are inferred from the hunk line counts.
The "index" lines are carried over from the original patch and were not recomputed.
Changes made to the added code during review:
  * search_dataset: never return a raw DataFrame from a view; validate the body and query type.
  * filter_time: reject null/list bounds and an inverted range with a 400.
  * StatGen.search: literal, NaN-safe matching; return annotation now says dict.
  * StatGen.set_time_range: align tz-aware/naive bounds with the "dt" column before comparing.

diff --git a/server/app.py b/server/app.py
index da73dcc..3ac27b9 100644
--- a/server/app.py
+++ b/server/app.py
@@ -55,19 +55,6 @@ def word_frequencies():
         print(traceback.format_exc())
         return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
 
-@app.route('/stats/search', methods=["POST"])
-def search_dataset():
-    if stat_obj is None:
-        return jsonify({"error": "No data uploaded"}), 400
-
-    data = request.get_json(silent=True) or {}
-
-    if "query" not in data:
-        return stat_obj.df
-
-    query = data["query"]
-    return jsonify(stat_obj.filter_events(query).to_dict(orient='records')), 200
-
 @app.route('/stats/summary', methods=["GET"])
 def get_summary():
     if stat_obj is None:
@@ -92,8 +79,58 @@ def get_time_analysis():
     except Exception as e:
         print(traceback.format_exc())
         return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
+
+@app.route('/filter/search', methods=["POST"])
+def search_dataset():
+    if stat_obj is None:
+        return jsonify({"error": "No data uploaded"}), 400
 
-@app.route('/reset', methods=["GET"])
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        data = {}
+
+    if "query" not in data:
+        # A view cannot return a DataFrame directly; serialize the current
+        # dataset in the same {"rows", "data"} shape that search() returns.
+        return jsonify({
+            "rows": len(stat_obj.df),
+            "data": stat_obj.df.to_dict(orient="records")
+        }), 200
+
+    query = data["query"]
+    if not isinstance(query, str):
+        return jsonify({"error": "Query must be a string"}), 400
+
+    return jsonify(stat_obj.search(query)), 200
+
+@app.route('/filter/time', methods=["POST"])
+def filter_time():
+    if stat_obj is None:
+        return jsonify({"error": "No data uploaded"}), 400
+
+    data = request.get_json(silent=True)
+    if not data or not isinstance(data, dict):
+        return jsonify({"error": "Invalid or missing JSON body"}), 400
+
+    if "start" not in data or "end" not in data:
+        return jsonify({"error": "Please include both start and end dates"}), 400
+
+    try:
+        start = pd.to_datetime(data["start"], utc=True)
+        end = pd.to_datetime(data["end"], utc=True)
+    except Exception:
+        return jsonify({"error": "Invalid datetime format"}), 400
+
+    # null values parse to NaT and lists to an index; neither is a usable bound.
+    if not (isinstance(start, pd.Timestamp) and isinstance(end, pd.Timestamp)):
+        return jsonify({"error": "Invalid datetime format"}), 400
+
+    if start > end:
+        return jsonify({"error": "Start date must not be after end date"}), 400
+
+    return jsonify(stat_obj.set_time_range(start, end)), 200
+
+@app.route('/filter/reset', methods=["GET"])
 def reset_dataset():
     if stat_obj is None:
         return jsonify({"error": "No data uploaded"}), 400
diff --git a/server/stat_gen.py b/server/stat_gen.py
index 686cd81..eaf4b3f 100644
--- a/server/stat_gen.py
+++ b/server/stat_gen.py
@@ -1,6 +1,7 @@
 import pandas as pd
 import re
 import nltk
+import datetime
 from nltk.corpus import stopwords
 from collections import Counter
 
@@ -27,9 +28,10 @@ class StatGen:
         comments_df["parent_id"] = comments_df.get("post_id")
 
         self.df = pd.concat([posts_df, comments_df])
-        self.original_df = self.df.copy(deep=True)
         self._add_date_cols(self.df)
 
+        self.original_df = self.df.copy(deep=True)
+
     ## Private Methods
     def _add_date_cols(self, df: pd.DataFrame) -> None:
         df['date'] = pd.to_datetime(df['timestamp'], unit='s').dt.date
@@ -123,9 +125,39 @@ class StatGen:
             "word_frequencies": word_frequencies.to_dict(orient='records')
         }
 
-    def filter_events(self, search_query: str) -> pd.DataFrame:
-        self.df = self.df[self.df["content"].str.contains(search_query)]
-        return self.df
+    def search(self, search_query: str) -> dict:
+        # Literal match: the query is untrusted user input, so it must not be
+        # compiled as a regex. na=False keeps rows with missing content out of
+        # the mask instead of making the boolean index fail on NaN.
+        self.df = self.df[
+            self.df["content"].str.contains(search_query, regex=False, na=False)
+        ]
+
+        return {
+            "rows": len(self.df),
+            "data": self.df.to_dict(orient="records")
+        }
+
+    def set_time_range(self, start: datetime.datetime, end: datetime.datetime) -> dict:
+        # Comparing tz-aware bounds with a tz-naive column (or the reverse)
+        # raises TypeError, so align each bound with the column first.
+        # Naive values are treated as UTC -- TODO confirm how "dt" is built.
+        col_tz = getattr(self.df["dt"].dtype, "tz", None)
+        bounds = []
+        for bound in (start, end):
+            bound = pd.Timestamp(bound)
+            if col_tz is None and bound.tzinfo is not None:
+                bound = bound.tz_convert("UTC").tz_localize(None)
+            elif col_tz is not None and bound.tzinfo is None:
+                bound = bound.tz_localize("UTC")
+            bounds.append(bound)
+
+        self.df = self.df[self.df["dt"].between(bounds[0], bounds[1])]
+
+        return {
+            "rows": len(self.df),
+            "data": self.df.to_dict(orient="records")
+        }
 
     def reset_dataset(self) -> None:
         self.df = self.original_df.copy(deep=True)