diff --git a/server/app.py b/server/app.py index 7118f4c..d71e66a 100644 --- a/server/app.py +++ b/server/app.py @@ -126,8 +126,8 @@ def get_interaction_analysis(): print(traceback.format_exc()) return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500 -@app.route('/filter/search', methods=["POST"]) -def search_dataset(): +@app.route('/filter/query', methods=["POST"]) +def filter_query(): if stat_obj is None: return jsonify({"error": "No data uploaded"}), 400 @@ -137,7 +137,7 @@ def search_dataset(): return jsonify(stat_obj.df.to_dict(orient="records")), 200 query = data["query"] - filtered_df = stat_obj.search(query) + filtered_df = stat_obj.filter_by_query(query) return jsonify(filtered_df), 200 diff --git a/server/stat_gen.py b/server/stat_gen.py index f6b4886..bbba747 100644 --- a/server/stat_gen.py +++ b/server/stat_gen.py @@ -16,8 +16,7 @@ DOMAIN_STOPWORDS = { "comment", "comments", "discussion", "thread", "post", "posts", - "would", "could", "should", - "like", "get", "one" + "would", "get", "one" } nltk.download('stopwords') @@ -41,7 +40,10 @@ class StatGen: self.df.drop(columns=["post_id"], inplace=True, errors="ignore") self.nlp = NLP(self.df, "title", "content", domain_topics) - self._add_extra_cols(self.df) + self.nlp.add_emotion_cols() + self.nlp.add_topic_col() + self.nlp.add_ner_cols() + self._add_time_cols(self.df) self.temporal_analysis = TemporalAnalysis(self.df) self.emotional_analysis = EmotionalAnalysis(self.df) @@ -52,23 +54,18 @@ class StatGen: self.original_df = self.df.copy(deep=True) ## Private Methods - def _add_extra_cols(self, df: pd.DataFrame) -> None: - df['timestamp'] = pd.to_numeric(self.df['timestamp'], errors='coerce') + def _add_time_cols(self, df: pd.DataFrame) -> None: + df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce') df['date'] = pd.to_datetime(df['timestamp'], unit='s').dt.date df["dt"] = pd.to_datetime(df["timestamp"], unit="s", utc=True) df["hour"] = df["dt"].dt.hour df["weekday"] = df["dt"].dt.day_name() - - self.nlp.add_emotion_cols() - self.nlp.add_topic_col() - self.nlp.add_ner_cols() ## Public - # topics over time # emotions over time - def get_time_analysis(self) -> pd.DataFrame: + def get_time_analysis(self) -> dict: return { "events_per_day": self.temporal_analysis.posts_per_day(), "weekday_hour_heatmap": self.temporal_analysis.heatmap() @@ -130,7 +127,7 @@ class StatGen: "sources": self.df["source"].dropna().unique().tolist() } - def search(self, search_query: str) -> dict: + def filter_by_query(self, search_query: str) -> dict: self.df = self.df[ self.df["content"].str.contains(search_query) ]