refactor: improve API wording & clean up code

2026-02-24 15:55:56 +00:00
parent ce0aa6bc43
commit 6695d3d272
2 changed files with 12 additions and 15 deletions

View File

@@ -126,8 +126,8 @@ def get_interaction_analysis():
         print(traceback.format_exc())
         return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500

-@app.route('/filter/search', methods=["POST"])
-def search_dataset():
+@app.route('/filter/query', methods=["POST"])
+def filter_query():
     if stat_obj is None:
         return jsonify({"error": "No data uploaded"}), 400

@@ -137,7 +137,7 @@ def search_dataset():
         return jsonify(stat_obj.df.to_dict(orient="records")), 200

     query = data["query"]
-    filtered_df = stat_obj.search(query)
+    filtered_df = stat_obj.filter_by_query(query)
     return jsonify(filtered_df), 200
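For reference, a minimal sketch of a client call against the renamed endpoint. The host, port, and example query term are assumptions for illustration; only the route, method, and error shape come from the handler above.

import requests

# Hypothetical local dev server; host/port are not part of this commit.
resp = requests.post(
    "http://localhost:5000/filter/query",
    json={"query": "thread"},  # illustrative search term
)
if resp.ok:
    rows = resp.json()           # filtered rows, as serialized by jsonify
else:
    print(resp.json()["error"])  # e.g. "No data uploaded" on 400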

View File

@@ -16,8 +16,7 @@ DOMAIN_STOPWORDS = {
     "comment", "comments",
     "discussion", "thread",
     "post", "posts",
-    "would", "could", "should",
-    "like", "get", "one"
+    "would", "get", "one"
 }

 nltk.download('stopwords')
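This hunk shows only the trimmed domain set and the NLTK download; the step that merges the two lists is not visible here. A plausible sketch of the usual pattern, assuming a simple set union:

from nltk.corpus import stopwords

# Assumed merge step -- not shown in this diff.
ALL_STOPWORDS = set(stopwords.words("english")) | DOMAIN_STOPWORDS

tokens = ["this", "thread", "was", "helpful"]
kept = [t for t in tokens if t not in ALL_STOPWORDS]  # ["helpful"]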
@@ -41,7 +40,10 @@ class StatGen:
         self.df.drop(columns=["post_id"], inplace=True, errors="ignore")

         self.nlp = NLP(self.df, "title", "content", domain_topics)
-        self._add_extra_cols(self.df)
+        self.nlp.add_emotion_cols()
+        self.nlp.add_topic_col()
+        self.nlp.add_ner_cols()
+        self._add_time_cols(self.df)

         self.temporal_analysis = TemporalAnalysis(self.df)
         self.emotional_analysis = EmotionalAnalysis(self.df)
@@ -52,23 +54,18 @@ class StatGen:
         self.original_df = self.df.copy(deep=True)

     ## Private Methods
-    def _add_extra_cols(self, df: pd.DataFrame) -> None:
-        df['timestamp'] = pd.to_numeric(self.df['timestamp'], errors='coerce')
+    def _add_time_cols(self, df: pd.DataFrame) -> None:
+        df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
         df['date'] = pd.to_datetime(df['timestamp'], unit='s').dt.date
         df["dt"] = pd.to_datetime(df["timestamp"], unit="s", utc=True)
         df["hour"] = df["dt"].dt.hour
         df["weekday"] = df["dt"].dt.day_name()
-
-        self.nlp.add_emotion_cols()
-        self.nlp.add_topic_col()
-        self.nlp.add_ner_cols()

     ## Public
     # topics over time
     # emotions over time
-    def get_time_analysis(self) -> pd.DataFrame:
+    def get_time_analysis(self) -> dict:
         return {
             "events_per_day": self.temporal_analysis.posts_per_day(),
             "weekday_hour_heatmap": self.temporal_analysis.heatmap()
@@ -130,7 +127,7 @@ class StatGen:
             "sources": self.df["source"].dropna().unique().tolist()
         }

-    def search(self, search_query: str) -> dict:
+    def filter_by_query(self, search_query: str) -> dict:
         self.df = self.df[
             self.df["content"].str.contains(search_query)
         ]
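Finally, a minimal sketch of what the renamed filter does, assuming the content column holds plain strings. Note that str.contains is case-sensitive and yields NaN for missing values unless na= is passed, which the call above does not do:

import pandas as pd

df = pd.DataFrame({"content": ["Great thread", "off topic", None]})
mask = df["content"].str.contains("thread", na=False)  # missing values -> False
print(df[mask])  # keeps only the "Great thread" row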