refactor: improve API wording & clean up code
This commit is contained in:
@@ -126,8 +126,8 @@ def get_interaction_analysis():
|
|||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
||||||
|
|
||||||
@app.route('/filter/search', methods=["POST"])
|
@app.route('/filter/query', methods=["POST"])
|
||||||
def search_dataset():
|
def filter_query():
|
||||||
if stat_obj is None:
|
if stat_obj is None:
|
||||||
return jsonify({"error": "No data uploaded"}), 400
|
return jsonify({"error": "No data uploaded"}), 400
|
||||||
|
|
||||||
@@ -137,7 +137,7 @@ def search_dataset():
|
|||||||
return jsonify(stat_obj.df.to_dict(orient="records")), 200
|
return jsonify(stat_obj.df.to_dict(orient="records")), 200
|
||||||
|
|
||||||
query = data["query"]
|
query = data["query"]
|
||||||
filtered_df = stat_obj.search(query)
|
filtered_df = stat_obj.filter_by_query(query)
|
||||||
|
|
||||||
return jsonify(filtered_df), 200
|
return jsonify(filtered_df), 200
|
||||||
|
|
||||||
|
|||||||
@@ -16,8 +16,7 @@ DOMAIN_STOPWORDS = {
|
|||||||
"comment", "comments",
|
"comment", "comments",
|
||||||
"discussion", "thread",
|
"discussion", "thread",
|
||||||
"post", "posts",
|
"post", "posts",
|
||||||
"would", "could", "should",
|
"would", "get", "one"
|
||||||
"like", "get", "one"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
nltk.download('stopwords')
|
nltk.download('stopwords')
|
||||||
@@ -41,7 +40,10 @@ class StatGen:
|
|||||||
self.df.drop(columns=["post_id"], inplace=True, errors="ignore")
|
self.df.drop(columns=["post_id"], inplace=True, errors="ignore")
|
||||||
|
|
||||||
self.nlp = NLP(self.df, "title", "content", domain_topics)
|
self.nlp = NLP(self.df, "title", "content", domain_topics)
|
||||||
self._add_extra_cols(self.df)
|
self.nlp.add_emotion_cols()
|
||||||
|
self.nlp.add_topic_col()
|
||||||
|
self.nlp.add_ner_cols()
|
||||||
|
self._add_time_cols(self.df)
|
||||||
|
|
||||||
self.temporal_analysis = TemporalAnalysis(self.df)
|
self.temporal_analysis = TemporalAnalysis(self.df)
|
||||||
self.emotional_analysis = EmotionalAnalysis(self.df)
|
self.emotional_analysis = EmotionalAnalysis(self.df)
|
||||||
@@ -52,23 +54,18 @@ class StatGen:
|
|||||||
self.original_df = self.df.copy(deep=True)
|
self.original_df = self.df.copy(deep=True)
|
||||||
|
|
||||||
## Private Methods
|
## Private Methods
|
||||||
def _add_extra_cols(self, df: pd.DataFrame) -> None:
|
def _add_time_cols(self, df: pd.DataFrame) -> None:
|
||||||
df['timestamp'] = pd.to_numeric(self.df['timestamp'], errors='coerce')
|
df['timestamp'] = pd.to_numeric(df['timestamp'], errors='coerce')
|
||||||
df['date'] = pd.to_datetime(df['timestamp'], unit='s').dt.date
|
df['date'] = pd.to_datetime(df['timestamp'], unit='s').dt.date
|
||||||
df["dt"] = pd.to_datetime(df["timestamp"], unit="s", utc=True)
|
df["dt"] = pd.to_datetime(df["timestamp"], unit="s", utc=True)
|
||||||
df["hour"] = df["dt"].dt.hour
|
df["hour"] = df["dt"].dt.hour
|
||||||
df["weekday"] = df["dt"].dt.day_name()
|
df["weekday"] = df["dt"].dt.day_name()
|
||||||
|
|
||||||
self.nlp.add_emotion_cols()
|
|
||||||
self.nlp.add_topic_col()
|
|
||||||
self.nlp.add_ner_cols()
|
|
||||||
|
|
||||||
## Public
|
## Public
|
||||||
|
|
||||||
|
|
||||||
# topics over time
|
# topics over time
|
||||||
# emotions over time
|
# emotions over time
|
||||||
def get_time_analysis(self) -> pd.DataFrame:
|
def get_time_analysis(self) -> dict:
|
||||||
return {
|
return {
|
||||||
"events_per_day": self.temporal_analysis.posts_per_day(),
|
"events_per_day": self.temporal_analysis.posts_per_day(),
|
||||||
"weekday_hour_heatmap": self.temporal_analysis.heatmap()
|
"weekday_hour_heatmap": self.temporal_analysis.heatmap()
|
||||||
@@ -130,7 +127,7 @@ class StatGen:
|
|||||||
"sources": self.df["source"].dropna().unique().tolist()
|
"sources": self.df["source"].dropna().unique().tolist()
|
||||||
}
|
}
|
||||||
|
|
||||||
def search(self, search_query: str) -> dict:
|
def filter_by_query(self, search_query: str) -> dict:
|
||||||
self.df = self.df[
|
self.df = self.df[
|
||||||
self.df["content"].str.contains(search_query)
|
self.df["content"].str.contains(search_query)
|
||||||
]
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user