refactor: rename word freq endpoint

Improving consistency by grouping similar endpoints together.
This commit is contained in:
2026-01-31 19:43:00 +00:00
parent 05c5e04f92
commit b058853f3c
3 changed files with 45 additions and 39 deletions

View File

@@ -42,16 +42,17 @@ def upload_data():
return jsonify({"message": "File uploaded successfully", "event_count": len(stat_obj.df)}), 200
@app.route('/stats/content', methods=['GET'])
def word_frequencies():
    """Return content analysis (word frequencies) for the uploaded dataset.

    Responses:
        200: JSON produced by ``StatGen.content_analysis()``.
        400: No data has been uploaded yet, or the data is malformed.
        500: Any other unexpected failure (traceback printed server-side).
    """
    if stat_obj is None:
        return jsonify({"error": "No data uploaded"}), 400
    try:
        # Single return path: the old /stats/word_frequencies body left an
        # unreachable duplicate return here after the endpoint rename.
        return jsonify(stat_obj.content_analysis()), 200
    except ValueError as e:
        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route('/stats/search', methods=["POST"])

View File

@@ -74,39 +74,7 @@ class StatGen:
"burstiness": round(burst_index, 2)
}
def get_word_frequencies(self, limit: int = 100) -> pd.DataFrame:
    """Return the *limit* most frequent words across all event content.

    Content is lowercased and tokenized into alphabetic words of at
    least three letters; tokens found in ``EXCLUDE_WORDS`` are dropped.

    Args:
        limit: Maximum number of rows in the result (default 100).

    Returns:
        DataFrame with ``word`` and ``count`` columns, sorted by
        descending count.
    """
    lowered = self.df["content"].dropna().astype(str).str.lower()
    # Count tokens lazily; Counter sees them in the same order as the
    # original append loop, so tie ordering is unchanged.
    tally = Counter(
        token
        for text in lowered
        for token in re.findall(r"\b[a-z]{3,}\b", text)
        if token not in EXCLUDE_WORDS
    )
    table = pd.DataFrame(tally.items(), columns=["word", "count"])
    table = table.sort_values("count", ascending=False)
    return table.head(limit).reset_index(drop=True)
def filter_events(self, search_query: str) -> pd.DataFrame:
    """Narrow the working dataset to rows whose content matches the query.

    NOTE: destructive — ``self.df`` is replaced by the filtered frame;
    use ``reset_dataset`` to restore the original data.

    Args:
        search_query: Pattern handed to ``Series.str.contains`` (pandas
            treats it as a regular expression by default).

    Returns:
        The filtered DataFrame (also stored back on ``self.df``).
    """
    # na=False: rows with missing content are simply excluded instead of
    # yielding NA mask values, which break boolean indexing.
    mask = self.df["content"].str.contains(search_query, na=False)
    self.df = self.df[mask]
    return self.df
def reset_dataset(self) -> None:
    """Restore the working dataset from the originally-uploaded data."""
    # Deep copy so later in-place filtering never touches the pristine frame.
    pristine = self.original_df.copy(deep=True)
    self.df = pristine
def get_summary(self) -> dict:
def summary(self) -> dict:
total_posts = (self.df["type"] == "post").sum()
total_comments = (self.df["type"] == "comment").sum()
@@ -126,4 +94,39 @@ class StatGen:
"sources": self.df["source"].unique().tolist()
}
def content_analysis(self, limit: int = 100) -> dict:
    """Analyze event content and return word-frequency statistics.

    Content is lowercased and tokenized into alphabetic words of at
    least three letters; tokens in ``EXCLUDE_WORDS`` are dropped.

    Args:
        limit: Maximum number of word-frequency entries (default 100).

    Returns:
        Dict with key ``"word_frequencies"`` mapping to a list of
        ``{"word": ..., "count": ...}`` records sorted by descending count.
    """
    normalized = self.df["content"].dropna().astype(str).str.lower()
    # Feed tokens straight into Counter; iteration order matches the
    # original list-building loop, so tie ordering is identical.
    token_counts = Counter(
        token
        for text in normalized
        for token in re.findall(r"\b[a-z]{3,}\b", text)
        if token not in EXCLUDE_WORDS
    )
    freq_table = (
        pd.DataFrame(token_counts.items(), columns=["word", "count"])
        .sort_values("count", ascending=False)
        .head(limit)
        .reset_index(drop=True)
    )
    return {"word_frequencies": freq_table.to_dict(orient='records')}
def filter_events(self, search_query: str) -> pd.DataFrame:
    """Narrow the working dataset to rows whose content matches the query.

    NOTE: destructive — ``self.df`` is replaced by the filtered frame;
    use ``reset_dataset`` to restore the original data.

    Args:
        search_query: Pattern handed to ``Series.str.contains`` (pandas
            treats it as a regular expression by default).

    Returns:
        The filtered DataFrame (also stored back on ``self.df``).
    """
    # na=False: rows with missing content are simply excluded instead of
    # yielding NA mask values, which break boolean indexing.
    mask = self.df["content"].str.contains(search_query, na=False)
    self.df = self.df[mask]
    return self.df
def reset_dataset(self) -> None:
    """Discard any filtering and restore the originally-uploaded data."""
    # Deep copy keeps original_df immune to subsequent in-place filters.
    self.df = self.original_df.copy(deep=True)