refactor: rename word freq endpoint

Improving consistency by grouping similar endpoints together.
This commit is contained in:
2026-01-31 19:43:00 +00:00
parent 05c5e04f92
commit b058853f3c
3 changed files with 45 additions and 39 deletions

View File

@@ -29,22 +29,24 @@ const StatPage = () => {
useEffect(() => { useEffect(() => {
Promise.all([ Promise.all([
axios.get("http://localhost:5000/stats/time"), axios.get("http://localhost:5000/stats/time"),
axios.get("http://localhost:5000/stats/word_frequencies"), axios.get("http://localhost:5000/stats/content"),
]) ])
.then(([timeRes, wordsRes]) => { .then(([timeRes, wordsRes]) => {
setPostsPerDay(timeRes.data["events_per_day"].filter( setPostsPerDay(timeRes.data["events_per_day"].filter(
(d: any) => new Date(d.date) >= new Date('2026-01-10') (d: any) => new Date(d.date) >= new Date('2026-01-10')
)) ))
setHeatmapData(timeRes.data["weekday_hour_heatmap"]) setHeatmapData(timeRes.data["weekday_hour_heatmap"])
setWordFrequencyData( setWordFrequencyData(
wordsRes.data.map((d: BackendWord) => ({ wordsRes.data["word_frequencies"].map((d: BackendWord) => ({
text: d.word, text: d.word,
value: d.count, value: d.count,
})) }))
); );
}) })
.catch(() => setError("Failed to load statistics")) .catch((e) => setError("Failed to load statistics: " + e))
.finally(() => setLoading(false)); .finally(() => setLoading(false));
}, []); }, []);
@@ -64,7 +66,7 @@ const StatPage = () => {
}} }}
> >
<div> <div>
<h2>Posts per Day</h2> <h2>Events per Day</h2>
<ResponsiveContainer width={800} height={350}> <ResponsiveContainer width={800} height={350}>
<LineChart data={postsPerDay}> <LineChart data={postsPerDay}>

View File

@@ -42,16 +42,17 @@ def upload_data():
return jsonify({"message": "File uploaded successfully", "event_count": len(stat_obj.df)}), 200 return jsonify({"message": "File uploaded successfully", "event_count": len(stat_obj.df)}), 200
@app.route('/stats/word_frequencies', methods=['GET']) @app.route('/stats/content', methods=['GET'])
def word_frequencies(): def word_frequencies():
if stat_obj is None: if stat_obj is None:
return jsonify({"error": "No data uploaded"}), 400 return jsonify({"error": "No data uploaded"}), 400
try: try:
return jsonify(stat_obj.get_word_frequencies().to_dict(orient='records')), 200 return jsonify(stat_obj.content_analysis()), 200
except ValueError as e: except ValueError as e:
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400 return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
except Exception as e: except Exception as e:
print(traceback.format_exc())
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500 return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route('/stats/search', methods=["POST"]) @app.route('/stats/search', methods=["POST"])

View File

@@ -74,39 +74,7 @@ class StatGen:
"burstiness": round(burst_index, 2) "burstiness": round(burst_index, 2)
} }
def get_word_frequencies(self, limit: int = 100) -> pd.DataFrame: def summary(self) -> dict:
texts = (
self.df["content"]
.dropna()
.astype(str)
.str.lower()
)
words = []
for text in texts:
tokens = re.findall(r"\b[a-z]{3,}\b", text)
words.extend(
w for w in tokens
if w not in EXCLUDE_WORDS
)
counts = Counter(words)
return (
pd.DataFrame(counts.items(), columns=["word", "count"])
.sort_values("count", ascending=False)
.head(limit)
.reset_index(drop=True)
)
def filter_events(self, search_query: str) -> pd.DataFrame:
self.df = self.df[self.df["content"].str.contains(search_query)]
return self.df
def reset_dataset(self) -> None:
self.df = self.original_df.copy(deep=True)
def get_summary(self) -> dict:
total_posts = (self.df["type"] == "post").sum() total_posts = (self.df["type"] == "post").sum()
total_comments = (self.df["type"] == "comment").sum() total_comments = (self.df["type"] == "comment").sum()
@@ -126,4 +94,39 @@ class StatGen:
"sources": self.df["source"].unique().tolist() "sources": self.df["source"].unique().tolist()
} }
def content_analysis(self, limit: int = 100) -> dict:
    """Compute content-based statistics for the current dataset.

    Tokenizes the "content" column (NaN rows dropped, lowercased) into
    words of three or more ASCII letters, discards stop words listed in
    the module-level EXCLUDE_WORDS set, and tallies the remainder.

    Args:
        limit: maximum number of top words to include (default 100).

    Returns:
        dict with key "word_frequencies": a list of
        {"word": str, "count": int} records, most frequent first.
    """
    # Compile the token pattern once instead of re-matching the raw
    # string for every row.
    token_re = re.compile(r"\b[a-z]{3,}\b")
    texts = (
        self.df["content"]
        .dropna()
        .astype(str)
        .str.lower()
    )
    counts = Counter(
        word
        for text in texts
        for word in token_re.findall(text)
        if word not in EXCLUDE_WORDS
    )
    # most_common(limit) returns the top rows directly, avoiding the
    # construction and sort of a DataFrame over every distinct word.
    word_frequencies = pd.DataFrame(
        counts.most_common(limit), columns=["word", "count"]
    )
    return {
        "word_frequencies": word_frequencies.to_dict(orient='records')
    }
def filter_events(self, search_query: str) -> pd.DataFrame:
    """Narrow the working dataset to events whose content matches.

    Mutates self.df in place — the narrowing is destructive until
    reset_dataset() restores the original data — and returns the
    filtered frame.

    Args:
        search_query: pattern matched against the "content" column.
            NOTE(review): pandas interprets this as a regex by default;
            confirm callers intend regex semantics, and escape the
            query if they expect literal substring search.

    Returns:
        The filtered DataFrame (the same object now held in self.df).
    """
    # na=False: rows with missing content previously produced NaN in
    # the boolean mask, which raises when used as an indexer; treat
    # them as non-matching instead.
    mask = self.df["content"].str.contains(search_query, na=False)
    self.df = self.df[mask]
    return self.df
def reset_dataset(self) -> None:
    """Restore the working DataFrame to the originally uploaded data.

    Undoes any destructive narrowing performed by filter_events by
    replacing self.df with a deep copy of the pristine original_df,
    so later mutations of self.df cannot leak back into the original.
    """
    pristine = self.original_df.copy(deep=True)
    self.df = pristine