refactor: rename word freq endpoint
Improving consistency by grouping similar endpoints together
This commit is contained in:
@@ -29,22 +29,24 @@ const StatPage = () => {
|
|||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
Promise.all([
|
Promise.all([
|
||||||
axios.get("http://localhost:5000/stats/time"),
|
axios.get("http://localhost:5000/stats/time"),
|
||||||
axios.get("http://localhost:5000/stats/word_frequencies"),
|
axios.get("http://localhost:5000/stats/content"),
|
||||||
])
|
])
|
||||||
.then(([timeRes, wordsRes]) => {
|
.then(([timeRes, wordsRes]) => {
|
||||||
|
|
||||||
setPostsPerDay(timeRes.data["events_per_day"].filter(
|
setPostsPerDay(timeRes.data["events_per_day"].filter(
|
||||||
(d: any) => new Date(d.date) >= new Date('2026-01-10')
|
(d: any) => new Date(d.date) >= new Date('2026-01-10')
|
||||||
))
|
))
|
||||||
|
|
||||||
setHeatmapData(timeRes.data["weekday_hour_heatmap"])
|
setHeatmapData(timeRes.data["weekday_hour_heatmap"])
|
||||||
|
|
||||||
setWordFrequencyData(
|
setWordFrequencyData(
|
||||||
wordsRes.data.map((d: BackendWord) => ({
|
wordsRes.data["word_frequencies"].map((d: BackendWord) => ({
|
||||||
text: d.word,
|
text: d.word,
|
||||||
value: d.count,
|
value: d.count,
|
||||||
}))
|
}))
|
||||||
);
|
);
|
||||||
})
|
})
|
||||||
.catch(() => setError("Failed to load statistics"))
|
.catch((e) => setError("Failed to load statistics: " + e))
|
||||||
.finally(() => setLoading(false));
|
.finally(() => setLoading(false));
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
@@ -64,7 +66,7 @@ const StatPage = () => {
|
|||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
<div>
|
<div>
|
||||||
<h2>Posts per Day</h2>
|
<h2>Events per Day</h2>
|
||||||
|
|
||||||
<ResponsiveContainer width={800} height={350}>
|
<ResponsiveContainer width={800} height={350}>
|
||||||
<LineChart data={postsPerDay}>
|
<LineChart data={postsPerDay}>
|
||||||
|
|||||||
@@ -42,16 +42,17 @@ def upload_data():
|
|||||||
|
|
||||||
return jsonify({"message": "File uploaded successfully", "event_count": len(stat_obj.df)}), 200
|
return jsonify({"message": "File uploaded successfully", "event_count": len(stat_obj.df)}), 200
|
||||||
|
|
||||||
@app.route('/stats/word_frequencies', methods=['GET'])
|
@app.route('/stats/content', methods=['GET'])
|
||||||
def word_frequencies():
|
def word_frequencies():
|
||||||
if stat_obj is None:
|
if stat_obj is None:
|
||||||
return jsonify({"error": "No data uploaded"}), 400
|
return jsonify({"error": "No data uploaded"}), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return jsonify(stat_obj.get_word_frequencies().to_dict(orient='records')), 200
|
return jsonify(stat_obj.content_analysis()), 200
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
print(traceback.format_exc())
|
||||||
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
||||||
|
|
||||||
@app.route('/stats/search', methods=["POST"])
|
@app.route('/stats/search', methods=["POST"])
|
||||||
|
|||||||
@@ -74,39 +74,7 @@ class StatGen:
|
|||||||
"burstiness": round(burst_index, 2)
|
"burstiness": round(burst_index, 2)
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_word_frequencies(self, limit: int = 100) -> pd.DataFrame:
|
def summary(self) -> dict:
|
||||||
texts = (
|
|
||||||
self.df["content"]
|
|
||||||
.dropna()
|
|
||||||
.astype(str)
|
|
||||||
.str.lower()
|
|
||||||
)
|
|
||||||
|
|
||||||
words = []
|
|
||||||
for text in texts:
|
|
||||||
tokens = re.findall(r"\b[a-z]{3,}\b", text)
|
|
||||||
words.extend(
|
|
||||||
w for w in tokens
|
|
||||||
if w not in EXCLUDE_WORDS
|
|
||||||
)
|
|
||||||
|
|
||||||
counts = Counter(words)
|
|
||||||
|
|
||||||
return (
|
|
||||||
pd.DataFrame(counts.items(), columns=["word", "count"])
|
|
||||||
.sort_values("count", ascending=False)
|
|
||||||
.head(limit)
|
|
||||||
.reset_index(drop=True)
|
|
||||||
)
|
|
||||||
|
|
||||||
def filter_events(self, search_query: str) -> pd.DataFrame:
|
|
||||||
self.df = self.df[self.df["content"].str.contains(search_query)]
|
|
||||||
return self.df
|
|
||||||
|
|
||||||
def reset_dataset(self) -> None:
|
|
||||||
self.df = self.original_df.copy(deep=True)
|
|
||||||
|
|
||||||
def get_summary(self) -> dict:
|
|
||||||
total_posts = (self.df["type"] == "post").sum()
|
total_posts = (self.df["type"] == "post").sum()
|
||||||
total_comments = (self.df["type"] == "comment").sum()
|
total_comments = (self.df["type"] == "comment").sum()
|
||||||
|
|
||||||
@@ -126,4 +94,39 @@ class StatGen:
|
|||||||
"sources": self.df["source"].unique().tolist()
|
"sources": self.df["source"].unique().tolist()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def content_analysis(self, limit: int = 100) -> dict:
|
||||||
|
texts = (
|
||||||
|
self.df["content"]
|
||||||
|
.dropna()
|
||||||
|
.astype(str)
|
||||||
|
.str.lower()
|
||||||
|
)
|
||||||
|
|
||||||
|
words = []
|
||||||
|
for text in texts:
|
||||||
|
tokens = re.findall(r"\b[a-z]{3,}\b", text)
|
||||||
|
words.extend(
|
||||||
|
w for w in tokens
|
||||||
|
if w not in EXCLUDE_WORDS
|
||||||
|
)
|
||||||
|
|
||||||
|
counts = Counter(words)
|
||||||
|
|
||||||
|
word_frequencies = (
|
||||||
|
pd.DataFrame(counts.items(), columns=["word", "count"])
|
||||||
|
.sort_values("count", ascending=False)
|
||||||
|
.head(limit)
|
||||||
|
.reset_index(drop=True)
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"word_frequencies": word_frequencies.to_dict(orient='records')
|
||||||
|
}
|
||||||
|
|
||||||
|
def filter_events(self, search_query: str) -> pd.DataFrame:
|
||||||
|
self.df = self.df[self.df["content"].str.contains(search_query)]
|
||||||
|
return self.df
|
||||||
|
|
||||||
|
def reset_dataset(self) -> None:
|
||||||
|
self.df = self.original_df.copy(deep=True)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user