diff --git a/frontend/src/pages/Stats.tsx b/frontend/src/pages/Stats.tsx index fc910c6..b20d01a 100644 --- a/frontend/src/pages/Stats.tsx +++ b/frontend/src/pages/Stats.tsx @@ -29,22 +29,24 @@ const StatPage = () => { useEffect(() => { Promise.all([ axios.get("http://localhost:5000/stats/time"), - axios.get("http://localhost:5000/stats/word_frequencies"), + axios.get("http://localhost:5000/stats/content"), ]) .then(([timeRes, wordsRes]) => { + setPostsPerDay(timeRes.data["events_per_day"].filter( (d: any) => new Date(d.date) >= new Date('2026-01-10') )) + setHeatmapData(timeRes.data["weekday_hour_heatmap"]) setWordFrequencyData( - wordsRes.data.map((d: BackendWord) => ({ + wordsRes.data["word_frequencies"].map((d: BackendWord) => ({ text: d.word, value: d.count, })) ); }) - .catch(() => setError("Failed to load statistics")) + .catch((e) => setError("Failed to load statistics: " + e)) .finally(() => setLoading(false)); }, []); @@ -64,7 +66,7 @@ const StatPage = () => { }} >
-

Posts per Day

+

Events per Day

diff --git a/server/app.py b/server/app.py index 4175aaa..0077a7c 100644 --- a/server/app.py +++ b/server/app.py @@ -42,16 +42,17 @@ def upload_data(): return jsonify({"message": "File uploaded successfully", "event_count": len(stat_obj.df)}), 200 -@app.route('/stats/word_frequencies', methods=['GET']) +@app.route('/stats/content', methods=['GET']) def word_frequencies(): if stat_obj is None: return jsonify({"error": "No data uploaded"}), 400 try: - return jsonify(stat_obj.get_word_frequencies().to_dict(orient='records')), 200 + return jsonify(stat_obj.content_analysis()), 200 except ValueError as e: return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400 except Exception as e: + print(traceback.format_exc()) return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500 @app.route('/stats/search', methods=["POST"]) diff --git a/server/stat_gen.py b/server/stat_gen.py index c33114f..686cd81 100644 --- a/server/stat_gen.py +++ b/server/stat_gen.py @@ -74,39 +74,7 @@ class StatGen: "burstiness": round(burst_index, 2) } - def get_word_frequencies(self, limit: int = 100) -> pd.DataFrame: - texts = ( - self.df["content"] - .dropna() - .astype(str) - .str.lower() - ) - - words = [] - for text in texts: - tokens = re.findall(r"\b[a-z]{3,}\b", text) - words.extend( - w for w in tokens - if w not in EXCLUDE_WORDS - ) - - counts = Counter(words) - - return ( - pd.DataFrame(counts.items(), columns=["word", "count"]) - .sort_values("count", ascending=False) - .head(limit) - .reset_index(drop=True) - ) - - def filter_events(self, search_query: str) -> pd.DataFrame: - self.df = self.df[self.df["content"].str.contains(search_query)] - return self.df - - def reset_dataset(self) -> None: - self.df = self.original_df.copy(deep=True) - - def get_summary(self) -> dict: + def summary(self) -> dict: total_posts = (self.df["type"] == "post").sum() total_comments = (self.df["type"] == "comment").sum() @@ -126,4 +94,39 @@ class StatGen: "sources": self.df["source"].unique().tolist() } + def content_analysis(self, limit: int = 100) -> dict: + texts = ( + self.df["content"] + .dropna() + .astype(str) + .str.lower() + ) + + words = [] + for text in texts: + tokens = re.findall(r"\b[a-z]{3,}\b", text) + words.extend( + w for w in tokens + if w not in EXCLUDE_WORDS + ) + + counts = Counter(words) + + word_frequencies = ( + pd.DataFrame(counts.items(), columns=["word", "count"]) + .sort_values("count", ascending=False) + .head(limit) + .reset_index(drop=True) + ) + + return { + "word_frequencies": word_frequencies.to_dict(orient='records') + } + + def filter_events(self, search_query: str) -> pd.DataFrame: + self.df = self.df[self.df["content"].str.contains(search_query)] + return self.df + + def reset_dataset(self) -> None: + self.df = self.original_df.copy(deep=True)