refactor: rename word freq endpoint

Improving consistency by grouping similar endpoints together.
This commit is contained in:
2026-01-31 19:43:00 +00:00
parent 05c5e04f92
commit b058853f3c
3 changed files with 45 additions and 39 deletions

View File

@@ -29,22 +29,24 @@ const StatPage = () => {
useEffect(() => { useEffect(() => {
Promise.all([ Promise.all([
axios.get("http://localhost:5000/stats/time"), axios.get("http://localhost:5000/stats/time"),
axios.get("http://localhost:5000/stats/word_frequencies"), axios.get("http://localhost:5000/stats/content"),
]) ])
.then(([timeRes, wordsRes]) => { .then(([timeRes, wordsRes]) => {
setPostsPerDay(timeRes.data["events_per_day"].filter( setPostsPerDay(timeRes.data["events_per_day"].filter(
(d: any) => new Date(d.date) >= new Date('2026-01-10') (d: any) => new Date(d.date) >= new Date('2026-01-10')
)) ))
setHeatmapData(timeRes.data["weekday_hour_heatmap"]) setHeatmapData(timeRes.data["weekday_hour_heatmap"])
setWordFrequencyData( setWordFrequencyData(
wordsRes.data.map((d: BackendWord) => ({ wordsRes.data["word_frequencies"].map((d: BackendWord) => ({
text: d.word, text: d.word,
value: d.count, value: d.count,
})) }))
); );
}) })
.catch(() => setError("Failed to load statistics")) .catch((e) => setError("Failed to load statistics: " + e))
.finally(() => setLoading(false)); .finally(() => setLoading(false));
}, []); }, []);
@@ -64,7 +66,7 @@ const StatPage = () => {
}} }}
> >
<div> <div>
<h2>Posts per Day</h2> <h2>Events per Day</h2>
<ResponsiveContainer width={800} height={350}> <ResponsiveContainer width={800} height={350}>
<LineChart data={postsPerDay}> <LineChart data={postsPerDay}>

View File

@@ -42,16 +42,17 @@ def upload_data():
return jsonify({"message": "File uploaded successfully", "event_count": len(stat_obj.df)}), 200 return jsonify({"message": "File uploaded successfully", "event_count": len(stat_obj.df)}), 200
@app.route('/stats/word_frequencies', methods=['GET']) @app.route('/stats/content', methods=['GET'])
def word_frequencies(): def word_frequencies():
if stat_obj is None: if stat_obj is None:
return jsonify({"error": "No data uploaded"}), 400 return jsonify({"error": "No data uploaded"}), 400
try: try:
return jsonify(stat_obj.get_word_frequencies().to_dict(orient='records')), 200 return jsonify(stat_obj.content_analysis()), 200
except ValueError as e: except ValueError as e:
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400 return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
except Exception as e: except Exception as e:
print(traceback.format_exc())
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500 return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route('/stats/search', methods=["POST"]) @app.route('/stats/search', methods=["POST"])

View File

@@ -74,39 +74,7 @@ class StatGen:
"burstiness": round(burst_index, 2) "burstiness": round(burst_index, 2)
} }
def get_word_frequencies(self, limit: int = 100) -> pd.DataFrame: def summary(self) -> dict:
texts = (
self.df["content"]
.dropna()
.astype(str)
.str.lower()
)
words = []
for text in texts:
tokens = re.findall(r"\b[a-z]{3,}\b", text)
words.extend(
w for w in tokens
if w not in EXCLUDE_WORDS
)
counts = Counter(words)
return (
pd.DataFrame(counts.items(), columns=["word", "count"])
.sort_values("count", ascending=False)
.head(limit)
.reset_index(drop=True)
)
def filter_events(self, search_query: str) -> pd.DataFrame:
self.df = self.df[self.df["content"].str.contains(search_query)]
return self.df
def reset_dataset(self) -> None:
self.df = self.original_df.copy(deep=True)
def get_summary(self) -> dict:
total_posts = (self.df["type"] == "post").sum() total_posts = (self.df["type"] == "post").sum()
total_comments = (self.df["type"] == "comment").sum() total_comments = (self.df["type"] == "comment").sum()
@@ -126,4 +94,39 @@ class StatGen:
"sources": self.df["source"].unique().tolist() "sources": self.df["source"].unique().tolist()
} }
def content_analysis(self, limit: int = 100) -> dict:
    """Compute content-based statistics for the current dataset.

    Tokenizes the "content" column (NaN rows dropped, lowercased) into
    words of three or more ASCII letters, discards stop words listed in
    the module-level EXCLUDE_WORDS set, and tallies the remainder.

    Args:
        limit: maximum number of top words to include (default 100).

    Returns:
        dict with key "word_frequencies": a list of
        {"word": str, "count": int} records, most frequent first.
    """
    # Compile the token pattern once instead of re-matching the raw
    # string for every row.
    token_re = re.compile(r"\b[a-z]{3,}\b")
    texts = (
        self.df["content"]
        .dropna()
        .astype(str)
        .str.lower()
    )
    counts = Counter(
        word
        for text in texts
        for word in token_re.findall(text)
        if word not in EXCLUDE_WORDS
    )
    # most_common(limit) returns the top rows directly, avoiding the
    # construction and sort of a DataFrame over every distinct word.
    word_frequencies = pd.DataFrame(
        counts.most_common(limit), columns=["word", "count"]
    )
    return {
        "word_frequencies": word_frequencies.to_dict(orient='records')
    }
def filter_events(self, search_query: str) -> pd.DataFrame:
    """Narrow the working dataset to events whose content matches.

    Mutates self.df in place — the narrowing is destructive until
    reset_dataset() restores the original data — and returns the
    filtered frame.

    Args:
        search_query: pattern matched against the "content" column.
            NOTE(review): pandas interprets this as a regex by default;
            confirm callers intend regex semantics, and escape the
            query if they expect literal substring search.

    Returns:
        The filtered DataFrame (the same object now held in self.df).
    """
    # na=False: rows with missing content previously produced NaN in
    # the boolean mask, which raises when used as an indexer; treat
    # them as non-matching instead.
    mask = self.df["content"].str.contains(search_query, na=False)
    self.df = self.df[mask]
    return self.df
def reset_dataset(self) -> None:
    """Restore the working DataFrame to the originally uploaded data.

    Undoes any destructive narrowing performed by filter_events by
    replacing self.df with a deep copy of the pristine original_df,
    so later mutations of self.df cannot leak back into the original.
    """
    pristine = self.original_df.copy(deep=True)
    self.df = pristine