style: run python linter & prettifier on backend code

2026-03-25 19:34:43 +00:00
parent aae10c4d9d
commit 376773a0cc
17 changed files with 408 additions and 315 deletions
--- a/server/analysis/user.py
+++ b/server/analysis/user.py
@@ -3,6 +3,7 @@ import re

 from collections import Counter

+
 class UserAnalysis:
    def __init__(self, word_exclusions: set[str]):
        self.word_exclusions = word_exclusions
@@ -12,49 +13,49 @@ class UserAnalysis:
        return [t for t in tokens if t not in self.word_exclusions]

    def _vocab_richness_per_user(
-            self, df: pd.DataFrame, min_words: int = 20, top_most_used_words: int = 100
-        ) -> list:
-            df = df.copy()
-            df["content"] = df["content"].fillna("").astype(str).str.lower()
-            df["tokens"] = df["content"].apply(self._tokenize)
+        self, df: pd.DataFrame, min_words: int = 20, top_most_used_words: int = 100
+    ) -> list:
+        df = df.copy()
+        df["content"] = df["content"].fillna("").astype(str).str.lower()
+        df["tokens"] = df["content"].apply(self._tokenize)

-            rows = []
-            for author, group in df.groupby("author"):
-                all_tokens = [t for tokens in group["tokens"] for t in tokens]
+        rows = []
+        for author, group in df.groupby("author"):
+            all_tokens = [t for tokens in group["tokens"] for t in tokens]

-                total_words = len(all_tokens)
-                unique_words = len(set(all_tokens))
-                events = len(group)
+            total_words = len(all_tokens)
+            unique_words = len(set(all_tokens))
+            events = len(group)

-                # Min amount of words for a user, any less than this might give weird results
-                if total_words < min_words:
-                    continue
+            # Min amount of words for a user, any less than this might give weird results
+            if total_words < min_words:
+                continue

-                # 100% = they never reused a word (excluding stop words)
-                vocab_richness = unique_words / total_words
-                avg_words = total_words / max(events, 1)
+            # 100% = they never reused a word (excluding stop words)
+            vocab_richness = unique_words / total_words
+            avg_words = total_words / max(events, 1)

-                counts = Counter(all_tokens)
-                top_words = [
-                    {"word": w, "count": int(c)}
-                    for w, c in counts.most_common(top_most_used_words)
-                ]
+            counts = Counter(all_tokens)
+            top_words = [
+                {"word": w, "count": int(c)}
+                for w, c in counts.most_common(top_most_used_words)
+            ]

-                rows.append(
-                    {
-                        "author": author,
-                        "events": int(events),
-                        "total_words": int(total_words),
-                        "unique_words": int(unique_words),
-                        "vocab_richness": round(vocab_richness, 3),
-                        "avg_words_per_event": round(avg_words, 2),
-                        "top_words": top_words,
-                    }
-                )
+            rows.append(
+                {
+                    "author": author,
+                    "events": int(events),
+                    "total_words": int(total_words),
+                    "unique_words": int(unique_words),
+                    "vocab_richness": round(vocab_richness, 3),
+                    "avg_words_per_event": round(avg_words, 2),
+                    "top_words": top_words,
+                }
+            )

-            rows = sorted(rows, key=lambda x: x["vocab_richness"], reverse=True)
+        rows = sorted(rows, key=lambda x: x["vocab_richness"], reverse=True)

-            return rows
+        return rows

    def top_users(self, df: pd.DataFrame) -> list:
        counts = df.groupby(["author", "source"]).size().sort_values(ascending=False)