feat(api): add cultural analysis endpoint with identity markers

2026-02-24 14:25:53 +00:00
parent 257eb80de7
commit ccba6a5262
3 changed files with 47 additions and 47 deletions
--- a/server/analysis/linguistic.py
+++ b/server/analysis/linguistic.py
@@ -69,45 +69,4 @@ class LinguisticAnalysis:
            .sort_values("count", ascending=False)
            .head(limit)
            .to_dict(orient="records")
-        )
-        
-    def identity_markers(self):
-        df = self.df.copy()
-        df["content"] = df["content"].fillna("").astype(str).str.lower()
-
-        in_group_words = {"we", "us", "our", "ourselves"}
-        out_group_words = {"they", "them", "their", "themselves"}
-
-        emotion_exclusions = [
-            "emotion_neutral",
-            "emotion_surprise"
-        ]
-
-        emotion_cols = [
-            col for col in self.df.columns
-            if col.startswith("emotion_") and col not in emotion_exclusions
-        ]
-        in_count = 0
-        out_count = 0
-        in_emotions = {e: 0 for e in emotion_cols}
-        out_emotions = {e: 0 for e in emotion_cols}
-        total = 0
-
-        for post in df:
-            text = post["content"]
-            tokens = re.findall(r"\b[a-z]{2,}\b", text)
-            total += len(tokens)
-            in_count += sum(t in in_group_words for t in tokens)
-            out_count += sum(t in out_group_words for t in tokens)
-
-            emotions = post[emotion_cols]
-            print(emotions)
-
-            
-
-        return {
-            "in_group_usage": in_count,
-            "out_group_usage": out_count,
-            "in_group_ratio": round(in_count / max(total, 1), 5),
-            "out_group_ratio": round(out_count / max(total, 1), 5),
-        }
+        )