diff --git a/server/analysis/linguistic.py b/server/analysis/linguistic.py
index c292328..8cfcc7f 100644
--- a/server/analysis/linguistic.py
+++ b/server/analysis/linguistic.py
@@ -9,6 +9,10 @@ class LinguisticAnalysis:
         self.df = df
         self.word_exclusions = word_exclusions
 
+    def _tokenize(self, text: str) -> list[str]:
+        tokens = re.findall(r"\b[a-z]{3,}\b", text)
+        return [t for t in tokens if t not in self.word_exclusions]
+
     def _clean_text(self, text: str) -> str:
         text = re.sub(r"http\S+", "", text)  # remove URLs
         text = re.sub(r"www\S+", "", text)
@@ -65,4 +69,36 @@ class LinguisticAnalysis:
             .sort_values("count", ascending=False)
             .head(limit)
             .to_dict(orient="records")
-        )
\ No newline at end of file
+        )
+
+    def identity_markers(self) -> dict:
+        """Measure in-group vs. out-group pronoun usage across all content.
+
+        Returns raw counts plus per-token ratios; ratios are rounded to
+        5 decimal places and guarded against division by zero.
+        """
+        contents = self.df["content"].fillna("").astype(str).str.lower()
+
+        in_group_words = {"we", "us", "our", "ourselves"}
+        out_group_words = {"they", "them", "their", "themselves"}
+
+        in_count = 0
+        out_count = 0
+        total = 0
+
+        # Iterate the Series of texts directly; iterating the DataFrame
+        # itself would only yield column labels.
+        for text in contents:
+            # {2,} (not _tokenize's {3,}) so two-letter markers such as
+            # "we" and "us" are kept.
+            tokens = re.findall(r"\b[a-z]{2,}\b", text)
+            total += len(tokens)
+            in_count += sum(t in in_group_words for t in tokens)
+            out_count += sum(t in out_group_words for t in tokens)
+
+        return {
+            "in_group_usage": in_count,
+            "out_group_usage": out_count,
+            "in_group_ratio": round(in_count / max(total, 1), 5),
+            "out_group_ratio": round(out_count / max(total, 1), 5),
+        }
diff --git a/server/app.py b/server/app.py
index 7b3730d..760e48d 100644
--- a/server/app.py
+++ b/server/app.py
@@ -100,6 +100,19 @@ def get_user_analysis():
         print(traceback.format_exc())
         return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
 
+@app.route("/stats/cultural", methods=["GET"])
+def get_cultural_analysis():
+    if stat_obj is None:
+        return jsonify({"error": "No data uploaded"}), 400
+
+    try:
+        return jsonify(stat_obj.cultural_analysis()), 200
+    except ValueError as e:
+        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
+    except Exception as e:
+        print(traceback.format_exc())
+        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
+
 @app.route('/filter/search', methods=["POST"])
 def search_dataset():
     if stat_obj is None:
diff --git a/server/stat_gen.py b/server/stat_gen.py
index 5d88cf6..014f95f 100644
--- a/server/stat_gen.py
+++ b/server/stat_gen.py
@@ -62,12 +62,17 @@ class StatGen:
         self.nlp.add_ner_cols()
 
     ## Public
+
+
+    # topics over time
+    # emotions over time
     def time_analysis(self) -> pd.DataFrame:
         return {
             "events_per_day": self.temporal_analysis.posts_per_day(),
             "weekday_hour_heatmap": self.temporal_analysis.heatmap()
         }
 
+    # average topic duration
     def content_analysis(self) -> dict:
         return {
             "word_frequencies": self.linguistic_analysis.word_frequencies(),
@@ -77,6 +82,8 @@ class StatGen:
             "reply_time_by_emotion": self.temporal_analysis.avg_reply_time_per_emotion()
         }
 
+    # average emotion per user
+    # average chain length
     def user_analysis(self) -> dict:
         return {
             "top_users": self.interaction_analysis.top_users(),
@@ -84,6 +91,19 @@ class StatGen:
             "interaction_graph": self.interaction_analysis.interaction_graph()
         }
 
+    # average / max thread depth
+    # high engagement threads based on volume
+    def conversational_analysis(self) -> dict:
+        # TODO: thread-depth and engagement metrics not implemented yet
+        return {}
+
+    # detect community jargon
+    # in-group and out-group linguistic markers
+    def cultural_analysis(self) -> dict:
+        return {
+            "identity_markers": self.linguistic_analysis.identity_markers()
+        }
+
     def summary(self) -> dict:
         total_posts = (self.df["type"] == "post").sum()
         total_comments = (self.df["type"] == "comment").sum()