diff --git a/server/analysis/linguistic.py b/server/analysis/linguistic.py
index 3c1fdbc..c292328 100644
--- a/server/analysis/linguistic.py
+++ b/server/analysis/linguistic.py
@@ -2,12 +2,22 @@
 import pandas as pd
 import re
 from collections import Counter
+from itertools import islice
 
 class LinguisticAnalysis:
     def __init__(self, df: pd.DataFrame, word_exclusions: set[str]):
         self.df = df
         self.word_exclusions = word_exclusions
 
+    def _clean_text(self, text: str) -> str:
+        """Strip URLs, HTML entities, and image filenames from *text*."""
+        text = re.sub(r"http\S+", "", text)  # remove URLs
+        text = re.sub(r"www\S+", "", text)
+        text = re.sub(r"&\w+;", "", text)  # remove HTML entities
+        text = re.sub(r"\bamp\b", "", text)  # remove stray "amp" left by a mangled &amp;
+        # \b after the group: without it "photo.jpeg" matched only "photo.jpg" and left "eg" behind
+        text = re.sub(r"\S+\.(jpg|jpeg|png|webp|gif)\b", "", text)
+        return text
+
     def word_frequencies(self, limit: int = 100) -> dict:
         texts = (
             self.df["content"]
@@ -34,4 +44,27 @@ class LinguisticAnalysis:
             .reset_index(drop=True)
         )
 
-        return word_frequencies.to_dict(orient="records")
\ No newline at end of file
+        return word_frequencies.to_dict(orient="records")
+
+    def ngrams(self, n: int = 2, limit: int = 100) -> list:
+        """Return the *limit* most common n-grams as [{"ngram", "count"}, ...] records."""
+        texts = self.df["content"].dropna().astype(str).apply(self._clean_text).str.lower()
+        all_ngrams = []
+
+        for text in texts:
+            tokens = re.findall(r"\b[a-z]{3,}\b", text)
+
+            # stop word removal causes strange behaviors in ngrams
+            # tokens = [w for w in tokens if w not in self.word_exclusions]
+
+            ngrams = zip(*(islice(tokens, i, None) for i in range(n)))
+            all_ngrams.extend(" ".join(ng) for ng in ngrams)
+
+        counts = Counter(all_ngrams)
+
+        return (
+            pd.DataFrame(counts.items(), columns=["ngram", "count"])
+            .sort_values("count", ascending=False)
+            .head(limit)
+            .to_dict(orient="records")
+        )
diff --git a/server/stat_gen.py b/server/stat_gen.py
index 3e36010..4200741 100644
--- a/server/stat_gen.py
+++ b/server/stat_gen.py
@@ -65,6 +65,24 @@ class StatGen:
             "events_per_day": self.temporal_analysis.posts_per_day(),
             "weekday_hour_heatmap": self.temporal_analysis.heatmap()
         }
+
+    def content_analysis(self) -> dict:
+        """Aggregate linguistic, emotional, and temporal content statistics."""
+        return {
+            "word_frequencies": self.linguistic_analysis.word_frequencies(),
+            "common_two_phrases": self.linguistic_analysis.ngrams(),
+            "common_three_phrases": self.linguistic_analysis.ngrams(n=3),
+            "average_emotion_by_topic": self.emotional_analysis.avg_emotion_by_topic(),
+            "reply_time_by_emotion": self.temporal_analysis.avg_reply_time_per_emotion()
+        }
+
+    def user_analysis(self) -> dict:
+        """Aggregate per-user activity and interaction statistics."""
+        return {
+            "top_users": self.interaction_analysis.top_users(),
+            "users": self.interaction_analysis.per_user_analysis(),
+            "interaction_graph": self.interaction_analysis.interaction_graph()
+        }
 
     def summary(self) -> dict:
         total_posts = (self.df["type"] == "post").sum()
@@ -85,20 +103,6 @@ class StatGen:
             },
             "sources": self.df["source"].dropna().unique().tolist()
         }
-
-    def content_analysis(self) -> dict:
-        return {
-            "word_frequencies": self.linguistic_analysis.word_frequencies(),
-            "average_emotion_by_topic": self.emotional_analysis.avg_emotion_by_topic(),
-            "reply_time_by_emotion": self.temporal_analysis.avg_reply_time_per_emotion()
-        }
-
-    def user_analysis(self) -> dict:
-        return {
-            "top_users": self.interaction_analysis.top_users(),
-            "users": self.interaction_analysis.per_user_analysis(),
-            "interaction_graph": self.interaction_analysis.interaction_graph()
-        }
 
     def search(self, search_query: str) -> dict:
         self.df = self.df[