From 7c1e069152aec1763bdebba2ded5b5f527e97e80 Mon Sep 17 00:00:00 2001
From: Dylan De Faoite <dylanseandefaoite@gmail.com>
Date: Mon, 16 Feb 2026 16:41:06 +0000
Subject: [PATCH] fix(backend): comment parsing didn't account for NaN values

---
 server/stat_gen.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/server/stat_gen.py b/server/stat_gen.py
index 2998826..d8c38da 100644
--- a/server/stat_gen.py
+++ b/server/stat_gen.py
@@ -23,6 +23,7 @@ EXCLUDE_WORDS = set(stopwords.words('english')) | DOMAIN_STOPWORDS
 class StatGen:
     def __init__(self, df: pd.DataFrame, domain_topics: dict) -> None:
         comments_df = df[["id", "comments"]].explode("comments")
+        comments_df = comments_df[comments_df["comments"].apply(lambda x: isinstance(x, dict))]
         comments_df = pd.json_normalize(comments_df["comments"])
 
         posts_df = df.drop(columns=["comments"])