fix(backend): comment parsing didn't account for NaN values

2026-02-16 16:41:06 +00:00
parent fb20c3ab1b
commit 7c1e069152
1 changed files with 1 additions and 0 deletions
--- a/server/stat_gen.py
+++ b/server/stat_gen.py
@@ -23,6 +23,7 @@ EXCLUDE_WORDS = set(stopwords.words('english')) | DOMAIN_STOPWORDS
 class StatGen:
    def __init__(self, df: pd.DataFrame, domain_topics: dict) -> None:
        comments_df = df[["id", "comments"]].explode("comments")
+        comments_df = comments_df[comments_df["comments"].apply(lambda x: isinstance(x, dict))]
        comments_df = pd.json_normalize(comments_df["comments"])

        posts_df = df.drop(columns=["comments"])