fix(backend): comment parsing didn't account for NaN values
This commit is contained in:
@@ -23,6 +23,7 @@ EXCLUDE_WORDS = set(stopwords.words('english')) | DOMAIN_STOPWORDS
|
|||||||
class StatGen:
|
class StatGen:
|
||||||
def __init__(self, df: pd.DataFrame, domain_topics: dict) -> None:
|
def __init__(self, df: pd.DataFrame, domain_topics: dict) -> None:
|
||||||
comments_df = df[["id", "comments"]].explode("comments")
|
comments_df = df[["id", "comments"]].explode("comments")
|
||||||
|
comments_df = comments_df[comments_df["comments"].apply(lambda x: isinstance(x, dict))]
|
||||||
comments_df = pd.json_normalize(comments_df["comments"])
|
comments_df = pd.json_normalize(comments_df["comments"])
|
||||||
|
|
||||||
posts_df = df.drop(columns=["comments"])
|
posts_df = df.drop(columns=["comments"])
|
||||||
|
|||||||
Reference in New Issue
Block a user