From b0e079599a8887a977bf6d3392e5f1b7b2923d9b Mon Sep 17 00:00:00 2001 From: Dylan De Faoite Date: Tue, 13 Jan 2026 19:06:00 +0000 Subject: [PATCH] Rename fetch data script & add check for empty posts --- fetch_data.py => create_dataset.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) rename fetch_data.py => create_dataset.py (68%) diff --git a/fetch_data.py b/create_dataset.py similarity index 68% rename from fetch_data.py rename to create_dataset.py index 46b8f5e..6e74b31 100644 --- a/fetch_data.py +++ b/create_dataset.py @@ -1,11 +1,16 @@ import json from connectors.reddit_connector import RedditConnector -data_file = 'reddit_posts.json' +data_file = 'data/reddit_posts.json' reddit_connector = RedditConnector() +def remove_empty_posts(posts): + return [post for post in posts if post.content.strip() != ""] + def main(): posts = reddit_connector.get_new_subreddit_posts('cork', limit=1000) + posts = remove_empty_posts(posts) + print(f"Fetched {len(posts)} posts from r/cork") with open(data_file, 'w') as f: