Rename fetch data script & add check for empty posts

This commit is contained in:
2026-01-13 19:06:00 +00:00
parent 538ea9fe12
commit b0e079599a

20
create_dataset.py Normal file
View File

@@ -0,0 +1,20 @@
import json
from connectors.reddit_connector import RedditConnector
# Path of the JSON file that main() writes the fetched posts to.
data_file = 'data/reddit_posts.json'
# Connector instance created at import time; main() uses it to fetch posts.
reddit_connector = RedditConnector()
def remove_empty_posts(posts):
    """Return the posts whose ``content`` holds non-whitespace text.

    Args:
        posts: Iterable of objects exposing a ``content`` attribute.

    Returns:
        A new list, in the original order, without the posts whose content
        is ``None``, empty, or whitespace-only.
    """
    # A missing body (``None``) is treated like an empty one instead of
    # raising AttributeError on ``.strip()``.
    return [post for post in posts if post.content and post.content.strip()]
def main():
    """Fetch new r/cork posts, drop the empty ones, and save them as JSON.

    Calls ``get_new_subreddit_posts('cork', limit=1000)`` on the module-level
    ``reddit_connector``, filters the result with ``remove_empty_posts`` and
    writes each remaining post's attribute dictionary to ``data_file``.
    """
    import os

    posts = reddit_connector.get_new_subreddit_posts('cork', limit=1000)
    posts = remove_empty_posts(posts)
    # The count is taken after filtering, so it matches what gets saved.
    print(f"Fetched {len(posts)} posts from r/cork")

    # Create the output directory on a fresh checkout; open() would otherwise
    # fail with FileNotFoundError.
    output_dir = os.path.dirname(data_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Explicit UTF-8 keeps the file independent of the platform's default
    # text encoding.
    with open(data_file, 'w', encoding='utf-8') as f:
        json.dump([vars(post) for post in posts], f, indent=4)


if __name__ == "__main__":
    main()