diff --git a/create_dataset.py b/create_dataset.py index dd9ae25..791b2bd 100644 --- a/create_dataset.py +++ b/create_dataset.py @@ -25,18 +25,18 @@ def save_to_jsonl(filename, posts): def main(): - boards_posts = boards_connector.get_new_category_posts('cork-city', 10, 10) + boards_posts = boards_connector.get_new_category_posts('cork-city', 1200, 1200) save_to_jsonl(posts_file, boards_posts) - reddit_posts = reddit_connector.get_new_subreddit_posts('cork', 10) + reddit_posts = reddit_connector.get_new_subreddit_posts('cork', 1200) reddit_posts = remove_empty_posts(reddit_posts) save_to_jsonl(posts_file, reddit_posts) - ireland_posts = reddit_connector.search_new_subreddit_posts('cork', 'ireland', 10) + ireland_posts = reddit_connector.search_new_subreddit_posts('cork', 'ireland', 1200) ireland_posts = remove_empty_posts(ireland_posts) save_to_jsonl(posts_file, ireland_posts) - youtube_videos = youtube_connector.fetch_videos('cork city', 10, 10) + youtube_videos = youtube_connector.fetch_videos('cork city', 1200, 1200) save_to_jsonl(posts_file, youtube_videos) if __name__ == "__main__":