YouTube connector returns posts and comments as two separate flat lists instead of nesting comments inside each post

This commit is contained in:
2026-01-24 20:19:15 +00:00
parent d96845d48b
commit 7d94494fe2
2 changed files with 17 additions and 14 deletions

View File

@@ -40,9 +40,10 @@ class YouTubeAPI:
return [] return []
return response.get('items', []) return response.get('items', [])
def fetch_and_parse_videos(self, query, video_limit, comment_limit): def fetch_video_and_comments(self, query, video_limit, comment_limit) -> tuple[list[Post], list[Comment]]:
videos = self.search_videos(query, video_limit) videos = self.search_videos(query, video_limit)
posts = [] posts = []
comments = []
for video in videos: for video in videos:
video_id = video['id']['videoId'] video_id = video['id']['videoId']
@@ -62,7 +63,6 @@ class YouTubeAPI:
source="YouTube" source="YouTube"
) )
post.comments = []
comments_data = self.get_video_comments(video_id, comment_limit) comments_data = self.get_video_comments(video_id, comment_limit)
for comment_thread in comments_data: for comment_thread in comments_data:
comment_snippet = comment_thread['snippet']['topLevelComment']['snippet'] comment_snippet = comment_thread['snippet']['topLevelComment']['snippet']
@@ -75,8 +75,8 @@ class YouTubeAPI:
reply_to=None, reply_to=None,
source="YouTube" source="YouTube"
) )
post.comments.append(comment)
comments.append(comment)
posts.append(post) posts.append(post)
return posts return posts, comments

View File

@@ -30,20 +30,23 @@ def save_to_jsonl(filename, posts):
def main(): def main():
boards_posts, boards_comments = boards_connector.get_new_category_posts('cork-city', limit=5) boards_posts, boards_comments = boards_connector.get_new_category_posts('cork-city', limit=400)
save_to_jsonl(posts_file, boards_posts) save_to_jsonl(posts_file, boards_posts)
save_to_jsonl(comments_file, boards_comments) save_to_jsonl(comments_file, boards_comments)
#reddit_posts = reddit_connector.get_new_subreddit_posts('cork', limit=350) reddit_posts, reddit_comments = reddit_connector.get_new_subreddit_posts('cork', limit=400)
#reddit_posts = remove_empty_posts(reddit_posts) reddit_posts = remove_empty_posts(reddit_posts)
#save_to_jsonl(data_file, reddit_posts) save_to_jsonl(posts_file, reddit_posts)
save_to_jsonl(comments_file, reddit_comments)
#ireland_posts = reddit_connector.search_subreddit('cork', 'ireland', limit=350, timeframe='year')
#ireland_posts = remove_empty_posts(ireland_posts)
#save_to_jsonl(data_file, ireland_posts)
#youtube_videos = youtube_connector.fetch_and_parse_videos('cork city', 100, 100) ireland_posts, ireland_comments = reddit_connector.search_new_subreddit_posts('cork', 'ireland', limit=10)
#save_to_jsonl(data_file, youtube_videos) ireland_posts = remove_empty_posts(ireland_posts)
save_to_jsonl(posts_file, ireland_posts)
save_to_jsonl(comments_file, ireland_comments)
youtube_videos, youtube_comments = youtube_connector.fetch_video_and_comments('cork city', 100, 100)
save_to_jsonl(posts_file, youtube_videos)
save_to_jsonl(comments_file, youtube_comments)
if __name__ == "__main__": if __name__ == "__main__":
main() main()