YouTube connector returns posts and comments as two separate flat lists

This commit is contained in:
2026-01-24 20:19:15 +00:00
parent d96845d48b
commit 7d94494fe2
2 changed files with 17 additions and 14 deletions

View File

@@ -40,9 +40,10 @@ class YouTubeAPI:
return []
return response.get('items', [])
def fetch_and_parse_videos(self, query, video_limit, comment_limit):
def fetch_video_and_comments(self, query, video_limit, comment_limit) -> tuple[list[Post], list[Comment]]:
videos = self.search_videos(query, video_limit)
posts = []
comments = []
for video in videos:
video_id = video['id']['videoId']
@@ -62,7 +63,6 @@ class YouTubeAPI:
source="YouTube"
)
post.comments = []
comments_data = self.get_video_comments(video_id, comment_limit)
for comment_thread in comments_data:
comment_snippet = comment_thread['snippet']['topLevelComment']['snippet']
@@ -75,8 +75,8 @@ class YouTubeAPI:
reply_to=None,
source="YouTube"
)
post.comments.append(comment)
comments.append(comment)
posts.append(post)
return posts
return posts, comments

View File

@@ -30,20 +30,23 @@ def save_to_jsonl(filename, posts):
def main():
boards_posts, boards_comments = boards_connector.get_new_category_posts('cork-city', limit=5)
boards_posts, boards_comments = boards_connector.get_new_category_posts('cork-city', limit=400)
save_to_jsonl(posts_file, boards_posts)
save_to_jsonl(comments_file, boards_comments)
#reddit_posts = reddit_connector.get_new_subreddit_posts('cork', limit=350)
#reddit_posts = remove_empty_posts(reddit_posts)
#save_to_jsonl(data_file, reddit_posts)
reddit_posts, reddit_comments = reddit_connector.get_new_subreddit_posts('cork', limit=400)
reddit_posts = remove_empty_posts(reddit_posts)
save_to_jsonl(posts_file, reddit_posts)
save_to_jsonl(comments_file, reddit_comments)
#ireland_posts = reddit_connector.search_subreddit('cork', 'ireland', limit=350, timeframe='year')
#ireland_posts = remove_empty_posts(ireland_posts)
#save_to_jsonl(data_file, ireland_posts)
ireland_posts, ireland_comments = reddit_connector.search_new_subreddit_posts('cork', 'ireland', limit=10)
ireland_posts = remove_empty_posts(ireland_posts)
save_to_jsonl(posts_file, ireland_posts)
save_to_jsonl(comments_file, ireland_comments)
#youtube_videos = youtube_connector.fetch_and_parse_videos('cork city', 100, 100)
#save_to_jsonl(data_file, youtube_videos)
youtube_videos, youtube_comments = youtube_connector.fetch_video_and_comments('cork city', 100, 100)
save_to_jsonl(posts_file, youtube_videos)
save_to_jsonl(comments_file, youtube_comments)
if __name__ == "__main__":
main()