YouTube connector returns posts and comments as two separate flat lists
This commit is contained in:
@@ -40,9 +40,10 @@ class YouTubeAPI:
|
|||||||
return []
|
return []
|
||||||
return response.get('items', [])
|
return response.get('items', [])
|
||||||
|
|
||||||
def fetch_and_parse_videos(self, query, video_limit, comment_limit):
|
def fetch_video_and_comments(self, query, video_limit, comment_limit) -> tuple[list[Post], list[Comment]]:
|
||||||
videos = self.search_videos(query, video_limit)
|
videos = self.search_videos(query, video_limit)
|
||||||
posts = []
|
posts = []
|
||||||
|
comments = []
|
||||||
|
|
||||||
for video in videos:
|
for video in videos:
|
||||||
video_id = video['id']['videoId']
|
video_id = video['id']['videoId']
|
||||||
@@ -62,7 +63,6 @@ class YouTubeAPI:
|
|||||||
source="YouTube"
|
source="YouTube"
|
||||||
)
|
)
|
||||||
|
|
||||||
post.comments = []
|
|
||||||
comments_data = self.get_video_comments(video_id, comment_limit)
|
comments_data = self.get_video_comments(video_id, comment_limit)
|
||||||
for comment_thread in comments_data:
|
for comment_thread in comments_data:
|
||||||
comment_snippet = comment_thread['snippet']['topLevelComment']['snippet']
|
comment_snippet = comment_thread['snippet']['topLevelComment']['snippet']
|
||||||
@@ -75,8 +75,8 @@ class YouTubeAPI:
|
|||||||
reply_to=None,
|
reply_to=None,
|
||||||
source="YouTube"
|
source="YouTube"
|
||||||
)
|
)
|
||||||
post.comments.append(comment)
|
|
||||||
|
|
||||||
|
comments.append(comment)
|
||||||
posts.append(post)
|
posts.append(post)
|
||||||
|
|
||||||
return posts
|
return posts, comments
|
||||||
@@ -30,20 +30,23 @@ def save_to_jsonl(filename, posts):
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
boards_posts, boards_comments = boards_connector.get_new_category_posts('cork-city', limit=5)
|
boards_posts, boards_comments = boards_connector.get_new_category_posts('cork-city', limit=400)
|
||||||
save_to_jsonl(posts_file, boards_posts)
|
save_to_jsonl(posts_file, boards_posts)
|
||||||
save_to_jsonl(comments_file, boards_comments)
|
save_to_jsonl(comments_file, boards_comments)
|
||||||
|
|
||||||
#reddit_posts = reddit_connector.get_new_subreddit_posts('cork', limit=350)
|
reddit_posts, reddit_comments = reddit_connector.get_new_subreddit_posts('cork', limit=400)
|
||||||
#reddit_posts = remove_empty_posts(reddit_posts)
|
reddit_posts = remove_empty_posts(reddit_posts)
|
||||||
#save_to_jsonl(data_file, reddit_posts)
|
save_to_jsonl(posts_file, reddit_posts)
|
||||||
|
save_to_jsonl(comments_file, reddit_comments)
|
||||||
#ireland_posts = reddit_connector.search_subreddit('cork', 'ireland', limit=350, timeframe='year')
|
|
||||||
#ireland_posts = remove_empty_posts(ireland_posts)
|
|
||||||
#save_to_jsonl(data_file, ireland_posts)
|
|
||||||
|
|
||||||
#youtube_videos = youtube_connector.fetch_and_parse_videos('cork city', 100, 100)
|
ireland_posts, ireland_comments = reddit_connector.search_new_subreddit_posts('cork', 'ireland', limit=10)
|
||||||
#save_to_jsonl(data_file, youtube_videos)
|
ireland_posts = remove_empty_posts(ireland_posts)
|
||||||
|
save_to_jsonl(posts_file, ireland_posts)
|
||||||
|
save_to_jsonl(comments_file, ireland_comments)
|
||||||
|
|
||||||
|
youtube_videos, youtube_comments = youtube_connector.fetch_video_and_comments('cork city', 100, 100)
|
||||||
|
save_to_jsonl(posts_file, youtube_videos)
|
||||||
|
save_to_jsonl(comments_file, youtube_comments)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
Reference in New Issue
Block a user