Automatic Scraping of dataset options #9

Merged
dylan merged 36 commits from feat/automatic-scraping-datasets into main 2026-03-14 21:58:49 +00:00
Showing only changes of commit 5b0441c34b - Show all commits

View File

@@ -24,10 +24,9 @@ class YouTubeAPI(BaseConnector):
  def get_new_posts_by_search(self,
  search: str,
  category: str,
- post_limit: int,
- comment_limit: int
+ post_limit: int
  ) -> list[Post]:
- videos = self.search_videos(search, post_limit)
+ videos = self._search_videos(search, post_limit)
  posts = []
  for video in videos:
@@ -39,7 +38,7 @@ class YouTubeAPI(BaseConnector):
  channel_title = snippet['channelTitle']
  comments = []
- comments_data = self.get_video_comments(video_id, comment_limit)
+ comments_data = self._get_video_comments(video_id)
  for comment_thread in comments_data:
  comment_snippet = comment_thread['snippet']['topLevelComment']['snippet']
  comment = Comment(
@@ -72,7 +71,7 @@ class YouTubeAPI(BaseConnector):
  def category_exists(self, category):
  return True
- def search_videos(self, query, limit):
+ def _search_videos(self, query, limit):
  request = self.youtube.search().list(
  q=query,
  part='snippet',
@@ -82,11 +81,10 @@ class YouTubeAPI(BaseConnector):
  response = request.execute()
  return response.get('items', [])
- def get_video_comments(self, video_id, limit):
+ def _get_video_comments(self, video_id):
  request = self.youtube.commentThreads().list(
  part='snippet',
  videoId=video_id,
- maxResults=limit,
  textFormat='plainText'
  )