refactor(dataset creation): update API methods to return only posts

This commit is contained in:
2026-02-09 21:20:08 +00:00
parent 645d2fdfdb
commit ec91904481
6 changed files with 87 additions and 65 deletions

View File

@@ -40,10 +40,9 @@ class YouTubeAPI:
return []
return response.get('items', [])
def fetch_video_and_comments(self, query, video_limit, comment_limit) -> tuple[list[Post], list[Comment]]:
def fetch_videos(self, query, video_limit, comment_limit) -> list[Post]:
videos = self.search_videos(query, video_limit)
posts = []
comments = []
for video in videos:
video_id = video['id']['videoId']
@@ -53,16 +52,7 @@ class YouTubeAPI:
published_at = datetime.datetime.strptime(snippet['publishedAt'], "%Y-%m-%dT%H:%M:%SZ").timestamp()
channel_title = snippet['channelTitle']
post = Post(
id=video_id,
content=f"{title}\n\n{description}",
author=channel_title,
timestamp=published_at,
url=f"https://www.youtube.com/watch?v={video_id}",
title=title,
source="YouTube"
)
comments = []
comments_data = self.get_video_comments(video_id, comment_limit)
for comment_thread in comments_data:
comment_snippet = comment_thread['snippet']['topLevelComment']['snippet']
@@ -77,6 +67,18 @@ class YouTubeAPI:
)
comments.append(comment)
post = Post(
id=video_id,
content=f"{title}\n\n{description}",
author=channel_title,
timestamp=published_at,
url=f"https://www.youtube.com/watch?v={video_id}",
title=title,
source="YouTube",
comments=comments
)
posts.append(post)
return posts, comments
return posts