Implement YouTube API integration for video and comment fetching
This commit is contained in:
76
connectors/youtube_api.py
Normal file
76
connectors/youtube_api.py
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
import json
import logging
import os

from dotenv import load_dotenv
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

from dto.post import Post
from dto.comment import Comment
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# YouTube Data API key, read from the YOUTUBE_API_KEY environment variable.
# This value is a credential: it must never be printed or logged, so the
# debugging print that used to follow this assignment has been removed.
API_KEY = os.getenv("YOUTUBE_API_KEY")
|
||||||
|
|
||||||
|
class YouTubeAPI:
    """Connector that searches YouTube videos and fetches their top-level comments.

    Raw API items are converted into the project's ``Post`` and ``Comment``
    objects by :meth:`fetch_and_parse_videos`.
    """

    # Largest ``maxResults`` value the YouTube Data API accepts per request
    # (see the search.list and commentThreads.list references). Larger limits
    # are served by following ``nextPageToken``.
    _SEARCH_PAGE_MAX = 50
    _COMMENTS_PAGE_MAX = 100

    def __init__(self):
        """Build the YouTube Data API v3 client using the module-level API key."""
        self.youtube = build('youtube', 'v3', developerKey=API_KEY)

    def _list_paginated(self, resource, limit, page_max, **params):
        """Collect up to ``limit`` items from ``resource.list(...)``, page by page.

        Args:
            resource: API resource object exposing ``list(**kwargs)`` whose
                result has an ``execute()`` method returning a dict.
            limit: Maximum number of items to return; ``limit <= 0`` returns
                an empty list without issuing a request.
            page_max: Largest ``maxResults`` value allowed for one request.
            **params: Remaining query parameters forwarded to ``list``.

        Returns:
            A list with at most ``limit`` raw API items.
        """
        items = []
        page_token = None
        while len(items) < limit:
            if page_token:
                params['pageToken'] = page_token
            page_size = min(page_max, limit - len(items))
            response = resource.list(maxResults=page_size, **params).execute()
            page_items = response.get('items', [])
            items.extend(page_items)
            page_token = response.get('nextPageToken')
            # Stop on the last page, and also on an empty page so a stray
            # token can never keep the loop spinning.
            if not page_token or not page_items:
                break
        # A page may hold more items than were asked for; never exceed limit.
        return items[:limit]

    def search_videos(self, query, limit):
        """Search for videos matching ``query``.

        Args:
            query: Free-text search string.
            limit: Maximum number of search results to return.

        Returns:
            A list of raw search-result items (dicts), at most ``limit`` long.
        """
        return self._list_paginated(
            self.youtube.search(),
            limit,
            self._SEARCH_PAGE_MAX,
            q=query,
            part='snippet',
            type='video',
        )

    def get_video_comments(self, video_id, limit):
        """Fetch top-level comment threads of a video as plain text.

        Args:
            video_id: ID of the video whose comment threads are requested.
            limit: Maximum number of comment threads to return.

        Returns:
            A list of raw comment-thread items (dicts), at most ``limit`` long.

        Raises:
            Whatever the underlying request raises; errors are not handled
            here so callers can decide how to react.
        """
        return self._list_paginated(
            self.youtube.commentThreads(),
            limit,
            self._COMMENTS_PAGE_MAX,
            part='snippet',
            videoId=video_id,
            textFormat='plainText',
        )

    @staticmethod
    def _comments_disabled(error):
        """Return True if ``error`` carries the API reason ``commentsDisabled``.

        The reason is read from the JSON error body in ``error.content``;
        anything that cannot be parsed that way is treated as "not this
        error" so unrelated failures (quota, auth, ...) are never swallowed.
        """
        try:
            payload = json.loads(error.content)
            reasons = {entry.get('reason') for entry in payload['error']['errors']}
        except (ValueError, KeyError, TypeError, AttributeError):
            return False
        return 'commentsDisabled' in reasons

    @staticmethod
    def _build_post(video_id, snippet):
        """Create a ``Post`` from a search result's video ID and snippet."""
        title = snippet['title']
        return Post(
            id=video_id,
            content=f"{title}\n\n{snippet['description']}",
            author=snippet['channelTitle'],
            timestamp=snippet['publishedAt'],
            url=f"https://www.youtube.com/watch?v={video_id}",
            title=title,
            source="YouTube"
        )

    @staticmethod
    def _build_comment(video_id, comment_thread):
        """Create a ``Comment`` from the top-level comment of a thread item."""
        comment_snippet = comment_thread['snippet']['topLevelComment']['snippet']
        return Comment(
            id=comment_thread['id'],
            post_id=video_id,
            content=comment_snippet['textDisplay'],
            author=comment_snippet['authorDisplayName'],
            timestamp=comment_snippet['publishedAt'],
            reply_to=None,  # only top-level comments are collected
            source="YouTube"
        )

    def fetch_and_parse_videos(self, query, video_limit, comment_limit):
        """Search videos and return them as ``Post`` objects with comments attached.

        Args:
            query: Free-text search string.
            video_limit: Maximum number of videos to fetch.
            comment_limit: Maximum number of top-level comments per video.

        Returns:
            A list of ``Post`` objects; each has a ``comments`` list of
            ``Comment`` objects (empty when the video has comments disabled).

        Raises:
            HttpError: For any API failure other than a video having its
                comments disabled.
        """
        posts = []

        for video in self.search_videos(query, video_limit):
            video_id = video['id']['videoId']
            post = self._build_post(video_id, video['snippet'])

            try:
                threads = self.get_video_comments(video_id, comment_limit)
            except HttpError as error:
                # One video with comments turned off must not abort the
                # whole batch; every other API error still propagates.
                if not self._comments_disabled(error):
                    raise
                logging.getLogger(__name__).info(
                    "Comments are disabled for video %s; keeping it without comments",
                    video_id,
                )
                threads = []

            post.comments = [
                self._build_comment(video_id, thread) for thread in threads
            ]
            posts.append(post)

        return posts
|
||||||
@@ -2,10 +2,12 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
from connectors.reddit_api import RedditAPI
|
from connectors.reddit_api import RedditAPI
|
||||||
from connectors.boards_api import BoardsAPI
|
from connectors.boards_api import BoardsAPI
|
||||||
|
from connectors.youtube_api import YouTubeAPI
|
||||||
|
|
||||||
data_file = 'data/posts.json'
|
data_file = 'data/posts.jsonl'
|
||||||
reddit_connector = RedditAPI()
|
reddit_connector = RedditAPI()
|
||||||
boards_connector = BoardsAPI()
|
boards_connector = BoardsAPI()
|
||||||
|
youtube_connector = YouTubeAPI()
|
||||||
|
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
logging.getLogger("urllib3").setLevel(logging.WARNING)
|
logging.getLogger("urllib3").setLevel(logging.WARNING)
|
||||||
@@ -40,5 +42,8 @@ def main():
|
|||||||
ireland_posts = remove_empty_posts(ireland_posts)
|
ireland_posts = remove_empty_posts(ireland_posts)
|
||||||
save_to_jsonl(data_file, ireland_posts)
|
save_to_jsonl(data_file, ireland_posts)
|
||||||
|
|
||||||
|
youtube_videos = youtube_connector.fetch_and_parse_videos('cork city', 50, 50)
|
||||||
|
save_to_jsonl(data_file, youtube_videos)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
@@ -4,15 +4,29 @@ certifi==2026.1.4
|
|||||||
charset-normalizer==3.4.4
|
charset-normalizer==3.4.4
|
||||||
click==8.3.1
|
click==8.3.1
|
||||||
Flask==3.1.2
|
Flask==3.1.2
|
||||||
|
google-api-core==2.29.0
|
||||||
|
google-api-python-client==2.188.0
|
||||||
|
google-auth==2.47.0
|
||||||
|
google-auth-httplib2==0.3.0
|
||||||
|
googleapis-common-protos==1.72.0
|
||||||
|
httplib2==0.31.1
|
||||||
idna==3.11
|
idna==3.11
|
||||||
itsdangerous==2.2.0
|
itsdangerous==2.2.0
|
||||||
Jinja2==3.1.6
|
Jinja2==3.1.6
|
||||||
MarkupSafe==3.0.3
|
MarkupSafe==3.0.3
|
||||||
|
proto-plus==1.27.0
|
||||||
|
protobuf==6.33.4
|
||||||
psycopg2==2.9.11
|
psycopg2==2.9.11
|
||||||
psycopg2-binary==2.9.11
|
psycopg2-binary==2.9.11
|
||||||
|
pyasn1==0.6.2
|
||||||
|
pyasn1_modules==0.4.2
|
||||||
|
pyparsing==3.3.1
|
||||||
|
python-dotenv==1.2.1
|
||||||
requests==2.32.5
|
requests==2.32.5
|
||||||
|
rsa==4.9.1
|
||||||
soupsieve==2.8.1
|
soupsieve==2.8.1
|
||||||
typing_extensions==4.15.0
|
typing_extensions==4.15.0
|
||||||
|
uritemplate==4.2.0
|
||||||
urllib3==2.6.3
|
urllib3==2.6.3
|
||||||
vaderSentiment==3.3.2
|
vaderSentiment==3.3.2
|
||||||
Werkzeug==3.1.5
|
Werkzeug==3.1.5
|
||||||
|
|||||||
Reference in New Issue
Block a user