feat(connectors): implement category validation in scraping process
This commit is contained in:
@@ -22,4 +22,8 @@ class BaseConnector(ABC):
|
||||
category: str = None,
|
||||
post_limit: int = 10
|
||||
) -> list[Post]:
|
||||
...
|
||||
|
||||
@abstractmethod
def category_exists(self, category: str) -> bool:
    """Report whether ``category`` is recognised by this connector.

    Abstract hook: it has no implementation here, so every concrete
    connector has to provide its own check.
    """
|
||||
@@ -37,6 +37,9 @@ class BoardsAPI(BaseConnector):
|
||||
return self._get_posts(f"{self.base_url}/categories/{category}", post_limit)
|
||||
else:
|
||||
return self._get_posts(f"{self.base_url}/discussions", post_limit)
|
||||
|
||||
def category_exists(self, category: str) -> bool:
    """Report whether ``category`` exists for this connector.

    Placeholder implementation: no lookup is performed and every category
    is accepted.

    Args:
        category: Name of the category to check (currently unused).

    Returns:
        Always ``True``.
    """
    # TODO(review): replace with a real lookup once a category check is
    # available for this service.
    return True
|
||||
|
||||
## Private
|
||||
def _get_posts(self, url, limit) -> list[Post]:
|
||||
|
||||
@@ -94,6 +94,17 @@ class RedditAPI(BaseConnector):
|
||||
data = self._fetch_post_overviews(f"user/{username}/about.json", {})
|
||||
return self._parse_user(data)
|
||||
|
||||
def category_exists(self, category: str) -> bool:
    """Report whether ``category`` resolves to a subreddit.

    Requests ``r/<category>/about.json`` through
    ``self._fetch_post_overviews`` and treats the category as existing only
    when the response is a dict whose ``"data"`` entry carries a
    non-``None`` ``"id"``.

    Args:
        category: Subreddit name, without the ``r/`` prefix.

    Returns:
        ``True`` when the lookup yields an ``id``; ``False`` for a blank or
        non-string name, a failed request, or a response of any other shape.
    """
    # A blank name would request "r//about.json" and ``None`` would be
    # formatted as the literal subreddit "None"; neither is a real category.
    if not isinstance(category, str) or not category.strip():
        return False

    # Only the fetch itself is guarded. Any failure is deliberately reported
    # as "does not exist" so validation stays best-effort.
    # NOTE(review): the exceptions raised by the helper are not visible
    # here, hence the broad catch is kept.
    try:
        response = self._fetch_post_overviews(f"r/{category}/about.json", {})
    except Exception:
        return False

    # Check the payload shape explicitly instead of relying on an exception.
    details = response.get("data") if isinstance(response, dict) else None
    return isinstance(details, dict) and details.get("id") is not None
|
||||
|
||||
## Private Methods ##
|
||||
def _parse_posts(self, data) -> list[Post]:
|
||||
posts = []
|
||||
|
||||
@@ -68,6 +68,9 @@ class YouTubeAPI(BaseConnector):
|
||||
posts.append(post)
|
||||
|
||||
return posts
|
||||
|
||||
def category_exists(self, category: str) -> bool:
    """Report whether ``category`` exists for this connector.

    Placeholder implementation: the argument is not inspected and every
    category is reported as existing.

    Args:
        category: Name of the category to check (currently unused).

    Returns:
        Always ``True``.
    """
    # TODO(review): no category lookup is made here yet; confirm whether
    # this connector needs one.
    return True
|
||||
|
||||
def search_videos(self, query, limit):
|
||||
request = self.youtube.search().list(
|
||||
|
||||
Reference in New Issue
Block a user