Compare commits

..

4 Commits

6 changed files with 66 additions and 4 deletions

View File

@@ -50,7 +50,24 @@ const DatasetsPage = () => {
}, []);
if (loading) {
return <p style={{ ...styles.page, minHeight: "100vh" }}>Loading datasets...</p>;
return (
<div style={styles.loadingPage}>
<div style={{ ...styles.loadingCard, transform: "translateY(-100px)" }}>
<div style={styles.loadingHeader}>
<div style={styles.loadingSpinner} />
<div>
<h2 style={styles.loadingTitle}>Loading datasets</h2>
</div>
</div>
<div style={styles.loadingSkeleton}>
<div style={{ ...styles.loadingSkeletonLine, ...styles.loadingSkeletonLineLong }} />
<div style={{ ...styles.loadingSkeletonLine, ...styles.loadingSkeletonLineMed }} />
<div style={{ ...styles.loadingSkeletonLine, ...styles.loadingSkeletonLineShort }} />
</div>
</div>
</div>
)
}
return (

View File

@@ -157,16 +157,21 @@ def scrape_data():
return jsonify({"error": "Limit must be an integer"}), 400
name = source["name"]
category = source.get("category")
search = source.get("search")
if name not in connector_metadata:
return jsonify({"error": "Source not supported"}), 400
if "search" in source and not connector_metadata[name]["search_enabled"]:
if search and not connector_metadata[name]["search_enabled"]:
return jsonify({"error": f"Source {name} does not support search"}), 400
if "category" in source and not connector_metadata[name]["categories_enabled"]:
if category and not connector_metadata[name]["categories_enabled"]:
return jsonify({"error": f"Source {name} does not support categories"}), 400
if category and not connectors[name]().category_exists(category):
return jsonify({"error": f"Category does not exist for {name}"}), 400
try:
dataset_id = dataset_manager.save_dataset_info(
user_id,

View File

@@ -23,3 +23,7 @@ class BaseConnector(ABC):
post_limit: int = 10
) -> list[Post]:
...
@abstractmethod
def category_exists(self, category: str) -> bool:
    """Return whether ``category`` is a valid category for this connector.

    Declared abstract, so every concrete connector has to provide its own
    check. The base body does nothing and yields ``None`` if it is ever
    invoked directly.
    """

View File

@@ -38,6 +38,28 @@ class BoardsAPI(BaseConnector):
else:
return self._get_posts(f"{self.base_url}/discussions", post_limit)
def category_exists(self, category: str) -> bool:
    """Check whether ``category`` resolves to a category page.

    Sends a HEAD request to ``{base_url}/categories/{category}`` (following
    redirects) and falls back to a GET when HEAD answers with anything other
    than 200 or 404.

    Args:
        category: Category identifier to look up. It is percent-encoded and
            used as a single URL path segment.

    Returns:
        True when the category URL answers with HTTP 200. False for an empty
        category, an HTTP 404, any other final status, or a request error
        (which is logged).
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    from urllib.parse import quote

    if not category:
        return False

    # The category comes from the caller; encode it so characters such as
    # "/", "?" or "#" cannot alter the path or smuggle in a query string.
    segment = quote(str(category), safe="")
    url = f"{self.base_url}/categories/{segment}"

    # requests waits indefinitely unless a timeout is given, which would
    # stall the caller on an unresponsive server.
    timeout_seconds = 10

    try:
        response = requests.head(
            url,
            headers=HEADERS,
            allow_redirects=True,
            timeout=timeout_seconds,
        )
        if response.status_code == 200:
            return True
        if response.status_code == 404:
            return False

        # fallback if HEAD not supported
        response = requests.get(url, headers=HEADERS, timeout=timeout_seconds)
        return response.status_code == 200
    except requests.RequestException as e:
        # A timeout surfaces as a RequestException subclass and lands here.
        logger.error(f"Error checking category '{category}': {e}")
        return False
## Private
def _get_posts(self, url, limit) -> list[Post]:
urls = []

View File

@@ -94,6 +94,17 @@ class RedditAPI(BaseConnector):
data = self._fetch_post_overviews(f"user/{username}/about.json", {})
return self._parse_user(data)
def category_exists(self, category: str) -> bool:
    """Check whether ``r/{category}/about.json`` describes a real entry.

    Args:
        category: Name to look up under ``r/``.

    Returns:
        True when the fetched payload has a ``data`` mapping with a non-None
        ``id``. False for an empty category, a missing or malformed payload,
        or any error raised while fetching (which is logged).
    """
    # Function-scope import: this block cannot assume a module-level logger.
    import logging

    # An empty name would request "r//about.json"; skip the lookup.
    if not category:
        return False

    try:
        data = self._fetch_post_overviews(f"r/{category}/about.json", {})
        return (
            data is not None
            and "data" in data
            and data["data"].get("id") is not None
        )
    except Exception:
        # Deliberately best-effort: any failure counts as "does not exist",
        # but record it so outages are not mistaken for missing categories.
        logging.getLogger(__name__).warning(
            "Error checking category %r", category, exc_info=True
        )
        return False
## Private Methods ##
def _parse_posts(self, data) -> list[Post]:
posts = []

View File

@@ -69,6 +69,9 @@ class YouTubeAPI(BaseConnector):
return posts
def category_exists(self, category: str) -> bool:
    """Report every category as existing.

    No lookup is performed and ``category`` is not inspected; the method
    always returns True.

    NOTE(review): presumably this connector has no category validation to
    perform -- confirm against the connector metadata.
    """
    return True
def search_videos(self, query, limit):
request = self.youtube.search().list(
q=query,