feat(api): support per-source search, category and limit configuration

This commit is contained in:
2026-03-10 23:15:33 +00:00
parent d520e2af98
commit 2ab74d922a
2 changed files with 33 additions and 26 deletions

View File

@@ -1,5 +1,5 @@
import pandas as pd
import json
import logging
from server.queue.celery_app import celery
from server.analysis.enrichment import DatasetEnrichment
@@ -7,6 +7,8 @@ from server.db.database import PostgresConnector
from server.core.datasets import DatasetManager
from server.connectors.registry import get_available_connectors
logger = logging.getLogger(__name__)
@celery.task(bind=True, max_retries=3)
def process_dataset(self, dataset_id: int, posts: list, topics: dict):
db = PostgresConnector()
@@ -26,9 +28,7 @@ def process_dataset(self, dataset_id: int, posts: list, topics: dict):
@celery.task(bind=True, max_retries=3)
def fetch_and_process_dataset(self,
dataset_id: int,
per_source: dict[str, int],
search: str,
category: str,
source_info: list[dict],
topics: dict):
connectors = get_available_connectors()
db = PostgresConnector()
@@ -36,13 +36,18 @@ def fetch_and_process_dataset(self,
posts = []
try:
for source_name, source_limit in per_source.items():
connector = connectors[source_name]()
for metadata in source_info:
name = metadata["name"]
search = metadata.get("search")
category = metadata.get("category")
limit = metadata.get("limit", 100)
connector = connectors[name]()
raw_posts = connector.get_new_posts_by_search(
search=search,
category=category,
post_limit=source_limit,
comment_limit=source_limit
post_limit=limit,
comment_limit=limit
)
posts.extend(post.to_dict() for post in raw_posts)