Automatic Scraping of dataset options #9
@@ -119,50 +119,82 @@ def get_user_datasets():
|
|||||||
|
|
||||||
@app.route("/datasets/sources", methods=["GET"])
def get_dataset_sources():
    """Expose the available dataset connectors as a JSON array.

    The connector registry is a mapping; callers only need the metadata
    records themselves, so just the values are serialized (a dict would
    not round-trip as a JSON list on the client side).
    """
    metadata_records = [record for record in get_connector_metadata().values()]
    return jsonify(metadata_records)
@app.route("/datasets/scrape", methods=["POST"])
@jwt_required()
def scrape_data():
    """Validate a scrape request and queue dataset processing via Celery.

    Expected JSON body:
        name (str): dataset name, required and non-empty after stripping.
        sources (list[dict]): each entry must carry a "name"; may carry an
            integer-coercible "limit", plus optional "search" / "category"
            options (only allowed if the connector supports them).

    Returns:
        202 with the new dataset id once the task is queued,
        400 for any validation failure,
        500 if persisting the dataset or queueing the task raises.
    """
    data = request.get_json()
    connector_metadata = get_connector_metadata()

    # Strong validation needed, otherwise data goes to Celery and crashes silently.
    if not data or "sources" not in data:
        return jsonify({"error": "Sources must be provided"}), 400

    if "name" not in data or not str(data["name"]).strip():
        return jsonify({"error": "Dataset name is required"}), 400

    # BUG FIX: coerce via str() to match the validation above. A truthy
    # non-string name (e.g. an int) passed the str()-based check but then
    # raised AttributeError on .strip(), producing an unhandled 500.
    dataset_name = str(data["name"]).strip()
    user_id = int(get_jwt_identity())

    source_configs = data["sources"]
    if not isinstance(source_configs, list) or len(source_configs) == 0:
        return jsonify({"error": "Sources must be a non-empty list"}), 400

    # Light validation of every source entry before anything is persisted.
    for source in source_configs:
        if not isinstance(source, dict):
            return jsonify({"error": "Each source must be an object"}), 400

        if "name" not in source:
            return jsonify({"error": "Each source must contain a name"}), 400

        if "limit" in source:
            try:
                source["limit"] = int(source["limit"])
            except (ValueError, TypeError):
                return jsonify({"error": "Limit must be an integer"}), 400

        name = source["name"]
        if name not in connector_metadata:
            return jsonify({"error": "Source not supported"}), 400

        if "search" in source and not connector_metadata[name]["search_enabled"]:
            return jsonify({"error": f"Source {name} does not support search"}), 400

        if "category" in source and not connector_metadata[name]["categories_enabled"]:
            return jsonify({"error": f"Source {name} does not support categories"}), 400

    # Persist first, then flip status, then enqueue — any failure in this
    # sequence is surfaced as a 500 instead of crashing inside the worker.
    try:
        dataset_id = dataset_manager.save_dataset_info(
            user_id,
            dataset_name,
            default_topic_list
        )
        dataset_manager.set_dataset_status(
            dataset_id,
            "fetching",
            f"Data is being fetched from {', '.join(source['name'] for source in source_configs)}"
        )
        fetch_and_process_dataset.delay(dataset_id, source_configs, default_topic_list)
    except Exception:
        # NOTE(review): prints the full traceback to stdout, matching the
        # file's existing style — consider logger.exception() instead.
        print(traceback.format_exc())
        return jsonify({"error": "Failed to queue dataset processing"}), 500

    return jsonify({
        "message": "Dataset queued for processing",
        "dataset_id": dataset_id,
        "status": "processing"
    }), 202
||||||
@app.route("/datasets/upload", methods=["POST"])
|
@app.route("/datasets/upload", methods=["POST"])
|
||||||
@jwt_required()
|
@jwt_required()
|
||||||
|
|||||||
Reference in New Issue
Block a user