import axios from "axios";
import { useEffect, useState } from "react";
import { useNavigate } from "react-router-dom";
import StatsStyling from "../styles/stats_styling";

const styles = StatsStyling;
const API_BASE_URL = import.meta.env.VITE_BACKEND_URL;

/**
 * A source connector as listed by `GET /datasets/sources`.
 * Capability flags are read in both snake_case and camelCase spellings
 * (see `supportsSearch` / `supportsCategories`).
 */
type SourceOption = {
  id: string;
  label: string;
  search_enabled?: boolean;
  categories_enabled?: boolean;
  searchEnabled?: boolean;
  categoriesEnabled?: boolean;
};

/** Form state for one source row; every field holds the raw input string. */
type SourceConfig = {
  sourceName: string;
  limit: string;
  search: string;
  category: string;
};

/** Maps a topic name to its keyword string. */
type TopicMap = Record<string, string>;

/** One entry of the `sources` array sent to `POST /datasets/fetch`. */
type SourceRequest = {
  name: string;
  limit: number;
  search?: string;
  category?: string;
};

/** Body of the `POST /datasets/fetch` request. */
type AutoFetchRequest = {
  name: string;
  sources: SourceRequest[];
  topics?: TopicMap;
};

/** Outcome of validating the custom-topics textarea. */
type TopicParseResult =
  | { ok: true; topics: TopicMap }
  | { ok: false; message: string };

/**
 * Creates a blank source row.
 *
 * @param sourceName Connector id to preselect; defaults to an empty string.
 * @returns A config with limit "100" and empty search/category.
 */
const buildEmptySourceConfig = (sourceName = ""): SourceConfig => ({
  sourceName,
  limit: "100",
  search: "",
  category: "",
});

/** True when the connector advertises search support under either flag spelling. */
const supportsSearch = (source?: SourceOption): boolean =>
  Boolean(source?.search_enabled ?? source?.searchEnabled);

/** True when the connector advertises category support under either flag spelling. */
const supportsCategories = (source?: SourceOption): boolean =>
  Boolean(source?.categories_enabled ?? source?.categoriesEnabled);

/**
 * Validates and normalizes the user-supplied custom topic JSON.
 *
 * @param rawText Raw textarea content.
 * @returns The trimmed topic map on success, otherwise the user-facing
 *   message describing the first validation failure.
 */
const parseCustomTopics = (rawText: string): TopicParseResult => {
  const customTopicsJson = rawText.trim();
  if (!customTopicsJson) {
    return {
      ok: false,
      message: "Custom topics are enabled, so please provide a JSON topic map.",
    };
  }

  let parsedTopics: unknown;
  try {
    parsedTopics = JSON.parse(customTopicsJson);
  } catch {
    return { ok: false, message: "Custom topic list must be valid JSON." };
  }

  // Only a plain JSON object is acceptable; null, arrays and primitives are not.
  if (
    !parsedTopics ||
    Array.isArray(parsedTopics) ||
    typeof parsedTopics !== "object"
  ) {
    return {
      ok: false,
      message:
        "Custom topic list must be a JSON object: {\"Topic\": \"keywords\"}.",
    };
  }

  const entries: Array<[string, unknown]> = Object.entries(parsedTopics);
  if (entries.length === 0) {
    return { ok: false, message: "Custom topic list cannot be empty." };
  }

  const hasInvalidTopic = entries.some(
    ([topicName, keywords]) =>
      !topicName.trim() || typeof keywords !== "string" || !keywords.trim(),
  );
  if (hasInvalidTopic) {
    return {
      ok: false,
      message:
        "Every custom topic must have a non-empty name and keyword string.",
    };
  }

  const topics: TopicMap = Object.fromEntries(
    entries.map(([topicName, keywords]): [string, string] => [
      topicName.trim(),
      String(keywords).trim(),
    ]),
  );
  return { ok: true, topics };
};

/**
 * Page that lets the user name a dataset, configure one or more source
 * connectors (limit, optional search, optional category) and optionally a
 * custom topic map, then submits the job to `POST /datasets/fetch`.
 * On success it navigates to `/dataset/{id}/status`.
 */
const AutoFetchPage = () => {
  const navigate = useNavigate();
  const [datasetName, setDatasetName] = useState("");
  const [sourceOptions, setSourceOptions] = useState<SourceOption[]>([]);
  const [sourceConfigs, setSourceConfigs] = useState<SourceConfig[]>([]);
  const [returnMessage, setReturnMessage] = useState("");
  const [isLoadingSources, setIsLoadingSources] = useState(true);
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [hasError, setHasError] = useState(false);
  const [useCustomTopics, setUseCustomTopics] = useState(false);
  const [customTopicsText, setCustomTopicsText] = useState("");

  /** Puts the page into its error state with the given user-facing message. */
  const showError = (message: string) => {
    setHasError(true);
    setReturnMessage(message);
  };

  // Load the available connectors once on mount and seed a first source row.
  useEffect(() => {
    axios
      .get(`${API_BASE_URL}/datasets/sources`)
      .then((response) => {
        // Anything other than an array is treated as "no connectors" so that
        // later `sourceOptions.find(...)` calls cannot fail on a bad payload.
        const options: SourceOption[] = Array.isArray(response.data)
          ? response.data
          : [];
        setSourceOptions(options);
        setSourceConfigs([buildEmptySourceConfig(options[0]?.id || "")]);
      })
      .catch((requestError: unknown) => {
        if (axios.isAxiosError(requestError)) {
          showError(
            `Failed to load available sources: ${String(
              requestError.response?.data?.error || requestError.message,
            )}`,
          );
        } else {
          showError("Failed to load available sources.");
        }
      })
      .finally(() => {
        setIsLoadingSources(false);
      });
  }, []);

  /**
   * Updates one field of the source row at `index`.
   * Changing the connector also clears search and category, since those
   * values belonged to the previously selected connector.
   */
  const updateSourceConfig = (
    index: number,
    field: keyof SourceConfig,
    value: string,
  ) => {
    setSourceConfigs((previous) =>
      previous.map((config, configIndex) => {
        if (configIndex !== index) {
          return config;
        }
        if (field === "sourceName") {
          return { ...config, sourceName: value, search: "", category: "" };
        }
        return { ...config, [field]: value };
      }),
    );
  };

  /** Looks up the connector metadata for a connector id, if it is known. */
  const getSourceOption = (sourceName: string) =>
    sourceOptions.find((option) => option.id === sourceName);

  /** Appends a new source row preselecting the first available connector. */
  const addSourceConfig = () => {
    setSourceConfigs((previous) => [
      ...previous,
      buildEmptySourceConfig(sourceOptions[0]?.id || ""),
    ]);
  };

  /** Removes the source row at `index`. */
  const removeSourceConfig = (index: number) => {
    setSourceConfigs((previous) =>
      previous.filter((_, configIndex) => configIndex !== index),
    );
  };

  /**
   * Validates the form, submits the fetch job and redirects to its status
   * page. Validation failures and request errors are reported through
   * `returnMessage` / `hasError`; nothing is thrown to the caller.
   */
  const autoFetch = async () => {
    const token = localStorage.getItem("access_token");
    if (!token) {
      showError("You must be signed in to auto fetch a dataset.");
      return;
    }

    const normalizedDatasetName = datasetName.trim();
    if (!normalizedDatasetName) {
      showError("Please add a dataset name before continuing.");
      return;
    }

    if (sourceConfigs.length === 0) {
      showError("Please add at least one source.");
      return;
    }

    const normalizedSources: SourceRequest[] = sourceConfigs.map((source) => {
      const sourceOption = getSourceOption(source.sourceName);
      return {
        name: source.sourceName,
        // An empty limit input falls back to 100.
        limit: Number(source.limit || 100),
        // Search/category are only sent when the connector supports them
        // and the trimmed value is non-empty.
        search: supportsSearch(sourceOption)
          ? source.search.trim() || undefined
          : undefined,
        category: supportsCategories(sourceOption)
          ? source.category.trim() || undefined
          : undefined,
      };
    });

    const invalidSource = normalizedSources.find(
      (source) =>
        !source.name || !Number.isFinite(source.limit) || source.limit <= 0,
    );
    if (invalidSource) {
      showError("Every source needs a name and a limit greater than zero.");
      return;
    }

    const requestBody: AutoFetchRequest = {
      name: normalizedDatasetName,
      sources: normalizedSources,
    };

    if (useCustomTopics) {
      const topicResult = parseCustomTopics(customTopicsText);
      if (!topicResult.ok) {
        showError(topicResult.message);
        return;
      }
      requestBody.topics = topicResult.topics;
    }

    try {
      setIsSubmitting(true);
      setHasError(false);
      setReturnMessage("");

      const response = await axios.post(
        `${API_BASE_URL}/datasets/fetch`,
        requestBody,
        {
          headers: {
            // Must stay on a single line: a line break inside the template
            // literal would end up in the header value.
            Authorization: `Bearer ${token}`,
          },
        },
      );

      const datasetId = Number(response.data?.dataset_id);
      if (!Number.isFinite(datasetId)) {
        // Avoid redirecting to "/dataset/NaN/status" on an unexpected payload.
        showError(
          "Auto fetch request succeeded, but the response did not include a valid dataset id.",
        );
        return;
      }

      setReturnMessage(
        `Auto fetch queued successfully (dataset #${datasetId}). Redirecting to processing status...`,
      );
      // Short delay so the success message is visible before leaving the page.
      setTimeout(() => {
        navigate(`/dataset/${datasetId}/status`);
      }, 400);
    } catch (requestError: unknown) {
      if (axios.isAxiosError(requestError)) {
        const message = String(
          requestError.response?.data?.error ||
            requestError.message ||
            "Auto fetch failed.",
        );
        showError(`Auto fetch failed: ${message}`);
      } else {
        showError("Auto fetch failed due to an unexpected error.");
      }
    } finally {
      setIsSubmitting(false);
    }
  };

  return (

Auto Fetch Dataset

Select sources and fetch settings, then queue processing automatically.

Warning: Fetching more than 250 posts from any single site can take hours due to rate limits.

Dataset Name

Use a clear label so you can identify this run later.

setDatasetName(event.target.value)} />

Sources

Configure source, limit, optional search, and optional category.

{isLoadingSources && (

Loading sources...

)} {!isLoadingSources && sourceOptions.length === 0 && (

No source connectors are currently available.

)} {!isLoadingSources && sourceOptions.length > 0 && (
{sourceConfigs.map((source, index) => { const sourceOption = getSourceOption(source.sourceName); const searchEnabled = supportsSearch(sourceOption); const categoriesEnabled = supportsCategories(sourceOption); return (
updateSourceConfig(index, "limit", event.target.value) } /> updateSourceConfig( index, "search", event.target.value, ) } /> updateSourceConfig( index, "category", event.target.value, ) } /> {sourceConfigs.length > 1 && ( )}
); })}
)}

Topic List

Use the default topic list, or provide your own JSON topic map.