diff --git a/frontend/src/pages/AutoScrape.tsx b/frontend/src/pages/AutoScrape.tsx index 9ebfd50..9697fec 100644 --- a/frontend/src/pages/AutoScrape.tsx +++ b/frontend/src/pages/AutoScrape.tsx @@ -22,6 +22,8 @@ type SourceConfig = { category: string; }; +type TopicMap = Record<string, string>; + const buildEmptySourceConfig = (sourceName = ""): SourceConfig => ({ sourceName, limit: "100", @@ -44,6 +46,8 @@ const AutoScrapePage = () => { const [isLoadingSources, setIsLoadingSources] = useState(true); const [isSubmitting, setIsSubmitting] = useState(false); const [hasError, setHasError] = useState(false); + const [useCustomTopics, setUseCustomTopics] = useState(false); + const [customTopicsText, setCustomTopicsText] = useState(""); useEffect(() => { axios @@ -151,6 +155,88 @@ const AutoScrapePage = () => { return; } + let normalizedTopics: TopicMap | undefined; + + if (useCustomTopics) { + const customTopicsJson = customTopicsText.trim(); + + if (!customTopicsJson) { + setHasError(true); + setReturnMessage( + "Custom topics are enabled, so please provide a JSON topic map.", + ); + return; + } + + let parsedTopics: unknown; + try { + parsedTopics = JSON.parse(customTopicsJson); + } catch { + setHasError(true); + setReturnMessage("Custom topic list must be valid JSON."); + return; + } + + if ( + !parsedTopics || + Array.isArray(parsedTopics) || + typeof parsedTopics !== "object" + ) { + setHasError(true); + setReturnMessage( + "Custom topic list must be a JSON object: {\"Topic\": \"keywords\"}.", + ); + return; + } + + const entries = Object.entries(parsedTopics); + if (entries.length === 0) { + setHasError(true); + setReturnMessage("Custom topic list cannot be empty."); + return; + } + + const hasInvalidTopic = entries.some( + ([topicName, keywords]) => + !topicName.trim() || + typeof keywords !== "string" || + !keywords.trim(), + ); + + if (hasInvalidTopic) { + setHasError(true); + setReturnMessage( + "Every custom topic must have a non-empty name and keyword string.", + ); + 
return; + } + + normalizedTopics = Object.fromEntries( + entries.map(([topicName, keywords]) => [ + topicName.trim(), + String(keywords).trim(), + ]), + ); + } + + const requestBody: { + name: string; + sources: Array<{ + name: string; + limit: number; + search?: string; + category?: string; + }>; + topics?: TopicMap; + } = { + name: normalizedDatasetName, + sources: normalizedSources, + }; + + if (normalizedTopics) { + requestBody.topics = normalizedTopics; + } + try { setIsSubmitting(true); setHasError(false); @@ -158,10 +244,7 @@ const AutoScrapePage = () => { const response = await axios.post( `${API_BASE_URL}/datasets/scrape`, - { - name: normalizedDatasetName, - sources: normalizedSources, - }, + requestBody, { headers: { Authorization: `Bearer ${token}`, @@ -381,6 +464,52 @@ const AutoScrapePage = () => { )} + +
+

+ Topic List +

+

+ Use the default topic list, or provide your own JSON topic map. +

+ + + +