From b2ae1a9f7013755387b3d3b9704992a5181abe94 Mon Sep 17 00:00:00 2001 From: Dylan De Faoite Date: Wed, 11 Mar 2026 19:41:34 +0000 Subject: [PATCH] feat(frontend): add page for scraping endpoint --- frontend/src/App.tsx | 2 + frontend/src/pages/AutoScrape.tsx | 299 ++++++++++++++++++++++++++++ frontend/src/pages/Datasets.tsx | 15 +- frontend/src/utils/documentTitle.ts | 1 + 4 files changed, 314 insertions(+), 3 deletions(-) create mode 100644 frontend/src/pages/AutoScrape.tsx diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index b1e6045..1b10f61 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -5,6 +5,7 @@ import DatasetsPage from "./pages/Datasets"; import DatasetStatusPage from "./pages/DatasetStatus"; import LoginPage from "./pages/Login"; import UploadPage from "./pages/Upload"; +import AutoScrapePage from "./pages/AutoScrape"; import StatPage from "./pages/Stats"; import { getDocumentTitle } from "./utils/documentTitle"; import DatasetEditPage from "./pages/DatasetEdit"; @@ -22,6 +23,7 @@ function App() { } /> } /> } /> + } /> } /> } /> } /> diff --git a/frontend/src/pages/AutoScrape.tsx b/frontend/src/pages/AutoScrape.tsx new file mode 100644 index 0000000..7e8e754 --- /dev/null +++ b/frontend/src/pages/AutoScrape.tsx @@ -0,0 +1,299 @@ +import axios from "axios"; +import { useEffect, useState } from "react"; +import { useNavigate } from "react-router-dom"; +import StatsStyling from "../styles/stats_styling"; + +const styles = StatsStyling; +const API_BASE_URL = import.meta.env.VITE_BACKEND_URL; + +type SourceOption = { + id: string; + label: string; +}; + +type SourceConfig = { + sourceName: string; + limit: string; + search: string; + category: string; +}; + +const buildEmptySourceConfig = (sourceName = ""): SourceConfig => ({ + sourceName, + limit: "100", + search: "", + category: "", +}); + +const AutoScrapePage = () => { + const navigate = useNavigate(); + const [datasetName, setDatasetName] = useState(""); + const [sourceOptions, setSourceOptions] = useState([]); + const [sourceConfigs, setSourceConfigs] = useState([]); + const [returnMessage, setReturnMessage] = useState(""); + const [isLoadingSources, setIsLoadingSources] = useState(true); + const [isSubmitting, setIsSubmitting] = useState(false); + const [hasError, setHasError] = useState(false); + + useEffect(() => { + axios + .get(`${API_BASE_URL}/datasets/sources`) + .then((response) => { + const options = response.data || []; + setSourceOptions(options); + setSourceConfigs([buildEmptySourceConfig(options[0]?.id || "")]); + }) + .catch((requestError: unknown) => { + setHasError(true); + if (axios.isAxiosError(requestError)) { + setReturnMessage( + `Failed to load available sources: ${String( + requestError.response?.data?.error || requestError.message + )}` + ); + } else { + setReturnMessage("Failed to load available sources."); + } + }) + .finally(() => { + setIsLoadingSources(false); + }); + }, []); + + const updateSourceConfig = (index: number, field: keyof SourceConfig, value: string) => { + setSourceConfigs((previous) => + previous.map((config, configIndex) => + configIndex === index ? { ...config, [field]: value } : config + ) + ); + }; + + const addSourceConfig = () => { + setSourceConfigs((previous) => [ + ...previous, + buildEmptySourceConfig(sourceOptions[0]?.id || ""), + ]); + }; + + const removeSourceConfig = (index: number) => { + setSourceConfigs((previous) => previous.filter((_, configIndex) => configIndex !== index)); + }; + + const autoScrape = async () => { + const token = localStorage.getItem("access_token"); + if (!token) { + setHasError(true); + setReturnMessage("You must be signed in to auto scrape a dataset."); + return; + } + + const normalizedDatasetName = datasetName.trim(); + if (!normalizedDatasetName) { + setHasError(true); + setReturnMessage("Please add a dataset name before continuing."); + return; + } + + if (sourceConfigs.length === 0) { + setHasError(true); + setReturnMessage("Please add at least one source."); + return; + } + + const normalizedSources = sourceConfigs.map((source) => ({ + name: source.sourceName, + limit: Number(source.limit || 100), + search: source.search.trim() || undefined, + category: source.category.trim() || undefined, + })); + + const invalidSource = normalizedSources.find( + (source) => !source.name || !Number.isFinite(source.limit) || source.limit <= 0 + ); + + if (invalidSource) { + setHasError(true); + setReturnMessage("Every source needs a name and a limit greater than zero."); + return; + } + + try { + setIsSubmitting(true); + setHasError(false); + setReturnMessage(""); + + const response = await axios.post( + `${API_BASE_URL}/datasets/scrape`, + { + name: normalizedDatasetName, + sources: normalizedSources, + }, + { + headers: { + Authorization: `Bearer ${token}`, + }, + } + ); + + const datasetId = Number(response.data.dataset_id); + + setReturnMessage( + `Auto scrape queued successfully (dataset #${datasetId}). Redirecting to processing status...` + ); + + setTimeout(() => { + navigate(`/dataset/${datasetId}/status`); + }, 400); + } catch (requestError: unknown) { + setHasError(true); + if (axios.isAxiosError(requestError)) { + const message = String( + requestError.response?.data?.error || requestError.message || "Auto scrape failed." + ); + setReturnMessage(`Auto scrape failed: ${message}`); + } else { + setReturnMessage("Auto scrape failed due to an unexpected error."); + } + } finally { + setIsSubmitting(false); + } + }; + + return ( +
+
+
+
+

Auto Scrape Dataset

+

+ Select sources and scrape settings, then queue processing automatically. +

+
+ +
+ +
+
+

Dataset Name

+

Use a clear label so you can identify this run later.

+ setDatasetName(event.target.value)} + /> +
+ +
+

Sources

+

+ Configure source, limit, optional search, and optional category. +

+ + {isLoadingSources &&

Loading sources...

} + + {!isLoadingSources && sourceOptions.length === 0 && ( +

No source connectors are currently available.

+ )} + + {!isLoadingSources && sourceOptions.length > 0 && ( +
+ {sourceConfigs.map((source, index) => ( +
+ + + updateSourceConfig(index, "limit", event.target.value)} + /> + + updateSourceConfig(index, "search", event.target.value)} + /> + + updateSourceConfig(index, "category", event.target.value)} + /> + + {sourceConfigs.length > 1 && ( + + )} +
+ ))} + + +
+ )} +
+
+ +
+ {returnMessage || + "After queueing, your dataset is fetched and processed in the background automatically."} +
+
+
+ ); +}; + +export default AutoScrapePage; diff --git a/frontend/src/pages/Datasets.tsx b/frontend/src/pages/Datasets.tsx index ede2317..daffaf2 100644 --- a/frontend/src/pages/Datasets.tsx +++ b/frontend/src/pages/Datasets.tsx @@ -63,9 +63,18 @@ const DatasetsPage = () => { View and reopen datasets you previously uploaded.

- +
+ + +
{error && ( diff --git a/frontend/src/utils/documentTitle.ts b/frontend/src/utils/documentTitle.ts index 904a6a8..5c7d00d 100644 --- a/frontend/src/utils/documentTitle.ts +++ b/frontend/src/utils/documentTitle.ts @@ -3,6 +3,7 @@ const DEFAULT_TITLE = "Ethnograph View"; const STATIC_TITLES: Record = { "/login": "Sign In", "/upload": "Upload Dataset", + "/auto-scrape": "Auto Scrape Dataset", "/datasets": "My Datasets", };