diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index b1e6045..1b10f61 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -5,6 +5,7 @@ import DatasetsPage from "./pages/Datasets";
import DatasetStatusPage from "./pages/DatasetStatus";
import LoginPage from "./pages/Login";
import UploadPage from "./pages/Upload";
+import AutoScrapePage from "./pages/AutoScrape";
import StatPage from "./pages/Stats";
import { getDocumentTitle } from "./utils/documentTitle";
import DatasetEditPage from "./pages/DatasetEdit";
@@ -22,6 +23,7 @@ function App() {
} />
} />
} />
+ } />
} />
} />
} />
diff --git a/frontend/src/pages/AutoScrape.tsx b/frontend/src/pages/AutoScrape.tsx
new file mode 100644
index 0000000..7e8e754
--- /dev/null
+++ b/frontend/src/pages/AutoScrape.tsx
@@ -0,0 +1,299 @@
+import axios from "axios";
+import { useEffect, useState } from "react";
+import { useNavigate } from "react-router-dom";
+import StatsStyling from "../styles/stats_styling";
+
+const styles = StatsStyling; // Local alias for the shared style object imported from ../styles/stats_styling.
+const API_BASE_URL = import.meta.env.VITE_BACKEND_URL; // Backend base URL read from the VITE_BACKEND_URL env var; prefixed to both request URLs built in this file.
+
+type SourceOption = { // One available scrape source; presumably the element shape of the GET /datasets/sources response (TODO confirm against the backend).
+ id: string; // Source identifier; the first option's id is used as the default sourceName of a new source row.
+ label: string; // Presumably the human-readable name shown for the source (TODO confirm in the render code).
+};
+
+type SourceConfig = { // Form state for one configured source; every field is held as a string until submit.
+ sourceName: string; // Selected source id; sent to the backend as `name`.
+ limit: string; // Item limit as text; converted with Number() on submit, an empty value falls back to 100.
+ search: string; // Optional search text; trimmed on submit and omitted when blank.
+ category: string; // Optional category; trimmed on submit and omitted when blank.
+};
+
+/**
+ * Creates the initial form state for one source row.
+ *
+ * @param sourceName Source id to pre-select; defaults to an empty string.
+ * @returns A config with limit "100" and blank search/category fields.
+ */
+const buildEmptySourceConfig = (sourceName = ""): SourceConfig => {
+  const blankConfig: SourceConfig = {
+    sourceName,
+    limit: "100",
+    search: "",
+    category: "",
+  };
+  return blankConfig;
+};
+
+const AutoScrapePage = () => {
+ // Router navigation handle; used to redirect to the dataset status page once a scrape is queued.
+ const navigate = useNavigate();
+ // Dataset name as entered in the form (trimmed on submit).
+ const [datasetName, setDatasetName] = useState("");
+ // Sources returned by GET /datasets/sources. The element type must be explicit:
+ // a bare `useState([])` infers `never[]`, which rejects every later update under strict mode.
+ const [sourceOptions, setSourceOptions] = useState<SourceOption[]>([]);
+ // One entry per configured source row.
+ const [sourceConfigs, setSourceConfigs] = useState<SourceConfig[]>([]);
+ // Status or error text shown to the user; while empty, the render falls back to a default hint.
+ const [returnMessage, setReturnMessage] = useState("");
+ // True until the initial sources request settles (success or failure).
+ const [isLoadingSources, setIsLoadingSources] = useState(true);
+ // True while the scrape request is in flight.
+ const [isSubmitting, setIsSubmitting] = useState(false);
+ // Set when a validation or request failure is being reported; cleared when a submit starts.
+ const [hasError, setHasError] = useState(false);
+
+ // Runs once on mount: loads the available sources and seeds the form with one
+ // source row that defaults to the first returned source.
+ useEffect(() => {
+   const loadSources = async () => {
+     try {
+       const { data } = await axios.get(`${API_BASE_URL}/datasets/sources`);
+       const availableSources = data || [];
+       setSourceOptions(availableSources);
+       setSourceConfigs([buildEmptySourceConfig(availableSources[0]?.id || "")]);
+     } catch (loadError: unknown) {
+       setHasError(true);
+       if (!axios.isAxiosError(loadError)) {
+         setReturnMessage("Failed to load available sources.");
+       } else {
+         // Prefer the backend-provided error text over the generic axios message.
+         const detail = String(loadError.response?.data?.error || loadError.message);
+         setReturnMessage(`Failed to load available sources: ${detail}`);
+       }
+     } finally {
+       // The loading indicator is cleared whether the request succeeded or failed.
+       setIsLoadingSources(false);
+     }
+   };
+
+   // Every failure is handled inside loadSources, so the promise never rejects.
+   void loadSources();
+ }, []);
+
+ // Replaces a single field of the source row at `index`; every other row is kept as-is.
+ const updateSourceConfig = (index: number, field: keyof SourceConfig, value: string) => {
+   setSourceConfigs((currentConfigs) =>
+     currentConfigs.map((entry, position) => {
+       if (position !== index) {
+         return entry;
+       }
+       return { ...entry, [field]: value };
+     })
+   );
+ };
+
+ // Appends a new source row pre-selected to the first available source ("" when none are loaded).
+ const addSourceConfig = () => {
+   const defaultSourceName = sourceOptions[0]?.id || "";
+   setSourceConfigs((currentConfigs) =>
+     currentConfigs.concat(buildEmptySourceConfig(defaultSourceName))
+   );
+ };
+
+ // Drops the source row at `index`; the remaining rows keep their relative order.
+ const removeSourceConfig = (index: number) => {
+   setSourceConfigs((currentConfigs) => {
+     const remaining = currentConfigs.filter((_entry, position) => position !== index);
+     return remaining;
+   });
+ };
+
+ /**
+  * Validates the form, posts the scrape request to the backend and, on
+  * success, redirects to the new dataset's status page after a short delay.
+  *
+  * Validation failures and request errors are reported through
+  * `returnMessage` / `hasError`; nothing is thrown to the caller.
+  */
+ const autoScrape = async () => {
+   // Shared failure path: flag the error state and show the message.
+   const fail = (message: string) => {
+     setHasError(true);
+     setReturnMessage(message);
+   };
+
+   const token = localStorage.getItem("access_token");
+   if (!token) {
+     fail("You must be signed in to auto scrape a dataset.");
+     return;
+   }
+
+   const normalizedDatasetName = datasetName.trim();
+   if (!normalizedDatasetName) {
+     fail("Please add a dataset name before continuing.");
+     return;
+   }
+
+   if (sourceConfigs.length === 0) {
+     fail("Please add at least one source.");
+     return;
+   }
+
+   // Convert the string-based form rows into the request payload; blank
+   // optional fields become undefined so they are left out of the JSON body.
+   const normalizedSources = sourceConfigs.map((source) => ({
+     name: source.sourceName,
+     limit: Number(source.limit || 100),
+     search: source.search.trim() || undefined,
+     category: source.category.trim() || undefined,
+   }));
+
+   const hasInvalidSource = normalizedSources.some(
+     (source) => !source.name || !Number.isFinite(source.limit) || source.limit <= 0
+   );
+   if (hasInvalidSource) {
+     fail("Every source needs a name and a limit greater than zero.");
+     return;
+   }
+
+   try {
+     setIsSubmitting(true);
+     setHasError(false);
+     setReturnMessage("");
+
+     const response = await axios.post(
+       `${API_BASE_URL}/datasets/scrape`,
+       {
+         name: normalizedDatasetName,
+         sources: normalizedSources,
+       },
+       {
+         headers: {
+           Authorization: `Bearer ${token}`,
+         },
+       }
+     );
+
+     // Refuse to redirect when the response carries no usable id; otherwise the
+     // user would be told "dataset #NaN" and sent to /dataset/NaN/status.
+     const rawDatasetId: unknown = response.data?.dataset_id;
+     const datasetId = Number(rawDatasetId);
+     if (rawDatasetId == null || rawDatasetId === "" || !Number.isFinite(datasetId)) {
+       fail("Auto scrape request succeeded, but the server did not return a valid dataset id.");
+       return;
+     }
+
+     setReturnMessage(
+       `Auto scrape queued successfully (dataset #${datasetId}). Redirecting to processing status...`
+     );
+
+     // Short delay so the success message is visible before navigating away.
+     setTimeout(() => {
+       navigate(`/dataset/${datasetId}/status`);
+     }, 400);
+   } catch (requestError: unknown) {
+     if (axios.isAxiosError(requestError)) {
+       // Prefer the backend-provided error text over the generic axios message.
+       const message = String(
+         requestError.response?.data?.error || requestError.message || "Auto scrape failed."
+       );
+       fail(`Auto scrape failed: ${message}`);
+     } else {
+       fail("Auto scrape failed due to an unexpected error.");
+     }
+   } finally {
+     // Runs on success, failure and the early return above.
+     setIsSubmitting(false);
+   }
+ };
+
+ return (
+
+
+
+
+
Auto Scrape Dataset
+
+ Select sources and scrape settings, then queue processing automatically.
+
+
+
+
+
+
+
+
Dataset Name
+
Use a clear label so you can identify this run later.
+
setDatasetName(event.target.value)}
+ />
+
+
+
+
Sources
+
+ Configure source, limit, optional search, and optional category.
+
+
+ {isLoadingSources &&
Loading sources...
}
+
+ {!isLoadingSources && sourceOptions.length === 0 && (
+
No source connectors are currently available.
+ )}
+
+ {!isLoadingSources && sourceOptions.length > 0 && (
+
+ )}
+
+
+
+
+ {returnMessage ||
+ "After queueing, your dataset is fetched and processed in the background automatically."}
+
+
+
+ );
+};
+
+export default AutoScrapePage;
diff --git a/frontend/src/pages/Datasets.tsx b/frontend/src/pages/Datasets.tsx
index ede2317..daffaf2 100644
--- a/frontend/src/pages/Datasets.tsx
+++ b/frontend/src/pages/Datasets.tsx
@@ -63,9 +63,18 @@ const DatasetsPage = () => {
View and reopen datasets you previously uploaded.
-
+
+
+
+
{error && (
diff --git a/frontend/src/utils/documentTitle.ts b/frontend/src/utils/documentTitle.ts
index 904a6a8..5c7d00d 100644
--- a/frontend/src/utils/documentTitle.ts
+++ b/frontend/src/utils/documentTitle.ts
@@ -3,6 +3,7 @@ const DEFAULT_TITLE = "Ethnograph View";
const STATIC_TITLES: Record = {
"/login": "Sign In",
"/upload": "Upload Dataset",
+ "/auto-scrape": "Auto Scrape Dataset",
"/datasets": "My Datasets",
};