Automatic Scraping of dataset options #9

Merged
dylan merged 36 commits from feat/automatic-scraping-datasets into main 2026-03-14 21:58:49 +00:00
Showing only changes of commit 162a4de64e - Show all commits

View File

@@ -9,6 +9,10 @@ const API_BASE_URL = import.meta.env.VITE_BACKEND_URL;
type SourceOption = { type SourceOption = {
id: string; id: string;
label: string; label: string;
search_enabled?: boolean;
categories_enabled?: boolean;
searchEnabled?: boolean;
categoriesEnabled?: boolean;
}; };
type SourceConfig = { type SourceConfig = {
@@ -25,6 +29,12 @@ const buildEmptySourceConfig = (sourceName = ""): SourceConfig => ({
category: "", category: "",
}); });
/**
 * Reports whether a source option advertises search support.
 *
 * The snake_case `search_enabled` flag is read first; the camelCase
 * `searchEnabled` flag is consulted only when the former is null or
 * undefined. A missing source yields `false`.
 */
const supportsSearch = (source?: SourceOption): boolean => {
  if (source == null) {
    return false;
  }
  const flag = source.search_enabled ?? source.searchEnabled;
  return !!flag;
};
/**
 * Reports whether a source option advertises category filtering.
 *
 * The snake_case `categories_enabled` flag is read first; the camelCase
 * `categoriesEnabled` flag is consulted only when the former is null or
 * undefined. A missing source yields `false`.
 */
const supportsCategories = (source?: SourceOption): boolean => {
  if (source == null) {
    return false;
  }
  const flag = source.categories_enabled ?? source.categoriesEnabled;
  return !!flag;
};
const AutoScrapePage = () => { const AutoScrapePage = () => {
const navigate = useNavigate(); const navigate = useNavigate();
const [datasetName, setDatasetName] = useState(""); const [datasetName, setDatasetName] = useState("");
@@ -63,11 +73,18 @@ const AutoScrapePage = () => {
const updateSourceConfig = (index: number, field: keyof SourceConfig, value: string) => { const updateSourceConfig = (index: number, field: keyof SourceConfig, value: string) => {
setSourceConfigs((previous) => setSourceConfigs((previous) =>
previous.map((config, configIndex) => previous.map((config, configIndex) =>
configIndex === index ? { ...config, [field]: value } : config configIndex === index
? field === "sourceName"
? { ...config, sourceName: value, search: "", category: "" }
: { ...config, [field]: value }
: config
) )
); );
}; };
// Returns the first entry of `sourceOptions` whose `id` equals the given
// source name, or undefined when no entry matches.
const getSourceOption = (sourceName: string) => {
  const match = sourceOptions.find(({ id }) => id === sourceName);
  return match;
};
const addSourceConfig = () => { const addSourceConfig = () => {
setSourceConfigs((previous) => [ setSourceConfigs((previous) => [
...previous, ...previous,
@@ -100,12 +117,18 @@ const AutoScrapePage = () => {
return; return;
} }
const normalizedSources = sourceConfigs.map((source) => ({ const normalizedSources = sourceConfigs.map((source) => {
name: source.sourceName, const sourceOption = getSourceOption(source.sourceName);
limit: Number(source.limit || 100),
search: source.search.trim() || undefined, return {
category: source.category.trim() || undefined, name: source.sourceName,
})); limit: Number(source.limit || 100),
search: supportsSearch(sourceOption) ? source.search.trim() || undefined : undefined,
category: supportsCategories(sourceOption)
? source.category.trim() || undefined
: undefined,
};
});
const invalidSource = normalizedSources.find( const invalidSource = normalizedSources.find(
(source) => !source.name || !Number.isFinite(source.limit) || source.limit <= 0 (source) => !source.name || !Number.isFinite(source.limit) || source.limit <= 0
@@ -212,7 +235,12 @@ const AutoScrapePage = () => {
{!isLoadingSources && sourceOptions.length > 0 && ( {!isLoadingSources && sourceOptions.length > 0 && (
<div style={{ display: "flex", flexDirection: "column", gap: 10 }}> <div style={{ display: "flex", flexDirection: "column", gap: 10 }}>
{sourceConfigs.map((source, index) => ( {sourceConfigs.map((source, index) => {
const sourceOption = getSourceOption(source.sourceName);
const searchEnabled = supportsSearch(sourceOption);
const categoriesEnabled = supportsCategories(sourceOption);
return (
<div <div
key={`source-${index}`} key={`source-${index}`}
style={{ style={{
@@ -248,16 +276,26 @@ const AutoScrapePage = () => {
<input <input
type="text" type="text"
value={source.search} value={source.search}
placeholder="Search term (optional)" placeholder={
searchEnabled
? "Search term (optional)"
: "Search not supported for this source"
}
style={{ ...styles.input, ...styles.inputFullWidth }} style={{ ...styles.input, ...styles.inputFullWidth }}
disabled={!searchEnabled}
onChange={(event) => updateSourceConfig(index, "search", event.target.value)} onChange={(event) => updateSourceConfig(index, "search", event.target.value)}
/> />
<input <input
type="text" type="text"
value={source.category} value={source.category}
placeholder="Category (optional)" placeholder={
categoriesEnabled
? "Category (optional)"
: "Categories not supported for this source"
}
style={{ ...styles.input, ...styles.inputFullWidth }} style={{ ...styles.input, ...styles.inputFullWidth }}
disabled={!categoriesEnabled}
onChange={(event) => updateSourceConfig(index, "category", event.target.value)} onChange={(event) => updateSourceConfig(index, "category", event.target.value)}
/> />
@@ -271,7 +309,8 @@ const AutoScrapePage = () => {
</button> </button>
)} )}
</div> </div>
))} );
})}
<button type="button" style={styles.buttonSecondary} onClick={addSourceConfig}> <button type="button" style={styles.buttonSecondary} onClick={addSourceConfig}>
Add another source Add another source