Corpus Explorer Feature #11

Merged
dylan merged 14 commits from feat/corpus-explorer into main 2026-04-13 19:02:45 +01:00
6 changed files with 22 additions and 22 deletions
Showing only changes of commit 37d08c63b8

View File

@@ -5,7 +5,7 @@ import DatasetsPage from "./pages/Datasets";
 import DatasetStatusPage from "./pages/DatasetStatus";
 import LoginPage from "./pages/Login";
 import UploadPage from "./pages/Upload";
-import AutoScrapePage from "./pages/AutoScrape";
+import AutoFetchPage from "./pages/AutoFetch";
 import StatPage from "./pages/Stats";
 import { getDocumentTitle } from "./utils/documentTitle";
 import DatasetEditPage from "./pages/DatasetEdit";
@@ -23,7 +23,7 @@ function App() {
         <Route path="/" element={<Navigate to="/login" replace />} />
         <Route path="/login" element={<LoginPage />} />
         <Route path="/upload" element={<UploadPage />} />
-        <Route path="/auto-scrape" element={<AutoScrapePage />} />
+        <Route path="/auto-fetch" element={<AutoFetchPage />} />
         <Route path="/datasets" element={<DatasetsPage />} />
         <Route path="/dataset/:datasetId/status" element={<DatasetStatusPage />} />
         <Route path="/dataset/:datasetId/stats" element={<StatPage />} />

View File

@@ -37,7 +37,7 @@ const supportsSearch = (source?: SourceOption): boolean =>
 const supportsCategories = (source?: SourceOption): boolean =>
   Boolean(source?.categories_enabled ?? source?.categoriesEnabled);

-const AutoScrapePage = () => {
+const AutoFetchPage = () => {
   const navigate = useNavigate();
   const [datasetName, setDatasetName] = useState("");
   const [sourceOptions, setSourceOptions] = useState<SourceOption[]>([]);
@@ -106,11 +106,11 @@ const AutoScrapePage = () => {
     );
   };

-  const autoScrape = async () => {
+  const autoFetch = async () => {
     const token = localStorage.getItem("access_token");
     if (!token) {
       setHasError(true);
-      setReturnMessage("You must be signed in to auto scrape a dataset.");
+      setReturnMessage("You must be signed in to auto fetch a dataset.");
       return;
     }
@@ -243,7 +243,7 @@ const AutoScrapePage = () => {
setReturnMessage(""); setReturnMessage("");
const response = await axios.post( const response = await axios.post(
`${API_BASE_URL}/datasets/scrape`, `${API_BASE_URL}/datasets/fetch`,
requestBody, requestBody,
{ {
headers: { headers: {
@@ -255,7 +255,7 @@ const AutoScrapePage = () => {
       const datasetId = Number(response.data.dataset_id);
       setReturnMessage(
-        `Auto scrape queued successfully (dataset #${datasetId}). Redirecting to processing status...`,
+        `Auto fetch queued successfully (dataset #${datasetId}). Redirecting to processing status...`,
       );
       setTimeout(() => {
@@ -267,11 +267,11 @@ const AutoScrapePage = () => {
         const message = String(
           requestError.response?.data?.error ||
             requestError.message ||
-            "Auto scrape failed.",
+            "Auto fetch failed.",
         );
-        setReturnMessage(`Auto scrape failed: ${message}`);
+        setReturnMessage(`Auto fetch failed: ${message}`);
       } else {
-        setReturnMessage("Auto scrape failed due to an unexpected error.");
+        setReturnMessage("Auto fetch failed due to an unexpected error.");
       }
     } finally {
       setIsSubmitting(false);
@@ -283,9 +283,9 @@ const AutoScrapePage = () => {
     <div style={styles.containerWide}>
       <div style={{ ...styles.card, ...styles.headerBar }}>
         <div>
-          <h1 style={styles.sectionHeaderTitle}>Auto Scrape Dataset</h1>
+          <h1 style={styles.sectionHeaderTitle}>Auto Fetch Dataset</h1>
           <p style={styles.sectionHeaderSubtitle}>
-            Select sources and scrape settings, then queue processing
+            Select sources and fetch settings, then queue processing
             automatically.
           </p>
           <p
@@ -295,7 +295,7 @@ const AutoScrapePage = () => {
color: "#9a6700", color: "#9a6700",
}} }}
> >
Warning: Scraping more than 250 posts from any single site can Warning: Fetching more than 250 posts from any single site can
take hours due to rate limits. take hours due to rate limits.
</p> </p>
</div> </div>
@@ -305,10 +305,10 @@ const AutoScrapePage = () => {
             ...styles.buttonPrimary,
             opacity: isSubmitting || isLoadingSources ? 0.75 : 1,
           }}
-          onClick={autoScrape}
+          onClick={autoFetch}
           disabled={isSubmitting || isLoadingSources}
         >
-          {isSubmitting ? "Queueing..." : "Auto Scrape and Analyze"}
+          {isSubmitting ? "Queueing..." : "Auto Fetch and Analyze"}
         </button>
       </div>
@@ -527,4 +527,4 @@ const AutoScrapePage = () => {
   );
 };

-export default AutoScrapePage;
+export default AutoFetchPage;
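For reviewers tracing the rename end to end, the client side of the renamed endpoint can be read in isolation as roughly the sketch below. It is assembled from the fragments in this diff, not the full component: API_BASE_URL, the requestBody shape, and the Authorization header (the hunk above cuts off at `headers: {`) are assumptions.

import axios from "axios";

// Assumed base URL; the real value comes from the app's configuration.
const API_BASE_URL = "http://localhost:5000";

// Minimal sketch of the renamed request path: POST /datasets/fetch with a
// bearer token, returning the queued dataset's id. requestBody is a
// placeholder; the component builds it from source/category/search state.
const queueAutoFetch = async (
  requestBody: Record<string, unknown>,
): Promise<number> => {
  const token = localStorage.getItem("access_token");
  if (!token) {
    throw new Error("You must be signed in to auto fetch a dataset.");
  }
  const response = await axios.post(
    `${API_BASE_URL}/datasets/fetch`,
    requestBody,
    // Bearer auth is an assumption, inferred from the @jwt_required()
    // decorator on the server route later in this diff.
    { headers: { Authorization: `Bearer ${token}` } },
  );
  return Number(response.data.dataset_id);
};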

View File

@@ -108,9 +108,9 @@ const DatasetsPage = () => {
           <button
             type="button"
             style={styles.buttonSecondary}
-            onClick={() => navigate("/auto-scrape")}
+            onClick={() => navigate("/auto-fetch")}
           >
-            Auto Scrape Dataset
+            Auto Fetch Dataset
           </button>
         </div>
       </div>

View File

@@ -3,7 +3,7 @@ const DEFAULT_TITLE = "Ethnograph View";
 const STATIC_TITLES: Record<string, string> = {
   "/login": "Sign In",
   "/upload": "Upload Dataset",
-  "/auto-scrape": "Auto Scrape Dataset",
+  "/auto-fetch": "Auto Fetch Dataset",
   "/datasets": "My Datasets",
 };
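The diff only touches the static-title map; the lookup itself is out of the hunk. For context, a plausible sketch of how getDocumentTitle would consume it, assuming an exact-path lookup with a default fallback (the function body is an assumption and is not part of this change):

const DEFAULT_TITLE = "Ethnograph View";

const STATIC_TITLES: Record<string, string> = {
  "/login": "Sign In",
  "/upload": "Upload Dataset",
  "/auto-fetch": "Auto Fetch Dataset",
  "/datasets": "My Datasets",
};

// Hypothetical lookup: exact-path titles first, falling back to the default
// for dynamic routes such as /dataset/:datasetId/status.
export const getDocumentTitle = (pathname: string): string =>
  STATIC_TITLES[pathname] ?? DEFAULT_TITLE;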

View File

@@ -152,9 +152,9 @@ def get_dataset_sources():
     return jsonify(list_metadata)


-@app.route("/datasets/scrape", methods=["POST"])
+@app.route("/datasets/fetch", methods=["POST"])
 @jwt_required()
-def scrape_data():
+def fetch_data():
     data = request.get_json()
     connector_metadata = get_connector_metadata()

View File

@@ -11,7 +11,7 @@ from server.connectors.base import BaseConnector
 logger = logging.getLogger(__name__)

-HEADERS = {"User-Agent": "Mozilla/5.0 (compatible; ForumScraper/1.0)"}
+HEADERS = {"User-Agent": "Mozilla/5.0 (compatible; ForumFetcher/1.0)"}

 class BoardsAPI(BaseConnector):