Corpus Explorer Feature #11

Merged
dylan merged 14 commits from feat/corpus-explorer into main 2026-04-13 19:02:45 +01:00
6 changed files with 22 additions and 22 deletions
Showing only changes of commit 37d08c63b8 - Show all commits

View File

@@ -5,7 +5,7 @@ import DatasetsPage from "./pages/Datasets";
import DatasetStatusPage from "./pages/DatasetStatus";
import LoginPage from "./pages/Login";
import UploadPage from "./pages/Upload";
import AutoScrapePage from "./pages/AutoScrape";
import AutoFetchPage from "./pages/AutoFetch";
import StatPage from "./pages/Stats";
import { getDocumentTitle } from "./utils/documentTitle";
import DatasetEditPage from "./pages/DatasetEdit";
@@ -23,7 +23,7 @@ function App() {
<Route path="/" element={<Navigate to="/login" replace />} />
<Route path="/login" element={<LoginPage />} />
<Route path="/upload" element={<UploadPage />} />
<Route path="/auto-scrape" element={<AutoScrapePage />} />
<Route path="/auto-fetch" element={<AutoFetchPage />} />
<Route path="/datasets" element={<DatasetsPage />} />
<Route path="/dataset/:datasetId/status" element={<DatasetStatusPage />} />
<Route path="/dataset/:datasetId/stats" element={<StatPage />} />

View File

@@ -37,7 +37,7 @@ const supportsSearch = (source?: SourceOption): boolean =>
const supportsCategories = (source?: SourceOption): boolean =>
Boolean(source?.categories_enabled ?? source?.categoriesEnabled);
const AutoScrapePage = () => {
const AutoFetchPage = () => {
const navigate = useNavigate();
const [datasetName, setDatasetName] = useState("");
const [sourceOptions, setSourceOptions] = useState<SourceOption[]>([]);
@@ -106,11 +106,11 @@ const AutoScrapePage = () => {
);
};
const autoScrape = async () => {
const autoFetch = async () => {
const token = localStorage.getItem("access_token");
if (!token) {
setHasError(true);
setReturnMessage("You must be signed in to auto scrape a dataset.");
setReturnMessage("You must be signed in to auto fetch a dataset.");
return;
}
@@ -243,7 +243,7 @@ const AutoScrapePage = () => {
setReturnMessage("");
const response = await axios.post(
`${API_BASE_URL}/datasets/scrape`,
`${API_BASE_URL}/datasets/fetch`,
requestBody,
{
headers: {
@@ -255,7 +255,7 @@ const AutoScrapePage = () => {
const datasetId = Number(response.data.dataset_id);
setReturnMessage(
`Auto scrape queued successfully (dataset #${datasetId}). Redirecting to processing status...`,
`Auto fetch queued successfully (dataset #${datasetId}). Redirecting to processing status...`,
);
setTimeout(() => {
@@ -267,11 +267,11 @@ const AutoScrapePage = () => {
const message = String(
requestError.response?.data?.error ||
requestError.message ||
"Auto scrape failed.",
"Auto fetch failed.",
);
setReturnMessage(`Auto scrape failed: ${message}`);
setReturnMessage(`Auto fetch failed: ${message}`);
} else {
setReturnMessage("Auto scrape failed due to an unexpected error.");
setReturnMessage("Auto fetch failed due to an unexpected error.");
}
} finally {
setIsSubmitting(false);
@@ -283,9 +283,9 @@ const AutoScrapePage = () => {
<div style={styles.containerWide}>
<div style={{ ...styles.card, ...styles.headerBar }}>
<div>
<h1 style={styles.sectionHeaderTitle}>Auto Scrape Dataset</h1>
<h1 style={styles.sectionHeaderTitle}>Auto Fetch Dataset</h1>
<p style={styles.sectionHeaderSubtitle}>
Select sources and scrape settings, then queue processing
Select sources and fetch settings, then queue processing
automatically.
</p>
<p
@@ -295,7 +295,7 @@ const AutoScrapePage = () => {
color: "#9a6700",
}}
>
Warning: Scraping more than 250 posts from any single site can
Warning: Fetching more than 250 posts from any single site can
take hours due to rate limits.
</p>
</div>
@@ -305,10 +305,10 @@ const AutoScrapePage = () => {
...styles.buttonPrimary,
opacity: isSubmitting || isLoadingSources ? 0.75 : 1,
}}
onClick={autoScrape}
onClick={autoFetch}
disabled={isSubmitting || isLoadingSources}
>
{isSubmitting ? "Queueing..." : "Auto Scrape and Analyze"}
{isSubmitting ? "Queueing..." : "Auto Fetch and Analyze"}
</button>
</div>
@@ -527,4 +527,4 @@ const AutoScrapePage = () => {
);
};
export default AutoScrapePage;
export default AutoFetchPage;

View File

@@ -108,9 +108,9 @@ const DatasetsPage = () => {
<button
type="button"
style={styles.buttonSecondary}
onClick={() => navigate("/auto-scrape")}
onClick={() => navigate("/auto-fetch")}
>
Auto Scrape Dataset
Auto Fetch Dataset
</button>
</div>
</div>

View File

@@ -3,7 +3,7 @@ const DEFAULT_TITLE = "Ethnograph View";
const STATIC_TITLES: Record<string, string> = {
"/login": "Sign In",
"/upload": "Upload Dataset",
"/auto-scrape": "Auto Scrape Dataset",
"/auto-fetch": "Auto Fetch Dataset",
"/datasets": "My Datasets",
};

View File

@@ -152,9 +152,9 @@ def get_dataset_sources():
return jsonify(list_metadata)
@app.route("/datasets/scrape", methods=["POST"])
@app.route("/datasets/fetch", methods=["POST"])
@jwt_required()
def scrape_data():
def fetch_data():
data = request.get_json()
connector_metadata = get_connector_metadata()

View File

@@ -11,7 +11,7 @@ from server.connectors.base import BaseConnector
logger = logging.getLogger(__name__)
HEADERS = {"User-Agent": "Mozilla/5.0 (compatible; ForumScraper/1.0)"}
HEADERS = {"User-Agent": "Mozilla/5.0 (compatible; ForumFetcher/1.0)"}
class BoardsAPI(BaseConnector):