Corpus Explorer Feature #11
@@ -5,7 +5,7 @@ import DatasetsPage from "./pages/Datasets";
|
|||||||
import DatasetStatusPage from "./pages/DatasetStatus";
|
import DatasetStatusPage from "./pages/DatasetStatus";
|
||||||
import LoginPage from "./pages/Login";
|
import LoginPage from "./pages/Login";
|
||||||
import UploadPage from "./pages/Upload";
|
import UploadPage from "./pages/Upload";
|
||||||
import AutoScrapePage from "./pages/AutoScrape";
|
import AutoFetchPage from "./pages/AutoFetch";
|
||||||
import StatPage from "./pages/Stats";
|
import StatPage from "./pages/Stats";
|
||||||
import { getDocumentTitle } from "./utils/documentTitle";
|
import { getDocumentTitle } from "./utils/documentTitle";
|
||||||
import DatasetEditPage from "./pages/DatasetEdit";
|
import DatasetEditPage from "./pages/DatasetEdit";
|
||||||
@@ -23,7 +23,7 @@ function App() {
|
|||||||
<Route path="/" element={<Navigate to="/login" replace />} />
|
<Route path="/" element={<Navigate to="/login" replace />} />
|
||||||
<Route path="/login" element={<LoginPage />} />
|
<Route path="/login" element={<LoginPage />} />
|
||||||
<Route path="/upload" element={<UploadPage />} />
|
<Route path="/upload" element={<UploadPage />} />
|
||||||
<Route path="/auto-scrape" element={<AutoScrapePage />} />
|
<Route path="/auto-fetch" element={<AutoFetchPage />} />
|
||||||
<Route path="/datasets" element={<DatasetsPage />} />
|
<Route path="/datasets" element={<DatasetsPage />} />
|
||||||
<Route path="/dataset/:datasetId/status" element={<DatasetStatusPage />} />
|
<Route path="/dataset/:datasetId/status" element={<DatasetStatusPage />} />
|
||||||
<Route path="/dataset/:datasetId/stats" element={<StatPage />} />
|
<Route path="/dataset/:datasetId/stats" element={<StatPage />} />
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ const supportsSearch = (source?: SourceOption): boolean =>
|
|||||||
const supportsCategories = (source?: SourceOption): boolean =>
|
const supportsCategories = (source?: SourceOption): boolean =>
|
||||||
Boolean(source?.categories_enabled ?? source?.categoriesEnabled);
|
Boolean(source?.categories_enabled ?? source?.categoriesEnabled);
|
||||||
|
|
||||||
const AutoScrapePage = () => {
|
const AutoFetchPage = () => {
|
||||||
const navigate = useNavigate();
|
const navigate = useNavigate();
|
||||||
const [datasetName, setDatasetName] = useState("");
|
const [datasetName, setDatasetName] = useState("");
|
||||||
const [sourceOptions, setSourceOptions] = useState<SourceOption[]>([]);
|
const [sourceOptions, setSourceOptions] = useState<SourceOption[]>([]);
|
||||||
@@ -106,11 +106,11 @@ const AutoScrapePage = () => {
|
|||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
const autoScrape = async () => {
|
const autoFetch = async () => {
|
||||||
const token = localStorage.getItem("access_token");
|
const token = localStorage.getItem("access_token");
|
||||||
if (!token) {
|
if (!token) {
|
||||||
setHasError(true);
|
setHasError(true);
|
||||||
setReturnMessage("You must be signed in to auto scrape a dataset.");
|
setReturnMessage("You must be signed in to auto fetch a dataset.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -243,7 +243,7 @@ const AutoScrapePage = () => {
|
|||||||
setReturnMessage("");
|
setReturnMessage("");
|
||||||
|
|
||||||
const response = await axios.post(
|
const response = await axios.post(
|
||||||
`${API_BASE_URL}/datasets/scrape`,
|
`${API_BASE_URL}/datasets/fetch`,
|
||||||
requestBody,
|
requestBody,
|
||||||
{
|
{
|
||||||
headers: {
|
headers: {
|
||||||
@@ -255,7 +255,7 @@ const AutoScrapePage = () => {
|
|||||||
const datasetId = Number(response.data.dataset_id);
|
const datasetId = Number(response.data.dataset_id);
|
||||||
|
|
||||||
setReturnMessage(
|
setReturnMessage(
|
||||||
`Auto scrape queued successfully (dataset #${datasetId}). Redirecting to processing status...`,
|
`Auto fetch queued successfully (dataset #${datasetId}). Redirecting to processing status...`,
|
||||||
);
|
);
|
||||||
|
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
@@ -267,11 +267,11 @@ const AutoScrapePage = () => {
|
|||||||
const message = String(
|
const message = String(
|
||||||
requestError.response?.data?.error ||
|
requestError.response?.data?.error ||
|
||||||
requestError.message ||
|
requestError.message ||
|
||||||
"Auto scrape failed.",
|
"Auto fetch failed.",
|
||||||
);
|
);
|
||||||
setReturnMessage(`Auto scrape failed: ${message}`);
|
setReturnMessage(`Auto fetch failed: ${message}`);
|
||||||
} else {
|
} else {
|
||||||
setReturnMessage("Auto scrape failed due to an unexpected error.");
|
setReturnMessage("Auto fetch failed due to an unexpected error.");
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
setIsSubmitting(false);
|
setIsSubmitting(false);
|
||||||
@@ -283,9 +283,9 @@ const AutoScrapePage = () => {
|
|||||||
<div style={styles.containerWide}>
|
<div style={styles.containerWide}>
|
||||||
<div style={{ ...styles.card, ...styles.headerBar }}>
|
<div style={{ ...styles.card, ...styles.headerBar }}>
|
||||||
<div>
|
<div>
|
||||||
<h1 style={styles.sectionHeaderTitle}>Auto Scrape Dataset</h1>
|
<h1 style={styles.sectionHeaderTitle}>Auto Fetch Dataset</h1>
|
||||||
<p style={styles.sectionHeaderSubtitle}>
|
<p style={styles.sectionHeaderSubtitle}>
|
||||||
Select sources and scrape settings, then queue processing
|
Select sources and fetch settings, then queue processing
|
||||||
automatically.
|
automatically.
|
||||||
</p>
|
</p>
|
||||||
<p
|
<p
|
||||||
@@ -295,7 +295,7 @@ const AutoScrapePage = () => {
|
|||||||
color: "#9a6700",
|
color: "#9a6700",
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
Warning: Scraping more than 250 posts from any single site can
|
Warning: Fetching more than 250 posts from any single site can
|
||||||
take hours due to rate limits.
|
take hours due to rate limits.
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
@@ -305,10 +305,10 @@ const AutoScrapePage = () => {
|
|||||||
...styles.buttonPrimary,
|
...styles.buttonPrimary,
|
||||||
opacity: isSubmitting || isLoadingSources ? 0.75 : 1,
|
opacity: isSubmitting || isLoadingSources ? 0.75 : 1,
|
||||||
}}
|
}}
|
||||||
onClick={autoScrape}
|
onClick={autoFetch}
|
||||||
disabled={isSubmitting || isLoadingSources}
|
disabled={isSubmitting || isLoadingSources}
|
||||||
>
|
>
|
||||||
{isSubmitting ? "Queueing..." : "Auto Scrape and Analyze"}
|
{isSubmitting ? "Queueing..." : "Auto Fetch and Analyze"}
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -527,4 +527,4 @@ const AutoScrapePage = () => {
|
|||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
export default AutoScrapePage;
|
export default AutoFetchPage;
|
||||||
@@ -108,9 +108,9 @@ const DatasetsPage = () => {
|
|||||||
<button
|
<button
|
||||||
type="button"
|
type="button"
|
||||||
style={styles.buttonSecondary}
|
style={styles.buttonSecondary}
|
||||||
onClick={() => navigate("/auto-scrape")}
|
onClick={() => navigate("/auto-fetch")}
|
||||||
>
|
>
|
||||||
Auto Scrape Dataset
|
Auto Fetch Dataset
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ const DEFAULT_TITLE = "Ethnograph View";
|
|||||||
const STATIC_TITLES: Record<string, string> = {
|
const STATIC_TITLES: Record<string, string> = {
|
||||||
"/login": "Sign In",
|
"/login": "Sign In",
|
||||||
"/upload": "Upload Dataset",
|
"/upload": "Upload Dataset",
|
||||||
"/auto-scrape": "Auto Scrape Dataset",
|
"/auto-fetch": "Auto Fetch Dataset",
|
||||||
"/datasets": "My Datasets",
|
"/datasets": "My Datasets",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -152,9 +152,9 @@ def get_dataset_sources():
|
|||||||
return jsonify(list_metadata)
|
return jsonify(list_metadata)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/datasets/scrape", methods=["POST"])
|
@app.route("/datasets/fetch", methods=["POST"])
|
||||||
@jwt_required()
|
@jwt_required()
|
||||||
def scrape_data():
|
def fetch_data():
|
||||||
data = request.get_json()
|
data = request.get_json()
|
||||||
connector_metadata = get_connector_metadata()
|
connector_metadata = get_connector_metadata()
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ from server.connectors.base import BaseConnector
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
HEADERS = {"User-Agent": "Mozilla/5.0 (compatible; ForumScraper/1.0)"}
|
HEADERS = {"User-Agent": "Mozilla/5.0 (compatible; ForumFetcher/1.0)"}
|
||||||
|
|
||||||
|
|
||||||
class BoardsAPI(BaseConnector):
|
class BoardsAPI(BaseConnector):
|
||||||
|
|||||||
Reference in New Issue
Block a user