Corpus Explorer Feature #11
@@ -66,6 +66,38 @@ const EMPTY_EXPLORER_STATE: ExplorerState = {
|
|||||||
error: "",
|
error: "",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Builds the string used to decide whether two explorer records are the same.
 *
 * Only the eleven fields listed below take part in the identity; any other
 * property on the record is ignored. A field that is missing, `undefined`
 * or `null` is serialized as `null`, so those three cases compare equal.
 *
 * @param record - The dataset record to fingerprint.
 * @returns A JSON string with the identity fields in a fixed order.
 */
const getExplorerRecordIdentity = (record: DatasetRecord) => {
  // Destructuring defaults only replace `undefined`; an explicit `null`
  // passes through unchanged, so every absent value ends up as `null`.
  const {
    post_id = null,
    parent_id = null,
    reply_to = null,
    author = null,
    type = null,
    timestamp = null,
    dt = null,
    title = null,
    content = null,
    source = null,
    topic = null,
  } = record;

  // Property order here fixes the key order of the serialized string.
  return JSON.stringify({
    post_id,
    parent_id,
    reply_to,
    author,
    type,
    timestamp,
    dt,
    title,
    content,
    source,
    topic,
  });
};
|
||||||
|
|
||||||
|
/**
 * Returns a new array without repeated records.
 *
 * Records are compared by the string from `getExplorerRecordIdentity`.
 * The first record with a given identity is kept and later ones are
 * dropped, so the relative order of the survivors matches the input.
 * The input array itself is not modified.
 *
 * @param records - Records to filter.
 * @returns The records whose identity had not been seen earlier in the list.
 */
const dedupeExplorerRecords = (records: DatasetRecord[]) => {
  const knownIdentities = new Set<string>();

  return records.filter((candidate) => {
    const fingerprint = getExplorerRecordIdentity(candidate);
    const isFirstOccurrence = !knownIdentities.has(fingerprint);

    if (isFirstOccurrence) {
      knownIdentities.add(fingerprint);
    }

    return isFirstOccurrence;
  });
};
|
||||||
|
|
||||||
const normalizeRecordPayload = (payload: unknown): DatasetRecord[] => {
|
const normalizeRecordPayload = (payload: unknown): DatasetRecord[] => {
|
||||||
if (typeof payload === "string") {
|
if (typeof payload === "string") {
|
||||||
try {
|
try {
|
||||||
@@ -233,7 +265,9 @@ const StatPage = () => {
|
|||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
const normalizedRecords = normalizeRecordPayload(response.data);
|
const normalizedRecords = dedupeExplorerRecords(
|
||||||
|
normalizeRecordPayload(response.data),
|
||||||
|
);
|
||||||
|
|
||||||
setAllRecords(normalizedRecords);
|
setAllRecords(normalizedRecords);
|
||||||
setAllRecordsKey(filterKey);
|
setAllRecordsKey(filterKey);
|
||||||
@@ -254,7 +288,9 @@ const StatPage = () => {
|
|||||||
try {
|
try {
|
||||||
const records = await ensureFilteredRecords();
|
const records = await ensureFilteredRecords();
|
||||||
const context = buildExplorerContext(records);
|
const context = buildExplorerContext(records);
|
||||||
const matched = records.filter((record) => spec.matcher(record, context));
|
const matched = dedupeExplorerRecords(
|
||||||
|
records.filter((record) => spec.matcher(record, context)),
|
||||||
|
);
|
||||||
matched.sort((a, b) => {
|
matched.sort((a, b) => {
|
||||||
const aValue = String(a.dt ?? a.date ?? a.timestamp ?? "");
|
const aValue = String(a.dt ?? a.date ?? a.timestamp ?? "");
|
||||||
const bValue = String(b.dt ?? b.date ?? b.timestamp ?? "");
|
const bValue = String(b.dt ?? b.date ?? b.timestamp ?? "");
|
||||||
@@ -662,7 +698,7 @@ const StatPage = () => {
|
|||||||
)}
|
)}
|
||||||
|
|
||||||
{activeView === "interactional" && interactionData && (
|
{activeView === "interactional" && interactionData && (
|
||||||
<InteractionalStats data={interactionData} onExplore={openExplorer} />
|
<InteractionalStats data={interactionData} />
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{activeView === "interactional" && !interactionData && (
|
{activeView === "interactional" && !interactionData && (
|
||||||
|
|||||||
@@ -89,10 +89,37 @@ class StatGen:
|
|||||||
df.to_json(orient="records", date_format="iso", date_unit="s")
|
df.to_json(orient="records", date_format="iso", date_unit="s")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _dedupe_records(self, records: list[dict]) -> list[dict]:
|
||||||
|
unique_records = []
|
||||||
|
seen = set()
|
||||||
|
|
||||||
|
for record in records:
|
||||||
|
key_data = {
|
||||||
|
"post_id": record.get("post_id"),
|
||||||
|
"parent_id": record.get("parent_id"),
|
||||||
|
"reply_to": record.get("reply_to"),
|
||||||
|
"author": record.get("author"),
|
||||||
|
"type": record.get("type"),
|
||||||
|
"timestamp": record.get("timestamp"),
|
||||||
|
"dt": record.get("dt"),
|
||||||
|
"title": record.get("title"),
|
||||||
|
"content": record.get("content"),
|
||||||
|
"source": record.get("source"),
|
||||||
|
"topic": record.get("topic"),
|
||||||
|
}
|
||||||
|
key = json.dumps(key_data, sort_keys=True, separators=(",", ":"))
|
||||||
|
if key in seen:
|
||||||
|
continue
|
||||||
|
|
||||||
|
seen.add(key)
|
||||||
|
unique_records.append(record)
|
||||||
|
|
||||||
|
return unique_records
|
||||||
|
|
||||||
## Public Methods
|
## Public Methods
|
||||||
def filter_dataset(self, df: pd.DataFrame, filters: dict | None = None) -> list[dict]:
|
def filter_dataset(self, df: pd.DataFrame, filters: dict | None = None) -> list[dict]:
|
||||||
filtered_df = self._prepare_filtered_df(df, filters)
|
filtered_df = self._prepare_filtered_df(df, filters)
|
||||||
return self._json_ready_records(filtered_df)
|
return self._dedupe_records(self._json_ready_records(filtered_df))
|
||||||
|
|
||||||
def temporal(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
def temporal(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||||
filtered_df = self._prepare_filtered_df(df, filters)
|
filtered_df = self._prepare_filtered_df(df, filters)
|
||||||
|
|||||||
Reference in New Issue
Block a user