feat(tasks): add fetch and NLP processing time logging to dataset status

This commit is contained in:
2026-04-14 17:35:43 +01:00
parent e35e51d295
commit 76591bc89e

View File

@@ -1,3 +1,5 @@
from time import time
import pandas as pd
import logging
@@ -46,6 +48,7 @@ def fetch_and_process_dataset(
try:
for metadata in source_info:
fetch_start = time()
name = metadata["name"]
search = metadata.get("search")
category = metadata.get("category")
@@ -57,8 +60,11 @@ def fetch_and_process_dataset(
)
posts.extend(post.to_dict() for post in raw_posts)
fetch_time = time() - fetch_start
df = pd.DataFrame(posts)
nlp_start = time()
dataset_manager.set_dataset_status(
dataset_id, "processing", "NLP Processing Started"
)
@@ -66,9 +72,11 @@ def fetch_and_process_dataset(
processor = DatasetEnrichment(df, topics)
enriched_df = processor.enrich()
nlp_time = time() - nlp_start
dataset_manager.save_dataset_content(dataset_id, enriched_df)
dataset_manager.set_dataset_status(
-dataset_id, "complete", "NLP Processing Completed Successfully"
+dataset_id, "complete", f"Completed Successfully. Fetch time: {fetch_time:.2f}s, NLP time: {nlp_time:.2f}s"
)
except Exception as e:
dataset_manager.set_dataset_status(