feat(tasks): report fetch and NLP processing times in the dataset completion status message
This commit is contained in:
@@ -1,3 +1,5 @@
|
|||||||
|
from time import time
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
@@ -46,6 +48,7 @@ def fetch_and_process_dataset(
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
for metadata in source_info:
|
for metadata in source_info:
|
||||||
|
fetch_start = time()
|
||||||
name = metadata["name"]
|
name = metadata["name"]
|
||||||
search = metadata.get("search")
|
search = metadata.get("search")
|
||||||
category = metadata.get("category")
|
category = metadata.get("category")
|
||||||
@@ -57,8 +60,11 @@ def fetch_and_process_dataset(
|
|||||||
)
|
)
|
||||||
posts.extend(post.to_dict() for post in raw_posts)
|
posts.extend(post.to_dict() for post in raw_posts)
|
||||||
|
|
||||||
|
fetch_time = time() - fetch_start
|
||||||
df = pd.DataFrame(posts)
|
df = pd.DataFrame(posts)
|
||||||
|
|
||||||
|
nlp_start = time()
|
||||||
|
|
||||||
dataset_manager.set_dataset_status(
|
dataset_manager.set_dataset_status(
|
||||||
dataset_id, "processing", "NLP Processing Started"
|
dataset_id, "processing", "NLP Processing Started"
|
||||||
)
|
)
|
||||||
@@ -66,9 +72,11 @@ def fetch_and_process_dataset(
|
|||||||
processor = DatasetEnrichment(df, topics)
|
processor = DatasetEnrichment(df, topics)
|
||||||
enriched_df = processor.enrich()
|
enriched_df = processor.enrich()
|
||||||
|
|
||||||
|
nlp_time = time() - nlp_start
|
||||||
|
|
||||||
dataset_manager.save_dataset_content(dataset_id, enriched_df)
|
dataset_manager.save_dataset_content(dataset_id, enriched_df)
|
||||||
dataset_manager.set_dataset_status(
|
dataset_manager.set_dataset_status(
|
||||||
dataset_id, "complete", "NLP Processing Completed Successfully"
|
dataset_id, "complete", f"Completed Successfully. Fetch time: {fetch_time:.2f}s, NLP time: {nlp_time:.2f}s"
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
dataset_manager.set_dataset_status(
|
dataset_manager.set_dataset_status(
|
||||||
|
|||||||
Reference in New Issue
Block a user