From 4abbd0643eed92765c6996cc6e5e35c0ca40556d Mon Sep 17 00:00:00 2001
From: Dylan De Faoite
Date: Thu, 5 Feb 2026 19:11:51 +0000
Subject: [PATCH] perf: use gpu on topic AI & move Model Init into functions

By moving model initialisation into the function itself, the model is
unloaded from memory after completion, which avoids OOM errors
---
Review notes (text in this section is ignored by git am):
* sentence_model now falls back to CPU when CUDA is unavailable, using the
  same torch.cuda.is_available() check as the emotion pipeline below; a
  hard-coded device="cuda" is expected to fail at import on CPU-only hosts.
* sentence_model is still created at module import, so only the emotion
  pipeline and the KeyBERT wrapper are constructed per call.
* add_topic_col no longer accepts top_n and writes a single-keyword 'theme'
  column (or None) instead of the 'topics' list column; callers that pass
  top_n or read df["topics"] need updating.
* add_topic_col now reads df[content_col] instead of the hard-coded
  df["content"], and add_emotion_cols' annotation is corrected from
  pd.Dataframe to pd.DataFrame.
* NOTE(review): confirm what extract_keywords returns when `texts` has
  exactly one element or is empty; the final comprehension assumes one
  result list per input row.
* NOTE(review): line breaks and indentation were restored from a
  whitespace-collapsed copy of this patch; verify context-line indentation
  against the target file (or apply with --ignore-whitespace).

 server/nlp_processor.py | 42 ++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/server/nlp_processor.py b/server/nlp_processor.py
index bbd4f25..494c30e 100644
--- a/server/nlp_processor.py
+++ b/server/nlp_processor.py
@@ -3,18 +3,19 @@
 import pandas as pd
 from transformers import pipeline
 from keybert import KeyBERT
+from sentence_transformers import SentenceTransformer
 
-kw_model = KeyBERT(model="all-MiniLM-L6-v2")
+sentence_model = SentenceTransformer("all-MiniLM-L6-v2", device="cuda" if torch.cuda.is_available() else "cpu")
 
-emotion_classifier = pipeline(
-    "text-classification",
-    model="j-hartmann/emotion-english-distilroberta-base",
-    top_k=None,
-    truncation=True,
-    device=0 if torch.cuda.is_available() else -1
-)
+def add_emotion_cols(df: pd.DataFrame, content_col: str) -> None:
+    emotion_classifier = pipeline(
+        "text-classification",
+        model="j-hartmann/emotion-english-distilroberta-base",
+        top_k=None,
+        truncation=True,
+        device=0 if torch.cuda.is_available() else -1
+    )
 
-def add_emotion_cols(df: pd.Dataframe, content_col: str) -> None:
     texts = df[content_col].astype(str).str.slice(0, 512).tolist()
 
     results = emotion_classifier(
@@ -30,17 +31,16 @@ def add_emotion_cols(df: pd.Dataframe, content_col: str) -> None:
         for row in results
     ]
 
-def add_topic_col(df: pd.DataFrame, content_col: str, top_n: int = 3) -> None:
-    topics = []
+def add_topic_col(df: pd.DataFrame, content_col: str) -> None:
+    kw_model = KeyBERT(model=sentence_model)
 
-    for text in df["content"].astype(str):
-        keywords = kw_model.extract_keywords(
-            text,
-            keyphrase_ngram_range=(1, 3),
-            stop_words="english",
-            top_n=top_n
-        )
+    texts = df[content_col].fillna("").astype(str).tolist()
+
+    raw_results = kw_model.extract_keywords(
+        texts,
+        keyphrase_ngram_range=(1, 1),
+        stop_words='english',
+        top_n=1
+    )
 
-        topics.append([kw for kw, _ in keywords])
-
-    df["topics"] = topics
\ No newline at end of file
+    df['theme'] = [res[0][0] if res else None for res in raw_results]
\ No newline at end of file