From 4abbd0643eed92765c6996cc6e5e35c0ca40556d Mon Sep 17 00:00:00 2001
From: Dylan De Faoite <dylanseandefaoite@gmail.com>
Date: Thu, 5 Feb 2026 19:11:51 +0000
Subject: [PATCH] perf: use GPU for topic AI & move model init into functions

By moving model initialisation into the function itself, the model is unloaded from memory after the function completes, which avoids OOM errors.
---
 server/nlp_processor.py | 42 ++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/server/nlp_processor.py b/server/nlp_processor.py
index bbd4f25..494c30e 100644
--- a/server/nlp_processor.py
+++ b/server/nlp_processor.py
@@ -3,18 +3,19 @@ import pandas as pd
 
 from transformers import pipeline
 from keybert import KeyBERT
+from sentence_transformers import SentenceTransformer
 
-kw_model = KeyBERT(model="all-MiniLM-L6-v2")
+sentence_model = SentenceTransformer("all-MiniLM-L6-v2", device="cuda")
 
-emotion_classifier = pipeline(
-    "text-classification",
-    model="j-hartmann/emotion-english-distilroberta-base",
-    top_k=None,
-    truncation=True,
-    device=0 if torch.cuda.is_available() else -1
-)
+def add_emotion_cols(df: pd.DataFrame, content_col: str) -> None:
+    emotion_classifier = pipeline(
+        "text-classification",
+        model="j-hartmann/emotion-english-distilroberta-base",
+        top_k=None,
+        truncation=True,
+        device=0 if torch.cuda.is_available() else -1
+    )
 
-def add_emotion_cols(df: pd.Dataframe, content_col: str) -> None:
     texts = df[content_col].astype(str).str.slice(0, 512).tolist()
 
     results = emotion_classifier(
@@ -30,17 +31,16 @@ def add_emotion_cols(df: pd.Dataframe, content_col: str) -> None:
             for row in results
         ]
 
-def add_topic_col(df: pd.DataFrame, content_col: str, top_n: int = 3) -> None:
-    topics = []
+def add_topic_col(df: pd.DataFrame, content_col: str):
+    kw_model = KeyBERT(model=sentence_model)
 
-    for text in df["content"].astype(str):
-        keywords = kw_model.extract_keywords(
-            text,
-            keyphrase_ngram_range=(1, 3),
-            stop_words="english",
-            top_n=top_n
-        )
+    texts = df[content_col].fillna("").astype(str).tolist()
+    
+    raw_results = kw_model.extract_keywords(
+        texts, 
+        keyphrase_ngram_range=(1, 1), 
+        stop_words='english', 
+        top_n=1
+    )
 
-        topics.append([kw for kw, _ in keywords])
-
-    df["topics"] = topics
\ No newline at end of file
+    df['theme'] = [res[0][0] if len(res) > 0 else None for res in raw_results]
\ No newline at end of file