refactor: rename nlp_processor to nlp

2026-02-08 13:54:16 +00:00
parent 7cc681ed23
commit b99718f6fe
2 changed files with 1 additions and 1 deletions
--- a/server/nlp.py
+++ b/server/nlp.py
@@ -0,0 +1,46 @@
+import torch
+import pandas as pd
+
+from transformers import pipeline
+from keybert import KeyBERT
+from sentence_transformers import SentenceTransformer
+
+sentence_model = SentenceTransformer("all-MiniLM-L6-v2", device="cuda" if torch.cuda.is_available() else "cpu")
+
+def add_emotion_cols(df: pd.DataFrame, content_col: str) -> None:
+    """Add one ``emotion_<label>`` score column per classifier label to ``df`` in place.
+
+    Text is read from ``df[content_col]``; missing values are scored as "" (not the
+    string "nan") and only the first 512 characters of each entry are classified.
+    """
+    emotion_classifier = pipeline(
+        "text-classification",
+        model="j-hartmann/emotion-english-distilroberta-base",
+        top_k=None,
+        truncation=True,
+        device=0 if torch.cuda.is_available() else -1,
+    )
+    texts = df[content_col].fillna("").astype(str).str.slice(0, 512).tolist()
+    # An empty batch yields no results to read labels from, so skip inference.
+    results = emotion_classifier(texts, batch_size=64) if texts else []
+    # top_k=None returns every label per row sorted by score; index by label
+    # once per row instead of rescanning the list for each column.
+    scores_by_row = [{item["label"]: item["score"] for item in row} for row in results]
+    # Take names from the model config so column order is stable and an empty
+    # frame still gets its (empty) columns.
+    for label in emotion_classifier.model.config.id2label.values():
+        df[f"emotion_{label}"] = [scores[label] for scores in scores_by_row]
+
+def add_topic_col(df: pd.DataFrame, content_col: str):
+    """Set ``df['theme']`` to each row's top KeyBERT keyword, or None when there is none."""
+    texts = df[content_col].fillna("").astype(str).tolist()
+    raw_results = KeyBERT(model=sentence_model).extract_keywords(
+        texts, keyphrase_ngram_range=(1, 1), stop_words='english', top_n=1
+    )
+    # For a single document KeyBERT returns that document's flat list of
+    # (keyword, score) tuples rather than a list of lists; re-wrap it.
+    if raw_results and isinstance(raw_results[0], tuple):
+        raw_results = [raw_results]
+    if len(raw_results) != len(texts):  # bare [] came back: nothing was extracted
+        raw_results = [[] for _ in texts]
+    df['theme'] = [res[0][0] if res else None for res in raw_results]