refactor: rename nlp_processor to nlp
This commit is contained in:
46
server/nlp.py
Normal file
46
server/nlp.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import torch
|
||||
import pandas as pd
|
||||
|
||||
from transformers import pipeline
|
||||
from keybert import KeyBERT
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
# Shared sentence-embedding model, loaded once at import time.
# Pick the GPU only when one is actually available so that importing this
# module does not fail on CPU-only hosts (mirrors the device selection used
# for the emotion classifier below).
sentence_model = SentenceTransformer(
    "all-MiniLM-L6-v2",
    device="cuda" if torch.cuda.is_available() else "cpu",
)
|
||||
|
||||
# Lazily-built emotion classification pipeline, shared by every call so the
# model weights are loaded only once per process.
_emotion_classifier = None


def _get_emotion_classifier():
    """Return the shared emotion pipeline, building it on first use."""
    global _emotion_classifier
    if _emotion_classifier is None:
        _emotion_classifier = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            top_k=None,  # return a score for every label, not just the best one
            truncation=True,
            device=0 if torch.cuda.is_available() else -1,
        )
    return _emotion_classifier


def add_emotion_cols(df: pd.DataFrame, content_col: str) -> None:
    """Add one ``emotion_<label>`` score column per emotion label, in place.

    Args:
        df: DataFrame to annotate. It is mutated; nothing is returned.
        content_col: Name of the column holding the text to classify.

    Missing values are classified as the empty string, and only the first
    512 characters of each text are passed to the model. An empty DataFrame
    still receives the (empty) emotion columns.

    Raises:
        KeyError: If ``content_col`` is not a column of ``df``.
    """
    classifier = _get_emotion_classifier()

    # fillna before astype(str): otherwise missing values become the literal
    # string "nan" and get classified as if it were real text.
    texts = df[content_col].fillna("").astype(str).str.slice(0, 512).tolist()

    # Skip inference entirely when there is nothing to classify.
    results = classifier(texts, batch_size=64) if texts else []

    # Build one label -> score mapping per row so that each label lookup is a
    # dict access instead of a linear scan over the row's results.
    score_maps = [
        {item["label"]: item["score"] for item in row}
        for row in results
    ]

    # Take the label set from the model config rather than from the first
    # result row: it also works for empty input and gives a stable column order.
    for label in classifier.model.config.id2label.values():
        df[f"emotion_{label}"] = pd.Series(
            [scores[label] for scores in score_maps],
            index=df.index,
            dtype="float64",
        )
|
||||
|
||||
def add_topic_col(df: pd.DataFrame, content_col: str) -> None:
    """Add a ``theme`` column holding the top single-word keyword of each row.

    Args:
        df: DataFrame to annotate. It is mutated; nothing is returned.
        content_col: Name of the column holding the text to analyse.

    Missing values are treated as the empty string. Rows for which no keyword
    can be extracted get ``None``.

    Raises:
        KeyError: If ``content_col`` is not a column of ``df``.
    """
    texts = df[content_col].fillna("").astype(str).tolist()

    if texts:
        kw_model = KeyBERT(model=sentence_model)
        raw_results = kw_model.extract_keywords(
            texts,
            keyphrase_ngram_range=(1, 1),
            stop_words="english",
            top_n=1,
        )
    else:
        # Nothing to analyse; avoid invoking the model on an empty corpus.
        raw_results = []

    if raw_results and isinstance(raw_results[0], tuple):
        # KeyBERT flattens the result when given exactly one document, returning
        # [(keyword, score)] instead of [[(keyword, score)]]. Re-wrap it so the
        # indexing below does not pick the keyword's first character.
        raw_results = [raw_results]
    elif not raw_results:
        # KeyBERT returns a bare [] when it cannot build any vocabulary (for
        # example when every text is empty or consists only of stop words) and
        # when a single document yields no keywords. Expand to one empty result
        # per row so the column length matches the DataFrame.
        raw_results = [[] for _ in texts]

    df["theme"] = [res[0][0] if res else None for res in raw_results]
|
||||
Reference in New Issue
Block a user