import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel


class SentimentAnalysisHandler:

    def __init__(self):
        """Load base model and fine-tuned adapter."""
        self.base_model_id = "unsloth/llama-3-8b-bnb-4bit"
        self.adapter_model_id = "samiur-r/BanglishSentiment-Llama3-8B"

        self.tokenizer = AutoTokenizer.from_pretrained(self.base_model_id)

        # Load the 4-bit quantized base checkpoint; device_map="auto" places
        # it on the available GPU(s) and falls back to CPU if none is found.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.base_model_id,
            device_map="auto",
            torch_dtype=torch.bfloat16,
        )

        # Attach the fine-tuned adapter on top of the base model and
        # switch to eval mode for inference.
        self.model = PeftModel.from_pretrained(self.model, self.adapter_model_id)
        self.model.eval()

    def preprocess(self, input_text):
        """Tokenize input text."""
        # Move tensors to whichever device device_map="auto" chose for the
        # model, rather than hard-coding "cuda".
        inputs = self.tokenizer(input_text, return_tensors="pt").to(self.model.device)
        return inputs

    def inference(self, inputs):
        """Perform model inference."""
        with torch.no_grad():
            output = self.model.generate(**inputs, max_new_tokens=256)
        return output

    def postprocess(self, output):
        """Decode model output."""
        # Note: generate() returns the prompt tokens followed by the new
        # tokens, so the decoded string includes the input text.
        sentiment = self.tokenizer.decode(output[0], skip_special_tokens=True)
        return sentiment

    def predict(self, input_text):
        """Full prediction pipeline."""
        inputs = self.preprocess(input_text)
        output = self.inference(inputs)
        return self.postprocess(output)


# Instantiate once at import time so the model is loaded a single time
# per worker process rather than on every request.
_model_handler = SentimentAnalysisHandler()


def handle(inputs, context):
    """Entry point for model API inference."""
    text = inputs.get("text", "")
    return {"prediction": _model_handler.predict(text)}
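

# Minimal local smoke test: a sketch, assuming the serving framework passes
# the request payload as a dict and a context object that can be ignored
# here. The sample Banglish text is a made-up placeholder.
if __name__ == "__main__":
    demo = handle({"text": "ajke match ta darun chilo!"}, context=None)
    print(demo["prediction"])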