import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel


class SentimentAnalysisHandler:
    def __init__(self):
        """Load the base model and attach the fine-tuned LoRA adapter."""
        self.base_model_id = "unsloth/llama-3-8b-bnb-4bit"
        self.adapter_model_id = "samiur-r/BanglishSentiment-Llama3-8B"

        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(self.base_model_id)

        # Load base model (a 4-bit bitsandbytes checkpoint)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.base_model_id,
            device_map="auto",
            torch_dtype=torch.bfloat16,
        )

        # Attach LoRA adapter and switch to eval mode for inference
        self.model = PeftModel.from_pretrained(self.model, self.adapter_model_id)
        self.model.eval()

    def preprocess(self, input_text):
        """Tokenize input text and move it to the model's device."""
        # Use the model's device instead of a hardcoded "cuda" string,
        # since device_map="auto" decides placement at load time.
        inputs = self.tokenizer(input_text, return_tensors="pt").to(self.model.device)
        return inputs

    def inference(self, inputs):
        """Perform model inference."""
        with torch.no_grad():
            output = self.model.generate(**inputs, max_new_tokens=256)
        return output

    def postprocess(self, output):
        """Decode model output into text."""
        sentiment = self.tokenizer.decode(output[0], skip_special_tokens=True)
        return sentiment

    def predict(self, input_text):
        """Full prediction pipeline: tokenize, generate, decode."""
        inputs = self.preprocess(input_text)
        output = self.inference(inputs)
        return self.postprocess(output)


# Create the handler instance once at import time so the model loads only once
_model_handler = SentimentAnalysisHandler()


def handle(inputs, context):
    """Entry point for model API inference."""
    text = inputs.get("text", "")
    return {"prediction": _model_handler.predict(text)}