import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel


class SentimentAnalysisHandler:
    def __init__(self):
        """Load the base model and the fine-tuned LoRA adapter."""
        self.base_model_id = "unsloth/llama-3-8b-bnb-4bit"
        self.adapter_model_id = "samiur-r/BanglishSentiment-Llama3-8B"

        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(self.base_model_id)

        # Load base model
        self.model = AutoModelForCausalLM.from_pretrained(
            self.base_model_id,
            device_map="auto",
            torch_dtype=torch.bfloat16,
        )

        # Attach LoRA adapter
        self.model = PeftModel.from_pretrained(self.model, self.adapter_model_id)
        self.model.eval()

    def preprocess(self, input_text):
        """Tokenize input text."""
        # Move tensors to whatever device device_map="auto" placed the model on,
        # rather than hard-coding "cuda", so CPU-only loads don't crash.
        inputs = self.tokenizer(input_text, return_tensors="pt").to(self.model.device)
        return inputs

    def inference(self, inputs):
        """Run generation without tracking gradients."""
        with torch.no_grad():
            output = self.model.generate(**inputs, max_new_tokens=256)
        return output

    def postprocess(self, output):
        """Decode the generated token IDs back to text."""
        # Note: decoding output[0] returns the prompt plus the generated
        # continuation; downstream code may want to strip the prompt.
        sentiment = self.tokenizer.decode(output[0], skip_special_tokens=True)
        return sentiment

    def predict(self, input_text):
        """Full prediction pipeline: tokenize, generate, decode."""
        inputs = self.preprocess(input_text)
        output = self.inference(inputs)
        return self.postprocess(output)


# Create the handler once at import time so the model is loaded a single time
_model_handler = SentimentAnalysisHandler()


def handle(inputs, context):
    """Entry point for model API inference."""
    text = inputs.get("text", "")
    return {"prediction": _model_handler.predict(text)}
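

# --- Quick local check (sketch) ---
# A minimal, hypothetical way to exercise handle() outside the serving
# runtime. The sample Banglish text and the None context are illustrative
# assumptions, not part of any deployed API contract.
if __name__ == "__main__":
    sample_input = {"text": "ei movie ta onek bhalo chilo!"}  # hypothetical input
    print(handle(sample_input, context=None))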