# /repository/handler.py
"""Hugging Face Inference Endpoint handler for LLaVA vision-language models.

Exposes the ``EndpointHandler`` class that the HF Inference Toolkit looks up,
delegating model loading and image+text inference to ``_LlavaPulseCore``.
"""

import base64
import io
import os
import sys
from typing import Any, Dict

import torch
from PIL import Image


class _LlavaPulseCore:
    """Lazily imports LLaVA, loads a pretrained checkpoint, and runs inference.

    All LLaVA symbols are resolved at runtime in :meth:`_import_llava` so that a
    missing/broken ``llava`` install surfaces as a clear ``RuntimeError`` instead
    of an import-time crash of the whole handler module.
    """

    def __init__(self):
        # Populated by load() / _import_llava(); None until then.
        self.model = None
        self.tokenizer = None
        self.image_processor = None
        self.conv_templates = None
        self.process_images = None
        self.tokenizer_image_token = None  # helper that splices the image token id
        self.image_token_index = None      # sentinel id used by LLaVA (normally -200)
        self.default_image_token = None    # textual placeholder (normally "<image>")
        self._load_pretrained_model = None

    def _import_llava(self):
        """Resolve LLaVA entry points, raising RuntimeError on import failure."""
        try:
            from llava.model.builder import load_pretrained_model
            from llava.mm_utils import process_images
            from llava.conversation import conv_templates
        except Exception as e:
            raise RuntimeError(f"LLaVA import error: {e}")
        # Optional helpers: older/forked llava versions may lack these, so fall
        # back to the canonical constants and plain tokenization if missing.
        try:
            from llava.mm_utils import tokenizer_image_token
            from llava.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
        except Exception:
            tokenizer_image_token = None
            DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX = "<image>", -200
        self._load_pretrained_model = load_pretrained_model
        self.process_images = process_images
        self.conv_templates = conv_templates
        self.tokenizer_image_token = tokenizer_image_token
        self.default_image_token = DEFAULT_IMAGE_TOKEN
        self.image_token_index = IMAGE_TOKEN_INDEX

    def load(self, model_dir: str):
        """Load tokenizer/model/image-processor from HF_MODEL_ID or *model_dir*.

        ``load_pretrained_model``'s signature varies across llava versions, so
        several call shapes are attempted; the collected errors are reported if
        all of them fail.
        """
        self._import_llava()
        model_path = os.environ.get("HF_MODEL_ID", model_dir)

        kwargs: Dict[str, Any] = {}
        if torch.cuda.is_available():
            kwargs["device_map"] = "auto"
        else:
            kwargs["device_map"] = {"": "cpu"}

        tried = []

        def _try(*args, **k):
            try:
                return self._load_pretrained_model(*args, **k)
            except Exception as e:
                tried.append(str(e))
                return None

        out = (
            _try(model_path, **kwargs)
            or _try(model_path, None, **kwargs)
            or _try(model_path, model_base=None, **kwargs)
            or _try(model_path, model_name="llava-v1.5-7b", **kwargs)
        )
        if out is None:
            raise RuntimeError(
                "load_pretrained_model failed:\n- " + "\n- ".join(tried)
            )

        # Newer versions also return the context length as a 4th element.
        if len(out) == 4:
            tokenizer, model, image_processor, _ = out
        elif len(out) == 3:
            tokenizer, model, image_processor = out
        else:
            raise RuntimeError(
                f"Unexpected load_pretrained_model output len={len(out)}"
            )

        self.model = model.eval()
        self.tokenizer = tokenizer
        self.image_processor = image_processor

    def _decode_image(self, image_payload: Any) -> Image.Image:
        """Decode the request's image payload into an RGB PIL image.

        Accepts a base64 string (with or without a ``data:image/...`` prefix),
        raw bytes, a file-like object, or an already-open PIL image.

        Raises:
            ValueError: if the payload is missing.
            TypeError: if the payload type is unsupported.
        """
        if image_payload is None:
            raise ValueError("Missing 'image' in request.")
        if isinstance(image_payload, str):
            if image_payload.startswith("data:image"):
                # Strip the "data:image/...;base64," prefix from a data URL.
                image_payload = image_payload.split(",", 1)[-1]
            raw = base64.b64decode(image_payload)
            return Image.open(io.BytesIO(raw)).convert("RGB")
        if isinstance(image_payload, (bytes, bytearray)):
            return Image.open(io.BytesIO(image_payload)).convert("RGB")
        if hasattr(image_payload, "read"):
            return Image.open(image_payload).convert("RGB")
        if isinstance(image_payload, Image.Image):
            return image_payload.convert("RGB")
        raise TypeError(f"Unsupported image type: {type(image_payload)}")

    def infer(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Run one image+prompt generation and return ``{"result": text}``.

        Payload keys: ``prompt`` (or ``inputs``), ``image``, and optional
        ``parameters`` with ``max_new_tokens`` / ``temperature``.
        """
        prompt = payload.get("prompt") or payload.get("inputs") or ""
        params = payload.get("parameters") or {}
        max_new_tokens = int(params.get("max_new_tokens", 512))
        temperature = float(params.get("temperature", 0.2))

        pil_img = self._decode_image(payload.get("image"))
        # BUG FIX: llava's process_images(images, image_processor, model_cfg)
        # requires the model config as its third positional argument.
        images = self.process_images([pil_img], self.image_processor, self.model.config)

        # BUG FIX: LLaVA prompts must contain the image placeholder token so the
        # visual features can be spliced in; prepend it if the caller omitted it.
        if self.tokenizer_image_token is not None and self.default_image_token not in prompt:
            prompt = self.default_image_token + "\n" + prompt

        template = self.conv_templates.get("llava_v1")
        if template is None:
            raise RuntimeError("Conversation template 'llava_v1' not found.")
        conv = template.copy()
        conv.append_message(conv.roles[0], prompt)
        conv.append_message(conv.roles[1], None)
        prompt_text = conv.get_prompt()

        if self.tokenizer_image_token is not None:
            # Tokenize with the special image sentinel id spliced in.
            input_ids = self.tokenizer_image_token(
                prompt_text, self.tokenizer, self.image_token_index, return_tensors="pt"
            ).unsqueeze(0)
        else:
            input_ids = self.tokenizer([prompt_text], return_tensors="pt").input_ids

        if torch.cuda.is_available():
            input_ids = input_ids.to("cuda")
            # process_images may return either a batched tensor or (for anyres/
            # pad image modes) a list of tensors — move both forms to fp16/CUDA.
            if isinstance(images, torch.Tensor):
                images = images.to(dtype=torch.float16, device="cuda")
            elif isinstance(images, list):
                images = [im.to(dtype=torch.float16, device="cuda") for im in images]

        # Only pass `temperature` when sampling; transformers rejects/warns on
        # temperature with do_sample=False.
        gen_kwargs: Dict[str, Any] = {
            "input_ids": input_ids,
            "images": images,  # LLaVA-specific generate() argument
            "max_new_tokens": max_new_tokens,
            "do_sample": temperature > 0,
        }
        if temperature > 0:
            gen_kwargs["temperature"] = temperature

        with torch.no_grad():
            output_ids = self.model.generate(**gen_kwargs)

        text = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
        # Some llava versions echo the prompt in the output; strip it if so.
        if text.startswith(prompt_text):
            text = text[len(prompt_text):].strip()
        return {"result": text}


# NOTE: the HF Inference Toolkit looks for EXACTLY this class name here.
class EndpointHandler:
    """Entry point discovered by the HF Inference Toolkit."""

    def __init__(self, model_dir: str):
        self.core = _LlavaPulseCore()
        self.core.load(model_dir)

    # Some toolkit versions expect __call__:
    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        return self.core.infer(data)

    # Others look for predict/infer — provide both:
    def predict(self, data: Dict[str, Any]) -> Dict[str, Any]:
        return self.core.infer(data)

    def infer(self, data: Dict[str, Any]) -> Dict[str, Any]:
        return self.core.infer(data)