import io
from copy import deepcopy
from typing import Union

import matplotlib.pyplot as plt
import torch
from PIL import Image
from transformers import Pipeline

class VLEForVQAPipeline(Pipeline):
    """Visual question answering: ranks the model's answer vocabulary for an image-question pair."""

    def __init__(self, vle_processor, *args, **kwargs):
        self.vle_processor = vle_processor
        super().__init__(*args, **kwargs)

    def _sanitize_parameters(self, top_k=None, **kwargs):
        preprocess_params, forward_params, postprocess_params = {}, {}, {}
        if top_k is not None:
            postprocess_params["top_k"] = top_k
        return preprocess_params, forward_params, postprocess_params

    def __call__(self, image: Union["Image.Image", str], question: str = None, **kwargs):
        if isinstance(image, (Image.Image, str)) and isinstance(question, str):
            inputs = {"image": image, "question": question}
        else:
            # Also supports the following input formats:
            # - {"image": image, "question": question}
            # - [{"image": image, "question": question}, ...]
            # - generators and datasets
            inputs = image
        results = super().__call__(inputs, **kwargs)
        return results

    def preprocess(self, inputs):
        model_inputs = self.vle_processor(text=inputs["question"], images=inputs["image"], return_tensors="pt", padding=True)
        return model_inputs

    def _forward(self, model_inputs):
        model_outputs = self.model(**model_inputs)
        return model_outputs

    def postprocess(self, model_outputs, top_k=1):
        # Clamp top_k to the size of the answer vocabulary.
        if top_k > self.model.num_vqa_labels:
            top_k = self.model.num_vqa_labels
        probs = torch.softmax(model_outputs["logits"], dim=-1)
        probs, preds = torch.sort(probs, descending=True)
        probs = probs[:, :top_k].tolist()[0]
        preds = preds[:, :top_k].tolist()[0]
        return [{"score": score, "answer": self.model.config.id2label[pred]} for score, pred in zip(probs, preds)]

class VLEForPBCPipeline(Pipeline):
    """Scores each image patch against the text (PBC) and overlays the per-patch probabilities on the image."""

    def __init__(self, vle_processor, *args, **kwargs):
        self.vle_processor = vle_processor
        self.id2label = {0: "False", 1: "True"}
        super().__init__(*args, **kwargs)

    def _sanitize_parameters(self, **kwargs):
        preprocess_params, forward_params, postprocess_params = {}, {}, {}
        return preprocess_params, forward_params, postprocess_params

    def __call__(self, image: Union["Image.Image", str], text: str = None, **kwargs):
        if isinstance(image, (Image.Image, str)) and isinstance(text, str):
            inputs = {"image": image, "text": text}
        else:
            # Also supports the following input formats:
            # - {"image": image, "text": text}
            # - [{"image": image, "text": text}, ...]
            # - generators and datasets
            inputs = image
        results = super().__call__(inputs, **kwargs)
        return results

    def preprocess(self, inputs):
        model_inputs = self.vle_processor(text=inputs["text"], images=inputs["image"], return_tensors="pt", padding=True)
        # Pass the raw image along so postprocess can paint the heatmap on it;
        # for this pipeline the image should be a PIL.Image, not a file path.
        return model_inputs, inputs["image"]

    def _forward(self, model_inputs):
        model_outputs = self.model(**model_inputs[0])
        return model_outputs, model_inputs[1]

    def postprocess(self, model_outputs):
        probs = torch.softmax(model_outputs[0]["logits"], dim=-1)
        probs = probs.tolist()[0]
        new_image = self.paint_in_image(model_outputs[0]["logits"], model_outputs[1])
        return {"score": probs, "image": new_image}

    def paint_in_image(self, logits, raw_image):
        image_back = deepcopy(raw_image)
        raw_image_size = image_back.size
        resized_image_size = self.model.config.vision_config.image_size
        patch_size = self.model.config.vision_config.patch_size
        # Positive-class logit per patch, softmaxed over patches and reshaped
        # into the 2D patch grid.
        probs = torch.softmax(logits.detach()[0, :, 1].to("cpu"), dim=-1).numpy().reshape(-1, resized_image_size // patch_size)
        # Render the probability grid as a grayscale heatmap; vmin clips the
        # lowest ~40% of the probability range to black.
        plt.close("all")
        plt.axis("off")
        plt.imshow(probs, cmap="gray", interpolation="none", vmin=(probs.max() - probs.min()) * 2 / 5 + probs.min(), alpha=0.7)
        plt.xticks([])
        plt.yticks([])
        buf = io.BytesIO()
        plt.savefig(buf, dpi=100, transparent=True, bbox_inches="tight", pad_inches=0)
        image_front = Image.open(buf)

        def filter_image_front(img: Image.Image):
            # Make bright (low-probability) pixels more transparent so the
            # overlay highlights the high-probability patches.
            width, height = img.width, img.height
            for x in range(width):
                for y in range(height):
                    r, g, b, a = img.getpixel((x, y))
                    a = int(a * (1 - r / 255))
                    img.putpixel((x, y), (r, g, b, a))
            return img

        # Resize the heatmap to the original image and alpha-composite it on top.
        image_front = filter_image_front(image_front).resize(raw_image_size)
        image_back.paste(image_front, (0, 0), image_front)
        mixed_image = image_back.resize(raw_image_size)
        buf.close()
        return mixed_image
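
# A minimal usage sketch for VLEForPBCPipeline. The checkpoint name and the
# VLEForPBC / VLEProcessor import are assumptions, not definitions from this
# file; the image must be a PIL.Image so the heatmap can be painted onto it.
def _demo_pbc():
    from models.VLE import VLEForPBC, VLEProcessor  # assumed import path and class

    model_name = "hfl/vle-base-for-pbc"  # assumed checkpoint name
    model = VLEForPBC.from_pretrained(model_name)
    vle_processor = VLEProcessor.from_pretrained(model_name)
    pbc_pipeline = VLEForPBCPipeline(model=model, device="cpu", vle_processor=vle_processor)

    image = Image.open("pics/dogs.png")
    text = "there are dogs on the grass"
    result = pbc_pipeline(image=image, text=text)
    result["image"].save("pbc_heatmap.png")  # original image with the patch heatmap overlaid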

class VLEForITMPipeline(Pipeline):
    """Image-text matching: predicts whether a caption matches an image."""

    def __init__(self, vle_processor, *args, **kwargs):
        self.vle_processor = vle_processor
        self.id2label = {0: "False", 1: "True"}
        super().__init__(*args, **kwargs)

    def _sanitize_parameters(self, **kwargs):
        preprocess_params, forward_params, postprocess_params = {}, {}, {}
        return preprocess_params, forward_params, postprocess_params

    def __call__(self, image: Union["Image.Image", str], text: str = None, **kwargs):
        if isinstance(image, (Image.Image, str)) and isinstance(text, str):
            inputs = {"image": image, "text": text}
        else:
            # Also supports the following input formats:
            # - {"image": image, "text": text}
            # - [{"image": image, "text": text}, ...]
            # - generators and datasets
            inputs = image
        results = super().__call__(inputs, **kwargs)
        return results

    def preprocess(self, inputs):
        model_inputs = self.vle_processor(text=inputs["text"], images=inputs["image"], return_tensors="pt", padding=True)
        return model_inputs

    def _forward(self, model_inputs):
        model_outputs = self.model(**model_inputs)
        return model_outputs

    def postprocess(self, model_outputs):
        probs = torch.softmax(model_outputs["logits"], dim=-1)
        preds = torch.argmax(probs, dim=-1)
        probs = probs.tolist()[0]
        preds = self.id2label[preds.tolist()[0]]
        return {"score": probs, "match": preds}