idefics2_deploy

Runtime error

App Files Community

jihadzakki committed on Jun 10, 2024

Commit

230a39b

verified ·

1 Parent(s): 7678417

Update app_dialogue.py

Browse files

Files changed (1) hide show

app_dialogue.py +46 -134

app_dialogue.py CHANGED Viewed

@@ -2,15 +2,15 @@ import os
 import subprocess
 # Install flash attention
-subprocess.run(
-    "pip install flash-attn --no-build-isolation",
-    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-    shell=True,
-)
 import copy
-import spaces
 import time
 import torch
@@ -21,19 +21,47 @@ from PIL import Image
 import io
 import datasets
 import gradio as gr
 from transformers import AutoProcessor, TextIteratorStreamer
 from transformers import Idefics2ForConditionalGeneration
 DEVICE = torch.device("cuda")
-MODELS = {
-    "idefics2-8b-chatty": Idefics2ForConditionalGeneration.from_pretrained(
-        "HuggingFaceM4/idefics2-8b-chatty",
-        torch_dtype=torch.bfloat16,
-        _attn_implementation="flash_attention_2",
-    ).to(DEVICE),
-}
 PROCESSOR = AutoProcessor.from_pretrained(
     "HuggingFaceM4/idefics2-8b",
 )
@@ -58,116 +86,6 @@ SYSTEM_PROMPT = [
         ],
     }
 ]
-examples_path = os.path.dirname(__file__)
-EXAMPLES = [
-    [
-        {
-            "text": "For 2024, the interest expense is twice what it was in 2014, and the long-term debt is 10% higher than its 2015 level. Can you calculate the combined total of the interest and long-term debt for 2024?",
-            "files": [f"{examples_path}/example_images/mmmu_example_2.png"],
-        }
-    ],
-    [
-        {
-            "text": "What's in the image?",
-            "files": [f"{examples_path}/example_images/plant_bulb.webp"],
-        }
-    ],
-    [
-        {
-            "text": "Describe the image",
-            "files": [f"{examples_path}/example_images/baguettes_guarding_paris.png"],
-        }
-    ],
-    [
-        {
-            "text": "Read what's written on the paper",
-            "files": [f"{examples_path}/example_images/paper_with_text.png"],
-        }
-    ],
-    [
-        {
-            "text": "The respective main characters of these two movies meet in real life. Imagine their discussion. It should be sassy, and the beginning of a mysterious adventure.",
-            "files": [f"{examples_path}/example_images/barbie.jpeg", f"{examples_path}/example_images/oppenheimer.jpeg"],
-        }
-    ],
-    [
-        {
-            "text": "Can you explain this meme?",
-            "files": [f"{examples_path}/example_images/running_girl_meme.webp"],
-        }
-    ],
-    [
-        {
-            "text": "What happens to fish if pelicans increase?",
-            "files": [f"{examples_path}/example_images/ai2d_example_2.jpeg"],
-        }
-    ],
-    [
-        {
-            "text": "Give an art-critic description of this well known painting",
-            "files": [f"{examples_path}/example_images/Van-Gogh-Starry-Night.jpg"],
-        }
-    ],
-    [
-        {
-            "text": "Chase wants to buy 4 kilograms of oval beads and 5 kilograms of star-shaped beads. How much will he spend?",
-            "files": [f"{examples_path}/example_images/mmmu_example.jpeg"],
-        }
-    ],
-    [
-        {
-            "text": "Write an online ad for that product.",
-            "files": [f"{examples_path}/example_images/shampoo.jpg"],
-        }
-    ],
-    [
-        {
-            "text": "Describe this image in detail and explain why it is disturbing.",
-            "files": [f"{examples_path}/example_images/cat_cloud.jpeg"],
-        }
-    ],
-    [
-        {
-            "text": "Why is this image cute?",
-            "files": [
-                f"{examples_path}/example_images/kittens-cats-pet-cute-preview.jpg"
-            ],
-        }
-    ],
-    [
-        {
-            "text": "What is formed by the deposition of either the weathered remains of other rocks?",
-            "files": [f"{examples_path}/example_images/ai2d_example.jpeg"],
-        }
-    ],
-    [
-        {
-            "text": "What's funny about this image?",
-            "files": [f"{examples_path}/example_images/pope_doudoune.webp"],
-        }
-    ],
-    [
-        {
-            "text": "Can this happen in real life?",
-            "files": [f"{examples_path}/example_images/elephant_spider_web.webp"],
-        }
-    ],
-    [
-        {
-            "text": "What's unusual about this image?",
-            "files": [f"{examples_path}/example_images/dragons_playing.png"],
-        }
-    ],
-    [
-        {
-            "text": "Why is that image comical?",
-            "files": [f"{examples_path}/example_images/eye_glasses.jpeg"],
-        }
-    ],
-]
-BOT_AVATAR = "IDEFICS_logo.png"
 # Chatbot utils
 def turn_is_pure_media(turn):
@@ -265,7 +183,7 @@ def extract_images_from_msg_list(msg_list):
     return all_images
-@spaces.GPU(duration=180)
 def model_inference(
     user_prompt,
     chat_history,
@@ -324,12 +242,6 @@ def model_inference(
     inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
     generation_args.update(inputs)
-    # # The regular non streaming generation mode
-    # _ = generation_args.pop("streamer")
-    # generated_ids = MODELS[model_selector].generate(**generation_args)
-    # generated_text = PROCESSOR.batch_decode(generated_ids[:, generation_args["input_ids"].size(-1): ], skip_special_tokens=True)[0]
-    # return generated_text
     # The streaming generation mode
     thread = Thread(
         target=MODELS[model_selector].generate,
@@ -414,8 +326,8 @@ top_p = gr.Slider(
 chatbot = gr.Chatbot(
-    label="Idefics2-Chatty",
-    avatar_images=[None, BOT_AVATAR],
     height=450,
 )
@@ -464,7 +376,7 @@ with gr.Blocks(
     gr.ChatInterface(
         fn=model_inference,
         chatbot=chatbot,
-        examples=EXAMPLES,
         multimodal=True,
         cache_examples=False,
         additional_inputs=[
@@ -477,4 +389,4 @@ with gr.Blocks(
         ],
     )
-demo.launch()

 import subprocess
 # Install flash attention
+# subprocess.run(
+#     "pip install flash-attn --no-build-isolation",
+#     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+#     shell=True,
+# )
 import copy
+# import spaces
 import time
 import torch
 import io
 import datasets
+# import loralib
+# import bitsandbytes
 import gradio as gr
 from transformers import AutoProcessor, TextIteratorStreamer
 from transformers import Idefics2ForConditionalGeneration
+import torch
+from peft import LoraConfig
+from transformers import AutoProcessor, BitsAndBytesConfig, IdeficsForVisionText2Text
 DEVICE = torch.device("cuda")
+USE_LORA = False
+USE_QLORA = True
+if USE_QLORA or USE_LORA:
+    lora_config = LoraConfig(
+        r=8,
+        lora_alpha=8,
+        lora_dropout=0.1,
+        target_modules='.*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$',
+        use_dora=False if USE_QLORA else True,
+        init_lora_weights="gaussian"
+    )
+    if USE_QLORA:
+        bnb_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.float16
+        )
+    MODELS = {
+    "idefics2-8b-vqarad-delta": Idefics2ForConditionalGeneration.from_pretrained(
+        "jihadzakki/idefics2-8b-vqarad-delta",
+        torch_dtype=torch.float16,
+        quantization_config=bnb_config if USE_QLORA else None,
+    )
+    }
 PROCESSOR = AutoProcessor.from_pretrained(
     "HuggingFaceM4/idefics2-8b",
 )
         ],
     }
 ]
 # Chatbot utils
 def turn_is_pure_media(turn):
     return all_images
+# @spaces.GPU(duration=180)
 def model_inference(
     user_prompt,
     chat_history,
     inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
     generation_args.update(inputs)
     # The streaming generation mode
     thread = Thread(
         target=MODELS[model_selector].generate,
 chatbot = gr.Chatbot(
+    label="idefics2-8b-vqarad-delta",
+    # avatar_images=[None, BOT_AVATAR],
     height=450,
 )
     gr.ChatInterface(
         fn=model_inference,
         chatbot=chatbot,
+        # examples=EXAMPLES,
         multimodal=True,
         cache_examples=False,
         additional_inputs=[
         ],
     )
+demo.launch()