szymmon committed
Commit 7a9fb99 · 1 Parent(s): 62733d8
Files changed (1):
  1. app.py +38 -48
app.py CHANGED
@@ -5,7 +5,7 @@ import logging
 
 logger = logging.getLogger(__name__)
 
-class CustomModelChat:
+class SimpleVLMInterface:
     def __init__(self):
         self.model = None
         self.processor = None
@@ -21,58 +21,41 @@ class CustomModelChat:
             )
             self.processor = AutoProcessor.from_pretrained(model_id)
 
-            # Load your custom adapter
+            # Load custom adapter
            adapter_path = "smolvlm-instruct-trl-sft-ChartQA"
            self.model.load_adapter(adapter_path)
        except Exception as e:
            logger.error(f"Error initializing model: {e}")
            raise
 
-    def process_chat_history(self, history, system_message):
-        # Convert chat history to the format expected by the model
-        messages = [{"role": "system", "content": system_message}]
-
-        for user_msg, assistant_msg in history:
-            if user_msg:
-                messages.append({"role": "user", "content": user_msg})
-            if assistant_msg:
-                messages.append({"role": "assistant", "content": assistant_msg})
-
-        return messages
-
     def generate_response(
         self,
-        message,
-        history,
-        system_message,
+        text_input,
+        image=None,
         max_tokens=512,
         temperature=0.7,
-        top_p=0.95,
-        image=None
     ):
         try:
-            messages = self.process_chat_history(history, system_message)
-            messages.append({"role": "user", "content": message})
-
-            # Prepare the chat template
+            # Prepare the input text
+            messages = [{"role": "user", "content": text_input}]
             chat_input = self.processor.apply_chat_template(
-                messages[1:2], # Exclude system message
+                messages,
                 add_generation_prompt=True
             )
 
-            # Handle image input if provided
+            # Handle image input
             if image is not None:
                 if image.mode != 'RGB':
                     image = image.convert('RGB')
-                # Create a list of None for previous turns and add the current image
-                image_inputs = [None] * (len(messages) - 1) + [image]
+                image_inputs = [image]
             else:
                 image_inputs = None
 
             # Prepare model inputs
             model_inputs = self.processor(
                 text=chat_input,
-                images=image_inputs if image_inputs else None,
+                images=image_inputs,
                 return_tensors="pt",
             ).to(self.model.device)
 
@@ -85,41 +68,48 @@ class CustomModelChat:
                 do_sample=True
             )
 
-            # Trim and decode the response
+            # Process output
             trimmed_generated_ids = [
                 out_ids[len(in_ids):] for in_ids, out_ids in zip(model_inputs.input_ids, generated_ids)
             ]
-
             output_text = self.processor.batch_decode(
                 trimmed_generated_ids,
                 skip_special_tokens=True,
                 clean_up_tokenization_spaces=False
-            )
+            )[0]
 
-            yield output_text[0]
+            return output_text
 
         except Exception as e:
             logger.error(f"Error generating response: {e}")
-            yield f"Error: {str(e)}"
+            return f"Error: {str(e)}"
 
-def create_chat_interface():
-    chat_model = CustomModelChat()
+def create_interface():
+    vlm = SimpleVLMInterface()
 
-    demo = gr.ChatInterface(
-        chat_model.generate_response,
-        additional_inputs=[
-            gr.Textbox(value="You are a helpful assistant.", label="System message"),
-            gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
-            gr.Image(type="pil", label="Upload Image (optional)")
-        ],
-        title="Custom SmolVLM Chat",
-        description="Chat interface using custom fine-tuned SmolVLM model"
-    )
+    with gr.Blocks(title="Simple VLM Interface") as demo:
+        with gr.Row():
+            with gr.Column():
+                image_input = gr.Image(type="pil", label="Upload Image (optional)")
+                text_input = gr.Textbox(label="Enter your text", lines=2)
+
+                with gr.Row():
+                    max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens")
+                    temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+                    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
+
+                submit_btn = gr.Button("Generate Response")
+
+            output_text = gr.Textbox(label="Response", lines=4)
+
+        submit_btn.click(
+            fn=vlm.generate_response,
+            inputs=[text_input, image_input, max_tokens, temperature, top_p],
+            outputs=output_text
+        )
 
     return demo
 
 if __name__ == "__main__":
-    demo = create_chat_interface()
+    demo = create_interface()
     demo.launch()
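For a quick sanity check, the refactored class can be exercised without the Gradio UI. A minimal sketch, assuming the model-loading code elided between the first two hunks runs at construction time, and that chart.png is a hypothetical local file:

from PIL import Image
from app import SimpleVLMInterface  # assumes this script sits next to app.py

vlm = SimpleVLMInterface()       # assumption: the elided initializer populates model/processor
image = Image.open("chart.png")  # hypothetical example input

# New single-turn signature: text first, image as an optional keyword,
# sampling knobs matching the Gradio sliders. The method now returns a
# plain string (the old version yielded chunks for gr.ChatInterface).
reply = vlm.generate_response(
    "What is the highest value in this chart?",
    image=image,
    max_tokens=256,
    temperature=0.7,
    top_p=0.95,
)
print(reply)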
 
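One behavioral note on the new single-message path: when an image is supplied, SmolVLM-style processors expect an image placeholder inside the message itself so that apply_chat_template can emit the image token(s); passing a plain string as content and handing images= to the processor separately may leave the rendered prompt without a slot for the pixels. A hedged sketch of the pattern from the SmolVLM-Instruct model card (model id taken from the card; the question text is illustrative):

from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")

# Content is a list of typed parts; the {"type": "image"} entry marks
# where the image token(s) are inserted when the template is rendered.
messages = [{
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What trend does this chart show?"},
    ],
}]

prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
# The rendered prompt now carries the image placeholder; the processor
# then pairs it with the actual pixels:
# inputs = processor(text=prompt, images=[image], return_tensors="pt")

If the deployed app misbehaves on image inputs, the rendered prompt is the first thing worth inspecting.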