import gradio as gr
from transformers import pipeline
import torch
import spaces

# Load the text-generation pipeline once at startup
pipe = pipeline(
    "text-generation",
    model="google/vaultgemma-1b",
    device="cuda",
    torch_dtype=torch.float16,
)
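# Note: Gemma-family checkpoints are gated on the Hugging Face Hub, so the
# first download may require accepting the model license and authenticating
# (for example via an HF_TOKEN secret on the Space).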
# Define the chat function; the @spaces.GPU decorator requests a GPU for each
# call when the app runs on ZeroGPU hardware (the `spaces` import is otherwise unused)
@spaces.GPU
def chat(message, history):
    # Rebuild the conversation history as a plain-text prompt the model can continue
    prompt = ""
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
    prompt += f"User: {message}\nAssistant:"

    # Generate a response with sampling
    response = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    generated_text = response[0]["generated_text"]

    # The pipeline returns the prompt plus the completion, so keep only the
    # text after the final "Assistant:" marker
    assistant_response = generated_text.split("Assistant:")[-1].strip()
    return assistant_response
# Create the Gradio chat interface
demo = gr.ChatInterface(
    fn=chat,
    title="VaultGemma-1B Chatbot",
    description="A chatbot powered by Google's VaultGemma-1B model.",
    theme="soft",
    examples=[
        "What is the capital of France?",
        "Tell me a joke.",
        "Explain quantum computing in simple terms.",
    ],
    concurrency_limit=1,
)
# Launch the app
demo.launch()
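# A minimal sketch of querying the running app programmatically with
# gradio_client (an assumption: the app is reachable at Gradio's default
# local URL; gr.ChatInterface exposes its handler under the "/chat" endpoint):
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   reply = client.predict("What is the capital of France?", api_name="/chat")
#   print(reply)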