# vaultgemma-1b / app.py
import gradio as gr
from transformers import pipeline
import torch
import spaces
# Load the VaultGemma-1B text-generation pipeline on the GPU in half precision
pipe = pipeline("text-generation", model="google/vaultgemma-1b", device="cuda", torch_dtype=torch.float16)
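# Note: on machines without a GPU, the same pipeline can be built on CPU instead
# (an assumption for local testing, not part of the original Space), e.g.:
# pipe = pipeline("text-generation", model="google/vaultgemma-1b", device="cpu", torch_dtype=torch.float32)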
# Define the chat function
@spaces.GPU(duration=120)
def chat(message, history):
    # Format the conversation history for the model
    # (assumes the tuple-style history that gr.ChatInterface passes by default)
    prompt = ""
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
    prompt += f"User: {message}\nAssistant:"
    # Generate a response; the pipeline output includes the prompt plus the new tokens
    response = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.9)
    generated_text = response[0]["generated_text"]
    # Keep only the assistant's final turn; stop early if the model starts a new "User:" turn
    assistant_response = generated_text.split("Assistant:")[-1].split("User:")[0].strip()
    return assistant_response
# Create the Gradio chat interface
demo = gr.ChatInterface(
    fn=chat,
    title="VaultGemma-1B Chatbot",
    description="A chatbot powered by Google's VaultGemma-1B model.",
    theme="soft",
    examples=[
        "What is the capital of France?",
        "Tell me a joke.",
        "Explain quantum computing in simple terms.",
    ],
    concurrency_limit=1,
)
# Launch the app
demo.launch()
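# To run locally (assuming the usual dependencies are installed):
#   pip install gradio transformers torch spaces
#   python app.py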