import gradio as gr
from transformers import pipeline
import torch
import spaces

# Load the text-generation pipeline once at startup
pipe = pipeline(
    "text-generation",
    model="google/vaultgemma-1b",
    device="cuda",
    torch_dtype=torch.float16,
)
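# Note: Gemma-family checkpoints are gated on the Hugging Face Hub, so the
# first download may require accepting the model license and authenticating
# (for example via an HF_TOKEN secret on the Space).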
# Define the chat function; the @spaces.GPU decorator requests a GPU for each
# call when the app runs on ZeroGPU hardware (the `spaces` import is otherwise unused)
@spaces.GPU
def chat(message, history):
    # Rebuild the conversation history as a plain-text prompt the model can continue
    prompt = ""
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
    prompt += f"User: {message}\nAssistant:"

    # Generate a response with sampling
    response = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    generated_text = response[0]["generated_text"]

    # The pipeline returns the prompt plus the completion, so keep only the
    # text after the final "Assistant:" marker
    assistant_response = generated_text.split("Assistant:")[-1].strip()
    return assistant_response
# Create the Gradio chat interface
demo = gr.ChatInterface(
    fn=chat,
    title="VaultGemma-1B Chatbot",
    description="A chatbot powered by Google's VaultGemma-1B model.",
    theme="soft",
    examples=[
        "What is the capital of France?",
        "Tell me a joke.",
        "Explain quantum computing in simple terms.",
    ],
    concurrency_limit=1,
)
# Launch the app
demo.launch()
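# A minimal sketch of querying the running app programmatically with
# gradio_client (an assumption: the app is reachable at Gradio's default
# local URL; gr.ChatInterface exposes its handler under the "/chat" endpoint):
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   reply = client.predict("What is the capital of France?", api_name="/chat")
#   print(reply)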