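"""Gradio chat demo for swiss-ai/Apertus-8B-Instruct-2509 on a Hugging Face Space."""
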
import os

import spaces  # import before torch so ZeroGPU can patch CUDA initialization
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
MODEL_NAME = "swiss-ai/Apertus-8B-Instruct-2509"

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load model and tokenizer; HF_TOKEN (a Space secret) is only needed if the
# model repo is gated or private.
HF_TOKEN = os.getenv("HF_TOKEN")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, token=HF_TOKEN).to(device)
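
# On ZeroGPU Spaces, @spaces.GPU allocates a GPU for the duration of each call
# to the decorated function.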
@spaces.GPU
def predict(message, history):
    # Rebuild the conversation: history is a list of (user, assistant) pairs
    # (gr.ChatInterface's tuple-style history format).
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current user message
    messages.append({"role": "user", "content": message})

    # Render the conversation with the model's chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tokenize; the chat template has already added the special tokens
    model_inputs = tokenizer([text], return_tensors="pt", add_special_tokens=False).to(model.device)

    # Generate (greedy decoding by default) and strip the prompt tokens
    generated_ids = model.generate(**model_inputs, max_new_tokens=1000)
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
    response = tokenizer.decode(output_ids, skip_special_tokens=True)
    return response
# Create the chat UI
chatbot = gr.ChatInterface(
    predict,
    title="Apertus-8B Chatbot",
    description="Chat with the Apertus-8B-Instruct model. Enter your message and get a response.",
    examples=[
        "Explain quantum computing in simple terms",
        "How do I make a sandwich?",
        "What is the capital of France?",
        "Write a short poem about the ocean",
    ],
)
# Launch the app
chatbot.launch()