its / app.py
saurluca's picture
add Hugging Face login
851f27f verified
import os

import gradio as gr
import spaces
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer
# Authenticate with the Hugging Face Hub using a token read from the
# environment instead of a hard-coded string: "hf_token" is the NAME of the
# secret, not the credential itself.  Both the conventional upper-case name
# and the lower-case spelling used previously are accepted.
hub_token = os.environ.get("HF_TOKEN") or os.environ.get("hf_token")
if hub_token:
    login(token=hub_token)
else:
    # Without a token only public (non-gated) repositories can be downloaded;
    # say so explicitly rather than failing on a bogus credential.
    print("No Hugging Face token found in HF_TOKEN / hf_token; skipping login.")
# Checkpoint served by this app.  A previously tried alternative (its name
# suggests a bitsandbytes 4-bit variant) was
# "saurluca/Apertus-8B-Instruct-2509-bnb-4bit".
model_name = "swiss-ai/Apertus-8B-Instruct-2509"

# Target device: "cuda" for GPU execution, "cpu" for CPU-only execution.
device = "cuda"

# Load the tokenizer and the causal language model once, at import time,
# then move the model to the selected device.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.to(device)
@spaces.GPU
def greet(prompt: str):
    """Generate the model's reply to a single user prompt.

    The prompt is wrapped as a one-turn chat conversation, rendered with the
    tokenizer's chat template, and passed to ``model.generate``.  Only the
    tokens produced after the prompt are decoded and returned.

    Args:
        prompt: The user's message.

    Returns:
        The decoded completion with special tokens removed.
    """
    print("Running")

    # Build a single-turn conversation and render it to plain text, ending
    # with the generation prompt so the model answers as the assistant.
    conversation = [{"role": "user", "content": prompt}]
    rendered = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

    # The rendered template is tokenized as-is (add_special_tokens=False) and
    # the resulting tensors are moved to the device the model lives on.
    encoded = tokenizer([rendered], return_tensors="pt", add_special_tokens=False)
    encoded = encoded.to(model.device)
    prompt_length = len(encoded.input_ids[0])

    sequences = model.generate(**encoded, max_new_tokens=32768)

    # Drop the leading prompt tokens so that only the newly generated part
    # of the first (and only) sequence is decoded.
    completion_ids = sequences[0][prompt_length:]
    reply = tokenizer.decode(completion_ids, skip_special_tokens=True)

    print("Finished")
    return reply
# Web UI: one text box for the prompt and one text field for the reply.
# The input must be a text component because `greet` expects a string
# prompt; a numeric input would only allow numbers to be submitted.
demo = gr.Interface(
    fn=greet,
    inputs=gr.Textbox(label="Prompt"),
    outputs=gr.Text(),
)
demo.launch()