Spaces:

saurluca
/

its

Paused

its / app.py

add huggingface login

851f27f verified 2 months ago

1.35 kB

	import os

	import gradio as gr
	import spaces
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from huggingface_hub import login

	# Log in to Hugging Face with the token stored in the Space secrets.
	# Secrets reach the app as environment variables, so the token is read from
	# the environment instead of being passed as a literal placeholder string.
	# NOTE(review): the secret name is assumed to be "hf_token" (the placeholder
	# used previously), with "HF_TOKEN" as a fallback; confirm in the Space settings.
	hf_token = os.environ.get("hf_token") or os.environ.get("HF_TOKEN")
	if hf_token:
	    login(token=hf_token)
	else:
	    # Best effort: public models still load without authentication.
	    print("No Hugging Face token found in the environment; skipping login")

	# Alternative 4-bit checkpoint:
	# model_name = "saurluca/Apertus-8B-Instruct-2509-bnb-4bit"
	model_name = "swiss-ai/Apertus-8B-Instruct-2509"
	device = "cuda"  # use "cuda" for GPU or "cpu" for CPU

	# Load the tokenizer and the model once at import time so every request
	# served by the app reuses the same instances.
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

	@spaces.GPU
	def greet(prompt: str):
	    """Return the model's reply to ``prompt`` as plain text.

	    The prompt is wrapped as a single user turn, rendered through the
	    tokenizer's chat template, and passed to ``model.generate``. Only the
	    tokens that follow the prompt in the generated sequence are decoded.

	    Args:
	        prompt: Content of the user message.

	    Returns:
	        The decoded completion, with special tokens skipped.
	    """
	    print("Running")

	    # Render a one-turn conversation into a prompt string (not token ids),
	    # ending with the generation prompt.
	    conversation = [{"role": "user", "content": prompt}]
	    rendered = tokenizer.apply_chat_template(
	        conversation,
	        tokenize=False,
	        add_generation_prompt=True,
	    )

	    # Tokenize the rendered text as-is (no extra special tokens) and move
	    # the tensors to the device the model lives on.
	    encoded = tokenizer([rendered], return_tensors="pt", add_special_tokens=False)
	    encoded = encoded.to(model.device)

	    # Generate the output
	    sequences = model.generate(**encoded, max_new_tokens=32768)

	    # Skip the first len(input_ids) positions so only the tokens after the
	    # prompt are decoded.
	    prompt_length = len(encoded.input_ids[0])
	    completion_ids = sequences[0][prompt_length:]
	    reply = tokenizer.decode(completion_ids, skip_special_tokens=True)

	    print("Finished")
	    return reply

	# Build the web UI: one text box for the prompt, one for the model's reply.
	# The input must be a text component because greet() places it in a chat
	# message as the user's content; a numeric input would pass a float instead.
	demo = gr.Interface(fn=greet, inputs=gr.Text(), outputs=gr.Text())
	demo.launch()