Spaces:

umint
/

gemma-3-270m

Running

gemma-3-270m / app.py

github-actions[bot]

Automatically deploy

d9203d2 29 days ago

5.25 kB

	#
	# SPDX-FileCopyrightText: Hadad <[email protected]>
	# SPDX-License-Identifier: Apache-2.0
	#

	import os
	from ollama import AsyncClient
	import gradio as gr

	async def playground(
	    message,
	    history,
	    num_ctx,
	    temperature,
	    repeat_penalty,
	    min_p,
	    top_k,
	    top_p
	):
	    """
	    Stream a chat reply from the Ollama server for the Gradio chat UI.

	    This is an async generator: after every streamed chunk it yields the
	    full reply text accumulated so far.

	    Args:
	        message: The new user message. If it is not a string, or is
	            blank after stripping whitespace, a single empty list is
	            yielded and no request is made.
	        history: Earlier conversation entries. Only dict entries that
	            contain both "role" and "content" are forwarded; anything
	            else is skipped. ``None`` is treated as an empty history.
	        num_ctx: Context window size, converted with ``int``.
	        temperature: Sampling temperature, converted with ``float``.
	        repeat_penalty: Repetition penalty, converted with ``float``.
	        min_p: Min-p threshold, converted with ``float``.
	        top_k: Top-k cutoff, converted with ``int``.
	        top_p: Top-p threshold, converted with ``float``.

	    Yields:
	        The accumulated reply text (``str``), or ``[]`` for invalid input.

	    Environment:
	        OLLAMA_API_BASE_URL: Passed to the client as ``host``.
	        OLLAMA_API_KEY: Sent as a Bearer token when it is set.
	    """
	    # Guard clause: reject non-string or blank input before any network use.
	    if not isinstance(message, str) or not message.strip():
	        yield []
	        return

	    # Attach the Authorization header only when a key is configured;
	    # formatting an unset variable would send the literal "Bearer None".
	    api_key = os.getenv("OLLAMA_API_KEY")
	    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}

	    client = AsyncClient(
	        host=os.getenv("OLLAMA_API_BASE_URL"),
	        headers=headers
	    )

	    # Keep only role/content from well-formed history entries, then append
	    # the new user turn.
	    messages = [
	        {"role": item["role"], "content": item["content"]}
	        for item in (history or [])
	        if isinstance(item, dict) and "role" in item and "content" in item
	    ]
	    messages.append({"role": "user", "content": message})

	    options = {
	        "num_ctx": int(num_ctx),
	        "temperature": float(temperature),
	        "repeat_penalty": float(repeat_penalty),
	        "min_p": float(min_p),
	        "top_k": int(top_k),
	        "top_p": float(top_p)
	    }

	    stream = await client.chat(
	        model="gemma3:270m",
	        messages=messages,
	        options=options,
	        stream=True
	    )

	    response = ""
	    async for part in stream:
	        # `or ""` guards against a chunk whose content is present but None,
	        # which would otherwise make the concatenation raise TypeError.
	        response += part.get("message", {}).get("content", "") or ""
	        yield response

	# Page layout: a sidebar holding the project description and the sampling
	# sliders, plus a chat panel whose handler is `playground`.
	with gr.Blocks(
	    fill_height=True,
	    fill_width=True
	) as app:
	    with gr.Sidebar():
	        gr.Markdown("## Ollama Playground by UltimaX Intelligence")
	        # Static description shown to visitors; the literal is kept verbatim.
	        gr.HTML(
	            """
	This space run the <b><a href=
	"https://huggingface.co/google/gemma-3-270m"
	target="_blank">Gemma 3 (270M)</a></b> model from
	<b>Google</b>, hosted on a server using <b>Ollama</b> and
	accessed via the <b>Ollama Python SDK</b>.<br><br>

	Official <b>documentation</b> for using Ollama with the
	Python SDK can be found
	<b><a href="https://github.com/ollama/ollama-python"
	target="_blank">here</a></b>.<br><br>

	Gemma 3 (270M) runs entirely on <b>CPU</b>, utilizing only a
	<b>single core</b>. Thanks to its small size, the model can
	operate efficiently on minimal hardware.<br><br>

	The Gemma 3 (270M) model can also be viewed or downloaded
	from the official Ollama website
	<b><a href="https://ollama.com/library/gemma3:270m"
	target="_blank">here</a></b>.<br><br>

	While Gemma 3 has multimodal capabilities, running it on CPU
	with a relatively small number of parameters may limit its
	contextual understanding. For this reason, the upload
	functionality has been disabled.<br><br>

	<b>Like this project? You can support me by buying a
	<a href="https://ko-fi.com/hadad" target="_blank">
	coffee</a></b>.
	"""
	        )
	        gr.Markdown("---")
	        gr.Markdown("## Model Parameters")

	        # Each slider below is passed to `playground` through
	        # `additional_inputs`. The empty Markdown components between them
	        # are presumably vertical spacers.
	        num_ctx = gr.Slider(
	            minimum=512,
	            maximum=1024,
	            value=512,
	            step=128,
	            label="Context Length (num_ctx)",
	            info="Maximum context window size. Limited to CPU usage."
	        )
	        gr.Markdown("")
	        temperature = gr.Slider(
	            minimum=0.1,
	            maximum=2.0,
	            value=1.0,
	            step=0.1,
	            label="Temperature",
	            info="Controls randomness in generation"
	        )
	        gr.Markdown("")
	        repeat_penalty = gr.Slider(
	            minimum=0.1,
	            maximum=2.0,
	            value=1.0,
	            step=0.1,
	            label="Repeat Penalty",
	            info="Penalty for repeating tokens"
	        )
	        gr.Markdown("")
	        min_p = gr.Slider(
	            minimum=0.0,
	            maximum=1.0,
	            value=0.001,
	            step=0.001,
	            label="Min P",
	            info="Minimum probability threshold"
	        )
	        gr.Markdown("")
	        top_k = gr.Slider(
	            minimum=0,
	            maximum=100,
	            value=64,
	            step=1,
	            label="Top K",
	            info="Number of top tokens to consider"
	        )
	        gr.Markdown("")
	        top_p = gr.Slider(
	            minimum=0.0,
	            maximum=1.0,
	            value=0.95,
	            step=0.05,
	            label="Top P",
	            info="Cumulative probability threshold"
	        )

	    # Chat panel. The order of `additional_inputs` must match the order of
	    # the extra parameters of `playground` after (message, history).
	    gr.ChatInterface(
	        fn=playground,
	        additional_inputs=[
	            num_ctx,
	            temperature,
	            repeat_penalty,
	            min_p,
	            top_k,
	            top_p
	        ],
	        chatbot=gr.Chatbot(
	            # Plain "|" here: "\|" is not a valid Python escape sequence and
	            # would put a stray backslash into the visible label.
	            label="Ollama | Gemma 3 (270M)",
	            type="messages",
	            show_copy_button=True,
	            scale=1
	        ),
	        type="messages",
	        examples=[
	            ["Please introduce yourself."],
	            ["What caused World War II?"],
	            ["Give me a short introduction to large language model."],
	            ["Explain about quantum computers."]
	        ],
	        cache_examples=False,
	        show_api=False
	    )

	# Start the Gradio server, bound to the address "0.0.0.0", with the
	# `pwa` launch option switched on.
	app.launch(server_name="0.0.0.0", pwa=True)