Spaces:
Running
Running
| # | |
| # SPDX-FileCopyrightText: Hadad <[email protected]> | |
| # SPDX-License-Identifier: Apache-2.0 | |
| # | |
| import os | |
| from ollama import AsyncClient | |
| import gradio as gr | |
async def playground(
    message,
    history,
    num_ctx,
    temperature,
    repeat_penalty,
    min_p,
    top_k,
    top_p
):
    """
    Stream a chat completion from the Ollama server, for use as the
    callback of the Gradio chat interface.

    Args:
        message: Latest user input. Anything that is not a non-blank
            string ends the turn immediately with an empty reply.
        history: Previous turns. Only dict entries that carry both
            "role" and "content" are forwarded; None is treated as an
            empty history.
        num_ctx: Context window size, cast to int.
        temperature: Sampling temperature, cast to float.
        repeat_penalty: Repetition penalty, cast to float.
        min_p: Min-p threshold, cast to float.
        top_k: Top-k cutoff, cast to int.
        top_p: Top-p threshold, cast to float.

    Yields:
        An empty list when the message is blank; otherwise the reply
        text accumulated so far, once per streamed chunk.
    """
    if not isinstance(message, str) or not message.strip():
        yield []
        return

    # Attach the Authorization header only when a key is configured;
    # formatting an unset variable would send the literal "Bearer None".
    api_key = os.getenv("OLLAMA_API_KEY")
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    client = AsyncClient(
        host=os.getenv("OLLAMA_API_BASE_URL"),
        headers=headers
    )

    # Forward only well-formed turns, then append the new user message.
    messages = [
        {"role": item["role"], "content": item["content"]}
        for item in (history or [])
        if isinstance(item, dict) and "role" in item and "content" in item
    ]
    messages.append({"role": "user", "content": message})

    stream = await client.chat(
        model="gemma3:270m",
        messages=messages,
        options={
            "num_ctx": int(num_ctx),
            "temperature": float(temperature),
            "repeat_penalty": float(repeat_penalty),
            "min_p": float(min_p),
            "top_k": int(top_k),
            "top_p": float(top_p)
        },
        stream=True
    )

    response = ""
    async for part in stream:
        # A chunk may carry no text (content missing or None); skip the
        # concatenation then, but still yield so the UI keeps updating.
        chunk = (part.get("message") or {}).get("content")
        if chunk:
            response += chunk
        yield response
# Page layout: a sidebar holding the project notes and the sampling
# controls, plus the chat interface that passes those controls to
# playground() as additional inputs.
with gr.Blocks(fill_height=True, fill_width=True) as app:
    with gr.Sidebar():
        gr.Markdown("## Ollama Playground by UltimaX Intelligence")
        # The literal's lines stay flush-left so that no leading
        # whitespace is added to the markup.
        gr.HTML(
            """
This space run the <b><a href=
"https://huggingface.co/google/gemma-3-270m"
target="_blank">Gemma 3 (270M)</a></b> model from
<b>Google</b>, hosted on a server using <b>Ollama</b> and
accessed via the <b>Ollama Python SDK</b>.<br><br>
Official <b>documentation</b> for using Ollama with the
Python SDK can be found
<b><a href="https://github.com/ollama/ollama-python"
target="_blank">here</a></b>.<br><br>
Gemma 3 (270M) runs entirely on <b>CPU</b>, utilizing only a
<b>single core</b>. Thanks to its small size, the model can
operate efficiently on minimal hardware.<br><br>
The Gemma 3 (270M) model can also be viewed or downloaded
from the official Ollama website
<b><a href="https://ollama.com/library/gemma3:270m"
target="_blank">here</a></b>.<br><br>
While Gemma 3 has multimodal capabilities, running it on CPU
with a relatively small number of parameters may limit its
contextual understanding. For this reason, the upload
functionality has been disabled.<br><br>
<b>Like this project? You can support me by buying a
<a href="https://ko-fi.com/hadad" target="_blank">
coffee</a></b>.
"""
        )
        gr.Markdown("---")
        gr.Markdown("## Model Parameters")

        # One slider per generation option; an empty Markdown element
        # sits between consecutive sliders.
        num_ctx = gr.Slider(
            minimum=512, maximum=1024, value=512, step=128,
            label="Context Length (num_ctx)",
            info="Maximum context window size. Limited to CPU usage."
        )
        gr.Markdown("")
        temperature = gr.Slider(
            minimum=0.1, maximum=2.0, value=1.0, step=0.1,
            label="Temperature",
            info="Controls randomness in generation"
        )
        gr.Markdown("")
        repeat_penalty = gr.Slider(
            minimum=0.1, maximum=2.0, value=1.0, step=0.1,
            label="Repeat Penalty",
            info="Penalty for repeating tokens"
        )
        gr.Markdown("")
        min_p = gr.Slider(
            minimum=0.0, maximum=1.0, value=0.001, step=0.001,
            label="Min P",
            info="Minimum probability threshold"
        )
        gr.Markdown("")
        top_k = gr.Slider(
            minimum=0, maximum=100, value=64, step=1,
            label="Top K",
            info="Number of top tokens to consider"
        )
        gr.Markdown("")
        top_p = gr.Slider(
            minimum=0.0, maximum=1.0, value=0.95, step=0.05,
            label="Top P",
            info="Cumulative probability threshold"
        )

    # Chat area: the slider values are handed to playground() in the
    # same order as its parameters after (message, history).
    gr.ChatInterface(
        fn=playground,
        additional_inputs=[
            num_ctx, temperature, repeat_penalty, min_p, top_k, top_p
        ],
        chatbot=gr.Chatbot(
            label="Ollama | Gemma 3 (270M)",
            type="messages",
            show_copy_button=True,
            scale=1
        ),
        type="messages",
        examples=[
            ["Please introduce yourself."],
            ["What caused World War II?"],
            ["Give me a short introduction to large language model."],
            ["Explain about quantum computers."]
        ],
        cache_examples=False,
        show_api=False
    )
# Start the server bound to 0.0.0.0 with the pwa option switched on.
app.launch(server_name="0.0.0.0", pwa=True)