# Source: Hugging Face Space "cpschat", file app.py (11.5 kB).
# Last commit: "Update app.py" by yogies (37b3fba, verified).
import hmac
import json
import os
from datetime import datetime, date

import gradio as gr
from openai import OpenAI
# ----------------------------------------------------------------------
# Helper to read secrets from the HF Space environment
# ----------------------------------------------------------------------
def _secret(key: str, fallback: str = None) -> str:
    """Look up a secret exposed to the Space as an environment variable.

    Args:
        key: Name of the environment variable to read.
        fallback: Value returned when the variable is not set. ``None``
            (the default) means "no fallback available".

    Returns:
        The environment value when the variable is set (even if it is an
        empty string), otherwise ``fallback``.

    Raises:
        RuntimeError: If the variable is unset and no fallback was given.
    """
    env_value = os.environ.get(key)
    if env_value is None:
        if fallback is None:
            raise RuntimeError(f"Secret '{key}' not found. Please add it to your Space secrets.")
        return fallback
    return env_value
# ----------------------------------------------------------------------
# User Management
# ----------------------------------------------------------------------
def load_users():
    """Load the username -> password mapping from the ``CHAT_USERS`` secret.

    ``CHAT_USERS`` is read as a JSON object such as
    ``{"alice": "password1"}``. When the secret is unset, the fallback
    ``"{}"`` is used, i.e. no users are defined.

    Returns:
        dict: Mapping of username to password. Empty when the secret is
        unset, is not valid JSON, or does not decode to a JSON object.
    """
    users_json = _secret("CHAT_USERS", "{}")
    try:
        users_data = json.loads(users_json)
    except json.JSONDecodeError as exc:
        # Stay best-effort (the app still starts), but say why nobody will
        # be able to log in. Only the parser message is printed, never the
        # secret's content.
        print(f"Warning: CHAT_USERS is not valid JSON ({exc.msg}); no users loaded.")
        return {}
    if not isinstance(users_data, dict):
        # A JSON list/string/number has no username -> password pairs.
        print("Warning: CHAT_USERS must be a JSON object mapping usernames to passwords; no users loaded.")
        return {}
    return dict(users_data)


# Load users once at import time.
VALID_USERS = load_users()
def authenticate_user(username, password):
    """Check a username/password pair against ``VALID_USERS``.

    Args:
        username: Login name submitted by the client.
        password: Password submitted by the client.

    Returns:
        bool: ``True`` only when ``username`` is a key of ``VALID_USERS``
        and ``password`` equals the stored password; ``False`` otherwise,
        including when either credential is not a string.
    """
    expected = VALID_USERS.get(username) if isinstance(username, str) else None
    if not isinstance(expected, str) or not isinstance(password, str):
        # Unknown user, or a non-string credential that can never match.
        return False
    # Compare in constant time so response timing does not reveal how much
    # of a guessed password is correct. Encode to bytes first because
    # compare_digest only accepts ASCII-only str arguments.
    return hmac.compare_digest(expected.encode("utf-8"), password.encode("utf-8"))
# ----------------------------------------------------------------------
# Configuration
# ----------------------------------------------------------------------
# Base URL shared by every OpenRouter-backed entry below.
_OPENROUTER_API_URL = "https://openrouter.ai/api/v1"

# Registry of selectable models, keyed by the label shown in the dropdown.
# Each entry holds:
#   provider   - which secret/API key is used ("huggingface" or "openrouter")
#   model_name - model identifier sent to the API
#   api_url    - base URL of the OpenAI-compatible endpoint
#   translate  - "yes" to answer in English and translate to Indonesian
#                in a second call, "no" to return the answer directly
# Disabled entries are kept as comments so they can be re-enabled quickly.
MODELS = {
    # "Qwen3-4B-Thinking-2507": {
    #     "provider": "huggingface",
    #     "model_name": "Qwen/Qwen3-4B-Thinking-2507:nscale",
    #     "api_url": "https://router.huggingface.co/v1"
    # },
    "Free - NVIDIA Nemotron-nano-9b [EN] + Gemma 3n4b [ID]": {
        "provider": "openrouter",
        "model_name": "nvidia/nemotron-nano-9b-v2:free",
        "api_url": _OPENROUTER_API_URL,
        "translate": "yes",
    },
    # "Free - Gpt-oss-20b [EN] + Gemma 3n4b [ID]": {
    #     "provider": "openrouter",
    #     "model_name": "openai/gpt-oss-20b:free",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate":"yes"
    # },
    "Free - Glm-4.5-air [EN] + Gemma 3n4b [ID]": {
        "provider": "openrouter",
        "model_name": "z-ai/glm-4.5-air:free",
        "api_url": _OPENROUTER_API_URL,
        "translate": "yes",
    },
    "Free - Deepseek-chat-v3.1": {
        "provider": "openrouter",
        "model_name": "deepseek/deepseek-chat-v3.1:free",
        "api_url": _OPENROUTER_API_URL,
        "translate": "no",
    },
    # "Ringan - Gemma-3n4b": {
    #     "provider": "openrouter",
    #     "model_name": "google/gemma-3n-e4b-it:floor",
    #     "api_url": "https://openrouter.ai/api/v1"
    # },
    # "Gpt-oss-20b": {
    #     "provider": "openrouter",
    #     "model_name": "openai/gpt-oss-20b:floor",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate":"no"
    # },
    "Tongyi-deepresearch-30b-a3b": {
        "provider": "openrouter",
        "model_name": "alibaba/tongyi-deepresearch-30b-a3b:floor",
        "api_url": _OPENROUTER_API_URL,
        "translate": "no",
    },
    # "Kompleks - Gpt-oss-120b": {
    #     "provider": "openrouter",
    #     "model_name": "openai/gpt-oss-120b:floor",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate":"no"
    # }
}

# Display names for the model dropdown, in declaration order.
MODEL_NAMES = list(MODELS)
# ----------------------------------------------------------------------
# Core Chat Logic
# ----------------------------------------------------------------------
# Translation pass configuration (always Gemma on OpenRouter).
_TRANSLATOR_API_URL = "https://openrouter.ai/api/v1"
_TRANSLATOR_MODEL = "google/gemma-3n-e4b-it:floor"
_TRANSLATION_SYSTEM_PROMPT = (
    "Translate the following English text to natural, accurate Bahasa Indonesia. "
    "**IMPORTANT: OUTPUT ONLY THE TRANSLATION. NO PREAMBLES, COMMENTS, OR EXPLANATIONS. "
    "Just the Indonesian text."
)
# Prepended to the system message of models whose answer is translated.
_FORCE_ENGLISH_PREFIX = (
    "**HIGHEST PRIORITY: YOU MUST ALWAYS THINK AND RESPOND IN ENGLISH "
    "REGARDLESS OF OTHER INSTRUCTIONS FROM HERE ON.\n"
)
# A translation shorter than _MIN_TRANSLATION_CHARS is only treated as
# broken when the English source has at least _LONG_SOURCE_CHARS characters;
# short answers ("Yes." -> "Ya.") legitimately have short translations.
_MIN_TRANSLATION_CHARS = 10
_LONG_SOURCE_CHARS = 40


def _history_for_api(history):
    """Reduce dict history entries to the role/content pair sent to the API."""
    # NOTE(review): the chat UI may attach extra keys to history entries
    # (presumably e.g. "metadata") that some OpenAI-compatible providers
    # reject - confirm against the Gradio version in use.
    cleaned = []
    for turn in history or []:
        if isinstance(turn, dict) and "role" in turn and "content" in turn:
            cleaned.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Unknown shape: forward unchanged, as before.
            cleaned.append(turn)
    return cleaned


def _translation_is_usable(translated, source):
    """Return True unless the translation is empty or implausibly short."""
    if not translated:
        return False
    too_short = len(translated) < _MIN_TRANSLATION_CHARS
    return not (too_short and len(source) >= _LONG_SOURCE_CHARS)


def _translate_to_indonesian(english_text, max_tokens):
    """Ask the translator model for an Indonesian version of ``english_text``."""
    translator_client = OpenAI(
        base_url=_TRANSLATOR_API_URL,
        api_key=_secret("OPENROUTER_KEY"),
    )
    translation_response = translator_client.chat.completions.create(
        model=_TRANSLATOR_MODEL,
        messages=[
            {"role": "system", "content": _TRANSLATION_SYSTEM_PROMPT},
            # Only the response is translated, not the user's query.
            {"role": "user", "content": english_text},
        ],
        max_tokens=max_tokens,
        stream=False,
    )
    # content can be None; normalise to "" so the caller can fall back.
    return (translation_response.choices[0].message.content or "").strip()


def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    selected_model,
):
    """Answer one chat turn with the selected model.

    For models configured with ``"translate": "yes"`` the model is told to
    answer in English and a second call translates that answer to
    Indonesian; if the translation fails or is unusable the English answer
    is returned instead.

    Args:
        message: The new user message.
        history: Previous turns as role/content dicts.
        system_message: System prompt from the UI.
        max_tokens: Token limit for each API call (coerced to ``int``).
        selected_model: Key into ``MODELS``.

    Returns:
        str: The assistant reply. This function does not raise: any failure
        is printed and returned as ``"Error: <details>"``.
    """
    try:
        model_config = MODELS[selected_model]
        needs_translation = model_config.get("translate") == "yes"
        # UI sliders can deliver floats; the API expects an integer.
        token_limit = int(max_tokens)

        # Pick the API key that matches the provider.
        if model_config["provider"] == "huggingface":
            api_key = _secret("HF_TOKEN")
        else:  # openrouter
            api_key = _secret("OPENROUTER_KEY")
        client = OpenAI(base_url=model_config["api_url"], api_key=api_key)

        if needs_translation:
            final_system_message = _FORCE_ENGLISH_PREFIX + system_message
        else:
            final_system_message = system_message

        # Full conversation: system prompt, prior turns, then the new query.
        messages = [{"role": "system", "content": final_system_message}]
        messages.extend(_history_for_api(history))
        messages.append({"role": "user", "content": message})

        response = client.chat.completions.create(
            model=model_config["model_name"],
            messages=messages,
            max_tokens=token_limit,
            stream=False,
        )
        if not response.choices:
            raise RuntimeError("Model returned no choices.")
        english_response = response.choices[0].message.content
        if not english_response:
            # Surface this as an error instead of returning None/"" to the UI.
            raise RuntimeError("Model returned an empty response.")

        if not needs_translation:
            return english_response

        # The translator client (and its OPENROUTER_KEY lookup) is only
        # created on this path, so non-translating models do not need it.
        try:
            translated = _translate_to_indonesian(english_response, token_limit)
        except Exception as trans_error:
            print(f"Translation error: {trans_error}")
            return english_response  # Fallback to English
        if _translation_is_usable(translated, english_response):
            return translated
        return english_response
    except Exception as e:
        print(f"Error in respond function: {e}")
        return f"Error: {str(e)}"
# ----------------------------------------------------------------------
# Custom Auth Function for Gradio
# ----------------------------------------------------------------------
def gradio_auth(username, password):
    """Login callback passed to ``demo.launch(auth=...)``.

    Delegates to ``authenticate_user`` and returns its result unchanged.
    """
    is_authorized = authenticate_user(username, password)
    return is_authorized
# ----------------------------------------------------------------------
# UI Layout
# ----------------------------------------------------------------------
# Tips section: currently just a newline, rendered below the chat.
tips_md = "\n"

# Footer: provider / data-retention notice and a rate-limit hint.
footer_md = "\n".join(
    (
        "",
        "---",
        "**Providers**: Hugging Face Inference API + OpenRouter, dipilih providers dengan kebijakan ZDR (Zero Data Retention). Artinya data request/response tidak disimpan dan tidak digunakan untuk training data.",
        "Jika error, kemungkinan kena rate limit sehingga bisa coba model lain.",
        "",
    )
)
# Example prompts offered under the chat box (one single-item list each).
_EXAMPLE_PROMPTS = [
    ["Jelaskan penggunaan King's Safety Stock dalam inventory management."],
    ["Bandingkan use‑case dan tingkat kesulitan antara penggunaan R, Excel, dan Tableau untuk analisis data."],
    ["Kampanye training perusahaan “Ceria Melayani Semangat Berprestasi” bertujuan meningkatkan kolaborasi antar departemen. Jelaskan kenapa ini 'tone-deaf' dan bukan solusi masalah."],
    ["Apa saran praktis untuk transisi perusahaan brick dan mortar dengan data maturity yang rendah untuk membangun budaya yang data-driven?"],
]

# Build the page: header, configuration sidebar, chat, then tips and footer.
# NOTE(review): nesting below is reconstructed from the section comments
# (settings live in the sidebar, chat in the main area) - confirm.
with gr.Blocks(title="AI Chat", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# AI Chat")
    gr.Markdown("Data tidak disimpan providers (ZDR-Zero Data Retention), tidak digunakan untuk training, dan tidak di-log (YOI/250929).")

    # Sidebar: model choice plus advanced generation settings.
    with gr.Sidebar():
        gr.Markdown("### ⚙️ Configuration")

        # Which entry of MODELS handles the conversation.
        selected_model = gr.Dropdown(
            choices=MODEL_NAMES,
            value=MODEL_NAMES[0],
            label="Select Model",
            info="Choose which AI model to use",
        )

        # Placeholder for the logged-in user; hidden unless visible=True.
        current_user = gr.Textbox(
            label="Current User",
            value="Authenticated User",
            interactive=False,
            visible=False,
        )

        # Collapsed by default: system prompt and response length.
        with gr.Accordion("Advanced Settings", open=False):
            system_message = gr.Textbox(
                value="Anda adalah asisten AI.",
                label="System Message",
                info="Instruksi untuk AI.",
            )
            max_tokens = gr.Slider(
                minimum=1,
                maximum=8096,
                value=4096,
                step=1,
                label="Max New Tokens",
                info="Jumlah token respon maksimum.",
            )

    # Main area: the chat itself. The additional inputs are passed to
    # respond() after message and history, in this order.
    chatbot = gr.ChatInterface(
        respond,
        type="messages",
        additional_inputs=[system_message, max_tokens, selected_model],
        examples=_EXAMPLE_PROMPTS,
        cache_examples=False,
    )

    # Tips and footer below the chat.
    gr.Markdown(tips_md)
    gr.Markdown(footer_md)
# ----------------------------------------------------------------------
# Launch with Custom Auth
# ----------------------------------------------------------------------
if __name__ == "__main__":
    # Launch settings gathered in one place; passed to demo.launch() as-is.
    launch_options = {
        "auth": gradio_auth,  # custom login check
        "auth_message": "Please login to access the chat interface",
        "server_name": "0.0.0.0",
        "ssr_mode": False,
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)