Spaces:

sds-ai
/

Yee-R1-Demo

Running

App Files Files Community

Shining-Data committed on Jun 4

Commit

e017f5c

verified ·

1 Parent(s): fd5e60d

Create app.py

Browse files

Files changed (1) hide show

app.py +428 -0

app.py ADDED Viewed

	@@ -0,0 +1,428 @@

+import os
+import time
+import gc
+import threading
+from itertools import islice
+from datetime import datetime
+import re  # for parsing <think> blocks
+import gradio as gr
+import torch
+from transformers import pipeline, TextIteratorStreamer
+from transformers import AutoTokenizer
+from duckduckgo_search import DDGS
+import spaces  # Import spaces early to enable ZeroGPU support
+# Optional: Disable GPU visibility if you wish to force CPU usage
+# os.environ["CUDA_VISIBLE_DEVICES"] = ""
+# ------------------------------
+# Global Cancellation Event
+# ------------------------------
+# Single module-level flag shared by every request in this process:
+# cancel_generation() sets it, chat_response() clears it on entry and polls it
+# between streamed chunks to stop forwarding output.
+cancel_event = threading.Event()
+# ------------------------------
+# Torch-Compatible Model Definitions with Adjusted Descriptions
+# ------------------------------
+# Maps the display name offered in the model dropdown to its Hugging Face
+# "repo_id" (read by load_pipeline) and a human-readable "description".
+# The first key is the dropdown's default selection.
+MODELS = {
+    "Yee-R1-mini":      {"repo_id":"sds-ai/Yee-R1-mini","description":"小熠（Yee）AI 数据安全专家"},
+    "secgpt-mini":      {"repo_id":"clouditera/secgpt-mini","description":"SecGPT 是由 云起无垠 于 2023 年正式推出的开源大模型，专为网络安全场景打造，旨在以人工智能技术全面提升安全防护效率与效果。"},
+    "Qwen3-0.6B":    {"repo_id":"Qwen/Qwen3-0.6B","description":"Dense causal language model with 0.6 B total parameters (0.44 B non-embedding), 28 transformer layers, 16 query heads & 8 KV heads, native 32 768-token context window, dual-mode generation, full multilingual & agentic capabilities."},
+    "Qwen3-1.7B":    {"repo_id":"Qwen/Qwen3-1.7B","description":"Dense causal language model with 1.7 B total parameters (1.4 B non-embedding), 28 layers, 16 query heads & 8 KV heads, 32 768-token context, stronger reasoning vs. 0.6 B variant, dual-mode inference, instruction following across 100+ languages."},
+}
+# Global cache for pipelines to avoid re-loading.
+# Keyed by the MODELS display name; filled lazily by load_pipeline() and never
+# evicted, so every model that has been selected stays resident in memory.
+PIPELINES = {}
+def load_pipeline(model_name):
+    """
+    Load and cache a transformers pipeline for text generation.
+    Tries bfloat16, falls back to float16 or float32 if unsupported.
+    """
+    global PIPELINES
+    if model_name in PIPELINES:
+        return PIPELINES[model_name]
+    repo = MODELS[model_name]["repo_id"]
+    tokenizer = AutoTokenizer.from_pretrained(repo)
+    for dtype in (torch.bfloat16, torch.float16, torch.float32):
+        try:
+            pipe = pipeline(
+                task="text-generation",
+                model=repo,
+                tokenizer=tokenizer,
+                trust_remote_code=True,
+                torch_dtype=dtype,
+                device_map="auto"
+            )
+            PIPELINES[model_name] = pipe
+            return pipe
+        except Exception:
+            continue
+    # Final fallback
+    pipe = pipeline(
+        task="text-generation",
+        model=repo,
+        tokenizer=tokenizer,
+        trust_remote_code=True,
+        device_map="auto"
+    )
+    PIPELINES[model_name] = pipe
+    return pipe
+def retrieve_context(query, max_results=6, max_chars=1000):
+    """
+    Retrieve search snippets from DuckDuckGo (runs in background).
+    Returns a list of result strings.
+    """
+    try:
+        with DDGS() as ddgs:
+            return [f"{i+1}. {r.get('title','No Title')} - {r.get('body','')[:max_chars]}"
+                    for i, r in enumerate(islice(ddgs.text(query, region="wt-wt", safesearch="off", timelimit="y"), max_results))]
+    except Exception:
+        return []
+def format_conversation(history, system_prompt, tokenizer):
+    if hasattr(tokenizer, "chat_template") and tokenizer.chat_template:
+        messages = [{"role": "system", "content": system_prompt.strip()}] + history
+        return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, enable_thinking=True)
+    else:
+        # Fallback for base LMs without chat template
+        prompt = system_prompt.strip() + "\n"
+        for msg in history:
+            if msg['role'] == 'user':
+                prompt += "User: " + msg['content'].strip() + "\n"
+            elif msg['role'] == 'assistant':
+                prompt += "Assistant: " + msg['content'].strip() + "\n"
+        if not prompt.strip().endswith("Assistant:"):
+            prompt += "Assistant: "
+        return prompt
+@spaces.GPU(duration=60)
+def chat_response(user_msg, chat_history, system_prompt,
+                  enable_search, max_results, max_chars,
+                  model_name, max_tokens, temperature,
+                  top_k, top_p, repeat_penalty, search_timeout):
+    """
+    Generates streaming chat responses, optionally with background web search.
+    """
+    cancel_event.clear()
+    history = list(chat_history or [])
+    history.append({'role': 'user', 'content': user_msg})
+    # Launch web search if enabled
+    debug = ''
+    search_results = []
+    if enable_search:
+        debug = 'Search task started.'
+        thread_search = threading.Thread(
+            target=lambda: search_results.extend(
+                retrieve_context(user_msg, int(max_results), int(max_chars))
+            )
+        )
+        thread_search.daemon = True
+        thread_search.start()
+    else:
+        debug = 'Web search disabled.'
+    try:
+        # merge any fetched search results into the system prompt
+        if search_results:
+            enriched = system_prompt.strip() + "\n\nRelevant context:\n" + "\n".join(search_results)
+        else:
+            enriched = system_prompt
+        # wait up to 1s for snippets, then replace debug with them
+        if enable_search:
+            thread_search.join(timeout=float(search_timeout))
+            if search_results:
+                debug = "### Search results merged into prompt\n\n" + "\n".join(
+                    f"- {r}" for r in search_results
+                )
+            else:
+                debug = "*No web search results found.*"
+        # merge fetched snippets into the system prompt
+        if search_results:
+            enriched = system_prompt.strip() + "\n\nRelevant context:\n" + "\n".join(search_results)
+        else:
+            enriched = system_prompt
+        pipe = load_pipeline(model_name)
+        prompt = format_conversation(history, enriched, pipe.tokenizer)
+        prompt_debug = f"\n\n--- Prompt Preview ---\n```\n{prompt}\n```"
+        streamer = TextIteratorStreamer(pipe.tokenizer,
+                                        skip_prompt=True,
+                                        skip_special_tokens=True)
+        gen_thread = threading.Thread(
+            target=pipe,
+            args=(prompt,),
+            kwargs={
+                'max_new_tokens': max_tokens,
+                'temperature': temperature,
+                'top_k': top_k,
+                'top_p': top_p,
+                'repetition_penalty': repeat_penalty,
+                'streamer': streamer,
+                'return_full_text': False,
+            }
+        )
+        gen_thread.start()
+        # Buffers for thought vs answer
+        thought_buf = ''
+        answer_buf = ''
+        in_thought = False
+        # Stream tokens
+        for chunk in streamer:
+            if cancel_event.is_set():
+                break
+            text = chunk
+            # Detect start of thinking
+            if not in_thought and '<think>' in text:
+                in_thought = True
+                # Insert thought placeholder
+                history.append({
+                    'role': 'assistant',
+                    'content': '',
+                    'metadata': {'title': '💭 Thought'}
+                })
+                # Capture after opening tag
+                after = text.split('<think>', 1)[1]
+                thought_buf += after
+                # If closing tag in same chunk
+                if '</think>' in thought_buf:
+                    before, after2 = thought_buf.split('</think>', 1)
+                    history[-1]['content'] = before.strip()
+                    in_thought = False
+                    # Start answer buffer
+                    answer_buf = after2
+                    history.append({'role': 'assistant', 'content': answer_buf})
+                else:
+                    history[-1]['content'] = thought_buf
+                yield history, debug
+                continue
+            # Continue thought streaming
+            if in_thought:
+                thought_buf += text
+                if '</think>' in thought_buf:
+                    before, after2 = thought_buf.split('</think>', 1)
+                    history[-1]['content'] = before.strip()
+                    in_thought = False
+                    # Start answer buffer
+                    answer_buf = after2
+                    history.append({'role': 'assistant', 'content': answer_buf})
+                else:
+                    history[-1]['content'] = thought_buf
+                yield history, debug
+                continue
+            # Stream answer
+            if not answer_buf:
+                history.append({'role': 'assistant', 'content': ''})
+            answer_buf += text
+            history[-1]['content'] = answer_buf
+            yield history, debug
+        gen_thread.join()
+        yield history, debug + prompt_debug
+    except Exception as e:
+        history.append({'role': 'assistant', 'content': f"Error: {e}"})
+        yield history, debug
+    finally:
+        gc.collect()
+def cancel_generation():
+    cancel_event.set()
+    return 'Generation cancelled.'
+def update_default_prompt(enable_search):
+    today = datetime.now().strftime('%Y-%m-%d')
+    return f"You are a helpful assistant. Today is {today}."
+def toggle_theme(current_theme):
+    """Toggle between light and dark themes"""
+    if current_theme == "light":
+        return "dark", "☀️ Light Mode"
+    else:
+        return "light", "🌙 Dark Mode"
+def toggle_language(current_lang):
+    """Toggle between Chinese and English"""
+    if current_lang == "zh":
+        return "en"
+    else:
+        return "zh"
+def get_ui_text(lang):
+    """Get UI text based on language"""
+    texts = {
+        "zh": {
+            "title": "## Yee-R1 Demo",
+            "subtitle": "小熠（Yee）AI 数据安全专家",
+            "dark_mode": "🌙 暗黑模式",
+            "light_mode": "☀️ 明亮模式",
+            "lang_btn": "🌐 English",
+            "select_model": "选择模型",
+            "enable_search": "启用网络搜索",
+            "system_prompt": "系统提示词",
+            "gen_params": "### 生成参数",
+            "max_tokens": "最大令牌数",
+            "temperature": "温度",
+            "top_k": "Top-K",
+            "top_p": "Top-P",
+            "repeat_penalty": "重复惩罚",
+            "search_settings": "### 网络搜索设置",
+            "max_results": "最大结果数",
+            "max_chars": "每个结果最大字符数",
+            "search_timeout": "搜索超时时间 (秒)",
+            "clear_chat": "清空对话",
+            "cancel_gen": "取消生成",
+            "placeholder": "输入您的消息并按回车..."
+        },
+        "en": {
+            "title": "## Yee-R1 Demo",
+            "subtitle": "Yee AI Data Security Expert",
+            "dark_mode": "🌙 Dark Mode",
+            "light_mode": "☀️ Light Mode",
+            "lang_btn": "🌐 中文",
+            "select_model": "Select Model",
+            "enable_search": "Enable Web Search",
+            "system_prompt": "System Prompt",
+            "gen_params": "### Generation Parameters",
+            "max_tokens": "Max Tokens",
+            "temperature": "Temperature",
+            "top_k": "Top-K",
+            "top_p": "Top-P",
+            "repeat_penalty": "Repetition Penalty",
+            "search_settings": "### Web Search Settings",
+            "max_results": "Max Results",
+            "max_chars": "Max Chars/Result",
+            "search_timeout": "Search Timeout (s)",
+            "clear_chat": "Clear Chat",
+            "cancel_gen": "Cancel Generation",
+            "placeholder": "Type your message and press Enter..."
+        }
+    }
+    return texts[lang]
+# ------------------------------
+# Gradio UI
+# ------------------------------
+with gr.Blocks(title="Yee-R1-Demo", theme=gr.themes.Default()) as demo:
+    # States
+    theme_state = gr.State("light")
+    lang_state = gr.State("zh")
+    # Header with controls
+    with gr.Row():
+        title_md = gr.Markdown("## Yee-R1 Demo")
+        with gr.Row(scale=0):
+            lang_btn = gr.Button("🌐 English", size="sm")
+            theme_btn = gr.Button("🌙 暗黑模式", size="sm")
+    subtitle_md = gr.Markdown("小熠（Yee）AI 数据安全专家")
+    with gr.Row():
+        with gr.Column(scale=3):
+            model_dd = gr.Dropdown(label="选择模型", choices=list(MODELS.keys()), value=list(MODELS.keys())[0])
+            search_chk = gr.Checkbox(label="启用网络搜索", value=True)
+            sys_prompt = gr.Textbox(label="系统提示词", lines=3, value=update_default_prompt(search_chk.value))
+            gen_params_md = gr.Markdown("### 生成参数")
+            max_tok = gr.Slider(64, 16384, value=4096, step=32, label="最大令牌数")
+            temp = gr.Slider(0.1, 2.0, value=0.6, step=0.1, label="温度")
+            k = gr.Slider(1, 100, value=40, step=1, label="Top-K")
+            p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
+            rp = gr.Slider(1.0, 2.0, value=1.2, step=0.1, label="重复惩罚")
+            search_settings_md = gr.Markdown("### 网络搜索设置")
+            mr = gr.Number(value=6, precision=0, label="最大结果数")
+            mc = gr.Number(value=600, precision=0, label="每个结果最大字符数")
+            st = gr.Slider(minimum=0.0, maximum=30.0, step=0.5, value=5.0, label="搜索超时时间 (秒)")
+            clr = gr.Button("清空对话")
+            cnl = gr.Button("取消生成")
+        with gr.Column(scale=7):
+            chat = gr.Chatbot(type="messages", show_copy_all_button=True, height="50vh")
+            txt = gr.Textbox(placeholder="输入您的消息并按回车...")
+            dbg = gr.Markdown()
+    # Event handlers
+    search_chk.change(fn=update_default_prompt, inputs=search_chk, outputs=sys_prompt)
+    clr.click(fn=lambda: ([], "", ""), outputs=[chat, txt, dbg])
+    cnl.click(fn=cancel_generation, outputs=dbg)
+    # Theme toggle functionality
+    def handle_theme_toggle(current_theme, current_lang):
+        new_theme, _ = toggle_theme(current_theme)
+        ui_text = get_ui_text(current_lang)
+        new_btn_text = ui_text["light_mode"] if new_theme == "dark" else ui_text["dark_mode"]
+        if new_theme == "dark":
+            demo._theme = gr.themes.Monochrome()
+        else:
+            demo._theme = gr.themes.Default()
+        return new_theme, new_btn_text
+    # Language toggle functionality
+    def handle_language_toggle(current_lang, current_theme):
+        new_lang = toggle_language(current_lang)
+        ui_text = get_ui_text(new_lang)
+        # Update all UI text
+        updates = [
+            new_lang,  # lang_state
+            ui_text["lang_btn"],  # lang_btn
+            ui_text["light_mode"] if current_theme == "dark" else ui_text["dark_mode"],  # theme_btn
+            ui_text["title"],  # title_md
+            ui_text["subtitle"],  # subtitle_md
+            ui_text["select_model"],  # model_dd label
+            ui_text["enable_search"],  # search_chk label
+            ui_text["system_prompt"],  # sys_prompt label
+            ui_text["gen_params"],  # gen_params_md
+            ui_text["max_tokens"],  # max_tok label
+            ui_text["temperature"],  # temp label
+            ui_text["top_k"],  # k label
+            ui_text["top_p"],  # p label
+            ui_text["repeat_penalty"],  # rp label
+            ui_text["search_settings"],  # search_settings_md
+            ui_text["max_results"],  # mr label
+            ui_text["max_chars"],  # mc label
+            ui_text["search_timeout"],  # st label
+            ui_text["clear_chat"],  # clr
+            ui_text["cancel_gen"],  # cnl
+            ui_text["placeholder"]  # txt placeholder
+        ]
+        return updates
+    theme_btn.click(
+        fn=handle_theme_toggle,
+        inputs=[theme_state, lang_state],
+        outputs=[theme_state, theme_btn]
+    )
+    lang_btn.click(
+        fn=handle_language_toggle,
+        inputs=[lang_state, theme_state],
+        outputs=[
+            lang_state, lang_btn, theme_btn, title_md, subtitle_md,
+            model_dd, search_chk, sys_prompt, gen_params_md,
+            max_tok, temp, k, p, rp, search_settings_md,
+            mr, mc, st, clr, cnl, txt
+        ]
+    )
+    txt.submit(fn=chat_response,
+               inputs=[txt, chat, sys_prompt, search_chk, mr, mc,
+                       model_dd, max_tok, temp, k, p, rp, st],
+               outputs=[chat, dbg])
+    demo.launch()