Spaces:
Running
Running
| """Main Gradio app for moderation model testing.""" | |
| import os | |
| import sys | |
| import gradio as gr | |
| sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) | |
| from datetime import datetime | |
| from utils.dataset import format_categories_and_reasoning, save_to_dataset | |
| from utils.helpers import get_hf_token | |
| from utils.model_interface import extract_model_id, run_test | |
| from ui.sidebar import build_sidebar | |
| from ui.tab_config import build_config_tab | |
| from ui.tab_dataset import build_dataset_tab | |
| from ui.tab_policy import build_policy_tab | |
| from ui.tab_testing import ( | |
| build_testing_tab, | |
| format_model_info, | |
| format_reasoning_info, | |
| format_test_result, | |
| ) | |
| # ============================================================================ | |
| # Handlers | |
| # ============================================================================ | |
def _test_not_run_outputs(model_choice, reasoning_effort, label_message, categories_message):
    """Build the six handler outputs shown when a test cannot be executed.

    Args:
        model_choice: Model selection passed through to ``format_model_info``.
        reasoning_effort: Reasoning-effort setting passed to ``format_model_info``.
        label_message: Markdown shown in the label output.
        categories_message: Markdown shown in the categories output.

    Returns:
        A 6-tuple in the handler's output order: model info, label,
        categories, raw response placeholder, and two hidden, emptied
        reasoning updates.
    """
    return (
        format_model_info(model_choice, reasoning_effort),
        label_message,
        categories_message,
        "*No response yet*",
        gr.update(value="", visible=False),
        gr.update(value="", visible=False),
    )


def handle_run_test(
    test_input,
    current_policy,
    model_choice,
    reasoning_effort,
    max_tokens,
    temperature,
    top_p,
    system_prompt_val,
    response_format_val,
    save_mode,
    oauth_token: gr.OAuthToken | None = None,
):
    """Run one moderation test and format the outputs for the testing tab.

    Validates the input and policy, resolves a Hugging Face token from the
    OAuth token, runs the model via ``run_test`` and, when ``save_mode`` is
    ``"Save to Dataset"``, stores the result with ``save_to_dataset``.

    Args:
        test_input: Content to classify; blank input short-circuits.
        current_policy: Policy text; empty or ``"*No policy loaded*"``
            short-circuits.
        model_choice: Model selection; converted with ``extract_model_id``.
        reasoning_effort: Reasoning-effort setting forwarded to the model.
        max_tokens: Generation limit; converted with ``int``.
        temperature: Sampling temperature; converted with ``float``.
        top_p: Nucleus sampling value; converted with ``float``.
        system_prompt_val: System prompt forwarded to ``run_test``.
        response_format_val: Response format forwarded to ``run_test``.
        save_mode: Saving is enabled only for the value ``"Save to Dataset"``.
        oauth_token: Token supplied by Gradio (not listed in the event inputs).

    Returns:
        A 6-tuple: model info, label text, categories text, raw response,
        and two ``gr.update`` values for the reasoning info and the
        reasoning trace (each carrying its own visibility).
    """
    if not test_input or not test_input.strip():
        return _test_not_run_outputs(
            model_choice, reasoning_effort, "*Please enter test content*", "*No content*"
        )

    if not current_policy or current_policy == "*No policy loaded*":
        return _test_not_run_outputs(
            model_choice, reasoning_effort, "*Please load a policy first*", "*No policy*"
        )

    # OAuth token is automatically injected by Gradio - we don't pass login_button as input
    hf_token, _ = get_hf_token(oauth_token)
    if hf_token is None:
        return _test_not_run_outputs(
            model_choice,
            reasoning_effort,
            "*Please log in to use Inference Providers*",
            "*Authentication required*",
        )

    model_id = extract_model_id(model_choice)

    # Convert once; the same values are sent to the model and stored in the record.
    max_tokens_value = int(max_tokens)
    temperature_value = float(temperature)
    top_p_value = float(top_p)

    result = run_test(
        model_id=model_id,
        test_input=test_input,
        policy=current_policy,
        hf_token=hf_token,
        reasoning_effort=reasoning_effort,
        max_tokens=max_tokens_value,
        temperature=temperature_value,
        top_p=top_p_value,
        system_prompt=system_prompt_val,
        response_format=response_format_val,
    )

    label_text, parsed, cat_text, reasoning, raw_response = format_test_result(result)
    reasoning_visible = bool(reasoning and reasoning.strip())
    model_info = format_model_info(model_choice, reasoning_effort)
    reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_choice, reasoning)

    # hf_token is guaranteed non-None here (checked above), so only the mode matters.
    if save_mode == "Save to Dataset":
        try:
            record = {
                "input": test_input,
                "policy_violation": parsed.get("label", -1),
                "categories_and_reasoning": format_categories_and_reasoning(parsed),
                "policy": current_policy,
                "model_selection": model_choice,
                "raw_response": raw_response,
                "reasoning_trace": reasoning or "",
                "reasoning_effort": reasoning_effort or "",
                "max_tokens": max_tokens_value,
                "temperature": temperature_value,
                "top_p": top_p_value,
                "system_prompt": system_prompt_val or "",
                "response_format": response_format_val or "",
                "timestamp": datetime.now().isoformat(),
            }
            save_to_dataset(hf_token, record)
        except Exception as e:
            # Saving is best-effort: log the error but still show the test result.
            print(f"Failed to save to dataset: {e}")

    return (
        model_info,
        label_text,
        cat_text,
        raw_response,
        gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
        gr.update(value=reasoning or "", visible=reasoning_visible),
    )
| # ============================================================================ | |
| # UI Components | |
| # ============================================================================ | |
with gr.Blocks(title="Moderation Model Testing") as demo:
    gr.Markdown("# Moderation Model Testing Interface")
    gr.Markdown(
        "Test moderation models with custom content policies. Define your policy, select a model, "
        "and evaluate how different models classify content according to your rules. "
        "Supports reasoning models that provide detailed explanations for their decisions."
    )

    # Sidebar (collapsible)
    sidebar_components = build_sidebar()
    login_button = sidebar_components["login_button"]

    # Main content area with tabs
    with gr.Tabs():
        # Each builder returns a dict of components; pull out the ones wired below.
        testing_components = build_testing_tab()
        test_input = testing_components["test_input"]
        run_test_btn = testing_components["run_test_btn"]
        save_mode = testing_components["save_mode"]
        model_info_display = testing_components["model_info_display"]
        label_display = testing_components["label_display"]
        categories_display = testing_components["categories_display"]
        model_response_display = testing_components["model_response_display"]
        reasoning_info = testing_components["reasoning_info"]
        reasoning_display = testing_components["reasoning_display"]

        policy_components = build_policy_tab(os.path.dirname(__file__))
        current_policy_state = policy_components["current_policy_state"]

        config_components = build_config_tab()
        model_dropdown = config_components["model_dropdown"]
        reasoning_effort = config_components["reasoning_effort"]
        max_tokens = config_components["max_tokens"]
        temperature = config_components["temperature"]
        top_p = config_components["top_p"]
        system_prompt_textbox = config_components["system_prompt_textbox"]
        response_format_textbox = config_components["response_format_textbox"]

        dataset_components = build_dataset_tab()
        example_dropdown = dataset_components["example_dropdown"]
        cached_examples = dataset_components["cached_examples"]
        dropdown_choices_state = dataset_components["dropdown_choices_state"]

    # ============================================================================
    # Event Handlers
    # ============================================================================

    # Cross-tab handler: Run test (needs components from all tabs)
    run_test_btn.click(
        handle_run_test,
        inputs=[
            test_input,
            current_policy_state,
            model_dropdown,
            reasoning_effort,
            max_tokens,
            temperature,
            top_p,
            system_prompt_textbox,
            response_format_textbox,
            save_mode,
        ],
        outputs=[
            model_info_display,
            label_display,
            categories_display,
            model_response_display,
            reasoning_info,
            reasoning_display,
        ],
    )

    # Keep the model info panel in sync with either control that feeds it.
    model_dropdown.change(
        format_model_info,
        inputs=[model_dropdown, reasoning_effort],
        outputs=model_info_display,
    )
    reasoning_effort.change(
        format_model_info,
        inputs=[model_dropdown, reasoning_effort],
        outputs=model_info_display,
    )

    # Components filled in when a saved example is selected. The loader below
    # must return exactly one value per entry, in this order.
    example_load_outputs = [
        test_input,
        current_policy_state,  # UI components sync automatically via change handler
        model_dropdown,
        reasoning_effort,
        max_tokens,
        temperature,
        top_p,
        system_prompt_textbox,
        response_format_textbox,
        # Results
        model_info_display,
        label_display,
        categories_display,
        model_response_display,
        reasoning_info,
        reasoning_display,
    ]

    # Dataset load handler
    def load_example_from_dataset(selected_label, cached_examples_list, dropdown_choices_list):
        """Load the saved example matching ``selected_label`` into the UI.

        Args:
            selected_label: Label currently selected in the example dropdown.
            cached_examples_list: Saved example dicts, index-aligned with
                ``dropdown_choices_list``.
            dropdown_choices_list: Labels offered by the dropdown.

        Returns:
            A tuple with one value per entry of ``example_load_outputs``.
            When nothing can be loaded (missing inputs, unknown label, or no
            example at the label's index) every entry is a no-op
            ``gr.update()`` so the current UI contents are left untouched.
        """

        def leave_unchanged():
            # A bare None would be applied as a new (empty) value and clear the
            # component; an argument-less gr.update() requests no change.
            return tuple(gr.update() for _ in example_load_outputs)

        if not (cached_examples_list and selected_label and dropdown_choices_list):
            return leave_unchanged()

        try:
            # Find index by matching label; ValueError (unknown label) and
            # IndexError (no cached example at that index) are handled below.
            idx = dropdown_choices_list.index(selected_label)
            example = cached_examples_list[idx]

            # Get policy - ensure it's a string (not None)
            policy = example.get("policy", "") or ""

            # Extract saved results
            policy_violation = example.get("policy_violation", -1)
            categories_and_reasoning = example.get("categories_and_reasoning", "")
            raw_response = example.get("raw_response", "")
            reasoning_trace = example.get("reasoning_trace", "")
            model_selection = example.get("model_selection", "")
            reasoning_effort_val = example.get("reasoning_effort", "")

            # Format label text
            if policy_violation == 1:
                label_text = "## ❌ Policy Violation Detected"
            elif policy_violation == 0:
                label_text = "## ✅ No Policy Violation"
            else:
                label_text = "## ⚠️ Unable to determine label"

            model_info = format_model_info(model_selection, reasoning_effort_val)
            reasoning_info_text, reasoning_info_visible = format_reasoning_info(
                model_selection, reasoning_trace
            )
            reasoning_visible = bool(reasoning_trace and reasoning_trace.strip())

            return (
                example.get("input", ""),
                policy,  # current_policy_state - UI syncs automatically via change handler
                model_selection,
                reasoning_effort_val,
                example.get("max_tokens", 0),
                example.get("temperature", 0.0),
                example.get("top_p", 0.0),
                example.get("system_prompt", ""),
                example.get("response_format", ""),
                # Results
                model_info,
                label_text,
                categories_and_reasoning,
                raw_response,
                gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
                gr.update(value=reasoning_trace or "", visible=reasoning_visible),
            )
        except (ValueError, IndexError):
            return leave_unchanged()

    example_dropdown.change(
        load_example_from_dataset,
        inputs=[example_dropdown, cached_examples, dropdown_choices_state],
        outputs=example_load_outputs,
    )


if __name__ == "__main__":
    demo.launch(ssr_mode=False)