"""Main Gradio app for moderation model testing."""
import os
import sys
import gradio as gr
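# Ensure the app's own directory is on sys.path so the local `utils` and `ui`
# packages below import correctly regardless of the working directory.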
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from datetime import datetime
from utils.dataset import format_categories_and_reasoning, save_to_dataset
from utils.helpers import get_hf_token
from utils.model_interface import extract_model_id, run_test
from ui.sidebar import build_sidebar
from ui.tab_config import build_config_tab
from ui.tab_dataset import build_dataset_tab
from ui.tab_policy import build_policy_tab
from ui.tab_testing import (
    build_testing_tab,
    format_model_info,
    format_reasoning_info,
    format_test_result,
)
# ============================================================================
# Handlers
# ============================================================================
def handle_run_test(
    test_input,
    current_policy,
    model_choice,
    reasoning_effort,
    max_tokens,
    temperature,
    top_p,
    system_prompt_val,
    response_format_val,
    save_mode,
    oauth_token: gr.OAuthToken | None = None,
):
    """Handle test execution."""
    if not test_input or not test_input.strip():
        model_info = format_model_info(model_choice, reasoning_effort)
        return model_info, "*Please enter test content*", "*No content*", "*No response yet*", gr.update(value="", visible=False), gr.update(value="", visible=False)
    if not current_policy or current_policy == "*No policy loaded*":
        model_info = format_model_info(model_choice, reasoning_effort)
        return model_info, "*Please load a policy first*", "*No policy*", "*No response yet*", gr.update(value="", visible=False), gr.update(value="", visible=False)
    # OAuth token is automatically injected by Gradio - we don't pass login_button as input
    hf_token, _ = get_hf_token(oauth_token)
    if hf_token is None:
        model_info = format_model_info(model_choice, reasoning_effort)
        return model_info, "*Please log in to use Inference Providers*", "*Authentication required*", "*No response yet*", gr.update(value="", visible=False), gr.update(value="", visible=False)
    model_id = extract_model_id(model_choice)
    result = run_test(
        model_id=model_id,
        test_input=test_input,
        policy=current_policy,
        hf_token=hf_token,
        reasoning_effort=reasoning_effort,
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        system_prompt=system_prompt_val,
        response_format=response_format_val,
    )
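    # format_test_result (from ui.tab_testing) appears to unpack into: label
    # heading, parsed result dict, categories text, optional reasoning trace,
    # and the raw model response.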
    label_text, parsed, cat_text, reasoning, raw_response = format_test_result(result)
    reasoning_visible = bool(reasoning and reasoning.strip())
    model_info = format_model_info(model_choice, reasoning_effort)
    reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_choice, reasoning)
    # Save to dataset if enabled
    if save_mode == "Save to Dataset" and hf_token is not None:
        try:
            categories_and_reasoning_text = format_categories_and_reasoning(parsed)
            policy_violation = parsed.get("label", -1)
            data = {
                "input": test_input,
                "policy_violation": policy_violation,
                "categories_and_reasoning": categories_and_reasoning_text,
                "policy": current_policy,
                "model_selection": model_choice,
                "raw_response": raw_response,
                "reasoning_trace": reasoning or "",
                "reasoning_effort": reasoning_effort or "",
                "max_tokens": int(max_tokens),
                "temperature": float(temperature),
                "top_p": float(top_p),
                "system_prompt": system_prompt_val or "",
                "response_format": response_format_val or "",
                "timestamp": datetime.now().isoformat(),
            }
            save_to_dataset(hf_token, data)
        except Exception as e:
            # Log error but don't break test execution
            print(f"Failed to save to dataset: {e}")
    return (
        model_info,
        label_text,
        cat_text,
        raw_response,
        gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
        gr.update(value=reasoning or "", visible=reasoning_visible),
    )
# ============================================================================
# UI Components
# ============================================================================
with gr.Blocks(title="Moderation Model Testing") as demo:
    gr.Markdown("# Moderation Model Testing Interface")
    gr.Markdown(
        "Test moderation models with custom content policies. Define your policy, select a model, "
        "and evaluate how different models classify content according to your rules. "
        "Supports reasoning models that provide detailed explanations for their decisions."
    )
    # Sidebar (collapsible)
    sidebar_components = build_sidebar()
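    # The sidebar's login button enables Hugging Face OAuth; Gradio injects the
    # resulting token into handlers that declare a `gr.OAuthToken` parameter.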
    login_button = sidebar_components["login_button"]
    # Main content area with tabs
    with gr.Tabs():
        # Build tabs
        testing_components = build_testing_tab()
        test_input = testing_components["test_input"]
        run_test_btn = testing_components["run_test_btn"]
        save_mode = testing_components["save_mode"]
        model_info_display = testing_components["model_info_display"]
        label_display = testing_components["label_display"]
        categories_display = testing_components["categories_display"]
        model_response_display = testing_components["model_response_display"]
        reasoning_info = testing_components["reasoning_info"]
        reasoning_display = testing_components["reasoning_display"]
        policy_components = build_policy_tab(os.path.dirname(__file__))
        current_policy_state = policy_components["current_policy_state"]
        config_components = build_config_tab()
        model_dropdown = config_components["model_dropdown"]
        reasoning_effort = config_components["reasoning_effort"]
        max_tokens = config_components["max_tokens"]
        temperature = config_components["temperature"]
        top_p = config_components["top_p"]
        system_prompt_textbox = config_components["system_prompt_textbox"]
        response_format_textbox = config_components["response_format_textbox"]
        dataset_components = build_dataset_tab()
        example_dropdown = dataset_components["example_dropdown"]
        cached_examples = dataset_components["cached_examples"]
        dropdown_choices_state = dataset_components["dropdown_choices_state"]
    # ============================================================================
    # Event Handlers
    # ============================================================================
    # Cross-tab handler: Run test (needs components from all tabs)
    run_test_btn.click(
        handle_run_test,
        inputs=[
            test_input,
            current_policy_state,
            model_dropdown,
            reasoning_effort,
            max_tokens,
            temperature,
            top_p,
            system_prompt_textbox,
            response_format_textbox,
            save_mode,
        ],
        outputs=[
            model_info_display,
            label_display,
            categories_display,
            model_response_display,
            reasoning_info,
            reasoning_display,
        ],
    )
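    # Keep the model info banner in sync whenever the model or reasoning effort changes.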
    model_dropdown.change(
        format_model_info,
        inputs=[model_dropdown, reasoning_effort],
        outputs=model_info_display,
    )
    reasoning_effort.change(
        format_model_info,
        inputs=[model_dropdown, reasoning_effort],
        outputs=model_info_display,
    )
    # Dataset load handler
    def load_example_from_dataset(selected_label, cached_examples_list, dropdown_choices_list):
        """Load example from dataset and populate all fields."""
        if (not cached_examples_list or not selected_label or
                not dropdown_choices_list or selected_label not in dropdown_choices_list):
            # Return None for every output component (15 total) to skip updates
            return (None,) * 15
        try:
            # Find index by matching label
            idx = dropdown_choices_list.index(selected_label)
            if idx < 0 or idx >= len(cached_examples_list):
                return (None,) * 15
            example = cached_examples_list[idx]
            # Get policy - ensure it's a string (not None)
            policy = example.get("policy", "") or ""
            # Extract saved results
            policy_violation = example.get("policy_violation", -1)
            categories_and_reasoning = example.get("categories_and_reasoning", "")
            raw_response = example.get("raw_response", "")
            reasoning_trace = example.get("reasoning_trace", "")
            model_selection = example.get("model_selection", "")
            reasoning_effort_val = example.get("reasoning_effort", "")
            # Format label text
            if policy_violation == 1:
                label_text = "## ❌ Policy Violation Detected"
            elif policy_violation == 0:
                label_text = "## ✅ No Policy Violation"
            else:
                label_text = "## ⚠️ Unable to determine label"
            # Format model info
            model_info = format_model_info(model_selection, reasoning_effort_val)
            # Format reasoning info
            reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_selection, reasoning_trace)
            reasoning_visible = bool(reasoning_trace and reasoning_trace.strip())
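            # Field order below must match the `outputs` list of example_dropdown.change.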
            return (
                example.get("input", ""),
                policy,  # current_policy_state - UI syncs automatically via change handler
                example.get("model_selection", ""),
                example.get("reasoning_effort", ""),
                example.get("max_tokens", 0),
                example.get("temperature", 0.0),
                example.get("top_p", 0.0),
                example.get("system_prompt", ""),
                example.get("response_format", ""),
                # Results
                model_info,
                label_text,
                categories_and_reasoning,
                raw_response,
                gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
                gr.update(value=reasoning_trace or "", visible=reasoning_visible),
            )
        except (ValueError, IndexError):
            return (None,) * 15
    example_dropdown.change(
        load_example_from_dataset,
        inputs=[example_dropdown, cached_examples, dropdown_choices_state],
        outputs=[
            test_input,
            current_policy_state,  # UI components sync automatically via change handler
            model_dropdown,
            reasoning_effort,
            max_tokens,
            temperature,
            top_p,
            system_prompt_textbox,
            response_format_textbox,
            # Results
            model_info_display,
            label_display,
            categories_display,
            model_response_display,
            reasoning_info,
            reasoning_display,
        ],
    )
if __name__ == "__main__":
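    # ssr_mode=False disables Gradio's server-side rendering, so the UI is
    # rendered client-side once the page loads.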
    demo.launch(ssr_mode=False)