# AutoRefine / demo.py
# NOTE(review): the lines below were Hugging Face Space page residue captured
# with the source (author caption, commit message, commit hash); kept here as
# comments so the file remains valid Python:
#   yrshi's picture
#   update some space settings
#   84486cb
import gradio as gr
from infer import run_search, question_list
import subprocess
import time
import atexit
from urllib.parse import urlparse
# ... (keep all your other imports like transformers, torch, requests, re, gr) ...
# --- Server Launch Block ---
# Start the retrieval backend as a background process *before* the model is
# loaded, so that run_search() has a live endpoint to query.
# -----------------------------------------------------------------
print("Attempting to start retrieval server...")
# subprocess.Popen does not block, unlike os.system / subprocess.run.
try:
    server_process = subprocess.Popen(["bash", "retrieval_launch.sh"])
    print(f"Server process started with PID: {server_process.pid}")

    def cleanup():
        """Terminate the background retrieval server when this app exits."""
        print("Shutting down retrieval server...")
        server_process.terminate()
        try:
            # Bounded wait: an unbounded wait() would hang interpreter
            # shutdown if the server ignores SIGTERM. Escalate to SIGKILL.
            server_process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            server_process.kill()
            server_process.wait()
        print("Server process terminated.")

    # Run cleanup automatically when the interpreter exits.
    atexit.register(cleanup)
except OSError as e:
    # OSError is what Popen raises when `bash` or the script is missing;
    # any other exception type would indicate a bug and should propagate.
    print(f"Failed to start retrieval_launch.sh: {e}")
    print("WARNING: The retrieval server may not be running.")
def gradio_answer(question: str) -> str:
    """Answer *question* via the multi-turn search-and-refine pipeline.

    Returns a single string: the final answer followed by the full
    reasoning trajectory, or a human-readable error message if the
    pipeline fails.
    """
    print(f"\nReceived question for Gradio: {question}")
    try:
        # Core inference call; uses the assets pre-loaded at import time.
        trajectory, answer = run_search(question)
        header = f"Final answer: {answer.strip()}"
        trace = f"====== Trajectory of reasoning steps ======\n{trajectory.strip()}"
        return f"{header}\n\n{trace}"
    except Exception as e:
        # UI boundary: surface failures to the user instead of crashing.
        return f"An error occurred: {e}. Please check the console for more details."
# Build the web UI. Widgets are created up front so the Interface
# construction below stays readable.
question_box = gr.Textbox(
    lines=3,
    label="Enter your question",
    placeholder="e.g., Who invented the telephone?",
)
answer_box = gr.Textbox(
    label="Answer",
    show_copy_button=True,  # let users copy the answer with one click
    elem_id="answer_output",  # hook for custom CSS/JS targeting
)
iface = gr.Interface(
    fn=gradio_answer,
    inputs=question_box,
    outputs=answer_box,
    title="Demo of AutoRefine: Question Answering with Search and Refine During Thinking",
    description=("Ask a question and this model will use a multi-turn reasoning and search mechanism to find the answer."),
    examples=question_list,  # pre-filled example questions
    live=False,  # compute on submit only, not while the user types
    allow_flagging="never",  # flagging UI disabled
    theme=gr.themes.Soft(),  # clean built-in theme
    cache_examples=True,  # precompute example outputs for faster display
)
# share=True additionally exposes a public gradio.live URL.
iface.launch(share=True)