"""Gradio front end for the AutoRefine question-answering demo.

Starts the retrieval server (``retrieval_launch.sh``) as a background process,
then serves a single-textbox Gradio interface that forwards each question to
``infer.run_search`` and shows the final answer plus the reasoning trajectory.
"""

import atexit
import subprocess
import time
import traceback
from urllib.parse import urlparse

import gradio as gr

from infer import run_search, question_list

# Seconds to wait for the retrieval server to exit after SIGTERM before it is
# forcibly killed during interpreter shutdown.
SERVER_SHUTDOWN_TIMEOUT_S = 10

# --- Retrieval server launch -------------------------------------------------
print("Attempting to start retrieval server...")

# subprocess.Popen does not block (unlike os.system), so the server runs in the
# background while the rest of this script continues.
try:
    server_process = subprocess.Popen(["bash", "retrieval_launch.sh"])
except Exception as e:
    # Best effort: the UI is still started so the failure is visible to the
    # operator instead of crashing the whole app.
    print(f"Failed to start retrieval_launch.sh: {e}")
    print("WARNING: The retrieval server may not be running.")
else:
    print(f"Server process started with PID: {server_process.pid}")

    def cleanup():
        """Stop the retrieval server process when this script exits.

        Sends SIGTERM first and escalates to SIGKILL if the process has not
        exited within ``SERVER_SHUTDOWN_TIMEOUT_S`` seconds, so interpreter
        shutdown can never block indefinitely on an unresponsive server.
        """
        print("Shutting down retrieval server...")
        # NOTE(review): this signals the `bash` wrapper only; whether the
        # processes started by retrieval_launch.sh also exit depends on that
        # script -- confirm.
        server_process.terminate()
        try:
            server_process.wait(timeout=SERVER_SHUTDOWN_TIMEOUT_S)
        except subprocess.TimeoutExpired:
            server_process.kill()
            server_process.wait()
        print("Server process terminated.")

    atexit.register(cleanup)


def gradio_answer(question: str) -> str:
    """Answer ``question`` and format the result for the Gradio textbox.

    Args:
        question: The user's question as entered in the UI.

    Returns:
        The final answer followed by the trajectory of reasoning steps, or a
        short error message if anything fails while producing the answer.
    """
    print(f"\nReceived question for Gradio: {question}")
    try:
        trajectory, answer = run_search(question)
        answer_string = f"Final answer: {answer.strip()}"
        answer_string += (
            "\n\n====== Trajectory of reasoning steps ======\n"
            f"{trajectory.strip()}"
        )
        return answer_string
    except Exception as e:
        # The message below points the user at the console, so make sure the
        # full traceback is actually printed there.
        traceback.print_exc()
        return f"An error occurred: {e}. Please check the console for more details."


iface = gr.Interface(
    fn=gradio_answer,
    inputs=gr.Textbox(
        lines=3,
        label="Enter your question",
        placeholder="e.g., Who invented the telephone?",
    ),
    outputs=gr.Textbox(
        label="Answer",
        show_copy_button=True,  # Allow users to easily copy the answer
        elem_id="answer_output",  # Optional: for custom CSS/JS targeting
    ),
    title="Demo of AutoRefine: Question Answering with Search and Refine During Thinking",
    description=(
        "Ask a question and this model will use a multi-turn reasoning and "
        "search mechanism to find the answer."
    ),
    examples=question_list,  # Use the list of example questions
    live=False,  # Set to True if you want real-time updates as user types
    allow_flagging="never",  # Disable flagging functionality
    theme=gr.themes.Soft(),  # Apply a clean theme
    # NOTE(review): cached examples are produced by calling gradio_answer; if
    # the retrieval server is not ready yet at that point, the cached output
    # may be the error message rather than a real answer -- confirm timing.
    cache_examples=True,  # Cache the examples for faster loading
)

iface.launch(share=True)