import gradio as gr
from infer import run_search, question_list

import subprocess
import time
import atexit
from urllib.parse import urlparse

# ... (keep all your other imports like transformers, torch, requests, re) ...

# --- NEW: Server Launch Block ---
# Insert this block *before* you load the model
# -----------------------------------------------------------------
print("Attempting to start retrieval server...")

# Start the server as a background process
# subprocess.Popen does not block, unlike os.system
try:
    server_process = subprocess.Popen(["bash", "retrieval_launch.sh"])
    print(f"Server process started with PID: {server_process.pid}")

    # Register a function to kill the server when app.py exits
    def cleanup():
        print("Shutting down retrieval server...")
        server_process.terminate()
        server_process.wait()
        print("Server process terminated.")
    
    atexit.register(cleanup)

except Exception as e:
    print(f"Failed to start retrieval_launch.sh: {e}")
    print("WARNING: The retrieval server may not be running.")

def gradio_answer(question: str) -> str:
    print(f"\nReceived question for Gradio: {question}")
    try:
        # Call the core inference function from infer.py; it returns the
        # reasoning trajectory and the final answer
        trajectory, answer = run_search(question)
        answer_string = f"Final answer: {answer.strip()}"
        answer_string += f"\n\n====== Trajectory of reasoning steps ======\n{trajectory.strip()}"
        return answer_string
    except Exception as e:
        # Basic error handling for the Gradio interface
        return f"An error occurred: {e}. Please check the console for more details."


iface = gr.Interface(
    fn=gradio_answer,
    inputs=gr.Textbox(
        lines=3,
        label="Enter your question",
        placeholder="e.g., Who invented the telephone?"
    ),
    outputs=gr.Textbox(
        label="Answer",
        show_copy_button=True, # Allow users to easily copy the answer
        elem_id="answer_output" # Optional: for custom CSS/JS targeting
    ),
    title="Demo of AutoRefine: Question Answering with Search and Refine During Thinking",
    description=("Ask a question and this model will use a multi-turn reasoning and search mechanism to find the answer."),
    examples=question_list, # Use the list of example questions
    live=False, # Set to True if you want real-time updates as user types
    allow_flagging="never", # Disable flagging functionality
    theme=gr.themes.Soft(), # Apply a clean theme
    cache_examples=True, # Pre-computes example outputs at startup (requires the model and retrieval server to be ready)
)
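
# Usage note: running `python app.py` starts the retrieval server, builds the
# interface, and serves it locally; share=True additionally creates a
# temporary public gradio.live link alongside the local URL.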

if __name__ == "__main__":
    iface.launch(share=True)