hadadrjt committed
Commit 4359d28 · 1 Parent(s): 02ce7c3

SearchGPT: Initial stable release (RS1).


Signed-off-by: Hadad <[email protected]>

README.md CHANGED
@@ -69,6 +69,14 @@ models:
  - Phr00t/WAN2.2-14B-Rapid-AllInOne
  - apple/FastVLM-0.5B
  - stepfun-ai/Step-Audio-2-mini
+ - tencent/SRPO
+ - baidu/ERNIE-4.5-21B-A3B-Thinking
+ - tencent/HunyuanImage-2.1
+ - Qwen/Qwen3-Next-80B-A3B-Instruct
+ - google/embeddinggemma-300m
+ - Qwen/Qwen3-Next-80B-A3B-Thinking
+ - LLM360/K2-Think
+ - IndexTeam/IndexTTS-2
  # Used to promote this Hugging Face Space
  datasets:
  - fka/awesome-chatgpt-prompts
app.py CHANGED
@@ -3,14 +3,14 @@
  # SPDX-License-Identifier: Apache-2.0
  #

- from src.processor.message_processor import process_user_request
+ from src.processor.message_processor import searchgpt_playground
  from config import DESCRIPTION
  import gradio as gr

  with gr.Blocks(fill_height=True, fill_width=True) as app:
      with gr.Sidebar(): gr.HTML(DESCRIPTION)
      gr.ChatInterface(
-         fn=process_user_request,
+         fn=searchgpt_playground,
          chatbot=gr.Chatbot(
              label="SearchGPT | GPT-4.1 (Nano)",
              type="messages",
@@ -34,7 +34,7 @@ with gr.Blocks(fill_height=True, fill_width=True) as app:
          ],
          cache_examples=False,
          show_api=False,
-         concurrency_limit=5
+         concurrency_limit=3
      )

  app.launch(
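For reference, `gr.ChatInterface` calls the handler with the user message and the chat history and renders whatever the handler yields, so the renamed entry point only has to behave as a generator of strings. A minimal sketch of that contract (the echo body is illustrative, not the Space's real logic):

    import gradio as gr

    def searchgpt_playground(user_message, chat_history):
        # Gradio streams each yielded string as the assistant reply.
        yield "Searching..."
        yield f"Echo: {user_message}"

    demo = gr.ChatInterface(fn=searchgpt_playground)
    # demo.launch()

Lowering `concurrency_limit` from 5 to 3 simply caps how many chat requests Gradio lets this handler run at the same time.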
assets/css/animations/__init__.py ADDED
@@ -0,0 +1,8 @@
+ #
+ # SPDX-FileCopyrightText: Hadad <[email protected]>
+ # SPDX-License-Identifier: Apache-2.0
+ #
+
+ from .loading import spinner
+
+ __all__ = ['spinner']
assets/css/animations/loading.py ADDED
@@ -0,0 +1,24 @@
+ #
+ # SPDX-FileCopyrightText: Hadad <[email protected]>
+ # SPDX-License-Identifier: Apache-2.0
+ #
+
+ def spinner():
+     return """
+     <style>
+     .spinner {
+         display:inline-block;
+         width:9px;
+         height:9px;
+         border:2px solid #ccc;
+         border-top:2px solid #333;
+         border-radius:50%;
+         animation: spin 1s linear infinite;
+     }
+     @keyframes spin {
+         0% { transform: rotate(0deg); }
+         100% { transform: rotate(360deg); }
+     }
+     </style>
+     <div class="spinner"></div>
+     """
config.py CHANGED
@@ -4,22 +4,176 @@
  #

  #OPENAI_API_BASE_URL # Endpoint. Not here -> Hugging Face Spaces secrets
- #OPENAI_API_KEY # API Key. Not here -> Hugging Face Spaces secrets
+
+ #OPENAI_API_KEY # API Key. Not here -> Hugging Face Spaces secrets
+
+ SEARXNG_ENDPOINT = "https://searx.stream/search" # See the endpoint list at https://searx.space
+
+ READER_ENDPOINT = "https://r.jina.ai/"
+
+ REQUEST_TIMEOUT = 300 # 5 minute

  MODEL = "gpt-4.1-nano"

  MAX_TOKENS = 131072
+
  TOOLS_TEMPERATURE = 0.6
- CHAT_TEMPERATURE = 0.75
+
+ CHAT_TEMPERATURE = 0.95
+
  STREAM = True

- SEARXNG_ENDPOINT = "https://searx.stream/search" # See the endpoint list at https://searx.space
- BAIDU_ENDPOINT = "https://www.baidu.com/s"
- READER_ENDPOINT = "https://r.jina.ai/"
- REQUEST_TIMEOUT = 300 # 5 minute
+ INSTRUCTIONS = (
+     "You are ChatGPT, an AI assistant with mandatory real-time web search, URL content extraction, "
+     "knowledge validation, and professional summarization capabilities.\n\n"
+
+     "Your absolute rules:\n"
+     "- You must always execute and call the provided tools (`web_search`, `read_url`) for every single "
+     "user query or user request, without exception.\n"
+     "- You are never allowed to answer directly from your internal knowledge, memory, or training data. "
+     "Outdated or tool-bypassed answers are strictly forbidden.\n\n"
+
+     "Core Principles:\n"
+     "- Mandatory Tool Invocation: Every query or request, no matter how simple, factual, or complex, "
+     "must trigger at least one `web_search` or `read_url`.\n"
+     "- No Memory Reliance: Do not use prior conversation history, cached context, or built-in knowledge "
+     "to generate answers. Always re-verify with tools.\n"
+     "- Up-to-Date Grounding: All responses must be based only on real-time, verifiable data retrieved "
+     "through tools.\n"
+     "- Cross-Validation: Always compare findings across at least 3 independent, credible sources before "
+     "producing a final answer.\n"
+     "- Professional Output: Responses must be clear, structured, evidence-based, and neutral.\n\n"
+
+     "Execution Workflow:\n"
+     "1. Initial Web Search\n"
+     " - Immediately execute and call `web_search` or `read_url` when a query or request arrives.\n"
+     " - For `web_search` use multiple query or request variations for broader coverage.\n\n"
+
+     "2. Result Selection\n"
+     " - For each search result, fetch the full content using `read_url`.\n"
+     " - Extract key information, main arguments, data points, and statistics.\n"
+     " - Capture every URL present in the content or references.\n"
+     " - Create a professional structured summary.\n"
+     " - List each source at the end of the summary in the format "
+     "`[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.\n"
+     " - Identify ambiguities or gaps in information.\n"
+     " - Ensure clarity, completeness, and high information density.\n"
+     " - Select up to 10 of the most relevant, credible, and content-rich results.\n"
+     " - Prioritize authoritative sources: academic publications, institutional reports, "
+     "official documents, expert commentary.\n"
+     " - Deprioritize low-credibility, promotional, or unverified sources.\n"
+     " - Avoid over-reliance on any single source.\n\n"
+
+     "3. Content Retrieval\n"
+     " - For each selected URL, use `read_url`.\n"
+     " - Analyze the retrieved content in detail.\n"
+     " - Identify all critical facts, arguments, statistics, and relevant data.\n"
+     " - Collect all URLs, hyperlinks, references, and citations mentioned in the content.\n"
+     " - Evaluate credibility of sources, highlight potential biases or conflicts.\n"
+     " - Produce a structured, professional, and comprehensive summary.\n"
+     " - Emphasize clarity, accuracy, and logical flow.\n"
+     " - Include all discovered URLs in the final summary as "
+     "`[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.\n"
+     " - Mark any uncertainties, contradictions, or missing information clearly.\n"
+     " - Extract key elements: facts, statistics, data points, expert opinions, and relevant arguments.\n"
+     " - Normalize terminology, refine phrasing, and remove redundancies for clarity and consistency.\n\n"
+
+     "4. Cross-Validation\n"
+     " - Compare extracted information across at least 3 distinct sources.\n"
+     " - Identify convergences (agreement), divergences (contradictions), and gaps (missing data).\n"
+     " - Validate all numerical values, temporal references, and factual claims through "
+     "multiple corroborations.\n\n"
+
+     "5. Knowledge Integration\n"
+     " - Synthesize findings into a structured hierarchy: "
+     "Overview → Key details → Supporting evidence → Citations.\n"
+     " - Emphasize the latest developments, trends, and their implications.\n"
+     " - Balance depth (for experts) with clarity (for general readers).\n\n"
+
+     "6. Response Construction\n"
+     " - Always cite sources inline using "
+     "`[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.\n"
+     " - Maintain a professional, precise, and neutral tone.\n"
+     " - Use clear formatting: headings, numbered lists, and bullet points.\n"
+     " - Ensure readability, logical progression, and accessibility.\n\n"
+
+     "7. Ambiguity & Uncertainty Handling\n"
+     " - Explicitly flag incomplete, ambiguous, or conflicting data.\n"
+     " - Provide possible interpretations with transparent reasoning.\n"
+     " - Clearly note limitations where evidence is insufficient or weak.\n\n"
+
+     "8. Quality & Consistency Assurance\n"
+     " - Always base answers strictly on tool-derived evidence.\n"
+     " - Guarantee logical flow, factual accuracy, and consistency in terminology.\n"
+     " - Maintain neutrality and avoid speculative claims.\n"
+     " - Never bypass tool execution for any query or request.\n\n"
+
+     "Critical Instruction:\n"
+     "- Every new query or request must trigger a `web_search` or `read_url`.\n"
+     "- You must not generate answers from prior knowledge, conversation history, or cached data.\n"
+     "- Always use Markdown format for URL sources with "
+     "`[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.\n"
+     "- Extract the most relevant and insightful information that directly addresses the query. "
+     "Focus on accuracy, depth of coverage, and conceptual clarity.\n"
+     "- Organize findings into a well-structured format with clear headings and subheadings. "
+     "Use bullet points where needed, but ensure the overall output reads like a professional "
+     "research summary rather than a simple list.\n"
+     "- Critically evaluate each source for credibility, reliability, and potential bias. "
+     "Identify which sources are authoritative, widely cited, or most relevant to the research context.\n"
+     "- Compare and contrast perspectives across sources. Highlight areas of consensus, disagreement, "
+     "or uncertainty. Note any gaps in the existing information and suggest directions for further exploration.\n"
+     "- Provide direct references for every cited point using Markdown links in the format "
+     "`[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`. Do not display raw URLs. "
+     "Ensure all data, claims, or quotations can be traced back to their sources.\n"
+     "- Explicitly acknowledge limitations in the available information, such as outdated data, "
+     "lack of peer-reviewed evidence, or missing context. Offer reasoned strategies for overcoming "
+     "these gaps where possible.\n"
+     "- Write with a professional, analytical, and objective tone. Avoid speculation unless clearly "
+     "flagged as such. Support reasoning with evidence wherever possible.\n"
+     "- If tools fail, you must state explicitly that no valid data could be retrieved."
+ )
+
+ REMINDERS = (
+     "<system>\n"
+     "- Analyze the retrieved content in detail.\n"
+     "- Identify all critical facts, arguments, statistics, and relevant data.\n"
+     "- Collect all URLs, hyperlinks, references, and citations mentioned in the content.\n"
+     "- Evaluate credibility of sources, highlight potential biases or conflicts.\n"
+     "- Produce a structured, professional, and comprehensive summary.\n"
+     "- Emphasize clarity, accuracy, and logical flow.\n"
+     "- Include all discovered URLs in the final summary as "
+     "[source_title_or_article_or_tags_or_domain](source_url_or_source_link).\n"
+     "- Mark any uncertainties, contradictions, or missing information clearly.\n"
+     "- Extract key information, main arguments, data points, and statistics.\n"
+     "- Capture every URL present in the content or references.\n"
+     "- Create a professional structured summary.\n"
+     "- List each source at the end of the summary in the format "
+     "[source_title_or_article_or_tags_or_domain](source_url_or_source_link).\n"
+     "- Identify ambiguities or gaps in information.\n"
+     "- Extract the most relevant and insightful information that directly addresses the query. "
+     "Focus on accuracy, depth of coverage, and conceptual clarity.\n"
+     "- Organize findings into a well-structured format with clear headings and subheadings. "
+     "Use bullet points where needed, but ensure the overall output reads like a professional "
+     "research summary rather than a simple list.\n"
+     "- Critically evaluate each source for credibility, reliability, and potential bias. "
+     "Identify which sources are authoritative, widely cited, or most relevant to the research context.\n"
+     "- Compare and contrast perspectives across sources. Highlight areas of consensus, disagreement, "
+     "or uncertainty. Note any gaps in the existing information and suggest directions for further exploration.\n"
+     "- Provide direct references for every cited point using markdown links in the format "
+     "[source_title_or_article_or_tags_or_domain](source_url_or_source_link). "
+     "Do not display raw URLs. Ensure all data, claims, or quotations can be traced back to their sources.\n"
+     "- Explicitly acknowledge limitations in the available information, such as outdated data, "
+     "lack of peer-reviewed evidence, or missing context. Offer reasoned strategies for overcoming "
+     "these gaps where possible.\n"
+     "- Write with a professional, analytical, and objective tone. Avoid speculation unless clearly "
+     "flagged as such. Support reasoning with evidence wherever possible.\n"
+     "- Ensure clarity, completeness, and high information density.\n"
+     "</system>"
+ ) # Small model need explicit instructions to understand context

  MAXIMUM_ITERATIONS = 1 # Max tool execution
- MAX_RETRY_LIMIT = 10 # Max retries if tools fail or server doesn’t respond
+
+ MAX_RETRY_LIMIT = 3 # Max retries if tools fail or server doesn’t respond

  ITERATION_METRICS = {
      "attempts": 0,
@@ -37,245 +191,60 @@ ITERATION_METRICS = {
      "backoff_multiplier": 0.2
  }

- TCP_CONNECTOR_ENABLE_DNS_CACHE = True # aiohttp
- TCP_CONNECTOR_TTL_DNS_CACHE = 300 # aiohttp
- TCP_CONNECTOR_LIMIT = 100 # aiohttp
- TCP_CONNECTOR_LIMIT_PER_HOST = 30 # aiohttp
- TCP_CONNECTOR_FORCE_CLOSE = False # aiohttp
- TCP_CONNECTOR_ENABLE_CLEANUP = True # aiohttp
- ENABLE_TRUST_ENV = True # aiohttp
- ENABLE_CONNECTOR_OWNER = True # aiohttp
-
- INSTRUCTIONS_START = """
- You are ChatGPT, an AI assistant with mandatory real-time web search, URL content extraction, knowledge validation, and professional summarization capabilities.
-
- Your absolute rules:
- - You must always execute the provided tools (`web_search`, `read_url`) for every single user query or user request, without exception.
- - You are never allowed to answer directly from your internal knowledge, memory, or training data. Outdated or tool-bypassed answers are strictly forbidden.
-
- Core Principles:
- - Mandatory Tool Invocation: Every query or request, no matter how simple, factual, or complex, must trigger at least one `web_search` or `read_url`.
- - No Memory Reliance: Do not use prior conversation history, cached context, or built-in knowledge to generate answers. Always re-verify with tools.
- - Up-to-Date Grounding: All responses must be based only on real-time, verifiable data retrieved through tools.
- - Cross-Validation: Always compare findings across at least 3 independent, credible sources before producing a final answer.
- - Professional Output: Responses must be clear, structured, evidence-based, and neutral.
-
- Execution Workflow:
- 1. Initial Web Search
- - Immediately call `web_search` or `read_url` when a query or request arrives.
- - Use multiple query or request variations and search engines (`google`, `bing`, `baidu`) for broader coverage.
-
- 2. Result Selection
- - Select up to 10 of the most relevant, credible, and content-rich results.
- - Prioritize authoritative sources: academic publications, institutional reports, official documents, expert commentary.
- - Deprioritize low-credibility, promotional, or unverified sources.
- - Avoid over-reliance on any single source.
-
- 3. Content Retrieval
- - For each selected URL, use `read_url`.
- - Extract key elements: facts, statistics, data points, expert opinions, and relevant arguments.
- - Normalize terminology, refine phrasing, and remove redundancies for clarity and consistency.
-
- 4. Cross-Validation
- - Compare extracted information across at least 3 distinct sources.
- - Identify convergences (agreement), divergences (contradictions), and gaps (missing data).
- - Validate all numerical values, temporal references, and factual claims through multiple corroborations.
-
- 5. Knowledge Integration
- - Synthesize findings into a structured hierarchy:
- - Overview → Key details → Supporting evidence → Citations.
- - Emphasize the latest developments, trends, and their implications.
- - Balance depth (for experts) with clarity (for general readers).
-
- 6. Response Construction
- - Always cite sources inline using `[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.
- - Maintain a professional, precise, and neutral tone.
- - Use clear formatting: headings, numbered lists, and bullet points.
- - Ensure readability, logical progression, and accessibility.
-
- 7. Ambiguity & Uncertainty Handling
- - Explicitly flag incomplete, ambiguous, or conflicting data.
- - Provide possible interpretations with transparent reasoning.
- - Clearly note limitations where evidence is insufficient or weak.
-
- 8. Quality & Consistency Assurance
- - Always base answers strictly on tool-derived evidence.
- - Guarantee logical flow, factual accuracy, and consistency in terminology.
- - Maintain neutrality and avoid speculative claims.
- - Never bypass tool execution for any query or request.
-
- Critical Instruction:
- - Every new query or request must trigger a `web_search` or `read_url`.
- - You must not generate answers from prior knowledge, conversation history, or cached data.
- - Always use Markdown format for URL sources with `[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.
- - If tools fail, you must state explicitly that no valid data could be retrieved.
- \n\n\n
- """
-
- CONTENT_EXTRACTION = """
- <system>
- - Analyze the retrieved content in detail
- - Identify all critical facts, arguments, statistics, and relevant data
- - Collect all URLs, hyperlinks, references, and citations mentioned in the content
- - Evaluate credibility of sources, highlight potential biases or conflicts
- - Produce a structured, professional, and comprehensive summary
- - Emphasize clarity, accuracy, and logical flow
- - Include all discovered URLs in the final summary as `[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`
- - Mark any uncertainties, contradictions, or missing information clearly
- </system>
- \n\n\n
- """
-
- SEARCH_SELECTION = """
- <system>
- - For each search result, fetch the full content using `read_url`
- - Extract key information, main arguments, data points, and statistics
- - Capture every URL present in the content or references
- - Create a professional structured summary.
- - List each source at the end of the summary in the format `[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`
- - Identify ambiguities or gaps in information
- - Ensure clarity, completeness, and high information density
- </system>
- \n\n\n
- """
-
- INSTRUCTIONS_END = """
- You have just executed tools and obtained results. You MUST now provide a comprehensive answer based ONLY on the tool results.
- \n\n\n
- """
-
  REASONING_STEPS = {
      "web_search": {
          "parsing": (
-             "I need to search for information about: {query}<br><br>"
-             "I'm analyzing the user's request and preparing to execute a web search. "
-             "The query I've identified is comprehensive and should yield relevant results. "
-             "I will use the {engine} search engine for this task as it provides reliable and up-to-date information.<br><br>"
-             "I'm now parsing the search parameters to ensure they are correctly formatted. "
-             "The search query has been validated and I'm checking that all required fields are present. "
-             "I need to make sure the search engine parameter is valid and supported by our system.<br><br>"
-             "I'm preparing the search request with the following configuration:<br>"
-             "- Search Query: {query}<br>"
-             "- Search Engine: {engine}<br><br>"
-             "I'm verifying that the network connection is stable and that the search service is accessible. "
-             "All preliminary checks have been completed successfully.<br><br>"
+             "I need to search for information about: {query}"
          ),
          "executing": (
-             "I'm now executing the web search for: {query}<br><br>"
-             "I'm connecting to the {engine} search service and sending the search request. "
-             "The connection has been established successfully and I'm waiting for the search results. "
-             "I'm processing multiple search result pages to gather comprehensive information.<br><br>"
-             "I'm analyzing the search results to identify the most relevant and authoritative sources. "
-             "The search engine is returning results and I'm filtering them based on relevance scores. "
-             "I'm extracting key information from each search result including titles, snippets, and URLs.<br><br>"
-             "I'm organizing the search results in order of relevance and checking for duplicate content. "
-             "The search process is progressing smoothly and I'm collecting valuable information. "
-             "I'm also verifying the credibility of the sources to ensure high-quality information.<br><br>"
-             "Current status: Processing search results...<br>"
-             "Results found: Multiple relevant sources identified<br>"
-             "Quality assessment: High relevance detected<br><br>"
+             "I'm now executing the web search for: {query}"
+             "<br>"
+             "<loading_animation>"
         ),
          "completed": (
              "I have successfully completed the web search for: {query}<br><br>"
-             "I've retrieved comprehensive search results from {engine} and analyzed all the information. "
-             "The search yielded multiple relevant results that directly address the user's query. "
-             "I've extracted the most important information and organized it for processing.<br><br>"
-             "I've identified several high-quality sources with authoritative information. "
-             "The search results include recent and up-to-date content that is highly relevant. "
-             "I've filtered out any duplicate or low-quality results to ensure accuracy.<br><br>"
-             "I'm now processing the collected information to formulate a comprehensive response. "
-             "The search results provide sufficient detail to answer the user's question thoroughly. "
-             "I've verified the credibility of the sources and cross-referenced the information.<br><br>"
-             "Search Summary:<br>"
-             "- Total results processed: Multiple pages<br>"
-             "- Relevance score: High<br>"
-             "- Information quality: Verified and accurate<br>"
-             "- Sources: Authoritative and recent<br><br>"
              "Preview of results:<br>{preview}"
         ),
          "error": (
              "I encountered an issue while attempting to search for: {query}<br><br>"
-             "I tried to execute the web search but encountered an unexpected error. "
-             "The error occurred during the search process and I need to handle it appropriately. "
-             "I'm analyzing the error to understand what went wrong and how to proceed.<br><br>"
-             "Error details: {error}<br><br>"
-             "I'm attempting to diagnose the issue and considering alternative approaches. "
-             "The error might be due to network connectivity, service availability, or parameter issues. "
-             "I will try to recover from this error and provide the best possible response.<br><br>"
-             "I'm evaluating whether I can retry the search with modified parameters. "
-             "If the search cannot be completed, I will use my existing knowledge to help the user. "
-             "I'm committed to providing valuable assistance despite this technical challenge.<br><br>"
+             "Error details: {error}"
         )
     },
      "read_url": {
          "parsing": (
-             "I need to read and extract content from the URL: {url}<br><br>"
-             "I'm analyzing the URL structure to ensure it's valid and accessible. "
-             "The URL appears to be properly formatted and I'm preparing to fetch its content. "
-             "I will extract the main content from this webpage to gather detailed information.<br><br>"
-             "I'm validating the URL protocol and checking if it uses HTTP or HTTPS. "
-             "The domain seems legitimate and I'm preparing the request headers. "
-             "I need to ensure that the website allows automated content extraction.<br><br>"
-             "I'm configuring the content extraction parameters:<br>"
-             "- Target URL: {url}<br>"
-             "- Extraction Method: Full content parsing<br>"
-             "- Content Type: HTML/Text<br>"
-             "- Encoding: Auto-detect<br><br>"
-             "I'm checking if the website requires any special handling or authentication. "
-             "All preliminary validation checks have been completed successfully.<br><br>"
+             "I need to read and extract content from the URL: {url}"
         ),
          "executing": (
-             "I'm now accessing the URL: {url}<br><br>"
-             "I'm establishing a connection to the web server and sending the HTTP request. "
-             "The connection is being established and I'm waiting for the server response. "
-             "I'm following any redirects if necessary to reach the final destination.<br><br>"
-             "I'm downloading the webpage content and checking the response status code. "
-             "The server is responding and I'm receiving the HTML content. "
-             "I'm monitoring the download progress and ensuring data integrity.<br><br>"
-             "I'm parsing the HTML structure to extract the main content. "
-             "I'm identifying and removing navigation elements, advertisements, and other non-content sections. "
-             "I'm focusing on extracting the primary article or information content.<br><br>"
-             "Current status: Extracting content...<br>"
-             "Response received: Processing HTML<br>"
-             "Content extraction: In progress<br><br>"
+             "I'm now accessing the URL: {url}"
+             "<br>"
+             "<loading_animation>"
         ),
          "completed": (
              "I have successfully extracted content from: {url}<br><br>"
-             "I've retrieved the complete webpage content and processed it thoroughly. "
-             "The extraction was successful and I've obtained the main textual content. "
-             "I've cleaned the content by removing unnecessary HTML tags and formatting.<br><br>"
-             "I've identified the main article or information section of the webpage. "
-             "The content has been properly parsed and structured for analysis. "
-             "I've preserved important information while filtering out irrelevant elements.<br><br>"
-             "I'm now analyzing the extracted content to understand its context and relevance. "
-             "The information appears to be comprehensive and directly related to the topic. "
-             "I've verified that the content is complete and hasn't been truncated.<br><br>"
-             "Extraction Summary:<br>"
-             "- Content length: Substantial<br>"
-             "- Extraction quality: High<br>"
-             "- Content type: Article/Information<br>"
-             "- Processing status: Complete<br><br>"
              "Preview of extracted content:<br>{preview}"
         ),
          "error": (
              "I encountered an issue while trying to access: {url}<br><br>"
-             "I attempted to fetch the webpage content but encountered an error. "
-             "The error prevented me from successfully extracting the information. "
-             "I'm analyzing the error to understand the cause and find a solution.<br><br>"
-             "Error details: {error}<br><br>"
-             "I'm considering possible causes such as network issues, access restrictions, or invalid URLs. "
-             "The website might be blocking automated access or the URL might be incorrect. "
-             "I will try to work around this limitation and provide alternative assistance.<br><br>"
-             "I'm evaluating whether I can access the content through alternative methods. "
-             "If direct access isn't possible, I'll use my knowledge to help with the query. "
-             "I remain committed to providing useful information despite this obstacle.<br><br>"
+             "Error details: {error}"
         )
     }
 }

- REASONING_DEFAULT = "I'm processing the tool execution request..."
- REASONING_DELAY = 0.01 # 10 ms
- REASONING_INSERT = 15 # Stream-like word-by-word display
+ TCP_CONNECTOR_ENABLE_DNS_CACHE = True # AIOHTTP
+
+ TCP_CONNECTOR_TTL_DNS_CACHE = 300 # AIOHTTP
+
+ TCP_CONNECTOR_LIMIT = 100 # AIOHTTP
+
+ TCP_CONNECTOR_LIMIT_PER_HOST = 30 # AIOHTTP
+
+ TCP_CONNECTOR_FORCE_CLOSE = False # AIOHTTP
+
+ TCP_CONNECTOR_ENABLE_CLEANUP = True # AIOHTTP
+
+ ENABLE_TRUST_ENV = True # AIOHTTP
+
+ ENABLE_CONNECTOR_OWNER = True # AIOHTTP

  OS = [
      "Windows NT 10.0; Win64; x64",
@@ -468,20 +437,23 @@ TIMEZONES = [
      "Pacific/Honolulu"
  ]

- DESCRIPTION = """
- <b>SearchGPT</b> is <b>ChatGPT</b> with real-time web search capabilities and the ability to read content directly from a URL.
- <br><br>
- This Space implements an agent-based system with <b><a href="https://www.gradio.app" target="_blank">Gradio</a></b>. It is integrated with
- <b><a href="https://docs.searxng.org" target="_blank">SearXNG</a></b>, which is then converted into a script tool or function for native execution.
- <br><br>
- The agent mode is inspired by the <b><a href="https://openwebui.com/t/hadad/deep_research" target="_blank">Deep Research</a></b> from
- <b><a href="https://docs.openwebui.com" target="_blank">OpenWebUI</a></b> tools script.
- <br><br>
- The <b>Deep Research</b> feature is also available on the primary Spaces of <b><a href="https://umint-openwebui.hf.space"
- target="_blank">UltimaX Intelligence</a></b>.
- <br><br>
- Please consider reading the <b><a href="https://huggingface.co/spaces/umint/ai/discussions/37#68b55209c51ca52ed299db4c"
- target="_blank">Terms of Use and Consequences of Violation</a></b> if you wish to proceed to the main Spaces.
- <br><br>
- <b>Like this project? Feel free to buy me a <a href="https://ko-fi.com/hadad" target="_blank">coffee</a></b>.
- """ # Gradio
+ DESCRIPTION = (
+     "<b>SearchGPT</b> is <b>ChatGPT</b> with real-time web search capabilities "
+     "and the ability to read content directly from a URL.<br><br>"
+     "This Space implements an agent-based system with "
+     "<b><a href='https://www.gradio.app' target='_blank'>Gradio</a></b>. "
+     "It is integrated with "
+     "<b><a href='https://docs.searxng.org' target='_blank'>SearXNG</a></b>, "
+     "which is then converted into a script tool or function for native execution.<br><br>"
+     "The agent mode is inspired by the "
+     "<b><a href='https://openwebui.com/t/hadad/deep_research' target='_blank'>Deep Research</a></b> "
+     "from <b><a href='https://docs.openwebui.com' target='_blank'>OpenWebUI</a></b> tools script.<br><br>"
+     "The <b>Deep Research</b> feature is also available on the primary Spaces of "
+     "<b><a href='https://umint-openwebui.hf.space' target='_blank'>UltimaX Intelligence</a></b>.<br><br>"
+     "Please consider reading the "
+     "<b><a href='https://huggingface.co/spaces/umint/ai/discussions/37#68b55209c51ca52ed299db4c' "
+     "target='_blank'>Terms of Use and Consequences of Violation</a></b> "
+     "if you wish to proceed to the main Spaces.<br><br>"
+     "<b>Like this project? Feel free to buy me a "
+     "<a href='https://ko-fi.com/hadad' target='_blank'>coffee</a></b>."
+ ) # Gradio
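The new INSTRUCTIONS constant replaces the old INSTRUCTIONS_START/INSTRUCTIONS_END pair, and setup.py further down in this commit prepends the current date to it when building the system message. A rough sketch of how those pieces combine, with a hard-coded timestamp standing in for the real get_current_time() helper:

    from config import INSTRUCTIONS

    # Illustrative stand-in for src/utils/time.get_current_time()
    current_time = "2025-09-13 12:00:00"

    system_message = {
        "role": "system",
        "content": f"Today is: {current_time}" + "\n\n\n" + INSTRUCTIONS,
    }
    conversation = [system_message, {"role": "user", "content": "What's new in Python 3.13?"}]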
src/client/__init__.py CHANGED
@@ -3,6 +3,6 @@
  # SPDX-License-Identifier: Apache-2.0
  #

- from .openai_client import initialize_client
+ from .openai_client import setup_client

- __all__ = ['initialize_client']
+ __all__ = ['setup_client']
src/client/openai_client.py CHANGED
@@ -6,7 +6,7 @@
  import os
  from openai import OpenAI

- def initialize_client():
+ def setup_client():
      try:
          client = OpenAI(
              base_url=os.getenv("OPENAI_API_BASE_URL"),
src/core/web_configuration.py CHANGED
@@ -3,11 +3,10 @@
  # SPDX-License-Identifier: Apache-2.0
  #

- from config import SEARXNG_ENDPOINT, BAIDU_ENDPOINT, READER_ENDPOINT, REQUEST_TIMEOUT
+ from config import SEARXNG_ENDPOINT, READER_ENDPOINT, REQUEST_TIMEOUT

  class WebConfiguration:
      def __init__(self):
          self.searxng_endpoint = SEARXNG_ENDPOINT
-         self.baidu_endpoint = BAIDU_ENDPOINT
          self.content_reader_api = READER_ENDPOINT
          self.request_timeout = REQUEST_TIMEOUT
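A quick sketch of what the trimmed configuration object now exposes (values come from the constants moved to the top of config.py in this commit):

    from src.core.web_configuration import WebConfiguration

    cfg = WebConfiguration()
    print(cfg.searxng_endpoint, cfg.content_reader_api, cfg.request_timeout)
    # The baidu_endpoint attribute is gone, along with BAIDU_ENDPOINT in config.py.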
src/core/web_loader.py CHANGED
@@ -36,112 +36,122 @@ class WebLoader:

      def generate_ipv4(self):
          while len(self.ipv4_pool) < 1000 and self.running:
-             ip = f"{random.choice(OCTETS)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"
              with self.lock:
-                 self.ipv4_pool.append(ip)
+                 self.ipv4_pool.append(
+                     f"{random.choice(OCTETS)}.{random.randint(0, 255)}."
+                     f"{random.randint(0, 255)}.{random.randint(1, 254)}"
+                 )

      def generate_ipv6(self):
          while len(self.ipv6_pool) < 1000 and self.running:
-             segments = [f"{random.randint(0, 65535):04x}" for _ in range(8)]
-             ip = ":".join(segments)
              with self.lock:
-                 self.ipv6_pool.append(ip)
+                 self.ipv6_pool.append(
+                     ":".join([f"{random.randint(0, 65535):04x}" for _ in range(8)])
+                 )

      def generate_user_agents(self):
          while len(self.user_agent_pool) < 500 and self.running:
-             browser = random.choice(BROWSERS)
-
-             if browser == "Chrome":
-                 ua = f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.choice(CHROME_VERSIONS)} Safari/537.36"
-             elif browser == "Firefox":
-                 ua = f"Mozilla/5.0 ({random.choice(OS)}) Gecko/20100101 Firefox/{random.choice(FIREFOX_VERSIONS)}"
-             elif browser == "Safari":
-                 webkit_version = f"{600 + random.randint(0, 15)}.{random.randint(1, 9)}.{random.randint(1, 20)}"
-                 ua = f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/{webkit_version} (KHTML, like Gecko) Version/{random.choice(SAFARI_VERSIONS)} Safari/{webkit_version}"
-             elif browser == "Edge":
-                 version = random.choice(EDGE_VERSIONS)
-                 ua = f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version.split('.')[0]}.0.0.0 Safari/537.36 Edg/{version}"
-             else:
-                 version = f"{random.randint(70, 100)}.0.{random.randint(3000, 5000)}.{random.randint(50, 150)}"
-                 ua = f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version} Safari/537.36 OPR/{random.randint(80, 106)}.0.0.0"
-
              with self.lock:
-                 self.user_agent_pool.append(ua)
+                 match random.choice(BROWSERS):
+                     case "Chrome":
+                         self.user_agent_pool.append(
+                             f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/537.36 "
+                             f"(KHTML, like Gecko) Chrome/{random.choice(CHROME_VERSIONS)} Safari/537.36"
+                         )
+                     case "Firefox":
+                         self.user_agent_pool.append(
+                             f"Mozilla/5.0 ({random.choice(OS)}) Gecko/20100101 "
+                             f"Firefox/{random.choice(FIREFOX_VERSIONS)}"
+                         )
+                     case "Safari":
+                         self.user_agent_pool.append(
+                             f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/"
+                             f"{600 + random.randint(0, 15)}.{random.randint(1, 9)}.{random.randint(1, 20)} "
+                             f"(KHTML, like Gecko) Version/{random.choice(SAFARI_VERSIONS)} "
+                             f"Safari/{600 + random.randint(0, 15)}.{random.randint(1, 9)}.{random.randint(1, 20)}"
+                         )
+                     case "Edge":
+                         self.user_agent_pool.append(
+                             f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/537.36 "
+                             f"(KHTML, like Gecko) Chrome/{random.choice(EDGE_VERSIONS).split('.')[0]}.0.0.0 "
+                             f"Safari/537.36 Edg/{random.choice(EDGE_VERSIONS)}"
+                         )
+                     case _:
+                         self.user_agent_pool.append(
+                             f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/537.36 "
+                             f"(KHTML, like Gecko) Chrome/{random.randint(70, 100)}.0."
+                             f"{random.randint(3000, 5000)}.{random.randint(50, 150)} "
+                             f"Safari/537.36 OPR/{random.randint(80, 106)}.0.0.0"
+                         )

      def generate_origins(self):
          while len(self.origin_pool) < 500 and self.running:
-             origin = f"{random.choice(PROTOCOLS)}{random.choice(DOMAINS)}"
              with self.lock:
-                 self.origin_pool.append(origin)
+                 self.origin_pool.append(
+                     f"{random.choice(PROTOCOLS)}{random.choice(DOMAINS)}"
+                 )

      def generate_referrers(self):
          while len(self.referrer_pool) < 500 and self.running:
-             referrer = f"{random.choice(SEARCH_ENGINES)}{random.choice(KEYWORDS)}"
              with self.lock:
-                 self.referrer_pool.append(referrer)
+                 self.referrer_pool.append(
+                     f"{random.choice(SEARCH_ENGINES)}{random.choice(KEYWORDS)}"
+                 )

      def generate_locations(self):
          while len(self.location_pool) < 500 and self.running:
-             location = {
-                 "country": random.choice(COUNTRIES),
-                 "language": random.choice(LANGUAGES),
-                 "timezone": random.choice(TIMEZONES)
-             }
              with self.lock:
-                 self.location_pool.append(location)
+                 self.location_pool.append({
+                     "country": random.choice(COUNTRIES),
+                     "language": random.choice(LANGUAGES),
+                     "timezone": random.choice(TIMEZONES)
+                 })

      def get_ipv4(self):
          with self.lock:
              if self.ipv4_pool:
                  return self.ipv4_pool[random.randint(0, len(self.ipv4_pool) - 1)]
-         return f"{random.randint(1, 223)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"
+         return (
+             f"{random.randint(1, 223)}.{random.randint(0, 255)}."
+             f"{random.randint(0, 255)}.{random.randint(1, 254)}"
+         )

      def get_ipv6(self):
          with self.lock:
              if self.ipv6_pool:
                  return self.ipv6_pool[random.randint(0, len(self.ipv6_pool) - 1)]
-         segments = [f"{random.randint(0, 65535):04x}" for _ in range(8)]
-         return ":".join(segments)
+         return ":".join([f"{random.randint(0, 65535):04x}" for _ in range(8)])

      def get_user_agent(self):
          with self.lock:
              if self.user_agent_pool:
                  return self.user_agent_pool[random.randint(0, len(self.user_agent_pool) - 1)]
-         return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

      def get_origin(self):
          with self.lock:
              if self.origin_pool:
                  return self.origin_pool[random.randint(0, len(self.origin_pool) - 1)]
-         return "https://www.google.com"

      def get_referrer(self):
          with self.lock:
              if self.referrer_pool:
                  return self.referrer_pool[random.randint(0, len(self.referrer_pool) - 1)]
-         return "https://www.google.com/search?q=search"

      def get_location(self):
          with self.lock:
              if self.location_pool:
                  return self.location_pool[random.randint(0, len(self.location_pool) - 1)]
-         return {
-             "country": "US",
-             "language": "en-US",
-             "timezone": "America/New_York"
-         }

      def start_engine(self):
-         threads = [
-             threading.Thread(target=self.generate_ipv4, daemon=True),
-             threading.Thread(target=self.generate_ipv6, daemon=True),
-             threading.Thread(target=self.generate_user_agents, daemon=True),
-             threading.Thread(target=self.generate_origins, daemon=True),
-             threading.Thread(target=self.generate_referrers, daemon=True),
-             threading.Thread(target=self.generate_locations, daemon=True)
-         ]
-         for thread in threads:
-             thread.start()
+         for target in [
+             self.generate_ipv4,
+             self.generate_ipv6,
+             self.generate_user_agents,
+             self.generate_origins,
+             self.generate_referrers,
+             self.generate_locations
+         ]:
+             threading.Thread(target=target, daemon=True).start()

      def stop(self):
          self.running = False
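The loader keeps pre-filling its pools on daemon threads and hands out random entries on demand; the BrowserEngine below consumes the module-level `web_loader` instance. A short usage sketch under that assumption:

    from src.core.web_loader import web_loader

    web_loader.start_engine()          # spawns the six daemon generator threads
    ip = web_loader.get_ipv4()         # still has an inline fallback if the pool is empty
    ua = web_loader.get_user_agent()   # may be None until the pool fills; the hard-coded fallback was removed here
    print(ip, ua)
    web_loader.stop()                  # lets the generator loops wind down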
src/engine/browser_engine.py CHANGED
@@ -7,8 +7,7 @@ import aiohttp
  import asyncio
  from urllib.parse import quote
  from config import (
-     CONTENT_EXTRACTION,
-     SEARCH_SELECTION,
+     REMINDERS,
      TCP_CONNECTOR_ENABLE_DNS_CACHE,
      TCP_CONNECTOR_TTL_DNS_CACHE,
      TCP_CONNECTOR_LIMIT,
@@ -18,32 +17,25 @@ from config import (
      ENABLE_TRUST_ENV,
      ENABLE_CONNECTOR_OWNER
  )
- from src.core.web_loader import web_loader
+ from ..core.web_loader import web_loader

  class BrowserEngine:
      def __init__(self, configuration):
          self.config = configuration

      def generate_headers(self):
-         ipv4 = web_loader.get_ipv4()
-         ipv6 = web_loader.get_ipv6()
-         user_agent = web_loader.get_user_agent()
-         origin = web_loader.get_origin()
-         referrer = web_loader.get_referrer()
-         location = web_loader.get_location()
-
          return {
-             "User-Agent": user_agent,
-             "X-Forwarded-For": f"{ipv4}, {ipv6}",
-             "X-Real-IP": ipv4,
-             "X-Originating-IP": ipv4,
-             "X-Remote-IP": ipv4,
-             "X-Remote-Addr": ipv4,
-             "X-Client-IP": ipv4,
-             "X-Forwarded-Host": origin.replace("https://", "").replace("http://", ""),
-             "Origin": origin,
-             "Referer": referrer,
-             "Accept-Language": f"{location['language']},en;q=0.9",
+             "User-Agent": web_loader.get_user_agent(),
+             "X-Forwarded-For": f"{web_loader.get_ipv4()}, {web_loader.get_ipv6()}",
+             "X-Real-IP": web_loader.get_ipv4(),
+             "X-Originating-IP": web_loader.get_ipv4(),
+             "X-Remote-IP": web_loader.get_ipv4(),
+             "X-Remote-Addr": web_loader.get_ipv4(),
+             "X-Client-IP": web_loader.get_ipv4(),
+             "X-Forwarded-Host": web_loader.get_origin().replace("https://", "").replace("http://", ""),
+             "Origin": web_loader.get_origin(),
+             "Referer": web_loader.get_referrer(),
+             "Accept-Language": f"{web_loader.get_location()['language']},en;q=0.9",
              "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
              "Accept-Encoding": "gzip, deflate, br",
              "DNT": "1",
@@ -54,35 +46,28 @@ class BrowserEngine:
              "Sec-Fetch-Site": "cross-site",
              "Sec-Fetch-User": "?1",
              "Cache-Control": "max-age=0",
-             "X-Country": location['country'],
-             "X-Timezone": location['timezone']
+             "X-Country": web_loader.get_location()['country'],
+             "X-Timezone": web_loader.get_location()['timezone']
          }

      def web_selector(self, search_query: str, search_provider: str = "google"):
-         if search_provider == "baidu":
-             return (
-                 f"{self.config.content_reader_api}{self.config.baidu_endpoint}?wd={quote(search_query)}",
-                 "#content_left"
-             )
-         provider_prefix = "!go" if search_provider == "google" else "!bi"
+         provider_prefix = "!go"
          return (
              f"{self.config.content_reader_api}{self.config.searxng_endpoint}?q={quote(f'{provider_prefix} {search_query}')}",
              "#urls"
          )

      async def web_request(self, method: str, url: str, headers: dict, data: dict = None):
-         timeout = aiohttp.ClientTimeout(total=self.config.request_timeout)
-         connector = aiohttp.TCPConnector(
-             use_dns_cache=TCP_CONNECTOR_ENABLE_DNS_CACHE,
-             ttl_dns_cache=TCP_CONNECTOR_TTL_DNS_CACHE,
-             limit=TCP_CONNECTOR_LIMIT,
-             limit_per_host=TCP_CONNECTOR_LIMIT_PER_HOST,
-             force_close=TCP_CONNECTOR_FORCE_CLOSE,
-             enable_cleanup_closed=TCP_CONNECTOR_ENABLE_CLEANUP
-         )
          async with aiohttp.ClientSession(
-             timeout=timeout,
-             connector=connector,
+             timeout=aiohttp.ClientTimeout(total=self.config.request_timeout),
+             connector=aiohttp.TCPConnector(
+                 use_dns_cache=TCP_CONNECTOR_ENABLE_DNS_CACHE,
+                 ttl_dns_cache=TCP_CONNECTOR_TTL_DNS_CACHE,
+                 limit=TCP_CONNECTOR_LIMIT,
+                 limit_per_host=TCP_CONNECTOR_LIMIT_PER_HOST,
+                 force_close=TCP_CONNECTOR_FORCE_CLOSE,
+                 enable_cleanup_closed=TCP_CONNECTOR_ENABLE_CLEANUP
+             ),
              trust_env=ENABLE_TRUST_ENV,
              connector_owner=ENABLE_CONNECTOR_OWNER
          ) as session:
@@ -102,7 +87,7 @@ class BrowserEngine:
          payload = {"url": target_url}
          try:
              extracted_content = asyncio.run(self._post(self.config.content_reader_api, payload, headers))
-             return f"{extracted_content}\n\n\n{CONTENT_EXTRACTION}\n\n\n"
+             return f"{extracted_content}\n\n\n{REMINDERS}\n\n\n"
          except Exception as error:
              return f"Error reading URL: {str(error)}"

@@ -112,6 +97,6 @@ class BrowserEngine:
          headers["X-Target-Selector"] = selector
          try:
              search_results = asyncio.run(self._get(full_url, headers))
-             return f"{search_results}\n\n\n{SEARCH_SELECTION}\n\n\n"
+             return f"{search_results}\n\n\n{REMINDERS}\n\n\n"
          except Exception as error:
              return f"Error during search: {str(error)}"
src/processor/__init__.py CHANGED
@@ -3,6 +3,6 @@
  # SPDX-License-Identifier: Apache-2.0
  #

- from .message_processor import process_user_request
+ from .message_processor import searchgpt_playground

- __all__ = ['process_user_request']
+ __all__ = ['searchgpt_playground']
src/processor/message_processor.py CHANGED
@@ -5,15 +5,15 @@

  import traceback
  from config import MODEL
- from src.core.web_configuration import WebConfiguration
- from src.engine.browser_engine import BrowserEngine
- from src.tools.tool_manager import construct_tool_definitions
- from src.client.openai_client import initialize_client
+ from ..core.web_configuration import WebConfiguration
+ from ..engine.browser_engine import BrowserEngine
+ from ..tools.tool_manager import local_tools
+ from ..client.openai_client import setup_client
  from .response.setup import setup_response
  from .response.generator import generate_response
- from .tools.interaction import process_tool_interactions
+ from .tools.interaction import tools_setup

- def process_user_request(user_message, chat_history):
+ def searchgpt_playground(user_message, chat_history):
      if not isinstance(user_message, str) or not user_message.strip():
          yield []
          return
@@ -21,30 +21,22 @@ def process_user_request(user_message, chat_history):
      output_content = ""

      try:
-         server, client_initialization_error = initialize_client()
+         server, client_initialization_error = setup_client()
          if client_initialization_error:
              output_content = client_initialization_error
              yield output_content
              return

-         search_configuration = WebConfiguration()
-         search_engine_instance = BrowserEngine(search_configuration)
-         available_tools = construct_tool_definitions()
-
-         conversation_messages = setup_response(
-             chat_history,
-             user_message
-         )
-
+         conversation_messages = setup_response(chat_history, user_message)
          tool_response = ""
          tools_done = False

-         for tool_update in process_tool_interactions(
+         for tool_update in tools_setup(
              server=server,
              model_name=MODEL,
              conversation_messages=conversation_messages,
-             tool_definitions=available_tools,
-             search_engine=search_engine_instance
+             tool_definitions=local_tools(),
+             search_engine=BrowserEngine(WebConfiguration())
          ):
              if isinstance(tool_update, str):
                  tool_response = tool_update
@@ -61,7 +53,7 @@ def process_user_request(user_message, chat_history):
              server=server,
              model_name=MODEL,
              conversation_messages=conversation_messages,
-             tool_definitions=available_tools,
+             tool_definitions=local_tools(),
              tools_done=tools_done
          )

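Because the renamed entry point is a generator, it can also be exercised outside Gradio; each yield is a progressively updated response payload (tool/reasoning progress first, then the growing final answer). A hedged sketch of driving it directly, assuming the Spaces secrets for the OpenAI-compatible endpoint are configured:

    from src.processor.message_processor import searchgpt_playground

    for update in searchgpt_playground("Who won the last F1 race?", chat_history=[]):
        print(update)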
src/processor/reasoning/interface.py CHANGED
@@ -3,16 +3,9 @@
  # SPDX-License-Identifier: Apache-2.0
  #

+ from assets.css.animations.loading import spinner
+
  def reasoning_interfaces(text, current_length=0):
-     if current_length < len(text):
-         reasoning_steps = text[:current_length]
-
-         if current_length > 0 and not reasoning_steps.endswith((
-             '<br>',
-             '<br><br>'
-         )):
-             reasoning_steps += '...'
-
-         return reasoning_steps
-
-     return text
+     return text[:current_length].replace(
+         "<loading_animation>", spinner()
+     )
src/processor/reasoning/tool_reasoning.py CHANGED
@@ -3,36 +3,41 @@
  # SPDX-License-Identifier: Apache-2.0
  #

- from config import REASONING_STEPS, REASONING_DEFAULT
+ from config import REASONING_STEPS

  def tool_reasoning(tool_name, tool_arguments, stage, error=None, result=None):
      if tool_name == "web_search":
-         query = tool_arguments.get("query", "") if tool_arguments else ""
-         engine = tool_arguments.get("engine", "google") if tool_arguments else "google"
-
-         template = REASONING_STEPS.get("web_search", {}).get(stage)
-
-         if template:
-             if stage == "completed":
-                 preview = result[:300] + "..." if result and len(result) > 300 else result
-                 return template.format(query=query, engine=engine, preview=preview)
-             elif stage == "error":
-                 return template.format(query=query, engine=engine, error=error)
-             else:
-                 return template.format(query=query, engine=engine)
-
+         if stage == "completed":
+             preview = result[:300] + "..." if result and len(result) > 300 else result
+             return REASONING_STEPS["web_search"][stage].format(
+                 query=tool_arguments.get("query", "") if tool_arguments else "",
+                 engine=tool_arguments.get("engine", "google") if tool_arguments else "google",
+                 preview=preview
+             )
+         elif stage == "error":
+             return REASONING_STEPS["web_search"][stage].format(
+                 query=tool_arguments.get("query", "") if tool_arguments else "",
+                 engine=tool_arguments.get("engine", "google") if tool_arguments else "google",
+                 error=error
+             )
+         else:
+             return REASONING_STEPS["web_search"][stage].format(
+                 query=tool_arguments.get("query", "") if tool_arguments else "",
+                 engine=tool_arguments.get("engine", "google") if tool_arguments else "google"
+             )
      elif tool_name == "read_url":
-         url = tool_arguments.get("url", "") if tool_arguments else ""
-
-         template = REASONING_STEPS.get("read_url", {}).get(stage)
-
-         if template:
-             if stage == "completed":
-                 preview = result[:300] + "..." if result and len(result) > 300 else result
-                 return template.format(url=url, preview=preview)
-             elif stage == "error":
-                 return template.format(url=url, error=error)
-             else:
-                 return template.format(url=url)
-
-     return REASONING_DEFAULT
+         if stage == "completed":
+             preview = result[:300] + "..." if result and len(result) > 300 else result
+             return REASONING_STEPS["read_url"][stage].format(
+                 url=tool_arguments.get("url", "") if tool_arguments else "",
+                 preview=preview
+             )
+         elif stage == "error":
+             return REASONING_STEPS["read_url"][stage].format(
+                 url=tool_arguments.get("url", "") if tool_arguments else "",
+                 error=error
+             )
+         else:
+             return REASONING_STEPS["read_url"][stage].format(
+                 url=tool_arguments.get("url", "") if tool_arguments else ""
+             )
src/processor/response/generator.py CHANGED
@@ -5,7 +5,6 @@

  import traceback
  from config import (
- INSTRUCTIONS_END,
  MAX_TOKENS,
  CHAT_TEMPERATURE,
  STREAM
@@ -20,12 +19,6 @@ def generate_response(
  ):
  response_generator = ""

- if tools_done:
- conversation_messages.append({
- "role": "system",
- "content": INSTRUCTIONS_END
- })
-
  try:
  response = server.chat.completions.create(
  model=model_name,

  import traceback
  from config import (
  MAX_TOKENS,
  CHAT_TEMPERATURE,
  STREAM

  ):
  response_generator = ""

  try:
  response = server.chat.completions.create(
  model=model_name,
src/processor/response/setup.py CHANGED
@@ -4,7 +4,7 @@
  #

  from ...utils.time import get_current_time
- from config import INSTRUCTIONS_START

  def setup_response(conversation_history, user_input):
  history = []
@@ -16,7 +16,7 @@ def setup_response(conversation_history, user_input):
  "content": (
  f"Today is: {get_current_time()}"
  + "\n\n\n"
- + INSTRUCTIONS_START
  )
  }
  )

  #

  from ...utils.time import get_current_time
+ from config import INSTRUCTIONS

  def setup_response(conversation_history, user_input):
  history = []

  "content": (
  f"Today is: {get_current_time()}"
  + "\n\n\n"
+ + INSTRUCTIONS
  )
  }
  )
src/processor/tools/__init__.py CHANGED
@@ -3,12 +3,12 @@
  # SPDX-License-Identifier: Apache-2.0
  #

- from .parser import extract_tool_parameters
- from .executor import invoke_tool_function
- from .interaction import process_tool_interactions

  __all__ = [
- 'extract_tool_parameters',
- 'invoke_tool_function',
- 'process_tool_interactions'
  ]

  # SPDX-License-Identifier: Apache-2.0
  #

+ from .parser import tools_parser
+ from .executor import tools_call
+ from .interaction import tools_setup

  __all__ = [
+ 'tools_parser',
+ 'tools_call',
+ 'tools_setup'
  ]
src/processor/tools/executor.py CHANGED
@@ -3,7 +3,7 @@
  # SPDX-License-Identifier: Apache-2.0
  #

- def invoke_tool_function(search_engine, function_name, function_params):
  if function_name == "web_search":
  return search_engine.perform_search(
  search_query=function_params.get("query", ""),

  # SPDX-License-Identifier: Apache-2.0
  #

+ def tools_call(search_engine, function_name, function_params):
  if function_name == "web_search":
  return search_engine.perform_search(
  search_query=function_params.get("query", ""),
src/processor/tools/interaction.py CHANGED
@@ -8,209 +8,131 @@ from assets.css.reasoning import styles
  from ..response.formatter import assistant_response
  from ..reasoning.interface import reasoning_interfaces
  from ..reasoning.tool_reasoning import tool_reasoning
- from .parser import extract_tool_parameters
- from .executor import invoke_tool_function
  from config import (
  MAX_TOKENS,
- REASONING_DELAY,
- REASONING_INSERT,
  TOOLS_TEMPERATURE,
  MAXIMUM_ITERATIONS,
  MAX_RETRY_LIMIT,
  ITERATION_METRICS
  )

- def process_tool_interactions(server, model_name, conversation_messages, tool_definitions, search_engine):
- retry_count = 0
- logs_generator = ""
- tool_results = []
- execution_success = False
- last_error = None
- error_history = []
-
- while MAXIMUM_ITERATIONS <= MAX_RETRY_LIMIT and not execution_success:
- ITERATION_METRICS["attempts"] += 1
- current_iteration_successful = False
- iteration_errors = []
-
- for iteration_index in range(MAXIMUM_ITERATIONS):
  try:
- retry_delay = ITERATION_METRICS["retry_delays"][min(retry_count, len(ITERATION_METRICS["retry_delays"]) - 1)]
- if retry_count > 0:
- time.sleep(retry_delay * ITERATION_METRICS["backoff_multiplier"])

- model_response = server.chat.completions.create(
  model=model_name,
  messages=conversation_messages,
  tools=tool_definitions,
  tool_choice="auto",
  max_tokens=MAX_TOKENS,
  temperature=TOOLS_TEMPERATURE
- )
-
- response_choice = model_response.choices[0]
- assistant_message = response_choice.message
- formatted_assistant_message = assistant_response(assistant_message)

- conversation_messages.append(
- {
- "role": formatted_assistant_message["role"],
- "content": formatted_assistant_message["content"],
- "tool_calls": formatted_assistant_message["tool_calls"]
- }
- )

- pending_tool_calls = assistant_message.tool_calls or []
- if not pending_tool_calls:
- if logs_generator:
- logs_generator = styles(logs_generator.replace('<br>', '\n').strip(), expanded=False)
- execution_success = True
- current_iteration_successful = True
- break

- tool_execution_errors = []
- for tool_invocation in pending_tool_calls:
- tool_name = tool_invocation.function.name
- tool_arguments_raw = tool_invocation.function.arguments
-
- extracted_arguments, extraction_error = extract_tool_parameters(tool_arguments_raw)
-
- if extraction_error:
- error_key = f"{tool_name}_extraction"
- ITERATION_METRICS["error_patterns"][error_key] = ITERATION_METRICS["error_patterns"].get(error_key, 0) + 1
- tool_execution_errors.append({
- "tool": tool_name,
- "error": extraction_error,
- "type": "extraction"
- })

- reasoning_error = tool_reasoning(tool_name, None, "error", error=extraction_error)
- for i in range(0, len(reasoning_error), REASONING_INSERT):
- logs_generator = styles(reasoning_interfaces(reasoning_error, i), expanded=True)
- yield logs_generator
- time.sleep(REASONING_DELAY)
- logs_generator = styles(reasoning_error, expanded=True)
- yield logs_generator
- tool_execution_result = extraction_error
  else:
- reasoning_status = tool_reasoning(tool_name, extracted_arguments, "parsing")
- for i in range(0, len(reasoning_status), REASONING_INSERT):
- logs_generator = styles(reasoning_interfaces(reasoning_status, i), expanded=True)
- yield logs_generator
- time.sleep(REASONING_DELAY)

- reasoning_start = tool_reasoning(tool_name, extracted_arguments, "executing")
- for i in range(0, len(reasoning_start), REASONING_INSERT):
- logs_generator = styles(reasoning_interfaces(reasoning_start, i), expanded=True)
- yield logs_generator
- time.sleep(REASONING_DELAY)

  try:
- tool_execution_result = invoke_tool_function(
- search_engine,
- tool_name,
- extracted_arguments
- )
- tool_results.append({
- "tool": tool_name,
- "arguments": extracted_arguments,
- "result": tool_execution_result,
- "iteration": MAXIMUM_ITERATIONS,
- "retry_count": retry_count
  })

- reasoning_done = tool_reasoning(tool_name, extracted_arguments, "completed", result=tool_execution_result)
- for i in range(0, len(reasoning_done), REASONING_INSERT):
- logs_generator = styles(reasoning_interfaces(reasoning_done, i), expanded=True)
- yield logs_generator
- time.sleep(REASONING_DELAY)
- logs_generator = styles(reasoning_done, expanded=False)
- yield logs_generator

  except Exception as tool_error:
- error_key = f"{tool_name}_execution"
- ITERATION_METRICS["error_patterns"][error_key] = ITERATION_METRICS["error_patterns"].get(error_key, 0) + 1
- tool_execution_errors.append({
- "tool": tool_name,
- "error": str(tool_error),
- "type": "execution",
- "arguments": extracted_arguments
- })

- reasoning_error = tool_reasoning(tool_name, extracted_arguments, "error", error=str(tool_error))
- for i in range(0, len(reasoning_error), REASONING_INSERT):
- logs_generator = styles(reasoning_interfaces(reasoning_error, i), expanded=True)
- yield logs_generator
- time.sleep(REASONING_DELAY)
- logs_generator = styles(reasoning_error, expanded=True)
- yield logs_generator
- tool_execution_result = str(tool_error)
-
- conversation_messages.append(
- {
- "role": "tool",
- "tool_call_id": tool_invocation.id,
- "name": tool_name,
- "content": tool_execution_result
- }
- )

- if not tool_execution_errors:
- execution_success = True
- current_iteration_successful = True
- break
- else:
- iteration_errors.extend(tool_execution_errors)

- except Exception as model_error:
- last_error = str(model_error)
- error_history.append({
- "iteration": MAXIMUM_ITERATIONS,
- "error": last_error,
- "timestamp": time.time()
- })
- ITERATION_METRICS["failures"] += 1
- iteration_errors.append({
- "error": last_error,
- "type": "model"
- })

- if current_iteration_successful:
- execution_success = True
- break
- else:
- if iteration_errors:
- error_history.extend(iteration_errors)
-
- retry_count += 1
- previous_iterations = MAXIMUM_ITERATIONS
-
- if ITERATION_METRICS["error_patterns"]:
- frequent_errors = max(ITERATION_METRICS["error_patterns"].values())
- if frequent_errors > 3:
- new_iterations = min(MAXIMUM_ITERATIONS + 2, MAX_RETRY_LIMIT)
- else:
- new_iterations = min(MAXIMUM_ITERATIONS + 1, MAX_RETRY_LIMIT)
  else:
- new_iterations = min(MAXIMUM_ITERATIONS + 1, MAX_RETRY_LIMIT)
-
- if new_iterations > previous_iterations:
- retry_reasoning = f"Retrying with increased iterations: {new_iterations} (attempt {retry_count + 1})"
- for i in range(0, len(retry_reasoning), REASONING_INSERT):
- logs_generator = styles(reasoning_interfaces(retry_reasoning, i), expanded=True)
- yield logs_generator
- time.sleep(REASONING_DELAY)
-
- if new_iterations >= MAX_RETRY_LIMIT:
- final_error = f"Maximum retry limit reached after {ITERATION_METRICS['attempts']} attempts with {ITERATION_METRICS['failures']} failures"
- logs_generator = styles(final_error, expanded=True)
- yield logs_generator
- break

- ITERATION_METRICS["success_rate"] = (len(tool_results) / max(ITERATION_METRICS["attempts"], 1)) * 100

- if logs_generator:
- logs_generator = styles(logs_generator.replace('<br>', '\n').strip(), expanded=False)

- generator_results = len(tool_results) > 0
- return conversation_messages, logs_generator, generator_results

  from ..response.formatter import assistant_response
  from ..reasoning.interface import reasoning_interfaces
  from ..reasoning.tool_reasoning import tool_reasoning
+ from .parser import tools_parser
+ from .executor import tools_call
  from config import (
  MAX_TOKENS,
  TOOLS_TEMPERATURE,
  MAXIMUM_ITERATIONS,
  MAX_RETRY_LIMIT,
  ITERATION_METRICS
  )

+ def tools_setup(server, model_name, conversation_messages, tool_definitions, search_engine):
+ if "current_iterations" not in ITERATION_METRICS:
+ ITERATION_METRICS["current_iterations"] = MAXIMUM_ITERATIONS
+
+ while ITERATION_METRICS["current_iterations"] <= MAX_RETRY_LIMIT:
+ for iteration_index in range(ITERATION_METRICS["current_iterations"]):
  try:
+ if ITERATION_METRICS.get("retry_count", 0) > 0:
+ time.sleep(ITERATION_METRICS["retry_delays"][min(ITERATION_METRICS.get("retry_count", 0), len(ITERATION_METRICS["retry_delays"]) - 1)] * ITERATION_METRICS["backoff_multiplier"])

+ response = server.chat.completions.create(
  model=model_name,
  messages=conversation_messages,
  tools=tool_definitions,
  tool_choice="auto",
  max_tokens=MAX_TOKENS,
  temperature=TOOLS_TEMPERATURE
+ ).choices[0].message

+ conversation_messages.append(assistant_response(response))

+ if not (response.tool_calls or []):
+ if ITERATION_METRICS.get("logs_generator", ""):
+ ITERATION_METRICS["logs_generator"] = styles(ITERATION_METRICS.get("logs_generator", "").replace('<br>', '\n').strip(), expanded=False)
+ return conversation_messages, ITERATION_METRICS.get("logs_generator", ""), True

+ for tool_invocation in (response.tool_calls or []):
+ if tools_parser(tool_invocation.function.arguments)[1]:
+ ITERATION_METRICS["error_patterns"][f"{tool_invocation.function.name}_extraction"] = ITERATION_METRICS["error_patterns"].get(f"{tool_invocation.function.name}_extraction", 0) + 1

+ for i in range(0, len(tool_reasoning(tool_invocation.function.name, None, "error", error=tools_parser(tool_invocation.function.arguments)[1])) + 1):
+ ITERATION_METRICS["logs_generator"] = styles(reasoning_interfaces(tool_reasoning(tool_invocation.function.name, None, "error", error=tools_parser(tool_invocation.function.arguments)[1]), i), expanded=True)
+ yield ITERATION_METRICS["logs_generator"]
+ ITERATION_METRICS["logs_generator"] = styles(tool_reasoning(tool_invocation.function.name, None, "error", error=tools_parser(tool_invocation.function.arguments)[1]), expanded=True)
+ yield ITERATION_METRICS["logs_generator"]
+
+ conversation_messages.append({
+ "role": "tool",
+ "tool_call_id": tool_invocation.id,
+ "name": tool_invocation.function.name,
+ "content": tools_parser(tool_invocation.function.arguments)[1]
+ })
  else:
+ for i in range(0, len(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "parsing")) + 1):
+ ITERATION_METRICS["logs_generator"] = styles(reasoning_interfaces(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "parsing"), i), expanded=True)
+ yield ITERATION_METRICS["logs_generator"]

+ for i in range(0, len(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "executing")) + 1):
+ ITERATION_METRICS["logs_generator"] = styles(reasoning_interfaces(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "executing"), i), expanded=True)
+ yield ITERATION_METRICS["logs_generator"]

  try:
+ conversation_messages.append({
+ "role": "tool",
+ "tool_call_id": tool_invocation.id,
+ "name": tool_invocation.function.name,
+ "content": tools_call(
+ search_engine,
+ tool_invocation.function.name,
+ tools_parser(tool_invocation.function.arguments)[0]
+ )
  })

+ for i in range(0, len(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "completed", result=conversation_messages[-1]["content"])) + 1):
+ ITERATION_METRICS["logs_generator"] = styles(reasoning_interfaces(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "completed", result=conversation_messages[-1]["content"]), i), expanded=True)
+ yield ITERATION_METRICS["logs_generator"]
+ ITERATION_METRICS["logs_generator"] = styles(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "completed", result=conversation_messages[-1]["content"]), expanded=False)
+ yield ITERATION_METRICS["logs_generator"]

  except Exception as tool_error:
+ ITERATION_METRICS["error_patterns"][f"{tool_invocation.function.name}_execution"] = ITERATION_METRICS["error_patterns"].get(f"{tool_invocation.function.name}_execution", 0) + 1

+ for i in range(0, len(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "error", error=str(tool_error))) + 1):
+ ITERATION_METRICS["logs_generator"] = styles(reasoning_interfaces(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "error", error=str(tool_error)), i), expanded=True)
+ yield ITERATION_METRICS["logs_generator"]
+ ITERATION_METRICS["logs_generator"] = styles(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "error", error=str(tool_error)), expanded=True)
+ yield ITERATION_METRICS["logs_generator"]
+
+ conversation_messages.append({
+ "role": "tool",
+ "tool_call_id": tool_invocation.id,
+ "name": tool_invocation.function.name,
+ "content": str(tool_error)
+ })

+ return conversation_messages, ITERATION_METRICS.get("logs_generator", ""), True

+ except Exception:
+ ITERATION_METRICS["failures"] = ITERATION_METRICS.get("failures", 0) + 1

+ ITERATION_METRICS["retry_count"] = ITERATION_METRICS.get("retry_count", 0) + 1
+
+ if ITERATION_METRICS["error_patterns"]:
+ if max(ITERATION_METRICS["error_patterns"].values()) > 3:
+ ITERATION_METRICS["current_iterations"] = min(ITERATION_METRICS["current_iterations"] + 2, MAX_RETRY_LIMIT)
  else:
+ ITERATION_METRICS["current_iterations"] = min(ITERATION_METRICS["current_iterations"] + 1, MAX_RETRY_LIMIT)
+ else:
+ ITERATION_METRICS["current_iterations"] = min(ITERATION_METRICS["current_iterations"] + 1, MAX_RETRY_LIMIT)
+
+ if ITERATION_METRICS["current_iterations"] > ITERATION_METRICS.get("previous_iterations", 0):
+ for i in range(0, len(f"Retrying with increased iterations: {ITERATION_METRICS['current_iterations']} (attempt {ITERATION_METRICS.get('retry_count', 0) + 1})") + 1):
+ ITERATION_METRICS["logs_generator"] = styles(reasoning_interfaces(f"Retrying with increased iterations: {ITERATION_METRICS['current_iterations']} (attempt {ITERATION_METRICS.get('retry_count', 0) + 1})", i), expanded=True)
+ yield ITERATION_METRICS["logs_generator"]
+
+ ITERATION_METRICS["previous_iterations"] = ITERATION_METRICS["current_iterations"]
+
+ if ITERATION_METRICS["current_iterations"] >= MAX_RETRY_LIMIT:
+ ITERATION_METRICS["logs_generator"] = styles(f"Maximum retry limit reached after {ITERATION_METRICS.get('attempts', 0)} attempts with {ITERATION_METRICS.get('failures', 0)} failures", expanded=True)
+ yield ITERATION_METRICS["logs_generator"]
+ break

+ ITERATION_METRICS["success_rate"] = (ITERATION_METRICS.get("tool_results_count", 0) / max(ITERATION_METRICS.get("attempts", 1), 1)) * 100

+ if ITERATION_METRICS.get("logs_generator", ""):
+ ITERATION_METRICS["logs_generator"] = styles(ITERATION_METRICS.get("logs_generator", "").replace('<br>', '\n').strip(), expanded=False)

+ return conversation_messages, ITERATION_METRICS.get("logs_generator", ""), ITERATION_METRICS.get("tool_results_count", 0) > 0
src/processor/tools/parser.py CHANGED
@@ -5,13 +5,10 @@

  import json

- def extract_tool_parameters(raw_parameters, fallback_engine="google"):
  try:
  parsed_params = json.loads(raw_parameters or "{}")
- if "engine" in parsed_params and parsed_params["engine"] not in ["google", "bing", "baidu"]:
- parsed_params["engine"] = fallback_engine
- if "engine" not in parsed_params:
- parsed_params["engine"] = fallback_engine
  return parsed_params, None
  except Exception as parse_error:
  return None, f"Invalid tool arguments: {str(parse_error)}"

  import json

+ def tools_parser(raw_parameters):
  try:
  parsed_params = json.loads(raw_parameters or "{}")
+ parsed_params["engine"] = "google"
  return parsed_params, None
  except Exception as parse_error:
  return None, f"Invalid tool arguments: {str(parse_error)}"
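Illustrative sketch (not part of the committed files): the renamed tools_parser now pins the engine to "google" instead of validating it against a fallback list, and still returns a (params, error) pair. The sample inputs below are hypothetical.

import json

def tools_parser(raw_parameters):
    # same logic as the new implementation above
    try:
        parsed_params = json.loads(raw_parameters or "{}")
        parsed_params["engine"] = "google"
        return parsed_params, None
    except Exception as parse_error:
        return None, f"Invalid tool arguments: {str(parse_error)}"

print(tools_parser('{"query": "latest AI news", "engine": "bing"}'))
# -> ({'query': 'latest AI news', 'engine': 'google'}, None)
print(tools_parser('not-json'))
# -> (None, 'Invalid tool arguments: ...')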
src/tools/__init__.py CHANGED
@@ -3,6 +3,6 @@
  # SPDX-License-Identifier: Apache-2.0
  #

- from .tool_manager import construct_tool_definitions

- __all__ = ['construct_tool_definitions']

  # SPDX-License-Identifier: Apache-2.0
  #

+ from .tool_manager import local_tools

+ __all__ = ['local_tools']
src/tools/tool_manager.py CHANGED
@@ -3,13 +3,13 @@
  # SPDX-License-Identifier: Apache-2.0
  #

- def construct_tool_definitions():
  return [
  {
  "type": "function",
  "function": {
  "name": "web_search",
- "description": "Perform a web search via SearXNG (Google or Bing) or Baidu.",
  "parameters": {
  "type": "object",
  "properties": {
@@ -18,12 +18,7 @@ def construct_tool_definitions():
  },
  "engine": {
  "type": "string",
- "enum": [
- "google",
- "bing",
- "baidu"
- ],
- "default": "google",
  },
  },
  "required": ["query"],

  # SPDX-License-Identifier: Apache-2.0
  #

+ def local_tools():
  return [
  {
  "type": "function",
  "function": {
  "name": "web_search",
+ "description": "Perform a web search via SearXNG (Google only).",
  "parameters": {
  "type": "object",
  "properties": {

  },
  "engine": {
  "type": "string",
+ "enum": ["google"]
  },
  },
  "required": ["query"],