hadadrjt committed
Commit 02ce7c3 · 1 Parent(s): 1262be4

SearchGPT: Release pre-stable scripts.

Signed-off-by: Hadad <[email protected]>

config.py CHANGED
@@ -9,12 +9,43 @@
 MODEL = "gpt-4.1-nano"
 
 MAX_TOKENS = 131072
+TOOLS_TEMPERATURE = 0.6
+CHAT_TEMPERATURE = 0.75
+STREAM = True
 
 SEARXNG_ENDPOINT = "https://searx.stream/search" # See the endpoint list at https://searx.space
 BAIDU_ENDPOINT = "https://www.baidu.com/s"
 READER_ENDPOINT = "https://r.jina.ai/"
 REQUEST_TIMEOUT = 300 # 5 minute
 
+MAXIMUM_ITERATIONS = 1 # Max tool execution
+MAX_RETRY_LIMIT = 10 # Max retries if tools fail or server doesn’t respond
+
+ITERATION_METRICS = {
+    "attempts": 0,
+    "failures": 0,
+    "success_rate": 0,
+    "error_patterns": {},
+    "retry_delays": [
+        0.02,
+        0.03,
+        0.04,
+        0.05,
+        0.06,
+        0.07
+    ],
+    "backoff_multiplier": 0.2
+}
+
+TCP_CONNECTOR_ENABLE_DNS_CACHE = True # aiohttp
+TCP_CONNECTOR_TTL_DNS_CACHE = 300 # aiohttp
+TCP_CONNECTOR_LIMIT = 100 # aiohttp
+TCP_CONNECTOR_LIMIT_PER_HOST = 30 # aiohttp
+TCP_CONNECTOR_FORCE_CLOSE = False # aiohttp
+TCP_CONNECTOR_ENABLE_CLEANUP = True # aiohttp
+ENABLE_TRUST_ENV = True # aiohttp
+ENABLE_CONNECTOR_OWNER = True # aiohttp
+
 INSTRUCTIONS_START = """
 You are ChatGPT, an AI assistant with mandatory real-time web search, URL content extraction, knowledge validation, and professional summarization capabilities.
 
@@ -57,7 +88,7 @@ Execution Workflow:
 - Balance depth (for experts) with clarity (for general readers).
 
 6. Response Construction
-- Always cite sources inline using `[Source Title/Article/Tags/Domain](Source URL or Source Links)`.
+- Always cite sources inline using `[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.
 - Maintain a professional, precise, and neutral tone.
 - Use clear formatting: headings, numbered lists, and bullet points.
 - Ensure readability, logical progression, and accessibility.
@@ -76,7 +107,7 @@ Execution Workflow:
 Critical Instruction:
 - Every new query or request must trigger a `web_search` or `read_url`.
 - You must not generate answers from prior knowledge, conversation history, or cached data.
-- Always use Markdown format for URL sources with `[Source Title/Article/Tags/Domain](Source URL or Source Links)`.
+- Always use Markdown format for URL sources with `[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.
 - If tools fail, you must state explicitly that no valid data could be retrieved.
 \n\n\n
 """
@@ -89,7 +120,7 @@ CONTENT_EXTRACTION = """
 - Evaluate credibility of sources, highlight potential biases or conflicts
 - Produce a structured, professional, and comprehensive summary
 - Emphasize clarity, accuracy, and logical flow
-- Include all discovered URLs in the final summary as [Source Title](URL)
+- Include all discovered URLs in the final summary as `[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`
 - Mark any uncertainties, contradictions, or missing information clearly
 </system>
 \n\n\n
@@ -101,7 +132,7 @@ SEARCH_SELECTION = """
 - Extract key information, main arguments, data points, and statistics
 - Capture every URL present in the content or references
 - Create a professional structured summary.
-- List each source at the end of the summary in the format [Source title](link)
+- List each source at the end of the summary in the format `[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`
 - Identify ambiguities or gaps in information
 - Ensure clarity, completeness, and high information density
 </system>
@@ -127,7 +158,7 @@ REASONING_STEPS = {
             "- Search Query: {query}<br>"
             "- Search Engine: {engine}<br><br>"
             "I'm verifying that the network connection is stable and that the search service is accessible. "
-            "All preliminary checks have been completed successfully."
+            "All preliminary checks have been completed successfully.<br><br>"
         ),
         "executing": (
             "I'm now executing the web search for: {query}<br><br>"
@@ -142,7 +173,7 @@ REASONING_STEPS = {
             "I'm also verifying the credibility of the sources to ensure high-quality information.<br><br>"
             "Current status: Processing search results...<br>"
             "Results found: Multiple relevant sources identified<br>"
-            "Quality assessment: High relevance detected"
+            "Quality assessment: High relevance detected<br><br>"
         ),
         "completed": (
            "I have successfully completed the web search for: {query}<br><br>"
@@ -173,7 +204,7 @@ REASONING_STEPS = {
             "I will try to recover from this error and provide the best possible response.<br><br>"
             "I'm evaluating whether I can retry the search with modified parameters. "
             "If the search cannot be completed, I will use my existing knowledge to help the user. "
-            "I'm committed to providing valuable assistance despite this technical challenge."
+            "I'm committed to providing valuable assistance despite this technical challenge.<br><br>"
         )
     },
     "read_url": {
@@ -191,7 +222,7 @@ REASONING_STEPS = {
             "- Content Type: HTML/Text<br>"
             "- Encoding: Auto-detect<br><br>"
             "I'm checking if the website requires any special handling or authentication. "
-            "All preliminary validation checks have been completed successfully."
+            "All preliminary validation checks have been completed successfully.<br><br>"
         ),
         "executing": (
             "I'm now accessing the URL: {url}<br><br>"
@@ -206,7 +237,7 @@ REASONING_STEPS = {
             "I'm focusing on extracting the primary article or information content.<br><br>"
             "Current status: Extracting content...<br>"
             "Response received: Processing HTML<br>"
-            "Content extraction: In progress"
+            "Content extraction: In progress<br><br>"
         ),
         "completed": (
             "I have successfully extracted content from: {url}<br><br>"
@@ -237,14 +268,14 @@ REASONING_STEPS = {
             "I will try to work around this limitation and provide alternative assistance.<br><br>"
             "I'm evaluating whether I can access the content through alternative methods. "
             "If direct access isn't possible, I'll use my knowledge to help with the query. "
-            "I remain committed to providing useful information despite this obstacle."
+            "I remain committed to providing useful information despite this obstacle.<br><br>"
         )
     }
 }
 
 REASONING_DEFAULT = "I'm processing the tool execution request..."
-
-REASONING_DELAY = 0.01 # 10 ms
+REASONING_DELAY = 0.01 # 10 ms
+REASONING_INSERT = 15 # Stream-like word-by-word display
 
 OS = [
     "Windows NT 10.0; Win64; x64",

src/core/web_loader.py CHANGED
@@ -5,7 +5,6 @@
 
 import random
 import threading
-import time
 from collections import deque
 from config import (
     OS,
@@ -37,96 +36,59 @@ class WebLoader:
 
     def generate_ipv4(self):
         while len(self.ipv4_pool) < 1000 and self.running:
-            octet = random.choice(OCTETS)
-            ip = f"{octet}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"
+            ip = f"{random.choice(OCTETS)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"
             with self.lock:
                 self.ipv4_pool.append(ip)
-            time.sleep(0.001)
 
     def generate_ipv6(self):
         while len(self.ipv6_pool) < 1000 and self.running:
-            segments = []
-            for _ in range(8):
-                segments.append(f"{random.randint(0, 65535):04x}")
+            segments = [f"{random.randint(0, 65535):04x}" for _ in range(8)]
             ip = ":".join(segments)
             with self.lock:
                 self.ipv6_pool.append(ip)
-            time.sleep(0.001)
 
     def generate_user_agents(self):
-        os_list = OS
-        browsers = BROWSERS
-        chrome_versions = CHROME_VERSIONS
-        firefox_versions = FIREFOX_VERSIONS
-        safari_versions = SAFARI_VERSIONS
-        edge_versions = EDGE_VERSIONS
-
         while len(self.user_agent_pool) < 500 and self.running:
-            browser = random.choice(browsers)
-            os_string = random.choice(os_list)
+            browser = random.choice(BROWSERS)
 
             if browser == "Chrome":
-                version = random.choice(chrome_versions)
-                ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version} Safari/537.36"
+                ua = f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.choice(CHROME_VERSIONS)} Safari/537.36"
             elif browser == "Firefox":
-                version = random.choice(firefox_versions)
-                ua = f"Mozilla/5.0 ({os_string}) Gecko/20100101 Firefox/{version}"
+                ua = f"Mozilla/5.0 ({random.choice(OS)}) Gecko/20100101 Firefox/{random.choice(FIREFOX_VERSIONS)}"
            elif browser == "Safari":
-                version = random.choice(safari_versions)
                 webkit_version = f"{600 + random.randint(0, 15)}.{random.randint(1, 9)}.{random.randint(1, 20)}"
-                ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/{webkit_version} (KHTML, like Gecko) Version/{version} Safari/{webkit_version}"
+                ua = f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/{webkit_version} (KHTML, like Gecko) Version/{random.choice(SAFARI_VERSIONS)} Safari/{webkit_version}"
             elif browser == "Edge":
-                version = random.choice(edge_versions)
-                ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version.split('.')[0]}.0.0.0 Safari/537.36 Edg/{version}"
+                version = random.choice(EDGE_VERSIONS)
+                ua = f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version.split('.')[0]}.0.0.0 Safari/537.36 Edg/{version}"
             else:
                 version = f"{random.randint(70, 100)}.0.{random.randint(3000, 5000)}.{random.randint(50, 150)}"
-                ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version} Safari/537.36 OPR/{random.randint(80, 106)}.0.0.0"
+                ua = f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version} Safari/537.36 OPR/{random.randint(80, 106)}.0.0.0"
 
             with self.lock:
                 self.user_agent_pool.append(ua)
-            time.sleep(0.002)
 
     def generate_origins(self):
-        domains = DOMAINS
-        protocols = PROTOCOLS
-
         while len(self.origin_pool) < 500 and self.running:
-            protocol = random.choice(protocols)
-            domain = random.choice(domains)
-            origin = f"{protocol}{domain}"
+            origin = f"{random.choice(PROTOCOLS)}{random.choice(DOMAINS)}"
             with self.lock:
                 self.origin_pool.append(origin)
-            time.sleep(0.002)
 
     def generate_referrers(self):
-        search_engines = SEARCH_ENGINES
-        keywords = KEYWORDS
-
         while len(self.referrer_pool) < 500 and self.running:
-            engine = random.choice(search_engines)
-            keyword = random.choice(keywords)
-            referrer = f"{engine}{keyword}"
+            referrer = f"{random.choice(SEARCH_ENGINES)}{random.choice(KEYWORDS)}"
             with self.lock:
                 self.referrer_pool.append(referrer)
-            time.sleep(0.002)
 
     def generate_locations(self):
-        countries = COUNTRIES
-        languages = LANGUAGES
-        timezones = TIMEZONES
-
         while len(self.location_pool) < 500 and self.running:
-            country = random.choice(countries)
-            language = random.choice(languages)
-            timezone = random.choice(timezones)
             location = {
-                "country": country,
-                "language": language,
-                "timezone": timezone
+                "country": random.choice(COUNTRIES),
+                "language": random.choice(LANGUAGES),
+                "timezone": random.choice(TIMEZONES)
             }
             with self.lock:
                 self.location_pool.append(location)
-            time.sleep(0.002)
 
     def get_ipv4(self):
         with self.lock:

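Note: the rewrite above only inlines the random.choice lookups and drops the per-item time.sleep throttling; the generated values keep the same shape as before. A self-contained sketch of the rewritten expressions, with placeholder pools standing in for the OCTETS, PROTOCOLS, and DOMAINS lists from config.py:

import random

OCTETS = ["23", "45", "104"]               # placeholder values; the real pools live in config.py
PROTOCOLS = ["https://", "http://"]        # placeholder
DOMAINS = ["example.com", "example.org"]   # placeholder

# IPv4: a single f-string instead of a temporary octet variable
ipv4 = f"{random.choice(OCTETS)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"

# IPv6: a list comprehension instead of an explicit append loop
segments = [f"{random.randint(0, 65535):04x}" for _ in range(8)]
ipv6 = ":".join(segments)

# Origin: protocol and domain chosen and joined in one expression
origin = f"{random.choice(PROTOCOLS)}{random.choice(DOMAINS)}"

print(ipv4, ipv6, origin)
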
src/engine/browser_engine.py CHANGED
@@ -3,11 +3,21 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-import asyncio
 import aiohttp
-import requests
+import asyncio
 from urllib.parse import quote
-from config import CONTENT_EXTRACTION, SEARCH_SELECTION
+from config import (
+    CONTENT_EXTRACTION,
+    SEARCH_SELECTION,
+    TCP_CONNECTOR_ENABLE_DNS_CACHE,
+    TCP_CONNECTOR_TTL_DNS_CACHE,
+    TCP_CONNECTOR_LIMIT,
+    TCP_CONNECTOR_LIMIT_PER_HOST,
+    TCP_CONNECTOR_FORCE_CLOSE,
+    TCP_CONNECTOR_ENABLE_CLEANUP,
+    ENABLE_TRUST_ENV,
+    ENABLE_CONNECTOR_OWNER
+)
 from src.core.web_loader import web_loader
 
 class BrowserEngine:
@@ -21,7 +31,7 @@ class BrowserEngine:
         origin = web_loader.get_origin()
         referrer = web_loader.get_referrer()
         location = web_loader.get_location()
-
+
         return {
             "User-Agent": user_agent,
             "X-Forwarded-For": f"{ipv4}, {ipv6}",
@@ -48,7 +58,7 @@ class BrowserEngine:
             "X-Timezone": location['timezone']
         }
 
-    def _build_search_url_and_selector(self, search_query: str, search_provider: str = "google"):
+    def web_selector(self, search_query: str, search_provider: str = "google"):
         if search_provider == "baidu":
             return (
                 f"{self.config.content_reader_api}{self.config.baidu_endpoint}?wd={quote(search_query)}",
@@ -60,80 +70,48 @@ class BrowserEngine:
                 "#urls"
             )
 
-    async def _async_post(self, url: str, data: dict, headers: dict):
-        timeout = aiohttp.ClientTimeout(total=self.config.request_timeout)
-        async with aiohttp.ClientSession(timeout=timeout) as session:
-            async with session.post(url, data=data, headers=headers) as response:
-                text = await response.text()
-                if response.status >= 400:
-                    raise aiohttp.ClientResponseError(
-                        request_info=response.request_info,
-                        history=response.history,
-                        status=response.status,
-                        message=text,
-                        headers=response.headers
-                    )
-                return text
-
-    async def _async_get(self, url: str, headers: dict):
+    async def web_request(self, method: str, url: str, headers: dict, data: dict = None):
         timeout = aiohttp.ClientTimeout(total=self.config.request_timeout)
-        async with aiohttp.ClientSession(timeout=timeout) as session:
-            async with session.get(url, headers=headers) as response:
+        connector = aiohttp.TCPConnector(
+            use_dns_cache=TCP_CONNECTOR_ENABLE_DNS_CACHE,
+            ttl_dns_cache=TCP_CONNECTOR_TTL_DNS_CACHE,
+            limit=TCP_CONNECTOR_LIMIT,
+            limit_per_host=TCP_CONNECTOR_LIMIT_PER_HOST,
+            force_close=TCP_CONNECTOR_FORCE_CLOSE,
+            enable_cleanup_closed=TCP_CONNECTOR_ENABLE_CLEANUP
+        )
+        async with aiohttp.ClientSession(
+            timeout=timeout,
+            connector=connector,
+            trust_env=ENABLE_TRUST_ENV,
+            connector_owner=ENABLE_CONNECTOR_OWNER
+        ) as session:
+            async with session.request(method, url, headers=headers, data=data) as response:
                 text = await response.text()
-                if response.status >= 400:
-                    raise aiohttp.ClientResponseError(
-                        request_info=response.request_info,
-                        history=response.history,
-                        status=response.status,
-                        message=text,
-                        headers=response.headers
-                    )
+                response.raise_for_status()
                 return text
 
-    def _sync_post(self, url: str, data: dict, headers: dict):
-        response = requests.post(url, data=data, headers=headers, timeout=self.config.request_timeout)
-        response.raise_for_status()
-        return response.text
+    async def _post(self, url: str, data: dict, headers: dict):
+        return await self.web_request("POST", url, headers, data)
 
-    def _sync_get(self, url: str, headers: dict):
-        response = requests.get(url, headers=headers, timeout=self.config.request_timeout)
-        response.raise_for_status()
-        return response.text
+    async def _get(self, url: str, headers: dict):
+        return await self.web_request("GET", url, headers)
 
-    async def async_extract_page_content(self, target_url: str) -> str:
+    def extract_page_content(self, target_url: str) -> str:
         headers = self.generate_headers()
         payload = {"url": target_url}
-        extracted_content = await self._async_post(self.config.content_reader_api, payload, headers)
-        return f"{extracted_content}\n\n\n{CONTENT_EXTRACTION}\n\n\n"
-
-    def extract_page_content(self, target_url: str) -> str:
         try:
-            return asyncio.run(self.async_extract_page_content(target_url))
-        except Exception:
-            try:
-                headers = self.generate_headers()
-                payload = {"url": target_url}
-                extracted_content = self._sync_post(self.config.content_reader_api, payload, headers)
-                return f"{extracted_content}\n\n\n{CONTENT_EXTRACTION}\n\n\n"
-            except Exception as error:
-                return f"Error reading URL: {str(error)}"
+            extracted_content = asyncio.run(self._post(self.config.content_reader_api, payload, headers))
+            return f"{extracted_content}\n\n\n{CONTENT_EXTRACTION}\n\n\n"
+        except Exception as error:
+            return f"Error reading URL: {str(error)}"
 
-    async def async_perform_search(self, search_query: str, search_provider: str = "google") -> str:
+    def perform_search(self, search_query: str, search_provider: str = "google") -> str:
         headers = self.generate_headers()
-        full_url, selector = self._build_search_url_and_selector(search_query, search_provider)
+        full_url, selector = self.web_selector(search_query, search_provider)
         headers["X-Target-Selector"] = selector
-        search_results = await self._async_get(full_url, headers)
-        return f"{search_results}\n\n\n{SEARCH_SELECTION}\n\n\n"
-
-    def perform_search(self, search_query: str, search_provider: str = "google") -> str:
         try:
-            return asyncio.run(self.async_perform_search(search_query, search_provider))
-        except Exception:
-            try:
-                headers = self.generate_headers()
-                full_url, selector = self._build_search_url_and_selector(search_query, search_provider)
-                headers["X-Target-Selector"] = selector
-                search_results = self._sync_get(full_url, headers)
-                return f"{search_results}\n\n\n{SEARCH_SELECTION}\n\n\n"
-            except Exception as error:
-                return f"Error during search: {str(error)}"
+            search_results = asyncio.run(self._get(full_url, headers))
+            return f"{search_results}\n\n\n{SEARCH_SELECTION}\n\n\n"
+        except Exception as error:
+            return f"Error during search: {str(error)}"
 
 
 
 
 
 
src/processor/response/generator.py CHANGED
@@ -4,7 +4,12 @@
 #
 
 import traceback
-from config import MAX_TOKENS, INSTRUCTIONS_END
+from config import (
+    INSTRUCTIONS_END,
+    MAX_TOKENS,
+    CHAT_TEMPERATURE,
+    STREAM
+)
 
 def generate_response(
     server,
@@ -16,12 +21,11 @@ def generate_response(
     response_generator = ""
 
     if tools_done:
-        system_reminder = {
+        conversation_messages.append({
             "role": "system",
             "content": INSTRUCTIONS_END
-        }
-        conversation_messages.append(system_reminder)
-
+        })
+
     try:
         response = server.chat.completions.create(
             model=model_name,
@@ -29,8 +33,8 @@
             tools=tool_definitions if not tools_done else None,
             tool_choice="none",
             max_tokens=MAX_TOKENS,
-            temperature=0.75,
-            stream=True
+            temperature=CHAT_TEMPERATURE,
+            stream=STREAM
         )
 
         for data in response:

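Note: with STREAM = True the create call returns an iterator of chunks rather than a single completion, and the `for data in response:` loop shown above accumulates the streamed deltas. A sketch of that consumption pattern for an OpenAI-compatible client (the chunk handling below is illustrative, not a copy of the function body):

from config import MAX_TOKENS, CHAT_TEMPERATURE, STREAM

def stream_chat(server, model_name: str, conversation_messages: list) -> str:
    # server is assumed to be an OpenAI-compatible client; chunks arrive incrementally when stream=True.
    response = server.chat.completions.create(
        model=model_name,
        messages=conversation_messages,
        max_tokens=MAX_TOKENS,
        temperature=CHAT_TEMPERATURE,
        stream=STREAM
    )
    text = ""
    for data in response:
        # Each streamed chunk carries an optional content delta.
        delta = data.choices[0].delta.content if data.choices else None
        if delta:
            text += delta
    return text
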
src/processor/tools/interaction.py CHANGED
@@ -10,43 +10,34 @@ from ..reasoning.interface import reasoning_interfaces
 from ..reasoning.tool_reasoning import tool_reasoning
 from .parser import extract_tool_parameters
 from .executor import invoke_tool_function
-from config import MAX_TOKENS, REASONING_DELAY
+from config import (
+    MAX_TOKENS,
+    REASONING_DELAY,
+    REASONING_INSERT,
+    TOOLS_TEMPERATURE,
+    MAXIMUM_ITERATIONS,
+    MAX_RETRY_LIMIT,
+    ITERATION_METRICS
+)
 
 def process_tool_interactions(server, model_name, conversation_messages, tool_definitions, search_engine):
-    maximum_iterations = 1
-    max_retry_limit = 10
     retry_count = 0
     logs_generator = ""
     tool_results = []
     execution_success = False
     last_error = None
     error_history = []
-    iteration_metrics = {
-        "attempts": 0,
-        "failures": 0,
-        "success_rate": 0,
-        "error_patterns": {},
-        "retry_delays": [
-            0.02,
-            0.03,
-            0.04,
-            0.05,
-            0.06,
-            0.07
-        ],
-        "backoff_multiplier": 1.0
-    }
-
-    while maximum_iterations <= max_retry_limit and not execution_success:
-        iteration_metrics["attempts"] += 1
+
+    while MAXIMUM_ITERATIONS <= MAX_RETRY_LIMIT and not execution_success:
+        ITERATION_METRICS["attempts"] += 1
         current_iteration_successful = False
         iteration_errors = []
 
-        for iteration_index in range(maximum_iterations):
+        for iteration_index in range(MAXIMUM_ITERATIONS):
             try:
-                retry_delay = iteration_metrics["retry_delays"][min(retry_count, len(iteration_metrics["retry_delays"]) - 1)]
+                retry_delay = ITERATION_METRICS["retry_delays"][min(retry_count, len(ITERATION_METRICS["retry_delays"]) - 1)]
                 if retry_count > 0:
-                    time.sleep(retry_delay * iteration_metrics["backoff_multiplier"])
+                    time.sleep(retry_delay * ITERATION_METRICS["backoff_multiplier"])
 
                 model_response = server.chat.completions.create(
                     model=model_name,
@@ -54,7 +45,7 @@ def process_tool_interactions(server, model_name, conversation_messages, tool_de
                     tools=tool_definitions,
                     tool_choice="auto",
                     max_tokens=MAX_TOKENS,
-                    temperature=0.6
+                    temperature=TOOLS_TEMPERATURE
                 )
 
                 response_choice = model_response.choices[0]
@@ -86,7 +77,7 @@ def process_tool_interactions(server, model_name, conversation_messages, tool_de
 
                 if extraction_error:
                     error_key = f"{tool_name}_extraction"
-                    iteration_metrics["error_patterns"][error_key] = iteration_metrics["error_patterns"].get(error_key, 0) + 1
+                    ITERATION_METRICS["error_patterns"][error_key] = ITERATION_METRICS["error_patterns"].get(error_key, 0) + 1
                     tool_execution_errors.append({
                         "tool": tool_name,
                         "error": extraction_error,
@@ -94,7 +85,7 @@ def process_tool_interactions(server, model_name, conversation_messages, tool_de
                    })
 
                     reasoning_error = tool_reasoning(tool_name, None, "error", error=extraction_error)
-                    for i in range(0, len(reasoning_error), 10):
+                    for i in range(0, len(reasoning_error), REASONING_INSERT):
                         logs_generator = styles(reasoning_interfaces(reasoning_error, i), expanded=True)
                         yield logs_generator
                         time.sleep(REASONING_DELAY)
@@ -103,13 +94,13 @@ def process_tool_interactions(server, model_name, conversation_messages, tool_de
                     tool_execution_result = extraction_error
                 else:
                     reasoning_status = tool_reasoning(tool_name, extracted_arguments, "parsing")
-                    for i in range(0, len(reasoning_status), 10):
+                    for i in range(0, len(reasoning_status), REASONING_INSERT):
                         logs_generator = styles(reasoning_interfaces(reasoning_status, i), expanded=True)
                         yield logs_generator
                         time.sleep(REASONING_DELAY)
 
                     reasoning_start = tool_reasoning(tool_name, extracted_arguments, "executing")
-                    for i in range(0, len(reasoning_start), 10):
+                    for i in range(0, len(reasoning_start), REASONING_INSERT):
                         logs_generator = styles(reasoning_interfaces(reasoning_start, i), expanded=True)
                         yield logs_generator
                         time.sleep(REASONING_DELAY)
@@ -124,12 +115,12 @@ def process_tool_interactions(server, model_name, conversation_messages, tool_de
                         "tool": tool_name,
                         "arguments": extracted_arguments,
                         "result": tool_execution_result,
-                        "iteration": maximum_iterations,
+                        "iteration": MAXIMUM_ITERATIONS,
                         "retry_count": retry_count
                     })
 
                     reasoning_done = tool_reasoning(tool_name, extracted_arguments, "completed", result=tool_execution_result)
-                    for i in range(0, len(reasoning_done), 10):
+                    for i in range(0, len(reasoning_done), REASONING_INSERT):
                         logs_generator = styles(reasoning_interfaces(reasoning_done, i), expanded=True)
                         yield logs_generator
                         time.sleep(REASONING_DELAY)
@@ -138,7 +129,7 @@ def process_tool_interactions(server, model_name, conversation_messages, tool_de
 
                 except Exception as tool_error:
                     error_key = f"{tool_name}_execution"
-                    iteration_metrics["error_patterns"][error_key] = iteration_metrics["error_patterns"].get(error_key, 0) + 1
+                    ITERATION_METRICS["error_patterns"][error_key] = ITERATION_METRICS["error_patterns"].get(error_key, 0) + 1
                     tool_execution_errors.append({
                         "tool": tool_name,
                         "error": str(tool_error),
@@ -147,7 +138,7 @@ def process_tool_interactions(server, model_name, conversation_messages, tool_de
                    })
 
                     reasoning_error = tool_reasoning(tool_name, extracted_arguments, "error", error=str(tool_error))
-                    for i in range(0, len(reasoning_error), 10):
+                    for i in range(0, len(reasoning_error), REASONING_INSERT):
                         logs_generator = styles(reasoning_interfaces(reasoning_error, i), expanded=True)
                         yield logs_generator
                         time.sleep(REASONING_DELAY)
@@ -174,11 +165,11 @@ def process_tool_interactions(server, model_name, conversation_messages, tool_de
             except Exception as model_error:
                 last_error = str(model_error)
                 error_history.append({
-                    "iteration": maximum_iterations,
+                    "iteration": MAXIMUM_ITERATIONS,
                     "error": last_error,
                     "timestamp": time.time()
                 })
-                iteration_metrics["failures"] += 1
+                ITERATION_METRICS["failures"] += 1
                 iteration_errors.append({
                     "error": last_error,
                     "type": "model"
@@ -192,31 +183,31 @@ def process_tool_interactions(server, model_name, conversation_messages, tool_de
             error_history.extend(iteration_errors)
 
             retry_count += 1
-            previous_iterations = maximum_iterations
+            previous_iterations = MAXIMUM_ITERATIONS
 
-            if iteration_metrics["error_patterns"]:
-                frequent_errors = max(iteration_metrics["error_patterns"].values())
+            if ITERATION_METRICS["error_patterns"]:
+                frequent_errors = max(ITERATION_METRICS["error_patterns"].values())
                 if frequent_errors > 3:
-                    maximum_iterations = min(maximum_iterations + 2, max_retry_limit)
+                    new_iterations = min(MAXIMUM_ITERATIONS + 2, MAX_RETRY_LIMIT)
                 else:
-                    maximum_iterations = min(maximum_iterations + 1, max_retry_limit)
+                    new_iterations = min(MAXIMUM_ITERATIONS + 1, MAX_RETRY_LIMIT)
             else:
-                maximum_iterations = min(maximum_iterations + 1, max_retry_limit)
+                new_iterations = min(MAXIMUM_ITERATIONS + 1, MAX_RETRY_LIMIT)
 
-            if maximum_iterations > previous_iterations:
-                retry_reasoning = f"Retrying with increased iterations: {maximum_iterations} (attempt {retry_count + 1})"
-                for i in range(0, len(retry_reasoning), 10):
+            if new_iterations > previous_iterations:
+                retry_reasoning = f"Retrying with increased iterations: {new_iterations} (attempt {retry_count + 1})"
+                for i in range(0, len(retry_reasoning), REASONING_INSERT):
                     logs_generator = styles(reasoning_interfaces(retry_reasoning, i), expanded=True)
                     yield logs_generator
                     time.sleep(REASONING_DELAY)
 
-            if maximum_iterations >= max_retry_limit:
-                final_error = f"Maximum retry limit reached after {iteration_metrics['attempts']} attempts with {iteration_metrics['failures']} failures"
+            if new_iterations >= MAX_RETRY_LIMIT:
+                final_error = f"Maximum retry limit reached after {ITERATION_METRICS['attempts']} attempts with {ITERATION_METRICS['failures']} failures"
                 logs_generator = styles(final_error, expanded=True)
                 yield logs_generator
                 break
 
-            iteration_metrics["success_rate"] = (len(tool_results) / max(iteration_metrics["attempts"], 1)) * 100
+            ITERATION_METRICS["success_rate"] = (len(tool_results) / max(ITERATION_METRICS["attempts"], 1)) * 100
 
     if logs_generator:
         logs_generator = styles(logs_generator.replace('<br>', '\n').strip(), expanded=False)

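Note: the hard-coded step of 10 characters is replaced by REASONING_INSERT, so reasoning text is now revealed in 15-character slices with a REASONING_DELAY pause between yields. A self-contained sketch of that chunked display (the prefix slicing is an assumption about what reasoning_interfaces does; print stands in for the styles wrapper and the generator plumbing):

import time

REASONING_DELAY = 0.01   # from config.py
REASONING_INSERT = 15    # from config.py

def stream_reasoning(text: str):
    # Yield progressively longer prefixes to mimic word-by-word display.
    for i in range(0, len(text), REASONING_INSERT):
        yield text[:i + REASONING_INSERT]
        time.sleep(REASONING_DELAY)

for chunk in stream_reasoning("I'm processing the tool execution request..."):
    print(chunk)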