SearchGPT: Initial stable release (RS1).
Signed-off-by: Hadad <[email protected]>
- README.md +8 -0
- app.py +3 -3
- assets/css/animations/__init__.py +8 -0
- assets/css/animations/loading.py +24 -0
- config.py +206 -234
- src/client/__init__.py +2 -2
- src/client/openai_client.py +1 -1
- src/core/web_configuration.py +1 -2
- src/core/web_loader.py +63 -53
- src/engine/browser_engine.py +27 -42
- src/processor/__init__.py +2 -2
- src/processor/message_processor.py +12 -20
- src/processor/reasoning/interface.py +5 -12
- src/processor/reasoning/tool_reasoning.py +34 -29
- src/processor/response/generator.py +0 -7
- src/processor/response/setup.py +2 -2
- src/processor/tools/__init__.py +6 -6
- src/processor/tools/executor.py +1 -1
- src/processor/tools/interaction.py +92 -170
- src/processor/tools/parser.py +2 -5
- src/tools/__init__.py +2 -2
- src/tools/tool_manager.py +3 -8
README.md
CHANGED
@@ -69,6 +69,14 @@ models:
 - Phr00t/WAN2.2-14B-Rapid-AllInOne
 - apple/FastVLM-0.5B
 - stepfun-ai/Step-Audio-2-mini
+- tencent/SRPO
+- baidu/ERNIE-4.5-21B-A3B-Thinking
+- tencent/HunyuanImage-2.1
+- Qwen/Qwen3-Next-80B-A3B-Instruct
+- google/embeddinggemma-300m
+- Qwen/Qwen3-Next-80B-A3B-Thinking
+- LLM360/K2-Think
+- IndexTeam/IndexTTS-2
 # Used to promote this Hugging Face Space
 datasets:
 - fka/awesome-chatgpt-prompts
app.py
CHANGED
@@ -3,14 +3,14 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-from src.processor.message_processor import process_user_request
+from src.processor.message_processor import searchgpt_playground
 from config import DESCRIPTION
 import gradio as gr
 
 with gr.Blocks(fill_height=True, fill_width=True) as app:
     with gr.Sidebar(): gr.HTML(DESCRIPTION)
     gr.ChatInterface(
-        fn=process_user_request,
+        fn=searchgpt_playground,
         chatbot=gr.Chatbot(
             label="SearchGPT | GPT-4.1 (Nano)",
             type="messages",
@@ -34,7 +34,7 @@ with gr.Blocks(fill_height=True, fill_width=True) as app:
         ],
         cache_examples=False,
         show_api=False,
-        concurrency_limit=…
+        concurrency_limit=3
     )
 
 app.launch(
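Note: `gr.ChatInterface` accepts a generator as `fn`, which is what lets `searchgpt_playground` stream partial output into the chat window. A minimal sketch of the same wiring; the `echo_stream` stub below is illustrative, not code from this commit:

import gradio as gr

def echo_stream(user_message, chat_history):
    # Each successive yield replaces the assistant bubble,
    # which is how streaming appears in the UI.
    partial = ""
    for token in user_message.split():
        partial += token + " "
        yield partial

with gr.Blocks(fill_height=True, fill_width=True) as demo:
    gr.ChatInterface(
        fn=echo_stream,
        chatbot=gr.Chatbot(type="messages"),
        concurrency_limit=3,
    )

demo.launch()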
assets/css/animations/__init__.py
ADDED
@@ -0,0 +1,8 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from .loading import spinner
+
+__all__ = ['spinner']
assets/css/animations/loading.py
ADDED
@@ -0,0 +1,24 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+def spinner():
+    return """
+    <style>
+    .spinner {
+        display:inline-block;
+        width:9px;
+        height:9px;
+        border:2px solid #ccc;
+        border-top:2px solid #333;
+        border-radius:50%;
+        animation: spin 1s linear infinite;
+    }
+    @keyframes spin {
+        0% { transform: rotate(0deg); }
+        100% { transform: rotate(360deg); }
+    }
+    </style>
+    <div class="spinner"></div>
+    """
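The `<loading_animation>` placeholder that appears in the new `REASONING_STEPS` strings in config.py (below) is presumably swapped for this spinner markup at render time; the substitution itself is not part of the hunks shown here. A sketch of how that swap could work, where `render_step` is a hypothetical helper, not code from this commit:

from assets.css.animations import spinner

def render_step(template: str, **fields) -> str:
    # Fill the {query}/{url} fields first, then inject the CSS spinner
    # in place of the placeholder tag.
    return template.format(**fields).replace("<loading_animation>", spinner())

html = render_step(
    "I'm now accessing the URL: {url}<br><loading_animation>",
    url="https://example.com"
)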
config.py
CHANGED
@@ -4,22 +4,176 @@
 #
 
 #OPENAI_API_BASE_URL # Endpoint. Not here -> Hugging Face Spaces secrets
-…
+
+#OPENAI_API_KEY # API Key. Not here -> Hugging Face Spaces secrets
+
+SEARXNG_ENDPOINT = "https://searx.stream/search" # See the endpoint list at https://searx.space
+
+READER_ENDPOINT = "https://r.jina.ai/"
+
+REQUEST_TIMEOUT = 300 # 5 minute
 
 MODEL = "gpt-4.1-nano"
 
 MAX_TOKENS = 131072
+
 TOOLS_TEMPERATURE = 0.6
-…
+
+CHAT_TEMPERATURE = 0.95
+
 STREAM = True
 
-…
+INSTRUCTIONS = (
+    "You are ChatGPT, an AI assistant with mandatory real-time web search, URL content extraction, "
+    "knowledge validation, and professional summarization capabilities.\n\n"
+
+    "Your absolute rules:\n"
+    "- You must always execute and call the provided tools (`web_search`, `read_url`) for every single "
+    "user query or user request, without exception.\n"
+    "- You are never allowed to answer directly from your internal knowledge, memory, or training data. "
+    "Outdated or tool-bypassed answers are strictly forbidden.\n\n"
+
+    "Core Principles:\n"
+    "- Mandatory Tool Invocation: Every query or request, no matter how simple, factual, or complex, "
+    "must trigger at least one `web_search` or `read_url`.\n"
+    "- No Memory Reliance: Do not use prior conversation history, cached context, or built-in knowledge "
+    "to generate answers. Always re-verify with tools.\n"
+    "- Up-to-Date Grounding: All responses must be based only on real-time, verifiable data retrieved "
+    "through tools.\n"
+    "- Cross-Validation: Always compare findings across at least 3 independent, credible sources before "
+    "producing a final answer.\n"
+    "- Professional Output: Responses must be clear, structured, evidence-based, and neutral.\n\n"
+
+    "Execution Workflow:\n"
+    "1. Initial Web Search\n"
+    "   - Immediately execute and call `web_search` or `read_url` when a query or request arrives.\n"
+    "   - For `web_search` use multiple query or request variations for broader coverage.\n\n"
+
+    "2. Result Selection\n"
+    "   - For each search result, fetch the full content using `read_url`.\n"
+    "   - Extract key information, main arguments, data points, and statistics.\n"
+    "   - Capture every URL present in the content or references.\n"
+    "   - Create a professional structured summary.\n"
+    "   - List each source at the end of the summary in the format "
+    "`[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.\n"
+    "   - Identify ambiguities or gaps in information.\n"
+    "   - Ensure clarity, completeness, and high information density.\n"
+    "   - Select up to 10 of the most relevant, credible, and content-rich results.\n"
+    "   - Prioritize authoritative sources: academic publications, institutional reports, "
+    "official documents, expert commentary.\n"
+    "   - Deprioritize low-credibility, promotional, or unverified sources.\n"
+    "   - Avoid over-reliance on any single source.\n\n"
+
+    "3. Content Retrieval\n"
+    "   - For each selected URL, use `read_url`.\n"
+    "   - Analyze the retrieved content in detail.\n"
+    "   - Identify all critical facts, arguments, statistics, and relevant data.\n"
+    "   - Collect all URLs, hyperlinks, references, and citations mentioned in the content.\n"
+    "   - Evaluate credibility of sources, highlight potential biases or conflicts.\n"
+    "   - Produce a structured, professional, and comprehensive summary.\n"
+    "   - Emphasize clarity, accuracy, and logical flow.\n"
+    "   - Include all discovered URLs in the final summary as "
+    "`[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.\n"
+    "   - Mark any uncertainties, contradictions, or missing information clearly.\n"
+    "   - Extract key elements: facts, statistics, data points, expert opinions, and relevant arguments.\n"
+    "   - Normalize terminology, refine phrasing, and remove redundancies for clarity and consistency.\n\n"
+
+    "4. Cross-Validation\n"
+    "   - Compare extracted information across at least 3 distinct sources.\n"
+    "   - Identify convergences (agreement), divergences (contradictions), and gaps (missing data).\n"
+    "   - Validate all numerical values, temporal references, and factual claims through "
+    "multiple corroborations.\n\n"
+
+    "5. Knowledge Integration\n"
+    "   - Synthesize findings into a structured hierarchy: "
+    "Overview → Key details → Supporting evidence → Citations.\n"
+    "   - Emphasize the latest developments, trends, and their implications.\n"
+    "   - Balance depth (for experts) with clarity (for general readers).\n\n"
+
+    "6. Response Construction\n"
+    "   - Always cite sources inline using "
+    "`[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.\n"
+    "   - Maintain a professional, precise, and neutral tone.\n"
+    "   - Use clear formatting: headings, numbered lists, and bullet points.\n"
+    "   - Ensure readability, logical progression, and accessibility.\n\n"
+
+    "7. Ambiguity & Uncertainty Handling\n"
+    "   - Explicitly flag incomplete, ambiguous, or conflicting data.\n"
+    "   - Provide possible interpretations with transparent reasoning.\n"
+    "   - Clearly note limitations where evidence is insufficient or weak.\n\n"
+
+    "8. Quality & Consistency Assurance\n"
+    "   - Always base answers strictly on tool-derived evidence.\n"
+    "   - Guarantee logical flow, factual accuracy, and consistency in terminology.\n"
+    "   - Maintain neutrality and avoid speculative claims.\n"
+    "   - Never bypass tool execution for any query or request.\n\n"
+
+    "Critical Instruction:\n"
+    "- Every new query or request must trigger a `web_search` or `read_url`.\n"
+    "- You must not generate answers from prior knowledge, conversation history, or cached data.\n"
+    "- Always use Markdown format for URL sources with "
+    "`[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.\n"
+    "- Extract the most relevant and insightful information that directly addresses the query. "
+    "Focus on accuracy, depth of coverage, and conceptual clarity.\n"
+    "- Organize findings into a well-structured format with clear headings and subheadings. "
+    "Use bullet points where needed, but ensure the overall output reads like a professional "
+    "research summary rather than a simple list.\n"
+    "- Critically evaluate each source for credibility, reliability, and potential bias. "
+    "Identify which sources are authoritative, widely cited, or most relevant to the research context.\n"
+    "- Compare and contrast perspectives across sources. Highlight areas of consensus, disagreement, "
+    "or uncertainty. Note any gaps in the existing information and suggest directions for further exploration.\n"
+    "- Provide direct references for every cited point using Markdown links in the format "
+    "`[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`. Do not display raw URLs. "
+    "Ensure all data, claims, or quotations can be traced back to their sources.\n"
+    "- Explicitly acknowledge limitations in the available information, such as outdated data, "
+    "lack of peer-reviewed evidence, or missing context. Offer reasoned strategies for overcoming "
+    "these gaps where possible.\n"
+    "- Write with a professional, analytical, and objective tone. Avoid speculation unless clearly "
+    "flagged as such. Support reasoning with evidence wherever possible.\n"
+    "- If tools fail, you must state explicitly that no valid data could be retrieved."
+)
+
+REMINDERS = (
+    "<system>\n"
+    "- Analyze the retrieved content in detail.\n"
+    "- Identify all critical facts, arguments, statistics, and relevant data.\n"
+    "- Collect all URLs, hyperlinks, references, and citations mentioned in the content.\n"
+    "- Evaluate credibility of sources, highlight potential biases or conflicts.\n"
+    "- Produce a structured, professional, and comprehensive summary.\n"
+    "- Emphasize clarity, accuracy, and logical flow.\n"
+    "- Include all discovered URLs in the final summary as "
+    "[source_title_or_article_or_tags_or_domain](source_url_or_source_link).\n"
+    "- Mark any uncertainties, contradictions, or missing information clearly.\n"
+    "- Extract key information, main arguments, data points, and statistics.\n"
+    "- Capture every URL present in the content or references.\n"
+    "- Create a professional structured summary.\n"
+    "- List each source at the end of the summary in the format "
+    "[source_title_or_article_or_tags_or_domain](source_url_or_source_link).\n"
+    "- Identify ambiguities or gaps in information.\n"
+    "- Extract the most relevant and insightful information that directly addresses the query. "
+    "Focus on accuracy, depth of coverage, and conceptual clarity.\n"
+    "- Organize findings into a well-structured format with clear headings and subheadings. "
+    "Use bullet points where needed, but ensure the overall output reads like a professional "
+    "research summary rather than a simple list.\n"
+    "- Critically evaluate each source for credibility, reliability, and potential bias. "
+    "Identify which sources are authoritative, widely cited, or most relevant to the research context.\n"
+    "- Compare and contrast perspectives across sources. Highlight areas of consensus, disagreement, "
+    "or uncertainty. Note any gaps in the existing information and suggest directions for further exploration.\n"
+    "- Provide direct references for every cited point using markdown links in the format "
+    "[source_title_or_article_or_tags_or_domain](source_url_or_source_link). "
+    "Do not display raw URLs. Ensure all data, claims, or quotations can be traced back to their sources.\n"
+    "- Explicitly acknowledge limitations in the available information, such as outdated data, "
+    "lack of peer-reviewed evidence, or missing context. Offer reasoned strategies for overcoming "
+    "these gaps where possible.\n"
+    "- Write with a professional, analytical, and objective tone. Avoid speculation unless clearly "
+    "flagged as such. Support reasoning with evidence wherever possible.\n"
+    "- Ensure clarity, completeness, and high information density.\n"
+    "</system>"
+) # Small model need explicit instructions to understand context
 
 MAXIMUM_ITERATIONS = 1 # Max tool execution
-…
+
+MAX_RETRY_LIMIT = 3 # Max retries if tools fail or server doesn’t respond
 
 ITERATION_METRICS = {
     "attempts": 0,
@@ -37,245 +191,60 @@ ITERATION_METRICS = {
     "backoff_multiplier": 0.2
 }
 
-TCP_CONNECTOR_ENABLE_DNS_CACHE = True # aiohttp
-TCP_CONNECTOR_TTL_DNS_CACHE = 300 # aiohttp
-TCP_CONNECTOR_LIMIT = 100 # aiohttp
-TCP_CONNECTOR_LIMIT_PER_HOST = 30 # aiohttp
-TCP_CONNECTOR_FORCE_CLOSE = False # aiohttp
-TCP_CONNECTOR_ENABLE_CLEANUP = True # aiohttp
-ENABLE_TRUST_ENV = True # aiohttp
-ENABLE_CONNECTOR_OWNER = True # aiohttp
-
-INSTRUCTIONS_START = """
-You are ChatGPT, an AI assistant with mandatory real-time web search, URL content extraction, knowledge validation, and professional summarization capabilities.
-
-Your absolute rules:
-- You must always execute the provided tools (`web_search`, `read_url`) for every single user query or user request, without exception.
-- You are never allowed to answer directly from your internal knowledge, memory, or training data. Outdated or tool-bypassed answers are strictly forbidden.
-
-Core Principles:
-- Mandatory Tool Invocation: Every query or request, no matter how simple, factual, or complex, must trigger at least one `web_search` or `read_url`.
-- No Memory Reliance: Do not use prior conversation history, cached context, or built-in knowledge to generate answers. Always re-verify with tools.
-- Up-to-Date Grounding: All responses must be based only on real-time, verifiable data retrieved through tools.
-- Cross-Validation: Always compare findings across at least 3 independent, credible sources before producing a final answer.
-- Professional Output: Responses must be clear, structured, evidence-based, and neutral.
-
-Execution Workflow:
-1. Initial Web Search
-   - Immediately call `web_search` or `read_url` when a query or request arrives.
-   - Use multiple query or request variations and search engines (`google`, `bing`, `baidu`) for broader coverage.
-
-2. Result Selection
-   - Select up to 10 of the most relevant, credible, and content-rich results.
-   - Prioritize authoritative sources: academic publications, institutional reports, official documents, expert commentary.
-   - Deprioritize low-credibility, promotional, or unverified sources.
-   - Avoid over-reliance on any single source.
-
-3. Content Retrieval
-   - For each selected URL, use `read_url`.
-   - Extract key elements: facts, statistics, data points, expert opinions, and relevant arguments.
-   - Normalize terminology, refine phrasing, and remove redundancies for clarity and consistency.
-
-4. Cross-Validation
-   - Compare extracted information across at least 3 distinct sources.
-   - Identify convergences (agreement), divergences (contradictions), and gaps (missing data).
-   - Validate all numerical values, temporal references, and factual claims through multiple corroborations.
-
-5. Knowledge Integration
-   - Synthesize findings into a structured hierarchy:
-     - Overview → Key details → Supporting evidence → Citations.
-   - Emphasize the latest developments, trends, and their implications.
-   - Balance depth (for experts) with clarity (for general readers).
-
-6. Response Construction
-   - Always cite sources inline using `[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.
-   - Maintain a professional, precise, and neutral tone.
-   - Use clear formatting: headings, numbered lists, and bullet points.
-   - Ensure readability, logical progression, and accessibility.
-
-7. Ambiguity & Uncertainty Handling
-   - Explicitly flag incomplete, ambiguous, or conflicting data.
-   - Provide possible interpretations with transparent reasoning.
-   - Clearly note limitations where evidence is insufficient or weak.
-
-8. Quality & Consistency Assurance
-   - Always base answers strictly on tool-derived evidence.
-   - Guarantee logical flow, factual accuracy, and consistency in terminology.
-   - Maintain neutrality and avoid speculative claims.
-   - Never bypass tool execution for any query or request.
-
-Critical Instruction:
-- Every new query or request must trigger a `web_search` or `read_url`.
-- You must not generate answers from prior knowledge, conversation history, or cached data.
-- Always use Markdown format for URL sources with `[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`.
-- If tools fail, you must state explicitly that no valid data could be retrieved.
-\n\n\n
-"""
-
-CONTENT_EXTRACTION = """
-<system>
-- Analyze the retrieved content in detail
-- Identify all critical facts, arguments, statistics, and relevant data
-- Collect all URLs, hyperlinks, references, and citations mentioned in the content
-- Evaluate credibility of sources, highlight potential biases or conflicts
-- Produce a structured, professional, and comprehensive summary
-- Emphasize clarity, accuracy, and logical flow
-- Include all discovered URLs in the final summary as `[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`
-- Mark any uncertainties, contradictions, or missing information clearly
-</system>
-\n\n\n
-"""
-
-SEARCH_SELECTION = """
-<system>
-- For each search result, fetch the full content using `read_url`
-- Extract key information, main arguments, data points, and statistics
-- Capture every URL present in the content or references
-- Create a professional structured summary.
-- List each source at the end of the summary in the format `[source_title_or_article_or_tags_or_domain](source_url_or_source_link)`
-- Identify ambiguities or gaps in information
-- Ensure clarity, completeness, and high information density
-</system>
-\n\n\n
-"""
-
-INSTRUCTIONS_END = """
-You have just executed tools and obtained results. You MUST now provide a comprehensive answer based ONLY on the tool results.
-\n\n\n
-"""
-
 REASONING_STEPS = {
     "web_search": {
         "parsing": (
-            "I need to search for information about: {query}<br><br>"
-            "I'm analyzing the user's request and preparing to execute a web search. "
-            "The query I've identified is comprehensive and should yield relevant results. "
-            "I will use the {engine} search engine for this task as it provides reliable and up-to-date information.<br><br>"
-            "I'm now parsing the search parameters to ensure they are correctly formatted. "
-            "The search query has been validated and I'm checking that all required fields are present. "
-            "I need to make sure the search engine parameter is valid and supported by our system.<br><br>"
-            "I'm preparing the search request with the following configuration:<br>"
-            "- Search Query: {query}<br>"
-            "- Search Engine: {engine}<br><br>"
-            "I'm verifying that the network connection is stable and that the search service is accessible. "
-            "All preliminary checks have been completed successfully.<br><br>"
+            "I need to search for information about: {query}"
        ),
        "executing": (
-            "I'm now executing the web search for: {query}<br><br>"
-            "…
-            "…
-            "I'm processing multiple search result pages to gather comprehensive information.<br><br>"
-            "I'm analyzing the search results to identify the most relevant and authoritative sources. "
-            "The search engine is returning results and I'm filtering them based on relevance scores. "
-            "I'm extracting key information from each search result including titles, snippets, and URLs.<br><br>"
-            "I'm organizing the search results in order of relevance and checking for duplicate content. "
-            "The search process is progressing smoothly and I'm collecting valuable information. "
-            "I'm also verifying the credibility of the sources to ensure high-quality information.<br><br>"
-            "Current status: Processing search results...<br>"
-            "Results found: Multiple relevant sources identified<br>"
-            "Quality assessment: High relevance detected<br><br>"
+            "I'm now executing the web search for: {query}"
+            "<br>"
+            "<loading_animation>"
        ),
        "completed": (
            "I have successfully completed the web search for: {query}<br><br>"
-            "I've retrieved comprehensive search results from {engine} and analyzed all the information. "
-            "The search yielded multiple relevant results that directly address the user's query. "
-            "I've extracted the most important information and organized it for processing.<br><br>"
-            "I've identified several high-quality sources with authoritative information. "
-            "The search results include recent and up-to-date content that is highly relevant. "
-            "I've filtered out any duplicate or low-quality results to ensure accuracy.<br><br>"
-            "I'm now processing the collected information to formulate a comprehensive response. "
-            "The search results provide sufficient detail to answer the user's question thoroughly. "
-            "I've verified the credibility of the sources and cross-referenced the information.<br><br>"
-            "Search Summary:<br>"
-            "- Total results processed: Multiple pages<br>"
-            "- Relevance score: High<br>"
-            "- Information quality: Verified and accurate<br>"
-            "- Sources: Authoritative and recent<br><br>"
            "Preview of results:<br>{preview}"
        ),
        "error": (
            "I encountered an issue while attempting to search for: {query}<br><br>"
-            "…
-            "The error occurred during the search process and I need to handle it appropriately. "
-            "I'm analyzing the error to understand what went wrong and how to proceed.<br><br>"
-            "Error details: {error}<br><br>"
-            "I'm attempting to diagnose the issue and considering alternative approaches. "
-            "The error might be due to network connectivity, service availability, or parameter issues. "
-            "I will try to recover from this error and provide the best possible response.<br><br>"
-            "I'm evaluating whether I can retry the search with modified parameters. "
-            "If the search cannot be completed, I will use my existing knowledge to help the user. "
-            "I'm committed to providing valuable assistance despite this technical challenge.<br><br>"
+            "Error details: {error}"
        )
    },
    "read_url": {
        "parsing": (
-            "I need to read and extract content from the URL: {url}<br><br>"
-            "I'm analyzing the URL structure to ensure it's valid and accessible. "
-            "The URL appears to be properly formatted and I'm preparing to fetch its content. "
-            "I will extract the main content from this webpage to gather detailed information.<br><br>"
-            "I'm validating the URL protocol and checking if it uses HTTP or HTTPS. "
-            "The domain seems legitimate and I'm preparing the request headers. "
-            "I need to ensure that the website allows automated content extraction.<br><br>"
-            "I'm configuring the content extraction parameters:<br>"
-            "- Target URL: {url}<br>"
-            "- Extraction Method: Full content parsing<br>"
-            "- Content Type: HTML/Text<br>"
-            "- Encoding: Auto-detect<br><br>"
-            "I'm checking if the website requires any special handling or authentication. "
-            "All preliminary validation checks have been completed successfully.<br><br>"
+            "I need to read and extract content from the URL: {url}"
        ),
        "executing": (
-            "I'm now accessing the URL: {url}<br><br>"
-            "…
-            "…
-            "I'm following any redirects if necessary to reach the final destination.<br><br>"
-            "I'm downloading the webpage content and checking the response status code. "
-            "The server is responding and I'm receiving the HTML content. "
-            "I'm monitoring the download progress and ensuring data integrity.<br><br>"
-            "I'm parsing the HTML structure to extract the main content. "
-            "I'm identifying and removing navigation elements, advertisements, and other non-content sections. "
-            "I'm focusing on extracting the primary article or information content.<br><br>"
-            "Current status: Extracting content...<br>"
-            "Response received: Processing HTML<br>"
-            "Content extraction: In progress<br><br>"
+            "I'm now accessing the URL: {url}"
+            "<br>"
+            "<loading_animation>"
        ),
        "completed": (
            "I have successfully extracted content from: {url}<br><br>"
-            "I've retrieved the complete webpage content and processed it thoroughly. "
-            "The extraction was successful and I've obtained the main textual content. "
-            "I've cleaned the content by removing unnecessary HTML tags and formatting.<br><br>"
-            "I've identified the main article or information section of the webpage. "
-            "The content has been properly parsed and structured for analysis. "
-            "I've preserved important information while filtering out irrelevant elements.<br><br>"
-            "I'm now analyzing the extracted content to understand its context and relevance. "
-            "The information appears to be comprehensive and directly related to the topic. "
-            "I've verified that the content is complete and hasn't been truncated.<br><br>"
-            "Extraction Summary:<br>"
-            "- Content length: Substantial<br>"
-            "- Extraction quality: High<br>"
-            "- Content type: Article/Information<br>"
-            "- Processing status: Complete<br><br>"
            "Preview of extracted content:<br>{preview}"
        ),
        "error": (
            "I encountered an issue while trying to access: {url}<br><br>"
-            "…
-            "The error prevented me from successfully extracting the information. "
-            "I'm analyzing the error to understand the cause and find a solution.<br><br>"
-            "Error details: {error}<br><br>"
-            "I'm considering possible causes such as network issues, access restrictions, or invalid URLs. "
-            "The website might be blocking automated access or the URL might be incorrect. "
-            "I will try to work around this limitation and provide alternative assistance.<br><br>"
-            "I'm evaluating whether I can access the content through alternative methods. "
-            "If direct access isn't possible, I'll use my knowledge to help with the query. "
-            "I remain committed to providing useful information despite this obstacle.<br><br>"
+            "Error details: {error}"
        )
    }
 }
 
-
-
-
+TCP_CONNECTOR_ENABLE_DNS_CACHE = True # AIOHTTP
+
+TCP_CONNECTOR_TTL_DNS_CACHE = 300 # AIOHTTP
+
+TCP_CONNECTOR_LIMIT = 100 # AIOHTTP
+
+TCP_CONNECTOR_LIMIT_PER_HOST = 30 # AIOHTTP
+
+TCP_CONNECTOR_FORCE_CLOSE = False # AIOHTTP
+
+TCP_CONNECTOR_ENABLE_CLEANUP = True # AIOHTTP
+
+ENABLE_TRUST_ENV = True # AIOHTTP
+
+ENABLE_CONNECTOR_OWNER = True # AIOHTTP
 
 OS = [
     "Windows NT 10.0; Win64; x64",
@@ -468,20 +437,23 @@ TIMEZONES = [
     "Pacific/Honolulu"
 ]
 
-DESCRIPTION = …
-<b>SearchGPT</b> is <b>ChatGPT</b> with real-time web search capabilities…
-This Space implements an agent-based system with…
-<b><a href=…
-…
-target=…
-<…
-…
-<…
-…
-""
+DESCRIPTION = (
+    "<b>SearchGPT</b> is <b>ChatGPT</b> with real-time web search capabilities "
+    "and the ability to read content directly from a URL.<br><br>"
+    "This Space implements an agent-based system with "
+    "<b><a href='https://www.gradio.app' target='_blank'>Gradio</a></b>. "
+    "It is integrated with "
+    "<b><a href='https://docs.searxng.org' target='_blank'>SearXNG</a></b>, "
+    "which is then converted into a script tool or function for native execution.<br><br>"
+    "The agent mode is inspired by the "
+    "<b><a href='https://openwebui.com/t/hadad/deep_research' target='_blank'>Deep Research</a></b> "
+    "from <b><a href='https://docs.openwebui.com' target='_blank'>OpenWebUI</a></b> tools script.<br><br>"
+    "The <b>Deep Research</b> feature is also available on the primary Spaces of "
+    "<b><a href='https://umint-openwebui.hf.space' target='_blank'>UltimaX Intelligence</a></b>.<br><br>"
+    "Please consider reading the "
+    "<b><a href='https://huggingface.co/spaces/umint/ai/discussions/37#68b55209c51ca52ed299db4c' "
+    "target='_blank'>Terms of Use and Consequences of Violation</a></b> "
+    "if you wish to proceed to the main Spaces.<br><br>"
+    "<b>Like this project? Feel free to buy me a "
+    "<a href='https://ko-fi.com/hadad' target='_blank'>coffee</a></b>."
+) # Gradio
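The new `MAX_RETRY_LIMIT` works together with `ITERATION_METRICS`, of which only the `attempts` and `backoff_multiplier` keys are visible in these hunks. A hypothetical retry loop built from just those knobs, to show the intent; this is a sketch, not the repo's executor:

import time

MAX_RETRY_LIMIT = 3
ITERATION_METRICS = {"attempts": 0, "backoff_multiplier": 0.2}

def call_with_retry(fn, *args, **kwargs):
    last_error = None
    for attempt in range(1, MAX_RETRY_LIMIT + 1):
        ITERATION_METRICS["attempts"] = attempt
        try:
            return fn(*args, **kwargs)
        except Exception as error:
            last_error = error
            # Linear backoff: 0.2 s, 0.4 s, 0.6 s between attempts.
            time.sleep(ITERATION_METRICS["backoff_multiplier"] * attempt)
    raise last_error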
src/client/__init__.py
CHANGED
@@ -3,6 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-from .openai_client import …
+from .openai_client import setup_client
 
-__all__ = ['…']
+__all__ = ['setup_client']
src/client/openai_client.py
CHANGED
@@ -6,7 +6,7 @@
 import os
 from openai import OpenAI
 
-def …
+def setup_client():
     try:
         client = OpenAI(
             base_url=os.getenv("OPENAI_API_BASE_URL"),
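Only the top of `setup_client()` is in this hunk, but `message_processor.py` below unpacks a `(client, error)` pair from it, so the function plausibly completes along these lines; the error message wording here is illustrative:

import os
from openai import OpenAI

def setup_client():
    try:
        client = OpenAI(
            base_url=os.getenv("OPENAI_API_BASE_URL"),
            api_key=os.getenv("OPENAI_API_KEY"),
        )
        return client, None
    except Exception as error:
        # The caller yields this string straight to the chat on failure.
        return None, f"Client initialization failed: {error}"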
src/core/web_configuration.py
CHANGED
@@ -3,11 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-from config import SEARXNG_ENDPOINT, BAIDU_ENDPOINT, READER_ENDPOINT, REQUEST_TIMEOUT
+from config import SEARXNG_ENDPOINT, READER_ENDPOINT, REQUEST_TIMEOUT
 
 class WebConfiguration:
     def __init__(self):
         self.searxng_endpoint = SEARXNG_ENDPOINT
-        self.baidu_endpoint = BAIDU_ENDPOINT
         self.content_reader_api = READER_ENDPOINT
         self.request_timeout = REQUEST_TIMEOUT
src/core/web_loader.py
CHANGED
@@ -36,112 +36,122 @@ class WebLoader:
 
     def generate_ipv4(self):
         while len(self.ipv4_pool) < 1000 and self.running:
-            ip = f"{random.choice(OCTETS)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"
             with self.lock:
-                self.ipv4_pool.append(ip)
+                self.ipv4_pool.append(
+                    f"{random.choice(OCTETS)}.{random.randint(0, 255)}."
+                    f"{random.randint(0, 255)}.{random.randint(1, 254)}"
+                )
 
     def generate_ipv6(self):
         while len(self.ipv6_pool) < 1000 and self.running:
-            segments = [f"{random.randint(0, 65535):04x}" for _ in range(8)]
-            ip = ":".join(segments)
             with self.lock:
-                self.ipv6_pool.append(ip)
+                self.ipv6_pool.append(
+                    ":".join([f"{random.randint(0, 65535):04x}" for _ in range(8)])
+                )
 
     def generate_user_agents(self):
         while len(self.user_agent_pool) < 500 and self.running:
-            browser = random.choice(BROWSERS)
-
-            if browser == "Chrome":
-                ua = f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.choice(CHROME_VERSIONS)} Safari/537.36"
-            elif browser == "Firefox":
-                ua = f"Mozilla/5.0 ({random.choice(OS)}) Gecko/20100101 Firefox/{random.choice(FIREFOX_VERSIONS)}"
-            elif browser == "Safari":
-                webkit_version = f"{600 + random.randint(0, 15)}.{random.randint(1, 9)}.{random.randint(1, 20)}"
-                ua = f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/{webkit_version} (KHTML, like Gecko) Version/{random.choice(SAFARI_VERSIONS)} Safari/{webkit_version}"
-            elif browser == "Edge":
-                version = random.choice(EDGE_VERSIONS)
-                ua = f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version.split('.')[0]}.0.0.0 Safari/537.36 Edg/{version}"
-            else:
-                version = f"{random.randint(70, 100)}.0.{random.randint(3000, 5000)}.{random.randint(50, 150)}"
-                ua = f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version} Safari/537.36 OPR/{random.randint(80, 106)}.0.0.0"
-
             with self.lock:
-                self.user_agent_pool.append(ua)
+                match random.choice(BROWSERS):
+                    case "Chrome":
+                        self.user_agent_pool.append(
+                            f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/537.36 "
+                            f"(KHTML, like Gecko) Chrome/{random.choice(CHROME_VERSIONS)} Safari/537.36"
+                        )
+                    case "Firefox":
+                        self.user_agent_pool.append(
+                            f"Mozilla/5.0 ({random.choice(OS)}) Gecko/20100101 "
+                            f"Firefox/{random.choice(FIREFOX_VERSIONS)}"
+                        )
+                    case "Safari":
+                        self.user_agent_pool.append(
+                            f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/"
+                            f"{600 + random.randint(0, 15)}.{random.randint(1, 9)}.{random.randint(1, 20)} "
+                            f"(KHTML, like Gecko) Version/{random.choice(SAFARI_VERSIONS)} "
+                            f"Safari/{600 + random.randint(0, 15)}.{random.randint(1, 9)}.{random.randint(1, 20)}"
+                        )
+                    case "Edge":
+                        self.user_agent_pool.append(
+                            f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/537.36 "
+                            f"(KHTML, like Gecko) Chrome/{random.choice(EDGE_VERSIONS).split('.')[0]}.0.0.0 "
+                            f"Safari/537.36 Edg/{random.choice(EDGE_VERSIONS)}"
+                        )
+                    case _:
+                        self.user_agent_pool.append(
+                            f"Mozilla/5.0 ({random.choice(OS)}) AppleWebKit/537.36 "
+                            f"(KHTML, like Gecko) Chrome/{random.randint(70, 100)}.0."
+                            f"{random.randint(3000, 5000)}.{random.randint(50, 150)} "
+                            f"Safari/537.36 OPR/{random.randint(80, 106)}.0.0.0"
+                        )
 
     def generate_origins(self):
         while len(self.origin_pool) < 500 and self.running:
-            origin = f"{random.choice(PROTOCOLS)}{random.choice(DOMAINS)}"
             with self.lock:
-                self.origin_pool.append(origin)
+                self.origin_pool.append(
+                    f"{random.choice(PROTOCOLS)}{random.choice(DOMAINS)}"
+                )
 
     def generate_referrers(self):
         while len(self.referrer_pool) < 500 and self.running:
-            referrer = f"{random.choice(SEARCH_ENGINES)}{random.choice(KEYWORDS)}"
             with self.lock:
-                self.referrer_pool.append(referrer)
+                self.referrer_pool.append(
+                    f"{random.choice(SEARCH_ENGINES)}{random.choice(KEYWORDS)}"
+                )
 
     def generate_locations(self):
         while len(self.location_pool) < 500 and self.running:
-            location = {
-                "country": random.choice(COUNTRIES),
-                "language": random.choice(LANGUAGES),
-                "timezone": random.choice(TIMEZONES)
-            }
             with self.lock:
-                self.location_pool.append(location)
+                self.location_pool.append({
+                    "country": random.choice(COUNTRIES),
+                    "language": random.choice(LANGUAGES),
+                    "timezone": random.choice(TIMEZONES)
+                })
 
     def get_ipv4(self):
         with self.lock:
             if self.ipv4_pool:
                 return self.ipv4_pool[random.randint(0, len(self.ipv4_pool) - 1)]
-            return …
+            return (
+                f"{random.randint(1, 223)}.{random.randint(0, 255)}."
+                f"{random.randint(0, 255)}.{random.randint(1, 254)}"
+            )
 
     def get_ipv6(self):
         with self.lock:
             if self.ipv6_pool:
                 return self.ipv6_pool[random.randint(0, len(self.ipv6_pool) - 1)]
-            …
-            return ":".join(segments)
+            return ":".join([f"{random.randint(0, 65535):04x}" for _ in range(8)])
 
     def get_user_agent(self):
         with self.lock:
             if self.user_agent_pool:
                 return self.user_agent_pool[random.randint(0, len(self.user_agent_pool) - 1)]
-            return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
 
     def get_origin(self):
         with self.lock:
             if self.origin_pool:
                 return self.origin_pool[random.randint(0, len(self.origin_pool) - 1)]
-            return "https://www.google.com"
 
     def get_referrer(self):
         with self.lock:
             if self.referrer_pool:
                 return self.referrer_pool[random.randint(0, len(self.referrer_pool) - 1)]
-            return "https://www.google.com/search?q=search"
 
     def get_location(self):
         with self.lock:
             if self.location_pool:
                 return self.location_pool[random.randint(0, len(self.location_pool) - 1)]
-            return {
-                "country": "US",
-                "language": "en-US",
-                "timezone": "America/New_York"
-            }
 
     def start_engine(self):
-        …
-        ]
-        …
-            thread.start()
+        for target in [
+            self.generate_ipv4,
+            self.generate_ipv6,
+            self.generate_user_agents,
+            self.generate_origins,
+            self.generate_referrers,
+            self.generate_locations
+        ]:
+            threading.Thread(target=target, daemon=True).start()
 
     def stop(self):
         self.running = False
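browser_engine.py below imports a module-level `web_loader` singleton from this file, so the module presumably ends with an instantiation along the lines of `web_loader = WebLoader()` (not shown in this hunk). Typical use of the pool API, assuming that singleton exists:

from src.core.web_loader import web_loader

web_loader.start_engine()         # spawn the six daemon generator threads
ip = web_loader.get_ipv4()        # thread-safe; falls back to a random address
                                  # while the pool is still filling
ua = web_loader.get_user_agent()  # returns None until the pool has entries,
                                  # since this commit drops the static fallback
web_loader.stop()                 # flag the generator loops to exit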
src/engine/browser_engine.py
CHANGED
@@ -7,8 +7,7 @@ import aiohttp
 import asyncio
 from urllib.parse import quote
 from config import (
-    CONTENT_EXTRACTION,
-    SEARCH_SELECTION,
+    REMINDERS,
     TCP_CONNECTOR_ENABLE_DNS_CACHE,
     TCP_CONNECTOR_TTL_DNS_CACHE,
     TCP_CONNECTOR_LIMIT,
@@ -18,32 +17,25 @@ from config import (
     ENABLE_TRUST_ENV,
     ENABLE_CONNECTOR_OWNER
 )
-from …
+from ..core.web_loader import web_loader
 
 class BrowserEngine:
     def __init__(self, configuration):
         self.config = configuration
 
     def generate_headers(self):
-        ipv4 = web_loader.get_ipv4()
-        ipv6 = web_loader.get_ipv6()
-        user_agent = web_loader.get_user_agent()
-        origin = web_loader.get_origin()
-        referrer = web_loader.get_referrer()
-        location = web_loader.get_location()
-
         return {
-            "User-Agent": user_agent,
-            "X-Forwarded-For": f"{ipv4}, {ipv6}",
-            "X-Real-IP": ipv4,
-            "X-Originating-IP": ipv4,
-            "X-Remote-IP": ipv4,
-            "X-Remote-Addr": ipv4,
-            "X-Client-IP": ipv4,
-            "X-Forwarded-Host": …,
-            "Origin": origin,
-            "Referer": referrer,
-            "Accept-Language": f"{location['language']},en;q=0.9",
+            "User-Agent": web_loader.get_user_agent(),
+            "X-Forwarded-For": f"{web_loader.get_ipv4()}, {web_loader.get_ipv6()}",
+            "X-Real-IP": web_loader.get_ipv4(),
+            "X-Originating-IP": web_loader.get_ipv4(),
+            "X-Remote-IP": web_loader.get_ipv4(),
+            "X-Remote-Addr": web_loader.get_ipv4(),
+            "X-Client-IP": web_loader.get_ipv4(),
+            "X-Forwarded-Host": web_loader.get_origin().replace("https://", "").replace("http://", ""),
+            "Origin": web_loader.get_origin(),
+            "Referer": web_loader.get_referrer(),
+            "Accept-Language": f"{web_loader.get_location()['language']},en;q=0.9",
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
             "Accept-Encoding": "gzip, deflate, br",
             "DNT": "1",
@@ -54,35 +46,28 @@ class BrowserEngine:
             "Sec-Fetch-Site": "cross-site",
             "Sec-Fetch-User": "?1",
             "Cache-Control": "max-age=0",
-            "X-Country": location['country'],
-            "X-Timezone": location['timezone']
+            "X-Country": web_loader.get_location()['country'],
+            "X-Timezone": web_loader.get_location()['timezone']
         }
 
     def web_selector(self, search_query: str, search_provider: str = "google"):
-        if search_provider == "baidu":
-            return (
-                f"{self.config.content_reader_api}{self.config.baidu_endpoint}?wd={quote(search_query)}",
-                "#content_left"
-            )
-        provider_prefix = "!go" if search_provider == "google" else "!bi"
+        provider_prefix = "!go"
         return (
             f"{self.config.content_reader_api}{self.config.searxng_endpoint}?q={quote(f'{provider_prefix} {search_query}')}",
             "#urls"
         )
 
     async def web_request(self, method: str, url: str, headers: dict, data: dict = None):
-        timeout = aiohttp.ClientTimeout(total=self.config.request_timeout)
-        connector = aiohttp.TCPConnector(
-            use_dns_cache=TCP_CONNECTOR_ENABLE_DNS_CACHE,
-            ttl_dns_cache=TCP_CONNECTOR_TTL_DNS_CACHE,
-            limit=TCP_CONNECTOR_LIMIT,
-            limit_per_host=TCP_CONNECTOR_LIMIT_PER_HOST,
-            force_close=TCP_CONNECTOR_FORCE_CLOSE,
-            enable_cleanup_closed=TCP_CONNECTOR_ENABLE_CLEANUP
-        )
         async with aiohttp.ClientSession(
-            timeout=timeout,
-            connector=connector,
+            timeout=aiohttp.ClientTimeout(total=self.config.request_timeout),
+            connector=aiohttp.TCPConnector(
+                use_dns_cache=TCP_CONNECTOR_ENABLE_DNS_CACHE,
+                ttl_dns_cache=TCP_CONNECTOR_TTL_DNS_CACHE,
+                limit=TCP_CONNECTOR_LIMIT,
+                limit_per_host=TCP_CONNECTOR_LIMIT_PER_HOST,
+                force_close=TCP_CONNECTOR_FORCE_CLOSE,
+                enable_cleanup_closed=TCP_CONNECTOR_ENABLE_CLEANUP
+            ),
             trust_env=ENABLE_TRUST_ENV,
             connector_owner=ENABLE_CONNECTOR_OWNER
         ) as session:
@@ -102,7 +87,7 @@
         payload = {"url": target_url}
         try:
             extracted_content = asyncio.run(self._post(self.config.content_reader_api, payload, headers))
-            return f"{extracted_content}\n\n\n{CONTENT_EXTRACTION}"
+            return f"{extracted_content}\n\n\n{REMINDERS}\n\n\n"
         except Exception as error:
             return f"Error reading URL: {str(error)}"
 
@@ -112,6 +97,6 @@
         headers["X-Target-Selector"] = selector
         try:
             search_results = asyncio.run(self._get(full_url, headers))
-            return f"{search_results}\n\n\n{SEARCH_SELECTION}"
+            return f"{search_results}\n\n\n{REMINDERS}\n\n\n"
         except Exception as error:
             return f"Error during search: {str(error)}"
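The `_get` and `_post` coroutines called by `read_url` and the search path are not part of this diff; given `web_request(method, url, headers, data)` above, they are presumably thin wrappers. A standalone sketch of that shape, with hypothetical stand-ins rather than the repo's code:

import asyncio
import aiohttp

async def _request(method: str, url: str, headers: dict, data: dict = None) -> str:
    # Mirrors web_request: one session per call, returning the body as text.
    async with aiohttp.ClientSession() as session:
        async with session.request(method, url, headers=headers, json=data) as response:
            return await response.text()

async def _get(url: str, headers: dict) -> str:
    return await _request("GET", url, headers)

async def _post(url: str, data: dict, headers: dict) -> str:
    return await _request("POST", url, headers, data)

# Usage mirrors the asyncio.run(...) calls above:
# html = asyncio.run(_get("https://example.com", {"User-Agent": "..."}))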
src/processor/__init__.py
CHANGED
@@ -3,6 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-from .message_processor import process_user_request
+from .message_processor import searchgpt_playground
 
-__all__ = ['process_user_request']
+__all__ = ['searchgpt_playground']
src/processor/message_processor.py
CHANGED
@@ -5,15 +5,15 @@
 
 import traceback
 from config import MODEL
-from …
-from …
-from …
-from …
+from ..core.web_configuration import WebConfiguration
+from ..engine.browser_engine import BrowserEngine
+from ..tools.tool_manager import local_tools
+from ..client.openai_client import setup_client
 from .response.setup import setup_response
 from .response.generator import generate_response
-from .tools.interaction import …
+from .tools.interaction import tools_setup
 
-def process_user_request(user_message, chat_history):
+def searchgpt_playground(user_message, chat_history):
     if not isinstance(user_message, str) or not user_message.strip():
         yield []
         return
@@ -21,30 +21,22 @@ def process_user_request(user_message, chat_history):
     output_content = ""
 
     try:
-        server, client_initialization_error = …
+        server, client_initialization_error = setup_client()
         if client_initialization_error:
             output_content = client_initialization_error
             yield output_content
             return
 
-        …
-        search_engine_instance = BrowserEngine(search_configuration)
-        available_tools = construct_tool_definitions()
-
-        conversation_messages = setup_response(
-            chat_history,
-            user_message
-        )
-
+        conversation_messages = setup_response(chat_history, user_message)
         tool_response = ""
        tools_done = False
 
-        for tool_update in …(
+        for tool_update in tools_setup(
             server=server,
             model_name=MODEL,
             conversation_messages=conversation_messages,
-            tool_definitions=…,
-            search_engine=…
+            tool_definitions=local_tools(),
+            search_engine=BrowserEngine(WebConfiguration())
         ):
             if isinstance(tool_update, str):
                 tool_response = tool_update
@@ -61,7 +53,7 @@ def process_user_request(user_message, chat_history):
             server=server,
             model_name=MODEL,
             conversation_messages=conversation_messages,
-            tool_definitions=…,
+            tool_definitions=local_tools(),
             tools_done=tools_done
         )
 
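searchgpt_playground is a generator, which is what gr.ChatInterface consumes for streaming: every yielded string replaces the assistant bubble in place. A stripped-down sketch of that contract, with illustrative strings rather than the Space's exact flow:

def playground_sketch(user_message, chat_history):
    # Reject empty input the same way the real entry point does.
    if not isinstance(user_message, str) or not user_message.strip():
        yield []
        return
    # Intermediate yields stream tool and reasoning logs into the UI...
    yield "Searching the web..."
    # ...and the final yield is the fully assembled answer.
    yield "Searching the web...\n\nFinal answer assembled from tool results."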
src/processor/reasoning/interface.py
CHANGED
@@ -3,16 +3,9 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
+from assets.css.animations.loading import spinner
+
 def reasoning_interfaces(text, current_length=0):
-    …
-    if current_length > 0 and not reasoning_steps.endswith((
-        '<br>',
-        '<br><br>'
-    )):
-        reasoning_steps += '...'
-
-        return reasoning_steps
-
-    return text
+    return text[:current_length].replace(
+        "<loading_animation>", spinner()
+    )
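The new reasoning_interfaces is a typewriter effect: callers pass ever-larger current_length values, and any <loading_animation> placeholder inside the revealed prefix is swapped for the spinner markup. A self-contained sketch with a stubbed spinner(); the real one lives in assets/css/animations/loading.py:

def spinner():
    # Stub; the real spinner() returns the Space's loading-animation HTML.
    return "<span class='spinner'></span>"

def reasoning_interfaces(text, current_length=0):
    return text[:current_length].replace("<loading_animation>", spinner())

message = "Searching the web <loading_animation>"
for i in range(len(message) + 1):
    # The placeholder is only replaced once the slice contains it in full.
    print(reasoning_interfaces(message, i))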
src/processor/reasoning/tool_reasoning.py
CHANGED
@@ -3,36 +3,41 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
 from config import REASONING_STEPS
 
 def tool_reasoning(tool_name, tool_arguments, stage, error=None, result=None):
     if tool_name == "web_search":
-        …
+        if stage == "completed":
+            preview = result[:300] + "..." if result and len(result) > 300 else result
+            return REASONING_STEPS["web_search"][stage].format(
+                query=tool_arguments.get("query", "") if tool_arguments else "",
+                engine=tool_arguments.get("engine", "google") if tool_arguments else "google",
+                preview=preview
+            )
+        elif stage == "error":
+            return REASONING_STEPS["web_search"][stage].format(
+                query=tool_arguments.get("query", "") if tool_arguments else "",
+                engine=tool_arguments.get("engine", "google") if tool_arguments else "google",
+                error=error
+            )
+        else:
+            return REASONING_STEPS["web_search"][stage].format(
+                query=tool_arguments.get("query", "") if tool_arguments else "",
+                engine=tool_arguments.get("engine", "google") if tool_arguments else "google"
+            )
     elif tool_name == "read_url":
-        …
+        if stage == "completed":
+            preview = result[:300] + "..." if result and len(result) > 300 else result
+            return REASONING_STEPS["read_url"][stage].format(
+                url=tool_arguments.get("url", "") if tool_arguments else "",
+                preview=preview
+            )
+        elif stage == "error":
+            return REASONING_STEPS["read_url"][stage].format(
+                url=tool_arguments.get("url", "") if tool_arguments else "",
+                error=error
+            )
+        else:
+            return REASONING_STEPS["read_url"][stage].format(
+                url=tool_arguments.get("url", "") if tool_arguments else ""
+            )
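tool_reasoning assumes config.REASONING_STEPS is a nested dict of per-tool, per-stage format strings with {query}, {engine}, {url}, {preview}, and {error} slots. A guessed-at shape for reference; only the placeholder names are load-bearing, and the actual wording in config.py may differ:

REASONING_STEPS = {
    "web_search": {
        "parsing": "Parsing search for '{query}' on {engine} <loading_animation>",
        "executing": "Searching {engine} for '{query}' <loading_animation>",
        "completed": "Search '{query}' on {engine} done: {preview}",
        "error": "Search '{query}' on {engine} failed: {error}",
    },
    "read_url": {
        "parsing": "Parsing read request for {url} <loading_animation>",
        "executing": "Reading {url} <loading_animation>",
        "completed": "Finished {url}: {preview}",
        "error": "Reading {url} failed: {error}",
    },
}

print(REASONING_STEPS["web_search"]["executing"].format(engine="google", query="searxng"))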
src/processor/response/generator.py
CHANGED
@@ -5,7 +5,6 @@
 
 import traceback
 from config import (
-    INSTRUCTIONS_END,
     MAX_TOKENS,
     CHAT_TEMPERATURE,
     STREAM
@@ -20,12 +19,6 @@ def generate_response(
 ):
     response_generator = ""
 
-    if tools_done:
-        conversation_messages.append({
-            "role": "system",
-            "content": INSTRUCTIONS_END
-        })
-
     try:
         response = server.chat.completions.create(
             model=model_name,
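generate_response now goes straight to the completion call with no injected INSTRUCTIONS_END message. Under STREAM = True the usual accumulate-and-reyield pattern applies; a sketch with literal stand-ins for the config constants:

def generate_response_sketch(server, model_name, conversation_messages):
    buffer = ""
    stream = server.chat.completions.create(
        model=model_name,
        messages=conversation_messages,
        max_tokens=1024,   # stand-in for MAX_TOKENS
        temperature=0.7,   # stand-in for CHAT_TEMPERATURE
        stream=True,       # stand-in for STREAM
    )
    for chunk in stream:
        # Each chunk carries a text delta; re-yield the growing buffer.
        buffer += chunk.choices[0].delta.content or ""
        yield buffer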
src/processor/response/setup.py
CHANGED
@@ -4,7 +4,7 @@
 #
 
 from ...utils.time import get_current_time
-from config import …
+from config import INSTRUCTIONS
 
 def setup_response(conversation_history, user_input):
     history = []
@@ -16,7 +16,7 @@ def setup_response(conversation_history, user_input):
             "content": (
                 f"Today is: {get_current_time()}"
                 + "\n\n\n"
-                + …
+                + INSTRUCTIONS
            )
         }
     )
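After this change the system prompt is just the timestamp plus the single INSTRUCTIONS block. What the assembled message looks like, with stubs standing in for the two real inputs:

def get_current_time():
    return "2025-01-01 00:00:00 UTC"  # stub for src/utils/time.py

INSTRUCTIONS = "You are SearchGPT..."  # assumed wording; defined in config.py

system_message = {
    "role": "system",
    "content": f"Today is: {get_current_time()}" + "\n\n\n" + INSTRUCTIONS,
}
print(system_message["content"])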
src/processor/tools/__init__.py
CHANGED
@@ -3,12 +3,12 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-from .parser import extract_tool_parameters
-from .executor import …
-from .interaction import …
+from .parser import tools_parser
+from .executor import tools_call
+from .interaction import tools_setup
 
 __all__ = [
-    'extract_tool_parameters',
-    '…',
-    '…'
+    'tools_parser',
+    'tools_call',
+    'tools_setup'
 ]
src/processor/tools/executor.py
CHANGED
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-def …
+def tools_call(search_engine, function_name, function_params):
     if function_name == "web_search":
         return search_engine.perform_search(
             search_query=function_params.get("query", ""),
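tools_call is a plain name-based dispatcher over the BrowserEngine. A usage sketch with a stub engine; the read_url branch and the perform_reading method name are inferred from the tool definitions and tool_reasoning.py, not copied from this file:

class StubEngine:
    def perform_search(self, search_query, search_provider="google"):
        return f"results for {search_query!r} via {search_provider}"

    def perform_reading(self, target_url):  # assumed method name
        return f"contents of {target_url}"

def tools_call_sketch(search_engine, function_name, function_params):
    if function_name == "web_search":
        return search_engine.perform_search(
            search_query=function_params.get("query", ""),
            search_provider=function_params.get("engine", "google"),
        )
    if function_name == "read_url":  # inferred second branch
        return search_engine.perform_reading(function_params.get("url", ""))
    return f"Unknown tool: {function_name}"

print(tools_call_sketch(StubEngine(), "web_search", {"query": "gradio"}))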
src/processor/tools/interaction.py
CHANGED
@@ -8,209 +8,131 @@ from assets.css.reasoning import styles
 from ..response.formatter import assistant_response
 from ..reasoning.interface import reasoning_interfaces
 from ..reasoning.tool_reasoning import tool_reasoning
-from .parser import extract_tool_parameters
-from .executor import …
+from .parser import tools_parser
+from .executor import tools_call
 from config import (
     MAX_TOKENS,
-    REASONING_DELAY,
-    REASONING_INSERT,
     TOOLS_TEMPERATURE,
     MAXIMUM_ITERATIONS,
     MAX_RETRY_LIMIT,
     ITERATION_METRICS
 )
 
-def …
-    …
-    error_history = []
-
-    while MAXIMUM_ITERATIONS <= MAX_RETRY_LIMIT and not execution_success:
-        ITERATION_METRICS["attempts"] += 1
-        current_iteration_successful = False
-        iteration_errors = []
-
-        for iteration_index in range(MAXIMUM_ITERATIONS):
+def tools_setup(server, model_name, conversation_messages, tool_definitions, search_engine):
+    if "current_iterations" not in ITERATION_METRICS:
+        ITERATION_METRICS["current_iterations"] = MAXIMUM_ITERATIONS
+
+    while ITERATION_METRICS["current_iterations"] <= MAX_RETRY_LIMIT:
+        for iteration_index in range(ITERATION_METRICS["current_iterations"]):
             try:
-                …
-                time.sleep(retry_delay * ITERATION_METRICS["backoff_multiplier"])
+                if ITERATION_METRICS.get("retry_count", 0) > 0:
+                    time.sleep(ITERATION_METRICS["retry_delays"][min(ITERATION_METRICS.get("retry_count", 0), len(ITERATION_METRICS["retry_delays"]) - 1)] * ITERATION_METRICS["backoff_multiplier"])
 
-                model_response = server.chat.completions.create(
+                response = server.chat.completions.create(
                     model=model_name,
                     messages=conversation_messages,
                     tools=tool_definitions,
                     tool_choice="auto",
                     max_tokens=MAX_TOKENS,
                     temperature=TOOLS_TEMPERATURE
-                )
-
-                response_choice = model_response.choices[0]
-                assistant_message = response_choice.message
-                formatted_assistant_message = assistant_response(assistant_message)
-
-                conversation_messages.append(
-                    {
-                        "role": formatted_assistant_message["role"],
-                        "content": formatted_assistant_message["content"],
-                        "tool_calls": formatted_assistant_message["tool_calls"]
-                    }
-                )
-
-                …
-                    execution_success = True
-                    current_iteration_successful = True
-                    break
-
-                …
-                    tool_arguments_raw = tool_invocation.function.arguments
-
-                    extracted_arguments, extraction_error = extract_tool_parameters(tool_arguments_raw)
-
-                    if extraction_error:
-                        error_key = f"{tool_name}_extraction"
-                        ITERATION_METRICS["error_patterns"][error_key] = ITERATION_METRICS["error_patterns"].get(error_key, 0) + 1
-                        tool_execution_errors.append({
-                            "tool": tool_name,
-                            "error": extraction_error,
-                            "type": "extraction"
-                        })
-                        …
+                ).choices[0].message
+
+                conversation_messages.append(assistant_response(response))
+
+                if not (response.tool_calls or []):
+                    if ITERATION_METRICS.get("logs_generator", ""):
+                        ITERATION_METRICS["logs_generator"] = styles(ITERATION_METRICS.get("logs_generator", "").replace('<br>', '\n').strip(), expanded=False)
+                    return conversation_messages, ITERATION_METRICS.get("logs_generator", ""), True
+
+                for tool_invocation in (response.tool_calls or []):
+                    if tools_parser(tool_invocation.function.arguments)[1]:
+                        ITERATION_METRICS["error_patterns"][f"{tool_invocation.function.name}_extraction"] = ITERATION_METRICS["error_patterns"].get(f"{tool_invocation.function.name}_extraction", 0) + 1
+
+                        for i in range(0, len(tool_reasoning(tool_invocation.function.name, None, "error", error=tools_parser(tool_invocation.function.arguments)[1])) + 1):
+                            ITERATION_METRICS["logs_generator"] = styles(reasoning_interfaces(tool_reasoning(tool_invocation.function.name, None, "error", error=tools_parser(tool_invocation.function.arguments)[1]), i), expanded=True)
+                            yield ITERATION_METRICS["logs_generator"]
+                        ITERATION_METRICS["logs_generator"] = styles(tool_reasoning(tool_invocation.function.name, None, "error", error=tools_parser(tool_invocation.function.arguments)[1]), expanded=True)
+                        yield ITERATION_METRICS["logs_generator"]
+
+                        conversation_messages.append({
+                            "role": "tool",
+                            "tool_call_id": tool_invocation.id,
+                            "name": tool_invocation.function.name,
+                            "content": tools_parser(tool_invocation.function.arguments)[1]
+                        })
                     else:
-                        …
-                        yield logs_generator
-                        time.sleep(REASONING_DELAY)
-
-                        …
-                        yield logs_generator
-                        time.sleep(REASONING_DELAY)
+                        for i in range(0, len(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "parsing")) + 1):
+                            ITERATION_METRICS["logs_generator"] = styles(reasoning_interfaces(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "parsing"), i), expanded=True)
+                            yield ITERATION_METRICS["logs_generator"]
+
+                        for i in range(0, len(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "executing")) + 1):
+                            ITERATION_METRICS["logs_generator"] = styles(reasoning_interfaces(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "executing"), i), expanded=True)
+                            yield ITERATION_METRICS["logs_generator"]
 
                         try:
-                            …
-                                "iteration": MAXIMUM_ITERATIONS,
-                                "retry_count": retry_count
+                            conversation_messages.append({
+                                "role": "tool",
+                                "tool_call_id": tool_invocation.id,
+                                "name": tool_invocation.function.name,
+                                "content": tools_call(
+                                    search_engine,
+                                    tool_invocation.function.name,
+                                    tools_parser(tool_invocation.function.arguments)[0]
+                                )
                             })
 
-                            …
-                            logs_generator = styles(reasoning_done, expanded=False)
-                            yield logs_generator
+                            for i in range(0, len(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "completed", result=conversation_messages[-1]["content"])) + 1):
+                                ITERATION_METRICS["logs_generator"] = styles(reasoning_interfaces(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "completed", result=conversation_messages[-1]["content"]), i), expanded=True)
+                                yield ITERATION_METRICS["logs_generator"]
+                            ITERATION_METRICS["logs_generator"] = styles(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "completed", result=conversation_messages[-1]["content"]), expanded=False)
+                            yield ITERATION_METRICS["logs_generator"]
 
                         except Exception as tool_error:
-                            error_key = f"{tool_name}_execution"
-                            ITERATION_METRICS["error_patterns"][error_key] = ITERATION_METRICS["error_patterns"].get(error_key, 0) + 1
-                            tool_execution_errors.append({
-                                "tool": tool_name,
-                                "error": str(tool_error),
-                                "type": "execution",
-                                "arguments": extracted_arguments
-                            })
-
-                            …
-                                "tool_call_id": tool_invocation.id,
-                                "name": tool_name,
-                                "content": tool_execution_result
-                            }
-                        )
-
-                …
-                execution_success = True
-                current_iteration_successful = True
-                break
-            else:
-                iteration_errors.extend(tool_execution_errors)
-
-            except Exception …
-                error_history.append({
-                    "iteration": MAXIMUM_ITERATIONS,
-                    "error": last_error,
-                    "timestamp": time.time()
-                })
-                ITERATION_METRICS["failures"] += 1
-                iteration_errors.append({
-                    "error": last_error,
-                    "type": "model"
-                })
-
-        …
-        error_history.extend(iteration_errors)
-
-        retry_count += 1
-        previous_iterations = MAXIMUM_ITERATIONS
-
-        if ITERATION_METRICS["error_patterns"]:
-            frequent_errors = max(ITERATION_METRICS["error_patterns"].values())
-            if frequent_errors > 3:
-                new_iterations = min(MAXIMUM_ITERATIONS + 2, MAX_RETRY_LIMIT)
-            else:
-                new_iterations = min(MAXIMUM_ITERATIONS + 1, MAX_RETRY_LIMIT)
-        else:
-            …
-
-    ITERATION_METRICS["success_rate"] = (
-        …
-
-    if logs_generator:
-        logs_generator = styles(logs_generator.replace('<br>', '\n').strip(), expanded=False)
-
-    return conversation_messages, logs_generator, generator_results
+                            ITERATION_METRICS["error_patterns"][f"{tool_invocation.function.name}_execution"] = ITERATION_METRICS["error_patterns"].get(f"{tool_invocation.function.name}_execution", 0) + 1
+
+                            for i in range(0, len(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "error", error=str(tool_error))) + 1):
+                                ITERATION_METRICS["logs_generator"] = styles(reasoning_interfaces(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "error", error=str(tool_error)), i), expanded=True)
+                                yield ITERATION_METRICS["logs_generator"]
+                            ITERATION_METRICS["logs_generator"] = styles(tool_reasoning(tool_invocation.function.name, tools_parser(tool_invocation.function.arguments)[0], "error", error=str(tool_error)), expanded=True)
+                            yield ITERATION_METRICS["logs_generator"]
+
+                            conversation_messages.append({
+                                "role": "tool",
+                                "tool_call_id": tool_invocation.id,
+                                "name": tool_invocation.function.name,
+                                "content": str(tool_error)
+                            })
+
+                return conversation_messages, ITERATION_METRICS.get("logs_generator", ""), True
+
+            except Exception:
+                ITERATION_METRICS["failures"] = ITERATION_METRICS.get("failures", 0) + 1
+
+        ITERATION_METRICS["retry_count"] = ITERATION_METRICS.get("retry_count", 0) + 1
+
+        if ITERATION_METRICS["error_patterns"]:
+            if max(ITERATION_METRICS["error_patterns"].values()) > 3:
+                ITERATION_METRICS["current_iterations"] = min(ITERATION_METRICS["current_iterations"] + 2, MAX_RETRY_LIMIT)
+            else:
+                ITERATION_METRICS["current_iterations"] = min(ITERATION_METRICS["current_iterations"] + 1, MAX_RETRY_LIMIT)
+        else:
+            ITERATION_METRICS["current_iterations"] = min(ITERATION_METRICS["current_iterations"] + 1, MAX_RETRY_LIMIT)
+
+        if ITERATION_METRICS["current_iterations"] > ITERATION_METRICS.get("previous_iterations", 0):
+            for i in range(0, len(f"Retrying with increased iterations: {ITERATION_METRICS['current_iterations']} (attempt {ITERATION_METRICS.get('retry_count', 0) + 1})") + 1):
+                ITERATION_METRICS["logs_generator"] = styles(reasoning_interfaces(f"Retrying with increased iterations: {ITERATION_METRICS['current_iterations']} (attempt {ITERATION_METRICS.get('retry_count', 0) + 1})", i), expanded=True)
+                yield ITERATION_METRICS["logs_generator"]
+
+        ITERATION_METRICS["previous_iterations"] = ITERATION_METRICS["current_iterations"]
+
+        if ITERATION_METRICS["current_iterations"] >= MAX_RETRY_LIMIT:
+            ITERATION_METRICS["logs_generator"] = styles(f"Maximum retry limit reached after {ITERATION_METRICS.get('attempts', 0)} attempts with {ITERATION_METRICS.get('failures', 0)} failures", expanded=True)
+            yield ITERATION_METRICS["logs_generator"]
+            break
 
+    ITERATION_METRICS["success_rate"] = (ITERATION_METRICS.get("tool_results_count", 0) / max(ITERATION_METRICS.get("attempts", 1), 1)) * 100
 
+    if ITERATION_METRICS.get("logs_generator", ""):
+        ITERATION_METRICS["logs_generator"] = styles(ITERATION_METRICS.get("logs_generator", "").replace('<br>', '\n').strip(), expanded=False)
 
+    return conversation_messages, ITERATION_METRICS.get("logs_generator", ""), ITERATION_METRICS.get("tool_results_count", 0) > 0
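The rewrite folds all loop state (current iteration budget, retry count, backoff delays, error tallies, the last rendered log) into the shared ITERATION_METRICS dict from config. The adaptive part in isolation, as a sketch; retry_delays and backoff_multiplier are assumed to be pre-seeded in config.py, since the new code indexes them without defaults:

import time

METRICS = {
    "current_iterations": 3,      # seeded from MAXIMUM_ITERATIONS
    "retry_count": 0,
    "retry_delays": [1, 2, 4],    # assumed config values
    "backoff_multiplier": 1.5,    # assumed config value
    "error_patterns": {},
}
MAX_RETRY_LIMIT = 10

def backoff_sleep():
    # Sleep before retrying, capped at the last configured delay.
    if METRICS["retry_count"] > 0:
        idx = min(METRICS["retry_count"], len(METRICS["retry_delays"]) - 1)
        time.sleep(METRICS["retry_delays"][idx] * METRICS["backoff_multiplier"])

def grow_budget():
    # One error pattern repeating more than 3 times grows the budget by 2, else by 1.
    step = 2 if METRICS["error_patterns"] and max(METRICS["error_patterns"].values()) > 3 else 1
    METRICS["current_iterations"] = min(METRICS["current_iterations"] + step, MAX_RETRY_LIMIT)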
src/processor/tools/parser.py
CHANGED
@@ -5,13 +5,10 @@
 
 import json
 
-def extract_tool_parameters(raw_parameters, …):
+def tools_parser(raw_parameters):
     try:
         parsed_params = json.loads(raw_parameters or "{}")
-        …
-            parsed_params["engine"] = fallback_engine
-        if "engine" not in parsed_params:
-            parsed_params["engine"] = fallback_engine
+        parsed_params["engine"] = "google"
         return parsed_params, None
     except Exception as parse_error:
         return None, f"Invalid tool arguments: {str(parse_error)}"
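The parser contract is a two-tuple: (params, None) on success with engine now unconditionally forced to google, or (None, message) on malformed JSON. A quick check of both paths:

import json

def tools_parser(raw_parameters):
    try:
        parsed_params = json.loads(raw_parameters or "{}")
        parsed_params["engine"] = "google"
        return parsed_params, None
    except Exception as parse_error:
        return None, f"Invalid tool arguments: {str(parse_error)}"

print(tools_parser('{"query": "searxng"}'))  # ({'query': 'searxng', 'engine': 'google'}, None)
print(tools_parser('{not json'))             # (None, 'Invalid tool arguments: ...')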
src/tools/__init__.py
CHANGED
@@ -3,6 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-from .tool_manager import construct_tool_definitions
+from .tool_manager import local_tools
 
-__all__ = ['construct_tool_definitions']
+__all__ = ['local_tools']
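local_tools() returns the OpenAI-style function schemas that tools_setup forwards verbatim to chat.completions.create(tools=...). A small inspection sketch; it assumes the repo is on the import path, and read_url as the second tool is inferred from executor.py and tool_reasoning.py rather than shown in this hunk:

from src.tools import local_tools  # re-exported by this __init__.py

for tool in local_tools():
    fn = tool["function"]
    print(fn["name"], "->", sorted(fn["parameters"]["properties"]))
# Expected: web_search with query/engine, plus a read_url tool (inferred).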
src/tools/tool_manager.py
CHANGED
@@ -3,13 +3,13 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-def construct_tool_definitions():
+def local_tools():
     return [
         {
             "type": "function",
             "function": {
                 "name": "web_search",
-                "description": "Perform a web search via SearXNG (Google…",
+                "description": "Perform a web search via SearXNG (Google only).",
                 "parameters": {
                     "type": "object",
                     "properties": {
@@ -18,12 +18,7 @@ def construct_tool_definitions():
                         },
                         "engine": {
                             "type": "string",
-                            "enum": [
-                                "google",
-                                "bing",
-                                "baidu"
-                            ],
-                            "default": "google",
+                            "enum": ["google"]
                         },
                     },
                     "required": ["query"],