Commit 408c946

SearchGPT: Initial.

Signed-off-by: Hadad <[email protected]>

Files changed:
- .gitattributes +35 -0
- Dockerfile +22 -0
- README.md +75 -0
- app.py +34 -0
- assets/css/__init__.py +8 -0
- assets/css/reasoning.py +31 -0
- config.py +267 -0
- requirements.txt +2 -0
- src/client/__init__.py +8 -0
- src/client/openai_client.py +17 -0
- src/core/__init__.py +12 -0
- src/core/web_configuration.py +13 -0
- src/core/web_loader.py +188 -0
- src/engine/__init__.py +8 -0
- src/engine/browser_engine.py +88 -0
- src/processor/__init__.py +8 -0
- src/processor/message_processor.py +237 -0
- src/tools/__init__.py +8 -0
- src/tools/tool_manager.py +50 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
@@ -0,0 +1,22 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Use the official Python container image for the app
+FROM python:latest
+
+# Set the main working directory inside the container
+WORKDIR /app
+
+# Copy all files into the container
+COPY . .
+
+# Install all dependencies
+RUN pip install -r requirements.txt
+
+# Expose the port so the app can be accessed
+EXPOSE 7860
+
+# Start the app
+CMD ["python", "app.py"]
README.md
ADDED
@@ -0,0 +1,75 @@
+---
+title: SearchGPT
+short_description: ChatGPT with real-time web search & URL reading capability
+license: apache-2.0
+emoji: ⚡
+colorFrom: blue
+colorTo: yellow
+sdk: docker
+app_port: 7860
+pinned: false
+# Used to promote this Hugging Face Space
+models:
+- hadadrjt/JARVIS
+- agentica-org/DeepCoder-14B-Preview
+- agentica-org/DeepSWE-Preview
+- fka/awesome-chatgpt-prompts
+- black-forest-labs/FLUX.1-Kontext-dev
+- ChatDOC/OCRFlux-3B
+- deepseek-ai/DeepSeek-R1
+- deepseek-ai/DeepSeek-R1-0528
+- deepseek-ai/DeepSeek-R1-Distill-Llama-70B
+- deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
+- deepseek-ai/DeepSeek-R1-0528-Qwen3-8B
+- deepseek-ai/DeepSeek-V3-0324
+- google/gemma-3-1b-it
+- google/gemma-3-27b-it
+- google/gemma-3-4b-it
+- google/gemma-3n-E4B-it
+- google/gemma-3n-E4B-it-litert-preview
+- google/medsiglip-448
+- kyutai/tts-1.6b-en_fr
+- meta-llama/Llama-3.1-8B-Instruct
+- meta-llama/Llama-3.2-3B-Instruct
+- meta-llama/Llama-3.3-70B-Instruct
+- meta-llama/Llama-4-Maverick-17B-128E-Instruct
+- meta-llama/Llama-4-Scout-17B-16E-Instruct
+- microsoft/Phi-4-mini-instruct
+- mistralai/Devstral-Small-2505
+- mistralai/Mistral-Small-3.1-24B-Instruct-2503
+- openai/webgpt_comparisons
+- openai/whisper-large-v3-turbo
+- openai/gpt-oss-120b
+- openai/gpt-oss-20b
+- Qwen/QwQ-32B
+- Qwen/Qwen2.5-VL-32B-Instruct
+- Qwen/Qwen2.5-VL-3B-Instruct
+- Qwen/Qwen2.5-VL-72B-Instruct
+- Qwen/Qwen3-235B-A22B
+- THUDM/GLM-4.1V-9B-Thinking
+- tngtech/DeepSeek-TNG-R1T2-Chimera
+- moonshotai/Kimi-K2-Instruct
+- Qwen/Qwen3-235B-A22B-Instruct-2507
+- Qwen/Qwen3-Coder-480B-A35B-Instruct
+- Qwen/Qwen3-235B-A22B-Thinking-2507
+- zai-org/GLM-4.5
+- zai-org/GLM-4.5-Air
+- zai-org/GLM-4.5V
+- deepseek-ai/DeepSeek-V3.1
+- deepseek-ai/DeepSeek-V3.1-Base
+- microsoft/VibeVoice-1.5B
+- xai-org/grok-2
+- Qwen/Qwen-Image-Edit
+- ByteDance-Seed/Seed-OSS-36B-Instruct
+- google/gemma-3-270m
+- google/gemma-3-270m-it
+- openbmb/MiniCPM-V-4_5
+- tencent/Hunyuan-MT-7B
+- meituan-longcat/LongCat-Flash-Chat
+- Phr00t/WAN2.2-14B-Rapid-AllInOne
+- apple/FastVLM-0.5B
+- stepfun-ai/Step-Audio-2-mini
+# Used to promote this Hugging Face Space
+datasets:
+- fka/awesome-chatgpt-prompts
+---
app.py
ADDED
@@ -0,0 +1,34 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from src.processor.message_processor import process_user_request
+from config import DESCRIPTION
+import gradio as gr
+
+with gr.Blocks(fill_height=True, fill_width=True) as app:
+    with gr.Sidebar(): gr.HTML(DESCRIPTION)
+    gr.ChatInterface(
+        fn=process_user_request,
+        chatbot=gr.Chatbot(
+            label="SearchGPT | GPT-4.1 (Nano)",
+            type="messages",
+            show_copy_button=True,
+            scale=1
+        ),
+        type="messages",
+        examples=[
+            ["What is UltimaX Intelligence"],
+            ["https://wikipedia.org/wiki/Artificial_intelligence Read and summarize that"],
+            ["What's the latest AI development in 2025?"],
+            ["OpenAI GPT-5 vs DeepSeek V3.1"]
+        ],
+        cache_examples=False,
+        show_api=False
+    )
+
+app.launch(
+    server_name="0.0.0.0",
+    pwa=True
+)
assets/css/__init__.py
ADDED
@@ -0,0 +1,8 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from .reasoning import styles
+
+__all__ = ['styles']
assets/css/reasoning.py
ADDED
@@ -0,0 +1,31 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+def styles(reasoning: str, expanded: bool = False) -> str:
+    open_attr = "open" if expanded else ""
+    emoji = "🧠"
+    return f"""
+<details {open_attr} style="
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+">
+    <summary style="
+        font-weight: 700;
+        font-size: 14px !important;
+        cursor: pointer;
+        user-select: none;
+    ">
+        {emoji} Reasoning
+    </summary>
+    <div style="
+        margin-top: 6px;
+        padding-top: 6px;
+        font-size: 10px !important;
+        line-height: 1.7;
+        letter-spacing: 0.02em;
+    ">
+        {reasoning}
+    </div>
+</details>
+"""
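The styles helper above is what the processor uses to wrap tool logs in a collapsible HTML block. A minimal usage sketch (the argument string is a made-up example):

    from assets.css.reasoning import styles

    # Returns a <details> snippet; expanded=True renders it pre-opened
    html_block = styles("Tool: web_search<br>Status: Executing", expanded=True)
    print(html_block)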
config.py
ADDED
@@ -0,0 +1,267 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# OPENAI_API_BASE_URL  # Endpoint. Not here -> Hugging Face Spaces secrets
+# OPENAI_API_KEY       # API key. Not here -> Hugging Face Spaces secrets
+
+MODEL = "gpt-4.1-nano"
+
+SEARXNG_ENDPOINT = "https://searx.stream/search"  # See the endpoint list at https://searx.space
+BAIDU_ENDPOINT = "https://www.baidu.com/s"
+READER_ENDPOINT = "https://r.jina.ai/"
+REQUEST_TIMEOUT = 300  # 5 minutes
+
+INSTRUCTIONS = """
+You are ChatGPT with advanced real-time web search, content extraction, and summarization capabilities.
+
+Your objective is to provide the most accurate, comprehensive, and professionally structured responses to user queries.
+
+Always use web search to gather relevant information before responding unless the question is purely factual and does not require external sources.
+
+Search workflow:
+1. Perform a web search using available engines (Google, Bing, Baidu) to retrieve highly relevant results
+2. Select up to 10 top results based on relevance, credibility, and content depth
+3. For each selected URL, fetch the full content using the read_url function
+4. Extract key information, critical data, and insights
+5. Collect all URLs encountered in search results and content extraction
+6. Provide a structured summary in English, professional, concise, and precise
+7. Include citations for each URL used, in the format [Source title](URL)
+8. If information is ambiguous, incomplete, or contradictory, state so clearly
+9. Ensure your response is readable, logically organized, and free of emoji, dashes, or unnecessary symbols
+"""
+
+CONTENT_EXTRACTION = """
+<system>
+- Analyze the retrieved content in detail
+- Identify all critical facts, arguments, statistics, and relevant data
+- Collect all URLs, hyperlinks, references, and citations mentioned in the content
+- Evaluate the credibility of sources, highlighting potential biases or conflicts
+- Produce a structured, professional, and comprehensive summary
+- Emphasize clarity, accuracy, and logical flow
+- Include all discovered URLs in the final summary as [Source title](URL)
+- Mark any uncertainties, contradictions, or missing information clearly
+</system>
+"""
+
+SEARCH_SELECTION = """
+<system>
+- For each search result, fetch the full content using read_url
+- Extract key information, main arguments, data points, and statistics
+- Capture every URL present in the content or references
+- Create a professional, structured summary in English
+- List each source at the end of the summary in the format [Source title](link)
+- Identify ambiguities or gaps in information
+- Ensure clarity, completeness, and high information density
+</system>
+"""
+
+DESCRIPTION = """
+<b>SearchGPT</b> is <b>ChatGPT</b> with real-time web search capabilities and the ability to read content directly from a URL.
+<br><br>
+This Space implements an agent-based system with <b><a href="https://www.gradio.app" target="_blank">Gradio</a></b>. It is integrated with
+<b><a href="https://docs.searxng.org" target="_blank">SearXNG</a></b>, which is exposed to the model as a native tool (function) it can call.
+<br><br>
+The agent mode is inspired by the <b><a href="https://openwebui.com/t/hadad/deep_research" target="_blank">Deep Research</a></b> tools script from
+<b><a href="https://docs.openwebui.com" target="_blank">OpenWebUI</a></b>.
+<br><br>
+The <b>Deep Research</b> feature is also available on the primary Spaces of <b><a href="https://umint-openwebui.hf.space"
+target="_blank">UltimaX Intelligence</a></b>.
+<br><br>
+Please consider reading the <b><a href="https://huggingface.co/spaces/umint/ai/discussions/37#68b55209c51ca52ed299db4c"
+target="_blank">Terms of Use and Consequences of Violation</a></b> if you wish to proceed to the main Spaces.
+<br><br>
+<b>Like this project? Feel free to buy me a <a href="https://ko-fi.com/hadad" target="_blank">coffee</a></b>.
+"""
+
+OS = [
+    "Windows NT 10.0; Win64; x64",
+    "Macintosh; Intel Mac OS X 10_15_7",
+    "X11; Linux x86_64",
+    "Windows NT 11.0; Win64; x64",
+    "Macintosh; Intel Mac OS X 11_6_2"
+]
+
+OCTETS = [
+    1, 2, 3, 4, 5, 8, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 23, 24, 34, 35, 36,
+    37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
+    47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
+    57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
+    67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
+    77, 78, 79, 80, 81, 82, 83, 84, 85, 86,
+    87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
+    97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
+    107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
+    117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
+    128, 129, 130, 131, 132, 133, 134, 135, 136, 137,
+    138, 139, 140, 141, 142, 143, 144, 145, 146, 147,
+    148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
+    158, 159, 160, 161, 162, 163, 164, 165, 166, 167,
+    168, 170, 171, 172, 173, 174, 175, 176, 177, 178,
+    179, 180, 181, 182, 183, 184, 185, 186, 187, 188,
+    189, 190, 191, 192, 193, 194, 195, 196, 197, 198,
+    199, 200, 201, 202, 203, 204, 205, 206, 207, 208,
+    209, 210, 211, 212, 213, 214, 215, 216, 217, 218,
+    219, 220, 221, 222, 223
+]
+
+BROWSERS = [
+    "Chrome",
+    "Firefox",
+    "Safari",
+    "Edge",
+    "Opera"
+]
+
+CHROME_VERSIONS = [
+    "120.0.0.0",
+    "119.0.0.0",
+    "118.0.0.0",
+    "117.0.0.0",
+    "116.0.0.0"
+]
+
+FIREFOX_VERSIONS = [
+    "121.0",
+    "120.0",
+    "119.0",
+    "118.0",
+    "117.0"
+]
+
+SAFARI_VERSIONS = [
+    "17.1",
+    "17.0",
+    "16.6",
+    "16.5",
+    "16.4"
+]
+
+EDGE_VERSIONS = [
+    "120.0.2210.91",
+    "119.0.2151.97",
+    "118.0.2088.76",
+    "117.0.2045.60",
+    "116.0.1938.81"
+]
+
+DOMAINS = [
+    "google.com",
+    "bing.com",
+    "yahoo.com",
+    "duckduckgo.com",
+    "baidu.com",
+    "yandex.com",
+    "facebook.com",
+    "twitter.com",
+    "linkedin.com",
+    "reddit.com",
+    "youtube.com",
+    "wikipedia.org",
+    "amazon.com",
+    "github.com",
+    "stackoverflow.com",
+    "medium.com",
+    "quora.com",
+    "pinterest.com",
+    "instagram.com",
+    "tumblr.com"
+]
+
+PROTOCOLS = [
+    "https://",
+    "https://www."
+]
+
+SEARCH_ENGINES = [
+    "https://www.google.com/search?q=",
+    "https://www.bing.com/search?q=",
+    "https://search.yahoo.com/search?p=",
+    "https://duckduckgo.com/?q=",
+    "https://www.baidu.com/s?wd=",
+    "https://yandex.com/search/?text=",
+    "https://www.google.co.uk/search?q=",
+    "https://www.google.ca/search?q=",
+    "https://www.google.com.au/search?q=",
+    "https://www.google.de/search?q=",
+    "https://www.google.fr/search?q=",
+    "https://www.google.co.jp/search?q=",
+    "https://www.google.com.br/search?q=",
+    "https://www.google.co.in/search?q=",
+    "https://www.google.ru/search?q=",
+    "https://www.google.it/search?q="
+]
+
+KEYWORDS = [
+    "news",
+    "weather",
+    "sports",
+    "technology",
+    "science",
+    "health",
+    "finance",
+    "entertainment",
+    "travel",
+    "food",
+    "education",
+    "business",
+    "politics",
+    "culture",
+    "history",
+    "music",
+    "movies",
+    "games",
+    "books",
+    "art"
+]
+
+COUNTRIES = [
+    "US", "GB", "CA", "AU", "DE", "FR", "JP", "BR", "IN", "RU",
+    "IT", "ES", "MX", "NL", "SE", "NO", "DK", "FI", "PL", "TR",
+    "KR", "SG", "HK", "TW", "TH", "ID", "MY", "PH", "VN", "AR",
+    "CL", "CO", "PE", "VE", "EG", "ZA", "NG", "KE", "MA", "DZ",
+    "TN", "IL", "AE", "SA", "QA", "KW", "BH", "OM", "JO", "LB"
+]
+
+LANGUAGES = [
+    "en-US", "en-GB", "en-CA", "en-AU", "de-DE", "fr-FR", "ja-JP",
+    "pt-BR", "hi-IN", "ru-RU", "it-IT", "es-ES", "es-MX", "nl-NL",
+    "sv-SE", "no-NO", "da-DK", "fi-FI", "pl-PL", "tr-TR", "ko-KR",
+    "zh-CN", "zh-TW", "th-TH", "id-ID", "ms-MY", "fil-PH", "vi-VN",
+    "es-AR", "es-CL", "es-CO", "es-PE", "es-VE", "ar-EG", "en-ZA",
+    "en-NG", "sw-KE", "ar-MA", "ar-DZ", "ar-TN", "he-IL", "ar-AE",
+    "ar-SA", "ar-QA", "ar-KW", "ar-BH", "ar-OM", "ar-JO", "ar-LB"
+]
+
+TIMEZONES = [
+    "America/New_York",
+    "America/Chicago",
+    "America/Los_Angeles",
+    "America/Denver",
+    "Europe/London",
+    "Europe/Paris",
+    "Europe/Berlin",
+    "Europe/Moscow",
+    "Asia/Tokyo",
+    "Asia/Shanghai",
+    "Asia/Hong_Kong",
+    "Asia/Singapore",
+    "Asia/Seoul",
+    "Asia/Mumbai",
+    "Asia/Dubai",
+    "Australia/Sydney",
+    "Australia/Melbourne",
+    "America/Toronto",
+    "America/Vancouver",
+    "America/Mexico_City",
+    "America/Sao_Paulo",
+    "America/Buenos_Aires",
+    "Africa/Cairo",
+    "Africa/Johannesburg",
+    "Africa/Lagos",
+    "Africa/Nairobi",
+    "Pacific/Auckland",
+    "Pacific/Honolulu"
+]
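To see how these constants combine at request time, here is a short sketch that mirrors the URL construction in src/engine/browser_engine.py (the query string is a made-up example; "!go" and "!bi" select SearXNG's Google and Bing engines by their shortcuts):

    from urllib.parse import quote
    from config import READER_ENDPOINT, SEARXNG_ENDPOINT, BAIDU_ENDPOINT

    query = "latest AI development"

    # Google/Bing searches are routed through SearXNG via the Jina reader
    searxng_url = f"{READER_ENDPOINT}{SEARXNG_ENDPOINT}?q={quote('!go ' + query)}"
    # -> https://r.jina.ai/https://searx.stream/search?q=%21go%20latest%20AI%20development

    # Baidu searches hit its endpoint directly, still through the reader
    baidu_url = f"{READER_ENDPOINT}{BAIDU_ENDPOINT}?wd={quote(query)}"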
requirements.txt
ADDED
@@ -0,0 +1,2 @@
+gradio[oauth,mcp]
+openai
src/client/__init__.py
ADDED
@@ -0,0 +1,8 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from .openai_client import initialize_client
+
+__all__ = ['initialize_client']
src/client/openai_client.py
ADDED
@@ -0,0 +1,17 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import os
+from openai import OpenAI
+
+def initialize_client():
+    try:
+        client = OpenAI(
+            base_url=os.getenv("OPENAI_API_BASE_URL"),
+            api_key=os.getenv("OPENAI_API_KEY")
+        )
+        return client, None
+    except Exception as initialization_error:
+        return None, f"Failed to initialize client: {str(initialization_error)}"
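initialize_client returns a (client, error) pair instead of raising. A minimal usage sketch, assuming OPENAI_API_BASE_URL and OPENAI_API_KEY are set as Hugging Face Spaces secrets (the prompt text is a made-up example):

    from src.client.openai_client import initialize_client
    from config import MODEL

    client, error = initialize_client()
    if error:
        print(error)  # initialization failed; no client available
    else:
        # The client speaks the standard OpenAI Chat Completions API
        reply = client.chat.completions.create(
            model=MODEL,
            messages=[{"role": "user", "content": "Hello"}]
        )
        print(reply.choices[0].message.content)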
src/core/__init__.py
ADDED
@@ -0,0 +1,12 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from .web_loader import WebLoader
+from .web_configuration import WebConfiguration
+
+__all__ = [
+    'WebLoader',
+    'WebConfiguration'
+]
src/core/web_configuration.py
ADDED
@@ -0,0 +1,13 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from config import SEARXNG_ENDPOINT, BAIDU_ENDPOINT, READER_ENDPOINT, REQUEST_TIMEOUT
+
+class WebConfiguration:
+    def __init__(self):
+        self.searxng_endpoint = SEARXNG_ENDPOINT
+        self.baidu_endpoint = BAIDU_ENDPOINT
+        self.content_reader_api = READER_ENDPOINT
+        self.request_timeout = REQUEST_TIMEOUT
src/core/web_loader.py
ADDED
@@ -0,0 +1,188 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import random
+import threading
+import time
+from collections import deque
+from config import (
+    OS,
+    OCTETS,
+    BROWSERS,
+    CHROME_VERSIONS,
+    FIREFOX_VERSIONS,
+    SAFARI_VERSIONS,
+    EDGE_VERSIONS,
+    DOMAINS,
+    PROTOCOLS,
+    SEARCH_ENGINES,
+    KEYWORDS,
+    COUNTRIES,
+    LANGUAGES,
+    TIMEZONES
+)
+
+class WebLoader:
+    def __init__(self):
+        self.ipv4_pool = deque(maxlen=1000)
+        self.ipv6_pool = deque(maxlen=1000)
+        self.user_agent_pool = deque(maxlen=500)
+        self.origin_pool = deque(maxlen=500)
+        self.referrer_pool = deque(maxlen=500)
+        self.location_pool = deque(maxlen=500)
+        self.lock = threading.Lock()
+        self.running = True
+
+    def generate_ipv4(self):
+        while len(self.ipv4_pool) < 1000 and self.running:
+            octet = random.choice(OCTETS)
+            ip = f"{octet}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"
+            with self.lock:
+                self.ipv4_pool.append(ip)
+            time.sleep(0.001)
+
+    def generate_ipv6(self):
+        while len(self.ipv6_pool) < 1000 and self.running:
+            segments = []
+            for _ in range(8):
+                segments.append(f"{random.randint(0, 65535):04x}")
+            ip = ":".join(segments)
+            with self.lock:
+                self.ipv6_pool.append(ip)
+            time.sleep(0.001)
+
+    def generate_user_agents(self):
+        os_list = OS
+        browsers = BROWSERS
+        chrome_versions = CHROME_VERSIONS
+        firefox_versions = FIREFOX_VERSIONS
+        safari_versions = SAFARI_VERSIONS
+        edge_versions = EDGE_VERSIONS
+
+        while len(self.user_agent_pool) < 500 and self.running:
+            browser = random.choice(browsers)
+            os_string = random.choice(os_list)
+
+            if browser == "Chrome":
+                version = random.choice(chrome_versions)
+                ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version} Safari/537.36"
+            elif browser == "Firefox":
+                version = random.choice(firefox_versions)
+                ua = f"Mozilla/5.0 ({os_string}) Gecko/20100101 Firefox/{version}"
+            elif browser == "Safari":
+                version = random.choice(safari_versions)
+                webkit_version = f"{600 + random.randint(0, 15)}.{random.randint(1, 9)}.{random.randint(1, 20)}"
+                ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/{webkit_version} (KHTML, like Gecko) Version/{version} Safari/{webkit_version}"
+            elif browser == "Edge":
+                version = random.choice(edge_versions)
+                ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version.split('.')[0]}.0.0.0 Safari/537.36 Edg/{version}"
+            else:
+                version = f"{random.randint(70, 100)}.0.{random.randint(3000, 5000)}.{random.randint(50, 150)}"
+                ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version} Safari/537.36 OPR/{random.randint(80, 106)}.0.0.0"
+
+            with self.lock:
+                self.user_agent_pool.append(ua)
+            time.sleep(0.002)
+
+    def generate_origins(self):
+        domains = DOMAINS
+        protocols = PROTOCOLS
+
+        while len(self.origin_pool) < 500 and self.running:
+            protocol = random.choice(protocols)
+            domain = random.choice(domains)
+            origin = f"{protocol}{domain}"
+            with self.lock:
+                self.origin_pool.append(origin)
+            time.sleep(0.002)
+
+    def generate_referrers(self):
+        search_engines = SEARCH_ENGINES
+        keywords = KEYWORDS
+
+        while len(self.referrer_pool) < 500 and self.running:
+            engine = random.choice(search_engines)
+            keyword = random.choice(keywords)
+            referrer = f"{engine}{keyword}"
+            with self.lock:
+                self.referrer_pool.append(referrer)
+            time.sleep(0.002)
+
+    def generate_locations(self):
+        countries = COUNTRIES
+        languages = LANGUAGES
+        timezones = TIMEZONES
+
+        while len(self.location_pool) < 500 and self.running:
+            country = random.choice(countries)
+            language = random.choice(languages)
+            timezone = random.choice(timezones)
+            location = {
+                "country": country,
+                "language": language,
+                "timezone": timezone
+            }
+            with self.lock:
+                self.location_pool.append(location)
+            time.sleep(0.002)
+
+    def get_ipv4(self):
+        with self.lock:
+            if self.ipv4_pool:
+                return self.ipv4_pool[random.randint(0, len(self.ipv4_pool) - 1)]
+        return f"{random.randint(1, 223)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"
+
+    def get_ipv6(self):
+        with self.lock:
+            if self.ipv6_pool:
+                return self.ipv6_pool[random.randint(0, len(self.ipv6_pool) - 1)]
+        segments = [f"{random.randint(0, 65535):04x}" for _ in range(8)]
+        return ":".join(segments)
+
+    def get_user_agent(self):
+        with self.lock:
+            if self.user_agent_pool:
+                return self.user_agent_pool[random.randint(0, len(self.user_agent_pool) - 1)]
+        return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+
+    def get_origin(self):
+        with self.lock:
+            if self.origin_pool:
+                return self.origin_pool[random.randint(0, len(self.origin_pool) - 1)]
+        return "https://www.google.com"
+
+    def get_referrer(self):
+        with self.lock:
+            if self.referrer_pool:
+                return self.referrer_pool[random.randint(0, len(self.referrer_pool) - 1)]
+        return "https://www.google.com/search?q=search"
+
+    def get_location(self):
+        with self.lock:
+            if self.location_pool:
+                return self.location_pool[random.randint(0, len(self.location_pool) - 1)]
+        return {
+            "country": "US",
+            "language": "en-US",
+            "timezone": "America/New_York"
+        }
+
+    def start_engine(self):
+        threads = [
+            threading.Thread(target=self.generate_ipv4, daemon=True),
+            threading.Thread(target=self.generate_ipv6, daemon=True),
+            threading.Thread(target=self.generate_user_agents, daemon=True),
+            threading.Thread(target=self.generate_origins, daemon=True),
+            threading.Thread(target=self.generate_referrers, daemon=True),
+            threading.Thread(target=self.generate_locations, daemon=True)
+        ]
+        for thread in threads:
+            thread.start()
+
+    def stop(self):
+        self.running = False
+
+web_loader = WebLoader()
+web_loader.start_engine()
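Importing the module starts the six generator threads as a side effect, so the pools warm up in the background, and every get_* method falls back to a static default while its pool is still empty. A quick sketch of drawing from the pools:

    from src.core.web_loader import web_loader  # import starts the daemon threads

    ua = web_loader.get_user_agent()   # random browser user agent
    ip = web_loader.get_ipv4()         # random public-looking IPv4 address
    loc = web_loader.get_location()    # {"country": ..., "language": ..., "timezone": ...}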
src/engine/__init__.py
ADDED
@@ -0,0 +1,8 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from .browser_engine import BrowserEngine
+
+__all__ = ['BrowserEngine']
src/engine/browser_engine.py
ADDED
@@ -0,0 +1,88 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import requests
+from config import CONTENT_EXTRACTION, SEARCH_SELECTION
+from src.core.web_loader import web_loader
+
+class BrowserEngine:
+    def __init__(self, configuration):
+        self.config = configuration
+
+    def generate_headers(self):
+        ipv4 = web_loader.get_ipv4()
+        ipv6 = web_loader.get_ipv6()
+        user_agent = web_loader.get_user_agent()
+        origin = web_loader.get_origin()
+        referrer = web_loader.get_referrer()
+        location = web_loader.get_location()
+
+        return {
+            "User-Agent": user_agent,
+            "X-Forwarded-For": f"{ipv4}, {ipv6}",
+            "X-Real-IP": ipv4,
+            "X-Originating-IP": ipv4,
+            "X-Remote-IP": ipv4,
+            "X-Remote-Addr": ipv4,
+            "X-Client-IP": ipv4,
+            "X-Forwarded-Host": origin.replace("https://", "").replace("http://", ""),
+            "Origin": origin,
+            "Referer": referrer,
+            "Accept-Language": f"{location['language']},en;q=0.9",
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+            "Accept-Encoding": "gzip, deflate, br",
+            "DNT": "1",
+            "Connection": "keep-alive",
+            "Upgrade-Insecure-Requests": "1",
+            "Sec-Fetch-Dest": "document",
+            "Sec-Fetch-Mode": "navigate",
+            "Sec-Fetch-Site": "cross-site",
+            "Sec-Fetch-User": "?1",
+            "Cache-Control": "max-age=0",
+            "X-Country": location['country'],
+            "X-Timezone": location['timezone']
+        }
+
+    def extract_page_content(self, target_url: str) -> str:
+        try:
+            headers = self.generate_headers()
+            payload = {
+                "url": target_url
+            }
+            request_response = requests.post(
+                self.config.content_reader_api,
+                data=payload,
+                headers=headers,
+                timeout=self.config.request_timeout
+            )
+            request_response.raise_for_status()
+            extracted_content = request_response.text
+            return f"{extracted_content}{CONTENT_EXTRACTION}"
+        except Exception as error:
+            return f"Error reading URL: {str(error)}"
+
+    def perform_search(self, search_query: str, search_provider: str = "google") -> str:
+        try:
+            headers = self.generate_headers()
+
+            if search_provider == "baidu":
+                full_url = f"{self.config.content_reader_api}{self.config.baidu_endpoint}?wd={requests.utils.quote(search_query)}"
+                headers["X-Target-Selector"] = "#content_left"
+            else:
+                provider_prefix = "!go" if search_provider == "google" else "!bi"
+                encoded_query = requests.utils.quote(f"{provider_prefix} {search_query}")
+                full_url = f"{self.config.content_reader_api}{self.config.searxng_endpoint}?q={encoded_query}"
+                headers["X-Target-Selector"] = "#urls"
+
+            search_response = requests.get(
+                full_url,
+                headers=headers,
+                timeout=self.config.request_timeout
+            )
+            search_response.raise_for_status()
+            search_results = search_response.text
+            return f"{search_results}{SEARCH_SELECTION}"
+        except Exception as error:
+            return f"Error during search: {str(error)}"
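Both public methods return plain text with a follow-up instruction block appended (CONTENT_EXTRACTION or SEARCH_SELECTION), ready to be fed back to the model as a tool result. A minimal sketch (the query and URL are made-up examples):

    from src.core.web_configuration import WebConfiguration
    from src.engine.browser_engine import BrowserEngine

    engine = BrowserEngine(WebConfiguration())

    # Reader-extracted search results; X-Target-Selector is "#urls"
    # for SearXNG and "#content_left" for Baidu
    results = engine.perform_search("latest AI development", search_provider="google")

    # Reader-extracted page text
    page = engine.extract_page_content("https://wikipedia.org/wiki/Artificial_intelligence")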
src/processor/__init__.py
ADDED
@@ -0,0 +1,8 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from .message_processor import process_user_request
+
+__all__ = ['process_user_request']
src/processor/message_processor.py
ADDED
@@ -0,0 +1,237 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import json
+import traceback
+from openai import OpenAI
+from config import MODEL, INSTRUCTIONS
+from src.core.web_configuration import WebConfiguration
+from src.engine.browser_engine import BrowserEngine
+from src.tools.tool_manager import construct_tool_definitions
+from src.client.openai_client import initialize_client
+from assets.css.reasoning import styles
+
+def setup_response(system_instruction, conversation_history, user_input):
+    history = []
+
+    if system_instruction:
+        history.append({"role": "system", "content": system_instruction})
+
+    if isinstance(conversation_history, list):
+        for history_item in conversation_history:
+            message_role = history_item.get("role")
+            message_content = history_item.get("content")
+            if message_role in ("user", "assistant") and isinstance(message_content, str):
+                history.append({"role": message_role, "content": message_content})
+
+    if isinstance(user_input, str) and user_input.strip():
+        history.append({"role": "user", "content": user_input})
+
+    return history
+
+def extract_tool_parameters(raw_parameters, fallback_engine="google"):
+    try:
+        parsed_params = json.loads(raw_parameters or "{}")
+        if "engine" in parsed_params and parsed_params["engine"] not in ["google", "bing", "baidu"]:
+            parsed_params["engine"] = fallback_engine
+        if "engine" not in parsed_params:
+            parsed_params["engine"] = fallback_engine
+        return parsed_params, None
+    except Exception as parse_error:
+        return None, f"Invalid tool arguments: {str(parse_error)}"
+
+def assistant_response(response_message):
+    extracted_tool_calls = []
+
+    if getattr(response_message, "tool_calls", None):
+        for tool_call in response_message.tool_calls:
+            extracted_tool_calls.append(
+                {
+                    "id": tool_call.id,
+                    "type": "function",
+                    "function": {
+                        "name": tool_call.function.name,
+                        "arguments": tool_call.function.arguments
+                    }
+                }
+            )
+
+    return {
+        "role": "assistant",
+        "content": response_message.content or "",
+        "tool_calls": extracted_tool_calls if extracted_tool_calls else None
+    }
+
+def invoke_tool_function(search_engine, function_name, function_params):
+    if function_name == "web_search":
+        return search_engine.perform_search(
+            search_query=function_params.get("query", ""),
+            search_provider=function_params.get("engine", "google")
+        )
+    if function_name == "read_url":
+        return search_engine.extract_page_content(
+            target_url=function_params.get("url", "")
+        )
+    return f"Unknown tool: {function_name}"
+
+def generate_response(server, model_name, conversation_messages, tool_definitions):
+    response_generator = ""
+
+    try:
+        response = server.chat.completions.create(
+            model=model_name,
+            messages=conversation_messages,
+            tools=tool_definitions,
+            tool_choice="none",
+            temperature=1.0,
+            stream=True
+        )
+
+        for data in response:
+            try:
+                raw_data = data.choices[0].delta.content or ""
+            except Exception:
+                raw_data = ""
+
+            if raw_data:
+                response_generator += raw_data
+                yield response_generator
+
+        yield response_generator
+
+    except Exception as response_error:
+        response_generator += f"\nError: {str(response_error)}\n"
+        response_generator += traceback.format_exc()
+        yield response_generator
+
+def process_tool_interactions(server, model_name, conversation_messages, tool_definitions, search_engine):
+    maximum_iterations = 4
+    logs_generator = ""
+
+    for iteration_index in range(maximum_iterations):
+        try:
+            model_response = server.chat.completions.create(
+                model=model_name,
+                messages=conversation_messages,
+                tools=tool_definitions,
+                tool_choice="auto",
+                temperature=0.7
+            )
+        except Exception:
+            return conversation_messages, logs_generator
+
+        response_choice = model_response.choices[0]
+        assistant_message = response_choice.message
+        formatted_assistant_message = assistant_response(assistant_message)
+
+        conversation_messages.append(
+            {
+                "role": formatted_assistant_message["role"],
+                "content": formatted_assistant_message["content"],
+                "tool_calls": formatted_assistant_message["tool_calls"]
+            }
+        )
+
+        pending_tool_calls = assistant_message.tool_calls or []
+        if not pending_tool_calls:
+            return conversation_messages, logs_generator
+
+        for tool_invocation in pending_tool_calls:
+            tool_name = tool_invocation.function.name
+            tool_arguments_raw = tool_invocation.function.arguments
+
+            extracted_arguments, extraction_error = extract_tool_parameters(tool_arguments_raw)
+
+            if extraction_error:
+                log_content = f"Tool: {tool_name}<br>Status: Failed<br>Error: {extraction_error}"
+                logs_generator = styles(log_content, expanded=True)
+                yield logs_generator
+                tool_execution_result = extraction_error
+            else:
+                log_content = f"Tool: {tool_name}<br>Status: Executing<br>Parameters: {json.dumps(extracted_arguments, indent=2).replace(' ', '&nbsp;').replace(chr(10), '<br>')}"
+                logs_generator = styles(log_content, expanded=True)
+                yield logs_generator
+
+                tool_execution_result = invoke_tool_function(
+                    search_engine,
+                    tool_name,
+                    extracted_arguments
+                )
+
+            result_preview = tool_execution_result[:500] + "..." if len(tool_execution_result) > 500 else tool_execution_result
+            log_content = f"Tool: {tool_name}<br>Status: Completed<br>Parameters: {json.dumps(extracted_arguments, indent=2).replace(' ', '&nbsp;').replace(chr(10), '<br>')}<br>Result Preview: {result_preview.replace(chr(10), '<br>')}"
+            logs_generator = styles(log_content, expanded=False)
+            yield logs_generator
+
+            conversation_messages.append(
+                {
+                    "role": "tool",
+                    "tool_call_id": tool_invocation.id,
+                    "name": tool_name,
+                    "content": tool_execution_result
+                }
+            )
+
+    return conversation_messages, logs_generator
+
+def process_user_request(user_message, chat_history):
+    if not isinstance(user_message, str) or not user_message.strip():
+        yield []
+        return
+
+    output_content = ""
+
+    try:
+        server, client_initialization_error = initialize_client()
+        if client_initialization_error:
+            output_content = client_initialization_error
+            yield output_content
+            return
+
+        search_configuration = WebConfiguration()
+        search_engine_instance = BrowserEngine(search_configuration)
+        available_tools = construct_tool_definitions()
+
+        conversation_messages = setup_response(
+            INSTRUCTIONS,
+            chat_history,
+            user_message
+        )
+
+        tool_response = ""
+        for tool_update in process_tool_interactions(
+            server=server,
+            model_name=MODEL,
+            conversation_messages=conversation_messages,
+            tool_definitions=available_tools,
+            search_engine=search_engine_instance
+        ):
+            if isinstance(tool_update, str):
+                tool_response = tool_update
+                yield tool_response
+            else:
+                conversation_messages = tool_update[0]
+                tool_response = tool_update[1]
+
+        if tool_response:
+            yield tool_response + "\n\n"
+
+        final_response_generator = generate_response(
+            server=server,
+            model_name=MODEL,
+            conversation_messages=conversation_messages,
+            tool_definitions=available_tools
+        )
+
+        for response_final in final_response_generator:
+            if tool_response:
+                yield tool_response + "\n\n" + response_final
+            else:
+                yield response_final
+
+    except Exception as processing_error:
+        output_content += f"\nError: {str(processing_error)}\n"
+        output_content += traceback.format_exc()
+        yield output_content
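process_user_request is itself a generator, which is what lets gr.ChatInterface stream: each yielded string replaces the message shown so far. Outside Gradio it can be driven the same way (the prompt is one of the app's own examples):

    from src.processor.message_processor import process_user_request

    for partial in process_user_request("What is UltimaX Intelligence", []):
        print(partial)  # progressively longer tool-log HTML and answer text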
src/tools/__init__.py
ADDED
@@ -0,0 +1,8 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from .tool_manager import construct_tool_definitions
+
+__all__ = ['construct_tool_definitions']
src/tools/tool_manager.py
ADDED
@@ -0,0 +1,50 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+def construct_tool_definitions():
+    return [
+        {
+            "type": "function",
+            "function": {
+                "name": "web_search",
+                "description": "Perform a web search via SearXNG (Google or Bing) or Baidu.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string"
+                        },
+                        "engine": {
+                            "type": "string",
+                            "enum": [
+                                "google",
+                                "bing",
+                                "baidu"
+                            ],
+                            "default": "google"
+                        }
+                    },
+                    "required": ["query"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "read_url",
+                "description": "Fetch and extract main content from a URL.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "url": {
+                            "type": "string",
+                            "format": "uri"
+                        }
+                    },
+                    "required": ["url"]
+                }
+            }
+        }
+    ]
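These definitions follow the OpenAI function-calling schema, so they plug straight into a chat completion request, which is exactly what message_processor.py does. A minimal sketch, assuming a client from initialize_client and omitting its error handling (the prompt is one of the app's own examples):

    from src.client.openai_client import initialize_client
    from src.tools.tool_manager import construct_tool_definitions
    from config import MODEL

    client, error = initialize_client()
    response = client.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": "What's the latest AI development in 2025?"}],
        tools=construct_tool_definitions(),
        tool_choice="auto"  # the model decides when to call web_search or read_url
    )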