File size: 4,260 Bytes
5081ce5
 
32ffa10
5081ce5
32ffa10
 
 
f85b689
 
95634e0
5081ce5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f85b689
 
5081ce5
 
f85b689
 
 
5081ce5
 
 
 
 
f85b689
5081ce5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f85b689
 
 
5081ce5
8db0d23
 
 
 
 
 
 
 
 
 
 
5081ce5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f85b689
 
5081ce5
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import gradio as gr
from huggingface_hub import InferenceClient
from smolagents import DuckDuckGoSearchTool
import re

# Shared search tool instance; process_searches() calls it once per query.
web_search = DuckDuckGoSearchTool()

# Default system prompt, pre-filled in the UI's "System Prompt" textbox.
# It tells the model to request web searches by emitting one query per line
# inside <search>...</search> tags, which is the format process_searches()
# parses.
SYSTEM_PROMPT = """
You are an AI research assistant that can search the web. Follow these steps:

1. FIRST ANALYZE the user's question:
   - If information is missing or ambiguous, ask ONE clarifying question
   - If clear, proceed to search

2. When searching:
   - Generate multiple specific search queries wrapped in <search> tags
   - Focus on factual keywords, one query per line
   Example:
   <search>
   Pont des Arts exact length meters
   History of Pont des Arts bridge
   </search>

3. After receiving results:
   - Analyze information from multiple sources
   - Cross-verify facts
   - If needed, generate follow-up searches
   - Provide final answer with:
     - Clear structure
     - Key facts with sources
     - Concise explanations

Never invent information. Cite sources for all facts. Use neutral, academic tone.
"""

def process_searches(response):
    """Run every web search requested in a model reply and format the results.

    The model requests searches by emitting one or more
    ``<search>...</search>`` blocks with one query per line. Every non-blank
    line of every block is sent to the module-level ``web_search`` tool.

    Args:
        response: Raw text of the model's reply.

    Returns:
        ``None`` when the reply contains no ``<search>`` block. Otherwise a
        newline-joined string with one "SEARCH / RESULTS" section per query
        (an empty string if the blocks held only blank lines). Callers should
        treat any falsy value as "nothing was searched".
    """
    search_blocks = re.findall(r'<search>(.*?)</search>', response, re.DOTALL)
    if not search_blocks:
        return None

    # Collect queries from all blocks, not just the first one, so follow-up
    # searches emitted in the same reply are not silently dropped.
    queries = []
    for block in search_blocks:
        for line in block.split('\n'):
            query = line.strip()
            if query:
                queries.append(query)

    results = []
    for query in queries:
        try:
            search_result = web_search(query)
        except Exception as exc:
            # The search tool signals "no results" (and network problems)
            # with a plain Exception, so it cannot be narrowed further.
            # Report the failure for this query and keep the others going.
            search_result = f"Search failed: {exc}"
        results.append(f"🔍 SEARCH: {query}\nRESULTS: {search_result}\n")
    return '\n'.join(results)

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token,
):
    """Stream a chat reply, running web searches whenever the model asks.

    Each round streams one completion. If that completion contains
    ``<search>`` blocks, the queries are executed via ``process_searches``,
    the results are fed back to the model as a new user turn, and another
    round starts. The loop ends when the model answers without requesting a
    search, or when the search-round budget is used up.

    Args:
        message: The new user message.
        history: Previous ``(user, assistant)`` turns of the conversation.
        system_message: System prompt placed first in the message list.
        max_tokens: Maximum number of tokens to generate per completion.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        hf_token: Hugging Face API token used to authenticate the client.

    Yields:
        The accumulated text of the reply so far (including search status
        lines), once per received token and once after each search round.
    """
    # Upper bound on search/answer cycles so a model that keeps emitting
    # <search> tags cannot loop (and bill the token) forever.
    max_search_rounds = 5

    client = InferenceClient(
        provider="hf-inference",
        api_key=hf_token
    )

    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    full_response = ""
    rounds_left = max_search_rounds

    while True:
        completion = client.chat.completions.create(
            model="Qwen/QwQ-32B",
            messages=messages,
            # Honour the UI slider instead of a hard-coded limit.
            max_tokens=int(max_tokens),
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )

        response = ""
        for chunk in completion:
            # Some stream chunks carry no choices or no text; skip them.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content or ""
            if not token:
                continue
            response += token
            full_response += token
            # Yield on every token so the answer is actually streamed to
            # the UI, not only the pieces that contain thinking tags.
            yield full_response

        if rounds_left <= 0:
            break

        search_results = process_searches(response)
        if not search_results:
            # No search requested (or nothing to search): this was the
            # final answer.
            break
        rounds_left -= 1

        # Recover the query list for display; the format mirrors what
        # process_searches parses (one query per line inside <search> tags).
        queries = [
            line.strip()
            for line in '\n'.join(
                re.findall(r'<search>(.*?)</search>', response, re.DOTALL)
            ).split('\n')
            if line.strip()
        ]

        messages.append({"role": "assistant", "content": response})
        messages.append({
            "role": "user",
            "content": f"SEARCH RESULTS:\n{search_results}\nAnalyze these results..."
        })

        query_list = "\n".join(queries)
        full_response += f"\n🔍 SEARCH QUERIES USED:\n{query_list}\n\n"
        full_response += "\n🔍 Analyzing search results...\n"
        yield full_response

# Chat UI wired to `respond`. The additional inputs below are listed in the
# same order as respond()'s parameters after (message, history):
# system_message, max_tokens, temperature, top_p, hf_token.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=SYSTEM_PROMPT, label="System Prompt", lines=8),
        gr.Slider(minimum=1000, maximum=15000, value=6000, step=500, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.85, step=0.05, label="Top-p"),
        gr.Textbox(label="HF API Token", type="password")
    ],
    # Title emoji was mis-encoded (UTF-8 bytes decoded with a legacy code
    # page); restored to the intended robot face.
    title="Web Research Agent 🤖",
    description="Advanced AI assistant with web search capabilities",
    examples=[
        ["Compare COVID-19 mortality rates between US and Sweden with sources"],
        ["What's the current consensus on dark matter composition?"],
        ["Latest advancements in fusion energy 2023-2024"]
    ],
    cache_examples=False
)

# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()