Spaces:
Running
Running
import gradio as gr
from huggingface_hub import InferenceClient
from smolagents import DuckDuckGoSearchTool
import re

# Module-level search tool, shared by every chat request.
web_search = DuckDuckGoSearchTool()
# Default system prompt: instructs the model to emit <search>...</search>
# blocks (one query per line), which respond() intercepts, executes via
# web_search, and feeds back as a follow-up user message.
SYSTEM_PROMPT = """You are an AI research assistant that can search the web. Follow these steps:
1. FIRST ANALYZE the user's question:
- If information is missing or ambiguous, ask ONE clarifying question
- If clear, proceed to search
2. When searching:
- Generate multiple specific search queries wrapped in <search> tags
- Focus on factual keywords, one query per line
Example:
<search>
Pont des Arts exact length meters
History of Pont des Arts bridge
</search>
3. After receiving results:
- Analyze information from multiple sources
- Cross-verify facts
- If needed, generate follow-up searches
- Provide final answer with:
- Clear structure
- Key facts with sources
- Concise explanations
Never invent information. Cite sources for all facts. Use neutral, academic tone."""
def process_searches(response):
    """Extract <search>...</search> queries from *response* and run them.

    Each non-blank line inside a <search> block is treated as one query and
    executed with the module-level ``web_search`` tool.

    Returns the concatenated, labeled result text, or ``None`` when the
    response contains no <search> block.
    """
    blocks = re.findall(r'<search>(.*?)</search>', response, re.DOTALL)
    if not blocks:
        return None
    # Bug fix: the original only read searches[0], silently dropping any
    # additional <search> blocks in the same response. Process all of them.
    queries = [q.strip() for block in blocks for q in block.split('\n') if q.strip()]
    results = []
    for query in queries:
        search_result = web_search(query)
        results.append(f"π Search results for '{query}':\n{search_result}\n")
    return '\n'.join(results)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token,
):
    """Stream a chat completion, executing any <search> blocks the model emits.

    Generator used by gr.ChatInterface: yields the accumulated response text
    after every streamed token so the UI renders incrementally. When the
    model's output contains <search> queries, they are executed and their
    results are appended as a new user message, and generation continues in
    another cycle until no further searches are requested.

    Parameters mirror the ChatInterface additional_inputs: system prompt,
    token budget, sampling controls, and the caller's HF API token.
    """
    client = InferenceClient(
        provider="hf-inference",
        api_key=hf_token
    )
    messages = [{"role": "system", "content": system_message}]
    # Replay prior turns; history arrives as (user, assistant) tuples where
    # either side may be empty.
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    full_response = ""
    search_cycle = True
    while search_cycle:
        search_cycle = False
        completion = client.chat.completions.create(
            model="Qwen/QwQ-32B",
            messages=messages,
            # Bug fix: honor the UI's Max Tokens slider (was hardcoded 10000,
            # leaving the max_tokens parameter unused).
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True
        )
        response = ""
        for chunk in completion:
            token = chunk.choices[0].delta.content or ""
            response += token
            full_response += token
            yield full_response
        search_results = process_searches(response)
        if search_results:
            search_cycle = True
            messages.append({"role": "assistant", "content": response})
            messages.append({
                "role": "user",
                "content": f"SEARCH RESULTS:\n{search_results}\nAnalyze these results..."
            })
            # Bug fix: `queries` was a local variable of process_searches and
            # raised NameError here. Re-extract the queries from the response
            # so they can be shown to the user.
            blocks = re.findall(r'<search>(.*?)</search>', response, re.DOTALL)
            queries = [q.strip() for b in blocks for q in b.split('\n') if q.strip()]
            full_response += f"\nπ SEARCH QUERIES USED:\n{chr(10).join(queries)}\n\n"
            full_response += "\nπ Analyzing search results...\n"
            yield full_response
# Gradio chat UI. The additional_inputs below are passed to respond() after
# (message, history), in this order: system prompt, max tokens, temperature,
# top-p, HF API token.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=SYSTEM_PROMPT, label="System Prompt", lines=8),
        gr.Slider(minimum=1000, maximum=15000, value=6000, step=500, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.85, step=0.05, label="Top-p"),
        # Token is supplied by the user at runtime; masked in the UI.
        gr.Textbox(label="HF API Token", type="password")
    ],
    title="Web Research Agent π€",
    description="Advanced AI assistant with web search capabilities",
    examples=[
        ["Compare COVID-19 mortality rates between US and Sweden with sources"],
        ["What's the current consensus on dark matter composition?"],
        ["Latest advancements in fusion energy 2023-2024"]
    ]
)
# Launch the server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()