"""Gradio chat app: an LLM research assistant that can run web searches.

The model is instructed (see ``SYSTEM_PROMPT``) to wrap search queries in
``<search>...</search>`` tags. ``respond`` streams the model output, runs any
requested searches through DuckDuckGo, feeds the results back to the model,
and repeats until the model answers without requesting another search.
"""

import re

import gradio as gr
from huggingface_hub import InferenceClient
from smolagents import DuckDuckGoSearchTool

web_search = DuckDuckGoSearchTool()

# Upper bound on model -> search -> model round trips for one user message,
# so a model that keeps requesting searches cannot loop (and bill) forever.
MAX_SEARCH_CYCLES = 5

# Matches one block of queries emitted by the model. The tag name must stay
# in sync with the instructions and example given in SYSTEM_PROMPT.
SEARCH_BLOCK_RE = re.compile(r"<search>(.*?)</search>", re.DOTALL)

SYSTEM_PROMPT = """You are an AI research assistant that can search the web. Follow these steps:

1. FIRST ANALYZE the user's question:
   - If information is missing or ambiguous, ask ONE clarifying question
   - If clear, proceed to search

2. When searching:
   - Generate multiple specific search queries wrapped in <search> tags
   - Focus on factual keywords, one query per line
   Example:
   <search>
   Pont des Arts exact length meters
   History of Pont des Arts bridge
   </search>

3. After receiving results:
   - Analyze information from multiple sources
   - Cross-verify facts
   - If needed, generate follow-up searches
   - Provide final answer with:
     - Clear structure
     - Key facts with sources
     - Concise explanations

Never invent information. Cite sources for all facts. Use neutral, academic tone."""


def extract_queries(response):
    """Return the search queries the model requested in *response*.

    Every ``<search>...</search>`` block is scanned; each non-blank line inside
    a block is one query. Duplicates are dropped, first-seen order is kept.

    Args:
        response: Raw text produced by the model.

    Returns:
        A list of query strings; empty when no search was requested.
    """
    queries = []
    for block in SEARCH_BLOCK_RE.findall(response):
        for line in block.splitlines():
            query = line.strip()
            if query and query not in queries:
                queries.append(query)
    return queries


def run_searches(queries):
    """Run each query through the web search tool and format the results.

    Args:
        queries: Iterable of query strings.

    Returns:
        One text blob with a labelled section per query.
    """
    sections = []
    for query in queries:
        try:
            search_result = web_search(query)
        except Exception as exc:  # noqa: BLE001
            # Deliberately broad: the exception types raised by the search
            # tool are not visible from this file. One failed query should be
            # reported to the model, not abort the whole conversation turn.
            search_result = f"Search failed: {exc}"
        sections.append(f"🔍 Search results for '{query}':\n{search_result}\n")
    return "\n".join(sections)


def process_searches(response):
    """Execute the searches requested in *response*, if any.

    Args:
        response: Raw text produced by the model.

    Returns:
        The formatted search results, or ``None`` when the response contains
        no search queries.
    """
    queries = extract_queries(response)
    if not queries:
        return None
    return run_searches(queries)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token,
):
    """Stream an answer to *message*, performing web searches on demand.

    This is a generator: every yielded value is the full text accumulated so
    far (model tokens plus status lines about searches).

    Args:
        message: The new user message.
        history: Previous turns as ``(user_text, assistant_text)`` pairs.
        system_message: System prompt sent as the first message.
        max_tokens: Generation limit passed to the model for each completion.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        hf_token: Hugging Face API token used to authenticate the client.

    Yields:
        The accumulated response text after each streamed token and after
        each search step.
    """
    client = InferenceClient(
        provider="hf-inference",
        api_key=hf_token,
    )

    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})

    full_response = ""

    for _ in range(MAX_SEARCH_CYCLES):
        completion = client.chat.completions.create(
            model="Qwen/QwQ-32B",
            messages=messages,
            # Honour the UI slider; sliders may deliver floats.
            max_tokens=int(max_tokens),
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )

        response = ""
        for chunk in completion:
            # Some stream chunks carry no choices; skip them instead of
            # failing on choices[0].
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content or ""
            response += token
            full_response += token
            yield full_response

        queries = extract_queries(response)
        if not queries:
            # No search requested: the model has given its final answer.
            return

        # Show the queries before the (slow) searches start.
        full_response += f"\n🔍 SEARCH QUERIES USED:\n{chr(10).join(queries)}\n\n"
        yield full_response

        search_results = run_searches(queries)
        messages.append({"role": "assistant", "content": response})
        messages.append({
            "role": "user",
            "content": f"SEARCH RESULTS:\n{search_results}\nAnalyze these results...",
        })

        full_response += "\n🔍 Analyzing search results...\n"
        yield full_response

    # Only reached when every cycle ended with another search request.
    full_response += (
        f"\n⚠️ Stopped after {MAX_SEARCH_CYCLES} search rounds "
        "without a final answer.\n"
    )
    yield full_response


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=SYSTEM_PROMPT, label="System Prompt", lines=8),
        gr.Slider(minimum=1000, maximum=15000, value=6000, step=500, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.85, step=0.05, label="Top-p"),
        gr.Textbox(label="HF API Token", type="password"),
    ],
    title="Web Research Agent 🤖",
    description="Advanced AI assistant with web search capabilities",
    examples=[
        ["Compare COVID-19 mortality rates between US and Sweden with sources"],
        ["What's the current consensus on dark matter composition?"],
        ["Latest advancements in fusion energy 2023-2024"],
    ],
)

if __name__ == "__main__":
    demo.launch()