import re
import time

import gradio as gr
from huggingface_hub import InferenceClient
from smolagents import DuckDuckGoSearchTool

# Single shared DuckDuckGo search tool (no API key required).
web_search = DuckDuckGoSearchTool()

# NOTE(review): the <search>/<think> tags below were stripped from the original
# paste (HTML-sanitized); they are reconstructed here — the parsing code and the
# "wrapped in tags" instruction only make sense with them present.
SYSTEM_PROMPT = """
You are an AI research assistant that can search the web. Follow these steps:

1. FIRST ANALYZE the user's question:
   - If information is missing or ambiguous, ask ONE clarifying question
   - If clear, proceed to search

2. When searching:
   - Generate multiple specific search queries wrapped in <search> tags
   - Focus on factual keywords, one query per line
   Example:
   <search>
   Pont des Arts exact length meters
   History of Pont des Arts bridge
   </search>

3. After receiving results:
   - Analyze information from multiple sources
   - Cross-verify facts
   - If needed, generate follow-up searches
   - Provide final answer with:
     - Clear structure
     - Key facts with sources
     - Concise explanations

Never invent information. Cite sources for all facts. Use neutral, academic tone.
"""


def process_searches(response):
    """Extract search queries from ``<search>...</search>`` blocks in *response*.

    The model's ``<think>`` tags are rewritten to a human-readable marker before
    scanning (display formatting only). Queries from *all* search blocks are
    collected, one query per non-blank line.

    Returns a list of query strings, or ``None`` when no search block exists.
    """
    # Original code called str.replace("", ...) because the tag literals were
    # lost — that inserts the marker between every character. Restored tags fix it.
    formatted_response = (
        response
        .replace("<think>", "\nšŸ’­ THINKING PROCESS:\n")
        .replace("</think>", "\n")
    )
    searches = re.findall(r'<search>(.*?)</search>', formatted_response, re.DOTALL)
    if searches:
        # Generalized from the original searches[0]: gather queries from every
        # block so a response with multiple <search> sections loses nothing.
        queries = [
            q.strip()
            for block in searches
            for q in block.split('\n')
            if q.strip()
        ]
        return queries
    return None


def search_with_retry(query, max_retries=3, delay=2):
    """Run *query* through the web search tool, retrying on transient failures.

    Sleeps *delay* seconds between attempts; re-raises the last exception once
    *max_retries* attempts are exhausted.
    """
    for attempt in range(max_retries):
        try:
            return web_search(query)
        except Exception:
            if attempt < max_retries - 1:
                time.sleep(delay)
                continue
            raise
    # Unreachable: the loop either returns or re-raises on the final attempt.
    return None


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token,
):
    """Streaming chat handler for the Gradio ChatInterface.

    Streams model tokens, and whenever the model emits <search> queries, runs
    them, feeds the results back as a user turn, and loops until the model
    answers without requesting another search. Yields the accumulated text.
    """
    client = InferenceClient(
        provider="hf-inference",
        api_key=hf_token
    )

    messages = [{"role": "system", "content": system_message}]

    # History arrives as (user, assistant) tuples; either side may be empty.
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    full_response = ""
    search_cycle = True

    try:
        while search_cycle:
            search_cycle = False

            try:
                completion = client.chat.completions.create(
                    model="Qwen/QwQ-32B",
                    messages=messages,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    stream=True
                )
            except Exception as e:
                yield f"āš ļø API Error: {str(e)}\n\nPlease check your HF token and model access."
                return

            response = ""
            for chunk in completion:
                token = chunk.choices[0].delta.content or ""
                response += token
                full_response += token
                yield full_response

            queries = process_searches(response)

            if queries:
                search_cycle = True
                messages.append({"role": "assistant", "content": response})

                search_results = []
                for query in queries:
                    try:
                        result = search_with_retry(query)
                        search_results.append(f"šŸ” SEARCH: {query}\nRESULTS: {result}\n")
                    except Exception as e:
                        search_results.append(f"āš ļø Search Error: {str(e)}\nQuery: {query}")
                    # Rate-limit consecutive searches to stay polite to DDG.
                    time.sleep(2)

                messages.append({
                    "role": "user",
                    "content": f"SEARCH RESULTS:\n{chr(10).join(search_results)}\nAnalyze these results..."
                })
                full_response += "\nšŸ” Analyzing search results...\n"
                yield full_response

    except Exception as e:
        yield f"āš ļø Critical Error: {str(e)}\n\nPlease try again later."


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=SYSTEM_PROMPT, label="System Prompt", lines=8),
        gr.Slider(minimum=1000, maximum=15000, value=6000, step=500, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.85, step=0.05, label="Top-p"),
        gr.Textbox(label="HF API Token", type="password")
    ],
    title="Web Research Agent šŸ¤–",
    description="Advanced AI assistant with web search capabilities",
    examples=[
        ["Compare COVID-19 mortality rates between US and Sweden with sources"],
        ["What's the current consensus on dark matter composition?"],
        ["Latest advancements in fusion energy 2023-2024"]
    ],
    cache_examples=False
)

if __name__ == "__main__":
    demo.launch()