import gradio as gr
from openai import OpenAI
from smolagents import DuckDuckGoSearchTool
import re
import time

# Single shared DuckDuckGo tool instance, reused across all requests.
web_search = DuckDuckGoSearchTool()

# NOTE(fix): the <search>...</search> tag literals had been stripped from this
# prompt (and from every regex below), leaving the model with no tag name to
# emit and the parser matching empty strings. Restored here.
SYSTEM_PROMPT = """
You are an AI research assistant that can search the web. Follow these steps:

1. FIRST ANALYZE the user's question:
- If information is missing or ambiguous, ask ONE clarifying question
- If clear, proceed to search

2. When searching:
- Generate multiple specific search queries wrapped in <search> tags
- Focus on factual keywords, one query per line
Example:
<search>
Pont des Arts exact length meters
History of Pont des Arts bridge
</search>

3. After receiving results:
- Analyze information from multiple sources
- Cross-verify facts
- If needed, generate follow-up searches
- Provide final answer with:
- Clear structure
- Key facts with sources
- Concise explanations

Never invent information. Cite sources for all facts. Use neutral, academic tone.
"""


def process_searches(response):
    """Extract search queries from the first <search>...</search> block.

    Args:
        response: Full assistant response text.

    Returns:
        List of non-empty, stripped query lines from the first search block,
        or ``None`` when the response contains no search block.
    """
    # Fix: pattern was r'(.*?)' (tags stripped), which findall-matched a
    # flood of empty strings and made every response look like a search.
    searches = re.findall(r'<search>(.*?)</search>', response, re.DOTALL)
    if searches:
        # One query per line, per the system prompt's contract.
        return [q.strip() for q in searches[0].split('\n') if q.strip()]
    return None


def process_thinking(response, duration):
    """Replace <think>...</think> blocks with a human-readable summary.

    Args:
        response: Full response text possibly containing think blocks.
        duration: Seconds to attribute to the thinking phase.

    Returns:
        Tuple ``(formatted_text, had_thinking)`` where ``had_thinking`` is
        True iff at least one complete think block was found.
    """
    thinking_blocks = re.findall(r'<think>(.*?)</think>', response, re.DOTALL)
    if not thinking_blocks:
        return response, False

    formatted_response = response
    for content in thinking_blocks:
        formatted_think = (
            f"\nšŸ’­ THINKING PROCESS:\n{content.strip()}\n"
            f"Thought for {duration:.1f} seconds.\n"
        )
        # Replace the exact tagged span (fix: tags were missing from the
        # replace target, so nothing was ever substituted).
        formatted_response = formatted_response.replace(
            f'<think>{content}</think>', formatted_think
        )
    return formatted_response, True


def search_with_retry(query, max_retries=3, delay=2):
    """Run a web search, retrying transient failures.

    Args:
        query: Search query string.
        max_retries: Total attempts before giving up.
        delay: Seconds to sleep between attempts.

    Returns:
        The search tool's result. Re-raises the last exception when all
        attempts fail.
    """
    for attempt in range(max_retries):
        try:
            return web_search(query)
        except Exception:
            if attempt < max_retries - 1:
                time.sleep(delay)
                continue
            raise
    return None  # unreachable; kept for defensive clarity


def animate_thinking():
    """Infinite generator yielding 'Thinking', 'Thinking.', ... frames.

    NOTE(review): each ``next()`` call blocks for 0.5 s via ``time.sleep`` —
    this stalls the streaming loop that consumes it; confirm intended.
    """
    dots = ["", ".", "..", "..."]
    i = 0
    while True:
        yield f"Thinking{dots[i]}"
        i = (i + 1) % 4
        time.sleep(0.5)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    openrouter_key,
):
    """Streaming chat handler for gr.ChatInterface.

    Agent loop: stream a model reply, surface <think> blocks as a formatted
    thinking summary, extract <search> queries, run them, feed the results
    back to the model, and repeat until no further searches are requested.

    Yields progressively longer response strings for Gradio to render.
    """
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=openrouter_key,
    )

    # Rebuild the conversation: system prompt, prior turns, new message.
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})

    full_response = ""
    search_cycle = True
    thinking_animation = animate_thinking()

    try:
        while search_cycle:
            search_cycle = False
            show_thinking = False

            try:
                start_time = time.time()
                completion = client.chat.completions.create(
                    model="qwen/qwq-32b:free",
                    messages=messages,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    stream=True,
                    extra_headers={
                        "HTTP-Referer": "https://your-domain.com",
                        "X-Title": "Web Research Agent"
                    }
                )
            except Exception as e:
                yield f"āš ļø API Error: {str(e)}\n\nPlease check your OpenRouter API key."
                return

            response = ""
            in_thinking_tag = False

            for chunk in completion:
                token = chunk.choices[0].delta.content or ""
                response += token
                full_response += token

                # Detect thinking tags (fix: tag literals were stripped, so
                # the original tested '' in token, which is always True).
                if not in_thinking_tag and '<think>' in token:
                    in_thinking_tag = True
                    show_thinking = True
                if in_thinking_tag and '</think>' in token:
                    in_thinking_tag = False

                # Once a think block has closed, fold it into the visible
                # output; until then show the animated placeholder.
                if show_thinking and not in_thinking_tag:
                    formatted, has_thinking = process_thinking(
                        full_response, time.time() - start_time
                    )
                    if has_thinking:
                        full_response = formatted
                        show_thinking = False
                        yield full_response
                    else:
                        yield next(thinking_animation)
                else:
                    yield full_response

            # Process any thinking left unformatted when the stream ended.
            final_response, has_thinking = process_thinking(
                full_response, time.time() - start_time
            )
            if has_thinking:
                full_response = final_response
                yield full_response

            queries = process_searches(response)
            if queries:
                # The model asked for searches: run them and loop again.
                search_cycle = True
                messages.append({"role": "assistant", "content": response})

                search_results = []
                for query in queries:
                    try:
                        result = search_with_retry(query)
                        search_results.append(
                            f"šŸ” SEARCH: {query}\nRESULTS: {result}\n"
                        )
                    except Exception as e:
                        search_results.append(
                            f"āš ļø Search Error: {str(e)}\nQuery: {query}"
                        )
                    time.sleep(2)  # throttle between queries

                messages.append({
                    "role": "user",
                    "content": f"SEARCH RESULTS:\n{chr(10).join(search_results)}\nAnalyze these results..."
                })
                full_response += "\nšŸ” Analyzing search results...\n"
                yield full_response

    except Exception as e:
        yield f"āš ļø Critical Error: {str(e)}\n\nPlease try again later."


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=SYSTEM_PROMPT, label="System Prompt", lines=8),
        gr.Slider(minimum=1000, maximum=15000, value=6000, step=500, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.85, step=0.05, label="Top-p"),
        gr.Textbox(label="OpenRouter API Key", type="password")
    ],
    title="Web Research Agent šŸ¤–",
    description="Advanced AI assistant with web search capabilities",
    examples=[
        ["Compare COVID-19 mortality rates between US and Sweden with sources"],
        ["What's the current consensus on dark matter composition?"],
        ["Latest advancements in fusion energy 2023-2024"]
    ],
    cache_examples=False
)

if __name__ == "__main__":
    demo.launch()