# Source: Hugging Face Space "xd11yggy/perplexity_ai" (status: Running).
# File size: 7,381 bytes.

import gradio as gr
from openai import OpenAI
from smolagents import DuckDuckGoSearchTool
import re
import time

# Shared DuckDuckGo search tool instance; search_with_retry calls it as
# web_search(query).
web_search = DuckDuckGoSearchTool()

# Default system prompt; it is the initial value of the "System Prompt"
# textbox in the UI. It instructs the model to wrap search queries in
# <search> tags, one query per line, which is the format process_searches
# parses.
SYSTEM_PROMPT = """
You are an AI research assistant that can search the web. Follow these steps:

1. FIRST ANALYZE the user's question:
   - If information is missing or ambiguous, ask ONE clarifying question
   - If clear, proceed to search

2. When searching:
   - Generate multiple specific search queries wrapped in <search> tags
   - Focus on factual keywords, one query per line
   Example:
   <search>
   Pont des Arts exact length meters
   History of Pont des Arts bridge
   </search>

3. After receiving results:
   - Analyze information from multiple sources
   - Cross-verify facts
   - If needed, generate follow-up searches
   - Provide final answer with:
     - Clear structure
     - Key facts with sources
     - Concise explanations

Never invent information. Cite sources for all facts. Use neutral, academic tone.
"""

def process_searches(response):
    """Extract the search queries the model requested in *response*.

    Every ``<search>...</search>`` block is scanned (not just the first one),
    each non-blank line inside a block is treated as one query, and repeated
    queries are dropped so the same search is not issued twice.

    Args:
        response: Raw model output that may contain ``<search>`` blocks.

    Returns:
        A list of stripped query strings in first-seen order (possibly empty
        if the blocks contain only whitespace), or ``None`` when *response*
        contains no ``<search>`` block at all.
    """
    blocks = re.findall(r'<search>(.*?)</search>', response, re.DOTALL)
    if not blocks:
        return None

    queries = []
    seen = set()
    for block in blocks:
        for line in block.splitlines():
            query = line.strip()
            # Skip blank lines and queries already collected from an
            # earlier line or block.
            if query and query not in seen:
                seen.add(query)
                queries.append(query)
    return queries

def process_thinking(response, duration):
    """Rewrite ``<thinking>`` blocks in *response* as a readable section.

    Each ``<thinking>...</thinking>`` span is replaced by a
    "THINKING PROCESS" section holding the stripped block text followed by a
    "Thought for N seconds." line built from *duration*.

    Args:
        response: Model output that may contain ``<thinking>`` blocks.
        duration: Number of seconds to report, shown with one decimal place.

    Returns:
        A ``(text, found)`` tuple: the rewritten text and ``True`` when at
        least one block was present, otherwise the untouched *response* and
        ``False``.
    """
    blocks = re.findall(r'<thinking>(.*?)</thinking>', response, re.DOTALL)
    if blocks:
        result = response
        footer = f"Thought for {duration:.1f} seconds.\n"
        for inner in blocks:
            tagged = f'<thinking>{inner}</thinking>'
            pretty = "\n💭 THINKING PROCESS:\n" + inner.strip() + "\n" + footer
            result = result.replace(tagged, pretty)
        return result, True
    return response, False

def search_with_retry(query, max_retries=3, delay=2):
    """Run ``web_search(query)``, retrying when it raises.

    Args:
        query: Search string passed to ``web_search``.
        max_retries: Total number of attempts to make.
        delay: Seconds to sleep between a failed attempt and the next one.

    Returns:
        Whatever ``web_search`` returns on the first successful attempt, or
        ``None`` if no attempt is made (``max_retries`` of zero or less).

    Raises:
        Exception: The error from the final attempt is re-raised unchanged.
    """
    final_attempt = max_retries - 1
    for attempt in range(max_retries):
        try:
            return web_search(query)
        except Exception:
            # Out of attempts: let the last failure propagate to the caller.
            if attempt == final_attempt:
                raise
            time.sleep(delay)
    return None

def animate_thinking():
    """Yield an endless "Thinking" animation, one frame per ``next()`` call.

    Frames cycle through "Thinking", "Thinking.", "Thinking.." and
    "Thinking..."; the generator sleeps for half a second after each frame
    before producing the next one.
    """
    dot_count = 0
    while True:
        yield "Thinking" + "." * dot_count
        dot_count = (dot_count + 1) % 4
        time.sleep(0.5)

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    openrouter_key,
):
    """Stream an assistant reply, running the web searches the model requests.

    The chat transcript is built from *system_message*, *history* and
    *message* and sent to OpenRouter as a streaming completion. While tokens
    arrive, ``<thinking>`` blocks are rewritten with ``process_thinking``.
    When a finished reply contains ``<search>`` queries, each query is run via
    ``search_with_retry``, the results are appended to the transcript as a
    user message, and another completion is requested. This repeats until a
    reply contains no queries or the search-round limit is reached.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs;
            empty entries are skipped.
        system_message: System prompt placed first in the transcript.
        max_tokens: Forwarded to the completion request.
        temperature: Forwarded to the completion request.
        top_p: Forwarded to the completion request.
        openrouter_key: API key used to authenticate with OpenRouter.

    Yields:
        str: The accumulated reply text so far, a "Thinking..." animation
        frame, or a human-readable error message. Errors are reported through
        yielded text rather than raised.
    """
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=openrouter_key,
    )

    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    full_response = ""
    search_cycle = True
    thinking_animation = animate_thinking()
    # Upper bound on search/answer round-trips so a model that keeps emitting
    # <search> blocks cannot keep the loop (and the API key) busy forever.
    max_search_rounds = 5
    search_rounds = 0

    try:
        while search_cycle:
            search_cycle = False
            show_thinking = False

            try:
                start_time = time.time()
                completion = client.chat.completions.create(
                    model="qwen/qwq-32b:free",
                    messages=messages,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    stream=True,
                    extra_headers={
                        "HTTP-Referer": "https://your-domain.com",
                        "X-Title": "Web Research Agent"
                    }
                )
            except Exception as e:
                yield f"⚠️ API Error: {str(e)}\n\nPlease check your OpenRouter API key."
                return

            # `response` holds only this round's raw model output (it is what
            # goes back into the transcript); `full_response` is the display
            # text accumulated across all rounds.
            response = ""
            in_thinking_tag = False

            for chunk in completion:
                # The streaming API may deliver chunks whose `choices` list is
                # empty (e.g. a trailing usage chunk); indexing [0] on those
                # would raise IndexError and abort the whole reply.
                if not chunk.choices:
                    continue
                token = chunk.choices[0].delta.content or ""
                response += token
                full_response += token

                # Detect thinking tags. Only tags that arrive whole inside a
                # single token are seen here; split tags are handled by the
                # final process_thinking pass after the stream ends.
                if not in_thinking_tag and '<thinking>' in token:
                    in_thinking_tag = True
                    show_thinking = True

                if in_thinking_tag and '</thinking>' in token:
                    in_thinking_tag = False

                # Once a thinking block has closed, swap the raw tags for the
                # formatted section; the reported duration is measured from
                # the start of this completion request.
                if show_thinking and not in_thinking_tag:
                    formatted, has_thinking = process_thinking(full_response, time.time() - start_time)
                    if has_thinking:
                        full_response = formatted
                        show_thinking = False
                        yield full_response
                    else:
                        yield next(thinking_animation)
                else:
                    yield full_response

            # Process final thinking state
            final_response, has_thinking = process_thinking(full_response, time.time() - start_time)
            if has_thinking:
                full_response = final_response
                yield full_response

            queries = process_searches(response)

            if queries:
                if search_rounds >= max_search_rounds:
                    full_response += "\n⚠️ Search limit reached; no further searches will be run.\n"
                    yield full_response
                    break
                search_rounds += 1
                search_cycle = True
                messages.append({"role": "assistant", "content": response})

                search_results = []
                for query in queries:
                    try:
                        result = search_with_retry(query)
                        search_results.append(f"🔍 SEARCH: {query}\nRESULTS: {result}\n")
                    except Exception as e:
                        # A failed query is reported to the model instead of
                        # aborting the remaining searches.
                        search_results.append(f"⚠️ Search Error: {str(e)}\nQuery: {query}")
                        time.sleep(2)

                joined_results = "\n".join(search_results)
                messages.append({
                    "role": "user",
                    "content": f"SEARCH RESULTS:\n{joined_results}\nAnalyze these results..."
                })
                full_response += "\n🔍 Analyzing search results...\n"
                yield full_response

    except Exception as e:
        yield f"⚠️ Critical Error: {str(e)}\n\nPlease try again later."

# Extra controls shown with the chat box, listed in the same order as the
# trailing parameters of `respond` (system_message, max_tokens, temperature,
# top_p, openrouter_key).
_extra_inputs = [
    gr.Textbox(value=SYSTEM_PROMPT, label="System Prompt", lines=8),
    gr.Slider(minimum=1000, maximum=15000, value=6000, step=500, label="Max Tokens"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.85, step=0.05, label="Top-p"),
    gr.Textbox(label="OpenRouter API Key", type="password"),
]

# Sample questions offered in the UI; each entry is a one-element row.
_example_prompts = [
    ["Compare COVID-19 mortality rates between US and Sweden with sources"],
    ["What's the current consensus on dark matter composition?"],
    ["Latest advancements in fusion energy 2023-2024"],
]

# Chat UI wired to the streaming `respond` generator.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=_extra_inputs,
    title="Web Research Agent 🤖",
    description="Advanced AI assistant with web search capabilities",
    examples=_example_prompts,
    cache_examples=False,
)

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()