# perplexity_ai / app.py — Hugging Face Space by xd11yggy (commit 9f92389, 7.38 kB)
import gradio as gr
from openai import OpenAI
from smolagents import DuckDuckGoSearchTool
import re
import time
# Shared DuckDuckGo search tool instance; invoked by search_with_retry().
web_search = DuckDuckGoSearchTool()

# Default system prompt shown (and editable) in the UI.  It instructs the
# model to emit <search>...</search> blocks, which respond() intercepts,
# executes, and feeds back as a follow-up user message.
SYSTEM_PROMPT = """
You are an AI research assistant that can search the web. Follow these steps:
1. FIRST ANALYZE the user's question:
- If information is missing or ambiguous, ask ONE clarifying question
- If clear, proceed to search
2. When searching:
- Generate multiple specific search queries wrapped in <search> tags
- Focus on factual keywords, one query per line
Example:
<search>
Pont des Arts exact length meters
History of Pont des Arts bridge
</search>
3. After receiving results:
- Analyze information from multiple sources
- Cross-verify facts
- If needed, generate follow-up searches
- Provide final answer with:
- Clear structure
- Key facts with sources
- Concise explanations
Never invent information. Cite sources for all facts. Use neutral, academic tone.
"""
def process_searches(response):
    """Extract search queries from ``<search>...</search>`` blocks.

    Each non-empty line inside a search block is one query.  Queries are
    aggregated across *every* block found (the previous implementation
    only read the first block, silently dropping any later ones).

    Args:
        response: Raw model output, possibly containing search blocks.

    Returns:
        A list of stripped, non-empty query strings, or ``None`` when the
        response contains no search block (or only empty ones).
    """
    blocks = re.findall(r'<search>(.*?)</search>', response, re.DOTALL)
    if not blocks:
        return None
    queries = [
        line.strip()
        for block in blocks
        for line in block.split('\n')
        if line.strip()
    ]
    # Preserve the original contract: a block with no usable lines still
    # means "no searches requested".
    return queries or None
def process_thinking(response, duration):
    """Render ``<thinking>...</thinking>`` blocks as a visible summary.

    Args:
        response: Text that may contain thinking blocks.
        duration: Seconds to report in the "Thought for ..." footer
            (the same value is stamped on every block).

    Returns:
        Tuple ``(text, found)`` — the text with each thinking block
        replaced by a formatted section, and whether any block existed.
    """
    pattern = re.compile(r'<thinking>(.*?)</thinking>', re.DOTALL)
    if not pattern.search(response):
        return response, False

    def _render(match):
        return (
            f"\nπŸ’­ THINKING PROCESS:\n{match.group(1).strip()}\n"
            f"Thought for {duration:.1f} seconds.\n"
        )

    return pattern.sub(_render, response), True
def search_with_retry(query, max_retries=3, delay=2):
    """Run *query* through the module-level DuckDuckGo tool with retries.

    Args:
        query: Search string to execute.
        max_retries: Total number of attempts before giving up.
        delay: Seconds to sleep between attempts.

    Returns:
        Whatever ``web_search`` returns for the query.

    Raises:
        Exception: Re-raises the last error once every attempt has failed.
    """
    # The original version had an unreachable `return None` after the loop:
    # every iteration either returns or raises, so it has been removed.
    for attempt in range(max_retries):
        try:
            return web_search(query)
        except Exception:
            if attempt == max_retries - 1:
                # Out of attempts — surface the failure to the caller.
                raise
            time.sleep(delay)
def animate_thinking():
    """Endless generator of "Thinking" spinner frames.

    Yields "Thinking", "Thinking.", "Thinking..", "Thinking..." in a
    loop, pausing half a second after each frame is consumed.
    """
    frames = ("Thinking", "Thinking.", "Thinking..", "Thinking...")
    tick = 0
    while True:
        yield frames[tick % 4]
        tick += 1
        time.sleep(0.5)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    openrouter_key,
):
    """Gradio chat handler: stream a model reply, running searches on demand.

    Yields progressively longer text for the chat UI.  Whenever a completed
    reply contains <search> blocks, the queries are executed and their
    results appended as a new user message, and the loop re-queries the
    model (``search_cycle``).
    """
    # OpenRouter exposes an OpenAI-compatible API; the key comes from the
    # password textbox in the UI.
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=openrouter_key,
    )
    # Rebuild the conversation: system prompt, prior turns, new message.
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})
    full_response = ""
    search_cycle = True
    thinking_animation = animate_thinking()
    try:
        while search_cycle:
            search_cycle = False
            show_thinking = False
            thinking_start = None
            try:
                start_time = time.time()
                completion = client.chat.completions.create(
                    model="qwen/qwq-32b:free",
                    messages=messages,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    stream=True,
                    extra_headers={
                        "HTTP-Referer": "https://your-domain.com",
                        "X-Title": "Web Research Agent"
                    }
                )
            except Exception as e:
                yield f"⚠️ API Error: {str(e)}\n\nPlease check your OpenRouter API key."
                return
            response = ""
            thinking_buffer = ""
            in_thinking_tag = False
            for chunk in completion:
                token = chunk.choices[0].delta.content or ""
                response += token
                full_response += token
                # Detect thinking tags
                # NOTE(review): this assumes '<thinking>' / '</thinking>'
                # each arrive inside a single streamed token; a tag split
                # across chunk boundaries would be missed — confirm with
                # the provider's streaming behavior.
                if not in_thinking_tag and '<thinking>' in token:
                    in_thinking_tag = True
                    thinking_start = time.time()
                    show_thinking = True
                if in_thinking_tag:
                    thinking_buffer += token
                    if '</thinking>' in token:
                        in_thinking_tag = False
                        # NOTE(review): thinking_duration is computed but
                        # never read; process_thinking() below is given the
                        # wall time since start_time instead.
                        thinking_duration = time.time() - thinking_start
                # Show animated thinking if needed
                if show_thinking and not in_thinking_tag:
                    # Thinking block just closed — fold it into the visible
                    # text and resume streaming the full response.
                    formatted, has_thinking = process_thinking(full_response, time.time() - start_time)
                    if has_thinking:
                        full_response = formatted
                        show_thinking = False
                        yield full_response
                else:
                    # While inside a thinking block this yields the spinner
                    # frame; otherwise the accumulated response so far.
                    yield next(thinking_animation)
            else:
                yield full_response
            # Process final thinking state
            final_response, has_thinking = process_thinking(full_response, time.time() - start_time)
            if has_thinking:
                full_response = final_response
                yield full_response
            queries = process_searches(response)
            if queries:
                # Model asked for web searches: run them, append results as
                # a user message, and loop back for another completion.
                search_cycle = True
                messages.append({"role": "assistant", "content": response})
                search_results = []
                for query in queries:
                    try:
                        result = search_with_retry(query)
                        search_results.append(f"πŸ” SEARCH: {query}\nRESULTS: {result}\n")
                    except Exception as e:
                        search_results.append(f"⚠️ Search Error: {str(e)}\nQuery: {query}")
                    # Throttle between queries to be polite to the search
                    # backend.
                    time.sleep(2)
                messages.append({
                    "role": "user",
                    "content": f"SEARCH RESULTS:\n{chr(10).join(search_results)}\nAnalyze these results..."
                })
                full_response += "\nπŸ” Analyzing search results...\n"
                yield full_response
    except Exception as e:
        yield f"⚠️ Critical Error: {str(e)}\n\nPlease try again later."
# Chat UI wiring: respond() receives (message, history) plus, in order,
# the values of each control listed in additional_inputs.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=SYSTEM_PROMPT, label="System Prompt", lines=8),
        gr.Slider(minimum=1000, maximum=15000, value=6000, step=500, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.85, step=0.05, label="Top-p"),
        gr.Textbox(label="OpenRouter API Key", type="password")
    ],
    title="Web Research Agent πŸ€–",
    description="Advanced AI assistant with web search capabilities",
    examples=[
        ["Compare COVID-19 mortality rates between US and Sweden with sources"],
        ["What's the current consensus on dark matter composition?"],
        ["Latest advancements in fusion energy 2023-2024"]
    ],
    # Examples invoke respond(), which needs a live API key — don't cache.
    cache_examples=False
)

if __name__ == "__main__":
    demo.launch()