# Source: Hugging Face Space "perplexity_ai", file app.py (4.26 kB).
# Last commit: "Update app.py" (f85b689, verified), by xd11yggy.
import gradio as gr
from huggingface_hub import InferenceClient
from smolagents import DuckDuckGoSearchTool
import re
# Shared DuckDuckGo search tool instance; process_searches calls it once per
# query line.
web_search = DuckDuckGoSearchTool()
# Default system prompt. It is used as the initial value of the editable
# "System Prompt" textbox in the UI, and it instructs the model to request
# searches inside <search>...</search> tags with one query per line, which is
# the format process_searches parses.
SYSTEM_PROMPT = """
You are an AI research assistant that can search the web. Follow these steps:
1. FIRST ANALYZE the user's question:
- If information is missing or ambiguous, ask ONE clarifying question
- If clear, proceed to search
2. When searching:
- Generate multiple specific search queries wrapped in <search> tags
- Focus on factual keywords, one query per line
Example:
<search>
Pont des Arts exact length meters
History of Pont des Arts bridge
</search>
3. After receiving results:
- Analyze information from multiple sources
- Cross-verify facts
- If needed, generate follow-up searches
- Provide final answer with:
- Clear structure
- Key facts with sources
- Concise explanations
Never invent information. Cite sources for all facts. Use neutral, academic tone.
"""
def process_searches(response):
    """Run the web searches requested in a model response.

    The model requests searches by emitting a ``<search>...</search>`` block
    containing one query per line. Only the first such block in *response* is
    honoured; every non-blank line in it is sent to ``web_search``.

    Args:
        response: Raw text generated by the model.

    Returns:
        A newline-joined report with one "SEARCH / RESULTS" entry per query,
        an empty string when the block holds no non-blank lines, or ``None``
        when *response* contains no ``<search>`` block at all.
    """
    blocks = re.findall(r'<search>(.*?)</search>', response, re.DOTALL)
    if not blocks:
        return None

    queries = [line.strip() for line in blocks[0].split('\n') if line.strip()]
    entries = []
    for query in queries:
        try:
            outcome = web_search(query)
        except Exception as exc:
            # The search tool signals "no results" and network problems by
            # raising; a single bad query must not abort the whole chat turn,
            # so the failure is reported to the model in place of results.
            outcome = f"Search failed: {exc}"
        entries.append(f"🔍 SEARCH: {query}\nRESULTS: {outcome}\n")
    return '\n'.join(entries)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token,
):
    """Stream an answer to *message*, running any web searches the model asks for.

    The conversation is sent to the chat model; its output is streamed back
    token by token. When a finished model turn contains a ``<search>`` block,
    the queries are executed via ``process_searches``, the results are fed
    back to the model as a new user message, and the model is called again.

    Args:
        message: The new user message.
        history: Previous turns as ``(user_text, assistant_text)`` pairs.
        system_message: System prompt placed at the start of the conversation.
        max_tokens: Maximum number of tokens to generate per model call.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.
        hf_token: API key used to authenticate with the inference provider.

    Yields:
        The cumulative response text so far (model output plus search status
        lines), suitable for progressively updating a chat UI.
    """
    client = InferenceClient(
        provider="hf-inference",
        api_key=hf_token
    )

    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # Upper bound on model -> search -> model round trips, so a model that
    # keeps requesting searches cannot loop (and call the API) forever.
    max_search_rounds = 5
    rounds_done = 0
    full_response = ""

    while True:
        completion = client.chat.completions.create(
            model="Qwen/QwQ-32B",
            messages=messages,
            # Honour the UI setting instead of a hard-coded limit.
            max_tokens=int(max_tokens),
            temperature=temperature,
            top_p=top_p,
            stream=True
        )

        response = ""
        for chunk in completion:
            # Some stream chunks carry no choices (e.g. bookkeeping chunks).
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content or ""
            if not token:
                continue
            response += token
            full_response += token
            # Yield on every token: a tag such as "<thinking>" is usually
            # split across several tokens, so gating the update on a whole
            # tag appearing inside one token would almost never fire.
            yield full_response

        if rounds_done >= max_search_rounds:
            break

        search_results = process_searches(response)
        if not search_results:
            break
        rounds_done += 1

        # Recover the query list for display. This mirrors process_searches,
        # which reads one query per non-blank line of the first <search> block.
        blocks = re.findall(r'<search>(.*?)</search>', response, re.DOTALL)
        queries = (
            [line.strip() for line in blocks[0].split('\n') if line.strip()]
            if blocks
            else []
        )

        messages.append({"role": "assistant", "content": response})
        messages.append({
            "role": "user",
            "content": f"SEARCH RESULTS:\n{search_results}\nAnalyze these results..."
        })

        query_list = "\n".join(queries)
        full_response += f"\n🔍 SEARCH QUERIES USED:\n{query_list}\n\n"
        full_response += "\n🔍 Analyzing search results...\n"
        yield full_response

    # Guarantees at least one update even if the model produced no tokens.
    yield full_response
# Chat UI definition. The additional inputs are handed to `respond` after
# (message, history), so their order here must match its remaining parameters:
# system_message, max_tokens, temperature, top_p, hf_token.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=SYSTEM_PROMPT, label="System Prompt", lines=8),
        gr.Slider(minimum=1000, maximum=15000, value=6000, step=500, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.85, step=0.05, label="Top-p"),
        gr.Textbox(label="HF API Token", type="password")
    ],
    # The emoji was mis-encoded ("πŸ€–"); restored to the intended robot face.
    title="Web Research Agent 🤖",
    description="Advanced AI assistant with web search capabilities",
    examples=[
        ["Compare COVID-19 mortality rates between US and Sweden with sources"],
        ["What's the current consensus on dark matter composition?"],
        ["Latest advancements in fusion energy 2023-2024"]
    ],
    cache_examples=False
)

# Launch the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()