# perplexity_ai / app.py — Hugging Face Space by xd11yggy (commit 9f92389, 7.38 kB)
import gradio as gr
from openai import OpenAI
from smolagents import DuckDuckGoSearchTool
import re
import time
# Shared DuckDuckGo search tool instance; invoked by search_with_retry().
web_search = DuckDuckGoSearchTool()

# Default system prompt shown (and editable) in the UI.  It instructs the
# model to emit <search>...</search> blocks, which respond() intercepts,
# executes, and feeds back as a follow-up user message.
SYSTEM_PROMPT = """
You are an AI research assistant that can search the web. Follow these steps:
1. FIRST ANALYZE the user's question:
- If information is missing or ambiguous, ask ONE clarifying question
- If clear, proceed to search
2. When searching:
- Generate multiple specific search queries wrapped in <search> tags
- Focus on factual keywords, one query per line
Example:
<search>
Pont des Arts exact length meters
History of Pont des Arts bridge
</search>
3. After receiving results:
- Analyze information from multiple sources
- Cross-verify facts
- If needed, generate follow-up searches
- Provide final answer with:
- Clear structure
- Key facts with sources
- Concise explanations
Never invent information. Cite sources for all facts. Use neutral, academic tone.
"""
def process_searches(response):
    """Extract search queries from ``<search>...</search>`` blocks.

    Each non-empty line inside a search block is one query.  Queries are
    aggregated across *every* block found (the previous implementation
    only read the first block, silently dropping any later ones).

    Args:
        response: Raw model output, possibly containing search blocks.

    Returns:
        A list of stripped, non-empty query strings, or ``None`` when the
        response contains no search block (or only empty ones).
    """
    blocks = re.findall(r'<search>(.*?)</search>', response, re.DOTALL)
    if not blocks:
        return None
    queries = [
        line.strip()
        for block in blocks
        for line in block.split('\n')
        if line.strip()
    ]
    # Preserve the original contract: a block with no usable lines still
    # means "no searches requested".
    return queries or None
def process_thinking(response, duration):
    """Render ``<thinking>...</thinking>`` blocks as a visible summary.

    Args:
        response: Text that may contain thinking blocks.
        duration: Seconds to report in the "Thought for ..." footer
            (the same value is stamped on every block).

    Returns:
        Tuple ``(text, found)`` — the text with each thinking block
        replaced by a formatted section, and whether any block existed.
    """
    pattern = re.compile(r'<thinking>(.*?)</thinking>', re.DOTALL)
    if not pattern.search(response):
        return response, False

    def _render(match):
        return (
            f"\nπŸ’­ THINKING PROCESS:\n{match.group(1).strip()}\n"
            f"Thought for {duration:.1f} seconds.\n"
        )

    return pattern.sub(_render, response), True
def search_with_retry(query, max_retries=3, delay=2):
    """Run *query* through the module-level DuckDuckGo tool with retries.

    Args:
        query: Search string to execute.
        max_retries: Total number of attempts before giving up.
        delay: Seconds to sleep between attempts.

    Returns:
        Whatever ``web_search`` returns for the query.

    Raises:
        Exception: Re-raises the last error once every attempt has failed.
    """
    # The original version had an unreachable `return None` after the loop:
    # every iteration either returns or raises, so it has been removed.
    for attempt in range(max_retries):
        try:
            return web_search(query)
        except Exception:
            if attempt == max_retries - 1:
                # Out of attempts — surface the failure to the caller.
                raise
            time.sleep(delay)
def animate_thinking():
    """Endless generator of "Thinking" spinner frames.

    Yields "Thinking", "Thinking.", "Thinking..", "Thinking..." in a
    loop, pausing half a second after each frame is consumed.
    """
    frames = ("Thinking", "Thinking.", "Thinking..", "Thinking...")
    tick = 0
    while True:
        yield frames[tick % 4]
        tick += 1
        time.sleep(0.5)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    openrouter_key,
):
    """Gradio chat handler: stream a model reply, running searches on demand.

    Yields progressively longer text for the chat UI.  Whenever a completed
    reply contains <search> blocks, the queries are executed and their
    results appended as a new user message, and the loop re-queries the
    model (``search_cycle``).
    """
    # OpenRouter exposes an OpenAI-compatible API; the key comes from the
    # password textbox in the UI.
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=openrouter_key,
    )
    # Rebuild the conversation: system prompt, prior turns, new message.
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})
    full_response = ""
    search_cycle = True
    thinking_animation = animate_thinking()
    try:
        while search_cycle:
            search_cycle = False
            show_thinking = False
            thinking_start = None
            try:
                start_time = time.time()
                completion = client.chat.completions.create(
                    model="qwen/qwq-32b:free",
                    messages=messages,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    stream=True,
                    extra_headers={
                        "HTTP-Referer": "https://your-domain.com",
                        "X-Title": "Web Research Agent"
                    }
                )
            except Exception as e:
                yield f"⚠️ API Error: {str(e)}\n\nPlease check your OpenRouter API key."
                return
            response = ""
            thinking_buffer = ""
            in_thinking_tag = False
            for chunk in completion:
                token = chunk.choices[0].delta.content or ""
                response += token
                full_response += token
                # Detect thinking tags
                # NOTE(review): this assumes '<thinking>' / '</thinking>'
                # each arrive inside a single streamed token; a tag split
                # across chunk boundaries would be missed — confirm with
                # the provider's streaming behavior.
                if not in_thinking_tag and '<thinking>' in token:
                    in_thinking_tag = True
                    thinking_start = time.time()
                    show_thinking = True
                if in_thinking_tag:
                    thinking_buffer += token
                    if '</thinking>' in token:
                        in_thinking_tag = False
                        # NOTE(review): thinking_duration is computed but
                        # never read; process_thinking() below is given the
                        # wall time since start_time instead.
                        thinking_duration = time.time() - thinking_start
                # Show animated thinking if needed
                if show_thinking and not in_thinking_tag:
                    # Thinking block just closed — fold it into the visible
                    # text and resume streaming the full response.
                    formatted, has_thinking = process_thinking(full_response, time.time() - start_time)
                    if has_thinking:
                        full_response = formatted
                        show_thinking = False
                        yield full_response
                else:
                    # While inside a thinking block this yields the spinner
                    # frame; otherwise the accumulated response so far.
                    yield next(thinking_animation)
            else:
                yield full_response
            # Process final thinking state
            final_response, has_thinking = process_thinking(full_response, time.time() - start_time)
            if has_thinking:
                full_response = final_response
                yield full_response
            queries = process_searches(response)
            if queries:
                # Model asked for web searches: run them, append results as
                # a user message, and loop back for another completion.
                search_cycle = True
                messages.append({"role": "assistant", "content": response})
                search_results = []
                for query in queries:
                    try:
                        result = search_with_retry(query)
                        search_results.append(f"πŸ” SEARCH: {query}\nRESULTS: {result}\n")
                    except Exception as e:
                        search_results.append(f"⚠️ Search Error: {str(e)}\nQuery: {query}")
                    # Throttle between queries to be polite to the search
                    # backend.
                    time.sleep(2)
                messages.append({
                    "role": "user",
                    "content": f"SEARCH RESULTS:\n{chr(10).join(search_results)}\nAnalyze these results..."
                })
                full_response += "\nπŸ” Analyzing search results...\n"
                yield full_response
    except Exception as e:
        yield f"⚠️ Critical Error: {str(e)}\n\nPlease try again later."
# Chat UI wiring: respond() receives (message, history) plus, in order,
# the values of each control listed in additional_inputs.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=SYSTEM_PROMPT, label="System Prompt", lines=8),
        gr.Slider(minimum=1000, maximum=15000, value=6000, step=500, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.85, step=0.05, label="Top-p"),
        gr.Textbox(label="OpenRouter API Key", type="password")
    ],
    title="Web Research Agent πŸ€–",
    description="Advanced AI assistant with web search capabilities",
    examples=[
        ["Compare COVID-19 mortality rates between US and Sweden with sources"],
        ["What's the current consensus on dark matter composition?"],
        ["Latest advancements in fusion energy 2023-2024"]
    ],
    # Examples invoke respond(), which needs a live API key — don't cache.
    cache_examples=False
)

if __name__ == "__main__":
    demo.launch()