Spaces:

xd11yggy
/

perplexity_ai

Running

App Files Community

perplexity_ai / app.py

xd11yggy

Update app.py

f85b689 verified 4 days ago

raw

history blame

4.26 kB

	import gradio as gr
	from huggingface_hub import InferenceClient
	from smolagents import DuckDuckGoSearchTool
	import re

	# Shared search tool instance; process_searches() calls it once per query.
	web_search = DuckDuckGoSearchTool()

	# Default system prompt, used as the initial value of the editable
	# "System Prompt" textbox in the UI. It tells the model to wrap its web
	# queries in <search>...</search> tags, one query per line, which is the
	# format process_searches() extracts with its regex.
	SYSTEM_PROMPT = """
	You are an AI research assistant that can search the web. Follow these steps:

	1. FIRST ANALYZE the user's question:
	- If information is missing or ambiguous, ask ONE clarifying question
	- If clear, proceed to search

	2. When searching:
	- Generate multiple specific search queries wrapped in <search> tags
	- Focus on factual keywords, one query per line
	Example:
	<search>
	Pont des Arts exact length meters
	History of Pont des Arts bridge
	</search>

	3. After receiving results:
	- Analyze information from multiple sources
	- Cross-verify facts
	- If needed, generate follow-up searches
	- Provide final answer with:
	- Clear structure
	- Key facts with sources
	- Concise explanations

	Never invent information. Cite sources for all facts. Use neutral, academic tone.
	"""

	def process_searches(response):
	    """Run the web searches requested in a model response.

	    Every ``<search>...</search>`` block in *response* is scanned. Each
	    non-empty line inside a block is treated as one query and passed to the
	    module-level ``web_search`` tool. Repeated queries are searched once.

	    Args:
	        response: Raw model output that may contain ``<search>`` blocks.

	    Returns:
	        A newline-joined string with one ``🔍 SEARCH: <query>`` /
	        ``RESULTS: <result>`` entry per query, or ``None`` when the response
	        contains no queries.
	    """
	    queries = []
	    for block in re.findall(r'<search>(.*?)</search>', response, re.DOTALL):
	        for line in block.split('\n'):
	            query = line.strip()
	            # Skip blank lines and repeats so each query is searched once.
	            if query and query not in queries:
	                queries.append(query)
	    if not queries:
	        return None

	    results = []
	    for query in queries:
	        try:
	            search_result = web_search(query)
	        except Exception as exc:
	            # Best-effort: the tool may raise (e.g. no hits, network error);
	            # report the failure to the model instead of aborting the
	            # remaining queries and the whole chat turn.
	            search_result = f"Search failed: {exc}"
	        results.append(f"🔍 SEARCH: {query}\nRESULTS: {search_result}\n")
	    return '\n'.join(results)

	def respond(
	    message,
	    history: list[tuple[str, str]],
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	    hf_token,
	):
	    """Stream a chat answer, running any web searches the model requests.

	    The conversation is sent to the model; whenever a completed model turn
	    contains ``<search>`` queries, they are executed via
	    ``process_searches`` and the results are fed back as a new user message
	    so the model can continue. The loop ends when a turn requests no
	    searches or the round limit is reached.

	    Args:
	        message: The new user message.
	        history: Previous ``(user, assistant)`` turns.
	        system_message: System prompt placed first in the conversation.
	        max_tokens: Generation limit forwarded to the model for each turn.
	        temperature: Sampling temperature forwarded to the model.
	        top_p: Nucleus-sampling value forwarded to the model.
	        hf_token: API key used to create the inference client.

	    Yields:
	        The accumulated text to display (model output plus search status
	        lines), growing as tokens arrive.
	    """
	    client = InferenceClient(
	        provider="hf-inference",
	        api_key=hf_token,
	    )

	    messages = [{"role": "system", "content": system_message}]
	    for user_text, assistant_text in history:
	        if user_text:
	            messages.append({"role": "user", "content": user_text})
	        if assistant_text:
	            messages.append({"role": "assistant", "content": assistant_text})
	    messages.append({"role": "user", "content": message})

	    # Upper bound on search rounds so a model that keeps emitting <search>
	    # tags cannot keep the request (and the token's quota) running forever.
	    max_search_rounds = 5

	    full_response = ""
	    for round_index in range(max_search_rounds + 1):
	        completion = client.chat.completions.create(
	            model="Qwen/QwQ-32B",
	            messages=messages,
	            # Honor the UI setting; sliders may deliver a float.
	            max_tokens=int(max_tokens),
	            temperature=temperature,
	            top_p=top_p,
	            stream=True,
	        )

	        response = ""
	        for chunk in completion:
	            # Some stream chunks carry no choices or no text; skip them.
	            if not chunk.choices:
	                continue
	            token = chunk.choices[0].delta.content or ""
	            if not token:
	                continue
	            response += token
	            full_response += token
	            # Yield on every token so the UI streams the whole answer, not
	            # only the moments a thinking tag happens to arrive.
	            yield full_response

	        if round_index == max_search_rounds:
	            break

	        search_results = process_searches(response)
	        if not search_results:
	            break

	        messages.append({"role": "assistant", "content": response})
	        messages.append({
	            "role": "user",
	            "content": f"SEARCH RESULTS:\n{search_results}\nAnalyze these results...",
	        })

	        # Recover the queries from the result headers so the list shown to
	        # the user is exactly what process_searches() actually ran.
	        queries = re.findall(r'^🔍 SEARCH: (.*)$', search_results, re.MULTILINE)
	        full_response += f"\n🔍 SEARCH QUERIES USED:\n{chr(10).join(queries)}\n\n"
	        full_response += "\n🔍 Analyzing search results...\n"
	        yield full_response

	    # Always emit the final state, even if the last turn produced no tokens.
	    yield full_response

	def _build_demo():
	    """Create the chat UI wired to ``respond``.

	    The extra controls are listed in the order ``respond`` receives them
	    after ``message`` and ``history``: system prompt, max tokens,
	    temperature, top-p, API token.
	    """
	    system_prompt_box = gr.Textbox(value=SYSTEM_PROMPT, label="System Prompt", lines=8)
	    max_tokens_slider = gr.Slider(minimum=1000, maximum=15000, value=6000, step=500, label="Max Tokens")
	    temperature_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.1, label="Temperature")
	    top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.85, step=0.05, label="Top-p")
	    token_box = gr.Textbox(label="HF API Token", type="password")

	    # Sample prompts offered in the UI; each entry holds only the message.
	    example_prompts = [
	        ["Compare COVID-19 mortality rates between US and Sweden with sources"],
	        ["What's the current consensus on dark matter composition?"],
	        ["Latest advancements in fusion energy 2023-2024"],
	    ]

	    return gr.ChatInterface(
	        respond,
	        additional_inputs=[
	            system_prompt_box,
	            max_tokens_slider,
	            temperature_slider,
	            top_p_slider,
	            token_box,
	        ],
	        title="Web Research Agent 🤖",
	        description="Advanced AI assistant with web search capabilities",
	        examples=example_prompts,
	        cache_examples=False,
	    )


	demo = _build_demo()

	# Start the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()