Spaces:

Shreyas094
/

GPT-Researcher

Running

App Files Files Community

GPT-Researcher / gpt_researcher / retrievers / exa / exa.py

Shreyas094

Upload 528 files

372531f verified about 2 months ago

raw

history blame contribute delete

3.26 kB

	import os
	from ..utils import check_pkg


	class ExaSearch:
	    """
	    Exa API Retriever.

	    Binds an ``exa_py.Exa`` client to a single query and converts the
	    client's response objects into plain lists of dictionaries.
	    """

	    def __init__(self, query):
	        """
	        Initializes the ExaSearch object.

	        Args:
	            query: The search query.

	        Raises:
	            Exception: If the EXA_API_KEY environment variable is missing
	                or blank.
	        """
	        # This validation is necessary since exa_py is optional
	        check_pkg("exa_py")
	        # Imported lazily so the module can be loaded without exa_py installed.
	        from exa_py import Exa

	        self.query = query
	        self.api_key = self._retrieve_api_key()
	        self.client = Exa(api_key=self.api_key)

	    def _retrieve_api_key(self):
	        """
	        Retrieves the Exa API key from environment variables.

	        Returns:
	            The value of the EXA_API_KEY environment variable, unmodified.

	        Raises:
	            Exception: If EXA_API_KEY is unset, empty, or whitespace-only.
	        """
	        api_key = os.environ.get("EXA_API_KEY")
	        # A blank value is as unusable as a missing one: fail here with the
	        # explicit message instead of handing an empty key to the client.
	        if api_key is None or not api_key.strip():
	            raise Exception(
	                "Exa API key not found. Please set the EXA_API_KEY environment variable. "
	                "You can obtain your key from https://exa.ai/"
	            )
	        return api_key

	    @staticmethod
	    def _to_href_body(response):
	        """
	        Converts a client response into ``{"href", "body"}`` dictionaries.

	        Args:
	            response: An object exposing a ``results`` iterable whose items
	                have ``url`` and ``text`` attributes.

	        Returns:
	            A list with one dictionary per result, in the original order.
	        """
	        return [{"href": item.url, "body": item.text} for item in response.results]

	    def search(
	        self, max_results=10, use_autoprompt=False, search_type="neural", **filters
	    ):
	        """
	        Searches the query using the Exa API.

	        Args:
	            max_results: The maximum number of results to return
	                (forwarded to the client as ``num_results``).
	            use_autoprompt: Whether to use autoprompting.
	            search_type: The type of search (e.g., "neural", "keyword")
	                (forwarded to the client as ``type``).
	            **filters: Additional filters (e.g., date range, domains),
	                forwarded to the client unchanged.

	        Returns:
	            A list of ``{"href": url, "body": text}`` dictionaries.
	        """
	        # NOTE(review): "body" is whatever ``result.text`` holds; whether the
	        # client populates it for a plain search is not visible here - confirm.
	        response = self.client.search(
	            self.query,
	            type=search_type,
	            use_autoprompt=use_autoprompt,
	            num_results=max_results,
	            **filters
	        )
	        return self._to_href_body(response)

	    def find_similar(self, url, exclude_source_domain=False, **filters):
	        """
	        Finds similar documents to the provided URL using the Exa API.

	        Args:
	            url: The URL to find similar documents for.
	            exclude_source_domain: Whether to exclude the source domain in
	                the results.
	            **filters: Additional filters, forwarded to the client unchanged.

	        Returns:
	            A list of ``{"href": url, "body": text}`` dictionaries.
	        """
	        response = self.client.find_similar(
	            url, exclude_source_domain=exclude_source_domain, **filters
	        )
	        return self._to_href_body(response)

	    def get_contents(self, ids, **options):
	        """
	        Retrieves the contents of the specified IDs using the Exa API.

	        Args:
	            ids: The IDs of the documents to retrieve.
	            **options: Additional options for content retrieval, forwarded
	                to the client unchanged.

	        Returns:
	            A list of ``{"id": id, "content": text}`` dictionaries.
	        """
	        response = self.client.get_contents(ids, **options)
	        return [{"id": item.id, "content": item.text} for item in response.results]