Spaces:

hanchraizedai
/

semsearch

Running

App Files Files

semsearch / pineconeclient.py

hanch

fix pinecone startup

c9d11ad verified 28 days ago

raw

history blame

2.62 kB

	import logging
	# Module-level logger named after this module. Its level is INFO, so
	# logger.debug(...) calls made through it are not emitted.
	logger = logging.getLogger(__name__)
	logger.setLevel(logging.INFO)

	import json
	import streamlit as st

	from pinecone import Pinecone

	from utils import get_variable


	# Pinecone connection settings, looked up by name through utils.get_variable.
	# Both are passed to the Pinecone client constructor below.
	PINECONE_KEY = get_variable("PINECONE_API_KEY") # app.pinecone.io
	PINE_CONE_ENVIRONMENT = get_variable("PINE_CONE_ENVIRONMENT") # app.pinecone.io


	@st.cache_resource
	def init_pinecone():
	    """Create a Pinecone client and return a handle to the "semsearch" index.

	    The function is wrapped in ``st.cache_resource``, so Streamlit reuses the
	    returned index handle rather than rebuilding it on every call.
	    """
	    client = Pinecone(api_key=PINECONE_KEY, environment=PINE_CONE_ENVIRONMENT)
	    semsearch_index = client.Index("semsearch")
	    return semsearch_index




	def _build_metadata_filter(regions, countries):
	    """Build the metadata filter dict for a query from region/country lists."""
	    clauses = []
	    if regions:
	        clauses.append({'region': {"$in": regions}})
	    if countries:
	        clauses.append({'country': {"$in": countries}})
	    if not clauses:
	        return {}
	    if len(clauses) == 1:
	        return clauses[0]
	    # Both restrictions are present: a record must satisfy each of them.
	    return {"$and": clauses}


	def index_query(xq, top_k, regions=None, countries=None, index_namespace=""):
	    """Query the Pinecone index held in the Streamlit session state.

	    Args:
	        xq: Query embedding, passed through unchanged as ``vector``.
	        top_k: Number of matches to request from the index.
	        regions: Optional collection of region values. When non-empty,
	            matches are restricted to records whose ``region`` metadata is
	            one of them.
	        countries: Optional collection of country values. When non-empty,
	            matches are restricted to records whose ``country`` metadata
	            is one of them.
	        index_namespace: Namespace to query.

	    Returns:
	        Whatever ``index.query`` returns, requested with metadata included.
	    """
	    # None replaces the former mutable ``[]`` defaults.
	    if regions is None:
	        regions = []
	    if countries is None:
	        countries = []

	    logger.debug("Getting companies from countries: %s ", countries)
	    filters = _build_metadata_filter(regions, countries)

	    # Create the index handle lazily, once per Streamlit session.
	    if 'index' not in st.session_state:
	        st.session_state.index = init_pinecone()

	    # top_k is now honoured; it was previously hard-coded to 20.
	    # NOTE(review): ``include_vectors`` is kept as in the original call; the
	    # Pinecone query argument appears to be ``include_values`` -- confirm.
	    return st.session_state.index.query(
	        vector=xq,
	        namespace=index_namespace,
	        top_k=top_k,
	        filter=filters,
	        include_metadata=True,
	        include_vectors=False,
	    )

	def search_index(query, top_k, regions, countries, retriever, index_namespace=""):
	    """Embed a text query, search the index and return simplified matches.

	    Args:
	        query: Text to search for.
	        top_k: Number of matches to request, forwarded to ``index_query``.
	        regions: Region values to filter on, forwarded to ``index_query``.
	        countries: Country values to filter on, forwarded to ``index_query``.
	        retriever: Object whose ``encode([query])`` result has a ``tolist()``
	            method; used to embed the query.
	        index_namespace: Namespace to query.

	    Returns:
	        A list with one dict per match, holding the keys ``score``,
	        ``metadata``, ``id``, ``name`` (the ``company_name`` metadata) and
	        ``description`` (empty string when the metadata has none).
	    """
	    # NOTE(review): encode([query]) presumably yields one row per input, so
	    # xq may be a nested list -- confirm the index query accepts that shape.
	    xq = retriever.encode([query]).tolist()
	    try:
	        # The namespace is passed on the first attempt too; previously only
	        # the retry forwarded it.
	        xc = index_query(xq, top_k=top_k, regions=regions, countries=countries,
	                         index_namespace=index_namespace)
	    except Exception:
	        # Not a bare ``except``: that would also trap KeyboardInterrupt and
	        # SystemExit.
	        logger.exception("Pinecone query failed; recreating the index handle and retrying")
	        # Force a reload with a fresh client instance. The old code called
	        # ``Pinecone.init`` / ``Pinecone.Index`` on the class itself, which
	        # is the pre-client-object API and fails instead of recovering.
	        client = Pinecone(api_key=PINECONE_KEY, environment=PINE_CONE_ENVIRONMENT)
	        st.session_state.index = client.Index("semsearch")
	        xc = index_query(xq, top_k=top_k, regions=regions, countries=countries,
	                         index_namespace=index_namespace)

	    results = []
	    for match in xc['matches']:
	        metadata = match['metadata']
	        results.append({
	            'score': match['score'],
	            'metadata': metadata,
	            'id': match['id'],
	            'name': metadata['company_name'],
	            'description': metadata['description'] if 'description' in metadata else "",
	        })
	    return results