import logging
import json

import streamlit as st
from pinecone import Pinecone

from utils import get_variable

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

PINECONE_KEY = get_variable("PINECONE_API_KEY")  # app.pinecone.io
PINE_CONE_ENVIRONMENT = get_variable("PINE_CONE_ENVIRONMENT")  # app.pinecone.io


@st.cache_resource
def init_pinecone():
    """Create a Pinecone client and return a handle to the "semsearch" index.

    The function is wrapped in ``st.cache_resource``; call
    ``init_pinecone.clear()`` before calling it again when a fresh handle
    is required.
    """
    client = Pinecone(api_key=PINECONE_KEY, environment=PINE_CONE_ENVIRONMENT)
    return client.Index("semsearch")


def _build_metadata_filter(regions, countries):
    """Return the metadata filter for the given region and country lists.

    Empty or ``None`` lists contribute no clause. No clause yields ``{}``,
    one clause is returned as-is, and two clauses are joined with ``$and``.
    """
    clauses = []
    if regions:
        clauses.append({'region': {"$in": regions}})
    if countries:
        clauses.append({'country': {"$in": countries}})

    if not clauses:
        return {}
    if len(clauses) == 1:
        return clauses[0]
    return {"$and": clauses}


def index_query(xq, top_k, regions=None, countries=None, index_namespace=""):
    """Query the index held in ``st.session_state`` and return the raw response.

    Args:
        xq: Query vector, forwarded unchanged as ``vector``.
        top_k: Number of matches to request.
        regions: Optional list of values to match against the ``region``
            metadata field.
        countries: Optional list of values to match against the ``country``
            metadata field.
        index_namespace: Namespace to query.

    Returns:
        Whatever ``index.query`` returns (metadata included, values excluded).
    """
    logger.debug("Getting companies from countries: %s ", countries)
    filters = _build_metadata_filter(regions, countries)

    # Lazily create the index handle the first time this session queries.
    if 'index' not in st.session_state:
        st.session_state.index = init_pinecone()

    return st.session_state.index.query(
        vector=xq,
        namespace=index_namespace,
        top_k=top_k,  # honour the caller's value instead of a fixed 20
        filter=filters,
        include_metadata=True,
        include_values=False,
    )


def search_index(query, top_k, regions, countries, retriever, index_namespace=""):
    """Encode ``query``, search the index and return a list of result dicts.

    Args:
        query: Text to search for.
        top_k: Number of matches to request.
        regions: List of regions to filter on (may be empty).
        countries: List of countries to filter on (may be empty).
        retriever: Object whose ``encode([query])`` result has ``.tolist()``.
        index_namespace: Namespace to query.

    Returns:
        One dict per match with the keys ``score``, ``metadata``, ``id``,
        ``name`` (from ``metadata['company_name']``) and ``description``
        (empty string when the metadata has no description).

    Raises:
        Exception: Whatever the second query attempt raises, if the retry
            after rebuilding the index handle also fails.
    """
    # NOTE(review): encode() is given a one-element list, so xq may be a
    # nested list -- TODO confirm this is the shape the index expects.
    xq = retriever.encode([query]).tolist()

    try:
        xc = index_query(
            xq,
            top_k=top_k,
            regions=regions,
            countries=countries,
            index_namespace=index_namespace,
        )
    except Exception:
        # Best-effort recovery: discard the cached handle, build a fresh
        # one and retry exactly once. A second failure propagates.
        logger.warning(
            "Index query failed; rebuilding the index handle and retrying",
            exc_info=True,
        )
        init_pinecone.clear()
        st.session_state.index = init_pinecone()
        xc = index_query(
            xq,
            top_k=top_k,
            regions=regions,
            countries=countries,
            index_namespace=index_namespace,
        )

    return [
        {
            'score': match['score'],
            'metadata': match['metadata'],
            'id': match['id'],
            'name': match['metadata']['company_name'],
            'description': match['metadata'].get('description', ""),
        }
        for match in xc['matches']
    ]