import logging

import streamlit as st
from pinecone import Pinecone

from utils import get_variable

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

PINECONE_KEY = get_variable("PINECONE_API_KEY")  # app.pinecone.io
PINE_CONE_ENVIRONMENT = get_variable("PINE_CONE_ENVIRONMENT")  # app.pinecone.io
@st.cache_resource
def init_pinecone():
    # Create a Pinecone client once per session and return a handle
    # to the "semsearch" index.
    pinecone = Pinecone(api_key=PINECONE_KEY, environment=PINE_CONE_ENVIRONMENT)
    return pinecone.Index("semsearch")
def index_query(xq, top_k, regions=None, countries=None, index_namespace=""):
    logger.debug(f"Getting companies from countries: {countries}")
    # Build a Pinecone metadata filter from the optional region/country lists.
    filters = []
    if regions:
        filters.append({"region": {"$in": regions}})
    if countries:
        filters.append({"country": {"$in": countries}})
    if len(filters) == 1:
        filters = filters[0]
    elif len(filters) > 1:
        filters = {"$and": filters}
    else:
        filters = {}
    if "index" not in st.session_state:
        st.session_state.index = init_pinecone()
    xc = st.session_state.index.query(
        vector=xq,
        namespace=index_namespace,
        top_k=top_k,
        filter=filters,
        include_metadata=True,
        include_values=False,
    )
    return xc
def search_index(query, top_k, regions, countries, retriever, index_namespace=""):
    # Encode the query text into a vector with the retriever model.
    xq = retriever.encode([query]).tolist()
    try:
        xc = index_query(xq, top_k=top_k, regions=regions, countries=countries, index_namespace=index_namespace)
    except Exception:
        # The cached index handle may have gone stale; reconnect and retry once.
        pinecone = Pinecone(api_key=PINECONE_KEY, environment=PINE_CONE_ENVIRONMENT)
        st.session_state.index = pinecone.Index("semsearch")
        xc = index_query(xq, top_k=top_k, regions=regions, countries=countries, index_namespace=index_namespace)
    # Flatten the Pinecone matches into plain dicts for the UI layer.
    results = []
    for match in xc["matches"]:
        answer = {"score": match["score"], "metadata": match["metadata"]}
        answer["id"] = match["id"]
        answer["name"] = match["metadata"]["company_name"]
        answer["description"] = match["metadata"].get("description", "")
        results.append(answer)
    return results
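

# Example usage (a sketch, not part of the original file). It assumes the
# retriever is a sentence-transformers model, inferred only from the
# .encode([query]).tolist() call above; the model name, query string, and
# filter values below are hypothetical placeholders.
#
# from sentence_transformers import SentenceTransformer
#
# retriever = SentenceTransformer("all-MiniLM-L6-v2")
# hits = search_index(
#     "renewable energy startups",
#     top_k=10,
#     regions=["Europe"],
#     countries=["Germany"],
#     retriever=retriever,
# )
# for hit in hits:
#     st.write(hit["name"], hit["score"], hit["description"])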