Spaces:
Runtime error
Runtime error
from langchain_mongodb import MongoDBAtlasVectorSearch | |
from langchain_community.embeddings import HuggingFaceEmbeddings | |
from pymongo import MongoClient | |
from langchain_core.runnables import RunnablePassthrough | |
from langchain_core.output_parsers import StrOutputParser | |
from langchain.prompts import ChatPromptTemplate | |
from langchain_community.llms import HuggingFaceEndpoint | |
import os | |
config= { | |
'MONGODB_CONN_STRING': os.getenv('MONGODB_CONN_STRING'), | |
'HUGGINGFACEHUB_API_TOKEN': os.getenv('HUGGINGFACEHUB_API_TOKEN'), | |
'DB_NAME':os.getenv('DB_NAME'), | |
'VECTOR_SEARCH_INDEX':os.getenv('VECTOR_SEARCH_INDEX'), | |
'PASSWORD_DB': os.getenv('PASSWORD_DB') | |
} | |
client = MongoClient(config['MONGODB_CONN_STRING']) | |
embeddings = HuggingFaceEmbeddings(model_name= "intfloat/e5-large-v2") | |
llm_model = HuggingFaceEndpoint(repo_id='mistralai/Mistral-7B-Instruct-v0.2', | |
huggingfacehub_api_token=config['HUGGINGFACEHUB_API_TOKEN'], | |
temperature=0.3) | |
template = """ | |
<s>[INST] Instruction:Your are a helpful chatbot who can answer all data science ,anime and manga questions. | |
You have to follow these rules strictly while answering the question based on context: | |
1. Do not use the word context or based on context which is provided in answers. | |
2. If there is no context you have to answer in 128 words not more than that. | |
3. context are in series format so make your own best pattern based on that give answer. | |
[/INST] | |
context: | |
{context}</s> | |
### QUESTION: | |
{question} [/INST] | |
""" | |
prompt = ChatPromptTemplate.from_template(template=template) | |
parser = StrOutputParser() | |
def get_all_collections(): | |
database = client[config['DB_NAME']] | |
names = database.list_collection_names() | |
coll_dict = {} | |
for name in names: | |
coll_dict[name] = ' '.join(str(name).capitalize().split('_')) | |
return coll_dict | |
class VECTORDB_STORE: | |
def __init__(self, coll_name): | |
collection_name = self.get_collection_name(coll_name) | |
collection = client[config['DB_NAME']][collection_name] | |
self.vectordb_store = MongoDBAtlasVectorSearch(collection =collection, | |
embedding= embeddings, | |
index_name= config['VECTOR_SEARCH_INDEX']) | |
def get_collection_name(coll_name): | |
for key, value in get_all_collections().items(): | |
if coll_name == value: | |
return key | |
return None | |
def chain(self): | |
retriever = self.vectordb_store.as_retriever(search_kwargs={"k": 10}) | |
chain = {'context': retriever, 'question': RunnablePassthrough()} | prompt | llm_model | parser | |
return chain | |