markpeace committed
Commit 46fc259 · 1 Parent(s): 224ff63

better chunking methodology

Files changed (3)
  1. agent/prompt.py +1 -1
  2. agent/toolset.py +7 -5
  3. train/faq.py +2 -2
agent/prompt.py CHANGED
@@ -39,7 +39,7 @@ prompt = {
     ]),
 
     1: ChatPromptTemplate.from_messages([
-        ("system", "The thread_id of this conversation is {thread_id}."),
+        ("system", "The thread_id of this conversation is {thread_id}."),
         ("system", "In your answer you should list the tools used to produce this answer"),
         MessagesPlaceholder(variable_name="conversation")
     ])
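
For context, a minimal sketch of how this template might be rendered at runtime; the thread_id value and conversation message below are hypothetical, and the agent wiring around the prompt is not part of this diff.

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage

# Same shape as the edited template in agent/prompt.py
prompt = ChatPromptTemplate.from_messages([
    ("system", "The thread_id of this conversation is {thread_id}."),
    ("system", "In your answer you should list the tools used to produce this answer"),
    MessagesPlaceholder(variable_name="conversation"),
])

# thread_id and the conversation history are filled in per request;
# the values here are placeholders for illustration only.
rendered = prompt.invoke({
    "thread_id": "example-thread-id",
    "conversation": [HumanMessage(content="What tools can you use?")],
})
print(rendered.to_messages())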
agent/toolset.py CHANGED
@@ -2,16 +2,14 @@ from langchain.agents import tool
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores.faiss import FAISS
 from langchain.chains import RetrievalQA
-from langchain_openai import OpenAI
+from langchain_openai import OpenAI, ChatOpenAI
 from langchain_core.pydantic_v1 import BaseModel, Field
 
 @tool
 def frequently_asked_questions(input: str):
 
     """
-    Provides answers to questions about Rise and Futureme.
-    Please always use this tool if the user has questions.
-    If you cannot answer the query with the tool, then you should recommend they contact rise@mmu.ac.uk
+    Please always use this tool if the user has questions about our offer
     """
 
     # Load from local storage
@@ -19,7 +17,11 @@ def frequently_asked_questions(input: str):
     persisted_vectorstore = FAISS.load_local("_rise_faq_db", embeddings)
 
     # Use RetrievalQA chain for orchestration
-    qa = RetrievalQA.from_chain_type(llm=OpenAI(model="gpt-3.5-turbo-instruct", temperature=0), chain_type="stuff", retriever=persisted_vectorstore.as_retriever())
+    qa = RetrievalQA.from_chain_type(
+        llm=ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0),
+        chain_type="stuff",
+        return_source_documents=False,
+        retriever=persisted_vectorstore.as_retriever(search_type="similarity_score_threshold", search_kwargs={"k": 3, "score_threshold": 0.5}))
     result = qa.invoke(input)
     return result
 
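A rough usage sketch for the updated tool, assuming the FAISS index in _rise_faq_db has already been built by train/faq.py and an OpenAI API key is configured; the question string is a hypothetical example.

from agent.toolset import frequently_asked_questions

# @tool-decorated functions are Runnables, so the tool can be
# smoke-tested directly. With the new similarity_score_threshold
# retriever, at most k=3 chunks scoring above 0.5 are stuffed into
# the prompt; off-topic queries now retrieve nothing rather than
# padding the context with weak matches.
result = frequently_asked_questions.invoke("How do I get involved with Rise?")
print(result)

One version caveat: newer langchain-community releases also require allow_dangerous_deserialization=True on FAISS.load_local, so whether the call shown in the diff runs unchanged depends on the pinned version.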
train/faq.py CHANGED
@@ -11,8 +11,8 @@ def train():
     # Split document in chunks
     text_splitter = RecursiveCharacterTextSplitter(
 
-        chunk_size=100,
-        chunk_overlap=20
+        chunk_size=250,
+        chunk_overlap=50
     )
     docs = text_splitter.split_documents(documents=documents)
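
To see what the retuned splitter does, a small standalone sketch; the FAQ-style sample text is invented for illustration. At chunk_size=100 a typical question-and-answer pair was sliced apart, while 250 characters with a 50-character overlap keeps each pair together and preserves shared context across chunk boundaries.

from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=250,   # was 100: room for a full question-and-answer pair
    chunk_overlap=50, # was 20: more shared context across chunk boundaries
)

# Hypothetical FAQ-style text, just to show the chunking behaviour
sample = (
    "Q: What is Rise?\n"
    "A: Rise is a development programme for students.\n\n"
    "Q: Who can take part?\n"
    "A: Students can take part through the Rise portal.\n"
)
for chunk in splitter.split_text(sample):
    print(len(chunk), repr(chunk))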