markpeace committed
Commit 46fc259 · 1 Parent(s): 224ff63

better chunking methodology

Files changed (3)
  1. agent/prompt.py +1 -1
  2. agent/toolset.py +7 -5
  3. train/faq.py +2 -2
agent/prompt.py CHANGED
@@ -39,7 +39,7 @@ prompt = {
     ]),
 
     1: ChatPromptTemplate.from_messages([
-        ("system", "The thread_id of this conversation is {thread_id}."),
+        ("system", "The thread_id of this conversation is {thread_id}."),
         ("system", "In your answer you should list the tools used to produce this answer"),
         MessagesPlaceholder(variable_name="conversation")
     ])
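
For context, a minimal sketch of how this template might be rendered at runtime; the thread_id value and conversation message below are hypothetical, and the agent wiring around the prompt is not part of this diff.

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage

# Same shape as the edited template in agent/prompt.py
prompt = ChatPromptTemplate.from_messages([
    ("system", "The thread_id of this conversation is {thread_id}."),
    ("system", "In your answer you should list the tools used to produce this answer"),
    MessagesPlaceholder(variable_name="conversation"),
])

# thread_id and the conversation history are filled in per request;
# the values here are placeholders for illustration only.
rendered = prompt.invoke({
    "thread_id": "example-thread-id",
    "conversation": [HumanMessage(content="What tools can you use?")],
})
print(rendered.to_messages())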
agent/toolset.py CHANGED
@@ -2,16 +2,14 @@ from langchain.agents import tool
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores.faiss import FAISS
 from langchain.chains import RetrievalQA
-from langchain_openai import OpenAI
+from langchain_openai import OpenAI, ChatOpenAI
 from langchain_core.pydantic_v1 import BaseModel, Field
 
 @tool
 def frequently_asked_questions(input: str):
 
     """
-    Provides answers to questions about Rise and Futureme.
-    Please always use this tool if the user has questions.
-    If you cannot answer the query with the tool, then you should recommend they contact rise@mmu.ac.uk
+    Please always use this tool if the user has questions about our offer
     """
 
     # Load from local storage
@@ -19,7 +17,11 @@ def frequently_asked_questions(input: str):
     persisted_vectorstore = FAISS.load_local("_rise_faq_db", embeddings)
 
     # Use RetrievalQA chain for orchestration
-    qa = RetrievalQA.from_chain_type(llm=OpenAI(model="gpt-3.5-turbo-instruct", temperature=0), chain_type="stuff", retriever=persisted_vectorstore.as_retriever())
+    qa = RetrievalQA.from_chain_type(
+        llm=ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0),
+        chain_type="stuff",
+        return_source_documents=False,
+        retriever=persisted_vectorstore.as_retriever(search_type="similarity_score_threshold", search_kwargs={"k": 3, "score_threshold": 0.5}))
     result = qa.invoke(input)
     return result
 
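A rough usage sketch for the updated tool, assuming the FAISS index in _rise_faq_db has already been built by train/faq.py and an OpenAI API key is configured; the question string is a hypothetical example.

from agent.toolset import frequently_asked_questions

# @tool-decorated functions are Runnables, so the tool can be
# smoke-tested directly. With the new similarity_score_threshold
# retriever, at most k=3 chunks scoring above 0.5 are stuffed into
# the prompt; off-topic queries now retrieve nothing rather than
# padding the context with weak matches.
result = frequently_asked_questions.invoke("How do I get involved with Rise?")
print(result)

One version caveat: newer langchain-community releases also require allow_dangerous_deserialization=True on FAISS.load_local, so whether the call shown in the diff runs unchanged depends on the pinned version.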
train/faq.py CHANGED
@@ -11,8 +11,8 @@ def train():
     # Split document in chunks
     text_splitter = RecursiveCharacterTextSplitter(
 
-        chunk_size=100,
-        chunk_overlap=20
+        chunk_size=250,
+        chunk_overlap=50
     )
     docs = text_splitter.split_documents(documents=documents)
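
To see what the retuned splitter does, a small standalone sketch; the FAQ-style sample text is invented for illustration. At chunk_size=100 a typical question-and-answer pair was sliced apart, while 250 characters with a 50-character overlap keeps each pair together and preserves shared context across chunk boundaries.

from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=250,   # was 100: room for a full question-and-answer pair
    chunk_overlap=50, # was 20: more shared context across chunk boundaries
)

# Hypothetical FAQ-style text, just to show the chunking behaviour
sample = (
    "Q: What is Rise?\n"
    "A: Rise is a development programme for students.\n\n"
    "Q: Who can take part?\n"
    "A: Students can take part through the Rise portal.\n"
)
for chunk in splitter.split_text(sample):
    print(len(chunk), repr(chunk))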