better chunking methodology
Browse files
- agent/prompt.py (+1, -1)
- agent/toolset.py (+7, -5)
- train/faq.py (+2, -2)
agent/prompt.py
CHANGED
@@ -39,7 +39,7 @@ prompt = {
     ]),

     1: ChatPromptTemplate.from_messages([
-        ("system", "The thread_id of this conversation is {thread_id}."),
+        ("system", "The thread_id of this conversation is {thread_id}."),
         ("system", "In your answer you should list the tools used to produce this answer"),
         MessagesPlaceholder(variable_name="conversation")
     ])
agent/toolset.py
CHANGED
@@ -2,16 +2,14 @@ from langchain.agents import tool
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores.faiss import FAISS
 from langchain.chains import RetrievalQA
-from langchain_openai import OpenAI
+from langchain_openai import OpenAI, ChatOpenAI
 from langchain_core.pydantic_v1 import BaseModel, Field

 @tool
 def frequently_asked_questions(input: str):

     """
-
-    Please always use this tool if the user has questions.
-    If you cannot answer the query with the tool, then you should recommend they contact rise@mmu.ac.uk
+    Please always use this tool if the user has questions about our offer
     """

     # Load from local storage
@@ -19,7 +17,11 @@ def frequently_asked_questions(input: str):
     persisted_vectorstore = FAISS.load_local("_rise_faq_db", embeddings)

     # Use RetrievalQA chain for orchestration
-    qa = RetrievalQA.from_chain_type(
+    qa = RetrievalQA.from_chain_type(
+        llm=ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0),
+        chain_type="stuff",
+        return_source_documents=False,
+        retriever=persisted_vectorstore.as_retriever(search_type="similarity_score_threshold",search_kwargs={"k":3, "score_threshold":0.5}))
     result = qa.invoke(input)
     return result
train/faq.py
CHANGED
@@ -11,8 +11,8 @@ def train():
     # Split document in chunks
     text_splitter = RecursiveCharacterTextSplitter(

-        chunk_size=
-        chunk_overlap=
+        chunk_size=250,
+        chunk_overlap=50
     )
     docs = text_splitter.split_documents(documents=documents)