def train():
    """Build and persist the FAISS index for the RISE student FAQ.

    Downloads the RISE "what is RISE" page, keeps the text that follows the
    "Student FAQ" heading, splits it into overlapping chunks, embeds the
    chunks with ``OpenAIEmbeddings`` and saves the resulting FAISS vector
    store to the local ``_rise_faq_db`` directory.

    Returns:
        dict: ``{"trained": "success"}`` once the index has been saved.

    Raises:
        IndexError: If the page could not be loaded into a document or does
            not contain the "Student FAQ" marker (for example after a site
            redesign).
    """
    # Imported lazily so the heavy LangChain stack is only loaded when
    # training is actually requested.
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_community.document_loaders import WebBaseLoader
    from langchain_community.vectorstores.faiss import FAISS
    from langchain_openai import OpenAIEmbeddings

    source_url = "https://rise.mmu.ac.uk/what-is-rise/"
    faq_marker = "Student FAQ"
    index_dir = "_rise_faq_db"

    documents = WebBaseLoader(source_url).load()
    if not documents:
        raise IndexError(f"No document could be loaded from {source_url}")

    # Keep only the FAQ section. Everything before the marker is discarded;
    # the segment immediately following the first marker is what gets indexed.
    sections = documents[0].page_content.split(faq_marker)
    if len(sections) < 2:
        raise IndexError(
            f"Marker {faq_marker!r} not found in the content of {source_url}; "
            "the page layout may have changed"
        )
    documents[0].page_content = sections[1].strip()

    # Split the document into small overlapping chunks so that each embedding
    # covers a focused piece of the FAQ.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=250,
        chunk_overlap=50,
    )
    docs = text_splitter.split_documents(documents=documents)

    # NOTE(review): OpenAIEmbeddings() is built with its defaults, so the API
    # credentials are presumably supplied via the environment -- confirm.
    embeddings = OpenAIEmbeddings()

    # Create the vectors and persist them locally on disk.
    vectorstore = FAISS.from_documents(docs, embeddings)
    vectorstore.save_local(index_dir)

    return {"trained": "success"}