File size: 945 Bytes
257931c 94b154c 257931c 796ceef 257931c 224ff63 257931c 796ceef 46fc259 796ceef 257931c 8b9c87b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
def train():
    """Rebuild the RISE FAQ vector index from the live web page.

    Loads https://rise.mmu.ac.uk/what-is-rise/ with ``WebBaseLoader``, keeps
    only the text that follows the "Student FAQ" marker, splits it into
    overlapping chunks, embeds the chunks with ``OpenAIEmbeddings`` and saves
    the resulting FAISS index to the local ``_rise_faq_db`` directory.

    Returns:
        dict: ``{"trained": "success"}`` once the index has been saved.

    Raises:
        IndexError: If the loader returns no documents, or the page text does
            not contain the "Student FAQ" marker. The exception type is the
            one the unguarded indexing raised before; only the message is
            new, so existing callers keep working.
    """
    # Imports stay function-local, as in the original block.
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_community.document_loaders import WebBaseLoader
    from langchain_community.vectorstores.faiss import FAISS
    from langchain_openai import OpenAIEmbeddings

    source_url = "https://rise.mmu.ac.uk/what-is-rise/"
    faq_marker = "Student FAQ"

    documents = WebBaseLoader(source_url).load()
    if not documents:
        raise IndexError(f"no documents were loaded from {source_url}")

    # Keep the segment that follows the first occurrence of the marker (up to
    # the next occurrence, if the marker appears again). Checking the split
    # result first turns a bare "list index out of range" into an actionable
    # message when the page layout changes.
    sections = documents[0].page_content.split(faq_marker)
    if len(sections) < 2:
        raise IndexError(
            f"marker {faq_marker!r} not found in the content of {source_url}"
        )
    documents[0].page_content = sections[1].strip()

    # Split the document into chunks.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=250,
        chunk_overlap=50,
    )
    docs = text_splitter.split_documents(documents=documents)

    # NOTE(review): OpenAIEmbeddings() is built with no arguments, so it
    # presumably picks up its API key from the environment - confirm.
    embeddings = OpenAIEmbeddings()

    # Create the vectors.
    vectorstore = FAISS.from_documents(docs, embeddings)

    # Persist the vectors locally on disk.
    vectorstore.save_local("_rise_faq_db")

    return {"trained": "success"}