File size: 945 Bytes
257931c
 
94b154c
257931c
 
796ceef
 
257931c
224ff63
257931c
 
796ceef
 
46fc259
 
796ceef
257931c
 
 
 
 
 
 
 
8b9c87b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def train():
    """Build and persist the FAISS vector index for the RISE student FAQ.

    Loads the RISE "what is RISE" web page, keeps only the text segment that
    follows the first "Student FAQ" marker (up to the next occurrence of the
    marker, if any), splits it into overlapping chunks, embeds the chunks
    with ``OpenAIEmbeddings`` and saves the resulting FAISS index to the
    local ``_rise_faq_db`` directory.

    Returns:
        dict: ``{"trained": "success"}`` once the index has been saved.

    Raises:
        IndexError: If the loaded page does not contain the "Student FAQ"
            marker. The exception type matches what the bare list indexing
            raised before; only the message is now descriptive.
    """
    # Third-party imports stay local to the function so they are only
    # loaded when training actually runs.
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_community.document_loaders import WebBaseLoader
    from langchain_community.vectorstores.faiss import FAISS
    from langchain_openai import OpenAIEmbeddings

    source_url = "https://rise.mmu.ac.uk/what-is-rise/"
    faq_marker = "Student FAQ"
    index_path = "_rise_faq_db"

    documents = WebBaseLoader(source_url).load()

    # Drop everything before the FAQ heading (navigation, intro text, ...).
    # Fail with an actionable message if the page layout changed and the
    # marker is gone, instead of an opaque "list index out of range".
    segments = documents[0].page_content.split(faq_marker)
    if len(segments) < 2:
        raise IndexError(
            f"marker {faq_marker!r} not found in content loaded from "
            f"{source_url}; the page layout may have changed"
        )
    documents[0].page_content = segments[1].strip()

    # Split the document into small overlapping chunks for retrieval.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=250,
        chunk_overlap=50,
    )
    docs = text_splitter.split_documents(documents=documents)

    # NOTE(review): OpenAIEmbeddings() is constructed without arguments, so
    # it presumably reads its credentials from the environment - confirm.
    embeddings = OpenAIEmbeddings()
    # Create the vectors.
    vectorstore = FAISS.from_documents(docs, embeddings)
    # Persist the vectors locally on disk.
    vectorstore.save_local(index_path)

    return {"trained": "success"}