sammarigarcia committed on
Commit
3f394e4
·
verified ·
1 Parent(s): dd9bc92

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -1
app.py CHANGED
@@ -31,7 +31,18 @@ text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100
31
  split_docs = text_splitter.split_documents(docs)
32
 
33
  embeddings = OpenAIEmbeddings()
34
- vectorstore = FAISS.from_documents(split_docs, embeddings)
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  qa_chain = RetrievalQA.from_chain_type(
37
  llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"),
 
31
  split_docs = text_splitter.split_documents(docs)
32
 
33
  embeddings = OpenAIEmbeddings()
34
+ from tqdm import tqdm
35
+
36
+ # Dividir en lotes de, por ejemplo, 100 documentos
37
+ batch_size = 100
38
+ vectorstore = None
39
+
40
+ for i in tqdm(range(0, len(split_docs), batch_size)):
41
+ batch = split_docs[i:i+batch_size]
42
+ if vectorstore is None:
43
+ vectorstore = FAISS.from_documents(batch, embeddings)
44
+ else:
45
+ vectorstore.add_documents(batch)
46
 
47
  qa_chain = RetrievalQA.from_chain_type(
48
  llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"),