File size: 767 Bytes
372531f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import asyncio
import os

from langchain_core.documents import Document
from typing import List, Dict


# Supports the base Document class from langchain
# - https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/documents/base.py
class LangChainDocumentLoader:
    """Wrap a list of documents and convert them to plain dictionaries.

    Each wrapped object is read through its ``page_content`` attribute and
    the ``get`` method of its ``metadata`` attribute.
    """

    def __init__(self, documents: List[Document]):
        """Keep a reference to the documents that ``load`` will convert."""
        self.documents = documents

    async def load(self, metadata_source_index="title") -> List[Dict[str, str]]:
        """Build one ``raw_content``/``url`` record per stored document.

        Args:
            metadata_source_index: Metadata key whose value is reported under
                ``"url"``. An empty string is used when the key is absent.

        Returns:
            A list of dictionaries, one per document, in the stored order.
        """
        return [
            {
                "raw_content": doc.page_content,
                "url": doc.metadata.get(metadata_source_index, ""),
            }
            for doc in self.documents
        ]