Spaces:
Running
Running
import asyncio | |
import os | |
from langchain_core.documents import Document | |
from typing import List, Dict | |
# Supports the base Document class from langchain
# - https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/documents/base.py
class LangChainDocumentLoader:
    """Convert LangChain ``Document`` objects into plain dict records.

    Each record produced by :meth:`load` has exactly two keys:
    ``"raw_content"`` (the document's ``page_content``) and ``"url"``
    (a value looked up in the document's ``metadata``).
    """

    def __init__(self, documents: List[Document]):
        """Store the documents to be converted.

        Args:
            documents: Objects exposing ``page_content`` and a dict-like
                ``metadata``; both are read later by :meth:`load`.
        """
        self.documents = documents

    async def load(self, metadata_source_index="title") -> List[Dict[str, str]]:
        """Return one record per stored document, in the stored order.

        The method is declared ``async`` but awaits nothing; it is a
        coroutine, so callers must still await it.

        Args:
            metadata_source_index: Key read from each document's
                ``metadata`` to fill the ``"url"`` field.

        Returns:
            A list of dicts with keys ``"raw_content"`` and ``"url"``.
            ``"url"`` is the empty string when the metadata key is absent.
        """
        return [
            {
                "raw_content": document.page_content,
                # Fall back to "" so every record carries a "url" key.
                "url": document.metadata.get(metadata_source_index, ""),
            }
            for document in self.documents
        ]