Spaces:
Runtime error
Runtime error
"""Obsidian reader class. | |
Pass in the path to an Obsidian vault and it will parse all markdown | |
files into a List of Documents, | |
with each Document containing text from under an Obsidian header. | |
""" | |
import os | |
from pathlib import Path | |
from typing import Any, List | |
from langchain.docstore.document import Document as LCDocument | |
from gpt_index.readers.base import BaseReader | |
from gpt_index.readers.file.markdown_parser import MarkdownParser | |
from gpt_index.readers.schema.base import Document | |
class ObsidianReader(BaseReader): | |
"""Utilities for loading data from an Obsidian Vault. | |
Args: | |
input_dir (str): Path to the vault. | |
""" | |
def __init__(self, input_dir: str): | |
"""Init params.""" | |
self.input_dir = Path(input_dir) | |
def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]: | |
"""Load data from the input directory.""" | |
docs: List[str] = [] | |
for dirpath, dirnames, filenames in os.walk(self.input_dir): | |
dirnames[:] = [d for d in dirnames if not d.startswith(".")] | |
for filename in filenames: | |
if filename.endswith(".md"): | |
filepath = os.path.join(dirpath, filename) | |
content = MarkdownParser().parse_file(Path(filepath)) | |
docs.extend(content) | |
return [Document(d) for d in docs] | |
def load_langchain_documents(self, **load_kwargs: Any) -> List[LCDocument]: | |
"""Load data in LangChain document format.""" | |
docs = self.load_data(**load_kwargs) | |
return [d.to_langchain_format() for d in docs] | |