Spaces:
Runtime error
Runtime error
"""Simple reader for mbox (mailbox) files.""" | |
import os | |
from pathlib import Path | |
from typing import Any, List | |
from gpt_index.readers.base import BaseReader | |
from gpt_index.readers.file.mbox_parser import MboxParser | |
from gpt_index.readers.schema.base import Document | |
class MboxReader(BaseReader):
    """Reader for e-mail archives stored in the mbox format.

    Walks a directory tree and turns every message found in each
    ``*.mbox`` file into a ``Document``.
    """

    def __init__(self) -> None:
        """Create the reader; no state is stored on the instance."""

    def load_data(self, input_dir: str, **load_kwargs: Any) -> List[Document]:
        """Collect documents from every ``.mbox`` file under ``input_dir``.

        Args:
            input_dir: Root directory to walk. Sub-directories whose name
                starts with "." are not descended into.
            **load_kwargs: Passed unchanged to the ``MboxParser``
                constructor. Documented options:
                max_count (int): Maximum amount of messages to read.
                message_format (str): Message format overriding default.

        Returns:
            One ``Document`` per item returned by the parser, in the order
            the files are visited by ``os.walk``.
        """
        documents: List[Document] = []
        for root, subdirs, files in os.walk(input_dir):
            # Slice-assign in place so os.walk itself skips hidden directories.
            subdirs[:] = [name for name in subdirs if not name.startswith(".")]
            for name in files:
                if not name.endswith(".mbox"):
                    continue
                mbox_path = Path(os.path.join(root, name))
                # A new parser is built for every file from the same kwargs.
                messages = MboxParser(**load_kwargs).parse_file(mbox_path)
                documents.extend(Document(message) for message in messages)
        return documents