Spaces:
Running
Running
import asyncio | |
from metagpt.config2 import config | |
from metagpt.const import EXAMPLE_DATA_PATH | |
from metagpt.logs import logger | |
from metagpt.rag.parsers import OmniParse | |
from metagpt.rag.schema import OmniParseOptions, OmniParseType, ParseResultType | |
from metagpt.utils.omniparse_client import OmniParseClient | |
# Sample inputs for the examples below, one per media kind, all located under
# the "omniparse" folder of the example data directory.
TEST_DOCX = EXAMPLE_DATA_PATH / "omniparse/test01.docx"
TEST_PDF = EXAMPLE_DATA_PATH / "omniparse/test02.pdf"
TEST_VIDEO = EXAMPLE_DATA_PATH / "omniparse/test03.mp4"
TEST_AUDIO = EXAMPLE_DATA_PATH / "omniparse/test04.mp3"
async def omniparse_client_example():
    """Parse each sample file with ``OmniParseClient`` and log every result.

    The docx sample is read into memory and submitted as raw bytes; the pdf,
    video and audio samples are submitted by path.
    """
    client = OmniParseClient(base_url=config.omniparse.base_url)

    # docx: submitted as bytes, so a file name is passed alongside the payload
    # (presumably so the service can tell what kind of file it is -- confirm
    # against the client implementation).
    with open(TEST_DOCX, "rb") as docx_file:
        docx_bytes = docx_file.read()
    docx_result = await client.parse_document(file_input=docx_bytes, bytes_filename="test_01.docx")
    logger.info(docx_result)

    # pdf
    pdf_result = await client.parse_pdf(file_input=TEST_PDF)
    logger.info(pdf_result)

    # video
    video_result = await client.parse_video(file_input=TEST_VIDEO)
    logger.info(video_result)

    # audio
    audio_result = await client.parse_audio(file_input=TEST_AUDIO)
    logger.info(audio_result)
async def omniparse_example():
    """Load the sample files through the ``OmniParse`` parser and log the results.

    The parser is first configured for PDF input and used through its
    synchronous ``load_data`` call; it is then switched to DOCUMENT parsing
    and used through the awaitable ``aload_data`` call with a list of paths.
    """
    options = OmniParseOptions(
        parse_type=OmniParseType.PDF,
        result_type=ParseResultType.MD,
        max_timeout=120,
        num_workers=3,
    )
    parser = OmniParse(
        api_key=config.omniparse.api_key,
        base_url=config.omniparse.base_url,
        parse_options=options,
    )

    # Single file through the synchronous entry point.
    pdf_result = parser.load_data(file_path=TEST_PDF)
    logger.info(pdf_result)

    # Several files at once through the async entry point, after switching
    # the parser over to DOCUMENT parsing.
    batch_paths = [TEST_DOCX, TEST_PDF]
    parser.parse_type = OmniParseType.DOCUMENT
    batch_result = await parser.aload_data(file_path=batch_paths)
    logger.info(batch_result)
async def main():
    """Run the client example, then the parser example, one after the other."""
    for example in (omniparse_client_example, omniparse_example):
        await example()
if __name__ == "__main__":
    # Script entry point: drive the async examples to completion.
    asyncio.run(main())