# Copyright (c) 2024 Microsoft Corporation. | |
# Licensed under the MIT License | |
import asyncio | |
import os | |
import pandas as pd | |
from graphrag.index import run_pipeline_with_config | |
pipeline_file = os.path.join( | |
os.path.dirname(os.path.abspath(__file__)), "./pipeline.yml" | |
) | |
async def run(): | |
# Load your dataset | |
dataset = _load_dataset_some_unique_way() | |
# Load your config without the input section | |
config = pipeline_file | |
# Grab the last result from the pipeline, should be our entity extraction | |
outputs = [] | |
async for output in run_pipeline_with_config( | |
config_or_path=config, dataset=dataset | |
): | |
outputs.append(output) | |
pipeline_result = outputs[-1] | |
if pipeline_result.result is not None: | |
# Should look something like | |
# col1 col2 filled_column | |
# 0 2 4 Filled Value | |
# 1 5 10 Filled Value | |
print(pipeline_result.result) | |
else: | |
print("No results!") | |
def _load_dataset_some_unique_way() -> pd.DataFrame: | |
# Totally loaded from some other place | |
return pd.DataFrame([{"col1": 2, "col2": 4}, {"col1": 5, "col2": 10}]) | |
if __name__ == "__main__": | |
asyncio.run(run()) | |