GotoUsuke's picture
Upload folder using huggingface_hub
db4a26f verified
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
import asyncio
import os
import pandas as pd
from graphrag.index import run_pipeline_with_config
# Absolute path to the pipeline definition that sits next to this script.
_script_dir = os.path.dirname(os.path.abspath(__file__))
pipeline_file = os.path.join(_script_dir, "./pipeline.yml")
async def run() -> None:
    """Run the pipeline described by ``pipeline_file`` on an in-memory dataset.

    The dataset is passed straight to ``run_pipeline_with_config`` via its
    ``dataset`` argument. Only the last output emitted by the pipeline is
    kept; its ``result`` is printed. ``"No results!"`` is printed when the
    pipeline emits nothing or when the last output carries no result.
    """
    # Load your dataset
    dataset = _load_dataset_some_unique_way()

    # Load your config without the input section
    config = pipeline_file

    # Only the final output matters here, so remember just the most recent
    # one instead of accumulating every intermediate output in a list.
    pipeline_result = None
    async for output in run_pipeline_with_config(
        config_or_path=config, dataset=dataset
    ):
        pipeline_result = output

    # A pipeline that yields nothing used to crash with IndexError on
    # ``outputs[-1]``; it is now reported the same way as a missing result.
    if pipeline_result is not None and pipeline_result.result is not None:
        # Should look something like
        #    col1  col2 filled_column
        # 0     2     4  Filled Value
        # 1     5    10  Filled Value
        print(pipeline_result.result)
    else:
        print("No results!")
def _load_dataset_some_unique_way() -> pd.DataFrame:
    """Return the small two-row, two-column sample frame used by this example.

    Stands in for whatever custom loading logic a real caller would use.
    """
    # Totally loaded from some other place
    columns = {"col1": [2, 5], "col2": [4, 10]}
    return pd.DataFrame(columns)
# Script entry point: run the async example to completion when this file is
# executed directly (importing the module does not start the pipeline).
if __name__ == "__main__":
    asyncio.run(run())