import os

from datasets import load_dataset

# Hugging Face access token, read from the HF_TOKEN environment variable.
# Evaluates to None when the variable is not set; load_data() passes it
# through unchanged as the `token` argument of load_dataset().
HF_TOKEN = os.getenv('HF_TOKEN')

# Column names handed back to callers of load_data() next to the records.
# NOTE(review): presumably the dataset columns that hold the different
# commit-message variants -- confirm against the dataset schema.
MESSAGE_COLS = [
    'reference',
    'prediction',
    'enhanced',
]

# Directory (relative path) given to load_dataset() as `cache_dir`.
CACHE_DIR = 'cache'


def load_data():
    """Load the commit-labeling samples and return them with the message columns.

    Fetches the ``train`` split of
    ``JetBrains-Research/commit-labeling-samples`` through ``load_dataset``,
    using ``CACHE_DIR`` as the cache directory and ``HF_TOKEN`` as the access
    token, then converts the result to a pandas DataFrame and from there to
    per-row dictionaries.

    Returns:
        A 2-tuple ``(records, MESSAGE_COLS)``: ``records`` is the output of
        ``DataFrame.to_dict("records")`` (one dict per row), and the second
        element is the module-level ``MESSAGE_COLS`` list itself, not a copy.
    """
    dataset = load_dataset(
        "JetBrains-Research/commit-labeling-samples",
        split="train",
        cache_dir=CACHE_DIR,
        token=HF_TOKEN,
    )
    records = dataset.to_pandas().to_dict("records")
    # Returned as a tuple: the rows first, the column names second.
    return records, MESSAGE_COLS