import os

from datasets import load_dataset

# Access token read from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.environ.get('HF_TOKEN')
# Column names handed back to callers together with the records.
# NOTE(review): presumably the dataset columns holding the commit-message
# variants -- confirm against the dataset schema.
MESSAGE_COLS = ['reference', 'prediction', 'enhanced']
# Directory name passed to load_dataset as its cache_dir.
CACHE_DIR = 'cache'


def load_data():
    """Load the commit-labeling samples and return them as row records.

    Fetches the "train" split of "JetBrains-Research/commit-labeling-samples"
    via ``load_dataset`` (using ``CACHE_DIR`` as cache directory and
    ``HF_TOKEN`` as token), converts it to a pandas DataFrame and then to a
    list of per-row dictionaries.

    Returns:
        A 2-tuple ``(records, MESSAGE_COLS)`` where ``records`` is the list
        of row dictionaries (column name -> value) and ``MESSAGE_COLS`` is
        the module-level list object itself (not a copy).
    """
    dataset = load_dataset(
        "JetBrains-Research/commit-labeling-samples",
        split="train",
        cache_dir=CACHE_DIR,
        token=HF_TOKEN,
    )
    # "records" orientation yields one dict per row.
    records = dataset.to_pandas().to_dict("records")
    return records, MESSAGE_COLS