commit-labeling / data_loader.py
Petr Tsvetkov
Update the labeling app to use new data
0dd349d
raw
history blame contribute delete
419 Bytes
import os
from datasets import load_dataset
# Hugging Face access token read from the environment; None when the
# HF_TOKEN variable is not set.
HF_TOKEN = os.getenv('HF_TOKEN')

# Column names returned alongside the records by load_data().
MESSAGE_COLS = [
    'reference',
    'prediction',
    'enhanced',
]

# Directory passed to load_dataset() as its cache location.
CACHE_DIR = 'cache'
def load_data():
    """Load the commit-labeling samples and return them with the message columns.

    Requests the "train" split of the
    ``JetBrains-Research/commit-labeling-samples`` dataset through
    ``load_dataset``, passing ``CACHE_DIR`` as the cache directory and
    ``HF_TOKEN`` as the access token, then converts the result to a pandas
    DataFrame and from there to row-oriented records.

    Returns:
        A 2-tuple ``(records, MESSAGE_COLS)`` where ``records`` is the output
        of ``DataFrame.to_dict("records")`` (one dict per row) and
        ``MESSAGE_COLS`` is the module-level list of column names.
    """
    dataset = load_dataset(
        "JetBrains-Research/commit-labeling-samples",
        split="train",
        cache_dir=CACHE_DIR,
        token=HF_TOKEN,
    )
    frame = dataset.to_pandas()
    records = frame.to_dict("records")
    return records, MESSAGE_COLS