File size: 419 Bytes
0dd349d 14bb44e 0dd349d 14bb44e 0dd349d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
import os
from datasets import load_dataset
# Access token read from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.getenv("HF_TOKEN")

# Column names returned alongside the records by load_data().
MESSAGE_COLS = ["reference", "prediction", "enhanced"]

# Directory passed to load_dataset() as its cache location.
CACHE_DIR = "cache"
def load_data():
    """Load the commit-labeling samples and return them with the message columns.

    Requests the ``train`` split of
    ``JetBrains-Research/commit-labeling-samples`` through ``load_dataset``,
    passing ``CACHE_DIR`` as the cache directory and ``HF_TOKEN`` as the
    access token, then converts the result to a pandas DataFrame and from
    there to a list of per-row dictionaries.

    Returns:
        A 2-tuple ``(records, MESSAGE_COLS)`` where ``records`` is the output
        of ``DataFrame.to_dict("records")`` and ``MESSAGE_COLS`` is the
        module-level list itself (not a copy).
    """
    dataset = load_dataset(
        "JetBrains-Research/commit-labeling-samples",
        split="train",
        cache_dir=CACHE_DIR,
        token=HF_TOKEN,
    )
    frame = dataset.to_pandas()
    records = frame.to_dict("records")
    return records, MESSAGE_COLS
|