File size: 419 Bytes
0dd349d
 
14bb44e
 
0dd349d
 
 
14bb44e
 
 
 
 
0dd349d
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import os

from datasets import load_dataset

# Access token read from the HF_TOKEN environment variable; None when the
# variable is not set. load_data() passes it as `token` to load_dataset.
HF_TOKEN = os.getenv('HF_TOKEN')

# Names returned alongside the records by load_data().
# NOTE(review): presumably the dataset columns holding commit messages — confirm.
MESSAGE_COLS = ['reference', 'prediction', 'enhanced']

# Directory handed to load_dataset as `cache_dir`.
CACHE_DIR = 'cache'


def load_data():
    """Load the commit-labeling samples and return them with MESSAGE_COLS.

    Requests the "train" split of "JetBrains-Research/commit-labeling-samples"
    through ``load_dataset``, using ``CACHE_DIR`` as the cache directory and
    ``HF_TOKEN`` as the token, then converts the result to pandas and on to
    record-oriented form via ``to_dict("records")``.

    Returns:
        A 2-tuple ``(records, MESSAGE_COLS)``: the converted samples and the
        module-level ``MESSAGE_COLS`` list (the same object, not a copy).
    """
    dataset = load_dataset(
        "JetBrains-Research/commit-labeling-samples",
        split="train",
        cache_dir=CACHE_DIR,
        token=HF_TOKEN,
    )
    frame = dataset.to_pandas()
    records = frame.to_dict("records")
    return records, MESSAGE_COLS