Spaces:

JetBrains-Research
/

commit-labeling

Running

File size: 419 Bytes

0dd349d
 
14bb44e
 
0dd349d
 
 
14bb44e
 
 
 
 
0dd349d

import os

from datasets import load_dataset

# Hugging Face access token taken from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.getenv('HF_TOKEN')

# Column names handed back to callers together with the loaded records.
MESSAGE_COLS = [
    'reference',
    'prediction',
    'enhanced',
]

# Directory passed to load_dataset as its cache location.
CACHE_DIR = 'cache'


def load_data():
    """Load the commit-labeling samples and return them with the message columns.

    Requests the ``train`` split of
    ``JetBrains-Research/commit-labeling-samples`` via ``load_dataset``,
    passing ``CACHE_DIR`` as the cache directory and ``HF_TOKEN`` as the
    access token, then converts the result to a pandas DataFrame and from
    there to a list of per-row dicts.

    Returns:
        A 2-tuple ``(records, MESSAGE_COLS)``: the list of row dicts and the
        module-level list of message column names (the same list object,
        not a copy).
    """
    dataset = load_dataset(
        "JetBrains-Research/commit-labeling-samples",
        split="train",
        cache_dir=CACHE_DIR,
        token=HF_TOKEN,
    )
    # One dict per row, keyed by column name.
    records = dataset.to_pandas().to_dict("records")
    return records, MESSAGE_COLS