File size: 342 Bytes
7f0844d
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
import config
from transformers import AutoTokenizer
from typing import List


def align_features(dataset):
    """Normalize the ``"authors"`` field of every example to a list.

    A per-example function is applied through ``dataset.map`` and the result
    of that call is returned as-is.

    Args:
        dataset: Object exposing ``map(function)``. Each example handed to the
            function must support ``example["authors"]``.
            NOTE(review): presumably a Hugging Face ``datasets`` object --
            confirm against the caller.

    Returns:
        Whatever ``dataset.map`` returns for the normalizing function.
    """

    def _authors_as_list(record):
        authors = record["authors"]

        # Already a list: hand the example back unchanged.
        if isinstance(authors, list):
            return record

        # Falsy non-list values (None, "", 0, ...) collapse to an empty list;
        # any other value is wrapped in a single-element list.
        wrapped = [authors] if authors else []
        return {"authors": wrapped}

    return dataset.map(_authors_as_list)