File size: 1,270 Bytes
2359bda |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
from . import InputExample
import csv
import gzip
import os
class LabelSentenceReader:
"""Reads in a file that has at least two columns: a label and a sentence.
This reader can for example be used with the BatchHardTripletLoss.
Maps labels automatically to integers"""
def __init__(self, folder, label_col_idx=0, sentence_col_idx=1, separator='\t'):
self.folder = folder
self.label_map = {}
self.label_col_idx = label_col_idx
self.sentence_col_idx = sentence_col_idx
self.separator = separator
def get_examples(self, filename, max_examples=0):
examples = []
id = 0
for line in open(os.path.join(self.folder, filename), encoding="utf-8"):
splits = line.strip().split(self.separator)
label = splits[self.label_col_idx]
sentence = splits[self.sentence_col_idx]
if label not in self.label_map:
self.label_map[label] = len(self.label_map)
label_id = self.label_map[label]
guid = "%s-%d" % (filename, id)
id += 1
examples.append(InputExample(guid=guid, texts=[sentence], label=label_id))
if 0 < max_examples <= id:
break
return examples
|