File size: 1,058 Bytes
2359bda |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
from . import InputExample
import csv
import gzip
import os
import gzip
class PairedFilesReader(object):
"""
Reads in the a Pair Dataset, split in two files
"""
def __init__(self, filepaths):
self.filepaths = filepaths
def get_examples(self, max_examples=0):
"""
"""
fIns = []
for filepath in self.filepaths:
fIn = gzip.open(filepath, 'rt', encoding='utf-8') if filepath.endswith('.gz') else open(filepath, encoding='utf-8')
fIns.append(fIn)
examples = []
eof = False
while not eof:
texts = []
for fIn in fIns:
text = fIn.readline()
if text == '':
eof = True
break
texts.append(text)
if eof:
break;
examples.append(InputExample(guid=str(len(examples)), texts=texts, label=1))
if max_examples > 0 and len(examples) >= max_examples:
break
return examples |