File size: 1,435 Bytes
e7d695a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import sys
import io
import json
with open(sys.argv[1], 'r', encoding='utf8') as reader, \
open(sys.argv[2], 'w', encoding='utf8') as writer :
lines_dict = json.load(reader)
full_rela_lst = []
full_src_lst = []
full_tgt_lst = []
unique_src = 0
for example in lines_dict:
rela_lst = []
temp_triples = ''
for i, tripleset in enumerate(example['tripleset']):
subj, rela, obj = tripleset
rela = rela.lower()
rela_lst.append(rela)
if i > 0:
temp_triples += ' | '
temp_triples += '{} : {} : {}'.format(subj, rela, obj)
unique_src += 1
for sent in example['annotations']:
full_tgt_lst.append(sent['text'])
full_src_lst.append(temp_triples)
full_rela_lst.append(rela_lst)
print('unique source is', unique_src)
for src, tgt in zip(full_src_lst, full_tgt_lst):
x = {}
x['context'] = src # context #+ '||'
x['completion'] = tgt #completion
writer.write(json.dumps(x)+'\n') |