File size: 1,636 Bytes
fa3faa9
 
b829268
fa3faa9
 
 
 
045f1d7
 
fa3faa9
7def859
 
 
 
045f1d7
b829268
fa3faa9
7def859
045f1d7
fa3faa9
 
045f1d7
fa3faa9
 
 
 
045f1d7
fa3faa9
582d6c9
fa3faa9
045f1d7
fa3faa9
7def859
045f1d7
fa3faa9
582d6c9
fa3faa9
045f1d7
 
 
fa3faa9
b829268
 
 
 
fa3faa9
 
 
 
 
 
045f1d7
fa3faa9
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import csv
import pickle
from random import randint

### NOTICE: only the first two columns of each CSV row are used, but the same key may be supplied multiple times (its values accumulate).


# 1_2_3, 1 is action, 2 is supply object, 3 is source object
def update_dict_csv(term_dict: dict, f):
    """Merge the rows of a two-column CSV into ``term_dict`` in place.

    The first column is lower-cased and used as the key; the second column
    is added to that key's list of values unless it is already present, in
    which case a notice is printed. Rows with fewer than two columns (for
    example blank lines) are skipped instead of raising ``IndexError``.
    After merging, the dictionary is reordered in place so that longer keys
    come first (keys of equal length keep their relative order).

    Args:
        term_dict: Mapping of key -> list of values; mutated in place.
        f: Iterable of CSV text lines, e.g. an open text file.

    Returns:
        None. The result is visible through ``term_dict``.
    """
    for row in csv.reader(f):
        if len(row) < 2:
            # csv.reader yields [] for blank lines; nothing to merge.
            continue
        word = row[0].lower()
        value = row[1]
        values = term_dict.setdefault(word, [])
        if value in values:
            print("{},{} 已存在".format(word, value))
        else:
            values.append(value)

    # Rebinding the parameter would not affect the caller's dictionary, so
    # rebuild the same dict object in the desired order instead.
    ordered = sorted(term_dict.items(), key=lambda item: len(item[0]), reverse=True)
    term_dict.clear()
    term_dict.update(ordered)

def export_csv_dict(term_dict: dict, f):
    """Write every entry of ``term_dict`` to ``f`` as a two-column CSV row.

    The first column is the key and the second is the value object itself,
    which the csv writer renders with ``str()`` (so a list value is written
    in its Python list representation).

    Args:
        term_dict: Mapping whose items are exported in iteration order.
        f: Writable text stream that receives the CSV rows.
    """
    writer = csv.writer(f)
    writer.writerows([term, values] for term, values in term_dict.items())

def save_pickle_dict(term_dict: dict, f):
    """Serialize ``term_dict`` to the binary stream ``f`` with pickle.

    The highest pickle protocol available to the running interpreter is used.

    Args:
        term_dict: Object to serialize.
        f: Writable binary stream that receives the pickle data.
    """
    pickler = pickle.Pickler(f, pickle.HIGHEST_PROTOCOL)
    pickler.dump(term_dict)

def update_pickel_csv(pickle_f, csv_f):
    """Load a pickled dictionary, merge a CSV into it, and write it back.

    Args:
        pickle_f: Binary stream opened for both reading and writing that
            holds the pickled dictionary; its contents are replaced with
            the updated dictionary.
        csv_f: Iterable of CSV text lines passed to ``update_dict_csv``.
    """
    # SECURITY: pickle.load can execute arbitrary code while unpickling;
    # only use this on pickle files from a trusted source.
    term_dict = pickle.load(pickle_f)
    update_dict_csv(term_dict, csv_f)
    # Loading advanced the file position past the old pickle. Without
    # rewinding, the new pickle would be appended after the stale one and a
    # later pickle.load would still return the old dictionary.
    if pickle_f.seekable():
        pickle_f.seek(0)
        pickle_f.truncate()
    # Save to the pickle file; the highest protocol gives better performance.
    pickle.dump(term_dict, pickle_f, pickle.HIGHEST_PROTOCOL)

def sort_dict(term_dict: dict):
    """Return a new dict with the entries of ``term_dict``, longest key first.

    Keys of equal length keep their original relative order. The input
    dictionary is not modified.

    Args:
        term_dict: Mapping whose keys support ``len()``.

    Returns:
        A new ``dict`` ordered by descending key length.
    """
    entries = list(term_dict.items())
    entries.sort(key=lambda entry: len(entry[0]), reverse=True)
    return dict(entries)

def get_word(term_dict: dict, key: str) -> str:
    """Return one randomly chosen entry from the list stored under ``key``.

    Args:
        term_dict: Mapping of key -> non-empty sequence of candidates.
        key: Key to look up.

    Returns:
        A uniformly random element of ``term_dict[key]``.

    Raises:
        KeyError: If ``key`` is not in ``term_dict``.
        ValueError: If the stored sequence is empty (raised by ``randint``).
    """
    candidates = term_dict[key]
    last_index = len(candidates) - 1
    pick = randint(0, last_index)
    return candidates[pick]

# Demo: build a dictionary from the CSV file and export it to another CSV.
# NOTE: these statements run whenever the module is executed or imported.
term_dict_sc2 = {}
# The csv module expects files opened with newline='' so that it can handle
# line endings itself (avoids "\r\r\n" rows on Windows and broken embedded
# newlines inside quoted fields).
with open("./finetune_data/dict_enzh.csv", 'r', encoding='utf-8', newline='') as f:
    update_dict_csv(term_dict_sc2, f)

with open("../test.csv", "w", encoding='utf-8', newline='') as w:
    export_csv_dict(term_dict_sc2, w)

## To load a pickled dictionary, just call:
# pickle.load(f)