|
import numpy as np
|
|
import pickle
|
|
from os.path import join as pjoin
|
|
|
|
POS_enumerator = {
|
|
'VERB': 0,
|
|
'NOUN': 1,
|
|
'DET': 2,
|
|
'ADP': 3,
|
|
'NUM': 4,
|
|
'AUX': 5,
|
|
'PRON': 6,
|
|
'ADJ': 7,
|
|
'ADV': 8,
|
|
'Loc_VIP': 9,
|
|
'Body_VIP': 10,
|
|
'Obj_VIP': 11,
|
|
'Act_VIP': 12,
|
|
'Desc_VIP': 13,
|
|
'OTHER': 14,
|
|
}
|
|
|
|
Loc_list = ('left', 'right', 'clockwise', 'counterclockwise', 'anticlockwise', 'forward', 'back', 'backward',
|
|
'up', 'down', 'straight', 'curve')
|
|
|
|
Body_list = ('arm', 'chin', 'foot', 'feet', 'face', 'hand', 'mouth', 'leg', 'waist', 'eye', 'knee', 'shoulder', 'thigh')
|
|
|
|
Obj_List = ('stair', 'dumbbell', 'chair', 'window', 'floor', 'car', 'ball', 'handrail', 'baseball', 'basketball')
|
|
|
|
Act_list = ('walk', 'run', 'swing', 'pick', 'bring', 'kick', 'put', 'squat', 'throw', 'hop', 'dance', 'jump', 'turn',
|
|
'stumble', 'dance', 'stop', 'sit', 'lift', 'lower', 'raise', 'wash', 'stand', 'kneel', 'stroll',
|
|
'rub', 'bend', 'balance', 'flap', 'jog', 'shuffle', 'lean', 'rotate', 'spin', 'spread', 'climb')
|
|
|
|
Desc_list = ('slowly', 'carefully', 'fast', 'careful', 'slow', 'quickly', 'happy', 'angry', 'sad', 'happily',
|
|
'angrily', 'sadly')
|
|
|
|
VIP_dict = {
|
|
'Loc_VIP': Loc_list,
|
|
'Body_VIP': Body_list,
|
|
'Obj_VIP': Obj_List,
|
|
'Act_VIP': Act_list,
|
|
'Desc_VIP': Desc_list,
|
|
}
|
|
|
|
|
|
class WordVectorizer(object):
|
|
def __init__(self, meta_root, prefix):
|
|
vectors = np.load(pjoin(meta_root, '%s_data.npy'%prefix))
|
|
words = pickle.load(open(pjoin(meta_root, '%s_words.pkl'%prefix), 'rb'))
|
|
self.word2idx = pickle.load(open(pjoin(meta_root, '%s_idx.pkl'%prefix), 'rb'))
|
|
self.word2vec = {w: vectors[self.word2idx[w]] for w in words}
|
|
|
|
def _get_pos_ohot(self, pos):
|
|
pos_vec = np.zeros(len(POS_enumerator))
|
|
if pos in POS_enumerator:
|
|
pos_vec[POS_enumerator[pos]] = 1
|
|
else:
|
|
pos_vec[POS_enumerator['OTHER']] = 1
|
|
return pos_vec
|
|
|
|
def __len__(self):
|
|
return len(self.word2vec)
|
|
|
|
def __getitem__(self, item):
|
|
word, pos = item.split('/')
|
|
if word in self.word2vec:
|
|
word_vec = self.word2vec[word]
|
|
vip_pos = None
|
|
for key, values in VIP_dict.items():
|
|
if word in values:
|
|
vip_pos = key
|
|
break
|
|
if vip_pos is not None:
|
|
pos_vec = self._get_pos_ohot(vip_pos)
|
|
else:
|
|
pos_vec = self._get_pos_ohot(pos)
|
|
else:
|
|
word_vec = self.word2vec['unk']
|
|
pos_vec = self._get_pos_ohot('OTHER')
|
|
return word_vec, pos_vec
|
|
|
|
|
|
class WordVectorizerV2(WordVectorizer):
|
|
def __init__(self, meta_root, prefix):
|
|
super(WordVectorizerV2, self).__init__(meta_root, prefix)
|
|
self.idx2word = {self.word2idx[w]: w for w in self.word2idx}
|
|
|
|
def __getitem__(self, item):
|
|
word_vec, pose_vec = super(WordVectorizerV2, self).__getitem__(item)
|
|
word, pos = item.split('/')
|
|
if word in self.word2vec:
|
|
return word_vec, pose_vec, self.word2idx[word]
|
|
else:
|
|
return word_vec, pose_vec, self.word2idx['unk']
|
|
|
|
def itos(self, idx):
|
|
if idx == len(self.idx2word):
|
|
return "pad"
|
|
return self.idx2word[idx] |