dianecy committed
Commit 2d6ecd9 · verified · 1 Parent(s): 8377130

Upload folder using huggingface_hub

utils/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (257 Bytes)

utils/__pycache__/config.cpython-39.pyc ADDED
Binary file (4.39 kB)

utils/__pycache__/dataset.cpython-39.pyc ADDED
Binary file (6.74 kB)

utils/__pycache__/dataset_verbonly.cpython-39.pyc ADDED
Binary file (9.36 kB)

utils/__pycache__/misc.cpython-39.pyc ADDED
Binary file (9.2 kB)

utils/__pycache__/simple_tokenizer.cpython-39.pyc ADDED
Binary file (5.75 kB)
utils/config.py CHANGED
@@ -76,6 +76,7 @@ def load_cfg_from_cfg_file(file):
 
 def merge_cfg_from_list(cfg, cfg_list):
     new_cfg = copy.deepcopy(cfg)
+    print(cfg_list)
     assert len(cfg_list) % 2 == 0
     for full_key, v in zip(cfg_list[0::2], cfg_list[1::2]):
         subkey = full_key.split('.')[-1]
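Context for this change: merge_cfg_from_list consumes a flat, alternating key/value list, which is what the new print(cfg_list) dumps and what the existing `assert len(cfg_list) % 2 == 0` guards. A minimal sketch of the calling convention; the override keys and values here are made up for illustration:

# Hypothetical command-line overrides, flattened into alternating keys and values.
cfg_list = ['TRAIN.lr', '0.0001', 'TRAIN.batch_size', '32']

assert len(cfg_list) % 2 == 0
for full_key, v in zip(cfg_list[0::2], cfg_list[1::2]):
    subkey = full_key.split('.')[-1]   # 'lr', then 'batch_size'
    print(subkey, v)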
utils/dataset.py CHANGED
@@ -123,28 +123,28 @@ class RefDataset(Dataset):
                               0.27577711]).reshape(3, 1, 1)
         self.length = info[dataset][split]
         self.env = None
-        self.exclude_position = args.exclude_pos
-        self.metric_learning = args.metric_learning
-        self.hardpos_rigid = args.hardpos_rigid
+        # self.exclude_position = args.exclude_pos
+        # self.metric_learning = args.metric_learning
+        # self.hardpos_rigid = args.hardpos_rigid
         self.resize_bg1 = A.Compose([
             A.Resize(input_size, input_size, always_apply=True)])
-        if self.metric_learning :
-            if self.hardpos_rigid and self.exclude_position :
-                multiobj_path = '/home/chaeyun/data/projects/chaeyun/RIS/CRIS.pytorch/multiobj_nopos.txt'
-                with open(multiobj_path, 'r') as f:
-                    self.multi_obj_ref_ids = [int(line.strip()) for line in f.readlines()]
-            elif self.hardpos_rigid :
-                multiobj_path = '/home/chaeyun/data/projects/chaeyun/RIS/CRIS.pytorch/multiobj.txt'
-                with open(multiobj_path, 'r') as f:
-                    self.multi_obj_ref_ids = [int(line.strip()) for line in f.readlines()]
-            else :
-                self.multi_obj_ref_ids = None
-
-            path = '/home/chaeyun/data/projects/chaeyun/RIS/CRIS.pytorch/llama3-demo/llama3/hardpos_verbphrase_0906upd.json'
-            with open(path, 'r', encoding='utf-8') as f:
-                self.metadata = json.load(f)
-        else :
-            self.metadata = None
+        # if self.metric_learning :
+        #     if self.hardpos_rigid and self.exclude_position :
+        #         multiobj_path = '/home/chaeyun/data/projects/chaeyun/RIS/CRIS.pytorch/multiobj_nopos.txt'
+        #         with open(multiobj_path, 'r') as f:
+        #             self.multi_obj_ref_ids = [int(line.strip()) for line in f.readlines()]
+        #     elif self.hardpos_rigid :
+        #         multiobj_path = '/home/chaeyun/data/projects/chaeyun/RIS/CRIS.pytorch/multiobj.txt'
+        #         with open(multiobj_path, 'r') as f:
+        #             self.multi_obj_ref_ids = [int(line.strip()) for line in f.readlines()]
+        #     else :
+        #         self.multi_obj_ref_ids = None
+
+        #     path = '/home/chaeyun/data/projects/chaeyun/RIS/CRIS.pytorch/llama3-demo/llama3/hardpos_verbphrase_0906upd.json'
+        #     with open(path, 'r', encoding='utf-8') as f:
+        #         self.metadata = json.load(f)
+        # else :
+        #     self.metadata = None
 
     def _init_db(self):
         self.env = lmdb.open(self.lmdb_dir,
@@ -211,53 +211,10 @@ class RefDataset(Dataset):
                                  self.input_size,
                                  flags=cv2.INTER_LINEAR,
                                  borderValue=0.)
-
-        # if metric learning, select 2 positive sentences
-        if self.metric_learning:
-            if self.hardpos_rigid and seg_id in self.multi_obj_ref_ids:
-                if n_sentences > 1:
-                    idx = np.random.choice(ref['num_sents'], 2, replace=False)
-                    sent = [sents[i] for i in idx]
-                else:
-                    sent = [sents[0], sents[0]]
-            else:
-                # Added processing hardpos data
-                hardpos_dict = self.metadata[str(ref['seg_id'])]
-                hardpos_list = list(itertools.chain(*hardpos_dict.values()))
-                sent_id_list = list(hardpos_dict.keys())
-
-                if n_sentences > 1:
-                    if self.hardpos_rigid :
-                        idx = np.random.choice(ref['num_sents'], 1, replace=False)[0]
-                        cur_hardpos = hardpos_dict[sent_id_list[idx]]
-                        if len(cur_hardpos) == 0 :
-                            idx = np.random.choice(ref['num_sents'], 2, replace=False)
-                            sent = [sents[i] for i in idx]
-                        else :
-                            hardpos_choice = random.choice(cur_hardpos)
-                            sent = [sents[idx], hardpos_choice]
-                            random.shuffle(sent)
-                    else :
-                        if len(hardpos_list) == 0 :
-                            idx = np.random.choice(ref['num_sents'], 2, replace=False)
-                            sent = [sents[i] for i in idx]
-                        else :
-                            idx = np.random.choice(ref['num_sents'], 1, replace=False)[0]
-                            hardpos_choice = random.choice(hardpos_list)
-                            sent = [sents[idx], hardpos_choice]
-                            random.shuffle(sent)
-                # if there's only one, duplicate it
-                else:
-                    if len(hardpos_list) == 0 :
-                        sent = [sents[0], sents[0]]
-                    else :
-                        hardpos_choice = random.choice(hardpos_list)
-                        sent = [sents[0], hardpos_choice]
-                        random.shuffle(sent)
-                    # print(f"Generated sentences: {sent}")
-        else:
-            idx = np.random.choice(ref['num_sents'], 1, replace=False)
-            sent = sents[idx]
+
+        # idx = np.random.choice(n_sentences, 1, replace=False)
+        idx = np.random.choice(n_sentences, 1, replace=False)[0]
+        sent = sents[idx]
         word_vec = tokenize(sent, self.word_length, True).squeeze(0)
         img, mask = self.convert(img, mask)
 
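In the second hunk, the whole metric-learning two-sentence sampling block collapses to a single uniform draw. Note the trailing [0] that distinguishes the kept line from the commented one: np.random.choice(n, 1, replace=False) returns a length-1 ndarray, and indexing a plain Python list with that array raises a TypeError, so extracting the scalar matters before `sents[idx]`. A minimal sketch; the sentence list is illustrative:

import numpy as np

sents = ['a man jumping', 'person on the left', 'woman holding a cup']  # illustrative
n_sentences = len(sents)

idx_arr = np.random.choice(n_sentences, 1, replace=False)  # e.g. array([2]), a 1-element ndarray
idx = np.random.choice(n_sentences, 1, replace=False)[0]   # e.g. 2, a scalar

# sents[idx_arr] would raise TypeError on a plain list; the scalar index works:
sent = sents[idx]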
utils/dataset_verbonly.py CHANGED
@@ -135,20 +135,24 @@ class RefDataset(Dataset):
         if not self.exclude_multiobj and not self.exclude_position :
             return None
         elif self.exclude_position:
-            multiobj_path = '/home/chaeyun/data/projects/chaeyun/RIS/CRIS.pytorch/multiobj_ov2_nopos.txt'
+            multiobj_path = '/home/s1/chaeyunkim/VerbCentric_CY/multiobj_ov2_nopos.txt'
         elif self.exclude_multiobj :
-            multiobj_path = '/home/chaeyun/data/projects/chaeyun/RIS/CRIS.pytorch/multiobj_ov3.txt'
+            multiobj_path = '/home/s1/chaeyunkim/VerbCentric_CY/multiobj_ov3.txt'
         with open(multiobj_path, 'r') as f:
             return [int(line.strip()) for line in f.readlines()]
 
     def _load_metadata(self):
         # Load metadata for hard positive verb phrases, hard negative queries
-        hardpos_path = '/data2/projects/chaeyun/VerbCentric_RIS/hardpos_verbphrase_0906upd.json'
-        hardneg_path = '/data2/projects/chaeyun/VerbCentric_RIS/hardneg_verb.json'
+        if 'op2' in self.metric_mode :
+            hardpos_path = '/home/s1/chaeyunkim/VerbCentric_CY/hardpos_verbphrase_op2_1024upd.json'
+        else :
+            hardpos_path = '/home/s1/chaeyunkim/VerbCentric_CY/hardpos_verbphrase_0906upd.json'
+        # do not use hardneg_path
+        hardneg_path = '/home/s1/chaeyunkim/VerbCentric_CY/hardneg_verb.json'
 
         with open(hardpos_path, 'r', encoding='utf-8') as f:
             hardpos_json = json.load(f)
-        if self.metric_mode == "hardpos_only" :
+        if "hardpos_only" in self.metric_mode :
             hardneg_json = None
         else :
             with open(hardneg_path, 'r', encoding='utf-8') as q:
@@ -225,11 +229,12 @@ class RefDataset(Dataset):
         # if metric learning, assign hard positive verb phrase if applicable
         idx = np.random.choice(n_sentences, 1, replace=False)[0]
         sent = sents[idx]
+
         raw_hardpos, hardpos = self._get_hardpos_verb(ref, seg_id, idx)
         img, mask = self.convert(img, mask)
         word_vec = tokenize(sent, self.word_length, True).squeeze(0)
 
-        if self.metric_mode == "hardpos_only" :
+        if "hardpos_only" in self.metric_mode :
             return img, word_vec, mask, hardpos
 
         else :
@@ -305,6 +310,10 @@ class RefDataset(Dataset):
         if cur_hardpos:
             # Assign a hard positive verb phrase if available
             raw_verb = random.choice(cur_hardpos)
+
+            # print(f"Current Sentence : {ref['sents']}")
+            # print(f"Current hardpos : {cur_hardpos}")
+            # print("Selected raw verb : ", raw_verb)
         verb_hardpos = tokenize(raw_verb, self.word_length, True).squeeze(0)
         return raw_verb, verb_hardpos
 
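Two behavioral notes on these hunks: the equality test `self.metric_mode == "hardpos_only"` became a substring test, so any mode name containing "hardpos_only" now skips loading hard negatives, and a mode containing "op2" selects the newer hard-positive JSON. A minimal sketch of that routing, assuming metric_mode is a plain string; the compound mode name below is hypothetical:

def resolve_metadata(metric_mode):
    # Mirrors the membership tests introduced in _load_metadata.
    if 'op2' in metric_mode:
        hardpos_path = '/home/s1/chaeyunkim/VerbCentric_CY/hardpos_verbphrase_op2_1024upd.json'
    else:
        hardpos_path = '/home/s1/chaeyunkim/VerbCentric_CY/hardpos_verbphrase_0906upd.json'
    load_hardneg = 'hardpos_only' not in metric_mode
    return hardpos_path, load_hardneg

# A hypothetical compound mode picks the op2 file and still skips hard negatives:
print(resolve_metadata('hardpos_only_op2'))
# ('/home/s1/chaeyunkim/VerbCentric_CY/hardpos_verbphrase_op2_1024upd.json', False)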