Update dataset.py

dataset.py  CHANGED  (+1, -57)
@@ -34,39 +34,7 @@ def load_dataset_cc(dataname, batch_size, hydra_path, condition):
     model = SentenceTransformer("all-MiniLM-L6-v2")
     cond_embs = model.encode(condition)
 
-
-    if not os.path.exists(f'{hydra_path}/graphs/{domain}/train.pt'):
-
-        data = torch.load(f'{hydra_path}/graphs/{domain}/{domain}.pt')
-
-        #fix seed
-        torch.manual_seed(0)
-
-        #random permute and split
-        n = len(data)
-        indices = torch.randperm(n)
-
-        if domain == 'eco':
-            train_indices = indices[:4].repeat(50)
-            val_indices = indices[4:5].repeat(50)
-            test_indices = indices[5:]
-        else:
-            train_indices = indices[:int(0.7 * n)]
-            val_indices = indices[int(0.7 * n):int(0.8 * n)]
-            test_indices = indices[int(0.8 * n):]
-
-        train_data = [data[_] for _ in train_indices]
-        val_data = [data[_] for _ in val_indices]
-        test_data = [data[_] for _ in test_indices]
-
-        torch.save(train_indices, f'{hydra_path}/graphs/{domain}/train_indices.pt')
-        torch.save(val_indices, f'{hydra_path}/graphs/{domain}/val_indices.pt')
-        torch.save(test_indices, f'{hydra_path}/graphs/{domain}/test_indices.pt')
-
-        torch.save(train_data, f'{hydra_path}/graphs/{domain}/train.pt')
-        torch.save(val_data, f'{hydra_path}/graphs/{domain}/val.pt')
-        torch.save(test_data, f'{hydra_path}/graphs/{domain}/test.pt')
-
+
 
     train_data, val_data, test_data = [], [], []
 
@@ -99,30 +67,6 @@ def load_dataset_cc(dataname, batch_size, hydra_path, condition):
         test_data = [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(train_d, train_indices)] + [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(val_data, val_indices)] + [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_data, test_indices)]
 
 
-    elif dataname == 'all':
-        for i, domain in enumerate(domains):
-            train_d = torch.load(f'{hydra_path}/graphs/{domain}/train.pt')
-            val_d = torch.load(f'{hydra_path}/graphs/{domain}/val.pt')
-            test_d = torch.load(f'{hydra_path}/graphs/{domain}/test.pt')
-
-            train_indices = torch.load(f'{hydra_path}/graphs/{domain}/train_indices.pt')
-            val_indices = torch.load(f'{hydra_path}/graphs/{domain}/val_indices.pt')
-            test_indices = torch.load(f'{hydra_path}/graphs/{domain}/test_indices.pt')
-
-            # text_prompt = torch.load(f'{hydra_path}/graphs/{domain}/text_prompt_order.pt')
-
-            with open(f'{hydra_path}/graphs/{domain}/text_prompt_order.txt', 'r') as f:
-                text_prompt = f.readlines()
-            text_prompt = [x.strip() for x in text_prompt]
-
-            print(domain, text_prompt[0])
-
-            text_embs = model.encode(text_prompt)
-
-            train_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(train_d, train_indices)])
-            val_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(val_d, val_indices)])
-            test_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_d, test_indices)])
-            print(i, domain, len(train_data), len(val_data), len(test_data))
 
     print('Size of dataset', len(train_data), len(val_data), len(test_data))
 
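For reference, the block removed in the first hunk was a one-time preprocessing step: it fixed the seed, randomly split each domain's graph list into train/val/test, and cached both the indices and the split data under {hydra_path}/graphs/{domain}/. A minimal standalone sketch of that step is below; the function name split_and_cache is hypothetical and the directory layout is assumed to match the paths in the diff.

# Hypothetical sketch of the split-and-cache preprocessing removed above.
# Assumes raw graphs are stored at {hydra_path}/graphs/{domain}/{domain}.pt.
import os
import torch

def split_and_cache(hydra_path, domain):
    base = f'{hydra_path}/graphs/{domain}'
    if os.path.exists(f'{base}/train.pt'):
        return  # splits already cached

    data = torch.load(f'{base}/{domain}.pt')

    # Fix the seed so the permutation (and hence the split) is reproducible.
    torch.manual_seed(0)
    n = len(data)
    indices = torch.randperm(n)

    if domain == 'eco':
        # Tiny domain: repeat a handful of graphs for train/val, rest for test.
        splits = {'train': indices[:4].repeat(50),
                  'val': indices[4:5].repeat(50),
                  'test': indices[5:]}
    else:
        # Standard 70/10/20 split.
        splits = {'train': indices[:int(0.7 * n)],
                  'val': indices[int(0.7 * n):int(0.8 * n)],
                  'test': indices[int(0.8 * n):]}

    for name, idx in splits.items():
        torch.save(idx, f'{base}/{name}_indices.pt')
        torch.save([data[i] for i in idx], f'{base}/{name}.pt')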
|
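The second hunk drops the dataname == 'all' branch, which looped over every domain, read the cached splits and per-graph text prompts, encoded the prompts with the same SentenceTransformer model, and paired each graph with the embedding of its prompt via arrange_data. A sketch of that aggregation logic follows; aggregate_all_domains is a hypothetical name, and domains and arrange_data are assumed to be provided by the surrounding module.

# Hypothetical sketch of the multi-domain aggregation removed in the second hunk.
# Assumes the cached splits and text_prompt_order.txt files referenced in the diff.
import torch
from sentence_transformers import SentenceTransformer

def aggregate_all_domains(hydra_path, domains, arrange_data):
    model = SentenceTransformer("all-MiniLM-L6-v2")
    train_data, val_data, test_data = [], [], []
    buckets = {'train': train_data, 'val': val_data, 'test': test_data}

    for domain in domains:
        base = f'{hydra_path}/graphs/{domain}'

        # One text prompt per graph, stored in the order the graphs were saved.
        with open(f'{base}/text_prompt_order.txt', 'r') as f:
            text_prompt = [x.strip() for x in f.readlines()]
        text_embs = model.encode(text_prompt)

        for split, bucket in buckets.items():
            graphs = torch.load(f'{base}/{split}.pt')
            indices = torch.load(f'{base}/{split}_indices.pt')
            # Pair each graph with the embedding of its original prompt.
            bucket.extend(arrange_data(d, text_embs[ind.item()], ind.item())
                          for d, ind in zip(graphs, indices))

    return train_data, val_data, test_data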