import os
import csv
import time
import random
from itertools import combinations, product

import numpy as np
import torch
from src.smb.level import *
from src.drl.me_reg import *
from src.drl.nets import esmb_sample
from src.utils.filesys import getpath
from src.utils.datastruct import RingQueue
from src.smb.asyncsimlt import AsycSimltPool
from src.env.environments import get_padded_obs
from src.olgen.ol_generator import VecOnlineGenerator, OnlineGenerator
from src.drl.drl_uses import load_cfgs, load_performance
from src.olgen.olg_policy import process_obs, RandGenPolicy, RLGenPolicy, EnsembleGenPolicy
def evaluate_rewards(lvls, rfunc='default', dest_path='', parallel=1, eval_pool=None):
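    '''Simulate each level and collect its reward trace.
    Each returned item is the list of per-step total rewards (summed over the
    reward terms) of one level. If `eval_pool` is given, the caller keeps
    ownership and the pool is not closed here.
    '''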
    internal_pool = eval_pool is None
    if internal_pool:
        eval_pool = AsycSimltPool(parallel, rfunc_name=rfunc, verbose=False, test=True)
    res = []
    for lvl in lvls:
        eval_pool.put('evaluate', (0, str(lvl)))
        buffer = eval_pool.get()
        for _, item in buffer:
            res.append([sum(r) for r in zip(*item.values())])
    # Drain the remaining results; close the pool only if it was created here.
    if internal_pool:
        buffer = eval_pool.close()
    else:
        buffer = eval_pool.get(True)
    for _, item in buffer:
        res.append([sum(r) for r in zip(*item.values())])
    if dest_path:
        np.save(dest_path, res)
    return res

def evaluate_mnd(lvls, refs, parallel=2):
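    '''Estimate the diversity of `lvls` against the reference set `refs`,
    presumably as a mean novelty distance (MND). Returns the means of the two
    measures computed by the 'mnd_item' task; `evaluate_baseline` labels them
    Hamming and TPJS.
    '''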
    eval_pool = AsycSimltPool(parallel, verbose=False, refs=[str(ref) for ref in refs])
    res = []
    for lvl in lvls:
        eval_pool.put('mnd_item', str(lvl))
        res += eval_pool.get()
    res += eval_pool.get(wait=True)
    res = np.array(res)
    eval_pool.close()
    return np.mean(res[:, 0]), np.mean(res[:, 1])

def evaluate_mpd(lvls, parallel=2):
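    '''Estimate the mean pairwise Hamming distance over all pairs of `lvls`.
    The DTW-based measure computed alongside it by the workers is currently
    discarded.
    '''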
    task_datas = [[] for _ in range(parallel)]
    # Distribute all level pairs evenly over the worker processes.
    for i, (A, B) in enumerate(combinations(lvls, 2)):
        task_datas[i % parallel].append((str(A), str(B)))
    hms = []
    eval_pool = AsycSimltPool(parallel, verbose=False)
    for task_data in task_datas:
        eval_pool.put('mpd', task_data)
    res = eval_pool.get(wait=True)
    for task_hms, _ in res:
        hms += task_hms
    eval_pool.close()
    return np.mean(hms)

def evaluate_gen_log(path, parallel=5):
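    '''Evaluate every level batch saved in `<path>/gen_log` and write the
    per-checkpoint reward statistics and diversity to `<path>/step_tests.csv`.
    '''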
    rfunc_name = load_cfgs(path, 'rfunc')
    f = open(getpath(f'{path}/step_tests.csv'), 'w', newline='')
    wrtr = csv.writer(f)
    # Header matches the rows written below: only reward statistics and the
    # Hamming-based MPD are computed.
    cols = ['step', 'r-avg', 'r-std', 'mpd-hm', '']
    wrtr.writerow(cols)
    start_time = time.time()
    for lvls, name in traverse_batched_level_files(f'{path}/gen_log'):
        step = name[4:]  # batch files are expected to be named 'step<NUMBER>'
        rewards = [sum(item) for item in evaluate_rewards(lvls, rfunc_name, parallel=parallel)]
        r_avg, r_std = np.mean(rewards), np.std(rewards)
        mpd = evaluate_mpd(lvls, parallel=parallel)
        line = [step, r_avg, r_std, mpd, '']
        wrtr.writerow(line)
        f.flush()
        print(
            f'{path}: step{step} evaluated in {time.time()-start_time:.1f}s -- '
            + '; '.join(f'{k}: {v}' for k, v in zip(cols, line))
        )
    f.close()

def evaluate_generator(generator, nr=200, h=50, parallel=5, dest_path=None, additional_info=None, rfunc_name='default'):
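    '''Evaluate an online generator: average reward and reward std over `nr`
    levels of `h` segments each, plus diversity (Hamming MPD) on a larger,
    freshly generated sample.
    '''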
    if additional_info is None:
        additional_info = {}
    # Test reward
    lvls = generator.generate(nr, h)
    rewards = [sum(item) for item in evaluate_rewards(lvls, parallel=parallel, rfunc=rfunc_name)]
    r_avg, r_std = np.mean(rewards), np.std(rewards)
    # Test diversity: evaluate_mpd returns a scalar (the Hamming-based MPD)
    mpd = evaluate_mpd(generator.generate(3000*2, h), parallel=parallel)
    res = {'r-avg': r_avg, 'r-std': r_std, 'div': mpd}
    res.update(additional_info)
    if dest_path:
        with open(getpath(dest_path), 'w', newline='') as f:
            keys = list(res.keys())
            wrtr = csv.writer(f)
            wrtr.writerow(keys + [''])
            wrtr.writerow([res[k] for k in keys] + [''])
    return res

def evaluate_jmer(training_path, n=1000, max_parallel=None, device='cuda:0'):
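    '''Estimate the discounted mutual-exclusion regularisation return of a
    trained ensemble policy by rolling it out for `n` episodes of `h` steps
    (what 'jmer' abbreviates is not documented in this file).
    '''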
    init_vecs = np.load(getpath('smb/init_latvecs.npy'))
    try:
        m, histlen, h, gamma, me_type = load_cfgs(training_path, 'm', 'N', 'h', 'gamma', 'me_type')
    except KeyError:
        # The required configurations are missing from this training run.
        return 0.
    mereg_func = LogWassersteinExclusion(1.) if me_type == 'logw' else WassersteinExclusion(1.)
    model = torch.load(getpath(training_path, 'policy.pth'), map_location=device)
    model.requires_grad_(False)
    if max_parallel is None:
        max_parallel = min(n, 512)
    me_regs = []
    obs_queues = [RingQueue(histlen) for _ in range(max_parallel)]
    while len(me_regs) < n:
        size = min(max_parallel, n - len(me_regs))
        mereg_vals, discount = np.zeros([size]), 1.
        veclists = [[] for _ in range(size)]
        # Reset each rollout with a random playable initial latent vector.
        for queue, veclist in zip(obs_queues, veclists):
            queue.clear()
            init_latvec = init_vecs[random.randrange(0, len(init_vecs))]
            queue.push(init_latvec)
            veclist.append(init_latvec)
        for _ in range(h):
            obs = np.stack([get_padded_obs(queue.to_list(), histlen) for queue in obs_queues[:size]])
            muss, stdss, betas = model.get_intermediate(process_obs(obs, device))
            mereg_vals += discount * mereg_func.forward(muss, stdss, betas).squeeze().cpu().numpy()
            discount *= gamma
            actions, _ = esmb_sample(muss, stdss, betas)
            for queue, veclist, action in zip(obs_queues, veclists, actions.cpu().numpy()):
                queue.push(action)
                veclist.append(action)
        me_regs += mereg_vals.tolist()
    return me_regs

def evaluate_baseline(*rfuncs, parallel=4):
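    '''Evaluate the random baseline generator: its diversity, then its mean
    reward under each given reward function. Results are written to
    `training_data/baselines.csv`.
    '''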
    nr, md, nd, h = 100, 1000, 200, 50
    gen_policy = RandGenPolicy()
    olgenerator = OnlineGenerator(gen_policy)
    lvls, refs = olgenerator.generate(md, h), olgenerator.generate(nd, h)
    divs_h, divs_js = evaluate_mnd(lvls, refs, parallel=parallel)
    keys, vals = ['d-h', 'd-js'], [divs_h, divs_js]
    print(f'Diversity of baseline generator: Hamming {divs_h:.2f}; TPJS {divs_js:.2f}')
    for rfunc in rfuncs:
        try:
            print(f'Starting to evaluate {rfunc}')
            start_time = time.time()
            lvls = olgenerator.generate(nr, h)
            rewards = [sum(item) for item in evaluate_rewards(lvls, parallel=parallel, rfunc=rfunc)]
            keys.append(rfunc)
            vals.append(np.mean(rewards))
            print(f'Evaluation for {rfunc} finished in {time.time()-start_time:.2f}s')
            print(f'Evaluation result for {rfunc}: {vals[-1]:.2f}')
        except AttributeError:
            continue
    with open(getpath('training_data', 'baselines.csv'), 'w', newline='') as f:
        wrtr = csv.writer(f)
        wrtr.writerow(keys)
        wrtr.writerow(vals)

def sample_initial():
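    '''Sample 500 playable initial latent vectors and save them as the shared
    initial segments for testing.
    '''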
    playable_latvecs = np.load(getpath('smb/init_latvecs.npy'))
    indexes = random.sample(range(len(playable_latvecs)), 500)
    z = playable_latvecs[indexes, :]
    np.save(getpath('analysis/initial_seg.npy'), z)

def generate_levels_for_test(h=25):
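    '''Generate test levels from the shared initial-segment set with every
    trained policy, saving one `samples.lvls` batch per trial.
    '''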
    init_set = np.load(getpath('analysis/initial_seg.npy'))

    def _generate_one(policy, path):
        try:
            start = time.time()
            generator = VecOnlineGenerator(policy, vec_num=len(init_set))
            fd, _ = os.path.split(getpath(path))
            os.makedirs(fd, exist_ok=True)
            generator.re_init(init_set)
            lvls = generator.generate(len(init_set), h, rand_init=False)
            save_batch(lvls, path)
            print(f'Saved to {path} in {time.time() - start:.2f}s')
        except FileNotFoundError as e:
            print(e)

    # Policies trained with varied parameters (l, m), five trials each.
    for l, m in product(['0.0', '0.1', '0.2', '0.3', '0.4', '0.5'], [2, 3, 4, 5]):
        for i in range(1, 6):
            pi_path = f'training_data/varpm-fhp/l{l}_m{m}/t{i}'
            _generate_one(RLGenPolicy.from_path(pi_path), f'test_data/varpm-fhp/l{l}_m{m}/t{i}/samples.lvls')
            pi_path = f'training_data/varpm-lgp/l{l}_m{m}/t{i}'
            _generate_one(RLGenPolicy.from_path(pi_path), f'test_data/varpm-lgp/l{l}_m{m}/t{i}/samples.lvls')
    # Single-policy baselines.
    for algo in ['sac', 'egsac', 'asyncsac', 'pmoe']:
        for i in range(1, 6):
            pi_path = f'training_data/{algo}/fhp/t{i}'
            _generate_one(RLGenPolicy.from_path(pi_path), f'test_data/{algo}/fhp/t{i}/samples.lvls')
            pi_path = f'training_data/{algo}/lgp/t{i}'
            _generate_one(RLGenPolicy.from_path(pi_path), f'test_data/{algo}/lgp/t{i}/samples.lvls')
    # Ensemble baselines.
    for algo in ['sunrise', 'dvd']:
        for i in range(1, 5):
            pi_path = f'training_data/{algo}/fhp/t{i}'
            _generate_one(EnsembleGenPolicy.from_path(pi_path), f'test_data/{algo}/fhp/t{i}/samples.lvls')
            pi_path = f'training_data/{algo}/lgp/t{i}'
            _generate_one(EnsembleGenPolicy.from_path(pi_path), f'test_data/{algo}/lgp/t{i}/samples.lvls')

if __name__ == '__main__':
    generate_levels_for_test()
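    # The other evaluation entry points can be invoked similarly; these calls
    # are illustrative and assume the corresponding training runs exist:
    # evaluate_gen_log('training_data/sac/fhp/t1')
    # evaluate_baseline('default')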