# importing d4rl registers the D4RL offline-RL environments with gym
import d4rl  # noqa
import gym
import tqdm

from diffusers.experimental import ValueGuidedRLPipeline
# sampling and value-guidance hyperparameters for the planner
config = dict(
    n_samples=64,  # candidate trajectories sampled per environment step
    horizon=32,  # planning horizon (trajectory length)
    num_inference_steps=20,  # diffusion denoising steps
    n_guide_steps=0,  # value-gradient guidance steps (0 disables guidance)
    scale_grad_by_std=True,  # rescale the guidance gradient by the posterior std
    scale=0.1,  # guidance step size
    eta=0.0,  # sampling noise scale (0.0 gives deterministic sampling)
    t_grad_cutoff=2,  # skip guidance on the last denoising steps
    device="cpu",
)


if __name__ == "__main__":
    env_name = "hopper-medium-v2"
    env = gym.make(env_name)

    # load the pretrained value-guided diffusion planner from the Hub
    pipeline = ValueGuidedRLPipeline.from_pretrained(
        "bglick13/hopper-medium-v2-value-function-hor32",
        env=env,
    )

    env.seed(0)
    obs = env.reset()
    total_reward = 0
    total_score = 0
    T = 1000
    rollout = [obs.copy()]
    try:
        for t in tqdm.tqdm(range(T)):
            # call the policy: plan with the diffusion model and return the denormalized action
            denorm_actions = pipeline(obs, planning_horizon=32)

            # execute action in environment
            next_observation, reward, terminal, _ = env.step(denorm_actions)
            score = env.get_normalized_score(total_reward)

            # update return
            total_reward += reward
            total_score += score
            print(
                f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}, Score: {score}, Total Score:"
                f" {total_score}"
            )

            # save observations for rendering
            rollout.append(next_observation.copy())

            obs = next_observation
    except KeyboardInterrupt:
        pass
print(f"Total reward: {total_reward}") | |