AgentXXX
/

ppo-PyramidsRND

Reinforcement Learning

unity-ml-agents

deep-reinforcement-learning

ML-Agents-Pyramids

Model card Files Files and versions Metrics Training metrics Community

ppo-PyramidsRND / run_logs /timers.json

AgentXXX's picture

First commit

350fae0 about 2 years ago

history blame contribute delete

19.2 kB

	{
	"name": "root",
	"gauges": {
	"Pyramids.Policy.Entropy.mean": {
	"value": 0.19035176932811737,
	"min": 0.1869889348745346,
	"max": 1.3933141231536865,
	"count": 50
	},
	"Pyramids.Policy.Entropy.sum": {
	"value": 5725.78125,
	"min": 5642.578125,
	"max": 42267.578125,
	"count": 50
	},
	"Pyramids.Step.mean": {
	"value": 1499938.0,
	"min": 29952.0,
	"max": 1499938.0,
	"count": 50
	},
	"Pyramids.Step.sum": {
	"value": 1499938.0,
	"min": 29952.0,
	"max": 1499938.0,
	"count": 50
	},
	"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
	"value": 0.7198227047920227,
	"min": -0.10943890362977982,
	"max": 0.7678422331809998,
	"count": 50
	},
	"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
	"value": 211.62786865234375,
	"min": -26.484214782714844,
	"max": 226.51345825195312,
	"count": 50
	},
	"Pyramids.Policy.RndValueEstimate.mean": {
	"value": 0.006343322806060314,
	"min": 0.0018057385459542274,
	"max": 0.32135599851608276,
	"count": 50
	},
	"Pyramids.Policy.RndValueEstimate.sum": {
	"value": 1.8649369478225708,
	"min": 0.4893551468849182,
	"max": 77.44679260253906,
	"count": 50
	},
	"Pyramids.Losses.PolicyLoss.mean": {
	"value": 0.07014735733893901,
	"min": 0.06574141409544695,
	"max": 0.07252042188242623,
	"count": 50
	},
	"Pyramids.Losses.PolicyLoss.sum": {
	"value": 0.9820630027451461,
	"min": 0.5059426374390225,
	"max": 1.0459573762103296,
	"count": 50
	},
	"Pyramids.Losses.ValueLoss.mean": {
	"value": 0.015425517178652221,
	"min": 0.000283952019120431,
	"max": 0.017331049907841675,
	"count": 50
	},
	"Pyramids.Losses.ValueLoss.sum": {
	"value": 0.2159572405011311,
	"min": 0.003691376248565603,
	"max": 0.259583568200469,
	"count": 50
	},
	"Pyramids.Policy.LearningRate.mean": {
	"value": 3.0990703955809477e-06,
	"min": 3.0990703955809477e-06,
	"max": 0.00029676708679192377,
	"count": 50
	},
	"Pyramids.Policy.LearningRate.sum": {
	"value": 4.338698553813327e-05,
	"min": 4.338698553813327e-05,
	"max": 0.003822118725960466,
	"count": 50
	},
	"Pyramids.Policy.Epsilon.mean": {
	"value": 0.10103299047619049,
	"min": 0.10103299047619049,
	"max": 0.19892236190476195,
	"count": 50
	},
	"Pyramids.Policy.Epsilon.sum": {
	"value": 1.4144618666666668,
	"min": 1.3924565333333336,
	"max": 2.674039533333334,
	"count": 50
	},
	"Pyramids.Policy.Beta.mean": {
	"value": 0.00011319574857142844,
	"min": 0.00011319574857142844,
	"max": 0.009892343954285714,
	"count": 50
	},
	"Pyramids.Policy.Beta.sum": {
	"value": 0.0015847404799999981,
	"min": 0.0015847404799999981,
	"max": 0.12741654938000002,
	"count": 50
	},
	"Pyramids.Losses.RNDLoss.mean": {
	"value": 0.007367999292910099,
	"min": 0.007345173042267561,
	"max": 0.44613298773765564,
	"count": 50
	},
	"Pyramids.Losses.RNDLoss.sum": {
	"value": 0.10315199196338654,
	"min": 0.10283242166042328,
	"max": 3.1229310035705566,
	"count": 50
	},
	"Pyramids.Environment.EpisodeLength.mean": {
	"value": 285.04761904761904,
	"min": 241.7741935483871,
	"max": 999.0,
	"count": 50
	},
	"Pyramids.Environment.EpisodeLength.sum": {
	"value": 29930.0,
	"min": 15984.0,
	"max": 33719.0,
	"count": 50
	},
	"Pyramids.Environment.CumulativeReward.mean": {
	"value": 1.6958990400745755,
	"min": -1.0000000521540642,
	"max": 1.744482127151319,
	"count": 50
	},
	"Pyramids.Environment.CumulativeReward.sum": {
	"value": 178.06939920783043,
	"min": -29.885801687836647,
	"max": 214.0183975920081,
	"count": 50
	},
	"Pyramids.Policy.ExtrinsicReward.mean": {
	"value": 1.6958990400745755,
	"min": -1.0000000521540642,
	"max": 1.744482127151319,
	"count": 50
	},
	"Pyramids.Policy.ExtrinsicReward.sum": {
	"value": 178.06939920783043,
	"min": -29.885801687836647,
	"max": 214.0183975920081,
	"count": 50
	},
	"Pyramids.Policy.RndReward.mean": {
	"value": 0.02180663863546215,
	"min": 0.01923992401950067,
	"max": 8.981390904635191,
	"count": 50
	},
	"Pyramids.Policy.RndReward.sum": {
	"value": 2.2896970567235257,
	"min": 2.2896970567235257,
	"max": 143.70225447416306,
	"count": 50
	},
	"Pyramids.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 50
	},
	"Pyramids.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 50
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1673372208",
	"python_version": "3.8.16 (default, Dec 7 2022, 01:12:13) \n[GCC 7.5.0]",
	"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
	"mlagents_version": "0.29.0.dev0",
	"mlagents_envs_version": "0.29.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "1.8.1+cu102",
	"numpy_version": "1.21.6",
	"end_time_seconds": "1673375404"
	},
	"total": 3196.2931193980003,
	"count": 1,
	"self": 0.49211429800061524,
	"children": {
	"run_training.setup": {
	"total": 0.10173632799978805,
	"count": 1,
	"self": 0.10173632799978805
	},
	"TrainerController.start_learning": {
	"total": 3195.699268772,
	"count": 1,
	"self": 1.8804009169207347,
	"children": {
	"TrainerController._reset_env": {
	"total": 6.238196538000011,
	"count": 1,
	"self": 6.238196538000011
	},
	"TrainerController.advance": {
	"total": 3187.452687611078,
	"count": 96382,
	"self": 1.881276861824972,
	"children": {
	"env_step": {
	"total": 2174.887555183087,
	"count": 96382,
	"self": 2018.6649755960534,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 155.09106925910373,
	"count": 96382,
	"self": 6.330429315182755,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 148.76063994392098,
	"count": 93811,
	"self": 48.42127378794885,
	"children": {
	"TorchPolicy.sample_actions": {
	"total": 100.33936615597213,
	"count": 93811,
	"self": 100.33936615597213
	}
	}
	}
	}
	},
	"workers": {
	"total": 1.1315103279298455,
	"count": 96382,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 3190.44725509811,
	"count": 96382,
	"is_parallel": true,
	"self": 1315.709227955973,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.002023237999765115,
	"count": 1,
	"is_parallel": true,
	"self": 0.000650978999601648,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0013722590001634671,
	"count": 8,
	"is_parallel": true,
	"self": 0.0013722590001634671
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.043383351000102266,
	"count": 1,
	"is_parallel": true,
	"self": 0.00044068099987271125,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.00044918399999005487,
	"count": 1,
	"is_parallel": true,
	"self": 0.00044918399999005487
	},
	"communicator.exchange": {
	"total": 0.04095350800025699,
	"count": 1,
	"is_parallel": true,
	"self": 0.04095350800025699
	},
	"steps_from_proto": {
	"total": 0.0015399779999825114,
	"count": 1,
	"is_parallel": true,
	"self": 0.00044218200082468684,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0010977959991578246,
	"count": 8,
	"is_parallel": true,
	"self": 0.0010977959991578246
	}
	}
	}
	}
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 1874.738027142137,
	"count": 96381,
	"is_parallel": true,
	"self": 41.08964880105168,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 32.77087719384099,
	"count": 96381,
	"is_parallel": true,
	"self": 32.77087719384099
	},
	"communicator.exchange": {
	"total": 1665.55127279512,
	"count": 96381,
	"is_parallel": true,
	"self": 1665.55127279512
	},
	"steps_from_proto": {
	"total": 135.32622835212442,
	"count": 96381,
	"is_parallel": true,
	"self": 31.468214836767856,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 103.85801351535656,
	"count": 771048,
	"is_parallel": true,
	"self": 103.85801351535656
	}
	}
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 1010.6838555661661,
	"count": 96382,
	"self": 3.4332052722725166,
	"children": {
	"process_trajectory": {
	"total": 218.0238637959087,
	"count": 96382,
	"self": 217.63376915290928,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.3900946429994292,
	"count": 3,
	"self": 0.3900946429994292
	}
	}
	},
	"_update_policy": {
	"total": 789.2267864979849,
	"count": 688,
	"self": 295.6710374200429,
	"children": {
	"TorchPPOOptimizer.update": {
	"total": 493.555749077942,
	"count": 34182,
	"self": 493.555749077942
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 1.365000571240671e-06,
	"count": 1,
	"self": 1.365000571240671e-06
	},
	"TrainerController._save_models": {
	"total": 0.12798234100046102,
	"count": 1,
	"self": 0.002196180999817443,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.12578616000064358,
	"count": 1,
	"self": 0.12578616000064358
	}
	}
	}
	}
	}
	}
	}