eolang's picture
3 epochs, .95 gamma
ac659aa
{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.39848941564559937,
"min": 0.39557358622550964,
"max": 1.4169278144836426,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 12043.9443359375,
"min": 11886.1953125,
"max": 42983.921875,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989903.0,
"min": 29967.0,
"max": 989903.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989903.0,
"min": 29967.0,
"max": 989903.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.957814633846283,
"min": -0.20334137976169586,
"max": 0.9886853694915771,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 277.7662353515625,
"min": -49.005271911621094,
"max": 284.74139404296875,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.01677311211824417,
"min": -0.014436770230531693,
"max": 0.37975215911865234,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 4.864202499389648,
"min": -3.9989852905273438,
"max": 90.38101196289062,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06926123596192565,
"min": 0.0646859946330854,
"max": 0.07285770052389819,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9696573034669591,
"min": 0.5001783685380026,
"max": 1.0446219689814218,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.011170058925295346,
"min": 0.0006551485770169672,
"max": 0.016091234402926927,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.15638082495413483,
"min": 0.008516931501220574,
"max": 0.1776355933203983,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.353718977364286e-06,
"min": 7.353718977364286e-06,
"max": 0.00029515063018788575,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.0001029520656831,
"min": 0.0001029520656831,
"max": 0.0036329725890092003,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10245120714285716,
"min": 0.10245120714285716,
"max": 0.19838354285714285,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4343169000000002,
"min": 1.3886848,
"max": 2.6109908,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.00025487559357142866,
"min": 0.00025487559357142866,
"max": 0.00983851593142857,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0035682583100000013,
"min": 0.0035682583100000013,
"max": 0.12111798091999999,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.008480370044708252,
"min": 0.00838493648916483,
"max": 0.45137521624565125,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.11872518062591553,
"min": 0.11738910526037216,
"max": 3.1596264839172363,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 263.38532110091745,
"min": 263.38532110091745,
"max": 997.6875,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 28709.0,
"min": 16766.0,
"max": 32780.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.6815486045878962,
"min": -0.9358938003424555,
"max": 1.7254363437945193,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 183.28879790008068,
"min": -29.948601610958576,
"max": 189.79799781739712,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.6815486045878962,
"min": -0.9358938003424555,
"max": 1.7254363437945193,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 183.28879790008068,
"min": -29.948601610958576,
"max": 189.79799781739712,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.023199150221152102,
"min": 0.023199150221152102,
"max": 9.772698922521052,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.528707374105579,
"min": 2.528707374105579,
"max": 166.13588168285787,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1675410526",
"python_version": "3.8.10 (default, Nov 14 2022, 12:59:47) \n[GCC 9.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "0.29.0.dev0",
"mlagents_envs_version": "0.29.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.8.1+cu102",
"numpy_version": "1.21.6",
"end_time_seconds": "1675412643"
},
"total": 2117.261594591,
"count": 1,
"self": 0.4238931810004942,
"children": {
"run_training.setup": {
"total": 0.10531930199999806,
"count": 1,
"self": 0.10531930199999806
},
"TrainerController.start_learning": {
"total": 2116.732382108,
"count": 1,
"self": 1.1460875619000035,
"children": {
"TrainerController._reset_env": {
"total": 5.873799500999894,
"count": 1,
"self": 5.873799500999894
},
"TrainerController.advance": {
"total": 2109.6320428441,
"count": 64187,
"self": 1.2125659420876218,
"children": {
"env_step": {
"total": 1470.8575063209662,
"count": 64187,
"self": 1369.52903589293,
"children": {
"SubprocessEnvManager._take_step": {
"total": 100.60325568002668,
"count": 64187,
"self": 4.288382129031788,
"children": {
"TorchPolicy.evaluate": {
"total": 96.31487355099489,
"count": 62564,
"self": 32.604488925999476,
"children": {
"TorchPolicy.sample_actions": {
"total": 63.71038462499541,
"count": 62564,
"self": 63.71038462499541
}
}
}
}
},
"workers": {
"total": 0.7252147480094209,
"count": 64187,
"self": 0.0,
"children": {
"worker_root": {
"total": 2112.4264891249677,
"count": 64187,
"is_parallel": true,
"self": 839.9836711509931,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0019349220001458889,
"count": 1,
"is_parallel": true,
"self": 0.0006931630000508449,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.001241759000095044,
"count": 8,
"is_parallel": true,
"self": 0.001241759000095044
}
}
},
"UnityEnvironment.step": {
"total": 0.047981266000078904,
"count": 1,
"is_parallel": true,
"self": 0.0004896890002328291,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00041363499985891394,
"count": 1,
"is_parallel": true,
"self": 0.00041363499985891394
},
"communicator.exchange": {
"total": 0.045387164999965535,
"count": 1,
"is_parallel": true,
"self": 0.045387164999965535
},
"steps_from_proto": {
"total": 0.001690777000021626,
"count": 1,
"is_parallel": true,
"self": 0.00042488600024626066,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0012658909997753653,
"count": 8,
"is_parallel": true,
"self": 0.0012658909997753653
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1272.4428179739746,
"count": 64186,
"is_parallel": true,
"self": 27.130916288910157,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 22.709965248013305,
"count": 64186,
"is_parallel": true,
"self": 22.709965248013305
},
"communicator.exchange": {
"total": 1123.11795532896,
"count": 64186,
"is_parallel": true,
"self": 1123.11795532896
},
"steps_from_proto": {
"total": 99.48398110809103,
"count": 64186,
"is_parallel": true,
"self": 21.83103236799343,
"children": {
"_process_rank_one_or_two_observation": {
"total": 77.6529487400976,
"count": 513488,
"is_parallel": true,
"self": 77.6529487400976
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 637.5619705810461,
"count": 64187,
"self": 2.186521476124426,
"children": {
"process_trajectory": {
"total": 145.97011386892336,
"count": 64187,
"self": 145.7924996249235,
"children": {
"RLTrainer._checkpoint": {
"total": 0.17761424399986936,
"count": 2,
"self": 0.17761424399986936
}
}
},
"_update_policy": {
"total": 489.40533523599834,
"count": 456,
"self": 186.9665750249619,
"children": {
"TorchPPOOptimizer.update": {
"total": 302.4387602110364,
"count": 22794,
"self": 302.4387602110364
}
}
}
}
}
}
},
"trainer_threads": {
"total": 8.480001270072535e-07,
"count": 1,
"self": 8.480001270072535e-07
},
"TrainerController._save_models": {
"total": 0.08045135299971662,
"count": 1,
"self": 0.001338375999694108,
"children": {
"RLTrainer._checkpoint": {
"total": 0.07911297700002251,
"count": 1,
"self": 0.07911297700002251
}
}
}
}
}
}
}