diff --git "a/model_index.json" "b/model_index.json" --- "a/model_index.json" +++ "b/model_index.json" @@ -0,0 +1,429 @@ +{ + "epoch": 249, + "global_step": 62500, + "layers": { + "pos_embed": "torch.Size([1, 256, 512])", + "x_embedder.proj.weight": "torch.Size([512, 4, 2, 2])", + "x_embedder.proj.bias": "torch.Size([512])", + "t_embedder.mlp.0.weight": "torch.Size([512, 256])", + "t_embedder.mlp.0.bias": "torch.Size([512])", + "t_embedder.mlp.2.weight": "torch.Size([512, 512])", + "t_embedder.mlp.2.bias": "torch.Size([512])", + "final_layer.linear.weight": "torch.Size([32, 512])", + "final_layer.linear.bias": "torch.Size([32])", + "final_layer.adaLN_modulation.1.weight": "torch.Size([1024, 512])", + "final_layer.adaLN_modulation.1.bias": "torch.Size([1024])", + "encoder.resnet.conv1.weight": "torch.Size([64, 3, 7, 7])", + "encoder.resnet.bn1.weight": "torch.Size([64])", + "encoder.resnet.bn1.bias": "torch.Size([64])", + "encoder.resnet.bn1.running_mean": "torch.Size([64])", + "encoder.resnet.bn1.running_var": "torch.Size([64])", + "encoder.resnet.bn1.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer1.0.conv1.weight": "torch.Size([64, 64, 3, 3])", + "encoder.resnet.layer1.0.bn1.weight": "torch.Size([64])", + "encoder.resnet.layer1.0.bn1.bias": "torch.Size([64])", + "encoder.resnet.layer1.0.bn1.running_mean": "torch.Size([64])", + "encoder.resnet.layer1.0.bn1.running_var": "torch.Size([64])", + "encoder.resnet.layer1.0.bn1.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer1.0.conv2.weight": "torch.Size([64, 64, 3, 3])", + "encoder.resnet.layer1.0.bn2.weight": "torch.Size([64])", + "encoder.resnet.layer1.0.bn2.bias": "torch.Size([64])", + "encoder.resnet.layer1.0.bn2.running_mean": "torch.Size([64])", + "encoder.resnet.layer1.0.bn2.running_var": "torch.Size([64])", + "encoder.resnet.layer1.0.bn2.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer1.1.conv1.weight": "torch.Size([64, 64, 3, 3])", + "encoder.resnet.layer1.1.bn1.weight": "torch.Size([64])", + "encoder.resnet.layer1.1.bn1.bias": "torch.Size([64])", + "encoder.resnet.layer1.1.bn1.running_mean": "torch.Size([64])", + "encoder.resnet.layer1.1.bn1.running_var": "torch.Size([64])", + "encoder.resnet.layer1.1.bn1.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer1.1.conv2.weight": "torch.Size([64, 64, 3, 3])", + "encoder.resnet.layer1.1.bn2.weight": "torch.Size([64])", + "encoder.resnet.layer1.1.bn2.bias": "torch.Size([64])", + "encoder.resnet.layer1.1.bn2.running_mean": "torch.Size([64])", + "encoder.resnet.layer1.1.bn2.running_var": "torch.Size([64])", + "encoder.resnet.layer1.1.bn2.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer2.0.conv1.weight": "torch.Size([128, 64, 3, 3])", + "encoder.resnet.layer2.0.bn1.weight": "torch.Size([128])", + "encoder.resnet.layer2.0.bn1.bias": "torch.Size([128])", + "encoder.resnet.layer2.0.bn1.running_mean": "torch.Size([128])", + "encoder.resnet.layer2.0.bn1.running_var": "torch.Size([128])", + "encoder.resnet.layer2.0.bn1.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer2.0.conv2.weight": "torch.Size([128, 128, 3, 3])", + "encoder.resnet.layer2.0.bn2.weight": "torch.Size([128])", + "encoder.resnet.layer2.0.bn2.bias": "torch.Size([128])", + "encoder.resnet.layer2.0.bn2.running_mean": "torch.Size([128])", + "encoder.resnet.layer2.0.bn2.running_var": "torch.Size([128])", + "encoder.resnet.layer2.0.bn2.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer2.0.downsample.0.weight": "torch.Size([128, 64, 1, 1])", + "encoder.resnet.layer2.0.downsample.1.weight": "torch.Size([128])", + "encoder.resnet.layer2.0.downsample.1.bias": "torch.Size([128])", + "encoder.resnet.layer2.0.downsample.1.running_mean": "torch.Size([128])", + "encoder.resnet.layer2.0.downsample.1.running_var": "torch.Size([128])", + "encoder.resnet.layer2.0.downsample.1.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer2.1.conv1.weight": "torch.Size([128, 128, 3, 3])", + "encoder.resnet.layer2.1.bn1.weight": "torch.Size([128])", + "encoder.resnet.layer2.1.bn1.bias": "torch.Size([128])", + "encoder.resnet.layer2.1.bn1.running_mean": "torch.Size([128])", + "encoder.resnet.layer2.1.bn1.running_var": "torch.Size([128])", + "encoder.resnet.layer2.1.bn1.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer2.1.conv2.weight": "torch.Size([128, 128, 3, 3])", + "encoder.resnet.layer2.1.bn2.weight": "torch.Size([128])", + "encoder.resnet.layer2.1.bn2.bias": "torch.Size([128])", + "encoder.resnet.layer2.1.bn2.running_mean": "torch.Size([128])", + "encoder.resnet.layer2.1.bn2.running_var": "torch.Size([128])", + "encoder.resnet.layer2.1.bn2.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer3.0.conv1.weight": "torch.Size([256, 128, 3, 3])", + "encoder.resnet.layer3.0.bn1.weight": "torch.Size([256])", + "encoder.resnet.layer3.0.bn1.bias": "torch.Size([256])", + "encoder.resnet.layer3.0.bn1.running_mean": "torch.Size([256])", + "encoder.resnet.layer3.0.bn1.running_var": "torch.Size([256])", + "encoder.resnet.layer3.0.bn1.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer3.0.conv2.weight": "torch.Size([256, 256, 3, 3])", + "encoder.resnet.layer3.0.bn2.weight": "torch.Size([256])", + "encoder.resnet.layer3.0.bn2.bias": "torch.Size([256])", + "encoder.resnet.layer3.0.bn2.running_mean": "torch.Size([256])", + "encoder.resnet.layer3.0.bn2.running_var": "torch.Size([256])", + "encoder.resnet.layer3.0.bn2.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer3.0.downsample.0.weight": "torch.Size([256, 128, 1, 1])", + "encoder.resnet.layer3.0.downsample.1.weight": "torch.Size([256])", + "encoder.resnet.layer3.0.downsample.1.bias": "torch.Size([256])", + "encoder.resnet.layer3.0.downsample.1.running_mean": "torch.Size([256])", + "encoder.resnet.layer3.0.downsample.1.running_var": "torch.Size([256])", + "encoder.resnet.layer3.0.downsample.1.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer3.1.conv1.weight": "torch.Size([256, 256, 3, 3])", + "encoder.resnet.layer3.1.bn1.weight": "torch.Size([256])", + "encoder.resnet.layer3.1.bn1.bias": "torch.Size([256])", + "encoder.resnet.layer3.1.bn1.running_mean": "torch.Size([256])", + "encoder.resnet.layer3.1.bn1.running_var": "torch.Size([256])", + "encoder.resnet.layer3.1.bn1.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer3.1.conv2.weight": "torch.Size([256, 256, 3, 3])", + "encoder.resnet.layer3.1.bn2.weight": "torch.Size([256])", + "encoder.resnet.layer3.1.bn2.bias": "torch.Size([256])", + "encoder.resnet.layer3.1.bn2.running_mean": "torch.Size([256])", + "encoder.resnet.layer3.1.bn2.running_var": "torch.Size([256])", + "encoder.resnet.layer3.1.bn2.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer4.0.conv1.weight": "torch.Size([512, 256, 3, 3])", + "encoder.resnet.layer4.0.bn1.weight": "torch.Size([512])", + "encoder.resnet.layer4.0.bn1.bias": "torch.Size([512])", + "encoder.resnet.layer4.0.bn1.running_mean": "torch.Size([512])", + "encoder.resnet.layer4.0.bn1.running_var": "torch.Size([512])", + "encoder.resnet.layer4.0.bn1.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer4.0.conv2.weight": "torch.Size([512, 512, 3, 3])", + "encoder.resnet.layer4.0.bn2.weight": "torch.Size([512])", + "encoder.resnet.layer4.0.bn2.bias": "torch.Size([512])", + "encoder.resnet.layer4.0.bn2.running_mean": "torch.Size([512])", + "encoder.resnet.layer4.0.bn2.running_var": "torch.Size([512])", + "encoder.resnet.layer4.0.bn2.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer4.0.downsample.0.weight": "torch.Size([512, 256, 1, 1])", + "encoder.resnet.layer4.0.downsample.1.weight": "torch.Size([512])", + "encoder.resnet.layer4.0.downsample.1.bias": "torch.Size([512])", + "encoder.resnet.layer4.0.downsample.1.running_mean": "torch.Size([512])", + "encoder.resnet.layer4.0.downsample.1.running_var": "torch.Size([512])", + "encoder.resnet.layer4.0.downsample.1.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer4.1.conv1.weight": "torch.Size([512, 512, 3, 3])", + "encoder.resnet.layer4.1.bn1.weight": "torch.Size([512])", + "encoder.resnet.layer4.1.bn1.bias": "torch.Size([512])", + "encoder.resnet.layer4.1.bn1.running_mean": "torch.Size([512])", + "encoder.resnet.layer4.1.bn1.running_var": "torch.Size([512])", + "encoder.resnet.layer4.1.bn1.num_batches_tracked": "torch.Size([])", + "encoder.resnet.layer4.1.conv2.weight": "torch.Size([512, 512, 3, 3])", + "encoder.resnet.layer4.1.bn2.weight": "torch.Size([512])", + "encoder.resnet.layer4.1.bn2.bias": "torch.Size([512])", + "encoder.resnet.layer4.1.bn2.running_mean": "torch.Size([512])", + "encoder.resnet.layer4.1.bn2.running_var": "torch.Size([512])", + "encoder.resnet.layer4.1.bn2.num_batches_tracked": "torch.Size([])", + "encoder.resnet.fc.weight": "torch.Size([512, 512])", + "encoder.resnet.fc.bias": "torch.Size([512])", + "blocks.0.attn.qkv.weight": "torch.Size([1536, 512])", + "blocks.0.attn.qkv.bias": "torch.Size([1536])", + "blocks.0.attn.proj.weight": "torch.Size([512, 512])", + "blocks.0.attn.proj.bias": "torch.Size([512])", + "blocks.0.mlp.fc1.weight": "torch.Size([2048, 512])", + "blocks.0.mlp.fc1.bias": "torch.Size([2048])", + "blocks.0.mlp.fc2.weight": "torch.Size([512, 2048])", + "blocks.0.mlp.fc2.bias": "torch.Size([512])", + "blocks.0.adaLN_modulation.1.weight": "torch.Size([3072, 512])", + "blocks.0.adaLN_modulation.1.bias": "torch.Size([3072])", + "blocks.1.attn.qkv.weight": "torch.Size([1536, 512])", + "blocks.1.attn.qkv.bias": "torch.Size([1536])", + "blocks.1.attn.proj.weight": "torch.Size([512, 512])", + "blocks.1.attn.proj.bias": "torch.Size([512])", + "blocks.1.mlp.fc1.weight": "torch.Size([2048, 512])", + "blocks.1.mlp.fc1.bias": "torch.Size([2048])", + "blocks.1.mlp.fc2.weight": "torch.Size([512, 2048])", + "blocks.1.mlp.fc2.bias": "torch.Size([512])", + "blocks.1.adaLN_modulation.1.weight": "torch.Size([3072, 512])", + "blocks.1.adaLN_modulation.1.bias": "torch.Size([3072])", + "blocks.2.attn.qkv.weight": "torch.Size([1536, 512])", + "blocks.2.attn.qkv.bias": "torch.Size([1536])", + "blocks.2.attn.proj.weight": "torch.Size([512, 512])", + "blocks.2.attn.proj.bias": "torch.Size([512])", + "blocks.2.mlp.fc1.weight": "torch.Size([2048, 512])", + "blocks.2.mlp.fc1.bias": "torch.Size([2048])", + "blocks.2.mlp.fc2.weight": "torch.Size([512, 2048])", + "blocks.2.mlp.fc2.bias": "torch.Size([512])", + "blocks.2.adaLN_modulation.1.weight": "torch.Size([3072, 512])", + "blocks.2.adaLN_modulation.1.bias": "torch.Size([3072])", + "blocks.3.attn.qkv.weight": "torch.Size([1536, 512])", + "blocks.3.attn.qkv.bias": "torch.Size([1536])", + "blocks.3.attn.proj.weight": "torch.Size([512, 512])", + "blocks.3.attn.proj.bias": "torch.Size([512])", + "blocks.3.mlp.fc1.weight": "torch.Size([2048, 512])", + "blocks.3.mlp.fc1.bias": "torch.Size([2048])", + "blocks.3.mlp.fc2.weight": "torch.Size([512, 2048])", + "blocks.3.mlp.fc2.bias": "torch.Size([512])", + "blocks.3.adaLN_modulation.1.weight": "torch.Size([3072, 512])", + "blocks.3.adaLN_modulation.1.bias": "torch.Size([3072])", + "vae.encoder.conv_in.weight": "torch.Size([128, 3, 3, 3])", + "vae.encoder.conv_in.bias": "torch.Size([128])", + "vae.encoder.down_blocks.0.resnets.0.norm1.weight": "torch.Size([128])", + "vae.encoder.down_blocks.0.resnets.0.norm1.bias": "torch.Size([128])", + "vae.encoder.down_blocks.0.resnets.0.conv1.weight": "torch.Size([128, 128, 3, 3])", + "vae.encoder.down_blocks.0.resnets.0.conv1.bias": "torch.Size([128])", + "vae.encoder.down_blocks.0.resnets.0.norm2.weight": "torch.Size([128])", + "vae.encoder.down_blocks.0.resnets.0.norm2.bias": "torch.Size([128])", + "vae.encoder.down_blocks.0.resnets.0.conv2.weight": "torch.Size([128, 128, 3, 3])", + "vae.encoder.down_blocks.0.resnets.0.conv2.bias": "torch.Size([128])", + "vae.encoder.down_blocks.0.resnets.1.norm1.weight": "torch.Size([128])", + "vae.encoder.down_blocks.0.resnets.1.norm1.bias": "torch.Size([128])", + "vae.encoder.down_blocks.0.resnets.1.conv1.weight": "torch.Size([128, 128, 3, 3])", + "vae.encoder.down_blocks.0.resnets.1.conv1.bias": "torch.Size([128])", + "vae.encoder.down_blocks.0.resnets.1.norm2.weight": "torch.Size([128])", + "vae.encoder.down_blocks.0.resnets.1.norm2.bias": "torch.Size([128])", + "vae.encoder.down_blocks.0.resnets.1.conv2.weight": "torch.Size([128, 128, 3, 3])", + "vae.encoder.down_blocks.0.resnets.1.conv2.bias": "torch.Size([128])", + "vae.encoder.down_blocks.0.downsamplers.0.conv.weight": "torch.Size([128, 128, 3, 3])", + "vae.encoder.down_blocks.0.downsamplers.0.conv.bias": "torch.Size([128])", + "vae.encoder.down_blocks.1.resnets.0.norm1.weight": "torch.Size([128])", + "vae.encoder.down_blocks.1.resnets.0.norm1.bias": "torch.Size([128])", + "vae.encoder.down_blocks.1.resnets.0.conv1.weight": "torch.Size([256, 128, 3, 3])", + "vae.encoder.down_blocks.1.resnets.0.conv1.bias": "torch.Size([256])", + "vae.encoder.down_blocks.1.resnets.0.norm2.weight": "torch.Size([256])", + "vae.encoder.down_blocks.1.resnets.0.norm2.bias": "torch.Size([256])", + "vae.encoder.down_blocks.1.resnets.0.conv2.weight": "torch.Size([256, 256, 3, 3])", + "vae.encoder.down_blocks.1.resnets.0.conv2.bias": "torch.Size([256])", + "vae.encoder.down_blocks.1.resnets.0.conv_shortcut.weight": "torch.Size([256, 128, 1, 1])", + "vae.encoder.down_blocks.1.resnets.0.conv_shortcut.bias": "torch.Size([256])", + "vae.encoder.down_blocks.1.resnets.1.norm1.weight": "torch.Size([256])", + "vae.encoder.down_blocks.1.resnets.1.norm1.bias": "torch.Size([256])", + "vae.encoder.down_blocks.1.resnets.1.conv1.weight": "torch.Size([256, 256, 3, 3])", + "vae.encoder.down_blocks.1.resnets.1.conv1.bias": "torch.Size([256])", + "vae.encoder.down_blocks.1.resnets.1.norm2.weight": "torch.Size([256])", + "vae.encoder.down_blocks.1.resnets.1.norm2.bias": "torch.Size([256])", + "vae.encoder.down_blocks.1.resnets.1.conv2.weight": "torch.Size([256, 256, 3, 3])", + "vae.encoder.down_blocks.1.resnets.1.conv2.bias": "torch.Size([256])", + "vae.encoder.down_blocks.1.downsamplers.0.conv.weight": "torch.Size([256, 256, 3, 3])", + "vae.encoder.down_blocks.1.downsamplers.0.conv.bias": "torch.Size([256])", + "vae.encoder.down_blocks.2.resnets.0.norm1.weight": "torch.Size([256])", + "vae.encoder.down_blocks.2.resnets.0.norm1.bias": "torch.Size([256])", + "vae.encoder.down_blocks.2.resnets.0.conv1.weight": "torch.Size([512, 256, 3, 3])", + "vae.encoder.down_blocks.2.resnets.0.conv1.bias": "torch.Size([512])", + "vae.encoder.down_blocks.2.resnets.0.norm2.weight": "torch.Size([512])", + "vae.encoder.down_blocks.2.resnets.0.norm2.bias": "torch.Size([512])", + "vae.encoder.down_blocks.2.resnets.0.conv2.weight": "torch.Size([512, 512, 3, 3])", + "vae.encoder.down_blocks.2.resnets.0.conv2.bias": "torch.Size([512])", + "vae.encoder.down_blocks.2.resnets.0.conv_shortcut.weight": "torch.Size([512, 256, 1, 1])", + "vae.encoder.down_blocks.2.resnets.0.conv_shortcut.bias": "torch.Size([512])", + "vae.encoder.down_blocks.2.resnets.1.norm1.weight": "torch.Size([512])", + "vae.encoder.down_blocks.2.resnets.1.norm1.bias": "torch.Size([512])", + "vae.encoder.down_blocks.2.resnets.1.conv1.weight": "torch.Size([512, 512, 3, 3])", + "vae.encoder.down_blocks.2.resnets.1.conv1.bias": "torch.Size([512])", + "vae.encoder.down_blocks.2.resnets.1.norm2.weight": "torch.Size([512])", + "vae.encoder.down_blocks.2.resnets.1.norm2.bias": "torch.Size([512])", + "vae.encoder.down_blocks.2.resnets.1.conv2.weight": "torch.Size([512, 512, 3, 3])", + "vae.encoder.down_blocks.2.resnets.1.conv2.bias": "torch.Size([512])", + "vae.encoder.down_blocks.2.downsamplers.0.conv.weight": "torch.Size([512, 512, 3, 3])", + "vae.encoder.down_blocks.2.downsamplers.0.conv.bias": "torch.Size([512])", + "vae.encoder.down_blocks.3.resnets.0.norm1.weight": "torch.Size([512])", + "vae.encoder.down_blocks.3.resnets.0.norm1.bias": "torch.Size([512])", + "vae.encoder.down_blocks.3.resnets.0.conv1.weight": "torch.Size([512, 512, 3, 3])", + "vae.encoder.down_blocks.3.resnets.0.conv1.bias": "torch.Size([512])", + "vae.encoder.down_blocks.3.resnets.0.norm2.weight": "torch.Size([512])", + "vae.encoder.down_blocks.3.resnets.0.norm2.bias": "torch.Size([512])", + "vae.encoder.down_blocks.3.resnets.0.conv2.weight": "torch.Size([512, 512, 3, 3])", + "vae.encoder.down_blocks.3.resnets.0.conv2.bias": "torch.Size([512])", + "vae.encoder.down_blocks.3.resnets.1.norm1.weight": "torch.Size([512])", + "vae.encoder.down_blocks.3.resnets.1.norm1.bias": "torch.Size([512])", + "vae.encoder.down_blocks.3.resnets.1.conv1.weight": "torch.Size([512, 512, 3, 3])", + "vae.encoder.down_blocks.3.resnets.1.conv1.bias": "torch.Size([512])", + "vae.encoder.down_blocks.3.resnets.1.norm2.weight": "torch.Size([512])", + "vae.encoder.down_blocks.3.resnets.1.norm2.bias": "torch.Size([512])", + "vae.encoder.down_blocks.3.resnets.1.conv2.weight": "torch.Size([512, 512, 3, 3])", + "vae.encoder.down_blocks.3.resnets.1.conv2.bias": "torch.Size([512])", + "vae.encoder.mid_block.attentions.0.group_norm.weight": "torch.Size([512])", + "vae.encoder.mid_block.attentions.0.group_norm.bias": "torch.Size([512])", + "vae.encoder.mid_block.attentions.0.to_q.weight": "torch.Size([512, 512])", + "vae.encoder.mid_block.attentions.0.to_q.bias": "torch.Size([512])", + "vae.encoder.mid_block.attentions.0.to_k.weight": "torch.Size([512, 512])", + "vae.encoder.mid_block.attentions.0.to_k.bias": "torch.Size([512])", + "vae.encoder.mid_block.attentions.0.to_v.weight": "torch.Size([512, 512])", + "vae.encoder.mid_block.attentions.0.to_v.bias": "torch.Size([512])", + "vae.encoder.mid_block.attentions.0.to_out.0.weight": "torch.Size([512, 512])", + "vae.encoder.mid_block.attentions.0.to_out.0.bias": "torch.Size([512])", + "vae.encoder.mid_block.resnets.0.norm1.weight": "torch.Size([512])", + "vae.encoder.mid_block.resnets.0.norm1.bias": "torch.Size([512])", + "vae.encoder.mid_block.resnets.0.conv1.weight": "torch.Size([512, 512, 3, 3])", + "vae.encoder.mid_block.resnets.0.conv1.bias": "torch.Size([512])", + "vae.encoder.mid_block.resnets.0.norm2.weight": "torch.Size([512])", + "vae.encoder.mid_block.resnets.0.norm2.bias": "torch.Size([512])", + "vae.encoder.mid_block.resnets.0.conv2.weight": "torch.Size([512, 512, 3, 3])", + "vae.encoder.mid_block.resnets.0.conv2.bias": "torch.Size([512])", + "vae.encoder.mid_block.resnets.1.norm1.weight": "torch.Size([512])", + "vae.encoder.mid_block.resnets.1.norm1.bias": "torch.Size([512])", + "vae.encoder.mid_block.resnets.1.conv1.weight": "torch.Size([512, 512, 3, 3])", + "vae.encoder.mid_block.resnets.1.conv1.bias": "torch.Size([512])", + "vae.encoder.mid_block.resnets.1.norm2.weight": "torch.Size([512])", + "vae.encoder.mid_block.resnets.1.norm2.bias": "torch.Size([512])", + "vae.encoder.mid_block.resnets.1.conv2.weight": "torch.Size([512, 512, 3, 3])", + "vae.encoder.mid_block.resnets.1.conv2.bias": "torch.Size([512])", + "vae.encoder.conv_norm_out.weight": "torch.Size([512])", + "vae.encoder.conv_norm_out.bias": "torch.Size([512])", + "vae.encoder.conv_out.weight": "torch.Size([8, 512, 3, 3])", + "vae.encoder.conv_out.bias": "torch.Size([8])", + "vae.decoder.conv_in.weight": "torch.Size([512, 4, 3, 3])", + "vae.decoder.conv_in.bias": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.0.norm1.weight": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.0.norm1.bias": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.0.conv1.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.up_blocks.0.resnets.0.conv1.bias": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.0.norm2.weight": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.0.norm2.bias": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.0.conv2.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.up_blocks.0.resnets.0.conv2.bias": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.1.norm1.weight": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.1.norm1.bias": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.1.conv1.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.up_blocks.0.resnets.1.conv1.bias": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.1.norm2.weight": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.1.norm2.bias": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.1.conv2.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.up_blocks.0.resnets.1.conv2.bias": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.2.norm1.weight": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.2.norm1.bias": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.2.conv1.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.up_blocks.0.resnets.2.conv1.bias": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.2.norm2.weight": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.2.norm2.bias": "torch.Size([512])", + "vae.decoder.up_blocks.0.resnets.2.conv2.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.up_blocks.0.resnets.2.conv2.bias": "torch.Size([512])", + "vae.decoder.up_blocks.0.upsamplers.0.conv.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.up_blocks.0.upsamplers.0.conv.bias": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.0.norm1.weight": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.0.norm1.bias": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.0.conv1.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.up_blocks.1.resnets.0.conv1.bias": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.0.norm2.weight": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.0.norm2.bias": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.0.conv2.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.up_blocks.1.resnets.0.conv2.bias": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.1.norm1.weight": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.1.norm1.bias": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.1.conv1.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.up_blocks.1.resnets.1.conv1.bias": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.1.norm2.weight": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.1.norm2.bias": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.1.conv2.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.up_blocks.1.resnets.1.conv2.bias": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.2.norm1.weight": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.2.norm1.bias": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.2.conv1.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.up_blocks.1.resnets.2.conv1.bias": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.2.norm2.weight": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.2.norm2.bias": "torch.Size([512])", + "vae.decoder.up_blocks.1.resnets.2.conv2.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.up_blocks.1.resnets.2.conv2.bias": "torch.Size([512])", + "vae.decoder.up_blocks.1.upsamplers.0.conv.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.up_blocks.1.upsamplers.0.conv.bias": "torch.Size([512])", + "vae.decoder.up_blocks.2.resnets.0.norm1.weight": "torch.Size([512])", + "vae.decoder.up_blocks.2.resnets.0.norm1.bias": "torch.Size([512])", + "vae.decoder.up_blocks.2.resnets.0.conv1.weight": "torch.Size([256, 512, 3, 3])", + "vae.decoder.up_blocks.2.resnets.0.conv1.bias": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.0.norm2.weight": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.0.norm2.bias": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.0.conv2.weight": "torch.Size([256, 256, 3, 3])", + "vae.decoder.up_blocks.2.resnets.0.conv2.bias": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.0.conv_shortcut.weight": "torch.Size([256, 512, 1, 1])", + "vae.decoder.up_blocks.2.resnets.0.conv_shortcut.bias": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.1.norm1.weight": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.1.norm1.bias": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.1.conv1.weight": "torch.Size([256, 256, 3, 3])", + "vae.decoder.up_blocks.2.resnets.1.conv1.bias": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.1.norm2.weight": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.1.norm2.bias": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.1.conv2.weight": "torch.Size([256, 256, 3, 3])", + "vae.decoder.up_blocks.2.resnets.1.conv2.bias": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.2.norm1.weight": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.2.norm1.bias": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.2.conv1.weight": "torch.Size([256, 256, 3, 3])", + "vae.decoder.up_blocks.2.resnets.2.conv1.bias": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.2.norm2.weight": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.2.norm2.bias": "torch.Size([256])", + "vae.decoder.up_blocks.2.resnets.2.conv2.weight": "torch.Size([256, 256, 3, 3])", + "vae.decoder.up_blocks.2.resnets.2.conv2.bias": "torch.Size([256])", + "vae.decoder.up_blocks.2.upsamplers.0.conv.weight": "torch.Size([256, 256, 3, 3])", + "vae.decoder.up_blocks.2.upsamplers.0.conv.bias": "torch.Size([256])", + "vae.decoder.up_blocks.3.resnets.0.norm1.weight": "torch.Size([256])", + "vae.decoder.up_blocks.3.resnets.0.norm1.bias": "torch.Size([256])", + "vae.decoder.up_blocks.3.resnets.0.conv1.weight": "torch.Size([128, 256, 3, 3])", + "vae.decoder.up_blocks.3.resnets.0.conv1.bias": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.0.norm2.weight": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.0.norm2.bias": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.0.conv2.weight": "torch.Size([128, 128, 3, 3])", + "vae.decoder.up_blocks.3.resnets.0.conv2.bias": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.0.conv_shortcut.weight": "torch.Size([128, 256, 1, 1])", + "vae.decoder.up_blocks.3.resnets.0.conv_shortcut.bias": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.1.norm1.weight": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.1.norm1.bias": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.1.conv1.weight": "torch.Size([128, 128, 3, 3])", + "vae.decoder.up_blocks.3.resnets.1.conv1.bias": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.1.norm2.weight": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.1.norm2.bias": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.1.conv2.weight": "torch.Size([128, 128, 3, 3])", + "vae.decoder.up_blocks.3.resnets.1.conv2.bias": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.2.norm1.weight": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.2.norm1.bias": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.2.conv1.weight": "torch.Size([128, 128, 3, 3])", + "vae.decoder.up_blocks.3.resnets.2.conv1.bias": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.2.norm2.weight": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.2.norm2.bias": "torch.Size([128])", + "vae.decoder.up_blocks.3.resnets.2.conv2.weight": "torch.Size([128, 128, 3, 3])", + "vae.decoder.up_blocks.3.resnets.2.conv2.bias": "torch.Size([128])", + "vae.decoder.mid_block.attentions.0.group_norm.weight": "torch.Size([512])", + "vae.decoder.mid_block.attentions.0.group_norm.bias": "torch.Size([512])", + "vae.decoder.mid_block.attentions.0.to_q.weight": "torch.Size([512, 512])", + "vae.decoder.mid_block.attentions.0.to_q.bias": "torch.Size([512])", + "vae.decoder.mid_block.attentions.0.to_k.weight": "torch.Size([512, 512])", + "vae.decoder.mid_block.attentions.0.to_k.bias": "torch.Size([512])", + "vae.decoder.mid_block.attentions.0.to_v.weight": "torch.Size([512, 512])", + "vae.decoder.mid_block.attentions.0.to_v.bias": "torch.Size([512])", + "vae.decoder.mid_block.attentions.0.to_out.0.weight": "torch.Size([512, 512])", + "vae.decoder.mid_block.attentions.0.to_out.0.bias": "torch.Size([512])", + "vae.decoder.mid_block.resnets.0.norm1.weight": "torch.Size([512])", + "vae.decoder.mid_block.resnets.0.norm1.bias": "torch.Size([512])", + "vae.decoder.mid_block.resnets.0.conv1.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.mid_block.resnets.0.conv1.bias": "torch.Size([512])", + "vae.decoder.mid_block.resnets.0.norm2.weight": "torch.Size([512])", + "vae.decoder.mid_block.resnets.0.norm2.bias": "torch.Size([512])", + "vae.decoder.mid_block.resnets.0.conv2.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.mid_block.resnets.0.conv2.bias": "torch.Size([512])", + "vae.decoder.mid_block.resnets.1.norm1.weight": "torch.Size([512])", + "vae.decoder.mid_block.resnets.1.norm1.bias": "torch.Size([512])", + "vae.decoder.mid_block.resnets.1.conv1.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.mid_block.resnets.1.conv1.bias": "torch.Size([512])", + "vae.decoder.mid_block.resnets.1.norm2.weight": "torch.Size([512])", + "vae.decoder.mid_block.resnets.1.norm2.bias": "torch.Size([512])", + "vae.decoder.mid_block.resnets.1.conv2.weight": "torch.Size([512, 512, 3, 3])", + "vae.decoder.mid_block.resnets.1.conv2.bias": "torch.Size([512])", + "vae.decoder.conv_norm_out.weight": "torch.Size([128])", + "vae.decoder.conv_norm_out.bias": "torch.Size([128])", + "vae.decoder.conv_out.weight": "torch.Size([3, 128, 3, 3])", + "vae.decoder.conv_out.bias": "torch.Size([3])", + "vae.quant_conv.weight": "torch.Size([8, 8, 1, 1])", + "vae.quant_conv.bias": "torch.Size([8])", + "vae.post_quant_conv.weight": "torch.Size([4, 4, 1, 1])", + "vae.post_quant_conv.bias": "torch.Size([4])" + }, + "optimizer_states": "[{'state': {1: {'step': tensor(62477.), 'exp_avg': tensor([[[[ 2.0828e-04, 1.0679e-04],\n [ 1.3135e-04, 1.4409e-04]],\n\n [[ 7.3208e-05, -7.0953e-05],\n [ 1.0298e-04, -2.2638e-05]],\n\n [[ 7.5562e-05, 1.4081e-04],\n [ 3.5271e-05, 8.5972e-05]],\n\n [[-1.2859e-05, -2.1449e-05],\n [-1.3621e-04, -3.6418e-05]]],\n\n\n [[[ 1.4747e-04, -1.0954e-06],\n [ 5.2737e-05, 4.7511e-05]],\n\n [[ 2.1779e-05, -1.4862e-04],\n [ 6.3146e-05, -7.5379e-05]],\n\n [[-8.6229e-06, 9.7227e-05],\n [-3.9323e-05, 1.3058e-04]],\n\n [[ 6.7163e-05, -1.8540e-05],\n [-4.5358e-05, -4.1849e-05]]],\n\n\n [[[-4.3980e-05, -4.6972e-05],\n [-4.2040e-05, -3.3303e-05]],\n\n [[ 1.1221e-05, -1.3170e-04],\n [ 3.1527e-05, -4.7744e-05]],\n\n [[-6.5961e-06, 8.9168e-05],\n [ 1.7146e-05, 6.8468e-05]],\n\n [[ 3.6672e-05, 3.4908e-05],\n [ 1.1140e-05, 4.8970e-05]]],\n\n\n ...,\n\n\n [[[-2.3333e-05, -8.1039e-05],\n [-2.0339e-05, -2.3994e-05]],\n\n [[ 1.2891e-04, 7.3702e-05],\n [ 9.6464e-05, 5.5706e-05]],\n\n [[-7.8943e-05, -1.8124e-05],\n [-5.3605e-05, -3.5915e-05]],\n\n [[-8.6702e-05, -3.3335e-05],\n [-2.8344e-05, 4.5515e-05]]],\n\n\n [[[-2.7323e-04, -2.9208e-04],\n [-2.2086e-04, -2.4769e-04]],\n\n [[-1.9119e-04, -1.6281e-04],\n [-2.5636e-04, -2.6752e-04]],\n\n [[ 2.9915e-06, -4.6770e-05],\n [ 6.2822e-05, -9.3889e-06]],\n\n [[ 1.7746e-04, 1.5895e-04],\n [ 2.1425e-04, 1.6635e-04]]],\n\n\n [[[ 5.0852e-06, 1.7000e-05],\n [ 7.7786e-06, 2.1797e-05]],\n\n [[ 1.6936e-05, 1.2986e-05],\n [ 2.8758e-05, 4.5622e-06]],\n\n [[ 7.0466e-06, -3.5524e-06],\n [ 3.5968e-05, 2.4977e-05]],\n\n [[-1.7021e-05, -1.3876e-05],\n [ 1.1994e-05, 2.0855e-05]]]], device='cuda:0'), 'exp_avg_sq': tensor([[[[4.3095e-06, 4.4541e-06],\n [4.2340e-06, 4.6369e-06]],\n\n [[1.7891e-06, 1.8109e-06],\n [2.1426e-06, 2.0987e-06]],\n\n [[4.3371e-07, 3.1790e-07],\n [4.4659e-07, 4.3228e-07]],\n\n [[1.4696e-06, 1.2495e-06],\n [1.4619e-06, 1.2751e-06]]],\n\n\n [[[3.2977e-06, 3.2442e-06],\n [3.2822e-06, 3.3504e-06]],\n\n [[1.2129e-06, 1.3103e-06],\n [1.3790e-06, 1.4648e-06]],\n\n [[2.3673e-07, 1.9276e-07],\n [2.4562e-07, 2.1790e-07]],\n\n [[9.8281e-07, 8.9446e-07],\n [1.0440e-06, 9.6016e-07]]],\n\n\n [[[1.7174e-06, 1.6224e-06],\n [1.6491e-06, 1.6012e-06]],\n\n [[7.0072e-07, 7.7811e-07],\n [7.7640e-07, 8.5624e-07]],\n\n [[1.2024e-07, 8.0695e-08],\n [1.3242e-07, 1.0380e-07]],\n\n [[4.8584e-07, 4.5769e-07],\n [5.4683e-07, 4.9664e-07]]],\n\n\n ...,\n\n\n [[[3.5862e-06, 3.4961e-06],\n [3.4617e-06, 3.5538e-06]],\n\n [[1.4975e-06, 1.6411e-06],\n [1.8507e-06, 1.9281e-06]],\n\n [[2.6300e-07, 2.2383e-07],\n [2.4604e-07, 2.3630e-07]],\n\n [[1.1101e-06, 1.0254e-06],\n [1.2123e-06, 1.0972e-06]]],\n\n\n [[[1.0161e-06, 9.9268e-07],\n [1.0492e-06, 1.0212e-06]],\n\n [[4.9052e-07, 4.9119e-07],\n [5.5783e-07, 5.5790e-07]],\n\n [[7.9178e-08, 5.0157e-08],\n [7.5729e-08, 4.9716e-08]],\n\n [[3.3504e-07, 3.0451e-07],\n [3.4381e-07, 3.2407e-07]]],\n\n\n [[[4.6470e-07, 4.5525e-07],\n [4.4396e-07, 4.8057e-07]],\n\n [[2.6926e-07, 2.8556e-07],\n [2.3324e-07, 2.4917e-07]],\n\n [[8.7348e-08, 6.3846e-08],\n [7.4637e-08, 5.3753e-08]],\n\n [[1.9526e-07, 1.5851e-07],\n [1.7625e-07, 1.5394e-07]]]], device='cuda:0')}, 2: {'step': tensor(62477.), 'exp_avg': tensor([ 1.4659e-04, 7.5025e-05, -2.7340e-05, 6.6087e-05, -1.6099e-05,\n 1.0682e-05, 1.3577e-04, 1.3627e-04, 2.8268e-06, -2.7487e-06,\n 2.6562e-05, -2.7428e-05, 3.5076e-05, -1.0253e-04, -3.2573e-05,\n -3.1068e-05, -2.3938e-07, 2.0192e-05, -1.8061e-04, 5.2053e-05,\n 5.2366e-06, -2.0618e-04, 4.3246e-05, 2.2811e-04, 3.7994e-05,\n -9.6290e-05, 4.8687e-05, 1.3675e-05, 3.8604e-05, 2.6237e-05,\n 2.6638e-05, 5.0096e-05, -6.0243e-05, -2.8111e-05, 1.9905e-05,\n 6.8586e-05, -1.7548e-04, -5.6624e-05, -9.0386e-05, 3.2042e-05,\n 6.4347e-05, -4.5988e-05, 2.5903e-04, -1.9389e-05, 1.7241e-04,\n -1.2896e-04, -1.6884e-04, -1.7118e-05, -1.5646e-04, 3.2088e-06,\n -1.9093e-04, 4.1744e-06, -3.1157e-05, -2.0357e-04, 2.0248e-05,\n -8.8116e-05, -1.3568e-04, 3.7772e-05, 6.7165e-05, 1.1847e-04,\n -7.9814e-05, -2.7522e-07, 8.1710e-05, 1.9076e-04, 3.2295e-05,\n -5.4096e-05, 3.0121e-04, -1.2843e-04, -2.6135e-06, -5.1825e-06,\n -2.6800e-05, 2.5602e-04, -7.2396e-05, -3.1011e-05, 6.7203e-05,\n 1.2678e-04, 4.7891e-05, -4.5309e-05, -4.0276e-05, -2.8097e-04,\n -1.8275e-04, 2.7942e-05, 3.0284e-05, -3.8701e-05, 2.4951e-04,\n -4.6482e-05, -9.5683e-05, -3.5007e-05, 2.2704e-05, 1.3708e-05,\n -5.4902e-05, -3.1620e-05, 1.8993e-05, 1.7424e-04, -1.5501e-05,\n -1.5074e-05, -1.2011e-05, 4.8040e-06, -1.2688e-04, 2.7668e-05,\n 8.8682e-05, 9.0718e-05, 3.3227e-05, 8.4012e-05, 3.7941e-06,\n -2.0489e-04, -7.3812e-05, 2.2467e-05, 3.4182e-04, 8.9551e-05,\n 2.7961e-05, 1.5414e-05, -1.4764e-05, -4.4272e-05, -1.4047e-04,\n -5.8013e-05, 6.0422e-05, -2.4816e-05, -8.6991e-05, 7.6097e-05,\n -1.1862e-04, -6.1343e-05, -6.6932e-05, -1.7688e-04, 3.4487e-07,\n -4.3757e-05, -5.6825e-05, 2.2118e-05, 2.4339e-04, 1.5216e-04,\n 5.7591e-05, -6.2655e-05, 3.6193e-05, 3.7742e-05, -9.4889e-06,\n 1.4355e-04, 2.3178e-05, 7.5431e-05, 7.1013e-06, 2.8270e-05,\n 6.3905e-06, 4.0260e-05, 1.4570e-05, -1.0792e-07, 7.0303e-06,\n -9.6126e-06, -2.1516e-05, 2.6981e-06, -9.4881e-05, 2.3206e-05,\n 2.5581e-05, 3.2132e-06, -8.6539e-06, -3.1560e-05, 1.7182e-05,\n 1.1495e-04, -3.9580e-05, 5.1571e-06, 2.1886e-06, -9.3478e-05,\n -3.7763e-05, -2.1215e-05, 8.4992e-05, 6.5071e-05, 2.7808e-06,\n 7.0243e-05, -5.7411e-05, 2.1595e-05, -5.2089e-05, 6.7450e-05,\n -7.6741e-06, -4.4557e-05, 6.0233e-05, -3.8582e-05, 5.3000e-05,\n -5.8226e-05, -9.5294e-05, -1.2394e-04, -3.7828e-05, 5.6054e-05,\n 4.5382e-05, 2.9648e-05, -5.3554e-05, 2.9520e-05, -1.1525e-04,\n -2.3507e-06, -1.5442e-05, -2.5411e-05, 7.0180e-05, -1.0668e-05,\n 4.4781e-05, -4.9072e-05, -5.9205e-06, -1.1109e-06, 8.2434e-06,\n 5.0887e-05, -2.8673e-05, 4.6181e-05, -7.0384e-05, -5.7394e-06,\n 2.5926e-05, 3.2963e-05, -2.8209e-05, 6.9896e-07, -8.6774e-05,\n 2.6902e-05, -3.3682e-05, 8.6976e-05, 3.3594e-06, -4.6522e-05,\n 1.2490e-04, 1.0435e-04, 7.0848e-05, 4.2160e-06, 2.7885e-05,\n 4.7225e-06, -1.6235e-04, 2.5344e-06, 9.3699e-05, -2.0938e-05,\n -1.6359e-04, -2.0151e-06, 2.7983e-05, 2.8781e-04, 1.1287e-04,\n 2.9383e-05, -6.8479e-05, -1.8431e-05, -5.5877e-05, -3.2254e-05,\n -2.9311e-05, 1.2411e-05, 9.7426e-05, 1.9825e-06, 2.6972e-05,\n -9.9169e-06, -2.4932e-04, 5.1454e-05, -1.4104e-04, -5.8959e-05,\n 4.1357e-07, 9.5707e-06, 3.9248e-05, 5.9857e-05, 1.2518e-04,\n -7.7076e-05, 1.2773e-04, 7.1973e-05, -4.3721e-05, -7.3625e-05,\n -4.1209e-05, 5.9010e-05, 2.7021e-05, 1.2366e-04, 8.2071e-05,\n -1.2083e-04, 1.7414e-04, -1.5598e-04, -1.4712e-04, 1.0159e-05,\n 1.8931e-05, 6.3612e-06, 3.9731e-05, -2.0162e-05, 3.7682e-05,\n 9.1809e-05, -2.9181e-05, -5.4283e-05, 4.2186e-05, -1.2534e-05,\n 1.2583e-05, -2.6346e-05, -7.7221e-06, 6.7959e-05, -3.7753e-05,\n 1.5575e-05, -1.1915e-05, 1.5589e-04, -1.4208e-05, 2.0616e-05,\n 5.7710e-05, 4.7052e-05, -1.1155e-04, -1.4624e-05, -2.1317e-05,\n -1.8664e-05, -6.7530e-05, 1.8358e-04, -6.3489e-05, 1.6602e-05,\n 2.1400e-05, -1.3760e-05, 1.1951e-06, 2.1964e-05, 2.9390e-06,\n -5.3381e-06, 1.7745e-04, 1.6658e-04, -3.1129e-05, 4.9397e-05,\n 8.1141e-05, 1.6305e-05, 5.6077e-05, -8.8615e-05, -4.9097e-05,\n 1.1558e-04, 6.0252e-05, -8.8632e-05, 5.2614e-05, -3.0449e-06,\n 1.1276e-04, 1.8092e-04, 1.8314e-04, -5.5071e-05, 3.8152e-06,\n -5.6928e-05, -7.3121e-06, -3.0430e-05, 4.9621e-05, 4.1430e-05,\n 2.1149e-04, -2.9487e-05, -1.6907e-04, -1.3036e-04, 4.8354e-05,\n -2.0663e-05, 5.9990e-05, -7.2251e-05, -9.6264e-05, 5.3155e-06,\n -5.0418e-05, -4.5007e-05, 3.6550e-06, -1.0009e-04, 8.5128e-05,\n -1.3529e-04, 3.1268e-05, -5.5686e-04, 4.5225e-05, -6.9523e-05,\n -1.3185e-05, 7.0289e-06, 3.8456e-05, 2.7018e-05, 2.7283e-05,\n 1.0904e-05, -1.9859e-05, 2.6254e-05, -2.3353e-05, 3.4500e-05,\n 5.7884e-05, -6.9273e-06, 6.8259e-05, -1.4429e-04, 3.1818e-04,\n 1.1407e-05, 7.8613e-05, -1.4720e-04, -2.3108e-05, 3.6579e-05,\n -5.6408e-05, -3.1057e-05, -8.0252e-05, -2.9528e-05, 3.1468e-05,\n 2.2831e-05, 6.9078e-05, 7.2183e-05, -5.4105e-05, -3.1109e-05,\n -5.9610e-05, 2.7413e-05, 5.9141e-06, -3.7484e-05, 1.2507e-05,\n 6.0080e-05, -1.4797e-04, 4.3576e-05, -1.0295e-05, -4.9100e-06,\n -4.1630e-05, -2.4978e-04, 8.2361e-05, 9.9136e-05, 1.1953e-04,\n 7.3013e-05, -9.0322e-05, -4.8069e-05, -2.4620e-06, 1.5082e-05,\n 3.2263e-05, -1.9257e-05, -2.1683e-05, -7.4114e-05, -9.9353e-05,\n 3.2535e-05, -5.9416e-05, -7.2648e-05, -6.3999e-06, -4.9078e-05,\n -7.9864e-05, -2.0637e-05, -1.8872e-05, 2.9308e-05, -2.5856e-05,\n -1.1312e-05, 6.4520e-06, -3.4200e-05, 1.0352e-05, -7.0904e-05,\n -3.3233e-05, 1.9535e-05, -8.7810e-07, -5.4268e-05, 2.7181e-05,\n 9.0632e-06, 2.3314e-05, -2.0846e-05, 4.6925e-05, 5.6700e-05,\n -4.9144e-05, 5.8985e-06, 1.2881e-04, 7.0197e-05, -6.2525e-05,\n 8.3373e-06, 2.5218e-06, 4.1523e-05, -1.7683e-05, 1.3221e-05,\n 1.0281e-04, 6.4505e-05, 1.4285e-04, -7.7992e-05, -1.4124e-04,\n -6.1744e-05, 3.8250e-05, -4.2722e-05, -4.4635e-05, -5.5917e-06,\n -4.6254e-05, -8.9400e-05, -8.5138e-05, -8.5039e-06, -1.0274e-04,\n 1.3751e-05, -6.5376e-05, -3.6941e-05, 6.5174e-05, -1.0962e-05,\n -7.9178e-05, 1.3157e-05, -5.6204e-05, -1.3551e-04, -3.0645e-05,\n 1.2653e-05, 3.4899e-05, 2.4021e-05, 2.3031e-06, -1.3343e-04,\n -8.5669e-05, 3.1534e-05, -9.1617e-05, -1.6211e-05, -9.2156e-06,\n 7.8050e-05, -3.5724e-05, 7.9010e-05, -5.6768e-06, 1.0550e-04,\n -3.5381e-05, 1.0301e-04, 3.7899e-06, 8.2126e-05, -3.2933e-05,\n 2.1011e-05, -7.2436e-05, -2.4076e-05, -3.3368e-05, 1.0955e-05,\n 4.2680e-05, 2.1347e-05, 1.8200e-04, 2.2954e-05, -1.2964e-05,\n -5.4972e-05, -4.0667e-05, 2.5762e-05, 6.3501e-05, -4.0401e-05,\n 7.6818e-05, 8.2769e-05, 6.8086e-05, -1.2404e-04, 7.6380e-05,\n -9.9828e-05, 5.5272e-05, -7.9095e-05, -1.7519e-04, -4.2072e-05,\n 1.1120e-04, -5.9209e-05, -5.1603e-05, -1.8888e-05, 1.7166e-05,\n 1.0697e-05, -1.8561e-05, 4.4244e-05, -2.5975e-05, -6.1044e-05,\n -1.1084e-04, 5.9016e-05], device='cuda:0'), 'exp_avg_sq': tensor([1.5157e-06, 1.0909e-06, 5.3027e-07, 5.1119e-07, 3.9790e-07, 5.0771e-07,\n 3.6827e-07, 3.3049e-07, 2.5799e-07, 1.7056e-07, 2.5880e-07, 1.1592e-07,\n 8.9662e-08, 2.1936e-07, 7.3634e-08, 9.7557e-08, 1.5784e-07, 3.7899e-07,\n 4.6068e-07, 6.9816e-07, 3.7598e-07, 8.4364e-07, 7.1621e-07, 1.3432e-06,\n 4.0581e-07, 1.4008e-07, 1.8895e-07, 1.1952e-07, 2.7844e-07, 3.7268e-07,\n 2.8503e-07, 2.9173e-07, 6.7174e-07, 7.4176e-07, 2.4410e-07, 1.5209e-07,\n 1.4938e-06, 2.7555e-07, 8.7869e-07, 8.8853e-07, 7.0063e-07, 6.0611e-07,\n 1.0186e-06, 4.2910e-07, 1.7018e-06, 3.5528e-06, 8.4002e-07, 8.7767e-07,\n 5.9716e-07, 9.6646e-08, 7.3591e-07, 1.4784e-06, 2.2374e-07, 9.8542e-07,\n 3.3799e-07, 9.9945e-07, 5.1824e-07, 1.8036e-06, 4.0946e-07, 5.5971e-07,\n 7.6875e-07, 1.1768e-07, 6.7714e-07, 1.0157e-06, 1.4846e-06, 5.2501e-07,\n 2.0339e-06, 4.6815e-07, 3.2922e-07, 8.6370e-07, 1.3043e-07, 3.6011e-06,\n 2.0690e-06, 4.5883e-07, 2.1064e-07, 4.4248e-07, 2.7775e-07, 2.0983e-07,\n 1.0169e-07, 2.5890e-06, 8.1134e-07, 3.4361e-07, 9.8323e-07, 5.1622e-07,\n 3.0333e-06, 4.2469e-07, 9.7988e-08, 3.0394e-07, 5.5078e-07, 1.5962e-07,\n 4.8151e-07, 2.0658e-07, 3.1692e-06, 2.9135e-06, 1.0465e-07, 1.9146e-06,\n 1.0978e-07, 8.2873e-07, 4.0007e-07, 3.6055e-07, 5.3234e-07, 4.2812e-07,\n 1.6596e-07, 1.9774e-06, 1.9901e-07, 6.3230e-06, 5.3889e-07, 5.2080e-07,\n 2.7467e-06, 3.5742e-07, 4.7173e-06, 3.0420e-07, 1.4226e-07, 4.2213e-07,\n 2.6269e-06, 1.4981e-07, 6.4063e-07, 1.4699e-07, 7.2594e-07, 2.8250e-07,\n 4.7903e-07, 1.3979e-06, 1.6334e-07, 6.5188e-07, 5.4766e-07, 8.6598e-07,\n 4.5350e-07, 3.5542e-07, 9.2948e-07, 6.8882e-07, 3.5873e-07, 2.3771e-07,\n 1.9793e-07, 1.7253e-07, 1.5488e-07, 1.6860e-07, 1.9195e-07, 2.5366e-07,\n 2.3482e-07, 3.3869e-07, 3.6484e-07, 7.0100e-08, 1.1464e-07, 6.9861e-08,\n 7.7311e-08, 8.3587e-08, 9.8863e-08, 1.1191e-07, 8.8633e-08, 8.7895e-08,\n 1.3744e-07, 1.2947e-07, 1.6068e-07, 2.0767e-07, 8.2733e-08, 1.4776e-07,\n 3.2816e-07, 1.2731e-06, 1.1553e-07, 2.5651e-07, 3.2845e-07, 4.6684e-07,\n 3.1464e-07, 1.0293e-06, 3.4613e-07, 6.7846e-07, 3.1190e-07, 3.6407e-06,\n 4.5644e-07, 1.6303e-07, 1.6259e-07, 2.7808e-07, 5.4143e-07, 2.1663e-07,\n 3.4287e-07, 4.3791e-07, 2.9718e-07, 1.2470e-06, 5.5406e-07, 1.5632e-06,\n 2.2427e-07, 1.3065e-07, 3.4430e-07, 3.9302e-07, 4.4591e-07, 1.1937e-07,\n 2.3017e-07, 2.0999e-07, 1.0531e-07, 2.4216e-07, 7.6988e-07, 3.5899e-07,\n 4.5778e-07, 1.0949e-07, 2.9798e-07, 1.2882e-06, 9.4480e-07, 6.5489e-07,\n 1.6226e-07, 2.2905e-07, 1.7048e-07, 2.5844e-07, 1.3780e-07, 1.1938e-07,\n 3.2235e-07, 1.0464e-07, 3.2386e-07, 1.0974e-07, 9.4637e-08, 2.1943e-07,\n 2.4012e-07, 1.1709e-06, 2.4228e-07, 1.1814e-07, 2.9170e-07, 3.4656e-07,\n 1.0926e-06, 2.4080e-06, 6.1385e-07, 2.1578e-07, 6.9771e-07, 1.9850e-07,\n 8.8765e-08, 1.2849e-06, 1.0595e-06, 7.0555e-07, 3.4935e-07, 1.5699e-07,\n 1.0967e-07, 2.2140e-07, 2.5227e-07, 2.2532e-07, 6.6451e-07, 1.8346e-07,\n 3.8189e-07, 4.8477e-07, 3.2142e-06, 6.8240e-07, 1.4793e-06, 1.7070e-06,\n 1.9317e-07, 7.1282e-08, 3.8666e-07, 7.6204e-07, 2.4110e-06, 5.0415e-07,\n 4.9412e-07, 2.4519e-07, 8.8939e-07, 1.4158e-06, 2.1847e-07, 1.6521e-06,\n 1.4406e-07, 2.8117e-07, 5.4359e-07, 4.0913e-07, 2.4047e-06, 6.4407e-07,\n 7.6856e-07, 2.2766e-07, 2.0879e-07, 1.2769e-07, 4.0198e-07, 7.0711e-07,\n 1.9377e-07, 2.1795e-07, 2.1848e-07, 1.0909e-07, 1.2831e-07, 6.8687e-08,\n 1.1267e-07, 7.3987e-08, 9.7472e-08, 4.5805e-07, 1.8628e-07, 2.7204e-07,\n 7.7702e-07, 5.0894e-07, 5.6338e-07, 6.7320e-07, 2.6637e-07, 6.0040e-07,\n 3.5631e-07, 7.9391e-07, 1.9883e-07, 2.4827e-07, 1.3104e-06, 4.8221e-07,\n 1.3548e-07, 4.8445e-07, 1.0350e-07, 1.9579e-07, 2.2981e-07, 8.5336e-08,\n 2.5429e-07, 2.3016e-07, 7.0868e-07, 9.2723e-07, 3.7840e-07, 1.2962e-06,\n 6.8881e-07, 9.1090e-07, 4.0756e-07, 6.6530e-07, 2.5822e-06, 1.0289e-06,\n 7.7831e-07, 1.5775e-07, 1.1282e-07, 1.0601e-07, 2.1674e-07, 1.5575e-06,\n 6.3633e-07, 4.4928e-07, 9.5820e-08, 7.9977e-07, 2.9179e-07, 7.0136e-07,\n 7.8441e-07, 5.2060e-07, 8.0457e-06, 9.1775e-07, 3.6786e-06, 8.5246e-07,\n 4.2155e-07, 1.8225e-07, 1.9271e-07, 2.2438e-07, 9.8671e-07, 5.5692e-07,\n 2.9875e-07, 2.7100e-07, 8.0109e-08, 5.9083e-07, 4.6858e-07, 3.2660e-06,\n 4.2678e-07, 6.3463e-06, 5.7329e-07, 4.1763e-07, 4.4040e-07, 4.0088e-07,\n 1.6008e-07, 1.5191e-06, 1.0229e-07, 2.0200e-07, 1.7551e-06, 7.6321e-07,\n 2.4210e-07, 7.2235e-07, 1.9918e-07, 8.9146e-07, 1.7314e-07, 3.9125e-07,\n 3.6868e-06, 4.5680e-07, 1.1532e-06, 3.0367e-06, 2.0173e-07, 6.3986e-07,\n 1.2772e-06, 1.0535e-06, 5.0770e-07, 5.1656e-07, 1.2046e-07, 3.4656e-07,\n 5.3238e-07, 1.5192e-06, 2.5541e-07, 4.4692e-07, 1.2465e-07, 1.7256e-06,\n 1.7266e-07, 3.2933e-07, 1.4308e-07, 2.4096e-06, 2.1476e-06, 3.7546e-07,\n 1.4846e-07, 1.0224e-06, 4.2416e-07, 2.3189e-06, 1.0430e-06, 1.3779e-06,\n 7.1296e-07, 6.6785e-07, 5.5482e-07, 4.8694e-07, 4.0331e-07, 1.7070e-07,\n 1.5968e-07, 1.2575e-07, 1.3117e-07, 1.3824e-07, 2.9017e-07, 2.5274e-07,\n 2.6525e-07, 2.7317e-07, 1.1246e-07, 1.4044e-07, 1.4186e-07, 6.7596e-08,\n 8.3655e-08, 8.2938e-08, 8.7679e-08, 3.7231e-08, 9.9663e-08, 2.1558e-07,\n 6.3791e-08, 1.2470e-07, 1.4304e-07, 5.7014e-07, 1.6464e-07, 6.4025e-07,\n 1.7267e-07, 2.3537e-07, 2.0959e-07, 3.0657e-07, 6.4146e-07, 2.2344e-07,\n 1.7889e-07, 1.4272e-07, 9.7480e-07, 7.5778e-07, 3.4932e-07, 2.7290e-07,\n 3.6504e-07, 1.5594e-07, 9.6715e-08, 8.3159e-07, 3.7163e-07, 6.6565e-07,\n 1.7086e-06, 5.9127e-07, 5.4309e-07, 1.3117e-07, 7.4033e-07, 1.3647e-07,\n 9.9441e-08, 7.1413e-07, 7.8370e-07, 8.5698e-07, 1.0074e-06, 6.9087e-07,\n 6.8502e-07, 3.2338e-07, 1.2935e-07, 1.8474e-07, 1.5617e-06, 2.2797e-07,\n 2.2041e-07, 4.8174e-08, 2.9853e-07, 4.0714e-07, 8.6737e-08, 2.3068e-07,\n 1.3819e-07, 7.2453e-07, 1.0086e-07, 5.3430e-07, 1.9643e-07, 6.4675e-07,\n 2.7080e-07, 3.1737e-07, 4.3820e-06, 2.2050e-06, 5.0414e-07, 2.0285e-07,\n 2.1851e-07, 1.2258e-06, 4.8779e-07, 6.4018e-07, 3.0669e-07, 1.0380e-06,\n 4.3371e-07, 3.6696e-07, 1.3108e-06, 5.0961e-07, 1.1855e-07, 7.6287e-07,\n 1.9973e-07, 1.2272e-07, 1.3915e-05, 1.2390e-07, 3.5530e-07, 8.8588e-07,\n 4.8856e-07, 2.4303e-07, 4.0141e-07, 1.2338e-07, 2.7517e-07, 6.8456e-07,\n 1.1596e-07, 7.0226e-07, 6.4825e-07, 1.5080e-07, 7.8648e-08, 8.8767e-07,\n 5.3219e-07, 1.6652e-07, 4.3478e-07, 1.3613e-07, 6.0580e-07, 4.9729e-07,\n 2.5494e-07, 2.7632e-07, 2.0489e-07, 2.1387e-07, 2.7555e-07, 1.2094e-06,\n 3.4214e-07, 2.3380e-07], device='cuda:0')}, 3: {'step': tensor(62477.), 'exp_avg': tensor([[ 2.5489e-06, -1.1349e-06, -9.3715e-06, ..., 4.4432e-08,\n 4.1500e-08, 3.8376e-08],\n [ 1.4008e-06, 2.2708e-06, 1.3236e-06, ..., 3.2526e-08,\n 3.0266e-08, 2.8169e-08],\n [-3.0587e-06, -1.5811e-05, -1.2476e-05, ..., 2.1752e-07,\n 2.0234e-07, 1.8811e-07],\n ...,\n [-9.9740e-07, -1.0161e-05, -7.9074e-06, ..., 1.0309e-07,\n 9.5937e-08, 8.9238e-08],\n [ 1.6139e-07, -1.8202e-05, -1.7833e-05, ..., 1.4902e-07,\n 1.3858e-07, 1.2880e-07],\n [-8.6292e-07, -9.7977e-06, -7.1501e-06, ..., 1.6074e-07,\n 1.4962e-07, 1.3917e-07]], device='cuda:0'), 'exp_avg_sq': tensor([[1.8150e-09, 1.7669e-09, 1.7787e-09, ..., 2.4987e-11, 2.1646e-11,\n 1.8749e-11],\n [1.0275e-10, 1.0116e-10, 1.0021e-10, ..., 2.4352e-13, 2.1095e-13,\n 1.8273e-13],\n [3.4372e-09, 3.1602e-09, 3.0281e-09, ..., 3.7865e-12, 3.2797e-12,\n 2.8406e-12],\n ...,\n [1.4332e-09, 1.3402e-09, 1.2326e-09, ..., 6.1354e-13, 5.3139e-13,\n 4.6024e-13],\n [5.4662e-09, 5.1065e-09, 4.8437e-09, ..., 6.3588e-12, 5.5080e-12,\n 4.7707e-12],\n [1.7222e-09, 1.6634e-09, 1.5690e-09, ..., 5.6082e-13, 4.8572e-13,\n 4.2065e-13]], device='cuda:0')}, 4: {'step': tensor(62477.), 'exp_avg': tensor([-4.7336e-06, -1.1735e-06, 1.0580e-05, 7.7261e-07, 4.2317e-06,\n 2.9760e-06, -9.9675e-07, 2.4390e-06, -2.2379e-06, -6.8177e-06,\n -8.7215e-08, -7.9930e-06, 1.2557e-06, 7.4567e-06, 3.0446e-06,\n -1.4310e-06, 1.3710e-07, -8.8297e-06, 2.8521e-06, 2.9357e-06,\n -1.4237e-06, 3.0871e-06, 9.9418e-06, -2.1143e-06, 5.6122e-06,\n -9.8705e-07, 1.4112e-06, 7.0871e-06, -5.4345e-06, -6.8208e-07,\n -1.0332e-06, -3.9990e-07, -1.2146e-06, -5.6929e-07, 4.0063e-06,\n -7.4909e-06, 4.1341e-06, -2.2094e-06, -1.8221e-06, 8.8050e-07,\n -2.4335e-06, -2.0965e-06, -5.7785e-07, 5.4006e-06, -1.3696e-06,\n -4.4347e-06, -1.6380e-06, 4.5622e-06, 2.8353e-06, -1.6855e-06,\n 6.3487e-06, 8.5861e-06, -7.1754e-06, -7.4040e-06, 5.1512e-07,\n -7.1539e-07, 5.8760e-06, 1.1480e-05, 6.3407e-06, 6.1428e-06,\n 7.4672e-07, -5.4163e-06, -2.5043e-06, -3.6862e-06, 9.1140e-07,\n 3.3122e-06, 8.6561e-06, -3.9395e-06, 5.8814e-06, 5.6894e-06,\n 2.2927e-06, -4.5785e-06, 4.2153e-07, -1.0859e-05, -2.8392e-06,\n 3.8628e-06, 4.7558e-06, 6.0963e-06, 5.8981e-06, -8.1968e-08,\n -5.0863e-07, -4.8836e-06, 6.0857e-07, 7.2328e-06, -4.6982e-08,\n 4.3270e-06, -1.1793e-05, 9.9450e-06, 8.0871e-06, 3.0382e-06,\n 4.7471e-06, -1.0494e-06, -1.5875e-06, 1.2597e-06, -3.2808e-06,\n 4.3207e-06, 3.9078e-06, -1.3008e-05, 5.2936e-06, 2.9727e-06,\n 1.1719e-06, 4.7929e-06, -5.5898e-06, -3.8045e-07, 6.7229e-06,\n -1.1357e-06, 2.7480e-06, 2.5942e-06, -5.6950e-07, 5.5892e-06,\n 5.5118e-06, -1.1038e-06, -3.0736e-06, -3.1504e-08, -8.9301e-07,\n 4.3296e-06, -2.7914e-06, -2.3095e-06, -1.9908e-07, -6.7558e-07,\n -7.0366e-07, -1.1112e-06, -1.7192e-06, -7.8726e-07, 3.6754e-06,\n 1.3225e-05, 8.3513e-06, 4.9678e-06, -6.2567e-07, 2.5551e-06,\n -8.3730e-06, 3.6758e-06, 4.1202e-06, 6.1581e-07, 1.2158e-06,\n 3.2388e-06, 3.4653e-06, 2.2514e-07, -1.4925e-06, -3.9089e-06,\n 1.2223e-06, 8.2716e-06, -1.0942e-06, -9.8752e-07, 2.3698e-06,\n 4.6523e-06, -2.9021e-06, 2.7470e-06, 3.8582e-07, 1.4421e-06,\n -9.5147e-07, 2.6581e-06, 1.0637e-05, 2.2152e-06, 3.7165e-06,\n 1.1372e-05, 6.8731e-07, -4.8090e-06, 8.2880e-06, 9.2654e-06,\n -9.2665e-07, -6.1059e-06, 7.0352e-06, 1.7134e-06, -1.2960e-06,\n 4.3303e-06, 7.7697e-06, 1.9614e-06, 1.3396e-06, 6.7169e-07,\n 7.0895e-06, 8.0043e-06, 4.6670e-06, 3.3993e-06, 1.3275e-06,\n 4.9588e-06, 2.4124e-06, 3.5114e-07, -6.8235e-07, -3.3374e-06,\n 1.8072e-06, -2.2487e-06, -1.4151e-07, 4.2584e-06, 1.8428e-06,\n 7.4404e-06, -2.9994e-06, 4.7422e-06, -3.4544e-06, -4.6972e-06,\n 2.9728e-06, 5.9117e-07, -4.8611e-06, 2.5260e-06, -2.1514e-06,\n -2.3053e-06, -1.6530e-06, 1.0121e-05, -2.2072e-06, -3.1037e-06,\n 2.1624e-06, -8.0314e-06, -8.1122e-07, 3.5706e-06, -4.3842e-07,\n -7.3269e-07, -5.7977e-07, -6.3934e-06, 9.3899e-06, -2.0056e-06,\n -9.1748e-08, 2.7579e-06, 1.0113e-06, -3.1471e-07, -4.9233e-06,\n -1.9770e-06, -8.2786e-06, 8.0149e-07, -6.4026e-06, -3.4143e-06,\n -5.2525e-06, 3.3540e-06, 4.1787e-06, -1.8706e-07, 1.5363e-06,\n 5.8670e-06, -1.4602e-06, -4.5495e-07, -1.4775e-06, 1.0540e-06,\n -5.2730e-07, 3.8447e-06, -1.1879e-06, 3.1256e-06, 5.7513e-06,\n 7.5855e-06, 5.7240e-06, -9.1504e-07, 5.0896e-06, -4.5275e-07,\n 4.2941e-06, 5.8583e-07, 1.3863e-06, -2.0358e-06, 6.2689e-07,\n 4.8869e-06, -3.5617e-06, -1.2418e-06, 3.1051e-06, 9.5302e-06,\n -1.5862e-06, -1.3534e-06, -8.9570e-06, 6.2103e-06, 2.5381e-06,\n 1.6023e-06, -5.9220e-06, -2.9331e-06, 3.8752e-06, -3.3581e-07,\n 8.5148e-06, 1.8572e-06, -2.8038e-07, -2.9577e-06, 4.3557e-07,\n 3.7990e-06, -3.8794e-08, 4.3642e-07, 5.8453e-06, -7.8181e-07,\n -5.4920e-06, 9.2567e-06, -1.6763e-06, 2.4244e-06, 7.3700e-06,\n 1.2543e-05, 3.8618e-06, 1.0157e-05, -7.0772e-06, 3.0796e-06,\n -6.9834e-06, 2.6095e-07, 5.6780e-06, 6.2442e-06, 5.9231e-06,\n 1.0515e-05, 8.2496e-06, -9.8594e-06, -3.2119e-06, -4.0922e-07,\n 2.3643e-06, -2.3461e-06, -5.4607e-06, -4.0858e-06, 4.4232e-06,\n 3.0166e-06, 7.5521e-06, 7.9381e-06, -6.2756e-07, -1.3210e-06,\n -3.2770e-06, 3.4300e-06, 3.7452e-06, -1.1550e-05, 5.7750e-06,\n -1.9062e-06, -6.3672e-06, -7.4606e-06, 2.7759e-06, 9.1413e-07,\n 2.4707e-06, -1.5731e-07, 1.5658e-06, 3.3387e-06, 3.2264e-06,\n 1.1063e-07, 4.5386e-06, -4.2060e-06, -4.5194e-06, 1.8945e-07,\n -2.8047e-06, -1.0491e-06, 1.4058e-06, -7.2463e-07, 4.7447e-06,\n -3.7480e-06, -1.7061e-06, -5.8382e-07, 1.3311e-07, -6.5149e-06,\n 3.2196e-06, 1.5004e-06, -1.1272e-06, -3.7436e-06, -4.2337e-06,\n -9.4065e-07, -2.5739e-06, -2.1102e-06, -5.5597e-06, -3.0322e-06,\n 2.4260e-06, 7.4427e-06, 7.9064e-06, -8.4480e-07, 1.1280e-05,\n -4.2425e-06, 3.6576e-06, 5.8162e-06, 2.2812e-06, 3.8517e-06,\n -1.6370e-06, -4.0346e-06, -4.2847e-06, -9.9953e-07, -1.0865e-06,\n 9.7985e-06, -1.5277e-07, -1.5832e-06, 1.2917e-06, -1.2187e-06,\n 1.4263e-06, 6.7498e-06, -8.0483e-07, -7.5347e-07, 6.4809e-06,\n -4.0738e-06, 2.3099e-06, -2.1205e-06, 8.6322e-06, 5.1104e-06,\n 1.0539e-05, 6.1166e-06, -5.8940e-06, 3.8380e-06, -3.2688e-08,\n 5.0114e-06, -1.1040e-05, 5.7655e-06, 1.1158e-06, -2.3147e-06,\n 5.4914e-06, 1.4866e-06, 7.9106e-06, 7.1071e-07, -3.4193e-06,\n -2.3594e-06, 6.3378e-07, 8.8223e-06, 7.4935e-06, 1.3535e-06,\n 9.1739e-06, -1.2687e-06, 6.4008e-06, -1.7926e-06, 3.9100e-06,\n 2.2305e-07, 5.3034e-06, -1.5477e-06, 2.2278e-06, 6.4169e-07,\n -1.7405e-06, -8.5938e-06, -9.3725e-07, -7.3808e-06, -8.3311e-06,\n 7.7946e-07, 5.3950e-06, 4.2961e-06, -2.4558e-07, 3.0139e-06,\n 9.0624e-06, 5.9892e-06, -4.7614e-06, -6.2088e-06, 7.1101e-06,\n 7.1080e-08, -2.8337e-06, 6.9536e-06, 3.3464e-06, 6.0343e-06,\n 4.4267e-06, -4.8696e-06, -4.8129e-06, -1.8106e-06, 4.4335e-06,\n 1.1429e-05, 1.7124e-06, 4.1555e-06, -2.7578e-06, 3.7900e-06,\n 2.7739e-06, -3.9500e-06, 3.0688e-06, -3.1579e-07, 1.1246e-05,\n -4.0334e-07, 1.0367e-06, -6.8063e-06, 9.5636e-08, -8.5806e-07,\n -9.1553e-07, -3.1944e-06, 7.5016e-06, 2.9327e-07, 7.2783e-06,\n 7.2621e-06, -7.1029e-06, -6.7085e-06, -3.3706e-07, 4.3032e-06,\n 5.8978e-06, 2.5882e-06, 9.3417e-06, -1.0458e-06, -3.5108e-06,\n 2.7590e-06, -1.5926e-06, -1.8668e-06, -3.9019e-06, 5.5442e-06,\n -2.4566e-07, -2.7444e-06, 4.7594e-06, 8.4115e-06, 4.7159e-06,\n -1.3898e-06, -2.9299e-06, 6.4706e-06, 7.4885e-07, 3.1589e-08,\n 7.3595e-06, -1.1303e-06, -1.5377e-06, -1.0781e-06, 4.9407e-06,\n 1.1031e-06, -3.1813e-06, 7.3284e-07, -3.4493e-06, 1.1463e-06,\n -5.7217e-07, 7.0459e-06, 3.4689e-06, -5.4842e-06, 1.4963e-06,\n 8.2943e-06, 1.1464e-05, 3.3673e-06, -1.2537e-05, 2.5335e-06,\n -1.4292e-06, -1.9305e-06, -3.6066e-06, 1.3502e-06, -2.8879e-07,\n 3.4575e-06, -3.0933e-06, 6.8688e-06, -1.0949e-06, -1.3394e-06,\n 6.5653e-06, 5.3135e-06, 2.1851e-06, 1.9329e-06, -6.0957e-06,\n -2.5832e-06, 6.2020e-06, 7.2686e-06, -5.4929e-06, 5.3658e-06,\n 5.3987e-06, 5.9277e-06], device='cuda:0'), 'exp_avg_sq': tensor([6.5769e-09, 2.6008e-10, 9.4353e-09, 6.5461e-10, 7.0621e-09, 1.5958e-09,\n 5.2749e-09, 1.3284e-09, 3.2877e-08, 1.5890e-08, 4.5843e-10, 7.1479e-09,\n 3.2836e-09, 3.3951e-09, 6.2125e-09, 2.4527e-10, 4.3242e-10, 1.4219e-08,\n 1.8388e-08, 6.5584e-09, 2.3746e-10, 3.3036e-09, 8.2572e-09, 1.4222e-09,\n 3.2524e-09, 6.6822e-10, 5.3926e-10, 5.6476e-09, 1.4872e-08, 6.9692e-10,\n 1.6418e-09, 9.4928e-11, 9.0605e-10, 4.0746e-09, 3.0522e-09, 1.1872e-08,\n 1.1037e-08, 2.0356e-09, 8.4588e-10, 5.8929e-09, 3.8295e-10, 4.7305e-10,\n 1.1395e-09, 1.2743e-08, 3.8207e-09, 9.1418e-09, 4.4530e-09, 7.9136e-09,\n 3.1489e-09, 7.4128e-10, 5.7567e-09, 1.4121e-08, 1.6788e-08, 1.0178e-08,\n 1.9455e-08, 5.6806e-09, 4.7067e-09, 1.6019e-08, 2.5707e-09, 8.2791e-09,\n 1.9067e-09, 3.2115e-09, 1.2724e-09, 1.0703e-09, 2.1579e-09, 1.4110e-09,\n 5.9342e-09, 2.0567e-09, 5.5118e-09, 1.1625e-08, 1.5580e-08, 8.3037e-09,\n 2.4495e-10, 1.6047e-08, 3.2907e-09, 1.7792e-08, 1.0532e-08, 6.1755e-09,\n 2.0857e-09, 2.8168e-09, 3.9047e-10, 2.9882e-09, 2.1168e-10, 6.0104e-09,\n 1.3117e-10, 5.3844e-09, 2.2451e-08, 2.0467e-08, 5.2680e-09, 6.7394e-09,\n 2.8957e-09, 9.5730e-10, 1.6236e-10, 2.1181e-09, 1.3626e-09, 1.0897e-08,\n 8.9179e-09, 1.2952e-08, 4.3597e-09, 8.8337e-09, 9.3910e-10, 9.5093e-09,\n 8.0939e-09, 3.9584e-10, 1.6988e-08, 4.0223e-09, 3.6838e-09, 1.9864e-09,\n 6.0463e-10, 1.2023e-08, 3.1662e-09, 9.6100e-09, 1.2826e-08, 6.5226e-10,\n 2.8409e-09, 6.0223e-09, 1.6430e-09, 2.6612e-09, 4.2661e-10, 3.7677e-10,\n 6.5070e-10, 2.4152e-10, 4.0258e-09, 6.4118e-10, 2.6849e-09, 1.0070e-08,\n 1.1213e-08, 5.8327e-09, 1.1915e-10, 6.6734e-09, 1.2888e-08, 1.0072e-08,\n 2.1557e-09, 3.1423e-11, 1.5147e-09, 1.6719e-09, 4.3453e-09, 5.0006e-10,\n 4.6359e-09, 8.4872e-09, 2.1033e-08, 1.2474e-08, 2.9233e-10, 2.5711e-10,\n 1.2935e-09, 4.1221e-09, 8.7119e-10, 8.2464e-09, 1.2757e-08, 2.8389e-09,\n 5.8836e-09, 1.0819e-08, 1.2437e-08, 1.1602e-09, 1.7426e-08, 1.0750e-08,\n 3.8015e-10, 6.9644e-09, 7.0230e-09, 5.6495e-09, 1.9559e-10, 6.6598e-09,\n 7.8116e-09, 2.5897e-09, 2.1427e-09, 1.0259e-09, 5.5953e-09, 1.0012e-09,\n 2.3491e-08, 5.9145e-09, 1.4989e-08, 8.7182e-09, 6.8705e-09, 1.2352e-08,\n 1.4912e-10, 8.8022e-09, 8.7411e-10, 7.2633e-10, 6.7317e-09, 1.2522e-09,\n 3.3861e-09, 8.2965e-10, 1.0886e-08, 2.9600e-09, 1.8729e-09, 9.0658e-09,\n 1.2221e-09, 6.2279e-09, 6.6012e-10, 1.0838e-09, 1.4579e-08, 7.1877e-10,\n 2.2796e-09, 1.0312e-09, 1.2257e-09, 1.1379e-08, 2.4958e-08, 7.8417e-09,\n 6.2573e-09, 9.7863e-09, 1.7840e-08, 8.9956e-09, 7.3889e-11, 1.2570e-09,\n 3.4221e-10, 4.2141e-09, 2.1097e-10, 6.8219e-09, 6.5808e-09, 1.9401e-09,\n 1.6764e-08, 2.6199e-09, 2.3738e-09, 3.2924e-09, 1.0054e-08, 8.9764e-10,\n 7.1417e-09, 4.0464e-10, 9.6995e-09, 1.1179e-09, 5.4720e-09, 7.7227e-09,\n 1.6054e-08, 8.9591e-10, 1.0502e-09, 6.4647e-09, 9.6279e-09, 7.2395e-10,\n 4.1869e-09, 3.7744e-09, 5.6101e-10, 3.7601e-09, 1.5467e-09, 8.8823e-09,\n 6.2330e-09, 7.3031e-09, 1.0775e-08, 4.5851e-10, 1.0457e-08, 6.5207e-10,\n 3.5120e-09, 1.6578e-10, 3.1213e-08, 1.7300e-09, 1.2358e-09, 6.5506e-09,\n 3.7973e-09, 8.5106e-10, 1.6982e-09, 7.7299e-09, 3.0969e-09, 1.5566e-09,\n 1.0978e-08, 6.5098e-09, 2.0763e-09, 4.8706e-10, 3.0847e-09, 1.0838e-09,\n 1.6485e-08, 3.0328e-11, 1.0844e-08, 8.2982e-10, 9.9463e-10, 6.2123e-10,\n 2.9361e-09, 4.4457e-09, 7.5622e-11, 1.6656e-08, 1.9089e-08, 6.3140e-10,\n 1.6258e-09, 8.4823e-09, 4.2091e-10, 1.9122e-09, 1.2111e-08, 1.3748e-08,\n 9.4172e-09, 1.3099e-08, 7.5813e-09, 6.2295e-09, 9.5274e-09, 1.1235e-10,\n 1.4129e-08, 6.0220e-09, 1.4611e-08, 4.7163e-09, 6.1743e-09, 9.2696e-09,\n 1.2832e-09, 1.6329e-09, 2.0254e-09, 8.4363e-10, 1.3606e-08, 4.4461e-09,\n 5.0126e-09, 5.0396e-09, 3.4278e-09, 5.2623e-09, 6.2256e-10, 3.0106e-10,\n 1.7621e-09, 8.2874e-10, 5.7324e-09, 1.9831e-08, 4.8953e-09, 2.2245e-09,\n 3.0621e-09, 5.6706e-09, 2.3250e-09, 1.9979e-10, 8.0143e-09, 8.0466e-11,\n 1.2398e-09, 8.3832e-10, 1.1208e-09, 7.8512e-09, 9.3405e-09, 1.9803e-09,\n 1.5578e-09, 1.7424e-09, 1.3993e-09, 7.9222e-10, 1.6602e-09, 1.1072e-09,\n 4.6117e-09, 7.9164e-10, 9.9716e-10, 7.8051e-11, 1.7008e-09, 1.3635e-08,\n 7.8101e-09, 8.3621e-09, 1.6765e-09, 5.1244e-09, 1.3343e-08, 1.3005e-09,\n 1.5074e-09, 4.5191e-10, 6.3721e-09, 1.1905e-08, 9.8349e-09, 6.7887e-09,\n 1.4956e-08, 1.2563e-08, 1.3861e-08, 6.3388e-09, 1.8435e-09, 5.3515e-09,\n 1.4290e-08, 2.4073e-09, 2.0038e-09, 1.4075e-09, 4.7711e-09, 5.2196e-10,\n 3.3821e-10, 7.5751e-09, 1.7444e-09, 2.0512e-09, 2.6644e-09, 2.2382e-09,\n 9.0117e-09, 1.7450e-08, 2.7881e-09, 5.9630e-09, 6.6989e-09, 2.9773e-09,\n 2.2397e-09, 1.4342e-09, 6.7920e-09, 9.2940e-09, 1.3584e-08, 5.3650e-09,\n 4.1504e-09, 1.3923e-08, 6.9246e-10, 6.2659e-09, 1.7250e-08, 1.0816e-08,\n 7.7126e-10, 1.3361e-08, 4.1168e-09, 2.4762e-09, 1.4785e-08, 3.1954e-08,\n 1.5818e-09, 1.6695e-09, 8.9862e-09, 8.1160e-09, 5.8687e-09, 2.3818e-09,\n 8.9055e-09, 1.2185e-09, 1.0098e-08, 1.5478e-09, 7.4523e-09, 1.0974e-09,\n 1.1939e-08, 6.1371e-10, 3.8612e-09, 9.2313e-10, 9.4847e-10, 1.5270e-08,\n 7.1994e-10, 1.0381e-08, 1.0295e-08, 4.2201e-09, 4.1361e-09, 7.4452e-09,\n 2.6637e-10, 1.3750e-09, 4.1060e-09, 6.0719e-09, 6.7108e-09, 8.8237e-09,\n 1.3770e-08, 7.4835e-10, 2.6286e-09, 5.6835e-09, 1.2049e-08, 5.5363e-09,\n 4.7008e-09, 9.3464e-10, 1.4644e-09, 3.8031e-10, 2.5254e-09, 2.2110e-08,\n 2.6200e-10, 3.5423e-09, 2.4760e-09, 8.1853e-09, 8.2575e-10, 1.7615e-09,\n 2.0783e-09, 3.2207e-09, 1.3835e-08, 3.6568e-10, 1.8094e-10, 9.4399e-09,\n 1.2334e-08, 1.1300e-08, 1.2195e-09, 4.4383e-09, 6.9360e-09, 4.1007e-10,\n 1.7880e-08, 8.9661e-09, 3.8137e-09, 4.3481e-09, 1.3534e-09, 6.6370e-09,\n 2.7440e-09, 9.2007e-09, 6.0129e-09, 1.3706e-09, 2.2011e-09, 1.0150e-08,\n 8.6396e-10, 7.6552e-10, 1.3066e-09, 5.1608e-09, 5.7677e-10, 4.3007e-09,\n 1.4399e-08, 1.9845e-08, 8.7573e-09, 1.2025e-10, 3.8056e-09, 4.9572e-09,\n 1.5826e-09, 3.2993e-10, 6.5471e-09, 8.9054e-10, 1.4459e-08, 4.2465e-09,\n 6.1941e-09, 2.8917e-09, 2.8981e-09, 4.9151e-09, 6.6244e-09, 1.4446e-10,\n 1.7782e-09, 8.0572e-09, 6.0251e-09, 7.2737e-09, 3.1912e-09, 4.4348e-09,\n 9.1167e-09, 2.0574e-09, 1.0922e-08, 2.0399e-08, 1.8693e-09, 6.0279e-10,\n 6.2215e-09, 3.1971e-09, 1.4156e-09, 6.7605e-09, 4.9922e-09, 4.9723e-09,\n 6.5693e-09, 3.4796e-10, 1.7695e-08, 2.4147e-09, 2.4202e-09, 1.5072e-09,\n 1.4509e-08, 1.4968e-09, 5.6708e-09, 2.7728e-09, 5.0064e-09, 3.4955e-09,\n 1.4428e-08, 4.2944e-09], device='cuda:0')}, 5: {'step': tensor(62477.), 'exp_avg': tensor([[ 1.5127e-07, -1.5888e-07, -1.4896e-07, ..., 1.2560e-07,\n 1.8984e-07, 3.4697e-07],\n [ 9.1136e-08, -1.5142e-07, -2.3262e-07, ..., 7.7285e-08,\n 9.7642e-09, 3.0398e-07],\n [ 3.6789e-06, 3.5739e-06, -5.6723e-06, ..., -7.3778e-06,\n -7.2265e-06, -7.6332e-06],\n ...,\n [-1.2039e-06, -5.1574e-07, 5.4895e-07, ..., 1.5640e-06,\n -4.3425e-08, 1.1257e-06],\n [-8.7381e-07, 2.6647e-07, -1.5329e-06, ..., -1.4858e-06,\n -1.1512e-06, -1.3888e-06],\n [ 2.0698e-06, 2.2254e-07, -2.3556e-06, ..., -1.0650e-06,\n -6.9212e-07, -9.6323e-07]], device='cuda:0'), 'exp_avg_sq': tensor([[1.0629e-11, 5.8780e-12, 1.3181e-11, ..., 2.0460e-11, 2.4181e-11,\n 2.9177e-11],\n [9.8628e-12, 4.9113e-12, 1.1496e-11, ..., 1.7479e-11, 2.0761e-11,\n 2.4831e-11],\n [2.7504e-10, 3.1567e-10, 1.1055e-09, ..., 1.2815e-09, 1.7010e-09,\n 1.7068e-09],\n ...,\n [1.9698e-10, 1.5925e-10, 9.4924e-10, ..., 8.1219e-10, 1.2339e-09,\n 1.0226e-09],\n [3.4668e-10, 7.2816e-11, 4.8111e-10, ..., 4.0845e-10, 6.4254e-10,\n 5.1387e-10],\n [3.1729e-10, 6.6206e-11, 5.7522e-10, ..., 4.1160e-10, 7.3870e-10,\n 5.0171e-10]], device='cuda:0')}, 6: {'step': tensor(62477.), 'exp_avg': tensor([ 7.9549e-07, 7.9387e-07, -1.5285e-05, 6.1667e-06, 1.9590e-06,\n 2.2760e-07, -1.7044e-09, -4.8758e-06, 3.1890e-07, 5.2794e-07,\n 3.7725e-06, -4.3214e-07, -4.3970e-06, 8.8816e-06, 7.1617e-08,\n 1.1405e-05, -4.5367e-06, -5.7902e-06, -1.5093e-06, -1.1452e-06,\n 1.2419e-06, -4.2452e-06, 3.4873e-06, 1.7155e-06, 6.0986e-07,\n 3.7381e-07, -7.5348e-06, 2.2240e-06, 5.2225e-06, 3.5062e-07,\n 4.5158e-06, -7.5193e-06, -2.3712e-06, -9.7082e-06, 1.4713e-05,\n -4.0236e-08, 2.0643e-07, 1.8603e-05, 9.5962e-07, -3.3639e-06,\n 1.9102e-06, -1.8651e-08, -6.8174e-08, 3.0538e-07, -2.1092e-07,\n -2.1873e-06, -2.0926e-06, -1.7679e-07, 3.7687e-06, -4.3748e-07,\n -4.1500e-06, -5.8414e-06, 1.4558e-05, -1.3133e-06, 8.7853e-07,\n 5.3302e-06, 4.4742e-06, -1.9971e-06, 1.6676e-05, 3.3776e-06,\n 1.5631e-07, -1.4950e-06, 6.5877e-06, 9.3497e-07, 7.6773e-08,\n 1.4257e-07, 9.1129e-08, 4.0776e-07, -1.3380e-05, 6.2473e-07,\n 7.3360e-07, 1.1789e-07, 2.9045e-06, -1.8707e-08, 5.8626e-06,\n 8.4911e-06, 1.0957e-05, -1.9102e-06, 1.7221e-06, 7.6958e-06,\n 5.6045e-07, 9.2243e-06, 1.4677e-05, -3.6307e-06, 3.0822e-07,\n 5.3877e-06, 6.9455e-06, -2.8165e-06, -2.1639e-06, 2.7584e-06,\n 2.2811e-06, -1.9801e-07, -9.0748e-06, 2.7170e-05, 1.1271e-06,\n -6.2570e-06, 1.5621e-06, -1.0656e-05, 9.5142e-07, -7.6786e-06,\n 4.1352e-06, 5.1632e-08, 2.4797e-07, 4.2915e-07, 1.0842e-06,\n 1.2181e-05, -1.7979e-06, 3.6134e-06, 5.6640e-06, -5.3969e-07,\n 1.2271e-06, 1.7005e-05, -6.2476e-06, 5.0632e-06, -9.9004e-06,\n 8.6097e-07, 1.0090e-06, 1.0417e-07, 3.1154e-06, 4.8308e-07,\n 9.7104e-07, -2.8397e-06, 6.2729e-06, -6.4762e-07, 7.4751e-07,\n 1.6780e-06, -1.7248e-06, -1.9719e-07, 3.4216e-07, 3.2864e-06,\n 7.1438e-06, -5.6285e-06, 7.3354e-07, -1.7870e-06, 4.1303e-07,\n -1.2197e-06, -5.6391e-08, 8.5640e-06, 1.6784e-05, -3.6692e-06,\n -3.5325e-07, 2.1373e-07, 1.8346e-07, 3.4473e-06, 2.8308e-06,\n 3.7024e-07, 2.3949e-07, 6.4800e-07, -2.0807e-06, 8.7436e-07,\n -1.0574e-05, -2.5368e-06, -3.0935e-07, 4.5886e-07, -1.1944e-05,\n -4.4283e-06, 6.1575e-07, 8.6753e-07, -1.3123e-07, 8.5666e-08,\n 1.7312e-05, 2.1569e-06, -2.3195e-08, -1.2002e-06, 1.3951e-05,\n 1.6326e-06, 4.7186e-07, 2.5889e-06, -1.9015e-06, -3.3654e-06,\n 8.7450e-07, 1.6282e-06, -3.7040e-07, 6.3384e-07, 7.7503e-07,\n 1.1437e-05, 1.4608e-06, 5.0303e-07, 9.9421e-06, 5.7222e-06,\n 2.5534e-06, 3.7560e-06, -2.0993e-06, 1.9772e-05, -6.2340e-07,\n -3.8606e-06, 1.1020e-06, 6.6218e-06, 7.5911e-07, 3.2946e-06,\n 5.9871e-06, 4.6966e-07, 4.6543e-07, -4.3112e-08, 2.2309e-07,\n -6.4987e-06, -8.5480e-06, -9.5299e-06, -5.2432e-06, 6.0681e-07,\n -1.0512e-05, 2.5789e-07, -5.8115e-07, -4.3490e-06, 7.3138e-06,\n 2.0472e-06, 1.1687e-06, -2.5299e-06, 6.2514e-06, 8.5300e-07,\n 1.0025e-05, 7.0857e-06, 2.1529e-06, -4.7644e-06, 8.3543e-06,\n 1.5863e-07, 2.7187e-06, 6.3602e-06, 2.4179e-06, 4.4328e-07,\n 5.3919e-06, 2.0640e-06, -6.0617e-08, 4.3386e-07, 9.8368e-06,\n 2.6937e-06, 3.2581e-07, 2.5656e-07, 8.7708e-07, 1.6513e-06,\n -3.9390e-06, -3.8715e-06, -7.0362e-07, 1.5661e-07, 2.0137e-05,\n 5.7294e-06, -5.5011e-06, -5.1184e-06, 1.0873e-07, 8.7716e-06,\n 1.6193e-06, -7.9196e-06, -3.2536e-06, -2.9966e-07, 9.7560e-07,\n -7.0859e-06, 7.9347e-07, 9.2465e-07, 3.1798e-06, 3.3560e-06,\n -2.5209e-06, -3.5387e-07, -9.5986e-07, -2.9111e-06, 9.4683e-07,\n 1.9449e-06, -3.7046e-06, 1.4014e-06, 1.8868e-05, -5.9652e-06,\n 3.0194e-08, 4.7083e-07, -2.8830e-07, -4.9917e-07, -8.1849e-07,\n -1.7629e-06, -3.8067e-06, -4.4241e-06, 1.5691e-07, -2.0024e-07,\n -1.6035e-05, -1.0097e-05, -9.7973e-07, 8.9390e-06, -2.6928e-06,\n -1.2047e-05, 1.6643e-07, -5.8897e-06, -5.5535e-07, 6.6613e-07,\n -7.0240e-06, 5.7165e-07, 6.1473e-06, 6.5031e-06, 1.8852e-07,\n 5.8218e-06, -3.2108e-06, 9.6767e-06, 9.7050e-07, 5.1368e-07,\n -3.5128e-06, -7.6180e-06, 4.9512e-06, 4.1482e-07, 1.5027e-06,\n -6.4760e-06, 1.0257e-05, 1.2737e-07, 1.7677e-07, 1.3821e-05,\n 7.5570e-06, 6.8685e-06, 1.0002e-07, 2.4734e-08, 4.5859e-06,\n 5.2963e-07, 2.1104e-06, 1.1558e-06, -3.5508e-07, -6.8227e-06,\n 1.6577e-06, -2.6920e-06, 3.9956e-08, 4.6939e-07, 4.6227e-06,\n -1.3382e-07, 2.9048e-07, 2.3212e-06, 2.2777e-07, 4.4845e-07,\n 5.1049e-06, 1.7373e-05, 1.6152e-06, -1.9886e-06, 9.2764e-07,\n 7.6775e-06, 1.7101e-07, 9.8520e-07, 6.0438e-08, 1.2845e-05,\n -1.0260e-06, 6.1881e-06, 7.1665e-06, -1.6259e-07, -1.6942e-06,\n 7.6927e-06, -1.6328e-06, -4.8250e-07, -3.0828e-06, 4.1308e-06,\n 3.7481e-07, -1.7474e-06, -8.5204e-07, 6.3101e-07, 8.1031e-07,\n -4.9054e-06, -1.3351e-07, -1.2048e-05, -1.4840e-07, 9.0265e-06,\n -1.6206e-06, 3.1594e-06, -9.2853e-07, 2.4273e-06, 2.3114e-06,\n 1.6576e-05, -1.1840e-06, -3.2730e-06, -2.2975e-08, 6.5135e-06,\n -6.1634e-06, 1.3373e-05, 4.7114e-06, -4.0162e-06, 2.7901e-07,\n -1.8723e-06, -8.2230e-06, 1.5529e-06, -2.2282e-06, 2.3319e-06,\n 1.9432e-07, 4.3999e-07, 8.9535e-07, -3.6905e-06, 1.8554e-07,\n 1.9310e-06, 3.9887e-07, 2.7750e-06, 7.0524e-06, -1.5821e-07,\n 2.9230e-07, -2.3079e-06, -1.8071e-07, -1.0102e-06, 1.3624e-05,\n -1.1662e-06, -5.5008e-06, 7.8968e-07, -1.4543e-05, 2.2930e-07,\n 1.2640e-06, -1.3834e-06, 5.9732e-07, 1.6587e-05, 3.5343e-07,\n -6.1448e-06, -5.6311e-06, -3.1021e-06, -1.0211e-05, -3.2795e-06,\n 2.6221e-06, 1.8328e-05, 8.5037e-07, -9.5391e-07, 8.7282e-06,\n 9.0954e-06, 8.4780e-06, 1.0783e-05, 9.5353e-07, -1.9930e-06,\n 6.4293e-07, 6.9847e-07, 3.2897e-06, 9.7376e-06, 4.3760e-06,\n -2.1309e-07, -1.0720e-05, 1.6433e-06, -1.0140e-05, 8.7358e-07,\n 9.7650e-07, -9.7277e-08, 1.1834e-05, 3.2839e-07, 1.0282e-06,\n -2.8330e-06, 7.2036e-07, 5.4537e-07, 1.3198e-06, 1.0378e-06,\n 8.8440e-06, -6.4036e-06, -6.8142e-06, 5.0226e-06, 6.5794e-06,\n 2.9141e-06, 9.0518e-06, -2.7220e-06, 1.1124e-06, 3.4116e-06,\n 6.6177e-07, 5.8294e-06, 2.0121e-07, 1.2948e-05, 4.0555e-08,\n -4.8718e-06, -3.6059e-06, 2.5978e-06, -7.6329e-06, -3.4725e-06,\n 3.2287e-07, -6.9310e-08, -1.5117e-07, 1.1634e-06, -1.9549e-06,\n 1.1466e-07, -1.5301e-06, 3.7103e-06, 3.7236e-07, 4.1393e-06,\n -8.5696e-06, 4.4554e-07, -5.3948e-06, 3.2216e-06, 3.8706e-06,\n 1.7409e-06, 7.0535e-06, -1.6371e-06, 2.3478e-06, -4.8389e-06,\n 1.3166e-06, 7.7617e-07, 4.7262e-06, 2.8815e-06, -7.8945e-06,\n -1.0157e-05, 9.4395e-06, -5.8128e-06, -1.2072e-06, 6.3140e-06,\n 8.9478e-07, -5.5676e-06, 1.3783e-06, 5.9207e-06, 9.0674e-07,\n -1.5814e-06, 9.3205e-06, 8.6543e-06, 1.7068e-06, 7.3382e-07,\n 1.0171e-05, -6.9140e-08, 1.4549e-05, -2.7728e-07, -4.4368e-06,\n 7.4421e-07, 4.4677e-07, -3.5479e-06, -1.5516e-06, 2.5161e-07,\n 8.2316e-07, -3.6653e-06, -2.3423e-06, -7.1696e-06, 6.1149e-06,\n 1.7286e-06, 6.7169e-06, 3.3425e-07, -3.9865e-06, 2.2323e-06,\n -2.3979e-06, -5.1972e-07], device='cuda:0'), 'exp_avg_sq': tensor([1.6028e-10, 1.3903e-10, 8.8251e-09, 2.8232e-09, 7.4770e-09, 7.2413e-11,\n 3.7328e-09, 2.1434e-09, 4.5435e-09, 2.5202e-10, 5.5104e-09, 1.0834e-10,\n 3.0728e-09, 2.1165e-09, 3.5785e-09, 1.0098e-08, 4.7937e-09, 6.3757e-09,\n 3.5792e-09, 1.2325e-08, 1.0713e-10, 2.0487e-09, 1.8801e-09, 1.9605e-10,\n 1.1122e-10, 4.2633e-11, 4.4612e-09, 2.1639e-09, 4.5225e-09, 1.0571e-10,\n 3.4357e-09, 8.3784e-09, 3.4634e-09, 8.7462e-09, 4.1285e-08, 1.9139e-11,\n 4.9255e-11, 2.5409e-08, 3.0362e-10, 4.5457e-09, 5.2551e-10, 5.1549e-11,\n 3.1718e-11, 5.5732e-11, 2.1355e-11, 1.2122e-08, 9.1566e-09, 2.2608e-11,\n 8.1487e-09, 6.1361e-09, 1.6481e-09, 1.2876e-08, 1.4455e-08, 7.3385e-09,\n 2.2978e-10, 2.8338e-09, 1.1494e-08, 8.7867e-09, 2.0285e-08, 8.9016e-09,\n 8.7705e-11, 1.3700e-09, 3.2995e-09, 1.1325e-10, 2.1534e-11, 7.2185e-11,\n 1.2013e-10, 1.6626e-10, 6.2493e-09, 1.7024e-10, 9.6274e-11, 4.2076e-09,\n 2.7304e-09, 1.1299e-08, 9.9907e-09, 1.3177e-08, 7.3928e-09, 1.7562e-09,\n 2.1291e-10, 5.2542e-09, 1.0763e-10, 7.6970e-09, 1.3506e-08, 7.8040e-10,\n 9.8968e-11, 2.0824e-08, 1.7824e-08, 4.0333e-10, 4.3378e-09, 4.5944e-09,\n 3.5820e-09, 6.0560e-10, 2.3152e-09, 2.2104e-08, 1.3590e-10, 7.9482e-09,\n 2.5638e-10, 3.0453e-09, 7.5577e-09, 1.1505e-08, 2.2385e-09, 1.1752e-08,\n 3.9063e-11, 4.1449e-11, 2.9656e-09, 1.0080e-08, 2.5498e-10, 3.4608e-09,\n 8.7355e-09, 4.3057e-09, 6.3406e-11, 2.5639e-08, 2.9262e-09, 6.2964e-09,\n 2.5297e-09, 2.0487e-10, 4.5408e-11, 1.7197e-11, 5.4609e-09, 1.3396e-10,\n 1.1754e-10, 2.3478e-09, 2.8004e-09, 3.9063e-11, 5.0404e-11, 1.4067e-10,\n 1.3761e-08, 1.1453e-10, 1.3818e-10, 1.2444e-08, 6.8932e-09, 1.0028e-09,\n 4.2643e-10, 1.0196e-09, 5.6245e-11, 2.1604e-09, 8.0865e-11, 9.4938e-09,\n 9.9852e-09, 1.8688e-08, 3.4120e-11, 3.8913e-09, 3.1289e-10, 2.3879e-09,\n 6.0573e-09, 3.5066e-11, 2.9528e-09, 4.2315e-11, 2.0363e-08, 1.8131e-10,\n 7.5415e-09, 3.0201e-10, 1.5868e-11, 8.9346e-11, 7.9211e-09, 4.9495e-10,\n 2.1631e-10, 1.4012e-10, 2.3257e-10, 2.4803e-11, 2.3085e-08, 2.7815e-09,\n 1.5352e-11, 7.9492e-10, 1.1839e-08, 4.1635e-09, 1.2770e-10, 1.1087e-08,\n 4.2765e-09, 2.4906e-10, 7.5454e-11, 1.4494e-09, 6.8309e-11, 4.7630e-09,\n 4.3244e-10, 1.9288e-08, 1.9454e-10, 1.6406e-10, 3.2265e-09, 5.0417e-09,\n 1.7618e-09, 1.3917e-08, 1.5541e-10, 9.0857e-09, 3.1864e-09, 1.5336e-09,\n 1.2244e-10, 3.4396e-09, 1.3553e-10, 9.9379e-09, 4.1792e-09, 9.7101e-11,\n 6.0658e-09, 7.8482e-11, 6.2668e-11, 4.1371e-09, 1.2889e-09, 2.9621e-09,\n 2.3000e-09, 7.0851e-11, 5.6840e-09, 3.0750e-11, 8.8896e-11, 3.8949e-09,\n 1.6223e-09, 1.8012e-10, 6.1383e-10, 1.5212e-09, 6.7396e-09, 1.3285e-10,\n 2.7398e-09, 5.8039e-09, 5.5358e-09, 3.5132e-09, 1.5898e-08, 1.9234e-10,\n 1.5495e-08, 1.1526e-08, 9.0387e-09, 2.2370e-09, 1.0440e-08, 1.0025e-08,\n 2.2851e-11, 3.2944e-09, 6.0523e-09, 6.7794e-09, 8.6243e-11, 7.1871e-11,\n 1.1637e-10, 4.0281e-10, 4.2489e-09, 1.1763e-09, 8.5335e-11, 4.1292e-11,\n 9.0485e-09, 1.1023e-08, 4.9461e-09, 5.3072e-09, 2.1171e-10, 3.1822e-09,\n 2.2651e-09, 6.5757e-09, 7.1667e-10, 2.2806e-09, 1.4029e-10, 5.2888e-09,\n 1.1668e-10, 5.3703e-11, 1.4626e-08, 6.2021e-09, 1.2329e-08, 2.1283e-08,\n 1.3369e-09, 1.1405e-09, 2.4032e-10, 1.1321e-10, 3.9722e-09, 3.3073e-10,\n 1.7552e-08, 2.0616e-09, 3.2530e-11, 8.3807e-11, 5.1441e-10, 3.2153e-09,\n 4.3586e-09, 3.2619e-10, 1.4503e-08, 4.7507e-09, 3.4846e-11, 2.8556e-11,\n 4.4255e-09, 9.9114e-09, 2.3275e-09, 1.2326e-08, 4.1476e-09, 1.9403e-09,\n 2.6494e-10, 7.5936e-09, 3.5217e-10, 5.5845e-11, 1.0427e-08, 6.2025e-11,\n 6.5894e-09, 1.5281e-08, 1.1034e-10, 1.9888e-09, 2.5445e-09, 9.0827e-09,\n 2.3541e-10, 1.0928e-10, 3.8332e-09, 6.7127e-09, 1.6048e-08, 1.3418e-10,\n 2.3080e-09, 2.4963e-09, 1.3364e-08, 2.2105e-11, 8.5905e-11, 2.2197e-08,\n 1.4264e-09, 1.8879e-09, 9.3481e-11, 1.8648e-09, 4.2834e-09, 9.0461e-11,\n 2.0183e-08, 9.4154e-09, 5.4716e-11, 1.5307e-08, 1.4391e-10, 4.5477e-09,\n 7.7876e-11, 4.4964e-11, 4.3106e-09, 8.6550e-11, 1.3541e-10, 1.3666e-10,\n 3.0303e-11, 6.3543e-09, 2.4871e-09, 9.1562e-09, 1.1887e-09, 1.4478e-09,\n 9.5188e-11, 1.6885e-08, 7.9866e-10, 1.1258e-10, 1.1852e-10, 1.7053e-08,\n 6.6694e-09, 1.8040e-08, 6.6126e-09, 2.9630e-08, 4.3856e-09, 1.0331e-08,\n 3.3449e-09, 1.7298e-11, 5.3304e-10, 3.3837e-09, 8.1429e-11, 6.5984e-09,\n 6.5540e-09, 1.7267e-10, 2.1413e-10, 6.3580e-09, 1.0375e-08, 2.4837e-09,\n 7.9095e-11, 5.3948e-09, 1.0215e-08, 2.2506e-09, 2.7544e-10, 4.4143e-09,\n 2.9211e-09, 1.2404e-08, 4.7967e-11, 3.2492e-09, 1.5583e-10, 1.2605e-08,\n 1.5961e-08, 2.2173e-08, 5.9120e-09, 3.7506e-09, 8.4194e-11, 2.4665e-09,\n 9.9591e-09, 8.4597e-11, 1.0689e-09, 1.1007e-08, 7.3155e-11, 2.9134e-10,\n 9.5370e-11, 1.7907e-09, 4.4879e-11, 8.7231e-09, 3.2859e-11, 4.0860e-09,\n 1.3104e-08, 2.8073e-11, 2.1794e-10, 1.2992e-09, 6.9961e-11, 8.6612e-09,\n 1.4205e-08, 2.6252e-11, 2.9857e-09, 3.6075e-11, 1.0683e-08, 1.2159e-10,\n 3.0015e-09, 1.5551e-09, 3.2195e-09, 2.1444e-08, 9.9512e-11, 1.6057e-09,\n 2.6181e-09, 3.0052e-09, 6.3909e-09, 1.2887e-08, 1.7013e-09, 1.1698e-08,\n 1.9144e-10, 3.2665e-10, 4.8876e-09, 1.1293e-08, 6.4006e-09, 6.1169e-09,\n 2.6553e-10, 8.5284e-10, 1.1349e-10, 1.5303e-10, 1.1076e-08, 3.3718e-09,\n 5.6453e-09, 2.4881e-10, 2.5950e-08, 1.2093e-10, 4.6064e-09, 9.1298e-09,\n 8.6195e-09, 1.0794e-10, 2.1081e-08, 1.0695e-10, 1.3752e-10, 5.2070e-09,\n 1.3565e-10, 3.1728e-09, 1.0518e-08, 2.5992e-10, 2.9637e-09, 1.1444e-09,\n 5.0256e-09, 2.2716e-09, 1.2563e-08, 1.7669e-09, 1.1000e-08, 1.1935e-09,\n 1.4503e-10, 1.2520e-09, 1.3314e-10, 5.5719e-09, 5.4044e-11, 1.1991e-08,\n 1.1852e-10, 1.1495e-09, 1.3909e-09, 1.3387e-09, 1.7434e-09, 4.4465e-09,\n 4.2034e-11, 1.8815e-11, 3.0956e-11, 2.1440e-10, 1.2208e-08, 3.9987e-11,\n 1.8847e-09, 3.6453e-09, 1.0292e-10, 7.7754e-09, 2.1762e-09, 1.7433e-10,\n 5.1436e-09, 1.2075e-09, 4.5124e-09, 9.7252e-11, 5.1586e-09, 3.0905e-09,\n 1.0689e-10, 7.7452e-09, 7.7323e-11, 1.6766e-10, 7.5496e-09, 9.7032e-09,\n 3.0401e-08, 1.2088e-08, 2.1717e-09, 5.1456e-09, 6.4121e-09, 2.3572e-09,\n 1.7158e-10, 1.1252e-09, 1.0498e-08, 2.5334e-09, 1.4772e-10, 3.2693e-10,\n 5.1909e-09, 4.7311e-09, 1.7888e-10, 3.0691e-09, 1.2146e-08, 5.9429e-11,\n 1.1350e-08, 2.2544e-10, 2.6366e-09, 6.5363e-09, 1.1915e-10, 7.9558e-09,\n 8.5226e-10, 4.0809e-11, 1.4983e-10, 5.6954e-09, 2.2369e-09, 4.4146e-09,\n 1.6180e-08, 7.3528e-09, 1.3951e-08, 1.6072e-10, 6.4482e-09, 4.9844e-09,\n 2.8135e-09, 2.3057e-09], device='cuda:0')}, 7: {'step': tensor(62477.), 'exp_avg': tensor([[-1.1779e-04, 3.9750e-05, 7.0062e-05, ..., 1.5834e-04,\n 9.2525e-05, 2.0264e-04],\n [-1.1594e-04, 1.3481e-04, -5.8344e-05, ..., 7.2383e-05,\n 1.1609e-04, 1.2852e-04],\n [-4.6146e-05, -1.4447e-04, 1.1096e-04, ..., 1.8721e-04,\n -1.2639e-05, 5.5291e-04],\n ...,\n [-1.4713e-06, -1.5827e-06, 2.2470e-06, ..., 2.6193e-06,\n 6.7801e-07, -4.2789e-06],\n [-2.0151e-06, -2.9343e-06, -1.6115e-06, ..., 2.1943e-07,\n 2.2655e-06, 1.6174e-06],\n [-1.4289e-06, -2.0122e-07, 4.4349e-07, ..., 2.4659e-06,\n 2.3307e-06, -4.1664e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[7.0528e-07, 9.9530e-07, 5.5698e-07, ..., 1.6960e-06, 1.2170e-06,\n 3.6778e-06],\n [6.0837e-07, 8.6500e-07, 4.8947e-07, ..., 1.3232e-06, 1.0928e-06,\n 2.7666e-06],\n [6.7090e-07, 1.0669e-06, 5.3003e-07, ..., 1.4280e-06, 1.2687e-06,\n 2.7238e-06],\n ...,\n [6.0704e-10, 1.0706e-09, 9.6348e-10, ..., 3.5068e-10, 1.5316e-09,\n 1.2072e-09],\n [1.8628e-09, 2.1969e-09, 2.2782e-09, ..., 3.9254e-09, 2.2864e-09,\n 4.3234e-09],\n [7.3894e-10, 1.2498e-09, 1.1262e-09, ..., 5.1761e-10, 1.5060e-09,\n 1.5871e-09]], device='cuda:0')}, 8: {'step': tensor(62477.), 'exp_avg': tensor([-1.0659e-05, -2.6528e-06, 1.4299e-04, 2.6532e-05, 2.8464e-06,\n 2.0611e-06, 4.8724e-07, -2.3353e-07, -2.4577e-05, -6.6040e-05,\n -1.8037e-05, -2.6510e-05, 1.5029e-06, 2.1422e-06, 1.0864e-06,\n -2.5768e-07, 2.5358e-04, -6.8346e-05, 1.7910e-04, 7.3638e-05,\n 2.2179e-06, 2.2184e-06, -7.6744e-07, -1.8531e-06, 1.6239e-04,\n 5.0497e-05, 9.8378e-05, 9.5751e-05, 1.3660e-06, 1.2819e-06,\n 1.1474e-06, 8.4141e-07], device='cuda:0'), 'exp_avg_sq': tensor([3.3291e-06, 2.5462e-06, 2.1115e-06, 3.0466e-06, 4.5921e-10, 8.1799e-10,\n 4.7100e-09, 9.2983e-10, 3.0612e-06, 2.7974e-06, 3.1227e-06, 3.1768e-06,\n 3.6139e-10, 5.5362e-10, 5.5803e-09, 7.8058e-10, 3.8940e-06, 2.3742e-06,\n 2.3185e-06, 3.1314e-06, 6.1085e-10, 1.0811e-09, 4.0457e-09, 9.1360e-10,\n 2.8935e-06, 3.3649e-06, 2.4111e-06, 3.2818e-06, 2.8708e-10, 7.6545e-10,\n 5.7969e-09, 1.3175e-09], device='cuda:0')}, 9: {'step': tensor(62477.), 'exp_avg': tensor([[-6.5701e-08, -4.0249e-08, 4.8900e-07, ..., -1.5901e-08,\n 2.0179e-06, -4.0636e-07],\n [-4.9111e-08, -2.9890e-08, -5.1721e-06, ..., 2.5343e-06,\n -3.2171e-06, -2.6185e-06],\n [ 8.5247e-08, 7.6017e-08, -6.1904e-07, ..., 3.1650e-06,\n 5.3646e-07, -9.9597e-07],\n ...,\n [-2.2493e-08, -1.6276e-08, -4.1949e-07, ..., -3.3349e-07,\n -2.4092e-07, 6.6585e-07],\n [ 7.2609e-08, 1.2030e-07, -1.1548e-07, ..., -1.9858e-06,\n 1.1068e-07, 1.2187e-06],\n [-3.4779e-07, -3.4792e-07, 7.7630e-06, ..., -2.0521e-06,\n 4.1000e-06, 2.9561e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[1.8894e-11, 1.6499e-11, 4.4540e-10, ..., 3.1417e-10, 2.0006e-10,\n 5.1933e-11],\n [2.7958e-11, 2.4461e-11, 7.9651e-10, ..., 4.8149e-10, 3.4929e-10,\n 9.6792e-11],\n [1.7226e-11, 1.4868e-11, 4.0679e-10, ..., 2.5114e-10, 1.8419e-10,\n 4.8309e-11],\n ...,\n [6.0682e-12, 5.3584e-12, 2.6333e-10, ..., 1.0433e-10, 1.1488e-10,\n 3.3714e-11],\n [4.5577e-12, 4.0039e-12, 9.0104e-11, ..., 4.6963e-11, 4.4676e-11,\n 1.4616e-11],\n [6.4792e-12, 5.7592e-12, 2.8752e-10, ..., 7.1459e-11, 1.3097e-10,\n 4.4554e-11]], device='cuda:0')}, 10: {'step': tensor(62477.), 'exp_avg': tensor([ 1.4819e-07, -1.1494e-06, 2.7709e-06, ..., 5.5669e-09,\n -2.4158e-06, 4.1464e-06], device='cuda:0'), 'exp_avg_sq': tensor([1.2885e-09, 2.0259e-09, 1.1317e-09, ..., 5.0853e-10, 2.0999e-10,\n 4.2480e-10], device='cuda:0')}, 73: {'step': tensor(62477.), 'exp_avg': tensor([[ 2.1864e-07, 1.3466e-07, 4.1105e-07, ..., 9.9465e-08,\n 4.0494e-08, -5.9280e-08],\n [-2.2573e-06, -2.1049e-06, -1.2706e-06, ..., 3.3116e-07,\n 1.2306e-07, -2.4479e-07],\n [-5.4242e-07, -7.5656e-07, -5.9822e-07, ..., 2.1237e-08,\n -6.6710e-09, -2.8080e-07],\n ...,\n [ 9.9534e-07, 4.8904e-07, 1.1914e-06, ..., 5.7466e-07,\n -1.4345e-06, -4.3803e-06],\n [ 6.4580e-07, 1.2510e-07, -7.2418e-07, ..., -6.5315e-07,\n 2.1937e-06, 6.0311e-06],\n [-1.1298e-07, -5.4589e-07, -4.1291e-06, ..., 2.1050e-06,\n 4.3110e-06, 1.6264e-05]], device='cuda:0'), 'exp_avg_sq': tensor([[3.3716e-11, 1.8744e-11, 1.5133e-11, ..., 1.0943e-12, 8.4937e-13,\n 8.3256e-12],\n [5.0341e-11, 3.4240e-11, 3.1734e-11, ..., 1.4400e-12, 1.1204e-12,\n 9.5569e-12],\n [9.4642e-12, 6.0306e-12, 4.4776e-12, ..., 3.3767e-13, 3.5074e-13,\n 3.7679e-12],\n ...,\n [2.1416e-11, 1.9421e-11, 3.1395e-10, ..., 1.7594e-10, 4.8036e-10,\n 7.7128e-09],\n [4.3284e-11, 2.9869e-11, 5.2811e-10, ..., 2.9832e-10, 7.9139e-10,\n 1.2846e-08],\n [1.6851e-11, 1.4768e-11, 2.9693e-10, ..., 1.5121e-10, 3.7374e-10,\n 6.6602e-09]], device='cuda:0')}, 74: {'step': tensor(62477.), 'exp_avg': tensor([ 3.3665e-08, 6.1753e-08, -2.5324e-07, ..., -3.7939e-06,\n 3.5217e-06, 1.4015e-05], device='cuda:0'), 'exp_avg_sq': tensor([1.0338e-11, 1.3476e-11, 4.5703e-12, ..., 8.9766e-09, 1.4862e-08,\n 7.1038e-09], device='cuda:0')}, 75: {'step': tensor(62477.), 'exp_avg': tensor([[ 2.0850e-05, -2.6679e-05, -2.1856e-07, ..., -3.4447e-07,\n 1.3910e-05, 1.8847e-06],\n [ 2.2048e-05, -2.5270e-05, -5.1115e-07, ..., -1.0387e-06,\n 1.4940e-05, 2.9131e-07],\n [-2.1848e-06, 2.5725e-06, 6.4526e-08, ..., 1.2286e-07,\n -1.3078e-06, -3.4267e-07],\n ...,\n [ 1.0426e-05, -1.2887e-05, -6.7258e-08, ..., -1.3357e-06,\n 6.1979e-06, 1.8981e-06],\n [ 4.1790e-05, -4.3729e-05, -2.3502e-06, ..., -8.7706e-07,\n 2.6046e-05, 4.6869e-06],\n [ 7.2588e-06, -4.1382e-06, 4.1079e-07, ..., 5.8535e-07,\n 5.0022e-06, 1.1757e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[2.5415e-08, 2.9599e-08, 2.6416e-11, ..., 7.2952e-11, 1.1847e-08,\n 2.3648e-10],\n [2.1051e-08, 2.3966e-08, 1.8043e-11, ..., 5.4445e-11, 9.5729e-09,\n 1.7683e-10],\n [1.2928e-10, 1.5282e-10, 1.0978e-13, ..., 3.0387e-13, 5.1790e-11,\n 1.6225e-12],\n ...,\n [2.3660e-08, 2.7776e-08, 1.7308e-11, ..., 5.2594e-11, 9.5888e-09,\n 2.7526e-10],\n [4.3224e-08, 4.9764e-08, 3.0418e-11, ..., 9.2739e-11, 1.7941e-08,\n 4.3864e-10],\n [4.7296e-08, 5.6210e-08, 5.1366e-11, ..., 1.3523e-10, 2.1143e-08,\n 4.7259e-10]], device='cuda:0')}, 76: {'step': tensor(62477.), 'exp_avg': tensor([ 3.1649e-05, 3.3646e-05, -2.8761e-06, 1.9440e-07, -5.7712e-07,\n -9.0102e-07, -4.6989e-07, -3.3346e-06, 3.8860e-06, 7.9803e-06,\n 8.2435e-07, -2.0518e-06, -1.8131e-06, 1.1451e-06, 7.9575e-06,\n 9.1370e-08, -1.5373e-06, 1.3955e-06, 2.1763e-06, -9.1112e-07,\n -2.0606e-06, -2.6191e-06, 8.8438e-07, 5.7066e-06, -7.5949e-07,\n 2.5568e-06, 5.9974e-06, 1.6754e-06, -2.4026e-06, -1.1884e-06,\n 6.5136e-07, 2.1135e-06, -4.2530e-07, -1.0465e-07, 1.6952e-06,\n 5.3315e-07, 3.0726e-06, -3.8693e-07, 1.4139e-06, -1.1895e-06,\n -5.3556e-08, 9.9606e-07, 1.2352e-06, -2.5670e-07, 7.0688e-06,\n 4.2271e-06, -6.9671e-06, -1.2316e-08, 4.6472e-05, 4.7112e-07,\n -5.4467e-05, -1.8469e-06, 1.5471e-05, -7.1481e-07, 3.2594e-05,\n -3.4044e-05, -2.3378e-05, -1.1093e-06, 3.1414e-05, -4.0598e-05,\n 2.1114e-07, -1.5954e-06, 3.4525e-05, -7.1473e-05, -4.6277e-06,\n 1.6150e-05, 9.5185e-06, -4.3561e-05, 5.8064e-06, 2.8264e-06,\n -3.3176e-06, 1.8433e-05, 7.8085e-07, -2.6250e-06, -3.1888e-05,\n 4.2639e-05, -2.1144e-05, 1.6818e-05, -9.1866e-06, 5.0125e-06,\n 1.8518e-06, -1.3479e-05, 5.3017e-06, -1.4737e-05, 1.2193e-06,\n 1.1264e-05, 3.4003e-05, -5.8572e-06, -1.6428e-05, -6.1363e-06,\n -2.5389e-05, -5.9406e-06, -1.1267e-06, 4.5282e-05, 4.5555e-06,\n 7.1756e-06, -9.4696e-07, -6.1294e-07, -5.1873e-07, 1.9325e-05,\n 3.3543e-05, 3.4316e-05, 1.0968e-05, 3.3982e-05, 9.6203e-06,\n 8.7819e-06, -2.2166e-05, 2.2980e-05, -1.0264e-04, 4.8354e-05,\n -2.3855e-06, -3.8012e-06, -5.4257e-06, -1.5118e-05, 5.3361e-06,\n -2.1837e-05, 2.2736e-06, 7.7034e-06, 3.3544e-05, 5.1413e-08,\n -4.2466e-05, 3.1541e-07, -2.1599e-05, -5.0726e-05, 6.1358e-06,\n 1.7163e-05, -2.1952e-05, -7.3022e-07, -8.8208e-05, -5.2485e-05,\n -6.4365e-07, 1.2563e-06, -6.6741e-06, 6.0722e-06, 7.7744e-06,\n -1.1302e-06, 1.0899e-05, 2.1914e-06, 5.7753e-07, -7.2522e-06,\n 3.9907e-06, -9.8118e-06, -6.3113e-06, 8.9283e-07, 3.2874e-06,\n 3.6790e-07, -2.3671e-06, 1.7457e-07, 8.0846e-06, -4.0849e-06,\n -3.2266e-07, -9.9620e-07, -2.8259e-06, -9.1011e-07, -3.4000e-06,\n 1.5388e-06, -7.5300e-07, -9.9195e-07, 6.9692e-08, -2.6417e-05,\n 1.1864e-05, 5.3127e-06, 1.6972e-06, 2.6293e-06, -3.8348e-07,\n -6.2999e-07, 4.8126e-07, 4.7177e-06, 1.8039e-05, -3.0651e-05,\n 3.5405e-06, -1.5982e-05, -2.3476e-05, -1.6612e-05, 2.8249e-05,\n 1.3719e-06, 1.2786e-05, 5.6773e-05, -1.2962e-05, -2.5947e-06,\n 1.8599e-05, 1.1089e-05, -2.9384e-05, 1.6722e-05, 4.7000e-05,\n 6.7566e-06, 2.9767e-06, -3.7110e-05, -1.6184e-05, -3.3098e-06,\n -4.6856e-06, 1.1208e-05, 9.6264e-06, -1.0157e-05, -5.3252e-06,\n -1.3073e-05, 3.9053e-06, 4.8570e-06, -4.4040e-05, -7.7826e-06,\n -1.5013e-05, 2.7794e-07, 2.0472e-05, 5.7138e-06, 3.5770e-05,\n -1.3715e-05, 2.6752e-06, 4.2128e-05, -7.1123e-06, -1.7094e-06,\n 3.9332e-05, 3.0492e-05, 3.0706e-05, 2.5711e-06, 1.0741e-05,\n -2.6346e-06, -6.4823e-05, 6.1281e-07, -2.6211e-05, -1.4860e-05,\n -3.3356e-05, -8.1678e-07, -2.2806e-05, -1.5706e-06, 3.6944e-06,\n -1.6564e-05, -2.0870e-05, 7.5673e-06, 3.6699e-05, 1.5758e-05,\n 1.3004e-05, -6.4173e-06, -3.2629e-05, 1.6125e-06, -5.5744e-06,\n 2.2111e-07, -3.9634e-05, 3.0020e-05, -1.0907e-06, 1.7557e-05,\n 1.6458e-06, -3.8404e-06, 2.8394e-05, 1.7989e-05, 4.3035e-06,\n -4.2440e-05, 3.5036e-05, -1.0389e-06, -2.6168e-05, -2.5754e-06,\n -1.3459e-05, -2.4683e-05, -1.3446e-05, 5.3074e-05, -2.7221e-05,\n -5.1985e-05, 1.2029e-05, 5.5130e-05, 3.3501e-05, -4.6051e-06,\n 8.4120e-06, -3.3729e-06, 2.7923e-06, -1.2903e-06, -9.9639e-07,\n 2.2388e-05, -2.4978e-06, 1.2404e-06, 1.7179e-06, 5.2098e-07,\n 2.5889e-07, -2.2519e-06, 2.8040e-06, 1.2454e-06, -1.9069e-07,\n 1.2303e-06, 1.3534e-06, 1.6974e-05, 2.2526e-06, 2.8853e-06,\n 6.4636e-07, 1.3583e-06, 2.2535e-07, 1.1480e-07, 8.7320e-06,\n -7.9407e-08, 2.6883e-06, 2.3711e-06, 3.6672e-06, -6.8188e-07,\n 2.8268e-07, -3.2798e-07, 1.1825e-06, -5.5453e-06, 1.6724e-05,\n 4.1578e-06, 1.8034e-06, 3.3726e-06, -1.1218e-06, -1.1591e-08,\n -2.4020e-06, -1.4927e-06, 1.2537e-06, -2.7021e-06, -1.4873e-06,\n 1.1436e-06, -2.7261e-06, -4.1090e-05, -3.4273e-07, -5.0467e-06,\n -4.9153e-05, -6.7403e-07, 2.5852e-06, 4.6308e-06, -1.2551e-06,\n -2.4269e-05, -7.4074e-06, 5.1040e-07, 1.0226e-06, 4.7139e-07,\n -4.4960e-06, 1.9197e-06, 1.1228e-06, -4.2619e-05, 1.1252e-05,\n -6.9746e-06, 1.7960e-05, -3.3416e-05, -2.8214e-05, -4.0553e-06,\n -1.9057e-05, -1.6918e-05, -7.5266e-06, 3.7787e-05, -4.3171e-05,\n -6.0566e-06, 2.3480e-05, 1.1290e-04, -3.2092e-05, -1.4162e-05,\n -3.3845e-06, -2.7049e-06, -2.2527e-05, 3.7656e-06, 1.1278e-05,\n 8.2880e-07, 1.0656e-05, -4.5976e-06, -4.0510e-06, 9.6805e-08,\n -2.1709e-05, 2.1438e-06, -4.1518e-05, 7.9864e-05, -3.0900e-06,\n -3.3364e-06, 2.3486e-05, 3.7602e-05, -1.4053e-06, 1.0705e-05,\n 8.0629e-06, -8.4120e-07, 4.3680e-05, -1.3346e-05, -2.0403e-05,\n -1.3721e-05, -2.1156e-05, -1.2418e-06, 1.6589e-05, 1.4505e-05,\n -2.6570e-05, -1.7448e-06, -1.1671e-06, -1.5451e-05, -8.3557e-06,\n -6.2126e-07, 2.7169e-06, -9.6859e-06, 1.0786e-06, -6.5120e-07,\n 2.2996e-05, -6.9225e-05, 2.3507e-06, -6.3958e-06, 2.9034e-05,\n 5.4437e-06, 2.1656e-05, -3.7074e-06, -9.5532e-06, 1.0767e-05,\n -6.3607e-06, -3.1889e-07, 1.9874e-07, -7.0219e-06, -1.1261e-05,\n 6.9852e-06, -1.3395e-05, -1.7881e-06, -9.4578e-07, 2.1355e-05,\n -2.2602e-05, -8.2441e-06, 3.0493e-08, -5.4156e-07, -7.3365e-06,\n 9.9768e-06, 2.6215e-06, -6.1254e-07, 7.7514e-07, 2.8702e-08,\n 1.8173e-06, 8.5448e-07, -2.3079e-07, 1.5426e-06, -1.7791e-07,\n 6.2440e-07, -2.6058e-06, 9.0347e-07, -2.6065e-06, -1.5661e-05,\n 7.9428e-06, 9.0560e-07, -1.3414e-06, -1.0209e-06, -3.2077e-05,\n -3.9500e-07, 1.1161e-06, 2.0708e-05, 1.0640e-05, 9.8181e-06,\n -5.3870e-05, 2.4527e-05, -3.6203e-06, 3.2850e-05, 2.7251e-06,\n -3.9482e-05, 7.7991e-06, 1.7351e-05, -3.0594e-05, -2.6384e-06,\n -1.4498e-05, 3.6520e-05, -2.4025e-05, -9.7065e-07, 3.2050e-05,\n -1.1421e-05, -4.0371e-05, -2.7161e-05, 8.2167e-06, -1.7637e-05,\n -3.8493e-05, 1.2225e-06, 2.9228e-05, 4.9195e-05, -1.4836e-05,\n 7.5515e-06, -2.6014e-05, 1.2754e-05, -2.1669e-06, 1.4566e-05,\n 4.1647e-05, -1.6859e-05, -3.8266e-05, 1.5078e-05, -1.0627e-06,\n 1.5213e-05, -1.6304e-05, -2.6874e-05, -1.5879e-06, 1.4594e-05,\n 1.1670e-07, -5.0023e-05, 1.2242e-06, -3.0999e-05, 2.1648e-05,\n 1.8840e-06, 2.2249e-05, 1.1563e-05, 1.7642e-05, -8.6010e-06,\n -2.0224e-05, -3.9983e-06, -6.8927e-06, -2.1173e-05, 1.3443e-05,\n 2.9115e-07, 9.1301e-06, 5.7693e-06, 3.1090e-05, -1.8773e-05,\n 3.0845e-05, -3.3300e-05, -4.3926e-05, -4.6175e-05, 2.4144e-05,\n -5.4993e-05, -2.7207e-05, 2.7249e-05, 4.2193e-05, 1.6876e-05,\n -4.7073e-05, 3.6735e-05, 4.7588e-07, 6.6162e-06, -3.2620e-07,\n -4.9378e-06, -7.1053e-06, 1.8464e-05, 1.3543e-05, 1.3486e-05,\n 5.5898e-05, 7.8452e-06], device='cuda:0'), 'exp_avg_sq': tensor([5.2165e-08, 4.2889e-08, 2.3709e-10, 1.9342e-10, 1.6911e-10, 1.7020e-10,\n 1.3433e-10, 8.6063e-10, 9.5021e-09, 1.8482e-08, 8.5726e-11, 5.4333e-10,\n 5.3285e-10, 5.3700e-11, 7.8879e-09, 3.9374e-11, 8.4362e-11, 1.2662e-10,\n 1.2921e-10, 2.3121e-10, 1.9326e-10, 1.6629e-10, 1.6707e-10, 1.2671e-10,\n 6.5671e-11, 4.8802e-10, 8.8818e-09, 5.4920e-11, 8.5931e-11, 1.1380e-10,\n 1.0157e-10, 1.4773e-10, 1.0395e-10, 3.3948e-10, 1.8451e-10, 9.3702e-11,\n 2.2548e-10, 9.1381e-11, 1.8027e-10, 1.3192e-10, 1.6000e-10, 1.1951e-10,\n 1.6408e-10, 1.5845e-10, 3.2168e-10, 6.8154e-10, 3.2760e-10, 1.4097e-10,\n 5.1270e-08, 5.0052e-09, 5.9221e-08, 4.5929e-10, 3.6615e-08, 1.8861e-10,\n 7.8055e-08, 1.4519e-07, 1.2788e-08, 2.5748e-10, 7.9109e-08, 5.1423e-08,\n 3.7793e-10, 2.0834e-08, 1.0403e-07, 1.4419e-07, 3.1863e-08, 4.9383e-08,\n 5.5337e-10, 5.6528e-08, 2.0754e-08, 1.2384e-07, 7.7442e-09, 1.4846e-08,\n 4.5371e-10, 1.1998e-10, 6.1879e-08, 4.3114e-08, 4.6219e-08, 5.0307e-08,\n 1.6749e-08, 4.0170e-10, 3.7378e-10, 6.6830e-08, 9.9507e-08, 7.8441e-08,\n 8.3069e-10, 8.6042e-09, 1.6342e-08, 7.7325e-09, 7.9117e-08, 2.8823e-08,\n 1.1041e-07, 2.2046e-08, 1.4183e-08, 2.1868e-07, 1.0749e-08, 1.7107e-07,\n 2.0424e-08, 3.6608e-08, 6.5542e-11, 5.4377e-08, 9.1467e-08, 7.7473e-08,\n 2.1339e-08, 1.6532e-07, 4.3815e-08, 8.1957e-10, 5.2581e-08, 6.0593e-08,\n 1.8930e-07, 7.5727e-08, 8.5589e-10, 3.1364e-08, 2.3001e-08, 3.9675e-08,\n 4.0484e-10, 4.8893e-08, 1.3948e-09, 4.5845e-08, 1.1684e-07, 5.1494e-11,\n 7.1608e-08, 3.0392e-10, 1.3957e-08, 6.4210e-08, 1.5172e-07, 1.1952e-07,\n 3.6113e-08, 1.6667e-09, 1.3534e-07, 9.1530e-08, 1.3770e-10, 1.0850e-10,\n 1.0795e-08, 5.8608e-09, 1.5140e-08, 3.2709e-11, 2.9615e-08, 1.0645e-10,\n 3.0774e-08, 3.0819e-08, 1.6358e-08, 9.4068e-09, 1.2237e-08, 4.7669e-11,\n 1.4146e-08, 9.8232e-09, 4.8600e-09, 2.2637e-11, 6.6753e-10, 5.8682e-09,\n 4.8472e-11, 1.5069e-10, 6.9524e-09, 5.0954e-11, 1.9235e-09, 3.6283e-11,\n 8.5073e-11, 1.7062e-10, 3.2919e-11, 2.4061e-08, 1.7884e-08, 1.0936e-08,\n 2.1838e-10, 2.9751e-10, 4.7246e-09, 3.6428e-10, 8.4905e-11, 7.3697e-10,\n 7.2417e-08, 3.3757e-08, 5.3370e-08, 6.5970e-08, 1.2792e-07, 4.8375e-08,\n 9.1171e-08, 1.4503e-10, 5.7128e-09, 2.2017e-07, 6.2034e-08, 7.5012e-10,\n 5.1388e-08, 4.5654e-08, 8.8781e-08, 6.6863e-08, 7.5173e-08, 4.0126e-08,\n 6.1257e-08, 7.3138e-08, 9.3821e-09, 5.3801e-08, 2.9201e-09, 1.6318e-08,\n 5.2029e-08, 3.0038e-08, 9.0780e-08, 9.6562e-08, 7.6196e-08, 1.0122e-07,\n 5.4465e-08, 8.3487e-08, 5.4058e-08, 1.0035e-10, 3.4703e-08, 3.2535e-08,\n 5.4404e-08, 5.0372e-08, 5.1272e-08, 3.1449e-08, 2.9819e-08, 2.9019e-11,\n 2.8094e-08, 1.0706e-07, 2.9999e-08, 3.1206e-08, 6.6008e-08, 9.6623e-08,\n 2.1846e-07, 3.8054e-10, 7.3108e-08, 5.1688e-08, 2.9492e-08, 2.7561e-08,\n 3.5466e-08, 2.1711e-10, 2.3091e-10, 1.3353e-07, 2.6080e-08, 1.3185e-08,\n 2.1792e-08, 4.2856e-08, 4.7503e-08, 7.3529e-08, 9.7076e-08, 2.4680e-08,\n 9.1379e-08, 3.0147e-10, 8.7112e-08, 1.5927e-07, 1.1695e-09, 2.8384e-07,\n 2.7095e-08, 2.7069e-08, 1.4209e-07, 7.5973e-08, 5.1048e-10, 1.0594e-07,\n 4.8875e-08, 1.1670e-10, 1.9712e-07, 5.5145e-10, 2.2082e-08, 1.8823e-07,\n 5.5012e-08, 7.8498e-08, 1.3765e-07, 1.0909e-07, 4.8640e-09, 1.5401e-07,\n 4.7538e-08, 1.9144e-08, 3.6693e-08, 6.7239e-09, 5.3160e-10, 2.5675e-10,\n 1.0178e-10, 6.8977e-09, 7.1829e-11, 4.8271e-11, 4.3319e-10, 2.1547e-11,\n 2.7124e-10, 6.1117e-09, 5.1977e-09, 1.3729e-10, 1.1812e-10, 1.4955e-10,\n 1.5051e-10, 1.1041e-08, 2.4372e-10, 1.4550e-08, 1.6690e-10, 2.7838e-10,\n 1.1737e-10, 1.4890e-10, 1.7520e-08, 5.7181e-11, 3.4444e-10, 1.3783e-10,\n 9.0581e-11, 9.8731e-11, 5.7421e-11, 7.3052e-11, 7.7865e-10, 1.8874e-08,\n 1.9614e-08, 9.7708e-08, 1.1536e-10, 1.6119e-10, 1.4362e-10, 2.9015e-10,\n 2.2838e-10, 3.2219e-10, 9.6680e-11, 1.8245e-10, 3.8411e-10, 1.8158e-10,\n 2.2047e-10, 2.7009e-08, 3.1065e-11, 8.5836e-09, 4.5010e-08, 3.3351e-10,\n 2.2633e-10, 3.2941e-09, 1.7302e-09, 1.3440e-07, 4.8230e-08, 1.0502e-10,\n 1.8467e-10, 2.0076e-10, 1.0352e-09, 5.7417e-10, 1.8222e-09, 7.9053e-08,\n 3.1182e-08, 4.1998e-08, 1.1572e-08, 4.7812e-08, 1.1126e-07, 4.0505e-08,\n 5.1520e-08, 5.1379e-08, 1.8826e-08, 8.5327e-08, 7.7850e-08, 1.0081e-09,\n 9.8306e-08, 2.9732e-07, 1.1412e-07, 1.9042e-08, 7.0011e-08, 6.3571e-08,\n 3.4288e-08, 1.8739e-07, 1.1890e-08, 3.0679e-08, 1.1905e-07, 5.5857e-08,\n 3.6861e-08, 2.1103e-10, 4.0516e-08, 1.0420e-07, 3.3555e-08, 1.0796e-07,\n 4.8924e-10, 9.7688e-08, 9.7835e-08, 1.8396e-07, 4.5324e-08, 4.8170e-08,\n 2.7862e-08, 4.1615e-10, 1.4202e-07, 5.0669e-08, 1.3801e-08, 4.2360e-08,\n 3.8043e-08, 6.5676e-10, 3.8259e-08, 7.2776e-08, 3.7173e-08, 2.0834e-09,\n 3.5223e-08, 5.6898e-08, 1.1448e-08, 7.7273e-10, 3.8201e-10, 3.3295e-08,\n 2.9197e-08, 9.4261e-08, 7.0284e-08, 1.7946e-07, 1.4468e-09, 4.4210e-10,\n 6.1562e-08, 2.7996e-09, 4.1254e-08, 8.1197e-09, 5.9184e-08, 2.9921e-08,\n 2.7274e-08, 4.8540e-11, 4.9803e-11, 1.8505e-09, 4.1556e-09, 1.5861e-08,\n 2.2365e-08, 9.2096e-11, 2.1743e-08, 2.9270e-08, 1.4201e-08, 1.8868e-08,\n 6.1905e-10, 2.9373e-11, 5.1663e-09, 1.2138e-08, 1.8965e-09, 5.0623e-11,\n 3.3645e-11, 3.2180e-11, 7.9295e-11, 8.6765e-11, 4.6690e-11, 1.1511e-10,\n 4.5276e-11, 9.1114e-11, 3.7658e-09, 6.9551e-11, 1.0889e-10, 2.0021e-08,\n 7.1476e-09, 1.0454e-08, 2.2489e-10, 3.4921e-10, 1.0004e-07, 1.5055e-08,\n 4.1307e-08, 2.9230e-08, 3.2276e-08, 6.7568e-08, 1.0392e-07, 1.4581e-07,\n 4.7235e-10, 9.5545e-08, 1.9333e-10, 4.9750e-08, 7.2138e-08, 3.1960e-08,\n 3.6865e-08, 7.7269e-08, 8.7367e-08, 1.3752e-07, 1.0149e-07, 2.0538e-10,\n 6.3163e-08, 8.6690e-08, 4.8026e-08, 6.1333e-08, 1.3247e-07, 7.4397e-08,\n 5.4860e-08, 1.3741e-08, 5.6046e-08, 5.4436e-08, 3.3265e-08, 2.6975e-08,\n 3.5450e-08, 9.5692e-08, 1.6041e-08, 8.0877e-09, 7.8978e-08, 1.7465e-07,\n 4.2989e-08, 8.3551e-08, 9.4739e-10, 1.8267e-07, 8.0058e-08, 3.6991e-08,\n 4.0980e-08, 1.7755e-08, 9.2622e-11, 1.6064e-07, 3.9107e-08, 1.5333e-07,\n 6.5124e-08, 6.8521e-08, 1.8052e-07, 7.6429e-08, 4.6625e-08, 9.4543e-08,\n 4.8502e-08, 3.5501e-08, 3.1489e-09, 4.3281e-08, 7.4086e-08, 3.9392e-09,\n 4.0641e-08, 7.0032e-08, 1.0711e-07, 1.5760e-08, 5.0268e-08, 9.5096e-08,\n 4.4268e-08, 1.2165e-07, 1.1800e-07, 4.5891e-08, 2.0553e-08, 7.6155e-08,\n 3.7469e-08, 3.1902e-08, 1.0149e-07, 4.5534e-08, 1.4605e-10, 6.3891e-08,\n 4.7875e-11, 5.4286e-08, 3.9905e-08, 3.1822e-08, 6.7584e-08, 4.4244e-08,\n 8.1411e-08, 9.3641e-08], device='cuda:0')}, 77: {'step': tensor(62477.), 'exp_avg': tensor([[-1.6110e-05, -9.5071e-06, -1.2495e-06, ..., -1.4536e-06,\n 2.9985e-06, -2.8800e-06],\n [ 2.4775e-05, 2.6201e-05, 3.1741e-05, ..., -1.0096e-06,\n 7.7049e-07, -1.4391e-06],\n [ 3.1699e-05, 8.3872e-06, -1.2244e-05, ..., -5.2427e-07,\n 3.5364e-07, 2.7237e-06],\n ...,\n [-3.4013e-05, -1.7724e-05, -4.9320e-06, ..., -8.3014e-07,\n 1.0355e-06, 4.2907e-06],\n [-7.2350e-06, -7.1882e-06, 7.6732e-06, ..., -3.8128e-06,\n 9.3926e-07, 3.9127e-06],\n [ 3.2806e-05, 1.7748e-05, 1.2515e-05, ..., -3.1663e-07,\n 1.2334e-06, 2.8634e-07]], device='cuda:0'), 'exp_avg_sq': tensor([[3.5398e-08, 2.3839e-08, 1.4799e-08, ..., 5.3356e-10, 1.1586e-10,\n 2.2684e-10],\n [5.2938e-08, 3.3872e-08, 1.6808e-08, ..., 5.7620e-10, 2.3748e-10,\n 3.7080e-10],\n [3.0772e-08, 1.9742e-08, 1.4224e-08, ..., 2.7245e-10, 8.1119e-11,\n 2.1415e-10],\n ...,\n [4.0229e-08, 2.7071e-08, 1.3039e-08, ..., 5.7175e-10, 1.5328e-10,\n 4.8565e-10],\n [3.5798e-08, 2.3685e-08, 1.3283e-08, ..., 7.9245e-10, 1.7087e-10,\n 3.9882e-10],\n [5.0414e-08, 3.5176e-08, 2.0820e-08, ..., 1.6673e-09, 3.6693e-10,\n 3.4518e-10]], device='cuda:0')}, 78: {'step': tensor(62477.), 'exp_avg': tensor([-5.7378e-06, -7.2756e-06, 2.0914e-06, ..., 4.5712e-06,\n -7.9633e-06, -2.9221e-06], device='cuda:0'), 'exp_avg_sq': tensor([9.7034e-10, 1.3015e-09, 7.7197e-10, ..., 1.3879e-09, 1.5217e-09,\n 1.7680e-09], device='cuda:0')}, 79: {'step': tensor(62477.), 'exp_avg': tensor([[ 3.5086e-06, 1.0183e-06, -6.3605e-06, ..., 3.2601e-06,\n -3.5843e-05, -3.3289e-06],\n [-6.7355e-06, 5.1217e-06, -1.3470e-05, ..., -3.4681e-06,\n -4.7603e-05, -1.0540e-05],\n [-7.3995e-06, -3.3786e-05, -6.4951e-06, ..., 1.7821e-05,\n 1.9035e-05, 6.1631e-06],\n ...,\n [ 5.5777e-06, -1.0909e-05, 2.8741e-06, ..., -1.4457e-06,\n -1.0663e-05, -2.7038e-06],\n [-2.3038e-06, 3.7377e-07, -1.9873e-06, ..., 7.8721e-06,\n -4.5162e-06, -5.9613e-06],\n [-4.8629e-07, 1.8342e-07, 1.7048e-07, ..., -3.5906e-07,\n -6.6997e-07, -3.0141e-07]], device='cuda:0'), 'exp_avg_sq': tensor([[2.5039e-09, 3.3715e-08, 3.0762e-09, ..., 1.2442e-08, 1.8947e-08,\n 1.3659e-09],\n [4.0213e-09, 5.7633e-08, 5.7974e-09, ..., 1.3676e-08, 5.3125e-08,\n 2.9492e-09],\n [5.2302e-09, 1.2476e-07, 5.7595e-09, ..., 2.0826e-08, 3.3155e-08,\n 3.9571e-09],\n ...,\n [5.1917e-10, 6.0667e-09, 4.0688e-10, ..., 1.9120e-09, 3.1098e-09,\n 2.4956e-10],\n [2.1609e-10, 3.7818e-09, 2.6920e-10, ..., 1.4683e-09, 1.8219e-09,\n 2.6783e-10],\n [6.2603e-12, 7.3613e-11, 4.6347e-12, ..., 1.7663e-11, 4.7231e-11,\n 5.2065e-12]], device='cuda:0')}, 80: {'step': tensor(62477.), 'exp_avg': tensor([ 1.6482e-06, -1.6474e-05, -2.2226e-05, 3.4447e-05, 9.2234e-06,\n -2.0318e-05, 7.8848e-05, -4.8744e-05, -3.8505e-05, -3.9648e-06,\n 6.2690e-06, 7.8183e-07, 1.4748e-05, 4.7388e-05, 3.9849e-05,\n -2.8088e-05, 1.2669e-05, -4.0207e-05, 6.8002e-05, 1.5481e-05,\n 9.1964e-06, 2.3379e-05, 1.1468e-05, 6.3038e-05, -1.5786e-05,\n -4.1975e-05, 4.0088e-06, 8.7023e-06, -3.0269e-05, -6.8944e-06,\n -1.5210e-05, -3.7065e-05, -9.8016e-06, 3.2825e-06, 8.8144e-06,\n -5.0442e-05, -5.4218e-05, -2.2024e-05, 2.2773e-05, -3.1069e-05,\n 3.5878e-05, 1.5382e-05, -4.9409e-05, 5.2180e-06, 3.4982e-05,\n -6.8796e-06, 5.6460e-05, 3.4023e-06, 5.3665e-05, -1.1788e-06,\n -1.8790e-05, -2.4976e-08, 8.3172e-06, -3.9870e-05, -1.1099e-05,\n -1.0992e-05, -6.6143e-05, -2.8353e-06, 8.5325e-06, 2.0060e-06,\n 7.7876e-06, -3.7945e-06, 1.3446e-05, -1.8223e-06, 1.2903e-06,\n 3.3710e-07, -1.4638e-07, -3.9270e-05, 1.5175e-05, 1.7474e-06,\n 6.7896e-06, -1.8471e-06, -2.8031e-05, 5.7745e-08, 6.1461e-06,\n -2.6382e-05, -1.7890e-05, -3.4929e-06, 9.7129e-06, -8.9353e-07,\n -2.9041e-07, 7.2194e-07, 2.6054e-06, 8.1335e-06, 5.5553e-06,\n 9.7118e-06, -4.4455e-05, 5.3852e-06, -4.7190e-06, -7.2361e-07,\n 8.1499e-06, 4.8234e-07, -2.0344e-06, 6.5212e-06, 1.2786e-05,\n -1.2114e-06, 9.1434e-06, 4.5694e-06, -2.1650e-05, 1.2283e-05,\n -1.1078e-05, 1.1446e-05, -4.9873e-06, -1.6437e-05, -6.5406e-06,\n 9.4810e-07, 5.5466e-07, -1.1277e-05, -4.7548e-07, 2.5905e-06,\n -1.0956e-05, 4.6779e-06, -1.5036e-05, 1.8220e-06, 8.6800e-06,\n 8.9026e-07, -6.7387e-06, -4.8121e-06, -4.4992e-06, -1.2761e-05,\n 1.5554e-05, 1.1546e-07, -2.9089e-05, -1.5202e-05, -1.8675e-06,\n -1.5800e-05, -1.2313e-05, 1.0256e-07, -8.7888e-05, -3.2265e-05,\n 2.7927e-05, 5.4200e-05, 9.2706e-07, 2.6739e-05, -2.5311e-05,\n -4.8986e-05, 6.3192e-07, 3.4240e-05, -2.9207e-06, 8.6715e-06,\n -1.3811e-05, -1.5246e-05, -1.7312e-05, -7.4187e-06, 6.0486e-06,\n 9.5566e-06, 4.7397e-06, 9.4349e-06, 5.8840e-05, 3.5427e-05,\n -6.9702e-06, 2.6116e-05, 5.4928e-06, 1.0290e-05, 1.4744e-06,\n -7.8443e-05, -1.3851e-05, -7.4895e-06, 3.4835e-06, 2.3440e-05,\n -1.1921e-05, -1.3269e-05, 3.7042e-05, 1.2434e-05, -2.4237e-06,\n -2.4623e-05, -2.2492e-05, -1.2650e-06, -6.2295e-06, -2.2364e-05,\n -8.3740e-08, -4.0043e-07, 2.0077e-05, 5.7010e-07, -1.4253e-05,\n -2.4574e-05, -4.0172e-05, -4.4905e-06, 6.6630e-06, -8.4703e-07,\n 1.0143e-05, -9.5796e-06, -1.0845e-05, 6.3594e-07, -1.7368e-05,\n 1.0996e-06, -7.4537e-06, -7.7997e-06, -5.7538e-06, 8.3228e-06,\n -7.4732e-07, 5.3428e-06, 2.4981e-06, -2.8795e-06, 3.2256e-06,\n 6.0268e-06, 6.6949e-07, 9.6039e-07, 1.5018e-05, -6.3850e-06,\n 2.8166e-06, -1.4636e-08, -2.3621e-06, -1.0223e-06, 4.6407e-06,\n -2.1098e-06, 2.1206e-06, -3.5582e-05, 1.8337e-06, 3.1001e-05,\n 5.4978e-05, -5.1458e-06, 1.8223e-05, -3.8315e-06, -1.1585e-05,\n -1.9327e-06, -1.8324e-06, -3.8370e-06, 3.7081e-05, 1.4066e-05,\n 1.0405e-06, 1.1097e-07, 1.1026e-05, 2.6823e-05, -3.4658e-06,\n -8.4714e-06, 1.1291e-05, -1.1249e-05, -2.4932e-05, -8.2384e-06,\n -3.8003e-06, 2.0257e-07, 1.9364e-05, -2.8655e-06, -1.6228e-06,\n 1.0361e-06, -1.0311e-05, -5.6934e-06, 8.8341e-06, 8.3875e-07,\n 1.1171e-05, -1.9296e-06, 2.5342e-06, -1.0657e-06, -2.2445e-06,\n -1.5430e-05, -5.3860e-05, 2.9420e-05, 4.5461e-06, -7.5310e-06,\n 1.2022e-06, -1.1154e-05, 2.1338e-06, 1.6749e-05, -4.0595e-06,\n 4.8774e-06, -1.4864e-04, -3.9006e-05, 3.7959e-05, 4.2059e-05,\n 1.7796e-05, 2.8436e-05, -2.8372e-05, 3.3038e-05, -1.2738e-05,\n 3.9155e-05, -1.7887e-05, -3.8264e-05, 4.0323e-05, -1.0499e-05,\n -9.4245e-06, 2.6112e-05, 6.1961e-06, -2.4005e-05, 2.7844e-05,\n -4.4964e-06, 5.7047e-06, 3.3537e-05, -1.4668e-05, 4.1810e-06,\n 2.2027e-05, 2.3245e-05, -4.6602e-05, 3.2786e-06, -6.2403e-07,\n -7.7485e-06, 2.4896e-05, 7.3384e-05, 2.6683e-05, -5.9314e-06,\n 4.6339e-06, -1.4578e-05, -6.9274e-06, 1.5465e-06, -7.9220e-06,\n -9.5856e-06, 4.8944e-05, 5.5867e-06, -2.0862e-05, -8.4562e-06,\n -2.4113e-05, -2.9455e-06, -1.5831e-05, -2.5616e-05, 9.7516e-06,\n -2.5809e-05, 2.1029e-06, 2.0104e-05, 1.3815e-05, -4.4752e-06,\n 9.9030e-06, 3.3667e-05, 1.0479e-05, 5.8226e-06, 1.0009e-05,\n -2.1346e-06, 2.3008e-06, 4.5973e-06, -5.0006e-06, -4.0531e-06,\n -7.3022e-06, 1.2499e-05, -1.7146e-06, 2.2349e-05, 5.5444e-06,\n 6.4553e-06, -1.0738e-05, -2.2135e-05, -2.8327e-07, -9.8970e-07,\n 9.3898e-07, 5.7451e-06, -8.7533e-06, 3.2581e-06, -2.7372e-05,\n 1.2516e-06, -1.6061e-05, -2.4152e-05, -7.7017e-06, -1.5181e-05,\n -7.3427e-06, 2.9754e-06, 4.6372e-07, 3.4527e-06, -1.2338e-05,\n 6.8690e-06, -5.6592e-06, -1.0465e-06, 4.5219e-06, -1.7428e-06,\n -2.3497e-05, -1.1896e-05, -1.1681e-05, -9.8093e-06, -1.4249e-07,\n -1.2030e-06, 4.8120e-06, 1.5267e-05, -8.9331e-07, 5.8086e-06,\n -8.9082e-06, -1.5905e-05, -3.4762e-06, -9.8620e-07, -9.3893e-06,\n 1.9284e-05, 2.1078e-06, 4.7881e-06, 1.4597e-05, -1.4713e-05,\n 1.2983e-05, 3.7843e-07, -1.1702e-06, -2.9215e-06, 3.5426e-06,\n 2.4991e-07, -2.1468e-05, 2.3827e-05, -1.0310e-06, 1.2296e-07,\n 4.7682e-06, -2.1926e-05, 4.0725e-06, -6.2375e-06, -5.5063e-06,\n 3.3484e-05, 5.7268e-05, 1.0263e-05, 1.8386e-05, -2.1021e-05,\n 3.7984e-05, -1.1634e-05, -8.8315e-06, -5.6816e-06, -5.7320e-05,\n 3.9911e-05, -1.2107e-05, 7.2821e-06, -1.5375e-05, -4.5023e-05,\n -3.4126e-05, 1.3929e-05, 6.3431e-06, -4.6862e-05, -2.7014e-05,\n 2.0482e-05, 1.2107e-05, -2.1043e-05, -9.0644e-06, -4.4882e-05,\n 4.8837e-06, -7.5335e-06, 2.8563e-06, -2.4579e-05, -2.4081e-05,\n 4.6609e-06, -7.7984e-08, 9.6201e-06, -9.8966e-07, -1.7151e-05,\n -1.0676e-05, 2.9476e-06, 2.8993e-05, 1.2533e-05, 8.9535e-06,\n 2.1392e-06, 6.1763e-08, -3.7856e-06, -3.9541e-06, 7.4133e-06,\n -3.0925e-05, 1.0550e-05, 1.1421e-05, 1.0251e-05, 8.2452e-07,\n 8.0379e-06, 9.7646e-06, 8.1504e-06, -1.4914e-05, -6.4422e-06,\n 8.8884e-07, -1.7974e-05, -6.7882e-06, -5.8819e-06, 3.7144e-06,\n -1.0076e-06, -2.7068e-05, -1.0901e-06, -3.5592e-06, 3.7232e-06,\n -1.4577e-05, -6.1759e-06, -2.5696e-05, 6.4765e-06, 7.6734e-06,\n -4.6162e-06, 2.2156e-06, -4.8903e-07, 3.1911e-06, -2.3635e-05,\n 3.7972e-06, 3.4012e-06, -2.3727e-05, 4.4452e-06, -9.9314e-07,\n 2.3571e-06, 1.4056e-05, 2.7871e-05, 1.2038e-05, -1.5215e-06,\n 2.7309e-06, 1.6314e-06, 1.4506e-06, 1.1371e-05, -4.3073e-06,\n -2.2299e-06, 4.0733e-08, 4.7971e-06, 1.8185e-06, -1.2470e-06,\n 8.9269e-06, -6.5026e-06, -4.3699e-06, -9.2797e-06, 1.7746e-05,\n 5.6859e-06, -2.3623e-06, 9.0927e-07, -1.1062e-05, 8.6818e-06,\n 4.2945e-05, 7.7804e-06, 1.4751e-05, 1.8570e-06, 1.9188e-05,\n -1.4761e-05, -1.7576e-05, -1.5703e-05, 3.7248e-05, 7.1461e-06,\n 1.0066e-05, -1.8124e-05, -1.5090e-05, 1.8557e-06, -3.1170e-06,\n 7.0812e-06, -6.4426e-06, -1.9799e-05, -1.3273e-05, -8.6433e-06,\n 1.6949e-05, 1.0093e-06], device='cuda:0'), 'exp_avg_sq': tensor([7.8650e-08, 1.3294e-07, 1.3942e-07, 1.5515e-07, 8.7728e-08, 4.6773e-07,\n 6.6636e-08, 3.6322e-08, 8.5281e-08, 4.7273e-08, 5.6247e-08, 8.6146e-08,\n 7.0578e-08, 5.2207e-08, 3.5361e-08, 4.3350e-08, 5.1629e-08, 1.2571e-07,\n 5.5518e-08, 7.5361e-08, 1.9628e-08, 1.0805e-08, 1.8510e-08, 9.5444e-08,\n 4.5204e-08, 3.0990e-08, 1.6763e-08, 1.5894e-08, 7.0623e-08, 5.2748e-08,\n 3.4707e-08, 5.2904e-08, 3.7894e-08, 2.5940e-08, 3.2434e-08, 4.6315e-08,\n 1.2737e-07, 2.7616e-08, 6.7421e-08, 7.7371e-08, 6.9760e-08, 3.4162e-08,\n 2.9580e-08, 3.1965e-08, 5.0260e-08, 4.2078e-08, 9.8813e-08, 9.5490e-10,\n 7.9847e-08, 1.6898e-08, 1.3341e-08, 1.8311e-08, 9.5444e-09, 4.3026e-08,\n 1.0444e-08, 1.8563e-08, 1.1992e-07, 1.1739e-07, 5.3484e-09, 1.4753e-10,\n 5.7367e-09, 2.6749e-08, 7.4852e-09, 8.1502e-10, 8.0011e-10, 1.9056e-09,\n 8.2183e-10, 4.9585e-08, 4.5621e-08, 2.3648e-08, 3.1601e-09, 5.0763e-10,\n 6.9988e-08, 1.2018e-08, 2.1862e-09, 3.5426e-08, 1.8086e-08, 2.8284e-09,\n 2.0842e-08, 6.0706e-10, 9.6026e-10, 1.8773e-10, 7.3937e-09, 1.5910e-08,\n 3.1800e-09, 8.3191e-09, 3.2125e-08, 3.8125e-08, 9.4819e-09, 1.3517e-08,\n 1.3874e-08, 4.8692e-08, 3.7536e-10, 6.7443e-10, 2.3980e-08, 2.7307e-08,\n 2.7762e-08, 8.2797e-09, 1.2488e-08, 1.6835e-08, 1.5901e-08, 1.1496e-08,\n 1.5731e-08, 3.4382e-08, 1.3303e-08, 7.2932e-10, 8.8385e-09, 3.8524e-08,\n 1.0536e-09, 6.6490e-10, 9.8175e-10, 2.8682e-08, 3.5047e-08, 2.0161e-10,\n 2.0876e-09, 4.9829e-10, 1.8634e-08, 1.4544e-08, 5.6221e-10, 1.1612e-08,\n 7.3887e-09, 1.3774e-10, 2.5676e-08, 6.6656e-09, 6.1151e-09, 2.6841e-08,\n 1.3518e-08, 3.4367e-08, 1.2346e-07, 5.2654e-08, 1.4958e-07, 8.2091e-08,\n 9.4908e-08, 8.8201e-08, 6.4765e-08, 7.3090e-08, 2.6085e-08, 6.2487e-08,\n 1.4787e-08, 2.2874e-08, 5.1386e-08, 1.7392e-08, 5.3048e-08, 3.3648e-08,\n 3.2639e-08, 2.7855e-08, 6.6422e-08, 6.3160e-08, 5.5208e-08, 4.8168e-08,\n 3.7632e-08, 5.8236e-08, 5.8463e-08, 3.9346e-08, 4.1130e-08, 7.3652e-08,\n 4.2072e-08, 1.3130e-07, 3.2095e-08, 1.7911e-08, 2.3791e-08, 3.3397e-08,\n 5.4006e-08, 4.0611e-08, 3.7737e-09, 4.4342e-08, 4.6653e-08, 9.4596e-10,\n 1.6297e-08, 2.2738e-08, 1.8981e-10, 2.0762e-09, 2.7922e-08, 2.5190e-08,\n 1.9679e-08, 6.8387e-08, 4.7243e-08, 3.6353e-09, 1.8369e-08, 4.7775e-10,\n 6.4079e-09, 1.2826e-08, 5.9616e-09, 1.4033e-08, 1.4439e-08, 1.1681e-08,\n 1.4262e-08, 1.6706e-09, 1.0094e-08, 2.6900e-08, 5.9096e-09, 5.4205e-10,\n 3.2317e-10, 9.7954e-10, 7.8488e-09, 5.3116e-09, 8.8420e-08, 7.5301e-09,\n 6.2976e-09, 1.7119e-08, 1.0861e-10, 1.4357e-08, 1.1175e-08, 2.1432e-09,\n 6.8914e-09, 3.9344e-09, 6.4447e-09, 1.9007e-08, 1.0530e-08, 4.6865e-08,\n 4.0952e-08, 5.6991e-10, 1.4547e-08, 1.8777e-08, 1.9303e-08, 3.3566e-10,\n 1.9095e-09, 1.1702e-07, 7.9404e-08, 2.5777e-08, 3.1111e-10, 2.3735e-10,\n 1.3188e-08, 1.2774e-08, 5.1409e-10, 1.4908e-08, 1.8644e-08, 2.5034e-08,\n 9.7123e-09, 2.0953e-08, 9.9886e-09, 1.9197e-09, 7.3219e-08, 1.0795e-08,\n 4.4018e-09, 4.9430e-08, 7.6102e-10, 2.8789e-09, 3.4262e-09, 1.7113e-08,\n 3.5405e-08, 1.3512e-08, 1.2098e-09, 1.9683e-09, 5.3575e-10, 5.0791e-08,\n 7.3141e-08, 2.9805e-08, 4.0887e-09, 6.7934e-10, 1.6069e-08, 6.4005e-08,\n 5.9801e-11, 9.9686e-09, 8.9956e-10, 2.9089e-10, 8.4425e-07, 5.1886e-08,\n 3.5480e-07, 5.9121e-08, 8.0024e-08, 5.0310e-08, 1.6913e-07, 1.2182e-07,\n 6.3089e-08, 5.0140e-08, 1.0794e-07, 4.0084e-08, 6.4126e-08, 4.3348e-08,\n 1.0172e-07, 5.7397e-08, 9.3235e-08, 9.3397e-08, 5.9958e-08, 2.6866e-08,\n 6.8479e-08, 2.9689e-08, 1.4905e-07, 9.0728e-09, 5.9348e-08, 3.3408e-08,\n 4.8212e-08, 3.9928e-08, 1.2357e-10, 5.5553e-08, 1.1614e-07, 9.5381e-08,\n 2.6586e-08, 6.2124e-08, 2.4521e-08, 2.0243e-08, 3.9273e-08, 1.1154e-10,\n 3.7075e-08, 3.1628e-08, 6.0096e-08, 7.2547e-09, 3.2904e-08, 2.7192e-08,\n 5.8182e-08, 1.6151e-07, 4.5609e-08, 3.2276e-08, 1.0639e-07, 9.6335e-08,\n 3.1993e-09, 1.9054e-08, 9.0045e-09, 1.0782e-08, 1.8082e-09, 3.9990e-08,\n 7.7605e-09, 2.7032e-09, 2.9273e-08, 1.3286e-08, 1.6274e-09, 3.5518e-08,\n 5.4350e-08, 9.2762e-09, 1.1048e-09, 5.1878e-08, 4.6734e-10, 1.7619e-08,\n 1.4595e-08, 1.1429e-08, 8.1195e-09, 3.6950e-08, 1.2904e-10, 2.8697e-08,\n 3.1328e-08, 2.9985e-09, 2.8991e-08, 1.4493e-09, 5.1817e-08, 6.6102e-10,\n 1.6281e-08, 6.2866e-09, 2.0621e-08, 5.8407e-08, 2.3432e-08, 2.1701e-08,\n 4.2678e-10, 5.8397e-09, 5.7896e-08, 1.8250e-08, 1.7304e-09, 1.9833e-08,\n 1.4125e-08, 3.8756e-08, 2.4912e-08, 3.8047e-08, 8.0824e-09, 2.2611e-09,\n 4.6704e-10, 2.7147e-10, 6.3588e-09, 2.9556e-08, 1.0031e-08, 2.8122e-08,\n 3.1687e-08, 4.7858e-08, 3.3104e-10, 3.2419e-10, 1.1812e-08, 1.6687e-08,\n 4.4648e-09, 8.8731e-10, 1.8322e-08, 2.3926e-08, 6.1134e-09, 3.9058e-10,\n 4.1758e-09, 3.4585e-08, 1.3079e-08, 1.2044e-09, 5.4892e-08, 3.7190e-08,\n 6.9131e-09, 2.2656e-10, 1.0033e-08, 2.2217e-08, 5.3478e-08, 9.0750e-09,\n 2.4424e-07, 2.5516e-07, 2.1658e-07, 2.8101e-08, 7.1067e-08, 3.9685e-08,\n 1.4163e-07, 1.2497e-07, 3.4490e-08, 9.6469e-08, 6.2664e-08, 8.5584e-08,\n 4.3168e-08, 8.8347e-09, 3.0040e-08, 4.8902e-08, 4.0564e-08, 3.8472e-08,\n 3.8684e-08, 8.4301e-08, 9.6461e-08, 2.6468e-08, 1.1385e-07, 1.2621e-07,\n 2.6716e-08, 5.4589e-08, 7.2999e-08, 1.8174e-07, 5.3100e-08, 5.9621e-08,\n 8.0693e-08, 3.5230e-08, 3.6699e-08, 2.6962e-08, 8.2932e-08, 3.0071e-08,\n 2.4192e-08, 3.3259e-08, 6.4945e-08, 5.6678e-08, 7.2208e-09, 1.7725e-08,\n 8.2376e-09, 2.1200e-09, 2.4115e-09, 3.8440e-08, 2.1052e-08, 1.3995e-08,\n 3.9246e-09, 1.7281e-08, 2.9473e-10, 3.4972e-09, 3.3576e-08, 1.4259e-08,\n 5.5106e-09, 3.9447e-08, 2.0503e-08, 3.0484e-08, 3.4344e-08, 3.5685e-08,\n 1.1097e-09, 1.0699e-09, 2.0805e-08, 2.5059e-10, 4.3266e-10, 1.2509e-08,\n 1.4739e-08, 8.3247e-09, 2.9452e-08, 2.0753e-09, 8.3699e-09, 2.8540e-09,\n 8.0823e-10, 2.4392e-10, 2.4764e-08, 2.9793e-08, 1.2086e-09, 6.9122e-10,\n 4.1789e-08, 7.1290e-09, 5.2315e-10, 3.1016e-10, 6.4654e-08, 2.6640e-08,\n 3.7525e-08, 6.0530e-10, 5.7538e-09, 2.9634e-10, 3.1639e-08, 3.6465e-08,\n 6.9295e-10, 5.9104e-09, 6.7109e-09, 5.4318e-09, 5.7403e-09, 5.5300e-08,\n 8.0961e-09, 1.0011e-08, 1.7449e-09, 8.7903e-09, 4.0052e-08, 3.6254e-08,\n 7.4318e-09, 5.7925e-09, 1.1590e-08, 2.6189e-09, 3.6841e-08, 3.3113e-09,\n 7.9960e-09, 8.4566e-09, 4.6980e-08, 6.0401e-09, 5.6657e-09, 2.6303e-08,\n 3.7811e-08, 2.0172e-09, 6.6803e-09, 1.2407e-08, 6.0945e-08, 9.6279e-09,\n 1.5731e-09, 2.5650e-08, 2.7253e-08, 3.0932e-08, 2.4605e-08, 1.3742e-08,\n 1.0621e-08, 8.0780e-11], device='cuda:0')}, 81: {'step': tensor(62477.), 'exp_avg': tensor([[-2.8355e-07, -2.2193e-07, 4.0981e-07, ..., 2.3710e-06,\n 1.1139e-06, -4.4724e-07],\n [-3.5852e-07, -5.9744e-07, 2.9363e-06, ..., 1.6612e-06,\n 3.0089e-07, 5.1537e-07],\n [ 1.0509e-06, 9.7517e-07, -6.2845e-07, ..., -1.2791e-06,\n 1.9760e-07, 1.4574e-06],\n ...,\n [-5.5456e-06, -7.9529e-06, 1.9769e-05, ..., 2.4087e-05,\n -5.5002e-06, -3.5823e-06],\n [ 1.1870e-05, 9.7524e-06, 1.0524e-05, ..., -2.1056e-05,\n 3.5261e-06, 1.9514e-05],\n [ 6.7679e-07, 5.2320e-08, 2.0638e-06, ..., -1.5720e-06,\n -4.0682e-06, 1.6548e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[1.2118e-10, 1.0704e-10, 2.5135e-10, ..., 7.6134e-10, 3.2008e-10,\n 1.0862e-10],\n [1.2831e-10, 1.0321e-10, 2.1218e-10, ..., 6.5156e-10, 4.9799e-10,\n 1.0572e-10],\n [1.5816e-10, 1.3409e-10, 2.4494e-10, ..., 1.0223e-09, 5.8860e-10,\n 1.3793e-10],\n ...,\n [3.1726e-09, 2.8135e-09, 4.9799e-09, ..., 1.6370e-08, 6.7840e-09,\n 2.7004e-09],\n [3.2004e-09, 2.8312e-09, 5.8313e-09, ..., 1.1903e-08, 8.4012e-09,\n 2.9279e-09],\n [1.8373e-10, 1.7028e-10, 4.1794e-10, ..., 8.9433e-10, 4.0080e-10,\n 1.7511e-10]], device='cuda:0')}, 82: {'step': tensor(62477.), 'exp_avg': tensor([ 1.1968e-06, 2.5282e-06, -4.3865e-06, ..., 3.5779e-05,\n -4.8088e-05, -3.0883e-06], device='cuda:0'), 'exp_avg_sq': tensor([2.7150e-09, 2.5751e-09, 3.3556e-09, ..., 6.4249e-08, 6.0488e-08,\n 3.8690e-09], device='cuda:0')}, 83: {'step': tensor(62477.), 'exp_avg': tensor([[-1.3069e-06, -3.2887e-06, -1.0450e-06, ..., 1.7219e-07,\n 1.0031e-07, -2.4982e-07],\n [ 2.8043e-06, 7.4608e-06, 2.2613e-06, ..., -5.7018e-07,\n -5.6685e-07, 4.9462e-07],\n [ 7.0478e-07, 1.3574e-06, 5.7076e-07, ..., -1.2286e-07,\n 5.6760e-07, -4.4953e-07],\n ...,\n [ 2.0298e-07, -5.3260e-07, -5.8332e-09, ..., 6.0825e-08,\n -1.8417e-06, -2.7715e-06],\n [ 1.7079e-07, 1.2948e-07, 3.8143e-07, ..., -5.4160e-07,\n 7.4649e-07, -3.1365e-07],\n [-1.3377e-08, -7.2469e-08, -4.5364e-07, ..., 2.6363e-07,\n -3.4704e-07, -1.5405e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[3.7864e-11, 1.2560e-10, 3.0863e-11, ..., 1.6355e-12, 1.6867e-11,\n 3.4836e-11],\n [1.6579e-10, 6.1733e-10, 1.6519e-10, ..., 7.2887e-12, 7.0267e-11,\n 2.0779e-10],\n [2.3166e-11, 6.7079e-11, 1.6157e-11, ..., 1.3860e-12, 1.2156e-11,\n 2.4077e-11],\n ...,\n [4.2574e-12, 6.7395e-12, 1.8827e-12, ..., 2.9207e-12, 4.7846e-11,\n 3.6071e-10],\n [8.5870e-12, 1.1487e-11, 3.1369e-12, ..., 4.6777e-12, 8.5528e-11,\n 6.1481e-10],\n [1.3089e-11, 1.3367e-11, 2.7148e-12, ..., 9.5434e-12, 9.3346e-11,\n 4.9358e-10]], device='cuda:0')}, 84: {'step': tensor(62477.), 'exp_avg': tensor([ 3.8740e-07, -7.7199e-07, -2.2914e-07, ..., -5.7241e-06,\n 1.3115e-06, -3.2694e-06], device='cuda:0'), 'exp_avg_sq': tensor([4.8574e-11, 2.6223e-10, 4.1280e-11, ..., 1.4233e-09, 2.5460e-09,\n 2.0973e-09], device='cuda:0')}, 85: {'step': tensor(62477.), 'exp_avg': tensor([[ 5.3433e-07, 3.5130e-07, 2.6536e-06, ..., 2.0415e-07,\n -4.3293e-07, -1.6713e-06],\n [-4.1447e-06, -5.4093e-07, -1.5862e-06, ..., -1.7656e-06,\n -4.0805e-07, 2.5406e-06],\n [ 4.2264e-08, -2.1235e-07, 4.5888e-07, ..., -1.1583e-07,\n 4.3857e-09, 1.9743e-07],\n ...,\n [ 1.5875e-06, -5.3961e-07, -1.0665e-06, ..., 1.4982e-06,\n 1.4543e-06, -4.7025e-08],\n [ 1.2174e-06, -1.9486e-07, 2.2464e-06, ..., -1.2650e-06,\n -5.9609e-07, -2.1046e-07],\n [-7.0054e-06, -2.4064e-06, -4.2593e-06, ..., -1.3657e-06,\n 1.0627e-06, 6.4731e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[1.3491e-10, 2.1064e-11, 2.3960e-10, ..., 9.9438e-11, 2.5874e-11,\n 6.2133e-11],\n [4.6776e-10, 2.5174e-11, 2.9861e-10, ..., 3.3256e-11, 1.0543e-11,\n 2.1964e-10],\n [1.0852e-11, 2.4882e-12, 1.8925e-11, ..., 1.2223e-11, 3.3384e-12,\n 4.6435e-12],\n ...,\n [1.1706e-10, 4.2441e-11, 2.4496e-10, ..., 1.5759e-10, 4.3559e-11,\n 3.4369e-11],\n [2.2327e-11, 2.1360e-12, 3.0843e-11, ..., 8.9911e-12, 2.2407e-12,\n 1.1299e-11],\n [4.7668e-10, 4.8700e-11, 5.0542e-10, ..., 1.7464e-10, 4.4398e-11,\n 3.6190e-10]], device='cuda:0')}, 86: {'step': tensor(62477.), 'exp_avg': tensor([ 4.0278e-06, -1.9052e-07, 2.3793e-06, 5.9003e-06, 8.0660e-07,\n 2.5184e-06, -3.4318e-06, 1.5697e-05, -2.4522e-05, -6.3286e-06,\n 1.5916e-07, 6.8912e-07, -2.5839e-06, 5.2474e-06, -1.1631e-05,\n 1.0163e-05, 4.9139e-07, -4.7996e-06, 1.7763e-05, -1.5819e-06,\n -9.2469e-06, -2.7768e-05, -1.2973e-06, 7.2352e-06, -5.9598e-07,\n -4.6559e-06, -8.2722e-06, 7.1701e-07, 3.1018e-07, -4.4187e-07,\n 3.6880e-07, -6.6491e-06, -8.6464e-07, 1.6066e-06, -2.3735e-07,\n 8.0794e-06, 2.2131e-06, -2.7184e-07, 4.7735e-07, 4.6524e-06,\n -4.2489e-06, -5.2011e-06, -1.1337e-05, -1.4426e-08, -1.6292e-05,\n -7.6504e-07, -2.8920e-05, -1.1680e-06, -2.4904e-06, -1.2210e-06,\n 7.6371e-06, -1.8990e-06, 1.7540e-08, 1.4850e-05, 2.8313e-05,\n -5.5533e-06, -8.1187e-06, 4.0402e-07, 3.1451e-06, -1.7790e-05,\n 7.4776e-06, 1.3575e-06, -1.6057e-05, 7.5311e-06, -6.7422e-07,\n 3.4693e-06, -1.3806e-05, 1.3585e-06, 5.1942e-06, -3.1113e-07,\n 2.4459e-07, 2.2761e-05, 6.4300e-06, 1.0620e-06, 2.7353e-05,\n 4.7827e-06, -3.9560e-07, -5.3417e-06, -1.0995e-05, 5.9169e-06,\n 7.8227e-05, -1.2015e-05, 5.8048e-07, 1.5158e-07, 1.9274e-06,\n -7.8065e-07, 1.6416e-05, 6.7103e-07, -2.6035e-06, 1.8211e-05,\n 2.2685e-06, 1.2435e-06, 5.8831e-07, -3.4871e-05, -3.9251e-07,\n -2.5861e-06, 5.1680e-06, -9.6192e-06, -1.0263e-05, 3.4682e-06,\n 1.5922e-06, 2.7934e-06, -1.1082e-06, 5.1496e-06, -3.8554e-06,\n -3.0764e-06, 5.0704e-06, -1.7010e-05, -2.6446e-07, 1.0272e-05,\n 7.5115e-06, -1.3074e-06, -1.3454e-06, 1.8248e-06, -5.5814e-06,\n -5.4460e-06, -6.3171e-06, 3.7479e-07, 4.4166e-06, 1.1540e-05,\n 8.4817e-06, -3.9578e-06, 1.1940e-05, 3.0735e-06, -2.2390e-07,\n 2.3702e-06, 2.5619e-06, -9.2473e-07, -2.0948e-05, 5.1728e-06,\n -4.9309e-06, 6.2749e-06, -1.1966e-07, 6.3156e-06, -1.2030e-05,\n -1.5626e-06, -6.0656e-07, -1.0662e-05, -1.1069e-06, -4.9465e-06,\n -1.2362e-06, 5.7091e-06, 2.2700e-07, 6.6606e-06, 4.4436e-06,\n 1.5444e-06, 2.1233e-07, 9.3527e-07, -5.8249e-07, 1.8804e-06,\n 2.8311e-07, -9.0552e-06, -7.5415e-07, 1.1407e-06, 7.5700e-06,\n -6.1002e-06, -2.2335e-06, 2.2044e-07, -1.6485e-06, 1.3187e-05,\n -2.8304e-06, -8.9584e-06, -3.4494e-06, -2.8158e-06, 4.7906e-07,\n -1.1789e-05, 5.3004e-07, 1.2485e-06, 9.6200e-07, -7.6848e-06,\n 2.0215e-06, 2.6837e-06, -2.3971e-06, 6.0139e-07, 7.5242e-06,\n 7.6205e-07, -2.9446e-06, 1.8498e-06, 1.2379e-05, 2.1683e-06,\n -6.6169e-07, -2.2533e-08, 1.4483e-06, 7.2068e-07, 2.0166e-06,\n -1.2843e-06, -1.2721e-06, -2.0944e-05, -4.3488e-05, -2.8164e-06,\n 2.0425e-06, 1.0232e-06, -2.9436e-06, 1.2246e-05, 1.3188e-06,\n -8.8515e-06, -1.4447e-07, 1.9557e-06, -3.7344e-06, -3.6599e-07,\n 4.9210e-06, -1.4025e-06, 7.0139e-07, -1.0816e-05, -1.5574e-05,\n -2.3055e-07, -2.5450e-07, 6.1272e-07, 9.9480e-07, -1.1908e-05,\n 1.0622e-06, -5.0182e-06, 3.2952e-05, -1.4688e-06, 3.4539e-06,\n -9.9126e-07, 9.9666e-06, 3.4786e-06, 1.0062e-05, 5.3820e-06,\n -6.7484e-06, 2.2855e-06, 3.9631e-06, 1.2681e-05, 8.6248e-06,\n 2.6882e-06, -7.7008e-08, -5.3151e-06, -7.7477e-06, -1.3770e-06,\n 2.0532e-06, -1.9376e-06, -1.1414e-06, -8.4059e-07, -5.3740e-06,\n 4.0049e-06, -1.3203e-06, 3.0423e-06, -8.7611e-07, -3.0852e-06,\n -6.7336e-08, 9.4261e-07, -6.1247e-06, 4.2426e-07, 4.2761e-06,\n 1.2118e-06, -1.0327e-05, 5.5935e-06, 1.7270e-06, -1.0768e-05,\n 3.1026e-06, -2.8602e-06, 9.1440e-06, -2.6126e-05, 1.0244e-06,\n -1.3856e-06, 6.2783e-06, 5.8681e-06, 9.5728e-06, -8.5476e-06,\n 1.0418e-05, 1.1163e-05, 5.3523e-06, 7.1363e-07, 6.3139e-07,\n -4.4896e-06, 7.3635e-07, 2.1989e-05, -4.7658e-06, 3.3311e-06,\n -9.2814e-07, -1.2898e-05, -1.5558e-05, -6.6512e-06, -8.2499e-06,\n -5.1415e-07, 4.4927e-06, -4.7451e-06, 5.4155e-06, -1.8959e-06,\n 4.0854e-07, -1.0395e-05, -6.2664e-07, -1.7052e-06, -4.5157e-07,\n -1.4785e-06, 3.2727e-06, -9.4616e-06, 1.5966e-05, -2.4823e-06,\n 1.8420e-06, 1.9846e-06, 7.8712e-07, 9.0393e-06, -3.0145e-06,\n -2.1462e-05, 5.0428e-06, -1.4945e-06, -8.5372e-07, 5.0703e-06,\n -1.8194e-06, 5.9748e-07, 6.9737e-07, -2.5221e-06, -2.9135e-06,\n 4.3706e-06, 2.5887e-06, 3.7424e-06, 4.0322e-06, -1.5556e-06,\n 4.2522e-07, -7.1307e-06, 1.0441e-05, -2.5764e-07, 4.5414e-06,\n 2.6175e-07, 4.2316e-06, -5.3257e-06, -1.7773e-06, 8.3032e-06,\n 7.5897e-06, -1.2759e-05, -4.3379e-06, -8.7123e-06, 8.8489e-07,\n 1.2883e-05, 2.4355e-06, -5.5448e-07, -3.2350e-06, -2.0739e-06,\n 2.3199e-07, 5.4885e-06, -1.9400e-07, -3.5140e-06, -1.0740e-05,\n 8.0768e-06, 2.0660e-05, 2.7964e-05, -5.9541e-07, 1.9531e-06,\n -1.4858e-06, 3.1707e-06, 9.3677e-06, -7.6041e-08, 9.5699e-07,\n 3.7142e-06, -9.4096e-08, -9.9868e-08, 4.6904e-07, -1.8970e-07,\n 2.9852e-06, 2.7228e-06, 2.5998e-05, 1.5231e-05, -8.9025e-07,\n 2.3180e-06, -5.3855e-08, -5.9220e-06, 8.8093e-07, -6.9805e-07,\n -2.5829e-06, -1.2316e-06, 1.1764e-06, 2.2310e-06, 1.7767e-05,\n 7.5898e-06, 4.9731e-06, 1.8850e-06, -1.6712e-06, 1.2505e-05,\n 6.0001e-06, -1.4465e-06, -6.4588e-07, -1.5597e-06, 1.9456e-07,\n 3.4400e-06, -3.3348e-06, 2.0477e-06, 3.0397e-06, -2.0016e-06,\n -1.4237e-06, 2.3502e-05, 2.0800e-06, -3.4440e-06, -2.0361e-06,\n 6.4854e-06, -6.5134e-07, 3.5492e-06, -1.5801e-05, -2.6335e-06,\n -2.3306e-05, 7.0245e-07, 5.4613e-06, 3.5007e-08, -5.9902e-06,\n 3.3067e-06, -1.9959e-06, 1.1314e-05, 2.0092e-06, -3.1792e-06,\n -1.4198e-06, -4.0992e-07, -1.0017e-06, -8.5160e-06, 2.7987e-06,\n -1.1155e-06, 2.6194e-06, 2.6990e-06, 2.1231e-07, -1.1122e-05,\n 2.0176e-06, 3.6397e-06, 8.5072e-07, 1.3630e-06, 4.3288e-07,\n 1.3558e-06, -9.8096e-06, 7.9371e-06, -3.4602e-06, -4.6901e-07,\n 2.6375e-06, 3.1494e-07, -4.8593e-06, -1.8597e-06, 6.3819e-06,\n 3.8662e-07, -7.1349e-08, 9.2314e-06, -4.1600e-06, -1.5981e-07,\n -1.8397e-06, 3.1500e-06, 1.0859e-06, -5.7909e-06, 5.1196e-05,\n -1.0967e-05, 8.7650e-07, -3.1551e-06, 7.0121e-06, 9.0300e-07,\n -3.0517e-06, -4.4537e-06, 1.8545e-07, -3.3373e-06, 4.4959e-07,\n 1.9119e-07, 1.2946e-05, -7.6712e-06, 2.0456e-06, -1.0433e-05,\n -7.3176e-06, 1.7111e-06, -1.0713e-05, -6.5723e-06, -1.0649e-05,\n 1.2500e-05, -5.8700e-06, 6.4647e-06, -2.8040e-06, 2.9760e-07,\n 1.2725e-06, 1.6608e-06, 3.3859e-06, 4.0578e-07, 1.5012e-06,\n -8.1557e-06, 4.1980e-06, -4.8164e-06, -1.8212e-06, 5.5741e-06,\n -8.3746e-07, -1.3431e-05, 8.4235e-08, 3.3692e-07, -9.6620e-07,\n -1.8584e-06, -3.4054e-06, -3.1923e-06, -8.2042e-06, -9.8424e-07,\n -1.9801e-06, -2.6243e-06, 7.3642e-06, -9.1434e-06, -5.8108e-08,\n 8.5061e-07, -1.2613e-05, 1.2956e-05, 5.9346e-06, 2.0021e-05,\n -2.3396e-05, -2.6234e-06, -1.3998e-06, 1.4061e-06, 1.1064e-06,\n -2.0746e-05, -1.8522e-05, -4.4649e-06, 5.5334e-06, 1.4325e-05,\n 9.5926e-06, -2.3523e-06, 1.0315e-05, 3.7597e-06, -5.1134e-06,\n -5.4104e-07, 6.0906e-07, -1.0958e-06, 2.4699e-06, -1.0071e-05,\n 8.6208e-06, -3.1280e-06], device='cuda:0'), 'exp_avg_sq': tensor([7.8907e-09, 3.2074e-09, 7.3070e-10, 1.9786e-08, 1.5464e-10, 3.3906e-09,\n 2.3680e-09, 5.7447e-09, 3.5511e-08, 3.2926e-08, 1.1839e-10, 1.7383e-09,\n 2.1431e-09, 1.6176e-10, 4.4763e-09, 5.5781e-09, 1.0426e-10, 2.0538e-09,\n 4.8525e-09, 1.6934e-09, 2.3377e-08, 1.5210e-08, 2.5757e-10, 2.2633e-10,\n 1.0316e-10, 2.5752e-10, 6.1312e-09, 2.4188e-10, 1.6900e-09, 2.3807e-09,\n 1.9491e-10, 3.9861e-09, 1.5797e-09, 5.4142e-09, 1.9737e-09, 3.4956e-09,\n 4.6977e-10, 5.4260e-10, 3.8889e-10, 7.0243e-10, 6.3126e-10, 1.6979e-09,\n 1.7901e-09, 3.6488e-10, 5.4852e-10, 2.3139e-09, 7.2893e-09, 5.5542e-10,\n 2.5382e-10, 2.3134e-09, 6.0251e-10, 5.3160e-10, 7.1246e-10, 9.4272e-09,\n 2.3704e-08, 5.9222e-10, 6.1117e-10, 9.2214e-10, 2.4093e-09, 9.2798e-09,\n 2.6394e-09, 1.7489e-09, 2.1423e-08, 2.6855e-09, 6.5191e-10, 4.9690e-09,\n 8.7294e-10, 2.8365e-10, 4.2650e-10, 2.5251e-09, 2.8746e-10, 2.4060e-09,\n 6.0210e-10, 3.3529e-10, 3.4124e-08, 4.4040e-10, 3.4160e-10, 2.7320e-09,\n 2.0976e-08, 2.0326e-09, 1.3473e-07, 4.0566e-08, 1.3462e-09, 4.4733e-09,\n 1.2683e-09, 5.8728e-10, 4.9188e-09, 1.0690e-10, 2.8012e-08, 2.1943e-08,\n 1.3310e-09, 1.9069e-10, 8.6812e-10, 4.7419e-08, 3.4087e-10, 3.6864e-10,\n 3.1714e-09, 2.0449e-08, 3.4225e-09, 2.0468e-10, 2.5075e-10, 1.6409e-10,\n 1.6468e-10, 2.4524e-09, 5.8638e-10, 1.3869e-09, 1.1673e-09, 1.8426e-08,\n 2.3436e-09, 5.2191e-10, 1.1923e-09, 5.2336e-10, 2.5286e-10, 1.4756e-10,\n 1.7919e-09, 6.4866e-08, 8.0224e-09, 1.7836e-10, 2.1797e-09, 8.9389e-09,\n 8.1624e-09, 9.9196e-10, 4.7291e-09, 6.1951e-10, 4.9591e-10, 3.0843e-10,\n 5.6996e-10, 7.8359e-09, 5.7991e-09, 1.9203e-09, 1.0151e-09, 6.9417e-10,\n 2.2866e-09, 6.2050e-09, 1.6220e-09, 7.8113e-10, 9.4317e-11, 9.7709e-09,\n 1.8312e-10, 6.6833e-09, 2.4312e-10, 3.9055e-08, 4.9392e-11, 5.4992e-09,\n 2.4549e-09, 6.2275e-10, 5.1860e-10, 5.6354e-10, 1.0816e-10, 1.1754e-10,\n 1.6991e-10, 3.8922e-09, 1.2519e-09, 5.6858e-10, 8.8227e-09, 4.8122e-10,\n 1.1578e-09, 4.7415e-10, 5.4740e-10, 2.2106e-09, 5.6891e-10, 1.8356e-08,\n 2.3294e-10, 7.1116e-10, 1.2117e-10, 1.5030e-08, 3.6612e-10, 1.3976e-09,\n 2.2712e-10, 2.3935e-09, 4.5356e-09, 1.9966e-09, 1.8638e-09, 1.0337e-10,\n 3.8760e-09, 2.6072e-10, 6.3805e-10, 4.6707e-10, 4.5965e-08, 5.7986e-10,\n 1.7491e-09, 3.0749e-10, 4.4472e-09, 2.7684e-10, 4.5759e-10, 3.1861e-09,\n 1.3255e-10, 1.5743e-08, 1.1315e-07, 4.2162e-10, 2.6689e-09, 4.4159e-10,\n 2.6213e-10, 1.7706e-08, 1.9593e-10, 9.2216e-09, 3.6440e-10, 9.0840e-10,\n 1.1521e-09, 2.9159e-10, 4.2851e-08, 1.3661e-08, 1.7903e-10, 2.0905e-08,\n 1.5033e-07, 2.8212e-09, 2.1272e-10, 1.7286e-10, 4.3012e-09, 1.9576e-08,\n 6.7348e-10, 5.2722e-10, 4.1903e-08, 2.7585e-09, 5.1532e-09, 4.4990e-09,\n 5.1426e-10, 8.7802e-10, 6.4218e-09, 5.5157e-09, 8.7112e-10, 1.2155e-09,\n 4.3096e-09, 3.2409e-09, 2.0500e-09, 4.4079e-10, 2.5752e-10, 2.2469e-09,\n 9.4821e-10, 2.4519e-10, 1.3073e-09, 2.0310e-08, 2.9735e-10, 2.4326e-09,\n 8.5511e-09, 1.6505e-09, 1.7178e-09, 1.2375e-09, 3.7115e-09, 1.8230e-09,\n 6.8482e-10, 2.0664e-10, 9.0333e-09, 4.4332e-10, 1.6766e-09, 1.3190e-09,\n 1.3454e-09, 1.4558e-08, 2.2507e-10, 1.4126e-08, 1.5085e-10, 1.1623e-09,\n 5.2826e-09, 2.6730e-08, 1.0726e-09, 1.9130e-09, 1.1496e-09, 6.1094e-10,\n 1.3227e-08, 4.8444e-09, 7.1713e-09, 6.5961e-09, 2.9571e-10, 2.5814e-09,\n 1.1748e-09, 1.9091e-09, 1.8964e-10, 2.6805e-08, 2.3069e-09, 1.0804e-08,\n 2.7414e-10, 6.6741e-09, 2.3735e-09, 1.2725e-09, 8.2563e-09, 9.1132e-11,\n 5.4837e-09, 2.5446e-10, 8.8930e-09, 3.1443e-10, 1.7605e-10, 4.4059e-09,\n 2.8223e-10, 3.7830e-10, 4.0390e-10, 6.3356e-10, 1.0222e-09, 1.7517e-09,\n 8.0304e-09, 2.6135e-10, 1.6352e-09, 1.3269e-08, 2.0808e-08, 1.2871e-08,\n 6.2435e-10, 7.2328e-08, 3.7570e-10, 1.7431e-10, 4.3288e-10, 1.9149e-09,\n 4.4883e-09, 6.9307e-10, 2.0460e-10, 1.9996e-09, 1.0473e-09, 3.6765e-09,\n 2.7983e-10, 1.9418e-09, 1.9516e-09, 1.1766e-08, 7.0115e-11, 1.2866e-09,\n 8.1881e-09, 3.2586e-10, 1.2898e-09, 3.5559e-09, 7.4551e-09, 7.3623e-09,\n 2.7897e-10, 5.8940e-09, 2.7501e-09, 2.6509e-08, 3.1358e-09, 3.8378e-10,\n 2.3348e-10, 2.3620e-08, 5.5781e-10, 2.5877e-10, 8.8328e-10, 2.5466e-10,\n 2.0852e-10, 3.1028e-09, 3.5323e-10, 4.9261e-10, 3.6684e-09, 1.0151e-07,\n 9.9319e-09, 5.5029e-09, 2.7249e-10, 8.1115e-11, 5.0033e-10, 1.0400e-09,\n 8.1030e-09, 1.5495e-09, 1.0138e-10, 1.2258e-09, 1.0557e-09, 5.1912e-10,\n 3.4545e-09, 8.6364e-10, 1.2005e-09, 2.3204e-09, 8.4064e-09, 6.3830e-09,\n 2.1625e-09, 3.3704e-09, 6.2545e-10, 3.0765e-09, 2.7948e-09, 2.8960e-10,\n 1.0138e-09, 6.6202e-10, 4.1750e-10, 2.9536e-10, 8.1183e-09, 2.9014e-09,\n 1.0979e-08, 6.1971e-10, 1.5671e-10, 5.0778e-09, 2.7672e-09, 8.0372e-10,\n 8.0857e-09, 1.1963e-10, 6.0983e-09, 1.2274e-09, 5.5169e-10, 2.6287e-10,\n 1.3707e-08, 7.2238e-10, 5.3555e-10, 4.0770e-09, 4.3532e-10, 1.0383e-09,\n 3.7896e-10, 7.8954e-10, 7.1266e-10, 1.9526e-10, 2.2717e-09, 7.0065e-10,\n 1.1728e-07, 1.8594e-10, 2.1063e-09, 3.7375e-10, 1.0663e-09, 1.2083e-09,\n 3.5750e-10, 5.4433e-09, 1.2654e-09, 1.9042e-09, 4.2374e-10, 2.1703e-10,\n 7.5311e-11, 5.4356e-09, 5.9809e-09, 1.7455e-10, 2.6278e-09, 3.8977e-10,\n 1.7816e-10, 2.1519e-09, 1.7236e-09, 2.1309e-09, 7.8150e-10, 5.8494e-10,\n 1.6685e-10, 2.1093e-10, 4.5012e-08, 6.0592e-09, 3.3035e-10, 1.5631e-10,\n 1.2906e-10, 2.8946e-10, 7.2379e-10, 7.3439e-10, 3.0247e-10, 6.7389e-10,\n 3.2720e-10, 5.7965e-09, 1.3166e-08, 4.0405e-10, 1.6066e-10, 1.1398e-08,\n 3.0929e-10, 8.3011e-10, 9.3347e-08, 1.2144e-09, 5.0917e-10, 4.0655e-09,\n 9.5922e-10, 4.5796e-10, 5.3384e-10, 1.8265e-09, 3.7452e-10, 1.1688e-09,\n 2.9110e-10, 2.0004e-09, 4.8275e-09, 6.2594e-09, 9.6262e-10, 1.3199e-08,\n 1.3675e-09, 7.9563e-09, 6.9530e-09, 7.5746e-10, 1.1229e-08, 4.4758e-08,\n 8.1470e-09, 1.7096e-08, 9.7221e-10, 2.7134e-10, 2.0178e-09, 5.7683e-10,\n 2.1304e-10, 4.1808e-10, 1.9433e-09, 1.5444e-09, 1.6218e-09, 2.4943e-09,\n 8.5512e-10, 1.0483e-09, 6.8481e-10, 1.8319e-08, 1.2653e-10, 2.9503e-10,\n 4.3111e-10, 1.8006e-10, 1.1491e-09, 3.5669e-09, 1.8383e-08, 4.2513e-10,\n 6.7119e-10, 4.3734e-09, 3.9806e-09, 1.4979e-08, 5.0774e-10, 1.5016e-09,\n 1.8159e-08, 2.4616e-08, 1.3027e-09, 1.2752e-08, 7.8172e-09, 6.8892e-10,\n 2.0318e-10, 3.7070e-10, 1.6378e-10, 8.3647e-09, 1.1135e-08, 1.7191e-09,\n 7.3995e-10, 1.1181e-08, 5.0679e-09, 1.9162e-10, 1.1305e-09, 1.1134e-09,\n 1.5625e-08, 4.0940e-10, 7.0736e-10, 2.8140e-10, 1.8224e-10, 1.1437e-08,\n 7.8131e-10, 1.1156e-08], device='cuda:0')}, 87: {'step': tensor(62477.), 'exp_avg': tensor([[ 1.9143e-05, 2.6945e-06, 4.7021e-06, ..., 1.5921e-06,\n -7.0376e-08, 1.5610e-06],\n [-4.2246e-06, -1.0279e-05, -4.6598e-06, ..., 7.1173e-06,\n -3.6937e-06, -1.0838e-06],\n [-6.8609e-06, -1.6179e-06, 1.0005e-06, ..., -2.0299e-06,\n -1.8468e-06, 3.3249e-07],\n ...,\n [-1.7257e-05, -1.1353e-05, -1.5200e-05, ..., -1.4240e-06,\n 9.0462e-06, 7.1864e-07],\n [ 5.5312e-06, -5.4656e-06, -6.9480e-06, ..., -6.6508e-06,\n 5.2459e-07, 1.2710e-06],\n [-9.0372e-06, -1.9164e-06, 8.6737e-06, ..., -7.6844e-07,\n 2.6188e-07, -1.1373e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[7.5134e-09, 9.0506e-09, 6.3485e-09, ..., 1.1271e-09, 6.0105e-10,\n 8.7401e-11],\n [4.7729e-09, 5.9295e-09, 2.1698e-09, ..., 8.1451e-10, 4.5693e-10,\n 3.0916e-11],\n [5.4210e-09, 6.2929e-09, 2.9346e-09, ..., 1.2300e-09, 6.3844e-10,\n 1.9593e-11],\n ...,\n [7.3029e-09, 7.0279e-09, 3.8810e-09, ..., 1.6790e-09, 4.7376e-10,\n 4.0746e-11],\n [8.7366e-09, 9.7970e-09, 6.7484e-09, ..., 1.1413e-09, 5.0156e-10,\n 1.4287e-10],\n [7.3453e-09, 1.0141e-08, 1.1790e-08, ..., 1.8877e-09, 1.2995e-09,\n 1.0268e-10]], device='cuda:0')}, 88: {'step': tensor(62477.), 'exp_avg': tensor([ 5.1366e-07, 2.4544e-06, -1.5912e-06, ..., 1.0803e-06,\n 1.4019e-06, -1.5034e-06], device='cuda:0'), 'exp_avg_sq': tensor([8.5294e-10, 4.5297e-10, 3.3381e-10, ..., 5.7015e-10, 5.3884e-10,\n 7.9237e-10], device='cuda:0')}, 89: {'step': tensor(62477.), 'exp_avg': tensor([[ 8.0024e-06, 3.2616e-06, -3.7886e-06, ..., 1.9783e-05,\n -7.5051e-07, 1.2672e-06],\n [ 3.5840e-05, 4.6340e-06, 2.4992e-06, ..., -2.6256e-06,\n -8.8543e-06, 6.2997e-06],\n [ 8.3944e-06, 1.5551e-06, 2.1442e-05, ..., -2.7069e-05,\n 6.3384e-06, 6.2204e-07],\n ...,\n [-2.8407e-06, -3.0464e-07, -1.6480e-06, ..., -1.7565e-06,\n -7.5337e-07, -6.3355e-07],\n [-2.7231e-06, 6.1089e-06, 1.6491e-05, ..., -6.1650e-06,\n 1.1880e-06, -1.6486e-06],\n [-5.2690e-06, -1.2770e-06, 9.4814e-07, ..., -6.0002e-06,\n 1.2836e-06, -3.5966e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[7.2508e-09, 1.9460e-09, 2.2591e-09, ..., 9.4467e-09, 1.1391e-09,\n 2.0050e-09],\n [1.0262e-08, 1.8320e-09, 2.2865e-09, ..., 7.5752e-09, 9.6713e-10,\n 1.9686e-09],\n [1.5003e-08, 2.7161e-09, 5.7856e-09, ..., 1.1908e-08, 1.7341e-09,\n 3.7786e-09],\n ...,\n [5.0597e-10, 8.6763e-11, 1.4213e-10, ..., 2.6764e-10, 1.6486e-10,\n 2.0211e-10],\n [7.2396e-09, 1.1963e-09, 2.0691e-09, ..., 2.1137e-09, 1.7984e-09,\n 4.1484e-09],\n [8.9291e-10, 2.0298e-10, 2.9761e-10, ..., 3.2305e-10, 3.2976e-10,\n 5.1163e-10]], device='cuda:0')}, 90: {'step': tensor(62477.), 'exp_avg': tensor([-1.2568e-06, -7.2334e-07, 1.2379e-06, -9.0531e-06, -2.9602e-05,\n 1.9146e-05, -3.4695e-05, 5.3153e-05, 3.9711e-05, 6.2196e-08,\n -5.6317e-06, -6.4642e-07, -1.7420e-05, 5.0601e-06, -2.6202e-05,\n -3.2500e-05, 6.8428e-06, 3.3035e-05, 4.7545e-05, -3.9090e-05,\n 1.0110e-06, -2.9933e-05, -2.9820e-05, 2.5094e-05, 8.3352e-06,\n 2.0835e-05, 1.5300e-06, 3.8853e-05, -1.6195e-05, 1.8607e-05,\n 7.8900e-06, -3.9047e-05, -2.5498e-05, -6.1144e-06, -1.0887e-05,\n -2.0818e-05, 3.5330e-05, -1.5824e-05, -1.3390e-05, -6.5692e-06,\n 2.2895e-05, -1.0944e-05, 1.9861e-05, 1.1193e-06, 1.9004e-05,\n -3.6118e-06, -8.6705e-06, 1.0846e-05, 2.2921e-05, -2.6455e-06,\n 2.0000e-05, -2.4671e-06, 6.4618e-06, -2.4415e-05, -1.9913e-05,\n 1.3583e-05, -7.3075e-06, -2.5042e-06, -2.0792e-05, -2.0166e-05,\n 3.5979e-05, 8.6702e-06, 4.5698e-06, 2.0207e-05, -9.8827e-06,\n 1.0871e-05, -2.5898e-05, 2.0846e-05, -2.3572e-06, 4.5447e-06,\n -2.4916e-05, -2.7360e-05, 1.0776e-05, 5.0958e-06, 7.9241e-06,\n 1.5125e-05, 1.2426e-05, 6.6001e-06, 5.8930e-08, 6.4609e-06,\n -1.4176e-05, -6.5561e-06, 1.4532e-07, -3.7792e-06, 3.3423e-05,\n -6.6502e-06, 3.2546e-06, 5.2413e-06, 9.6243e-06, -1.0075e-06,\n 7.4845e-08, -7.8768e-06, -3.6798e-06, 2.6662e-05, 2.6457e-06,\n -3.6025e-06, 2.1764e-05, -8.7081e-06, 3.8075e-05, -1.2913e-05,\n 9.3397e-06, -1.5995e-05, -4.7769e-06, -7.5578e-06, 1.9492e-05,\n 9.4327e-06, -2.0509e-05, 3.9399e-06, -1.7388e-05, 1.6219e-05,\n -5.5322e-06, 6.3784e-06, -8.2143e-06, 1.7435e-05, 2.6274e-05,\n -8.2998e-06, 3.8834e-06, -8.7092e-06, 1.7657e-05, -6.1661e-06,\n -1.8893e-06, 1.5003e-06, -1.3144e-05, -1.2641e-05, -5.8100e-06,\n -1.7258e-05, -1.8116e-06, -6.9187e-06, 4.6374e-05, -1.6696e-05,\n -1.0154e-05, -6.8743e-05, 1.0802e-05, -1.3701e-05, -5.9967e-05,\n 4.6676e-06, 1.0156e-05, 6.1951e-06, 1.6997e-05, 1.5585e-05,\n -7.2408e-06, 5.6082e-05, 1.4105e-05, -2.9844e-05, -4.9168e-06,\n -1.0536e-05, -3.4897e-06, -4.7088e-06, 3.2363e-05, -2.8493e-05,\n 7.4584e-06, 1.3109e-05, -8.0523e-06, 3.5091e-06, 1.6488e-05,\n 5.4509e-05, 4.4655e-06, -7.3349e-06, -3.4998e-06, -2.7236e-05,\n 3.3535e-05, 3.4190e-06, 1.7514e-05, 6.2777e-06, -3.7184e-05,\n -1.0974e-05, 3.8859e-06, 3.3950e-06, 6.3909e-07, 9.1377e-06,\n 1.2667e-05, 3.1886e-06, 6.0315e-06, 1.1323e-06, 4.4560e-06,\n -6.5568e-06, 1.7460e-05, -1.9590e-05, -2.4200e-05, -7.1946e-07,\n 1.8201e-05, 5.3671e-06, 1.3093e-05, 1.5608e-05, -3.4116e-05,\n -8.4145e-06, -1.1477e-06, 1.5062e-05, 2.0781e-05, -1.3321e-06,\n -1.8143e-06, 3.6719e-06, -9.7358e-06, -1.4410e-06, -1.8261e-05,\n 1.0179e-05, 1.7744e-06, -8.5795e-06, -5.8505e-06, -1.8484e-06,\n -8.2549e-06, 4.2321e-06, 4.1851e-06, -5.6119e-06, -8.8320e-06,\n -1.0632e-06, -4.8683e-06, -1.2804e-05, 1.6496e-06, 1.6848e-05,\n -4.9129e-05, -7.1943e-06, 1.4341e-05, -1.0852e-05, -1.4434e-05,\n -4.3283e-07, -3.9112e-05, 1.7371e-05, 1.4431e-05, 4.1765e-06,\n 1.4128e-05, -2.3239e-05, -1.1763e-06, -3.0554e-05, -2.9985e-05,\n 3.5496e-07, -3.7837e-07, -1.2331e-05, 1.6847e-05, 1.0907e-05,\n 9.1064e-06, -3.7360e-06, -5.0014e-06, -4.2827e-06, 4.3661e-06,\n 7.7226e-07, 6.2804e-06, 1.1368e-05, -1.0813e-05, 4.4741e-07,\n -4.3202e-06, -5.8833e-06, -1.1927e-06, 1.2510e-05, 2.0192e-05,\n -1.9093e-05, -1.5769e-05, -2.2998e-05, 3.2696e-06, 3.6046e-06,\n 5.6778e-06, -1.0970e-06, 2.4233e-06, 1.2121e-05, -1.0367e-05,\n 1.4466e-05, -6.2679e-05, 2.4180e-05, 3.3556e-06, -3.4848e-05,\n -3.1400e-05, 2.7457e-06, 1.8472e-05, 5.0458e-05, -7.6110e-06,\n -2.6923e-05, 9.6895e-06, -2.6490e-05, 6.1125e-06, 8.6151e-06,\n -9.2203e-06, 2.0739e-05, 1.7036e-05, 1.6227e-05, 1.5325e-05,\n -2.1534e-05, -1.7294e-05, -3.6606e-05, 1.7188e-05, -5.3299e-06,\n 1.6499e-05, -5.4883e-06, 3.9194e-05, 4.5706e-06, -7.5563e-06,\n -1.3993e-05, -1.8226e-05, -3.5094e-05, -1.5708e-05, 2.0291e-05,\n 6.1532e-06, -1.1214e-05, 9.3573e-07, -1.1126e-05, -3.0886e-06,\n -1.1769e-05, 3.9655e-05, -3.6622e-05, -1.8367e-05, 2.7646e-06,\n 2.0490e-05, -1.0861e-05, -1.1309e-05, -8.0082e-06, -9.5985e-06,\n -1.2773e-05, 2.3760e-06, -1.0905e-05, 1.8934e-05, -2.5849e-06,\n -4.3387e-05, -2.1312e-05, -3.9531e-06, -2.0060e-05, 2.5090e-07,\n 1.4486e-06, -3.8282e-06, 6.0283e-06, 2.3312e-07, 1.3504e-06,\n 2.7401e-05, -2.2932e-05, 7.2990e-06, 1.5856e-05, -1.0701e-05,\n -7.3276e-06, -1.3009e-05, 1.3653e-05, 1.5943e-05, -2.0056e-06,\n 4.8468e-06, -3.6976e-06, 2.9051e-05, 1.9632e-05, 1.4357e-06,\n -7.3615e-07, -2.5876e-05, -4.4595e-05, 9.4906e-06, -2.0052e-05,\n -1.1799e-05, -1.1481e-05, 6.4296e-07, 3.6914e-06, -8.0707e-06,\n 7.5779e-06, -4.9348e-06, 3.8448e-06, 2.2883e-06, -4.5971e-06,\n 8.9907e-07, 1.4833e-05, -2.2719e-05, -3.8129e-06, -3.0989e-06,\n 9.9234e-06, -5.8281e-07, -2.4637e-06, -1.9183e-06, 1.6961e-06,\n -2.2255e-06, 1.4216e-05, -8.1516e-06, -2.1101e-06, -1.4395e-05,\n 3.0851e-05, -1.0399e-05, -1.1237e-05, -1.9980e-05, -1.4076e-05,\n -1.3380e-05, -4.1953e-06, 1.5936e-06, 4.6718e-06, 3.9490e-06,\n 1.3322e-06, -4.0324e-05, -7.4026e-06, 7.6510e-07, 2.9029e-06,\n 1.4421e-05, -3.7680e-05, 5.4540e-07, 7.8439e-06, -4.5373e-05,\n 3.5045e-05, 9.8868e-06, -1.1177e-05, -1.4100e-05, 5.3741e-05,\n -6.1714e-05, -2.7936e-05, 1.3148e-05, -2.6134e-05, 1.0906e-05,\n 4.9294e-05, -4.8315e-06, -5.5141e-05, -2.9041e-05, -4.0104e-06,\n -3.5781e-05, -1.5476e-06, 1.1695e-05, 8.3167e-08, -8.8587e-08,\n -5.2496e-05, 2.2001e-05, -1.9537e-06, -3.2088e-05, 2.3219e-05,\n -6.1456e-06, -4.6350e-06, 3.0682e-06, -2.3422e-05, -1.9154e-07,\n -8.8264e-06, -8.1819e-06, 1.3234e-06, -8.1229e-06, 2.2288e-05,\n 1.8834e-05, -1.5747e-05, -1.1148e-06, 2.9386e-06, 2.2475e-05,\n -8.8527e-06, -4.4379e-06, 7.0026e-06, -4.7283e-06, -7.9402e-06,\n 1.0784e-05, -1.4637e-05, -2.7473e-05, -2.7076e-05, -1.4457e-05,\n -1.9774e-05, 5.2667e-06, -1.7188e-06, -3.9215e-05, -1.2546e-06,\n -2.1879e-06, 6.3635e-07, -1.5772e-06, 6.0737e-06, -3.7355e-08,\n -3.0300e-06, 1.0336e-05, -2.5547e-06, -1.6338e-06, -3.7092e-07,\n 1.9030e-05, 4.1524e-07, -9.6117e-06, -2.7340e-05, 4.4666e-06,\n 8.4983e-06, -5.0260e-06, -1.9194e-05, 3.3530e-07, 1.3413e-05,\n -6.1950e-06, 7.8105e-06, -1.4711e-05, 2.9561e-06, 4.3269e-07,\n -5.7356e-06, 3.9646e-06, -5.0854e-06, -1.1834e-05, -2.3390e-05,\n 8.7609e-06, 3.3784e-06, 3.1326e-07, -1.6635e-05, 2.2023e-05,\n 7.9734e-06, 6.9904e-06, 5.8962e-06, -3.2791e-06, 3.7973e-06,\n 1.2718e-05, -2.3352e-06, -1.1398e-05, -3.0107e-07, -2.5409e-06,\n 3.4417e-06, 4.9689e-06, 2.4544e-06, 2.4248e-05, 3.0955e-06,\n -2.9898e-05, -3.6961e-06, 3.1978e-05, -8.0372e-06, -2.2818e-06,\n 1.6543e-05, -8.2898e-06, -2.4432e-05, -1.4201e-05, 5.1949e-06,\n 1.4707e-05, -2.3599e-05, -1.1365e-06, 4.1112e-06, 7.5451e-06,\n -5.7469e-06, 1.2070e-05, 1.2142e-05, -2.5209e-05, -3.4376e-06,\n -3.1244e-05, 7.8724e-07], device='cuda:0'), 'exp_avg_sq': tensor([2.0913e-08, 3.7082e-08, 3.9351e-08, 6.1916e-08, 6.0562e-08, 3.9409e-08,\n 4.3095e-08, 6.8479e-08, 3.7269e-08, 2.1742e-08, 5.3671e-08, 3.0602e-08,\n 3.7708e-08, 2.3030e-08, 2.8859e-08, 4.1089e-08, 4.8016e-08, 4.7121e-08,\n 1.2613e-07, 7.6255e-08, 2.0695e-08, 2.8296e-08, 4.5075e-08, 1.7581e-08,\n 4.5722e-08, 7.1049e-09, 7.2787e-08, 4.7675e-08, 2.5161e-08, 1.8213e-08,\n 7.0742e-08, 3.2015e-08, 4.1525e-08, 1.0433e-07, 6.6163e-08, 2.1964e-08,\n 4.5140e-08, 2.1666e-08, 5.2085e-09, 1.3233e-08, 4.1159e-08, 3.2218e-08,\n 1.2526e-08, 2.1809e-08, 2.5771e-08, 3.2446e-08, 5.0582e-09, 3.2947e-08,\n 1.0819e-08, 1.2448e-08, 2.2219e-08, 3.0745e-08, 2.8216e-08, 2.6660e-08,\n 1.1069e-08, 2.9385e-08, 1.5689e-09, 5.7767e-09, 2.5017e-08, 1.9100e-08,\n 8.0578e-08, 3.2899e-08, 4.2193e-09, 2.7938e-08, 6.2262e-08, 1.7604e-08,\n 9.6464e-09, 1.0424e-08, 8.7393e-10, 8.4975e-09, 1.1415e-08, 3.1975e-08,\n 1.1964e-08, 2.0437e-08, 3.0344e-09, 1.5239e-08, 1.5653e-08, 3.0525e-08,\n 4.3293e-09, 9.4625e-10, 1.0834e-08, 2.3857e-08, 2.0343e-08, 2.7196e-08,\n 4.7608e-08, 8.7256e-09, 2.7261e-08, 5.7560e-09, 1.2561e-08, 3.9680e-09,\n 1.7644e-09, 8.8003e-10, 1.1169e-08, 4.9516e-08, 1.2566e-09, 6.0161e-10,\n 1.5040e-08, 2.0111e-08, 3.7427e-08, 2.0048e-08, 2.1231e-08, 2.5117e-08,\n 3.4105e-09, 6.2397e-09, 1.4808e-08, 1.0304e-08, 2.5482e-08, 3.0843e-09,\n 1.2492e-08, 8.3938e-09, 9.6867e-09, 1.7115e-08, 1.4372e-08, 1.6103e-08,\n 1.4230e-08, 6.3120e-09, 2.1072e-08, 1.1568e-08, 1.6284e-08, 4.9763e-09,\n 1.0333e-08, 3.8439e-09, 9.3929e-09, 7.3859e-09, 9.6620e-09, 3.4891e-08,\n 2.4868e-08, 1.8555e-08, 8.6515e-08, 2.6468e-07, 1.6884e-08, 4.8028e-08,\n 3.5185e-08, 9.2654e-08, 5.1739e-08, 2.9147e-08, 1.5082e-08, 3.7180e-08,\n 4.3712e-08, 7.3617e-08, 3.4928e-08, 7.6646e-08, 1.3146e-08, 1.3556e-08,\n 2.4580e-08, 3.6589e-08, 1.0338e-08, 7.6231e-08, 4.4737e-08, 1.9260e-08,\n 1.4197e-08, 2.8798e-08, 2.5121e-08, 4.3087e-08, 2.1159e-08, 7.0405e-08,\n 4.3441e-08, 3.5113e-08, 5.4738e-09, 1.9589e-08, 3.5903e-08, 6.8163e-08,\n 1.8942e-08, 2.4110e-08, 6.2482e-08, 1.3618e-08, 2.8933e-08, 6.9518e-08,\n 1.7927e-08, 7.2542e-09, 1.1245e-08, 4.5345e-09, 1.9338e-08, 5.1705e-09,\n 2.2775e-09, 2.9016e-08, 9.3034e-09, 1.1256e-08, 2.9246e-08, 1.6537e-09,\n 5.3745e-09, 1.6103e-08, 2.3252e-08, 1.2926e-08, 2.3075e-08, 7.0019e-09,\n 1.9801e-09, 1.0017e-08, 3.5757e-08, 1.6800e-08, 1.3103e-09, 3.4705e-08,\n 5.1762e-09, 2.2256e-09, 1.6389e-08, 1.3026e-08, 2.4031e-09, 1.0009e-08,\n 6.8489e-10, 4.4701e-09, 8.8545e-09, 3.3049e-08, 1.8516e-09, 3.9349e-09,\n 5.0893e-08, 1.1881e-09, 5.6571e-09, 4.6037e-09, 4.1514e-09, 3.0439e-08,\n 1.7463e-08, 1.6288e-08, 1.8767e-08, 1.2025e-08, 1.5603e-08, 2.0899e-10,\n 3.8535e-08, 3.3400e-08, 1.1653e-08, 1.3263e-08, 2.6965e-08, 1.6359e-08,\n 1.0728e-09, 3.5541e-08, 1.2414e-08, 9.7330e-10, 4.9543e-09, 6.5359e-09,\n 6.3345e-09, 3.4494e-09, 1.2890e-08, 3.5664e-08, 1.2703e-08, 3.7730e-09,\n 8.1940e-09, 3.8095e-09, 2.6660e-09, 1.0271e-08, 1.3155e-08, 4.5417e-09,\n 5.0032e-09, 8.8339e-09, 1.3976e-08, 7.9222e-09, 5.1457e-08, 2.2354e-08,\n 1.0564e-08, 1.1709e-08, 1.1597e-08, 6.6309e-09, 5.5214e-09, 1.4527e-08,\n 3.2038e-09, 9.3652e-09, 2.3287e-08, 1.0712e-08, 3.0303e-07, 6.3113e-08,\n 2.5273e-08, 3.0283e-08, 4.6659e-08, 2.8293e-08, 2.2396e-08, 4.6846e-08,\n 2.5601e-08, 3.1648e-08, 4.3295e-08, 1.3546e-08, 2.4530e-08, 2.0710e-08,\n 4.3390e-08, 2.4273e-08, 4.7262e-08, 3.8577e-08, 2.6462e-08, 3.5082e-08,\n 3.4841e-08, 2.1314e-08, 3.1470e-08, 3.4548e-08, 1.4303e-08, 3.4553e-08,\n 2.3690e-08, 1.7211e-08, 2.4171e-08, 3.1216e-08, 6.1218e-08, 3.3891e-08,\n 1.1833e-08, 3.5263e-08, 1.3231e-08, 2.8396e-08, 4.3213e-08, 1.8896e-08,\n 1.7341e-08, 6.9311e-09, 2.8131e-08, 6.4373e-08, 3.6047e-08, 4.2094e-08,\n 1.5551e-08, 3.0554e-08, 3.3870e-08, 1.0752e-08, 1.9242e-08, 9.4068e-09,\n 2.0462e-08, 1.2438e-08, 2.1462e-08, 1.4511e-08, 4.0785e-08, 3.7466e-08,\n 4.0615e-10, 3.5328e-08, 2.0162e-09, 1.0057e-08, 1.1120e-08, 1.3455e-08,\n 3.8719e-08, 1.1077e-08, 7.0006e-08, 3.2406e-08, 1.5139e-08, 7.0257e-09,\n 7.0356e-09, 4.7692e-09, 4.5092e-08, 5.7469e-09, 2.1506e-08, 1.8510e-08,\n 1.0431e-08, 9.1248e-09, 1.9461e-08, 1.3672e-08, 6.1593e-10, 1.0105e-09,\n 1.5452e-08, 2.2547e-08, 1.3875e-08, 2.8036e-08, 1.5012e-08, 6.7638e-09,\n 5.2693e-09, 3.4094e-08, 3.2823e-08, 3.7560e-09, 4.7189e-09, 2.7224e-08,\n 1.6034e-08, 2.3179e-09, 5.3652e-09, 1.2081e-08, 2.2113e-08, 2.4278e-09,\n 2.3723e-09, 1.2559e-08, 1.2717e-08, 3.2881e-08, 1.3879e-08, 9.6116e-09,\n 1.0622e-08, 1.6087e-08, 8.6420e-09, 1.5472e-08, 1.2974e-08, 1.8657e-08,\n 3.7978e-09, 2.2791e-08, 6.6176e-09, 1.3120e-08, 8.4738e-09, 4.3325e-09,\n 1.1280e-08, 8.9614e-09, 8.3415e-09, 9.8598e-09, 4.3136e-08, 1.9924e-08,\n 9.0447e-09, 6.6362e-09, 3.2983e-08, 3.0073e-08, 5.4790e-09, 3.6891e-09,\n 7.9019e-08, 7.6585e-08, 9.2979e-08, 3.5254e-08, 1.5753e-08, 4.4977e-08,\n 6.9740e-07, 5.7008e-08, 2.9657e-08, 3.7516e-08, 2.9207e-08, 4.8002e-08,\n 1.9793e-08, 7.5015e-08, 4.9930e-08, 2.2139e-08, 6.3152e-08, 1.2733e-08,\n 3.6147e-08, 3.9502e-08, 3.3726e-08, 3.9874e-08, 6.5963e-08, 1.9408e-08,\n 3.2294e-08, 2.0287e-08, 1.3882e-08, 2.5159e-08, 1.1204e-08, 6.0172e-08,\n 1.0587e-08, 2.4438e-08, 2.3989e-08, 1.1960e-08, 1.7627e-08, 2.4200e-08,\n 3.6511e-08, 1.4394e-08, 1.5057e-09, 1.9128e-09, 1.4970e-08, 1.1217e-08,\n 1.3242e-08, 3.5224e-09, 1.9827e-09, 3.6965e-08, 4.0445e-09, 2.8853e-08,\n 3.4724e-08, 3.4571e-08, 8.2067e-09, 9.2429e-09, 1.2808e-08, 1.4241e-08,\n 1.9898e-08, 5.5154e-09, 1.2253e-08, 9.9279e-09, 1.5218e-08, 1.0502e-08,\n 6.4765e-09, 5.0464e-09, 5.1701e-09, 1.1841e-08, 3.6446e-08, 1.2641e-08,\n 1.4027e-08, 2.8978e-10, 1.0535e-08, 1.7336e-08, 6.6883e-09, 4.6031e-09,\n 1.3937e-08, 3.5440e-08, 1.5370e-08, 1.6763e-08, 4.3258e-09, 1.7016e-08,\n 1.2493e-08, 1.1981e-08, 4.8920e-09, 2.5062e-08, 3.6714e-09, 1.6186e-09,\n 2.0771e-08, 2.6549e-08, 1.6848e-08, 1.6023e-08, 1.0369e-08, 2.8227e-08,\n 2.7056e-08, 9.5688e-09, 2.5086e-08, 3.1807e-08, 4.4583e-09, 8.8299e-09,\n 1.1091e-08, 1.0142e-08, 4.9649e-09, 6.1331e-10, 8.9141e-09, 1.2071e-08,\n 3.8641e-09, 8.3468e-09, 1.1843e-08, 5.8473e-09, 1.4061e-08, 4.7861e-09,\n 5.0343e-08, 9.7829e-09, 4.9052e-10, 9.4575e-09, 1.5323e-08, 5.8336e-08,\n 2.5370e-08, 1.0643e-09, 8.0696e-09, 1.3754e-08, 2.5909e-09, 5.9519e-09,\n 1.7161e-08, 6.2677e-09, 2.6286e-08, 4.2077e-09, 3.8750e-08, 2.2842e-09,\n 2.4169e-08, 1.5598e-09], device='cuda:0')}, 91: {'step': tensor(62477.), 'exp_avg': tensor([[ 9.2194e-07, 5.9382e-07, -4.0610e-09, ..., -2.5097e-06,\n -1.5688e-06, 7.9865e-07],\n [ 8.3751e-07, 7.6722e-07, -1.0150e-06, ..., -1.6739e-06,\n 9.4757e-08, 5.0034e-07],\n [ 3.6413e-07, 3.5129e-07, -8.1043e-07, ..., -3.2569e-07,\n 4.3064e-07, -2.4223e-07],\n ...,\n [-2.1002e-06, -2.1098e-06, 2.0278e-06, ..., 6.5650e-06,\n 2.0284e-07, 2.7642e-07],\n [-6.5811e-06, -5.4018e-06, -1.9850e-06, ..., 1.0500e-05,\n -9.9016e-07, -7.5321e-06],\n [-6.7451e-07, -1.3360e-06, 4.7350e-06, ..., 1.0260e-06,\n -5.5024e-06, 2.0582e-07]], device='cuda:0'), 'exp_avg_sq': tensor([[2.0250e-11, 1.6921e-11, 6.7158e-11, ..., 1.3666e-10, 7.4475e-11,\n 2.1561e-11],\n [2.7829e-11, 2.4405e-11, 5.6653e-11, ..., 1.6828e-10, 7.7362e-11,\n 2.3028e-11],\n [1.6223e-11, 1.4703e-11, 2.1876e-11, ..., 8.4900e-11, 3.3223e-11,\n 1.4529e-11],\n ...,\n [3.4525e-10, 2.9741e-10, 4.3659e-10, ..., 1.8739e-09, 1.2677e-09,\n 2.9092e-10],\n [9.4183e-10, 8.3638e-10, 1.3619e-09, ..., 4.6836e-09, 2.2884e-09,\n 7.8595e-10],\n [7.7340e-10, 6.6251e-10, 1.0179e-09, ..., 2.8429e-09, 1.6527e-09,\n 6.0673e-10]], device='cuda:0')}, 92: {'step': tensor(62477.), 'exp_avg': tensor([-3.5432e-06, -2.9737e-06, -5.1689e-07, ..., 9.6436e-06,\n 2.7869e-05, 2.4684e-06], device='cuda:0'), 'exp_avg_sq': tensor([4.7061e-10, 6.2968e-10, 3.3458e-10, ..., 6.7842e-09, 1.8027e-08,\n 1.4461e-08], device='cuda:0')}, 93: {'step': tensor(62477.), 'exp_avg': tensor([[ 4.7749e-06, 1.5462e-07, 9.3894e-06, ..., -1.4518e-06,\n -2.1800e-07, -2.2425e-06],\n [ 4.3257e-06, -8.1566e-06, 5.0435e-06, ..., -1.6213e-06,\n 2.3864e-06, 1.4841e-07],\n [-2.3415e-06, -6.2187e-06, -2.8122e-06, ..., -2.0722e-08,\n 6.5308e-07, 1.0628e-06],\n ...,\n [ 7.2887e-07, 2.7706e-07, 2.4020e-06, ..., -1.8020e-07,\n 3.1101e-07, -3.5390e-06],\n [ 1.3099e-06, 2.9886e-06, -1.2180e-06, ..., -2.1270e-06,\n 1.3412e-06, 1.5392e-07],\n [ 1.1086e-06, 1.1664e-06, -3.8008e-06, ..., 4.4158e-07,\n -6.1830e-07, 3.5813e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[5.9259e-10, 1.3383e-09, 1.0034e-09, ..., 5.9375e-11, 7.4671e-11,\n 2.3444e-10],\n [3.5079e-10, 1.2317e-09, 1.1006e-09, ..., 4.3425e-11, 7.5987e-11,\n 2.8003e-10],\n [3.3291e-10, 8.7899e-10, 8.5810e-10, ..., 3.9125e-11, 7.0125e-11,\n 2.0350e-10],\n ...,\n [1.0794e-10, 5.5041e-10, 1.6081e-09, ..., 1.2185e-10, 1.8438e-10,\n 3.0637e-09],\n [3.3256e-10, 1.3631e-09, 5.4420e-09, ..., 2.9717e-10, 3.7321e-10,\n 9.8344e-09],\n [1.3364e-10, 7.3972e-10, 2.1676e-09, ..., 9.8095e-11, 1.9028e-10,\n 4.4866e-09]], device='cuda:0')}, 94: {'step': tensor(62477.), 'exp_avg': tensor([-9.9789e-07, -1.9142e-06, -2.0638e-06, ..., -6.0186e-06,\n -4.8294e-06, 8.4828e-07], device='cuda:0'), 'exp_avg_sq': tensor([2.2901e-10, 2.0570e-10, 1.7703e-10, ..., 2.0704e-09, 3.2891e-09,\n 2.5289e-09], device='cuda:0')}, 95: {'step': tensor(62477.), 'exp_avg': tensor([[-4.2446e-06, -2.9964e-06, 3.9938e-06, ..., -2.8054e-07,\n 7.5895e-06, -2.6571e-07],\n [-3.7189e-07, -4.7178e-07, -1.1497e-06, ..., -8.6761e-07,\n 7.8760e-07, 1.5606e-06],\n [-2.7119e-06, -2.6739e-05, 8.0306e-06, ..., -2.0622e-06,\n 1.8848e-05, 5.2794e-07],\n ...,\n [ 2.3751e-07, -6.3156e-07, 5.1440e-07, ..., 7.2300e-07,\n -2.7418e-06, -3.3955e-07],\n [-3.4632e-06, -1.3474e-05, 5.4701e-06, ..., 3.1966e-06,\n -7.8279e-06, 1.9160e-06],\n [-1.6985e-08, -2.0127e-06, -2.6720e-06, ..., 7.0461e-07,\n 1.8365e-06, -2.4716e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[1.5086e-09, 4.2847e-09, 6.0110e-10, ..., 2.1256e-10, 1.3409e-09,\n 2.0394e-10],\n [4.4872e-11, 1.0023e-10, 1.2628e-11, ..., 9.0428e-12, 2.4816e-11,\n 2.1365e-11],\n [3.1799e-09, 2.0210e-08, 1.2523e-09, ..., 8.4664e-10, 8.5873e-09,\n 7.4031e-10],\n ...,\n [2.3036e-11, 1.5103e-10, 7.9627e-12, ..., 6.1618e-12, 5.5665e-11,\n 6.6285e-12],\n [2.2950e-10, 2.8841e-09, 1.0181e-10, ..., 1.0341e-10, 1.2669e-09,\n 1.2414e-10],\n [1.8106e-09, 2.0276e-08, 9.8374e-10, ..., 7.9242e-10, 9.6534e-09,\n 8.0890e-10]], device='cuda:0')}, 96: {'step': tensor(62477.), 'exp_avg': tensor([ 2.0434e-06, 1.7458e-06, 1.8902e-06, -1.8350e-05, -2.2015e-06,\n -2.8550e-06, -1.4160e-06, 1.5627e-06, 8.5851e-06, -5.7500e-06,\n -8.4387e-07, -1.5434e-06, 5.3489e-07, 2.8802e-06, -2.4690e-06,\n 1.2044e-05, 5.4951e-06, -1.9955e-06, 1.0052e-05, -6.4148e-06,\n 1.1847e-05, 8.8531e-07, 1.5719e-06, -2.2867e-06, -2.8407e-06,\n 3.9038e-06, -7.3155e-06, -1.8630e-05, 4.0780e-07, 1.0731e-06,\n 6.5505e-06, 2.0903e-05, 3.8692e-06, 4.0141e-06, 3.2737e-07,\n 3.3238e-07, -2.0422e-06, -2.3822e-05, -6.9101e-06, 6.3555e-06,\n 5.6519e-06, -4.0218e-06, -1.0507e-05, 1.8986e-06, 1.0199e-05,\n -2.9535e-06, 5.9011e-06, 3.7690e-06, -1.1173e-06, 2.5570e-06,\n -1.2012e-05, -2.5863e-06, -1.9128e-06, 4.8649e-06, -3.6817e-05,\n 5.0978e-06, 1.7377e-06, 1.4951e-06, -7.9509e-06, 1.5350e-05,\n -2.6280e-06, 4.4019e-07, -7.7463e-06, 5.3721e-06, 9.0091e-07,\n 5.2781e-07, 2.2591e-05, 6.9088e-06, -7.3330e-06, 1.3350e-06,\n 5.9371e-06, 6.0855e-06, 8.8687e-06, -4.8582e-06, -4.1689e-05,\n -4.1129e-06, -7.3254e-06, 2.6058e-06, 3.3481e-07, -4.5673e-05,\n 2.0667e-05, 2.0611e-06, -2.5064e-06, -4.8667e-07, 2.0567e-06,\n -1.2530e-06, 3.7327e-07, -2.7900e-06, 9.3294e-06, -8.3360e-06,\n 8.2971e-07, -3.0851e-06, -1.2771e-06, -7.8123e-06, -3.7353e-06,\n 8.9657e-07, -7.6487e-06, 1.5184e-07, 2.6978e-05, 1.4127e-06,\n -1.6410e-06, -2.1263e-06, 4.0142e-07, 7.4135e-06, -1.3204e-06,\n 8.3293e-06, 1.2058e-05, -3.7681e-06, -1.2149e-05, -9.8110e-06,\n 6.0978e-06, 8.3500e-06, 1.0503e-06, 2.9785e-06, 4.8862e-05,\n 1.1304e-05, -9.0953e-07, -8.1059e-06, -8.2289e-06, 1.1141e-05,\n -1.7168e-06, 9.9152e-06, -9.9788e-06, -4.6824e-07, -1.3190e-06,\n 2.6238e-06, -1.2501e-06, -1.6277e-05, -5.3220e-06, 1.7992e-05,\n 3.1898e-07, -2.7896e-05, 8.5609e-06, 3.2653e-06, 1.1968e-05,\n 4.0145e-06, 8.9846e-06, 1.0365e-05, 1.1654e-06, 8.2835e-07,\n 5.9527e-06, -8.3515e-06, 8.3131e-06, 6.6447e-07, -1.0685e-06,\n -1.4869e-05, 2.6155e-06, -8.5711e-06, -8.3204e-06, 1.8285e-05,\n -3.8838e-06, -1.7986e-07, 1.2058e-05, -2.8594e-06, 6.6159e-06,\n -8.3501e-06, 4.3662e-06, 1.0628e-06, 1.3337e-07, 5.1218e-06,\n 3.4798e-06, 9.2463e-06, 1.1398e-06, 2.1775e-06, 1.6423e-05,\n 2.8222e-06, -2.8662e-06, -4.0494e-06, 3.8211e-06, -1.1631e-05,\n 4.0663e-06, -2.3228e-06, 4.9542e-06, -1.9265e-07, 8.8716e-06,\n -1.7157e-06, 4.4805e-06, -7.2019e-06, -2.6525e-06, -8.4708e-06,\n -1.0837e-05, -3.6481e-06, 8.8872e-07, 1.2313e-05, -5.3404e-06,\n -1.6414e-06, 1.6680e-06, -7.5445e-06, 2.7162e-06, 9.0062e-06,\n 9.2799e-06, 3.4754e-06, -6.3664e-06, 1.4927e-06, -1.6328e-05,\n 2.0070e-06, -1.7168e-06, -6.6806e-06, -3.9887e-05, -9.9832e-06,\n 1.2996e-06, 1.0164e-06, -3.1166e-06, -6.1524e-07, -1.1127e-05,\n -1.3847e-07, 2.7527e-06, -5.5570e-06, -1.3368e-06, 4.3956e-06,\n -1.8458e-05, 2.9429e-06, -1.3215e-05, 3.4232e-07, -2.5123e-05,\n -1.6060e-05, -1.2674e-05, -3.7981e-06, -2.6810e-06, -7.3672e-07,\n -4.3529e-06, -1.7530e-05, 1.0688e-05, -1.0749e-05, 1.9544e-06,\n -8.3346e-06, -8.1936e-06, 8.9346e-06, -2.1626e-05, 2.1335e-05,\n 6.8026e-06, -1.1368e-06, -5.0829e-07, 8.8564e-07, -1.2734e-07,\n 1.5152e-07, 8.9950e-06, -1.3533e-05, 9.8964e-06, -2.0818e-06,\n 9.0920e-07, 3.1885e-06, -1.6732e-06, 8.2922e-06, -6.9027e-06,\n -3.0435e-06, -3.8566e-06, -2.4549e-06, 2.2733e-07, 3.0832e-06,\n 8.4926e-06, -2.9974e-07, -1.0496e-05, -8.6659e-06, -6.4361e-07,\n 1.3049e-05, 1.2104e-05, -5.6257e-06, -6.7852e-07, 7.9863e-06,\n 4.0616e-06, 2.5696e-06, 2.0780e-06, -3.5776e-05, -2.4294e-07,\n 1.0464e-05, -1.5929e-06, -1.7611e-05, 1.4394e-06, -1.8980e-06,\n -5.8508e-07, -2.3672e-06, -2.2568e-06, -1.2600e-06, 5.7218e-06,\n 1.7271e-05, -1.7586e-05, -3.0590e-05, 3.8135e-06, 4.2943e-06,\n 1.9722e-06, -2.3955e-06, -3.5430e-06, -2.9591e-06, -4.9056e-06,\n 4.4103e-07, 6.4056e-07, -5.1713e-06, 1.7253e-05, -3.3882e-06,\n 2.1066e-06, -1.9654e-06, 2.2562e-07, 1.8889e-06, -1.3978e-05,\n 1.7245e-05, 2.4016e-05, 7.9973e-06, 2.1526e-06, 5.0233e-07,\n -5.2183e-06, 4.1478e-06, 5.5130e-06, 1.2170e-06, -1.4655e-06,\n -2.9596e-06, 3.8641e-06, 8.4741e-08, -8.5479e-06, -3.1329e-05,\n 1.0672e-05, 2.1165e-06, -5.0172e-06, -6.9869e-06, 3.4649e-06,\n -1.1038e-06, 1.4747e-06, 4.5823e-06, -1.2031e-06, 6.6751e-07,\n -7.8953e-07, 1.9323e-06, -1.5643e-05, 7.3205e-06, -1.0050e-05,\n -4.2252e-06, -1.2022e-06, 4.7702e-06, 4.6243e-06, 3.0238e-06,\n 2.3112e-06, -1.3055e-06, -9.3881e-06, -7.3607e-06, 2.6363e-05,\n 1.1910e-06, -4.3500e-06, 1.1644e-05, -9.7811e-06, 2.8626e-05,\n -5.8496e-06, 7.6984e-06, -4.7146e-07, 1.0688e-06, 2.1595e-06,\n 1.5700e-05, 3.9455e-06, 2.6656e-06, 2.1342e-06, 2.1314e-07,\n 4.4812e-06, -2.6560e-06, -2.4097e-05, -8.0431e-06, -1.3398e-05,\n -7.0405e-07, 1.6204e-07, 2.2014e-07, -2.1303e-08, -2.6198e-06,\n -1.8473e-06, -9.7004e-06, 3.4841e-06, 1.4417e-06, 1.1041e-05,\n -8.7242e-07, -9.7311e-06, -5.6588e-06, -1.5474e-05, -2.0050e-06,\n -7.1970e-06, -1.7171e-05, 5.0923e-07, 8.5512e-06, 4.1597e-06,\n -4.7053e-06, 3.8880e-06, -6.1456e-07, -4.5295e-06, 3.0448e-06,\n -2.5382e-06, 1.4267e-06, -2.6074e-06, -1.3456e-05, 2.1334e-06,\n -1.5475e-06, 7.4638e-06, 3.5321e-06, -1.2159e-06, 1.2525e-05,\n 4.2053e-06, 1.8982e-05, 7.2429e-06, 9.8095e-06, -8.1132e-06,\n 2.7126e-05, -3.1260e-06, -5.6536e-05, 6.4560e-06, 5.2133e-07,\n 6.1268e-06, -5.8738e-06, -5.7392e-06, 1.4071e-06, 1.4275e-06,\n -3.8288e-06, -2.5260e-06, -1.1781e-06, 1.4670e-06, 4.1919e-06,\n 2.0042e-06, 6.9462e-07, -2.1331e-06, -8.4765e-07, -1.6876e-06,\n -2.1201e-06, -1.5318e-06, -5.2666e-06, -4.4434e-06, 3.7077e-06,\n 1.3144e-06, 8.5557e-06, 1.0121e-05, -2.5000e-07, -2.3719e-05,\n 9.1340e-06, -7.4561e-07, -9.0149e-06, -2.8509e-06, -2.8629e-06,\n 9.4973e-06, -5.3434e-06, 4.8643e-06, 2.5611e-06, -1.1712e-05,\n -2.6676e-05, -2.7096e-06, 3.5111e-06, -2.0229e-05, 4.9370e-06,\n 9.2854e-07, -1.1099e-06, -1.4996e-06, 9.8636e-07, -2.0392e-06,\n -6.7660e-07, -1.7687e-05, 1.9788e-06, 1.7567e-06, -1.5289e-06,\n -1.4543e-06, -4.2075e-06, 8.7812e-07, -6.6749e-06, 2.1784e-05,\n -4.8125e-07, 1.8616e-06, 3.9751e-06, -2.0036e-06, -7.8853e-06,\n 1.6269e-05, 4.3504e-06, -2.2595e-07, 9.3653e-08, 3.5133e-06,\n 4.8201e-06, -1.2028e-06, -1.7321e-06, 3.3600e-06, -7.3066e-07,\n -4.2698e-06, -2.5288e-07, -4.6300e-07, 1.9316e-06, -1.4624e-06,\n -5.5280e-06, -7.3668e-07, -2.8521e-06, 2.7644e-06, -1.5447e-06,\n -5.8227e-06, -1.5217e-06, -2.5347e-06, -6.1914e-06, -6.8877e-06,\n -8.5783e-06, -3.9103e-07, 4.5888e-06, 1.0367e-05, 3.5040e-06,\n -3.8742e-05, 3.2268e-06, -3.0837e-06, 1.5676e-06, 2.7135e-06,\n 6.5131e-06, -2.9344e-06, -5.7308e-06, 3.1105e-06, 2.1275e-05,\n 2.2087e-06, 3.7022e-06, -1.5610e-05, 5.1945e-06, 3.2831e-06,\n -1.3312e-05, -2.5736e-06, -6.4594e-06, 3.0204e-06, 3.4458e-07,\n 1.2116e-05, -1.3800e-05], device='cuda:0'), 'exp_avg_sq': tensor([7.9893e-09, 4.7161e-10, 3.1001e-08, 2.4662e-08, 3.2299e-09, 2.9901e-09,\n 3.9893e-10, 6.8063e-10, 3.2717e-09, 3.5652e-09, 6.4266e-10, 1.4084e-09,\n 2.0806e-09, 2.6112e-09, 1.7590e-09, 5.6314e-09, 4.6312e-09, 3.6432e-09,\n 6.5355e-09, 2.2346e-09, 3.2573e-08, 7.1210e-10, 1.7391e-10, 2.6367e-10,\n 1.8528e-09, 3.4048e-10, 3.1160e-08, 1.6350e-08, 7.2888e-10, 3.9198e-10,\n 3.4361e-09, 8.4415e-09, 1.5347e-08, 3.7986e-10, 3.7891e-09, 6.3400e-09,\n 3.7242e-10, 2.2459e-08, 2.6406e-09, 3.6142e-09, 4.4619e-09, 1.2878e-08,\n 6.0532e-09, 1.0248e-08, 4.9038e-10, 8.7858e-10, 3.7972e-09, 6.2536e-09,\n 3.1139e-10, 2.7931e-10, 4.4511e-09, 1.7415e-09, 3.9891e-09, 9.9930e-10,\n 4.6918e-08, 5.6484e-09, 4.2623e-10, 5.4851e-09, 1.5807e-09, 1.8846e-08,\n 2.7296e-09, 1.6510e-10, 1.0027e-08, 5.2738e-10, 2.7902e-10, 3.5954e-09,\n 1.2590e-08, 2.1161e-09, 5.0224e-09, 4.4753e-09, 1.9543e-09, 5.9969e-10,\n 2.3863e-08, 1.7706e-09, 1.1213e-07, 8.6621e-10, 3.9445e-09, 1.5100e-09,\n 7.2840e-09, 9.4768e-08, 2.9255e-08, 7.7836e-09, 4.2980e-10, 8.8703e-10,\n 5.3842e-10, 9.4459e-10, 4.0817e-09, 3.7298e-09, 1.5188e-08, 1.1587e-08,\n 2.1323e-09, 8.4934e-09, 6.5551e-10, 1.6261e-08, 1.3180e-09, 1.2257e-08,\n 9.3350e-09, 3.3010e-09, 1.6803e-08, 2.7687e-10, 7.2173e-10, 2.7891e-10,\n 2.7247e-10, 2.1898e-09, 2.0241e-09, 1.1715e-09, 2.0081e-08, 3.5373e-09,\n 1.3544e-08, 1.8013e-09, 8.8481e-08, 1.9821e-08, 5.9052e-10, 2.3640e-10,\n 6.7783e-08, 8.1341e-09, 3.9519e-09, 2.0613e-08, 3.5609e-10, 2.4368e-08,\n 3.9677e-10, 3.7089e-08, 7.5096e-09, 4.1090e-10, 3.2594e-10, 8.0736e-10,\n 2.9050e-09, 2.6703e-08, 4.7025e-09, 3.2821e-08, 1.9503e-10, 7.8042e-09,\n 4.8844e-09, 7.2970e-09, 2.5138e-09, 8.4025e-09, 8.0455e-09, 9.5185e-09,\n 3.7433e-09, 7.7951e-09, 2.4911e-09, 8.5487e-09, 6.6961e-09, 1.9411e-10,\n 3.1005e-09, 2.1350e-08, 1.4354e-09, 3.1848e-09, 4.3338e-09, 5.1621e-09,\n 1.2652e-09, 1.2840e-09, 1.1983e-08, 1.9825e-09, 2.3560e-09, 3.2170e-09,\n 3.9766e-09, 6.5701e-09, 3.7337e-10, 1.6017e-09, 7.2700e-10, 3.2775e-08,\n 3.8702e-10, 3.1401e-10, 5.5483e-09, 1.2602e-09, 1.8616e-09, 1.0090e-09,\n 4.7923e-10, 7.2204e-09, 2.7836e-09, 1.4822e-09, 2.4222e-09, 1.1709e-10,\n 7.2751e-09, 1.0777e-09, 2.1998e-09, 6.2492e-09, 3.1304e-10, 3.6059e-08,\n 3.3078e-09, 1.0071e-09, 3.7715e-10, 1.2155e-09, 3.7039e-09, 3.0457e-10,\n 9.0108e-10, 1.2611e-09, 2.1114e-09, 1.1524e-08, 1.1654e-08, 7.9340e-08,\n 1.1932e-08, 5.7929e-10, 1.7700e-08, 7.9264e-10, 1.1037e-08, 1.6615e-08,\n 7.3053e-08, 4.2608e-08, 4.9369e-10, 2.4432e-08, 5.3150e-10, 1.5966e-08,\n 2.4876e-08, 2.2294e-09, 1.5077e-08, 2.7571e-09, 6.7925e-10, 2.4706e-09,\n 4.7818e-09, 1.4177e-09, 1.7368e-08, 2.1822e-10, 2.7651e-08, 2.0837e-08,\n 4.0140e-09, 4.0344e-10, 5.4750e-10, 1.7069e-10, 1.5820e-09, 8.1106e-09,\n 1.2878e-08, 4.8364e-09, 3.2251e-10, 3.1599e-09, 1.9237e-09, 1.9283e-09,\n 1.5328e-08, 1.7733e-08, 1.0608e-08, 4.1013e-10, 2.6117e-10, 3.3035e-09,\n 6.6887e-09, 3.4357e-09, 1.1947e-09, 3.2072e-08, 7.9276e-09, 1.1828e-09,\n 2.1770e-10, 4.0010e-09, 9.7910e-09, 1.9186e-09, 4.0353e-09, 4.5060e-10,\n 2.2921e-09, 1.0198e-09, 2.0082e-09, 6.0483e-10, 2.3471e-08, 1.6463e-09,\n 7.4881e-09, 8.3194e-09, 4.3030e-10, 2.9376e-08, 2.8440e-08, 5.3269e-09,\n 4.4501e-09, 3.3070e-09, 1.1477e-09, 5.5772e-09, 1.8853e-09, 2.2693e-08,\n 5.6621e-09, 6.7319e-09, 2.0843e-09, 6.0217e-09, 1.2911e-09, 1.6932e-09,\n 5.8064e-11, 1.3178e-09, 1.8428e-08, 1.0635e-09, 4.6108e-09, 2.7014e-08,\n 2.4513e-08, 1.2161e-08, 3.7019e-09, 3.3841e-08, 3.0221e-09, 1.6040e-08,\n 2.6600e-10, 4.0186e-09, 2.6645e-09, 1.8114e-09, 1.1478e-09, 4.1714e-10,\n 3.9325e-08, 9.1544e-10, 1.2279e-09, 1.2425e-09, 9.7918e-09, 3.6002e-09,\n 1.1750e-08, 2.0336e-08, 6.5158e-09, 4.1888e-09, 4.6833e-10, 1.0739e-08,\n 3.5138e-10, 1.3920e-08, 5.7785e-09, 1.4962e-09, 2.2200e-09, 2.8391e-09,\n 2.4927e-09, 9.0256e-10, 2.4108e-09, 4.4386e-08, 4.6035e-09, 2.9734e-10,\n 1.7165e-09, 6.4861e-09, 5.9370e-09, 9.9232e-09, 2.2504e-10, 4.9565e-09,\n 1.0555e-09, 1.6941e-09, 1.4114e-09, 1.0210e-09, 5.5948e-08, 6.9157e-09,\n 7.6530e-09, 3.1462e-09, 3.3031e-08, 5.1159e-09, 7.1395e-09, 9.8294e-09,\n 2.7803e-09, 3.5806e-09, 1.2640e-09, 2.6421e-09, 2.3332e-08, 1.0847e-09,\n 3.8483e-10, 1.2882e-09, 3.6873e-09, 2.0458e-08, 2.2180e-09, 5.2800e-09,\n 1.1376e-10, 2.1201e-09, 2.7675e-09, 3.8344e-08, 6.0583e-09, 4.6807e-09,\n 1.8058e-09, 1.2883e-09, 9.5576e-10, 1.7438e-09, 1.1461e-08, 7.0118e-09,\n 2.7321e-08, 1.2020e-10, 4.4007e-10, 6.1579e-10, 1.8109e-10, 1.0256e-09,\n 1.1741e-09, 4.6865e-08, 2.3226e-09, 3.2371e-10, 2.4416e-08, 5.8467e-10,\n 1.1363e-08, 5.8892e-08, 5.7713e-09, 1.9718e-10, 4.6840e-09, 2.3058e-08,\n 2.3735e-10, 1.2810e-08, 3.5953e-10, 5.3530e-08, 8.5823e-10, 1.4175e-09,\n 1.7749e-09, 4.2966e-09, 3.7134e-10, 7.1762e-10, 7.0836e-09, 2.4245e-09,\n 6.1368e-10, 1.4676e-10, 9.6376e-09, 3.4892e-09, 8.7938e-09, 3.3625e-09,\n 7.0766e-08, 2.8931e-08, 7.3823e-09, 5.3431e-09, 3.3832e-08, 1.3844e-08,\n 1.6909e-10, 1.2329e-07, 1.0699e-08, 8.1471e-10, 2.7340e-09, 4.7063e-09,\n 9.0169e-10, 1.2508e-09, 8.7652e-10, 8.4259e-10, 2.3266e-09, 4.4414e-10,\n 1.3099e-08, 1.6535e-09, 1.4436e-09, 1.3321e-09, 2.8197e-10, 4.2727e-10,\n 8.6362e-10, 4.9772e-09, 4.2489e-09, 1.2990e-08, 3.3701e-09, 6.0390e-10,\n 8.4141e-10, 6.5296e-09, 5.4421e-09, 3.9756e-10, 2.8349e-08, 1.7125e-08,\n 6.6193e-10, 8.5876e-09, 2.5288e-09, 7.3656e-10, 1.0781e-08, 1.4012e-09,\n 1.2230e-09, 3.4232e-10, 6.1301e-09, 1.2708e-08, 1.8450e-09, 4.9490e-09,\n 5.6377e-09, 9.9931e-10, 6.9813e-10, 4.4033e-09, 7.7477e-09, 1.8487e-09,\n 8.0193e-10, 2.3944e-10, 5.6487e-09, 1.0518e-08, 9.4907e-10, 5.4653e-10,\n 1.2213e-09, 2.3977e-09, 7.3835e-10, 3.1559e-09, 5.1961e-08, 6.9545e-10,\n 6.8500e-09, 1.1620e-09, 2.4000e-09, 7.5607e-09, 1.6747e-08, 4.6565e-09,\n 1.6351e-10, 2.1670e-09, 2.1200e-09, 2.6305e-09, 7.6930e-10, 1.6721e-10,\n 3.8527e-09, 2.5361e-09, 9.1814e-09, 3.7494e-10, 1.2755e-09, 2.9467e-10,\n 5.8600e-10, 2.9616e-09, 1.3326e-09, 3.2711e-09, 4.5496e-09, 1.4222e-09,\n 1.3488e-09, 2.4560e-09, 2.0137e-09, 3.1359e-08, 4.1377e-10, 3.8371e-08,\n 1.7558e-09, 9.7110e-10, 1.3909e-09, 1.9740e-09, 1.6754e-08, 1.1511e-08,\n 1.5208e-10, 4.3449e-10, 7.6503e-10, 3.2832e-09, 1.0407e-09, 2.9849e-09,\n 5.3071e-10, 1.8371e-08, 2.7704e-10, 1.0670e-09, 1.4316e-08, 2.3406e-08,\n 1.0348e-08, 9.1487e-09, 1.3094e-09, 9.6040e-10, 7.0980e-10, 3.5607e-10,\n 4.7631e-09, 2.7809e-08], device='cuda:0')}, 97: {'step': tensor(62477.), 'exp_avg': tensor([[ 2.3724e-06, 4.9849e-06, 3.9280e-06, ..., 5.3687e-06,\n 2.6556e-06, -2.3057e-06],\n [-9.0908e-06, 2.1142e-06, -1.0393e-05, ..., -4.3677e-06,\n 2.9837e-06, -3.6316e-06],\n [ 4.7776e-06, 4.5945e-07, -2.2404e-06, ..., 3.5814e-06,\n -3.7837e-06, -2.1095e-06],\n ...,\n [ 6.0190e-07, 1.9569e-06, -3.9198e-06, ..., 3.5673e-06,\n -5.5633e-06, 1.0066e-07],\n [ 5.8050e-06, 1.4620e-06, 8.4682e-06, ..., 2.0332e-06,\n 3.1346e-06, 2.6409e-06],\n [-7.4303e-06, 1.5148e-06, 9.3472e-06, ..., -7.9977e-06,\n 6.3561e-06, -3.8369e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[1.9868e-09, 1.6538e-09, 3.6244e-09, ..., 1.2329e-09, 1.5373e-09,\n 4.5487e-10],\n [3.2381e-09, 1.6063e-09, 6.2868e-09, ..., 2.5270e-09, 2.3120e-09,\n 7.1569e-10],\n [3.0994e-09, 1.3210e-09, 3.8566e-09, ..., 1.1506e-09, 1.3629e-09,\n 4.8374e-10],\n ...,\n [2.6814e-10, 4.9510e-10, 1.0598e-09, ..., 5.2552e-10, 3.4967e-10,\n 2.4815e-10],\n [1.3827e-09, 1.5025e-09, 4.0106e-09, ..., 1.4481e-09, 2.3171e-09,\n 9.9038e-10],\n [2.4426e-09, 1.9860e-09, 5.4175e-09, ..., 1.7756e-09, 1.9755e-09,\n 7.7985e-10]], device='cuda:0')}, 98: {'step': tensor(62477.), 'exp_avg': tensor([ 4.1222e-06, -4.4106e-06, 3.2414e-06, ..., 5.6310e-07,\n 2.4031e-06, -2.2749e-06], device='cuda:0'), 'exp_avg_sq': tensor([5.1884e-10, 8.0629e-10, 7.4934e-10, ..., 1.7276e-10, 6.9379e-10,\n 7.8136e-10], device='cuda:0')}, 99: {'step': tensor(62477.), 'exp_avg': tensor([[ 1.2771e-05, 7.2416e-06, -1.0781e-06, ..., 2.6249e-06,\n 1.1321e-05, 1.0135e-05],\n [ 3.6085e-06, 4.1684e-06, 1.3032e-06, ..., -4.4418e-06,\n -4.6454e-06, -6.4678e-06],\n [ 7.2638e-06, 1.2460e-05, -6.7141e-06, ..., -3.3220e-06,\n -1.8393e-05, -1.8884e-05],\n ...,\n [-1.6377e-06, 2.5490e-06, 1.8918e-06, ..., -3.3633e-07,\n -2.9959e-06, -8.4291e-07],\n [ 1.5324e-07, 4.3428e-06, -1.4606e-06, ..., 9.8766e-07,\n 2.0482e-08, 1.8303e-06],\n [ 2.2088e-06, 6.8641e-06, -2.4648e-07, ..., 3.2072e-06,\n -6.1786e-07, -6.0372e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[3.7804e-09, 2.5368e-08, 2.9317e-09, ..., 1.4019e-09, 2.1959e-09,\n 7.9882e-09],\n [9.3611e-10, 7.9620e-09, 9.2050e-10, ..., 4.0900e-10, 7.7730e-10,\n 3.7542e-09],\n [3.7137e-09, 2.2681e-08, 3.2742e-09, ..., 1.6469e-09, 3.4379e-09,\n 9.8320e-09],\n ...,\n [7.0307e-10, 3.5892e-09, 8.9694e-10, ..., 3.0171e-10, 7.2025e-10,\n 4.1239e-09],\n [2.6107e-10, 7.8130e-10, 4.2494e-10, ..., 1.3145e-10, 3.4234e-10,\n 1.3027e-09],\n [4.7538e-10, 3.8741e-09, 6.8987e-10, ..., 2.6599e-10, 8.6609e-10,\n 3.1112e-09]], device='cuda:0')}, 100: {'step': tensor(62477.), 'exp_avg': tensor([ 2.2141e-05, -4.6771e-06, 7.9832e-07, 2.2667e-05, 1.1966e-06,\n 2.7146e-06, 1.3409e-05, 2.1231e-05, 9.1388e-06, 1.4257e-05,\n -5.2949e-06, -2.4512e-06, -6.6678e-07, 1.9697e-05, -1.5720e-05,\n 1.3784e-05, 1.1677e-06, 1.1253e-05, 3.0163e-05, -1.0913e-05,\n -9.6883e-06, -2.0255e-05, -1.6469e-05, 4.6461e-06, -5.8125e-06,\n -2.3901e-05, 4.9671e-06, 2.0200e-06, 4.1364e-06, -1.7559e-06,\n 1.5213e-05, 2.5191e-05, 3.8442e-07, 5.3398e-06, -1.7122e-05,\n 1.2800e-06, -2.3658e-05, 2.2238e-07, -4.6271e-05, -3.4502e-06,\n 1.0641e-06, -1.5894e-05, -1.5611e-05, 6.3523e-06, 1.5373e-05,\n -1.7528e-06, 1.2356e-05, -8.1665e-06, -5.5901e-06, 1.5027e-05,\n 7.5004e-07, 2.5415e-06, 2.8694e-06, -1.0019e-05, -5.4683e-06,\n -1.3126e-05, -3.7054e-06, -6.0864e-06, -5.7494e-07, -3.9112e-06,\n 1.2707e-05, 7.4416e-06, 4.0441e-06, -9.3535e-07, -4.5254e-06,\n -8.2621e-07, -8.6508e-06, 6.5892e-06, -2.5626e-05, -1.2469e-05,\n 5.6813e-06, 1.7612e-05, 1.1838e-05, 4.8189e-06, -1.3724e-06,\n -1.6334e-05, 3.5054e-06, 1.4172e-05, 1.7267e-05, 1.0009e-05,\n -4.7193e-06, -1.2217e-05, 7.5827e-06, 2.3062e-06, -2.0609e-05,\n 1.4163e-06, 8.0806e-07, -1.1620e-07, 1.5698e-05, -1.2741e-05,\n -3.8435e-06, -1.5210e-05, 3.0685e-05, -1.9189e-05, -6.5517e-06,\n -6.2002e-07, 2.7074e-05, 2.6388e-06, 1.6586e-05, -1.4761e-05,\n -6.4725e-06, -3.2582e-06, 8.5035e-07, -1.0521e-05, -3.9577e-06,\n -3.0051e-05, -1.2507e-05, 8.2659e-06, -2.6757e-05, -5.6549e-06,\n 2.4322e-06, -3.0258e-05, 1.5977e-05, 2.1801e-05, 1.3490e-05,\n 3.1325e-06, 7.6756e-06, 4.3196e-05, -8.4829e-06, -6.2430e-06,\n 1.4429e-06, -2.4480e-05, -2.7827e-05, 2.5163e-05, 4.5783e-06,\n 1.6449e-06, -4.4848e-06, -4.8519e-06, 8.0639e-06, -6.4543e-06,\n -5.1744e-06, -1.6947e-05, 4.5324e-06, 1.2733e-05, -1.9587e-05,\n 3.3199e-06, 1.2346e-05, 7.4048e-06, 7.1943e-06, -6.0349e-06,\n -1.6716e-05, 6.5422e-06, -2.1870e-05, -1.2569e-05, -7.3080e-06,\n 3.3002e-05, -7.9816e-06, 5.6973e-06, -5.0471e-05, -2.1046e-05,\n 9.8056e-06, 1.0485e-05, 2.3295e-05, 1.9676e-05, -4.2197e-06,\n 1.4841e-05, 5.5629e-06, -1.6372e-06, -2.2952e-06, -2.0485e-05,\n -1.0672e-05, -7.9039e-06, 1.8537e-05, -8.8020e-06, -2.3553e-05,\n -4.3809e-06, 7.6087e-06, 7.1714e-07, -4.0157e-06, -2.3719e-06,\n -1.3199e-05, -5.6076e-07, -9.7134e-06, 3.7135e-06, -1.4516e-05,\n -6.8148e-06, -2.9314e-06, -1.9455e-06, -1.3399e-05, 7.2260e-06,\n 5.0668e-06, 1.4333e-07, 9.0069e-06, -1.3390e-05, -1.5738e-05,\n 3.9233e-06, 2.9585e-05, -4.2399e-05, 3.4712e-06, 3.6824e-07,\n -9.9613e-06, 2.2673e-05, 1.9587e-05, -2.2539e-07, -1.0993e-05,\n -1.3301e-05, 4.0853e-06, -2.1087e-05, 1.9370e-05, 5.3966e-06,\n -7.2707e-06, -8.2680e-06, -3.8499e-06, -8.1052e-06, -1.3805e-05,\n 1.7442e-05, 1.9768e-06, -4.4387e-06, 7.3185e-06, -1.8121e-05,\n -2.1524e-06, 6.5982e-06, 3.4079e-06, 4.5257e-06, -1.9867e-06,\n -2.8573e-06, 2.1619e-05, -1.8176e-05, -6.2891e-06, 1.6292e-05,\n -3.8675e-07, -2.8719e-05, 1.4072e-05, -1.3343e-05, -1.7291e-05,\n 6.1730e-07, 3.4074e-06, 1.2501e-05, -1.0738e-05, -3.2013e-06,\n -1.3025e-05, -9.1147e-06, -4.6496e-06, -1.6923e-06, 6.0901e-06,\n 3.2961e-06, -2.5028e-05, 1.8160e-06, -1.6975e-05, -7.6116e-06,\n 6.2104e-06, -1.6806e-06, -2.2908e-06, -2.1417e-05, -8.4409e-06,\n 1.0414e-05, 3.5832e-05, -3.0029e-05, -8.4246e-06, -2.6156e-06,\n -8.8592e-06, -1.1720e-05, -9.2313e-06, -8.3767e-06, -2.9736e-05,\n 5.0167e-06, -4.0896e-05, -2.8706e-06, 1.2481e-05, 1.7790e-05,\n -2.9044e-05, 4.6607e-06, -9.2465e-06, 5.7113e-06, -2.8804e-06,\n 1.9516e-05, 8.9071e-06, 3.0148e-05, -6.4360e-06, 8.4048e-06,\n -3.7531e-06, -1.5044e-05, -2.1166e-06, -3.9429e-06, 2.7372e-06,\n 1.5966e-05, -7.9395e-06, -2.1451e-05, 1.4968e-06, -1.2086e-05,\n 8.2084e-07, -5.7831e-06, -2.6606e-05, -1.2059e-05, -1.8551e-05,\n 3.3135e-06, 1.5835e-05, -1.8801e-05, -5.4493e-06, 8.2312e-06,\n -1.9810e-05, -2.2929e-05, -1.0607e-05, 6.7132e-06, 1.3753e-06,\n -4.7710e-06, -3.6147e-05, -6.4819e-06, -5.2063e-06, 1.0639e-05,\n -1.0334e-05, 2.3734e-06, -5.8256e-05, 1.4005e-05, -2.0697e-05,\n 2.2242e-06, -1.6385e-05, -2.6677e-06, -3.4626e-05, -3.3541e-06,\n 5.9578e-06, 4.2179e-06, -3.8726e-05, 9.4957e-06, 1.1366e-05,\n -1.2078e-05, -9.3703e-07, 7.7564e-06, -6.2316e-06, 1.0512e-05,\n 5.2301e-06, -5.1922e-06, -1.9739e-06, 2.4006e-05, 4.0807e-06,\n -1.2869e-05, -1.2466e-05, -9.6710e-06, 2.9006e-06, 4.2350e-06,\n -1.0136e-05, 8.2945e-06, 1.1133e-05, 6.8921e-06, 1.4555e-05,\n 4.2777e-06, 1.6512e-05, 6.8693e-05, 2.5528e-05, -3.2782e-06,\n 9.2263e-06, -5.7226e-06, -3.9697e-06, 4.6484e-07, 1.8517e-06,\n 2.7416e-05, 8.7029e-06, -1.2712e-05, -1.1249e-05, -2.6154e-06,\n 1.3453e-05, -5.6903e-06, 1.5573e-05, -4.1079e-07, -1.7144e-05,\n -1.2360e-05, -1.0746e-05, -4.8997e-06, 1.0597e-05, -1.5629e-05,\n -3.5370e-06, 8.0921e-06, -6.0146e-06, 7.3139e-06, 7.4084e-06,\n 4.9490e-07, 9.6249e-06, -2.0601e-05, 1.1795e-05, 9.5761e-06,\n -3.6135e-06, -2.2447e-06, 4.4079e-07, 1.5898e-06, -6.8131e-06,\n -2.1577e-05, -7.1918e-06, -1.1533e-05, -2.4116e-06, -6.9170e-06,\n 3.1001e-05, -3.6257e-05, -6.7767e-07, 8.9801e-06, 2.4606e-05,\n 2.1949e-05, 2.8939e-06, -1.4507e-05, -2.3148e-05, -1.8622e-06,\n -1.4042e-05, 6.3548e-07, -4.0886e-05, 3.3268e-06, 1.0411e-05,\n -1.9802e-05, -3.5324e-06, -9.5274e-06, 7.5774e-06, -3.3924e-06,\n 1.2415e-05, -1.4836e-06, 2.9036e-07, -9.2917e-06, 8.7766e-06,\n -1.3099e-05, 6.2372e-06, 3.6894e-06, 4.5929e-06, -5.3652e-06,\n -4.2651e-06, -2.3237e-05, 6.9366e-07, -6.4297e-06, -1.5690e-05,\n 2.0782e-05, 3.1846e-06, -8.0425e-06, -1.6321e-05, 1.9839e-05,\n 1.1572e-06, 1.2656e-05, -1.9922e-05, 2.0259e-05, -2.3071e-05,\n 2.9988e-05, -4.2781e-06, -2.3762e-05, 6.9426e-07, 2.4606e-06,\n 5.4907e-07, 2.8258e-05, 2.2374e-05, -1.5322e-06, -3.3810e-05,\n 8.5923e-06, 1.1663e-05, 8.6175e-06, -1.2181e-05, -5.5994e-06,\n 7.9602e-07, 2.6325e-06, -5.0994e-06, 9.6338e-06, 3.1599e-06,\n 7.5475e-06, -1.0723e-05, 1.0130e-05, -1.0851e-05, -1.7612e-06,\n 8.1281e-06, 5.0279e-06, 7.6402e-06, 4.4810e-05, -8.1313e-06,\n -1.0947e-05, 1.5669e-06, 2.6978e-05, -9.7661e-07, 8.9763e-06,\n 1.1772e-05, 2.6650e-06, -5.1747e-06, -4.9091e-06, -5.0080e-06,\n 1.1204e-05, -1.6243e-05, 5.3520e-06, 7.4775e-06, 1.4290e-05,\n -1.6343e-05, 8.3424e-06, -2.3388e-06, -1.3812e-05, -1.4936e-05,\n -1.2739e-05, -3.6375e-06, 4.1860e-06, -5.6260e-06, -4.8666e-06,\n -1.1581e-06, 3.1935e-06, 6.4408e-06, 1.3098e-05, -9.3985e-06,\n 3.8133e-06, 1.3004e-05, -1.7195e-05, 4.1859e-05, 1.7852e-05,\n -4.9827e-05, -6.9393e-06, 8.9085e-07, 5.3546e-06, 2.0064e-05,\n 2.1127e-05, 4.0749e-06, 1.5508e-05, -1.7194e-05, 1.6166e-05,\n 1.2262e-05, 2.7717e-05, 1.2821e-05, 2.3801e-05, -8.4605e-06,\n 1.1632e-05, 1.0660e-06, 7.4713e-06, -2.7545e-05, -1.4149e-06,\n 9.4866e-06, -3.5518e-06], device='cuda:0'), 'exp_avg_sq': tensor([2.2731e-08, 1.4335e-08, 2.9627e-08, 4.7786e-08, 2.3107e-08, 9.9850e-09,\n 2.8430e-08, 1.8333e-08, 7.4318e-09, 1.4755e-08, 7.1682e-09, 4.2961e-09,\n 7.1808e-09, 2.0653e-08, 9.7579e-09, 4.5988e-09, 7.9359e-09, 2.4284e-08,\n 1.7777e-08, 3.1628e-08, 2.2319e-08, 1.9269e-08, 1.9904e-08, 2.8260e-08,\n 3.8334e-08, 1.7377e-08, 1.1109e-08, 7.1889e-09, 5.8948e-09, 8.5183e-09,\n 2.8106e-08, 3.8013e-08, 1.5458e-08, 7.3336e-08, 1.5971e-08, 8.3877e-09,\n 2.2854e-08, 8.2956e-09, 3.3335e-08, 1.0958e-08, 2.3477e-08, 2.8474e-08,\n 4.2597e-08, 1.7786e-08, 4.4773e-09, 3.8568e-09, 1.8762e-08, 2.5588e-08,\n 4.1103e-09, 1.6166e-08, 1.2619e-08, 2.7900e-08, 9.0272e-09, 9.6086e-09,\n 8.6065e-09, 9.5401e-09, 6.7431e-10, 1.4179e-08, 2.7538e-09, 1.8236e-08,\n 8.2467e-09, 1.7266e-08, 9.4898e-09, 6.0144e-10, 9.1881e-09, 8.7484e-09,\n 9.5711e-09, 1.3723e-08, 1.0418e-08, 1.1378e-08, 3.1411e-09, 3.3839e-08,\n 2.4297e-08, 2.0434e-08, 1.7965e-08, 2.2215e-08, 2.8219e-09, 1.2008e-08,\n 1.1915e-08, 4.8895e-08, 2.9878e-08, 6.3758e-08, 2.9255e-08, 9.4532e-09,\n 3.9191e-08, 3.0297e-08, 1.0337e-08, 1.3244e-10, 7.5377e-09, 1.4754e-08,\n 1.2179e-08, 1.4908e-08, 5.1245e-08, 2.3344e-08, 9.5888e-09, 5.0039e-09,\n 5.6673e-08, 4.3307e-09, 2.7388e-08, 1.5341e-08, 5.5436e-09, 2.8354e-10,\n 9.8602e-09, 1.1880e-09, 1.3487e-08, 3.8608e-08, 7.9392e-08, 4.7492e-08,\n 1.7116e-08, 1.1247e-08, 2.9857e-09, 6.2806e-08, 1.3710e-08, 1.8697e-08,\n 7.7514e-09, 1.4708e-08, 1.0952e-08, 1.8500e-08, 1.5497e-08, 1.4499e-08,\n 2.7124e-08, 2.5146e-08, 2.0885e-08, 1.5459e-08, 8.3697e-09, 2.1364e-08,\n 6.4220e-09, 9.1252e-09, 5.7048e-08, 4.4412e-08, 1.3027e-08, 9.7575e-09,\n 2.8554e-08, 2.7701e-08, 2.7224e-08, 3.0632e-08, 1.1771e-08, 3.6794e-08,\n 1.8722e-08, 2.5066e-08, 1.4988e-08, 1.3435e-08, 1.9739e-08, 1.5277e-08,\n 4.4053e-09, 1.9034e-08, 6.7234e-09, 2.1968e-08, 5.3883e-08, 1.8676e-08,\n 2.9328e-08, 2.5477e-08, 1.3430e-08, 2.7930e-08, 7.2403e-09, 1.3821e-08,\n 7.3505e-09, 1.8813e-08, 2.4102e-08, 5.5005e-08, 1.5687e-08, 2.1042e-09,\n 1.4424e-08, 9.9025e-09, 2.4599e-08, 5.1118e-08, 2.1084e-08, 7.0780e-10,\n 4.9702e-09, 1.7998e-09, 1.1255e-08, 1.2017e-08, 1.8221e-08, 1.0861e-08,\n 1.4205e-08, 2.9483e-09, 9.8946e-09, 3.8066e-08, 8.2886e-09, 2.4549e-08,\n 1.6698e-08, 1.4555e-09, 1.3560e-08, 8.5522e-09, 2.4575e-09, 6.3280e-09,\n 3.2340e-08, 3.4648e-08, 1.2049e-08, 9.6826e-09, 2.1657e-08, 9.9055e-09,\n 2.8699e-08, 1.0917e-08, 2.0063e-08, 1.9055e-08, 5.5057e-09, 1.8083e-08,\n 2.4100e-08, 1.1666e-08, 7.4099e-09, 4.5431e-09, 4.1866e-08, 1.9116e-08,\n 1.3826e-08, 2.5665e-08, 1.0848e-08, 5.6737e-09, 1.3989e-08, 2.5617e-08,\n 6.8975e-09, 4.1548e-08, 1.2733e-09, 6.9386e-09, 9.0467e-09, 2.4732e-10,\n 6.3539e-09, 1.4378e-08, 8.1410e-09, 2.4680e-08, 1.1846e-08, 1.7135e-08,\n 9.3133e-09, 1.2224e-08, 6.2287e-09, 4.0663e-09, 9.6277e-09, 1.6597e-08,\n 6.7564e-09, 1.2869e-08, 1.0895e-08, 1.1431e-08, 1.0615e-08, 2.3537e-08,\n 2.4678e-08, 1.9626e-08, 2.0467e-08, 3.4445e-08, 1.1620e-08, 5.1757e-08,\n 1.1940e-08, 1.4628e-08, 5.6612e-09, 4.1175e-08, 1.9599e-08, 3.2218e-09,\n 2.5018e-08, 1.3993e-08, 2.1489e-08, 6.9335e-09, 5.0651e-09, 1.8095e-08,\n 2.5230e-08, 1.6676e-08, 2.3834e-08, 7.6648e-09, 5.9319e-07, 1.0769e-08,\n 4.2147e-08, 1.6175e-08, 2.4284e-08, 1.1147e-08, 1.2554e-08, 2.1923e-08,\n 1.4350e-08, 5.0035e-08, 3.2905e-08, 1.9128e-08, 7.5480e-09, 8.9541e-09,\n 8.9853e-09, 9.9805e-09, 1.7796e-08, 1.0536e-08, 1.1075e-08, 1.7453e-08,\n 4.3868e-08, 1.4205e-08, 1.6608e-08, 1.1354e-08, 2.2266e-08, 1.9616e-08,\n 2.3829e-08, 4.9485e-08, 1.3383e-08, 1.5233e-08, 2.4656e-08, 1.3264e-08,\n 7.3167e-09, 1.1853e-08, 2.0566e-08, 1.8236e-08, 8.0356e-09, 2.4254e-08,\n 1.4048e-08, 5.5815e-08, 1.3294e-08, 3.1429e-09, 2.9542e-08, 3.8588e-08,\n 1.4259e-08, 4.6921e-10, 2.9431e-08, 1.8415e-08, 2.5321e-08, 2.1439e-08,\n 3.8904e-08, 4.1651e-09, 2.3527e-08, 2.6045e-09, 7.5170e-09, 4.0442e-09,\n 1.3231e-08, 1.7838e-08, 7.4147e-09, 5.0657e-08, 1.4082e-08, 5.1532e-08,\n 2.2103e-08, 1.0643e-08, 2.5066e-08, 5.2207e-09, 3.1783e-08, 2.5685e-08,\n 1.6240e-08, 2.3427e-09, 1.9619e-08, 1.8507e-08, 4.8235e-08, 1.9851e-08,\n 2.1476e-08, 2.9031e-08, 7.5108e-09, 2.2806e-08, 6.7132e-09, 2.6933e-08,\n 9.5116e-09, 4.2514e-08, 3.6491e-08, 2.4259e-09, 5.1062e-09, 1.3760e-08,\n 8.7863e-09, 2.3658e-09, 2.4530e-09, 2.6235e-08, 8.6849e-09, 2.2102e-08,\n 2.9667e-08, 3.4328e-09, 2.8729e-08, 8.5375e-09, 1.0769e-08, 1.3643e-08,\n 1.5452e-08, 2.9329e-08, 2.3215e-08, 1.4693e-08, 9.7958e-09, 4.3610e-08,\n 1.2241e-08, 1.6070e-08, 1.0539e-09, 1.1387e-08, 3.9005e-08, 8.1809e-09,\n 3.2267e-08, 2.6289e-08, 9.6143e-09, 2.6381e-09, 5.3953e-09, 2.1742e-08,\n 1.1386e-08, 1.3228e-08, 1.1627e-08, 3.2844e-08, 2.7699e-09, 2.3012e-08,\n 1.5067e-08, 2.1919e-08, 1.7896e-08, 4.5701e-08, 1.8536e-08, 2.4244e-08,\n 1.8968e-08, 2.9327e-08, 4.8459e-08, 2.2947e-08, 4.5709e-08, 2.0442e-08,\n 1.4142e-07, 6.3285e-08, 3.3263e-08, 1.8143e-08, 1.7668e-08, 8.9436e-09,\n 3.6488e-08, 9.3118e-09, 1.1383e-08, 2.2118e-08, 1.5097e-08, 7.7773e-09,\n 2.1055e-08, 2.6441e-08, 1.3646e-08, 5.2209e-09, 1.0709e-08, 1.4786e-08,\n 8.5516e-09, 9.4319e-09, 9.7629e-09, 1.2544e-08, 6.9520e-09, 3.3345e-09,\n 3.2723e-08, 1.7067e-08, 1.1684e-08, 1.3692e-08, 2.2957e-08, 1.6166e-08,\n 1.8823e-08, 1.7506e-08, 1.1747e-08, 2.6496e-08, 9.9428e-09, 3.1347e-08,\n 1.1280e-08, 2.3545e-08, 1.0335e-08, 2.9326e-08, 4.9317e-10, 3.1560e-08,\n 2.6981e-08, 7.8200e-10, 2.2972e-08, 9.3944e-09, 1.3617e-08, 1.8031e-08,\n 2.3439e-08, 4.6234e-10, 7.5200e-09, 6.0854e-09, 1.0601e-08, 3.5204e-08,\n 1.4403e-08, 3.6777e-08, 8.6266e-09, 6.9362e-09, 2.8705e-08, 1.7046e-08,\n 2.2066e-09, 2.2421e-08, 1.0178e-08, 3.2948e-08, 4.6352e-09, 5.8929e-09,\n 2.4677e-08, 4.4002e-08, 8.8399e-10, 1.1710e-08, 1.6083e-08, 7.0417e-09,\n 4.4827e-09, 1.1567e-08, 5.5020e-09, 4.2356e-08, 1.6165e-08, 2.4892e-08,\n 1.6189e-08, 8.8985e-09, 1.6945e-08, 1.7738e-08, 1.9410e-09, 2.5099e-08,\n 2.6602e-08, 1.1677e-08, 4.6342e-09, 1.5472e-08, 2.7512e-09, 1.2406e-08,\n 1.1025e-08, 1.5117e-08, 8.0946e-09, 1.4445e-08, 5.3364e-09, 1.0077e-08,\n 3.3355e-08, 1.1573e-08, 2.1858e-08, 2.3563e-08, 4.1094e-08, 2.0868e-09,\n 3.7737e-09, 7.2499e-09, 2.4225e-08, 3.6574e-08, 1.5972e-08, 1.0754e-08,\n 9.1213e-09, 2.9646e-08, 8.7700e-09, 8.2920e-09, 1.9922e-08, 1.3451e-08,\n 1.8233e-08, 1.1661e-08, 1.7464e-09, 2.5095e-09, 1.6342e-08, 1.3042e-08,\n 4.8377e-09, 1.0798e-08], device='cuda:0')}, 101: {'step': tensor(62477.), 'exp_avg': tensor([[ 4.0730e-08, 1.9314e-07, -2.4441e-07, ..., 3.1371e-07,\n 1.3184e-06, -1.8439e-07],\n [-8.0041e-08, -1.0512e-07, 8.7012e-07, ..., 7.5219e-07,\n -1.1391e-06, 5.1054e-07],\n [ 1.0875e-07, -1.6762e-08, 6.5440e-07, ..., -7.5468e-07,\n 6.3240e-07, 6.7444e-08],\n ...,\n [ 3.6025e-08, 3.5857e-07, 5.8330e-07, ..., -1.8851e-06,\n 4.5051e-06, -8.6113e-08],\n [ 9.5597e-07, 7.2129e-07, -1.8952e-06, ..., -5.3912e-06,\n -2.1912e-06, 3.9859e-07],\n [-2.0224e-06, -1.4230e-06, 1.1450e-06, ..., 4.9111e-06,\n 3.2955e-06, -2.8034e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[2.9898e-11, 2.6449e-11, 7.3455e-11, ..., 2.3032e-10, 7.6862e-11,\n 2.9860e-11],\n [3.9055e-11, 3.4455e-11, 1.5010e-10, ..., 2.9410e-10, 1.5036e-10,\n 4.8895e-11],\n [8.4601e-11, 7.5044e-11, 1.5505e-10, ..., 5.6197e-10, 2.7139e-10,\n 8.1625e-11],\n ...,\n [1.6704e-10, 1.4423e-10, 3.7926e-10, ..., 1.3164e-09, 4.9068e-10,\n 1.7828e-10],\n [1.6185e-10, 1.3485e-10, 5.4089e-10, ..., 1.7852e-09, 4.8596e-10,\n 1.9714e-10],\n [1.4295e-10, 1.2364e-10, 1.2168e-09, ..., 1.9251e-09, 7.4342e-10,\n 2.2699e-10]], device='cuda:0')}, 102: {'step': tensor(62477.), 'exp_avg': tensor([ 4.0677e-07, 1.5687e-06, -7.8717e-07, ..., 2.6749e-06,\n -5.5124e-06, 1.1441e-05], device='cuda:0'), 'exp_avg_sq': tensor([7.1038e-10, 1.0295e-09, 1.9378e-09, ..., 4.4652e-09, 5.2896e-09,\n 6.0185e-09], device='cuda:0')}, 103: {'step': tensor(62477.), 'exp_avg': tensor([[ 1.0629e-05, 4.2947e-06, 1.4775e-05, ..., 3.0106e-06,\n 2.7077e-06, 7.8165e-08],\n [ 7.0553e-06, 7.0643e-09, 1.0159e-05, ..., 2.0371e-06,\n 1.6601e-06, -1.2148e-06],\n [ 2.2553e-06, -2.6543e-06, -4.4249e-06, ..., -1.0550e-06,\n 8.2976e-06, 8.2126e-07],\n ...,\n [-3.7793e-06, 1.5619e-05, -9.2729e-06, ..., -1.8408e-06,\n -4.3879e-06, -4.7938e-06],\n [ 1.0601e-07, 2.1559e-05, 1.2196e-05, ..., -5.1131e-06,\n -1.5287e-06, -2.5386e-06],\n [-7.6541e-09, -2.9563e-06, -1.4966e-05, ..., 6.9100e-06,\n -3.4858e-07, -4.1971e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[5.4106e-09, 1.6990e-09, 3.5362e-09, ..., 4.7090e-10, 7.5570e-10,\n 5.4503e-10],\n [2.2604e-09, 4.5646e-10, 1.7318e-09, ..., 1.8272e-10, 2.6301e-10,\n 2.5424e-10],\n [1.6220e-09, 4.2641e-10, 1.0234e-09, ..., 1.3502e-10, 1.8939e-10,\n 1.8468e-10],\n ...,\n [1.4387e-08, 3.9965e-09, 7.0425e-09, ..., 3.7564e-09, 2.1419e-09,\n 2.0390e-09],\n [2.2656e-08, 6.7609e-09, 1.0338e-08, ..., 5.7209e-09, 3.7249e-09,\n 3.5857e-09],\n [8.1259e-09, 2.0347e-09, 3.7050e-09, ..., 2.0403e-09, 1.2810e-09,\n 1.0911e-09]], device='cuda:0')}, 104: {'step': tensor(62477.), 'exp_avg': tensor([ 1.2474e-06, -8.1814e-07, -7.1506e-07, ..., 2.7634e-06,\n 9.4069e-06, 1.5811e-06], device='cuda:0'), 'exp_avg_sq': tensor([6.4412e-10, 2.8172e-10, 2.2382e-10, ..., 1.2475e-08, 1.9623e-08,\n 7.8319e-09], device='cuda:0')}, 105: {'step': tensor(62477.), 'exp_avg': tensor([[ 7.7331e-06, 2.0674e-06, 9.0696e-07, ..., -2.3758e-06,\n 1.7289e-05, 2.8050e-06],\n [-2.2786e-06, -1.3070e-05, -1.0237e-05, ..., 3.0768e-06,\n 1.0742e-05, -1.0640e-06],\n [ 3.9787e-07, -3.2166e-07, -1.4531e-07, ..., 1.7956e-06,\n 2.0482e-06, -7.3690e-07],\n ...,\n [ 6.3633e-06, 2.8454e-06, -7.0441e-08, ..., 4.9644e-06,\n 2.6333e-05, -4.6067e-06],\n [ 2.9240e-08, -7.2418e-07, 2.9707e-08, ..., -3.9197e-06,\n -1.2143e-06, 9.5275e-08],\n [-7.9876e-06, -4.8737e-06, -4.7481e-06, ..., -1.0979e-05,\n -4.3462e-06, 5.3999e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[2.7472e-09, 4.4839e-09, 6.0127e-09, ..., 1.8315e-08, 1.8890e-08,\n 5.2578e-09],\n [2.6004e-09, 3.6104e-09, 5.2757e-09, ..., 1.2159e-08, 1.4946e-08,\n 4.3888e-09],\n [1.4329e-10, 1.5731e-10, 1.3860e-10, ..., 1.1512e-09, 1.4534e-09,\n 2.9979e-10],\n ...,\n [2.2538e-09, 3.7668e-09, 5.3272e-09, ..., 1.2269e-08, 1.4235e-08,\n 4.6515e-09],\n [4.6155e-11, 5.3363e-11, 6.7754e-11, ..., 2.1434e-10, 1.9785e-10,\n 5.2038e-11],\n [2.1976e-09, 3.9871e-09, 4.4201e-09, ..., 1.8056e-08, 2.1271e-08,\n 4.2343e-09]], device='cuda:0')}, 106: {'step': tensor(62477.), 'exp_avg': tensor([-9.4666e-06, -9.4885e-06, -2.4651e-06, 5.5775e-06, 7.7602e-07,\n -1.2998e-05, 2.6058e-05, 2.6793e-05, -2.7358e-06, 3.7371e-06,\n -2.4739e-05, 3.0063e-06, 2.9804e-05, -3.2370e-06, -7.5400e-07,\n -1.1268e-05, 1.8268e-05, -1.8457e-05, -2.4119e-05, 6.2929e-06,\n 1.2234e-05, 5.2619e-05, -1.4036e-05, 2.3775e-06, 6.2760e-06,\n 4.4761e-05, -3.2887e-06, 4.6019e-06, -6.8021e-06, 2.7838e-06,\n -9.4519e-06, 6.6068e-06, 1.4648e-05, -2.6783e-05, -1.1968e-05,\n 2.2940e-05, -4.4254e-06, -1.8833e-05, 3.3418e-05, -2.9976e-06,\n -2.1198e-05, -7.8934e-06, 3.8902e-05, 4.4623e-06, 1.5442e-05,\n -5.3071e-06, -1.7789e-05, 1.4697e-05, 6.0250e-06, 2.4217e-06,\n 1.7426e-05, 7.4602e-06, 1.7210e-05, 4.6680e-06, -5.0874e-06,\n 1.9557e-05, 2.3991e-06, -4.8334e-06, -9.4898e-06, -1.4061e-05,\n 2.4629e-05, -1.5478e-06, 6.0986e-06, 1.7170e-05, -2.0354e-06,\n 3.6219e-06, -1.7077e-05, -2.0003e-05, -1.3323e-05, -8.4105e-06,\n 3.5743e-06, -5.9133e-06, -1.2994e-05, -2.9993e-05, 2.4971e-05,\n -7.4654e-06, -7.9438e-06, 8.1710e-06, 4.9262e-06, -3.5616e-05,\n 6.6551e-06, -9.8920e-06, -1.6636e-05, 1.8247e-06, -9.8256e-06,\n 7.6491e-06, 8.1221e-06, 3.1543e-06, -7.3350e-06, 2.4481e-05,\n -6.9730e-06, 3.6150e-05, 1.5548e-05, -2.0524e-05, 4.9821e-06,\n 9.7914e-06, 1.9019e-05, -1.0410e-05, 4.6109e-06, -1.2202e-05,\n 2.7906e-06, 3.5588e-05, -2.7027e-06, -1.6907e-05, 2.0348e-06,\n -1.9444e-05, 3.8507e-06, -2.9258e-05, 1.3737e-05, 2.5278e-06,\n 1.6915e-05, 1.5940e-05, 3.6747e-06, -3.9503e-05, 2.1351e-05,\n -5.5225e-06, -2.9164e-06, 4.6081e-05, -3.8099e-05, -2.8187e-06,\n 2.7939e-05, 4.5100e-06, 3.8214e-05, -2.0681e-05, 8.5576e-06,\n -2.4614e-06, 2.2041e-05, -3.4741e-05, 9.6221e-06, 2.3028e-05,\n 1.1780e-05, -2.1652e-05, 1.3075e-05, 6.0616e-06, 3.6309e-06,\n -7.8965e-06, 3.7176e-07, -4.3114e-06, -1.4666e-05, -1.9606e-05,\n 6.4081e-06, -3.2420e-06, -5.0173e-07, 1.8171e-05, -2.2680e-06,\n 2.8894e-06, 2.0107e-05, -2.5967e-05, 5.5731e-05, -4.0771e-05,\n 1.0201e-06, 1.7503e-06, -1.4772e-07, -1.2051e-05, -2.6426e-06,\n -5.7180e-06, 2.9137e-06, -1.9645e-05, -2.5478e-05, -4.2527e-05,\n -1.6361e-05, 4.4919e-06, -1.3584e-05, -7.5299e-06, 3.9751e-06,\n -2.3860e-05, 4.3969e-06, 3.7527e-07, 1.6074e-06, 1.8444e-05,\n 1.4505e-05, 1.0207e-06, 5.2147e-06, -9.8192e-06, -4.1257e-06,\n 9.9389e-07, 1.6304e-05, -1.0684e-06, 1.0932e-05, -3.2603e-05,\n 1.6091e-05, 1.2256e-06, -2.7578e-06, -1.2634e-05, -9.2944e-06,\n -7.0057e-06, 1.6467e-06, -1.8922e-05, 9.6975e-06, -4.0297e-06,\n -2.7342e-06, 4.0458e-05, -1.1989e-05, -1.4308e-06, -3.5180e-05,\n -3.0576e-05, -3.8263e-06, -1.9482e-05, 4.8897e-05, 2.8281e-06,\n 1.6175e-06, 4.7797e-06, 1.2236e-05, 6.4692e-06, 1.0096e-06,\n 6.7868e-05, -1.5745e-06, 1.6605e-06, -1.6943e-08, -1.6422e-05,\n -3.6404e-06, -1.6470e-05, -6.0560e-06, 3.2758e-06, -2.7325e-06,\n 2.6504e-06, 3.8609e-05, 1.2278e-05, 4.2323e-06, 2.8636e-06,\n 4.1974e-06, 1.1784e-05, 3.5263e-05, 5.5950e-06, 2.7563e-05,\n 4.5095e-06, 1.2820e-05, 6.4771e-06, -1.2275e-06, 1.7723e-06,\n 9.2724e-06, 1.2190e-05, -1.3809e-05, -6.5644e-06, -9.9581e-07,\n -1.5741e-05, 6.1122e-06, -1.1736e-05, -9.3568e-06, 7.9252e-06,\n -2.0857e-05, -2.2027e-05, 2.0477e-06, -1.9946e-05, 1.6696e-05,\n -2.8647e-05, 2.5010e-05, 4.1357e-06, -1.1563e-05, -1.3267e-05,\n -5.9066e-06, -4.5844e-06, 1.7491e-05, -2.6133e-06, 7.7524e-06,\n -8.9451e-06, 2.0671e-05, 2.8263e-06, -3.1137e-05, 3.1420e-05,\n 1.1233e-05, -3.2480e-05, 1.8504e-05, -3.5587e-07, -1.1337e-05,\n 2.1575e-05, -3.7142e-06, -6.9615e-07, 2.5580e-06, 5.4142e-06,\n -9.5907e-06, -8.6784e-06, -8.1882e-06, -2.8668e-05, -2.1594e-07,\n -1.6114e-05, 3.1678e-06, -3.3977e-06, -1.1569e-05, -2.6166e-06,\n -2.2249e-05, -6.4079e-07, 5.6472e-06, 1.1564e-05, -1.7012e-05,\n 1.3801e-05, 3.1695e-05, 1.2899e-05, -4.5752e-06, -6.4058e-06,\n 4.9709e-06, -5.0088e-07, 2.1653e-05, 4.1832e-06, -8.3563e-06,\n -1.0346e-05, -2.2574e-05, -9.5344e-06, 4.8820e-06, -2.3953e-06,\n -1.1001e-05, -4.0126e-06, 8.8127e-06, -1.3472e-05, -3.8803e-06,\n -3.6489e-06, -8.1419e-06, 6.0568e-06, 1.0503e-06, 6.8378e-06,\n -3.4138e-05, 7.8786e-06, 3.5053e-05, -1.0831e-05, 2.4660e-05,\n -1.4820e-05, 8.2934e-07, -1.4242e-05, 2.3113e-05, -1.4477e-06,\n 1.0721e-05, 6.8984e-06, -1.0710e-05, -7.8715e-06, -2.8626e-06,\n 5.6156e-07, -2.8905e-05, -1.7761e-05, 6.3703e-06, 1.0229e-05,\n 1.4483e-06, 1.7758e-05, -3.5364e-06, -5.5085e-06, 2.7216e-05,\n -6.7900e-06, 1.5830e-05, -2.6017e-05, 1.2068e-05, 8.2943e-06,\n -9.6069e-06, 9.9044e-06, 5.9098e-06, 2.5323e-06, -4.1594e-06,\n -8.3566e-06, -6.1265e-06, -2.3098e-06, 4.4605e-05, 2.7023e-06,\n 1.4143e-05, 4.7880e-07, 2.2626e-06, 1.3390e-05, -4.7887e-05,\n 2.7772e-05, 1.8724e-05, -4.2209e-06, 7.8419e-06, -3.3818e-06,\n -1.7652e-05, 1.0584e-05, -1.8239e-07, 2.5862e-05, 1.8718e-05,\n -1.7726e-06, 6.8924e-06, -8.3918e-06, 1.1343e-05, -7.0359e-06,\n -5.1881e-06, 1.3947e-05, 7.6312e-06, -7.2081e-06, -1.5120e-05,\n -2.7478e-06, -3.0243e-05, -5.5169e-06, 7.0845e-06, 1.0560e-05,\n 9.1357e-06, 8.8495e-06, -1.7109e-06, -1.6312e-05, -5.2877e-06,\n 8.4116e-06, 3.2556e-05, -6.6122e-06, 1.4730e-05, 4.0868e-05,\n -1.0704e-06, 5.1359e-06, 2.6935e-06, 3.9050e-06, 2.4754e-06,\n 1.0342e-05, 1.9616e-05, 9.9610e-06, 9.3257e-06, -2.3059e-05,\n -1.4364e-05, 6.4649e-06, 3.5292e-06, 2.9686e-05, -1.8673e-06,\n -1.3754e-05, -7.4000e-07, -5.5603e-07, 8.1379e-06, 9.7001e-07,\n -1.0430e-07, 2.2969e-07, -4.2245e-06, -5.6844e-06, -1.8971e-06,\n 1.0816e-06, 4.5013e-06, 3.5980e-06, 8.7384e-06, 2.7706e-07,\n 1.4085e-05, -5.7407e-07, -2.6591e-05, 2.9129e-05, 2.1512e-05,\n 6.4067e-06, -2.5132e-05, -7.4748e-06, 2.1850e-05, 2.1909e-07,\n -1.2366e-06, -1.5968e-05, -1.1044e-05, -1.3715e-06, 4.9466e-05,\n 4.3592e-06, 1.0022e-05, 2.3213e-05, 4.0379e-07, 2.2412e-05,\n 4.6283e-06, -1.5898e-05, 4.7506e-06, -1.5299e-05, 3.9361e-06,\n 1.7339e-05, -5.7252e-07, 1.2177e-05, 5.0730e-05, -4.0287e-06,\n 1.5125e-05, -1.0524e-05, 1.0616e-05, 8.9383e-06, 3.2817e-06,\n 1.1731e-05, 3.3301e-05, -6.0812e-06, 5.8118e-06, 1.4349e-05,\n -9.3055e-06, 5.0180e-07, -2.5140e-05, -1.3833e-06, 8.7242e-06,\n 5.3408e-06, 3.2297e-06, 2.4197e-05, 4.0775e-06, -5.7913e-06,\n -6.9804e-06, -6.3852e-06, -4.3965e-06, 1.0722e-06, -8.3359e-06,\n 1.5305e-05, -7.9734e-06, 1.6154e-06, -8.5477e-06, 2.4612e-07,\n 2.2471e-06, 1.0593e-05, -5.1718e-06, -8.4569e-06, 7.6824e-06,\n 8.2922e-06, -2.8085e-05, -2.2875e-05, -3.6425e-05, -1.6806e-05,\n -4.1170e-05, 1.0904e-05, 2.3950e-06, 3.6479e-06, 2.8792e-05,\n -2.6907e-05, 3.4323e-06, 3.7809e-06, -2.7817e-05, 4.0511e-06,\n 7.2674e-06, -1.9525e-05, 2.5867e-05, 8.9631e-06, -3.4384e-06,\n -1.1855e-05, -1.8430e-05, 2.0411e-06, -1.5752e-05, 5.7705e-06,\n -3.1068e-06, 1.3957e-05], device='cuda:0'), 'exp_avg_sq': tensor([3.2323e-08, 2.7807e-08, 1.9511e-09, 2.4408e-08, 3.2073e-09, 1.5362e-08,\n 7.5882e-08, 4.1488e-08, 2.6327e-08, 1.5730e-08, 2.6990e-08, 4.0742e-08,\n 2.1313e-08, 1.5716e-08, 2.4363e-08, 4.1093e-09, 1.6011e-08, 4.9253e-08,\n 3.0744e-08, 7.5369e-09, 3.2951e-08, 9.3208e-08, 1.9369e-09, 1.1947e-08,\n 2.0935e-08, 4.1222e-08, 6.9293e-09, 2.7646e-09, 3.2174e-09, 7.1146e-09,\n 5.1905e-08, 4.7207e-09, 5.7221e-08, 1.1456e-07, 4.3850e-08, 2.0542e-08,\n 1.6204e-08, 4.0106e-08, 2.2109e-08, 2.2920e-08, 5.3252e-08, 1.8520e-09,\n 4.3447e-08, 2.5459e-08, 1.4992e-08, 8.3148e-09, 1.3564e-08, 4.0349e-08,\n 5.5208e-09, 2.6919e-09, 4.7263e-08, 4.2351e-09, 3.6120e-09, 2.9290e-09,\n 1.5031e-08, 2.2534e-08, 1.0803e-08, 3.3495e-08, 1.2264e-08, 5.4694e-09,\n 5.0171e-08, 6.5873e-09, 1.7150e-08, 2.7102e-08, 1.9579e-09, 2.8545e-08,\n 1.5191e-08, 3.0507e-08, 1.2914e-09, 7.4129e-09, 1.1676e-08, 7.5488e-09,\n 7.2014e-09, 5.2372e-08, 4.9488e-08, 1.2160e-08, 3.2958e-08, 4.0813e-09,\n 8.8553e-09, 7.2589e-08, 1.8548e-08, 3.0585e-08, 4.2239e-08, 3.3290e-09,\n 4.1025e-08, 2.4075e-08, 1.1453e-08, 2.3873e-08, 7.4164e-08, 3.2648e-08,\n 7.5993e-09, 3.5782e-08, 3.6427e-08, 2.6747e-08, 8.5579e-09, 1.3492e-08,\n 2.4222e-08, 6.4848e-09, 5.0137e-08, 6.2885e-09, 5.5151e-09, 3.1073e-08,\n 1.3248e-08, 2.1736e-08, 5.8634e-09, 6.1757e-08, 3.4603e-09, 3.0928e-08,\n 8.0268e-09, 3.4613e-08, 1.1807e-07, 2.9645e-08, 1.4800e-08, 6.5855e-08,\n 1.6407e-08, 5.5044e-08, 8.4573e-09, 4.9202e-08, 6.5555e-08, 1.0209e-08,\n 6.5711e-08, 2.1578e-08, 5.8250e-08, 1.4266e-08, 3.2231e-08, 1.6818e-08,\n 2.9022e-08, 5.3980e-08, 1.2198e-08, 1.6874e-08, 2.0448e-08, 1.4106e-08,\n 3.2583e-08, 9.2300e-10, 1.5098e-08, 2.3266e-08, 4.4371e-08, 2.7097e-08,\n 5.3779e-09, 8.6326e-08, 5.9863e-09, 8.5011e-09, 2.2591e-09, 1.0280e-08,\n 1.6304e-09, 4.3808e-09, 8.5812e-09, 1.0190e-08, 1.7556e-08, 3.1404e-08,\n 8.0442e-09, 1.4446e-09, 1.9750e-08, 1.5598e-08, 3.1806e-09, 1.4952e-09,\n 6.7554e-09, 4.7878e-08, 4.7350e-08, 3.3887e-08, 5.8197e-08, 2.5050e-09,\n 8.5251e-09, 5.1810e-08, 2.8364e-08, 7.8704e-08, 2.2351e-09, 6.2733e-08,\n 1.2322e-08, 3.6188e-08, 1.1258e-08, 1.5472e-08, 6.2315e-09, 7.3872e-09,\n 1.2346e-08, 1.5169e-08, 1.6171e-08, 1.1992e-08, 2.5304e-09, 2.6442e-08,\n 2.7456e-08, 6.8167e-09, 4.6208e-08, 3.1320e-08, 1.5303e-08, 7.9302e-09,\n 6.9049e-08, 2.7838e-08, 3.9572e-09, 1.8476e-09, 3.0876e-09, 1.9213e-08,\n 4.5941e-08, 1.1008e-08, 1.6842e-08, 4.6194e-08, 1.1984e-08, 3.5947e-09,\n 6.5005e-08, 6.5596e-09, 3.8422e-08, 3.9100e-09, 2.8713e-08, 2.7327e-08,\n 1.7922e-09, 1.1934e-07, 2.4375e-10, 2.9630e-09, 7.1680e-09, 2.5087e-08,\n 3.3277e-09, 5.0319e-09, 1.3953e-08, 1.8832e-09, 2.7763e-09, 2.1539e-09,\n 2.9189e-08, 2.7072e-08, 4.0479e-09, 4.3675e-10, 5.8135e-08, 3.4010e-09,\n 8.8163e-08, 1.0041e-08, 1.9954e-08, 4.8753e-08, 2.9122e-09, 2.4148e-08,\n 9.2169e-09, 2.5289e-08, 4.2566e-08, 1.8704e-08, 2.9656e-08, 4.2557e-09,\n 1.7908e-09, 1.1866e-08, 3.1238e-08, 7.9286e-09, 3.6590e-09, 5.9830e-09,\n 3.2133e-08, 3.2743e-08, 3.5108e-09, 4.6715e-08, 3.0014e-08, 1.1997e-08,\n 3.6056e-08, 1.4989e-09, 1.4893e-08, 3.1635e-08, 2.3299e-08, 2.6513e-09,\n 3.5886e-08, 1.5218e-09, 9.9123e-09, 2.8332e-08, 4.4243e-07, 8.9830e-09,\n 1.5099e-08, 3.9008e-08, 1.6949e-08, 4.3310e-08, 1.9869e-08, 3.0804e-08,\n 1.3703e-08, 3.2984e-07, 2.6987e-08, 4.4465e-09, 1.6410e-08, 7.1142e-09,\n 1.0955e-08, 7.1407e-09, 3.2779e-09, 2.6583e-08, 1.0015e-08, 2.3789e-08,\n 2.8629e-09, 3.9312e-09, 7.9042e-08, 6.0125e-10, 8.1259e-08, 1.7079e-08,\n 9.1244e-09, 2.7841e-08, 2.4262e-08, 2.5329e-08, 5.8107e-08, 3.2090e-09,\n 8.8479e-09, 2.1587e-09, 9.7890e-09, 8.9704e-10, 3.1968e-08, 4.0331e-09,\n 1.3754e-08, 8.9118e-08, 3.7664e-09, 1.8726e-08, 7.0122e-08, 3.0226e-08,\n 3.2263e-08, 6.2424e-09, 1.4491e-08, 1.9217e-08, 6.0214e-09, 3.5330e-08,\n 7.2616e-08, 1.3757e-08, 5.8414e-09, 2.5771e-09, 4.0378e-08, 2.7883e-08,\n 6.0408e-08, 1.2168e-08, 4.0101e-08, 8.8090e-09, 2.3672e-08, 1.8469e-08,\n 6.7730e-09, 1.7112e-08, 1.7040e-08, 6.0781e-09, 2.2212e-08, 1.3217e-08,\n 4.4659e-09, 7.6861e-08, 4.4845e-08, 9.6477e-09, 1.7623e-08, 1.4240e-08,\n 6.3322e-09, 9.4277e-08, 1.2041e-09, 4.5112e-09, 1.2162e-08, 1.0491e-08,\n 9.5357e-09, 2.0788e-08, 7.2751e-09, 3.5196e-09, 1.2854e-08, 2.3871e-08,\n 9.7599e-09, 9.5182e-09, 2.4308e-09, 2.1261e-08, 3.3849e-08, 4.8054e-09,\n 5.7127e-08, 2.6650e-09, 2.0803e-08, 2.1539e-08, 3.7166e-09, 2.9559e-08,\n 3.3430e-08, 8.3761e-08, 2.2733e-08, 1.6037e-08, 3.3142e-09, 5.4268e-09,\n 5.8145e-08, 3.4033e-08, 3.9635e-08, 9.0927e-09, 7.1460e-08, 2.5452e-08,\n 2.1171e-08, 7.0305e-08, 8.1826e-08, 2.5690e-09, 4.7841e-09, 5.9828e-08,\n 2.0055e-08, 8.5242e-09, 1.0550e-08, 6.4658e-09, 2.0484e-08, 2.4656e-08,\n 1.5640e-08, 1.7194e-08, 6.7964e-09, 9.4059e-09, 6.1414e-09, 6.0071e-08,\n 2.9076e-08, 7.1646e-08, 3.2956e-08, 1.8827e-08, 1.9155e-08, 4.0748e-08,\n 1.6395e-07, 6.7190e-09, 9.2256e-08, 1.1649e-08, 3.4196e-08, 2.3613e-09,\n 3.3370e-08, 1.4122e-08, 2.2551e-08, 3.7220e-08, 1.6360e-08, 3.4882e-08,\n 5.3094e-09, 3.7994e-08, 5.7338e-08, 5.8309e-09, 3.4064e-11, 1.3689e-10,\n 6.7828e-09, 4.4132e-09, 2.1710e-09, 2.0522e-08, 4.6552e-08, 1.6132e-08,\n 7.7520e-09, 2.9440e-09, 1.2879e-08, 1.1576e-08, 4.7025e-09, 1.6222e-08,\n 1.3173e-08, 7.2099e-09, 8.3770e-08, 1.2023e-08, 6.3470e-08, 6.0252e-09,\n 6.7996e-08, 1.7188e-07, 9.4280e-08, 1.2193e-08, 4.0333e-09, 1.7703e-08,\n 1.6718e-08, 9.8931e-10, 5.7486e-08, 6.6237e-08, 2.7785e-08, 2.8936e-08,\n 7.3985e-09, 1.3471e-08, 6.5863e-09, 4.3227e-08, 1.4343e-08, 1.8866e-08,\n 1.5511e-08, 1.3732e-07, 2.4720e-09, 4.9698e-08, 5.5025e-08, 3.2204e-08,\n 3.9602e-08, 2.6993e-08, 9.0288e-09, 2.9649e-08, 6.1683e-10, 3.8518e-09,\n 4.4103e-08, 1.4891e-08, 5.2192e-09, 6.8361e-08, 7.0780e-09, 1.8509e-10,\n 1.4102e-08, 3.4157e-08, 2.6678e-08, 2.8745e-08, 1.0893e-09, 4.5165e-08,\n 2.7161e-08, 5.5798e-10, 7.0439e-08, 2.5954e-08, 1.5522e-08, 2.4206e-08,\n 1.1821e-09, 4.0330e-08, 1.1102e-09, 3.2413e-08, 6.7058e-09, 1.0962e-08,\n 4.7345e-09, 5.7792e-09, 1.2308e-08, 2.3526e-08, 5.1366e-09, 4.2653e-09,\n 1.8382e-08, 1.3644e-08, 3.9565e-08, 2.1423e-08, 2.4898e-08, 1.5609e-08,\n 1.3435e-08, 4.3449e-08, 5.1349e-08, 4.6380e-08, 3.4866e-09, 2.3284e-09,\n 3.0885e-08, 7.3478e-09, 2.9262e-09, 8.7241e-09, 2.4564e-08, 9.9508e-09,\n 2.9142e-08, 2.0501e-08, 8.5750e-09, 1.0050e-09, 5.8594e-09, 3.4285e-08,\n 5.2042e-10, 4.0257e-08], device='cuda:0')}, 107: {'step': tensor(62477.), 'exp_avg': tensor([[ 1.7743e-06, 2.1102e-06, 9.8593e-07, ..., 3.4790e-06,\n -2.3385e-08, -1.6024e-06],\n [-3.2989e-06, 4.2986e-06, -1.2015e-06, ..., -6.2639e-06,\n -2.6892e-06, -5.7489e-07],\n [-2.0702e-06, -3.4292e-06, -1.1476e-06, ..., -5.7285e-06,\n -2.0376e-07, 6.0889e-06],\n ...,\n [ 1.7070e-06, 2.8299e-06, -1.8196e-06, ..., 2.5089e-06,\n 1.9585e-06, -7.4423e-06],\n [ 1.3882e-06, 3.4396e-07, 1.2800e-06, ..., -1.5025e-06,\n -1.4943e-06, -1.1978e-06],\n [-3.0789e-06, -4.5370e-06, 1.2648e-07, ..., -1.4178e-06,\n -2.7950e-06, 2.6069e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[7.0071e-10, 5.0160e-10, 4.7802e-10, ..., 7.0028e-10, 8.8743e-11,\n 4.2267e-10],\n [1.0869e-09, 7.6181e-10, 1.0568e-09, ..., 1.8131e-09, 1.9350e-10,\n 6.0051e-10],\n [5.3547e-10, 5.8748e-10, 4.4471e-10, ..., 1.2947e-09, 1.1428e-10,\n 6.0846e-10],\n ...,\n [3.4991e-10, 4.3979e-10, 3.1604e-10, ..., 4.8126e-10, 4.2622e-11,\n 2.7537e-10],\n [5.6010e-10, 4.2199e-10, 4.3653e-10, ..., 7.2620e-10, 1.0627e-10,\n 3.3644e-10],\n [8.6110e-10, 5.0756e-10, 5.2523e-10, ..., 9.0412e-10, 1.0326e-10,\n 4.0807e-10]], device='cuda:0')}, 108: {'step': tensor(62477.), 'exp_avg': tensor([ 2.9053e-06, 2.6464e-07, -5.3696e-06, ..., 3.5140e-06,\n 1.7763e-06, -4.2618e-06], device='cuda:0'), 'exp_avg_sq': tensor([8.3585e-10, 1.1533e-09, 1.2241e-09, ..., 3.6514e-10, 8.3346e-10,\n 9.7796e-10], device='cuda:0')}, 109: {'step': tensor(62477.), 'exp_avg': tensor([[ 5.1754e-07, 2.3579e-06, 2.6648e-06, ..., -2.6075e-06,\n 1.5548e-06, -1.7679e-06],\n [-1.6729e-06, -8.0936e-07, -2.4934e-06, ..., 2.6990e-06,\n 1.5195e-06, 1.7198e-06],\n [-6.9301e-07, 1.9489e-06, -1.6417e-06, ..., 1.6259e-06,\n -9.0317e-07, -3.3724e-07],\n ...,\n [-2.4492e-06, 4.5973e-06, 1.8764e-06, ..., 5.4295e-07,\n -1.4659e-06, 1.0299e-06],\n [-2.3788e-06, -7.9915e-06, -1.2066e-05, ..., -8.3400e-06,\n -5.1768e-06, -3.9734e-06],\n [-8.1067e-06, -3.3919e-06, 7.9109e-06, ..., -4.4825e-06,\n -3.0792e-06, -3.1185e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[2.2882e-10, 2.6145e-10, 1.3544e-09, ..., 4.0399e-10, 1.9148e-10,\n 3.0128e-10],\n [4.2568e-10, 5.3397e-10, 2.8075e-09, ..., 5.6779e-10, 5.4411e-10,\n 9.0455e-10],\n [7.1607e-11, 9.8965e-11, 4.2417e-10, ..., 1.1960e-10, 6.9170e-11,\n 1.4234e-10],\n ...,\n [1.9367e-10, 1.6697e-10, 9.1332e-10, ..., 2.1844e-10, 1.4874e-10,\n 2.5824e-10],\n [5.0991e-10, 6.6433e-10, 3.2623e-09, ..., 9.2865e-10, 5.4968e-10,\n 9.3571e-10],\n [9.6358e-10, 1.4514e-09, 7.7131e-09, ..., 1.7040e-09, 1.0541e-09,\n 1.7304e-09]], device='cuda:0')}, 110: {'step': tensor(62477.), 'exp_avg': tensor([-4.1893e-06, 6.8114e-06, 1.5696e-06, 3.8314e-06, 2.4931e-06,\n -5.5225e-06, 5.6906e-06, -3.5397e-06, -8.3664e-07, 1.7421e-06,\n -1.2721e-05, 9.5267e-06, -2.2455e-06, -6.6715e-07, 5.1634e-06,\n -8.1052e-06, -2.0600e-05, -4.3267e-06, -6.7003e-06, 3.6045e-06,\n 4.0215e-06, 1.3981e-05, 9.9569e-06, -1.0682e-05, -3.1003e-06,\n -1.8114e-05, 6.9553e-06, -5.4901e-06, 3.0977e-07, -2.9263e-05,\n 4.2889e-06, -6.2043e-06, 1.8376e-05, -9.9424e-07, -1.7997e-06,\n -8.8352e-06, 4.5409e-06, 1.8193e-05, -3.4179e-05, 4.5610e-06,\n 5.0863e-08, -1.6426e-05, 2.6705e-05, 3.0567e-06, -1.1758e-05,\n 5.2304e-06, 8.1159e-06, 2.8753e-05, 1.1726e-05, 6.2178e-06,\n 9.8255e-06, -1.7210e-05, 3.1956e-05, 9.1752e-06, 6.2964e-07,\n 1.5530e-05, -1.3484e-05, -6.1924e-07, 1.4086e-05, -4.5113e-05,\n -6.1941e-06, -8.2778e-07, -9.9628e-06, 5.4862e-06, 1.9222e-05,\n 3.6619e-06, 1.4358e-05, 1.9578e-05, -1.2207e-05, 3.1297e-06,\n 1.0610e-05, -2.8438e-05, 1.3888e-05, 1.1966e-05, -5.7630e-06,\n -7.0170e-06, 5.9714e-06, 3.0767e-06, 6.3877e-06, -1.2885e-05,\n -1.4814e-05, 2.5020e-05, 9.9709e-06, -1.6021e-05, -6.0080e-06,\n 2.2561e-05, 1.8302e-06, 7.4433e-06, 1.1502e-06, -2.1353e-05,\n -1.2492e-05, 1.2215e-05, -2.0712e-06, 2.6554e-05, 7.5575e-06,\n -1.4328e-05, -4.1911e-07, -4.8200e-06, 2.0291e-06, 5.4576e-05,\n 3.5383e-06, -1.6827e-05, -8.2447e-06, -5.2337e-05, -4.0914e-06,\n -2.6335e-05, -1.3186e-05, 1.7643e-05, -5.5270e-05, 1.5673e-07,\n -4.6439e-08, -8.2050e-06, 2.6720e-05, -1.4101e-05, -2.5495e-05,\n 5.7988e-06, -6.0229e-06, 8.9897e-06, 1.3909e-05, -6.4743e-06,\n 1.0831e-05, -5.5898e-06, 7.9560e-06, -5.3662e-06, 1.7380e-06,\n -2.6742e-06, 7.2056e-06, -8.0773e-06, 1.1648e-06, 2.8035e-06,\n 9.7755e-07, 2.3038e-06, -5.3894e-06, 4.8204e-06, -1.7438e-05,\n -1.0847e-06, -1.6987e-06, 3.2434e-06, 2.3971e-06, -4.3087e-06,\n -2.4431e-06, -4.2926e-06, -1.0985e-05, 1.0615e-05, 3.1806e-06,\n -7.4553e-06, 7.6554e-07, 1.7849e-05, -3.8763e-05, 1.3175e-05,\n 3.2365e-06, 1.9840e-06, 4.6557e-06, 4.8994e-06, -2.2570e-06,\n -1.2435e-05, 1.1297e-05, -8.5689e-06, -1.1222e-05, 8.5645e-06,\n 1.3744e-05, -2.1435e-05, -1.3378e-05, 1.8688e-05, -1.3297e-06,\n -4.4682e-06, 3.7691e-06, 1.2111e-05, -4.1701e-06, 4.5461e-06,\n 3.1902e-05, -2.5230e-06, 1.0750e-05, 3.6519e-06, 1.0170e-06,\n -1.8550e-05, 3.0553e-06, -2.5724e-05, 4.8715e-06, -1.3067e-05,\n -1.3013e-05, -2.4631e-06, 2.1679e-06, 1.5468e-05, 2.0267e-05,\n -6.1246e-06, 1.0421e-05, 2.5883e-05, -2.5453e-07, -5.6699e-07,\n -2.2022e-05, 1.8792e-05, 7.9399e-07, 7.6677e-06, -2.0600e-05,\n 6.2489e-06, -1.2833e-06, 2.8291e-05, -1.5649e-05, 1.1040e-05,\n -1.0550e-05, 3.3452e-06, 1.9841e-06, 1.6097e-06, -9.6407e-06,\n -1.4582e-05, -3.3277e-06, 2.1940e-05, 4.9626e-06, 7.6465e-06,\n -2.0616e-06, -3.7198e-06, -8.4740e-06, -2.1007e-05, 3.1247e-06,\n -2.5545e-06, -4.1895e-05, 1.4900e-05, -5.0381e-06, 3.1810e-06,\n -2.9544e-06, 4.1584e-05, 1.0978e-05, -3.5937e-05, -2.4536e-05,\n 4.4326e-06, -1.1018e-05, 1.8254e-06, 6.9036e-06, -8.8942e-06,\n -5.4661e-06, 1.4994e-05, -6.6111e-06, 1.0965e-05, -1.3833e-05,\n 1.4057e-05, -8.0507e-06, -1.6005e-05, -6.9185e-06, -5.8835e-07,\n -1.3395e-05, 4.2782e-06, -1.0173e-05, -2.5487e-06, 1.7097e-05,\n 3.3785e-05, 4.3031e-06, 7.7729e-06, -1.6091e-05, 1.2171e-06,\n -1.6619e-05, -9.6690e-06, 7.3236e-06, -4.6844e-06, 3.1882e-05,\n -5.2924e-06, 1.8807e-05, -1.3146e-05, -7.7756e-06, 8.0834e-06,\n 5.3212e-06, -1.5799e-06, 3.9786e-07, 7.4344e-07, -5.0781e-06,\n -1.2597e-06, -6.6272e-06, 8.5509e-06, -5.0464e-06, -2.0872e-06,\n -1.1437e-06, 1.0349e-05, -8.1483e-06, 1.5923e-05, -1.4869e-05,\n -1.1499e-05, -1.0299e-05, -7.0499e-06, -1.5688e-05, 2.6004e-06,\n -4.5174e-06, -3.4443e-06, 1.5734e-05, 8.9230e-06, 4.9127e-06,\n -1.7472e-06, -1.4788e-05, -4.2733e-05, 5.6144e-06, 1.0495e-06,\n -3.1606e-06, -2.5904e-06, 1.1459e-05, 1.5634e-06, 1.6919e-05,\n 8.6807e-06, 6.4701e-05, 1.4275e-05, 4.6261e-07, 9.6487e-06,\n 1.3239e-05, -8.0673e-06, -1.2351e-05, 8.9272e-06, -3.5230e-06,\n -6.4766e-06, 3.2994e-06, 1.0946e-05, -6.7521e-07, -4.7063e-06,\n 1.6139e-05, -1.1656e-05, 2.3394e-05, 5.4837e-06, -6.1588e-06,\n -1.5306e-05, 5.7695e-06, 2.9056e-06, -5.3854e-06, -6.5178e-06,\n -9.4351e-06, 6.4380e-06, -2.2908e-06, 5.0582e-06, 7.7925e-06,\n -1.0970e-05, -5.5462e-06, 2.0167e-05, -1.0112e-06, 1.7809e-06,\n 6.3190e-06, 7.4299e-06, 3.3524e-05, -8.3667e-06, -2.4238e-05,\n 2.1634e-05, -9.7593e-06, 5.5212e-05, 2.0509e-05, -2.4368e-05,\n 5.2579e-06, -1.5383e-05, 1.4352e-08, 1.0898e-05, -3.3527e-06,\n 5.4970e-06, -2.0787e-06, -2.5489e-06, 1.2487e-05, 2.0689e-06,\n 6.7909e-07, -1.9260e-07, -1.6770e-05, 1.8762e-05, -3.4374e-05,\n 1.5183e-05, 8.2242e-06, 4.9826e-06, 1.4074e-06, -3.5991e-06,\n 4.7923e-06, 3.9501e-06, -1.3490e-05, 4.5684e-05, 5.9182e-06,\n 2.6284e-05, 2.4793e-07, -2.2558e-06, -7.1160e-06, 2.5266e-05,\n -4.9661e-06, -1.3336e-05, -1.0959e-05, 9.2526e-06, 1.7397e-05,\n -1.2296e-05, 3.2960e-05, 2.4957e-05, -1.3520e-06, 6.9240e-06,\n -1.6215e-05, -4.1253e-05, 1.2097e-07, -2.7517e-06, -1.4377e-05,\n -7.2091e-06, 8.2113e-06, -7.5298e-06, -8.9250e-06, 1.1346e-05,\n -2.8499e-05, -1.6542e-06, -4.3618e-06, -2.5194e-05, 1.1647e-06,\n 1.1697e-05, -3.4673e-06, -7.7740e-06, 3.2237e-06, 2.7105e-05,\n 1.2731e-05, 1.8896e-06, -1.9940e-06, -1.5161e-05, 1.4341e-06,\n 4.9322e-06, 1.2951e-05, -1.4351e-05, -1.0669e-05, 5.1991e-06,\n 7.8539e-07, 1.3009e-05, -2.6040e-06, -1.1118e-05, -3.1690e-06,\n -2.9695e-06, 1.0651e-06, 7.8431e-06, 1.1885e-05, -8.8224e-07,\n -1.8813e-06, 5.7544e-07, 1.4230e-05, -2.3026e-05, 1.0529e-05,\n -4.7061e-06, -1.9557e-05, 7.3375e-06, 5.3149e-06, 9.1027e-06,\n -7.8180e-06, 3.3173e-05, -2.5822e-05, 2.5726e-05, -5.6113e-06,\n 1.1905e-05, -1.7228e-05, -5.8486e-06, 2.3607e-06, -3.5574e-05,\n 1.2591e-05, -1.5883e-05, 2.3002e-06, -1.4224e-05, -3.9363e-06,\n 1.6872e-05, 1.6827e-05, 8.4229e-06, 2.8704e-05, -1.1396e-05,\n -2.7937e-05, -2.0657e-05, 2.8183e-06, -2.2476e-05, -7.3690e-06,\n -3.4101e-06, -7.2078e-06, -1.1787e-05, -1.7699e-05, -2.2366e-05,\n -1.7356e-05, 1.3202e-05, 1.1726e-05, -2.1568e-06, -4.1315e-06,\n -3.2364e-06, 1.9312e-05, -1.8829e-06, 1.1120e-05, 1.9135e-05,\n 1.2143e-06, 2.6751e-06, -9.6264e-06, 1.5853e-05, 4.3331e-06,\n -4.4014e-06, 1.7823e-05, 1.1154e-05, -1.4188e-05, -5.5928e-06,\n -1.0808e-05, -1.5741e-05, 1.5145e-05, 7.7721e-06, -3.1296e-06,\n -6.4638e-06, 1.2653e-05, 1.3663e-05, 3.4013e-05, -1.6140e-06,\n -7.7022e-06, -5.2249e-06, -2.6174e-07, -2.0678e-06, 5.8936e-06,\n 9.0271e-06, 3.8830e-06, 2.4024e-05, -6.3583e-06, 6.2485e-06,\n -3.5382e-05, 2.3416e-05, 2.8436e-05, -3.3064e-06, -1.8778e-06,\n -5.7153e-06, -7.7883e-06, -2.5650e-05, -3.0224e-05, 4.1541e-06,\n -2.6222e-05, -1.1121e-05], device='cuda:0'), 'exp_avg_sq': tensor([1.0528e-08, 2.0847e-08, 3.2046e-09, 4.9779e-09, 1.2304e-08, 2.0454e-09,\n 1.6801e-08, 2.0954e-08, 5.2590e-09, 1.3413e-08, 6.5152e-09, 8.1816e-09,\n 1.4085e-09, 2.6656e-09, 4.8178e-09, 4.1150e-09, 3.1628e-08, 6.9934e-09,\n 4.0825e-09, 8.0311e-09, 5.8469e-09, 4.5522e-09, 7.9278e-09, 1.3637e-08,\n 2.1930e-08, 3.5086e-08, 1.4282e-09, 1.5528e-08, 1.1676e-08, 2.9146e-08,\n 1.6604e-08, 1.2115e-08, 3.8551e-08, 4.7782e-09, 4.9626e-09, 7.8400e-09,\n 3.0649e-08, 3.0953e-08, 2.9028e-08, 9.4502e-09, 8.9293e-09, 2.1974e-08,\n 8.4480e-09, 2.3018e-08, 2.6943e-08, 4.0184e-08, 4.2464e-08, 1.6215e-08,\n 2.0244e-08, 7.2428e-09, 6.8311e-09, 3.3962e-08, 1.1836e-08, 2.2554e-08,\n 1.1523e-08, 2.9091e-08, 1.7428e-08, 6.9304e-09, 2.8699e-08, 4.4581e-08,\n 1.0130e-08, 4.3873e-09, 3.0199e-08, 2.2777e-08, 4.2551e-08, 2.2542e-08,\n 5.4798e-09, 1.9296e-08, 2.6711e-08, 2.8601e-09, 3.3216e-09, 3.5713e-08,\n 6.7675e-08, 1.8564e-08, 2.4670e-08, 4.9087e-09, 2.4786e-08, 3.1925e-09,\n 2.9419e-08, 7.2877e-09, 1.3036e-08, 8.0331e-08, 3.5772e-08, 1.5606e-08,\n 4.6448e-09, 1.0080e-08, 1.4413e-09, 2.7565e-08, 7.4329e-09, 1.7960e-08,\n 4.6596e-08, 2.1595e-08, 1.6019e-08, 3.9177e-08, 2.3637e-08, 3.8469e-08,\n 7.9536e-08, 8.0647e-09, 1.5106e-08, 6.9248e-08, 4.7778e-09, 1.2152e-08,\n 4.4431e-08, 1.0693e-07, 4.7512e-09, 4.0759e-08, 1.2054e-08, 2.1492e-08,\n 7.1544e-08, 5.4238e-08, 3.3754e-08, 1.3070e-09, 1.9029e-08, 1.7492e-08,\n 3.8974e-08, 8.5013e-09, 5.8224e-09, 1.0974e-08, 5.6265e-09, 2.0341e-08,\n 1.9311e-08, 1.8129e-08, 6.3026e-09, 2.6637e-09, 4.5806e-08, 3.8927e-09,\n 8.2557e-09, 3.2183e-09, 2.0048e-09, 1.6464e-09, 7.5947e-09, 4.4796e-09,\n 1.4235e-08, 8.9836e-10, 1.6423e-08, 3.6765e-09, 8.0405e-09, 3.0433e-09,\n 6.2443e-09, 9.1817e-09, 1.9835e-08, 2.0340e-09, 1.3317e-08, 6.8214e-09,\n 2.3154e-09, 1.9037e-08, 1.3868e-09, 3.4931e-08, 3.1713e-08, 1.5653e-09,\n 6.6186e-09, 6.7499e-09, 2.4138e-08, 4.1245e-09, 8.7512e-09, 7.7619e-09,\n 9.1188e-09, 1.8036e-08, 1.4752e-08, 1.3086e-08, 1.9027e-08, 3.1486e-08,\n 1.4035e-08, 2.1256e-08, 8.6439e-09, 7.1193e-09, 5.9420e-09, 2.3452e-08,\n 3.8022e-08, 9.6325e-09, 3.2513e-08, 5.6233e-09, 1.7116e-08, 7.0622e-09,\n 9.3438e-09, 3.2096e-08, 1.7216e-09, 1.1466e-07, 1.7282e-09, 3.3745e-08,\n 3.1880e-08, 1.7732e-08, 8.2348e-09, 1.5302e-08, 9.6675e-09, 7.8382e-09,\n 1.9831e-08, 3.4269e-08, 1.0127e-08, 2.3042e-08, 1.0446e-08, 1.2575e-08,\n 7.0387e-09, 2.7112e-08, 2.1750e-08, 2.6944e-08, 5.0882e-08, 3.9246e-08,\n 1.1401e-08, 1.4591e-08, 5.0215e-09, 1.1881e-08, 1.8680e-08, 5.8786e-09,\n 9.6261e-09, 1.1803e-08, 5.7466e-09, 7.0058e-08, 5.6644e-09, 8.0213e-09,\n 4.9780e-09, 3.5184e-08, 3.6667e-09, 2.7036e-08, 1.5678e-08, 5.1173e-08,\n 3.3445e-08, 3.3646e-08, 2.8165e-08, 3.0552e-08, 2.5373e-09, 6.0087e-08,\n 1.8833e-08, 4.0202e-08, 3.4954e-08, 1.7093e-08, 7.9796e-09, 1.4751e-08,\n 7.2855e-09, 4.1984e-08, 3.7319e-08, 1.3073e-08, 9.6738e-09, 2.5206e-08,\n 7.4224e-09, 1.7035e-08, 4.0988e-08, 2.7848e-08, 5.2044e-09, 2.1576e-08,\n 8.8828e-09, 3.5959e-09, 2.2667e-08, 2.9709e-09, 1.5464e-08, 5.2191e-08,\n 2.6365e-08, 1.1582e-08, 3.3743e-08, 3.1690e-08, 6.8394e-09, 1.8254e-08,\n 6.0312e-09, 7.5743e-09, 5.5268e-08, 4.1899e-09, 5.4266e-07, 1.1636e-08,\n 1.4283e-09, 6.4819e-09, 2.9418e-09, 8.5973e-09, 4.5867e-09, 1.1227e-08,\n 1.3939e-08, 6.7600e-09, 5.3987e-09, 6.3282e-09, 1.2266e-08, 1.2906e-08,\n 6.1041e-09, 1.4781e-08, 7.5415e-09, 5.8487e-09, 3.2078e-08, 2.3742e-08,\n 4.1141e-08, 6.6768e-09, 2.1191e-08, 1.1990e-08, 6.9678e-09, 1.5103e-08,\n 1.4478e-08, 2.9376e-08, 1.4214e-08, 5.9051e-09, 3.2635e-08, 6.7593e-08,\n 7.5848e-09, 2.7821e-08, 3.5918e-09, 8.1995e-09, 3.5496e-08, 2.5928e-09,\n 2.8109e-08, 2.3180e-08, 1.1749e-07, 7.5974e-09, 9.5634e-09, 4.3507e-08,\n 3.0487e-08, 3.9498e-08, 2.9572e-08, 3.0417e-08, 3.3292e-08, 5.0455e-09,\n 2.6328e-08, 1.7237e-08, 7.8593e-09, 7.4073e-09, 8.6090e-09, 7.5128e-09,\n 1.7023e-08, 1.3296e-08, 2.2059e-08, 1.3095e-07, 6.3344e-09, 1.8938e-08,\n 1.7480e-08, 3.5399e-08, 1.5337e-08, 1.6949e-08, 7.2461e-09, 7.1511e-09,\n 1.3522e-08, 6.3516e-08, 1.5355e-09, 3.0965e-08, 2.0235e-08, 8.5709e-09,\n 3.7924e-09, 1.0869e-08, 2.7282e-08, 1.4660e-08, 2.5883e-08, 7.3433e-08,\n 1.6700e-08, 9.1006e-08, 1.5801e-08, 2.5408e-08, 4.7195e-08, 1.5994e-08,\n 1.1271e-08, 5.7769e-08, 7.9684e-09, 9.7431e-09, 3.0633e-08, 6.1349e-08,\n 3.0914e-09, 3.5038e-08, 2.5609e-08, 5.4987e-08, 8.8137e-09, 1.1547e-08,\n 1.8311e-08, 1.9937e-08, 1.4553e-08, 3.0367e-08, 6.9686e-09, 2.2610e-08,\n 3.4298e-09, 2.0428e-08, 9.8335e-09, 3.8634e-08, 1.3005e-08, 4.5053e-08,\n 4.0939e-08, 5.7623e-09, 1.4509e-08, 3.7612e-08, 1.7639e-08, 3.6820e-08,\n 7.6020e-09, 1.7109e-08, 1.1607e-08, 1.7614e-08, 2.7865e-08, 3.3008e-08,\n 1.6335e-08, 1.1897e-08, 2.0157e-08, 4.3118e-08, 2.2486e-08, 1.8397e-08,\n 9.7214e-09, 1.0561e-08, 1.0584e-08, 1.4691e-08, 6.9512e-09, 8.4434e-09,\n 2.5736e-07, 6.4658e-09, 2.4125e-09, 1.8520e-08, 9.5510e-09, 7.8438e-09,\n 3.1067e-08, 5.1079e-09, 2.3171e-08, 2.0320e-08, 7.6158e-09, 4.2915e-09,\n 2.6769e-09, 5.1706e-09, 5.7030e-09, 7.9195e-09, 2.1990e-08, 3.6452e-08,\n 1.5785e-08, 3.2091e-09, 2.4946e-09, 6.8035e-09, 4.2281e-08, 1.7932e-08,\n 2.4358e-08, 7.8114e-09, 1.9233e-08, 2.2749e-08, 8.5178e-09, 1.2789e-08,\n 8.2193e-09, 1.1761e-08, 2.5646e-08, 1.6735e-08, 2.1739e-08, 1.7060e-09,\n 4.4934e-08, 4.5612e-09, 1.9129e-08, 3.4085e-08, 1.7982e-08, 4.6103e-08,\n 4.3702e-08, 4.8172e-08, 5.8767e-09, 1.0424e-08, 3.9086e-08, 5.5363e-08,\n 6.0017e-09, 7.5831e-08, 2.1758e-08, 2.2960e-08, 4.4072e-09, 4.9345e-08,\n 6.1771e-09, 5.3068e-08, 2.8503e-08, 2.8738e-08, 2.2107e-08, 4.1231e-08,\n 2.8294e-08, 4.4900e-08, 2.8517e-08, 5.5578e-08, 1.6279e-08, 8.3282e-09,\n 2.0878e-09, 2.0573e-08, 4.8470e-08, 1.9484e-08, 2.8265e-08, 4.3976e-08,\n 2.8792e-08, 1.4796e-08, 2.6898e-08, 2.5134e-08, 1.3301e-08, 3.3749e-10,\n 1.9406e-08, 1.5819e-08, 4.0823e-09, 2.5849e-08, 9.2228e-09, 2.5883e-08,\n 9.6979e-09, 2.3521e-08, 5.1728e-08, 2.7236e-08, 8.3684e-09, 1.7972e-08,\n 1.4978e-08, 3.3746e-08, 1.4329e-08, 1.8078e-08, 6.7552e-09, 4.3840e-09,\n 5.7335e-09, 3.1001e-08, 4.7427e-08, 1.2469e-08, 5.6008e-09, 1.1270e-08,\n 2.6421e-08, 4.2250e-08, 1.1873e-08, 9.7842e-09, 6.2878e-09, 2.1443e-08,\n 1.2123e-08, 2.0155e-08, 2.6265e-08, 1.7291e-08, 7.0837e-08, 3.7927e-09,\n 1.0503e-08, 7.7834e-09, 1.7439e-08, 4.3045e-08, 3.3650e-08, 7.3825e-09,\n 2.5812e-08, 7.0857e-08], device='cuda:0')}, 111: {'step': tensor(62477.), 'exp_avg': tensor([[-1.8951e-07, -2.4066e-07, 1.8272e-06, ..., 1.0283e-06,\n -1.0838e-06, 4.0505e-07],\n [-2.1738e-08, 4.6801e-07, -8.7626e-08, ..., 3.6623e-06,\n 1.7960e-06, -1.1282e-06],\n [-1.7318e-06, -1.2356e-06, 1.7579e-06, ..., 1.4793e-06,\n 3.5090e-06, -5.3346e-07],\n ...,\n [ 1.5398e-06, 1.4031e-06, -5.0358e-06, ..., -4.3896e-06,\n -1.8412e-06, 5.0845e-07],\n [-5.3312e-07, -2.7944e-07, 1.3035e-06, ..., 4.2159e-06,\n 1.9290e-07, 1.2018e-06],\n [-5.8773e-07, -5.0401e-07, -2.6741e-06, ..., -2.2176e-06,\n -3.8851e-06, -1.2473e-06]], device='cuda:0'), 'exp_avg_sq': tensor([[1.5494e-10, 1.3110e-10, 2.7359e-10, ..., 1.0344e-09, 4.7105e-10,\n 1.3911e-10],\n [2.9136e-10, 2.5145e-10, 5.4723e-10, ..., 1.8064e-09, 1.0770e-09,\n 2.7508e-10],\n [2.1897e-10, 1.8330e-10, 3.8258e-10, ..., 1.1570e-09, 6.5776e-10,\n 1.9867e-10],\n ...,\n [2.9807e-11, 2.5844e-11, 1.4343e-10, ..., 2.6136e-10, 1.1710e-10,\n 4.2099e-11],\n [6.5582e-11, 5.6201e-11, 3.4828e-10, ..., 8.5819e-10, 2.6367e-10,\n 9.3333e-11],\n [7.6983e-11, 6.9710e-11, 7.2315e-10, ..., 6.8485e-10, 4.3395e-10,\n 1.6150e-10]], device='cuda:0')}, 112: {'step': tensor(62477.), 'exp_avg': tensor([ 4.1598e-06, 2.4213e-06, 4.8045e-06, ..., -1.0794e-05,\n 4.0490e-06, -2.6069e-06], device='cuda:0'), 'exp_avg_sq': tensor([3.4830e-09, 5.8586e-09, 5.0595e-09, ..., 8.8333e-10, 2.5098e-09,\n 2.6508e-09], device='cuda:0')}}, 'param_groups': [{'lr': 1.604211101863864e-05, 'betas': (0.9499890335888962, 0.999), 'eps': 1e-08, 'weight_decay': 1e-05, 'amsgrad': False, 'foreach': None, 'maximize': False, 'capturable': False, 'differentiable': False, 'fused': None, 'initial_lr': 1.6e-05, 'max_lr': 0.0004, 'min_lr': 1.5999999999999999e-09, 'max_momentum': 0.95, 'base_momentum': 0.85, 'params': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360]}]}]", + "lr_schedulers": "[{'total_steps': 125000, '_schedule_phases': [{'end_step': 37499.0, 'start_lr': 'initial_lr', 'end_lr': 'max_lr', 'start_momentum': 'max_momentum', 'end_momentum': 'base_momentum'}, {'end_step': 124999, 'start_lr': 'max_lr', 'end_lr': 'min_lr', 'start_momentum': 'base_momentum', 'end_momentum': 'max_momentum'}], 'anneal_func': , 'cycle_momentum': True, 'use_beta1': True, 'base_lrs': [1.6e-05], 'last_epoch': 250, 'verbose': False, '_step_count': 251, '_get_lr_called_within_step': False, '_last_lr': [1.604211101863864e-05]}]" +} \ No newline at end of file