diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..9953fc99b83bed527fbe59155d8600fe8d04c2a1 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/run-jnzzkcth.wandb filter=lfs diff=lfs merge=lfs -text +2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/run-bbl5fd2u.wandb filter=lfs diff=lfs merge=lfs -text +2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/run-jp82yqcj.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/2024-09-23/06-36-18/.hydra/config.yaml b/2024-09-23/06-36-18/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a530e67cc9e2a492d1c55eb6b2bab1f49e3d471 --- /dev/null +++ b/2024-09-23/06-36-18/.hydra/config.yaml @@ -0,0 +1,74 @@ +experimental: + model: + core_model_type: pass_through + hidden_dim: 384 + byte_hidden: 128 + max_chunk_length: 12 + max_num_chunks: 1024 + num_delimiter_layers: 3 + num_byte_decoder_layers: 5 + target_chunk_len: 8.0 + chunk_len_loss_weight: 0.1 + chunk_len_penalty: 0.1 + context_window: 8192 + embedding_model_type: byte_level + tokenizer_type: bpe + tokenizer_dataset_name: simple_en_wiki + tokenizer_simplify_data: true + vocab_size: 259 + lm_head_type: byte_level + lm_head_normalization: rms_norm + lm_head_bias: false + lm_head_dropout: 0.0 + model_shell_type: byte_autoencoder_shell + embedding_weight_tying: true + ffn_weight_tying: false + cproj_weight_tying: false + positional_encoding_type: rope + trainer: + trainer_type: base_trainer + dataset: fineweb_edu_10B + batch_size: 6 + gradient_accumulation_steps: 8 + max_iters: 10000 + eval_interval: 50000000 + log_interval: 1 + checkpoint_interval: 1000 + eval_iters: 1000 + run_eval: false + eval: + mcq_benchmarks: null + mcq_num_samples: 1000 + eval_byte_metrics: false + text_modeling_eval: false + text_generation_eval: false + optimizer: + optimizer_name: adamW + lr: 0.0005 + min_lr: 5.0e-05 + weight_decay: 0.01 + beta1: 0.9 + beta2: 0.95 + grad_clip: 1.0 + lr_scheduler: + name: cosine + warmup_iters: 100 + dataloader: + name: autoencoder + datasampling: + name: standard + loss_fn: + name: pass_through + general: + logging: + wandb_log: true + wandb_project: SuperTinyLanguageModels + wandb_run_name: null + group_name: experimental_byte_level + paths: + output_dir: outputs + data_dir: data + checkpoint_dir: checkpoints + eval_dir: evals + seed: 489 + device: cuda diff --git a/2024-09-23/06-36-18/.hydra/hydra.yaml b/2024-09-23/06-36-18/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..576fbaf4e4092d12a10549e3031881ff9045343b --- /dev/null +++ b/2024-09-23/06-36-18/.hydra/hydra.yaml @@ -0,0 +1,154 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: train + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: experimental/byte_autoencoder_1 + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.1' + cwd: /root/SuperTinyLanguageModels + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /root/SuperTinyLanguageModels/configs/train + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /root/SuperTinyLanguageModels/outputs/2024-09-23/06-36-18 + choices: + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/2024-09-23/06-36-18/.hydra/overrides.yaml b/2024-09-23/06-36-18/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/2024-09-23/06-36-18/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/2024-09-23/06-36-18/train.log b/2024-09-23/06-36-18/train.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/2024-09-23/07-06-14/.hydra/config.yaml b/2024-09-23/07-06-14/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a530e67cc9e2a492d1c55eb6b2bab1f49e3d471 --- /dev/null +++ b/2024-09-23/07-06-14/.hydra/config.yaml @@ -0,0 +1,74 @@ +experimental: + model: + core_model_type: pass_through + hidden_dim: 384 + byte_hidden: 128 + max_chunk_length: 12 + max_num_chunks: 1024 + num_delimiter_layers: 3 + num_byte_decoder_layers: 5 + target_chunk_len: 8.0 + chunk_len_loss_weight: 0.1 + chunk_len_penalty: 0.1 + context_window: 8192 + embedding_model_type: byte_level + tokenizer_type: bpe + tokenizer_dataset_name: simple_en_wiki + tokenizer_simplify_data: true + vocab_size: 259 + lm_head_type: byte_level + lm_head_normalization: rms_norm + lm_head_bias: false + lm_head_dropout: 0.0 + model_shell_type: byte_autoencoder_shell + embedding_weight_tying: true + ffn_weight_tying: false + cproj_weight_tying: false + positional_encoding_type: rope + trainer: + trainer_type: base_trainer + dataset: fineweb_edu_10B + batch_size: 6 + gradient_accumulation_steps: 8 + max_iters: 10000 + eval_interval: 50000000 + log_interval: 1 + checkpoint_interval: 1000 + eval_iters: 1000 + run_eval: false + eval: + mcq_benchmarks: null + mcq_num_samples: 1000 + eval_byte_metrics: false + text_modeling_eval: false + text_generation_eval: false + optimizer: + optimizer_name: adamW + lr: 0.0005 + min_lr: 5.0e-05 + weight_decay: 0.01 + beta1: 0.9 + beta2: 0.95 + grad_clip: 1.0 + lr_scheduler: + name: cosine + warmup_iters: 100 + dataloader: + name: autoencoder + datasampling: + name: standard + loss_fn: + name: pass_through + general: + logging: + wandb_log: true + wandb_project: SuperTinyLanguageModels + wandb_run_name: null + group_name: experimental_byte_level + paths: + output_dir: outputs + data_dir: data + checkpoint_dir: checkpoints + eval_dir: evals + seed: 489 + device: cuda diff --git a/2024-09-23/07-06-14/.hydra/hydra.yaml b/2024-09-23/07-06-14/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aed0d6ba2d07b9b237504c23171073163153893e --- /dev/null +++ b/2024-09-23/07-06-14/.hydra/hydra.yaml @@ -0,0 +1,154 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: train + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: experimental/byte_autoencoder_1 + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.1' + cwd: /root/SuperTinyLanguageModels + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /root/SuperTinyLanguageModels/configs/train + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /root/SuperTinyLanguageModels/outputs/2024-09-23/07-06-14 + choices: + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/2024-09-23/07-06-14/.hydra/overrides.yaml b/2024-09-23/07-06-14/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/2024-09-23/07-06-14/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/2024-09-23/07-06-14/train.log b/2024-09-23/07-06-14/train.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/2024-09-23/08-39-13/.hydra/config.yaml b/2024-09-23/08-39-13/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a530e67cc9e2a492d1c55eb6b2bab1f49e3d471 --- /dev/null +++ b/2024-09-23/08-39-13/.hydra/config.yaml @@ -0,0 +1,74 @@ +experimental: + model: + core_model_type: pass_through + hidden_dim: 384 + byte_hidden: 128 + max_chunk_length: 12 + max_num_chunks: 1024 + num_delimiter_layers: 3 + num_byte_decoder_layers: 5 + target_chunk_len: 8.0 + chunk_len_loss_weight: 0.1 + chunk_len_penalty: 0.1 + context_window: 8192 + embedding_model_type: byte_level + tokenizer_type: bpe + tokenizer_dataset_name: simple_en_wiki + tokenizer_simplify_data: true + vocab_size: 259 + lm_head_type: byte_level + lm_head_normalization: rms_norm + lm_head_bias: false + lm_head_dropout: 0.0 + model_shell_type: byte_autoencoder_shell + embedding_weight_tying: true + ffn_weight_tying: false + cproj_weight_tying: false + positional_encoding_type: rope + trainer: + trainer_type: base_trainer + dataset: fineweb_edu_10B + batch_size: 6 + gradient_accumulation_steps: 8 + max_iters: 10000 + eval_interval: 50000000 + log_interval: 1 + checkpoint_interval: 1000 + eval_iters: 1000 + run_eval: false + eval: + mcq_benchmarks: null + mcq_num_samples: 1000 + eval_byte_metrics: false + text_modeling_eval: false + text_generation_eval: false + optimizer: + optimizer_name: adamW + lr: 0.0005 + min_lr: 5.0e-05 + weight_decay: 0.01 + beta1: 0.9 + beta2: 0.95 + grad_clip: 1.0 + lr_scheduler: + name: cosine + warmup_iters: 100 + dataloader: + name: autoencoder + datasampling: + name: standard + loss_fn: + name: pass_through + general: + logging: + wandb_log: true + wandb_project: SuperTinyLanguageModels + wandb_run_name: null + group_name: experimental_byte_level + paths: + output_dir: outputs + data_dir: data + checkpoint_dir: checkpoints + eval_dir: evals + seed: 489 + device: cuda diff --git a/2024-09-23/08-39-13/.hydra/hydra.yaml b/2024-09-23/08-39-13/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05e6201ef662aad8f1a95e11b78170c368984a30 --- /dev/null +++ b/2024-09-23/08-39-13/.hydra/hydra.yaml @@ -0,0 +1,154 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: train + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: experimental/byte_autoencoder_1 + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.1' + cwd: /root/SuperTinyLanguageModels + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /root/SuperTinyLanguageModels/configs/train + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /root/SuperTinyLanguageModels/outputs/2024-09-23/08-39-13 + choices: + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/2024-09-23/08-39-13/.hydra/overrides.yaml b/2024-09-23/08-39-13/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/2024-09-23/08-39-13/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/2024-09-23/08-39-13/train.log b/2024-09-23/08-39-13/train.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/2024-09-23/08-40-08/.hydra/config.yaml b/2024-09-23/08-40-08/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a530e67cc9e2a492d1c55eb6b2bab1f49e3d471 --- /dev/null +++ b/2024-09-23/08-40-08/.hydra/config.yaml @@ -0,0 +1,74 @@ +experimental: + model: + core_model_type: pass_through + hidden_dim: 384 + byte_hidden: 128 + max_chunk_length: 12 + max_num_chunks: 1024 + num_delimiter_layers: 3 + num_byte_decoder_layers: 5 + target_chunk_len: 8.0 + chunk_len_loss_weight: 0.1 + chunk_len_penalty: 0.1 + context_window: 8192 + embedding_model_type: byte_level + tokenizer_type: bpe + tokenizer_dataset_name: simple_en_wiki + tokenizer_simplify_data: true + vocab_size: 259 + lm_head_type: byte_level + lm_head_normalization: rms_norm + lm_head_bias: false + lm_head_dropout: 0.0 + model_shell_type: byte_autoencoder_shell + embedding_weight_tying: true + ffn_weight_tying: false + cproj_weight_tying: false + positional_encoding_type: rope + trainer: + trainer_type: base_trainer + dataset: fineweb_edu_10B + batch_size: 6 + gradient_accumulation_steps: 8 + max_iters: 10000 + eval_interval: 50000000 + log_interval: 1 + checkpoint_interval: 1000 + eval_iters: 1000 + run_eval: false + eval: + mcq_benchmarks: null + mcq_num_samples: 1000 + eval_byte_metrics: false + text_modeling_eval: false + text_generation_eval: false + optimizer: + optimizer_name: adamW + lr: 0.0005 + min_lr: 5.0e-05 + weight_decay: 0.01 + beta1: 0.9 + beta2: 0.95 + grad_clip: 1.0 + lr_scheduler: + name: cosine + warmup_iters: 100 + dataloader: + name: autoencoder + datasampling: + name: standard + loss_fn: + name: pass_through + general: + logging: + wandb_log: true + wandb_project: SuperTinyLanguageModels + wandb_run_name: null + group_name: experimental_byte_level + paths: + output_dir: outputs + data_dir: data + checkpoint_dir: checkpoints + eval_dir: evals + seed: 489 + device: cuda diff --git a/2024-09-23/08-40-08/.hydra/hydra.yaml b/2024-09-23/08-40-08/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b1aabdd2bc9ab47027e80dd611d18c09bd599849 --- /dev/null +++ b/2024-09-23/08-40-08/.hydra/hydra.yaml @@ -0,0 +1,154 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: train + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: experimental/byte_autoencoder_1 + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.1' + cwd: /root/SuperTinyLanguageModels + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /root/SuperTinyLanguageModels/configs/train + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /root/SuperTinyLanguageModels/outputs/2024-09-23/08-40-08 + choices: + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/2024-09-23/08-40-08/.hydra/overrides.yaml b/2024-09-23/08-40-08/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/2024-09-23/08-40-08/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/2024-09-23/08-40-08/train.log b/2024-09-23/08-40-08/train.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/2024-09-23/08-40-08/wandb/debug-internal.log b/2024-09-23/08-40-08/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..0d56b77004bab416c59a082e3b3ecaeaa4259cc9 --- /dev/null +++ b/2024-09-23/08-40-08/wandb/debug-internal.log @@ -0,0 +1,14 @@ +{"time":"2024-09-23T09:14:22.59580271Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T09:14:22.59581747Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug-core.log"} +{"time":"2024-09-23T09:14:22.595881422Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T09:14:22.595887882Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug-core.log"} +{"time":"2024-09-23T09:14:22.59917443Z","level":"INFO","msg":"created new stream","id":"a2kxhd8v"} +{"time":"2024-09-23T09:14:22.59919309Z","level":"INFO","msg":"stream: started","id":"a2kxhd8v"} +{"time":"2024-09-23T09:14:22.59921417Z","level":"INFO","msg":"sender: started","stream_id":{"value":"a2kxhd8v"}} +{"time":"2024-09-23T09:14:22.599226691Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"a2kxhd8v"}} +{"time":"2024-09-23T09:14:22.599236461Z","level":"INFO","msg":"handler: started","stream_id":{"value":"a2kxhd8v"}} +{"time":"2024-09-23T09:14:22.982350736Z","level":"INFO","msg":"wandb-core","!BADKEY":null} +{"time":"2024-09-23T09:14:22.985015444Z","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-09-23T09:14:27.10372121Z","level":"INFO","msg":"stream: closing","id":"a2kxhd8v"} +{"time":"2024-09-23T09:14:27.103806442Z","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-09-23T09:14:27.104964992Z","level":"INFO","msg":"Stopped system monitor"} diff --git a/2024-09-23/08-40-08/wandb/debug.log b/2024-09-23/08-40-08/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..482b10e81b3a41bd8b5f27a5100b02c29abf1ed0 --- /dev/null +++ b/2024-09-23/08-40-08/wandb/debug.log @@ -0,0 +1,26 @@ +2024-09-23 09:14:22,582 INFO MainThread:78108 [wandb_setup.py:_flush():77] Current SDK version is 0.18.1 +2024-09-23 09:14:22,582 INFO MainThread:78108 [wandb_setup.py:_flush():77] Configure stats pid to 78108 +2024-09-23 09:14:22,582 INFO MainThread:78108 [wandb_setup.py:_flush():77] Loading settings from /root/.config/wandb/settings +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_setup.py:_flush():77] Loading settings from /root/SuperTinyLanguageModels/outputs/2024-09-23/08-40-08/wandb/settings +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_setup.py:_flush():77] Loading settings from environment variables: {} +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_setup.py:_flush():77] Applying setup settings: {'mode': None, '_disable_service': None} +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/root/SuperTinyLanguageModels/train.py', 'program': '/root/SuperTinyLanguageModels/train.py'} +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_setup.py:_flush():77] Applying login settings: {} +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_init.py:_log_setup():532] Logging user logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug.log +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_init.py:_log_setup():533] Logging internal logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug-internal.log +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_init.py:init():616] calling init triggers +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_init.py:init():623] wandb.init called with sweep_config: {} +config: {'model': {'core_model_type': 'pass_through', 'hidden_dim': 384, 'byte_hidden': 128, 'max_chunk_length': 12, 'max_num_chunks': 1024, 'num_delimiter_layers': 3, 'num_byte_decoder_layers': 5, 'target_chunk_len': 8.0, 'chunk_len_loss_weight': 0.1, 'chunk_len_penalty': 0.1, 'context_window': 8192, 'embedding_model_type': 'byte_level', 'tokenizer_type': 'bpe', 'tokenizer_dataset_name': 'simple_en_wiki', 'tokenizer_simplify_data': True, 'vocab_size': 259, 'lm_head_type': 'byte_level', 'lm_head_normalization': 'rms_norm', 'lm_head_bias': False, 'lm_head_dropout': 0.0, 'model_shell_type': 'byte_autoencoder_shell', 'embedding_weight_tying': True, 'ffn_weight_tying': False, 'cproj_weight_tying': False, 'positional_encoding_type': 'rope'}, 'trainer': {'trainer_type': 'base_trainer', 'dataset': 'fineweb_edu_10B', 'batch_size': 6, 'gradient_accumulation_steps': 8, 'max_iters': 10000, 'eval_interval': 50000000, 'log_interval': 1, 'checkpoint_interval': 1000, 'eval_iters': 1000, 'run_eval': False, 'eval': {'mcq_benchmarks': None, 'mcq_num_samples': 1000, 'eval_byte_metrics': False, 'text_modeling_eval': False, 'text_generation_eval': False}, 'optimizer': {'optimizer_name': 'adamW', 'lr': 0.0005, 'min_lr': 5e-05, 'weight_decay': 0.01, 'beta1': 0.9, 'beta2': 0.95, 'grad_clip': 1.0}, 'lr_scheduler': {'name': 'cosine', 'warmup_iters': 100}, 'dataloader': {'name': 'autoencoder'}, 'datasampling': {'name': 'standard'}, 'loss_fn': {'name': 'pass_through'}}, 'general': {'logging': {'wandb_log': True, 'wandb_project': 'SuperTinyLanguageModels', 'wandb_run_name': None, 'group_name': 'experimental_byte_level'}, 'paths': {'output_dir': 'outputs', 'data_dir': '/root/SuperTinyLanguageModels/data', 'checkpoint_dir': 'checkpoints', 'eval_dir': '/root/SuperTinyLanguageModels/evals'}, 'seed': 489, 'device': 'cuda'}} +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_init.py:init():666] starting backend +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_init.py:init():670] setting up manager +2024-09-23 09:14:22,584 INFO MainThread:78108 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-09-23 09:14:22,586 INFO MainThread:78108 [wandb_init.py:init():678] backend started and connected +2024-09-23 09:14:22,588 INFO MainThread:78108 [wandb_init.py:init():773] updated telemetry +2024-09-23 09:14:22,598 INFO MainThread:78108 [wandb_init.py:init():806] communicating run to backend with 90.0 second timeout +2024-09-23 09:14:22,974 INFO MainThread:78108 [wandb_init.py:init():857] starting run threads in backend +2024-09-23 09:14:23,128 INFO MainThread:78108 [wandb_run.py:_console_start():2459] atexit reg +2024-09-23 09:14:23,128 INFO MainThread:78108 [wandb_run.py:_redirect():2307] redirect: wrap_raw +2024-09-23 09:14:23,129 INFO MainThread:78108 [wandb_run.py:_redirect():2372] Wrapping output streams. +2024-09-23 09:14:23,129 INFO MainThread:78108 [wandb_run.py:_redirect():2397] Redirects installed. +2024-09-23 09:14:23,135 INFO MainThread:78108 [wandb_init.py:init():900] run started, returning control to user process +2024-09-23 09:14:27,104 WARNING MsgRouterThr:78108 [router.py:message_loop():77] message_loop has been closed diff --git a/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/files/config.yaml b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d42d0a0b39774527204c1abef78f074da3481da9 --- /dev/null +++ b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/files/config.yaml @@ -0,0 +1,114 @@ +_wandb: + value: + cli_version: 0.18.1 + m: [] + python_version: 3.10.14 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 50 + - 51 + - 53 + - 55 + "2": + - 1 + - 5 + - 11 + - 49 + - 50 + - 51 + - 53 + - 55 + "3": + - 13 + - 15 + - 16 + - 23 + - 55 + "4": 3.10.14 + "5": 0.18.1 + "6": 4.44.2 + "8": + - 5 + - 9 + "12": 0.18.1 + "13": linux-x86_64 +general: + value: + device: cuda + logging: + group_name: experimental_byte_level + wandb_log: true + wandb_project: SuperTinyLanguageModels + wandb_run_name: null + paths: + checkpoint_dir: checkpoints + data_dir: /root/SuperTinyLanguageModels/data + eval_dir: /root/SuperTinyLanguageModels/evals + output_dir: outputs + seed: 489 +model: + value: + byte_hidden: 128 + chunk_len_loss_weight: 0.1 + chunk_len_penalty: 0.1 + context_window: 8192 + core_model_type: pass_through + cproj_weight_tying: false + embedding_model_type: byte_level + embedding_weight_tying: true + ffn_weight_tying: false + hidden_dim: 384 + lm_head_bias: false + lm_head_dropout: 0 + lm_head_normalization: rms_norm + lm_head_type: byte_level + max_chunk_length: 12 + max_num_chunks: 1024 + model_shell_type: byte_autoencoder_shell + num_byte_decoder_layers: 5 + num_delimiter_layers: 3 + positional_encoding_type: rope + target_chunk_len: 8 + tokenizer_dataset_name: simple_en_wiki + tokenizer_simplify_data: true + tokenizer_type: bpe + vocab_size: 259 +trainer: + value: + batch_size: 6 + checkpoint_interval: 1000 + dataloader: + name: autoencoder + datasampling: + name: standard + dataset: fineweb_edu_10B + eval: + eval_byte_metrics: false + mcq_benchmarks: null + mcq_num_samples: 1000 + text_generation_eval: false + text_modeling_eval: false + eval_interval: 50000000 + eval_iters: 1000 + gradient_accumulation_steps: 8 + log_interval: 1 + loss_fn: + name: pass_through + lr_scheduler: + name: cosine + warmup_iters: 100 + max_iters: 10000 + optimizer: + beta1: 0.9 + beta2: 0.95 + grad_clip: 1 + lr: 0.0005 + min_lr: 5e-05 + optimizer_name: adamW + weight_decay: 0.01 + run_eval: false + trainer_type: base_trainer diff --git a/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/files/output.log b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..62352d6c67bb9b6844a2ab2b4f4afce362e4e702 --- /dev/null +++ b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/files/output.log @@ -0,0 +1,3 @@ +Weight and Biases Initialized +Rank0 Trainer built +Training loop is starting diff --git a/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/files/requirements.txt b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..68566bb067c42f795bc9c3d68df112461250945c --- /dev/null +++ b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/files/requirements.txt @@ -0,0 +1,121 @@ +setuptools==75.1.0 +wheel==0.44.0 +pip==24.2 +wcwidth==0.2.13 +sentencepiece==0.2.0 +pytz==2024.2 +mpmath==1.3.0 +distlib==0.3.8 +antlr4-python3-runtime==4.9.3 +xxhash==3.5.0 +urllib3==2.2.3 +tzdata==2024.1 +typing_extensions==4.12.2 +tqdm==4.66.5 +threadpoolctl==3.5.0 +sympy==1.13.3 +smmap==5.0.1 +six==1.16.0 +setproctitle==1.3.3 +safetensors==0.4.5 +regex==2024.9.11 +rapidfuzz==3.9.7 +PyYAML==6.0.2 +pytrec-eval-terrier==0.5.6 +pyphen==0.16.0 +Pygments==2.18.0 +psutil==6.0.0 +protobuf==5.28.2 +prettytable==3.11.0 +polars==1.7.1 +platformdirs==4.3.6 +pillow==10.4.0 +packaging==24.1 +nvidia-nvtx-cu12==12.1.105 +nvidia-nvjitlink-cu12==12.6.68 +nvidia-nccl-cu12==2.20.5 +nvidia-curand-cu12==10.3.2.106 +nvidia-cufft-cu12==11.0.2.54 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cublas-cu12==12.1.3.1 +numpy==1.26.4 +nodeenv==1.9.1 +networkx==3.3 +mdurl==0.1.2 +MarkupSafe==2.1.5 +joblib==1.4.2 +idna==3.10 +identify==2.6.1 +fsspec==2024.6.1 +frozenlist==1.4.1 +filelock==3.16.1 +eval_type_backport==0.2.0 +dill==0.3.8 +click==8.1.7 +charset-normalizer==3.3.2 +cfgv==3.4.0 +certifi==2024.8.30 +attrs==24.2.0 +async-timeout==4.0.3 +annotated-types==0.7.0 +aiohappyeyeballs==2.4.0 +virtualenv==20.26.5 +triton==3.0.0 +textstat==0.7.4 +sentry-sdk==2.14.0 +scipy==1.14.1 +requests==2.32.3 +python-dateutil==2.9.0.post0 +pydantic_core==2.23.4 +pyarrow==17.0.0 +omegaconf==2.3.0 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-cudnn-cu12==9.1.0.70 +nltk==3.9.1 +multiprocess==0.70.16 +multidict==6.1.0 +markdown-it-py==3.0.0 +Levenshtein==0.26.0 +Jinja2==3.1.4 +gitdb==4.0.11 +docker-pycreds==0.4.0 +aiosignal==1.3.1 +yarl==1.11.1 +tiktoken==0.7.0 +scikit-learn==1.5.2 +rich==13.8.1 +pydantic==2.9.2 +pre-commit==3.8.0 +pandas==2.2.3 +nvidia-cusolver-cu12==11.4.5.107 +language_tool_python==2.8.1 +hydra-core==1.3.2 +huggingface-hub==0.25.0 +GitPython==3.1.43 +wandb==0.18.1 +torch==2.4.1 +tokenizers==0.19.1 +aiohttp==3.10.5 +transformers==4.44.2 +sentence-transformers==3.1.1 +datasets==3.0.0 +mteb==1.14.21 +autocommand==2.2.2 +backports.tarfile==1.2.0 +importlib_metadata==8.0.0 +importlib_resources==6.4.0 +inflect==7.3.1 +jaraco.collections==5.1.0 +jaraco.context==5.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +more-itertools==10.3.0 +packaging==24.1 +platformdirs==4.2.2 +tomli==2.0.1 +typeguard==4.3.0 +typing_extensions==4.12.2 +wheel==0.43.0 +zipp==3.19.2 diff --git a/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/files/wandb-metadata.json b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..a51c5c2e358595bbc3bff2b3dab5ad35c3d9a176 --- /dev/null +++ b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/files/wandb-metadata.json @@ -0,0 +1,88 @@ +{ + "os": "Linux-5.15.0-117-generic-x86_64-with-glibc2.31", + "python": "3.10.14", + "startedAt": "2024-09-23T09:14:22.586171Z", + "args": [ + "--config-name", + "experimental/byte_autoencoder_1" + ], + "program": "/root/SuperTinyLanguageModels/train.py", + "codePath": "train.py", + "git": { + "remote": "https://github.com/LeonGuertler/SuperTinyLanguageModels.git", + "commit": "ebdf9039e89c5d337997d0c2b11bf4e992886243" + }, + "email": "calvin14@gmail.com", + "root": "/root/SuperTinyLanguageModels/outputs/2024-09-23/08-40-08", + "host": "11c6e13f6a55", + "username": "root", + "executable": "/root/SuperTinyLanguageModels/.conda/bin/python3", + "cpu_count": 128, + "cpu_count_logical": 256, + "gpu": "[NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090]", + "gpu_count": 8, + "disk": { + "/": { + "total": "1123133947904", + "used": "551794225152" + } + }, + "memory": { + "total": "540812599296" + }, + "cpu": { + "count": 128, + "countLogical": 256 + }, + "gpu_nvidia": [ + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + } + ], + "cudaVersion": "12.5" +} \ No newline at end of file diff --git a/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/files/wandb-summary.json b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..c059977b34bd915d979f049d6de5e7dbe3ee842d --- /dev/null +++ b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":4}} \ No newline at end of file diff --git a/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug-core.log b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..deb0a45d470539fd6e99ccb0cd62ae10fd9f23b1 --- /dev/null +++ b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug-core.log @@ -0,0 +1,12 @@ +{"time":"2024-09-23T09:14:21.933081362Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp9hgpve6u/port-78108.txt","pid":78108,"debug":false,"disable-analytics":false} +{"time":"2024-09-23T09:14:21.933136193Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false} +{"time":"2024-09-23T09:14:21.935284221Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":78108} +{"time":"2024-09-23T09:14:21.935348272Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":43153,"Zone":""}} +{"time":"2024-09-23T09:14:22.076126266Z","level":"INFO","msg":"created new connection","id":"127.0.0.1:57616"} +{"time":"2024-09-23T09:14:22.595626377Z","level":"INFO","msg":"connection init received","streamId":"a2kxhd8v","id":"127.0.0.1:57616"} +{"time":"2024-09-23T09:14:22.595853241Z","level":"ERROR","msg":"error creating symlink","error":"symlink /root/.cache/wandb/logs/core-debug-20240923_091421.log /root/SuperTinyLanguageModels/outputs/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug-core.log: file exists"} +{"time":"2024-09-23T09:14:22.59919809Z","level":"INFO","msg":"connection init completed","streamId":"a2kxhd8v","id":"127.0.0.1:57616"} +{"time":"2024-09-23T09:14:27.103590738Z","level":"INFO","msg":"connection: teardown","id":"127.0.0.1:57616"} +{"time":"2024-09-23T09:14:27.103797162Z","level":"INFO","msg":"server is shutting down"} +{"time":"2024-09-23T09:14:27.104072727Z","level":"INFO","msg":"closed connection","id":"127.0.0.1:57616"} +{"time":"2024-09-23T09:14:28.465863147Z","level":"INFO","msg":"Parent process exited, terminating service process."} diff --git a/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug-internal.log b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..0d56b77004bab416c59a082e3b3ecaeaa4259cc9 --- /dev/null +++ b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug-internal.log @@ -0,0 +1,14 @@ +{"time":"2024-09-23T09:14:22.59580271Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T09:14:22.59581747Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug-core.log"} +{"time":"2024-09-23T09:14:22.595881422Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T09:14:22.595887882Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug-core.log"} +{"time":"2024-09-23T09:14:22.59917443Z","level":"INFO","msg":"created new stream","id":"a2kxhd8v"} +{"time":"2024-09-23T09:14:22.59919309Z","level":"INFO","msg":"stream: started","id":"a2kxhd8v"} +{"time":"2024-09-23T09:14:22.59921417Z","level":"INFO","msg":"sender: started","stream_id":{"value":"a2kxhd8v"}} +{"time":"2024-09-23T09:14:22.599226691Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"a2kxhd8v"}} +{"time":"2024-09-23T09:14:22.599236461Z","level":"INFO","msg":"handler: started","stream_id":{"value":"a2kxhd8v"}} +{"time":"2024-09-23T09:14:22.982350736Z","level":"INFO","msg":"wandb-core","!BADKEY":null} +{"time":"2024-09-23T09:14:22.985015444Z","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-09-23T09:14:27.10372121Z","level":"INFO","msg":"stream: closing","id":"a2kxhd8v"} +{"time":"2024-09-23T09:14:27.103806442Z","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-09-23T09:14:27.104964992Z","level":"INFO","msg":"Stopped system monitor"} diff --git a/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug.log b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..482b10e81b3a41bd8b5f27a5100b02c29abf1ed0 --- /dev/null +++ b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug.log @@ -0,0 +1,26 @@ +2024-09-23 09:14:22,582 INFO MainThread:78108 [wandb_setup.py:_flush():77] Current SDK version is 0.18.1 +2024-09-23 09:14:22,582 INFO MainThread:78108 [wandb_setup.py:_flush():77] Configure stats pid to 78108 +2024-09-23 09:14:22,582 INFO MainThread:78108 [wandb_setup.py:_flush():77] Loading settings from /root/.config/wandb/settings +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_setup.py:_flush():77] Loading settings from /root/SuperTinyLanguageModels/outputs/2024-09-23/08-40-08/wandb/settings +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_setup.py:_flush():77] Loading settings from environment variables: {} +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_setup.py:_flush():77] Applying setup settings: {'mode': None, '_disable_service': None} +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/root/SuperTinyLanguageModels/train.py', 'program': '/root/SuperTinyLanguageModels/train.py'} +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_setup.py:_flush():77] Applying login settings: {} +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_init.py:_log_setup():532] Logging user logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug.log +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_init.py:_log_setup():533] Logging internal logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/logs/debug-internal.log +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_init.py:init():616] calling init triggers +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_init.py:init():623] wandb.init called with sweep_config: {} +config: {'model': {'core_model_type': 'pass_through', 'hidden_dim': 384, 'byte_hidden': 128, 'max_chunk_length': 12, 'max_num_chunks': 1024, 'num_delimiter_layers': 3, 'num_byte_decoder_layers': 5, 'target_chunk_len': 8.0, 'chunk_len_loss_weight': 0.1, 'chunk_len_penalty': 0.1, 'context_window': 8192, 'embedding_model_type': 'byte_level', 'tokenizer_type': 'bpe', 'tokenizer_dataset_name': 'simple_en_wiki', 'tokenizer_simplify_data': True, 'vocab_size': 259, 'lm_head_type': 'byte_level', 'lm_head_normalization': 'rms_norm', 'lm_head_bias': False, 'lm_head_dropout': 0.0, 'model_shell_type': 'byte_autoencoder_shell', 'embedding_weight_tying': True, 'ffn_weight_tying': False, 'cproj_weight_tying': False, 'positional_encoding_type': 'rope'}, 'trainer': {'trainer_type': 'base_trainer', 'dataset': 'fineweb_edu_10B', 'batch_size': 6, 'gradient_accumulation_steps': 8, 'max_iters': 10000, 'eval_interval': 50000000, 'log_interval': 1, 'checkpoint_interval': 1000, 'eval_iters': 1000, 'run_eval': False, 'eval': {'mcq_benchmarks': None, 'mcq_num_samples': 1000, 'eval_byte_metrics': False, 'text_modeling_eval': False, 'text_generation_eval': False}, 'optimizer': {'optimizer_name': 'adamW', 'lr': 0.0005, 'min_lr': 5e-05, 'weight_decay': 0.01, 'beta1': 0.9, 'beta2': 0.95, 'grad_clip': 1.0}, 'lr_scheduler': {'name': 'cosine', 'warmup_iters': 100}, 'dataloader': {'name': 'autoencoder'}, 'datasampling': {'name': 'standard'}, 'loss_fn': {'name': 'pass_through'}}, 'general': {'logging': {'wandb_log': True, 'wandb_project': 'SuperTinyLanguageModels', 'wandb_run_name': None, 'group_name': 'experimental_byte_level'}, 'paths': {'output_dir': 'outputs', 'data_dir': '/root/SuperTinyLanguageModels/data', 'checkpoint_dir': 'checkpoints', 'eval_dir': '/root/SuperTinyLanguageModels/evals'}, 'seed': 489, 'device': 'cuda'}} +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_init.py:init():666] starting backend +2024-09-23 09:14:22,583 INFO MainThread:78108 [wandb_init.py:init():670] setting up manager +2024-09-23 09:14:22,584 INFO MainThread:78108 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-09-23 09:14:22,586 INFO MainThread:78108 [wandb_init.py:init():678] backend started and connected +2024-09-23 09:14:22,588 INFO MainThread:78108 [wandb_init.py:init():773] updated telemetry +2024-09-23 09:14:22,598 INFO MainThread:78108 [wandb_init.py:init():806] communicating run to backend with 90.0 second timeout +2024-09-23 09:14:22,974 INFO MainThread:78108 [wandb_init.py:init():857] starting run threads in backend +2024-09-23 09:14:23,128 INFO MainThread:78108 [wandb_run.py:_console_start():2459] atexit reg +2024-09-23 09:14:23,128 INFO MainThread:78108 [wandb_run.py:_redirect():2307] redirect: wrap_raw +2024-09-23 09:14:23,129 INFO MainThread:78108 [wandb_run.py:_redirect():2372] Wrapping output streams. +2024-09-23 09:14:23,129 INFO MainThread:78108 [wandb_run.py:_redirect():2397] Redirects installed. +2024-09-23 09:14:23,135 INFO MainThread:78108 [wandb_init.py:init():900] run started, returning control to user process +2024-09-23 09:14:27,104 WARNING MsgRouterThr:78108 [router.py:message_loop():77] message_loop has been closed diff --git a/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/run-a2kxhd8v.wandb b/2024-09-23/08-40-08/wandb/run-20240923_091422-a2kxhd8v/run-a2kxhd8v.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/2024-09-23/09-32-28/.hydra/config.yaml b/2024-09-23/09-32-28/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a530e67cc9e2a492d1c55eb6b2bab1f49e3d471 --- /dev/null +++ b/2024-09-23/09-32-28/.hydra/config.yaml @@ -0,0 +1,74 @@ +experimental: + model: + core_model_type: pass_through + hidden_dim: 384 + byte_hidden: 128 + max_chunk_length: 12 + max_num_chunks: 1024 + num_delimiter_layers: 3 + num_byte_decoder_layers: 5 + target_chunk_len: 8.0 + chunk_len_loss_weight: 0.1 + chunk_len_penalty: 0.1 + context_window: 8192 + embedding_model_type: byte_level + tokenizer_type: bpe + tokenizer_dataset_name: simple_en_wiki + tokenizer_simplify_data: true + vocab_size: 259 + lm_head_type: byte_level + lm_head_normalization: rms_norm + lm_head_bias: false + lm_head_dropout: 0.0 + model_shell_type: byte_autoencoder_shell + embedding_weight_tying: true + ffn_weight_tying: false + cproj_weight_tying: false + positional_encoding_type: rope + trainer: + trainer_type: base_trainer + dataset: fineweb_edu_10B + batch_size: 6 + gradient_accumulation_steps: 8 + max_iters: 10000 + eval_interval: 50000000 + log_interval: 1 + checkpoint_interval: 1000 + eval_iters: 1000 + run_eval: false + eval: + mcq_benchmarks: null + mcq_num_samples: 1000 + eval_byte_metrics: false + text_modeling_eval: false + text_generation_eval: false + optimizer: + optimizer_name: adamW + lr: 0.0005 + min_lr: 5.0e-05 + weight_decay: 0.01 + beta1: 0.9 + beta2: 0.95 + grad_clip: 1.0 + lr_scheduler: + name: cosine + warmup_iters: 100 + dataloader: + name: autoencoder + datasampling: + name: standard + loss_fn: + name: pass_through + general: + logging: + wandb_log: true + wandb_project: SuperTinyLanguageModels + wandb_run_name: null + group_name: experimental_byte_level + paths: + output_dir: outputs + data_dir: data + checkpoint_dir: checkpoints + eval_dir: evals + seed: 489 + device: cuda diff --git a/2024-09-23/09-32-28/.hydra/hydra.yaml b/2024-09-23/09-32-28/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b8058acb2520f263f976ac715f1c64802a1813e5 --- /dev/null +++ b/2024-09-23/09-32-28/.hydra/hydra.yaml @@ -0,0 +1,154 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: train + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: experimental/byte_autoencoder_1 + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.1' + cwd: /root/SuperTinyLanguageModels + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /root/SuperTinyLanguageModels/configs/train + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /root/SuperTinyLanguageModels/outputs/2024-09-23/09-32-28 + choices: + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/2024-09-23/09-32-28/.hydra/overrides.yaml b/2024-09-23/09-32-28/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/2024-09-23/09-32-28/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/2024-09-23/09-32-28/train.log b/2024-09-23/09-32-28/train.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/2024-09-23/09-32-28/wandb/debug-internal.log b/2024-09-23/09-32-28/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c6b8d7bb373af352b1f8dc86d61ba7c061147fed --- /dev/null +++ b/2024-09-23/09-32-28/wandb/debug-internal.log @@ -0,0 +1,18 @@ +{"time":"2024-09-23T09:32:37.2270228Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T09:32:37.227060611Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug-core.log"} +{"time":"2024-09-23T09:32:37.227169702Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T09:32:37.227182172Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug-core.log"} +{"time":"2024-09-23T09:32:37.230824708Z","level":"INFO","msg":"created new stream","id":"tkkvzfon"} +{"time":"2024-09-23T09:32:37.230859859Z","level":"INFO","msg":"stream: started","id":"tkkvzfon"} +{"time":"2024-09-23T09:32:37.230903499Z","level":"INFO","msg":"sender: started","stream_id":{"value":"tkkvzfon"}} +{"time":"2024-09-23T09:32:37.23092371Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"tkkvzfon"}} +{"time":"2024-09-23T09:32:37.23097304Z","level":"INFO","msg":"handler: started","stream_id":{"value":"tkkvzfon"}} +{"time":"2024-09-23T09:32:37.634282756Z","level":"INFO","msg":"wandb-core","!BADKEY":null} +{"time":"2024-09-23T09:32:37.636527894Z","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-09-23T09:33:46.746283667Z","level":"INFO","msg":"stream: closing","id":"tkkvzfon"} +{"time":"2024-09-23T09:33:46.746349498Z","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-09-23T09:33:46.747359311Z","level":"INFO","msg":"Stopped system monitor"} +{"time":"2024-09-23T09:33:49.926631346Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"tkkvzfon"}} +{"time":"2024-09-23T09:33:49.926725448Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"tkkvzfon"}} +{"time":"2024-09-23T09:33:49.926795918Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"tkkvzfon"}} +{"time":"2024-09-23T09:33:49.927056922Z","level":"INFO","msg":"stream: closed","id":"tkkvzfon"} diff --git a/2024-09-23/09-32-28/wandb/debug.log b/2024-09-23/09-32-28/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..2a1473bd13d72359b07e14dc46d6c263e87b9ad5 --- /dev/null +++ b/2024-09-23/09-32-28/wandb/debug.log @@ -0,0 +1,26 @@ +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Current SDK version is 0.18.1 +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Configure stats pid to 81916 +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Loading settings from /root/.config/wandb/settings +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Loading settings from /root/SuperTinyLanguageModels/outputs/2024-09-23/09-32-28/wandb/settings +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Loading settings from environment variables: {} +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Applying setup settings: {'mode': None, '_disable_service': None} +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/root/SuperTinyLanguageModels/train.py', 'program': '/root/SuperTinyLanguageModels/train.py'} +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Applying login settings: {} +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_init.py:_log_setup():532] Logging user logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug.log +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_init.py:_log_setup():533] Logging internal logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug-internal.log +2024-09-23 09:32:37,222 INFO MainThread:81916 [wandb_init.py:init():616] calling init triggers +2024-09-23 09:32:37,222 INFO MainThread:81916 [wandb_init.py:init():623] wandb.init called with sweep_config: {} +config: {'model': {'core_model_type': 'pass_through', 'hidden_dim': 384, 'byte_hidden': 128, 'max_chunk_length': 12, 'max_num_chunks': 1024, 'num_delimiter_layers': 3, 'num_byte_decoder_layers': 5, 'target_chunk_len': 8.0, 'chunk_len_loss_weight': 0.1, 'chunk_len_penalty': 0.1, 'context_window': 8192, 'embedding_model_type': 'byte_level', 'tokenizer_type': 'bpe', 'tokenizer_dataset_name': 'simple_en_wiki', 'tokenizer_simplify_data': True, 'vocab_size': 259, 'lm_head_type': 'byte_level', 'lm_head_normalization': 'rms_norm', 'lm_head_bias': False, 'lm_head_dropout': 0.0, 'model_shell_type': 'byte_autoencoder_shell', 'embedding_weight_tying': True, 'ffn_weight_tying': False, 'cproj_weight_tying': False, 'positional_encoding_type': 'rope'}, 'trainer': {'trainer_type': 'base_trainer', 'dataset': 'fineweb_edu_10B', 'batch_size': 6, 'gradient_accumulation_steps': 8, 'max_iters': 10000, 'eval_interval': 50000000, 'log_interval': 1, 'checkpoint_interval': 1000, 'eval_iters': 1000, 'run_eval': False, 'eval': {'mcq_benchmarks': None, 'mcq_num_samples': 1000, 'eval_byte_metrics': False, 'text_modeling_eval': False, 'text_generation_eval': False}, 'optimizer': {'optimizer_name': 'adamW', 'lr': 0.0005, 'min_lr': 5e-05, 'weight_decay': 0.01, 'beta1': 0.9, 'beta2': 0.95, 'grad_clip': 1.0}, 'lr_scheduler': {'name': 'cosine', 'warmup_iters': 100}, 'dataloader': {'name': 'autoencoder'}, 'datasampling': {'name': 'standard'}, 'loss_fn': {'name': 'pass_through'}}, 'general': {'logging': {'wandb_log': True, 'wandb_project': 'SuperTinyLanguageModels', 'wandb_run_name': None, 'group_name': 'experimental_byte_level'}, 'paths': {'output_dir': 'outputs', 'data_dir': '/root/SuperTinyLanguageModels/data', 'checkpoint_dir': 'checkpoints', 'eval_dir': '/root/SuperTinyLanguageModels/evals'}, 'seed': 489, 'device': 'cuda'}} +2024-09-23 09:32:37,222 INFO MainThread:81916 [wandb_init.py:init():666] starting backend +2024-09-23 09:32:37,222 INFO MainThread:81916 [wandb_init.py:init():670] setting up manager +2024-09-23 09:32:37,223 INFO MainThread:81916 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-09-23 09:32:37,224 INFO MainThread:81916 [wandb_init.py:init():678] backend started and connected +2024-09-23 09:32:37,227 INFO MainThread:81916 [wandb_init.py:init():773] updated telemetry +2024-09-23 09:32:37,236 INFO MainThread:81916 [wandb_init.py:init():806] communicating run to backend with 90.0 second timeout +2024-09-23 09:32:37,631 INFO MainThread:81916 [wandb_init.py:init():857] starting run threads in backend +2024-09-23 09:32:37,802 INFO MainThread:81916 [wandb_run.py:_console_start():2459] atexit reg +2024-09-23 09:32:37,802 INFO MainThread:81916 [wandb_run.py:_redirect():2307] redirect: wrap_raw +2024-09-23 09:32:37,803 INFO MainThread:81916 [wandb_run.py:_redirect():2372] Wrapping output streams. +2024-09-23 09:32:37,803 INFO MainThread:81916 [wandb_run.py:_redirect():2397] Redirects installed. +2024-09-23 09:32:37,806 INFO MainThread:81916 [wandb_init.py:init():900] run started, returning control to user process +2024-09-23 09:33:46,746 WARNING MsgRouterThr:81916 [router.py:message_loop():77] message_loop has been closed diff --git a/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/files/config.yaml b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47a12e10d355e3ed05a889f578804e4547a369f3 --- /dev/null +++ b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/files/config.yaml @@ -0,0 +1,115 @@ +_wandb: + value: + cli_version: 0.18.1 + m: [] + python_version: 3.10.14 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 50 + - 51 + - 53 + - 55 + "2": + - 1 + - 5 + - 11 + - 49 + - 50 + - 51 + - 53 + - 55 + "3": + - 13 + - 15 + - 16 + - 23 + - 55 + - 61 + "4": 3.10.14 + "5": 0.18.1 + "6": 4.44.2 + "8": + - 5 + - 9 + "12": 0.18.1 + "13": linux-x86_64 +general: + value: + device: cuda + logging: + group_name: experimental_byte_level + wandb_log: true + wandb_project: SuperTinyLanguageModels + wandb_run_name: null + paths: + checkpoint_dir: checkpoints + data_dir: /root/SuperTinyLanguageModels/data + eval_dir: /root/SuperTinyLanguageModels/evals + output_dir: outputs + seed: 489 +model: + value: + byte_hidden: 128 + chunk_len_loss_weight: 0.1 + chunk_len_penalty: 0.1 + context_window: 8192 + core_model_type: pass_through + cproj_weight_tying: false + embedding_model_type: byte_level + embedding_weight_tying: true + ffn_weight_tying: false + hidden_dim: 384 + lm_head_bias: false + lm_head_dropout: 0 + lm_head_normalization: rms_norm + lm_head_type: byte_level + max_chunk_length: 12 + max_num_chunks: 1024 + model_shell_type: byte_autoencoder_shell + num_byte_decoder_layers: 5 + num_delimiter_layers: 3 + positional_encoding_type: rope + target_chunk_len: 8 + tokenizer_dataset_name: simple_en_wiki + tokenizer_simplify_data: true + tokenizer_type: bpe + vocab_size: 259 +trainer: + value: + batch_size: 6 + checkpoint_interval: 1000 + dataloader: + name: autoencoder + datasampling: + name: standard + dataset: fineweb_edu_10B + eval: + eval_byte_metrics: false + mcq_benchmarks: null + mcq_num_samples: 1000 + text_generation_eval: false + text_modeling_eval: false + eval_interval: 50000000 + eval_iters: 1000 + gradient_accumulation_steps: 8 + log_interval: 1 + loss_fn: + name: pass_through + lr_scheduler: + name: cosine + warmup_iters: 100 + max_iters: 10000 + optimizer: + beta1: 0.9 + beta2: 0.95 + grad_clip: 1 + lr: 0.0005 + min_lr: 5e-05 + optimizer_name: adamW + weight_decay: 0.01 + run_eval: false + trainer_type: base_trainer diff --git a/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/files/output.log b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..9d0a22b4a8b81989b9ceedc3c9203589fa9e511e --- /dev/null +++ b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/files/output.log @@ -0,0 +1,33 @@ +Weight and Biases Initialized +Rank0 Trainer built +Training loop is starting +All GPU(s): step 1: loss 10.4062, lr 5.0e-06, dt 2.1s +All GPU(s): step 2: loss 10.4297, lr 1.0e-05, dt 2.1s +All GPU(s): step 3: loss 10.3672, lr 1.5e-05, dt 2.1s +All GPU(s): step 4: loss 10.3203, lr 2.0e-05, dt 2.1s +All GPU(s): step 5: loss 10.2344, lr 2.5e-05, dt 2.1s +All GPU(s): step 6: loss 10.1406, lr 3.0e-05, dt 2.1s +All GPU(s): step 7: loss 10.0234, lr 3.5e-05, dt 2.1s +All GPU(s): step 8: loss 9.9688, lr 4.0e-05, dt 2.1s +All GPU(s): step 9: loss 9.8594, lr 4.5e-05, dt 2.2s +All GPU(s): step 10: loss 9.6328, lr 5.0e-05, dt 2.1s +All GPU(s): step 11: loss 9.5312, lr 5.5e-05, dt 2.1s +All GPU(s): step 12: loss 9.3750, lr 6.0e-05, dt 2.1s +All GPU(s): step 13: loss 9.2109, lr 6.5e-05, dt 2.1s +All GPU(s): step 14: loss 9.0078, lr 7.0e-05, dt 2.1s +All GPU(s): step 15: loss 8.8203, lr 7.5e-05, dt 2.1s +All GPU(s): step 16: loss 8.6562, lr 8.0e-05, dt 2.0s +All GPU(s): step 17: loss 8.4922, lr 8.5e-05, dt 2.1s +All GPU(s): step 18: loss 8.2891, lr 9.0e-05, dt 2.1s +All GPU(s): step 19: loss 8.1328, lr 9.5e-05, dt 2.1s +All GPU(s): step 20: loss 7.9414, lr 1.0e-04, dt 2.0s +All GPU(s): step 21: loss 7.7852, lr 1.1e-04, dt 2.1s +All GPU(s): step 22: loss 7.5977, lr 1.1e-04, dt 2.1s +All GPU(s): step 23: loss 7.4453, lr 1.2e-04, dt 2.1s +All GPU(s): step 24: loss 7.3164, lr 1.2e-04, dt 2.1s +All GPU(s): step 25: loss 7.1836, lr 1.3e-04, dt 2.1s +All GPU(s): step 26: loss 7.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 27: loss 6.9414, lr 1.4e-04, dt 2.1s +All GPU(s): step 28: loss 6.8633, lr 1.4e-04, dt 2.2s +All GPU(s): step 29: loss 6.7461, lr 1.5e-04, dt 2.1s +All GPU(s): step 30: loss 6.5742, lr 1.5e-04, dt 2.1s diff --git a/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/files/wandb-metadata.json b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..80fe4e3342ae7dea872a0a19c157d20bcd37df7d --- /dev/null +++ b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/files/wandb-metadata.json @@ -0,0 +1,88 @@ +{ + "os": "Linux-5.15.0-117-generic-x86_64-with-glibc2.31", + "python": "3.10.14", + "startedAt": "2024-09-23T09:32:37.224689Z", + "args": [ + "--config-name", + "experimental/byte_autoencoder_1" + ], + "program": "/root/SuperTinyLanguageModels/train.py", + "codePath": "train.py", + "git": { + "remote": "https://github.com/LeonGuertler/SuperTinyLanguageModels.git", + "commit": "c36bf6b78927d4d365c52a835f0e178edacbab29" + }, + "email": "calvin14@gmail.com", + "root": "/root/SuperTinyLanguageModels/outputs/2024-09-23/09-32-28", + "host": "11c6e13f6a55", + "username": "root", + "executable": "/root/SuperTinyLanguageModels/.conda/bin/python3", + "cpu_count": 128, + "cpu_count_logical": 256, + "gpu": "[NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090]", + "gpu_count": 8, + "disk": { + "/": { + "total": "1123133947904", + "used": "551794495488" + } + }, + "memory": { + "total": "540812599296" + }, + "cpu": { + "count": 128, + "countLogical": 256 + }, + "gpu_nvidia": [ + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + } + ], + "cudaVersion": "12.5" +} \ No newline at end of file diff --git a/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/files/wandb-summary.json b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..8513f147724e1cd6ebd0c66e5ab8b9a99a502d14 --- /dev/null +++ b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/files/wandb-summary.json @@ -0,0 +1 @@ +{"additional_info/chunk_len_penalty_loss":0,"additional_info/total-loss":6.543508529663086,"_step":1474560,"additional_info/chunk_len_loss":2.0561606884002686,"iter":30,"token_num":1474560,"additional_info/BCE-loss":4.487347602844238,"loss":6.57421875,"lr":0.00015,"_timestamp":1.7270840240730202e+09,"_runtime":69.521643938,"additional_info/average_chunk_length":3.4655094146728516,"_wandb":{"runtime":69}} \ No newline at end of file diff --git a/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug-core.log b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..0cf78828a0a41e479bb8b3b34aaf09d2b3b1d63f --- /dev/null +++ b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug-core.log @@ -0,0 +1,13 @@ +{"time":"2024-09-23T09:32:36.53490736Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmppr55fcxh/port-81916.txt","pid":81916,"debug":false,"disable-analytics":false} +{"time":"2024-09-23T09:32:36.534984841Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false} +{"time":"2024-09-23T09:32:36.551541231Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":81916} +{"time":"2024-09-23T09:32:36.55148544Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44587,"Zone":""}} +{"time":"2024-09-23T09:32:36.722786198Z","level":"INFO","msg":"created new connection","id":"127.0.0.1:60908"} +{"time":"2024-09-23T09:32:37.226730857Z","level":"INFO","msg":"connection init received","streamId":"tkkvzfon","id":"127.0.0.1:60908"} +{"time":"2024-09-23T09:32:37.227116001Z","level":"ERROR","msg":"error creating symlink","error":"symlink /root/.cache/wandb/logs/core-debug-20240923_093236.log /root/SuperTinyLanguageModels/outputs/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug-core.log: file exists"} +{"time":"2024-09-23T09:32:37.230871019Z","level":"INFO","msg":"connection init completed","streamId":"tkkvzfon","id":"127.0.0.1:60908"} +{"time":"2024-09-23T09:33:46.746114105Z","level":"INFO","msg":"connection: teardown","id":"127.0.0.1:60908"} +{"time":"2024-09-23T09:33:46.746363968Z","level":"INFO","msg":"server is shutting down"} +{"time":"2024-09-23T09:33:46.746627582Z","level":"INFO","msg":"closed connection","id":"127.0.0.1:60908"} +{"time":"2024-09-23T09:33:49.927260015Z","level":"INFO","msg":"connection closed","id":"127.0.0.1:60908"} +{"time":"2024-09-23T09:33:49.927297555Z","level":"INFO","msg":"server is closed"} diff --git a/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug-internal.log b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c6b8d7bb373af352b1f8dc86d61ba7c061147fed --- /dev/null +++ b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug-internal.log @@ -0,0 +1,18 @@ +{"time":"2024-09-23T09:32:37.2270228Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T09:32:37.227060611Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug-core.log"} +{"time":"2024-09-23T09:32:37.227169702Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T09:32:37.227182172Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug-core.log"} +{"time":"2024-09-23T09:32:37.230824708Z","level":"INFO","msg":"created new stream","id":"tkkvzfon"} +{"time":"2024-09-23T09:32:37.230859859Z","level":"INFO","msg":"stream: started","id":"tkkvzfon"} +{"time":"2024-09-23T09:32:37.230903499Z","level":"INFO","msg":"sender: started","stream_id":{"value":"tkkvzfon"}} +{"time":"2024-09-23T09:32:37.23092371Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"tkkvzfon"}} +{"time":"2024-09-23T09:32:37.23097304Z","level":"INFO","msg":"handler: started","stream_id":{"value":"tkkvzfon"}} +{"time":"2024-09-23T09:32:37.634282756Z","level":"INFO","msg":"wandb-core","!BADKEY":null} +{"time":"2024-09-23T09:32:37.636527894Z","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-09-23T09:33:46.746283667Z","level":"INFO","msg":"stream: closing","id":"tkkvzfon"} +{"time":"2024-09-23T09:33:46.746349498Z","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-09-23T09:33:46.747359311Z","level":"INFO","msg":"Stopped system monitor"} +{"time":"2024-09-23T09:33:49.926631346Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"tkkvzfon"}} +{"time":"2024-09-23T09:33:49.926725448Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"tkkvzfon"}} +{"time":"2024-09-23T09:33:49.926795918Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"tkkvzfon"}} +{"time":"2024-09-23T09:33:49.927056922Z","level":"INFO","msg":"stream: closed","id":"tkkvzfon"} diff --git a/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug.log b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..2a1473bd13d72359b07e14dc46d6c263e87b9ad5 --- /dev/null +++ b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug.log @@ -0,0 +1,26 @@ +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Current SDK version is 0.18.1 +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Configure stats pid to 81916 +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Loading settings from /root/.config/wandb/settings +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Loading settings from /root/SuperTinyLanguageModels/outputs/2024-09-23/09-32-28/wandb/settings +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Loading settings from environment variables: {} +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Applying setup settings: {'mode': None, '_disable_service': None} +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/root/SuperTinyLanguageModels/train.py', 'program': '/root/SuperTinyLanguageModels/train.py'} +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_setup.py:_flush():77] Applying login settings: {} +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_init.py:_log_setup():532] Logging user logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug.log +2024-09-23 09:32:37,221 INFO MainThread:81916 [wandb_init.py:_log_setup():533] Logging internal logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/logs/debug-internal.log +2024-09-23 09:32:37,222 INFO MainThread:81916 [wandb_init.py:init():616] calling init triggers +2024-09-23 09:32:37,222 INFO MainThread:81916 [wandb_init.py:init():623] wandb.init called with sweep_config: {} +config: {'model': {'core_model_type': 'pass_through', 'hidden_dim': 384, 'byte_hidden': 128, 'max_chunk_length': 12, 'max_num_chunks': 1024, 'num_delimiter_layers': 3, 'num_byte_decoder_layers': 5, 'target_chunk_len': 8.0, 'chunk_len_loss_weight': 0.1, 'chunk_len_penalty': 0.1, 'context_window': 8192, 'embedding_model_type': 'byte_level', 'tokenizer_type': 'bpe', 'tokenizer_dataset_name': 'simple_en_wiki', 'tokenizer_simplify_data': True, 'vocab_size': 259, 'lm_head_type': 'byte_level', 'lm_head_normalization': 'rms_norm', 'lm_head_bias': False, 'lm_head_dropout': 0.0, 'model_shell_type': 'byte_autoencoder_shell', 'embedding_weight_tying': True, 'ffn_weight_tying': False, 'cproj_weight_tying': False, 'positional_encoding_type': 'rope'}, 'trainer': {'trainer_type': 'base_trainer', 'dataset': 'fineweb_edu_10B', 'batch_size': 6, 'gradient_accumulation_steps': 8, 'max_iters': 10000, 'eval_interval': 50000000, 'log_interval': 1, 'checkpoint_interval': 1000, 'eval_iters': 1000, 'run_eval': False, 'eval': {'mcq_benchmarks': None, 'mcq_num_samples': 1000, 'eval_byte_metrics': False, 'text_modeling_eval': False, 'text_generation_eval': False}, 'optimizer': {'optimizer_name': 'adamW', 'lr': 0.0005, 'min_lr': 5e-05, 'weight_decay': 0.01, 'beta1': 0.9, 'beta2': 0.95, 'grad_clip': 1.0}, 'lr_scheduler': {'name': 'cosine', 'warmup_iters': 100}, 'dataloader': {'name': 'autoencoder'}, 'datasampling': {'name': 'standard'}, 'loss_fn': {'name': 'pass_through'}}, 'general': {'logging': {'wandb_log': True, 'wandb_project': 'SuperTinyLanguageModels', 'wandb_run_name': None, 'group_name': 'experimental_byte_level'}, 'paths': {'output_dir': 'outputs', 'data_dir': '/root/SuperTinyLanguageModels/data', 'checkpoint_dir': 'checkpoints', 'eval_dir': '/root/SuperTinyLanguageModels/evals'}, 'seed': 489, 'device': 'cuda'}} +2024-09-23 09:32:37,222 INFO MainThread:81916 [wandb_init.py:init():666] starting backend +2024-09-23 09:32:37,222 INFO MainThread:81916 [wandb_init.py:init():670] setting up manager +2024-09-23 09:32:37,223 INFO MainThread:81916 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-09-23 09:32:37,224 INFO MainThread:81916 [wandb_init.py:init():678] backend started and connected +2024-09-23 09:32:37,227 INFO MainThread:81916 [wandb_init.py:init():773] updated telemetry +2024-09-23 09:32:37,236 INFO MainThread:81916 [wandb_init.py:init():806] communicating run to backend with 90.0 second timeout +2024-09-23 09:32:37,631 INFO MainThread:81916 [wandb_init.py:init():857] starting run threads in backend +2024-09-23 09:32:37,802 INFO MainThread:81916 [wandb_run.py:_console_start():2459] atexit reg +2024-09-23 09:32:37,802 INFO MainThread:81916 [wandb_run.py:_redirect():2307] redirect: wrap_raw +2024-09-23 09:32:37,803 INFO MainThread:81916 [wandb_run.py:_redirect():2372] Wrapping output streams. +2024-09-23 09:32:37,803 INFO MainThread:81916 [wandb_run.py:_redirect():2397] Redirects installed. +2024-09-23 09:32:37,806 INFO MainThread:81916 [wandb_init.py:init():900] run started, returning control to user process +2024-09-23 09:33:46,746 WARNING MsgRouterThr:81916 [router.py:message_loop():77] message_loop has been closed diff --git a/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/run-tkkvzfon.wandb b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/run-tkkvzfon.wandb new file mode 100644 index 0000000000000000000000000000000000000000..dd9c0a33f99420159a1bb9bbd176da9a37ce864a Binary files /dev/null and b/2024-09-23/09-32-28/wandb/run-20240923_093237-tkkvzfon/run-tkkvzfon.wandb differ diff --git a/2024-09-23/09-33-58/.hydra/config.yaml b/2024-09-23/09-33-58/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a530e67cc9e2a492d1c55eb6b2bab1f49e3d471 --- /dev/null +++ b/2024-09-23/09-33-58/.hydra/config.yaml @@ -0,0 +1,74 @@ +experimental: + model: + core_model_type: pass_through + hidden_dim: 384 + byte_hidden: 128 + max_chunk_length: 12 + max_num_chunks: 1024 + num_delimiter_layers: 3 + num_byte_decoder_layers: 5 + target_chunk_len: 8.0 + chunk_len_loss_weight: 0.1 + chunk_len_penalty: 0.1 + context_window: 8192 + embedding_model_type: byte_level + tokenizer_type: bpe + tokenizer_dataset_name: simple_en_wiki + tokenizer_simplify_data: true + vocab_size: 259 + lm_head_type: byte_level + lm_head_normalization: rms_norm + lm_head_bias: false + lm_head_dropout: 0.0 + model_shell_type: byte_autoencoder_shell + embedding_weight_tying: true + ffn_weight_tying: false + cproj_weight_tying: false + positional_encoding_type: rope + trainer: + trainer_type: base_trainer + dataset: fineweb_edu_10B + batch_size: 6 + gradient_accumulation_steps: 8 + max_iters: 10000 + eval_interval: 50000000 + log_interval: 1 + checkpoint_interval: 1000 + eval_iters: 1000 + run_eval: false + eval: + mcq_benchmarks: null + mcq_num_samples: 1000 + eval_byte_metrics: false + text_modeling_eval: false + text_generation_eval: false + optimizer: + optimizer_name: adamW + lr: 0.0005 + min_lr: 5.0e-05 + weight_decay: 0.01 + beta1: 0.9 + beta2: 0.95 + grad_clip: 1.0 + lr_scheduler: + name: cosine + warmup_iters: 100 + dataloader: + name: autoencoder + datasampling: + name: standard + loss_fn: + name: pass_through + general: + logging: + wandb_log: true + wandb_project: SuperTinyLanguageModels + wandb_run_name: null + group_name: experimental_byte_level + paths: + output_dir: outputs + data_dir: data + checkpoint_dir: checkpoints + eval_dir: evals + seed: 489 + device: cuda diff --git a/2024-09-23/09-33-58/.hydra/hydra.yaml b/2024-09-23/09-33-58/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6b80a86faabbbd9b65035cead37b8927cf9c6a93 --- /dev/null +++ b/2024-09-23/09-33-58/.hydra/hydra.yaml @@ -0,0 +1,154 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: train + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: experimental/byte_autoencoder_1 + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.1' + cwd: /root/SuperTinyLanguageModels + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /root/SuperTinyLanguageModels/configs/train + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /root/SuperTinyLanguageModels/outputs/2024-09-23/09-33-58 + choices: + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/2024-09-23/09-33-58/.hydra/overrides.yaml b/2024-09-23/09-33-58/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/2024-09-23/09-33-58/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/2024-09-23/09-33-58/checkpoints/ckpt_1000.pt b/2024-09-23/09-33-58/checkpoints/ckpt_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e71c74f8bbe7e235828216c94f4591f32135bf3 --- /dev/null +++ b/2024-09-23/09-33-58/checkpoints/ckpt_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9e847e5371dfd2f5ac68ee97e737d4ab63d42fdde1c885d6ab4915a9b3ccf83 +size 69377274 diff --git a/2024-09-23/09-33-58/checkpoints/ckpt_2000.pt b/2024-09-23/09-33-58/checkpoints/ckpt_2000.pt new file mode 100644 index 0000000000000000000000000000000000000000..21f856c65ea09362a8e0bf8cd32a9967f79a49e8 --- /dev/null +++ b/2024-09-23/09-33-58/checkpoints/ckpt_2000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:261a3f997548dd7b6a92a1a7a51b37b1d559a7b64547c95b98a336bdc2685da0 +size 69377274 diff --git a/2024-09-23/09-33-58/checkpoints/ckpt_3000.pt b/2024-09-23/09-33-58/checkpoints/ckpt_3000.pt new file mode 100644 index 0000000000000000000000000000000000000000..922510049f5da3c6d8df8017014c7b74eb8e0b8f --- /dev/null +++ b/2024-09-23/09-33-58/checkpoints/ckpt_3000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51018e44f695f15948d2cbcd014d62113a7a82a67ca7ca25dc767a77c12ae563 +size 69377274 diff --git a/2024-09-23/09-33-58/checkpoints/ckpt_4000.pt b/2024-09-23/09-33-58/checkpoints/ckpt_4000.pt new file mode 100644 index 0000000000000000000000000000000000000000..a11c3fdfaf49b32fb1e4cf6567ccabafab7ea70b --- /dev/null +++ b/2024-09-23/09-33-58/checkpoints/ckpt_4000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf165a859555ddeb74ad0c7b6e10f17fa5f91c1b060a14bd77dd7fedbde5503c +size 69377274 diff --git a/2024-09-23/09-33-58/checkpoints/ckpt_5000.pt b/2024-09-23/09-33-58/checkpoints/ckpt_5000.pt new file mode 100644 index 0000000000000000000000000000000000000000..f308b0c9dae11c9709d757f0b8533d19d76bbfde --- /dev/null +++ b/2024-09-23/09-33-58/checkpoints/ckpt_5000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99bdcae468dc981532ae56ecd8616824b1cf86801d364510be19a57467a81dbb +size 69377274 diff --git a/2024-09-23/09-33-58/checkpoints/ckpt_6000.pt b/2024-09-23/09-33-58/checkpoints/ckpt_6000.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa66edd30c5a35536504938364cbda7124be6634 --- /dev/null +++ b/2024-09-23/09-33-58/checkpoints/ckpt_6000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40f205a0504483f912063f3b3630bc7ac288e24d0a1e33073549fe54b65b3932 +size 69377274 diff --git a/2024-09-23/09-33-58/checkpoints/ckpt_7000.pt b/2024-09-23/09-33-58/checkpoints/ckpt_7000.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ae8c72549f98b2b012f83ad8caf7efe6827ed93 --- /dev/null +++ b/2024-09-23/09-33-58/checkpoints/ckpt_7000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb35be4402fb88ef5ffdd2a5c48c00f27f106a1d62d6e24886d5c665835e6c3e +size 69377274 diff --git a/2024-09-23/09-33-58/checkpoints/ckpt_8000.pt b/2024-09-23/09-33-58/checkpoints/ckpt_8000.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0139bcef137067bae1d1673294ea574c8e539aa --- /dev/null +++ b/2024-09-23/09-33-58/checkpoints/ckpt_8000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7bd3b490b6e4aa9396199fa9f8c97e9ed8012aea18d420c166bbabfafcb48ae +size 69377274 diff --git a/2024-09-23/09-33-58/checkpoints/ckpt_9000.pt b/2024-09-23/09-33-58/checkpoints/ckpt_9000.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa9f450bb8c428386c2c543f5fe58f37c09dc789 --- /dev/null +++ b/2024-09-23/09-33-58/checkpoints/ckpt_9000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6633a689931cd10c33c611ecedd9a62e68765443840a6d9e0363e8f281be15f0 +size 69377274 diff --git a/2024-09-23/09-33-58/train.log b/2024-09-23/09-33-58/train.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/2024-09-23/09-33-58/wandb/debug-internal.log b/2024-09-23/09-33-58/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..50dd3985c96156de29fc7af1025242df965d3601 --- /dev/null +++ b/2024-09-23/09-33-58/wandb/debug-internal.log @@ -0,0 +1,18 @@ +{"time":"2024-09-23T09:34:07.258097267Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T09:34:07.258119398Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug-core.log"} +{"time":"2024-09-23T09:34:07.258175408Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T09:34:07.258180668Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug-core.log"} +{"time":"2024-09-23T09:34:07.260911834Z","level":"INFO","msg":"created new stream","id":"jnzzkcth"} +{"time":"2024-09-23T09:34:07.260929995Z","level":"INFO","msg":"stream: started","id":"jnzzkcth"} +{"time":"2024-09-23T09:34:07.260949385Z","level":"INFO","msg":"handler: started","stream_id":{"value":"jnzzkcth"}} +{"time":"2024-09-23T09:34:07.260966215Z","level":"INFO","msg":"sender: started","stream_id":{"value":"jnzzkcth"}} +{"time":"2024-09-23T09:34:07.260991135Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"jnzzkcth"}} +{"time":"2024-09-23T09:34:07.631920515Z","level":"INFO","msg":"wandb-core","!BADKEY":null} +{"time":"2024-09-23T09:34:07.634314975Z","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-09-23T14:57:46.67482666Z","level":"INFO","msg":"stream: closing","id":"jnzzkcth"} +{"time":"2024-09-23T14:57:46.674896121Z","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-09-23T14:57:46.678091073Z","level":"INFO","msg":"Stopped system monitor"} +{"time":"2024-09-23T14:57:48.203214047Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"jnzzkcth"}} +{"time":"2024-09-23T14:57:48.203305789Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"jnzzkcth"}} +{"time":"2024-09-23T14:57:48.203294698Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"jnzzkcth"}} +{"time":"2024-09-23T14:57:48.203550773Z","level":"INFO","msg":"stream: closed","id":"jnzzkcth"} diff --git a/2024-09-23/09-33-58/wandb/debug.log b/2024-09-23/09-33-58/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e640057693b0eae0c84abea9a0f6fac9caaae9bc --- /dev/null +++ b/2024-09-23/09-33-58/wandb/debug.log @@ -0,0 +1,26 @@ +2024-09-23 09:34:07,248 INFO MainThread:85375 [wandb_setup.py:_flush():77] Current SDK version is 0.18.1 +2024-09-23 09:34:07,248 INFO MainThread:85375 [wandb_setup.py:_flush():77] Configure stats pid to 85375 +2024-09-23 09:34:07,248 INFO MainThread:85375 [wandb_setup.py:_flush():77] Loading settings from /root/.config/wandb/settings +2024-09-23 09:34:07,248 INFO MainThread:85375 [wandb_setup.py:_flush():77] Loading settings from /root/SuperTinyLanguageModels/outputs/2024-09-23/09-33-58/wandb/settings +2024-09-23 09:34:07,248 INFO MainThread:85375 [wandb_setup.py:_flush():77] Loading settings from environment variables: {} +2024-09-23 09:34:07,248 INFO MainThread:85375 [wandb_setup.py:_flush():77] Applying setup settings: {'mode': None, '_disable_service': None} +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/root/SuperTinyLanguageModels/train.py', 'program': '/root/SuperTinyLanguageModels/train.py'} +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_setup.py:_flush():77] Applying login settings: {} +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_init.py:_log_setup():532] Logging user logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug.log +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_init.py:_log_setup():533] Logging internal logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug-internal.log +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_init.py:init():616] calling init triggers +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_init.py:init():623] wandb.init called with sweep_config: {} +config: {'model': {'core_model_type': 'pass_through', 'hidden_dim': 384, 'byte_hidden': 128, 'max_chunk_length': 12, 'max_num_chunks': 1024, 'num_delimiter_layers': 3, 'num_byte_decoder_layers': 5, 'target_chunk_len': 8.0, 'chunk_len_loss_weight': 0.1, 'chunk_len_penalty': 0.1, 'context_window': 8192, 'embedding_model_type': 'byte_level', 'tokenizer_type': 'bpe', 'tokenizer_dataset_name': 'simple_en_wiki', 'tokenizer_simplify_data': True, 'vocab_size': 259, 'lm_head_type': 'byte_level', 'lm_head_normalization': 'rms_norm', 'lm_head_bias': False, 'lm_head_dropout': 0.0, 'model_shell_type': 'byte_autoencoder_shell', 'embedding_weight_tying': True, 'ffn_weight_tying': False, 'cproj_weight_tying': False, 'positional_encoding_type': 'rope'}, 'trainer': {'trainer_type': 'base_trainer', 'dataset': 'fineweb_edu_10B', 'batch_size': 6, 'gradient_accumulation_steps': 8, 'max_iters': 10000, 'eval_interval': 50000000, 'log_interval': 1, 'checkpoint_interval': 1000, 'eval_iters': 1000, 'run_eval': False, 'eval': {'mcq_benchmarks': None, 'mcq_num_samples': 1000, 'eval_byte_metrics': False, 'text_modeling_eval': False, 'text_generation_eval': False}, 'optimizer': {'optimizer_name': 'adamW', 'lr': 0.0005, 'min_lr': 5e-05, 'weight_decay': 0.01, 'beta1': 0.9, 'beta2': 0.95, 'grad_clip': 1.0}, 'lr_scheduler': {'name': 'cosine', 'warmup_iters': 100}, 'dataloader': {'name': 'autoencoder'}, 'datasampling': {'name': 'standard'}, 'loss_fn': {'name': 'pass_through'}}, 'general': {'logging': {'wandb_log': True, 'wandb_project': 'SuperTinyLanguageModels', 'wandb_run_name': None, 'group_name': 'experimental_byte_level'}, 'paths': {'output_dir': 'outputs', 'data_dir': '/root/SuperTinyLanguageModels/data', 'checkpoint_dir': 'checkpoints', 'eval_dir': '/root/SuperTinyLanguageModels/evals'}, 'seed': 489, 'device': 'cuda'}} +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_init.py:init():666] starting backend +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_init.py:init():670] setting up manager +2024-09-23 09:34:07,251 INFO MainThread:85375 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-09-23 09:34:07,253 INFO MainThread:85375 [wandb_init.py:init():678] backend started and connected +2024-09-23 09:34:07,256 INFO MainThread:85375 [wandb_init.py:init():773] updated telemetry +2024-09-23 09:34:07,262 INFO MainThread:85375 [wandb_init.py:init():806] communicating run to backend with 90.0 second timeout +2024-09-23 09:34:07,628 INFO MainThread:85375 [wandb_init.py:init():857] starting run threads in backend +2024-09-23 09:34:07,798 INFO MainThread:85375 [wandb_run.py:_console_start():2459] atexit reg +2024-09-23 09:34:07,798 INFO MainThread:85375 [wandb_run.py:_redirect():2307] redirect: wrap_raw +2024-09-23 09:34:07,798 INFO MainThread:85375 [wandb_run.py:_redirect():2372] Wrapping output streams. +2024-09-23 09:34:07,798 INFO MainThread:85375 [wandb_run.py:_redirect():2397] Redirects installed. +2024-09-23 09:34:07,802 INFO MainThread:85375 [wandb_init.py:init():900] run started, returning control to user process +2024-09-23 14:57:46,675 WARNING MsgRouterThr:85375 [router.py:message_loop():77] message_loop has been closed diff --git a/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/files/config.yaml b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47a12e10d355e3ed05a889f578804e4547a369f3 --- /dev/null +++ b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/files/config.yaml @@ -0,0 +1,115 @@ +_wandb: + value: + cli_version: 0.18.1 + m: [] + python_version: 3.10.14 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 50 + - 51 + - 53 + - 55 + "2": + - 1 + - 5 + - 11 + - 49 + - 50 + - 51 + - 53 + - 55 + "3": + - 13 + - 15 + - 16 + - 23 + - 55 + - 61 + "4": 3.10.14 + "5": 0.18.1 + "6": 4.44.2 + "8": + - 5 + - 9 + "12": 0.18.1 + "13": linux-x86_64 +general: + value: + device: cuda + logging: + group_name: experimental_byte_level + wandb_log: true + wandb_project: SuperTinyLanguageModels + wandb_run_name: null + paths: + checkpoint_dir: checkpoints + data_dir: /root/SuperTinyLanguageModels/data + eval_dir: /root/SuperTinyLanguageModels/evals + output_dir: outputs + seed: 489 +model: + value: + byte_hidden: 128 + chunk_len_loss_weight: 0.1 + chunk_len_penalty: 0.1 + context_window: 8192 + core_model_type: pass_through + cproj_weight_tying: false + embedding_model_type: byte_level + embedding_weight_tying: true + ffn_weight_tying: false + hidden_dim: 384 + lm_head_bias: false + lm_head_dropout: 0 + lm_head_normalization: rms_norm + lm_head_type: byte_level + max_chunk_length: 12 + max_num_chunks: 1024 + model_shell_type: byte_autoencoder_shell + num_byte_decoder_layers: 5 + num_delimiter_layers: 3 + positional_encoding_type: rope + target_chunk_len: 8 + tokenizer_dataset_name: simple_en_wiki + tokenizer_simplify_data: true + tokenizer_type: bpe + vocab_size: 259 +trainer: + value: + batch_size: 6 + checkpoint_interval: 1000 + dataloader: + name: autoencoder + datasampling: + name: standard + dataset: fineweb_edu_10B + eval: + eval_byte_metrics: false + mcq_benchmarks: null + mcq_num_samples: 1000 + text_generation_eval: false + text_modeling_eval: false + eval_interval: 50000000 + eval_iters: 1000 + gradient_accumulation_steps: 8 + log_interval: 1 + loss_fn: + name: pass_through + lr_scheduler: + name: cosine + warmup_iters: 100 + max_iters: 10000 + optimizer: + beta1: 0.9 + beta2: 0.95 + grad_clip: 1 + lr: 0.0005 + min_lr: 5e-05 + optimizer_name: adamW + weight_decay: 0.01 + run_eval: false + trainer_type: base_trainer diff --git a/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/files/output.log b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..99795af3422df94171fcc55c19d0ce325076f21b --- /dev/null +++ b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/files/output.log @@ -0,0 +1,9390 @@ +Weight and Biases Initialized +Rank0 Trainer built +Training loop is starting +All GPU(s): step 1: loss 11.9375, lr 5.0e-06, dt 2.1s +All GPU(s): step 2: loss 11.8828, lr 1.0e-05, dt 2.1s +All GPU(s): step 3: loss 11.8516, lr 1.5e-05, dt 2.1s +All GPU(s): step 4: loss 11.7734, lr 2.0e-05, dt 2.1s +All GPU(s): step 5: loss 11.7109, lr 2.5e-05, dt 2.0s +All GPU(s): step 6: loss 11.5938, lr 3.0e-05, dt 2.1s +All GPU(s): step 7: loss 11.4922, lr 3.5e-05, dt 2.0s +All GPU(s): step 8: loss 11.3359, lr 4.0e-05, dt 2.0s +All GPU(s): step 9: loss 11.2109, lr 4.5e-05, dt 2.1s +All GPU(s): step 10: loss 11.0234, lr 5.0e-05, dt 2.0s +All GPU(s): step 11: loss 10.8516, lr 5.5e-05, dt 2.0s +All GPU(s): step 12: loss 10.7031, lr 6.0e-05, dt 2.0s +All GPU(s): step 13: loss 10.5078, lr 6.5e-05, dt 2.1s +All GPU(s): step 14: loss 10.2969, lr 7.0e-05, dt 2.2s +All GPU(s): step 15: loss 10.1094, lr 7.5e-05, dt 2.1s +All GPU(s): step 16: loss 9.9375, lr 8.0e-05, dt 2.1s +All GPU(s): step 17: loss 9.8281, lr 8.5e-05, dt 2.1s +All GPU(s): step 18: loss 9.6641, lr 9.0e-05, dt 2.0s +All GPU(s): step 19: loss 9.4922, lr 9.5e-05, dt 2.1s +All GPU(s): step 20: loss 9.3750, lr 1.0e-04, dt 2.0s +All GPU(s): step 21: loss 9.1953, lr 1.1e-04, dt 2.1s +All GPU(s): step 22: loss 9.0547, lr 1.1e-04, dt 2.1s +All GPU(s): step 23: loss 8.9141, lr 1.2e-04, dt 2.1s +All GPU(s): step 24: loss 8.7578, lr 1.2e-04, dt 2.0s +All GPU(s): step 25: loss 8.6562, lr 1.3e-04, dt 2.0s +All GPU(s): step 26: loss 8.5703, lr 1.3e-04, dt 2.0s +All GPU(s): step 27: loss 8.3828, lr 1.4e-04, dt 2.1s +All GPU(s): step 28: loss 8.2891, lr 1.4e-04, dt 2.1s +All GPU(s): step 29: loss 8.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 30: loss 8.0469, lr 1.5e-04, dt 2.1s +All GPU(s): step 31: loss 7.9414, lr 1.5e-04, dt 2.0s +All GPU(s): step 32: loss 7.8008, lr 1.6e-04, dt 2.0s +All GPU(s): step 33: loss 7.6680, lr 1.6e-04, dt 2.1s +All GPU(s): step 34: loss 7.6094, lr 1.7e-04, dt 2.1s +All GPU(s): step 35: loss 7.4961, lr 1.8e-04, dt 2.1s +All GPU(s): step 36: loss 7.3984, lr 1.8e-04, dt 2.1s +All GPU(s): step 37: loss 7.3164, lr 1.8e-04, dt 2.0s +All GPU(s): step 38: loss 7.2695, lr 1.9e-04, dt 2.1s +All GPU(s): step 39: loss 7.1641, lr 1.9e-04, dt 2.0s +All GPU(s): step 40: loss 7.1133, lr 2.0e-04, dt 2.0s +All GPU(s): step 41: loss 7.0430, lr 2.0e-04, dt 2.0s +All GPU(s): step 42: loss 7.0039, lr 2.1e-04, dt 2.0s +All GPU(s): step 43: loss 6.8828, lr 2.2e-04, dt 2.1s +All GPU(s): step 44: loss 6.8828, lr 2.2e-04, dt 2.0s +All GPU(s): step 45: loss 6.8242, lr 2.2e-04, dt 2.1s +All GPU(s): step 46: loss 6.7656, lr 2.3e-04, dt 2.0s +All GPU(s): step 47: loss 6.7227, lr 2.3e-04, dt 2.1s +All GPU(s): step 48: loss 6.7695, lr 2.4e-04, dt 2.1s +All GPU(s): step 49: loss 6.6758, lr 2.4e-04, dt 2.0s +All GPU(s): step 50: loss 6.6055, lr 2.5e-04, dt 2.0s +All GPU(s): step 51: loss 6.6328, lr 2.6e-04, dt 2.0s +All GPU(s): step 52: loss 6.6641, lr 2.6e-04, dt 2.1s +All GPU(s): step 53: loss 6.6172, lr 2.6e-04, dt 2.1s +All GPU(s): step 54: loss 6.6172, lr 2.7e-04, dt 2.0s +All GPU(s): step 55: loss 6.5703, lr 2.8e-04, dt 2.0s +All GPU(s): step 56: loss 6.5781, lr 2.8e-04, dt 2.0s +All GPU(s): step 57: loss 6.5742, lr 2.8e-04, dt 2.1s +All GPU(s): step 58: loss 6.5977, lr 2.9e-04, dt 2.0s +All GPU(s): step 59: loss 6.5742, lr 3.0e-04, dt 2.0s +All GPU(s): step 60: loss 6.6133, lr 3.0e-04, dt 2.0s +All GPU(s): step 61: loss 6.5898, lr 3.0e-04, dt 2.1s +All GPU(s): step 62: loss 6.5469, lr 3.1e-04, dt 2.1s +All GPU(s): step 63: loss 6.5391, lr 3.2e-04, dt 2.1s +All GPU(s): step 64: loss 6.5469, lr 3.2e-04, dt 2.0s +All GPU(s): step 65: loss 6.5898, lr 3.2e-04, dt 2.0s +All GPU(s): step 66: loss 6.5312, lr 3.3e-04, dt 2.0s +All GPU(s): step 67: loss 6.5312, lr 3.4e-04, dt 2.2s +All GPU(s): step 68: loss 6.5273, lr 3.4e-04, dt 2.0s +All GPU(s): step 69: loss 6.5469, lr 3.5e-04, dt 2.0s +All GPU(s): step 70: loss 6.5547, lr 3.5e-04, dt 2.0s +All GPU(s): step 71: loss 6.5234, lr 3.6e-04, dt 2.0s +All GPU(s): step 72: loss 6.5977, lr 3.6e-04, dt 2.1s +All GPU(s): step 73: loss 6.5195, lr 3.6e-04, dt 2.0s +All GPU(s): step 74: loss 6.5273, lr 3.7e-04, dt 2.0s +All GPU(s): step 75: loss 6.4961, lr 3.8e-04, dt 2.0s +All GPU(s): step 76: loss 6.5352, lr 3.8e-04, dt 2.1s +All GPU(s): step 77: loss 6.5430, lr 3.8e-04, dt 2.1s +All GPU(s): step 78: loss 6.5508, lr 3.9e-04, dt 2.0s +All GPU(s): step 79: loss 6.5156, lr 4.0e-04, dt 2.0s +All GPU(s): step 80: loss 6.5195, lr 4.0e-04, dt 2.0s +All GPU(s): step 81: loss 6.5430, lr 4.1e-04, dt 2.1s +All GPU(s): step 82: loss 6.5195, lr 4.1e-04, dt 2.0s +All GPU(s): step 83: loss 6.5195, lr 4.2e-04, dt 2.0s +All GPU(s): step 84: loss 6.5078, lr 4.2e-04, dt 2.0s +All GPU(s): step 85: loss 6.5273, lr 4.3e-04, dt 2.0s +All GPU(s): step 86: loss 6.5273, lr 4.3e-04, dt 2.1s +All GPU(s): step 87: loss 6.5078, lr 4.4e-04, dt 2.0s +All GPU(s): step 88: loss 6.5547, lr 4.4e-04, dt 2.0s +All GPU(s): step 89: loss 6.4883, lr 4.4e-04, dt 2.0s +All GPU(s): step 90: loss 6.4844, lr 4.5e-04, dt 2.0s +All GPU(s): step 91: loss 6.5352, lr 4.6e-04, dt 2.1s +All GPU(s): step 92: loss 6.4180, lr 4.6e-04, dt 2.0s +All GPU(s): step 93: loss 6.4805, lr 4.6e-04, dt 2.0s +All GPU(s): step 94: loss 6.4727, lr 4.7e-04, dt 2.0s +All GPU(s): step 95: loss 6.4727, lr 4.7e-04, dt 2.0s +All GPU(s): step 96: loss 6.4648, lr 4.8e-04, dt 2.1s +All GPU(s): step 97: loss 6.3828, lr 4.9e-04, dt 2.0s +All GPU(s): step 98: loss 6.4023, lr 4.9e-04, dt 2.0s +All GPU(s): step 99: loss 6.4805, lr 4.9e-04, dt 2.0s +All GPU(s): step 100: loss 6.4102, lr 5.0e-04, dt 2.0s +All GPU(s): step 101: loss 6.4492, lr 5.0e-04, dt 2.1s +All GPU(s): step 102: loss 6.3828, lr 5.0e-04, dt 2.0s +All GPU(s): step 103: loss 6.3906, lr 5.0e-04, dt 2.0s +All GPU(s): step 104: loss 6.4062, lr 5.0e-04, dt 2.0s +All GPU(s): step 105: loss 6.4062, lr 5.0e-04, dt 2.0s +All GPU(s): step 106: loss 6.3633, lr 5.0e-04, dt 2.1s +All GPU(s): step 107: loss 6.4258, lr 5.0e-04, dt 2.0s +All GPU(s): step 108: loss 6.3438, lr 5.0e-04, dt 2.0s +All GPU(s): step 109: loss 6.3633, lr 5.0e-04, dt 2.0s +All GPU(s): step 110: loss 6.2930, lr 5.0e-04, dt 2.1s +All GPU(s): step 111: loss 6.2695, lr 5.0e-04, dt 2.0s +All GPU(s): step 112: loss 6.2656, lr 5.0e-04, dt 2.0s +All GPU(s): step 113: loss 6.2930, lr 5.0e-04, dt 2.0s +All GPU(s): step 114: loss 6.2070, lr 5.0e-04, dt 2.0s +All GPU(s): step 115: loss 6.1016, lr 5.0e-04, dt 2.1s +All GPU(s): step 116: loss 6.0781, lr 5.0e-04, dt 2.0s +All GPU(s): step 117: loss 5.8398, lr 5.0e-04, dt 2.0s +All GPU(s): step 118: loss 6.0391, lr 5.0e-04, dt 2.0s +All GPU(s): step 119: loss 6.0234, lr 5.0e-04, dt 2.0s +All GPU(s): step 120: loss 5.9492, lr 5.0e-04, dt 2.1s +All GPU(s): step 121: loss 6.3945, lr 5.0e-04, dt 2.0s +All GPU(s): step 122: loss 6.1562, lr 5.0e-04, dt 2.0s +All GPU(s): step 123: loss 6.3867, lr 5.0e-04, dt 2.0s +All GPU(s): step 124: loss 6.6562, lr 5.0e-04, dt 2.0s +All GPU(s): step 125: loss 6.6172, lr 5.0e-04, dt 2.1s +All GPU(s): step 126: loss 6.6562, lr 5.0e-04, dt 2.0s +All GPU(s): step 127: loss 6.8516, lr 5.0e-04, dt 2.0s +All GPU(s): step 128: loss 6.9180, lr 5.0e-04, dt 2.0s +All GPU(s): step 129: loss 6.9922, lr 5.0e-04, dt 2.0s +All GPU(s): step 130: loss 7.0195, lr 5.0e-04, dt 2.1s +All GPU(s): step 131: loss 7.0781, lr 5.0e-04, dt 2.0s +All GPU(s): step 132: loss 7.0977, lr 5.0e-04, dt 2.0s +All GPU(s): step 133: loss 7.0703, lr 5.0e-04, dt 2.0s +All GPU(s): step 134: loss 7.1172, lr 5.0e-04, dt 2.0s +All GPU(s): step 135: loss 7.1758, lr 5.0e-04, dt 2.1s +All GPU(s): step 136: loss 7.2539, lr 5.0e-04, dt 2.1s +All GPU(s): step 137: loss 7.2461, lr 5.0e-04, dt 2.1s +All GPU(s): step 138: loss 7.1680, lr 5.0e-04, dt 2.0s +All GPU(s): step 139: loss 7.2109, lr 5.0e-04, dt 2.1s +All GPU(s): step 140: loss 7.2383, lr 5.0e-04, dt 2.1s +All GPU(s): step 141: loss 7.2930, lr 5.0e-04, dt 2.0s +All GPU(s): step 142: loss 7.2812, lr 5.0e-04, dt 2.0s +All GPU(s): step 143: loss 7.2930, lr 5.0e-04, dt 2.0s +All GPU(s): step 144: loss 7.2773, lr 5.0e-04, dt 2.1s +All GPU(s): step 145: loss 7.2969, lr 5.0e-04, dt 2.0s +All GPU(s): step 146: loss 7.2969, lr 5.0e-04, dt 2.0s +All GPU(s): step 147: loss 7.2852, lr 5.0e-04, dt 2.0s +All GPU(s): step 148: loss 7.2969, lr 5.0e-04, dt 2.0s +All GPU(s): step 149: loss 7.4062, lr 5.0e-04, dt 2.1s +All GPU(s): step 150: loss 7.3789, lr 5.0e-04, dt 2.0s +All GPU(s): step 151: loss 7.4062, lr 5.0e-04, dt 2.0s +All GPU(s): step 152: loss 7.3906, lr 5.0e-04, dt 2.0s +All GPU(s): step 153: loss 7.4180, lr 5.0e-04, dt 2.0s +All GPU(s): step 154: loss 7.3672, lr 5.0e-04, dt 2.1s +All GPU(s): step 155: loss 7.3516, lr 5.0e-04, dt 2.0s +All GPU(s): step 156: loss 7.4258, lr 5.0e-04, dt 2.0s +All GPU(s): step 157: loss 7.3398, lr 5.0e-04, dt 2.0s +All GPU(s): step 158: loss 7.3789, lr 5.0e-04, dt 2.0s +All GPU(s): step 159: loss 7.3359, lr 5.0e-04, dt 2.2s +All GPU(s): step 160: loss 7.2617, lr 5.0e-04, dt 2.0s +All GPU(s): step 161: loss 7.2734, lr 5.0e-04, dt 2.0s +All GPU(s): step 162: loss 7.3633, lr 5.0e-04, dt 2.0s +All GPU(s): step 163: loss 7.3203, lr 5.0e-04, dt 2.0s +All GPU(s): step 164: loss 7.3359, lr 5.0e-04, dt 2.1s +All GPU(s): step 165: loss 7.3320, lr 5.0e-04, dt 2.0s +All GPU(s): step 166: loss 7.3438, lr 5.0e-04, dt 2.0s +All GPU(s): step 167: loss 7.4492, lr 5.0e-04, dt 2.0s +All GPU(s): step 168: loss 7.3398, lr 5.0e-04, dt 2.1s +All GPU(s): step 169: loss 7.3906, lr 5.0e-04, dt 2.1s +All GPU(s): step 170: loss 7.3555, lr 5.0e-04, dt 2.0s +All GPU(s): step 171: loss 7.3711, lr 5.0e-04, dt 2.0s +All GPU(s): step 172: loss 7.3281, lr 5.0e-04, dt 2.0s +All GPU(s): step 173: loss 7.3047, lr 5.0e-04, dt 2.1s +All GPU(s): step 174: loss 7.3008, lr 5.0e-04, dt 2.0s +All GPU(s): step 175: loss 7.3008, lr 5.0e-04, dt 2.0s +All GPU(s): step 176: loss 7.3047, lr 5.0e-04, dt 2.0s +All GPU(s): step 177: loss 7.2344, lr 5.0e-04, dt 2.0s +All GPU(s): step 178: loss 7.2148, lr 5.0e-04, dt 2.1s +All GPU(s): step 179: loss 7.2773, lr 5.0e-04, dt 2.0s +All GPU(s): step 180: loss 7.3047, lr 5.0e-04, dt 2.0s +All GPU(s): step 181: loss 7.2188, lr 5.0e-04, dt 2.0s +All GPU(s): step 182: loss 7.2148, lr 5.0e-04, dt 2.1s +All GPU(s): step 183: loss 7.2266, lr 5.0e-04, dt 2.1s +All GPU(s): step 184: loss 7.1680, lr 5.0e-04, dt 2.0s +All GPU(s): step 185: loss 7.1523, lr 5.0e-04, dt 2.0s +All GPU(s): step 186: loss 7.2305, lr 5.0e-04, dt 2.0s +All GPU(s): step 187: loss 7.1719, lr 5.0e-04, dt 2.0s +All GPU(s): step 188: loss 7.1406, lr 5.0e-04, dt 2.1s +All GPU(s): step 189: loss 7.1406, lr 5.0e-04, dt 2.1s +All GPU(s): step 190: loss 7.1016, lr 5.0e-04, dt 2.0s +All GPU(s): step 191: loss 7.1016, lr 5.0e-04, dt 2.0s +All GPU(s): step 192: loss 7.0938, lr 5.0e-04, dt 2.1s +All GPU(s): step 193: loss 7.0703, lr 5.0e-04, dt 2.1s +All GPU(s): step 194: loss 7.0039, lr 5.0e-04, dt 2.0s +All GPU(s): step 195: loss 7.0938, lr 5.0e-04, dt 2.0s +All GPU(s): step 196: loss 7.1445, lr 5.0e-04, dt 2.0s +All GPU(s): step 197: loss 7.1016, lr 5.0e-04, dt 2.1s +All GPU(s): step 198: loss 7.0703, lr 5.0e-04, dt 2.1s +All GPU(s): step 199: loss 7.0508, lr 5.0e-04, dt 2.0s +All GPU(s): step 200: loss 6.9727, lr 5.0e-04, dt 2.0s +All GPU(s): step 201: loss 7.0391, lr 5.0e-04, dt 2.0s +All GPU(s): step 202: loss 7.0273, lr 5.0e-04, dt 2.1s +All GPU(s): step 203: loss 6.9961, lr 5.0e-04, dt 2.0s +All GPU(s): step 204: loss 7.0039, lr 5.0e-04, dt 2.0s +All GPU(s): step 205: loss 7.0039, lr 5.0e-04, dt 2.0s +All GPU(s): step 206: loss 7.0156, lr 5.0e-04, dt 2.0s +All GPU(s): step 207: loss 7.0117, lr 5.0e-04, dt 2.1s +All GPU(s): step 208: loss 6.9023, lr 5.0e-04, dt 2.0s +All GPU(s): step 209: loss 6.9492, lr 5.0e-04, dt 2.0s +All GPU(s): step 210: loss 6.9414, lr 5.0e-04, dt 2.0s +All GPU(s): step 211: loss 6.9180, lr 5.0e-04, dt 2.1s +All GPU(s): step 212: loss 6.9062, lr 5.0e-04, dt 2.2s +All GPU(s): step 213: loss 6.9141, lr 5.0e-04, dt 2.0s +All GPU(s): step 214: loss 6.9062, lr 5.0e-04, dt 2.1s +All GPU(s): step 215: loss 6.8789, lr 5.0e-04, dt 2.1s +All GPU(s): step 216: loss 6.8906, lr 5.0e-04, dt 2.0s +All GPU(s): step 217: loss 6.8477, lr 5.0e-04, dt 2.1s +All GPU(s): step 218: loss 6.8359, lr 5.0e-04, dt 2.1s +All GPU(s): step 219: loss 6.8359, lr 5.0e-04, dt 2.0s +All GPU(s): step 220: loss 6.8047, lr 5.0e-04, dt 2.0s +All GPU(s): step 221: loss 6.8047, lr 5.0e-04, dt 2.0s +All GPU(s): step 222: loss 6.7852, lr 5.0e-04, dt 2.1s +All GPU(s): step 223: loss 6.8047, lr 5.0e-04, dt 2.1s +All GPU(s): step 224: loss 6.7188, lr 5.0e-04, dt 2.1s +All GPU(s): step 225: loss 6.7383, lr 5.0e-04, dt 2.1s +All GPU(s): step 226: loss 6.7578, lr 5.0e-04, dt 2.1s +All GPU(s): step 227: loss 6.7617, lr 5.0e-04, dt 2.1s +All GPU(s): step 228: loss 6.7266, lr 5.0e-04, dt 2.0s +All GPU(s): step 229: loss 6.7266, lr 5.0e-04, dt 2.1s +All GPU(s): step 230: loss 6.6641, lr 5.0e-04, dt 2.1s +All GPU(s): step 231: loss 6.6875, lr 5.0e-04, dt 2.1s +All GPU(s): step 232: loss 6.6445, lr 5.0e-04, dt 2.0s +All GPU(s): step 233: loss 6.6562, lr 5.0e-04, dt 2.0s +All GPU(s): step 234: loss 6.5781, lr 5.0e-04, dt 2.0s +All GPU(s): step 235: loss 6.6484, lr 5.0e-04, dt 2.1s +All GPU(s): step 236: loss 6.5938, lr 5.0e-04, dt 2.2s +All GPU(s): step 237: loss 6.5703, lr 5.0e-04, dt 2.1s +All GPU(s): step 238: loss 6.5391, lr 5.0e-04, dt 2.0s +All GPU(s): step 239: loss 6.5000, lr 5.0e-04, dt 2.0s +All GPU(s): step 240: loss 6.4805, lr 5.0e-04, dt 2.1s +All GPU(s): step 241: loss 6.5078, lr 5.0e-04, dt 2.2s +All GPU(s): step 242: loss 6.5078, lr 5.0e-04, dt 2.1s +All GPU(s): step 243: loss 6.4766, lr 5.0e-04, dt 2.0s +All GPU(s): step 244: loss 6.4844, lr 5.0e-04, dt 2.0s +All GPU(s): step 245: loss 6.4609, lr 5.0e-04, dt 2.0s +All GPU(s): step 246: loss 6.4648, lr 5.0e-04, dt 2.1s +All GPU(s): step 247: loss 6.4961, lr 5.0e-04, dt 2.0s +All GPU(s): step 248: loss 6.4531, lr 5.0e-04, dt 2.0s +All GPU(s): step 249: loss 6.4414, lr 5.0e-04, dt 2.1s +All GPU(s): step 250: loss 6.4297, lr 5.0e-04, dt 2.0s +All GPU(s): step 251: loss 6.5625, lr 5.0e-04, dt 2.1s +All GPU(s): step 252: loss 6.4375, lr 5.0e-04, dt 2.0s +All GPU(s): step 253: loss 6.3750, lr 5.0e-04, dt 2.0s +All GPU(s): step 254: loss 6.3516, lr 5.0e-04, dt 2.0s +All GPU(s): step 255: loss 6.3203, lr 5.0e-04, dt 2.1s +All GPU(s): step 256: loss 6.3516, lr 5.0e-04, dt 2.1s +All GPU(s): step 257: loss 6.3203, lr 5.0e-04, dt 2.0s +All GPU(s): step 258: loss 6.3008, lr 5.0e-04, dt 2.0s +All GPU(s): step 259: loss 6.3242, lr 5.0e-04, dt 2.0s +All GPU(s): step 260: loss 6.2422, lr 5.0e-04, dt 2.1s +All GPU(s): step 261: loss 6.1914, lr 5.0e-04, dt 2.0s +All GPU(s): step 262: loss 6.2109, lr 5.0e-04, dt 2.0s +All GPU(s): step 263: loss 6.2500, lr 5.0e-04, dt 2.0s +All GPU(s): step 264: loss 6.1484, lr 5.0e-04, dt 2.0s +All GPU(s): step 265: loss 6.1875, lr 5.0e-04, dt 2.2s +All GPU(s): step 266: loss 6.1523, lr 5.0e-04, dt 2.0s +All GPU(s): step 267: loss 6.1094, lr 5.0e-04, dt 2.0s +All GPU(s): step 268: loss 6.1367, lr 5.0e-04, dt 2.0s +All GPU(s): step 269: loss 6.1094, lr 5.0e-04, dt 2.0s +All GPU(s): step 270: loss 6.1016, lr 5.0e-04, dt 2.1s +All GPU(s): step 271: loss 6.0664, lr 5.0e-04, dt 2.0s +All GPU(s): step 272: loss 6.0977, lr 5.0e-04, dt 2.0s +All GPU(s): step 273: loss 6.1250, lr 5.0e-04, dt 2.0s +All GPU(s): step 274: loss 6.0586, lr 5.0e-04, dt 2.0s +All GPU(s): step 275: loss 6.0742, lr 5.0e-04, dt 2.2s +All GPU(s): step 276: loss 6.0625, lr 5.0e-04, dt 2.1s +All GPU(s): step 277: loss 6.0625, lr 5.0e-04, dt 2.0s +All GPU(s): step 278: loss 6.0508, lr 5.0e-04, dt 2.0s +All GPU(s): step 279: loss 6.0859, lr 5.0e-04, dt 2.0s +All GPU(s): step 280: loss 6.0352, lr 5.0e-04, dt 2.1s +All GPU(s): step 281: loss 6.0000, lr 5.0e-04, dt 2.1s +All GPU(s): step 282: loss 6.0234, lr 5.0e-04, dt 2.0s +All GPU(s): step 283: loss 6.0234, lr 5.0e-04, dt 2.0s +All GPU(s): step 284: loss 6.0586, lr 5.0e-04, dt 2.1s +All GPU(s): step 285: loss 6.0039, lr 5.0e-04, dt 2.1s +All GPU(s): step 286: loss 6.0000, lr 5.0e-04, dt 2.0s +All GPU(s): step 287: loss 6.0312, lr 5.0e-04, dt 2.0s +All GPU(s): step 288: loss 6.0195, lr 5.0e-04, dt 2.0s +All GPU(s): step 289: loss 6.0508, lr 5.0e-04, dt 2.1s +All GPU(s): step 290: loss 6.0547, lr 5.0e-04, dt 2.1s +All GPU(s): step 291: loss 6.0391, lr 5.0e-04, dt 2.0s +All GPU(s): step 292: loss 6.0078, lr 5.0e-04, dt 2.0s +All GPU(s): step 293: loss 6.0117, lr 5.0e-04, dt 2.1s +All GPU(s): step 294: loss 5.9648, lr 5.0e-04, dt 2.1s +All GPU(s): step 295: loss 5.9805, lr 5.0e-04, dt 2.0s +All GPU(s): step 296: loss 5.9805, lr 5.0e-04, dt 2.0s +All GPU(s): step 297: loss 5.9727, lr 5.0e-04, dt 2.0s +All GPU(s): step 298: loss 5.9844, lr 5.0e-04, dt 2.0s +All GPU(s): step 299: loss 5.9766, lr 5.0e-04, dt 2.1s +All GPU(s): step 300: loss 5.9453, lr 5.0e-04, dt 2.1s +All GPU(s): step 301: loss 5.9219, lr 5.0e-04, dt 2.0s +All GPU(s): step 302: loss 5.9727, lr 5.0e-04, dt 2.0s +All GPU(s): step 303: loss 5.9531, lr 5.0e-04, dt 2.1s +All GPU(s): step 304: loss 5.9375, lr 5.0e-04, dt 2.1s +All GPU(s): step 305: loss 5.9414, lr 5.0e-04, dt 2.0s +All GPU(s): step 306: loss 5.9570, lr 5.0e-04, dt 2.0s +All GPU(s): step 307: loss 5.8867, lr 5.0e-04, dt 2.0s +All GPU(s): step 308: loss 5.9453, lr 5.0e-04, dt 2.1s +All GPU(s): step 309: loss 5.8633, lr 5.0e-04, dt 2.1s +All GPU(s): step 310: loss 5.9141, lr 5.0e-04, dt 2.0s +All GPU(s): step 311: loss 5.8906, lr 5.0e-04, dt 2.0s +All GPU(s): step 312: loss 5.8867, lr 5.0e-04, dt 2.0s +All GPU(s): step 313: loss 5.8672, lr 5.0e-04, dt 2.1s +All GPU(s): step 314: loss 5.9102, lr 5.0e-04, dt 2.1s +All GPU(s): step 315: loss 5.8672, lr 5.0e-04, dt 2.0s +All GPU(s): step 316: loss 5.8984, lr 5.0e-04, dt 2.0s +All GPU(s): step 317: loss 5.8711, lr 5.0e-04, dt 2.0s +All GPU(s): step 318: loss 5.8477, lr 5.0e-04, dt 2.1s +All GPU(s): step 319: loss 5.8945, lr 5.0e-04, dt 2.0s +All GPU(s): step 320: loss 5.8086, lr 5.0e-04, dt 2.0s +All GPU(s): step 321: loss 5.8398, lr 5.0e-04, dt 2.0s +All GPU(s): step 322: loss 5.8125, lr 5.0e-04, dt 2.0s +All GPU(s): step 323: loss 5.8242, lr 5.0e-04, dt 2.2s +All GPU(s): step 324: loss 5.8086, lr 5.0e-04, dt 2.0s +All GPU(s): step 325: loss 5.8164, lr 5.0e-04, dt 2.0s +All GPU(s): step 326: loss 5.8008, lr 5.0e-04, dt 2.0s +All GPU(s): step 327: loss 5.9766, lr 5.0e-04, dt 2.0s +All GPU(s): step 328: loss 5.8047, lr 5.0e-04, dt 2.1s +All GPU(s): step 329: loss 5.7695, lr 5.0e-04, dt 2.0s +All GPU(s): step 330: loss 5.7695, lr 5.0e-04, dt 2.0s +All GPU(s): step 331: loss 5.7930, lr 5.0e-04, dt 2.0s +All GPU(s): step 332: loss 5.7891, lr 5.0e-04, dt 2.0s +All GPU(s): step 333: loss 5.7969, lr 5.0e-04, dt 2.1s +All GPU(s): step 334: loss 5.7734, lr 5.0e-04, dt 2.0s +All GPU(s): step 335: loss 5.8398, lr 5.0e-04, dt 2.0s +All GPU(s): step 336: loss 5.8516, lr 5.0e-04, dt 2.0s +All GPU(s): step 337: loss 5.7930, lr 5.0e-04, dt 2.0s +All GPU(s): step 338: loss 5.8906, lr 5.0e-04, dt 2.3s +All GPU(s): step 339: loss 5.7891, lr 5.0e-04, dt 2.0s +All GPU(s): step 340: loss 5.8242, lr 5.0e-04, dt 2.0s +All GPU(s): step 341: loss 5.8398, lr 5.0e-04, dt 2.0s +All GPU(s): step 342: loss 5.8164, lr 5.0e-04, dt 2.1s +All GPU(s): step 343: loss 5.7891, lr 5.0e-04, dt 2.1s +All GPU(s): step 344: loss 5.7422, lr 5.0e-04, dt 2.0s +All GPU(s): step 345: loss 5.7539, lr 5.0e-04, dt 2.0s +All GPU(s): step 346: loss 5.7227, lr 5.0e-04, dt 2.0s +All GPU(s): step 347: loss 5.7812, lr 5.0e-04, dt 2.1s +All GPU(s): step 348: loss 5.8320, lr 5.0e-04, dt 2.0s +All GPU(s): step 349: loss 5.6758, lr 5.0e-04, dt 2.0s +All GPU(s): step 350: loss 5.6992, lr 5.0e-04, dt 2.0s +All GPU(s): step 351: loss 5.6719, lr 5.0e-04, dt 2.0s +All GPU(s): step 352: loss 5.6875, lr 5.0e-04, dt 2.1s +All GPU(s): step 353: loss 5.7031, lr 5.0e-04, dt 2.0s +All GPU(s): step 354: loss 5.6602, lr 5.0e-04, dt 2.0s +All GPU(s): step 355: loss 5.6562, lr 5.0e-04, dt 2.0s +All GPU(s): step 356: loss 5.6523, lr 5.0e-04, dt 2.1s +All GPU(s): step 357: loss 5.6406, lr 5.0e-04, dt 2.1s +All GPU(s): step 358: loss 5.6523, lr 5.0e-04, dt 2.0s +All GPU(s): step 359: loss 5.6484, lr 5.0e-04, dt 2.0s +All GPU(s): step 360: loss 5.6133, lr 5.0e-04, dt 2.0s +All GPU(s): step 361: loss 5.6211, lr 5.0e-04, dt 2.0s +All GPU(s): step 362: loss 5.6484, lr 5.0e-04, dt 2.1s +All GPU(s): step 363: loss 5.5938, lr 5.0e-04, dt 2.0s +All GPU(s): step 364: loss 5.6523, lr 5.0e-04, dt 2.0s +All GPU(s): step 365: loss 5.6055, lr 5.0e-04, dt 2.0s +All GPU(s): step 366: loss 5.6094, lr 5.0e-04, dt 2.0s +All GPU(s): step 367: loss 5.6406, lr 5.0e-04, dt 2.1s +All GPU(s): step 368: loss 5.6055, lr 5.0e-04, dt 2.0s +All GPU(s): step 369: loss 5.6445, lr 5.0e-04, dt 2.0s +All GPU(s): step 370: loss 5.6133, lr 5.0e-04, dt 2.0s +All GPU(s): step 371: loss 5.6172, lr 5.0e-04, dt 2.1s +All GPU(s): step 372: loss 5.6484, lr 5.0e-04, dt 2.1s +All GPU(s): step 373: loss 5.5703, lr 5.0e-04, dt 2.0s +All GPU(s): step 374: loss 5.6250, lr 5.0e-04, dt 2.0s +All GPU(s): step 375: loss 5.6016, lr 5.0e-04, dt 2.0s +All GPU(s): step 376: loss 5.5703, lr 5.0e-04, dt 2.1s +All GPU(s): step 377: loss 5.5664, lr 5.0e-04, dt 2.0s +All GPU(s): step 378: loss 5.5859, lr 5.0e-04, dt 2.0s +All GPU(s): step 379: loss 5.5781, lr 5.0e-04, dt 2.0s +All GPU(s): step 380: loss 5.6133, lr 5.0e-04, dt 2.0s +All GPU(s): step 381: loss 5.5703, lr 5.0e-04, dt 2.1s +All GPU(s): step 382: loss 5.6133, lr 5.0e-04, dt 2.0s +All GPU(s): step 383: loss 5.5664, lr 5.0e-04, dt 2.0s +All GPU(s): step 384: loss 5.5977, lr 5.0e-04, dt 2.1s +All GPU(s): step 385: loss 5.5742, lr 5.0e-04, dt 2.0s +All GPU(s): step 386: loss 5.5391, lr 5.0e-04, dt 2.1s +All GPU(s): step 387: loss 5.5977, lr 5.0e-04, dt 2.0s +All GPU(s): step 388: loss 5.5586, lr 5.0e-04, dt 2.0s +All GPU(s): step 389: loss 5.5781, lr 5.0e-04, dt 2.0s +All GPU(s): step 390: loss 5.5469, lr 5.0e-04, dt 2.0s +All GPU(s): step 391: loss 5.5195, lr 5.0e-04, dt 2.2s +All GPU(s): step 392: loss 5.5195, lr 5.0e-04, dt 2.0s +All GPU(s): step 393: loss 5.5469, lr 5.0e-04, dt 2.0s +All GPU(s): step 394: loss 5.5469, lr 5.0e-04, dt 2.0s +All GPU(s): step 395: loss 5.5273, lr 5.0e-04, dt 2.1s +All GPU(s): step 396: loss 5.5195, lr 5.0e-04, dt 2.1s +All GPU(s): step 397: loss 5.5078, lr 5.0e-04, dt 2.0s +All GPU(s): step 398: loss 5.5586, lr 5.0e-04, dt 2.0s +All GPU(s): step 399: loss 5.4961, lr 5.0e-04, dt 2.0s +All GPU(s): step 400: loss 5.5703, lr 5.0e-04, dt 2.0s +All GPU(s): step 401: loss 5.5742, lr 5.0e-04, dt 2.1s +All GPU(s): step 402: loss 5.5547, lr 5.0e-04, dt 2.0s +All GPU(s): step 403: loss 5.5469, lr 5.0e-04, dt 2.0s +All GPU(s): step 404: loss 5.5234, lr 5.0e-04, dt 2.0s +All GPU(s): step 405: loss 5.5469, lr 5.0e-04, dt 2.1s +All GPU(s): step 406: loss 5.5234, lr 5.0e-04, dt 2.1s +All GPU(s): step 407: loss 5.4922, lr 5.0e-04, dt 2.0s +All GPU(s): step 408: loss 5.5000, lr 5.0e-04, dt 2.0s +All GPU(s): step 409: loss 5.5039, lr 5.0e-04, dt 2.0s +All GPU(s): step 410: loss 5.5000, lr 5.0e-04, dt 2.1s +All GPU(s): step 411: loss 5.4961, lr 5.0e-04, dt 2.0s +All GPU(s): step 412: loss 5.5039, lr 5.0e-04, dt 2.0s +All GPU(s): step 413: loss 5.5156, lr 5.0e-04, dt 2.0s +All GPU(s): step 414: loss 5.5000, lr 5.0e-04, dt 2.1s +All GPU(s): step 415: loss 5.4766, lr 5.0e-04, dt 2.2s +All GPU(s): step 416: loss 5.4414, lr 5.0e-04, dt 2.0s +All GPU(s): step 417: loss 5.5391, lr 5.0e-04, dt 2.0s +All GPU(s): step 418: loss 5.4531, lr 5.0e-04, dt 2.0s +All GPU(s): step 419: loss 5.4648, lr 5.0e-04, dt 2.0s +All GPU(s): step 420: loss 5.4727, lr 5.0e-04, dt 2.1s +All GPU(s): step 421: loss 5.4883, lr 5.0e-04, dt 2.0s +All GPU(s): step 422: loss 5.4688, lr 5.0e-04, dt 2.0s +All GPU(s): step 423: loss 5.4570, lr 5.0e-04, dt 2.0s +All GPU(s): step 424: loss 5.4023, lr 5.0e-04, dt 2.0s +All GPU(s): step 425: loss 5.4141, lr 5.0e-04, dt 2.1s +All GPU(s): step 426: loss 5.4297, lr 5.0e-04, dt 2.1s +All GPU(s): step 427: loss 5.4219, lr 5.0e-04, dt 2.0s +All GPU(s): step 428: loss 5.4023, lr 5.0e-04, dt 2.0s +All GPU(s): step 429: loss 5.3867, lr 5.0e-04, dt 2.0s +All GPU(s): step 430: loss 5.3477, lr 5.0e-04, dt 2.1s +All GPU(s): step 431: loss 5.4141, lr 5.0e-04, dt 2.0s +All GPU(s): step 432: loss 5.4609, lr 5.0e-04, dt 2.0s +All GPU(s): step 433: loss 5.3633, lr 5.0e-04, dt 2.0s +All GPU(s): step 434: loss 5.3789, lr 5.0e-04, dt 2.1s +All GPU(s): step 435: loss 5.3984, lr 5.0e-04, dt 2.0s +All GPU(s): step 436: loss 5.3438, lr 5.0e-04, dt 2.0s +All GPU(s): step 437: loss 5.3086, lr 5.0e-04, dt 2.0s +All GPU(s): step 438: loss 5.3828, lr 5.0e-04, dt 2.0s +All GPU(s): step 439: loss 5.3008, lr 5.0e-04, dt 2.1s +All GPU(s): step 440: loss 5.3750, lr 5.0e-04, dt 2.0s +All GPU(s): step 441: loss 5.3164, lr 5.0e-04, dt 2.0s +All GPU(s): step 442: loss 5.3359, lr 5.0e-04, dt 2.0s +All GPU(s): step 443: loss 5.3750, lr 5.0e-04, dt 2.0s +All GPU(s): step 444: loss 5.3281, lr 5.0e-04, dt 2.1s +All GPU(s): step 445: loss 5.3672, lr 5.0e-04, dt 2.0s +All GPU(s): step 446: loss 5.3164, lr 5.0e-04, dt 2.0s +All GPU(s): step 447: loss 5.3594, lr 5.0e-04, dt 2.0s +All GPU(s): step 448: loss 5.4062, lr 5.0e-04, dt 2.0s +All GPU(s): step 449: loss 5.4023, lr 5.0e-04, dt 2.1s +All GPU(s): step 450: loss 5.3633, lr 5.0e-04, dt 2.0s +All GPU(s): step 451: loss 5.2969, lr 5.0e-04, dt 2.0s +All GPU(s): step 452: loss 5.2812, lr 5.0e-04, dt 2.0s +All GPU(s): step 453: loss 5.2891, lr 5.0e-04, dt 2.0s +All GPU(s): step 454: loss 5.2773, lr 5.0e-04, dt 2.2s +All GPU(s): step 455: loss 5.2773, lr 5.0e-04, dt 2.0s +All GPU(s): step 456: loss 5.2734, lr 5.0e-04, dt 2.0s +All GPU(s): step 457: loss 5.2930, lr 5.0e-04, dt 2.0s +All GPU(s): step 458: loss 5.2344, lr 5.0e-04, dt 2.0s +All GPU(s): step 459: loss 5.2617, lr 5.0e-04, dt 2.1s +All GPU(s): step 460: loss 5.1992, lr 5.0e-04, dt 2.0s +All GPU(s): step 461: loss 5.2266, lr 5.0e-04, dt 2.0s +All GPU(s): step 462: loss 5.1758, lr 5.0e-04, dt 2.0s +All GPU(s): step 463: loss 5.2422, lr 5.0e-04, dt 2.1s +All GPU(s): step 464: loss 5.2891, lr 5.0e-04, dt 2.1s +All GPU(s): step 465: loss 5.2969, lr 5.0e-04, dt 2.0s +All GPU(s): step 466: loss 5.2148, lr 5.0e-04, dt 2.0s +All GPU(s): step 467: loss 5.2148, lr 5.0e-04, dt 2.0s +All GPU(s): step 468: loss 5.2578, lr 5.0e-04, dt 2.1s +All GPU(s): step 469: loss 5.1641, lr 5.0e-04, dt 2.1s +All GPU(s): step 470: loss 5.2070, lr 5.0e-04, dt 2.0s +All GPU(s): step 471: loss 5.1875, lr 5.0e-04, dt 2.0s +All GPU(s): step 472: loss 5.1484, lr 5.0e-04, dt 2.0s +All GPU(s): step 473: loss 5.1172, lr 5.0e-04, dt 2.1s +All GPU(s): step 474: loss 5.1953, lr 5.0e-04, dt 2.0s +All GPU(s): step 475: loss 5.1445, lr 5.0e-04, dt 2.0s +All GPU(s): step 476: loss 5.0898, lr 5.0e-04, dt 2.0s +All GPU(s): step 477: loss 5.2578, lr 5.0e-04, dt 2.0s +All GPU(s): step 478: loss 5.1562, lr 5.0e-04, dt 2.1s +All GPU(s): step 479: loss 5.1992, lr 5.0e-04, dt 2.1s +All GPU(s): step 480: loss 5.1172, lr 5.0e-04, dt 2.0s +All GPU(s): step 481: loss 5.2656, lr 5.0e-04, dt 2.0s +All GPU(s): step 482: loss 5.2188, lr 5.0e-04, dt 2.0s +All GPU(s): step 483: loss 5.0938, lr 5.0e-04, dt 2.1s +All GPU(s): step 484: loss 5.1133, lr 5.0e-04, dt 2.1s +All GPU(s): step 485: loss 5.1211, lr 5.0e-04, dt 2.0s +All GPU(s): step 486: loss 5.1250, lr 5.0e-04, dt 2.0s +All GPU(s): step 487: loss 5.0703, lr 5.0e-04, dt 2.0s +All GPU(s): step 488: loss 5.1094, lr 5.0e-04, dt 2.1s +All GPU(s): step 489: loss 5.1250, lr 5.0e-04, dt 2.0s +All GPU(s): step 490: loss 5.0977, lr 5.0e-04, dt 2.0s +All GPU(s): step 491: loss 5.1055, lr 5.0e-04, dt 2.0s +All GPU(s): step 492: loss 5.1523, lr 5.0e-04, dt 2.0s +All GPU(s): step 493: loss 5.0781, lr 5.0e-04, dt 2.1s +All GPU(s): step 494: loss 5.1094, lr 5.0e-04, dt 2.0s +All GPU(s): step 495: loss 5.1094, lr 5.0e-04, dt 2.0s +All GPU(s): step 496: loss 5.0859, lr 5.0e-04, dt 2.0s +All GPU(s): step 497: loss 5.0742, lr 5.0e-04, dt 2.1s +All GPU(s): step 498: loss 5.0508, lr 5.0e-04, dt 2.0s +All GPU(s): step 499: loss 5.0469, lr 5.0e-04, dt 2.0s +All GPU(s): step 500: loss 5.0977, lr 5.0e-04, dt 2.0s +All GPU(s): step 501: loss 5.0742, lr 5.0e-04, dt 2.0s +All GPU(s): step 502: loss 5.0703, lr 5.0e-04, dt 2.1s +All GPU(s): step 503: loss 5.0508, lr 5.0e-04, dt 2.1s +All GPU(s): step 504: loss 5.0430, lr 5.0e-04, dt 2.0s +All GPU(s): step 505: loss 5.1406, lr 5.0e-04, dt 2.0s +All GPU(s): step 506: loss 5.0352, lr 5.0e-04, dt 2.1s +All GPU(s): step 507: loss 5.0664, lr 5.0e-04, dt 2.1s +All GPU(s): step 508: loss 5.0117, lr 5.0e-04, dt 2.0s +All GPU(s): step 509: loss 5.0156, lr 5.0e-04, dt 2.0s +All GPU(s): step 510: loss 5.0273, lr 5.0e-04, dt 2.0s +All GPU(s): step 511: loss 5.0078, lr 5.0e-04, dt 2.0s +All GPU(s): step 512: loss 5.0156, lr 5.0e-04, dt 2.1s +All GPU(s): step 513: loss 4.9805, lr 5.0e-04, dt 2.1s +All GPU(s): step 514: loss 4.9961, lr 5.0e-04, dt 2.0s +All GPU(s): step 515: loss 5.0430, lr 5.0e-04, dt 2.0s +All GPU(s): step 516: loss 5.0000, lr 5.0e-04, dt 2.1s +All GPU(s): step 517: loss 5.0469, lr 5.0e-04, dt 2.1s +All GPU(s): step 518: loss 4.9883, lr 5.0e-04, dt 2.0s +All GPU(s): step 519: loss 5.1055, lr 5.0e-04, dt 2.1s +All GPU(s): step 520: loss 5.0469, lr 5.0e-04, dt 2.1s +All GPU(s): step 521: loss 5.0273, lr 5.0e-04, dt 2.1s +All GPU(s): step 522: loss 4.9922, lr 5.0e-04, dt 2.1s +All GPU(s): step 523: loss 4.9531, lr 5.0e-04, dt 2.0s +All GPU(s): step 524: loss 4.9883, lr 5.0e-04, dt 2.0s +All GPU(s): step 525: loss 4.9648, lr 5.0e-04, dt 2.0s +All GPU(s): step 526: loss 4.9883, lr 5.0e-04, dt 2.1s +All GPU(s): step 527: loss 4.9531, lr 5.0e-04, dt 2.0s +All GPU(s): step 528: loss 4.9297, lr 5.0e-04, dt 2.0s +All GPU(s): step 529: loss 4.8867, lr 5.0e-04, dt 2.0s +All GPU(s): step 530: loss 4.9727, lr 5.0e-04, dt 2.0s +All GPU(s): step 531: loss 4.9141, lr 5.0e-04, dt 2.1s +All GPU(s): step 532: loss 4.9609, lr 5.0e-04, dt 2.0s +All GPU(s): step 533: loss 4.9648, lr 5.0e-04, dt 2.0s +All GPU(s): step 534: loss 4.9336, lr 5.0e-04, dt 2.0s +All GPU(s): step 535: loss 4.9258, lr 5.0e-04, dt 2.0s +All GPU(s): step 536: loss 4.9453, lr 5.0e-04, dt 2.1s +All GPU(s): step 537: loss 4.9453, lr 5.0e-04, dt 2.0s +All GPU(s): step 538: loss 4.9062, lr 5.0e-04, dt 2.0s +All GPU(s): step 539: loss 4.9688, lr 5.0e-04, dt 2.0s +All GPU(s): step 540: loss 4.9414, lr 5.0e-04, dt 2.0s +All GPU(s): step 541: loss 4.9062, lr 5.0e-04, dt 2.1s +All GPU(s): step 542: loss 4.9297, lr 5.0e-04, dt 2.0s +All GPU(s): step 543: loss 4.8945, lr 5.0e-04, dt 2.0s +All GPU(s): step 544: loss 4.8711, lr 5.0e-04, dt 2.0s +All GPU(s): step 545: loss 4.8906, lr 5.0e-04, dt 2.0s +All GPU(s): step 546: loss 4.8828, lr 5.0e-04, dt 2.1s +All GPU(s): step 547: loss 4.8906, lr 5.0e-04, dt 2.0s +All GPU(s): step 548: loss 4.9375, lr 5.0e-04, dt 2.0s +All GPU(s): step 549: loss 4.9453, lr 5.0e-04, dt 2.0s +All GPU(s): step 550: loss 4.9297, lr 5.0e-04, dt 2.0s +All GPU(s): step 551: loss 4.9219, lr 5.0e-04, dt 2.1s +All GPU(s): step 552: loss 4.8984, lr 5.0e-04, dt 2.0s +All GPU(s): step 553: loss 4.9062, lr 5.0e-04, dt 2.0s +All GPU(s): step 554: loss 4.9023, lr 5.0e-04, dt 2.0s +All GPU(s): step 555: loss 4.9023, lr 5.0e-04, dt 2.1s +All GPU(s): step 556: loss 4.8594, lr 5.0e-04, dt 2.0s +All GPU(s): step 557: loss 4.8828, lr 5.0e-04, dt 2.0s +All GPU(s): step 558: loss 4.8867, lr 5.0e-04, dt 2.1s +All GPU(s): step 559: loss 4.8828, lr 5.0e-04, dt 2.0s +All GPU(s): step 560: loss 4.8398, lr 5.0e-04, dt 2.1s +All GPU(s): step 561: loss 4.8203, lr 5.0e-04, dt 2.0s +All GPU(s): step 562: loss 4.8555, lr 5.0e-04, dt 2.0s +All GPU(s): step 563: loss 4.8281, lr 5.0e-04, dt 2.0s +All GPU(s): step 564: loss 4.8555, lr 5.0e-04, dt 2.0s +All GPU(s): step 565: loss 4.8672, lr 5.0e-04, dt 2.1s +All GPU(s): step 566: loss 4.8398, lr 5.0e-04, dt 2.0s +All GPU(s): step 567: loss 4.8086, lr 5.0e-04, dt 2.0s +All GPU(s): step 568: loss 4.8438, lr 5.0e-04, dt 2.0s +All GPU(s): step 569: loss 4.8438, lr 5.0e-04, dt 2.0s +All GPU(s): step 570: loss 4.8555, lr 5.0e-04, dt 2.1s +All GPU(s): step 571: loss 4.7773, lr 5.0e-04, dt 2.0s +All GPU(s): step 572: loss 4.8320, lr 5.0e-04, dt 2.0s +All GPU(s): step 573: loss 4.7930, lr 5.0e-04, dt 2.0s +All GPU(s): step 574: loss 4.7695, lr 5.0e-04, dt 2.0s +All GPU(s): step 575: loss 4.7969, lr 5.0e-04, dt 2.1s +All GPU(s): step 576: loss 4.8125, lr 5.0e-04, dt 2.0s +All GPU(s): step 577: loss 4.7852, lr 5.0e-04, dt 2.1s +All GPU(s): step 578: loss 4.7734, lr 5.0e-04, dt 2.0s +All GPU(s): step 579: loss 4.7695, lr 5.0e-04, dt 2.0s +All GPU(s): step 580: loss 4.8008, lr 5.0e-04, dt 2.1s +All GPU(s): step 581: loss 4.7812, lr 5.0e-04, dt 2.0s +All GPU(s): step 582: loss 4.8242, lr 5.0e-04, dt 2.0s +All GPU(s): step 583: loss 4.8281, lr 5.0e-04, dt 2.0s +All GPU(s): step 584: loss 4.8828, lr 5.0e-04, dt 2.1s +All GPU(s): step 585: loss 4.8711, lr 5.0e-04, dt 2.1s +All GPU(s): step 586: loss 4.8477, lr 5.0e-04, dt 2.0s +All GPU(s): step 587: loss 4.7852, lr 5.0e-04, dt 2.0s +All GPU(s): step 588: loss 4.7695, lr 5.0e-04, dt 2.0s +All GPU(s): step 589: loss 4.8047, lr 5.0e-04, dt 2.1s +All GPU(s): step 590: loss 4.8164, lr 5.0e-04, dt 2.0s +All GPU(s): step 591: loss 4.8477, lr 5.0e-04, dt 2.0s +All GPU(s): step 592: loss 4.7852, lr 5.0e-04, dt 2.0s +All GPU(s): step 593: loss 4.7969, lr 5.0e-04, dt 2.0s +All GPU(s): step 594: loss 4.7578, lr 5.0e-04, dt 2.1s +All GPU(s): step 595: loss 4.7773, lr 5.0e-04, dt 2.0s +All GPU(s): step 596: loss 4.7578, lr 5.0e-04, dt 2.0s +All GPU(s): step 597: loss 4.7617, lr 5.0e-04, dt 2.0s +All GPU(s): step 598: loss 4.7812, lr 5.0e-04, dt 2.0s +All GPU(s): step 599: loss 4.7383, lr 5.0e-04, dt 2.1s +All GPU(s): step 600: loss 4.7578, lr 5.0e-04, dt 2.0s +All GPU(s): step 601: loss 4.7344, lr 5.0e-04, dt 2.0s +All GPU(s): step 602: loss 4.7422, lr 5.0e-04, dt 2.0s +All GPU(s): step 603: loss 4.7148, lr 5.0e-04, dt 2.0s +All GPU(s): step 604: loss 4.7812, lr 5.0e-04, dt 2.1s +All GPU(s): step 605: loss 4.7188, lr 5.0e-04, dt 2.0s +All GPU(s): step 606: loss 4.7500, lr 5.0e-04, dt 2.0s +All GPU(s): step 607: loss 4.7188, lr 5.0e-04, dt 2.0s +All GPU(s): step 608: loss 4.7188, lr 5.0e-04, dt 2.1s +All GPU(s): step 609: loss 4.7617, lr 5.0e-04, dt 2.1s +All GPU(s): step 610: loss 4.7148, lr 5.0e-04, dt 2.0s +All GPU(s): step 611: loss 4.7656, lr 5.0e-04, dt 2.0s +All GPU(s): step 612: loss 4.6953, lr 5.0e-04, dt 2.1s +All GPU(s): step 613: loss 4.6758, lr 5.0e-04, dt 2.1s +All GPU(s): step 614: loss 4.6602, lr 5.0e-04, dt 2.1s +All GPU(s): step 615: loss 4.6562, lr 5.0e-04, dt 2.0s +All GPU(s): step 616: loss 4.6172, lr 5.0e-04, dt 2.0s +All GPU(s): step 617: loss 4.6758, lr 5.0e-04, dt 2.0s +All GPU(s): step 618: loss 4.6094, lr 5.0e-04, dt 2.1s +All GPU(s): step 619: loss 4.6406, lr 5.0e-04, dt 2.0s +All GPU(s): step 620: loss 4.6562, lr 5.0e-04, dt 2.1s +All GPU(s): step 621: loss 4.6562, lr 5.0e-04, dt 2.0s +All GPU(s): step 622: loss 4.6836, lr 5.0e-04, dt 2.0s +All GPU(s): step 623: loss 4.6172, lr 5.0e-04, dt 2.2s +All GPU(s): step 624: loss 4.5703, lr 5.0e-04, dt 2.0s +All GPU(s): step 625: loss 4.6211, lr 5.0e-04, dt 2.0s +All GPU(s): step 626: loss 4.6133, lr 5.0e-04, dt 2.0s +All GPU(s): step 627: loss 4.5820, lr 5.0e-04, dt 2.0s +All GPU(s): step 628: loss 4.5195, lr 5.0e-04, dt 2.1s +All GPU(s): step 629: loss 4.5508, lr 5.0e-04, dt 2.0s +All GPU(s): step 630: loss 4.4180, lr 5.0e-04, dt 2.0s +All GPU(s): step 631: loss 4.4805, lr 5.0e-04, dt 2.0s +All GPU(s): step 632: loss 4.4531, lr 5.0e-04, dt 2.1s +All GPU(s): step 633: loss 4.4453, lr 5.0e-04, dt 2.2s +All GPU(s): step 634: loss 4.4570, lr 5.0e-04, dt 2.0s +All GPU(s): step 635: loss 4.5039, lr 5.0e-04, dt 2.1s +All GPU(s): step 636: loss 4.4023, lr 5.0e-04, dt 2.0s +All GPU(s): step 637: loss 4.5547, lr 5.0e-04, dt 2.0s +All GPU(s): step 638: loss 4.4688, lr 5.0e-04, dt 2.1s +All GPU(s): step 639: loss 4.4102, lr 5.0e-04, dt 2.0s +All GPU(s): step 640: loss 4.4414, lr 5.0e-04, dt 2.0s +All GPU(s): step 641: loss 4.4609, lr 5.0e-04, dt 2.0s +All GPU(s): step 642: loss 4.4219, lr 5.0e-04, dt 2.1s +All GPU(s): step 643: loss 4.4492, lr 5.0e-04, dt 2.0s +All GPU(s): step 644: loss 4.4766, lr 5.0e-04, dt 2.0s +All GPU(s): step 645: loss 4.4141, lr 5.0e-04, dt 2.0s +All GPU(s): step 646: loss 4.4375, lr 5.0e-04, dt 2.0s +All GPU(s): step 647: loss 4.3906, lr 5.0e-04, dt 2.1s +All GPU(s): step 648: loss 4.4023, lr 5.0e-04, dt 2.0s +All GPU(s): step 649: loss 4.4258, lr 5.0e-04, dt 2.0s +All GPU(s): step 650: loss 4.4414, lr 5.0e-04, dt 2.0s +All GPU(s): step 651: loss 4.4375, lr 5.0e-04, dt 2.1s +All GPU(s): step 652: loss 4.3555, lr 5.0e-04, dt 2.1s +All GPU(s): step 653: loss 4.3906, lr 5.0e-04, dt 2.0s +All GPU(s): step 654: loss 4.4102, lr 5.0e-04, dt 2.0s +All GPU(s): step 655: loss 4.4219, lr 5.0e-04, dt 2.0s +All GPU(s): step 656: loss 4.3945, lr 5.0e-04, dt 2.0s +All GPU(s): step 657: loss 4.4180, lr 5.0e-04, dt 2.1s +All GPU(s): step 658: loss 4.3477, lr 5.0e-04, dt 2.0s +All GPU(s): step 659: loss 4.3555, lr 5.0e-04, dt 2.0s +All GPU(s): step 660: loss 4.3125, lr 5.0e-04, dt 2.0s +All GPU(s): step 661: loss 4.4492, lr 5.0e-04, dt 2.0s +All GPU(s): step 662: loss 4.3906, lr 5.0e-04, dt 2.1s +All GPU(s): step 663: loss 4.3438, lr 5.0e-04, dt 2.0s +All GPU(s): step 664: loss 4.3945, lr 5.0e-04, dt 2.0s +All GPU(s): step 665: loss 4.3555, lr 5.0e-04, dt 2.0s +All GPU(s): step 666: loss 4.2930, lr 5.0e-04, dt 2.1s +All GPU(s): step 667: loss 4.3398, lr 5.0e-04, dt 2.1s +All GPU(s): step 668: loss 4.4492, lr 5.0e-04, dt 2.0s +All GPU(s): step 669: loss 4.2773, lr 5.0e-04, dt 2.0s +All GPU(s): step 670: loss 4.2578, lr 5.0e-04, dt 2.0s +All GPU(s): step 671: loss 4.2266, lr 5.0e-04, dt 2.1s +All GPU(s): step 672: loss 4.3359, lr 5.0e-04, dt 2.0s +All GPU(s): step 673: loss 4.3281, lr 5.0e-04, dt 2.0s +All GPU(s): step 674: loss 4.2422, lr 5.0e-04, dt 2.0s +All GPU(s): step 675: loss 4.1797, lr 5.0e-04, dt 2.0s +All GPU(s): step 676: loss 4.2578, lr 5.0e-04, dt 2.1s +All GPU(s): step 677: loss 4.2461, lr 5.0e-04, dt 2.0s +All GPU(s): step 678: loss 4.1758, lr 5.0e-04, dt 2.0s +All GPU(s): step 679: loss 4.1523, lr 5.0e-04, dt 2.0s +All GPU(s): step 680: loss 4.1875, lr 5.0e-04, dt 2.0s +All GPU(s): step 681: loss 4.1562, lr 5.0e-04, dt 2.1s +All GPU(s): step 682: loss 4.1406, lr 5.0e-04, dt 2.1s +All GPU(s): step 683: loss 4.0977, lr 5.0e-04, dt 2.0s +All GPU(s): step 684: loss 4.1562, lr 5.0e-04, dt 2.0s +All GPU(s): step 685: loss 4.1211, lr 5.0e-04, dt 2.0s +All GPU(s): step 686: loss 4.1016, lr 5.0e-04, dt 2.1s +All GPU(s): step 687: loss 4.0938, lr 5.0e-04, dt 2.0s +All GPU(s): step 688: loss 4.1367, lr 5.0e-04, dt 2.0s +All GPU(s): step 689: loss 4.1406, lr 5.0e-04, dt 2.0s +All GPU(s): step 690: loss 4.2852, lr 5.0e-04, dt 2.1s +All GPU(s): step 691: loss 4.3086, lr 5.0e-04, dt 2.1s +All GPU(s): step 692: loss 4.2734, lr 5.0e-04, dt 2.1s +All GPU(s): step 693: loss 4.1484, lr 5.0e-04, dt 2.0s +All GPU(s): step 694: loss 4.2070, lr 5.0e-04, dt 2.0s +All GPU(s): step 695: loss 4.1602, lr 5.0e-04, dt 2.1s +All GPU(s): step 696: loss 4.1484, lr 5.0e-04, dt 2.1s +All GPU(s): step 697: loss 4.1172, lr 5.0e-04, dt 2.1s +All GPU(s): step 698: loss 4.1133, lr 5.0e-04, dt 2.0s +All GPU(s): step 699: loss 4.1523, lr 5.0e-04, dt 2.1s +All GPU(s): step 700: loss 4.0977, lr 5.0e-04, dt 2.1s +All GPU(s): step 701: loss 4.0664, lr 5.0e-04, dt 2.0s +All GPU(s): step 702: loss 4.0625, lr 5.0e-04, dt 2.0s +All GPU(s): step 703: loss 4.1250, lr 5.0e-04, dt 2.0s +All GPU(s): step 704: loss 4.1055, lr 5.0e-04, dt 2.1s +All GPU(s): step 705: loss 4.0332, lr 5.0e-04, dt 2.1s +All GPU(s): step 706: loss 4.0781, lr 5.0e-04, dt 2.1s +All GPU(s): step 707: loss 4.0664, lr 5.0e-04, dt 2.0s +All GPU(s): step 708: loss 4.0781, lr 5.0e-04, dt 2.0s +All GPU(s): step 709: loss 4.0508, lr 5.0e-04, dt 2.1s +All GPU(s): step 710: loss 4.0605, lr 5.0e-04, dt 2.1s +All GPU(s): step 711: loss 4.0000, lr 5.0e-04, dt 2.0s +All GPU(s): step 712: loss 4.0293, lr 5.0e-04, dt 2.0s +All GPU(s): step 713: loss 4.1094, lr 5.0e-04, dt 2.1s +All GPU(s): step 714: loss 4.0273, lr 5.0e-04, dt 2.1s +All GPU(s): step 715: loss 4.0273, lr 5.0e-04, dt 2.2s +All GPU(s): step 716: loss 4.0137, lr 5.0e-04, dt 2.0s +All GPU(s): step 717: loss 4.0332, lr 5.0e-04, dt 2.0s +All GPU(s): step 718: loss 4.0195, lr 5.0e-04, dt 2.1s +All GPU(s): step 719: loss 4.0586, lr 5.0e-04, dt 2.1s +All GPU(s): step 720: loss 4.0215, lr 5.0e-04, dt 2.1s +All GPU(s): step 721: loss 4.0664, lr 5.0e-04, dt 2.0s +All GPU(s): step 722: loss 4.0430, lr 5.0e-04, dt 2.1s +All GPU(s): step 723: loss 3.9551, lr 5.0e-04, dt 2.0s +All GPU(s): step 724: loss 4.1875, lr 5.0e-04, dt 2.1s +All GPU(s): step 725: loss 4.1797, lr 5.0e-04, dt 2.1s +All GPU(s): step 726: loss 4.0898, lr 5.0e-04, dt 2.1s +All GPU(s): step 727: loss 4.0742, lr 5.0e-04, dt 2.0s +All GPU(s): step 728: loss 4.1914, lr 5.0e-04, dt 2.0s +All GPU(s): step 729: loss 4.1680, lr 5.0e-04, dt 2.1s +All GPU(s): step 730: loss 4.0820, lr 5.0e-04, dt 2.0s +All GPU(s): step 731: loss 4.0820, lr 5.0e-04, dt 2.0s +All GPU(s): step 732: loss 4.1094, lr 5.0e-04, dt 2.1s +All GPU(s): step 733: loss 4.1172, lr 5.0e-04, dt 2.0s +All GPU(s): step 734: loss 4.0859, lr 5.0e-04, dt 2.1s +All GPU(s): step 735: loss 4.0371, lr 5.0e-04, dt 2.0s +All GPU(s): step 736: loss 4.0391, lr 5.0e-04, dt 2.0s +All GPU(s): step 737: loss 4.0430, lr 5.0e-04, dt 2.0s +All GPU(s): step 738: loss 4.0742, lr 5.0e-04, dt 2.0s +All GPU(s): step 739: loss 4.0781, lr 5.0e-04, dt 2.1s +All GPU(s): step 740: loss 4.0703, lr 5.0e-04, dt 2.1s +All GPU(s): step 741: loss 4.0723, lr 5.0e-04, dt 2.0s +All GPU(s): step 742: loss 4.0156, lr 5.0e-04, dt 2.0s +All GPU(s): step 743: loss 4.0391, lr 5.0e-04, dt 2.0s +All GPU(s): step 744: loss 4.0078, lr 5.0e-04, dt 2.1s +All GPU(s): step 745: loss 4.0820, lr 5.0e-04, dt 2.0s +All GPU(s): step 746: loss 4.0391, lr 5.0e-04, dt 2.0s +All GPU(s): step 747: loss 4.0273, lr 5.0e-04, dt 2.0s +All GPU(s): step 748: loss 3.9941, lr 5.0e-04, dt 2.0s +All GPU(s): step 749: loss 3.9883, lr 5.0e-04, dt 2.1s +All GPU(s): step 750: loss 4.0430, lr 5.0e-04, dt 2.0s +All GPU(s): step 751: loss 3.9902, lr 5.0e-04, dt 2.1s +All GPU(s): step 752: loss 3.9961, lr 5.0e-04, dt 2.1s +All GPU(s): step 753: loss 4.0039, lr 5.0e-04, dt 2.1s +All GPU(s): step 754: loss 3.9824, lr 5.0e-04, dt 2.0s +All GPU(s): step 755: loss 4.0020, lr 5.0e-04, dt 2.0s +All GPU(s): step 756: loss 3.9902, lr 5.0e-04, dt 2.0s +All GPU(s): step 757: loss 3.9961, lr 5.0e-04, dt 2.0s +All GPU(s): step 758: loss 3.9766, lr 5.0e-04, dt 2.1s +All GPU(s): step 759: loss 3.9629, lr 5.0e-04, dt 2.0s +All GPU(s): step 760: loss 4.0273, lr 5.0e-04, dt 2.0s +All GPU(s): step 761: loss 4.0039, lr 5.0e-04, dt 2.0s +All GPU(s): step 762: loss 3.9883, lr 5.0e-04, dt 2.0s +All GPU(s): step 763: loss 4.0039, lr 5.0e-04, dt 2.1s +All GPU(s): step 764: loss 4.0527, lr 5.0e-04, dt 2.1s +All GPU(s): step 765: loss 4.0391, lr 5.0e-04, dt 2.0s +All GPU(s): step 766: loss 3.9805, lr 5.0e-04, dt 2.0s +All GPU(s): step 767: loss 4.0137, lr 5.0e-04, dt 2.0s +All GPU(s): step 768: loss 3.9766, lr 5.0e-04, dt 2.1s +All GPU(s): step 769: loss 4.0195, lr 5.0e-04, dt 2.0s +All GPU(s): step 770: loss 4.0312, lr 5.0e-04, dt 2.0s +All GPU(s): step 771: loss 4.1328, lr 5.0e-04, dt 2.0s +All GPU(s): step 772: loss 4.0625, lr 5.0e-04, dt 2.0s +All GPU(s): step 773: loss 4.0254, lr 4.9e-04, dt 2.1s +All GPU(s): step 774: loss 4.0977, lr 4.9e-04, dt 2.0s +All GPU(s): step 775: loss 4.1406, lr 4.9e-04, dt 2.0s +All GPU(s): step 776: loss 4.1172, lr 4.9e-04, dt 2.0s +All GPU(s): step 777: loss 4.0938, lr 4.9e-04, dt 2.0s +All GPU(s): step 778: loss 4.1016, lr 4.9e-04, dt 2.1s +All GPU(s): step 779: loss 4.1133, lr 4.9e-04, dt 2.0s +All GPU(s): step 780: loss 4.1289, lr 4.9e-04, dt 2.0s +All GPU(s): step 781: loss 4.1016, lr 4.9e-04, dt 2.0s +All GPU(s): step 782: loss 4.1602, lr 4.9e-04, dt 2.1s +All GPU(s): step 783: loss 4.1250, lr 4.9e-04, dt 2.1s +All GPU(s): step 784: loss 4.1133, lr 4.9e-04, dt 2.0s +All GPU(s): step 785: loss 4.1133, lr 4.9e-04, dt 2.0s +All GPU(s): step 786: loss 4.1445, lr 4.9e-04, dt 2.0s +All GPU(s): step 787: loss 4.1367, lr 4.9e-04, dt 2.1s +All GPU(s): step 788: loss 4.1289, lr 4.9e-04, dt 2.0s +All GPU(s): step 789: loss 4.1602, lr 4.9e-04, dt 2.0s +All GPU(s): step 790: loss 4.1367, lr 4.9e-04, dt 2.1s +All GPU(s): step 791: loss 4.1523, lr 4.9e-04, dt 2.0s +All GPU(s): step 792: loss 4.1445, lr 4.9e-04, dt 2.2s +All GPU(s): step 793: loss 4.1602, lr 4.9e-04, dt 2.0s +All GPU(s): step 794: loss 4.1211, lr 4.9e-04, dt 2.0s +All GPU(s): step 795: loss 4.1367, lr 4.9e-04, dt 2.0s +All GPU(s): step 796: loss 4.1328, lr 4.9e-04, dt 2.1s +All GPU(s): step 797: loss 4.1836, lr 4.9e-04, dt 2.2s +All GPU(s): step 798: loss 4.1133, lr 4.9e-04, dt 2.0s +All GPU(s): step 799: loss 4.1250, lr 4.9e-04, dt 2.1s +All GPU(s): step 800: loss 4.1289, lr 4.9e-04, dt 2.0s +All GPU(s): step 801: loss 4.1406, lr 4.9e-04, dt 2.1s +All GPU(s): step 802: loss 4.1172, lr 4.9e-04, dt 2.1s +All GPU(s): step 803: loss 4.1875, lr 4.9e-04, dt 2.0s +All GPU(s): step 804: loss 4.1016, lr 4.9e-04, dt 2.0s +All GPU(s): step 805: loss 4.1172, lr 4.9e-04, dt 2.0s +All GPU(s): step 806: loss 4.1289, lr 4.9e-04, dt 2.1s +All GPU(s): step 807: loss 4.1367, lr 4.9e-04, dt 2.1s +All GPU(s): step 808: loss 4.1367, lr 4.9e-04, dt 2.0s +All GPU(s): step 809: loss 4.1094, lr 4.9e-04, dt 2.0s +All GPU(s): step 810: loss 4.1094, lr 4.9e-04, dt 2.1s +All GPU(s): step 811: loss 4.0898, lr 4.9e-04, dt 2.1s +All GPU(s): step 812: loss 4.1211, lr 4.9e-04, dt 2.1s +All GPU(s): step 813: loss 4.0938, lr 4.9e-04, dt 2.0s +All GPU(s): step 814: loss 4.0938, lr 4.9e-04, dt 2.0s +All GPU(s): step 815: loss 4.1094, lr 4.9e-04, dt 2.0s +All GPU(s): step 816: loss 4.1133, lr 4.9e-04, dt 2.1s +All GPU(s): step 817: loss 4.1055, lr 4.9e-04, dt 2.0s +All GPU(s): step 818: loss 4.0625, lr 4.9e-04, dt 2.1s +All GPU(s): step 819: loss 4.0859, lr 4.9e-04, dt 2.0s +All GPU(s): step 820: loss 4.0859, lr 4.9e-04, dt 2.0s +All GPU(s): step 821: loss 4.0703, lr 4.9e-04, dt 2.1s +All GPU(s): step 822: loss 4.1133, lr 4.9e-04, dt 2.0s +All GPU(s): step 823: loss 4.0938, lr 4.9e-04, dt 2.0s +All GPU(s): step 824: loss 4.1484, lr 4.9e-04, dt 2.0s +All GPU(s): step 825: loss 4.1758, lr 4.9e-04, dt 2.1s +All GPU(s): step 826: loss 4.1172, lr 4.9e-04, dt 2.1s +All GPU(s): step 827: loss 4.1289, lr 4.9e-04, dt 2.0s +All GPU(s): step 828: loss 4.1133, lr 4.9e-04, dt 2.0s +All GPU(s): step 829: loss 4.0938, lr 4.9e-04, dt 2.0s +All GPU(s): step 830: loss 4.0938, lr 4.9e-04, dt 2.0s +All GPU(s): step 831: loss 4.0938, lr 4.9e-04, dt 2.1s +All GPU(s): step 832: loss 4.0820, lr 4.9e-04, dt 2.0s +All GPU(s): step 833: loss 4.1016, lr 4.9e-04, dt 2.0s +All GPU(s): step 834: loss 4.0742, lr 4.9e-04, dt 2.0s +All GPU(s): step 835: loss 4.0781, lr 4.9e-04, dt 2.0s +All GPU(s): step 836: loss 4.0977, lr 4.9e-04, dt 2.1s +All GPU(s): step 837: loss 4.0781, lr 4.9e-04, dt 2.0s +All GPU(s): step 838: loss 4.0781, lr 4.9e-04, dt 2.0s +All GPU(s): step 839: loss 4.0703, lr 4.9e-04, dt 2.0s +All GPU(s): step 840: loss 4.0859, lr 4.9e-04, dt 2.1s +All GPU(s): step 841: loss 4.0625, lr 4.9e-04, dt 2.1s +All GPU(s): step 842: loss 4.0703, lr 4.9e-04, dt 2.0s +All GPU(s): step 843: loss 4.0664, lr 4.9e-04, dt 2.0s +All GPU(s): step 844: loss 4.0742, lr 4.9e-04, dt 2.0s +All GPU(s): step 845: loss 4.0625, lr 4.9e-04, dt 2.1s +All GPU(s): step 846: loss 4.0449, lr 4.9e-04, dt 2.1s +All GPU(s): step 847: loss 4.0664, lr 4.9e-04, dt 2.0s +All GPU(s): step 848: loss 4.0586, lr 4.9e-04, dt 2.0s +All GPU(s): step 849: loss 4.0586, lr 4.9e-04, dt 2.0s +All GPU(s): step 850: loss 4.0508, lr 4.9e-04, dt 2.1s +All GPU(s): step 851: loss 4.0391, lr 4.9e-04, dt 2.0s +All GPU(s): step 852: loss 4.0469, lr 4.9e-04, dt 2.0s +All GPU(s): step 853: loss 4.0430, lr 4.9e-04, dt 2.0s +All GPU(s): step 854: loss 4.0586, lr 4.9e-04, dt 2.0s +All GPU(s): step 855: loss 4.0430, lr 4.9e-04, dt 2.3s +All GPU(s): step 856: loss 4.0391, lr 4.9e-04, dt 2.0s +All GPU(s): step 857: loss 4.0391, lr 4.9e-04, dt 2.0s +All GPU(s): step 858: loss 4.0547, lr 4.9e-04, dt 2.1s +All GPU(s): step 859: loss 4.0234, lr 4.9e-04, dt 2.0s +All GPU(s): step 860: loss 4.0273, lr 4.9e-04, dt 2.1s +All GPU(s): step 861: loss 4.0352, lr 4.9e-04, dt 2.0s +All GPU(s): step 862: loss 4.0430, lr 4.9e-04, dt 2.0s +All GPU(s): step 863: loss 4.0312, lr 4.9e-04, dt 2.0s +All GPU(s): step 864: loss 4.0508, lr 4.9e-04, dt 2.0s +All GPU(s): step 865: loss 4.0332, lr 4.9e-04, dt 2.1s +All GPU(s): step 866: loss 4.0547, lr 4.9e-04, dt 2.0s +All GPU(s): step 867: loss 4.0312, lr 4.9e-04, dt 2.0s +All GPU(s): step 868: loss 4.0371, lr 4.9e-04, dt 2.0s +All GPU(s): step 869: loss 4.0098, lr 4.9e-04, dt 2.1s +All GPU(s): step 870: loss 4.0332, lr 4.9e-04, dt 2.1s +All GPU(s): step 871: loss 4.0215, lr 4.9e-04, dt 2.0s +All GPU(s): step 872: loss 4.0039, lr 4.9e-04, dt 2.0s +All GPU(s): step 873: loss 4.0098, lr 4.9e-04, dt 2.0s +All GPU(s): step 874: loss 4.0039, lr 4.9e-04, dt 2.1s +All GPU(s): step 875: loss 4.0215, lr 4.9e-04, dt 2.0s +All GPU(s): step 876: loss 4.0098, lr 4.9e-04, dt 2.0s +All GPU(s): step 877: loss 4.0078, lr 4.9e-04, dt 2.1s +All GPU(s): step 878: loss 4.0195, lr 4.9e-04, dt 2.1s +All GPU(s): step 879: loss 4.0039, lr 4.9e-04, dt 2.1s +All GPU(s): step 880: loss 4.0117, lr 4.9e-04, dt 2.0s +All GPU(s): step 881: loss 4.0176, lr 4.9e-04, dt 2.0s +All GPU(s): step 882: loss 4.0195, lr 4.9e-04, dt 2.0s +All GPU(s): step 883: loss 4.0625, lr 4.9e-04, dt 2.0s +All GPU(s): step 884: loss 3.9961, lr 4.9e-04, dt 2.1s +All GPU(s): step 885: loss 4.0215, lr 4.9e-04, dt 2.0s +All GPU(s): step 886: loss 4.0078, lr 4.9e-04, dt 2.0s +All GPU(s): step 887: loss 4.0039, lr 4.9e-04, dt 2.0s +All GPU(s): step 888: loss 4.0117, lr 4.9e-04, dt 2.1s +All GPU(s): step 889: loss 4.0605, lr 4.9e-04, dt 2.1s +All GPU(s): step 890: loss 4.0020, lr 4.9e-04, dt 2.0s +All GPU(s): step 891: loss 4.0117, lr 4.9e-04, dt 2.0s +All GPU(s): step 892: loss 4.0430, lr 4.9e-04, dt 2.0s +All GPU(s): step 893: loss 4.0000, lr 4.9e-04, dt 2.0s +All GPU(s): step 894: loss 4.0020, lr 4.9e-04, dt 2.1s +All GPU(s): step 895: loss 4.0293, lr 4.9e-04, dt 2.0s +All GPU(s): step 896: loss 3.9961, lr 4.9e-04, dt 2.0s +All GPU(s): step 897: loss 4.0371, lr 4.9e-04, dt 2.0s +All GPU(s): step 898: loss 4.0156, lr 4.9e-04, dt 2.1s +All GPU(s): step 899: loss 4.0273, lr 4.9e-04, dt 2.0s +All GPU(s): step 900: loss 4.0059, lr 4.9e-04, dt 2.0s +All GPU(s): step 901: loss 4.0078, lr 4.9e-04, dt 2.0s +All GPU(s): step 902: loss 4.0508, lr 4.9e-04, dt 2.0s +All GPU(s): step 903: loss 4.0059, lr 4.9e-04, dt 2.1s +All GPU(s): step 904: loss 4.0273, lr 4.9e-04, dt 2.1s +All GPU(s): step 905: loss 4.0078, lr 4.9e-04, dt 2.0s +All GPU(s): step 906: loss 4.0098, lr 4.9e-04, dt 2.0s +All GPU(s): step 907: loss 4.0195, lr 4.9e-04, dt 2.0s +All GPU(s): step 908: loss 4.0078, lr 4.9e-04, dt 2.2s +All GPU(s): step 909: loss 4.0176, lr 4.9e-04, dt 2.0s +All GPU(s): step 910: loss 4.0000, lr 4.9e-04, dt 2.0s +All GPU(s): step 911: loss 4.0215, lr 4.9e-04, dt 2.0s +All GPU(s): step 912: loss 4.0078, lr 4.9e-04, dt 2.1s +All GPU(s): step 913: loss 3.9902, lr 4.9e-04, dt 2.1s +All GPU(s): step 914: loss 3.9844, lr 4.9e-04, dt 2.0s +All GPU(s): step 915: loss 3.9961, lr 4.9e-04, dt 2.0s +All GPU(s): step 916: loss 4.0020, lr 4.9e-04, dt 2.0s +All GPU(s): step 917: loss 4.0215, lr 4.9e-04, dt 2.1s +All GPU(s): step 918: loss 3.9883, lr 4.9e-04, dt 2.1s +All GPU(s): step 919: loss 4.0020, lr 4.9e-04, dt 2.0s +All GPU(s): step 920: loss 3.9883, lr 4.9e-04, dt 2.0s +All GPU(s): step 921: loss 4.0098, lr 4.9e-04, dt 2.0s +All GPU(s): step 922: loss 3.9766, lr 4.9e-04, dt 2.0s +All GPU(s): step 923: loss 4.0039, lr 4.9e-04, dt 2.1s +All GPU(s): step 924: loss 3.9785, lr 4.9e-04, dt 2.0s +All GPU(s): step 925: loss 3.9863, lr 4.9e-04, dt 2.0s +All GPU(s): step 926: loss 3.9824, lr 4.9e-04, dt 2.0s +All GPU(s): step 927: loss 3.9824, lr 4.9e-04, dt 2.1s +All GPU(s): step 928: loss 4.0059, lr 4.9e-04, dt 2.1s +All GPU(s): step 929: loss 3.9941, lr 4.9e-04, dt 2.0s +All GPU(s): step 930: loss 3.9844, lr 4.9e-04, dt 2.0s +All GPU(s): step 931: loss 3.9863, lr 4.9e-04, dt 2.0s +All GPU(s): step 932: loss 4.0312, lr 4.9e-04, dt 2.1s +All GPU(s): step 933: loss 3.9766, lr 4.9e-04, dt 2.0s +All GPU(s): step 934: loss 3.9746, lr 4.9e-04, dt 2.0s +All GPU(s): step 935: loss 3.9668, lr 4.9e-04, dt 2.0s +All GPU(s): step 936: loss 3.9766, lr 4.9e-04, dt 2.0s +All GPU(s): step 937: loss 3.9590, lr 4.9e-04, dt 2.1s +All GPU(s): step 938: loss 3.9590, lr 4.9e-04, dt 2.0s +All GPU(s): step 939: loss 3.9727, lr 4.9e-04, dt 2.0s +All GPU(s): step 940: loss 3.9980, lr 4.9e-04, dt 2.0s +All GPU(s): step 941: loss 3.9746, lr 4.9e-04, dt 2.0s +All GPU(s): step 942: loss 3.9883, lr 4.9e-04, dt 2.1s +All GPU(s): step 943: loss 3.9824, lr 4.9e-04, dt 2.0s +All GPU(s): step 944: loss 3.9688, lr 4.9e-04, dt 2.0s +All GPU(s): step 945: loss 3.9727, lr 4.9e-04, dt 2.0s +All GPU(s): step 946: loss 3.9707, lr 4.9e-04, dt 2.1s +All GPU(s): step 947: loss 3.9785, lr 4.9e-04, dt 2.1s +All GPU(s): step 948: loss 3.9746, lr 4.9e-04, dt 2.0s +All GPU(s): step 949: loss 3.9883, lr 4.9e-04, dt 2.0s +All GPU(s): step 950: loss 3.9863, lr 4.9e-04, dt 2.0s +All GPU(s): step 951: loss 3.9902, lr 4.9e-04, dt 2.0s +All GPU(s): step 952: loss 3.9844, lr 4.9e-04, dt 2.1s +All GPU(s): step 953: loss 3.9844, lr 4.9e-04, dt 2.0s +All GPU(s): step 954: loss 3.9746, lr 4.9e-04, dt 2.0s +All GPU(s): step 955: loss 3.9844, lr 4.9e-04, dt 2.0s +All GPU(s): step 956: loss 4.0000, lr 4.9e-04, dt 2.0s +All GPU(s): step 957: loss 3.9766, lr 4.9e-04, dt 2.1s +All GPU(s): step 958: loss 3.9766, lr 4.9e-04, dt 2.0s +All GPU(s): step 959: loss 3.9688, lr 4.9e-04, dt 2.0s +All GPU(s): step 960: loss 3.9727, lr 4.9e-04, dt 2.0s +All GPU(s): step 961: loss 3.9961, lr 4.9e-04, dt 2.1s +All GPU(s): step 962: loss 3.9785, lr 4.9e-04, dt 2.0s +All GPU(s): step 963: loss 3.9727, lr 4.9e-04, dt 2.0s +All GPU(s): step 964: loss 3.9688, lr 4.9e-04, dt 2.0s +All GPU(s): step 965: loss 3.9766, lr 4.9e-04, dt 2.0s +All GPU(s): step 966: loss 3.9844, lr 4.9e-04, dt 2.1s +All GPU(s): step 967: loss 3.9551, lr 4.9e-04, dt 2.0s +All GPU(s): step 968: loss 3.9648, lr 4.9e-04, dt 2.0s +All GPU(s): step 969: loss 3.9648, lr 4.9e-04, dt 2.0s +All GPU(s): step 970: loss 3.9590, lr 4.9e-04, dt 2.0s +All GPU(s): step 971: loss 3.9629, lr 4.9e-04, dt 2.1s +All GPU(s): step 972: loss 3.9629, lr 4.9e-04, dt 2.0s +All GPU(s): step 973: loss 3.9629, lr 4.9e-04, dt 2.0s +All GPU(s): step 974: loss 3.9629, lr 4.9e-04, dt 2.0s +All GPU(s): step 975: loss 3.9570, lr 4.9e-04, dt 2.0s +All GPU(s): step 976: loss 3.9570, lr 4.9e-04, dt 2.1s +All GPU(s): step 977: loss 3.9727, lr 4.9e-04, dt 2.0s +All GPU(s): step 978: loss 3.9570, lr 4.9e-04, dt 2.0s +All GPU(s): step 979: loss 3.9551, lr 4.9e-04, dt 2.0s +All GPU(s): step 980: loss 3.9551, lr 4.9e-04, dt 2.0s +All GPU(s): step 981: loss 3.9668, lr 4.9e-04, dt 2.1s +All GPU(s): step 982: loss 3.9629, lr 4.9e-04, dt 2.0s +All GPU(s): step 983: loss 3.9551, lr 4.9e-04, dt 2.0s +All GPU(s): step 984: loss 4.0176, lr 4.9e-04, dt 2.0s +All GPU(s): step 985: loss 3.9609, lr 4.9e-04, dt 2.1s +All GPU(s): step 986: loss 3.9609, lr 4.9e-04, dt 2.1s +All GPU(s): step 987: loss 3.9668, lr 4.9e-04, dt 2.0s +All GPU(s): step 988: loss 3.9902, lr 4.9e-04, dt 2.0s +All GPU(s): step 989: loss 3.9648, lr 4.9e-04, dt 2.0s +All GPU(s): step 990: loss 3.9629, lr 4.9e-04, dt 2.1s +All GPU(s): step 991: loss 3.9531, lr 4.9e-04, dt 2.0s +All GPU(s): step 992: loss 3.9590, lr 4.9e-04, dt 2.0s +All GPU(s): step 993: loss 3.9434, lr 4.9e-04, dt 2.0s +All GPU(s): step 994: loss 3.9551, lr 4.9e-04, dt 2.0s +All GPU(s): step 995: loss 3.9570, lr 4.9e-04, dt 2.1s +All GPU(s): step 996: loss 3.9609, lr 4.9e-04, dt 2.0s +All GPU(s): step 997: loss 3.9629, lr 4.9e-04, dt 2.0s +All GPU(s): step 998: loss 3.9590, lr 4.9e-04, dt 2.1s +All GPU(s): step 999: loss 3.9492, lr 4.9e-04, dt 2.0s +saving checkpoint to checkpoints/ckpt_1000.pt +All GPU(s): step 1000: loss 3.9707, lr 4.9e-04, dt 2.3s +All GPU(s): step 1001: loss 3.9648, lr 4.9e-04, dt 2.1s +All GPU(s): step 1002: loss 3.9473, lr 4.9e-04, dt 2.1s +All GPU(s): step 1003: loss 3.9551, lr 4.9e-04, dt 2.0s +All GPU(s): step 1004: loss 3.9707, lr 4.9e-04, dt 2.0s +All GPU(s): step 1005: loss 3.9512, lr 4.9e-04, dt 2.2s +All GPU(s): step 1006: loss 3.9277, lr 4.9e-04, dt 2.1s +All GPU(s): step 1007: loss 3.9434, lr 4.9e-04, dt 2.1s +All GPU(s): step 1008: loss 3.9492, lr 4.9e-04, dt 2.0s +All GPU(s): step 1009: loss 3.9512, lr 4.9e-04, dt 2.0s +All GPU(s): step 1010: loss 3.9531, lr 4.9e-04, dt 2.1s +All GPU(s): step 1011: loss 3.9395, lr 4.9e-04, dt 2.1s +All GPU(s): step 1012: loss 3.9512, lr 4.9e-04, dt 2.0s +All GPU(s): step 1013: loss 3.9492, lr 4.9e-04, dt 2.0s +All GPU(s): step 1014: loss 3.9609, lr 4.9e-04, dt 2.1s +All GPU(s): step 1015: loss 3.9570, lr 4.9e-04, dt 2.1s +All GPU(s): step 1016: loss 3.9531, lr 4.9e-04, dt 2.0s +All GPU(s): step 1017: loss 3.9316, lr 4.9e-04, dt 2.0s +All GPU(s): step 1018: loss 3.9551, lr 4.9e-04, dt 2.0s +All GPU(s): step 1019: loss 3.9531, lr 4.9e-04, dt 2.1s +All GPU(s): step 1020: loss 3.9492, lr 4.9e-04, dt 2.1s +All GPU(s): step 1021: loss 3.9395, lr 4.9e-04, dt 2.0s +All GPU(s): step 1022: loss 3.9551, lr 4.9e-04, dt 2.0s +All GPU(s): step 1023: loss 3.9473, lr 4.9e-04, dt 2.1s +All GPU(s): step 1024: loss 4.0098, lr 4.9e-04, dt 2.2s +All GPU(s): step 1025: loss 3.9453, lr 4.9e-04, dt 2.1s +All GPU(s): step 1026: loss 3.9883, lr 4.9e-04, dt 2.0s +All GPU(s): step 1027: loss 3.9531, lr 4.9e-04, dt 2.0s +All GPU(s): step 1028: loss 3.9707, lr 4.9e-04, dt 2.0s +All GPU(s): step 1029: loss 3.9512, lr 4.9e-04, dt 2.1s +All GPU(s): step 1030: loss 3.9668, lr 4.9e-04, dt 2.0s +All GPU(s): step 1031: loss 3.9648, lr 4.9e-04, dt 2.0s +All GPU(s): step 1032: loss 3.9727, lr 4.9e-04, dt 2.1s +All GPU(s): step 1033: loss 3.9551, lr 4.9e-04, dt 2.0s +All GPU(s): step 1034: loss 3.9805, lr 4.9e-04, dt 2.1s +All GPU(s): step 1035: loss 3.9473, lr 4.9e-04, dt 2.0s +All GPU(s): step 1036: loss 3.9336, lr 4.9e-04, dt 2.0s +All GPU(s): step 1037: loss 3.9590, lr 4.9e-04, dt 2.0s +All GPU(s): step 1038: loss 3.9531, lr 4.9e-04, dt 2.1s +All GPU(s): step 1039: loss 3.9434, lr 4.9e-04, dt 2.1s +All GPU(s): step 1040: loss 3.9355, lr 4.9e-04, dt 2.0s +All GPU(s): step 1041: loss 3.9375, lr 4.9e-04, dt 2.0s +All GPU(s): step 1042: loss 3.9395, lr 4.9e-04, dt 2.0s +All GPU(s): step 1043: loss 3.9492, lr 4.9e-04, dt 2.1s +All GPU(s): step 1044: loss 3.9453, lr 4.9e-04, dt 2.0s +All GPU(s): step 1045: loss 3.9316, lr 4.9e-04, dt 2.0s +All GPU(s): step 1046: loss 3.9590, lr 4.9e-04, dt 2.0s +All GPU(s): step 1047: loss 3.9453, lr 4.9e-04, dt 2.0s +All GPU(s): step 1048: loss 3.9414, lr 4.9e-04, dt 2.1s +All GPU(s): step 1049: loss 3.9395, lr 4.9e-04, dt 2.0s +All GPU(s): step 1050: loss 3.9395, lr 4.9e-04, dt 2.0s +All GPU(s): step 1051: loss 3.9434, lr 4.9e-04, dt 2.0s +All GPU(s): step 1052: loss 3.9473, lr 4.9e-04, dt 2.0s +All GPU(s): step 1053: loss 3.9492, lr 4.9e-04, dt 2.1s +All GPU(s): step 1054: loss 3.9395, lr 4.9e-04, dt 2.0s +All GPU(s): step 1055: loss 3.9336, lr 4.9e-04, dt 2.0s +All GPU(s): step 1056: loss 3.9492, lr 4.9e-04, dt 2.0s +All GPU(s): step 1057: loss 3.9395, lr 4.9e-04, dt 2.0s +All GPU(s): step 1058: loss 3.9297, lr 4.9e-04, dt 2.1s +All GPU(s): step 1059: loss 3.9355, lr 4.9e-04, dt 2.0s +All GPU(s): step 1060: loss 3.9316, lr 4.9e-04, dt 2.0s +All GPU(s): step 1061: loss 3.9277, lr 4.9e-04, dt 2.0s +All GPU(s): step 1062: loss 3.9512, lr 4.9e-04, dt 2.0s +All GPU(s): step 1063: loss 3.9395, lr 4.9e-04, dt 2.1s +All GPU(s): step 1064: loss 3.9609, lr 4.9e-04, dt 2.0s +All GPU(s): step 1065: loss 3.9316, lr 4.9e-04, dt 2.0s +All GPU(s): step 1066: loss 3.9355, lr 4.9e-04, dt 2.0s +All GPU(s): step 1067: loss 3.9375, lr 4.9e-04, dt 2.0s +All GPU(s): step 1068: loss 3.9434, lr 4.9e-04, dt 2.1s +All GPU(s): step 1069: loss 3.9355, lr 4.9e-04, dt 2.0s +All GPU(s): step 1070: loss 3.9512, lr 4.9e-04, dt 2.0s +All GPU(s): step 1071: loss 3.9805, lr 4.9e-04, dt 2.0s +All GPU(s): step 1072: loss 3.9375, lr 4.9e-04, dt 2.1s +All GPU(s): step 1073: loss 3.9648, lr 4.9e-04, dt 2.1s +All GPU(s): step 1074: loss 3.9570, lr 4.9e-04, dt 2.0s +All GPU(s): step 1075: loss 3.9492, lr 4.9e-04, dt 2.0s +All GPU(s): step 1076: loss 4.0508, lr 4.9e-04, dt 2.0s +All GPU(s): step 1077: loss 4.0176, lr 4.9e-04, dt 2.1s +All GPU(s): step 1078: loss 4.1641, lr 4.9e-04, dt 2.0s +All GPU(s): step 1079: loss 4.5312, lr 4.9e-04, dt 2.0s +All GPU(s): step 1080: loss 4.0977, lr 4.9e-04, dt 2.0s +All GPU(s): step 1081: loss 4.0664, lr 4.9e-04, dt 2.0s +All GPU(s): step 1082: loss 4.1133, lr 4.9e-04, dt 2.1s +All GPU(s): step 1083: loss 4.0508, lr 4.9e-04, dt 2.0s +All GPU(s): step 1084: loss 4.0547, lr 4.9e-04, dt 2.0s +All GPU(s): step 1085: loss 4.0312, lr 4.9e-04, dt 2.0s +All GPU(s): step 1086: loss 4.0547, lr 4.9e-04, dt 2.1s +All GPU(s): step 1087: loss 3.9648, lr 4.9e-04, dt 2.2s +All GPU(s): step 1088: loss 3.9531, lr 4.9e-04, dt 2.0s +All GPU(s): step 1089: loss 3.9648, lr 4.9e-04, dt 2.0s +All GPU(s): step 1090: loss 3.9727, lr 4.9e-04, dt 2.0s +All GPU(s): step 1091: loss 3.9570, lr 4.9e-04, dt 2.0s +All GPU(s): step 1092: loss 3.9727, lr 4.9e-04, dt 2.1s +All GPU(s): step 1093: loss 3.9453, lr 4.9e-04, dt 2.0s +All GPU(s): step 1094: loss 3.9297, lr 4.9e-04, dt 2.0s +All GPU(s): step 1095: loss 3.9355, lr 4.9e-04, dt 2.0s +All GPU(s): step 1096: loss 3.9375, lr 4.9e-04, dt 2.0s +All GPU(s): step 1097: loss 3.9434, lr 4.9e-04, dt 2.1s +All GPU(s): step 1098: loss 3.9355, lr 4.9e-04, dt 2.0s +All GPU(s): step 1099: loss 3.9531, lr 4.9e-04, dt 2.0s +All GPU(s): step 1100: loss 3.9473, lr 4.9e-04, dt 2.0s +All GPU(s): step 1101: loss 3.9473, lr 4.9e-04, dt 2.1s +All GPU(s): step 1102: loss 3.9668, lr 4.9e-04, dt 2.1s +All GPU(s): step 1103: loss 3.9453, lr 4.9e-04, dt 2.0s +All GPU(s): step 1104: loss 3.9355, lr 4.9e-04, dt 2.1s +All GPU(s): step 1105: loss 3.9336, lr 4.9e-04, dt 2.0s +All GPU(s): step 1106: loss 3.9375, lr 4.9e-04, dt 2.1s +All GPU(s): step 1107: loss 3.9395, lr 4.9e-04, dt 2.0s +All GPU(s): step 1108: loss 3.9395, lr 4.9e-04, dt 2.0s +All GPU(s): step 1109: loss 3.9473, lr 4.9e-04, dt 2.0s +All GPU(s): step 1110: loss 3.9473, lr 4.9e-04, dt 2.0s +All GPU(s): step 1111: loss 3.9160, lr 4.9e-04, dt 2.2s +All GPU(s): step 1112: loss 3.9297, lr 4.9e-04, dt 2.0s +All GPU(s): step 1113: loss 3.9238, lr 4.9e-04, dt 2.0s +All GPU(s): step 1114: loss 3.9238, lr 4.9e-04, dt 2.0s +All GPU(s): step 1115: loss 3.9316, lr 4.9e-04, dt 2.0s +All GPU(s): step 1116: loss 3.9434, lr 4.9e-04, dt 2.2s +All GPU(s): step 1117: loss 3.9297, lr 4.9e-04, dt 2.0s +All GPU(s): step 1118: loss 3.9238, lr 4.9e-04, dt 2.0s +All GPU(s): step 1119: loss 3.9375, lr 4.9e-04, dt 2.0s +All GPU(s): step 1120: loss 3.9238, lr 4.9e-04, dt 2.0s +All GPU(s): step 1121: loss 3.9199, lr 4.9e-04, dt 2.1s +All GPU(s): step 1122: loss 3.9277, lr 4.9e-04, dt 2.0s +All GPU(s): step 1123: loss 3.9277, lr 4.9e-04, dt 2.0s +All GPU(s): step 1124: loss 3.9414, lr 4.9e-04, dt 2.0s +All GPU(s): step 1125: loss 3.9258, lr 4.9e-04, dt 2.0s +All GPU(s): step 1126: loss 3.9258, lr 4.9e-04, dt 2.1s +All GPU(s): step 1127: loss 3.9316, lr 4.9e-04, dt 2.0s +All GPU(s): step 1128: loss 3.9336, lr 4.9e-04, dt 2.0s +All GPU(s): step 1129: loss 3.9355, lr 4.9e-04, dt 2.0s +All GPU(s): step 1130: loss 3.9414, lr 4.9e-04, dt 2.1s +All GPU(s): step 1131: loss 3.9316, lr 4.9e-04, dt 2.1s +All GPU(s): step 1132: loss 3.9277, lr 4.9e-04, dt 2.0s +All GPU(s): step 1133: loss 3.9277, lr 4.9e-04, dt 2.0s +All GPU(s): step 1134: loss 3.9473, lr 4.9e-04, dt 2.0s +All GPU(s): step 1135: loss 3.9219, lr 4.9e-04, dt 2.1s +All GPU(s): step 1136: loss 3.9473, lr 4.9e-04, dt 2.1s +All GPU(s): step 1137: loss 3.9336, lr 4.9e-04, dt 2.0s +All GPU(s): step 1138: loss 3.9258, lr 4.9e-04, dt 2.0s +All GPU(s): step 1139: loss 3.9277, lr 4.9e-04, dt 2.0s +All GPU(s): step 1140: loss 3.9297, lr 4.9e-04, dt 2.1s +All GPU(s): step 1141: loss 3.9258, lr 4.9e-04, dt 2.0s +All GPU(s): step 1142: loss 3.9297, lr 4.9e-04, dt 2.0s +All GPU(s): step 1143: loss 3.9316, lr 4.9e-04, dt 2.0s +All GPU(s): step 1144: loss 3.9199, lr 4.9e-04, dt 2.0s +All GPU(s): step 1145: loss 3.9277, lr 4.9e-04, dt 2.1s +All GPU(s): step 1146: loss 3.9238, lr 4.9e-04, dt 2.1s +All GPU(s): step 1147: loss 3.9297, lr 4.9e-04, dt 2.0s +All GPU(s): step 1148: loss 3.9238, lr 4.9e-04, dt 2.0s +All GPU(s): step 1149: loss 3.9336, lr 4.9e-04, dt 2.1s +All GPU(s): step 1150: loss 3.9316, lr 4.9e-04, dt 2.1s +All GPU(s): step 1151: loss 3.9277, lr 4.9e-04, dt 2.0s +All GPU(s): step 1152: loss 3.9199, lr 4.9e-04, dt 2.0s +All GPU(s): step 1153: loss 3.9199, lr 4.9e-04, dt 2.0s +All GPU(s): step 1154: loss 3.9375, lr 4.9e-04, dt 2.0s +All GPU(s): step 1155: loss 3.9199, lr 4.9e-04, dt 2.1s +All GPU(s): step 1156: loss 3.9395, lr 4.9e-04, dt 2.0s +All GPU(s): step 1157: loss 3.9336, lr 4.9e-04, dt 2.0s +All GPU(s): step 1158: loss 3.9180, lr 4.9e-04, dt 2.0s +All GPU(s): step 1159: loss 3.9258, lr 4.9e-04, dt 2.0s +All GPU(s): step 1160: loss 3.9883, lr 4.9e-04, dt 2.1s +All GPU(s): step 1161: loss 3.9375, lr 4.9e-04, dt 2.0s +All GPU(s): step 1162: loss 3.9336, lr 4.9e-04, dt 2.0s +All GPU(s): step 1163: loss 3.9531, lr 4.9e-04, dt 2.0s +All GPU(s): step 1164: loss 3.9492, lr 4.9e-04, dt 2.1s +All GPU(s): step 1165: loss 3.9395, lr 4.9e-04, dt 2.1s +All GPU(s): step 1166: loss 3.9434, lr 4.9e-04, dt 2.0s +All GPU(s): step 1167: loss 3.9473, lr 4.9e-04, dt 2.0s +All GPU(s): step 1168: loss 3.9629, lr 4.9e-04, dt 2.0s +All GPU(s): step 1169: loss 3.9453, lr 4.9e-04, dt 2.1s +All GPU(s): step 1170: loss 3.9961, lr 4.9e-04, dt 2.0s +All GPU(s): step 1171: loss 3.9902, lr 4.9e-04, dt 2.0s +All GPU(s): step 1172: loss 4.0215, lr 4.9e-04, dt 2.0s +All GPU(s): step 1173: loss 3.9922, lr 4.9e-04, dt 2.0s +All GPU(s): step 1174: loss 4.0293, lr 4.9e-04, dt 2.1s +All GPU(s): step 1175: loss 4.1094, lr 4.9e-04, dt 2.0s +All GPU(s): step 1176: loss 4.0059, lr 4.9e-04, dt 2.0s +All GPU(s): step 1177: loss 4.0469, lr 4.9e-04, dt 2.0s +All GPU(s): step 1178: loss 3.9707, lr 4.9e-04, dt 2.0s +All GPU(s): step 1179: loss 3.9844, lr 4.9e-04, dt 2.2s +All GPU(s): step 1180: loss 3.9824, lr 4.9e-04, dt 2.0s +All GPU(s): step 1181: loss 3.9609, lr 4.9e-04, dt 2.0s +All GPU(s): step 1182: loss 3.9531, lr 4.9e-04, dt 2.0s +All GPU(s): step 1183: loss 3.9375, lr 4.9e-04, dt 2.0s +All GPU(s): step 1184: loss 3.9492, lr 4.9e-04, dt 2.2s +All GPU(s): step 1185: loss 3.9434, lr 4.9e-04, dt 2.0s +All GPU(s): step 1186: loss 3.9707, lr 4.9e-04, dt 2.0s +All GPU(s): step 1187: loss 3.9629, lr 4.9e-04, dt 2.0s +All GPU(s): step 1188: loss 3.9316, lr 4.9e-04, dt 2.0s +All GPU(s): step 1189: loss 3.9531, lr 4.9e-04, dt 2.1s +All GPU(s): step 1190: loss 3.9355, lr 4.9e-04, dt 2.0s +All GPU(s): step 1191: loss 3.9375, lr 4.9e-04, dt 2.0s +All GPU(s): step 1192: loss 3.9414, lr 4.9e-04, dt 2.0s +All GPU(s): step 1193: loss 3.9375, lr 4.9e-04, dt 2.1s +All GPU(s): step 1194: loss 3.9453, lr 4.9e-04, dt 2.1s +All GPU(s): step 1195: loss 3.9531, lr 4.9e-04, dt 2.0s +All GPU(s): step 1196: loss 3.9316, lr 4.9e-04, dt 2.0s +All GPU(s): step 1197: loss 3.9355, lr 4.9e-04, dt 2.1s +All GPU(s): step 1198: loss 3.9336, lr 4.9e-04, dt 2.1s +All GPU(s): step 1199: loss 3.9629, lr 4.9e-04, dt 2.0s +All GPU(s): step 1200: loss 3.9375, lr 4.9e-04, dt 2.0s +All GPU(s): step 1201: loss 3.9238, lr 4.9e-04, dt 2.0s +All GPU(s): step 1202: loss 3.9336, lr 4.9e-04, dt 2.0s +All GPU(s): step 1203: loss 3.9277, lr 4.9e-04, dt 2.1s +All GPU(s): step 1204: loss 3.9316, lr 4.9e-04, dt 2.1s +All GPU(s): step 1205: loss 3.9551, lr 4.9e-04, dt 2.0s +All GPU(s): step 1206: loss 3.9297, lr 4.9e-04, dt 2.0s +All GPU(s): step 1207: loss 3.9238, lr 4.9e-04, dt 2.0s +All GPU(s): step 1208: loss 3.9355, lr 4.9e-04, dt 2.1s +All GPU(s): step 1209: loss 3.9238, lr 4.9e-04, dt 2.1s +All GPU(s): step 1210: loss 3.9160, lr 4.9e-04, dt 2.0s +All GPU(s): step 1211: loss 3.9121, lr 4.9e-04, dt 2.1s +All GPU(s): step 1212: loss 3.9160, lr 4.9e-04, dt 2.0s +All GPU(s): step 1213: loss 3.9219, lr 4.9e-04, dt 2.1s +All GPU(s): step 1214: loss 3.9238, lr 4.9e-04, dt 2.0s +All GPU(s): step 1215: loss 3.9336, lr 4.9e-04, dt 2.1s +All GPU(s): step 1216: loss 3.9336, lr 4.9e-04, dt 2.0s +All GPU(s): step 1217: loss 3.9395, lr 4.9e-04, dt 2.0s +All GPU(s): step 1218: loss 3.9258, lr 4.9e-04, dt 2.1s +All GPU(s): step 1219: loss 4.0391, lr 4.9e-04, dt 2.0s +All GPU(s): step 1220: loss 3.9414, lr 4.9e-04, dt 2.1s +All GPU(s): step 1221: loss 3.9453, lr 4.9e-04, dt 2.0s +All GPU(s): step 1222: loss 3.9316, lr 4.9e-04, dt 2.1s +All GPU(s): step 1223: loss 3.9297, lr 4.9e-04, dt 2.0s +All GPU(s): step 1224: loss 3.9375, lr 4.9e-04, dt 2.0s +All GPU(s): step 1225: loss 3.9492, lr 4.9e-04, dt 2.1s +All GPU(s): step 1226: loss 3.9453, lr 4.9e-04, dt 2.0s +All GPU(s): step 1227: loss 3.9277, lr 4.9e-04, dt 2.1s +All GPU(s): step 1228: loss 3.9336, lr 4.9e-04, dt 2.0s +All GPU(s): step 1229: loss 3.9277, lr 4.9e-04, dt 2.0s +All GPU(s): step 1230: loss 3.9199, lr 4.9e-04, dt 2.0s +All GPU(s): step 1231: loss 3.9316, lr 4.9e-04, dt 2.1s +All GPU(s): step 1232: loss 3.9219, lr 4.9e-04, dt 2.1s +All GPU(s): step 1233: loss 3.9219, lr 4.9e-04, dt 2.0s +All GPU(s): step 1234: loss 3.9375, lr 4.9e-04, dt 2.0s +All GPU(s): step 1235: loss 3.9219, lr 4.9e-04, dt 2.0s +All GPU(s): step 1236: loss 3.9199, lr 4.9e-04, dt 2.0s +All GPU(s): step 1237: loss 3.9453, lr 4.9e-04, dt 2.1s +All GPU(s): step 1238: loss 3.9336, lr 4.9e-04, dt 2.0s +All GPU(s): step 1239: loss 3.9297, lr 4.9e-04, dt 2.0s +All GPU(s): step 1240: loss 3.9336, lr 4.9e-04, dt 2.0s +All GPU(s): step 1241: loss 3.9219, lr 4.9e-04, dt 2.0s +All GPU(s): step 1242: loss 3.9258, lr 4.9e-04, dt 2.1s +All GPU(s): step 1243: loss 3.9258, lr 4.9e-04, dt 2.0s +All GPU(s): step 1244: loss 3.9160, lr 4.9e-04, dt 2.0s +All GPU(s): step 1245: loss 3.9375, lr 4.9e-04, dt 2.0s +All GPU(s): step 1246: loss 3.9375, lr 4.9e-04, dt 2.0s +All GPU(s): step 1247: loss 3.9219, lr 4.9e-04, dt 2.1s +All GPU(s): step 1248: loss 3.9180, lr 4.9e-04, dt 2.0s +All GPU(s): step 1249: loss 3.9238, lr 4.9e-04, dt 2.0s +All GPU(s): step 1250: loss 3.9102, lr 4.9e-04, dt 2.0s +All GPU(s): step 1251: loss 3.9199, lr 4.9e-04, dt 2.1s +All GPU(s): step 1252: loss 3.9180, lr 4.9e-04, dt 2.1s +All GPU(s): step 1253: loss 3.9160, lr 4.9e-04, dt 2.0s +All GPU(s): step 1254: loss 3.9297, lr 4.9e-04, dt 2.0s +All GPU(s): step 1255: loss 3.9297, lr 4.9e-04, dt 2.0s +All GPU(s): step 1256: loss 3.9258, lr 4.9e-04, dt 2.1s +All GPU(s): step 1257: loss 3.9336, lr 4.9e-04, dt 2.0s +All GPU(s): step 1258: loss 3.9180, lr 4.9e-04, dt 2.0s +All GPU(s): step 1259: loss 3.9062, lr 4.9e-04, dt 2.0s +All GPU(s): step 1260: loss 3.9277, lr 4.9e-04, dt 2.0s +All GPU(s): step 1261: loss 3.9355, lr 4.9e-04, dt 2.1s +All GPU(s): step 1262: loss 3.9277, lr 4.9e-04, dt 2.0s +All GPU(s): step 1263: loss 3.9336, lr 4.9e-04, dt 2.0s +All GPU(s): step 1264: loss 3.9316, lr 4.9e-04, dt 2.0s +All GPU(s): step 1265: loss 3.9121, lr 4.9e-04, dt 2.0s +All GPU(s): step 1266: loss 3.9219, lr 4.9e-04, dt 2.1s +All GPU(s): step 1267: loss 3.9414, lr 4.9e-04, dt 2.0s +All GPU(s): step 1268: loss 3.9766, lr 4.9e-04, dt 2.0s +All GPU(s): step 1269: loss 3.9531, lr 4.8e-04, dt 2.0s +All GPU(s): step 1270: loss 3.9395, lr 4.8e-04, dt 2.1s +All GPU(s): step 1271: loss 3.9473, lr 4.8e-04, dt 2.1s +All GPU(s): step 1272: loss 3.9375, lr 4.8e-04, dt 2.1s +All GPU(s): step 1273: loss 3.9141, lr 4.8e-04, dt 2.0s +All GPU(s): step 1274: loss 3.9336, lr 4.8e-04, dt 2.0s +All GPU(s): step 1275: loss 3.9551, lr 4.8e-04, dt 2.0s +All GPU(s): step 1276: loss 3.9199, lr 4.8e-04, dt 2.1s +All GPU(s): step 1277: loss 3.9141, lr 4.8e-04, dt 2.0s +All GPU(s): step 1278: loss 3.9297, lr 4.8e-04, dt 2.0s +All GPU(s): step 1279: loss 3.9180, lr 4.8e-04, dt 2.0s +All GPU(s): step 1280: loss 3.9199, lr 4.8e-04, dt 2.1s +All GPU(s): step 1281: loss 3.9336, lr 4.8e-04, dt 2.1s +All GPU(s): step 1282: loss 3.9219, lr 4.8e-04, dt 2.1s +All GPU(s): step 1283: loss 3.9219, lr 4.8e-04, dt 2.0s +All GPU(s): step 1284: loss 3.9277, lr 4.8e-04, dt 2.0s +All GPU(s): step 1285: loss 3.9238, lr 4.8e-04, dt 2.1s +All GPU(s): step 1286: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1287: loss 3.9277, lr 4.8e-04, dt 2.0s +All GPU(s): step 1288: loss 3.9102, lr 4.8e-04, dt 2.0s +All GPU(s): step 1289: loss 3.9258, lr 4.8e-04, dt 2.0s +All GPU(s): step 1290: loss 3.9219, lr 4.8e-04, dt 2.1s +All GPU(s): step 1291: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1292: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1293: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1294: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1295: loss 3.9258, lr 4.8e-04, dt 2.1s +All GPU(s): step 1296: loss 3.9258, lr 4.8e-04, dt 2.1s +All GPU(s): step 1297: loss 3.9297, lr 4.8e-04, dt 2.0s +All GPU(s): step 1298: loss 3.9199, lr 4.8e-04, dt 2.0s +All GPU(s): step 1299: loss 3.9277, lr 4.8e-04, dt 2.0s +All GPU(s): step 1300: loss 3.9258, lr 4.8e-04, dt 2.1s +All GPU(s): step 1301: loss 3.9199, lr 4.8e-04, dt 2.0s +All GPU(s): step 1302: loss 3.9297, lr 4.8e-04, dt 2.0s +All GPU(s): step 1303: loss 3.9219, lr 4.8e-04, dt 2.0s +All GPU(s): step 1304: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1305: loss 3.9160, lr 4.8e-04, dt 2.1s +All GPU(s): step 1306: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1307: loss 3.9609, lr 4.8e-04, dt 2.0s +All GPU(s): step 1308: loss 3.9199, lr 4.8e-04, dt 2.0s +All GPU(s): step 1309: loss 3.9082, lr 4.8e-04, dt 2.0s +All GPU(s): step 1310: loss 3.9336, lr 4.8e-04, dt 2.1s +All GPU(s): step 1311: loss 3.9258, lr 4.8e-04, dt 2.0s +All GPU(s): step 1312: loss 3.9395, lr 4.8e-04, dt 2.0s +All GPU(s): step 1313: loss 3.9258, lr 4.8e-04, dt 2.0s +All GPU(s): step 1314: loss 3.9336, lr 4.8e-04, dt 2.1s +All GPU(s): step 1315: loss 3.9219, lr 4.8e-04, dt 2.1s +All GPU(s): step 1316: loss 3.9277, lr 4.8e-04, dt 2.0s +All GPU(s): step 1317: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1318: loss 3.9102, lr 4.8e-04, dt 2.0s +All GPU(s): step 1319: loss 3.9082, lr 4.8e-04, dt 2.2s +All GPU(s): step 1320: loss 3.9297, lr 4.8e-04, dt 2.0s +All GPU(s): step 1321: loss 3.9219, lr 4.8e-04, dt 2.0s +All GPU(s): step 1322: loss 3.9141, lr 4.8e-04, dt 2.0s +All GPU(s): step 1323: loss 3.9395, lr 4.8e-04, dt 2.0s +All GPU(s): step 1324: loss 3.9277, lr 4.8e-04, dt 2.1s +All GPU(s): step 1325: loss 3.9355, lr 4.8e-04, dt 2.0s +All GPU(s): step 1326: loss 3.9355, lr 4.8e-04, dt 2.0s +All GPU(s): step 1327: loss 3.9180, lr 4.8e-04, dt 2.0s +All GPU(s): step 1328: loss 3.9258, lr 4.8e-04, dt 2.0s +All GPU(s): step 1329: loss 3.9180, lr 4.8e-04, dt 2.1s +All GPU(s): step 1330: loss 3.9258, lr 4.8e-04, dt 2.0s +All GPU(s): step 1331: loss 3.9297, lr 4.8e-04, dt 2.0s +All GPU(s): step 1332: loss 3.9180, lr 4.8e-04, dt 2.0s +All GPU(s): step 1333: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1334: loss 3.9316, lr 4.8e-04, dt 2.1s +All GPU(s): step 1335: loss 3.9219, lr 4.8e-04, dt 2.0s +All GPU(s): step 1336: loss 3.9434, lr 4.8e-04, dt 2.1s +All GPU(s): step 1337: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1338: loss 3.9258, lr 4.8e-04, dt 2.1s +All GPU(s): step 1339: loss 3.9277, lr 4.8e-04, dt 2.1s +All GPU(s): step 1340: loss 3.9434, lr 4.8e-04, dt 2.0s +All GPU(s): step 1341: loss 3.9199, lr 4.8e-04, dt 2.0s +All GPU(s): step 1342: loss 3.9316, lr 4.8e-04, dt 2.0s +All GPU(s): step 1343: loss 3.9336, lr 4.8e-04, dt 2.1s +All GPU(s): step 1344: loss 3.9355, lr 4.8e-04, dt 2.1s +All GPU(s): step 1345: loss 3.9395, lr 4.8e-04, dt 2.0s +All GPU(s): step 1346: loss 4.0117, lr 4.8e-04, dt 2.0s +All GPU(s): step 1347: loss 3.9688, lr 4.8e-04, dt 2.0s +All GPU(s): step 1348: loss 3.9355, lr 4.8e-04, dt 2.1s +All GPU(s): step 1349: loss 3.9492, lr 4.8e-04, dt 2.0s +All GPU(s): step 1350: loss 4.0020, lr 4.8e-04, dt 2.0s +All GPU(s): step 1351: loss 3.9922, lr 4.8e-04, dt 2.0s +All GPU(s): step 1352: loss 3.9414, lr 4.8e-04, dt 2.0s +All GPU(s): step 1353: loss 3.9297, lr 4.8e-04, dt 2.1s +All GPU(s): step 1354: loss 3.9668, lr 4.8e-04, dt 2.0s +All GPU(s): step 1355: loss 3.9629, lr 4.8e-04, dt 2.0s +All GPU(s): step 1356: loss 3.9473, lr 4.8e-04, dt 2.0s +All GPU(s): step 1357: loss 3.9395, lr 4.8e-04, dt 2.1s +All GPU(s): step 1358: loss 3.9297, lr 4.8e-04, dt 2.1s +All GPU(s): step 1359: loss 3.9570, lr 4.8e-04, dt 2.1s +All GPU(s): step 1360: loss 3.9355, lr 4.8e-04, dt 2.0s +All GPU(s): step 1361: loss 3.9512, lr 4.8e-04, dt 2.0s +All GPU(s): step 1362: loss 3.9512, lr 4.8e-04, dt 2.1s +All GPU(s): step 1363: loss 3.9473, lr 4.8e-04, dt 2.1s +All GPU(s): step 1364: loss 3.9492, lr 4.8e-04, dt 2.0s +All GPU(s): step 1365: loss 3.9473, lr 4.8e-04, dt 2.0s +All GPU(s): step 1366: loss 3.9375, lr 4.8e-04, dt 2.0s +All GPU(s): step 1367: loss 3.9355, lr 4.8e-04, dt 2.0s +All GPU(s): step 1368: loss 3.9414, lr 4.8e-04, dt 2.1s +All GPU(s): step 1369: loss 3.9258, lr 4.8e-04, dt 2.0s +All GPU(s): step 1370: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1371: loss 3.9375, lr 4.8e-04, dt 2.0s +All GPU(s): step 1372: loss 3.9492, lr 4.8e-04, dt 2.1s +All GPU(s): step 1373: loss 3.9395, lr 4.8e-04, dt 2.1s +All GPU(s): step 1374: loss 3.9121, lr 4.8e-04, dt 2.0s +All GPU(s): step 1375: loss 3.9336, lr 4.8e-04, dt 2.0s +All GPU(s): step 1376: loss 3.9336, lr 4.8e-04, dt 2.0s +All GPU(s): step 1377: loss 3.9297, lr 4.8e-04, dt 2.1s +All GPU(s): step 1378: loss 3.9082, lr 4.8e-04, dt 2.0s +All GPU(s): step 1379: loss 3.9258, lr 4.8e-04, dt 2.0s +All GPU(s): step 1380: loss 3.9180, lr 4.8e-04, dt 2.0s +All GPU(s): step 1381: loss 3.9102, lr 4.8e-04, dt 2.0s +All GPU(s): step 1382: loss 3.9219, lr 4.8e-04, dt 2.2s +All GPU(s): step 1383: loss 3.9199, lr 4.8e-04, dt 2.0s +All GPU(s): step 1384: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1385: loss 3.9102, lr 4.8e-04, dt 2.0s +All GPU(s): step 1386: loss 3.9102, lr 4.8e-04, dt 2.0s +All GPU(s): step 1387: loss 3.9238, lr 4.8e-04, dt 2.1s +All GPU(s): step 1388: loss 3.9141, lr 4.8e-04, dt 2.0s +All GPU(s): step 1389: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1390: loss 3.9375, lr 4.8e-04, dt 2.0s +All GPU(s): step 1391: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1392: loss 3.9238, lr 4.8e-04, dt 2.1s +All GPU(s): step 1393: loss 3.9551, lr 4.8e-04, dt 2.0s +All GPU(s): step 1394: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1395: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1396: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1397: loss 3.9082, lr 4.8e-04, dt 2.1s +All GPU(s): step 1398: loss 3.9277, lr 4.8e-04, dt 2.0s +All GPU(s): step 1399: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1400: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1401: loss 3.9102, lr 4.8e-04, dt 2.0s +All GPU(s): step 1402: loss 3.9082, lr 4.8e-04, dt 2.1s +All GPU(s): step 1403: loss 3.9258, lr 4.8e-04, dt 2.0s +All GPU(s): step 1404: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1405: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1406: loss 3.9258, lr 4.8e-04, dt 2.1s +All GPU(s): step 1407: loss 3.9199, lr 4.8e-04, dt 2.1s +All GPU(s): step 1408: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1409: loss 3.9297, lr 4.8e-04, dt 2.0s +All GPU(s): step 1410: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1411: loss 3.9219, lr 4.8e-04, dt 2.1s +All GPU(s): step 1412: loss 3.9082, lr 4.8e-04, dt 2.0s +All GPU(s): step 1413: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1414: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1415: loss 3.9219, lr 4.8e-04, dt 2.0s +All GPU(s): step 1416: loss 3.9141, lr 4.8e-04, dt 2.1s +All GPU(s): step 1417: loss 3.9121, lr 4.8e-04, dt 2.0s +All GPU(s): step 1418: loss 3.9102, lr 4.8e-04, dt 2.1s +All GPU(s): step 1419: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1420: loss 3.9043, lr 4.8e-04, dt 2.1s +All GPU(s): step 1421: loss 3.9160, lr 4.8e-04, dt 2.1s +All GPU(s): step 1422: loss 3.9180, lr 4.8e-04, dt 2.0s +All GPU(s): step 1423: loss 3.9062, lr 4.8e-04, dt 2.0s +All GPU(s): step 1424: loss 3.8984, lr 4.8e-04, dt 2.0s +All GPU(s): step 1425: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1426: loss 3.9082, lr 4.8e-04, dt 2.1s +All GPU(s): step 1427: loss 3.9004, lr 4.8e-04, dt 2.0s +All GPU(s): step 1428: loss 3.9141, lr 4.8e-04, dt 2.0s +All GPU(s): step 1429: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1430: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1431: loss 3.9355, lr 4.8e-04, dt 2.1s +All GPU(s): step 1432: loss 3.9062, lr 4.8e-04, dt 2.0s +All GPU(s): step 1433: loss 3.9082, lr 4.8e-04, dt 2.0s +All GPU(s): step 1434: loss 3.9141, lr 4.8e-04, dt 2.0s +All GPU(s): step 1435: loss 3.9043, lr 4.8e-04, dt 2.1s +All GPU(s): step 1436: loss 3.9258, lr 4.8e-04, dt 2.1s +All GPU(s): step 1437: loss 3.9082, lr 4.8e-04, dt 2.0s +All GPU(s): step 1438: loss 3.9277, lr 4.8e-04, dt 2.0s +All GPU(s): step 1439: loss 3.9199, lr 4.8e-04, dt 2.0s +All GPU(s): step 1440: loss 3.9199, lr 4.8e-04, dt 2.1s +All GPU(s): step 1441: loss 3.9043, lr 4.8e-04, dt 2.0s +All GPU(s): step 1442: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1443: loss 3.9355, lr 4.8e-04, dt 2.0s +All GPU(s): step 1444: loss 3.9336, lr 4.8e-04, dt 2.0s +All GPU(s): step 1445: loss 3.9062, lr 4.8e-04, dt 2.1s +All GPU(s): step 1446: loss 3.9180, lr 4.8e-04, dt 2.0s +All GPU(s): step 1447: loss 3.9199, lr 4.8e-04, dt 2.0s +All GPU(s): step 1448: loss 3.9219, lr 4.8e-04, dt 2.0s +All GPU(s): step 1449: loss 3.9199, lr 4.8e-04, dt 2.0s +All GPU(s): step 1450: loss 3.9238, lr 4.8e-04, dt 2.1s +All GPU(s): step 1451: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1452: loss 3.9180, lr 4.8e-04, dt 2.0s +All GPU(s): step 1453: loss 3.9316, lr 4.8e-04, dt 2.0s +All GPU(s): step 1454: loss 3.9219, lr 4.8e-04, dt 2.0s +All GPU(s): step 1455: loss 3.9238, lr 4.8e-04, dt 2.1s +All GPU(s): step 1456: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1457: loss 3.8965, lr 4.8e-04, dt 2.0s +All GPU(s): step 1458: loss 3.9199, lr 4.8e-04, dt 2.0s +All GPU(s): step 1459: loss 3.9219, lr 4.8e-04, dt 2.0s +All GPU(s): step 1460: loss 3.9082, lr 4.8e-04, dt 2.1s +All GPU(s): step 1461: loss 3.9180, lr 4.8e-04, dt 2.0s +All GPU(s): step 1462: loss 3.9297, lr 4.8e-04, dt 2.0s +All GPU(s): step 1463: loss 3.9180, lr 4.8e-04, dt 2.0s +All GPU(s): step 1464: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1465: loss 3.9316, lr 4.8e-04, dt 2.1s +All GPU(s): step 1466: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1467: loss 3.9277, lr 4.8e-04, dt 2.0s +All GPU(s): step 1468: loss 3.9258, lr 4.8e-04, dt 2.0s +All GPU(s): step 1469: loss 3.9238, lr 4.8e-04, dt 2.1s +All GPU(s): step 1470: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1471: loss 3.9219, lr 4.8e-04, dt 2.0s +All GPU(s): step 1472: loss 3.9277, lr 4.8e-04, dt 2.0s +All GPU(s): step 1473: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1474: loss 3.9414, lr 4.8e-04, dt 2.1s +All GPU(s): step 1475: loss 3.9277, lr 4.8e-04, dt 2.0s +All GPU(s): step 1476: loss 3.9121, lr 4.8e-04, dt 2.0s +All GPU(s): step 1477: loss 3.9336, lr 4.8e-04, dt 2.0s +All GPU(s): step 1478: loss 3.9277, lr 4.8e-04, dt 2.0s +All GPU(s): step 1479: loss 3.9258, lr 4.8e-04, dt 2.1s +All GPU(s): step 1480: loss 3.9297, lr 4.8e-04, dt 2.0s +All GPU(s): step 1481: loss 3.9414, lr 4.8e-04, dt 2.0s +All GPU(s): step 1482: loss 3.9277, lr 4.8e-04, dt 2.0s +All GPU(s): step 1483: loss 3.9160, lr 4.8e-04, dt 2.1s +All GPU(s): step 1484: loss 3.9297, lr 4.8e-04, dt 2.1s +All GPU(s): step 1485: loss 3.9219, lr 4.8e-04, dt 2.0s +All GPU(s): step 1486: loss 3.9688, lr 4.8e-04, dt 2.0s +All GPU(s): step 1487: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1488: loss 3.9277, lr 4.8e-04, dt 2.0s +All GPU(s): step 1489: loss 3.9355, lr 4.8e-04, dt 2.1s +All GPU(s): step 1490: loss 3.9883, lr 4.8e-04, dt 2.0s +All GPU(s): step 1491: loss 4.0742, lr 4.8e-04, dt 2.0s +All GPU(s): step 1492: loss 4.0527, lr 4.8e-04, dt 2.0s +All GPU(s): step 1493: loss 3.9961, lr 4.8e-04, dt 2.0s +All GPU(s): step 1494: loss 3.9844, lr 4.8e-04, dt 2.1s +All GPU(s): step 1495: loss 4.0000, lr 4.8e-04, dt 2.0s +All GPU(s): step 1496: loss 4.0332, lr 4.8e-04, dt 2.0s +All GPU(s): step 1497: loss 4.0430, lr 4.8e-04, dt 2.0s +All GPU(s): step 1498: loss 4.0254, lr 4.8e-04, dt 2.1s +All GPU(s): step 1499: loss 4.0020, lr 4.8e-04, dt 2.1s +All GPU(s): step 1500: loss 3.9883, lr 4.8e-04, dt 2.0s +All GPU(s): step 1501: loss 3.9668, lr 4.8e-04, dt 2.0s +All GPU(s): step 1502: loss 3.9570, lr 4.8e-04, dt 2.0s +All GPU(s): step 1503: loss 3.9277, lr 4.8e-04, dt 2.1s +All GPU(s): step 1504: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1505: loss 3.9238, lr 4.8e-04, dt 2.0s +All GPU(s): step 1506: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1507: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1508: loss 3.9258, lr 4.8e-04, dt 2.1s +All GPU(s): step 1509: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1510: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1511: loss 3.9141, lr 4.8e-04, dt 2.0s +All GPU(s): step 1512: loss 3.9199, lr 4.8e-04, dt 2.0s +All GPU(s): step 1513: loss 3.9219, lr 4.8e-04, dt 2.1s +All GPU(s): step 1514: loss 3.9141, lr 4.8e-04, dt 2.0s +All GPU(s): step 1515: loss 3.9121, lr 4.8e-04, dt 2.0s +All GPU(s): step 1516: loss 3.9160, lr 4.8e-04, dt 2.0s +All GPU(s): step 1517: loss 3.9258, lr 4.8e-04, dt 2.0s +All GPU(s): step 1518: loss 3.9277, lr 4.8e-04, dt 2.1s +All GPU(s): step 1519: loss 3.9297, lr 4.8e-04, dt 2.0s +All GPU(s): step 1520: loss 3.9180, lr 4.8e-04, dt 2.0s +All GPU(s): step 1521: loss 3.9180, lr 4.8e-04, dt 2.0s +All GPU(s): step 1522: loss 3.9121, lr 4.8e-04, dt 2.0s +All GPU(s): step 1523: loss 3.9199, lr 4.8e-04, dt 2.1s +All GPU(s): step 1524: loss 3.9082, lr 4.8e-04, dt 2.0s +All GPU(s): step 1525: loss 3.9199, lr 4.8e-04, dt 2.0s +All GPU(s): step 1526: loss 3.9102, lr 4.8e-04, dt 2.0s +All GPU(s): step 1527: loss 3.9277, lr 4.8e-04, dt 2.1s +All GPU(s): step 1528: loss 3.9238, lr 4.8e-04, dt 2.1s +All GPU(s): step 1529: loss 3.9180, lr 4.8e-04, dt 2.0s +All GPU(s): step 1530: loss 3.9277, lr 4.8e-04, dt 2.0s +All GPU(s): step 1531: loss 3.9531, lr 4.8e-04, dt 2.0s +All GPU(s): step 1532: loss 3.9805, lr 4.8e-04, dt 2.1s +All GPU(s): step 1533: loss 3.9922, lr 4.8e-04, dt 2.0s +All GPU(s): step 1534: loss 4.0000, lr 4.8e-04, dt 2.0s +All GPU(s): step 1535: loss 4.0039, lr 4.8e-04, dt 2.0s +All GPU(s): step 1536: loss 3.9902, lr 4.8e-04, dt 2.0s +All GPU(s): step 1537: loss 4.0000, lr 4.8e-04, dt 2.1s +All GPU(s): step 1538: loss 4.0312, lr 4.8e-04, dt 2.0s +All GPU(s): step 1539: loss 4.0527, lr 4.8e-04, dt 2.0s +All GPU(s): step 1540: loss 4.0391, lr 4.8e-04, dt 2.0s +All GPU(s): step 1541: loss 4.0000, lr 4.8e-04, dt 2.0s +All GPU(s): step 1542: loss 4.0098, lr 4.8e-04, dt 2.1s +All GPU(s): step 1543: loss 4.0020, lr 4.8e-04, dt 2.0s +All GPU(s): step 1544: loss 3.9785, lr 4.8e-04, dt 2.0s +All GPU(s): step 1545: loss 3.9551, lr 4.8e-04, dt 2.0s +All GPU(s): step 1546: loss 3.9531, lr 4.8e-04, dt 2.0s +All GPU(s): step 1547: loss 3.9902, lr 4.8e-04, dt 2.1s +All GPU(s): step 1548: loss 3.9785, lr 4.8e-04, dt 2.1s +All GPU(s): step 1549: loss 3.9590, lr 4.8e-04, dt 2.0s +All GPU(s): step 1550: loss 3.9746, lr 4.8e-04, dt 2.0s +All GPU(s): step 1551: loss 3.9941, lr 4.8e-04, dt 2.0s +All GPU(s): step 1552: loss 4.0059, lr 4.8e-04, dt 2.2s +All GPU(s): step 1553: loss 3.9629, lr 4.8e-04, dt 2.0s +All GPU(s): step 1554: loss 3.9727, lr 4.8e-04, dt 2.0s +All GPU(s): step 1555: loss 3.9590, lr 4.8e-04, dt 2.0s +All GPU(s): step 1556: loss 3.9434, lr 4.8e-04, dt 2.0s +All GPU(s): step 1557: loss 3.9297, lr 4.8e-04, dt 2.1s +All GPU(s): step 1558: loss 3.9395, lr 4.8e-04, dt 2.0s +All GPU(s): step 1559: loss 3.9199, lr 4.8e-04, dt 2.0s +All GPU(s): step 1560: loss 3.9141, lr 4.8e-04, dt 2.0s +All GPU(s): step 1561: loss 3.9492, lr 4.8e-04, dt 2.1s +All GPU(s): step 1562: loss 3.9102, lr 4.8e-04, dt 2.1s +All GPU(s): step 1563: loss 3.9219, lr 4.8e-04, dt 2.1s +All GPU(s): step 1564: loss 3.9043, lr 4.8e-04, dt 2.1s +All GPU(s): step 1565: loss 3.9258, lr 4.8e-04, dt 2.0s +All GPU(s): step 1566: loss 3.9434, lr 4.8e-04, dt 2.1s +All GPU(s): step 1567: loss 3.9355, lr 4.8e-04, dt 2.0s +All GPU(s): step 1568: loss 3.9082, lr 4.8e-04, dt 2.0s +All GPU(s): step 1569: loss 3.9180, lr 4.8e-04, dt 2.0s +All GPU(s): step 1570: loss 3.9375, lr 4.8e-04, dt 2.1s +All GPU(s): step 1571: loss 3.9121, lr 4.8e-04, dt 2.1s +All GPU(s): step 1572: loss 3.9805, lr 4.8e-04, dt 2.1s +All GPU(s): step 1573: loss 4.1562, lr 4.8e-04, dt 2.0s +All GPU(s): step 1574: loss 4.0215, lr 4.8e-04, dt 2.0s +All GPU(s): step 1575: loss 4.1797, lr 4.8e-04, dt 2.0s +All GPU(s): step 1576: loss 4.1797, lr 4.8e-04, dt 2.1s +All GPU(s): step 1577: loss 4.1758, lr 4.8e-04, dt 2.0s +All GPU(s): step 1578: loss 4.1289, lr 4.8e-04, dt 2.0s +All GPU(s): step 1579: loss 4.1289, lr 4.8e-04, dt 2.0s +All GPU(s): step 1580: loss 4.1289, lr 4.8e-04, dt 2.1s +All GPU(s): step 1581: loss 4.1211, lr 4.8e-04, dt 2.1s +All GPU(s): step 1582: loss 4.1445, lr 4.8e-04, dt 2.0s +All GPU(s): step 1583: loss 4.1250, lr 4.8e-04, dt 2.0s +All GPU(s): step 1584: loss 4.1641, lr 4.8e-04, dt 2.0s +All GPU(s): step 1585: loss 4.1328, lr 4.8e-04, dt 2.1s +All GPU(s): step 1586: loss 4.1289, lr 4.8e-04, dt 2.1s +All GPU(s): step 1587: loss 4.1133, lr 4.8e-04, dt 2.0s +All GPU(s): step 1588: loss 4.1172, lr 4.8e-04, dt 2.0s +All GPU(s): step 1589: loss 4.1211, lr 4.8e-04, dt 2.0s +All GPU(s): step 1590: loss 4.1211, lr 4.8e-04, dt 2.1s +All GPU(s): step 1591: loss 4.1289, lr 4.8e-04, dt 2.1s +All GPU(s): step 1592: loss 4.1094, lr 4.8e-04, dt 2.0s +All GPU(s): step 1593: loss 4.1055, lr 4.8e-04, dt 2.0s +All GPU(s): step 1594: loss 4.1172, lr 4.8e-04, dt 2.0s +All GPU(s): step 1595: loss 4.1328, lr 4.8e-04, dt 2.1s +All GPU(s): step 1596: loss 4.1172, lr 4.8e-04, dt 2.0s +All GPU(s): step 1597: loss 4.1055, lr 4.8e-04, dt 2.0s +All GPU(s): step 1598: loss 4.1094, lr 4.8e-04, dt 2.0s +All GPU(s): step 1599: loss 4.1211, lr 4.8e-04, dt 2.0s +All GPU(s): step 1600: loss 4.1094, lr 4.8e-04, dt 2.1s +All GPU(s): step 1601: loss 4.1133, lr 4.8e-04, dt 2.1s +All GPU(s): step 1602: loss 4.1133, lr 4.8e-04, dt 2.0s +All GPU(s): step 1603: loss 4.1055, lr 4.8e-04, dt 2.0s +All GPU(s): step 1604: loss 4.1133, lr 4.8e-04, dt 2.0s +All GPU(s): step 1605: loss 4.1133, lr 4.8e-04, dt 2.2s +All GPU(s): step 1606: loss 4.1094, lr 4.8e-04, dt 2.0s +All GPU(s): step 1607: loss 4.1094, lr 4.8e-04, dt 2.0s +All GPU(s): step 1608: loss 4.1055, lr 4.8e-04, dt 2.0s +All GPU(s): step 1609: loss 4.1250, lr 4.8e-04, dt 2.0s +All GPU(s): step 1610: loss 4.0977, lr 4.8e-04, dt 2.1s +All GPU(s): step 1611: loss 4.1133, lr 4.8e-04, dt 2.0s +All GPU(s): step 1612: loss 4.1016, lr 4.8e-04, dt 2.0s +All GPU(s): step 1613: loss 4.1094, lr 4.8e-04, dt 2.0s +All GPU(s): step 1614: loss 4.1055, lr 4.8e-04, dt 2.0s +All GPU(s): step 1615: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1616: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1617: loss 4.0938, lr 4.7e-04, dt 2.0s +All GPU(s): step 1618: loss 4.1250, lr 4.7e-04, dt 2.0s +All GPU(s): step 1619: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1620: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1621: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1622: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1623: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1624: loss 4.1016, lr 4.7e-04, dt 2.1s +All GPU(s): step 1625: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1626: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1627: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1628: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1629: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1630: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1631: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1632: loss 4.0977, lr 4.7e-04, dt 2.0s +All GPU(s): step 1633: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1634: loss 4.1250, lr 4.7e-04, dt 2.1s +All GPU(s): step 1635: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1636: loss 4.1250, lr 4.7e-04, dt 2.0s +All GPU(s): step 1637: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1638: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1639: loss 4.1055, lr 4.7e-04, dt 2.1s +All GPU(s): step 1640: loss 4.1055, lr 4.7e-04, dt 2.1s +All GPU(s): step 1641: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1642: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1643: loss 4.1016, lr 4.7e-04, dt 2.1s +All GPU(s): step 1644: loss 4.1016, lr 4.7e-04, dt 2.1s +All GPU(s): step 1645: loss 4.0840, lr 4.7e-04, dt 2.0s +All GPU(s): step 1646: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1647: loss 4.0938, lr 4.7e-04, dt 2.0s +All GPU(s): step 1648: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1649: loss 4.1055, lr 4.7e-04, dt 2.1s +All GPU(s): step 1650: loss 4.1250, lr 4.7e-04, dt 2.0s +All GPU(s): step 1651: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1652: loss 4.1172, lr 4.7e-04, dt 2.1s +All GPU(s): step 1653: loss 4.1484, lr 4.7e-04, dt 2.1s +All GPU(s): step 1654: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1655: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1656: loss 4.1211, lr 4.7e-04, dt 2.1s +All GPU(s): step 1657: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1658: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1659: loss 4.1211, lr 4.7e-04, dt 2.0s +All GPU(s): step 1660: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1661: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1662: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1663: loss 4.1211, lr 4.7e-04, dt 2.1s +All GPU(s): step 1664: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1665: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1666: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1667: loss 4.1211, lr 4.7e-04, dt 2.0s +All GPU(s): step 1668: loss 4.1172, lr 4.7e-04, dt 2.2s +All GPU(s): step 1669: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1670: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1671: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1672: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1673: loss 4.1172, lr 4.7e-04, dt 2.1s +All GPU(s): step 1674: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1675: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1676: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1677: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1678: loss 4.1211, lr 4.7e-04, dt 2.1s +All GPU(s): step 1679: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1680: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1681: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1682: loss 4.1172, lr 4.7e-04, dt 2.1s +All GPU(s): step 1683: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1684: loss 4.1211, lr 4.7e-04, dt 2.0s +All GPU(s): step 1685: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1686: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1687: loss 4.1172, lr 4.7e-04, dt 2.1s +All GPU(s): step 1688: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1689: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1690: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1691: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1692: loss 4.1250, lr 4.7e-04, dt 2.1s +All GPU(s): step 1693: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1694: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1695: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1696: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1697: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1698: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1699: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1700: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1701: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1702: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1703: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1704: loss 4.1250, lr 4.7e-04, dt 2.0s +All GPU(s): step 1705: loss 4.1211, lr 4.7e-04, dt 2.0s +All GPU(s): step 1706: loss 4.2422, lr 4.7e-04, dt 2.0s +All GPU(s): step 1707: loss 4.3398, lr 4.7e-04, dt 2.1s +All GPU(s): step 1708: loss 4.1680, lr 4.7e-04, dt 2.0s +All GPU(s): step 1709: loss 4.3438, lr 4.7e-04, dt 2.0s +All GPU(s): step 1710: loss 4.1953, lr 4.7e-04, dt 2.0s +All GPU(s): step 1711: loss 4.1680, lr 4.7e-04, dt 2.1s +All GPU(s): step 1712: loss 4.1289, lr 4.7e-04, dt 2.1s +All GPU(s): step 1713: loss 4.1562, lr 4.7e-04, dt 2.0s +All GPU(s): step 1714: loss 4.1523, lr 4.7e-04, dt 2.0s +All GPU(s): step 1715: loss 4.1367, lr 4.7e-04, dt 2.0s +All GPU(s): step 1716: loss 4.1328, lr 4.7e-04, dt 2.1s +All GPU(s): step 1717: loss 4.1328, lr 4.7e-04, dt 2.0s +All GPU(s): step 1718: loss 4.1211, lr 4.7e-04, dt 2.0s +All GPU(s): step 1719: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1720: loss 4.1250, lr 4.7e-04, dt 2.0s +All GPU(s): step 1721: loss 4.1250, lr 4.7e-04, dt 2.1s +All GPU(s): step 1722: loss 4.1289, lr 4.7e-04, dt 2.0s +All GPU(s): step 1723: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1724: loss 4.1211, lr 4.7e-04, dt 2.0s +All GPU(s): step 1725: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1726: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1727: loss 4.1172, lr 4.7e-04, dt 2.1s +All GPU(s): step 1728: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1729: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1730: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1731: loss 4.1133, lr 4.7e-04, dt 2.2s +All GPU(s): step 1732: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1733: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1734: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1735: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1736: loss 4.1055, lr 4.7e-04, dt 2.1s +All GPU(s): step 1737: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1738: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1739: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1740: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1741: loss 4.1094, lr 4.7e-04, dt 2.2s +All GPU(s): step 1742: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1743: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1744: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1745: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1746: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1747: loss 4.0977, lr 4.7e-04, dt 2.0s +All GPU(s): step 1748: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1749: loss 4.0977, lr 4.7e-04, dt 2.0s +All GPU(s): step 1750: loss 4.0977, lr 4.7e-04, dt 2.1s +All GPU(s): step 1751: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1752: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1753: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1754: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1755: loss 4.1016, lr 4.7e-04, dt 2.1s +All GPU(s): step 1756: loss 4.0938, lr 4.7e-04, dt 2.0s +All GPU(s): step 1757: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1758: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1759: loss 4.1250, lr 4.7e-04, dt 2.0s +All GPU(s): step 1760: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1761: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1762: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1763: loss 4.0977, lr 4.7e-04, dt 2.0s +All GPU(s): step 1764: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1765: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1766: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1767: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1768: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1769: loss 4.1055, lr 4.7e-04, dt 2.1s +All GPU(s): step 1770: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1771: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1772: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1773: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1774: loss 4.1016, lr 4.7e-04, dt 2.1s +All GPU(s): step 1775: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1776: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1777: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1778: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1779: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1780: loss 4.0938, lr 4.7e-04, dt 2.0s +All GPU(s): step 1781: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1782: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1783: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1784: loss 4.1250, lr 4.7e-04, dt 2.1s +All GPU(s): step 1785: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1786: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1787: loss 4.0938, lr 4.7e-04, dt 2.0s +All GPU(s): step 1788: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1789: loss 4.1172, lr 4.7e-04, dt 2.1s +All GPU(s): step 1790: loss 4.0977, lr 4.7e-04, dt 2.0s +All GPU(s): step 1791: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1792: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1793: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1794: loss 4.1055, lr 4.7e-04, dt 2.1s +All GPU(s): step 1795: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1796: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1797: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1798: loss 4.1211, lr 4.7e-04, dt 2.0s +All GPU(s): step 1799: loss 4.1055, lr 4.7e-04, dt 2.1s +All GPU(s): step 1800: loss 4.0977, lr 4.7e-04, dt 2.0s +All GPU(s): step 1801: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1802: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1803: loss 4.1328, lr 4.7e-04, dt 2.1s +All GPU(s): step 1804: loss 4.1289, lr 4.7e-04, dt 2.1s +All GPU(s): step 1805: loss 4.1211, lr 4.7e-04, dt 2.1s +All GPU(s): step 1806: loss 4.1289, lr 4.7e-04, dt 2.1s +All GPU(s): step 1807: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1808: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1809: loss 4.1250, lr 4.7e-04, dt 2.0s +All GPU(s): step 1810: loss 4.1328, lr 4.7e-04, dt 2.0s +All GPU(s): step 1811: loss 4.1953, lr 4.7e-04, dt 2.0s +All GPU(s): step 1812: loss 4.1250, lr 4.7e-04, dt 2.0s +All GPU(s): step 1813: loss 4.1250, lr 4.7e-04, dt 2.1s +All GPU(s): step 1814: loss 4.1367, lr 4.7e-04, dt 2.0s +All GPU(s): step 1815: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1816: loss 4.1289, lr 4.7e-04, dt 2.0s +All GPU(s): step 1817: loss 4.1211, lr 4.7e-04, dt 2.0s +All GPU(s): step 1818: loss 4.1289, lr 4.7e-04, dt 2.1s +All GPU(s): step 1819: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1820: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1821: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1822: loss 4.1172, lr 4.7e-04, dt 2.1s +All GPU(s): step 1823: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1824: loss 4.1211, lr 4.7e-04, dt 2.0s +All GPU(s): step 1825: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1826: loss 4.0977, lr 4.7e-04, dt 2.0s +All GPU(s): step 1827: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1828: loss 4.1172, lr 4.7e-04, dt 2.1s +All GPU(s): step 1829: loss 4.1211, lr 4.7e-04, dt 2.1s +All GPU(s): step 1830: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1831: loss 4.1289, lr 4.7e-04, dt 2.0s +All GPU(s): step 1832: loss 4.1055, lr 4.7e-04, dt 2.1s +All GPU(s): step 1833: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1834: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1835: loss 4.1211, lr 4.7e-04, dt 2.0s +All GPU(s): step 1836: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1837: loss 4.1211, lr 4.7e-04, dt 2.1s +All GPU(s): step 1838: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1839: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1840: loss 4.1211, lr 4.7e-04, dt 2.0s +All GPU(s): step 1841: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1842: loss 4.1172, lr 4.7e-04, dt 2.2s +All GPU(s): step 1843: loss 4.1172, lr 4.7e-04, dt 2.1s +All GPU(s): step 1844: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1845: loss 4.0977, lr 4.7e-04, dt 2.0s +All GPU(s): step 1846: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1847: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1848: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1849: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1850: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1851: loss 4.1211, lr 4.7e-04, dt 2.1s +All GPU(s): step 1852: loss 4.1055, lr 4.7e-04, dt 2.1s +All GPU(s): step 1853: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1854: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1855: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1856: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1857: loss 4.0977, lr 4.7e-04, dt 2.0s +All GPU(s): step 1858: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1859: loss 4.1055, lr 4.7e-04, dt 2.1s +All GPU(s): step 1860: loss 4.0977, lr 4.7e-04, dt 2.1s +All GPU(s): step 1861: loss 4.1133, lr 4.7e-04, dt 2.2s +All GPU(s): step 1862: loss 4.0977, lr 4.7e-04, dt 2.1s +All GPU(s): step 1863: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1864: loss 4.1055, lr 4.7e-04, dt 2.1s +All GPU(s): step 1865: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1866: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1867: loss 4.1055, lr 4.7e-04, dt 2.1s +All GPU(s): step 1868: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1869: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1870: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1871: loss 4.1172, lr 4.7e-04, dt 2.2s +All GPU(s): step 1872: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1873: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1874: loss 4.1250, lr 4.7e-04, dt 2.0s +All GPU(s): step 1875: loss 4.0977, lr 4.7e-04, dt 2.1s +All GPU(s): step 1876: loss 4.0938, lr 4.7e-04, dt 2.1s +All GPU(s): step 1877: loss 4.0977, lr 4.7e-04, dt 2.0s +All GPU(s): step 1878: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1879: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1880: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1881: loss 4.1055, lr 4.7e-04, dt 2.1s +All GPU(s): step 1882: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1883: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1884: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1885: loss 4.1211, lr 4.7e-04, dt 2.1s +All GPU(s): step 1886: loss 4.1016, lr 4.7e-04, dt 2.0s +All GPU(s): step 1887: loss 4.1211, lr 4.7e-04, dt 2.0s +All GPU(s): step 1888: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1889: loss 4.1133, lr 4.7e-04, dt 2.1s +All GPU(s): step 1890: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1891: loss 4.1172, lr 4.7e-04, dt 2.0s +All GPU(s): step 1892: loss 4.1094, lr 4.7e-04, dt 2.0s +All GPU(s): step 1893: loss 4.1094, lr 4.7e-04, dt 2.1s +All GPU(s): step 1894: loss 4.1172, lr 4.7e-04, dt 2.1s +All GPU(s): step 1895: loss 4.0879, lr 4.7e-04, dt 2.1s +All GPU(s): step 1896: loss 4.0996, lr 4.7e-04, dt 2.0s +All GPU(s): step 1897: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1898: loss 4.1133, lr 4.7e-04, dt 2.0s +All GPU(s): step 1899: loss 4.1055, lr 4.7e-04, dt 2.0s +All GPU(s): step 1900: loss 4.1172, lr 4.6e-04, dt 2.1s +All GPU(s): step 1901: loss 4.1016, lr 4.6e-04, dt 2.0s +All GPU(s): step 1902: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 1903: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 1904: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 1905: loss 4.1016, lr 4.6e-04, dt 2.1s +All GPU(s): step 1906: loss 4.1016, lr 4.6e-04, dt 2.0s +All GPU(s): step 1907: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 1908: loss 4.0938, lr 4.6e-04, dt 2.0s +All GPU(s): step 1909: loss 4.1016, lr 4.6e-04, dt 2.1s +All GPU(s): step 1910: loss 4.1094, lr 4.6e-04, dt 2.1s +All GPU(s): step 1911: loss 4.1016, lr 4.6e-04, dt 2.0s +All GPU(s): step 1912: loss 4.1250, lr 4.6e-04, dt 2.0s +All GPU(s): step 1913: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 1914: loss 4.3672, lr 4.6e-04, dt 2.1s +All GPU(s): step 1915: loss 4.1328, lr 4.6e-04, dt 2.0s +All GPU(s): step 1916: loss 4.1602, lr 4.6e-04, dt 2.0s +All GPU(s): step 1917: loss 4.1445, lr 4.6e-04, dt 2.0s +All GPU(s): step 1918: loss 4.1484, lr 4.6e-04, dt 2.0s +All GPU(s): step 1919: loss 4.1406, lr 4.6e-04, dt 2.1s +All GPU(s): step 1920: loss 4.1602, lr 4.6e-04, dt 2.0s +All GPU(s): step 1921: loss 4.1328, lr 4.6e-04, dt 2.0s +All GPU(s): step 1922: loss 4.1367, lr 4.6e-04, dt 2.0s +All GPU(s): step 1923: loss 4.1289, lr 4.6e-04, dt 2.0s +All GPU(s): step 1924: loss 4.1289, lr 4.6e-04, dt 2.1s +All GPU(s): step 1925: loss 4.1406, lr 4.6e-04, dt 2.0s +All GPU(s): step 1926: loss 4.1328, lr 4.6e-04, dt 2.0s +All GPU(s): step 1927: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 1928: loss 4.1211, lr 4.6e-04, dt 2.0s +All GPU(s): step 1929: loss 4.1328, lr 4.6e-04, dt 2.1s +All GPU(s): step 1930: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 1931: loss 4.1250, lr 4.6e-04, dt 2.1s +All GPU(s): step 1932: loss 4.1250, lr 4.6e-04, dt 2.0s +All GPU(s): step 1933: loss 4.1289, lr 4.6e-04, dt 2.0s +All GPU(s): step 1934: loss 4.1094, lr 4.6e-04, dt 2.1s +All GPU(s): step 1935: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 1936: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 1937: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 1938: loss 4.0977, lr 4.6e-04, dt 2.0s +All GPU(s): step 1939: loss 4.1172, lr 4.6e-04, dt 2.1s +All GPU(s): step 1940: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 1941: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 1942: loss 4.1055, lr 4.6e-04, dt 2.1s +All GPU(s): step 1943: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 1944: loss 4.1055, lr 4.6e-04, dt 2.1s +All GPU(s): step 1945: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 1946: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 1947: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 1948: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 1949: loss 4.1094, lr 4.6e-04, dt 2.1s +All GPU(s): step 1950: loss 4.1055, lr 4.6e-04, dt 2.1s +All GPU(s): step 1951: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 1952: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 1953: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 1954: loss 4.0938, lr 4.6e-04, dt 2.1s +All GPU(s): step 1955: loss 4.1016, lr 4.6e-04, dt 2.1s +All GPU(s): step 1956: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 1957: loss 4.0898, lr 4.6e-04, dt 2.0s +All GPU(s): step 1958: loss 4.0938, lr 4.6e-04, dt 2.1s +All GPU(s): step 1959: loss 4.1172, lr 4.6e-04, dt 2.1s +All GPU(s): step 1960: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 1961: loss 4.1094, lr 4.6e-04, dt 2.1s +All GPU(s): step 1962: loss 4.1016, lr 4.6e-04, dt 2.0s +All GPU(s): step 1963: loss 4.1328, lr 4.6e-04, dt 2.1s +All GPU(s): step 1964: loss 4.1094, lr 4.6e-04, dt 2.1s +All GPU(s): step 1965: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 1966: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 1967: loss 4.1172, lr 4.6e-04, dt 2.1s +All GPU(s): step 1968: loss 4.1016, lr 4.6e-04, dt 2.0s +All GPU(s): step 1969: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 1970: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 1971: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 1972: loss 4.1172, lr 4.6e-04, dt 2.1s +All GPU(s): step 1973: loss 4.1094, lr 4.6e-04, dt 2.1s +All GPU(s): step 1974: loss 4.1094, lr 4.6e-04, dt 2.1s +All GPU(s): step 1975: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 1976: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 1977: loss 4.0977, lr 4.6e-04, dt 2.2s +All GPU(s): step 1978: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 1979: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 1980: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 1981: loss 4.0977, lr 4.6e-04, dt 2.0s +All GPU(s): step 1982: loss 4.1172, lr 4.6e-04, dt 2.1s +All GPU(s): step 1983: loss 4.0977, lr 4.6e-04, dt 2.0s +All GPU(s): step 1984: loss 4.0977, lr 4.6e-04, dt 2.0s +All GPU(s): step 1985: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 1986: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 1987: loss 4.1016, lr 4.6e-04, dt 2.1s +All GPU(s): step 1988: loss 4.0977, lr 4.6e-04, dt 2.0s +All GPU(s): step 1989: loss 4.0977, lr 4.6e-04, dt 2.0s +All GPU(s): step 1990: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 1991: loss 4.1250, lr 4.6e-04, dt 2.0s +All GPU(s): step 1992: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 1993: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 1994: loss 4.1211, lr 4.6e-04, dt 2.0s +All GPU(s): step 1995: loss 4.0859, lr 4.6e-04, dt 2.0s +All GPU(s): step 1996: loss 4.1172, lr 4.6e-04, dt 2.1s +All GPU(s): step 1997: loss 4.1094, lr 4.6e-04, dt 2.1s +All GPU(s): step 1998: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 1999: loss 4.1211, lr 4.6e-04, dt 2.0s +saving checkpoint to checkpoints/ckpt_2000.pt +All GPU(s): step 2000: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 2001: loss 4.1172, lr 4.6e-04, dt 2.1s +All GPU(s): step 2002: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2003: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2004: loss 4.0977, lr 4.6e-04, dt 2.0s +All GPU(s): step 2005: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2006: loss 4.1055, lr 4.6e-04, dt 2.2s +All GPU(s): step 2007: loss 4.1055, lr 4.6e-04, dt 2.1s +All GPU(s): step 2008: loss 4.0977, lr 4.6e-04, dt 2.1s +All GPU(s): step 2009: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2010: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2011: loss 4.1094, lr 4.6e-04, dt 2.3s +All GPU(s): step 2012: loss 4.1016, lr 4.6e-04, dt 2.0s +All GPU(s): step 2013: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 2014: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2015: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 2016: loss 4.1055, lr 4.6e-04, dt 2.1s +All GPU(s): step 2017: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 2018: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2019: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2020: loss 4.1172, lr 4.6e-04, dt 2.1s +All GPU(s): step 2021: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 2022: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2023: loss 4.1016, lr 4.6e-04, dt 2.0s +All GPU(s): step 2024: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2025: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 2026: loss 4.1172, lr 4.6e-04, dt 2.1s +All GPU(s): step 2027: loss 4.1055, lr 4.6e-04, dt 2.1s +All GPU(s): step 2028: loss 4.0977, lr 4.6e-04, dt 2.1s +All GPU(s): step 2029: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2030: loss 4.1055, lr 4.6e-04, dt 2.1s +All GPU(s): step 2031: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 2032: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 2033: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2034: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 2035: loss 4.1406, lr 4.6e-04, dt 2.1s +All GPU(s): step 2036: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 2037: loss 4.1250, lr 4.6e-04, dt 2.0s +All GPU(s): step 2038: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2039: loss 4.0977, lr 4.6e-04, dt 2.0s +All GPU(s): step 2040: loss 4.1133, lr 4.6e-04, dt 2.3s +All GPU(s): step 2041: loss 4.1016, lr 4.6e-04, dt 2.1s +All GPU(s): step 2042: loss 4.1602, lr 4.6e-04, dt 2.1s +All GPU(s): step 2043: loss 4.1016, lr 4.6e-04, dt 2.0s +All GPU(s): step 2044: loss 4.1406, lr 4.6e-04, dt 2.1s +All GPU(s): step 2045: loss 4.1289, lr 4.6e-04, dt 2.1s +All GPU(s): step 2046: loss 4.1406, lr 4.6e-04, dt 2.1s +All GPU(s): step 2047: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 2048: loss 4.1172, lr 4.6e-04, dt 2.1s +All GPU(s): step 2049: loss 4.1094, lr 4.6e-04, dt 2.1s +All GPU(s): step 2050: loss 4.1289, lr 4.6e-04, dt 2.1s +All GPU(s): step 2051: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2052: loss 4.1211, lr 4.6e-04, dt 2.0s +All GPU(s): step 2053: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 2054: loss 4.1211, lr 4.6e-04, dt 2.1s +All GPU(s): step 2055: loss 4.1055, lr 4.6e-04, dt 2.1s +All GPU(s): step 2056: loss 4.1016, lr 4.6e-04, dt 2.0s +All GPU(s): step 2057: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 2058: loss 4.1211, lr 4.6e-04, dt 2.0s +All GPU(s): step 2059: loss 4.1016, lr 4.6e-04, dt 2.1s +All GPU(s): step 2060: loss 4.1094, lr 4.6e-04, dt 2.1s +All GPU(s): step 2061: loss 4.1211, lr 4.6e-04, dt 2.0s +All GPU(s): step 2062: loss 4.1016, lr 4.6e-04, dt 2.0s +All GPU(s): step 2063: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 2064: loss 4.1055, lr 4.6e-04, dt 2.1s +All GPU(s): step 2065: loss 4.0977, lr 4.6e-04, dt 2.1s +All GPU(s): step 2066: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 2067: loss 4.1016, lr 4.6e-04, dt 2.0s +All GPU(s): step 2068: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2069: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 2070: loss 4.1016, lr 4.6e-04, dt 2.0s +All GPU(s): step 2071: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2072: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2073: loss 4.1211, lr 4.6e-04, dt 2.1s +All GPU(s): step 2074: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2075: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 2076: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2077: loss 4.1094, lr 4.6e-04, dt 2.1s +All GPU(s): step 2078: loss 4.1172, lr 4.6e-04, dt 2.1s +All GPU(s): step 2079: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2080: loss 4.1211, lr 4.6e-04, dt 2.0s +All GPU(s): step 2081: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2082: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 2083: loss 4.1055, lr 4.6e-04, dt 2.2s +All GPU(s): step 2084: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2085: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 2086: loss 4.0977, lr 4.6e-04, dt 2.0s +All GPU(s): step 2087: loss 4.1094, lr 4.6e-04, dt 2.1s +All GPU(s): step 2088: loss 4.1094, lr 4.6e-04, dt 2.2s +All GPU(s): step 2089: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2090: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2091: loss 4.1055, lr 4.6e-04, dt 2.1s +All GPU(s): step 2092: loss 4.1406, lr 4.6e-04, dt 2.0s +All GPU(s): step 2093: loss 4.1289, lr 4.6e-04, dt 2.1s +All GPU(s): step 2094: loss 4.0977, lr 4.6e-04, dt 2.0s +All GPU(s): step 2095: loss 4.1094, lr 4.6e-04, dt 2.1s +All GPU(s): step 2096: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2097: loss 4.1172, lr 4.6e-04, dt 2.1s +All GPU(s): step 2098: loss 4.1250, lr 4.6e-04, dt 2.1s +All GPU(s): step 2099: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2100: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 2101: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 2102: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 2103: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2104: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 2105: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 2106: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2107: loss 4.1016, lr 4.6e-04, dt 2.1s +All GPU(s): step 2108: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2109: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 2110: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 2111: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 2112: loss 4.1016, lr 4.6e-04, dt 2.1s +All GPU(s): step 2113: loss 4.0977, lr 4.6e-04, dt 2.0s +All GPU(s): step 2114: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2115: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 2116: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2117: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 2118: loss 4.0977, lr 4.6e-04, dt 2.0s +All GPU(s): step 2119: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2120: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 2121: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2122: loss 4.1094, lr 4.6e-04, dt 2.1s +All GPU(s): step 2123: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 2124: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 2125: loss 4.1133, lr 4.6e-04, dt 2.0s +All GPU(s): step 2126: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 2127: loss 4.0977, lr 4.6e-04, dt 2.1s +All GPU(s): step 2128: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 2129: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 2130: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 2131: loss 4.0977, lr 4.6e-04, dt 2.1s +All GPU(s): step 2132: loss 4.1016, lr 4.6e-04, dt 2.0s +All GPU(s): step 2133: loss 4.0859, lr 4.6e-04, dt 2.1s +All GPU(s): step 2134: loss 4.1094, lr 4.6e-04, dt 2.0s +All GPU(s): step 2135: loss 4.0977, lr 4.6e-04, dt 2.0s +All GPU(s): step 2136: loss 4.1133, lr 4.6e-04, dt 2.1s +All GPU(s): step 2137: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2138: loss 4.1094, lr 4.6e-04, dt 2.1s +All GPU(s): step 2139: loss 4.0859, lr 4.6e-04, dt 2.1s +All GPU(s): step 2140: loss 4.1055, lr 4.6e-04, dt 2.0s +All GPU(s): step 2141: loss 4.1016, lr 4.6e-04, dt 2.1s +All GPU(s): step 2142: loss 4.1523, lr 4.6e-04, dt 2.0s +All GPU(s): step 2143: loss 4.1172, lr 4.6e-04, dt 2.1s +All GPU(s): step 2144: loss 4.1172, lr 4.6e-04, dt 2.0s +All GPU(s): step 2145: loss 4.1250, lr 4.6e-04, dt 2.0s +All GPU(s): step 2146: loss 4.1172, lr 4.6e-04, dt 2.1s +All GPU(s): step 2147: loss 4.2109, lr 4.6e-04, dt 2.0s +All GPU(s): step 2148: loss 4.1406, lr 4.6e-04, dt 2.0s +All GPU(s): step 2149: loss 4.1133, lr 4.5e-04, dt 2.0s +All GPU(s): step 2150: loss 4.2461, lr 4.5e-04, dt 2.0s +All GPU(s): step 2151: loss 4.1562, lr 4.5e-04, dt 2.1s +All GPU(s): step 2152: loss 4.1211, lr 4.5e-04, dt 2.0s +All GPU(s): step 2153: loss 4.1328, lr 4.5e-04, dt 2.1s +All GPU(s): step 2154: loss 4.1367, lr 4.5e-04, dt 2.0s +All GPU(s): step 2155: loss 4.1367, lr 4.5e-04, dt 2.1s +All GPU(s): step 2156: loss 4.1172, lr 4.5e-04, dt 2.1s +All GPU(s): step 2157: loss 4.1211, lr 4.5e-04, dt 2.1s +All GPU(s): step 2158: loss 4.1133, lr 4.5e-04, dt 2.0s +All GPU(s): step 2159: loss 4.1055, lr 4.5e-04, dt 2.0s +All GPU(s): step 2160: loss 4.1055, lr 4.5e-04, dt 2.1s +All GPU(s): step 2161: loss 4.1094, lr 4.5e-04, dt 2.0s +All GPU(s): step 2162: loss 4.1016, lr 4.5e-04, dt 2.0s +All GPU(s): step 2163: loss 4.1055, lr 4.5e-04, dt 2.0s +All GPU(s): step 2164: loss 4.0977, lr 4.5e-04, dt 2.0s +All GPU(s): step 2165: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2166: loss 4.0859, lr 4.5e-04, dt 2.1s +All GPU(s): step 2167: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2168: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2169: loss 4.0898, lr 4.5e-04, dt 2.1s +All GPU(s): step 2170: loss 4.0859, lr 4.5e-04, dt 2.2s +All GPU(s): step 2171: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2172: loss 4.0898, lr 4.5e-04, dt 2.1s +All GPU(s): step 2173: loss 4.0898, lr 4.5e-04, dt 2.0s +All GPU(s): step 2174: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2175: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2176: loss 4.0820, lr 4.5e-04, dt 2.0s +All GPU(s): step 2177: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2178: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2179: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2180: loss 4.0742, lr 4.5e-04, dt 2.1s +All GPU(s): step 2181: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2182: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2183: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2184: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2185: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2186: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2187: loss 4.0664, lr 4.5e-04, dt 2.0s +All GPU(s): step 2188: loss 4.0742, lr 4.5e-04, dt 2.1s +All GPU(s): step 2189: loss 4.0703, lr 4.5e-04, dt 2.1s +All GPU(s): step 2190: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2191: loss 4.0938, lr 4.5e-04, dt 2.0s +All GPU(s): step 2192: loss 4.0703, lr 4.5e-04, dt 2.0s +All GPU(s): step 2193: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2194: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2195: loss 4.0703, lr 4.5e-04, dt 2.0s +All GPU(s): step 2196: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2197: loss 4.0859, lr 4.5e-04, dt 2.1s +All GPU(s): step 2198: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2199: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2200: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2201: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2202: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2203: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2204: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2205: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2206: loss 4.0898, lr 4.5e-04, dt 2.0s +All GPU(s): step 2207: loss 4.0703, lr 4.5e-04, dt 2.0s +All GPU(s): step 2208: loss 4.0820, lr 4.5e-04, dt 2.0s +All GPU(s): step 2209: loss 4.0625, lr 4.5e-04, dt 2.1s +All GPU(s): step 2210: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2211: loss 4.0703, lr 4.5e-04, dt 2.1s +All GPU(s): step 2212: loss 4.0703, lr 4.5e-04, dt 2.0s +All GPU(s): step 2213: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2214: loss 4.0664, lr 4.5e-04, dt 2.1s +All GPU(s): step 2215: loss 4.1133, lr 4.5e-04, dt 2.0s +All GPU(s): step 2216: loss 4.0938, lr 4.5e-04, dt 2.0s +All GPU(s): step 2217: loss 4.0586, lr 4.5e-04, dt 2.0s +All GPU(s): step 2218: loss 4.1641, lr 4.5e-04, dt 2.1s +All GPU(s): step 2219: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2220: loss 4.2031, lr 4.5e-04, dt 2.0s +All GPU(s): step 2221: loss 4.1250, lr 4.5e-04, dt 2.0s +All GPU(s): step 2222: loss 4.1094, lr 4.5e-04, dt 2.0s +All GPU(s): step 2223: loss 4.1016, lr 4.5e-04, dt 2.2s +All GPU(s): step 2224: loss 4.1094, lr 4.5e-04, dt 2.0s +All GPU(s): step 2225: loss 4.1211, lr 4.5e-04, dt 2.0s +All GPU(s): step 2226: loss 4.1016, lr 4.5e-04, dt 2.0s +All GPU(s): step 2227: loss 4.1055, lr 4.5e-04, dt 2.0s +All GPU(s): step 2228: loss 4.0938, lr 4.5e-04, dt 2.1s +All GPU(s): step 2229: loss 4.1094, lr 4.5e-04, dt 2.0s +All GPU(s): step 2230: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2231: loss 4.0938, lr 4.5e-04, dt 2.0s +All GPU(s): step 2232: loss 4.1016, lr 4.5e-04, dt 2.0s +All GPU(s): step 2233: loss 4.0938, lr 4.5e-04, dt 2.1s +All GPU(s): step 2234: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2235: loss 4.0898, lr 4.5e-04, dt 2.0s +All GPU(s): step 2236: loss 4.1055, lr 4.5e-04, dt 2.0s +All GPU(s): step 2237: loss 4.0898, lr 4.5e-04, dt 2.0s +All GPU(s): step 2238: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2239: loss 4.0938, lr 4.5e-04, dt 2.0s +All GPU(s): step 2240: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2241: loss 4.0938, lr 4.5e-04, dt 2.0s +All GPU(s): step 2242: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2243: loss 4.0742, lr 4.5e-04, dt 2.1s +All GPU(s): step 2244: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2245: loss 4.0938, lr 4.5e-04, dt 2.0s +All GPU(s): step 2246: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2247: loss 4.0938, lr 4.5e-04, dt 2.1s +All GPU(s): step 2248: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2249: loss 4.0820, lr 4.5e-04, dt 2.0s +All GPU(s): step 2250: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2251: loss 4.0684, lr 4.5e-04, dt 2.0s +All GPU(s): step 2252: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2253: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2254: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2255: loss 4.0898, lr 4.5e-04, dt 2.0s +All GPU(s): step 2256: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2257: loss 4.0898, lr 4.5e-04, dt 2.1s +All GPU(s): step 2258: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2259: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2260: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2261: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2262: loss 4.0742, lr 4.5e-04, dt 2.1s +All GPU(s): step 2263: loss 4.0938, lr 4.5e-04, dt 2.0s +All GPU(s): step 2264: loss 4.0703, lr 4.5e-04, dt 2.0s +All GPU(s): step 2265: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2266: loss 4.0625, lr 4.5e-04, dt 2.0s +All GPU(s): step 2267: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2268: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2269: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2270: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2271: loss 4.0703, lr 4.5e-04, dt 2.0s +All GPU(s): step 2272: loss 4.0703, lr 4.5e-04, dt 2.1s +All GPU(s): step 2273: loss 4.0898, lr 4.5e-04, dt 2.0s +All GPU(s): step 2274: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2275: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2276: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2277: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2278: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2279: loss 4.0898, lr 4.5e-04, dt 2.0s +All GPU(s): step 2280: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2281: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2282: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2283: loss 4.0664, lr 4.5e-04, dt 2.0s +All GPU(s): step 2284: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2285: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2286: loss 4.0898, lr 4.5e-04, dt 2.1s +All GPU(s): step 2287: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2288: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2289: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2290: loss 4.0820, lr 4.5e-04, dt 2.0s +All GPU(s): step 2291: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2292: loss 4.0859, lr 4.5e-04, dt 2.1s +All GPU(s): step 2293: loss 4.0898, lr 4.5e-04, dt 2.1s +All GPU(s): step 2294: loss 4.0977, lr 4.5e-04, dt 2.0s +All GPU(s): step 2295: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2296: loss 4.0742, lr 4.5e-04, dt 2.4s +All GPU(s): step 2297: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2298: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2299: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2300: loss 4.0898, lr 4.5e-04, dt 2.1s +All GPU(s): step 2301: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2302: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2303: loss 4.0703, lr 4.5e-04, dt 2.0s +All GPU(s): step 2304: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2305: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2306: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2307: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2308: loss 4.0820, lr 4.5e-04, dt 2.0s +All GPU(s): step 2309: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2310: loss 4.0820, lr 4.5e-04, dt 2.2s +All GPU(s): step 2311: loss 4.0820, lr 4.5e-04, dt 2.0s +All GPU(s): step 2312: loss 4.0820, lr 4.5e-04, dt 2.0s +All GPU(s): step 2313: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2314: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2315: loss 4.0664, lr 4.5e-04, dt 2.1s +All GPU(s): step 2316: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2317: loss 4.0742, lr 4.5e-04, dt 2.1s +All GPU(s): step 2318: loss 4.0820, lr 4.5e-04, dt 2.0s +All GPU(s): step 2319: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2320: loss 4.0898, lr 4.5e-04, dt 2.1s +All GPU(s): step 2321: loss 4.0898, lr 4.5e-04, dt 2.0s +All GPU(s): step 2322: loss 4.0977, lr 4.5e-04, dt 2.0s +All GPU(s): step 2323: loss 4.1055, lr 4.5e-04, dt 2.0s +All GPU(s): step 2324: loss 4.0938, lr 4.5e-04, dt 2.1s +All GPU(s): step 2325: loss 4.0938, lr 4.5e-04, dt 2.1s +All GPU(s): step 2326: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2327: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2328: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2329: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2330: loss 4.0859, lr 4.5e-04, dt 2.1s +All GPU(s): step 2331: loss 4.0820, lr 4.5e-04, dt 2.0s +All GPU(s): step 2332: loss 4.0898, lr 4.5e-04, dt 2.0s +All GPU(s): step 2333: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2334: loss 4.0898, lr 4.5e-04, dt 2.2s +All GPU(s): step 2335: loss 4.0977, lr 4.5e-04, dt 2.0s +All GPU(s): step 2336: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2337: loss 4.0898, lr 4.5e-04, dt 2.0s +All GPU(s): step 2338: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2339: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2340: loss 4.0703, lr 4.5e-04, dt 2.0s +All GPU(s): step 2341: loss 4.0703, lr 4.5e-04, dt 2.0s +All GPU(s): step 2342: loss 4.0938, lr 4.5e-04, dt 2.0s +All GPU(s): step 2343: loss 4.0938, lr 4.5e-04, dt 2.0s +All GPU(s): step 2344: loss 4.0586, lr 4.5e-04, dt 2.2s +All GPU(s): step 2345: loss 4.0703, lr 4.5e-04, dt 2.1s +All GPU(s): step 2346: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2347: loss 4.0820, lr 4.5e-04, dt 2.0s +All GPU(s): step 2348: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2349: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2350: loss 4.0859, lr 4.5e-04, dt 2.0s +All GPU(s): step 2351: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2352: loss 4.0664, lr 4.5e-04, dt 2.0s +All GPU(s): step 2353: loss 4.0938, lr 4.5e-04, dt 2.1s +All GPU(s): step 2354: loss 4.0703, lr 4.5e-04, dt 2.1s +All GPU(s): step 2355: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2356: loss 4.0820, lr 4.5e-04, dt 2.0s +All GPU(s): step 2357: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2358: loss 4.0781, lr 4.5e-04, dt 2.1s +All GPU(s): step 2359: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2360: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2361: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2362: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2363: loss 4.0820, lr 4.5e-04, dt 2.2s +All GPU(s): step 2364: loss 4.0820, lr 4.5e-04, dt 2.0s +All GPU(s): step 2365: loss 4.0781, lr 4.5e-04, dt 2.0s +All GPU(s): step 2366: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2367: loss 4.0742, lr 4.5e-04, dt 2.0s +All GPU(s): step 2368: loss 4.0820, lr 4.5e-04, dt 2.1s +All GPU(s): step 2369: loss 4.0664, lr 4.5e-04, dt 2.0s +All GPU(s): step 2370: loss 4.0645, lr 4.5e-04, dt 2.0s +All GPU(s): step 2371: loss 4.0820, lr 4.5e-04, dt 2.0s +All GPU(s): step 2372: loss 4.0703, lr 4.5e-04, dt 2.0s +All GPU(s): step 2373: loss 4.0742, lr 4.5e-04, dt 2.1s +All GPU(s): step 2374: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2375: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2376: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2377: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2378: loss 4.0781, lr 4.4e-04, dt 2.1s +All GPU(s): step 2379: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2380: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2381: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2382: loss 4.0703, lr 4.4e-04, dt 2.1s +All GPU(s): step 2383: loss 4.0898, lr 4.4e-04, dt 2.0s +All GPU(s): step 2384: loss 4.0703, lr 4.4e-04, dt 2.0s +All GPU(s): step 2385: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2386: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2387: loss 4.0938, lr 4.4e-04, dt 2.1s +All GPU(s): step 2388: loss 4.0820, lr 4.4e-04, dt 2.1s +All GPU(s): step 2389: loss 4.0820, lr 4.4e-04, dt 2.1s +All GPU(s): step 2390: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2391: loss 4.0703, lr 4.4e-04, dt 2.1s +All GPU(s): step 2392: loss 4.0703, lr 4.4e-04, dt 2.2s +All GPU(s): step 2393: loss 4.0742, lr 4.4e-04, dt 2.1s +All GPU(s): step 2394: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2395: loss 4.0664, lr 4.4e-04, dt 2.0s +All GPU(s): step 2396: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2397: loss 4.0820, lr 4.4e-04, dt 2.1s +All GPU(s): step 2398: loss 4.0703, lr 4.4e-04, dt 2.0s +All GPU(s): step 2399: loss 4.0664, lr 4.4e-04, dt 2.0s +All GPU(s): step 2400: loss 4.0664, lr 4.4e-04, dt 2.1s +All GPU(s): step 2401: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2402: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2403: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2404: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2405: loss 4.0938, lr 4.4e-04, dt 2.0s +All GPU(s): step 2406: loss 4.0742, lr 4.4e-04, dt 2.1s +All GPU(s): step 2407: loss 4.1016, lr 4.4e-04, dt 2.0s +All GPU(s): step 2408: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2409: loss 4.0820, lr 4.4e-04, dt 2.1s +All GPU(s): step 2410: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2411: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2412: loss 4.0820, lr 4.4e-04, dt 2.1s +All GPU(s): step 2413: loss 4.0703, lr 4.4e-04, dt 2.0s +All GPU(s): step 2414: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2415: loss 4.0742, lr 4.4e-04, dt 2.1s +All GPU(s): step 2416: loss 4.0703, lr 4.4e-04, dt 2.1s +All GPU(s): step 2417: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2418: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2419: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2420: loss 4.0742, lr 4.4e-04, dt 2.1s +All GPU(s): step 2421: loss 4.0820, lr 4.4e-04, dt 2.2s +All GPU(s): step 2422: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2423: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2424: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2425: loss 4.0938, lr 4.4e-04, dt 2.0s +All GPU(s): step 2426: loss 4.0898, lr 4.4e-04, dt 2.1s +All GPU(s): step 2427: loss 4.0664, lr 4.4e-04, dt 2.0s +All GPU(s): step 2428: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2429: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2430: loss 4.0703, lr 4.4e-04, dt 2.1s +All GPU(s): step 2431: loss 4.0820, lr 4.4e-04, dt 2.1s +All GPU(s): step 2432: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2433: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2434: loss 4.0820, lr 4.4e-04, dt 2.1s +All GPU(s): step 2435: loss 4.0742, lr 4.4e-04, dt 2.1s +All GPU(s): step 2436: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2437: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2438: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2439: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2440: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2441: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2442: loss 4.0430, lr 4.4e-04, dt 2.0s +All GPU(s): step 2443: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2444: loss 4.0938, lr 4.4e-04, dt 2.0s +All GPU(s): step 2445: loss 4.0938, lr 4.4e-04, dt 2.2s +All GPU(s): step 2446: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2447: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2448: loss 4.0781, lr 4.4e-04, dt 2.1s +All GPU(s): step 2449: loss 4.0820, lr 4.4e-04, dt 2.1s +All GPU(s): step 2450: loss 4.0742, lr 4.4e-04, dt 2.1s +All GPU(s): step 2451: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2452: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2453: loss 4.0703, lr 4.4e-04, dt 2.1s +All GPU(s): step 2454: loss 4.0898, lr 4.4e-04, dt 2.1s +All GPU(s): step 2455: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2456: loss 4.2656, lr 4.4e-04, dt 2.0s +All GPU(s): step 2457: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2458: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2459: loss 4.0938, lr 4.4e-04, dt 2.1s +All GPU(s): step 2460: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2461: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2462: loss 4.1094, lr 4.4e-04, dt 2.0s +All GPU(s): step 2463: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2464: loss 4.0977, lr 4.4e-04, dt 2.1s +All GPU(s): step 2465: loss 4.0898, lr 4.4e-04, dt 2.0s +All GPU(s): step 2466: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2467: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2468: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2469: loss 4.0859, lr 4.4e-04, dt 2.2s +All GPU(s): step 2470: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2471: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2472: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2473: loss 4.0898, lr 4.4e-04, dt 2.1s +All GPU(s): step 2474: loss 4.0742, lr 4.4e-04, dt 2.1s +All GPU(s): step 2475: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2476: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2477: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2478: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2479: loss 4.0781, lr 4.4e-04, dt 2.1s +All GPU(s): step 2480: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2481: loss 4.0703, lr 4.4e-04, dt 2.0s +All GPU(s): step 2482: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2483: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2484: loss 4.0820, lr 4.4e-04, dt 2.1s +All GPU(s): step 2485: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2486: loss 4.0605, lr 4.4e-04, dt 2.0s +All GPU(s): step 2487: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2488: loss 4.0781, lr 4.4e-04, dt 2.1s +All GPU(s): step 2489: loss 4.0781, lr 4.4e-04, dt 2.2s +All GPU(s): step 2490: loss 4.0664, lr 4.4e-04, dt 2.0s +All GPU(s): step 2491: loss 4.1172, lr 4.4e-04, dt 2.0s +All GPU(s): step 2492: loss 4.0938, lr 4.4e-04, dt 2.0s +All GPU(s): step 2493: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2494: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2495: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2496: loss 4.0898, lr 4.4e-04, dt 2.0s +All GPU(s): step 2497: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2498: loss 4.0781, lr 4.4e-04, dt 2.2s +All GPU(s): step 2499: loss 4.0898, lr 4.4e-04, dt 2.0s +All GPU(s): step 2500: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2501: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2502: loss 4.0938, lr 4.4e-04, dt 2.0s +All GPU(s): step 2503: loss 4.0586, lr 4.4e-04, dt 2.1s +All GPU(s): step 2504: loss 4.0938, lr 4.4e-04, dt 2.0s +All GPU(s): step 2505: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2506: loss 4.0938, lr 4.4e-04, dt 2.0s +All GPU(s): step 2507: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2508: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2509: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2510: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2511: loss 4.0625, lr 4.4e-04, dt 2.0s +All GPU(s): step 2512: loss 4.0781, lr 4.4e-04, dt 2.1s +All GPU(s): step 2513: loss 4.0781, lr 4.4e-04, dt 2.1s +All GPU(s): step 2514: loss 4.0938, lr 4.4e-04, dt 2.0s +All GPU(s): step 2515: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2516: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2517: loss 4.0820, lr 4.4e-04, dt 2.1s +All GPU(s): step 2518: loss 4.0820, lr 4.4e-04, dt 2.1s +All GPU(s): step 2519: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2520: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2521: loss 4.0898, lr 4.4e-04, dt 2.0s +All GPU(s): step 2522: loss 4.0742, lr 4.4e-04, dt 2.1s +All GPU(s): step 2523: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2524: loss 4.0938, lr 4.4e-04, dt 2.0s +All GPU(s): step 2525: loss 4.0703, lr 4.4e-04, dt 2.0s +All GPU(s): step 2526: loss 4.0625, lr 4.4e-04, dt 2.0s +All GPU(s): step 2527: loss 4.0664, lr 4.4e-04, dt 2.1s +All GPU(s): step 2528: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2529: loss 4.0703, lr 4.4e-04, dt 2.0s +All GPU(s): step 2530: loss 4.1055, lr 4.4e-04, dt 2.0s +All GPU(s): step 2531: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2532: loss 4.0781, lr 4.4e-04, dt 2.1s +All GPU(s): step 2533: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2534: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2535: loss 4.0664, lr 4.4e-04, dt 2.0s +All GPU(s): step 2536: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2537: loss 4.0781, lr 4.4e-04, dt 2.1s +All GPU(s): step 2538: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2539: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2540: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2541: loss 4.0664, lr 4.4e-04, dt 2.1s +All GPU(s): step 2542: loss 4.0742, lr 4.4e-04, dt 2.1s +All GPU(s): step 2543: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2544: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2545: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2546: loss 4.0586, lr 4.4e-04, dt 2.1s +All GPU(s): step 2547: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2548: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2549: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2550: loss 4.0625, lr 4.4e-04, dt 2.0s +All GPU(s): step 2551: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2552: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2553: loss 4.0898, lr 4.4e-04, dt 2.0s +All GPU(s): step 2554: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2555: loss 4.0703, lr 4.4e-04, dt 2.0s +All GPU(s): step 2556: loss 4.0781, lr 4.4e-04, dt 2.1s +All GPU(s): step 2557: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2558: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2559: loss 4.0703, lr 4.4e-04, dt 2.0s +All GPU(s): step 2560: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2561: loss 4.0625, lr 4.4e-04, dt 2.1s +All GPU(s): step 2562: loss 4.0742, lr 4.4e-04, dt 2.1s +All GPU(s): step 2563: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2564: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2565: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2566: loss 4.0703, lr 4.4e-04, dt 2.2s +All GPU(s): step 2567: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2568: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2569: loss 4.0859, lr 4.4e-04, dt 2.0s +All GPU(s): step 2570: loss 4.0781, lr 4.4e-04, dt 2.0s +All GPU(s): step 2571: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2572: loss 4.0859, lr 4.4e-04, dt 2.1s +All GPU(s): step 2573: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2574: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2575: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2576: loss 4.0742, lr 4.4e-04, dt 2.1s +All GPU(s): step 2577: loss 4.0742, lr 4.4e-04, dt 2.1s +All GPU(s): step 2578: loss 4.0625, lr 4.4e-04, dt 2.0s +All GPU(s): step 2579: loss 4.0742, lr 4.4e-04, dt 2.0s +All GPU(s): step 2580: loss 4.0703, lr 4.4e-04, dt 2.1s +All GPU(s): step 2581: loss 4.0820, lr 4.4e-04, dt 2.0s +All GPU(s): step 2582: loss 4.0625, lr 4.3e-04, dt 2.1s +All GPU(s): step 2583: loss 4.0742, lr 4.3e-04, dt 2.0s +All GPU(s): step 2584: loss 4.0820, lr 4.3e-04, dt 2.1s +All GPU(s): step 2585: loss 4.0742, lr 4.3e-04, dt 2.1s +All GPU(s): step 2586: loss 4.0742, lr 4.3e-04, dt 2.0s +All GPU(s): step 2587: loss 4.0625, lr 4.3e-04, dt 2.0s +All GPU(s): step 2588: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2589: loss 4.0742, lr 4.3e-04, dt 2.0s +All GPU(s): step 2590: loss 4.0820, lr 4.3e-04, dt 2.1s +All GPU(s): step 2591: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2592: loss 4.0742, lr 4.3e-04, dt 2.0s +All GPU(s): step 2593: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2594: loss 4.0898, lr 4.3e-04, dt 2.0s +All GPU(s): step 2595: loss 4.0898, lr 4.3e-04, dt 2.1s +All GPU(s): step 2596: loss 4.0977, lr 4.3e-04, dt 2.1s +All GPU(s): step 2597: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2598: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2599: loss 4.0898, lr 4.3e-04, dt 2.1s +All GPU(s): step 2600: loss 4.0898, lr 4.3e-04, dt 2.2s +All GPU(s): step 2601: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2602: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2603: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2604: loss 4.0898, lr 4.3e-04, dt 2.1s +All GPU(s): step 2605: loss 4.0703, lr 4.3e-04, dt 2.1s +All GPU(s): step 2606: loss 4.0898, lr 4.3e-04, dt 2.0s +All GPU(s): step 2607: loss 4.0898, lr 4.3e-04, dt 2.0s +All GPU(s): step 2608: loss 4.1133, lr 4.3e-04, dt 2.0s +All GPU(s): step 2609: loss 4.0938, lr 4.3e-04, dt 2.1s +All GPU(s): step 2610: loss 4.0977, lr 4.3e-04, dt 2.0s +All GPU(s): step 2611: loss 4.1055, lr 4.3e-04, dt 2.1s +All GPU(s): step 2612: loss 4.1016, lr 4.3e-04, dt 2.0s +All GPU(s): step 2613: loss 4.1055, lr 4.3e-04, dt 2.0s +All GPU(s): step 2614: loss 4.0977, lr 4.3e-04, dt 2.1s +All GPU(s): step 2615: loss 4.1055, lr 4.3e-04, dt 2.0s +All GPU(s): step 2616: loss 4.1055, lr 4.3e-04, dt 2.0s +All GPU(s): step 2617: loss 4.1094, lr 4.3e-04, dt 2.0s +All GPU(s): step 2618: loss 4.1094, lr 4.3e-04, dt 2.0s +All GPU(s): step 2619: loss 4.1094, lr 4.3e-04, dt 2.1s +All GPU(s): step 2620: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2621: loss 4.1172, lr 4.3e-04, dt 2.0s +All GPU(s): step 2622: loss 4.0938, lr 4.3e-04, dt 2.0s +All GPU(s): step 2623: loss 4.1094, lr 4.3e-04, dt 2.0s +All GPU(s): step 2624: loss 4.1094, lr 4.3e-04, dt 2.1s +All GPU(s): step 2625: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2626: loss 4.0898, lr 4.3e-04, dt 2.0s +All GPU(s): step 2627: loss 4.0703, lr 4.3e-04, dt 2.0s +All GPU(s): step 2628: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2629: loss 4.0820, lr 4.3e-04, dt 2.1s +All GPU(s): step 2630: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2631: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2632: loss 4.0703, lr 4.3e-04, dt 2.0s +All GPU(s): step 2633: loss 4.0820, lr 4.3e-04, dt 2.1s +All GPU(s): step 2634: loss 4.0781, lr 4.3e-04, dt 2.1s +All GPU(s): step 2635: loss 4.0625, lr 4.3e-04, dt 2.0s +All GPU(s): step 2636: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2637: loss 4.0742, lr 4.3e-04, dt 2.0s +All GPU(s): step 2638: loss 4.0703, lr 4.3e-04, dt 2.1s +All GPU(s): step 2639: loss 4.0605, lr 4.3e-04, dt 2.0s +All GPU(s): step 2640: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2641: loss 4.0742, lr 4.3e-04, dt 2.0s +All GPU(s): step 2642: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2643: loss 4.0781, lr 4.3e-04, dt 2.1s +All GPU(s): step 2644: loss 4.0898, lr 4.3e-04, dt 2.0s +All GPU(s): step 2645: loss 4.0742, lr 4.3e-04, dt 2.0s +All GPU(s): step 2646: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2647: loss 4.0703, lr 4.3e-04, dt 2.0s +All GPU(s): step 2648: loss 4.1055, lr 4.3e-04, dt 2.1s +All GPU(s): step 2649: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2650: loss 4.0742, lr 4.3e-04, dt 2.0s +All GPU(s): step 2651: loss 4.0703, lr 4.3e-04, dt 2.0s +All GPU(s): step 2652: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2653: loss 4.0898, lr 4.3e-04, dt 2.1s +All GPU(s): step 2654: loss 4.0703, lr 4.3e-04, dt 2.0s +All GPU(s): step 2655: loss 4.0742, lr 4.3e-04, dt 2.0s +All GPU(s): step 2656: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2657: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2658: loss 4.0781, lr 4.3e-04, dt 2.1s +All GPU(s): step 2659: loss 4.0742, lr 4.3e-04, dt 2.0s +All GPU(s): step 2660: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2661: loss 4.0742, lr 4.3e-04, dt 2.0s +All GPU(s): step 2662: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2663: loss 4.0781, lr 4.3e-04, dt 2.1s +All GPU(s): step 2664: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2665: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2666: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2667: loss 4.0703, lr 4.3e-04, dt 2.1s +All GPU(s): step 2668: loss 4.0938, lr 4.3e-04, dt 2.0s +All GPU(s): step 2669: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2670: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2671: loss 4.1016, lr 4.3e-04, dt 2.0s +All GPU(s): step 2672: loss 4.0859, lr 4.3e-04, dt 2.1s +All GPU(s): step 2673: loss 4.0898, lr 4.3e-04, dt 2.0s +All GPU(s): step 2674: loss 4.0977, lr 4.3e-04, dt 2.0s +All GPU(s): step 2675: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2676: loss 4.0938, lr 4.3e-04, dt 2.1s +All GPU(s): step 2677: loss 4.0762, lr 4.3e-04, dt 2.2s +All GPU(s): step 2678: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2679: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2680: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2681: loss 4.0938, lr 4.3e-04, dt 2.0s +All GPU(s): step 2682: loss 4.0977, lr 4.3e-04, dt 2.1s +All GPU(s): step 2683: loss 4.0938, lr 4.3e-04, dt 2.0s +All GPU(s): step 2684: loss 4.1133, lr 4.3e-04, dt 2.0s +All GPU(s): step 2685: loss 4.0938, lr 4.3e-04, dt 2.0s +All GPU(s): step 2686: loss 4.0938, lr 4.3e-04, dt 2.0s +All GPU(s): step 2687: loss 4.0977, lr 4.3e-04, dt 2.2s +All GPU(s): step 2688: loss 4.1133, lr 4.3e-04, dt 2.1s +All GPU(s): step 2689: loss 4.0938, lr 4.3e-04, dt 2.1s +All GPU(s): step 2690: loss 4.1094, lr 4.3e-04, dt 2.0s +All GPU(s): step 2691: loss 4.0977, lr 4.3e-04, dt 2.0s +All GPU(s): step 2692: loss 4.1094, lr 4.3e-04, dt 2.1s +All GPU(s): step 2693: loss 4.0938, lr 4.3e-04, dt 2.0s +All GPU(s): step 2694: loss 4.1055, lr 4.3e-04, dt 2.0s +All GPU(s): step 2695: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2696: loss 4.1016, lr 4.3e-04, dt 2.1s +All GPU(s): step 2697: loss 4.1055, lr 4.3e-04, dt 2.1s +All GPU(s): step 2698: loss 4.0938, lr 4.3e-04, dt 2.0s +All GPU(s): step 2699: loss 4.0977, lr 4.3e-04, dt 2.0s +All GPU(s): step 2700: loss 4.0898, lr 4.3e-04, dt 2.0s +All GPU(s): step 2701: loss 4.0977, lr 4.3e-04, dt 2.2s +All GPU(s): step 2702: loss 4.0977, lr 4.3e-04, dt 2.0s +All GPU(s): step 2703: loss 4.1016, lr 4.3e-04, dt 2.0s +All GPU(s): step 2704: loss 4.0977, lr 4.3e-04, dt 2.0s +All GPU(s): step 2705: loss 4.0898, lr 4.3e-04, dt 2.0s +All GPU(s): step 2706: loss 4.0820, lr 4.3e-04, dt 2.1s +All GPU(s): step 2707: loss 4.0938, lr 4.3e-04, dt 2.0s +All GPU(s): step 2708: loss 4.0742, lr 4.3e-04, dt 2.0s +All GPU(s): step 2709: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2710: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2711: loss 4.0781, lr 4.3e-04, dt 2.2s +All GPU(s): step 2712: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2713: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2714: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2715: loss 4.0820, lr 4.3e-04, dt 2.1s +All GPU(s): step 2716: loss 4.0898, lr 4.3e-04, dt 2.1s +All GPU(s): step 2717: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2718: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2719: loss 4.0898, lr 4.3e-04, dt 2.0s +All GPU(s): step 2720: loss 4.0742, lr 4.3e-04, dt 2.0s +All GPU(s): step 2721: loss 4.1992, lr 4.3e-04, dt 2.1s +All GPU(s): step 2722: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2723: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2724: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2725: loss 4.0820, lr 4.3e-04, dt 2.1s +All GPU(s): step 2726: loss 4.0977, lr 4.3e-04, dt 2.1s +All GPU(s): step 2727: loss 4.1016, lr 4.3e-04, dt 2.0s +All GPU(s): step 2728: loss 4.0898, lr 4.3e-04, dt 2.0s +All GPU(s): step 2729: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2730: loss 4.0781, lr 4.3e-04, dt 2.1s +All GPU(s): step 2731: loss 4.0703, lr 4.3e-04, dt 2.0s +All GPU(s): step 2732: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2733: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2734: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2735: loss 4.0859, lr 4.3e-04, dt 2.1s +All GPU(s): step 2736: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2737: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2738: loss 4.0703, lr 4.3e-04, dt 2.0s +All GPU(s): step 2739: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2740: loss 4.0977, lr 4.3e-04, dt 2.1s +All GPU(s): step 2741: loss 4.0898, lr 4.3e-04, dt 2.0s +All GPU(s): step 2742: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2743: loss 4.0703, lr 4.3e-04, dt 2.0s +All GPU(s): step 2744: loss 4.0898, lr 4.3e-04, dt 2.0s +All GPU(s): step 2745: loss 4.0898, lr 4.3e-04, dt 2.1s +All GPU(s): step 2746: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2747: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2748: loss 4.1016, lr 4.3e-04, dt 2.0s +All GPU(s): step 2749: loss 4.0977, lr 4.3e-04, dt 2.0s +All GPU(s): step 2750: loss 4.1055, lr 4.3e-04, dt 2.1s +All GPU(s): step 2751: loss 4.0898, lr 4.3e-04, dt 2.1s +All GPU(s): step 2752: loss 4.1016, lr 4.3e-04, dt 2.0s +All GPU(s): step 2753: loss 4.1055, lr 4.3e-04, dt 2.0s +All GPU(s): step 2754: loss 4.0977, lr 4.3e-04, dt 2.1s +All GPU(s): step 2755: loss 4.0977, lr 4.3e-04, dt 2.0s +All GPU(s): step 2756: loss 4.0898, lr 4.3e-04, dt 2.0s +All GPU(s): step 2757: loss 4.0938, lr 4.3e-04, dt 2.0s +All GPU(s): step 2758: loss 4.0898, lr 4.3e-04, dt 2.0s +All GPU(s): step 2759: loss 4.0742, lr 4.3e-04, dt 2.1s +All GPU(s): step 2760: loss 4.0938, lr 4.3e-04, dt 2.0s +All GPU(s): step 2761: loss 4.0938, lr 4.3e-04, dt 2.0s +All GPU(s): step 2762: loss 4.0859, lr 4.3e-04, dt 2.0s +All GPU(s): step 2763: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2764: loss 4.0820, lr 4.3e-04, dt 2.1s +All GPU(s): step 2765: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2766: loss 4.0742, lr 4.3e-04, dt 2.0s +All GPU(s): step 2767: loss 4.0703, lr 4.3e-04, dt 2.0s +All GPU(s): step 2768: loss 4.0898, lr 4.3e-04, dt 2.0s +All GPU(s): step 2769: loss 4.0859, lr 4.3e-04, dt 2.1s +All GPU(s): step 2770: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2771: loss 4.0977, lr 4.3e-04, dt 2.0s +All GPU(s): step 2772: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2773: loss 4.0820, lr 4.3e-04, dt 2.0s +All GPU(s): step 2774: loss 4.0781, lr 4.3e-04, dt 2.1s +All GPU(s): step 2775: loss 4.0781, lr 4.3e-04, dt 2.0s +All GPU(s): step 2776: loss 4.0703, lr 4.3e-04, dt 2.0s +All GPU(s): step 2777: loss 4.0625, lr 4.3e-04, dt 2.0s +All GPU(s): step 2778: loss 4.0703, lr 4.2e-04, dt 2.0s +All GPU(s): step 2779: loss 4.0742, lr 4.2e-04, dt 2.1s +All GPU(s): step 2780: loss 4.0898, lr 4.2e-04, dt 2.0s +All GPU(s): step 2781: loss 4.0781, lr 4.2e-04, dt 2.0s +All GPU(s): step 2782: loss 4.0859, lr 4.2e-04, dt 2.0s +All GPU(s): step 2783: loss 4.0859, lr 4.2e-04, dt 2.0s +All GPU(s): step 2784: loss 4.0820, lr 4.2e-04, dt 2.1s +All GPU(s): step 2785: loss 4.0898, lr 4.2e-04, dt 2.0s +All GPU(s): step 2786: loss 4.0977, lr 4.2e-04, dt 2.0s +All GPU(s): step 2787: loss 4.0938, lr 4.2e-04, dt 2.0s +All GPU(s): step 2788: loss 4.0859, lr 4.2e-04, dt 2.1s +All GPU(s): step 2789: loss 4.0820, lr 4.2e-04, dt 2.0s +All GPU(s): step 2790: loss 4.0938, lr 4.2e-04, dt 2.0s +All GPU(s): step 2791: loss 4.0859, lr 4.2e-04, dt 2.0s +All GPU(s): step 2792: loss 4.0938, lr 4.2e-04, dt 2.1s +All GPU(s): step 2793: loss 4.0938, lr 4.2e-04, dt 2.2s +All GPU(s): step 2794: loss 4.1016, lr 4.2e-04, dt 2.0s +All GPU(s): step 2795: loss 4.0977, lr 4.2e-04, dt 2.0s +All GPU(s): step 2796: loss 4.0781, lr 4.2e-04, dt 2.0s +All GPU(s): step 2797: loss 4.0938, lr 4.2e-04, dt 2.0s +All GPU(s): step 2798: loss 4.1094, lr 4.2e-04, dt 2.1s +All GPU(s): step 2799: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2800: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2801: loss 4.1016, lr 4.2e-04, dt 2.0s +All GPU(s): step 2802: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2803: loss 4.1016, lr 4.2e-04, dt 2.2s +All GPU(s): step 2804: loss 4.0898, lr 4.2e-04, dt 2.1s +All GPU(s): step 2805: loss 4.1055, lr 4.2e-04, dt 2.1s +All GPU(s): step 2806: loss 4.1172, lr 4.2e-04, dt 2.0s +All GPU(s): step 2807: loss 4.1133, lr 4.2e-04, dt 2.1s +All GPU(s): step 2808: loss 4.1055, lr 4.2e-04, dt 2.1s +All GPU(s): step 2809: loss 4.1016, lr 4.2e-04, dt 2.0s +All GPU(s): step 2810: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2811: loss 4.1172, lr 4.2e-04, dt 2.1s +All GPU(s): step 2812: loss 4.0977, lr 4.2e-04, dt 2.1s +All GPU(s): step 2813: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2814: loss 4.0977, lr 4.2e-04, dt 2.0s +All GPU(s): step 2815: loss 4.1016, lr 4.2e-04, dt 2.0s +All GPU(s): step 2816: loss 4.0898, lr 4.2e-04, dt 2.0s +All GPU(s): step 2817: loss 4.1172, lr 4.2e-04, dt 2.1s +All GPU(s): step 2818: loss 4.1055, lr 4.2e-04, dt 2.1s +All GPU(s): step 2819: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2820: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2821: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2822: loss 4.1094, lr 4.2e-04, dt 2.1s +All GPU(s): step 2823: loss 4.1133, lr 4.2e-04, dt 2.0s +All GPU(s): step 2824: loss 4.1133, lr 4.2e-04, dt 2.0s +All GPU(s): step 2825: loss 4.0977, lr 4.2e-04, dt 2.1s +All GPU(s): step 2826: loss 4.1094, lr 4.2e-04, dt 2.1s +All GPU(s): step 2827: loss 4.1172, lr 4.2e-04, dt 2.1s +All GPU(s): step 2828: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2829: loss 4.1406, lr 4.2e-04, dt 2.1s +All GPU(s): step 2830: loss 4.1172, lr 4.2e-04, dt 2.0s +All GPU(s): step 2831: loss 4.1016, lr 4.2e-04, dt 2.0s +All GPU(s): step 2832: loss 4.1289, lr 4.2e-04, dt 2.1s +All GPU(s): step 2833: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2834: loss 4.1055, lr 4.2e-04, dt 2.1s +All GPU(s): step 2835: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2836: loss 4.1055, lr 4.2e-04, dt 2.1s +All GPU(s): step 2837: loss 4.1133, lr 4.2e-04, dt 2.1s +All GPU(s): step 2838: loss 4.1094, lr 4.2e-04, dt 2.1s +All GPU(s): step 2839: loss 4.0977, lr 4.2e-04, dt 2.0s +All GPU(s): step 2840: loss 4.1016, lr 4.2e-04, dt 2.0s +All GPU(s): step 2841: loss 4.1094, lr 4.2e-04, dt 2.1s +All GPU(s): step 2842: loss 4.1016, lr 4.2e-04, dt 2.1s +All GPU(s): step 2843: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2844: loss 4.1211, lr 4.2e-04, dt 2.0s +All GPU(s): step 2845: loss 4.0938, lr 4.2e-04, dt 2.0s +All GPU(s): step 2846: loss 4.0859, lr 4.2e-04, dt 2.1s +All GPU(s): step 2847: loss 4.0977, lr 4.2e-04, dt 2.0s +All GPU(s): step 2848: loss 4.1016, lr 4.2e-04, dt 2.0s +All GPU(s): step 2849: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2850: loss 4.1172, lr 4.2e-04, dt 2.0s +All GPU(s): step 2851: loss 4.1172, lr 4.2e-04, dt 2.1s +All GPU(s): step 2852: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2853: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2854: loss 4.1016, lr 4.2e-04, dt 2.1s +All GPU(s): step 2855: loss 4.1172, lr 4.2e-04, dt 2.0s +All GPU(s): step 2856: loss 4.1094, lr 4.2e-04, dt 2.1s +All GPU(s): step 2857: loss 4.1016, lr 4.2e-04, dt 2.0s +All GPU(s): step 2858: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2859: loss 4.1016, lr 4.2e-04, dt 2.0s +All GPU(s): step 2860: loss 4.1055, lr 4.2e-04, dt 2.1s +All GPU(s): step 2861: loss 4.0898, lr 4.2e-04, dt 2.1s +All GPU(s): step 2862: loss 4.0898, lr 4.2e-04, dt 2.1s +All GPU(s): step 2863: loss 4.1133, lr 4.2e-04, dt 2.0s +All GPU(s): step 2864: loss 4.1133, lr 4.2e-04, dt 2.0s +All GPU(s): step 2865: loss 4.1211, lr 4.2e-04, dt 2.1s +All GPU(s): step 2866: loss 4.0977, lr 4.2e-04, dt 2.1s +All GPU(s): step 2867: loss 4.1016, lr 4.2e-04, dt 2.0s +All GPU(s): step 2868: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2869: loss 4.1133, lr 4.2e-04, dt 2.0s +All GPU(s): step 2870: loss 4.1055, lr 4.2e-04, dt 2.1s +All GPU(s): step 2871: loss 4.1055, lr 4.2e-04, dt 2.1s +All GPU(s): step 2872: loss 4.0977, lr 4.2e-04, dt 2.0s +All GPU(s): step 2873: loss 4.0977, lr 4.2e-04, dt 2.1s +All GPU(s): step 2874: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2875: loss 4.1016, lr 4.2e-04, dt 2.2s +All GPU(s): step 2876: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2877: loss 4.1133, lr 4.2e-04, dt 2.0s +All GPU(s): step 2878: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2879: loss 4.0898, lr 4.2e-04, dt 2.0s +All GPU(s): step 2880: loss 4.1133, lr 4.2e-04, dt 2.1s +All GPU(s): step 2881: loss 4.1094, lr 4.2e-04, dt 2.1s +All GPU(s): step 2882: loss 4.0938, lr 4.2e-04, dt 2.0s +All GPU(s): step 2883: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2884: loss 4.1016, lr 4.2e-04, dt 2.1s +All GPU(s): step 2885: loss 4.1055, lr 4.2e-04, dt 2.2s +All GPU(s): step 2886: loss 4.1172, lr 4.2e-04, dt 2.0s +All GPU(s): step 2887: loss 4.0977, lr 4.2e-04, dt 2.0s +All GPU(s): step 2888: loss 4.1055, lr 4.2e-04, dt 2.1s +All GPU(s): step 2889: loss 4.1094, lr 4.2e-04, dt 2.1s +All GPU(s): step 2890: loss 4.1172, lr 4.2e-04, dt 2.1s +All GPU(s): step 2891: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2892: loss 4.1133, lr 4.2e-04, dt 2.1s +All GPU(s): step 2893: loss 4.1133, lr 4.2e-04, dt 2.0s +All GPU(s): step 2894: loss 4.0977, lr 4.2e-04, dt 2.1s +All GPU(s): step 2895: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2896: loss 4.1094, lr 4.2e-04, dt 2.1s +All GPU(s): step 2897: loss 4.0977, lr 4.2e-04, dt 2.0s +All GPU(s): step 2898: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2899: loss 4.1055, lr 4.2e-04, dt 2.1s +All GPU(s): step 2900: loss 4.1523, lr 4.2e-04, dt 2.1s +All GPU(s): step 2901: loss 4.1211, lr 4.2e-04, dt 2.0s +All GPU(s): step 2902: loss 4.1133, lr 4.2e-04, dt 2.0s +All GPU(s): step 2903: loss 4.1172, lr 4.2e-04, dt 2.0s +All GPU(s): step 2904: loss 4.1133, lr 4.2e-04, dt 2.1s +All GPU(s): step 2905: loss 4.1172, lr 4.2e-04, dt 2.0s +All GPU(s): step 2906: loss 4.0898, lr 4.2e-04, dt 2.0s +All GPU(s): step 2907: loss 4.1211, lr 4.2e-04, dt 2.0s +All GPU(s): step 2908: loss 4.1172, lr 4.2e-04, dt 2.1s +All GPU(s): step 2909: loss 4.1094, lr 4.2e-04, dt 2.2s +All GPU(s): step 2910: loss 4.1133, lr 4.2e-04, dt 2.0s +All GPU(s): step 2911: loss 4.1445, lr 4.2e-04, dt 2.0s +All GPU(s): step 2912: loss 4.1133, lr 4.2e-04, dt 2.1s +All GPU(s): step 2913: loss 4.0938, lr 4.2e-04, dt 2.0s +All GPU(s): step 2914: loss 4.1016, lr 4.2e-04, dt 2.1s +All GPU(s): step 2915: loss 4.1016, lr 4.2e-04, dt 2.1s +All GPU(s): step 2916: loss 4.1094, lr 4.2e-04, dt 2.1s +All GPU(s): step 2917: loss 4.0938, lr 4.2e-04, dt 2.1s +All GPU(s): step 2918: loss 4.1016, lr 4.2e-04, dt 2.1s +All GPU(s): step 2919: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2920: loss 4.1094, lr 4.2e-04, dt 2.1s +All GPU(s): step 2921: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2922: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2923: loss 4.1055, lr 4.2e-04, dt 2.1s +All GPU(s): step 2924: loss 4.1016, lr 4.2e-04, dt 2.0s +All GPU(s): step 2925: loss 4.1055, lr 4.2e-04, dt 2.1s +All GPU(s): step 2926: loss 4.0859, lr 4.2e-04, dt 2.0s +All GPU(s): step 2927: loss 4.1133, lr 4.2e-04, dt 2.0s +All GPU(s): step 2928: loss 4.0977, lr 4.2e-04, dt 2.2s +All GPU(s): step 2929: loss 4.1250, lr 4.2e-04, dt 2.0s +All GPU(s): step 2930: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2931: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2932: loss 4.1289, lr 4.2e-04, dt 2.0s +All GPU(s): step 2933: loss 4.1172, lr 4.2e-04, dt 2.1s +All GPU(s): step 2934: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2935: loss 4.1133, lr 4.2e-04, dt 2.0s +All GPU(s): step 2936: loss 4.0977, lr 4.2e-04, dt 2.0s +All GPU(s): step 2937: loss 4.0898, lr 4.2e-04, dt 2.0s +All GPU(s): step 2938: loss 4.1016, lr 4.2e-04, dt 2.1s +All GPU(s): step 2939: loss 4.1016, lr 4.2e-04, dt 2.0s +All GPU(s): step 2940: loss 4.1055, lr 4.2e-04, dt 2.1s +All GPU(s): step 2941: loss 4.0977, lr 4.2e-04, dt 2.0s +All GPU(s): step 2942: loss 4.1133, lr 4.2e-04, dt 2.1s +All GPU(s): step 2943: loss 4.0977, lr 4.2e-04, dt 2.1s +All GPU(s): step 2944: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2945: loss 4.1172, lr 4.2e-04, dt 2.0s +All GPU(s): step 2946: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2947: loss 4.1055, lr 4.2e-04, dt 2.1s +All GPU(s): step 2948: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2949: loss 4.1094, lr 4.2e-04, dt 2.1s +All GPU(s): step 2950: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2951: loss 4.0977, lr 4.2e-04, dt 2.0s +All GPU(s): step 2952: loss 4.1016, lr 4.2e-04, dt 2.2s +All GPU(s): step 2953: loss 4.1133, lr 4.2e-04, dt 2.0s +All GPU(s): step 2954: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2955: loss 4.0977, lr 4.2e-04, dt 2.0s +All GPU(s): step 2956: loss 4.1094, lr 4.2e-04, dt 2.0s +All GPU(s): step 2957: loss 4.1094, lr 4.2e-04, dt 2.1s +All GPU(s): step 2958: loss 4.0938, lr 4.2e-04, dt 2.0s +All GPU(s): step 2959: loss 4.0938, lr 4.2e-04, dt 2.0s +All GPU(s): step 2960: loss 4.1055, lr 4.2e-04, dt 2.0s +All GPU(s): step 2961: loss 4.0938, lr 4.2e-04, dt 2.0s +All GPU(s): step 2962: loss 4.1094, lr 4.2e-04, dt 2.1s +All GPU(s): step 2963: loss 4.0938, lr 4.1e-04, dt 2.1s +All GPU(s): step 2964: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 2965: loss 4.0938, lr 4.1e-04, dt 2.0s +All GPU(s): step 2966: loss 4.1133, lr 4.1e-04, dt 2.0s +All GPU(s): step 2967: loss 4.1016, lr 4.1e-04, dt 2.1s +All GPU(s): step 2968: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 2969: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 2970: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 2971: loss 4.1016, lr 4.1e-04, dt 2.1s +All GPU(s): step 2972: loss 4.1094, lr 4.1e-04, dt 2.1s +All GPU(s): step 2973: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 2974: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 2975: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 2976: loss 4.1055, lr 4.1e-04, dt 2.1s +All GPU(s): step 2977: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 2978: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 2979: loss 4.0938, lr 4.1e-04, dt 2.0s +All GPU(s): step 2980: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 2981: loss 4.1094, lr 4.1e-04, dt 2.1s +All GPU(s): step 2982: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 2983: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 2984: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 2985: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 2986: loss 4.0977, lr 4.1e-04, dt 2.1s +All GPU(s): step 2987: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 2988: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 2989: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 2990: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 2991: loss 4.1094, lr 4.1e-04, dt 2.1s +All GPU(s): step 2992: loss 4.1172, lr 4.1e-04, dt 2.0s +All GPU(s): step 2993: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 2994: loss 4.1133, lr 4.1e-04, dt 2.0s +All GPU(s): step 2995: loss 4.1172, lr 4.1e-04, dt 2.0s +All GPU(s): step 2996: loss 4.1094, lr 4.1e-04, dt 2.1s +All GPU(s): step 2997: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 2998: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 2999: loss 4.1094, lr 4.1e-04, dt 2.0s +saving checkpoint to checkpoints/ckpt_3000.pt +All GPU(s): step 3000: loss 4.1094, lr 4.1e-04, dt 2.2s +All GPU(s): step 3001: loss 4.1133, lr 4.1e-04, dt 2.0s +All GPU(s): step 3002: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 3003: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 3004: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 3005: loss 4.1055, lr 4.1e-04, dt 2.1s +All GPU(s): step 3006: loss 4.1172, lr 4.1e-04, dt 2.0s +All GPU(s): step 3007: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3008: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 3009: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3010: loss 4.1016, lr 4.1e-04, dt 2.1s +All GPU(s): step 3011: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3012: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3013: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3014: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 3015: loss 4.1094, lr 4.1e-04, dt 2.2s +All GPU(s): step 3016: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 3017: loss 4.1172, lr 4.1e-04, dt 2.0s +All GPU(s): step 3018: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3019: loss 4.1172, lr 4.1e-04, dt 2.0s +All GPU(s): step 3020: loss 4.1133, lr 4.1e-04, dt 2.1s +All GPU(s): step 3021: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 3022: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3023: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3024: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3025: loss 4.1016, lr 4.1e-04, dt 2.1s +All GPU(s): step 3026: loss 4.1172, lr 4.1e-04, dt 2.0s +All GPU(s): step 3027: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3028: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3029: loss 4.0781, lr 4.1e-04, dt 2.1s +All GPU(s): step 3030: loss 4.1133, lr 4.1e-04, dt 2.1s +All GPU(s): step 3031: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3032: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 3033: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 3034: loss 4.0938, lr 4.1e-04, dt 2.1s +All GPU(s): step 3035: loss 4.0898, lr 4.1e-04, dt 2.0s +All GPU(s): step 3036: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 3037: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 3038: loss 4.1211, lr 4.1e-04, dt 2.0s +All GPU(s): step 3039: loss 4.0977, lr 4.1e-04, dt 2.2s +All GPU(s): step 3040: loss 4.1133, lr 4.1e-04, dt 2.0s +All GPU(s): step 3041: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3042: loss 4.0938, lr 4.1e-04, dt 2.0s +All GPU(s): step 3043: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 3044: loss 4.1094, lr 4.1e-04, dt 2.1s +All GPU(s): step 3045: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 3046: loss 4.1172, lr 4.1e-04, dt 2.0s +All GPU(s): step 3047: loss 4.1133, lr 4.1e-04, dt 2.0s +All GPU(s): step 3048: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 3049: loss 4.0977, lr 4.1e-04, dt 2.1s +All GPU(s): step 3050: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 3051: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3052: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3053: loss 4.1133, lr 4.1e-04, dt 2.0s +All GPU(s): step 3054: loss 4.1055, lr 4.1e-04, dt 2.1s +All GPU(s): step 3055: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3056: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3057: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3058: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 3059: loss 4.0977, lr 4.1e-04, dt 2.1s +All GPU(s): step 3060: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3061: loss 4.0938, lr 4.1e-04, dt 2.0s +All GPU(s): step 3062: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3063: loss 4.1016, lr 4.1e-04, dt 2.1s +All GPU(s): step 3064: loss 4.0977, lr 4.1e-04, dt 2.1s +All GPU(s): step 3065: loss 4.1133, lr 4.1e-04, dt 2.0s +All GPU(s): step 3066: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3067: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3068: loss 4.0977, lr 4.1e-04, dt 2.1s +All GPU(s): step 3069: loss 4.0938, lr 4.1e-04, dt 2.0s +All GPU(s): step 3070: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3071: loss 4.1016, lr 4.1e-04, dt 2.1s +All GPU(s): step 3072: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3073: loss 4.1094, lr 4.1e-04, dt 2.2s +All GPU(s): step 3074: loss 4.1094, lr 4.1e-04, dt 2.1s +All GPU(s): step 3075: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 3076: loss 4.1055, lr 4.1e-04, dt 2.1s +All GPU(s): step 3077: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 3078: loss 4.0938, lr 4.1e-04, dt 2.1s +All GPU(s): step 3079: loss 4.1133, lr 4.1e-04, dt 2.0s +All GPU(s): step 3080: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 3081: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3082: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 3083: loss 4.0977, lr 4.1e-04, dt 2.1s +All GPU(s): step 3084: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 3085: loss 4.1055, lr 4.1e-04, dt 2.1s +All GPU(s): step 3086: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 3087: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 3088: loss 4.0859, lr 4.1e-04, dt 2.1s +All GPU(s): step 3089: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 3090: loss 4.1172, lr 4.1e-04, dt 2.0s +All GPU(s): step 3091: loss 4.0938, lr 4.1e-04, dt 2.0s +All GPU(s): step 3092: loss 4.0977, lr 4.1e-04, dt 2.1s +All GPU(s): step 3093: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3094: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3095: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3096: loss 4.1016, lr 4.1e-04, dt 2.0s +All GPU(s): step 3097: loss 4.1055, lr 4.1e-04, dt 2.1s +All GPU(s): step 3098: loss 4.0977, lr 4.1e-04, dt 2.1s +All GPU(s): step 3099: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 3100: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3101: loss 4.1172, lr 4.1e-04, dt 2.1s +All GPU(s): step 3102: loss 4.1016, lr 4.1e-04, dt 2.1s +All GPU(s): step 3103: loss 4.1172, lr 4.1e-04, dt 2.1s +All GPU(s): step 3104: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3105: loss 4.1172, lr 4.1e-04, dt 2.0s +All GPU(s): step 3106: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3107: loss 4.1172, lr 4.1e-04, dt 2.1s +All GPU(s): step 3108: loss 4.1133, lr 4.1e-04, dt 2.0s +All GPU(s): step 3109: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3110: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 3111: loss 4.1133, lr 4.1e-04, dt 2.0s +All GPU(s): step 3112: loss 4.1016, lr 4.1e-04, dt 2.1s +All GPU(s): step 3113: loss 4.1172, lr 4.1e-04, dt 2.0s +All GPU(s): step 3114: loss 4.1211, lr 4.1e-04, dt 2.0s +All GPU(s): step 3115: loss 4.1133, lr 4.1e-04, dt 2.0s +All GPU(s): step 3116: loss 4.1016, lr 4.1e-04, dt 2.1s +All GPU(s): step 3117: loss 4.1055, lr 4.1e-04, dt 2.1s +All GPU(s): step 3118: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3119: loss 4.1133, lr 4.1e-04, dt 2.0s +All GPU(s): step 3120: loss 4.1133, lr 4.1e-04, dt 2.0s +All GPU(s): step 3121: loss 4.1133, lr 4.1e-04, dt 2.1s +All GPU(s): step 3122: loss 4.1016, lr 4.1e-04, dt 2.1s +All GPU(s): step 3123: loss 4.1094, lr 4.1e-04, dt 2.1s +All GPU(s): step 3124: loss 4.1133, lr 4.1e-04, dt 2.0s +All GPU(s): step 3125: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 3126: loss 4.1016, lr 4.1e-04, dt 2.1s +All GPU(s): step 3127: loss 4.0938, lr 4.1e-04, dt 2.0s +All GPU(s): step 3128: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3129: loss 4.1055, lr 4.1e-04, dt 2.0s +All GPU(s): step 3130: loss 4.0977, lr 4.1e-04, dt 2.0s +All GPU(s): step 3131: loss 4.1016, lr 4.1e-04, dt 2.1s +All GPU(s): step 3132: loss 4.0938, lr 4.1e-04, dt 2.0s +All GPU(s): step 3133: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 3134: loss 4.0859, lr 4.1e-04, dt 2.0s +All GPU(s): step 3135: loss 4.1133, lr 4.1e-04, dt 2.0s +All GPU(s): step 3136: loss 4.1094, lr 4.1e-04, dt 2.1s +All GPU(s): step 3137: loss 4.1094, lr 4.1e-04, dt 2.0s +All GPU(s): step 3138: loss 4.1055, lr 4.1e-04, dt 2.1s +All GPU(s): step 3139: loss 4.0898, lr 4.1e-04, dt 2.0s +All GPU(s): step 3140: loss 4.1016, lr 4.0e-04, dt 2.1s +All GPU(s): step 3141: loss 4.1133, lr 4.0e-04, dt 2.1s +All GPU(s): step 3142: loss 4.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 3143: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3144: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3145: loss 4.1133, lr 4.0e-04, dt 2.0s +All GPU(s): step 3146: loss 4.1133, lr 4.0e-04, dt 2.1s +All GPU(s): step 3147: loss 4.0938, lr 4.0e-04, dt 2.0s +All GPU(s): step 3148: loss 4.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 3149: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3150: loss 4.1094, lr 4.0e-04, dt 2.1s +All GPU(s): step 3151: loss 4.1094, lr 4.0e-04, dt 2.1s +All GPU(s): step 3152: loss 4.1016, lr 4.0e-04, dt 2.1s +All GPU(s): step 3153: loss 4.0938, lr 4.0e-04, dt 2.0s +All GPU(s): step 3154: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3155: loss 4.1055, lr 4.0e-04, dt 2.2s +All GPU(s): step 3156: loss 4.0898, lr 4.0e-04, dt 2.0s +All GPU(s): step 3157: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3158: loss 4.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 3159: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3160: loss 4.1016, lr 4.0e-04, dt 2.1s +All GPU(s): step 3161: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3162: loss 4.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 3163: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3164: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3165: loss 4.0938, lr 4.0e-04, dt 2.1s +All GPU(s): step 3166: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3167: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3168: loss 4.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 3169: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3170: loss 4.1055, lr 4.0e-04, dt 2.1s +All GPU(s): step 3171: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3172: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3173: loss 4.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 3174: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3175: loss 4.1133, lr 4.0e-04, dt 2.1s +All GPU(s): step 3176: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3177: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3178: loss 4.1133, lr 4.0e-04, dt 2.1s +All GPU(s): step 3179: loss 4.1016, lr 4.0e-04, dt 2.1s +All GPU(s): step 3180: loss 4.1094, lr 4.0e-04, dt 2.1s +All GPU(s): step 3181: loss 4.1133, lr 4.0e-04, dt 2.0s +All GPU(s): step 3182: loss 4.0977, lr 4.0e-04, dt 2.1s +All GPU(s): step 3183: loss 4.1133, lr 4.0e-04, dt 2.0s +All GPU(s): step 3184: loss 4.1055, lr 4.0e-04, dt 2.1s +All GPU(s): step 3185: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3186: loss 4.1172, lr 4.0e-04, dt 2.0s +All GPU(s): step 3187: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3188: loss 4.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 3189: loss 4.1055, lr 4.0e-04, dt 2.1s +All GPU(s): step 3190: loss 4.1016, lr 4.0e-04, dt 2.1s +All GPU(s): step 3191: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3192: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3193: loss 4.1094, lr 4.0e-04, dt 2.1s +All GPU(s): step 3194: loss 4.1094, lr 4.0e-04, dt 2.2s +All GPU(s): step 3195: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3196: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3197: loss 4.1172, lr 4.0e-04, dt 2.0s +All GPU(s): step 3198: loss 4.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 3199: loss 4.4102, lr 4.0e-04, dt 2.1s +All GPU(s): step 3200: loss 4.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 3201: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3202: loss 4.1133, lr 4.0e-04, dt 2.0s +All GPU(s): step 3203: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3204: loss 4.1211, lr 4.0e-04, dt 2.1s +All GPU(s): step 3205: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3206: loss 4.1445, lr 4.0e-04, dt 2.0s +All GPU(s): step 3207: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3208: loss 4.1133, lr 4.0e-04, dt 2.1s +All GPU(s): step 3209: loss 4.1133, lr 4.0e-04, dt 2.1s +All GPU(s): step 3210: loss 4.1172, lr 4.0e-04, dt 2.0s +All GPU(s): step 3211: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3212: loss 4.1133, lr 4.0e-04, dt 2.0s +All GPU(s): step 3213: loss 4.1133, lr 4.0e-04, dt 2.1s +All GPU(s): step 3214: loss 4.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 3215: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3216: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3217: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3218: loss 4.0977, lr 4.0e-04, dt 2.1s +All GPU(s): step 3219: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3220: loss 4.1133, lr 4.0e-04, dt 2.0s +All GPU(s): step 3221: loss 4.1172, lr 4.0e-04, dt 2.0s +All GPU(s): step 3222: loss 4.1211, lr 4.0e-04, dt 2.0s +All GPU(s): step 3223: loss 4.1016, lr 4.0e-04, dt 2.1s +All GPU(s): step 3224: loss 4.0898, lr 4.0e-04, dt 2.0s +All GPU(s): step 3225: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3226: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3227: loss 4.1211, lr 4.0e-04, dt 2.0s +All GPU(s): step 3228: loss 4.1172, lr 4.0e-04, dt 2.1s +All GPU(s): step 3229: loss 4.1133, lr 4.0e-04, dt 2.0s +All GPU(s): step 3230: loss 4.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 3231: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3232: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3233: loss 4.1055, lr 4.0e-04, dt 2.1s +All GPU(s): step 3234: loss 4.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 3235: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3236: loss 4.1016, lr 4.0e-04, dt 2.1s +All GPU(s): step 3237: loss 4.1133, lr 4.0e-04, dt 2.1s +All GPU(s): step 3238: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3239: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3240: loss 4.0938, lr 4.0e-04, dt 2.0s +All GPU(s): step 3241: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3242: loss 4.0898, lr 4.0e-04, dt 2.1s +All GPU(s): step 3243: loss 4.1055, lr 4.0e-04, dt 2.1s +All GPU(s): step 3244: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3245: loss 4.1133, lr 4.0e-04, dt 2.0s +All GPU(s): step 3246: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3247: loss 4.1016, lr 4.0e-04, dt 2.1s +All GPU(s): step 3248: loss 4.1055, lr 4.0e-04, dt 2.1s +All GPU(s): step 3249: loss 4.0781, lr 4.0e-04, dt 2.0s +All GPU(s): step 3250: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3251: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3252: loss 4.1016, lr 4.0e-04, dt 2.1s +All GPU(s): step 3253: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3254: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3255: loss 4.1055, lr 4.0e-04, dt 2.1s +All GPU(s): step 3256: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3257: loss 4.0820, lr 4.0e-04, dt 2.1s +All GPU(s): step 3258: loss 4.1133, lr 4.0e-04, dt 2.0s +All GPU(s): step 3259: loss 4.0938, lr 4.0e-04, dt 2.0s +All GPU(s): step 3260: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3261: loss 4.1055, lr 4.0e-04, dt 2.1s +All GPU(s): step 3262: loss 4.1094, lr 4.0e-04, dt 2.1s +All GPU(s): step 3263: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3264: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3265: loss 4.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 3266: loss 4.1094, lr 4.0e-04, dt 2.1s +All GPU(s): step 3267: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3268: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3269: loss 4.1133, lr 4.0e-04, dt 2.0s +All GPU(s): step 3270: loss 4.1133, lr 4.0e-04, dt 2.0s +All GPU(s): step 3271: loss 4.1016, lr 4.0e-04, dt 2.1s +All GPU(s): step 3272: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3273: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3274: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3275: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3276: loss 4.1055, lr 4.0e-04, dt 2.2s +All GPU(s): step 3277: loss 4.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 3278: loss 4.1016, lr 4.0e-04, dt 2.1s +All GPU(s): step 3279: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3280: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3281: loss 4.1094, lr 4.0e-04, dt 2.1s +All GPU(s): step 3282: loss 4.0898, lr 4.0e-04, dt 2.0s +All GPU(s): step 3283: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3284: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3285: loss 4.1094, lr 4.0e-04, dt 2.1s +All GPU(s): step 3286: loss 4.1133, lr 4.0e-04, dt 2.1s +All GPU(s): step 3287: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3288: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3289: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3290: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3291: loss 4.1016, lr 4.0e-04, dt 2.1s +All GPU(s): step 3292: loss 4.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 3293: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3294: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3295: loss 4.0977, lr 4.0e-04, dt 2.1s +All GPU(s): step 3296: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3297: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3298: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3299: loss 4.0977, lr 4.0e-04, dt 2.0s +All GPU(s): step 3300: loss 4.0938, lr 4.0e-04, dt 2.1s +All GPU(s): step 3301: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3302: loss 4.1133, lr 4.0e-04, dt 2.0s +All GPU(s): step 3303: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3304: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3305: loss 4.1094, lr 4.0e-04, dt 2.1s +All GPU(s): step 3306: loss 4.0781, lr 4.0e-04, dt 2.0s +All GPU(s): step 3307: loss 4.1016, lr 4.0e-04, dt 2.0s +All GPU(s): step 3308: loss 4.1133, lr 4.0e-04, dt 2.0s +All GPU(s): step 3309: loss 4.1055, lr 4.0e-04, dt 2.0s +All GPU(s): step 3310: loss 4.1055, lr 3.9e-04, dt 2.1s +All GPU(s): step 3311: loss 4.1133, lr 3.9e-04, dt 2.0s +All GPU(s): step 3312: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3313: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3314: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3315: loss 4.0977, lr 3.9e-04, dt 2.2s +All GPU(s): step 3316: loss 4.0938, lr 3.9e-04, dt 2.0s +All GPU(s): step 3317: loss 4.1133, lr 3.9e-04, dt 2.0s +All GPU(s): step 3318: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3319: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3320: loss 4.1133, lr 3.9e-04, dt 2.3s +All GPU(s): step 3321: loss 4.0898, lr 3.9e-04, dt 2.0s +All GPU(s): step 3322: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3323: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3324: loss 4.1133, lr 3.9e-04, dt 2.1s +All GPU(s): step 3325: loss 4.1055, lr 3.9e-04, dt 2.1s +All GPU(s): step 3326: loss 4.0938, lr 3.9e-04, dt 2.0s +All GPU(s): step 3327: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3328: loss 4.0938, lr 3.9e-04, dt 2.0s +All GPU(s): step 3329: loss 4.1055, lr 3.9e-04, dt 2.1s +All GPU(s): step 3330: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3331: loss 4.1055, lr 3.9e-04, dt 2.1s +All GPU(s): step 3332: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3333: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3334: loss 4.1133, lr 3.9e-04, dt 2.1s +All GPU(s): step 3335: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3336: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3337: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3338: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3339: loss 4.1055, lr 3.9e-04, dt 2.2s +All GPU(s): step 3340: loss 4.0938, lr 3.9e-04, dt 2.0s +All GPU(s): step 3341: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3342: loss 4.1172, lr 3.9e-04, dt 2.0s +All GPU(s): step 3343: loss 4.1172, lr 3.9e-04, dt 2.0s +All GPU(s): step 3344: loss 4.1055, lr 3.9e-04, dt 2.1s +All GPU(s): step 3345: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3346: loss 4.1172, lr 3.9e-04, dt 2.0s +All GPU(s): step 3347: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3348: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3349: loss 4.1055, lr 3.9e-04, dt 2.1s +All GPU(s): step 3350: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3351: loss 4.1133, lr 3.9e-04, dt 2.0s +All GPU(s): step 3352: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3353: loss 4.1133, lr 3.9e-04, dt 2.1s +All GPU(s): step 3354: loss 4.1094, lr 3.9e-04, dt 2.1s +All GPU(s): step 3355: loss 4.1172, lr 3.9e-04, dt 2.0s +All GPU(s): step 3356: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3357: loss 4.1133, lr 3.9e-04, dt 2.0s +All GPU(s): step 3358: loss 4.1172, lr 3.9e-04, dt 2.1s +All GPU(s): step 3359: loss 4.1094, lr 3.9e-04, dt 2.1s +All GPU(s): step 3360: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3361: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3362: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3363: loss 4.0938, lr 3.9e-04, dt 2.1s +All GPU(s): step 3364: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3365: loss 4.1172, lr 3.9e-04, dt 2.0s +All GPU(s): step 3366: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3367: loss 4.1133, lr 3.9e-04, dt 2.0s +All GPU(s): step 3368: loss 4.1055, lr 3.9e-04, dt 2.2s +All GPU(s): step 3369: loss 4.1055, lr 3.9e-04, dt 2.1s +All GPU(s): step 3370: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3371: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3372: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3373: loss 4.0977, lr 3.9e-04, dt 2.1s +All GPU(s): step 3374: loss 4.0938, lr 3.9e-04, dt 2.0s +All GPU(s): step 3375: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3376: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3377: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3378: loss 4.1133, lr 3.9e-04, dt 2.1s +All GPU(s): step 3379: loss 4.0938, lr 3.9e-04, dt 2.0s +All GPU(s): step 3380: loss 4.0820, lr 3.9e-04, dt 2.0s +All GPU(s): step 3381: loss 4.1133, lr 3.9e-04, dt 2.0s +All GPU(s): step 3382: loss 4.1055, lr 3.9e-04, dt 2.1s +All GPU(s): step 3383: loss 4.1016, lr 3.9e-04, dt 2.1s +All GPU(s): step 3384: loss 4.1133, lr 3.9e-04, dt 2.0s +All GPU(s): step 3385: loss 4.0938, lr 3.9e-04, dt 2.0s +All GPU(s): step 3386: loss 4.1133, lr 3.9e-04, dt 2.0s +All GPU(s): step 3387: loss 4.0781, lr 3.9e-04, dt 2.1s +All GPU(s): step 3388: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3389: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3390: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3391: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3392: loss 4.0859, lr 3.9e-04, dt 2.1s +All GPU(s): step 3393: loss 4.1172, lr 3.9e-04, dt 2.0s +All GPU(s): step 3394: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3395: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3396: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3397: loss 4.0898, lr 3.9e-04, dt 2.1s +All GPU(s): step 3398: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3399: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3400: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3401: loss 4.1172, lr 3.9e-04, dt 2.1s +All GPU(s): step 3402: loss 4.1016, lr 3.9e-04, dt 2.1s +All GPU(s): step 3403: loss 4.0938, lr 3.9e-04, dt 2.0s +All GPU(s): step 3404: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3405: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3406: loss 4.0977, lr 3.9e-04, dt 2.1s +All GPU(s): step 3407: loss 4.1094, lr 3.9e-04, dt 2.1s +All GPU(s): step 3408: loss 4.1133, lr 3.9e-04, dt 2.0s +All GPU(s): step 3409: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3410: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3411: loss 4.1133, lr 3.9e-04, dt 2.1s +All GPU(s): step 3412: loss 4.0859, lr 3.9e-04, dt 2.1s +All GPU(s): step 3413: loss 4.1133, lr 3.9e-04, dt 2.0s +All GPU(s): step 3414: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3415: loss 4.1172, lr 3.9e-04, dt 2.0s +All GPU(s): step 3416: loss 4.1055, lr 3.9e-04, dt 2.1s +All GPU(s): step 3417: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3418: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3419: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3420: loss 4.0938, lr 3.9e-04, dt 2.0s +All GPU(s): step 3421: loss 4.1133, lr 3.9e-04, dt 2.2s +All GPU(s): step 3422: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3423: loss 4.0938, lr 3.9e-04, dt 2.0s +All GPU(s): step 3424: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3425: loss 4.1133, lr 3.9e-04, dt 2.0s +All GPU(s): step 3426: loss 4.1055, lr 3.9e-04, dt 2.2s +All GPU(s): step 3427: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3428: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3429: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3430: loss 4.0898, lr 3.9e-04, dt 2.0s +All GPU(s): step 3431: loss 4.1133, lr 3.9e-04, dt 2.1s +All GPU(s): step 3432: loss 4.1172, lr 3.9e-04, dt 2.0s +All GPU(s): step 3433: loss 4.1211, lr 3.9e-04, dt 2.0s +All GPU(s): step 3434: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3435: loss 4.1094, lr 3.9e-04, dt 2.1s +All GPU(s): step 3436: loss 4.1016, lr 3.9e-04, dt 2.1s +All GPU(s): step 3437: loss 4.0938, lr 3.9e-04, dt 2.0s +All GPU(s): step 3438: loss 4.1133, lr 3.9e-04, dt 2.0s +All GPU(s): step 3439: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3440: loss 4.1094, lr 3.9e-04, dt 2.1s +All GPU(s): step 3441: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3442: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3443: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3444: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3445: loss 4.1016, lr 3.9e-04, dt 2.1s +All GPU(s): step 3446: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3447: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3448: loss 4.0898, lr 3.9e-04, dt 2.0s +All GPU(s): step 3449: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3450: loss 4.1055, lr 3.9e-04, dt 2.1s +All GPU(s): step 3451: loss 4.1055, lr 3.9e-04, dt 2.1s +All GPU(s): step 3452: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3453: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3454: loss 4.1172, lr 3.9e-04, dt 2.1s +All GPU(s): step 3455: loss 4.1133, lr 3.9e-04, dt 2.1s +All GPU(s): step 3456: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3457: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3458: loss 4.1172, lr 3.9e-04, dt 2.0s +All GPU(s): step 3459: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3460: loss 4.1172, lr 3.9e-04, dt 2.1s +All GPU(s): step 3461: loss 4.1133, lr 3.9e-04, dt 2.0s +All GPU(s): step 3462: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3463: loss 4.1133, lr 3.9e-04, dt 2.0s +All GPU(s): step 3464: loss 4.1133, lr 3.9e-04, dt 2.0s +All GPU(s): step 3465: loss 4.1094, lr 3.9e-04, dt 2.1s +All GPU(s): step 3466: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3467: loss 4.1055, lr 3.9e-04, dt 2.0s +All GPU(s): step 3468: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3469: loss 4.0859, lr 3.9e-04, dt 2.1s +All GPU(s): step 3470: loss 4.0859, lr 3.9e-04, dt 2.1s +All GPU(s): step 3471: loss 4.1016, lr 3.9e-04, dt 2.0s +All GPU(s): step 3472: loss 4.0977, lr 3.9e-04, dt 2.0s +All GPU(s): step 3473: loss 4.1094, lr 3.9e-04, dt 2.0s +All GPU(s): step 3474: loss 4.1094, lr 3.9e-04, dt 2.1s +All GPU(s): step 3475: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3476: loss 4.0859, lr 3.8e-04, dt 2.0s +All GPU(s): step 3477: loss 4.1250, lr 3.8e-04, dt 2.0s +All GPU(s): step 3478: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3479: loss 4.1562, lr 3.8e-04, dt 2.1s +All GPU(s): step 3480: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3481: loss 4.1211, lr 3.8e-04, dt 2.0s +All GPU(s): step 3482: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3483: loss 4.1406, lr 3.8e-04, dt 2.0s +All GPU(s): step 3484: loss 4.0996, lr 3.8e-04, dt 2.1s +All GPU(s): step 3485: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3486: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3487: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3488: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3489: loss 4.1172, lr 3.8e-04, dt 2.1s +All GPU(s): step 3490: loss 4.1016, lr 3.8e-04, dt 2.0s +All GPU(s): step 3491: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3492: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3493: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3494: loss 4.1172, lr 3.8e-04, dt 2.1s +All GPU(s): step 3495: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3496: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3497: loss 4.1211, lr 3.8e-04, dt 2.0s +All GPU(s): step 3498: loss 4.0938, lr 3.8e-04, dt 2.0s +All GPU(s): step 3499: loss 4.1172, lr 3.8e-04, dt 2.1s +All GPU(s): step 3500: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3501: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3502: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3503: loss 4.0977, lr 3.8e-04, dt 2.1s +All GPU(s): step 3504: loss 4.1094, lr 3.8e-04, dt 2.1s +All GPU(s): step 3505: loss 4.1016, lr 3.8e-04, dt 2.0s +All GPU(s): step 3506: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3507: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3508: loss 4.1172, lr 3.8e-04, dt 2.1s +All GPU(s): step 3509: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3510: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3511: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3512: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3513: loss 4.1094, lr 3.8e-04, dt 2.1s +All GPU(s): step 3514: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3515: loss 4.0938, lr 3.8e-04, dt 2.0s +All GPU(s): step 3516: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3517: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3518: loss 4.1055, lr 3.8e-04, dt 2.1s +All GPU(s): step 3519: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3520: loss 4.1016, lr 3.8e-04, dt 2.0s +All GPU(s): step 3521: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3522: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3523: loss 4.1016, lr 3.8e-04, dt 2.1s +All GPU(s): step 3524: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3525: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3526: loss 4.1211, lr 3.8e-04, dt 2.0s +All GPU(s): step 3527: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3528: loss 4.1133, lr 3.8e-04, dt 2.1s +All GPU(s): step 3529: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3530: loss 4.0977, lr 3.8e-04, dt 2.0s +All GPU(s): step 3531: loss 4.0781, lr 3.8e-04, dt 2.0s +All GPU(s): step 3532: loss 4.1133, lr 3.8e-04, dt 2.1s +All GPU(s): step 3533: loss 4.1055, lr 3.8e-04, dt 2.1s +All GPU(s): step 3534: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3535: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3536: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3537: loss 4.1133, lr 3.8e-04, dt 2.1s +All GPU(s): step 3538: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3539: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3540: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3541: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3542: loss 4.1211, lr 3.8e-04, dt 2.1s +All GPU(s): step 3543: loss 4.1133, lr 3.8e-04, dt 2.1s +All GPU(s): step 3544: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3545: loss 4.1172, lr 3.8e-04, dt 2.1s +All GPU(s): step 3546: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3547: loss 4.1172, lr 3.8e-04, dt 2.1s +All GPU(s): step 3548: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3549: loss 4.1211, lr 3.8e-04, dt 2.0s +All GPU(s): step 3550: loss 4.1211, lr 3.8e-04, dt 2.0s +All GPU(s): step 3551: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3552: loss 4.1172, lr 3.8e-04, dt 2.1s +All GPU(s): step 3553: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3554: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3555: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3556: loss 4.1172, lr 3.8e-04, dt 2.1s +All GPU(s): step 3557: loss 4.1094, lr 3.8e-04, dt 2.1s +All GPU(s): step 3558: loss 4.1016, lr 3.8e-04, dt 2.1s +All GPU(s): step 3559: loss 4.1016, lr 3.8e-04, dt 2.0s +All GPU(s): step 3560: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3561: loss 4.1133, lr 3.8e-04, dt 2.1s +All GPU(s): step 3562: loss 4.1133, lr 3.8e-04, dt 2.1s +All GPU(s): step 3563: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3564: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3565: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3566: loss 4.1094, lr 3.8e-04, dt 2.1s +All GPU(s): step 3567: loss 4.1211, lr 3.8e-04, dt 2.0s +All GPU(s): step 3568: loss 4.0938, lr 3.8e-04, dt 2.0s +All GPU(s): step 3569: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3570: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3571: loss 4.1133, lr 3.8e-04, dt 2.2s +All GPU(s): step 3572: loss 4.1211, lr 3.8e-04, dt 2.0s +All GPU(s): step 3573: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3574: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3575: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3576: loss 4.1094, lr 3.8e-04, dt 2.1s +All GPU(s): step 3577: loss 4.1211, lr 3.8e-04, dt 2.1s +All GPU(s): step 3578: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3579: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3580: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3581: loss 4.1094, lr 3.8e-04, dt 2.1s +All GPU(s): step 3582: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3583: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3584: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3585: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3586: loss 4.1094, lr 3.8e-04, dt 2.1s +All GPU(s): step 3587: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3588: loss 4.1211, lr 3.8e-04, dt 2.0s +All GPU(s): step 3589: loss 4.0898, lr 3.8e-04, dt 2.0s +All GPU(s): step 3590: loss 4.1172, lr 3.8e-04, dt 2.1s +All GPU(s): step 3591: loss 4.1055, lr 3.8e-04, dt 2.1s +All GPU(s): step 3592: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3593: loss 4.1211, lr 3.8e-04, dt 2.0s +All GPU(s): step 3594: loss 4.1211, lr 3.8e-04, dt 2.0s +All GPU(s): step 3595: loss 4.1133, lr 3.8e-04, dt 2.1s +All GPU(s): step 3596: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3597: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3598: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3599: loss 4.1211, lr 3.8e-04, dt 2.0s +All GPU(s): step 3600: loss 4.1133, lr 3.8e-04, dt 2.2s +All GPU(s): step 3601: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3602: loss 4.1250, lr 3.8e-04, dt 2.0s +All GPU(s): step 3603: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3604: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3605: loss 4.0977, lr 3.8e-04, dt 2.1s +All GPU(s): step 3606: loss 4.1211, lr 3.8e-04, dt 2.0s +All GPU(s): step 3607: loss 4.1172, lr 3.8e-04, dt 2.1s +All GPU(s): step 3608: loss 4.1094, lr 3.8e-04, dt 2.1s +All GPU(s): step 3609: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3610: loss 4.1055, lr 3.8e-04, dt 2.1s +All GPU(s): step 3611: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3612: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3613: loss 4.0977, lr 3.8e-04, dt 2.0s +All GPU(s): step 3614: loss 4.1016, lr 3.8e-04, dt 2.0s +All GPU(s): step 3615: loss 4.1094, lr 3.8e-04, dt 2.1s +All GPU(s): step 3616: loss 4.1250, lr 3.8e-04, dt 2.0s +All GPU(s): step 3617: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3618: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3619: loss 4.1055, lr 3.8e-04, dt 2.1s +All GPU(s): step 3620: loss 4.1133, lr 3.8e-04, dt 2.1s +All GPU(s): step 3621: loss 4.1172, lr 3.8e-04, dt 2.0s +All GPU(s): step 3622: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3623: loss 4.1016, lr 3.8e-04, dt 2.0s +All GPU(s): step 3624: loss 4.1094, lr 3.8e-04, dt 2.1s +All GPU(s): step 3625: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3626: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3627: loss 4.1055, lr 3.8e-04, dt 2.0s +All GPU(s): step 3628: loss 4.1289, lr 3.8e-04, dt 2.0s +All GPU(s): step 3629: loss 4.1172, lr 3.8e-04, dt 2.1s +All GPU(s): step 3630: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3631: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3632: loss 4.1133, lr 3.8e-04, dt 2.0s +All GPU(s): step 3633: loss 4.1094, lr 3.8e-04, dt 2.0s +All GPU(s): step 3634: loss 4.1133, lr 3.8e-04, dt 2.1s +All GPU(s): step 3635: loss 4.1211, lr 3.7e-04, dt 2.0s +All GPU(s): step 3636: loss 4.1172, lr 3.7e-04, dt 2.0s +All GPU(s): step 3637: loss 4.1094, lr 3.7e-04, dt 2.0s +All GPU(s): step 3638: loss 4.1133, lr 3.7e-04, dt 2.0s +All GPU(s): step 3639: loss 4.1172, lr 3.7e-04, dt 2.1s +All GPU(s): step 3640: loss 4.1133, lr 3.7e-04, dt 2.0s +All GPU(s): step 3641: loss 4.1055, lr 3.7e-04, dt 2.0s +All GPU(s): step 3642: loss 4.1172, lr 3.7e-04, dt 2.0s +All GPU(s): step 3643: loss 4.1055, lr 3.7e-04, dt 2.1s +All GPU(s): step 3644: loss 4.1094, lr 3.7e-04, dt 2.1s +All GPU(s): step 3645: loss 4.1250, lr 3.7e-04, dt 2.0s +All GPU(s): step 3646: loss 4.1016, lr 3.7e-04, dt 2.0s +All GPU(s): step 3647: loss 4.1133, lr 3.7e-04, dt 2.0s +All GPU(s): step 3648: loss 4.1133, lr 3.7e-04, dt 2.0s +All GPU(s): step 3649: loss 4.0977, lr 3.7e-04, dt 2.1s +All GPU(s): step 3650: loss 4.1172, lr 3.7e-04, dt 2.0s +All GPU(s): step 3651: loss 4.1055, lr 3.7e-04, dt 2.1s +All GPU(s): step 3652: loss 4.1016, lr 3.7e-04, dt 2.1s +All GPU(s): step 3653: loss 4.1211, lr 3.7e-04, dt 2.1s +All GPU(s): step 3654: loss 4.1211, lr 3.7e-04, dt 2.0s +All GPU(s): step 3655: loss 4.1094, lr 3.7e-04, dt 2.0s +All GPU(s): step 3656: loss 4.1055, lr 3.7e-04, dt 2.0s +All GPU(s): step 3657: loss 4.1094, lr 3.7e-04, dt 2.0s +All GPU(s): step 3658: loss 4.1211, lr 3.7e-04, dt 2.1s +All GPU(s): step 3659: loss 4.1328, lr 3.7e-04, dt 2.0s +All GPU(s): step 3660: loss 4.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 3661: loss 4.1484, lr 3.7e-04, dt 2.0s +All GPU(s): step 3662: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3663: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3664: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3665: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3666: loss 4.1484, lr 3.7e-04, dt 2.0s +All GPU(s): step 3667: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3668: loss 4.1523, lr 3.7e-04, dt 2.1s +All GPU(s): step 3669: loss 4.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 3670: loss 4.1484, lr 3.7e-04, dt 2.0s +All GPU(s): step 3671: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3672: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3673: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3674: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3675: loss 4.1328, lr 3.7e-04, dt 2.0s +All GPU(s): step 3676: loss 4.1328, lr 3.7e-04, dt 2.0s +All GPU(s): step 3677: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3678: loss 4.1328, lr 3.7e-04, dt 2.1s +All GPU(s): step 3679: loss 4.1562, lr 3.7e-04, dt 2.0s +All GPU(s): step 3680: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3681: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3682: loss 4.1445, lr 3.7e-04, dt 2.1s +All GPU(s): step 3683: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3684: loss 4.1289, lr 3.7e-04, dt 2.0s +All GPU(s): step 3685: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3686: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3687: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3688: loss 4.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 3689: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3690: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3691: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3692: loss 4.1445, lr 3.7e-04, dt 2.1s +All GPU(s): step 3693: loss 4.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 3694: loss 4.1250, lr 3.7e-04, dt 2.1s +All GPU(s): step 3695: loss 4.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 3696: loss 4.1328, lr 3.7e-04, dt 2.1s +All GPU(s): step 3697: loss 4.1289, lr 3.7e-04, dt 2.2s +All GPU(s): step 3698: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3699: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3700: loss 4.1328, lr 3.7e-04, dt 2.0s +All GPU(s): step 3701: loss 4.1484, lr 3.7e-04, dt 2.1s +All GPU(s): step 3702: loss 4.1289, lr 3.7e-04, dt 2.1s +All GPU(s): step 3703: loss 4.1484, lr 3.7e-04, dt 2.1s +All GPU(s): step 3704: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3705: loss 4.1289, lr 3.7e-04, dt 2.0s +All GPU(s): step 3706: loss 4.1328, lr 3.7e-04, dt 2.1s +All GPU(s): step 3707: loss 4.1445, lr 3.7e-04, dt 2.1s +All GPU(s): step 3708: loss 4.1406, lr 3.7e-04, dt 2.1s +All GPU(s): step 3709: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3710: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3711: loss 4.1445, lr 3.7e-04, dt 2.1s +All GPU(s): step 3712: loss 4.1484, lr 3.7e-04, dt 2.0s +All GPU(s): step 3713: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3714: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3715: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3716: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3717: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3718: loss 4.1484, lr 3.7e-04, dt 2.1s +All GPU(s): step 3719: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3720: loss 4.1523, lr 3.7e-04, dt 2.0s +All GPU(s): step 3721: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3722: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3723: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3724: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3725: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3726: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3727: loss 4.1406, lr 3.7e-04, dt 2.1s +All GPU(s): step 3728: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3729: loss 4.1328, lr 3.7e-04, dt 2.0s +All GPU(s): step 3730: loss 4.1484, lr 3.7e-04, dt 2.0s +All GPU(s): step 3731: loss 4.1406, lr 3.7e-04, dt 2.1s +All GPU(s): step 3732: loss 4.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 3733: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3734: loss 4.1328, lr 3.7e-04, dt 2.0s +All GPU(s): step 3735: loss 4.1289, lr 3.7e-04, dt 2.1s +All GPU(s): step 3736: loss 4.1562, lr 3.7e-04, dt 2.1s +All GPU(s): step 3737: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3738: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3739: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3740: loss 4.1523, lr 3.7e-04, dt 2.1s +All GPU(s): step 3741: loss 4.1523, lr 3.7e-04, dt 2.1s +All GPU(s): step 3742: loss 4.1523, lr 3.7e-04, dt 2.0s +All GPU(s): step 3743: loss 4.1328, lr 3.7e-04, dt 2.0s +All GPU(s): step 3744: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3745: loss 4.1406, lr 3.7e-04, dt 2.1s +All GPU(s): step 3746: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3747: loss 4.1484, lr 3.7e-04, dt 2.0s +All GPU(s): step 3748: loss 4.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 3749: loss 4.1484, lr 3.7e-04, dt 2.1s +All GPU(s): step 3750: loss 4.1328, lr 3.7e-04, dt 2.1s +All GPU(s): step 3751: loss 4.1406, lr 3.7e-04, dt 2.1s +All GPU(s): step 3752: loss 4.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 3753: loss 4.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 3754: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3755: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3756: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3757: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3758: loss 4.1250, lr 3.7e-04, dt 2.0s +All GPU(s): step 3759: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3760: loss 4.1484, lr 3.7e-04, dt 2.1s +All GPU(s): step 3761: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3762: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3763: loss 4.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 3764: loss 4.1289, lr 3.7e-04, dt 2.0s +All GPU(s): step 3765: loss 4.1484, lr 3.7e-04, dt 2.1s +All GPU(s): step 3766: loss 4.1328, lr 3.7e-04, dt 2.0s +All GPU(s): step 3767: loss 4.1328, lr 3.7e-04, dt 2.0s +All GPU(s): step 3768: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3769: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3770: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3771: loss 4.1250, lr 3.7e-04, dt 2.0s +All GPU(s): step 3772: loss 4.1445, lr 3.7e-04, dt 2.0s +All GPU(s): step 3773: loss 4.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 3774: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3775: loss 4.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 3776: loss 4.1562, lr 3.7e-04, dt 2.0s +All GPU(s): step 3777: loss 4.1250, lr 3.7e-04, dt 2.0s +All GPU(s): step 3778: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3779: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3780: loss 4.1289, lr 3.7e-04, dt 2.0s +All GPU(s): step 3781: loss 4.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 3782: loss 4.1484, lr 3.7e-04, dt 2.0s +All GPU(s): step 3783: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3784: loss 4.1445, lr 3.7e-04, dt 2.1s +All GPU(s): step 3785: loss 4.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 3786: loss 4.1289, lr 3.7e-04, dt 2.0s +All GPU(s): step 3787: loss 4.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 3788: loss 4.1523, lr 3.7e-04, dt 2.0s +All GPU(s): step 3789: loss 4.1367, lr 3.7e-04, dt 2.1s +All GPU(s): step 3790: loss 4.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 3791: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3792: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3793: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3794: loss 4.1406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3795: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3796: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3797: loss 4.1289, lr 3.6e-04, dt 2.0s +All GPU(s): step 3798: loss 4.1484, lr 3.6e-04, dt 2.1s +All GPU(s): step 3799: loss 4.1484, lr 3.6e-04, dt 2.1s +All GPU(s): step 3800: loss 4.1289, lr 3.6e-04, dt 2.0s +All GPU(s): step 3801: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3802: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3803: loss 4.1328, lr 3.6e-04, dt 2.1s +All GPU(s): step 3804: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3805: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3806: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3807: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3808: loss 4.1484, lr 3.6e-04, dt 2.1s +All GPU(s): step 3809: loss 4.1523, lr 3.6e-04, dt 2.0s +All GPU(s): step 3810: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3811: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3812: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3813: loss 4.1406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3814: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3815: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3816: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3817: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3818: loss 4.1484, lr 3.6e-04, dt 2.1s +All GPU(s): step 3819: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3820: loss 4.1328, lr 3.6e-04, dt 2.0s +All GPU(s): step 3821: loss 4.1328, lr 3.6e-04, dt 2.0s +All GPU(s): step 3822: loss 4.1484, lr 3.6e-04, dt 2.1s +All GPU(s): step 3823: loss 4.1406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3824: loss 4.1367, lr 3.6e-04, dt 2.0s +All GPU(s): step 3825: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3826: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3827: loss 4.1406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3828: loss 4.1406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3829: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3830: loss 4.1328, lr 3.6e-04, dt 2.0s +All GPU(s): step 3831: loss 4.1328, lr 3.6e-04, dt 2.0s +All GPU(s): step 3832: loss 4.1406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3833: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3834: loss 4.1367, lr 3.6e-04, dt 2.0s +All GPU(s): step 3835: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3836: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3837: loss 4.1445, lr 3.6e-04, dt 2.1s +All GPU(s): step 3838: loss 4.1250, lr 3.6e-04, dt 2.0s +All GPU(s): step 3839: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3840: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3841: loss 4.1328, lr 3.6e-04, dt 2.0s +All GPU(s): step 3842: loss 4.1406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3843: loss 4.1406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3844: loss 4.1406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3845: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3846: loss 4.1367, lr 3.6e-04, dt 2.1s +All GPU(s): step 3847: loss 4.1523, lr 3.6e-04, dt 2.1s +All GPU(s): step 3848: loss 4.1523, lr 3.6e-04, dt 2.1s +All GPU(s): step 3849: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3850: loss 4.1367, lr 3.6e-04, dt 2.0s +All GPU(s): step 3851: loss 4.1367, lr 3.6e-04, dt 2.0s +All GPU(s): step 3852: loss 4.1445, lr 3.6e-04, dt 2.1s +All GPU(s): step 3853: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3854: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3855: loss 4.1250, lr 3.6e-04, dt 2.0s +All GPU(s): step 3856: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3857: loss 4.1445, lr 3.6e-04, dt 2.1s +All GPU(s): step 3858: loss 4.1367, lr 3.6e-04, dt 2.0s +All GPU(s): step 3859: loss 4.1367, lr 3.6e-04, dt 2.0s +All GPU(s): step 3860: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3861: loss 4.1406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3862: loss 4.1445, lr 3.6e-04, dt 2.1s +All GPU(s): step 3863: loss 4.1328, lr 3.6e-04, dt 2.0s +All GPU(s): step 3864: loss 4.1328, lr 3.6e-04, dt 2.0s +All GPU(s): step 3865: loss 4.1250, lr 3.6e-04, dt 2.0s +All GPU(s): step 3866: loss 4.1484, lr 3.6e-04, dt 2.1s +All GPU(s): step 3867: loss 4.1523, lr 3.6e-04, dt 2.0s +All GPU(s): step 3868: loss 4.1367, lr 3.6e-04, dt 2.0s +All GPU(s): step 3869: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3870: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3871: loss 4.1484, lr 3.6e-04, dt 2.1s +All GPU(s): step 3872: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3873: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3874: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3875: loss 4.1406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3876: loss 4.1367, lr 3.6e-04, dt 2.1s +All GPU(s): step 3877: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3878: loss 4.1367, lr 3.6e-04, dt 2.0s +All GPU(s): step 3879: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3880: loss 4.1484, lr 3.6e-04, dt 2.1s +All GPU(s): step 3881: loss 4.1406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3882: loss 4.1328, lr 3.6e-04, dt 2.1s +All GPU(s): step 3883: loss 4.1328, lr 3.6e-04, dt 2.1s +All GPU(s): step 3884: loss 4.1367, lr 3.6e-04, dt 2.0s +All GPU(s): step 3885: loss 4.1445, lr 3.6e-04, dt 2.1s +All GPU(s): step 3886: loss 4.1367, lr 3.6e-04, dt 2.1s +All GPU(s): step 3887: loss 4.1289, lr 3.6e-04, dt 2.0s +All GPU(s): step 3888: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3889: loss 4.1367, lr 3.6e-04, dt 2.0s +All GPU(s): step 3890: loss 4.1289, lr 3.6e-04, dt 2.1s +All GPU(s): step 3891: loss 4.1445, lr 3.6e-04, dt 2.1s +All GPU(s): step 3892: loss 4.1367, lr 3.6e-04, dt 2.0s +All GPU(s): step 3893: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3894: loss 4.1289, lr 3.6e-04, dt 2.0s +All GPU(s): step 3895: loss 4.1367, lr 3.6e-04, dt 2.1s +All GPU(s): step 3896: loss 4.1367, lr 3.6e-04, dt 2.1s +All GPU(s): step 3897: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3898: loss 4.1289, lr 3.6e-04, dt 2.0s +All GPU(s): step 3899: loss 4.1367, lr 3.6e-04, dt 2.0s +All GPU(s): step 3900: loss 4.1328, lr 3.6e-04, dt 2.1s +All GPU(s): step 3901: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3902: loss 4.1406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3903: loss 4.1289, lr 3.6e-04, dt 2.0s +All GPU(s): step 3904: loss 4.1289, lr 3.6e-04, dt 2.0s +All GPU(s): step 3905: loss 4.1289, lr 3.6e-04, dt 2.1s +All GPU(s): step 3906: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3907: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3908: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3909: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3910: loss 4.1406, lr 3.6e-04, dt 2.2s +All GPU(s): step 3911: loss 4.1367, lr 3.6e-04, dt 2.0s +All GPU(s): step 3912: loss 4.1328, lr 3.6e-04, dt 2.0s +All GPU(s): step 3913: loss 4.1328, lr 3.6e-04, dt 2.0s +All GPU(s): step 3914: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3915: loss 4.1406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3916: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3917: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3918: loss 4.1484, lr 3.6e-04, dt 2.1s +All GPU(s): step 3919: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3920: loss 4.1367, lr 3.6e-04, dt 2.1s +All GPU(s): step 3921: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3922: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3923: loss 4.1367, lr 3.6e-04, dt 2.0s +All GPU(s): step 3924: loss 4.1367, lr 3.6e-04, dt 2.1s +All GPU(s): step 3925: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3926: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3927: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3928: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3929: loss 4.1562, lr 3.6e-04, dt 2.1s +All GPU(s): step 3930: loss 4.1406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3931: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3932: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3933: loss 4.1289, lr 3.6e-04, dt 2.0s +All GPU(s): step 3934: loss 4.1484, lr 3.6e-04, dt 2.1s +All GPU(s): step 3935: loss 4.1367, lr 3.6e-04, dt 2.0s +All GPU(s): step 3936: loss 4.1328, lr 3.6e-04, dt 2.0s +All GPU(s): step 3937: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3938: loss 4.1445, lr 3.6e-04, dt 2.0s +All GPU(s): step 3939: loss 4.1445, lr 3.6e-04, dt 2.1s +All GPU(s): step 3940: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3941: loss 4.1484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3942: loss 4.1406, lr 3.6e-04, dt 2.0s +All GPU(s): step 3943: loss 4.1367, lr 3.5e-04, dt 2.0s +All GPU(s): step 3944: loss 4.1445, lr 3.5e-04, dt 2.1s +All GPU(s): step 3945: loss 4.1523, lr 3.5e-04, dt 2.0s +All GPU(s): step 3946: loss 4.1523, lr 3.5e-04, dt 2.0s +All GPU(s): step 3947: loss 4.1523, lr 3.5e-04, dt 2.0s +All GPU(s): step 3948: loss 4.1367, lr 3.5e-04, dt 2.1s +All GPU(s): step 3949: loss 4.1406, lr 3.5e-04, dt 2.1s +All GPU(s): step 3950: loss 4.1484, lr 3.5e-04, dt 2.0s +All GPU(s): step 3951: loss 4.1445, lr 3.5e-04, dt 2.0s +All GPU(s): step 3952: loss 4.1328, lr 3.5e-04, dt 2.0s +All GPU(s): step 3953: loss 4.1523, lr 3.5e-04, dt 2.1s +All GPU(s): step 3954: loss 4.1406, lr 3.5e-04, dt 2.1s +All GPU(s): step 3955: loss 4.1602, lr 3.5e-04, dt 2.0s +All GPU(s): step 3956: loss 4.1484, lr 3.5e-04, dt 2.0s +All GPU(s): step 3957: loss 4.1367, lr 3.5e-04, dt 2.0s +All GPU(s): step 3958: loss 4.1680, lr 3.5e-04, dt 2.1s +All GPU(s): step 3959: loss 4.1484, lr 3.5e-04, dt 2.0s +All GPU(s): step 3960: loss 4.1484, lr 3.5e-04, dt 2.1s +All GPU(s): step 3961: loss 4.1484, lr 3.5e-04, dt 2.1s +All GPU(s): step 3962: loss 4.1406, lr 3.5e-04, dt 2.1s +All GPU(s): step 3963: loss 4.1602, lr 3.5e-04, dt 2.1s +All GPU(s): step 3964: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 3965: loss 4.1367, lr 3.5e-04, dt 2.0s +All GPU(s): step 3966: loss 4.1953, lr 3.5e-04, dt 2.0s +All GPU(s): step 3967: loss 4.1445, lr 3.5e-04, dt 2.0s +All GPU(s): step 3968: loss 4.1484, lr 3.5e-04, dt 2.1s +All GPU(s): step 3969: loss 4.1484, lr 3.5e-04, dt 2.0s +All GPU(s): step 3970: loss 4.1445, lr 3.5e-04, dt 2.1s +All GPU(s): step 3971: loss 4.1484, lr 3.5e-04, dt 2.1s +All GPU(s): step 3972: loss 4.1367, lr 3.5e-04, dt 2.0s +All GPU(s): step 3973: loss 4.1484, lr 3.5e-04, dt 2.1s +All GPU(s): step 3974: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 3975: loss 4.1523, lr 3.5e-04, dt 2.0s +All GPU(s): step 3976: loss 4.1445, lr 3.5e-04, dt 2.0s +All GPU(s): step 3977: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 3978: loss 4.1328, lr 3.5e-04, dt 2.1s +All GPU(s): step 3979: loss 4.1445, lr 3.5e-04, dt 2.0s +All GPU(s): step 3980: loss 4.1484, lr 3.5e-04, dt 2.0s +All GPU(s): step 3981: loss 4.1445, lr 3.5e-04, dt 2.0s +All GPU(s): step 3982: loss 4.1328, lr 3.5e-04, dt 2.1s +All GPU(s): step 3983: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 3984: loss 4.1367, lr 3.5e-04, dt 2.0s +All GPU(s): step 3985: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 3986: loss 4.1602, lr 3.5e-04, dt 2.1s +All GPU(s): step 3987: loss 4.1367, lr 3.5e-04, dt 2.1s +All GPU(s): step 3988: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 3989: loss 4.1406, lr 3.5e-04, dt 2.1s +All GPU(s): step 3990: loss 4.1367, lr 3.5e-04, dt 2.0s +All GPU(s): step 3991: loss 4.1484, lr 3.5e-04, dt 2.0s +All GPU(s): step 3992: loss 4.1523, lr 3.5e-04, dt 2.1s +All GPU(s): step 3993: loss 4.1484, lr 3.5e-04, dt 2.0s +All GPU(s): step 3994: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 3995: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 3996: loss 4.1562, lr 3.5e-04, dt 2.0s +All GPU(s): step 3997: loss 4.1484, lr 3.5e-04, dt 2.2s +All GPU(s): step 3998: loss 4.1250, lr 3.5e-04, dt 2.1s +All GPU(s): step 3999: loss 4.1367, lr 3.5e-04, dt 2.1s +saving checkpoint to checkpoints/ckpt_4000.pt +All GPU(s): step 4000: loss 4.1445, lr 3.5e-04, dt 2.1s +All GPU(s): step 4001: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4002: loss 4.1367, lr 3.5e-04, dt 2.1s +All GPU(s): step 4003: loss 4.1445, lr 3.5e-04, dt 2.0s +All GPU(s): step 4004: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4005: loss 4.1445, lr 3.5e-04, dt 2.0s +All GPU(s): step 4006: loss 4.1484, lr 3.5e-04, dt 2.0s +All GPU(s): step 4007: loss 4.1367, lr 3.5e-04, dt 2.1s +All GPU(s): step 4008: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4009: loss 4.1250, lr 3.5e-04, dt 2.0s +All GPU(s): step 4010: loss 4.1367, lr 3.5e-04, dt 2.0s +All GPU(s): step 4011: loss 4.1406, lr 3.5e-04, dt 2.1s +All GPU(s): step 4012: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4013: loss 4.1445, lr 3.5e-04, dt 2.0s +All GPU(s): step 4014: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4015: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4016: loss 4.1445, lr 3.5e-04, dt 2.1s +All GPU(s): step 4017: loss 4.1484, lr 3.5e-04, dt 2.0s +All GPU(s): step 4018: loss 4.1367, lr 3.5e-04, dt 2.0s +All GPU(s): step 4019: loss 4.1445, lr 3.5e-04, dt 2.0s +All GPU(s): step 4020: loss 4.1406, lr 3.5e-04, dt 2.1s +All GPU(s): step 4021: loss 4.1484, lr 3.5e-04, dt 2.1s +All GPU(s): step 4022: loss 4.1367, lr 3.5e-04, dt 2.1s +All GPU(s): step 4023: loss 4.1328, lr 3.5e-04, dt 2.0s +All GPU(s): step 4024: loss 4.1367, lr 3.5e-04, dt 2.0s +All GPU(s): step 4025: loss 4.1328, lr 3.5e-04, dt 2.1s +All GPU(s): step 4026: loss 4.1445, lr 3.5e-04, dt 2.1s +All GPU(s): step 4027: loss 4.1445, lr 3.5e-04, dt 2.0s +All GPU(s): step 4028: loss 4.1445, lr 3.5e-04, dt 2.0s +All GPU(s): step 4029: loss 4.1445, lr 3.5e-04, dt 2.0s +All GPU(s): step 4030: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4031: loss 4.1406, lr 3.5e-04, dt 2.2s +All GPU(s): step 4032: loss 4.1328, lr 3.5e-04, dt 2.0s +All GPU(s): step 4033: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4034: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4035: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4036: loss 4.1367, lr 3.5e-04, dt 2.1s +All GPU(s): step 4037: loss 4.1484, lr 3.5e-04, dt 2.0s +All GPU(s): step 4038: loss 4.1367, lr 3.5e-04, dt 2.0s +All GPU(s): step 4039: loss 4.1523, lr 3.5e-04, dt 2.0s +All GPU(s): step 4040: loss 4.1406, lr 3.5e-04, dt 2.1s +All GPU(s): step 4041: loss 4.1445, lr 3.5e-04, dt 2.1s +All GPU(s): step 4042: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4043: loss 4.1289, lr 3.5e-04, dt 2.0s +All GPU(s): step 4044: loss 4.1289, lr 3.5e-04, dt 2.0s +All GPU(s): step 4045: loss 4.1445, lr 3.5e-04, dt 2.1s +All GPU(s): step 4046: loss 4.1328, lr 3.5e-04, dt 2.0s +All GPU(s): step 4047: loss 4.1406, lr 3.5e-04, dt 2.1s +All GPU(s): step 4048: loss 4.1328, lr 3.5e-04, dt 2.1s +All GPU(s): step 4049: loss 4.1328, lr 3.5e-04, dt 2.0s +All GPU(s): step 4050: loss 4.1328, lr 3.5e-04, dt 2.1s +All GPU(s): step 4051: loss 4.1367, lr 3.5e-04, dt 2.0s +All GPU(s): step 4052: loss 4.1484, lr 3.5e-04, dt 2.0s +All GPU(s): step 4053: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4054: loss 4.1289, lr 3.5e-04, dt 2.0s +All GPU(s): step 4055: loss 4.1328, lr 3.5e-04, dt 2.1s +All GPU(s): step 4056: loss 4.1445, lr 3.5e-04, dt 2.1s +All GPU(s): step 4057: loss 4.1445, lr 3.5e-04, dt 2.0s +All GPU(s): step 4058: loss 4.1328, lr 3.5e-04, dt 2.1s +All GPU(s): step 4059: loss 4.1328, lr 3.5e-04, dt 2.1s +All GPU(s): step 4060: loss 4.1484, lr 3.5e-04, dt 2.1s +All GPU(s): step 4061: loss 4.1406, lr 3.5e-04, dt 2.1s +All GPU(s): step 4062: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4063: loss 4.1367, lr 3.5e-04, dt 2.0s +All GPU(s): step 4064: loss 4.1367, lr 3.5e-04, dt 2.1s +All GPU(s): step 4065: loss 4.1445, lr 3.5e-04, dt 2.1s +All GPU(s): step 4066: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4067: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4068: loss 4.1367, lr 3.5e-04, dt 2.1s +All GPU(s): step 4069: loss 4.1406, lr 3.5e-04, dt 2.1s +All GPU(s): step 4070: loss 4.1562, lr 3.5e-04, dt 2.1s +All GPU(s): step 4071: loss 4.1328, lr 3.5e-04, dt 2.0s +All GPU(s): step 4072: loss 4.1367, lr 3.5e-04, dt 2.0s +All GPU(s): step 4073: loss 4.1250, lr 3.5e-04, dt 2.0s +All GPU(s): step 4074: loss 4.1445, lr 3.5e-04, dt 2.2s +All GPU(s): step 4075: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4076: loss 4.1445, lr 3.5e-04, dt 2.0s +All GPU(s): step 4077: loss 4.1328, lr 3.5e-04, dt 2.0s +All GPU(s): step 4078: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4079: loss 4.1406, lr 3.5e-04, dt 2.2s +All GPU(s): step 4080: loss 4.1445, lr 3.5e-04, dt 2.1s +All GPU(s): step 4081: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4082: loss 4.1328, lr 3.5e-04, dt 2.0s +All GPU(s): step 4083: loss 4.1523, lr 3.5e-04, dt 2.0s +All GPU(s): step 4084: loss 4.1289, lr 3.5e-04, dt 2.1s +All GPU(s): step 4085: loss 4.1406, lr 3.5e-04, dt 2.1s +All GPU(s): step 4086: loss 4.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 4087: loss 4.1367, lr 3.5e-04, dt 2.0s +All GPU(s): step 4088: loss 4.1367, lr 3.5e-04, dt 2.0s +All GPU(s): step 4089: loss 4.1406, lr 3.5e-04, dt 2.1s +All GPU(s): step 4090: loss 4.1484, lr 3.5e-04, dt 2.0s +All GPU(s): step 4091: loss 4.1484, lr 3.5e-04, dt 2.0s +All GPU(s): step 4092: loss 4.1250, lr 3.5e-04, dt 2.1s +All GPU(s): step 4093: loss 4.1289, lr 3.4e-04, dt 2.1s +All GPU(s): step 4094: loss 4.1367, lr 3.4e-04, dt 2.1s +All GPU(s): step 4095: loss 4.1445, lr 3.4e-04, dt 2.0s +All GPU(s): step 4096: loss 4.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 4097: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4098: loss 4.1328, lr 3.4e-04, dt 2.1s +All GPU(s): step 4099: loss 4.1484, lr 3.4e-04, dt 2.0s +All GPU(s): step 4100: loss 4.1367, lr 3.4e-04, dt 2.1s +All GPU(s): step 4101: loss 4.1367, lr 3.4e-04, dt 2.1s +All GPU(s): step 4102: loss 4.1523, lr 3.4e-04, dt 2.1s +All GPU(s): step 4103: loss 4.1484, lr 3.4e-04, dt 2.1s +All GPU(s): step 4104: loss 4.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 4105: loss 4.1484, lr 3.4e-04, dt 2.0s +All GPU(s): step 4106: loss 4.1328, lr 3.4e-04, dt 2.0s +All GPU(s): step 4107: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4108: loss 4.1484, lr 3.4e-04, dt 2.1s +All GPU(s): step 4109: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4110: loss 4.1445, lr 3.4e-04, dt 2.0s +All GPU(s): step 4111: loss 4.1484, lr 3.4e-04, dt 2.0s +All GPU(s): step 4112: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4113: loss 4.1367, lr 3.4e-04, dt 2.1s +All GPU(s): step 4114: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4115: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4116: loss 4.1445, lr 3.4e-04, dt 2.0s +All GPU(s): step 4117: loss 4.1445, lr 3.4e-04, dt 2.0s +All GPU(s): step 4118: loss 4.1328, lr 3.4e-04, dt 2.1s +All GPU(s): step 4119: loss 4.1484, lr 3.4e-04, dt 2.0s +All GPU(s): step 4120: loss 4.1445, lr 3.4e-04, dt 2.0s +All GPU(s): step 4121: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4122: loss 4.1328, lr 3.4e-04, dt 2.1s +All GPU(s): step 4123: loss 4.1562, lr 3.4e-04, dt 2.0s +All GPU(s): step 4124: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4125: loss 4.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 4126: loss 4.1328, lr 3.4e-04, dt 2.1s +All GPU(s): step 4127: loss 4.1367, lr 3.4e-04, dt 2.1s +All GPU(s): step 4128: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4129: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4130: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4131: loss 4.1484, lr 3.4e-04, dt 2.1s +All GPU(s): step 4132: loss 4.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 4133: loss 4.1289, lr 3.4e-04, dt 2.0s +All GPU(s): step 4134: loss 4.1367, lr 3.4e-04, dt 2.1s +All GPU(s): step 4135: loss 4.1562, lr 3.4e-04, dt 2.1s +All GPU(s): step 4136: loss 4.1484, lr 3.4e-04, dt 2.1s +All GPU(s): step 4137: loss 4.1367, lr 3.4e-04, dt 2.1s +All GPU(s): step 4138: loss 4.1328, lr 3.4e-04, dt 2.0s +All GPU(s): step 4139: loss 4.1445, lr 3.4e-04, dt 2.0s +All GPU(s): step 4140: loss 4.1484, lr 3.4e-04, dt 2.0s +All GPU(s): step 4141: loss 4.1445, lr 3.4e-04, dt 2.0s +All GPU(s): step 4142: loss 4.1484, lr 3.4e-04, dt 2.1s +All GPU(s): step 4143: loss 4.1484, lr 3.4e-04, dt 2.0s +All GPU(s): step 4144: loss 4.1445, lr 3.4e-04, dt 2.0s +All GPU(s): step 4145: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4146: loss 4.1484, lr 3.4e-04, dt 2.0s +All GPU(s): step 4147: loss 4.1367, lr 3.4e-04, dt 2.1s +All GPU(s): step 4148: loss 4.1484, lr 3.4e-04, dt 2.0s +All GPU(s): step 4149: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4150: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4151: loss 4.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 4152: loss 4.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 4153: loss 4.1328, lr 3.4e-04, dt 2.1s +All GPU(s): step 4154: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4155: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4156: loss 4.1445, lr 3.4e-04, dt 2.1s +All GPU(s): step 4157: loss 4.1445, lr 3.4e-04, dt 2.0s +All GPU(s): step 4158: loss 4.1484, lr 3.4e-04, dt 2.0s +All GPU(s): step 4159: loss 4.1328, lr 3.4e-04, dt 2.0s +All GPU(s): step 4160: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4161: loss 4.1367, lr 3.4e-04, dt 2.2s +All GPU(s): step 4162: loss 4.1445, lr 3.4e-04, dt 2.1s +All GPU(s): step 4163: loss 4.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 4164: loss 4.1562, lr 3.4e-04, dt 2.0s +All GPU(s): step 4165: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4166: loss 4.1328, lr 3.4e-04, dt 2.2s +All GPU(s): step 4167: loss 4.1367, lr 3.4e-04, dt 2.1s +All GPU(s): step 4168: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4169: loss 4.1484, lr 3.4e-04, dt 2.1s +All GPU(s): step 4170: loss 4.1367, lr 3.4e-04, dt 2.1s +All GPU(s): step 4171: loss 4.1289, lr 3.4e-04, dt 2.1s +All GPU(s): step 4172: loss 4.1445, lr 3.4e-04, dt 2.0s +All GPU(s): step 4173: loss 4.1484, lr 3.4e-04, dt 2.1s +All GPU(s): step 4174: loss 4.1289, lr 3.4e-04, dt 2.0s +All GPU(s): step 4175: loss 4.1445, lr 3.4e-04, dt 2.1s +All GPU(s): step 4176: loss 4.1484, lr 3.4e-04, dt 2.1s +All GPU(s): step 4177: loss 4.1523, lr 3.4e-04, dt 2.1s +All GPU(s): step 4178: loss 4.1250, lr 3.4e-04, dt 2.0s +All GPU(s): step 4179: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4180: loss 4.1445, lr 3.4e-04, dt 2.1s +All GPU(s): step 4181: loss 4.1328, lr 3.4e-04, dt 2.0s +All GPU(s): step 4182: loss 4.1328, lr 3.4e-04, dt 2.0s +All GPU(s): step 4183: loss 4.1328, lr 3.4e-04, dt 2.0s +All GPU(s): step 4184: loss 4.1523, lr 3.4e-04, dt 2.0s +All GPU(s): step 4185: loss 4.1484, lr 3.4e-04, dt 2.1s +All GPU(s): step 4186: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4187: loss 4.1367, lr 3.4e-04, dt 2.1s +All GPU(s): step 4188: loss 4.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 4189: loss 4.1445, lr 3.4e-04, dt 2.1s +All GPU(s): step 4190: loss 4.1289, lr 3.4e-04, dt 2.2s +All GPU(s): step 4191: loss 4.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 4192: loss 4.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 4193: loss 4.1484, lr 3.4e-04, dt 2.0s +All GPU(s): step 4194: loss 4.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 4195: loss 4.1445, lr 3.4e-04, dt 2.1s +All GPU(s): step 4196: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4197: loss 4.1289, lr 3.4e-04, dt 2.0s +All GPU(s): step 4198: loss 4.1250, lr 3.4e-04, dt 2.1s +All GPU(s): step 4199: loss 4.1328, lr 3.4e-04, dt 2.1s +All GPU(s): step 4200: loss 4.1328, lr 3.4e-04, dt 2.1s +All GPU(s): step 4201: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4202: loss 4.1484, lr 3.4e-04, dt 2.0s +All GPU(s): step 4203: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4204: loss 4.1367, lr 3.4e-04, dt 2.1s +All GPU(s): step 4205: loss 4.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 4206: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4207: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4208: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4209: loss 4.1367, lr 3.4e-04, dt 2.1s +All GPU(s): step 4210: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4211: loss 4.1523, lr 3.4e-04, dt 2.0s +All GPU(s): step 4212: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4213: loss 4.1484, lr 3.4e-04, dt 2.0s +All GPU(s): step 4214: loss 4.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 4215: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4216: loss 4.1484, lr 3.4e-04, dt 2.0s +All GPU(s): step 4217: loss 4.1211, lr 3.4e-04, dt 2.0s +All GPU(s): step 4218: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4219: loss 4.1523, lr 3.4e-04, dt 2.1s +All GPU(s): step 4220: loss 4.1445, lr 3.4e-04, dt 2.0s +All GPU(s): step 4221: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4222: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4223: loss 4.1328, lr 3.4e-04, dt 2.0s +All GPU(s): step 4224: loss 4.1445, lr 3.4e-04, dt 2.1s +All GPU(s): step 4225: loss 4.1367, lr 3.4e-04, dt 2.0s +All GPU(s): step 4226: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4227: loss 4.1289, lr 3.4e-04, dt 2.0s +All GPU(s): step 4228: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4229: loss 4.1289, lr 3.4e-04, dt 2.1s +All GPU(s): step 4230: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4231: loss 4.1484, lr 3.4e-04, dt 2.1s +All GPU(s): step 4232: loss 4.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 4233: loss 4.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 4234: loss 4.1445, lr 3.4e-04, dt 2.1s +All GPU(s): step 4235: loss 4.1250, lr 3.4e-04, dt 2.0s +All GPU(s): step 4236: loss 4.1289, lr 3.4e-04, dt 2.0s +All GPU(s): step 4237: loss 4.1523, lr 3.4e-04, dt 2.0s +All GPU(s): step 4238: loss 4.1328, lr 3.4e-04, dt 2.1s +All GPU(s): step 4239: loss 4.1445, lr 3.4e-04, dt 2.0s +All GPU(s): step 4240: loss 4.1172, lr 3.4e-04, dt 2.0s +All GPU(s): step 4241: loss 4.1445, lr 3.3e-04, dt 2.1s +All GPU(s): step 4242: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4243: loss 4.1289, lr 3.3e-04, dt 2.2s +All GPU(s): step 4244: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4245: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4246: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4247: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4248: loss 4.1367, lr 3.3e-04, dt 2.1s +All GPU(s): step 4249: loss 4.1562, lr 3.3e-04, dt 2.0s +All GPU(s): step 4250: loss 4.1406, lr 3.3e-04, dt 2.0s +All GPU(s): step 4251: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4252: loss 4.1523, lr 3.3e-04, dt 2.0s +All GPU(s): step 4253: loss 4.1250, lr 3.3e-04, dt 2.1s +All GPU(s): step 4254: loss 4.1484, lr 3.3e-04, dt 2.0s +All GPU(s): step 4255: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4256: loss 4.1406, lr 3.3e-04, dt 2.1s +All GPU(s): step 4257: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4258: loss 4.1367, lr 3.3e-04, dt 2.1s +All GPU(s): step 4259: loss 4.1484, lr 3.3e-04, dt 2.0s +All GPU(s): step 4260: loss 4.1484, lr 3.3e-04, dt 2.0s +All GPU(s): step 4261: loss 4.1445, lr 3.3e-04, dt 2.0s +All GPU(s): step 4262: loss 4.1367, lr 3.3e-04, dt 2.1s +All GPU(s): step 4263: loss 4.1328, lr 3.3e-04, dt 2.1s +All GPU(s): step 4264: loss 4.1445, lr 3.3e-04, dt 2.0s +All GPU(s): step 4265: loss 4.1445, lr 3.3e-04, dt 2.0s +All GPU(s): step 4266: loss 4.1406, lr 3.3e-04, dt 2.0s +All GPU(s): step 4267: loss 4.1406, lr 3.3e-04, dt 2.1s +All GPU(s): step 4268: loss 4.1445, lr 3.3e-04, dt 2.1s +All GPU(s): step 4269: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4270: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4271: loss 4.1406, lr 3.3e-04, dt 2.1s +All GPU(s): step 4272: loss 4.1328, lr 3.3e-04, dt 2.2s +All GPU(s): step 4273: loss 4.1406, lr 3.3e-04, dt 2.0s +All GPU(s): step 4274: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4275: loss 4.1445, lr 3.3e-04, dt 2.0s +All GPU(s): step 4276: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4277: loss 4.1367, lr 3.3e-04, dt 2.1s +All GPU(s): step 4278: loss 4.1406, lr 3.3e-04, dt 2.0s +All GPU(s): step 4279: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4280: loss 4.1484, lr 3.3e-04, dt 2.0s +All GPU(s): step 4281: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4282: loss 4.1328, lr 3.3e-04, dt 2.1s +All GPU(s): step 4283: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4284: loss 4.1250, lr 3.3e-04, dt 2.1s +All GPU(s): step 4285: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4286: loss 4.1367, lr 3.3e-04, dt 2.1s +All GPU(s): step 4287: loss 4.1406, lr 3.3e-04, dt 2.1s +All GPU(s): step 4288: loss 4.1445, lr 3.3e-04, dt 2.0s +All GPU(s): step 4289: loss 4.1367, lr 3.3e-04, dt 2.1s +All GPU(s): step 4290: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4291: loss 4.1328, lr 3.3e-04, dt 2.1s +All GPU(s): step 4292: loss 4.1445, lr 3.3e-04, dt 2.1s +All GPU(s): step 4293: loss 4.1523, lr 3.3e-04, dt 2.0s +All GPU(s): step 4294: loss 4.1484, lr 3.3e-04, dt 2.0s +All GPU(s): step 4295: loss 4.1445, lr 3.3e-04, dt 2.0s +All GPU(s): step 4296: loss 4.1445, lr 3.3e-04, dt 2.1s +All GPU(s): step 4297: loss 4.1406, lr 3.3e-04, dt 2.0s +All GPU(s): step 4298: loss 4.1445, lr 3.3e-04, dt 2.0s +All GPU(s): step 4299: loss 4.1289, lr 3.3e-04, dt 2.0s +All GPU(s): step 4300: loss 4.1484, lr 3.3e-04, dt 2.0s +All GPU(s): step 4301: loss 4.1484, lr 3.3e-04, dt 2.1s +All GPU(s): step 4302: loss 4.1484, lr 3.3e-04, dt 2.0s +All GPU(s): step 4303: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4304: loss 4.1484, lr 3.3e-04, dt 2.0s +All GPU(s): step 4305: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4306: loss 4.1523, lr 3.3e-04, dt 2.2s +All GPU(s): step 4307: loss 4.1406, lr 3.3e-04, dt 2.1s +All GPU(s): step 4308: loss 4.1484, lr 3.3e-04, dt 2.0s +All GPU(s): step 4309: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4310: loss 4.1406, lr 3.3e-04, dt 2.1s +All GPU(s): step 4311: loss 4.1406, lr 3.3e-04, dt 2.1s +All GPU(s): step 4312: loss 4.1484, lr 3.3e-04, dt 2.0s +All GPU(s): step 4313: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4314: loss 4.1250, lr 3.3e-04, dt 2.0s +All GPU(s): step 4315: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4316: loss 4.1445, lr 3.3e-04, dt 2.1s +All GPU(s): step 4317: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4318: loss 4.1406, lr 3.3e-04, dt 2.1s +All GPU(s): step 4319: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4320: loss 4.1367, lr 3.3e-04, dt 2.1s +All GPU(s): step 4321: loss 4.1406, lr 3.3e-04, dt 2.1s +All GPU(s): step 4322: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4323: loss 4.1289, lr 3.3e-04, dt 2.0s +All GPU(s): step 4324: loss 4.1289, lr 3.3e-04, dt 2.0s +All GPU(s): step 4325: loss 4.1523, lr 3.3e-04, dt 2.1s +All GPU(s): step 4326: loss 4.1211, lr 3.3e-04, dt 2.0s +All GPU(s): step 4327: loss 4.1406, lr 3.3e-04, dt 2.0s +All GPU(s): step 4328: loss 4.1445, lr 3.3e-04, dt 2.0s +All GPU(s): step 4329: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4330: loss 4.1367, lr 3.3e-04, dt 2.1s +All GPU(s): step 4331: loss 4.1406, lr 3.3e-04, dt 2.0s +All GPU(s): step 4332: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4333: loss 4.1406, lr 3.3e-04, dt 2.0s +All GPU(s): step 4334: loss 4.1445, lr 3.3e-04, dt 2.0s +All GPU(s): step 4335: loss 4.1406, lr 3.3e-04, dt 2.1s +All GPU(s): step 4336: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4337: loss 4.1445, lr 3.3e-04, dt 2.0s +All GPU(s): step 4338: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4339: loss 4.1523, lr 3.3e-04, dt 2.0s +All GPU(s): step 4340: loss 4.1523, lr 3.3e-04, dt 2.1s +All GPU(s): step 4341: loss 4.1445, lr 3.3e-04, dt 2.0s +All GPU(s): step 4342: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4343: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4344: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4345: loss 4.1289, lr 3.3e-04, dt 2.1s +All GPU(s): step 4346: loss 4.1406, lr 3.3e-04, dt 2.0s +All GPU(s): step 4347: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4348: loss 4.1445, lr 3.3e-04, dt 2.0s +All GPU(s): step 4349: loss 4.1367, lr 3.3e-04, dt 2.1s +All GPU(s): step 4350: loss 4.1445, lr 3.3e-04, dt 2.1s +All GPU(s): step 4351: loss 4.1406, lr 3.3e-04, dt 2.0s +All GPU(s): step 4352: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4353: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4354: loss 4.1367, lr 3.3e-04, dt 2.1s +All GPU(s): step 4355: loss 4.1367, lr 3.3e-04, dt 2.1s +All GPU(s): step 4356: loss 4.1406, lr 3.3e-04, dt 2.1s +All GPU(s): step 4357: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4358: loss 4.1445, lr 3.3e-04, dt 2.0s +All GPU(s): step 4359: loss 4.1367, lr 3.3e-04, dt 2.1s +All GPU(s): step 4360: loss 4.1406, lr 3.3e-04, dt 2.0s +All GPU(s): step 4361: loss 4.1328, lr 3.3e-04, dt 2.1s +All GPU(s): step 4362: loss 4.1484, lr 3.3e-04, dt 2.0s +All GPU(s): step 4363: loss 4.1484, lr 3.3e-04, dt 2.1s +All GPU(s): step 4364: loss 4.1406, lr 3.3e-04, dt 2.2s +All GPU(s): step 4365: loss 4.1445, lr 3.3e-04, dt 2.0s +All GPU(s): step 4366: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4367: loss 4.1484, lr 3.3e-04, dt 2.0s +All GPU(s): step 4368: loss 4.1406, lr 3.3e-04, dt 2.0s +All GPU(s): step 4369: loss 4.1406, lr 3.3e-04, dt 2.1s +All GPU(s): step 4370: loss 4.1445, lr 3.3e-04, dt 2.0s +All GPU(s): step 4371: loss 4.1523, lr 3.3e-04, dt 2.0s +All GPU(s): step 4372: loss 4.1406, lr 3.3e-04, dt 2.0s +All GPU(s): step 4373: loss 4.1250, lr 3.3e-04, dt 2.0s +All GPU(s): step 4374: loss 4.1406, lr 3.3e-04, dt 2.1s +All GPU(s): step 4375: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4376: loss 4.1289, lr 3.3e-04, dt 2.0s +All GPU(s): step 4377: loss 4.1289, lr 3.3e-04, dt 2.0s +All GPU(s): step 4378: loss 4.1289, lr 3.3e-04, dt 2.1s +All GPU(s): step 4379: loss 4.1406, lr 3.3e-04, dt 2.1s +All GPU(s): step 4380: loss 4.1406, lr 3.3e-04, dt 2.0s +All GPU(s): step 4381: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4382: loss 4.1367, lr 3.3e-04, dt 2.0s +All GPU(s): step 4383: loss 4.1445, lr 3.3e-04, dt 2.1s +All GPU(s): step 4384: loss 4.1328, lr 3.3e-04, dt 2.1s +All GPU(s): step 4385: loss 4.1328, lr 3.3e-04, dt 2.0s +All GPU(s): step 4386: loss 4.1484, lr 3.3e-04, dt 2.0s +All GPU(s): step 4387: loss 4.1562, lr 3.2e-04, dt 2.0s +All GPU(s): step 4388: loss 4.1406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4389: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4390: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4391: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4392: loss 4.1367, lr 3.2e-04, dt 2.0s +All GPU(s): step 4393: loss 4.1406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4394: loss 4.1523, lr 3.2e-04, dt 2.0s +All GPU(s): step 4395: loss 4.1289, lr 3.2e-04, dt 2.0s +All GPU(s): step 4396: loss 4.1367, lr 3.2e-04, dt 2.1s +All GPU(s): step 4397: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4398: loss 4.1328, lr 3.2e-04, dt 2.2s +All GPU(s): step 4399: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4400: loss 4.1328, lr 3.2e-04, dt 2.0s +All GPU(s): step 4401: loss 4.1328, lr 3.2e-04, dt 2.0s +All GPU(s): step 4402: loss 4.1484, lr 3.2e-04, dt 2.0s +All GPU(s): step 4403: loss 4.1484, lr 3.2e-04, dt 2.1s +All GPU(s): step 4404: loss 4.1328, lr 3.2e-04, dt 2.0s +All GPU(s): step 4405: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4406: loss 4.1484, lr 3.2e-04, dt 2.0s +All GPU(s): step 4407: loss 4.1406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4408: loss 4.1367, lr 3.2e-04, dt 2.1s +All GPU(s): step 4409: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4410: loss 4.1406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4411: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4412: loss 4.1445, lr 3.2e-04, dt 2.1s +All GPU(s): step 4413: loss 4.1289, lr 3.2e-04, dt 2.1s +All GPU(s): step 4414: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4415: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4416: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4417: loss 4.1484, lr 3.2e-04, dt 2.1s +All GPU(s): step 4418: loss 4.1328, lr 3.2e-04, dt 2.0s +All GPU(s): step 4419: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4420: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4421: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4422: loss 4.1523, lr 3.2e-04, dt 2.2s +All GPU(s): step 4423: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4424: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4425: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4426: loss 4.1328, lr 3.2e-04, dt 2.0s +All GPU(s): step 4427: loss 4.1406, lr 3.2e-04, dt 2.2s +All GPU(s): step 4428: loss 4.1367, lr 3.2e-04, dt 2.1s +All GPU(s): step 4429: loss 4.1328, lr 3.2e-04, dt 2.0s +All GPU(s): step 4430: loss 4.1328, lr 3.2e-04, dt 2.0s +All GPU(s): step 4431: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4432: loss 4.1406, lr 3.2e-04, dt 2.2s +All GPU(s): step 4433: loss 4.1367, lr 3.2e-04, dt 2.1s +All GPU(s): step 4434: loss 4.1328, lr 3.2e-04, dt 2.0s +All GPU(s): step 4435: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4436: loss 4.1367, lr 3.2e-04, dt 2.1s +All GPU(s): step 4437: loss 4.1484, lr 3.2e-04, dt 2.1s +All GPU(s): step 4438: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4439: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4440: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4441: loss 4.1445, lr 3.2e-04, dt 2.1s +All GPU(s): step 4442: loss 4.1250, lr 3.2e-04, dt 2.0s +All GPU(s): step 4443: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4444: loss 4.1484, lr 3.2e-04, dt 2.0s +All GPU(s): step 4445: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4446: loss 4.1289, lr 3.2e-04, dt 2.2s +All GPU(s): step 4447: loss 4.1289, lr 3.2e-04, dt 2.1s +All GPU(s): step 4448: loss 4.1484, lr 3.2e-04, dt 2.0s +All GPU(s): step 4449: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4450: loss 4.1250, lr 3.2e-04, dt 2.0s +All GPU(s): step 4451: loss 4.1328, lr 3.2e-04, dt 2.2s +All GPU(s): step 4452: loss 4.1367, lr 3.2e-04, dt 2.0s +All GPU(s): step 4453: loss 4.1484, lr 3.2e-04, dt 2.0s +All GPU(s): step 4454: loss 4.1484, lr 3.2e-04, dt 2.0s +All GPU(s): step 4455: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4456: loss 4.1367, lr 3.2e-04, dt 2.1s +All GPU(s): step 4457: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4458: loss 4.1484, lr 3.2e-04, dt 2.0s +All GPU(s): step 4459: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4460: loss 4.1523, lr 3.2e-04, dt 2.0s +All GPU(s): step 4461: loss 4.1406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4462: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4463: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4464: loss 4.1367, lr 3.2e-04, dt 2.0s +All GPU(s): step 4465: loss 4.1406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4466: loss 4.1406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4467: loss 4.1523, lr 3.2e-04, dt 2.0s +All GPU(s): step 4468: loss 4.1289, lr 3.2e-04, dt 2.0s +All GPU(s): step 4469: loss 4.1367, lr 3.2e-04, dt 2.0s +All GPU(s): step 4470: loss 4.1445, lr 3.2e-04, dt 2.1s +All GPU(s): step 4471: loss 4.1562, lr 3.2e-04, dt 2.0s +All GPU(s): step 4472: loss 4.1484, lr 3.2e-04, dt 2.0s +All GPU(s): step 4473: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4474: loss 4.1367, lr 3.2e-04, dt 2.0s +All GPU(s): step 4475: loss 4.1328, lr 3.2e-04, dt 2.2s +All GPU(s): step 4476: loss 4.1367, lr 3.2e-04, dt 2.0s +All GPU(s): step 4477: loss 4.1328, lr 3.2e-04, dt 2.0s +All GPU(s): step 4478: loss 4.1484, lr 3.2e-04, dt 2.0s +All GPU(s): step 4479: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4480: loss 4.1484, lr 3.2e-04, dt 2.2s +All GPU(s): step 4481: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4482: loss 4.1367, lr 3.2e-04, dt 2.0s +All GPU(s): step 4483: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4484: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4485: loss 4.1406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4486: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4487: loss 4.1367, lr 3.2e-04, dt 2.1s +All GPU(s): step 4488: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4489: loss 4.1523, lr 3.2e-04, dt 2.1s +All GPU(s): step 4490: loss 4.1328, lr 3.2e-04, dt 2.1s +All GPU(s): step 4491: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4492: loss 4.1484, lr 3.2e-04, dt 2.0s +All GPU(s): step 4493: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4494: loss 4.1328, lr 3.2e-04, dt 2.1s +All GPU(s): step 4495: loss 4.1367, lr 3.2e-04, dt 2.1s +All GPU(s): step 4496: loss 4.1523, lr 3.2e-04, dt 2.0s +All GPU(s): step 4497: loss 4.1484, lr 3.2e-04, dt 2.0s +All GPU(s): step 4498: loss 4.1328, lr 3.2e-04, dt 2.0s +All GPU(s): step 4499: loss 4.1406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4500: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4501: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4502: loss 4.1328, lr 3.2e-04, dt 2.0s +All GPU(s): step 4503: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4504: loss 4.1289, lr 3.2e-04, dt 2.2s +All GPU(s): step 4505: loss 4.1445, lr 3.2e-04, dt 2.1s +All GPU(s): step 4506: loss 4.1484, lr 3.2e-04, dt 2.0s +All GPU(s): step 4507: loss 4.1367, lr 3.2e-04, dt 2.0s +All GPU(s): step 4508: loss 4.1250, lr 3.2e-04, dt 2.1s +All GPU(s): step 4509: loss 4.1406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4510: loss 4.1445, lr 3.2e-04, dt 2.1s +All GPU(s): step 4511: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4512: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4513: loss 4.1328, lr 3.2e-04, dt 2.1s +All GPU(s): step 4514: loss 4.1445, lr 3.2e-04, dt 2.2s +All GPU(s): step 4515: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4516: loss 4.1289, lr 3.2e-04, dt 2.0s +All GPU(s): step 4517: loss 4.1367, lr 3.2e-04, dt 2.1s +All GPU(s): step 4518: loss 4.1367, lr 3.2e-04, dt 2.1s +All GPU(s): step 4519: loss 4.1406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4520: loss 4.1406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4521: loss 4.1406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4522: loss 4.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4523: loss 4.1484, lr 3.2e-04, dt 2.1s +All GPU(s): step 4524: loss 4.1523, lr 3.2e-04, dt 2.1s +All GPU(s): step 4525: loss 4.1484, lr 3.2e-04, dt 2.0s +All GPU(s): step 4526: loss 4.1484, lr 3.2e-04, dt 2.0s +All GPU(s): step 4527: loss 4.1484, lr 3.2e-04, dt 2.0s +All GPU(s): step 4528: loss 4.1289, lr 3.2e-04, dt 2.1s +All GPU(s): step 4529: loss 4.1406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4530: loss 4.1523, lr 3.2e-04, dt 2.1s +All GPU(s): step 4531: loss 4.1445, lr 3.2e-04, dt 2.0s +All GPU(s): step 4532: loss 4.1406, lr 3.1e-04, dt 2.1s +All GPU(s): step 4533: loss 4.1445, lr 3.1e-04, dt 2.3s +All GPU(s): step 4534: loss 4.1289, lr 3.1e-04, dt 2.1s +All GPU(s): step 4535: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4536: loss 4.1328, lr 3.1e-04, dt 2.0s +All GPU(s): step 4537: loss 4.1367, lr 3.1e-04, dt 2.0s +All GPU(s): step 4538: loss 4.1445, lr 3.1e-04, dt 2.1s +All GPU(s): step 4539: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4540: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4541: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4542: loss 4.1484, lr 3.1e-04, dt 2.1s +All GPU(s): step 4543: loss 4.1484, lr 3.1e-04, dt 2.1s +All GPU(s): step 4544: loss 4.1367, lr 3.1e-04, dt 2.0s +All GPU(s): step 4545: loss 4.1367, lr 3.1e-04, dt 2.0s +All GPU(s): step 4546: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4547: loss 4.1406, lr 3.1e-04, dt 2.1s +All GPU(s): step 4548: loss 4.1367, lr 3.1e-04, dt 2.0s +All GPU(s): step 4549: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4550: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4551: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4552: loss 4.1367, lr 3.1e-04, dt 2.1s +All GPU(s): step 4553: loss 4.1328, lr 3.1e-04, dt 2.0s +All GPU(s): step 4554: loss 4.1367, lr 3.1e-04, dt 2.0s +All GPU(s): step 4555: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4556: loss 4.1445, lr 3.1e-04, dt 2.1s +All GPU(s): step 4557: loss 4.1367, lr 3.1e-04, dt 2.1s +All GPU(s): step 4558: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4559: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4560: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4561: loss 4.1289, lr 3.1e-04, dt 2.0s +All GPU(s): step 4562: loss 4.1328, lr 3.1e-04, dt 2.1s +All GPU(s): step 4563: loss 4.1484, lr 3.1e-04, dt 2.1s +All GPU(s): step 4564: loss 4.1367, lr 3.1e-04, dt 2.0s +All GPU(s): step 4565: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4566: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4567: loss 4.1328, lr 3.1e-04, dt 2.1s +All GPU(s): step 4568: loss 4.1289, lr 3.1e-04, dt 2.0s +All GPU(s): step 4569: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4570: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4571: loss 4.1484, lr 3.1e-04, dt 2.1s +All GPU(s): step 4572: loss 4.1484, lr 3.1e-04, dt 2.1s +All GPU(s): step 4573: loss 4.1523, lr 3.1e-04, dt 2.0s +All GPU(s): step 4574: loss 4.1484, lr 3.1e-04, dt 2.0s +All GPU(s): step 4575: loss 4.1367, lr 3.1e-04, dt 2.0s +All GPU(s): step 4576: loss 4.1484, lr 3.1e-04, dt 2.1s +All GPU(s): step 4577: loss 4.1445, lr 3.1e-04, dt 2.1s +All GPU(s): step 4578: loss 4.1328, lr 3.1e-04, dt 2.0s +All GPU(s): step 4579: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4580: loss 4.1328, lr 3.1e-04, dt 2.0s +All GPU(s): step 4581: loss 4.1445, lr 3.1e-04, dt 2.1s +All GPU(s): step 4582: loss 4.1523, lr 3.1e-04, dt 2.0s +All GPU(s): step 4583: loss 4.1562, lr 3.1e-04, dt 2.0s +All GPU(s): step 4584: loss 4.1367, lr 3.1e-04, dt 2.0s +All GPU(s): step 4585: loss 4.1484, lr 3.1e-04, dt 2.0s +All GPU(s): step 4586: loss 4.1406, lr 3.1e-04, dt 2.2s +All GPU(s): step 4587: loss 4.1523, lr 3.1e-04, dt 2.0s +All GPU(s): step 4588: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4589: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4590: loss 4.1211, lr 3.1e-04, dt 2.1s +All GPU(s): step 4591: loss 4.1445, lr 3.1e-04, dt 2.1s +All GPU(s): step 4592: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4593: loss 4.1367, lr 3.1e-04, dt 2.0s +All GPU(s): step 4594: loss 4.1406, lr 3.1e-04, dt 2.1s +All GPU(s): step 4595: loss 4.1367, lr 3.1e-04, dt 2.1s +All GPU(s): step 4596: loss 4.1406, lr 3.1e-04, dt 2.1s +All GPU(s): step 4597: loss 4.1328, lr 3.1e-04, dt 2.0s +All GPU(s): step 4598: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4599: loss 4.1367, lr 3.1e-04, dt 2.1s +All GPU(s): step 4600: loss 4.1445, lr 3.1e-04, dt 2.1s +All GPU(s): step 4601: loss 4.1406, lr 3.1e-04, dt 2.1s +All GPU(s): step 4602: loss 4.1484, lr 3.1e-04, dt 2.0s +All GPU(s): step 4603: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4604: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4605: loss 4.1406, lr 3.1e-04, dt 2.1s +All GPU(s): step 4606: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4607: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4608: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4609: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4610: loss 4.1484, lr 3.1e-04, dt 2.1s +All GPU(s): step 4611: loss 4.1523, lr 3.1e-04, dt 2.0s +All GPU(s): step 4612: loss 4.1523, lr 3.1e-04, dt 2.0s +All GPU(s): step 4613: loss 4.1367, lr 3.1e-04, dt 2.1s +All GPU(s): step 4614: loss 4.1523, lr 3.1e-04, dt 2.0s +All GPU(s): step 4615: loss 4.1367, lr 3.1e-04, dt 2.1s +All GPU(s): step 4616: loss 4.1367, lr 3.1e-04, dt 2.0s +All GPU(s): step 4617: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4618: loss 4.1484, lr 3.1e-04, dt 2.0s +All GPU(s): step 4619: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4620: loss 4.1484, lr 3.1e-04, dt 2.1s +All GPU(s): step 4621: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4622: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4623: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4624: loss 4.1484, lr 3.1e-04, dt 2.0s +All GPU(s): step 4625: loss 4.1484, lr 3.1e-04, dt 2.1s +All GPU(s): step 4626: loss 4.1367, lr 3.1e-04, dt 2.0s +All GPU(s): step 4627: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4628: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4629: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4630: loss 4.1562, lr 3.1e-04, dt 2.1s +All GPU(s): step 4631: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4632: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4633: loss 4.1484, lr 3.1e-04, dt 2.0s +All GPU(s): step 4634: loss 4.1367, lr 3.1e-04, dt 2.1s +All GPU(s): step 4635: loss 4.1484, lr 3.1e-04, dt 2.1s +All GPU(s): step 4636: loss 4.1289, lr 3.1e-04, dt 2.0s +All GPU(s): step 4637: loss 4.1328, lr 3.1e-04, dt 2.0s +All GPU(s): step 4638: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4639: loss 4.1328, lr 3.1e-04, dt 2.1s +All GPU(s): step 4640: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4641: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4642: loss 4.1523, lr 3.1e-04, dt 2.0s +All GPU(s): step 4643: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4644: loss 4.1523, lr 3.1e-04, dt 2.2s +All GPU(s): step 4645: loss 4.1484, lr 3.1e-04, dt 2.1s +All GPU(s): step 4646: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4647: loss 4.1211, lr 3.1e-04, dt 2.0s +All GPU(s): step 4648: loss 4.1367, lr 3.1e-04, dt 2.1s +All GPU(s): step 4649: loss 4.1367, lr 3.1e-04, dt 2.2s +All GPU(s): step 4650: loss 4.1484, lr 3.1e-04, dt 2.0s +All GPU(s): step 4651: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4652: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4653: loss 4.1523, lr 3.1e-04, dt 2.0s +All GPU(s): step 4654: loss 4.1289, lr 3.1e-04, dt 2.1s +All GPU(s): step 4655: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4656: loss 4.1484, lr 3.1e-04, dt 2.0s +All GPU(s): step 4657: loss 4.1367, lr 3.1e-04, dt 2.0s +All GPU(s): step 4658: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4659: loss 4.1445, lr 3.1e-04, dt 2.1s +All GPU(s): step 4660: loss 4.1367, lr 3.1e-04, dt 2.0s +All GPU(s): step 4661: loss 4.1250, lr 3.1e-04, dt 2.0s +All GPU(s): step 4662: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4663: loss 4.1367, lr 3.1e-04, dt 2.1s +All GPU(s): step 4664: loss 4.1406, lr 3.1e-04, dt 2.1s +All GPU(s): step 4665: loss 4.1328, lr 3.1e-04, dt 2.0s +All GPU(s): step 4666: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4667: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4668: loss 4.1445, lr 3.1e-04, dt 2.1s +All GPU(s): step 4669: loss 4.1445, lr 3.1e-04, dt 2.0s +All GPU(s): step 4670: loss 4.1367, lr 3.1e-04, dt 2.0s +All GPU(s): step 4671: loss 4.1367, lr 3.1e-04, dt 2.0s +All GPU(s): step 4672: loss 4.1328, lr 3.1e-04, dt 2.0s +All GPU(s): step 4673: loss 4.1484, lr 3.1e-04, dt 2.1s +All GPU(s): step 4674: loss 4.1406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4675: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4676: loss 4.1367, lr 3.0e-04, dt 2.1s +All GPU(s): step 4677: loss 4.1445, lr 3.0e-04, dt 2.1s +All GPU(s): step 4678: loss 4.1445, lr 3.0e-04, dt 2.1s +All GPU(s): step 4679: loss 4.1289, lr 3.0e-04, dt 2.0s +All GPU(s): step 4680: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4681: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4682: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4683: loss 4.1562, lr 3.0e-04, dt 2.1s +All GPU(s): step 4684: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4685: loss 4.1367, lr 3.0e-04, dt 2.0s +All GPU(s): step 4686: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4687: loss 4.1523, lr 3.0e-04, dt 2.0s +All GPU(s): step 4688: loss 4.1328, lr 3.0e-04, dt 2.1s +All GPU(s): step 4689: loss 4.1367, lr 3.0e-04, dt 2.0s +All GPU(s): step 4690: loss 4.1367, lr 3.0e-04, dt 2.0s +All GPU(s): step 4691: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4692: loss 4.1367, lr 3.0e-04, dt 2.0s +All GPU(s): step 4693: loss 4.1328, lr 3.0e-04, dt 2.1s +All GPU(s): step 4694: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4695: loss 4.1328, lr 3.0e-04, dt 2.0s +All GPU(s): step 4696: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4697: loss 4.1445, lr 3.0e-04, dt 2.1s +All GPU(s): step 4698: loss 4.1406, lr 3.0e-04, dt 2.1s +All GPU(s): step 4699: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4700: loss 4.1367, lr 3.0e-04, dt 2.0s +All GPU(s): step 4701: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4702: loss 4.1289, lr 3.0e-04, dt 2.1s +All GPU(s): step 4703: loss 4.1445, lr 3.0e-04, dt 2.1s +All GPU(s): step 4704: loss 4.1367, lr 3.0e-04, dt 2.0s +All GPU(s): step 4705: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4706: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4707: loss 4.1484, lr 3.0e-04, dt 2.2s +All GPU(s): step 4708: loss 4.1328, lr 3.0e-04, dt 2.0s +All GPU(s): step 4709: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4710: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4711: loss 4.1367, lr 3.0e-04, dt 2.0s +All GPU(s): step 4712: loss 4.1523, lr 3.0e-04, dt 2.1s +All GPU(s): step 4713: loss 4.1367, lr 3.0e-04, dt 2.0s +All GPU(s): step 4714: loss 4.1367, lr 3.0e-04, dt 2.1s +All GPU(s): step 4715: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4716: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4717: loss 4.1367, lr 3.0e-04, dt 2.1s +All GPU(s): step 4718: loss 4.1484, lr 3.0e-04, dt 2.0s +All GPU(s): step 4719: loss 4.1289, lr 3.0e-04, dt 2.0s +All GPU(s): step 4720: loss 4.1328, lr 3.0e-04, dt 2.0s +All GPU(s): step 4721: loss 4.1328, lr 3.0e-04, dt 2.0s +All GPU(s): step 4722: loss 4.1367, lr 3.0e-04, dt 2.1s +All GPU(s): step 4723: loss 4.1484, lr 3.0e-04, dt 2.1s +All GPU(s): step 4724: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4725: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4726: loss 4.1445, lr 3.0e-04, dt 2.1s +All GPU(s): step 4727: loss 4.1445, lr 3.0e-04, dt 2.1s +All GPU(s): step 4728: loss 4.1367, lr 3.0e-04, dt 2.0s +All GPU(s): step 4729: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4730: loss 4.1523, lr 3.0e-04, dt 2.0s +All GPU(s): step 4731: loss 4.1484, lr 3.0e-04, dt 2.1s +All GPU(s): step 4732: loss 4.1484, lr 3.0e-04, dt 2.0s +All GPU(s): step 4733: loss 4.1602, lr 3.0e-04, dt 2.0s +All GPU(s): step 4734: loss 4.1484, lr 3.0e-04, dt 2.0s +All GPU(s): step 4735: loss 4.1484, lr 3.0e-04, dt 2.0s +All GPU(s): step 4736: loss 4.1562, lr 3.0e-04, dt 2.1s +All GPU(s): step 4737: loss 4.1641, lr 3.0e-04, dt 2.0s +All GPU(s): step 4738: loss 4.1523, lr 3.0e-04, dt 2.0s +All GPU(s): step 4739: loss 4.1562, lr 3.0e-04, dt 2.0s +All GPU(s): step 4740: loss 4.1523, lr 3.0e-04, dt 2.0s +All GPU(s): step 4741: loss 4.1445, lr 3.0e-04, dt 2.1s +All GPU(s): step 4742: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4743: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4744: loss 4.1484, lr 3.0e-04, dt 2.0s +All GPU(s): step 4745: loss 4.1484, lr 3.0e-04, dt 2.0s +All GPU(s): step 4746: loss 4.1445, lr 3.0e-04, dt 2.1s +All GPU(s): step 4747: loss 4.1289, lr 3.0e-04, dt 2.0s +All GPU(s): step 4748: loss 4.1367, lr 3.0e-04, dt 2.0s +All GPU(s): step 4749: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4750: loss 4.1484, lr 3.0e-04, dt 2.0s +All GPU(s): step 4751: loss 4.1406, lr 3.0e-04, dt 2.1s +All GPU(s): step 4752: loss 4.1484, lr 3.0e-04, dt 2.0s +All GPU(s): step 4753: loss 4.1367, lr 3.0e-04, dt 2.0s +All GPU(s): step 4754: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4755: loss 4.1523, lr 3.0e-04, dt 2.1s +All GPU(s): step 4756: loss 4.1445, lr 3.0e-04, dt 2.1s +All GPU(s): step 4757: loss 4.1367, lr 3.0e-04, dt 2.0s +All GPU(s): step 4758: loss 4.1484, lr 3.0e-04, dt 2.0s +All GPU(s): step 4759: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4760: loss 4.1445, lr 3.0e-04, dt 2.1s +All GPU(s): step 4761: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4762: loss 4.1523, lr 3.0e-04, dt 2.0s +All GPU(s): step 4763: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4764: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4765: loss 4.1445, lr 3.0e-04, dt 2.2s +All GPU(s): step 4766: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4767: loss 4.1484, lr 3.0e-04, dt 2.0s +All GPU(s): step 4768: loss 4.1367, lr 3.0e-04, dt 2.0s +All GPU(s): step 4769: loss 4.1367, lr 3.0e-04, dt 2.0s +All GPU(s): step 4770: loss 4.1406, lr 3.0e-04, dt 2.1s +All GPU(s): step 4771: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4772: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4773: loss 4.1523, lr 3.0e-04, dt 2.0s +All GPU(s): step 4774: loss 4.1602, lr 3.0e-04, dt 2.0s +All GPU(s): step 4775: loss 4.1562, lr 3.0e-04, dt 2.1s +All GPU(s): step 4776: loss 4.1406, lr 3.0e-04, dt 2.1s +All GPU(s): step 4777: loss 4.1523, lr 3.0e-04, dt 2.0s +All GPU(s): step 4778: loss 4.1523, lr 3.0e-04, dt 2.1s +All GPU(s): step 4779: loss 4.1562, lr 3.0e-04, dt 2.1s +All GPU(s): step 4780: loss 4.1367, lr 3.0e-04, dt 2.1s +All GPU(s): step 4781: loss 4.1523, lr 3.0e-04, dt 2.0s +All GPU(s): step 4782: loss 4.1445, lr 3.0e-04, dt 2.1s +All GPU(s): step 4783: loss 4.1484, lr 3.0e-04, dt 2.0s +All GPU(s): step 4784: loss 4.1562, lr 3.0e-04, dt 2.1s +All GPU(s): step 4785: loss 4.1445, lr 3.0e-04, dt 2.1s +All GPU(s): step 4786: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4787: loss 4.1562, lr 3.0e-04, dt 2.1s +All GPU(s): step 4788: loss 4.1523, lr 3.0e-04, dt 2.0s +All GPU(s): step 4789: loss 4.1484, lr 3.0e-04, dt 2.1s +All GPU(s): step 4790: loss 4.1641, lr 3.0e-04, dt 2.0s +All GPU(s): step 4791: loss 4.1484, lr 3.0e-04, dt 2.1s +All GPU(s): step 4792: loss 4.1211, lr 3.0e-04, dt 2.0s +All GPU(s): step 4793: loss 4.1328, lr 3.0e-04, dt 2.0s +All GPU(s): step 4794: loss 4.1367, lr 3.0e-04, dt 2.1s +All GPU(s): step 4795: loss 4.1484, lr 3.0e-04, dt 2.1s +All GPU(s): step 4796: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4797: loss 4.1289, lr 3.0e-04, dt 2.0s +All GPU(s): step 4798: loss 4.1328, lr 3.0e-04, dt 2.1s +All GPU(s): step 4799: loss 4.1328, lr 3.0e-04, dt 2.2s +All GPU(s): step 4800: loss 4.1289, lr 3.0e-04, dt 2.0s +All GPU(s): step 4801: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4802: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4803: loss 4.1367, lr 3.0e-04, dt 2.0s +All GPU(s): step 4804: loss 4.1367, lr 3.0e-04, dt 2.1s +All GPU(s): step 4805: loss 4.1484, lr 3.0e-04, dt 2.0s +All GPU(s): step 4806: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4807: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4808: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4809: loss 4.1445, lr 3.0e-04, dt 2.1s +All GPU(s): step 4810: loss 4.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4811: loss 4.1484, lr 3.0e-04, dt 2.0s +All GPU(s): step 4812: loss 4.1445, lr 3.0e-04, dt 2.0s +All GPU(s): step 4813: loss 4.1523, lr 3.0e-04, dt 2.1s +All GPU(s): step 4814: loss 4.1484, lr 3.0e-04, dt 2.0s +All GPU(s): step 4815: loss 4.1562, lr 3.0e-04, dt 2.0s +All GPU(s): step 4816: loss 4.1328, lr 3.0e-04, dt 2.0s +All GPU(s): step 4817: loss 4.1328, lr 2.9e-04, dt 2.0s +All GPU(s): step 4818: loss 4.1523, lr 2.9e-04, dt 2.1s +All GPU(s): step 4819: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4820: loss 4.1445, lr 2.9e-04, dt 2.0s +All GPU(s): step 4821: loss 4.1289, lr 2.9e-04, dt 2.0s +All GPU(s): step 4822: loss 4.1445, lr 2.9e-04, dt 2.0s +All GPU(s): step 4823: loss 4.1445, lr 2.9e-04, dt 2.1s +All GPU(s): step 4824: loss 4.1484, lr 2.9e-04, dt 2.0s +All GPU(s): step 4825: loss 4.1484, lr 2.9e-04, dt 2.0s +All GPU(s): step 4826: loss 4.1289, lr 2.9e-04, dt 2.0s +All GPU(s): step 4827: loss 4.1445, lr 2.9e-04, dt 2.0s +All GPU(s): step 4828: loss 4.1367, lr 2.9e-04, dt 2.1s +All GPU(s): step 4829: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4830: loss 4.1523, lr 2.9e-04, dt 2.0s +All GPU(s): step 4831: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4832: loss 4.1289, lr 2.9e-04, dt 2.0s +All GPU(s): step 4833: loss 4.1562, lr 2.9e-04, dt 2.1s +All GPU(s): step 4834: loss 4.1406, lr 2.9e-04, dt 2.1s +All GPU(s): step 4835: loss 4.1289, lr 2.9e-04, dt 2.1s +All GPU(s): step 4836: loss 4.1484, lr 2.9e-04, dt 2.0s +All GPU(s): step 4837: loss 4.1406, lr 2.9e-04, dt 2.1s +All GPU(s): step 4838: loss 4.1445, lr 2.9e-04, dt 2.1s +All GPU(s): step 4839: loss 4.1484, lr 2.9e-04, dt 2.0s +All GPU(s): step 4840: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4841: loss 4.1523, lr 2.9e-04, dt 2.0s +All GPU(s): step 4842: loss 4.1523, lr 2.9e-04, dt 2.1s +All GPU(s): step 4843: loss 4.1406, lr 2.9e-04, dt 2.1s +All GPU(s): step 4844: loss 4.1250, lr 2.9e-04, dt 2.1s +All GPU(s): step 4845: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4846: loss 4.1328, lr 2.9e-04, dt 2.0s +All GPU(s): step 4847: loss 4.1445, lr 2.9e-04, dt 2.1s +All GPU(s): step 4848: loss 4.1445, lr 2.9e-04, dt 2.0s +All GPU(s): step 4849: loss 4.1484, lr 2.9e-04, dt 2.0s +All GPU(s): step 4850: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4851: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4852: loss 4.1445, lr 2.9e-04, dt 2.1s +All GPU(s): step 4853: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4854: loss 4.1523, lr 2.9e-04, dt 2.0s +All GPU(s): step 4855: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4856: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4857: loss 4.1484, lr 2.9e-04, dt 2.1s +All GPU(s): step 4858: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4859: loss 4.1289, lr 2.9e-04, dt 2.0s +All GPU(s): step 4860: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4861: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4862: loss 4.1328, lr 2.9e-04, dt 2.2s +All GPU(s): step 4863: loss 4.1484, lr 2.9e-04, dt 2.0s +All GPU(s): step 4864: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4865: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4866: loss 4.1406, lr 2.9e-04, dt 2.1s +All GPU(s): step 4867: loss 4.1445, lr 2.9e-04, dt 2.1s +All GPU(s): step 4868: loss 4.1445, lr 2.9e-04, dt 2.1s +All GPU(s): step 4869: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4870: loss 4.1523, lr 2.9e-04, dt 2.0s +All GPU(s): step 4871: loss 4.1406, lr 2.9e-04, dt 2.1s +All GPU(s): step 4872: loss 4.1406, lr 2.9e-04, dt 2.2s +All GPU(s): step 4873: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4874: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4875: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4876: loss 4.1289, lr 2.9e-04, dt 2.1s +All GPU(s): step 4877: loss 4.1328, lr 2.9e-04, dt 2.1s +All GPU(s): step 4878: loss 4.1523, lr 2.9e-04, dt 2.1s +All GPU(s): step 4879: loss 4.1484, lr 2.9e-04, dt 2.0s +All GPU(s): step 4880: loss 4.1523, lr 2.9e-04, dt 2.1s +All GPU(s): step 4881: loss 4.1328, lr 2.9e-04, dt 2.1s +All GPU(s): step 4882: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4883: loss 4.1445, lr 2.9e-04, dt 2.1s +All GPU(s): step 4884: loss 4.1328, lr 2.9e-04, dt 2.0s +All GPU(s): step 4885: loss 4.1445, lr 2.9e-04, dt 2.0s +All GPU(s): step 4886: loss 4.1406, lr 2.9e-04, dt 2.1s +All GPU(s): step 4887: loss 4.1445, lr 2.9e-04, dt 2.1s +All GPU(s): step 4888: loss 4.1523, lr 2.9e-04, dt 2.0s +All GPU(s): step 4889: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4890: loss 4.1367, lr 2.9e-04, dt 2.1s +All GPU(s): step 4891: loss 4.1289, lr 2.9e-04, dt 2.1s +All GPU(s): step 4892: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4893: loss 4.1445, lr 2.9e-04, dt 2.0s +All GPU(s): step 4894: loss 4.1523, lr 2.9e-04, dt 2.0s +All GPU(s): step 4895: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4896: loss 4.1406, lr 2.9e-04, dt 2.1s +All GPU(s): step 4897: loss 4.1250, lr 2.9e-04, dt 2.0s +All GPU(s): step 4898: loss 4.1445, lr 2.9e-04, dt 2.0s +All GPU(s): step 4899: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4900: loss 4.1484, lr 2.9e-04, dt 2.1s +All GPU(s): step 4901: loss 4.1406, lr 2.9e-04, dt 2.1s +All GPU(s): step 4902: loss 4.1484, lr 2.9e-04, dt 2.0s +All GPU(s): step 4903: loss 4.1445, lr 2.9e-04, dt 2.0s +All GPU(s): step 4904: loss 4.1523, lr 2.9e-04, dt 2.1s +All GPU(s): step 4905: loss 4.1406, lr 2.9e-04, dt 2.1s +All GPU(s): step 4906: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4907: loss 4.1289, lr 2.9e-04, dt 2.0s +All GPU(s): step 4908: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4909: loss 4.1289, lr 2.9e-04, dt 2.0s +All GPU(s): step 4910: loss 4.1484, lr 2.9e-04, dt 2.1s +All GPU(s): step 4911: loss 4.1484, lr 2.9e-04, dt 2.1s +All GPU(s): step 4912: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4913: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4914: loss 4.1328, lr 2.9e-04, dt 2.0s +All GPU(s): step 4915: loss 4.1289, lr 2.9e-04, dt 2.1s +All GPU(s): step 4916: loss 4.1484, lr 2.9e-04, dt 2.1s +All GPU(s): step 4917: loss 4.1328, lr 2.9e-04, dt 2.0s +All GPU(s): step 4918: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4919: loss 4.1484, lr 2.9e-04, dt 2.1s +All GPU(s): step 4920: loss 4.1523, lr 2.9e-04, dt 2.1s +All GPU(s): step 4921: loss 4.1328, lr 2.9e-04, dt 2.0s +All GPU(s): step 4922: loss 4.1289, lr 2.9e-04, dt 2.0s +All GPU(s): step 4923: loss 4.1445, lr 2.9e-04, dt 2.0s +All GPU(s): step 4924: loss 4.1406, lr 2.9e-04, dt 2.1s +All GPU(s): step 4925: loss 4.1328, lr 2.9e-04, dt 2.1s +All GPU(s): step 4926: loss 4.1328, lr 2.9e-04, dt 2.1s +All GPU(s): step 4927: loss 4.1484, lr 2.9e-04, dt 2.0s +All GPU(s): step 4928: loss 4.1484, lr 2.9e-04, dt 2.0s +All GPU(s): step 4929: loss 4.1484, lr 2.9e-04, dt 2.1s +All GPU(s): step 4930: loss 4.1445, lr 2.9e-04, dt 2.1s +All GPU(s): step 4931: loss 4.1484, lr 2.9e-04, dt 2.0s +All GPU(s): step 4932: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4933: loss 4.1562, lr 2.9e-04, dt 2.1s +All GPU(s): step 4934: loss 4.1328, lr 2.9e-04, dt 2.1s +All GPU(s): step 4935: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4936: loss 4.1445, lr 2.9e-04, dt 2.0s +All GPU(s): step 4937: loss 4.1484, lr 2.9e-04, dt 2.0s +All GPU(s): step 4938: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4939: loss 4.1406, lr 2.9e-04, dt 2.1s +All GPU(s): step 4940: loss 4.1445, lr 2.9e-04, dt 2.0s +All GPU(s): step 4941: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4942: loss 4.1289, lr 2.9e-04, dt 2.1s +All GPU(s): step 4943: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4944: loss 4.1445, lr 2.9e-04, dt 2.1s +All GPU(s): step 4945: loss 4.1367, lr 2.9e-04, dt 2.1s +All GPU(s): step 4946: loss 4.1445, lr 2.9e-04, dt 2.1s +All GPU(s): step 4947: loss 4.1289, lr 2.9e-04, dt 2.1s +All GPU(s): step 4948: loss 4.1445, lr 2.9e-04, dt 2.0s +All GPU(s): step 4949: loss 4.1445, lr 2.9e-04, dt 2.1s +All GPU(s): step 4950: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4951: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4952: loss 4.1484, lr 2.9e-04, dt 2.1s +All GPU(s): step 4953: loss 4.1445, lr 2.9e-04, dt 2.1s +All GPU(s): step 4954: loss 4.1445, lr 2.9e-04, dt 2.0s +All GPU(s): step 4955: loss 4.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4956: loss 4.1367, lr 2.9e-04, dt 2.0s +All GPU(s): step 4957: loss 4.1445, lr 2.9e-04, dt 2.0s +All GPU(s): step 4958: loss 4.1328, lr 2.9e-04, dt 2.1s +All GPU(s): step 4959: loss 4.1523, lr 2.8e-04, dt 2.1s +All GPU(s): step 4960: loss 4.1328, lr 2.8e-04, dt 2.0s +All GPU(s): step 4961: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 4962: loss 4.1484, lr 2.8e-04, dt 2.0s +All GPU(s): step 4963: loss 4.1367, lr 2.8e-04, dt 2.1s +All GPU(s): step 4964: loss 4.1445, lr 2.8e-04, dt 2.1s +All GPU(s): step 4965: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 4966: loss 4.1484, lr 2.8e-04, dt 2.0s +All GPU(s): step 4967: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 4968: loss 4.1445, lr 2.8e-04, dt 2.1s +All GPU(s): step 4969: loss 4.1484, lr 2.8e-04, dt 2.0s +All GPU(s): step 4970: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 4971: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 4972: loss 4.1484, lr 2.8e-04, dt 2.1s +All GPU(s): step 4973: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 4974: loss 4.1328, lr 2.8e-04, dt 2.0s +All GPU(s): step 4975: loss 4.1250, lr 2.8e-04, dt 2.0s +All GPU(s): step 4976: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 4977: loss 4.1328, lr 2.8e-04, dt 2.0s +All GPU(s): step 4978: loss 4.1445, lr 2.8e-04, dt 2.1s +All GPU(s): step 4979: loss 4.1328, lr 2.8e-04, dt 2.0s +All GPU(s): step 4980: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 4981: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 4982: loss 4.1367, lr 2.8e-04, dt 2.1s +All GPU(s): step 4983: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 4984: loss 4.1523, lr 2.8e-04, dt 2.0s +All GPU(s): step 4985: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 4986: loss 4.1328, lr 2.8e-04, dt 2.0s +All GPU(s): step 4987: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 4988: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 4989: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 4990: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 4991: loss 4.1445, lr 2.8e-04, dt 2.1s +All GPU(s): step 4992: loss 4.1484, lr 2.8e-04, dt 2.1s +All GPU(s): step 4993: loss 4.1484, lr 2.8e-04, dt 2.0s +All GPU(s): step 4994: loss 4.1328, lr 2.8e-04, dt 2.0s +All GPU(s): step 4995: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 4996: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 4997: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 4998: loss 4.1484, lr 2.8e-04, dt 2.0s +All GPU(s): step 4999: loss 4.1152, lr 2.8e-04, dt 2.0s +saving checkpoint to checkpoints/ckpt_5000.pt +All GPU(s): step 5000: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 5001: loss 4.1523, lr 2.8e-04, dt 2.0s +All GPU(s): step 5002: loss 4.1367, lr 2.8e-04, dt 2.1s +All GPU(s): step 5003: loss 4.1328, lr 2.8e-04, dt 2.0s +All GPU(s): step 5004: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 5005: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 5006: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 5007: loss 4.1445, lr 2.8e-04, dt 2.1s +All GPU(s): step 5008: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 5009: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 5010: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5011: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 5012: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 5013: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 5014: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 5015: loss 4.1484, lr 2.8e-04, dt 2.0s +All GPU(s): step 5016: loss 4.1523, lr 2.8e-04, dt 2.1s +All GPU(s): step 5017: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5018: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 5019: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 5020: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 5021: loss 4.1445, lr 2.8e-04, dt 2.1s +All GPU(s): step 5022: loss 4.1484, lr 2.8e-04, dt 2.0s +All GPU(s): step 5023: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 5024: loss 4.1328, lr 2.8e-04, dt 2.0s +All GPU(s): step 5025: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5026: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 5027: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 5028: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5029: loss 4.1367, lr 2.8e-04, dt 2.1s +All GPU(s): step 5030: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5031: loss 4.1367, lr 2.8e-04, dt 2.1s +All GPU(s): step 5032: loss 4.1289, lr 2.8e-04, dt 2.0s +All GPU(s): step 5033: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 5034: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 5035: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5036: loss 4.1367, lr 2.8e-04, dt 2.1s +All GPU(s): step 5037: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 5038: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 5039: loss 4.1328, lr 2.8e-04, dt 2.0s +All GPU(s): step 5040: loss 4.1445, lr 2.8e-04, dt 2.1s +All GPU(s): step 5041: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 5042: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5043: loss 4.1328, lr 2.8e-04, dt 2.0s +All GPU(s): step 5044: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5045: loss 4.1484, lr 2.8e-04, dt 2.1s +All GPU(s): step 5046: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 5047: loss 4.1523, lr 2.8e-04, dt 2.0s +All GPU(s): step 5048: loss 4.1523, lr 2.8e-04, dt 2.0s +All GPU(s): step 5049: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 5050: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 5051: loss 4.1484, lr 2.8e-04, dt 2.0s +All GPU(s): step 5052: loss 4.1523, lr 2.8e-04, dt 2.0s +All GPU(s): step 5053: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 5054: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 5055: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 5056: loss 4.1328, lr 2.8e-04, dt 2.0s +All GPU(s): step 5057: loss 4.1328, lr 2.8e-04, dt 2.0s +All GPU(s): step 5058: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5059: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 5060: loss 4.1445, lr 2.8e-04, dt 2.1s +All GPU(s): step 5061: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 5062: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5063: loss 4.1602, lr 2.8e-04, dt 2.0s +All GPU(s): step 5064: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5065: loss 4.1523, lr 2.8e-04, dt 2.1s +All GPU(s): step 5066: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5067: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 5068: loss 4.1523, lr 2.8e-04, dt 2.0s +All GPU(s): step 5069: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 5070: loss 4.1445, lr 2.8e-04, dt 2.1s +All GPU(s): step 5071: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5072: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 5073: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5074: loss 4.1367, lr 2.8e-04, dt 2.1s +All GPU(s): step 5075: loss 4.1523, lr 2.8e-04, dt 2.0s +All GPU(s): step 5076: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 5077: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 5078: loss 4.1133, lr 2.8e-04, dt 2.0s +All GPU(s): step 5079: loss 4.1289, lr 2.8e-04, dt 2.1s +All GPU(s): step 5080: loss 4.1484, lr 2.8e-04, dt 2.0s +All GPU(s): step 5081: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 5082: loss 4.1328, lr 2.8e-04, dt 2.0s +All GPU(s): step 5083: loss 4.1328, lr 2.8e-04, dt 2.0s +All GPU(s): step 5084: loss 4.1406, lr 2.8e-04, dt 2.1s +All GPU(s): step 5085: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5086: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5087: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5088: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 5089: loss 4.1445, lr 2.8e-04, dt 2.1s +All GPU(s): step 5090: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5091: loss 4.1445, lr 2.8e-04, dt 2.0s +All GPU(s): step 5092: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5093: loss 4.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 5094: loss 4.1484, lr 2.8e-04, dt 2.1s +All GPU(s): step 5095: loss 4.1328, lr 2.8e-04, dt 2.0s +All GPU(s): step 5096: loss 4.1211, lr 2.8e-04, dt 2.0s +All GPU(s): step 5097: loss 4.1523, lr 2.8e-04, dt 2.0s +All GPU(s): step 5098: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5099: loss 4.1445, lr 2.8e-04, dt 2.1s +All GPU(s): step 5100: loss 4.1367, lr 2.8e-04, dt 2.0s +All GPU(s): step 5101: loss 4.1328, lr 2.7e-04, dt 2.0s +All GPU(s): step 5102: loss 4.1250, lr 2.7e-04, dt 2.1s +All GPU(s): step 5103: loss 4.1445, lr 2.7e-04, dt 2.1s +All GPU(s): step 5104: loss 4.1445, lr 2.7e-04, dt 2.0s +All GPU(s): step 5105: loss 4.1328, lr 2.7e-04, dt 2.0s +All GPU(s): step 5106: loss 4.1289, lr 2.7e-04, dt 2.0s +All GPU(s): step 5107: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5108: loss 4.1406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5109: loss 4.1445, lr 2.7e-04, dt 2.0s +All GPU(s): step 5110: loss 4.1367, lr 2.7e-04, dt 2.0s +All GPU(s): step 5111: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5112: loss 4.1250, lr 2.7e-04, dt 2.1s +All GPU(s): step 5113: loss 4.1367, lr 2.7e-04, dt 2.1s +All GPU(s): step 5114: loss 4.1445, lr 2.7e-04, dt 2.0s +All GPU(s): step 5115: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5116: loss 4.1367, lr 2.7e-04, dt 2.0s +All GPU(s): step 5117: loss 4.1445, lr 2.7e-04, dt 2.0s +All GPU(s): step 5118: loss 4.1250, lr 2.7e-04, dt 2.1s +All GPU(s): step 5119: loss 4.1484, lr 2.7e-04, dt 2.0s +All GPU(s): step 5120: loss 4.1445, lr 2.7e-04, dt 2.0s +All GPU(s): step 5121: loss 4.1367, lr 2.7e-04, dt 2.0s +All GPU(s): step 5122: loss 4.1250, lr 2.7e-04, dt 2.0s +All GPU(s): step 5123: loss 4.1445, lr 2.7e-04, dt 2.1s +All GPU(s): step 5124: loss 4.1367, lr 2.7e-04, dt 2.0s +All GPU(s): step 5125: loss 4.1289, lr 2.7e-04, dt 2.0s +All GPU(s): step 5126: loss 4.1523, lr 2.7e-04, dt 2.0s +All GPU(s): step 5127: loss 4.1328, lr 2.7e-04, dt 2.0s +All GPU(s): step 5128: loss 4.1406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5129: loss 4.1289, lr 2.7e-04, dt 2.0s +All GPU(s): step 5130: loss 4.1328, lr 2.7e-04, dt 2.0s +All GPU(s): step 5131: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5132: loss 4.1406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5133: loss 4.1406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5134: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5135: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5136: loss 4.1367, lr 2.7e-04, dt 2.0s +All GPU(s): step 5137: loss 4.1445, lr 2.7e-04, dt 2.1s +All GPU(s): step 5138: loss 4.1445, lr 2.7e-04, dt 2.1s +All GPU(s): step 5139: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5140: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5141: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5142: loss 4.1367, lr 2.7e-04, dt 2.2s +All GPU(s): step 5143: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5144: loss 4.1445, lr 2.7e-04, dt 2.0s +All GPU(s): step 5145: loss 4.1484, lr 2.7e-04, dt 2.0s +All GPU(s): step 5146: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5147: loss 4.1406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5148: loss 4.1445, lr 2.7e-04, dt 2.0s +All GPU(s): step 5149: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5150: loss 4.1445, lr 2.7e-04, dt 2.0s +All GPU(s): step 5151: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5152: loss 4.1523, lr 2.7e-04, dt 2.1s +All GPU(s): step 5153: loss 4.1445, lr 2.7e-04, dt 2.0s +All GPU(s): step 5154: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5155: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5156: loss 4.1289, lr 2.7e-04, dt 2.0s +All GPU(s): step 5157: loss 4.1367, lr 2.7e-04, dt 2.1s +All GPU(s): step 5158: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5159: loss 4.1523, lr 2.7e-04, dt 2.0s +All GPU(s): step 5160: loss 4.1445, lr 2.7e-04, dt 2.0s +All GPU(s): step 5161: loss 4.1328, lr 2.7e-04, dt 2.1s +All GPU(s): step 5162: loss 4.1406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5163: loss 4.1328, lr 2.7e-04, dt 2.0s +All GPU(s): step 5164: loss 4.1328, lr 2.7e-04, dt 2.0s +All GPU(s): step 5165: loss 4.1367, lr 2.7e-04, dt 2.0s +All GPU(s): step 5166: loss 4.1367, lr 2.7e-04, dt 2.1s +All GPU(s): step 5167: loss 4.1367, lr 2.7e-04, dt 2.1s +All GPU(s): step 5168: loss 4.1289, lr 2.7e-04, dt 2.0s +All GPU(s): step 5169: loss 4.1289, lr 2.7e-04, dt 2.0s +All GPU(s): step 5170: loss 4.1523, lr 2.7e-04, dt 2.0s +All GPU(s): step 5171: loss 4.1367, lr 2.7e-04, dt 2.2s +All GPU(s): step 5172: loss 4.1367, lr 2.7e-04, dt 2.0s +All GPU(s): step 5173: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5174: loss 4.1523, lr 2.7e-04, dt 2.0s +All GPU(s): step 5175: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5176: loss 4.1328, lr 2.7e-04, dt 2.2s +All GPU(s): step 5177: loss 4.1289, lr 2.7e-04, dt 2.0s +All GPU(s): step 5178: loss 4.1328, lr 2.7e-04, dt 2.0s +All GPU(s): step 5179: loss 4.1484, lr 2.7e-04, dt 2.1s +All GPU(s): step 5180: loss 4.1445, lr 2.7e-04, dt 2.1s +All GPU(s): step 5181: loss 4.1367, lr 2.7e-04, dt 2.1s +All GPU(s): step 5182: loss 4.1328, lr 2.7e-04, dt 2.0s +All GPU(s): step 5183: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5184: loss 4.1445, lr 2.7e-04, dt 2.0s +All GPU(s): step 5185: loss 4.1289, lr 2.7e-04, dt 2.0s +All GPU(s): step 5186: loss 4.1328, lr 2.7e-04, dt 2.1s +All GPU(s): step 5187: loss 4.1328, lr 2.7e-04, dt 2.0s +All GPU(s): step 5188: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5189: loss 4.1367, lr 2.7e-04, dt 2.0s +All GPU(s): step 5190: loss 4.1406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5191: loss 4.1406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5192: loss 4.1523, lr 2.7e-04, dt 2.0s +All GPU(s): step 5193: loss 4.1484, lr 2.7e-04, dt 2.0s +All GPU(s): step 5194: loss 4.1484, lr 2.7e-04, dt 2.0s +All GPU(s): step 5195: loss 4.1406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5196: loss 4.1445, lr 2.7e-04, dt 2.0s +All GPU(s): step 5197: loss 4.1445, lr 2.7e-04, dt 2.0s +All GPU(s): step 5198: loss 4.1484, lr 2.7e-04, dt 2.0s +All GPU(s): step 5199: loss 4.1445, lr 2.7e-04, dt 2.1s +All GPU(s): step 5200: loss 4.1367, lr 2.7e-04, dt 2.1s +All GPU(s): step 5201: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5202: loss 4.1445, lr 2.7e-04, dt 2.0s +All GPU(s): step 5203: loss 4.1367, lr 2.7e-04, dt 2.0s +All GPU(s): step 5204: loss 4.1367, lr 2.7e-04, dt 2.0s +All GPU(s): step 5205: loss 4.1406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5206: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5207: loss 4.1367, lr 2.7e-04, dt 2.0s +All GPU(s): step 5208: loss 4.1484, lr 2.7e-04, dt 2.0s +All GPU(s): step 5209: loss 4.1289, lr 2.7e-04, dt 2.0s +All GPU(s): step 5210: loss 4.1445, lr 2.7e-04, dt 2.1s +All GPU(s): step 5211: loss 4.1562, lr 2.7e-04, dt 2.0s +All GPU(s): step 5212: loss 4.1445, lr 2.7e-04, dt 2.0s +All GPU(s): step 5213: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5214: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5215: loss 4.1523, lr 2.7e-04, dt 2.1s +All GPU(s): step 5216: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5217: loss 4.1523, lr 2.7e-04, dt 2.0s +All GPU(s): step 5218: loss 4.1250, lr 2.7e-04, dt 2.0s +All GPU(s): step 5219: loss 4.1367, lr 2.7e-04, dt 2.1s +All GPU(s): step 5220: loss 4.1445, lr 2.7e-04, dt 2.1s +All GPU(s): step 5221: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5222: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5223: loss 4.1484, lr 2.7e-04, dt 2.0s +All GPU(s): step 5224: loss 4.1406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5225: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5226: loss 4.1250, lr 2.7e-04, dt 2.0s +All GPU(s): step 5227: loss 4.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5228: loss 4.1484, lr 2.7e-04, dt 2.0s +All GPU(s): step 5229: loss 4.1484, lr 2.7e-04, dt 2.1s +All GPU(s): step 5230: loss 4.1484, lr 2.7e-04, dt 2.0s +All GPU(s): step 5231: loss 4.1562, lr 2.7e-04, dt 2.1s +All GPU(s): step 5232: loss 4.1367, lr 2.7e-04, dt 2.0s +All GPU(s): step 5233: loss 4.1406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5234: loss 4.1406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5235: loss 4.1484, lr 2.7e-04, dt 2.0s +All GPU(s): step 5236: loss 4.1523, lr 2.7e-04, dt 2.1s +All GPU(s): step 5237: loss 4.1523, lr 2.7e-04, dt 2.0s +All GPU(s): step 5238: loss 4.1641, lr 2.7e-04, dt 2.0s +All GPU(s): step 5239: loss 4.1602, lr 2.7e-04, dt 2.1s +All GPU(s): step 5240: loss 4.1523, lr 2.7e-04, dt 2.0s +All GPU(s): step 5241: loss 4.1641, lr 2.7e-04, dt 2.0s +All GPU(s): step 5242: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5243: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5244: loss 4.1562, lr 2.6e-04, dt 2.1s +All GPU(s): step 5245: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5246: loss 4.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5247: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5248: loss 4.1445, lr 2.6e-04, dt 2.1s +All GPU(s): step 5249: loss 4.1367, lr 2.6e-04, dt 2.1s +All GPU(s): step 5250: loss 4.1484, lr 2.6e-04, dt 2.1s +All GPU(s): step 5251: loss 4.1562, lr 2.6e-04, dt 2.1s +All GPU(s): step 5252: loss 4.1328, lr 2.6e-04, dt 2.1s +All GPU(s): step 5253: loss 4.1523, lr 2.6e-04, dt 2.1s +All GPU(s): step 5254: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5255: loss 4.1406, lr 2.6e-04, dt 2.0s +All GPU(s): step 5256: loss 4.1406, lr 2.6e-04, dt 2.0s +All GPU(s): step 5257: loss 4.1445, lr 2.6e-04, dt 2.1s +All GPU(s): step 5258: loss 4.1406, lr 2.6e-04, dt 2.2s +All GPU(s): step 5259: loss 4.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5260: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5261: loss 4.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5262: loss 4.1562, lr 2.6e-04, dt 2.1s +All GPU(s): step 5263: loss 4.1641, lr 2.6e-04, dt 2.1s +All GPU(s): step 5264: loss 4.1562, lr 2.6e-04, dt 2.1s +All GPU(s): step 5265: loss 4.1523, lr 2.6e-04, dt 2.1s +All GPU(s): step 5266: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5267: loss 4.1602, lr 2.6e-04, dt 2.1s +All GPU(s): step 5268: loss 4.1719, lr 2.6e-04, dt 2.1s +All GPU(s): step 5269: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5270: loss 4.1602, lr 2.6e-04, dt 2.0s +All GPU(s): step 5271: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5272: loss 4.1602, lr 2.6e-04, dt 2.0s +All GPU(s): step 5273: loss 4.1562, lr 2.6e-04, dt 2.1s +All GPU(s): step 5274: loss 4.1602, lr 2.6e-04, dt 2.0s +All GPU(s): step 5275: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5276: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5277: loss 4.1719, lr 2.6e-04, dt 2.1s +All GPU(s): step 5278: loss 4.1602, lr 2.6e-04, dt 2.1s +All GPU(s): step 5279: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5280: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5281: loss 4.1406, lr 2.6e-04, dt 2.0s +All GPU(s): step 5282: loss 4.1523, lr 2.6e-04, dt 2.1s +All GPU(s): step 5283: loss 4.1367, lr 2.6e-04, dt 2.1s +All GPU(s): step 5284: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5285: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5286: loss 4.1406, lr 2.6e-04, dt 2.0s +All GPU(s): step 5287: loss 4.1406, lr 2.6e-04, dt 2.1s +All GPU(s): step 5288: loss 4.1484, lr 2.6e-04, dt 2.1s +All GPU(s): step 5289: loss 4.1367, lr 2.6e-04, dt 2.0s +All GPU(s): step 5290: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5291: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5292: loss 4.1602, lr 2.6e-04, dt 2.1s +All GPU(s): step 5293: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5294: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5295: loss 4.1641, lr 2.6e-04, dt 2.0s +All GPU(s): step 5296: loss 4.1484, lr 2.6e-04, dt 2.1s +All GPU(s): step 5297: loss 4.1445, lr 2.6e-04, dt 2.2s +All GPU(s): step 5298: loss 4.1484, lr 2.6e-04, dt 2.1s +All GPU(s): step 5299: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5300: loss 4.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5301: loss 4.1406, lr 2.6e-04, dt 2.0s +All GPU(s): step 5302: loss 4.1523, lr 2.6e-04, dt 2.1s +All GPU(s): step 5303: loss 4.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5304: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5305: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5306: loss 4.1445, lr 2.6e-04, dt 2.1s +All GPU(s): step 5307: loss 4.1445, lr 2.6e-04, dt 2.1s +All GPU(s): step 5308: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5309: loss 4.1445, lr 2.6e-04, dt 2.1s +All GPU(s): step 5310: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5311: loss 4.1523, lr 2.6e-04, dt 2.1s +All GPU(s): step 5312: loss 4.1562, lr 2.6e-04, dt 2.1s +All GPU(s): step 5313: loss 4.1523, lr 2.6e-04, dt 2.1s +All GPU(s): step 5314: loss 4.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5315: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5316: loss 4.1445, lr 2.6e-04, dt 2.1s +All GPU(s): step 5317: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5318: loss 4.1406, lr 2.6e-04, dt 2.0s +All GPU(s): step 5319: loss 4.1328, lr 2.6e-04, dt 2.0s +All GPU(s): step 5320: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5321: loss 4.1523, lr 2.6e-04, dt 2.1s +All GPU(s): step 5322: loss 4.1406, lr 2.6e-04, dt 2.0s +All GPU(s): step 5323: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5324: loss 4.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5325: loss 4.1445, lr 2.6e-04, dt 2.1s +All GPU(s): step 5326: loss 4.1445, lr 2.6e-04, dt 2.1s +All GPU(s): step 5327: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5328: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5329: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5330: loss 4.1328, lr 2.6e-04, dt 2.1s +All GPU(s): step 5331: loss 4.1523, lr 2.6e-04, dt 2.1s +All GPU(s): step 5332: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5333: loss 4.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5334: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5335: loss 4.1484, lr 2.6e-04, dt 2.1s +All GPU(s): step 5336: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5337: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5338: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5339: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5340: loss 4.1445, lr 2.6e-04, dt 2.1s +All GPU(s): step 5341: loss 4.1367, lr 2.6e-04, dt 2.0s +All GPU(s): step 5342: loss 4.1406, lr 2.6e-04, dt 2.0s +All GPU(s): step 5343: loss 4.1328, lr 2.6e-04, dt 2.0s +All GPU(s): step 5344: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5345: loss 4.1445, lr 2.6e-04, dt 2.1s +All GPU(s): step 5346: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5347: loss 4.1602, lr 2.6e-04, dt 2.0s +All GPU(s): step 5348: loss 4.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5349: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5350: loss 4.1562, lr 2.6e-04, dt 2.1s +All GPU(s): step 5351: loss 4.1602, lr 2.6e-04, dt 2.0s +All GPU(s): step 5352: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5353: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5354: loss 4.1602, lr 2.6e-04, dt 2.1s +All GPU(s): step 5355: loss 4.1406, lr 2.6e-04, dt 2.1s +All GPU(s): step 5356: loss 4.1328, lr 2.6e-04, dt 2.0s +All GPU(s): step 5357: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5358: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5359: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5360: loss 4.1445, lr 2.6e-04, dt 2.1s +All GPU(s): step 5361: loss 4.1602, lr 2.6e-04, dt 2.0s +All GPU(s): step 5362: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5363: loss 4.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5364: loss 4.1406, lr 2.6e-04, dt 2.1s +All GPU(s): step 5365: loss 4.1719, lr 2.6e-04, dt 2.1s +All GPU(s): step 5366: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5367: loss 4.1602, lr 2.6e-04, dt 2.0s +All GPU(s): step 5368: loss 4.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5369: loss 4.1523, lr 2.6e-04, dt 2.1s +All GPU(s): step 5370: loss 4.1641, lr 2.6e-04, dt 2.0s +All GPU(s): step 5371: loss 4.1562, lr 2.6e-04, dt 2.1s +All GPU(s): step 5372: loss 4.1484, lr 2.6e-04, dt 2.0s +All GPU(s): step 5373: loss 4.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5374: loss 4.1406, lr 2.6e-04, dt 2.1s +All GPU(s): step 5375: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5376: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5377: loss 4.1641, lr 2.6e-04, dt 2.0s +All GPU(s): step 5378: loss 4.1680, lr 2.6e-04, dt 2.1s +All GPU(s): step 5379: loss 4.1641, lr 2.6e-04, dt 2.1s +All GPU(s): step 5380: loss 4.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5381: loss 4.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5382: loss 4.1523, lr 2.6e-04, dt 2.0s +All GPU(s): step 5383: loss 4.1445, lr 2.6e-04, dt 2.0s +All GPU(s): step 5384: loss 4.1562, lr 2.5e-04, dt 2.2s +All GPU(s): step 5385: loss 4.1602, lr 2.5e-04, dt 2.0s +All GPU(s): step 5386: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5387: loss 4.1562, lr 2.5e-04, dt 2.1s +All GPU(s): step 5388: loss 4.1523, lr 2.5e-04, dt 2.1s +All GPU(s): step 5389: loss 4.1523, lr 2.5e-04, dt 2.1s +All GPU(s): step 5390: loss 4.1484, lr 2.5e-04, dt 2.0s +All GPU(s): step 5391: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5392: loss 4.1484, lr 2.5e-04, dt 2.0s +All GPU(s): step 5393: loss 4.1523, lr 2.5e-04, dt 2.1s +All GPU(s): step 5394: loss 4.1445, lr 2.5e-04, dt 2.1s +All GPU(s): step 5395: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5396: loss 4.1484, lr 2.5e-04, dt 2.0s +All GPU(s): step 5397: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5398: loss 4.1562, lr 2.5e-04, dt 2.1s +All GPU(s): step 5399: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5400: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5401: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5402: loss 4.1602, lr 2.5e-04, dt 2.1s +All GPU(s): step 5403: loss 4.1562, lr 2.5e-04, dt 2.1s +All GPU(s): step 5404: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5405: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5406: loss 4.1445, lr 2.5e-04, dt 2.0s +All GPU(s): step 5407: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5408: loss 4.1484, lr 2.5e-04, dt 2.1s +All GPU(s): step 5409: loss 4.1445, lr 2.5e-04, dt 2.1s +All GPU(s): step 5410: loss 4.1484, lr 2.5e-04, dt 2.0s +All GPU(s): step 5411: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5412: loss 4.1562, lr 2.5e-04, dt 2.1s +All GPU(s): step 5413: loss 4.1484, lr 2.5e-04, dt 2.1s +All GPU(s): step 5414: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5415: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5416: loss 4.1484, lr 2.5e-04, dt 2.0s +All GPU(s): step 5417: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5418: loss 4.1445, lr 2.5e-04, dt 2.1s +All GPU(s): step 5419: loss 4.1602, lr 2.5e-04, dt 2.0s +All GPU(s): step 5420: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5421: loss 4.1562, lr 2.5e-04, dt 2.1s +All GPU(s): step 5422: loss 4.1484, lr 2.5e-04, dt 2.1s +All GPU(s): step 5423: loss 4.1523, lr 2.5e-04, dt 2.1s +All GPU(s): step 5424: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5425: loss 4.1523, lr 2.5e-04, dt 2.1s +All GPU(s): step 5426: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5427: loss 4.1523, lr 2.5e-04, dt 2.1s +All GPU(s): step 5428: loss 4.1445, lr 2.5e-04, dt 2.1s +All GPU(s): step 5429: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5430: loss 4.1602, lr 2.5e-04, dt 2.0s +All GPU(s): step 5431: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5432: loss 4.1680, lr 2.5e-04, dt 2.1s +All GPU(s): step 5433: loss 4.1680, lr 2.5e-04, dt 2.1s +All GPU(s): step 5434: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5435: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5436: loss 4.1602, lr 2.5e-04, dt 2.0s +All GPU(s): step 5437: loss 4.1562, lr 2.5e-04, dt 2.2s +All GPU(s): step 5438: loss 4.1484, lr 2.5e-04, dt 2.0s +All GPU(s): step 5439: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5440: loss 4.1602, lr 2.5e-04, dt 2.0s +All GPU(s): step 5441: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5442: loss 4.1367, lr 2.5e-04, dt 2.1s +All GPU(s): step 5443: loss 4.1367, lr 2.5e-04, dt 2.0s +All GPU(s): step 5444: loss 4.1484, lr 2.5e-04, dt 2.1s +All GPU(s): step 5445: loss 4.1445, lr 2.5e-04, dt 2.0s +All GPU(s): step 5446: loss 4.1523, lr 2.5e-04, dt 2.1s +All GPU(s): step 5447: loss 4.1406, lr 2.5e-04, dt 2.1s +All GPU(s): step 5448: loss 4.1445, lr 2.5e-04, dt 2.0s +All GPU(s): step 5449: loss 4.1484, lr 2.5e-04, dt 2.0s +All GPU(s): step 5450: loss 4.1367, lr 2.5e-04, dt 2.1s +All GPU(s): step 5451: loss 4.1562, lr 2.5e-04, dt 2.1s +All GPU(s): step 5452: loss 4.1484, lr 2.5e-04, dt 2.1s +All GPU(s): step 5453: loss 4.1484, lr 2.5e-04, dt 2.0s +All GPU(s): step 5454: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5455: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5456: loss 4.1562, lr 2.5e-04, dt 2.1s +All GPU(s): step 5457: loss 4.1602, lr 2.5e-04, dt 2.0s +All GPU(s): step 5458: loss 4.1602, lr 2.5e-04, dt 2.0s +All GPU(s): step 5459: loss 4.1641, lr 2.5e-04, dt 2.0s +All GPU(s): step 5460: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5461: loss 4.1602, lr 2.5e-04, dt 2.1s +All GPU(s): step 5462: loss 4.1602, lr 2.5e-04, dt 2.0s +All GPU(s): step 5463: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5464: loss 4.1484, lr 2.5e-04, dt 2.0s +All GPU(s): step 5465: loss 4.1445, lr 2.5e-04, dt 2.0s +All GPU(s): step 5466: loss 4.1562, lr 2.5e-04, dt 2.1s +All GPU(s): step 5467: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5468: loss 4.1367, lr 2.5e-04, dt 2.0s +All GPU(s): step 5469: loss 4.1367, lr 2.5e-04, dt 2.0s +All GPU(s): step 5470: loss 4.1367, lr 2.5e-04, dt 2.1s +All GPU(s): step 5471: loss 4.1406, lr 2.5e-04, dt 2.1s +All GPU(s): step 5472: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5473: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5474: loss 4.1328, lr 2.5e-04, dt 2.0s +All GPU(s): step 5475: loss 4.1445, lr 2.5e-04, dt 2.1s +All GPU(s): step 5476: loss 4.1367, lr 2.5e-04, dt 2.1s +All GPU(s): step 5477: loss 4.1367, lr 2.5e-04, dt 2.1s +All GPU(s): step 5478: loss 4.1484, lr 2.5e-04, dt 2.0s +All GPU(s): step 5479: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5480: loss 4.1523, lr 2.5e-04, dt 2.1s +All GPU(s): step 5481: loss 4.1328, lr 2.5e-04, dt 2.1s +All GPU(s): step 5482: loss 4.1445, lr 2.5e-04, dt 2.0s +All GPU(s): step 5483: loss 4.1250, lr 2.5e-04, dt 2.0s +All GPU(s): step 5484: loss 4.1406, lr 2.5e-04, dt 2.0s +All GPU(s): step 5485: loss 4.1250, lr 2.5e-04, dt 2.1s +All GPU(s): step 5486: loss 4.1406, lr 2.5e-04, dt 2.0s +All GPU(s): step 5487: loss 4.1523, lr 2.5e-04, dt 2.1s +All GPU(s): step 5488: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5489: loss 4.1406, lr 2.5e-04, dt 2.0s +All GPU(s): step 5490: loss 4.1484, lr 2.5e-04, dt 2.1s +All GPU(s): step 5491: loss 4.1406, lr 2.5e-04, dt 2.0s +All GPU(s): step 5492: loss 4.1484, lr 2.5e-04, dt 2.0s +All GPU(s): step 5493: loss 4.1484, lr 2.5e-04, dt 2.0s +All GPU(s): step 5494: loss 4.1484, lr 2.5e-04, dt 2.0s +All GPU(s): step 5495: loss 4.1445, lr 2.5e-04, dt 2.1s +All GPU(s): step 5496: loss 4.1289, lr 2.5e-04, dt 2.0s +All GPU(s): step 5497: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5498: loss 4.1328, lr 2.5e-04, dt 2.0s +All GPU(s): step 5499: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5500: loss 4.1562, lr 2.5e-04, dt 2.1s +All GPU(s): step 5501: loss 4.1484, lr 2.5e-04, dt 2.0s +All GPU(s): step 5502: loss 4.1406, lr 2.5e-04, dt 2.0s +All GPU(s): step 5503: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5504: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5505: loss 4.1562, lr 2.5e-04, dt 2.2s +All GPU(s): step 5506: loss 4.1367, lr 2.5e-04, dt 2.0s +All GPU(s): step 5507: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5508: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5509: loss 4.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5510: loss 4.1406, lr 2.5e-04, dt 2.1s +All GPU(s): step 5511: loss 4.1328, lr 2.5e-04, dt 2.0s +All GPU(s): step 5512: loss 4.1484, lr 2.5e-04, dt 2.0s +All GPU(s): step 5513: loss 4.1641, lr 2.5e-04, dt 2.0s +All GPU(s): step 5514: loss 4.1484, lr 2.5e-04, dt 2.1s +All GPU(s): step 5515: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5516: loss 4.1523, lr 2.5e-04, dt 2.0s +All GPU(s): step 5517: loss 4.1406, lr 2.5e-04, dt 2.0s +All GPU(s): step 5518: loss 4.1445, lr 2.5e-04, dt 2.0s +All GPU(s): step 5519: loss 4.1484, lr 2.5e-04, dt 2.1s +All GPU(s): step 5520: loss 4.1445, lr 2.5e-04, dt 2.0s +All GPU(s): step 5521: loss 4.1445, lr 2.5e-04, dt 2.0s +All GPU(s): step 5522: loss 4.1445, lr 2.5e-04, dt 2.0s +All GPU(s): step 5523: loss 4.1328, lr 2.5e-04, dt 2.0s +All GPU(s): step 5524: loss 4.1406, lr 2.5e-04, dt 2.2s +All GPU(s): step 5525: loss 4.1445, lr 2.5e-04, dt 2.1s +All GPU(s): step 5526: loss 4.1406, lr 2.4e-04, dt 2.1s +All GPU(s): step 5527: loss 4.1484, lr 2.4e-04, dt 2.1s +All GPU(s): step 5528: loss 4.1367, lr 2.4e-04, dt 2.1s +All GPU(s): step 5529: loss 4.1562, lr 2.4e-04, dt 2.2s +All GPU(s): step 5530: loss 4.1445, lr 2.4e-04, dt 2.0s +All GPU(s): step 5531: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5532: loss 4.1445, lr 2.4e-04, dt 2.0s +All GPU(s): step 5533: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5534: loss 4.1445, lr 2.4e-04, dt 2.1s +All GPU(s): step 5535: loss 4.1406, lr 2.4e-04, dt 2.1s +All GPU(s): step 5536: loss 4.1406, lr 2.4e-04, dt 2.0s +All GPU(s): step 5537: loss 4.1445, lr 2.4e-04, dt 2.0s +All GPU(s): step 5538: loss 4.1484, lr 2.4e-04, dt 2.1s +All GPU(s): step 5539: loss 4.1367, lr 2.4e-04, dt 2.1s +All GPU(s): step 5540: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5541: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5542: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5543: loss 4.1406, lr 2.4e-04, dt 2.1s +All GPU(s): step 5544: loss 4.1445, lr 2.4e-04, dt 2.0s +All GPU(s): step 5545: loss 4.1406, lr 2.4e-04, dt 2.0s +All GPU(s): step 5546: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5547: loss 4.1406, lr 2.4e-04, dt 2.0s +All GPU(s): step 5548: loss 4.1484, lr 2.4e-04, dt 2.1s +All GPU(s): step 5549: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5550: loss 4.1602, lr 2.4e-04, dt 2.0s +All GPU(s): step 5551: loss 4.1523, lr 2.4e-04, dt 2.1s +All GPU(s): step 5552: loss 4.1367, lr 2.4e-04, dt 2.1s +All GPU(s): step 5553: loss 4.1445, lr 2.4e-04, dt 2.2s +All GPU(s): step 5554: loss 4.1406, lr 2.4e-04, dt 2.0s +All GPU(s): step 5555: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5556: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5557: loss 4.1406, lr 2.4e-04, dt 2.0s +All GPU(s): step 5558: loss 4.1367, lr 2.4e-04, dt 2.2s +All GPU(s): step 5559: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5560: loss 4.1367, lr 2.4e-04, dt 2.1s +All GPU(s): step 5561: loss 4.1406, lr 2.4e-04, dt 2.0s +All GPU(s): step 5562: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5563: loss 4.1523, lr 2.4e-04, dt 2.1s +All GPU(s): step 5564: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5565: loss 4.1328, lr 2.4e-04, dt 2.0s +All GPU(s): step 5566: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5567: loss 4.1328, lr 2.4e-04, dt 2.0s +All GPU(s): step 5568: loss 4.1406, lr 2.4e-04, dt 2.1s +All GPU(s): step 5569: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5570: loss 4.1406, lr 2.4e-04, dt 2.0s +All GPU(s): step 5571: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5572: loss 4.1484, lr 2.4e-04, dt 2.1s +All GPU(s): step 5573: loss 4.1484, lr 2.4e-04, dt 2.1s +All GPU(s): step 5574: loss 4.1641, lr 2.4e-04, dt 2.0s +All GPU(s): step 5575: loss 4.1035, lr 2.4e-04, dt 2.0s +All GPU(s): step 5576: loss 4.1445, lr 2.4e-04, dt 2.0s +All GPU(s): step 5577: loss 4.1641, lr 2.4e-04, dt 2.1s +All GPU(s): step 5578: loss 4.1406, lr 2.4e-04, dt 2.0s +All GPU(s): step 5579: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5580: loss 4.1602, lr 2.4e-04, dt 2.0s +All GPU(s): step 5581: loss 4.1562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5582: loss 4.1562, lr 2.4e-04, dt 2.1s +All GPU(s): step 5583: loss 4.1562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5584: loss 4.1562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5585: loss 4.1562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5586: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5587: loss 4.1562, lr 2.4e-04, dt 2.2s +All GPU(s): step 5588: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5589: loss 4.1562, lr 2.4e-04, dt 2.1s +All GPU(s): step 5590: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5591: loss 4.1562, lr 2.4e-04, dt 2.1s +All GPU(s): step 5592: loss 4.1562, lr 2.4e-04, dt 2.1s +All GPU(s): step 5593: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5594: loss 4.1445, lr 2.4e-04, dt 2.0s +All GPU(s): step 5595: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5596: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5597: loss 4.1562, lr 2.4e-04, dt 2.1s +All GPU(s): step 5598: loss 4.1562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5599: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5600: loss 4.1562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5601: loss 4.1445, lr 2.4e-04, dt 2.1s +All GPU(s): step 5602: loss 4.1641, lr 2.4e-04, dt 2.1s +All GPU(s): step 5603: loss 4.1602, lr 2.4e-04, dt 2.0s +All GPU(s): step 5604: loss 4.1562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5605: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5606: loss 4.1484, lr 2.4e-04, dt 2.1s +All GPU(s): step 5607: loss 4.1641, lr 2.4e-04, dt 2.0s +All GPU(s): step 5608: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5609: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5610: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5611: loss 4.1406, lr 2.4e-04, dt 2.1s +All GPU(s): step 5612: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5613: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5614: loss 4.1445, lr 2.4e-04, dt 2.0s +All GPU(s): step 5615: loss 4.1562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5616: loss 4.1602, lr 2.4e-04, dt 2.2s +All GPU(s): step 5617: loss 4.1641, lr 2.4e-04, dt 2.0s +All GPU(s): step 5618: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5619: loss 4.1641, lr 2.4e-04, dt 2.0s +All GPU(s): step 5620: loss 4.1680, lr 2.4e-04, dt 2.0s +All GPU(s): step 5621: loss 4.1602, lr 2.4e-04, dt 2.1s +All GPU(s): step 5622: loss 4.1562, lr 2.4e-04, dt 2.1s +All GPU(s): step 5623: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5624: loss 4.1445, lr 2.4e-04, dt 2.0s +All GPU(s): step 5625: loss 4.1445, lr 2.4e-04, dt 2.0s +All GPU(s): step 5626: loss 4.1562, lr 2.4e-04, dt 2.1s +All GPU(s): step 5627: loss 4.1562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5628: loss 4.1602, lr 2.4e-04, dt 2.0s +All GPU(s): step 5629: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5630: loss 4.1562, lr 2.4e-04, dt 2.1s +All GPU(s): step 5631: loss 4.1484, lr 2.4e-04, dt 2.1s +All GPU(s): step 5632: loss 4.1602, lr 2.4e-04, dt 2.1s +All GPU(s): step 5633: loss 4.1602, lr 2.4e-04, dt 2.0s +All GPU(s): step 5634: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5635: loss 4.1562, lr 2.4e-04, dt 2.2s +All GPU(s): step 5636: loss 4.1562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5637: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5638: loss 4.1602, lr 2.4e-04, dt 2.0s +All GPU(s): step 5639: loss 4.1641, lr 2.4e-04, dt 2.0s +All GPU(s): step 5640: loss 4.1523, lr 2.4e-04, dt 2.1s +All GPU(s): step 5641: loss 4.1562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5642: loss 4.1602, lr 2.4e-04, dt 2.0s +All GPU(s): step 5643: loss 4.1602, lr 2.4e-04, dt 2.0s +All GPU(s): step 5644: loss 4.1602, lr 2.4e-04, dt 2.0s +All GPU(s): step 5645: loss 4.1523, lr 2.4e-04, dt 2.2s +All GPU(s): step 5646: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5647: loss 4.1445, lr 2.4e-04, dt 2.0s +All GPU(s): step 5648: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5649: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5650: loss 4.1523, lr 2.4e-04, dt 2.2s +All GPU(s): step 5651: loss 4.1484, lr 2.4e-04, dt 2.1s +All GPU(s): step 5652: loss 4.1523, lr 2.4e-04, dt 2.1s +All GPU(s): step 5653: loss 4.1484, lr 2.4e-04, dt 2.1s +All GPU(s): step 5654: loss 4.1680, lr 2.4e-04, dt 2.1s +All GPU(s): step 5655: loss 4.1523, lr 2.4e-04, dt 2.1s +All GPU(s): step 5656: loss 4.1562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5657: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5658: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5659: loss 4.1445, lr 2.4e-04, dt 2.1s +All GPU(s): step 5660: loss 4.1602, lr 2.4e-04, dt 2.0s +All GPU(s): step 5661: loss 4.1562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5662: loss 4.1562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5663: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5664: loss 4.1523, lr 2.4e-04, dt 2.1s +All GPU(s): step 5665: loss 4.1445, lr 2.4e-04, dt 2.0s +All GPU(s): step 5666: loss 4.1484, lr 2.4e-04, dt 2.0s +All GPU(s): step 5667: loss 4.1523, lr 2.4e-04, dt 2.0s +All GPU(s): step 5668: loss 4.1445, lr 2.4e-04, dt 2.0s +All GPU(s): step 5669: loss 4.1484, lr 2.3e-04, dt 2.1s +All GPU(s): step 5670: loss 4.1484, lr 2.3e-04, dt 2.0s +All GPU(s): step 5671: loss 4.1484, lr 2.3e-04, dt 2.0s +All GPU(s): step 5672: loss 4.1641, lr 2.3e-04, dt 2.0s +All GPU(s): step 5673: loss 4.1562, lr 2.3e-04, dt 2.1s +All GPU(s): step 5674: loss 4.1602, lr 2.3e-04, dt 2.1s +All GPU(s): step 5675: loss 4.1602, lr 2.3e-04, dt 2.1s +All GPU(s): step 5676: loss 4.1523, lr 2.3e-04, dt 2.0s +All GPU(s): step 5677: loss 4.1719, lr 2.3e-04, dt 2.0s +All GPU(s): step 5678: loss 4.1445, lr 2.3e-04, dt 2.1s +All GPU(s): step 5679: loss 4.1484, lr 2.3e-04, dt 2.1s +All GPU(s): step 5680: loss 4.1602, lr 2.3e-04, dt 2.0s +All GPU(s): step 5681: loss 4.1523, lr 2.3e-04, dt 2.0s +All GPU(s): step 5682: loss 4.1523, lr 2.3e-04, dt 2.0s +All GPU(s): step 5683: loss 4.1641, lr 2.3e-04, dt 2.1s +All GPU(s): step 5684: loss 4.1562, lr 2.3e-04, dt 2.1s +All GPU(s): step 5685: loss 4.1445, lr 2.3e-04, dt 2.0s +All GPU(s): step 5686: loss 4.1602, lr 2.3e-04, dt 2.0s +All GPU(s): step 5687: loss 4.1602, lr 2.3e-04, dt 2.0s +All GPU(s): step 5688: loss 4.1562, lr 2.3e-04, dt 2.2s +All GPU(s): step 5689: loss 4.1523, lr 2.3e-04, dt 2.1s +All GPU(s): step 5690: loss 4.1562, lr 2.3e-04, dt 2.1s +All GPU(s): step 5691: loss 4.1484, lr 2.3e-04, dt 2.1s +All GPU(s): step 5692: loss 4.1562, lr 2.3e-04, dt 2.1s +All GPU(s): step 5693: loss 4.1523, lr 2.3e-04, dt 2.1s +All GPU(s): step 5694: loss 4.1602, lr 2.3e-04, dt 2.0s +All GPU(s): step 5695: loss 4.1641, lr 2.3e-04, dt 2.0s +All GPU(s): step 5696: loss 4.1445, lr 2.3e-04, dt 2.0s +All GPU(s): step 5697: loss 4.1523, lr 2.3e-04, dt 2.0s +All GPU(s): step 5698: loss 4.1562, lr 2.3e-04, dt 2.1s +All GPU(s): step 5699: loss 4.1523, lr 2.3e-04, dt 2.1s +All GPU(s): step 5700: loss 4.1328, lr 2.3e-04, dt 2.1s +All GPU(s): step 5701: loss 4.1328, lr 2.3e-04, dt 2.1s +All GPU(s): step 5702: loss 4.1445, lr 2.3e-04, dt 2.0s +All GPU(s): step 5703: loss 4.1523, lr 2.3e-04, dt 2.1s +All GPU(s): step 5704: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5705: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5706: loss 4.1250, lr 2.3e-04, dt 2.0s +All GPU(s): step 5707: loss 4.1328, lr 2.3e-04, dt 2.1s +All GPU(s): step 5708: loss 4.1367, lr 2.3e-04, dt 2.1s +All GPU(s): step 5709: loss 4.1523, lr 2.3e-04, dt 2.0s +All GPU(s): step 5710: loss 4.1523, lr 2.3e-04, dt 2.0s +All GPU(s): step 5711: loss 4.1328, lr 2.3e-04, dt 2.0s +All GPU(s): step 5712: loss 4.1289, lr 2.3e-04, dt 2.1s +All GPU(s): step 5713: loss 4.1289, lr 2.3e-04, dt 2.0s +All GPU(s): step 5714: loss 4.1406, lr 2.3e-04, dt 2.1s +All GPU(s): step 5715: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5716: loss 4.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5717: loss 4.1523, lr 2.3e-04, dt 2.2s +All GPU(s): step 5718: loss 4.1484, lr 2.3e-04, dt 2.1s +All GPU(s): step 5719: loss 4.1328, lr 2.3e-04, dt 2.0s +All GPU(s): step 5720: loss 4.1484, lr 2.3e-04, dt 2.0s +All GPU(s): step 5721: loss 4.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5722: loss 4.1484, lr 2.3e-04, dt 2.2s +All GPU(s): step 5723: loss 4.1562, lr 2.3e-04, dt 2.1s +All GPU(s): step 5724: loss 4.1445, lr 2.3e-04, dt 2.1s +All GPU(s): step 5725: loss 4.1445, lr 2.3e-04, dt 2.1s +All GPU(s): step 5726: loss 4.1445, lr 2.3e-04, dt 2.1s +All GPU(s): step 5727: loss 4.1406, lr 2.3e-04, dt 2.2s +All GPU(s): step 5728: loss 4.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5729: loss 4.1484, lr 2.3e-04, dt 2.0s +All GPU(s): step 5730: loss 4.1523, lr 2.3e-04, dt 2.0s +All GPU(s): step 5731: loss 4.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5732: loss 4.1406, lr 2.3e-04, dt 2.1s +All GPU(s): step 5733: loss 4.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5734: loss 4.1445, lr 2.3e-04, dt 2.1s +All GPU(s): step 5735: loss 4.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5736: loss 4.1406, lr 2.3e-04, dt 2.1s +All GPU(s): step 5737: loss 4.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5738: loss 4.1523, lr 2.3e-04, dt 2.1s +All GPU(s): step 5739: loss 4.1406, lr 2.3e-04, dt 2.1s +All GPU(s): step 5740: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5741: loss 4.1445, lr 2.3e-04, dt 2.1s +All GPU(s): step 5742: loss 4.1523, lr 2.3e-04, dt 2.0s +All GPU(s): step 5743: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5744: loss 4.1562, lr 2.3e-04, dt 2.1s +All GPU(s): step 5745: loss 4.1445, lr 2.3e-04, dt 2.0s +All GPU(s): step 5746: loss 4.1602, lr 2.3e-04, dt 2.2s +All GPU(s): step 5747: loss 4.1445, lr 2.3e-04, dt 2.0s +All GPU(s): step 5748: loss 4.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5749: loss 4.1484, lr 2.3e-04, dt 2.1s +All GPU(s): step 5750: loss 4.1523, lr 2.3e-04, dt 2.0s +All GPU(s): step 5751: loss 4.1523, lr 2.3e-04, dt 2.1s +All GPU(s): step 5752: loss 4.1445, lr 2.3e-04, dt 2.0s +All GPU(s): step 5753: loss 4.1445, lr 2.3e-04, dt 2.0s +All GPU(s): step 5754: loss 4.1523, lr 2.3e-04, dt 2.0s +All GPU(s): step 5755: loss 4.1562, lr 2.3e-04, dt 2.0s +All GPU(s): step 5756: loss 4.1562, lr 2.3e-04, dt 2.1s +All GPU(s): step 5757: loss 4.1562, lr 2.3e-04, dt 2.0s +All GPU(s): step 5758: loss 4.1445, lr 2.3e-04, dt 2.0s +All GPU(s): step 5759: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5760: loss 4.1484, lr 2.3e-04, dt 2.1s +All GPU(s): step 5761: loss 4.1406, lr 2.3e-04, dt 2.1s +All GPU(s): step 5762: loss 4.1484, lr 2.3e-04, dt 2.0s +All GPU(s): step 5763: loss 4.1445, lr 2.3e-04, dt 2.1s +All GPU(s): step 5764: loss 4.1445, lr 2.3e-04, dt 2.0s +All GPU(s): step 5765: loss 4.1406, lr 2.3e-04, dt 2.1s +All GPU(s): step 5766: loss 4.1445, lr 2.3e-04, dt 2.0s +All GPU(s): step 5767: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5768: loss 4.1328, lr 2.3e-04, dt 2.1s +All GPU(s): step 5769: loss 4.1445, lr 2.3e-04, dt 2.0s +All GPU(s): step 5770: loss 4.1445, lr 2.3e-04, dt 2.1s +All GPU(s): step 5771: loss 4.1445, lr 2.3e-04, dt 2.0s +All GPU(s): step 5772: loss 4.1445, lr 2.3e-04, dt 2.1s +All GPU(s): step 5773: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5774: loss 4.1484, lr 2.3e-04, dt 2.0s +All GPU(s): step 5775: loss 4.1484, lr 2.3e-04, dt 2.1s +All GPU(s): step 5776: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5777: loss 4.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5778: loss 4.1211, lr 2.3e-04, dt 2.0s +All GPU(s): step 5779: loss 4.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5780: loss 4.1445, lr 2.3e-04, dt 2.1s +All GPU(s): step 5781: loss 4.1445, lr 2.3e-04, dt 2.1s +All GPU(s): step 5782: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5783: loss 4.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5784: loss 4.1445, lr 2.3e-04, dt 2.0s +All GPU(s): step 5785: loss 4.1367, lr 2.3e-04, dt 2.1s +All GPU(s): step 5786: loss 4.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5787: loss 4.1289, lr 2.3e-04, dt 2.0s +All GPU(s): step 5788: loss 4.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5789: loss 4.1445, lr 2.3e-04, dt 2.1s +All GPU(s): step 5790: loss 4.1484, lr 2.3e-04, dt 2.1s +All GPU(s): step 5791: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5792: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5793: loss 4.1289, lr 2.3e-04, dt 2.0s +All GPU(s): step 5794: loss 4.1445, lr 2.3e-04, dt 2.1s +All GPU(s): step 5795: loss 4.1367, lr 2.3e-04, dt 2.1s +All GPU(s): step 5796: loss 4.1328, lr 2.3e-04, dt 2.0s +All GPU(s): step 5797: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5798: loss 4.1328, lr 2.3e-04, dt 2.1s +All GPU(s): step 5799: loss 4.1367, lr 2.3e-04, dt 2.1s +All GPU(s): step 5800: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5801: loss 4.1484, lr 2.3e-04, dt 2.0s +All GPU(s): step 5802: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5803: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5804: loss 4.1367, lr 2.3e-04, dt 2.1s +All GPU(s): step 5805: loss 4.1484, lr 2.3e-04, dt 2.0s +All GPU(s): step 5806: loss 4.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5807: loss 4.1445, lr 2.3e-04, dt 2.0s +All GPU(s): step 5808: loss 4.1406, lr 2.3e-04, dt 2.1s +All GPU(s): step 5809: loss 4.1406, lr 2.3e-04, dt 2.1s +All GPU(s): step 5810: loss 4.1445, lr 2.3e-04, dt 2.0s +All GPU(s): step 5811: loss 4.1367, lr 2.3e-04, dt 2.0s +All GPU(s): step 5812: loss 4.1328, lr 2.3e-04, dt 2.0s +All GPU(s): step 5813: loss 4.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5814: loss 4.1406, lr 2.2e-04, dt 2.1s +All GPU(s): step 5815: loss 4.1484, lr 2.2e-04, dt 2.0s +All GPU(s): step 5816: loss 4.1484, lr 2.2e-04, dt 2.1s +All GPU(s): step 5817: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5818: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5819: loss 4.1445, lr 2.2e-04, dt 2.1s +All GPU(s): step 5820: loss 4.1289, lr 2.2e-04, dt 2.0s +All GPU(s): step 5821: loss 4.1289, lr 2.2e-04, dt 2.1s +All GPU(s): step 5822: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5823: loss 4.1406, lr 2.2e-04, dt 2.1s +All GPU(s): step 5824: loss 4.1562, lr 2.2e-04, dt 2.0s +All GPU(s): step 5825: loss 4.1523, lr 2.2e-04, dt 2.1s +All GPU(s): step 5826: loss 4.1484, lr 2.2e-04, dt 2.0s +All GPU(s): step 5827: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5828: loss 4.1406, lr 2.2e-04, dt 2.1s +All GPU(s): step 5829: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5830: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5831: loss 4.1445, lr 2.2e-04, dt 2.0s +All GPU(s): step 5832: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5833: loss 4.1445, lr 2.2e-04, dt 2.1s +All GPU(s): step 5834: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5835: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5836: loss 4.1289, lr 2.2e-04, dt 2.0s +All GPU(s): step 5837: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5838: loss 4.1484, lr 2.2e-04, dt 2.2s +All GPU(s): step 5839: loss 4.1484, lr 2.2e-04, dt 2.0s +All GPU(s): step 5840: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5841: loss 4.1484, lr 2.2e-04, dt 2.1s +All GPU(s): step 5842: loss 4.1367, lr 2.2e-04, dt 2.1s +All GPU(s): step 5843: loss 4.1367, lr 2.2e-04, dt 2.2s +All GPU(s): step 5844: loss 4.1289, lr 2.2e-04, dt 2.1s +All GPU(s): step 5845: loss 4.1406, lr 2.2e-04, dt 2.1s +All GPU(s): step 5846: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5847: loss 4.1367, lr 2.2e-04, dt 2.1s +All GPU(s): step 5848: loss 4.1562, lr 2.2e-04, dt 2.1s +All GPU(s): step 5849: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5850: loss 4.1250, lr 2.2e-04, dt 2.0s +All GPU(s): step 5851: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5852: loss 4.1406, lr 2.2e-04, dt 2.1s +All GPU(s): step 5853: loss 4.1523, lr 2.2e-04, dt 2.1s +All GPU(s): step 5854: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5855: loss 4.1484, lr 2.2e-04, dt 2.0s +All GPU(s): step 5856: loss 4.1445, lr 2.2e-04, dt 2.0s +All GPU(s): step 5857: loss 4.1289, lr 2.2e-04, dt 2.1s +All GPU(s): step 5858: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5859: loss 4.1445, lr 2.2e-04, dt 2.0s +All GPU(s): step 5860: loss 4.1445, lr 2.2e-04, dt 2.0s +All GPU(s): step 5861: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5862: loss 4.1445, lr 2.2e-04, dt 2.1s +All GPU(s): step 5863: loss 4.1445, lr 2.2e-04, dt 2.0s +All GPU(s): step 5864: loss 4.1445, lr 2.2e-04, dt 2.0s +All GPU(s): step 5865: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5866: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5867: loss 4.1406, lr 2.2e-04, dt 2.1s +All GPU(s): step 5868: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5869: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5870: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5871: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5872: loss 4.1250, lr 2.2e-04, dt 2.1s +All GPU(s): step 5873: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5874: loss 4.1289, lr 2.2e-04, dt 2.0s +All GPU(s): step 5875: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5876: loss 4.1328, lr 2.2e-04, dt 2.1s +All GPU(s): step 5877: loss 4.1484, lr 2.2e-04, dt 2.1s +All GPU(s): step 5878: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5879: loss 4.1289, lr 2.2e-04, dt 2.0s +All GPU(s): step 5880: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5881: loss 4.1406, lr 2.2e-04, dt 2.1s +All GPU(s): step 5882: loss 4.1484, lr 2.2e-04, dt 2.1s +All GPU(s): step 5883: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5884: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5885: loss 4.1445, lr 2.2e-04, dt 2.0s +All GPU(s): step 5886: loss 4.1367, lr 2.2e-04, dt 2.1s +All GPU(s): step 5887: loss 4.1523, lr 2.2e-04, dt 2.1s +All GPU(s): step 5888: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5889: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5890: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5891: loss 4.1445, lr 2.2e-04, dt 2.1s +All GPU(s): step 5892: loss 4.1406, lr 2.2e-04, dt 2.1s +All GPU(s): step 5893: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5894: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5895: loss 4.1328, lr 2.2e-04, dt 2.1s +All GPU(s): step 5896: loss 4.1445, lr 2.2e-04, dt 2.2s +All GPU(s): step 5897: loss 4.1445, lr 2.2e-04, dt 2.1s +All GPU(s): step 5898: loss 4.1445, lr 2.2e-04, dt 2.0s +All GPU(s): step 5899: loss 4.1445, lr 2.2e-04, dt 2.1s +All GPU(s): step 5900: loss 4.1328, lr 2.2e-04, dt 2.1s +All GPU(s): step 5901: loss 4.1289, lr 2.2e-04, dt 2.1s +All GPU(s): step 5902: loss 4.1523, lr 2.2e-04, dt 2.0s +All GPU(s): step 5903: loss 4.1289, lr 2.2e-04, dt 2.0s +All GPU(s): step 5904: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5905: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5906: loss 4.1406, lr 2.2e-04, dt 2.1s +All GPU(s): step 5907: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5908: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5909: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5910: loss 4.1484, lr 2.2e-04, dt 2.1s +All GPU(s): step 5911: loss 4.1484, lr 2.2e-04, dt 2.1s +All GPU(s): step 5912: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5913: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5914: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5915: loss 4.1445, lr 2.2e-04, dt 2.1s +All GPU(s): step 5916: loss 4.1289, lr 2.2e-04, dt 2.0s +All GPU(s): step 5917: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5918: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5919: loss 4.1445, lr 2.2e-04, dt 2.0s +All GPU(s): step 5920: loss 4.1484, lr 2.2e-04, dt 2.1s +All GPU(s): step 5921: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5922: loss 4.1445, lr 2.2e-04, dt 2.0s +All GPU(s): step 5923: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5924: loss 4.1484, lr 2.2e-04, dt 2.0s +All GPU(s): step 5925: loss 4.1445, lr 2.2e-04, dt 2.2s +All GPU(s): step 5926: loss 4.1250, lr 2.2e-04, dt 2.1s +All GPU(s): step 5927: loss 4.1445, lr 2.2e-04, dt 2.0s +All GPU(s): step 5928: loss 4.1367, lr 2.2e-04, dt 2.1s +All GPU(s): step 5929: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5930: loss 4.1406, lr 2.2e-04, dt 2.1s +All GPU(s): step 5931: loss 4.1328, lr 2.2e-04, dt 2.1s +All GPU(s): step 5932: loss 4.1445, lr 2.2e-04, dt 2.0s +All GPU(s): step 5933: loss 4.1445, lr 2.2e-04, dt 2.0s +All GPU(s): step 5934: loss 4.1406, lr 2.2e-04, dt 2.1s +All GPU(s): step 5935: loss 4.1406, lr 2.2e-04, dt 2.1s +All GPU(s): step 5936: loss 4.1484, lr 2.2e-04, dt 2.0s +All GPU(s): step 5937: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5938: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5939: loss 4.1523, lr 2.2e-04, dt 2.1s +All GPU(s): step 5940: loss 4.1367, lr 2.2e-04, dt 2.1s +All GPU(s): step 5941: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5942: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5943: loss 4.1523, lr 2.2e-04, dt 2.0s +All GPU(s): step 5944: loss 4.1406, lr 2.2e-04, dt 2.1s +All GPU(s): step 5945: loss 4.1445, lr 2.2e-04, dt 2.0s +All GPU(s): step 5946: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5947: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5948: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5949: loss 4.1367, lr 2.2e-04, dt 2.1s +All GPU(s): step 5950: loss 4.1445, lr 2.2e-04, dt 2.1s +All GPU(s): step 5951: loss 4.1406, lr 2.2e-04, dt 2.0s +All GPU(s): step 5952: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5953: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5954: loss 4.1484, lr 2.2e-04, dt 2.1s +All GPU(s): step 5955: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5956: loss 4.1367, lr 2.2e-04, dt 2.0s +All GPU(s): step 5957: loss 4.1328, lr 2.2e-04, dt 2.0s +All GPU(s): step 5958: loss 4.1484, lr 2.2e-04, dt 2.0s +All GPU(s): step 5959: loss 4.1445, lr 2.2e-04, dt 2.1s +All GPU(s): step 5960: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 5961: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 5962: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 5963: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 5964: loss 4.1484, lr 2.1e-04, dt 2.1s +All GPU(s): step 5965: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 5966: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 5967: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 5968: loss 4.1289, lr 2.1e-04, dt 2.1s +All GPU(s): step 5969: loss 4.1445, lr 2.1e-04, dt 2.1s +All GPU(s): step 5970: loss 4.1367, lr 2.1e-04, dt 2.0s +All GPU(s): step 5971: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 5972: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 5973: loss 4.1445, lr 2.1e-04, dt 2.1s +All GPU(s): step 5974: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 5975: loss 4.1367, lr 2.1e-04, dt 2.0s +All GPU(s): step 5976: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 5977: loss 4.1523, lr 2.1e-04, dt 2.0s +All GPU(s): step 5978: loss 4.1445, lr 2.1e-04, dt 2.1s +All GPU(s): step 5979: loss 4.1289, lr 2.1e-04, dt 2.0s +All GPU(s): step 5980: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 5981: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 5982: loss 4.1289, lr 2.1e-04, dt 2.0s +All GPU(s): step 5983: loss 4.1445, lr 2.1e-04, dt 2.1s +All GPU(s): step 5984: loss 4.1367, lr 2.1e-04, dt 2.1s +All GPU(s): step 5985: loss 4.1445, lr 2.1e-04, dt 2.1s +All GPU(s): step 5986: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 5987: loss 4.1367, lr 2.1e-04, dt 2.0s +All GPU(s): step 5988: loss 4.1367, lr 2.1e-04, dt 2.1s +All GPU(s): step 5989: loss 4.1250, lr 2.1e-04, dt 2.0s +All GPU(s): step 5990: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 5991: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 5992: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 5993: loss 4.1367, lr 2.1e-04, dt 2.1s +All GPU(s): step 5994: loss 4.1406, lr 2.1e-04, dt 2.1s +All GPU(s): step 5995: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 5996: loss 4.1406, lr 2.1e-04, dt 2.1s +All GPU(s): step 5997: loss 4.1328, lr 2.1e-04, dt 2.1s +All GPU(s): step 5998: loss 4.1445, lr 2.1e-04, dt 2.1s +All GPU(s): step 5999: loss 4.1289, lr 2.1e-04, dt 2.0s +saving checkpoint to checkpoints/ckpt_6000.pt +All GPU(s): step 6000: loss 4.1406, lr 2.1e-04, dt 2.1s +All GPU(s): step 6001: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 6002: loss 4.1445, lr 2.1e-04, dt 2.1s +All GPU(s): step 6003: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6004: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6005: loss 4.1250, lr 2.1e-04, dt 2.0s +All GPU(s): step 6006: loss 4.1523, lr 2.1e-04, dt 2.0s +All GPU(s): step 6007: loss 4.1250, lr 2.1e-04, dt 2.1s +All GPU(s): step 6008: loss 4.1172, lr 2.1e-04, dt 2.0s +All GPU(s): step 6009: loss 4.1367, lr 2.1e-04, dt 2.0s +All GPU(s): step 6010: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6011: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6012: loss 4.1484, lr 2.1e-04, dt 2.1s +All GPU(s): step 6013: loss 4.1289, lr 2.1e-04, dt 2.0s +All GPU(s): step 6014: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6015: loss 4.1523, lr 2.1e-04, dt 2.1s +All GPU(s): step 6016: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6017: loss 4.1523, lr 2.1e-04, dt 2.1s +All GPU(s): step 6018: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6019: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6020: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6021: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 6022: loss 4.1367, lr 2.1e-04, dt 2.1s +All GPU(s): step 6023: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 6024: loss 4.1289, lr 2.1e-04, dt 2.0s +All GPU(s): step 6025: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6026: loss 4.1328, lr 2.1e-04, dt 2.1s +All GPU(s): step 6027: loss 4.1406, lr 2.1e-04, dt 2.1s +All GPU(s): step 6028: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 6029: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6030: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6031: loss 4.1445, lr 2.1e-04, dt 2.1s +All GPU(s): step 6032: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 6033: loss 4.1211, lr 2.1e-04, dt 2.0s +All GPU(s): step 6034: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 6035: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 6036: loss 4.1406, lr 2.1e-04, dt 2.1s +All GPU(s): step 6037: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 6038: loss 4.1289, lr 2.1e-04, dt 2.0s +All GPU(s): step 6039: loss 4.1523, lr 2.1e-04, dt 2.0s +All GPU(s): step 6040: loss 4.1289, lr 2.1e-04, dt 2.0s +All GPU(s): step 6041: loss 4.1445, lr 2.1e-04, dt 2.1s +All GPU(s): step 6042: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 6043: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6044: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6045: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6046: loss 4.1367, lr 2.1e-04, dt 2.1s +All GPU(s): step 6047: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6048: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 6049: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6050: loss 4.1523, lr 2.1e-04, dt 2.0s +All GPU(s): step 6051: loss 4.1367, lr 2.1e-04, dt 2.1s +All GPU(s): step 6052: loss 4.1367, lr 2.1e-04, dt 2.0s +All GPU(s): step 6053: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6054: loss 4.1523, lr 2.1e-04, dt 2.0s +All GPU(s): step 6055: loss 4.1406, lr 2.1e-04, dt 2.1s +All GPU(s): step 6056: loss 4.1406, lr 2.1e-04, dt 2.1s +All GPU(s): step 6057: loss 4.1367, lr 2.1e-04, dt 2.0s +All GPU(s): step 6058: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6059: loss 4.1367, lr 2.1e-04, dt 2.0s +All GPU(s): step 6060: loss 4.1328, lr 2.1e-04, dt 2.1s +All GPU(s): step 6061: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6062: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6063: loss 4.1367, lr 2.1e-04, dt 2.0s +All GPU(s): step 6064: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 6065: loss 4.1484, lr 2.1e-04, dt 2.1s +All GPU(s): step 6066: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6067: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6068: loss 4.1367, lr 2.1e-04, dt 2.0s +All GPU(s): step 6069: loss 4.1367, lr 2.1e-04, dt 2.0s +All GPU(s): step 6070: loss 4.1445, lr 2.1e-04, dt 2.1s +All GPU(s): step 6071: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 6072: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 6073: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 6074: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 6075: loss 4.1367, lr 2.1e-04, dt 2.1s +All GPU(s): step 6076: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6077: loss 4.1367, lr 2.1e-04, dt 2.0s +All GPU(s): step 6078: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6079: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 6080: loss 4.1484, lr 2.1e-04, dt 2.1s +All GPU(s): step 6081: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6082: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6083: loss 4.1484, lr 2.1e-04, dt 2.0s +All GPU(s): step 6084: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 6085: loss 4.1328, lr 2.1e-04, dt 2.1s +All GPU(s): step 6086: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 6087: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 6088: loss 4.1289, lr 2.1e-04, dt 2.0s +All GPU(s): step 6089: loss 4.1445, lr 2.1e-04, dt 2.1s +All GPU(s): step 6090: loss 4.1406, lr 2.1e-04, dt 2.1s +All GPU(s): step 6091: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6092: loss 4.1367, lr 2.1e-04, dt 2.0s +All GPU(s): step 6093: loss 4.1523, lr 2.1e-04, dt 2.0s +All GPU(s): step 6094: loss 4.1406, lr 2.1e-04, dt 2.1s +All GPU(s): step 6095: loss 4.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6096: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 6097: loss 4.1367, lr 2.1e-04, dt 2.0s +All GPU(s): step 6098: loss 4.1562, lr 2.1e-04, dt 2.0s +All GPU(s): step 6099: loss 4.1445, lr 2.1e-04, dt 2.2s +All GPU(s): step 6100: loss 4.1328, lr 2.1e-04, dt 2.0s +All GPU(s): step 6101: loss 4.1523, lr 2.1e-04, dt 2.0s +All GPU(s): step 6102: loss 4.1406, lr 2.1e-04, dt 2.1s +All GPU(s): step 6103: loss 4.1328, lr 2.1e-04, dt 2.1s +All GPU(s): step 6104: loss 4.1211, lr 2.1e-04, dt 2.2s +All GPU(s): step 6105: loss 4.1367, lr 2.1e-04, dt 2.0s +All GPU(s): step 6106: loss 4.1367, lr 2.1e-04, dt 2.0s +All GPU(s): step 6107: loss 4.1445, lr 2.1e-04, dt 2.0s +All GPU(s): step 6108: loss 4.1289, lr 2.0e-04, dt 2.1s +All GPU(s): step 6109: loss 4.1328, lr 2.0e-04, dt 2.2s +All GPU(s): step 6110: loss 4.1406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6111: loss 4.1328, lr 2.0e-04, dt 2.1s +All GPU(s): step 6112: loss 4.1328, lr 2.0e-04, dt 2.1s +All GPU(s): step 6113: loss 4.1445, lr 2.0e-04, dt 2.1s +All GPU(s): step 6114: loss 4.1367, lr 2.0e-04, dt 2.1s +All GPU(s): step 6115: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6116: loss 4.1445, lr 2.0e-04, dt 2.0s +All GPU(s): step 6117: loss 4.1445, lr 2.0e-04, dt 2.1s +All GPU(s): step 6118: loss 4.1328, lr 2.0e-04, dt 2.1s +All GPU(s): step 6119: loss 4.1367, lr 2.0e-04, dt 2.1s +All GPU(s): step 6120: loss 4.1211, lr 2.0e-04, dt 2.0s +All GPU(s): step 6121: loss 4.1328, lr 2.0e-04, dt 2.0s +All GPU(s): step 6122: loss 4.1328, lr 2.0e-04, dt 2.1s +All GPU(s): step 6123: loss 4.1406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6124: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6125: loss 4.1523, lr 2.0e-04, dt 2.0s +All GPU(s): step 6126: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6127: loss 4.1367, lr 2.0e-04, dt 2.0s +All GPU(s): step 6128: loss 4.1367, lr 2.0e-04, dt 2.1s +All GPU(s): step 6129: loss 4.1523, lr 2.0e-04, dt 2.0s +All GPU(s): step 6130: loss 4.1367, lr 2.0e-04, dt 2.0s +All GPU(s): step 6131: loss 4.1367, lr 2.0e-04, dt 2.0s +All GPU(s): step 6132: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6133: loss 4.1445, lr 2.0e-04, dt 2.1s +All GPU(s): step 6134: loss 4.1328, lr 2.0e-04, dt 2.0s +All GPU(s): step 6135: loss 4.1445, lr 2.0e-04, dt 2.0s +All GPU(s): step 6136: loss 4.1445, lr 2.0e-04, dt 2.0s +All GPU(s): step 6137: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6138: loss 4.1367, lr 2.0e-04, dt 2.1s +All GPU(s): step 6139: loss 4.1328, lr 2.0e-04, dt 2.0s +All GPU(s): step 6140: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6141: loss 4.1445, lr 2.0e-04, dt 2.0s +All GPU(s): step 6142: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6143: loss 4.1289, lr 2.0e-04, dt 2.1s +All GPU(s): step 6144: loss 4.1289, lr 2.0e-04, dt 2.0s +All GPU(s): step 6145: loss 4.1328, lr 2.0e-04, dt 2.0s +All GPU(s): step 6146: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6147: loss 4.1406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6148: loss 4.1445, lr 2.0e-04, dt 2.1s +All GPU(s): step 6149: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6150: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6151: loss 4.1328, lr 2.0e-04, dt 2.0s +All GPU(s): step 6152: loss 4.1367, lr 2.0e-04, dt 2.1s +All GPU(s): step 6153: loss 4.1445, lr 2.0e-04, dt 2.0s +All GPU(s): step 6154: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6155: loss 4.1328, lr 2.0e-04, dt 2.0s +All GPU(s): step 6156: loss 4.1367, lr 2.0e-04, dt 2.0s +All GPU(s): step 6157: loss 4.1367, lr 2.0e-04, dt 2.1s +All GPU(s): step 6158: loss 4.1328, lr 2.0e-04, dt 2.0s +All GPU(s): step 6159: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6160: loss 4.1445, lr 2.0e-04, dt 2.0s +All GPU(s): step 6161: loss 4.1367, lr 2.0e-04, dt 2.0s +All GPU(s): step 6162: loss 4.1445, lr 2.0e-04, dt 2.1s +All GPU(s): step 6163: loss 4.1367, lr 2.0e-04, dt 2.0s +All GPU(s): step 6164: loss 4.1289, lr 2.0e-04, dt 2.0s +All GPU(s): step 6165: loss 4.1367, lr 2.0e-04, dt 2.0s +All GPU(s): step 6166: loss 4.1367, lr 2.0e-04, dt 2.0s +All GPU(s): step 6167: loss 4.1523, lr 2.0e-04, dt 2.1s +All GPU(s): step 6168: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6169: loss 4.1445, lr 2.0e-04, dt 2.0s +All GPU(s): step 6170: loss 4.1406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6171: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6172: loss 4.1328, lr 2.0e-04, dt 2.1s +All GPU(s): step 6173: loss 4.1328, lr 2.0e-04, dt 2.1s +All GPU(s): step 6174: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6175: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6176: loss 4.1445, lr 2.0e-04, dt 2.0s +All GPU(s): step 6177: loss 4.1484, lr 2.0e-04, dt 2.1s +All GPU(s): step 6178: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6179: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6180: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6181: loss 4.1406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6182: loss 4.1562, lr 2.0e-04, dt 2.1s +All GPU(s): step 6183: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6184: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6185: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6186: loss 4.1406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6187: loss 4.1445, lr 2.0e-04, dt 2.1s +All GPU(s): step 6188: loss 4.1289, lr 2.0e-04, dt 2.1s +All GPU(s): step 6189: loss 4.1367, lr 2.0e-04, dt 2.0s +All GPU(s): step 6190: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6191: loss 4.1406, lr 2.0e-04, dt 2.2s +All GPU(s): step 6192: loss 4.1562, lr 2.0e-04, dt 2.1s +All GPU(s): step 6193: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6194: loss 4.1523, lr 2.0e-04, dt 2.0s +All GPU(s): step 6195: loss 4.1406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6196: loss 4.1406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6197: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6198: loss 4.1523, lr 2.0e-04, dt 2.1s +All GPU(s): step 6199: loss 4.1484, lr 2.0e-04, dt 2.1s +All GPU(s): step 6200: loss 4.1445, lr 2.0e-04, dt 2.1s +All GPU(s): step 6201: loss 4.1484, lr 2.0e-04, dt 2.1s +All GPU(s): step 6202: loss 4.1641, lr 2.0e-04, dt 2.0s +All GPU(s): step 6203: loss 4.1562, lr 2.0e-04, dt 2.1s +All GPU(s): step 6204: loss 4.1562, lr 2.0e-04, dt 2.0s +All GPU(s): step 6205: loss 4.1641, lr 2.0e-04, dt 2.1s +All GPU(s): step 6206: loss 4.1680, lr 2.0e-04, dt 2.1s +All GPU(s): step 6207: loss 4.1836, lr 2.0e-04, dt 2.1s +All GPU(s): step 6208: loss 4.1875, lr 2.0e-04, dt 2.1s +All GPU(s): step 6209: loss 4.1641, lr 2.0e-04, dt 2.0s +All GPU(s): step 6210: loss 4.1797, lr 2.0e-04, dt 2.1s +All GPU(s): step 6211: loss 4.1797, lr 2.0e-04, dt 2.1s +All GPU(s): step 6212: loss 4.1797, lr 2.0e-04, dt 2.0s +All GPU(s): step 6213: loss 4.1797, lr 2.0e-04, dt 2.0s +All GPU(s): step 6214: loss 4.1797, lr 2.0e-04, dt 2.0s +All GPU(s): step 6215: loss 4.1719, lr 2.0e-04, dt 2.1s +All GPU(s): step 6216: loss 4.1602, lr 2.0e-04, dt 2.0s +All GPU(s): step 6217: loss 4.1758, lr 2.0e-04, dt 2.0s +All GPU(s): step 6218: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6219: loss 4.1602, lr 2.0e-04, dt 2.1s +All GPU(s): step 6220: loss 4.1719, lr 2.0e-04, dt 2.1s +All GPU(s): step 6221: loss 4.1562, lr 2.0e-04, dt 2.0s +All GPU(s): step 6222: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6223: loss 4.1445, lr 2.0e-04, dt 2.0s +All GPU(s): step 6224: loss 4.1445, lr 2.0e-04, dt 2.0s +All GPU(s): step 6225: loss 4.1484, lr 2.0e-04, dt 2.1s +All GPU(s): step 6226: loss 4.1367, lr 2.0e-04, dt 2.0s +All GPU(s): step 6227: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6228: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6229: loss 4.1406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6230: loss 4.1445, lr 2.0e-04, dt 2.2s +All GPU(s): step 6231: loss 4.1328, lr 2.0e-04, dt 2.0s +All GPU(s): step 6232: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6233: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6234: loss 4.1445, lr 2.0e-04, dt 2.1s +All GPU(s): step 6235: loss 4.1406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6236: loss 4.1328, lr 2.0e-04, dt 2.0s +All GPU(s): step 6237: loss 4.1523, lr 2.0e-04, dt 2.0s +All GPU(s): step 6238: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6239: loss 4.1562, lr 2.0e-04, dt 2.1s +All GPU(s): step 6240: loss 4.1602, lr 2.0e-04, dt 2.1s +All GPU(s): step 6241: loss 4.1523, lr 2.0e-04, dt 2.1s +All GPU(s): step 6242: loss 4.1484, lr 2.0e-04, dt 2.1s +All GPU(s): step 6243: loss 4.1523, lr 2.0e-04, dt 2.0s +All GPU(s): step 6244: loss 4.1562, lr 2.0e-04, dt 2.1s +All GPU(s): step 6245: loss 4.1641, lr 2.0e-04, dt 2.1s +All GPU(s): step 6246: loss 4.1562, lr 2.0e-04, dt 2.1s +All GPU(s): step 6247: loss 4.1602, lr 2.0e-04, dt 2.0s +All GPU(s): step 6248: loss 4.1602, lr 2.0e-04, dt 2.0s +All GPU(s): step 6249: loss 4.1562, lr 2.0e-04, dt 2.1s +All GPU(s): step 6250: loss 4.1484, lr 2.0e-04, dt 2.0s +All GPU(s): step 6251: loss 4.1562, lr 2.0e-04, dt 2.0s +All GPU(s): step 6252: loss 4.1523, lr 2.0e-04, dt 2.0s +All GPU(s): step 6253: loss 4.1523, lr 2.0e-04, dt 2.0s +All GPU(s): step 6254: loss 4.1484, lr 2.0e-04, dt 2.1s +All GPU(s): step 6255: loss 4.1367, lr 2.0e-04, dt 2.0s +All GPU(s): step 6256: loss 4.1445, lr 2.0e-04, dt 2.0s +All GPU(s): step 6257: loss 4.1641, lr 2.0e-04, dt 2.1s +All GPU(s): step 6258: loss 4.1328, lr 1.9e-04, dt 2.0s +All GPU(s): step 6259: loss 4.1445, lr 1.9e-04, dt 2.1s +All GPU(s): step 6260: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6261: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6262: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6263: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6264: loss 4.1562, lr 1.9e-04, dt 2.1s +All GPU(s): step 6265: loss 4.1328, lr 1.9e-04, dt 2.0s +All GPU(s): step 6266: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6267: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6268: loss 4.1445, lr 1.9e-04, dt 2.1s +All GPU(s): step 6269: loss 4.1328, lr 1.9e-04, dt 2.1s +All GPU(s): step 6270: loss 4.1367, lr 1.9e-04, dt 2.1s +All GPU(s): step 6271: loss 4.1406, lr 1.9e-04, dt 2.1s +All GPU(s): step 6272: loss 4.1289, lr 1.9e-04, dt 2.0s +All GPU(s): step 6273: loss 4.1406, lr 1.9e-04, dt 2.1s +All GPU(s): step 6274: loss 4.1445, lr 1.9e-04, dt 2.1s +All GPU(s): step 6275: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6276: loss 4.1328, lr 1.9e-04, dt 2.0s +All GPU(s): step 6277: loss 4.1211, lr 1.9e-04, dt 2.0s +All GPU(s): step 6278: loss 4.1250, lr 1.9e-04, dt 2.2s +All GPU(s): step 6279: loss 4.1250, lr 1.9e-04, dt 2.1s +All GPU(s): step 6280: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6281: loss 4.1406, lr 1.9e-04, dt 2.1s +All GPU(s): step 6282: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6283: loss 4.1406, lr 1.9e-04, dt 2.1s +All GPU(s): step 6284: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6285: loss 4.1328, lr 1.9e-04, dt 2.0s +All GPU(s): step 6286: loss 4.1289, lr 1.9e-04, dt 2.0s +All GPU(s): step 6287: loss 4.1484, lr 1.9e-04, dt 2.1s +All GPU(s): step 6288: loss 4.1406, lr 1.9e-04, dt 2.1s +All GPU(s): step 6289: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6290: loss 4.1484, lr 1.9e-04, dt 2.1s +All GPU(s): step 6291: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6292: loss 4.1484, lr 1.9e-04, dt 2.1s +All GPU(s): step 6293: loss 4.1406, lr 1.9e-04, dt 2.1s +All GPU(s): step 6294: loss 4.1289, lr 1.9e-04, dt 2.0s +All GPU(s): step 6295: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6296: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6297: loss 4.1367, lr 1.9e-04, dt 2.1s +All GPU(s): step 6298: loss 4.1328, lr 1.9e-04, dt 2.0s +All GPU(s): step 6299: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6300: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6301: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6302: loss 4.1289, lr 1.9e-04, dt 2.1s +All GPU(s): step 6303: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6304: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6305: loss 4.1289, lr 1.9e-04, dt 2.0s +All GPU(s): step 6306: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6307: loss 4.1445, lr 1.9e-04, dt 2.1s +All GPU(s): step 6308: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6309: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6310: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6311: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6312: loss 4.1289, lr 1.9e-04, dt 2.2s +All GPU(s): step 6313: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6314: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6315: loss 4.1367, lr 1.9e-04, dt 2.1s +All GPU(s): step 6316: loss 4.1523, lr 1.9e-04, dt 2.1s +All GPU(s): step 6317: loss 4.1406, lr 1.9e-04, dt 2.1s +All GPU(s): step 6318: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6319: loss 4.1523, lr 1.9e-04, dt 2.1s +All GPU(s): step 6320: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6321: loss 4.1445, lr 1.9e-04, dt 2.1s +All GPU(s): step 6322: loss 4.1523, lr 1.9e-04, dt 2.1s +All GPU(s): step 6323: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6324: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6325: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6326: loss 4.1445, lr 1.9e-04, dt 2.1s +All GPU(s): step 6327: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6328: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6329: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6330: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6331: loss 4.1328, lr 1.9e-04, dt 2.1s +All GPU(s): step 6332: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6333: loss 4.1562, lr 1.9e-04, dt 2.0s +All GPU(s): step 6334: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6335: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6336: loss 4.1484, lr 1.9e-04, dt 2.2s +All GPU(s): step 6337: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6338: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6339: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6340: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6341: loss 4.1445, lr 1.9e-04, dt 2.1s +All GPU(s): step 6342: loss 4.1484, lr 1.9e-04, dt 2.0s +All GPU(s): step 6343: loss 4.1523, lr 1.9e-04, dt 2.1s +All GPU(s): step 6344: loss 4.1406, lr 1.9e-04, dt 2.1s +All GPU(s): step 6345: loss 4.1328, lr 1.9e-04, dt 2.0s +All GPU(s): step 6346: loss 4.1367, lr 1.9e-04, dt 2.1s +All GPU(s): step 6347: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6348: loss 4.1484, lr 1.9e-04, dt 2.0s +All GPU(s): step 6349: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6350: loss 4.1523, lr 1.9e-04, dt 2.1s +All GPU(s): step 6351: loss 4.1445, lr 1.9e-04, dt 2.1s +All GPU(s): step 6352: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6353: loss 4.1445, lr 1.9e-04, dt 2.1s +All GPU(s): step 6354: loss 4.1289, lr 1.9e-04, dt 2.0s +All GPU(s): step 6355: loss 4.1367, lr 1.9e-04, dt 2.1s +All GPU(s): step 6356: loss 4.1367, lr 1.9e-04, dt 2.1s +All GPU(s): step 6357: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6358: loss 4.1484, lr 1.9e-04, dt 2.0s +All GPU(s): step 6359: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6360: loss 4.1523, lr 1.9e-04, dt 2.1s +All GPU(s): step 6361: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6362: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6363: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6364: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6365: loss 4.1445, lr 1.9e-04, dt 2.1s +All GPU(s): step 6366: loss 4.1328, lr 1.9e-04, dt 2.0s +All GPU(s): step 6367: loss 4.1289, lr 1.9e-04, dt 2.1s +All GPU(s): step 6368: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6369: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6370: loss 4.1367, lr 1.9e-04, dt 2.2s +All GPU(s): step 6371: loss 4.1328, lr 1.9e-04, dt 2.0s +All GPU(s): step 6372: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6373: loss 4.1289, lr 1.9e-04, dt 2.0s +All GPU(s): step 6374: loss 4.1367, lr 1.9e-04, dt 2.1s +All GPU(s): step 6375: loss 4.1523, lr 1.9e-04, dt 2.1s +All GPU(s): step 6376: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6377: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6378: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6379: loss 4.1367, lr 1.9e-04, dt 2.1s +All GPU(s): step 6380: loss 4.1328, lr 1.9e-04, dt 2.1s +All GPU(s): step 6381: loss 4.1211, lr 1.9e-04, dt 2.0s +All GPU(s): step 6382: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6383: loss 4.1328, lr 1.9e-04, dt 2.0s +All GPU(s): step 6384: loss 4.1367, lr 1.9e-04, dt 2.1s +All GPU(s): step 6385: loss 4.1328, lr 1.9e-04, dt 2.0s +All GPU(s): step 6386: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6387: loss 4.1328, lr 1.9e-04, dt 2.0s +All GPU(s): step 6388: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6389: loss 4.1367, lr 1.9e-04, dt 2.1s +All GPU(s): step 6390: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6391: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6392: loss 4.1289, lr 1.9e-04, dt 2.0s +All GPU(s): step 6393: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6394: loss 4.1367, lr 1.9e-04, dt 2.1s +All GPU(s): step 6395: loss 4.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6396: loss 4.1406, lr 1.9e-04, dt 2.1s +All GPU(s): step 6397: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6398: loss 4.1328, lr 1.9e-04, dt 2.0s +All GPU(s): step 6399: loss 4.1289, lr 1.9e-04, dt 2.1s +All GPU(s): step 6400: loss 4.1250, lr 1.9e-04, dt 2.0s +All GPU(s): step 6401: loss 4.1328, lr 1.9e-04, dt 2.0s +All GPU(s): step 6402: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6403: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6404: loss 4.1445, lr 1.9e-04, dt 2.1s +All GPU(s): step 6405: loss 4.1445, lr 1.9e-04, dt 2.0s +All GPU(s): step 6406: loss 4.1250, lr 1.9e-04, dt 2.0s +All GPU(s): step 6407: loss 4.1367, lr 1.9e-04, dt 2.0s +All GPU(s): step 6408: loss 4.1484, lr 1.9e-04, dt 2.1s +All GPU(s): step 6409: loss 4.1250, lr 1.9e-04, dt 2.2s +All GPU(s): step 6410: loss 4.1328, lr 1.8e-04, dt 2.0s +All GPU(s): step 6411: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6412: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6413: loss 4.1367, lr 1.8e-04, dt 2.1s +All GPU(s): step 6414: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6415: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6416: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6417: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6418: loss 4.1328, lr 1.8e-04, dt 2.1s +All GPU(s): step 6419: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6420: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6421: loss 4.1328, lr 1.8e-04, dt 2.0s +All GPU(s): step 6422: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6423: loss 4.1250, lr 1.8e-04, dt 2.1s +All GPU(s): step 6424: loss 4.1406, lr 1.8e-04, dt 2.1s +All GPU(s): step 6425: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6426: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6427: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6428: loss 4.1484, lr 1.8e-04, dt 2.1s +All GPU(s): step 6429: loss 4.1406, lr 1.8e-04, dt 2.1s +All GPU(s): step 6430: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6431: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6432: loss 4.1328, lr 1.8e-04, dt 2.0s +All GPU(s): step 6433: loss 4.1367, lr 1.8e-04, dt 2.1s +All GPU(s): step 6434: loss 4.1289, lr 1.8e-04, dt 2.0s +All GPU(s): step 6435: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6436: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6437: loss 4.1445, lr 1.8e-04, dt 2.1s +All GPU(s): step 6438: loss 4.1523, lr 1.8e-04, dt 2.1s +All GPU(s): step 6439: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6440: loss 4.1406, lr 1.8e-04, dt 2.1s +All GPU(s): step 6441: loss 4.1289, lr 1.8e-04, dt 2.0s +All GPU(s): step 6442: loss 4.1367, lr 1.8e-04, dt 2.1s +All GPU(s): step 6443: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6444: loss 4.1523, lr 1.8e-04, dt 2.0s +All GPU(s): step 6445: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6446: loss 4.1289, lr 1.8e-04, dt 2.0s +All GPU(s): step 6447: loss 4.1406, lr 1.8e-04, dt 2.1s +All GPU(s): step 6448: loss 4.1484, lr 1.8e-04, dt 2.0s +All GPU(s): step 6449: loss 4.1562, lr 1.8e-04, dt 2.1s +All GPU(s): step 6450: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6451: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6452: loss 4.1367, lr 1.8e-04, dt 2.1s +All GPU(s): step 6453: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6454: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6455: loss 4.1328, lr 1.8e-04, dt 2.0s +All GPU(s): step 6456: loss 4.1484, lr 1.8e-04, dt 2.1s +All GPU(s): step 6457: loss 4.1289, lr 1.8e-04, dt 2.1s +All GPU(s): step 6458: loss 4.1406, lr 1.8e-04, dt 2.1s +All GPU(s): step 6459: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6460: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6461: loss 4.1406, lr 1.8e-04, dt 2.1s +All GPU(s): step 6462: loss 4.1367, lr 1.8e-04, dt 2.1s +All GPU(s): step 6463: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6464: loss 4.1328, lr 1.8e-04, dt 2.0s +All GPU(s): step 6465: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6466: loss 4.1406, lr 1.8e-04, dt 2.1s +All GPU(s): step 6467: loss 4.1328, lr 1.8e-04, dt 2.1s +All GPU(s): step 6468: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6469: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6470: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6471: loss 4.1406, lr 1.8e-04, dt 2.1s +All GPU(s): step 6472: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6473: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6474: loss 4.1445, lr 1.8e-04, dt 2.1s +All GPU(s): step 6475: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6476: loss 4.1562, lr 1.8e-04, dt 2.1s +All GPU(s): step 6477: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6478: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6479: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6480: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6481: loss 4.1328, lr 1.8e-04, dt 2.1s +All GPU(s): step 6482: loss 4.1289, lr 1.8e-04, dt 2.0s +All GPU(s): step 6483: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6484: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6485: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6486: loss 4.1562, lr 1.8e-04, dt 2.2s +All GPU(s): step 6487: loss 4.1406, lr 1.8e-04, dt 2.1s +All GPU(s): step 6488: loss 4.1523, lr 1.8e-04, dt 2.1s +All GPU(s): step 6489: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6490: loss 4.1445, lr 1.8e-04, dt 2.1s +All GPU(s): step 6491: loss 4.1445, lr 1.8e-04, dt 2.1s +All GPU(s): step 6492: loss 4.1289, lr 1.8e-04, dt 2.0s +All GPU(s): step 6493: loss 4.1289, lr 1.8e-04, dt 2.0s +All GPU(s): step 6494: loss 4.1484, lr 1.8e-04, dt 2.1s +All GPU(s): step 6495: loss 4.1406, lr 1.8e-04, dt 2.1s +All GPU(s): step 6496: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6497: loss 4.1484, lr 1.8e-04, dt 2.0s +All GPU(s): step 6498: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6499: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6500: loss 4.1367, lr 1.8e-04, dt 2.1s +All GPU(s): step 6501: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6502: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6503: loss 4.1328, lr 1.8e-04, dt 2.0s +All GPU(s): step 6504: loss 4.1328, lr 1.8e-04, dt 2.0s +All GPU(s): step 6505: loss 4.1328, lr 1.8e-04, dt 2.1s +All GPU(s): step 6506: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6507: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6508: loss 4.1484, lr 1.8e-04, dt 2.0s +All GPU(s): step 6509: loss 4.1328, lr 1.8e-04, dt 2.0s +All GPU(s): step 6510: loss 4.1367, lr 1.8e-04, dt 2.2s +All GPU(s): step 6511: loss 4.1289, lr 1.8e-04, dt 2.1s +All GPU(s): step 6512: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6513: loss 4.1484, lr 1.8e-04, dt 2.0s +All GPU(s): step 6514: loss 4.1289, lr 1.8e-04, dt 2.0s +All GPU(s): step 6515: loss 4.1484, lr 1.8e-04, dt 2.1s +All GPU(s): step 6516: loss 4.1523, lr 1.8e-04, dt 2.0s +All GPU(s): step 6517: loss 4.1328, lr 1.8e-04, dt 2.0s +All GPU(s): step 6518: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6519: loss 4.1289, lr 1.8e-04, dt 2.0s +All GPU(s): step 6520: loss 4.1289, lr 1.8e-04, dt 2.1s +All GPU(s): step 6521: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6522: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6523: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6524: loss 4.1289, lr 1.8e-04, dt 2.1s +All GPU(s): step 6525: loss 4.1328, lr 1.8e-04, dt 2.1s +All GPU(s): step 6526: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6527: loss 4.1328, lr 1.8e-04, dt 2.0s +All GPU(s): step 6528: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6529: loss 4.1406, lr 1.8e-04, dt 2.1s +All GPU(s): step 6530: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6531: loss 4.1484, lr 1.8e-04, dt 2.0s +All GPU(s): step 6532: loss 4.1289, lr 1.8e-04, dt 2.0s +All GPU(s): step 6533: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6534: loss 4.1406, lr 1.8e-04, dt 2.1s +All GPU(s): step 6535: loss 4.1289, lr 1.8e-04, dt 2.0s +All GPU(s): step 6536: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6537: loss 4.1328, lr 1.8e-04, dt 2.0s +All GPU(s): step 6538: loss 4.1289, lr 1.8e-04, dt 2.0s +All GPU(s): step 6539: loss 4.1367, lr 1.8e-04, dt 2.1s +All GPU(s): step 6540: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6541: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6542: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6543: loss 4.1328, lr 1.8e-04, dt 2.0s +All GPU(s): step 6544: loss 4.1289, lr 1.8e-04, dt 2.1s +All GPU(s): step 6545: loss 4.1328, lr 1.8e-04, dt 2.0s +All GPU(s): step 6546: loss 4.1250, lr 1.8e-04, dt 2.0s +All GPU(s): step 6547: loss 4.1328, lr 1.8e-04, dt 2.0s +All GPU(s): step 6548: loss 4.1562, lr 1.8e-04, dt 2.0s +All GPU(s): step 6549: loss 4.1406, lr 1.8e-04, dt 2.1s +All GPU(s): step 6550: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6551: loss 4.1328, lr 1.8e-04, dt 2.0s +All GPU(s): step 6552: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6553: loss 4.1367, lr 1.8e-04, dt 2.1s +All GPU(s): step 6554: loss 4.1328, lr 1.8e-04, dt 2.1s +All GPU(s): step 6555: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6556: loss 4.1328, lr 1.8e-04, dt 2.1s +All GPU(s): step 6557: loss 4.1445, lr 1.8e-04, dt 2.1s +All GPU(s): step 6558: loss 4.1445, lr 1.8e-04, dt 2.1s +All GPU(s): step 6559: loss 4.1367, lr 1.8e-04, dt 2.1s +All GPU(s): step 6560: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6561: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6562: loss 4.1406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6563: loss 4.1328, lr 1.8e-04, dt 2.1s +All GPU(s): step 6564: loss 4.1445, lr 1.8e-04, dt 2.0s +All GPU(s): step 6565: loss 4.1367, lr 1.8e-04, dt 2.0s +All GPU(s): step 6566: loss 4.1328, lr 1.7e-04, dt 2.0s +All GPU(s): step 6567: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6568: loss 4.1367, lr 1.7e-04, dt 2.1s +All GPU(s): step 6569: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6570: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6571: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6572: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6573: loss 4.1484, lr 1.7e-04, dt 2.2s +All GPU(s): step 6574: loss 4.1328, lr 1.7e-04, dt 2.0s +All GPU(s): step 6575: loss 4.1328, lr 1.7e-04, dt 2.0s +All GPU(s): step 6576: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6577: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6578: loss 4.1406, lr 1.7e-04, dt 2.2s +All GPU(s): step 6579: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6580: loss 4.1328, lr 1.7e-04, dt 2.0s +All GPU(s): step 6581: loss 4.1328, lr 1.7e-04, dt 2.0s +All GPU(s): step 6582: loss 4.1484, lr 1.7e-04, dt 2.1s +All GPU(s): step 6583: loss 4.1289, lr 1.7e-04, dt 2.1s +All GPU(s): step 6584: loss 4.1328, lr 1.7e-04, dt 2.0s +All GPU(s): step 6585: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6586: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6587: loss 4.1445, lr 1.7e-04, dt 2.1s +All GPU(s): step 6588: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6589: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6590: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6591: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6592: loss 4.1406, lr 1.7e-04, dt 2.1s +All GPU(s): step 6593: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6594: loss 4.1328, lr 1.7e-04, dt 2.0s +All GPU(s): step 6595: loss 4.1523, lr 1.7e-04, dt 2.0s +All GPU(s): step 6596: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6597: loss 4.1250, lr 1.7e-04, dt 2.2s +All GPU(s): step 6598: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6599: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6600: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6601: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6602: loss 4.1367, lr 1.7e-04, dt 2.1s +All GPU(s): step 6603: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6604: loss 4.1484, lr 1.7e-04, dt 2.0s +All GPU(s): step 6605: loss 4.1328, lr 1.7e-04, dt 2.0s +All GPU(s): step 6606: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6607: loss 4.1367, lr 1.7e-04, dt 2.1s +All GPU(s): step 6608: loss 4.1172, lr 1.7e-04, dt 2.1s +All GPU(s): step 6609: loss 4.1484, lr 1.7e-04, dt 2.0s +All GPU(s): step 6610: loss 4.1484, lr 1.7e-04, dt 2.1s +All GPU(s): step 6611: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6612: loss 4.1445, lr 1.7e-04, dt 2.1s +All GPU(s): step 6613: loss 4.1445, lr 1.7e-04, dt 2.1s +All GPU(s): step 6614: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6615: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6616: loss 4.1406, lr 1.7e-04, dt 2.1s +All GPU(s): step 6617: loss 4.1328, lr 1.7e-04, dt 2.0s +All GPU(s): step 6618: loss 4.1328, lr 1.7e-04, dt 2.0s +All GPU(s): step 6619: loss 4.1484, lr 1.7e-04, dt 2.1s +All GPU(s): step 6620: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6621: loss 4.1367, lr 1.7e-04, dt 2.1s +All GPU(s): step 6622: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6623: loss 4.1484, lr 1.7e-04, dt 2.0s +All GPU(s): step 6624: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6625: loss 4.1523, lr 1.7e-04, dt 2.0s +All GPU(s): step 6626: loss 4.1445, lr 1.7e-04, dt 2.3s +All GPU(s): step 6627: loss 4.1484, lr 1.7e-04, dt 2.0s +All GPU(s): step 6628: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6629: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6630: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6631: loss 4.1289, lr 1.7e-04, dt 2.1s +All GPU(s): step 6632: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6633: loss 4.1484, lr 1.7e-04, dt 2.0s +All GPU(s): step 6634: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6635: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6636: loss 4.1445, lr 1.7e-04, dt 2.1s +All GPU(s): step 6637: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6638: loss 4.1289, lr 1.7e-04, dt 2.0s +All GPU(s): step 6639: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6640: loss 4.1367, lr 1.7e-04, dt 2.1s +All GPU(s): step 6641: loss 4.1367, lr 1.7e-04, dt 2.1s +All GPU(s): step 6642: loss 4.1484, lr 1.7e-04, dt 2.0s +All GPU(s): step 6643: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6644: loss 4.1406, lr 1.7e-04, dt 2.1s +All GPU(s): step 6645: loss 4.1367, lr 1.7e-04, dt 2.1s +All GPU(s): step 6646: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6647: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6648: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6649: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6650: loss 4.1250, lr 1.7e-04, dt 2.2s +All GPU(s): step 6651: loss 4.1445, lr 1.7e-04, dt 2.1s +All GPU(s): step 6652: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6653: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6654: loss 4.1484, lr 1.7e-04, dt 2.0s +All GPU(s): step 6655: loss 4.1367, lr 1.7e-04, dt 2.1s +All GPU(s): step 6656: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6657: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6658: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6659: loss 4.1328, lr 1.7e-04, dt 2.0s +All GPU(s): step 6660: loss 4.1445, lr 1.7e-04, dt 2.1s +All GPU(s): step 6661: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6662: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6663: loss 4.1289, lr 1.7e-04, dt 2.0s +All GPU(s): step 6664: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6665: loss 4.1406, lr 1.7e-04, dt 2.2s +All GPU(s): step 6666: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6667: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6668: loss 4.1250, lr 1.7e-04, dt 2.0s +All GPU(s): step 6669: loss 4.1328, lr 1.7e-04, dt 2.1s +All GPU(s): step 6670: loss 4.1406, lr 1.7e-04, dt 2.1s +All GPU(s): step 6671: loss 4.1484, lr 1.7e-04, dt 2.0s +All GPU(s): step 6672: loss 4.1445, lr 1.7e-04, dt 2.1s +All GPU(s): step 6673: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6674: loss 4.1406, lr 1.7e-04, dt 2.1s +All GPU(s): step 6675: loss 4.1523, lr 1.7e-04, dt 2.0s +All GPU(s): step 6676: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6677: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6678: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6679: loss 4.1328, lr 1.7e-04, dt 2.1s +All GPU(s): step 6680: loss 4.1484, lr 1.7e-04, dt 2.0s +All GPU(s): step 6681: loss 4.1406, lr 1.7e-04, dt 2.1s +All GPU(s): step 6682: loss 4.1328, lr 1.7e-04, dt 2.0s +All GPU(s): step 6683: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6684: loss 4.1328, lr 1.7e-04, dt 2.1s +All GPU(s): step 6685: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6686: loss 4.1367, lr 1.7e-04, dt 2.1s +All GPU(s): step 6687: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6688: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6689: loss 4.1250, lr 1.7e-04, dt 2.1s +All GPU(s): step 6690: loss 4.1328, lr 1.7e-04, dt 2.0s +All GPU(s): step 6691: loss 4.1484, lr 1.7e-04, dt 2.0s +All GPU(s): step 6692: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6693: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6694: loss 4.1445, lr 1.7e-04, dt 2.1s +All GPU(s): step 6695: loss 4.1484, lr 1.7e-04, dt 2.0s +All GPU(s): step 6696: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6697: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6698: loss 4.1445, lr 1.7e-04, dt 2.1s +All GPU(s): step 6699: loss 4.1406, lr 1.7e-04, dt 2.1s +All GPU(s): step 6700: loss 4.1406, lr 1.7e-04, dt 2.1s +All GPU(s): step 6701: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6702: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6703: loss 4.1367, lr 1.7e-04, dt 2.1s +All GPU(s): step 6704: loss 4.1484, lr 1.7e-04, dt 2.0s +All GPU(s): step 6705: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6706: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6707: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6708: loss 4.1406, lr 1.7e-04, dt 2.2s +All GPU(s): step 6709: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6710: loss 4.1328, lr 1.7e-04, dt 2.0s +All GPU(s): step 6711: loss 4.1445, lr 1.7e-04, dt 2.1s +All GPU(s): step 6712: loss 4.1406, lr 1.7e-04, dt 2.1s +All GPU(s): step 6713: loss 4.1562, lr 1.7e-04, dt 2.2s +All GPU(s): step 6714: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6715: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6716: loss 4.1484, lr 1.7e-04, dt 2.1s +All GPU(s): step 6717: loss 4.1484, lr 1.7e-04, dt 2.0s +All GPU(s): step 6718: loss 4.1406, lr 1.7e-04, dt 2.1s +All GPU(s): step 6719: loss 4.1367, lr 1.7e-04, dt 2.0s +All GPU(s): step 6720: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6721: loss 4.1484, lr 1.7e-04, dt 2.0s +All GPU(s): step 6722: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6723: loss 4.1484, lr 1.7e-04, dt 2.1s +All GPU(s): step 6724: loss 4.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6725: loss 4.1445, lr 1.7e-04, dt 2.0s +All GPU(s): step 6726: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6727: loss 4.1328, lr 1.6e-04, dt 2.1s +All GPU(s): step 6728: loss 4.1328, lr 1.6e-04, dt 2.1s +All GPU(s): step 6729: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6730: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6731: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6732: loss 4.1445, lr 1.6e-04, dt 2.1s +All GPU(s): step 6733: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6734: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6735: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6736: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6737: loss 4.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 6738: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6739: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6740: loss 4.1484, lr 1.6e-04, dt 2.0s +All GPU(s): step 6741: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6742: loss 4.1523, lr 1.6e-04, dt 2.1s +All GPU(s): step 6743: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6744: loss 4.1133, lr 1.6e-04, dt 2.0s +All GPU(s): step 6745: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6746: loss 4.1289, lr 1.6e-04, dt 2.0s +All GPU(s): step 6747: loss 4.1211, lr 1.6e-04, dt 2.1s +All GPU(s): step 6748: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6749: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6750: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6751: loss 4.1484, lr 1.6e-04, dt 2.1s +All GPU(s): step 6752: loss 4.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 6753: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6754: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6755: loss 4.1289, lr 1.6e-04, dt 2.1s +All GPU(s): step 6756: loss 4.1250, lr 1.6e-04, dt 2.1s +All GPU(s): step 6757: loss 4.1367, lr 1.6e-04, dt 2.1s +All GPU(s): step 6758: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6759: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6760: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6761: loss 4.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 6762: loss 4.1484, lr 1.6e-04, dt 2.0s +All GPU(s): step 6763: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6764: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6765: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6766: loss 4.1367, lr 1.6e-04, dt 2.1s +All GPU(s): step 6767: loss 4.1289, lr 1.6e-04, dt 2.0s +All GPU(s): step 6768: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6769: loss 4.1289, lr 1.6e-04, dt 2.0s +All GPU(s): step 6770: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6771: loss 4.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 6772: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6773: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6774: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6775: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6776: loss 4.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 6777: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6778: loss 4.1562, lr 1.6e-04, dt 2.0s +All GPU(s): step 6779: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6780: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6781: loss 4.1250, lr 1.6e-04, dt 2.1s +All GPU(s): step 6782: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6783: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6784: loss 4.1289, lr 1.6e-04, dt 2.0s +All GPU(s): step 6785: loss 4.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 6786: loss 4.1367, lr 1.6e-04, dt 2.2s +All GPU(s): step 6787: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6788: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6789: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6790: loss 4.1250, lr 1.6e-04, dt 2.1s +All GPU(s): step 6791: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6792: loss 4.1289, lr 1.6e-04, dt 2.1s +All GPU(s): step 6793: loss 4.1523, lr 1.6e-04, dt 2.0s +All GPU(s): step 6794: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6795: loss 4.1445, lr 1.6e-04, dt 2.2s +All GPU(s): step 6796: loss 4.1328, lr 1.6e-04, dt 2.1s +All GPU(s): step 6797: loss 4.1445, lr 1.6e-04, dt 2.1s +All GPU(s): step 6798: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6799: loss 4.1445, lr 1.6e-04, dt 2.1s +All GPU(s): step 6800: loss 4.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 6801: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6802: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6803: loss 4.1289, lr 1.6e-04, dt 2.0s +All GPU(s): step 6804: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6805: loss 4.1367, lr 1.6e-04, dt 2.1s +All GPU(s): step 6806: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6807: loss 4.1484, lr 1.6e-04, dt 2.0s +All GPU(s): step 6808: loss 4.1328, lr 1.6e-04, dt 2.1s +All GPU(s): step 6809: loss 4.1523, lr 1.6e-04, dt 2.1s +All GPU(s): step 6810: loss 4.1445, lr 1.6e-04, dt 2.1s +All GPU(s): step 6811: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6812: loss 4.1367, lr 1.6e-04, dt 2.1s +All GPU(s): step 6813: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6814: loss 4.1328, lr 1.6e-04, dt 2.1s +All GPU(s): step 6815: loss 4.1328, lr 1.6e-04, dt 2.1s +All GPU(s): step 6816: loss 4.1328, lr 1.6e-04, dt 2.1s +All GPU(s): step 6817: loss 4.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 6818: loss 4.1367, lr 1.6e-04, dt 2.1s +All GPU(s): step 6819: loss 4.1367, lr 1.6e-04, dt 2.2s +All GPU(s): step 6820: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6821: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6822: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6823: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6824: loss 4.1367, lr 1.6e-04, dt 2.1s +All GPU(s): step 6825: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6826: loss 4.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 6827: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6828: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6829: loss 4.1367, lr 1.6e-04, dt 2.2s +All GPU(s): step 6830: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6831: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6832: loss 4.1562, lr 1.6e-04, dt 2.0s +All GPU(s): step 6833: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6834: loss 4.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 6835: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6836: loss 4.1484, lr 1.6e-04, dt 2.0s +All GPU(s): step 6837: loss 4.1289, lr 1.6e-04, dt 2.0s +All GPU(s): step 6838: loss 4.1445, lr 1.6e-04, dt 2.1s +All GPU(s): step 6839: loss 4.1445, lr 1.6e-04, dt 2.1s +All GPU(s): step 6840: loss 4.1328, lr 1.6e-04, dt 2.1s +All GPU(s): step 6841: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6842: loss 4.1523, lr 1.6e-04, dt 2.0s +All GPU(s): step 6843: loss 4.1367, lr 1.6e-04, dt 2.1s +All GPU(s): step 6844: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6845: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6846: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6847: loss 4.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 6848: loss 4.1367, lr 1.6e-04, dt 2.1s +All GPU(s): step 6849: loss 4.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 6850: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6851: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6852: loss 4.1328, lr 1.6e-04, dt 2.1s +All GPU(s): step 6853: loss 4.1523, lr 1.6e-04, dt 2.1s +All GPU(s): step 6854: loss 4.1484, lr 1.6e-04, dt 2.0s +All GPU(s): step 6855: loss 4.1211, lr 1.6e-04, dt 2.0s +All GPU(s): step 6856: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6857: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6858: loss 4.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 6859: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6860: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6861: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6862: loss 4.1484, lr 1.6e-04, dt 2.0s +All GPU(s): step 6863: loss 4.1445, lr 1.6e-04, dt 2.1s +All GPU(s): step 6864: loss 4.1250, lr 1.6e-04, dt 2.0s +All GPU(s): step 6865: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6866: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6867: loss 4.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 6868: loss 4.1289, lr 1.6e-04, dt 2.1s +All GPU(s): step 6869: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6870: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6871: loss 4.1445, lr 1.6e-04, dt 2.1s +All GPU(s): step 6872: loss 4.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 6873: loss 4.1445, lr 1.6e-04, dt 2.1s +All GPU(s): step 6874: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6875: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6876: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6877: loss 4.1328, lr 1.6e-04, dt 2.1s +All GPU(s): step 6878: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6879: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6880: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6881: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6882: loss 4.1367, lr 1.6e-04, dt 2.1s +All GPU(s): step 6883: loss 4.1328, lr 1.6e-04, dt 2.0s +All GPU(s): step 6884: loss 4.1367, lr 1.6e-04, dt 2.0s +All GPU(s): step 6885: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6886: loss 4.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 6887: loss 4.1367, lr 1.6e-04, dt 2.1s +All GPU(s): step 6888: loss 4.1523, lr 1.6e-04, dt 2.0s +All GPU(s): step 6889: loss 4.1484, lr 1.6e-04, dt 2.0s +All GPU(s): step 6890: loss 4.1445, lr 1.6e-04, dt 2.0s +All GPU(s): step 6891: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 6892: loss 4.1289, lr 1.5e-04, dt 2.1s +All GPU(s): step 6893: loss 4.1289, lr 1.5e-04, dt 2.0s +All GPU(s): step 6894: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 6895: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 6896: loss 4.1328, lr 1.5e-04, dt 2.1s +All GPU(s): step 6897: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 6898: loss 4.1484, lr 1.5e-04, dt 2.0s +All GPU(s): step 6899: loss 4.1289, lr 1.5e-04, dt 2.0s +All GPU(s): step 6900: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 6901: loss 4.1328, lr 1.5e-04, dt 2.1s +All GPU(s): step 6902: loss 4.1484, lr 1.5e-04, dt 2.1s +All GPU(s): step 6903: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 6904: loss 4.1289, lr 1.5e-04, dt 2.0s +All GPU(s): step 6905: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 6906: loss 4.1289, lr 1.5e-04, dt 2.1s +All GPU(s): step 6907: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 6908: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 6909: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 6910: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 6911: loss 4.1367, lr 1.5e-04, dt 2.2s +All GPU(s): step 6912: loss 4.1289, lr 1.5e-04, dt 2.1s +All GPU(s): step 6913: loss 4.1445, lr 1.5e-04, dt 2.1s +All GPU(s): step 6914: loss 4.1445, lr 1.5e-04, dt 2.1s +All GPU(s): step 6915: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 6916: loss 4.1328, lr 1.5e-04, dt 2.1s +All GPU(s): step 6917: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 6918: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 6919: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 6920: loss 4.1328, lr 1.5e-04, dt 2.1s +All GPU(s): step 6921: loss 4.1406, lr 1.5e-04, dt 2.1s +All GPU(s): step 6922: loss 4.1445, lr 1.5e-04, dt 2.0s +All GPU(s): step 6923: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 6924: loss 4.1289, lr 1.5e-04, dt 2.0s +All GPU(s): step 6925: loss 4.1445, lr 1.5e-04, dt 2.1s +All GPU(s): step 6926: loss 4.1367, lr 1.5e-04, dt 2.1s +All GPU(s): step 6927: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 6928: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 6929: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 6930: loss 4.1523, lr 1.5e-04, dt 2.1s +All GPU(s): step 6931: loss 4.1484, lr 1.5e-04, dt 2.0s +All GPU(s): step 6932: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 6933: loss 4.1289, lr 1.5e-04, dt 2.0s +All GPU(s): step 6934: loss 4.1289, lr 1.5e-04, dt 2.0s +All GPU(s): step 6935: loss 4.1445, lr 1.5e-04, dt 2.1s +All GPU(s): step 6936: loss 4.1523, lr 1.5e-04, dt 2.1s +All GPU(s): step 6937: loss 4.1523, lr 1.5e-04, dt 2.1s +All GPU(s): step 6938: loss 4.1523, lr 1.5e-04, dt 2.1s +All GPU(s): step 6939: loss 4.1445, lr 1.5e-04, dt 2.0s +All GPU(s): step 6940: loss 4.1562, lr 1.5e-04, dt 2.2s +All GPU(s): step 6941: loss 4.1484, lr 1.5e-04, dt 2.1s +All GPU(s): step 6942: loss 4.1445, lr 1.5e-04, dt 2.0s +All GPU(s): step 6943: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 6944: loss 4.1328, lr 1.5e-04, dt 2.1s +All GPU(s): step 6945: loss 4.1445, lr 1.5e-04, dt 2.1s +All GPU(s): step 6946: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 6947: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 6948: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 6949: loss 4.1289, lr 1.5e-04, dt 2.0s +All GPU(s): step 6950: loss 4.1445, lr 1.5e-04, dt 2.1s +All GPU(s): step 6951: loss 4.1445, lr 1.5e-04, dt 2.0s +All GPU(s): step 6952: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 6953: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 6954: loss 4.1406, lr 1.5e-04, dt 2.1s +All GPU(s): step 6955: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 6956: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 6957: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 6958: loss 4.1523, lr 1.5e-04, dt 2.0s +All GPU(s): step 6959: loss 4.1445, lr 1.5e-04, dt 2.1s +All GPU(s): step 6960: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 6961: loss 4.1523, lr 1.5e-04, dt 2.0s +All GPU(s): step 6962: loss 4.1289, lr 1.5e-04, dt 2.0s +All GPU(s): step 6963: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 6964: loss 4.1406, lr 1.5e-04, dt 2.1s +All GPU(s): step 6965: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 6966: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 6967: loss 4.1289, lr 1.5e-04, dt 2.0s +All GPU(s): step 6968: loss 4.1289, lr 1.5e-04, dt 2.0s +All GPU(s): step 6969: loss 4.1406, lr 1.5e-04, dt 2.1s +All GPU(s): step 6970: loss 4.1406, lr 1.5e-04, dt 2.1s +All GPU(s): step 6971: loss 4.1328, lr 1.5e-04, dt 2.1s +All GPU(s): step 6972: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 6973: loss 4.1484, lr 1.5e-04, dt 2.1s +All GPU(s): step 6974: loss 4.1289, lr 1.5e-04, dt 2.1s +All GPU(s): step 6975: loss 4.1523, lr 1.5e-04, dt 2.1s +All GPU(s): step 6976: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 6977: loss 4.1211, lr 1.5e-04, dt 2.0s +All GPU(s): step 6978: loss 4.1445, lr 1.5e-04, dt 2.0s +All GPU(s): step 6979: loss 4.1445, lr 1.5e-04, dt 2.1s +All GPU(s): step 6980: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 6981: loss 4.1367, lr 1.5e-04, dt 2.1s +All GPU(s): step 6982: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 6983: loss 4.1211, lr 1.5e-04, dt 2.1s +All GPU(s): step 6984: loss 4.1445, lr 1.5e-04, dt 2.1s +All GPU(s): step 6985: loss 4.1250, lr 1.5e-04, dt 2.0s +All GPU(s): step 6986: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 6987: loss 4.1445, lr 1.5e-04, dt 2.0s +All GPU(s): step 6988: loss 4.1367, lr 1.5e-04, dt 2.1s +All GPU(s): step 6989: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 6990: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 6991: loss 4.1445, lr 1.5e-04, dt 2.0s +All GPU(s): step 6992: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 6993: loss 4.1406, lr 1.5e-04, dt 2.1s +All GPU(s): step 6994: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 6995: loss 4.1445, lr 1.5e-04, dt 2.0s +All GPU(s): step 6996: loss 4.1367, lr 1.5e-04, dt 2.1s +All GPU(s): step 6997: loss 4.1406, lr 1.5e-04, dt 2.1s +All GPU(s): step 6998: loss 4.1406, lr 1.5e-04, dt 2.2s +All GPU(s): step 6999: loss 4.1367, lr 1.5e-04, dt 2.0s +saving checkpoint to checkpoints/ckpt_7000.pt +All GPU(s): step 7000: loss 4.1406, lr 1.5e-04, dt 2.1s +All GPU(s): step 7001: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 7002: loss 4.1367, lr 1.5e-04, dt 2.1s +All GPU(s): step 7003: loss 4.1367, lr 1.5e-04, dt 2.1s +All GPU(s): step 7004: loss 4.1289, lr 1.5e-04, dt 2.0s +All GPU(s): step 7005: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 7006: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 7007: loss 4.1445, lr 1.5e-04, dt 2.0s +All GPU(s): step 7008: loss 4.1484, lr 1.5e-04, dt 2.1s +All GPU(s): step 7009: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 7010: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 7011: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 7012: loss 4.1406, lr 1.5e-04, dt 2.1s +All GPU(s): step 7013: loss 4.1523, lr 1.5e-04, dt 2.1s +All GPU(s): step 7014: loss 4.1445, lr 1.5e-04, dt 2.1s +All GPU(s): step 7015: loss 4.1367, lr 1.5e-04, dt 2.1s +All GPU(s): step 7016: loss 4.1367, lr 1.5e-04, dt 2.1s +All GPU(s): step 7017: loss 4.1406, lr 1.5e-04, dt 2.1s +All GPU(s): step 7018: loss 4.1289, lr 1.5e-04, dt 2.0s +All GPU(s): step 7019: loss 4.1289, lr 1.5e-04, dt 2.0s +All GPU(s): step 7020: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 7021: loss 4.1445, lr 1.5e-04, dt 2.0s +All GPU(s): step 7022: loss 4.1250, lr 1.5e-04, dt 2.2s +All GPU(s): step 7023: loss 4.1406, lr 1.5e-04, dt 2.1s +All GPU(s): step 7024: loss 4.1445, lr 1.5e-04, dt 2.0s +All GPU(s): step 7025: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 7026: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 7027: loss 4.1367, lr 1.5e-04, dt 2.2s +All GPU(s): step 7028: loss 4.1445, lr 1.5e-04, dt 2.1s +All GPU(s): step 7029: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 7030: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 7031: loss 4.1484, lr 1.5e-04, dt 2.0s +All GPU(s): step 7032: loss 4.1367, lr 1.5e-04, dt 2.1s +All GPU(s): step 7033: loss 4.1250, lr 1.5e-04, dt 2.0s +All GPU(s): step 7034: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 7035: loss 4.1211, lr 1.5e-04, dt 2.0s +All GPU(s): step 7036: loss 4.1367, lr 1.5e-04, dt 2.1s +All GPU(s): step 7037: loss 4.1523, lr 1.5e-04, dt 2.1s +All GPU(s): step 7038: loss 4.1484, lr 1.5e-04, dt 2.0s +All GPU(s): step 7039: loss 4.1406, lr 1.5e-04, dt 2.1s +All GPU(s): step 7040: loss 4.1406, lr 1.5e-04, dt 2.1s +All GPU(s): step 7041: loss 4.1367, lr 1.5e-04, dt 2.1s +All GPU(s): step 7042: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 7043: loss 4.1445, lr 1.5e-04, dt 2.0s +All GPU(s): step 7044: loss 4.1445, lr 1.5e-04, dt 2.0s +All GPU(s): step 7045: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 7046: loss 4.1445, lr 1.5e-04, dt 2.1s +All GPU(s): step 7047: loss 4.1445, lr 1.5e-04, dt 2.0s +All GPU(s): step 7048: loss 4.1523, lr 1.5e-04, dt 2.0s +All GPU(s): step 7049: loss 4.1445, lr 1.5e-04, dt 2.0s +All GPU(s): step 7050: loss 4.1523, lr 1.5e-04, dt 2.0s +All GPU(s): step 7051: loss 4.1367, lr 1.5e-04, dt 2.2s +All GPU(s): step 7052: loss 4.1484, lr 1.5e-04, dt 2.0s +All GPU(s): step 7053: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 7054: loss 4.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 7055: loss 4.1328, lr 1.5e-04, dt 2.0s +All GPU(s): step 7056: loss 4.1445, lr 1.5e-04, dt 2.1s +All GPU(s): step 7057: loss 4.1445, lr 1.5e-04, dt 2.1s +All GPU(s): step 7058: loss 4.1289, lr 1.5e-04, dt 2.0s +All GPU(s): step 7059: loss 4.1367, lr 1.5e-04, dt 2.0s +All GPU(s): step 7060: loss 4.1523, lr 1.5e-04, dt 2.1s +All GPU(s): step 7061: loss 4.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7062: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7063: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7064: loss 4.1211, lr 1.4e-04, dt 2.0s +All GPU(s): step 7065: loss 4.1523, lr 1.4e-04, dt 2.1s +All GPU(s): step 7066: loss 4.1406, lr 1.4e-04, dt 2.2s +All GPU(s): step 7067: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7068: loss 4.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7069: loss 4.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7070: loss 4.1523, lr 1.4e-04, dt 2.1s +All GPU(s): step 7071: loss 4.1484, lr 1.4e-04, dt 2.1s +All GPU(s): step 7072: loss 4.1250, lr 1.4e-04, dt 2.1s +All GPU(s): step 7073: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7074: loss 4.1484, lr 1.4e-04, dt 2.0s +All GPU(s): step 7075: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7076: loss 4.1211, lr 1.4e-04, dt 2.0s +All GPU(s): step 7077: loss 4.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7078: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7079: loss 4.1367, lr 1.4e-04, dt 2.1s +All GPU(s): step 7080: loss 4.1406, lr 1.4e-04, dt 2.2s +All GPU(s): step 7081: loss 4.1484, lr 1.4e-04, dt 2.1s +All GPU(s): step 7082: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7083: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7084: loss 4.1523, lr 1.4e-04, dt 2.1s +All GPU(s): step 7085: loss 4.1367, lr 1.4e-04, dt 2.1s +All GPU(s): step 7086: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7087: loss 4.1328, lr 1.4e-04, dt 2.1s +All GPU(s): step 7088: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7089: loss 4.1328, lr 1.4e-04, dt 2.1s +All GPU(s): step 7090: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7091: loss 4.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7092: loss 4.1523, lr 1.4e-04, dt 2.0s +All GPU(s): step 7093: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7094: loss 4.1367, lr 1.4e-04, dt 2.1s +All GPU(s): step 7095: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7096: loss 4.1523, lr 1.4e-04, dt 2.0s +All GPU(s): step 7097: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7098: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7099: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7100: loss 4.1289, lr 1.4e-04, dt 2.0s +All GPU(s): step 7101: loss 4.1484, lr 1.4e-04, dt 2.0s +All GPU(s): step 7102: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7103: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7104: loss 4.1367, lr 1.4e-04, dt 2.2s +All GPU(s): step 7105: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7106: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7107: loss 4.1211, lr 1.4e-04, dt 2.1s +All GPU(s): step 7108: loss 4.1328, lr 1.4e-04, dt 2.0s +All GPU(s): step 7109: loss 4.1367, lr 1.4e-04, dt 2.2s +All GPU(s): step 7110: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7111: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7112: loss 4.1289, lr 1.4e-04, dt 2.0s +All GPU(s): step 7113: loss 4.1289, lr 1.4e-04, dt 2.0s +All GPU(s): step 7114: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7115: loss 4.1367, lr 1.4e-04, dt 2.1s +All GPU(s): step 7116: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7117: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7118: loss 4.1211, lr 1.4e-04, dt 2.1s +All GPU(s): step 7119: loss 4.1289, lr 1.4e-04, dt 2.1s +All GPU(s): step 7120: loss 4.1211, lr 1.4e-04, dt 2.1s +All GPU(s): step 7121: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7122: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7123: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7124: loss 4.1289, lr 1.4e-04, dt 2.0s +All GPU(s): step 7125: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7126: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7127: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7128: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7129: loss 4.1328, lr 1.4e-04, dt 2.0s +All GPU(s): step 7130: loss 4.1289, lr 1.4e-04, dt 2.0s +All GPU(s): step 7131: loss 4.1328, lr 1.4e-04, dt 2.0s +All GPU(s): step 7132: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7133: loss 4.1328, lr 1.4e-04, dt 2.2s +All GPU(s): step 7134: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7135: loss 4.1484, lr 1.4e-04, dt 2.0s +All GPU(s): step 7136: loss 4.1484, lr 1.4e-04, dt 2.0s +All GPU(s): step 7137: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7138: loss 4.1367, lr 1.4e-04, dt 2.2s +All GPU(s): step 7139: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7140: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7141: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7142: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7143: loss 4.1328, lr 1.4e-04, dt 2.1s +All GPU(s): step 7144: loss 4.1484, lr 1.4e-04, dt 2.0s +All GPU(s): step 7145: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7146: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7147: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7148: loss 4.1445, lr 1.4e-04, dt 2.2s +All GPU(s): step 7149: loss 4.1328, lr 1.4e-04, dt 2.0s +All GPU(s): step 7150: loss 4.1484, lr 1.4e-04, dt 2.0s +All GPU(s): step 7151: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7152: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7153: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7154: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7155: loss 4.1328, lr 1.4e-04, dt 2.0s +All GPU(s): step 7156: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7157: loss 4.1289, lr 1.4e-04, dt 2.1s +All GPU(s): step 7158: loss 4.1289, lr 1.4e-04, dt 2.0s +All GPU(s): step 7159: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7160: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7161: loss 4.1289, lr 1.4e-04, dt 2.1s +All GPU(s): step 7162: loss 4.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7163: loss 4.1289, lr 1.4e-04, dt 2.1s +All GPU(s): step 7164: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7165: loss 4.1328, lr 1.4e-04, dt 2.1s +All GPU(s): step 7166: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7167: loss 4.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7168: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7169: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7170: loss 4.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7171: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7172: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7173: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7174: loss 4.1289, lr 1.4e-04, dt 2.1s +All GPU(s): step 7175: loss 4.1289, lr 1.4e-04, dt 2.0s +All GPU(s): step 7176: loss 4.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7177: loss 4.1289, lr 1.4e-04, dt 2.1s +All GPU(s): step 7178: loss 4.1484, lr 1.4e-04, dt 2.0s +All GPU(s): step 7179: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7180: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7181: loss 4.1289, lr 1.4e-04, dt 2.1s +All GPU(s): step 7182: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7183: loss 4.1523, lr 1.4e-04, dt 2.0s +All GPU(s): step 7184: loss 4.1484, lr 1.4e-04, dt 2.0s +All GPU(s): step 7185: loss 4.1367, lr 1.4e-04, dt 2.1s +All GPU(s): step 7186: loss 4.1367, lr 1.4e-04, dt 2.2s +All GPU(s): step 7187: loss 4.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7188: loss 4.1484, lr 1.4e-04, dt 2.0s +All GPU(s): step 7189: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7190: loss 4.1250, lr 1.4e-04, dt 2.1s +All GPU(s): step 7191: loss 4.1484, lr 1.4e-04, dt 2.2s +All GPU(s): step 7192: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7193: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7194: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7195: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7196: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7197: loss 4.1484, lr 1.4e-04, dt 2.0s +All GPU(s): step 7198: loss 4.1289, lr 1.4e-04, dt 2.0s +All GPU(s): step 7199: loss 4.1328, lr 1.4e-04, dt 2.0s +All GPU(s): step 7200: loss 4.1367, lr 1.4e-04, dt 2.1s +All GPU(s): step 7201: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7202: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7203: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7204: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7205: loss 4.1367, lr 1.4e-04, dt 2.1s +All GPU(s): step 7206: loss 4.1328, lr 1.4e-04, dt 2.1s +All GPU(s): step 7207: loss 4.1328, lr 1.4e-04, dt 2.0s +All GPU(s): step 7208: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7209: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7210: loss 4.1367, lr 1.4e-04, dt 2.1s +All GPU(s): step 7211: loss 4.1484, lr 1.4e-04, dt 2.0s +All GPU(s): step 7212: loss 4.1250, lr 1.4e-04, dt 2.0s +All GPU(s): step 7213: loss 4.1484, lr 1.4e-04, dt 2.0s +All GPU(s): step 7214: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7215: loss 4.1367, lr 1.4e-04, dt 2.2s +All GPU(s): step 7216: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7217: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7218: loss 4.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7219: loss 4.1328, lr 1.4e-04, dt 2.0s +All GPU(s): step 7220: loss 4.1367, lr 1.4e-04, dt 2.1s +All GPU(s): step 7221: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7222: loss 4.1328, lr 1.4e-04, dt 2.0s +All GPU(s): step 7223: loss 4.1367, lr 1.4e-04, dt 2.0s +All GPU(s): step 7224: loss 4.1289, lr 1.4e-04, dt 2.0s +All GPU(s): step 7225: loss 4.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7226: loss 4.1289, lr 1.4e-04, dt 2.0s +All GPU(s): step 7227: loss 4.1328, lr 1.4e-04, dt 2.0s +All GPU(s): step 7228: loss 4.1328, lr 1.4e-04, dt 2.0s +All GPU(s): step 7229: loss 4.1289, lr 1.4e-04, dt 2.1s +All GPU(s): step 7230: loss 4.1484, lr 1.4e-04, dt 2.1s +All GPU(s): step 7231: loss 4.1484, lr 1.4e-04, dt 2.0s +All GPU(s): step 7232: loss 4.1250, lr 1.4e-04, dt 2.1s +All GPU(s): step 7233: loss 4.1445, lr 1.4e-04, dt 2.1s +All GPU(s): step 7234: loss 4.1484, lr 1.4e-04, dt 2.1s +All GPU(s): step 7235: loss 4.1250, lr 1.4e-04, dt 2.1s +All GPU(s): step 7236: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7237: loss 4.1445, lr 1.4e-04, dt 2.0s +All GPU(s): step 7238: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7239: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7240: loss 4.1328, lr 1.3e-04, dt 2.1s +All GPU(s): step 7241: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7242: loss 4.1289, lr 1.3e-04, dt 2.0s +All GPU(s): step 7243: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7244: loss 4.1406, lr 1.3e-04, dt 2.2s +All GPU(s): step 7245: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7246: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7247: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7248: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7249: loss 4.1250, lr 1.3e-04, dt 2.1s +All GPU(s): step 7250: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7251: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7252: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7253: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7254: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7255: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7256: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7257: loss 4.1250, lr 1.3e-04, dt 2.0s +All GPU(s): step 7258: loss 4.1484, lr 1.3e-04, dt 2.1s +All GPU(s): step 7259: loss 4.1328, lr 1.3e-04, dt 2.1s +All GPU(s): step 7260: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7261: loss 4.1328, lr 1.3e-04, dt 2.1s +All GPU(s): step 7262: loss 4.1523, lr 1.3e-04, dt 2.0s +All GPU(s): step 7263: loss 4.1406, lr 1.3e-04, dt 2.2s +All GPU(s): step 7264: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7265: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7266: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7267: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7268: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7269: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7270: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7271: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7272: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7273: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7274: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7275: loss 4.1328, lr 1.3e-04, dt 2.0s +All GPU(s): step 7276: loss 4.1328, lr 1.3e-04, dt 2.0s +All GPU(s): step 7277: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7278: loss 4.1328, lr 1.3e-04, dt 2.2s +All GPU(s): step 7279: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7280: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7281: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7282: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7283: loss 4.1289, lr 1.3e-04, dt 2.1s +All GPU(s): step 7284: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7285: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7286: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7287: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7288: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7289: loss 4.1484, lr 1.3e-04, dt 2.1s +All GPU(s): step 7290: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7291: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7292: loss 4.1328, lr 1.3e-04, dt 2.1s +All GPU(s): step 7293: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7294: loss 4.1328, lr 1.3e-04, dt 2.1s +All GPU(s): step 7295: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7296: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7297: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7298: loss 4.1289, lr 1.3e-04, dt 2.0s +All GPU(s): step 7299: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7300: loss 4.1523, lr 1.3e-04, dt 2.0s +All GPU(s): step 7301: loss 4.1484, lr 1.3e-04, dt 2.1s +All GPU(s): step 7302: loss 4.1445, lr 1.3e-04, dt 2.2s +All GPU(s): step 7303: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7304: loss 4.1484, lr 1.3e-04, dt 2.1s +All GPU(s): step 7305: loss 4.1328, lr 1.3e-04, dt 2.1s +All GPU(s): step 7306: loss 4.1484, lr 1.3e-04, dt 2.0s +All GPU(s): step 7307: loss 4.1250, lr 1.3e-04, dt 2.1s +All GPU(s): step 7308: loss 4.1328, lr 1.3e-04, dt 2.1s +All GPU(s): step 7309: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7310: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7311: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7312: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7313: loss 4.1523, lr 1.3e-04, dt 2.0s +All GPU(s): step 7314: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7315: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7316: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7317: loss 4.1289, lr 1.3e-04, dt 2.0s +All GPU(s): step 7318: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7319: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7320: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7321: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7322: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7323: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7324: loss 4.1523, lr 1.3e-04, dt 2.1s +All GPU(s): step 7325: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7326: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7327: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7328: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7329: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7330: loss 4.1484, lr 1.3e-04, dt 2.1s +All GPU(s): step 7331: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7332: loss 4.1562, lr 1.3e-04, dt 2.1s +All GPU(s): step 7333: loss 4.1484, lr 1.3e-04, dt 2.1s +All GPU(s): step 7334: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7335: loss 4.1484, lr 1.3e-04, dt 2.1s +All GPU(s): step 7336: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7337: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7338: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7339: loss 4.1484, lr 1.3e-04, dt 2.1s +All GPU(s): step 7340: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7341: loss 4.1523, lr 1.3e-04, dt 2.1s +All GPU(s): step 7342: loss 4.1562, lr 1.3e-04, dt 2.1s +All GPU(s): step 7343: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7344: loss 4.1484, lr 1.3e-04, dt 2.0s +All GPU(s): step 7345: loss 4.1602, lr 1.3e-04, dt 2.1s +All GPU(s): step 7346: loss 4.1484, lr 1.3e-04, dt 2.0s +All GPU(s): step 7347: loss 4.1523, lr 1.3e-04, dt 2.0s +All GPU(s): step 7348: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7349: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7350: loss 4.1484, lr 1.3e-04, dt 2.1s +All GPU(s): step 7351: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7352: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7353: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7354: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7355: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7356: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7357: loss 4.1484, lr 1.3e-04, dt 2.1s +All GPU(s): step 7358: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7359: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7360: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7361: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7362: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7363: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7364: loss 4.1523, lr 1.3e-04, dt 2.1s +All GPU(s): step 7365: loss 4.1562, lr 1.3e-04, dt 2.1s +All GPU(s): step 7366: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7367: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7368: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7369: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7370: loss 4.1328, lr 1.3e-04, dt 2.0s +All GPU(s): step 7371: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7372: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7373: loss 4.1289, lr 1.3e-04, dt 2.0s +All GPU(s): step 7374: loss 4.1289, lr 1.3e-04, dt 2.1s +All GPU(s): step 7375: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7376: loss 4.1523, lr 1.3e-04, dt 2.0s +All GPU(s): step 7377: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7378: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7379: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7380: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7381: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7382: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7383: loss 4.1328, lr 1.3e-04, dt 2.0s +All GPU(s): step 7384: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7385: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7386: loss 4.1328, lr 1.3e-04, dt 2.0s +All GPU(s): step 7387: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7388: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7389: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7390: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7391: loss 4.1562, lr 1.3e-04, dt 2.0s +All GPU(s): step 7392: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7393: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7394: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7395: loss 4.1484, lr 1.3e-04, dt 2.0s +All GPU(s): step 7396: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7397: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7398: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7399: loss 4.1328, lr 1.3e-04, dt 2.0s +All GPU(s): step 7400: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7401: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7402: loss 4.1328, lr 1.3e-04, dt 2.1s +All GPU(s): step 7403: loss 4.1328, lr 1.3e-04, dt 2.2s +All GPU(s): step 7404: loss 4.1328, lr 1.3e-04, dt 2.0s +All GPU(s): step 7405: loss 4.1328, lr 1.3e-04, dt 2.0s +All GPU(s): step 7406: loss 4.1523, lr 1.3e-04, dt 2.0s +All GPU(s): step 7407: loss 4.1289, lr 1.3e-04, dt 2.0s +All GPU(s): step 7408: loss 4.1445, lr 1.3e-04, dt 2.2s +All GPU(s): step 7409: loss 4.1367, lr 1.3e-04, dt 2.1s +All GPU(s): step 7410: loss 4.1367, lr 1.3e-04, dt 2.0s +All GPU(s): step 7411: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7412: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7413: loss 4.1445, lr 1.3e-04, dt 2.1s +All GPU(s): step 7414: loss 4.1289, lr 1.3e-04, dt 2.0s +All GPU(s): step 7415: loss 4.1328, lr 1.3e-04, dt 2.0s +All GPU(s): step 7416: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7417: loss 4.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7418: loss 4.1484, lr 1.3e-04, dt 2.1s +All GPU(s): step 7419: loss 4.1328, lr 1.3e-04, dt 2.0s +All GPU(s): step 7420: loss 4.1445, lr 1.3e-04, dt 2.0s +All GPU(s): step 7421: loss 4.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7422: loss 4.1289, lr 1.3e-04, dt 2.2s +All GPU(s): step 7423: loss 4.1484, lr 1.2e-04, dt 2.1s +All GPU(s): step 7424: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7425: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7426: loss 4.1484, lr 1.2e-04, dt 2.0s +All GPU(s): step 7427: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7428: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7429: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7430: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7431: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7432: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7433: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7434: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7435: loss 4.1484, lr 1.2e-04, dt 2.0s +All GPU(s): step 7436: loss 4.1328, lr 1.2e-04, dt 2.1s +All GPU(s): step 7437: loss 4.1289, lr 1.2e-04, dt 2.2s +All GPU(s): step 7438: loss 4.1328, lr 1.2e-04, dt 2.1s +All GPU(s): step 7439: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7440: loss 4.1523, lr 1.2e-04, dt 2.0s +All GPU(s): step 7441: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7442: loss 4.1328, lr 1.2e-04, dt 2.2s +All GPU(s): step 7443: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7444: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7445: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7446: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7447: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7448: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7449: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7450: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7451: loss 4.1523, lr 1.2e-04, dt 2.2s +All GPU(s): step 7452: loss 4.1328, lr 1.2e-04, dt 2.0s +All GPU(s): step 7453: loss 4.1367, lr 1.2e-04, dt 2.0s +All GPU(s): step 7454: loss 4.1250, lr 1.2e-04, dt 2.0s +All GPU(s): step 7455: loss 4.1523, lr 1.2e-04, dt 2.1s +All GPU(s): step 7456: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7457: loss 4.1367, lr 1.2e-04, dt 2.0s +All GPU(s): step 7458: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7459: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7460: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7461: loss 4.1406, lr 1.2e-04, dt 2.2s +All GPU(s): step 7462: loss 4.1367, lr 1.2e-04, dt 2.0s +All GPU(s): step 7463: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7464: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7465: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7466: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7467: loss 4.1328, lr 1.2e-04, dt 2.0s +All GPU(s): step 7468: loss 4.1484, lr 1.2e-04, dt 2.0s +All GPU(s): step 7469: loss 4.1289, lr 1.2e-04, dt 2.0s +All GPU(s): step 7470: loss 4.1328, lr 1.2e-04, dt 2.1s +All GPU(s): step 7471: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7472: loss 4.1289, lr 1.2e-04, dt 2.0s +All GPU(s): step 7473: loss 4.1328, lr 1.2e-04, dt 2.1s +All GPU(s): step 7474: loss 4.1328, lr 1.2e-04, dt 2.0s +All GPU(s): step 7475: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7476: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7477: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7478: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7479: loss 4.1367, lr 1.2e-04, dt 2.0s +All GPU(s): step 7480: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7481: loss 4.1328, lr 1.2e-04, dt 2.1s +All GPU(s): step 7482: loss 4.1484, lr 1.2e-04, dt 2.1s +All GPU(s): step 7483: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7484: loss 4.1328, lr 1.2e-04, dt 2.1s +All GPU(s): step 7485: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7486: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7487: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7488: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7489: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7490: loss 4.1406, lr 1.2e-04, dt 2.2s +All GPU(s): step 7491: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7492: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7493: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7494: loss 4.1289, lr 1.2e-04, dt 2.0s +All GPU(s): step 7495: loss 4.1445, lr 1.2e-04, dt 2.2s +All GPU(s): step 7496: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7497: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7498: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7499: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7500: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7501: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7502: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7503: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7504: loss 4.1328, lr 1.2e-04, dt 2.1s +All GPU(s): step 7505: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7506: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7507: loss 4.1367, lr 1.2e-04, dt 2.0s +All GPU(s): step 7508: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7509: loss 4.1406, lr 1.2e-04, dt 2.2s +All GPU(s): step 7510: loss 4.1523, lr 1.2e-04, dt 2.1s +All GPU(s): step 7511: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7512: loss 4.1289, lr 1.2e-04, dt 2.1s +All GPU(s): step 7513: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7514: loss 4.1406, lr 1.2e-04, dt 2.2s +All GPU(s): step 7515: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7516: loss 4.1367, lr 1.2e-04, dt 2.0s +All GPU(s): step 7517: loss 4.1289, lr 1.2e-04, dt 2.0s +All GPU(s): step 7518: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7519: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7520: loss 4.1523, lr 1.2e-04, dt 2.0s +All GPU(s): step 7521: loss 4.1328, lr 1.2e-04, dt 2.0s +All GPU(s): step 7522: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7523: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7524: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7525: loss 4.1250, lr 1.2e-04, dt 2.0s +All GPU(s): step 7526: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7527: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7528: loss 4.1328, lr 1.2e-04, dt 2.1s +All GPU(s): step 7529: loss 4.1523, lr 1.2e-04, dt 2.0s +All GPU(s): step 7530: loss 4.1367, lr 1.2e-04, dt 2.0s +All GPU(s): step 7531: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7532: loss 4.1367, lr 1.2e-04, dt 2.0s +All GPU(s): step 7533: loss 4.1328, lr 1.2e-04, dt 2.2s +All GPU(s): step 7534: loss 4.1484, lr 1.2e-04, dt 2.0s +All GPU(s): step 7535: loss 4.1367, lr 1.2e-04, dt 2.0s +All GPU(s): step 7536: loss 4.1484, lr 1.2e-04, dt 2.1s +All GPU(s): step 7537: loss 4.1289, lr 1.2e-04, dt 2.1s +All GPU(s): step 7538: loss 4.1484, lr 1.2e-04, dt 2.2s +All GPU(s): step 7539: loss 4.1250, lr 1.2e-04, dt 2.1s +All GPU(s): step 7540: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7541: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7542: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7543: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7544: loss 4.1523, lr 1.2e-04, dt 2.0s +All GPU(s): step 7545: loss 4.1484, lr 1.2e-04, dt 2.0s +All GPU(s): step 7546: loss 4.1367, lr 1.2e-04, dt 2.0s +All GPU(s): step 7547: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7548: loss 4.1523, lr 1.2e-04, dt 2.1s +All GPU(s): step 7549: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7550: loss 4.1367, lr 1.2e-04, dt 2.0s +All GPU(s): step 7551: loss 4.1484, lr 1.2e-04, dt 2.0s +All GPU(s): step 7552: loss 4.1289, lr 1.2e-04, dt 2.0s +All GPU(s): step 7553: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7554: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7555: loss 4.1484, lr 1.2e-04, dt 2.0s +All GPU(s): step 7556: loss 4.1328, lr 1.2e-04, dt 2.0s +All GPU(s): step 7557: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7558: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7559: loss 4.1328, lr 1.2e-04, dt 2.0s +All GPU(s): step 7560: loss 4.1328, lr 1.2e-04, dt 2.0s +All GPU(s): step 7561: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7562: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7563: loss 4.1367, lr 1.2e-04, dt 2.0s +All GPU(s): step 7564: loss 4.1484, lr 1.2e-04, dt 2.0s +All GPU(s): step 7565: loss 4.1289, lr 1.2e-04, dt 2.0s +All GPU(s): step 7566: loss 4.1523, lr 1.2e-04, dt 2.0s +All GPU(s): step 7567: loss 4.1328, lr 1.2e-04, dt 2.1s +All GPU(s): step 7568: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7569: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7570: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7571: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7572: loss 4.1289, lr 1.2e-04, dt 2.1s +All GPU(s): step 7573: loss 4.1250, lr 1.2e-04, dt 2.0s +All GPU(s): step 7574: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7575: loss 4.1484, lr 1.2e-04, dt 2.0s +All GPU(s): step 7576: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7577: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7578: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7579: loss 4.1523, lr 1.2e-04, dt 2.0s +All GPU(s): step 7580: loss 4.1328, lr 1.2e-04, dt 2.0s +All GPU(s): step 7581: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7582: loss 4.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7583: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7584: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7585: loss 4.1328, lr 1.2e-04, dt 2.0s +All GPU(s): step 7586: loss 4.1445, lr 1.2e-04, dt 2.1s +All GPU(s): step 7587: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7588: loss 4.1523, lr 1.2e-04, dt 2.1s +All GPU(s): step 7589: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7590: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7591: loss 4.1289, lr 1.2e-04, dt 2.1s +All GPU(s): step 7592: loss 4.1289, lr 1.2e-04, dt 2.0s +All GPU(s): step 7593: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7594: loss 4.1484, lr 1.2e-04, dt 2.1s +All GPU(s): step 7595: loss 4.1289, lr 1.2e-04, dt 2.1s +All GPU(s): step 7596: loss 4.1484, lr 1.2e-04, dt 2.1s +All GPU(s): step 7597: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7598: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7599: loss 4.1328, lr 1.2e-04, dt 2.0s +All GPU(s): step 7600: loss 4.1484, lr 1.2e-04, dt 2.1s +All GPU(s): step 7601: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7602: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7603: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7604: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7605: loss 4.1484, lr 1.2e-04, dt 2.1s +All GPU(s): step 7606: loss 4.1484, lr 1.2e-04, dt 2.1s +All GPU(s): step 7607: loss 4.1445, lr 1.2e-04, dt 2.0s +All GPU(s): step 7608: loss 4.1367, lr 1.2e-04, dt 2.0s +All GPU(s): step 7609: loss 4.1484, lr 1.2e-04, dt 2.0s +All GPU(s): step 7610: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7611: loss 4.1484, lr 1.2e-04, dt 2.1s +All GPU(s): step 7612: loss 4.1328, lr 1.2e-04, dt 2.0s +All GPU(s): step 7613: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7614: loss 4.1484, lr 1.2e-04, dt 2.1s +All GPU(s): step 7615: loss 4.1484, lr 1.2e-04, dt 2.1s +All GPU(s): step 7616: loss 4.1289, lr 1.2e-04, dt 2.1s +All GPU(s): step 7617: loss 4.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7618: loss 4.1367, lr 1.2e-04, dt 2.1s +All GPU(s): step 7619: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7620: loss 4.1445, lr 1.1e-04, dt 2.2s +All GPU(s): step 7621: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7622: loss 4.1172, lr 1.1e-04, dt 2.1s +All GPU(s): step 7623: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7624: loss 4.1328, lr 1.1e-04, dt 2.0s +All GPU(s): step 7625: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7626: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7627: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7628: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7629: loss 4.1289, lr 1.1e-04, dt 2.1s +All GPU(s): step 7630: loss 4.1406, lr 1.1e-04, dt 2.2s +All GPU(s): step 7631: loss 4.1484, lr 1.1e-04, dt 2.1s +All GPU(s): step 7632: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7633: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7634: loss 4.1328, lr 1.1e-04, dt 2.0s +All GPU(s): step 7635: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7636: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7637: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7638: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7639: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7640: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7641: loss 4.1523, lr 1.1e-04, dt 2.0s +All GPU(s): step 7642: loss 4.1484, lr 1.1e-04, dt 2.0s +All GPU(s): step 7643: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7644: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7645: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7646: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7647: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7648: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7649: loss 4.1445, lr 1.1e-04, dt 2.2s +All GPU(s): step 7650: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7651: loss 4.1484, lr 1.1e-04, dt 2.0s +All GPU(s): step 7652: loss 4.1328, lr 1.1e-04, dt 2.0s +All GPU(s): step 7653: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7654: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7655: loss 4.1484, lr 1.1e-04, dt 2.0s +All GPU(s): step 7656: loss 4.1328, lr 1.1e-04, dt 2.0s +All GPU(s): step 7657: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7658: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7659: loss 4.1328, lr 1.1e-04, dt 2.1s +All GPU(s): step 7660: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7661: loss 4.1484, lr 1.1e-04, dt 2.0s +All GPU(s): step 7662: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7663: loss 4.1328, lr 1.1e-04, dt 2.1s +All GPU(s): step 7664: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7665: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7666: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7667: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7668: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7669: loss 4.1328, lr 1.1e-04, dt 2.1s +All GPU(s): step 7670: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7671: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7672: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7673: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7674: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7675: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7676: loss 4.1523, lr 1.1e-04, dt 2.0s +All GPU(s): step 7677: loss 4.1484, lr 1.1e-04, dt 2.1s +All GPU(s): step 7678: loss 4.1250, lr 1.1e-04, dt 2.2s +All GPU(s): step 7679: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7680: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7681: loss 4.1289, lr 1.1e-04, dt 2.0s +All GPU(s): step 7682: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7683: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7684: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7685: loss 4.1484, lr 1.1e-04, dt 2.0s +All GPU(s): step 7686: loss 4.1328, lr 1.1e-04, dt 2.0s +All GPU(s): step 7687: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7688: loss 4.1328, lr 1.1e-04, dt 2.1s +All GPU(s): step 7689: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7690: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7691: loss 4.1250, lr 1.1e-04, dt 2.0s +All GPU(s): step 7692: loss 4.1484, lr 1.1e-04, dt 2.0s +All GPU(s): step 7693: loss 4.1523, lr 1.1e-04, dt 2.1s +All GPU(s): step 7694: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7695: loss 4.1289, lr 1.1e-04, dt 2.1s +All GPU(s): step 7696: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7697: loss 4.1484, lr 1.1e-04, dt 2.1s +All GPU(s): step 7698: loss 4.1250, lr 1.1e-04, dt 2.0s +All GPU(s): step 7699: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7700: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7701: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7702: loss 4.1250, lr 1.1e-04, dt 2.1s +All GPU(s): step 7703: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7704: loss 4.1328, lr 1.1e-04, dt 2.0s +All GPU(s): step 7705: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7706: loss 4.1328, lr 1.1e-04, dt 2.0s +All GPU(s): step 7707: loss 4.1523, lr 1.1e-04, dt 2.1s +All GPU(s): step 7708: loss 4.1484, lr 1.1e-04, dt 2.0s +All GPU(s): step 7709: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7710: loss 4.1484, lr 1.1e-04, dt 2.0s +All GPU(s): step 7711: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7712: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7713: loss 4.1250, lr 1.1e-04, dt 2.0s +All GPU(s): step 7714: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7715: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7716: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7717: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7718: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7719: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7720: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7721: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7722: loss 4.1289, lr 1.1e-04, dt 2.1s +All GPU(s): step 7723: loss 4.1484, lr 1.1e-04, dt 2.0s +All GPU(s): step 7724: loss 4.1328, lr 1.1e-04, dt 2.0s +All GPU(s): step 7725: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7726: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7727: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7728: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7729: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7730: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7731: loss 4.1484, lr 1.1e-04, dt 2.1s +All GPU(s): step 7732: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7733: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7734: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7735: loss 4.1250, lr 1.1e-04, dt 2.0s +All GPU(s): step 7736: loss 4.1484, lr 1.1e-04, dt 2.2s +All GPU(s): step 7737: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7738: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7739: loss 4.1211, lr 1.1e-04, dt 2.0s +All GPU(s): step 7740: loss 4.1523, lr 1.1e-04, dt 2.0s +All GPU(s): step 7741: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7742: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7743: loss 4.1289, lr 1.1e-04, dt 2.0s +All GPU(s): step 7744: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7745: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7746: loss 4.1484, lr 1.1e-04, dt 2.1s +All GPU(s): step 7747: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7748: loss 4.1602, lr 1.1e-04, dt 2.0s +All GPU(s): step 7749: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7750: loss 4.1250, lr 1.1e-04, dt 2.1s +All GPU(s): step 7751: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7752: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7753: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7754: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7755: loss 4.1250, lr 1.1e-04, dt 2.1s +All GPU(s): step 7756: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7757: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7758: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7759: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7760: loss 4.1406, lr 1.1e-04, dt 2.2s +All GPU(s): step 7761: loss 4.1328, lr 1.1e-04, dt 2.0s +All GPU(s): step 7762: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7763: loss 4.1250, lr 1.1e-04, dt 2.0s +All GPU(s): step 7764: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7765: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7766: loss 4.1602, lr 1.1e-04, dt 2.0s +All GPU(s): step 7767: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7768: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7769: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7770: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7771: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7772: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7773: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7774: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7775: loss 4.1484, lr 1.1e-04, dt 2.1s +All GPU(s): step 7776: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7777: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7778: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7779: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7780: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7781: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7782: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7783: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7784: loss 4.1328, lr 1.1e-04, dt 2.1s +All GPU(s): step 7785: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7786: loss 4.1484, lr 1.1e-04, dt 2.1s +All GPU(s): step 7787: loss 4.1523, lr 1.1e-04, dt 2.1s +All GPU(s): step 7788: loss 4.1484, lr 1.1e-04, dt 2.0s +All GPU(s): step 7789: loss 4.1328, lr 1.1e-04, dt 2.2s +All GPU(s): step 7790: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7791: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7792: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7793: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7794: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7795: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7796: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7797: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7798: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7799: loss 4.1367, lr 1.1e-04, dt 2.2s +All GPU(s): step 7800: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7801: loss 4.1289, lr 1.1e-04, dt 2.0s +All GPU(s): step 7802: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7803: loss 4.1328, lr 1.1e-04, dt 2.1s +All GPU(s): step 7804: loss 4.1523, lr 1.1e-04, dt 2.1s +All GPU(s): step 7805: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7806: loss 4.1484, lr 1.1e-04, dt 2.0s +All GPU(s): step 7807: loss 4.1328, lr 1.1e-04, dt 2.0s +All GPU(s): step 7808: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7809: loss 4.1484, lr 1.1e-04, dt 2.1s +All GPU(s): step 7810: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7811: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7812: loss 4.1250, lr 1.1e-04, dt 2.0s +All GPU(s): step 7813: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7814: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7815: loss 4.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 7816: loss 4.1289, lr 1.1e-04, dt 2.0s +All GPU(s): step 7817: loss 4.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 7818: loss 4.1445, lr 1.1e-04, dt 2.2s +All GPU(s): step 7819: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7820: loss 4.1328, lr 1.1e-04, dt 2.1s +All GPU(s): step 7821: loss 4.1367, lr 1.1e-04, dt 2.1s +All GPU(s): step 7822: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7823: loss 4.1445, lr 1.1e-04, dt 2.1s +All GPU(s): step 7824: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7825: loss 4.1445, lr 1.1e-04, dt 2.0s +All GPU(s): step 7826: loss 4.1367, lr 1.1e-04, dt 2.0s +All GPU(s): step 7827: loss 4.1367, lr 1.0e-04, dt 2.0s +All GPU(s): step 7828: loss 4.1367, lr 1.0e-04, dt 2.1s +All GPU(s): step 7829: loss 4.1367, lr 1.0e-04, dt 2.0s +All GPU(s): step 7830: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7831: loss 4.1445, lr 1.0e-04, dt 2.0s +All GPU(s): step 7832: loss 4.1289, lr 1.0e-04, dt 2.0s +All GPU(s): step 7833: loss 4.1289, lr 1.0e-04, dt 2.1s +All GPU(s): step 7834: loss 4.1367, lr 1.0e-04, dt 2.0s +All GPU(s): step 7835: loss 4.1289, lr 1.0e-04, dt 2.0s +All GPU(s): step 7836: loss 4.1367, lr 1.0e-04, dt 2.0s +All GPU(s): step 7837: loss 4.1367, lr 1.0e-04, dt 2.1s +All GPU(s): step 7838: loss 4.1367, lr 1.0e-04, dt 2.1s +All GPU(s): step 7839: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7840: loss 4.1484, lr 1.0e-04, dt 2.0s +All GPU(s): step 7841: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7842: loss 4.1406, lr 1.0e-04, dt 2.1s +All GPU(s): step 7843: loss 4.1484, lr 1.0e-04, dt 2.1s +All GPU(s): step 7844: loss 4.1406, lr 1.0e-04, dt 2.1s +All GPU(s): step 7845: loss 4.1445, lr 1.0e-04, dt 2.1s +All GPU(s): step 7846: loss 4.1367, lr 1.0e-04, dt 2.0s +All GPU(s): step 7847: loss 4.1367, lr 1.0e-04, dt 2.1s +All GPU(s): step 7848: loss 4.1445, lr 1.0e-04, dt 2.0s +All GPU(s): step 7849: loss 4.1523, lr 1.0e-04, dt 2.0s +All GPU(s): step 7850: loss 4.1445, lr 1.0e-04, dt 2.0s +All GPU(s): step 7851: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7852: loss 4.1406, lr 1.0e-04, dt 2.2s +All GPU(s): step 7853: loss 4.1445, lr 1.0e-04, dt 2.1s +All GPU(s): step 7854: loss 4.1367, lr 1.0e-04, dt 2.1s +All GPU(s): step 7855: loss 4.1445, lr 1.0e-04, dt 2.1s +All GPU(s): step 7856: loss 4.1367, lr 1.0e-04, dt 2.1s +All GPU(s): step 7857: loss 4.1445, lr 1.0e-04, dt 2.2s +All GPU(s): step 7858: loss 4.1328, lr 1.0e-04, dt 2.1s +All GPU(s): step 7859: loss 4.1484, lr 1.0e-04, dt 2.0s +All GPU(s): step 7860: loss 4.1445, lr 1.0e-04, dt 2.0s +All GPU(s): step 7861: loss 4.1406, lr 1.0e-04, dt 2.1s +All GPU(s): step 7862: loss 4.1406, lr 1.0e-04, dt 2.1s +All GPU(s): step 7863: loss 4.1523, lr 1.0e-04, dt 2.0s +All GPU(s): step 7864: loss 4.1328, lr 1.0e-04, dt 2.0s +All GPU(s): step 7865: loss 4.1484, lr 1.0e-04, dt 2.0s +All GPU(s): step 7866: loss 4.1445, lr 1.0e-04, dt 2.1s +All GPU(s): step 7867: loss 4.1328, lr 1.0e-04, dt 2.0s +All GPU(s): step 7868: loss 4.1328, lr 1.0e-04, dt 2.1s +All GPU(s): step 7869: loss 4.1445, lr 1.0e-04, dt 2.0s +All GPU(s): step 7870: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7871: loss 4.1445, lr 1.0e-04, dt 2.1s +All GPU(s): step 7872: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7873: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7874: loss 4.1562, lr 1.0e-04, dt 2.0s +All GPU(s): step 7875: loss 4.1367, lr 1.0e-04, dt 2.1s +All GPU(s): step 7876: loss 4.1406, lr 1.0e-04, dt 2.2s +All GPU(s): step 7877: loss 4.1406, lr 1.0e-04, dt 2.1s +All GPU(s): step 7878: loss 4.1328, lr 1.0e-04, dt 2.0s +All GPU(s): step 7879: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7880: loss 4.1328, lr 1.0e-04, dt 2.1s +All GPU(s): step 7881: loss 4.1289, lr 1.0e-04, dt 2.1s +All GPU(s): step 7882: loss 4.1445, lr 1.0e-04, dt 2.0s +All GPU(s): step 7883: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7884: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7885: loss 4.1523, lr 1.0e-04, dt 2.0s +All GPU(s): step 7886: loss 4.1328, lr 1.0e-04, dt 2.1s +All GPU(s): step 7887: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7888: loss 4.1250, lr 1.0e-04, dt 2.1s +All GPU(s): step 7889: loss 4.1445, lr 1.0e-04, dt 2.0s +All GPU(s): step 7890: loss 4.1445, lr 1.0e-04, dt 2.1s +All GPU(s): step 7891: loss 4.1445, lr 1.0e-04, dt 2.1s +All GPU(s): step 7892: loss 4.1406, lr 1.0e-04, dt 2.1s +All GPU(s): step 7893: loss 4.1484, lr 1.0e-04, dt 2.0s +All GPU(s): step 7894: loss 4.1367, lr 1.0e-04, dt 2.0s +All GPU(s): step 7895: loss 4.1523, lr 1.0e-04, dt 2.1s +All GPU(s): step 7896: loss 4.1367, lr 1.0e-04, dt 2.1s +All GPU(s): step 7897: loss 4.1250, lr 1.0e-04, dt 2.0s +All GPU(s): step 7898: loss 4.1445, lr 1.0e-04, dt 2.0s +All GPU(s): step 7899: loss 4.1289, lr 1.0e-04, dt 2.1s +All GPU(s): step 7900: loss 4.1445, lr 1.0e-04, dt 2.2s +All GPU(s): step 7901: loss 4.1484, lr 1.0e-04, dt 2.1s +All GPU(s): step 7902: loss 4.1328, lr 1.0e-04, dt 2.1s +All GPU(s): step 7903: loss 4.1523, lr 1.0e-04, dt 2.1s +All GPU(s): step 7904: loss 4.1328, lr 1.0e-04, dt 2.1s +All GPU(s): step 7905: loss 4.1406, lr 1.0e-04, dt 2.2s +All GPU(s): step 7906: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7907: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7908: loss 4.1445, lr 1.0e-04, dt 2.0s +All GPU(s): step 7909: loss 4.1484, lr 1.0e-04, dt 2.0s +All GPU(s): step 7910: loss 4.1484, lr 1.0e-04, dt 2.1s +All GPU(s): step 7911: loss 4.1328, lr 1.0e-04, dt 2.0s +All GPU(s): step 7912: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7913: loss 4.1289, lr 1.0e-04, dt 2.0s +All GPU(s): step 7914: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7915: loss 4.1445, lr 1.0e-04, dt 2.1s +All GPU(s): step 7916: loss 4.1445, lr 1.0e-04, dt 2.0s +All GPU(s): step 7917: loss 4.1445, lr 1.0e-04, dt 2.0s +All GPU(s): step 7918: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7919: loss 4.1367, lr 1.0e-04, dt 2.1s +All GPU(s): step 7920: loss 4.1523, lr 1.0e-04, dt 2.1s +All GPU(s): step 7921: loss 4.1367, lr 1.0e-04, dt 2.0s +All GPU(s): step 7922: loss 4.1328, lr 1.0e-04, dt 2.0s +All GPU(s): step 7923: loss 4.1523, lr 1.0e-04, dt 2.0s +All GPU(s): step 7924: loss 4.1484, lr 1.0e-04, dt 2.1s +All GPU(s): step 7925: loss 4.1367, lr 1.0e-04, dt 2.0s +All GPU(s): step 7926: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7927: loss 4.1367, lr 1.0e-04, dt 2.0s +All GPU(s): step 7928: loss 4.1445, lr 1.0e-04, dt 2.0s +All GPU(s): step 7929: loss 4.1484, lr 1.0e-04, dt 2.2s +All GPU(s): step 7930: loss 4.1406, lr 1.0e-04, dt 2.1s +All GPU(s): step 7931: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7932: loss 4.1328, lr 1.0e-04, dt 2.1s +All GPU(s): step 7933: loss 4.1484, lr 1.0e-04, dt 2.0s +All GPU(s): step 7934: loss 4.1406, lr 1.0e-04, dt 2.1s +All GPU(s): step 7935: loss 4.1289, lr 1.0e-04, dt 2.0s +All GPU(s): step 7936: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7937: loss 4.1484, lr 1.0e-04, dt 2.0s +All GPU(s): step 7938: loss 4.1250, lr 1.0e-04, dt 2.0s +All GPU(s): step 7939: loss 4.1445, lr 1.0e-04, dt 2.1s +All GPU(s): step 7940: loss 4.1445, lr 1.0e-04, dt 2.0s +All GPU(s): step 7941: loss 4.1445, lr 1.0e-04, dt 2.0s +All GPU(s): step 7942: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7943: loss 4.1250, lr 1.0e-04, dt 2.1s +All GPU(s): step 7944: loss 4.1484, lr 1.0e-04, dt 2.1s +All GPU(s): step 7945: loss 4.1445, lr 1.0e-04, dt 2.0s +All GPU(s): step 7946: loss 4.1484, lr 1.0e-04, dt 2.0s +All GPU(s): step 7947: loss 4.1406, lr 1.0e-04, dt 2.0s +All GPU(s): step 7948: loss 4.1406, lr 9.9e-05, dt 2.1s +All GPU(s): step 7949: loss 4.1406, lr 9.9e-05, dt 2.1s +All GPU(s): step 7950: loss 4.1484, lr 9.9e-05, dt 2.0s +All GPU(s): step 7951: loss 4.1367, lr 9.9e-05, dt 2.0s +All GPU(s): step 7952: loss 4.1445, lr 9.9e-05, dt 2.1s +All GPU(s): step 7953: loss 4.1328, lr 9.9e-05, dt 2.2s +All GPU(s): step 7954: loss 4.1406, lr 9.9e-05, dt 2.1s +All GPU(s): step 7955: loss 4.1523, lr 9.9e-05, dt 2.0s +All GPU(s): step 7956: loss 4.1445, lr 9.9e-05, dt 2.0s +All GPU(s): step 7957: loss 4.1367, lr 9.9e-05, dt 2.0s +All GPU(s): step 7958: loss 4.1367, lr 9.9e-05, dt 2.2s +All GPU(s): step 7959: loss 4.1250, lr 9.9e-05, dt 2.0s +All GPU(s): step 7960: loss 4.1328, lr 9.9e-05, dt 2.0s +All GPU(s): step 7961: loss 4.1445, lr 9.9e-05, dt 2.0s +All GPU(s): step 7962: loss 4.1406, lr 9.9e-05, dt 2.1s +All GPU(s): step 7963: loss 4.1367, lr 9.9e-05, dt 2.2s +All GPU(s): step 7964: loss 4.1445, lr 9.9e-05, dt 2.1s +All GPU(s): step 7965: loss 4.1328, lr 9.9e-05, dt 2.0s +All GPU(s): step 7966: loss 4.1328, lr 9.9e-05, dt 2.1s +All GPU(s): step 7967: loss 4.1406, lr 9.9e-05, dt 2.0s +All GPU(s): step 7968: loss 4.1406, lr 9.9e-05, dt 2.1s +All GPU(s): step 7969: loss 4.1367, lr 9.9e-05, dt 2.0s +All GPU(s): step 7970: loss 4.1406, lr 9.9e-05, dt 2.0s +All GPU(s): step 7971: loss 4.1406, lr 9.8e-05, dt 2.0s +All GPU(s): step 7972: loss 4.1289, lr 9.8e-05, dt 2.1s +All GPU(s): step 7973: loss 4.1328, lr 9.8e-05, dt 2.1s +All GPU(s): step 7974: loss 4.1406, lr 9.8e-05, dt 2.0s +All GPU(s): step 7975: loss 4.1328, lr 9.8e-05, dt 2.0s +All GPU(s): step 7976: loss 4.1445, lr 9.8e-05, dt 2.1s +All GPU(s): step 7977: loss 4.1367, lr 9.8e-05, dt 2.1s +All GPU(s): step 7978: loss 4.1328, lr 9.8e-05, dt 2.0s +All GPU(s): step 7979: loss 4.1367, lr 9.8e-05, dt 2.1s +All GPU(s): step 7980: loss 4.1328, lr 9.8e-05, dt 2.0s +All GPU(s): step 7981: loss 4.1367, lr 9.8e-05, dt 2.1s +All GPU(s): step 7982: loss 4.1445, lr 9.8e-05, dt 2.2s +All GPU(s): step 7983: loss 4.1289, lr 9.8e-05, dt 2.1s +All GPU(s): step 7984: loss 4.1523, lr 9.8e-05, dt 2.1s +All GPU(s): step 7985: loss 4.1406, lr 9.8e-05, dt 2.1s +All GPU(s): step 7986: loss 4.1484, lr 9.8e-05, dt 2.0s +All GPU(s): step 7987: loss 4.1367, lr 9.8e-05, dt 2.2s +All GPU(s): step 7988: loss 4.1367, lr 9.8e-05, dt 2.1s +All GPU(s): step 7989: loss 4.1406, lr 9.8e-05, dt 2.0s +All GPU(s): step 7990: loss 4.1289, lr 9.8e-05, dt 2.0s +All GPU(s): step 7991: loss 4.1445, lr 9.8e-05, dt 2.0s +All GPU(s): step 7992: loss 4.1367, lr 9.8e-05, dt 2.1s +All GPU(s): step 7993: loss 4.1406, lr 9.8e-05, dt 2.0s +All GPU(s): step 7994: loss 4.1250, lr 9.7e-05, dt 2.0s +All GPU(s): step 7995: loss 4.1211, lr 9.7e-05, dt 2.0s +All GPU(s): step 7996: loss 4.1367, lr 9.7e-05, dt 2.0s +All GPU(s): step 7997: loss 4.1328, lr 9.7e-05, dt 2.1s +All GPU(s): step 7998: loss 4.1328, lr 9.7e-05, dt 2.0s +All GPU(s): step 7999: loss 4.1328, lr 9.7e-05, dt 2.0s +saving checkpoint to checkpoints/ckpt_8000.pt +All GPU(s): step 8000: loss 4.1367, lr 9.7e-05, dt 2.1s +All GPU(s): step 8001: loss 4.1328, lr 9.7e-05, dt 2.1s +All GPU(s): step 8002: loss 4.1328, lr 9.7e-05, dt 2.1s +All GPU(s): step 8003: loss 4.1406, lr 9.7e-05, dt 2.1s +All GPU(s): step 8004: loss 4.1406, lr 9.7e-05, dt 2.0s +All GPU(s): step 8005: loss 4.1367, lr 9.7e-05, dt 2.0s +All GPU(s): step 8006: loss 4.1406, lr 9.7e-05, dt 2.1s +All GPU(s): step 8007: loss 4.1406, lr 9.7e-05, dt 2.1s +All GPU(s): step 8008: loss 4.1406, lr 9.7e-05, dt 2.0s +All GPU(s): step 8009: loss 4.1328, lr 9.7e-05, dt 2.1s +All GPU(s): step 8010: loss 4.1328, lr 9.7e-05, dt 2.1s +All GPU(s): step 8011: loss 4.1328, lr 9.7e-05, dt 2.1s +All GPU(s): step 8012: loss 4.1289, lr 9.7e-05, dt 2.0s +All GPU(s): step 8013: loss 4.1406, lr 9.7e-05, dt 2.0s +All GPU(s): step 8014: loss 4.1328, lr 9.7e-05, dt 2.0s +All GPU(s): step 8015: loss 4.1406, lr 9.7e-05, dt 2.1s +All GPU(s): step 8016: loss 4.1328, lr 9.7e-05, dt 2.1s +All GPU(s): step 8017: loss 4.1328, lr 9.6e-05, dt 2.0s +All GPU(s): step 8018: loss 4.1406, lr 9.6e-05, dt 2.0s +All GPU(s): step 8019: loss 4.1445, lr 9.6e-05, dt 2.0s +All GPU(s): step 8020: loss 4.1328, lr 9.6e-05, dt 2.1s +All GPU(s): step 8021: loss 4.1367, lr 9.6e-05, dt 2.1s +All GPU(s): step 8022: loss 4.1367, lr 9.6e-05, dt 2.0s +All GPU(s): step 8023: loss 4.1445, lr 9.6e-05, dt 2.1s +All GPU(s): step 8024: loss 4.1328, lr 9.6e-05, dt 2.1s +All GPU(s): step 8025: loss 4.1406, lr 9.6e-05, dt 2.0s +All GPU(s): step 8026: loss 4.1445, lr 9.6e-05, dt 2.1s +All GPU(s): step 8027: loss 4.1328, lr 9.6e-05, dt 2.0s +All GPU(s): step 8028: loss 4.1367, lr 9.6e-05, dt 2.0s +All GPU(s): step 8029: loss 4.1328, lr 9.6e-05, dt 2.0s +All GPU(s): step 8030: loss 4.1406, lr 9.6e-05, dt 2.1s +All GPU(s): step 8031: loss 4.1328, lr 9.6e-05, dt 2.1s +All GPU(s): step 8032: loss 4.1328, lr 9.6e-05, dt 2.0s +All GPU(s): step 8033: loss 4.1406, lr 9.6e-05, dt 2.0s +All GPU(s): step 8034: loss 4.1445, lr 9.6e-05, dt 2.0s +All GPU(s): step 8035: loss 4.1406, lr 9.6e-05, dt 2.1s +All GPU(s): step 8036: loss 4.1367, lr 9.6e-05, dt 2.1s +All GPU(s): step 8037: loss 4.1406, lr 9.6e-05, dt 2.0s +All GPU(s): step 8038: loss 4.1367, lr 9.6e-05, dt 2.1s +All GPU(s): step 8039: loss 4.1289, lr 9.6e-05, dt 2.0s +All GPU(s): step 8040: loss 4.1445, lr 9.5e-05, dt 2.2s +All GPU(s): step 8041: loss 4.1406, lr 9.5e-05, dt 2.1s +All GPU(s): step 8042: loss 4.1367, lr 9.5e-05, dt 2.0s +All GPU(s): step 8043: loss 4.1406, lr 9.5e-05, dt 2.0s +All GPU(s): step 8044: loss 4.1211, lr 9.5e-05, dt 2.1s +All GPU(s): step 8045: loss 4.1406, lr 9.5e-05, dt 2.2s +All GPU(s): step 8046: loss 4.1367, lr 9.5e-05, dt 2.1s +All GPU(s): step 8047: loss 4.1562, lr 9.5e-05, dt 2.0s +All GPU(s): step 8048: loss 4.1406, lr 9.5e-05, dt 2.1s +All GPU(s): step 8049: loss 4.1406, lr 9.5e-05, dt 2.0s +All GPU(s): step 8050: loss 4.1406, lr 9.5e-05, dt 2.1s +All GPU(s): step 8051: loss 4.1445, lr 9.5e-05, dt 2.0s +All GPU(s): step 8052: loss 4.1367, lr 9.5e-05, dt 2.1s +All GPU(s): step 8053: loss 4.1328, lr 9.5e-05, dt 2.1s +All GPU(s): step 8054: loss 4.1484, lr 9.5e-05, dt 2.1s +All GPU(s): step 8055: loss 4.1328, lr 9.5e-05, dt 2.1s +All GPU(s): step 8056: loss 4.1484, lr 9.5e-05, dt 2.0s +All GPU(s): step 8057: loss 4.1367, lr 9.5e-05, dt 2.0s +All GPU(s): step 8058: loss 4.1328, lr 9.5e-05, dt 2.0s +All GPU(s): step 8059: loss 4.1289, lr 9.5e-05, dt 2.1s +All GPU(s): step 8060: loss 4.1367, lr 9.5e-05, dt 2.1s +All GPU(s): step 8061: loss 4.1367, lr 9.5e-05, dt 2.0s +All GPU(s): step 8062: loss 4.1445, lr 9.5e-05, dt 2.0s +All GPU(s): step 8063: loss 4.1445, lr 9.5e-05, dt 2.0s +All GPU(s): step 8064: loss 4.1367, lr 9.4e-05, dt 2.2s +All GPU(s): step 8065: loss 4.1289, lr 9.4e-05, dt 2.1s +All GPU(s): step 8066: loss 4.1367, lr 9.4e-05, dt 2.0s +All GPU(s): step 8067: loss 4.1406, lr 9.4e-05, dt 2.0s +All GPU(s): step 8068: loss 4.1367, lr 9.4e-05, dt 2.1s +All GPU(s): step 8069: loss 4.1172, lr 9.4e-05, dt 2.1s +All GPU(s): step 8070: loss 4.1328, lr 9.4e-05, dt 2.0s +All GPU(s): step 8071: loss 4.1367, lr 9.4e-05, dt 2.0s +All GPU(s): step 8072: loss 4.1406, lr 9.4e-05, dt 2.0s +All GPU(s): step 8073: loss 4.1406, lr 9.4e-05, dt 2.0s +All GPU(s): step 8074: loss 4.1328, lr 9.4e-05, dt 2.2s +All GPU(s): step 8075: loss 4.1406, lr 9.4e-05, dt 2.1s +All GPU(s): step 8076: loss 4.1367, lr 9.4e-05, dt 2.0s +All GPU(s): step 8077: loss 4.1289, lr 9.4e-05, dt 2.0s +All GPU(s): step 8078: loss 4.1367, lr 9.4e-05, dt 2.0s +All GPU(s): step 8079: loss 4.1328, lr 9.4e-05, dt 2.1s +All GPU(s): step 8080: loss 4.1367, lr 9.4e-05, dt 2.0s +All GPU(s): step 8081: loss 4.1367, lr 9.4e-05, dt 2.0s +All GPU(s): step 8082: loss 4.1328, lr 9.4e-05, dt 2.0s +All GPU(s): step 8083: loss 4.1328, lr 9.4e-05, dt 2.1s +All GPU(s): step 8084: loss 4.1445, lr 9.4e-05, dt 2.1s +All GPU(s): step 8085: loss 4.1367, lr 9.4e-05, dt 2.0s +All GPU(s): step 8086: loss 4.1445, lr 9.4e-05, dt 2.0s +All GPU(s): step 8087: loss 4.1328, lr 9.4e-05, dt 2.0s +All GPU(s): step 8088: loss 4.1484, lr 9.3e-05, dt 2.1s +All GPU(s): step 8089: loss 4.1367, lr 9.3e-05, dt 2.0s +All GPU(s): step 8090: loss 4.1406, lr 9.3e-05, dt 2.0s +All GPU(s): step 8091: loss 4.1211, lr 9.3e-05, dt 2.0s +All GPU(s): step 8092: loss 4.1367, lr 9.3e-05, dt 2.1s +All GPU(s): step 8093: loss 4.1406, lr 9.3e-05, dt 2.1s +All GPU(s): step 8094: loss 4.1484, lr 9.3e-05, dt 2.0s +All GPU(s): step 8095: loss 4.1445, lr 9.3e-05, dt 2.0s +All GPU(s): step 8096: loss 4.1484, lr 9.3e-05, dt 2.0s +All GPU(s): step 8097: loss 4.1328, lr 9.3e-05, dt 2.0s +All GPU(s): step 8098: loss 4.1484, lr 9.3e-05, dt 2.1s +All GPU(s): step 8099: loss 4.1445, lr 9.3e-05, dt 2.0s +All GPU(s): step 8100: loss 4.1367, lr 9.3e-05, dt 2.0s +All GPU(s): step 8101: loss 4.1406, lr 9.3e-05, dt 2.0s +All GPU(s): step 8102: loss 4.1406, lr 9.3e-05, dt 2.0s +All GPU(s): step 8103: loss 4.1406, lr 9.3e-05, dt 2.1s +All GPU(s): step 8104: loss 4.1445, lr 9.3e-05, dt 2.0s +All GPU(s): step 8105: loss 4.1406, lr 9.3e-05, dt 2.0s +All GPU(s): step 8106: loss 4.1406, lr 9.3e-05, dt 2.0s +All GPU(s): step 8107: loss 4.1367, lr 9.3e-05, dt 2.0s +All GPU(s): step 8108: loss 4.1367, lr 9.3e-05, dt 2.1s +All GPU(s): step 8109: loss 4.1445, lr 9.3e-05, dt 2.0s +All GPU(s): step 8110: loss 4.1406, lr 9.3e-05, dt 2.0s +All GPU(s): step 8111: loss 4.1328, lr 9.3e-05, dt 2.0s +All GPU(s): step 8112: loss 4.1367, lr 9.2e-05, dt 2.0s +All GPU(s): step 8113: loss 4.1445, lr 9.2e-05, dt 2.1s +All GPU(s): step 8114: loss 4.1445, lr 9.2e-05, dt 2.0s +All GPU(s): step 8115: loss 4.1406, lr 9.2e-05, dt 2.0s +All GPU(s): step 8116: loss 4.1445, lr 9.2e-05, dt 2.0s +All GPU(s): step 8117: loss 4.1367, lr 9.2e-05, dt 2.1s +All GPU(s): step 8118: loss 4.1484, lr 9.2e-05, dt 2.1s +All GPU(s): step 8119: loss 4.1367, lr 9.2e-05, dt 2.0s +All GPU(s): step 8120: loss 4.1289, lr 9.2e-05, dt 2.0s +All GPU(s): step 8121: loss 4.1367, lr 9.2e-05, dt 2.0s +All GPU(s): step 8122: loss 4.1484, lr 9.2e-05, dt 2.2s +All GPU(s): step 8123: loss 4.1367, lr 9.2e-05, dt 2.1s +All GPU(s): step 8124: loss 4.1367, lr 9.2e-05, dt 2.1s +All GPU(s): step 8125: loss 4.1289, lr 9.2e-05, dt 2.1s +All GPU(s): step 8126: loss 4.1406, lr 9.2e-05, dt 2.1s +All GPU(s): step 8127: loss 4.1367, lr 9.2e-05, dt 2.1s +All GPU(s): step 8128: loss 4.1523, lr 9.2e-05, dt 2.0s +All GPU(s): step 8129: loss 4.1367, lr 9.2e-05, dt 2.0s +All GPU(s): step 8130: loss 4.1445, lr 9.2e-05, dt 2.0s +All GPU(s): step 8131: loss 4.1328, lr 9.2e-05, dt 2.1s +All GPU(s): step 8132: loss 4.1367, lr 9.2e-05, dt 2.1s +All GPU(s): step 8133: loss 4.1406, lr 9.2e-05, dt 2.0s +All GPU(s): step 8134: loss 4.1406, lr 9.2e-05, dt 2.1s +All GPU(s): step 8135: loss 4.1406, lr 9.2e-05, dt 2.0s +All GPU(s): step 8136: loss 4.1445, lr 9.1e-05, dt 2.1s +All GPU(s): step 8137: loss 4.1289, lr 9.1e-05, dt 2.1s +All GPU(s): step 8138: loss 4.1250, lr 9.1e-05, dt 2.0s +All GPU(s): step 8139: loss 4.1445, lr 9.1e-05, dt 2.0s +All GPU(s): step 8140: loss 4.1445, lr 9.1e-05, dt 2.0s +All GPU(s): step 8141: loss 4.1367, lr 9.1e-05, dt 2.1s +All GPU(s): step 8142: loss 4.1406, lr 9.1e-05, dt 2.1s +All GPU(s): step 8143: loss 4.1445, lr 9.1e-05, dt 2.0s +All GPU(s): step 8144: loss 4.1406, lr 9.1e-05, dt 2.0s +All GPU(s): step 8145: loss 4.1250, lr 9.1e-05, dt 2.0s +All GPU(s): step 8146: loss 4.1523, lr 9.1e-05, dt 2.1s +All GPU(s): step 8147: loss 4.1406, lr 9.1e-05, dt 2.1s +All GPU(s): step 8148: loss 4.1367, lr 9.1e-05, dt 2.0s +All GPU(s): step 8149: loss 4.1406, lr 9.1e-05, dt 2.0s +All GPU(s): step 8150: loss 4.1445, lr 9.1e-05, dt 2.1s +All GPU(s): step 8151: loss 4.1445, lr 9.1e-05, dt 2.2s +All GPU(s): step 8152: loss 4.1484, lr 9.1e-05, dt 2.1s +All GPU(s): step 8153: loss 4.1445, lr 9.1e-05, dt 2.0s +All GPU(s): step 8154: loss 4.1367, lr 9.1e-05, dt 2.0s +All GPU(s): step 8155: loss 4.1367, lr 9.1e-05, dt 2.1s +All GPU(s): step 8156: loss 4.1406, lr 9.1e-05, dt 2.2s +All GPU(s): step 8157: loss 4.1445, lr 9.1e-05, dt 2.0s +All GPU(s): step 8158: loss 4.1445, lr 9.1e-05, dt 2.0s +All GPU(s): step 8159: loss 4.1445, lr 9.1e-05, dt 2.0s +All GPU(s): step 8160: loss 4.1406, lr 9.1e-05, dt 2.0s +All GPU(s): step 8161: loss 4.1406, lr 9.0e-05, dt 2.1s +All GPU(s): step 8162: loss 4.1484, lr 9.0e-05, dt 2.0s +All GPU(s): step 8163: loss 4.1289, lr 9.0e-05, dt 2.0s +All GPU(s): step 8164: loss 4.1445, lr 9.0e-05, dt 2.0s +All GPU(s): step 8165: loss 4.1445, lr 9.0e-05, dt 2.1s +All GPU(s): step 8166: loss 4.1484, lr 9.0e-05, dt 2.1s +All GPU(s): step 8167: loss 4.1367, lr 9.0e-05, dt 2.0s +All GPU(s): step 8168: loss 4.1484, lr 9.0e-05, dt 2.1s +All GPU(s): step 8169: loss 4.1406, lr 9.0e-05, dt 2.0s +All GPU(s): step 8170: loss 4.1406, lr 9.0e-05, dt 2.1s +All GPU(s): step 8171: loss 4.1484, lr 9.0e-05, dt 2.1s +All GPU(s): step 8172: loss 4.1445, lr 9.0e-05, dt 2.1s +All GPU(s): step 8173: loss 4.1445, lr 9.0e-05, dt 2.0s +All GPU(s): step 8174: loss 4.1406, lr 9.0e-05, dt 2.1s +All GPU(s): step 8175: loss 4.1367, lr 9.0e-05, dt 2.1s +All GPU(s): step 8176: loss 4.1367, lr 9.0e-05, dt 2.0s +All GPU(s): step 8177: loss 4.1445, lr 9.0e-05, dt 2.0s +All GPU(s): step 8178: loss 4.1289, lr 9.0e-05, dt 2.0s +All GPU(s): step 8179: loss 4.1328, lr 9.0e-05, dt 2.1s +All GPU(s): step 8180: loss 4.1289, lr 9.0e-05, dt 2.2s +All GPU(s): step 8181: loss 4.1367, lr 9.0e-05, dt 2.0s +All GPU(s): step 8182: loss 4.1445, lr 9.0e-05, dt 2.1s +All GPU(s): step 8183: loss 4.1445, lr 9.0e-05, dt 2.0s +All GPU(s): step 8184: loss 4.1445, lr 9.0e-05, dt 2.1s +All GPU(s): step 8185: loss 4.1367, lr 9.0e-05, dt 2.2s +All GPU(s): step 8186: loss 4.1328, lr 8.9e-05, dt 2.1s +All GPU(s): step 8187: loss 4.1328, lr 8.9e-05, dt 2.0s +All GPU(s): step 8188: loss 4.1406, lr 8.9e-05, dt 2.1s +All GPU(s): step 8189: loss 4.1367, lr 8.9e-05, dt 2.1s +All GPU(s): step 8190: loss 4.1484, lr 8.9e-05, dt 2.1s +All GPU(s): step 8191: loss 4.1367, lr 8.9e-05, dt 2.0s +All GPU(s): step 8192: loss 4.1328, lr 8.9e-05, dt 2.1s +All GPU(s): step 8193: loss 4.1484, lr 8.9e-05, dt 2.0s +All GPU(s): step 8194: loss 4.1289, lr 8.9e-05, dt 2.0s +All GPU(s): step 8195: loss 4.1445, lr 8.9e-05, dt 2.1s +All GPU(s): step 8196: loss 4.1484, lr 8.9e-05, dt 2.1s +All GPU(s): step 8197: loss 4.1445, lr 8.9e-05, dt 2.0s +All GPU(s): step 8198: loss 4.1445, lr 8.9e-05, dt 2.1s +All GPU(s): step 8199: loss 4.1445, lr 8.9e-05, dt 2.1s +All GPU(s): step 8200: loss 4.1367, lr 8.9e-05, dt 2.1s +All GPU(s): step 8201: loss 4.1328, lr 8.9e-05, dt 2.0s +All GPU(s): step 8202: loss 4.1367, lr 8.9e-05, dt 2.0s +All GPU(s): step 8203: loss 4.1367, lr 8.9e-05, dt 2.0s +All GPU(s): step 8204: loss 4.1406, lr 8.9e-05, dt 2.1s +All GPU(s): step 8205: loss 4.1406, lr 8.9e-05, dt 2.1s +All GPU(s): step 8206: loss 4.1211, lr 8.9e-05, dt 2.0s +All GPU(s): step 8207: loss 4.1367, lr 8.9e-05, dt 2.0s +All GPU(s): step 8208: loss 4.1406, lr 8.9e-05, dt 2.0s +All GPU(s): step 8209: loss 4.1406, lr 8.9e-05, dt 2.1s +All GPU(s): step 8210: loss 4.1445, lr 8.9e-05, dt 2.0s +All GPU(s): step 8211: loss 4.1445, lr 8.8e-05, dt 2.0s +All GPU(s): step 8212: loss 4.1406, lr 8.8e-05, dt 2.0s +All GPU(s): step 8213: loss 4.1289, lr 8.8e-05, dt 2.1s +All GPU(s): step 8214: loss 4.1367, lr 8.8e-05, dt 2.2s +All GPU(s): step 8215: loss 4.1367, lr 8.8e-05, dt 2.1s +All GPU(s): step 8216: loss 4.1445, lr 8.8e-05, dt 2.1s +All GPU(s): step 8217: loss 4.1328, lr 8.8e-05, dt 2.0s +All GPU(s): step 8218: loss 4.1367, lr 8.8e-05, dt 2.1s +All GPU(s): step 8219: loss 4.1289, lr 8.8e-05, dt 2.1s +All GPU(s): step 8220: loss 4.1406, lr 8.8e-05, dt 2.1s +All GPU(s): step 8221: loss 4.1445, lr 8.8e-05, dt 2.0s +All GPU(s): step 8222: loss 4.1406, lr 8.8e-05, dt 2.0s +All GPU(s): step 8223: loss 4.1445, lr 8.8e-05, dt 2.1s +All GPU(s): step 8224: loss 4.1406, lr 8.8e-05, dt 2.1s +All GPU(s): step 8225: loss 4.1445, lr 8.8e-05, dt 2.0s +All GPU(s): step 8226: loss 4.1367, lr 8.8e-05, dt 2.1s +All GPU(s): step 8227: loss 4.1445, lr 8.8e-05, dt 2.0s +All GPU(s): step 8228: loss 4.1406, lr 8.8e-05, dt 2.1s +All GPU(s): step 8229: loss 4.1523, lr 8.8e-05, dt 2.0s +All GPU(s): step 8230: loss 4.1367, lr 8.8e-05, dt 2.0s +All GPU(s): step 8231: loss 4.1367, lr 8.8e-05, dt 2.0s +All GPU(s): step 8232: loss 4.1328, lr 8.8e-05, dt 2.0s +All GPU(s): step 8233: loss 4.1289, lr 8.8e-05, dt 2.2s +All GPU(s): step 8234: loss 4.1445, lr 8.8e-05, dt 2.1s +All GPU(s): step 8235: loss 4.1484, lr 8.8e-05, dt 2.0s +All GPU(s): step 8236: loss 4.1367, lr 8.7e-05, dt 2.0s +All GPU(s): step 8237: loss 4.1406, lr 8.7e-05, dt 2.1s +All GPU(s): step 8238: loss 4.1367, lr 8.7e-05, dt 2.1s +All GPU(s): step 8239: loss 4.1406, lr 8.7e-05, dt 2.0s +All GPU(s): step 8240: loss 4.1445, lr 8.7e-05, dt 2.0s +All GPU(s): step 8241: loss 4.1445, lr 8.7e-05, dt 2.0s +All GPU(s): step 8242: loss 4.1406, lr 8.7e-05, dt 2.0s +All GPU(s): step 8243: loss 4.1406, lr 8.7e-05, dt 2.1s +All GPU(s): step 8244: loss 4.1406, lr 8.7e-05, dt 2.0s +All GPU(s): step 8245: loss 4.1445, lr 8.7e-05, dt 2.0s +All GPU(s): step 8246: loss 4.1289, lr 8.7e-05, dt 2.0s +All GPU(s): step 8247: loss 4.1367, lr 8.7e-05, dt 2.1s +All GPU(s): step 8248: loss 4.1367, lr 8.7e-05, dt 2.1s +All GPU(s): step 8249: loss 4.1406, lr 8.7e-05, dt 2.1s +All GPU(s): step 8250: loss 4.1484, lr 8.7e-05, dt 2.0s +All GPU(s): step 8251: loss 4.1523, lr 8.7e-05, dt 2.0s +All GPU(s): step 8252: loss 4.1367, lr 8.7e-05, dt 2.1s +All GPU(s): step 8253: loss 4.1406, lr 8.7e-05, dt 2.1s +All GPU(s): step 8254: loss 4.1367, lr 8.7e-05, dt 2.0s +All GPU(s): step 8255: loss 4.1289, lr 8.7e-05, dt 2.0s +All GPU(s): step 8256: loss 4.1406, lr 8.7e-05, dt 2.0s +All GPU(s): step 8257: loss 4.1484, lr 8.7e-05, dt 2.1s +All GPU(s): step 8258: loss 4.1406, lr 8.7e-05, dt 2.0s +All GPU(s): step 8259: loss 4.1406, lr 8.7e-05, dt 2.0s +All GPU(s): step 8260: loss 4.1445, lr 8.7e-05, dt 2.0s +All GPU(s): step 8261: loss 4.1367, lr 8.7e-05, dt 2.0s +All GPU(s): step 8262: loss 4.1484, lr 8.6e-05, dt 2.1s +All GPU(s): step 8263: loss 4.1484, lr 8.6e-05, dt 2.0s +All GPU(s): step 8264: loss 4.1328, lr 8.6e-05, dt 2.0s +All GPU(s): step 8265: loss 4.1367, lr 8.6e-05, dt 2.0s +All GPU(s): step 8266: loss 4.1367, lr 8.6e-05, dt 2.0s +All GPU(s): step 8267: loss 4.1484, lr 8.6e-05, dt 2.1s +All GPU(s): step 8268: loss 4.1484, lr 8.6e-05, dt 2.0s +All GPU(s): step 8269: loss 4.1328, lr 8.6e-05, dt 2.0s +All GPU(s): step 8270: loss 4.1367, lr 8.6e-05, dt 2.0s +All GPU(s): step 8271: loss 4.1406, lr 8.6e-05, dt 2.0s +All GPU(s): step 8272: loss 4.1484, lr 8.6e-05, dt 2.1s +All GPU(s): step 8273: loss 4.1328, lr 8.6e-05, dt 2.0s +All GPU(s): step 8274: loss 4.1445, lr 8.6e-05, dt 2.0s +All GPU(s): step 8275: loss 4.1484, lr 8.6e-05, dt 2.0s +All GPU(s): step 8276: loss 4.1445, lr 8.6e-05, dt 2.1s +All GPU(s): step 8277: loss 4.1367, lr 8.6e-05, dt 2.1s +All GPU(s): step 8278: loss 4.1289, lr 8.6e-05, dt 2.0s +All GPU(s): step 8279: loss 4.1328, lr 8.6e-05, dt 2.0s +All GPU(s): step 8280: loss 4.1406, lr 8.6e-05, dt 2.0s +All GPU(s): step 8281: loss 4.1445, lr 8.6e-05, dt 2.1s +All GPU(s): step 8282: loss 4.1406, lr 8.6e-05, dt 2.1s +All GPU(s): step 8283: loss 4.1445, lr 8.6e-05, dt 2.0s +All GPU(s): step 8284: loss 4.1406, lr 8.6e-05, dt 2.0s +All GPU(s): step 8285: loss 4.1523, lr 8.6e-05, dt 2.0s +All GPU(s): step 8286: loss 4.1328, lr 8.6e-05, dt 2.1s +All GPU(s): step 8287: loss 4.1289, lr 8.6e-05, dt 2.1s +All GPU(s): step 8288: loss 4.1406, lr 8.5e-05, dt 2.1s +All GPU(s): step 8289: loss 4.1484, lr 8.5e-05, dt 2.1s +All GPU(s): step 8290: loss 4.1328, lr 8.5e-05, dt 2.0s +All GPU(s): step 8291: loss 4.1289, lr 8.5e-05, dt 2.2s +All GPU(s): step 8292: loss 4.1328, lr 8.5e-05, dt 2.1s +All GPU(s): step 8293: loss 4.1367, lr 8.5e-05, dt 2.0s +All GPU(s): step 8294: loss 4.1406, lr 8.5e-05, dt 2.1s +All GPU(s): step 8295: loss 4.1328, lr 8.5e-05, dt 2.1s +All GPU(s): step 8296: loss 4.1406, lr 8.5e-05, dt 2.1s +All GPU(s): step 8297: loss 4.1406, lr 8.5e-05, dt 2.1s +All GPU(s): step 8298: loss 4.1367, lr 8.5e-05, dt 2.1s +All GPU(s): step 8299: loss 4.1445, lr 8.5e-05, dt 2.0s +All GPU(s): step 8300: loss 4.1406, lr 8.5e-05, dt 2.1s +All GPU(s): step 8301: loss 4.1367, lr 8.5e-05, dt 2.1s +All GPU(s): step 8302: loss 4.1367, lr 8.5e-05, dt 2.0s +All GPU(s): step 8303: loss 4.1328, lr 8.5e-05, dt 2.0s +All GPU(s): step 8304: loss 4.1406, lr 8.5e-05, dt 2.0s +All GPU(s): step 8305: loss 4.1289, lr 8.5e-05, dt 2.1s +All GPU(s): step 8306: loss 4.1367, lr 8.5e-05, dt 2.1s +All GPU(s): step 8307: loss 4.1367, lr 8.5e-05, dt 2.0s +All GPU(s): step 8308: loss 4.1445, lr 8.5e-05, dt 2.1s +All GPU(s): step 8309: loss 4.1406, lr 8.5e-05, dt 2.0s +All GPU(s): step 8310: loss 4.1484, lr 8.5e-05, dt 2.1s +All GPU(s): step 8311: loss 4.1445, lr 8.5e-05, dt 2.1s +All GPU(s): step 8312: loss 4.1406, lr 8.5e-05, dt 2.1s +All GPU(s): step 8313: loss 4.1406, lr 8.5e-05, dt 2.0s +All GPU(s): step 8314: loss 4.1367, lr 8.4e-05, dt 2.0s +All GPU(s): step 8315: loss 4.1406, lr 8.4e-05, dt 2.1s +All GPU(s): step 8316: loss 4.1523, lr 8.4e-05, dt 2.0s +All GPU(s): step 8317: loss 4.1289, lr 8.4e-05, dt 2.0s +All GPU(s): step 8318: loss 4.1445, lr 8.4e-05, dt 2.0s +All GPU(s): step 8319: loss 4.1328, lr 8.4e-05, dt 2.0s +All GPU(s): step 8320: loss 4.1367, lr 8.4e-05, dt 2.1s +All GPU(s): step 8321: loss 4.1406, lr 8.4e-05, dt 2.1s +All GPU(s): step 8322: loss 4.1328, lr 8.4e-05, dt 2.1s +All GPU(s): step 8323: loss 4.1367, lr 8.4e-05, dt 2.0s +All GPU(s): step 8324: loss 4.1406, lr 8.4e-05, dt 2.1s +All GPU(s): step 8325: loss 4.1445, lr 8.4e-05, dt 2.1s +All GPU(s): step 8326: loss 4.1328, lr 8.4e-05, dt 2.0s +All GPU(s): step 8327: loss 4.1445, lr 8.4e-05, dt 2.0s +All GPU(s): step 8328: loss 4.1523, lr 8.4e-05, dt 2.0s +All GPU(s): step 8329: loss 4.1406, lr 8.4e-05, dt 2.1s +All GPU(s): step 8330: loss 4.1328, lr 8.4e-05, dt 2.1s +All GPU(s): step 8331: loss 4.1406, lr 8.4e-05, dt 2.0s +All GPU(s): step 8332: loss 4.1328, lr 8.4e-05, dt 2.0s +All GPU(s): step 8333: loss 4.1367, lr 8.4e-05, dt 2.0s +All GPU(s): step 8334: loss 4.1367, lr 8.4e-05, dt 2.0s +All GPU(s): step 8335: loss 4.1445, lr 8.4e-05, dt 2.1s +All GPU(s): step 8336: loss 4.1367, lr 8.4e-05, dt 2.0s +All GPU(s): step 8337: loss 4.1484, lr 8.4e-05, dt 2.0s +All GPU(s): step 8338: loss 4.1484, lr 8.4e-05, dt 2.0s +All GPU(s): step 8339: loss 4.1328, lr 8.4e-05, dt 2.1s +All GPU(s): step 8340: loss 4.1406, lr 8.4e-05, dt 2.1s +All GPU(s): step 8341: loss 4.1289, lr 8.3e-05, dt 2.0s +All GPU(s): step 8342: loss 4.1289, lr 8.3e-05, dt 2.0s +All GPU(s): step 8343: loss 4.1367, lr 8.3e-05, dt 2.0s +All GPU(s): step 8344: loss 4.1367, lr 8.3e-05, dt 2.1s +All GPU(s): step 8345: loss 4.1406, lr 8.3e-05, dt 2.0s +All GPU(s): step 8346: loss 4.1406, lr 8.3e-05, dt 2.0s +All GPU(s): step 8347: loss 4.1328, lr 8.3e-05, dt 2.0s +All GPU(s): step 8348: loss 4.1523, lr 8.3e-05, dt 2.0s +All GPU(s): step 8349: loss 4.1289, lr 8.3e-05, dt 2.1s +All GPU(s): step 8350: loss 4.1406, lr 8.3e-05, dt 2.1s +All GPU(s): step 8351: loss 4.1406, lr 8.3e-05, dt 2.0s +All GPU(s): step 8352: loss 4.1406, lr 8.3e-05, dt 2.0s +All GPU(s): step 8353: loss 4.1367, lr 8.3e-05, dt 2.1s +All GPU(s): step 8354: loss 4.1289, lr 8.3e-05, dt 2.1s +All GPU(s): step 8355: loss 4.1406, lr 8.3e-05, dt 2.0s +All GPU(s): step 8356: loss 4.1367, lr 8.3e-05, dt 2.0s +All GPU(s): step 8357: loss 4.1406, lr 8.3e-05, dt 2.1s +All GPU(s): step 8358: loss 4.1289, lr 8.3e-05, dt 2.1s +All GPU(s): step 8359: loss 4.1289, lr 8.3e-05, dt 2.1s +All GPU(s): step 8360: loss 4.1172, lr 8.3e-05, dt 2.0s +All GPU(s): step 8361: loss 4.1445, lr 8.3e-05, dt 2.0s +All GPU(s): step 8362: loss 4.1406, lr 8.3e-05, dt 2.0s +All GPU(s): step 8363: loss 4.1328, lr 8.3e-05, dt 2.0s +All GPU(s): step 8364: loss 4.1406, lr 8.3e-05, dt 2.1s +All GPU(s): step 8365: loss 4.1445, lr 8.3e-05, dt 2.0s +All GPU(s): step 8366: loss 4.1367, lr 8.3e-05, dt 2.1s +All GPU(s): step 8367: loss 4.1445, lr 8.3e-05, dt 2.0s +All GPU(s): step 8368: loss 4.1367, lr 8.2e-05, dt 2.1s +All GPU(s): step 8369: loss 4.1523, lr 8.2e-05, dt 2.1s +All GPU(s): step 8370: loss 4.1367, lr 8.2e-05, dt 2.0s +All GPU(s): step 8371: loss 4.1328, lr 8.2e-05, dt 2.0s +All GPU(s): step 8372: loss 4.1484, lr 8.2e-05, dt 2.1s +All GPU(s): step 8373: loss 4.1445, lr 8.2e-05, dt 2.1s +All GPU(s): step 8374: loss 4.1328, lr 8.2e-05, dt 2.0s +All GPU(s): step 8375: loss 4.1367, lr 8.2e-05, dt 2.0s +All GPU(s): step 8376: loss 4.1484, lr 8.2e-05, dt 2.0s +All GPU(s): step 8377: loss 4.1406, lr 8.2e-05, dt 2.0s +All GPU(s): step 8378: loss 4.1367, lr 8.2e-05, dt 2.1s +All GPU(s): step 8379: loss 4.1367, lr 8.2e-05, dt 2.0s +All GPU(s): step 8380: loss 4.1367, lr 8.2e-05, dt 2.0s +All GPU(s): step 8381: loss 4.1289, lr 8.2e-05, dt 2.0s +All GPU(s): step 8382: loss 4.1367, lr 8.2e-05, dt 2.0s +All GPU(s): step 8383: loss 4.1289, lr 8.2e-05, dt 2.1s +All GPU(s): step 8384: loss 4.1367, lr 8.2e-05, dt 2.0s +All GPU(s): step 8385: loss 4.1445, lr 8.2e-05, dt 2.0s +All GPU(s): step 8386: loss 4.1250, lr 8.2e-05, dt 2.0s +All GPU(s): step 8387: loss 4.1289, lr 8.2e-05, dt 2.1s +All GPU(s): step 8388: loss 4.1328, lr 8.2e-05, dt 2.1s +All GPU(s): step 8389: loss 4.1406, lr 8.2e-05, dt 2.1s +All GPU(s): step 8390: loss 4.1445, lr 8.2e-05, dt 2.0s +All GPU(s): step 8391: loss 4.1367, lr 8.2e-05, dt 2.0s +All GPU(s): step 8392: loss 4.1367, lr 8.2e-05, dt 2.0s +All GPU(s): step 8393: loss 4.1406, lr 8.2e-05, dt 2.1s +All GPU(s): step 8394: loss 4.1406, lr 8.2e-05, dt 2.0s +All GPU(s): step 8395: loss 4.1367, lr 8.2e-05, dt 2.0s +All GPU(s): step 8396: loss 4.1523, lr 8.1e-05, dt 2.0s +All GPU(s): step 8397: loss 4.1250, lr 8.1e-05, dt 2.1s +All GPU(s): step 8398: loss 4.1250, lr 8.1e-05, dt 2.1s +All GPU(s): step 8399: loss 4.1445, lr 8.1e-05, dt 2.0s +All GPU(s): step 8400: loss 4.1484, lr 8.1e-05, dt 2.0s +All GPU(s): step 8401: loss 4.1367, lr 8.1e-05, dt 2.1s +All GPU(s): step 8402: loss 4.1484, lr 8.1e-05, dt 2.1s +All GPU(s): step 8403: loss 4.1484, lr 8.1e-05, dt 2.0s +All GPU(s): step 8404: loss 4.1445, lr 8.1e-05, dt 2.0s +All GPU(s): step 8405: loss 4.1406, lr 8.1e-05, dt 2.0s +All GPU(s): step 8406: loss 4.1406, lr 8.1e-05, dt 2.0s +All GPU(s): step 8407: loss 4.1484, lr 8.1e-05, dt 2.1s +All GPU(s): step 8408: loss 4.1367, lr 8.1e-05, dt 2.0s +All GPU(s): step 8409: loss 4.1445, lr 8.1e-05, dt 2.0s +All GPU(s): step 8410: loss 4.1406, lr 8.1e-05, dt 2.0s +All GPU(s): step 8411: loss 4.1484, lr 8.1e-05, dt 2.0s +All GPU(s): step 8412: loss 4.1406, lr 8.1e-05, dt 2.1s +All GPU(s): step 8413: loss 4.1328, lr 8.1e-05, dt 2.0s +All GPU(s): step 8414: loss 4.1328, lr 8.1e-05, dt 2.0s +All GPU(s): step 8415: loss 4.1328, lr 8.1e-05, dt 2.0s +All GPU(s): step 8416: loss 4.1406, lr 8.1e-05, dt 2.0s +All GPU(s): step 8417: loss 4.1250, lr 8.1e-05, dt 2.1s +All GPU(s): step 8418: loss 4.1367, lr 8.1e-05, dt 2.0s +All GPU(s): step 8419: loss 4.1367, lr 8.1e-05, dt 2.0s +All GPU(s): step 8420: loss 4.1523, lr 8.1e-05, dt 2.0s +All GPU(s): step 8421: loss 4.1367, lr 8.1e-05, dt 2.0s +All GPU(s): step 8422: loss 4.1211, lr 8.1e-05, dt 2.1s +All GPU(s): step 8423: loss 4.1406, lr 8.1e-05, dt 2.0s +All GPU(s): step 8424: loss 4.1445, lr 8.0e-05, dt 2.0s +All GPU(s): step 8425: loss 4.1445, lr 8.0e-05, dt 2.0s +All GPU(s): step 8426: loss 4.1289, lr 8.0e-05, dt 2.1s +All GPU(s): step 8427: loss 4.1250, lr 8.0e-05, dt 2.1s +All GPU(s): step 8428: loss 4.1367, lr 8.0e-05, dt 2.0s +All GPU(s): step 8429: loss 4.1484, lr 8.0e-05, dt 2.0s +All GPU(s): step 8430: loss 4.1445, lr 8.0e-05, dt 2.1s +All GPU(s): step 8431: loss 4.1445, lr 8.0e-05, dt 2.1s +All GPU(s): step 8432: loss 4.1445, lr 8.0e-05, dt 2.0s +All GPU(s): step 8433: loss 4.1328, lr 8.0e-05, dt 2.0s +All GPU(s): step 8434: loss 4.1367, lr 8.0e-05, dt 2.0s +All GPU(s): step 8435: loss 4.1367, lr 8.0e-05, dt 2.0s +All GPU(s): step 8436: loss 4.1406, lr 8.0e-05, dt 2.1s +All GPU(s): step 8437: loss 4.1367, lr 8.0e-05, dt 2.1s +All GPU(s): step 8438: loss 4.1367, lr 8.0e-05, dt 2.1s +All GPU(s): step 8439: loss 4.1406, lr 8.0e-05, dt 2.0s +All GPU(s): step 8440: loss 4.1289, lr 8.0e-05, dt 2.1s +All GPU(s): step 8441: loss 4.1445, lr 8.0e-05, dt 2.2s +All GPU(s): step 8442: loss 4.1484, lr 8.0e-05, dt 2.0s +All GPU(s): step 8443: loss 4.1406, lr 8.0e-05, dt 2.0s +All GPU(s): step 8444: loss 4.1328, lr 8.0e-05, dt 2.0s +All GPU(s): step 8445: loss 4.1328, lr 8.0e-05, dt 2.1s +All GPU(s): step 8446: loss 4.1367, lr 8.0e-05, dt 2.1s +All GPU(s): step 8447: loss 4.1367, lr 8.0e-05, dt 2.1s +All GPU(s): step 8448: loss 4.1484, lr 8.0e-05, dt 2.0s +All GPU(s): step 8449: loss 4.1484, lr 8.0e-05, dt 2.1s +All GPU(s): step 8450: loss 4.1367, lr 8.0e-05, dt 2.0s +All GPU(s): step 8451: loss 4.1406, lr 8.0e-05, dt 2.1s +All GPU(s): step 8452: loss 4.1328, lr 7.9e-05, dt 2.1s +All GPU(s): step 8453: loss 4.1406, lr 7.9e-05, dt 2.1s +All GPU(s): step 8454: loss 4.1406, lr 7.9e-05, dt 2.0s +All GPU(s): step 8455: loss 4.1406, lr 7.9e-05, dt 2.1s +All GPU(s): step 8456: loss 4.1289, lr 7.9e-05, dt 2.1s +All GPU(s): step 8457: loss 4.1445, lr 7.9e-05, dt 2.1s +All GPU(s): step 8458: loss 4.1406, lr 7.9e-05, dt 2.0s +All GPU(s): step 8459: loss 4.1289, lr 7.9e-05, dt 2.0s +All GPU(s): step 8460: loss 4.1445, lr 7.9e-05, dt 2.1s +All GPU(s): step 8461: loss 4.1328, lr 7.9e-05, dt 2.0s +All GPU(s): step 8462: loss 4.1523, lr 7.9e-05, dt 2.0s +All GPU(s): step 8463: loss 4.1367, lr 7.9e-05, dt 2.0s +All GPU(s): step 8464: loss 4.1523, lr 7.9e-05, dt 2.0s +All GPU(s): step 8465: loss 4.1367, lr 7.9e-05, dt 2.1s +All GPU(s): step 8466: loss 4.1445, lr 7.9e-05, dt 2.1s +All GPU(s): step 8467: loss 4.1406, lr 7.9e-05, dt 2.0s +All GPU(s): step 8468: loss 4.1484, lr 7.9e-05, dt 2.0s +All GPU(s): step 8469: loss 4.1289, lr 7.9e-05, dt 2.0s +All GPU(s): step 8470: loss 4.1328, lr 7.9e-05, dt 2.1s +All GPU(s): step 8471: loss 4.1367, lr 7.9e-05, dt 2.0s +All GPU(s): step 8472: loss 4.1445, lr 7.9e-05, dt 2.0s +All GPU(s): step 8473: loss 4.1328, lr 7.9e-05, dt 2.0s +All GPU(s): step 8474: loss 4.1328, lr 7.9e-05, dt 2.0s +All GPU(s): step 8475: loss 4.1406, lr 7.9e-05, dt 2.3s +All GPU(s): step 8476: loss 4.1328, lr 7.9e-05, dt 2.0s +All GPU(s): step 8477: loss 4.1328, lr 7.9e-05, dt 2.0s +All GPU(s): step 8478: loss 4.1445, lr 7.9e-05, dt 2.0s +All GPU(s): step 8479: loss 4.1445, lr 7.9e-05, dt 2.0s +All GPU(s): step 8480: loss 4.1484, lr 7.9e-05, dt 2.1s +All GPU(s): step 8481: loss 4.1406, lr 7.8e-05, dt 2.0s +All GPU(s): step 8482: loss 4.1484, lr 7.8e-05, dt 2.0s +All GPU(s): step 8483: loss 4.1445, lr 7.8e-05, dt 2.0s +All GPU(s): step 8484: loss 4.1367, lr 7.8e-05, dt 2.1s +All GPU(s): step 8485: loss 4.1406, lr 7.8e-05, dt 2.1s +All GPU(s): step 8486: loss 4.1445, lr 7.8e-05, dt 2.0s +All GPU(s): step 8487: loss 4.1445, lr 7.8e-05, dt 2.1s +All GPU(s): step 8488: loss 4.1445, lr 7.8e-05, dt 2.0s +All GPU(s): step 8489: loss 4.1328, lr 7.8e-05, dt 2.1s +All GPU(s): step 8490: loss 4.1367, lr 7.8e-05, dt 2.0s +All GPU(s): step 8491: loss 4.1406, lr 7.8e-05, dt 2.0s +All GPU(s): step 8492: loss 4.1367, lr 7.8e-05, dt 2.0s +All GPU(s): step 8493: loss 4.1406, lr 7.8e-05, dt 2.1s +All GPU(s): step 8494: loss 4.1367, lr 7.8e-05, dt 2.2s +All GPU(s): step 8495: loss 4.1367, lr 7.8e-05, dt 2.0s +All GPU(s): step 8496: loss 4.1484, lr 7.8e-05, dt 2.0s +All GPU(s): step 8497: loss 4.1445, lr 7.8e-05, dt 2.0s +All GPU(s): step 8498: loss 4.1289, lr 7.8e-05, dt 2.0s +All GPU(s): step 8499: loss 4.1289, lr 7.8e-05, dt 2.1s +All GPU(s): step 8500: loss 4.1367, lr 7.8e-05, dt 2.0s +All GPU(s): step 8501: loss 4.1328, lr 7.8e-05, dt 2.0s +All GPU(s): step 8502: loss 4.1328, lr 7.8e-05, dt 2.0s +All GPU(s): step 8503: loss 4.1406, lr 7.8e-05, dt 2.0s +All GPU(s): step 8504: loss 4.1406, lr 7.8e-05, dt 2.1s +All GPU(s): step 8505: loss 4.1289, lr 7.8e-05, dt 2.0s +All GPU(s): step 8506: loss 4.1406, lr 7.8e-05, dt 2.0s +All GPU(s): step 8507: loss 4.1250, lr 7.8e-05, dt 2.0s +All GPU(s): step 8508: loss 4.1406, lr 7.8e-05, dt 2.0s +All GPU(s): step 8509: loss 4.1328, lr 7.8e-05, dt 2.1s +All GPU(s): step 8510: loss 4.1367, lr 7.7e-05, dt 2.0s +All GPU(s): step 8511: loss 4.1367, lr 7.7e-05, dt 2.0s +All GPU(s): step 8512: loss 4.1406, lr 7.7e-05, dt 2.0s +All GPU(s): step 8513: loss 4.1406, lr 7.7e-05, dt 2.1s +All GPU(s): step 8514: loss 4.0938, lr 7.7e-05, dt 2.1s +All GPU(s): step 8515: loss 4.1328, lr 7.7e-05, dt 2.0s +All GPU(s): step 8516: loss 4.1406, lr 7.7e-05, dt 2.1s +All GPU(s): step 8517: loss 4.1406, lr 7.7e-05, dt 2.0s +All GPU(s): step 8518: loss 4.1406, lr 7.7e-05, dt 2.2s +All GPU(s): step 8519: loss 4.1367, lr 7.7e-05, dt 2.1s +All GPU(s): step 8520: loss 4.1367, lr 7.7e-05, dt 2.0s +All GPU(s): step 8521: loss 4.1406, lr 7.7e-05, dt 2.0s +All GPU(s): step 8522: loss 4.1289, lr 7.7e-05, dt 2.1s +All GPU(s): step 8523: loss 4.1406, lr 7.7e-05, dt 2.2s +All GPU(s): step 8524: loss 4.1250, lr 7.7e-05, dt 2.0s +All GPU(s): step 8525: loss 4.1484, lr 7.7e-05, dt 2.0s +All GPU(s): step 8526: loss 4.1445, lr 7.7e-05, dt 2.0s +All GPU(s): step 8527: loss 4.1406, lr 7.7e-05, dt 2.1s +All GPU(s): step 8528: loss 4.1406, lr 7.7e-05, dt 2.1s +All GPU(s): step 8529: loss 4.1523, lr 7.7e-05, dt 2.0s +All GPU(s): step 8530: loss 4.1484, lr 7.7e-05, dt 2.0s +All GPU(s): step 8531: loss 4.1523, lr 7.7e-05, dt 2.0s +All GPU(s): step 8532: loss 4.1484, lr 7.7e-05, dt 2.0s +All GPU(s): step 8533: loss 4.1484, lr 7.7e-05, dt 2.1s +All GPU(s): step 8534: loss 4.1289, lr 7.7e-05, dt 2.0s +All GPU(s): step 8535: loss 4.1484, lr 7.7e-05, dt 2.1s +All GPU(s): step 8536: loss 4.1406, lr 7.7e-05, dt 2.0s +All GPU(s): step 8537: loss 4.1406, lr 7.7e-05, dt 2.1s +All GPU(s): step 8538: loss 4.1406, lr 7.7e-05, dt 2.2s +All GPU(s): step 8539: loss 4.1367, lr 7.7e-05, dt 2.1s +All GPU(s): step 8540: loss 4.1250, lr 7.6e-05, dt 2.1s +All GPU(s): step 8541: loss 4.1406, lr 7.6e-05, dt 2.1s +All GPU(s): step 8542: loss 4.1367, lr 7.6e-05, dt 2.1s +All GPU(s): step 8543: loss 4.1445, lr 7.6e-05, dt 2.1s +All GPU(s): step 8544: loss 4.1328, lr 7.6e-05, dt 2.1s +All GPU(s): step 8545: loss 4.1328, lr 7.6e-05, dt 2.1s +All GPU(s): step 8546: loss 4.1328, lr 7.6e-05, dt 2.1s +All GPU(s): step 8547: loss 4.1445, lr 7.6e-05, dt 2.1s +All GPU(s): step 8548: loss 4.1406, lr 7.6e-05, dt 2.1s +All GPU(s): step 8549: loss 4.1484, lr 7.6e-05, dt 2.0s +All GPU(s): step 8550: loss 4.1289, lr 7.6e-05, dt 2.1s +All GPU(s): step 8551: loss 4.1367, lr 7.6e-05, dt 2.1s +All GPU(s): step 8552: loss 4.1250, lr 7.6e-05, dt 2.1s +All GPU(s): step 8553: loss 4.1250, lr 7.6e-05, dt 2.0s +All GPU(s): step 8554: loss 4.1484, lr 7.6e-05, dt 2.0s +All GPU(s): step 8555: loss 4.1289, lr 7.6e-05, dt 2.1s +All GPU(s): step 8556: loss 4.1406, lr 7.6e-05, dt 2.0s +All GPU(s): step 8557: loss 4.1289, lr 7.6e-05, dt 2.1s +All GPU(s): step 8558: loss 4.1172, lr 7.6e-05, dt 2.0s +All GPU(s): step 8559: loss 4.1367, lr 7.6e-05, dt 2.0s +All GPU(s): step 8560: loss 4.1445, lr 7.6e-05, dt 2.0s +All GPU(s): step 8561: loss 4.1406, lr 7.6e-05, dt 2.1s +All GPU(s): step 8562: loss 4.1406, lr 7.6e-05, dt 2.1s +All GPU(s): step 8563: loss 4.1250, lr 7.6e-05, dt 2.0s +All GPU(s): step 8564: loss 4.1406, lr 7.6e-05, dt 2.0s +All GPU(s): step 8565: loss 4.1367, lr 7.6e-05, dt 2.0s +All GPU(s): step 8566: loss 4.1367, lr 7.6e-05, dt 2.1s +All GPU(s): step 8567: loss 4.1445, lr 7.6e-05, dt 2.1s +All GPU(s): step 8568: loss 4.1484, lr 7.6e-05, dt 2.0s +All GPU(s): step 8569: loss 4.1484, lr 7.6e-05, dt 2.1s +All GPU(s): step 8570: loss 4.1367, lr 7.5e-05, dt 2.0s +All GPU(s): step 8571: loss 4.1406, lr 7.5e-05, dt 2.1s +All GPU(s): step 8572: loss 4.1328, lr 7.5e-05, dt 2.0s +All GPU(s): step 8573: loss 4.1367, lr 7.5e-05, dt 2.0s +All GPU(s): step 8574: loss 4.1406, lr 7.5e-05, dt 2.0s +All GPU(s): step 8575: loss 4.1367, lr 7.5e-05, dt 2.0s +All GPU(s): step 8576: loss 4.1406, lr 7.5e-05, dt 2.1s +All GPU(s): step 8577: loss 4.1406, lr 7.5e-05, dt 2.0s +All GPU(s): step 8578: loss 4.1445, lr 7.5e-05, dt 2.0s +All GPU(s): step 8579: loss 4.1328, lr 7.5e-05, dt 2.0s +All GPU(s): step 8580: loss 4.1328, lr 7.5e-05, dt 2.0s +All GPU(s): step 8581: loss 4.1367, lr 7.5e-05, dt 2.2s +All GPU(s): step 8582: loss 4.1406, lr 7.5e-05, dt 2.0s +All GPU(s): step 8583: loss 4.1445, lr 7.5e-05, dt 2.0s +All GPU(s): step 8584: loss 4.1328, lr 7.5e-05, dt 2.0s +All GPU(s): step 8585: loss 4.1367, lr 7.5e-05, dt 2.0s +All GPU(s): step 8586: loss 4.1406, lr 7.5e-05, dt 2.1s +All GPU(s): step 8587: loss 4.1250, lr 7.5e-05, dt 2.0s +All GPU(s): step 8588: loss 4.1328, lr 7.5e-05, dt 2.0s +All GPU(s): step 8589: loss 4.1406, lr 7.5e-05, dt 2.0s +All GPU(s): step 8590: loss 4.1328, lr 7.5e-05, dt 2.0s +All GPU(s): step 8591: loss 4.1445, lr 7.5e-05, dt 2.1s +All GPU(s): step 8592: loss 4.1367, lr 7.5e-05, dt 2.0s +All GPU(s): step 8593: loss 4.1406, lr 7.5e-05, dt 2.0s +All GPU(s): step 8594: loss 4.1445, lr 7.5e-05, dt 2.0s +All GPU(s): step 8595: loss 4.1406, lr 7.5e-05, dt 2.0s +All GPU(s): step 8596: loss 4.1406, lr 7.5e-05, dt 2.1s +All GPU(s): step 8597: loss 4.1289, lr 7.5e-05, dt 2.0s +All GPU(s): step 8598: loss 4.1328, lr 7.5e-05, dt 2.0s +All GPU(s): step 8599: loss 4.1406, lr 7.5e-05, dt 2.0s +All GPU(s): step 8600: loss 4.1445, lr 7.5e-05, dt 2.1s +All GPU(s): step 8601: loss 4.1406, lr 7.4e-05, dt 2.0s +All GPU(s): step 8602: loss 4.1406, lr 7.4e-05, dt 2.1s +All GPU(s): step 8603: loss 4.1445, lr 7.4e-05, dt 2.0s +All GPU(s): step 8604: loss 4.1328, lr 7.4e-05, dt 2.0s +All GPU(s): step 8605: loss 4.1367, lr 7.4e-05, dt 2.1s +All GPU(s): step 8606: loss 4.1367, lr 7.4e-05, dt 2.0s +All GPU(s): step 8607: loss 4.1406, lr 7.4e-05, dt 2.0s +All GPU(s): step 8608: loss 4.1445, lr 7.4e-05, dt 2.0s +All GPU(s): step 8609: loss 4.1445, lr 7.4e-05, dt 2.0s +All GPU(s): step 8610: loss 4.1250, lr 7.4e-05, dt 2.1s +All GPU(s): step 8611: loss 4.1406, lr 7.4e-05, dt 2.0s +All GPU(s): step 8612: loss 4.1328, lr 7.4e-05, dt 2.0s +All GPU(s): step 8613: loss 4.1406, lr 7.4e-05, dt 2.0s +All GPU(s): step 8614: loss 4.1445, lr 7.4e-05, dt 2.0s +All GPU(s): step 8615: loss 4.1367, lr 7.4e-05, dt 2.1s +All GPU(s): step 8616: loss 4.1367, lr 7.4e-05, dt 2.0s +All GPU(s): step 8617: loss 4.1328, lr 7.4e-05, dt 2.0s +All GPU(s): step 8618: loss 4.1367, lr 7.4e-05, dt 2.0s +All GPU(s): step 8619: loss 4.1406, lr 7.4e-05, dt 2.0s +All GPU(s): step 8620: loss 4.1484, lr 7.4e-05, dt 2.1s +All GPU(s): step 8621: loss 4.1445, lr 7.4e-05, dt 2.0s +All GPU(s): step 8622: loss 4.1445, lr 7.4e-05, dt 2.0s +All GPU(s): step 8623: loss 4.1406, lr 7.4e-05, dt 2.0s +All GPU(s): step 8624: loss 4.1406, lr 7.4e-05, dt 2.0s +All GPU(s): step 8625: loss 4.1328, lr 7.4e-05, dt 2.1s +All GPU(s): step 8626: loss 4.1445, lr 7.4e-05, dt 2.0s +All GPU(s): step 8627: loss 4.1367, lr 7.4e-05, dt 2.0s +All GPU(s): step 8628: loss 4.1250, lr 7.4e-05, dt 2.0s +All GPU(s): step 8629: loss 4.1250, lr 7.4e-05, dt 2.1s +All GPU(s): step 8630: loss 4.1484, lr 7.4e-05, dt 2.0s +All GPU(s): step 8631: loss 4.1250, lr 7.4e-05, dt 2.0s +All GPU(s): step 8632: loss 4.1367, lr 7.4e-05, dt 2.0s +All GPU(s): step 8633: loss 4.1523, lr 7.3e-05, dt 2.0s +All GPU(s): step 8634: loss 4.1445, lr 7.3e-05, dt 2.1s +All GPU(s): step 8635: loss 4.1328, lr 7.3e-05, dt 2.0s +All GPU(s): step 8636: loss 4.1289, lr 7.3e-05, dt 2.0s +All GPU(s): step 8637: loss 4.1328, lr 7.3e-05, dt 2.0s +All GPU(s): step 8638: loss 4.1445, lr 7.3e-05, dt 2.0s +All GPU(s): step 8639: loss 4.1445, lr 7.3e-05, dt 2.2s +All GPU(s): step 8640: loss 4.1367, lr 7.3e-05, dt 2.0s +All GPU(s): step 8641: loss 4.1406, lr 7.3e-05, dt 2.0s +All GPU(s): step 8642: loss 4.1406, lr 7.3e-05, dt 2.0s +All GPU(s): step 8643: loss 4.1289, lr 7.3e-05, dt 2.0s +All GPU(s): step 8644: loss 4.1289, lr 7.3e-05, dt 2.2s +All GPU(s): step 8645: loss 4.1445, lr 7.3e-05, dt 2.1s +All GPU(s): step 8646: loss 4.1328, lr 7.3e-05, dt 2.1s +All GPU(s): step 8647: loss 4.1523, lr 7.3e-05, dt 2.0s +All GPU(s): step 8648: loss 4.1445, lr 7.3e-05, dt 2.1s +All GPU(s): step 8649: loss 4.1367, lr 7.3e-05, dt 2.1s +All GPU(s): step 8650: loss 4.1406, lr 7.3e-05, dt 2.0s +All GPU(s): step 8651: loss 4.1367, lr 7.3e-05, dt 2.1s +All GPU(s): step 8652: loss 4.1445, lr 7.3e-05, dt 2.0s +All GPU(s): step 8653: loss 4.1328, lr 7.3e-05, dt 2.1s +All GPU(s): step 8654: loss 4.1445, lr 7.3e-05, dt 2.1s +All GPU(s): step 8655: loss 4.1367, lr 7.3e-05, dt 2.1s +All GPU(s): step 8656: loss 4.1406, lr 7.3e-05, dt 2.0s +All GPU(s): step 8657: loss 4.1406, lr 7.3e-05, dt 2.0s +All GPU(s): step 8658: loss 4.1445, lr 7.3e-05, dt 2.1s +All GPU(s): step 8659: loss 4.1367, lr 7.3e-05, dt 2.1s +All GPU(s): step 8660: loss 4.1484, lr 7.3e-05, dt 2.1s +All GPU(s): step 8661: loss 4.1328, lr 7.3e-05, dt 2.1s +All GPU(s): step 8662: loss 4.1484, lr 7.3e-05, dt 2.1s +All GPU(s): step 8663: loss 4.1328, lr 7.3e-05, dt 2.1s +All GPU(s): step 8664: loss 4.1484, lr 7.3e-05, dt 2.0s +All GPU(s): step 8665: loss 4.1328, lr 7.2e-05, dt 2.0s +All GPU(s): step 8666: loss 4.1406, lr 7.2e-05, dt 2.0s +All GPU(s): step 8667: loss 4.1250, lr 7.2e-05, dt 2.1s +All GPU(s): step 8668: loss 4.1367, lr 7.2e-05, dt 2.1s +All GPU(s): step 8669: loss 4.1367, lr 7.2e-05, dt 2.0s +All GPU(s): step 8670: loss 4.1484, lr 7.2e-05, dt 2.0s +All GPU(s): step 8671: loss 4.1172, lr 7.2e-05, dt 2.0s +All GPU(s): step 8672: loss 4.1367, lr 7.2e-05, dt 2.1s +All GPU(s): step 8673: loss 4.1484, lr 7.2e-05, dt 2.1s +All GPU(s): step 8674: loss 4.1328, lr 7.2e-05, dt 2.0s +All GPU(s): step 8675: loss 4.1484, lr 7.2e-05, dt 2.0s +All GPU(s): step 8676: loss 4.1484, lr 7.2e-05, dt 2.0s +All GPU(s): step 8677: loss 4.1445, lr 7.2e-05, dt 2.1s +All GPU(s): step 8678: loss 4.1172, lr 7.2e-05, dt 2.1s +All GPU(s): step 8679: loss 4.1367, lr 7.2e-05, dt 2.0s +All GPU(s): step 8680: loss 4.1445, lr 7.2e-05, dt 2.0s +All GPU(s): step 8681: loss 4.1328, lr 7.2e-05, dt 2.0s +All GPU(s): step 8682: loss 4.1406, lr 7.2e-05, dt 2.1s +All GPU(s): step 8683: loss 4.1406, lr 7.2e-05, dt 2.0s +All GPU(s): step 8684: loss 4.1406, lr 7.2e-05, dt 2.1s +All GPU(s): step 8685: loss 4.1250, lr 7.2e-05, dt 2.0s +All GPU(s): step 8686: loss 4.1328, lr 7.2e-05, dt 2.0s +All GPU(s): step 8687: loss 4.1367, lr 7.2e-05, dt 2.1s +All GPU(s): step 8688: loss 4.1406, lr 7.2e-05, dt 2.0s +All GPU(s): step 8689: loss 4.1367, lr 7.2e-05, dt 2.0s +All GPU(s): step 8690: loss 4.1367, lr 7.2e-05, dt 2.0s +All GPU(s): step 8691: loss 4.1328, lr 7.2e-05, dt 2.0s +All GPU(s): step 8692: loss 4.1445, lr 7.2e-05, dt 2.1s +All GPU(s): step 8693: loss 4.1289, lr 7.2e-05, dt 2.0s +All GPU(s): step 8694: loss 4.1406, lr 7.2e-05, dt 2.0s +All GPU(s): step 8695: loss 4.1484, lr 7.2e-05, dt 2.1s +All GPU(s): step 8696: loss 4.1445, lr 7.2e-05, dt 2.1s +All GPU(s): step 8697: loss 4.1328, lr 7.2e-05, dt 2.1s +All GPU(s): step 8698: loss 4.1445, lr 7.1e-05, dt 2.0s +All GPU(s): step 8699: loss 4.1484, lr 7.1e-05, dt 2.0s +All GPU(s): step 8700: loss 4.1484, lr 7.1e-05, dt 2.0s +All GPU(s): step 8701: loss 4.1523, lr 7.1e-05, dt 2.0s +All GPU(s): step 8702: loss 4.1367, lr 7.1e-05, dt 2.2s +All GPU(s): step 8703: loss 4.1367, lr 7.1e-05, dt 2.1s +All GPU(s): step 8704: loss 4.1406, lr 7.1e-05, dt 2.1s +All GPU(s): step 8705: loss 4.1445, lr 7.1e-05, dt 2.1s +All GPU(s): step 8706: loss 4.1406, lr 7.1e-05, dt 2.1s +All GPU(s): step 8707: loss 4.1328, lr 7.1e-05, dt 2.1s +All GPU(s): step 8708: loss 4.1484, lr 7.1e-05, dt 2.1s +All GPU(s): step 8709: loss 4.1367, lr 7.1e-05, dt 2.1s +All GPU(s): step 8710: loss 4.1406, lr 7.1e-05, dt 2.0s +All GPU(s): step 8711: loss 4.1367, lr 7.1e-05, dt 2.1s +All GPU(s): step 8712: loss 4.1367, lr 7.1e-05, dt 2.0s +All GPU(s): step 8713: loss 4.1523, lr 7.1e-05, dt 2.0s +All GPU(s): step 8714: loss 4.1484, lr 7.1e-05, dt 2.0s +All GPU(s): step 8715: loss 4.1406, lr 7.1e-05, dt 2.0s +All GPU(s): step 8716: loss 4.1406, lr 7.1e-05, dt 2.1s +All GPU(s): step 8717: loss 4.1289, lr 7.1e-05, dt 2.0s +All GPU(s): step 8718: loss 4.1367, lr 7.1e-05, dt 2.0s +All GPU(s): step 8719: loss 4.1328, lr 7.1e-05, dt 2.0s +All GPU(s): step 8720: loss 4.1484, lr 7.1e-05, dt 2.1s +All GPU(s): step 8721: loss 4.1250, lr 7.1e-05, dt 2.1s +All GPU(s): step 8722: loss 4.1211, lr 7.1e-05, dt 2.1s +All GPU(s): step 8723: loss 4.1445, lr 7.1e-05, dt 2.0s +All GPU(s): step 8724: loss 4.1484, lr 7.1e-05, dt 2.0s +All GPU(s): step 8725: loss 4.1328, lr 7.1e-05, dt 2.1s +All GPU(s): step 8726: loss 4.1445, lr 7.1e-05, dt 2.1s +All GPU(s): step 8727: loss 4.1406, lr 7.1e-05, dt 2.0s +All GPU(s): step 8728: loss 4.1367, lr 7.1e-05, dt 2.1s +All GPU(s): step 8729: loss 4.1367, lr 7.1e-05, dt 2.0s +All GPU(s): step 8730: loss 4.1484, lr 7.1e-05, dt 2.1s +All GPU(s): step 8731: loss 4.1289, lr 7.0e-05, dt 2.1s +All GPU(s): step 8732: loss 4.1562, lr 7.0e-05, dt 2.0s +All GPU(s): step 8733: loss 4.1367, lr 7.0e-05, dt 2.0s +All GPU(s): step 8734: loss 4.1406, lr 7.0e-05, dt 2.0s +All GPU(s): step 8735: loss 4.1328, lr 7.0e-05, dt 2.1s +All GPU(s): step 8736: loss 4.1289, lr 7.0e-05, dt 2.1s +All GPU(s): step 8737: loss 4.1406, lr 7.0e-05, dt 2.0s +All GPU(s): step 8738: loss 4.1328, lr 7.0e-05, dt 2.0s +All GPU(s): step 8739: loss 4.1289, lr 7.0e-05, dt 2.0s +All GPU(s): step 8740: loss 4.1328, lr 7.0e-05, dt 2.1s +All GPU(s): step 8741: loss 4.1445, lr 7.0e-05, dt 2.0s +All GPU(s): step 8742: loss 4.1328, lr 7.0e-05, dt 2.1s +All GPU(s): step 8743: loss 4.1406, lr 7.0e-05, dt 2.0s +All GPU(s): step 8744: loss 4.1367, lr 7.0e-05, dt 2.0s +All GPU(s): step 8745: loss 4.1250, lr 7.0e-05, dt 2.2s +All GPU(s): step 8746: loss 4.1094, lr 7.0e-05, dt 2.1s +All GPU(s): step 8747: loss 4.1250, lr 7.0e-05, dt 2.0s +All GPU(s): step 8748: loss 4.1367, lr 7.0e-05, dt 2.1s +All GPU(s): step 8749: loss 4.1367, lr 7.0e-05, dt 2.1s +All GPU(s): step 8750: loss 4.1406, lr 7.0e-05, dt 2.1s +All GPU(s): step 8751: loss 4.1406, lr 7.0e-05, dt 2.0s +All GPU(s): step 8752: loss 4.1484, lr 7.0e-05, dt 2.0s +All GPU(s): step 8753: loss 4.1367, lr 7.0e-05, dt 2.0s +All GPU(s): step 8754: loss 4.1445, lr 7.0e-05, dt 2.0s +All GPU(s): step 8755: loss 4.1406, lr 7.0e-05, dt 2.1s +All GPU(s): step 8756: loss 4.1367, lr 7.0e-05, dt 2.1s +All GPU(s): step 8757: loss 4.1406, lr 7.0e-05, dt 2.0s +All GPU(s): step 8758: loss 4.1406, lr 7.0e-05, dt 2.0s +All GPU(s): step 8759: loss 4.1445, lr 7.0e-05, dt 2.0s +All GPU(s): step 8760: loss 4.1367, lr 7.0e-05, dt 2.1s +All GPU(s): step 8761: loss 4.1367, lr 7.0e-05, dt 2.0s +All GPU(s): step 8762: loss 4.1406, lr 7.0e-05, dt 2.1s +All GPU(s): step 8763: loss 4.1484, lr 7.0e-05, dt 2.0s +All GPU(s): step 8764: loss 4.1406, lr 7.0e-05, dt 2.1s +All GPU(s): step 8765: loss 4.1367, lr 7.0e-05, dt 2.1s +All GPU(s): step 8766: loss 4.1406, lr 6.9e-05, dt 2.0s +All GPU(s): step 8767: loss 4.1367, lr 6.9e-05, dt 2.0s +All GPU(s): step 8768: loss 4.1367, lr 6.9e-05, dt 2.0s +All GPU(s): step 8769: loss 4.1484, lr 6.9e-05, dt 2.1s +All GPU(s): step 8770: loss 4.1250, lr 6.9e-05, dt 2.0s +All GPU(s): step 8771: loss 4.1406, lr 6.9e-05, dt 2.0s +All GPU(s): step 8772: loss 4.1406, lr 6.9e-05, dt 2.1s +All GPU(s): step 8773: loss 4.1367, lr 6.9e-05, dt 2.0s +All GPU(s): step 8774: loss 4.1289, lr 6.9e-05, dt 2.1s +All GPU(s): step 8775: loss 4.1445, lr 6.9e-05, dt 2.0s +All GPU(s): step 8776: loss 4.1328, lr 6.9e-05, dt 2.0s +All GPU(s): step 8777: loss 4.1406, lr 6.9e-05, dt 2.0s +All GPU(s): step 8778: loss 4.1367, lr 6.9e-05, dt 2.1s +All GPU(s): step 8779: loss 4.1328, lr 6.9e-05, dt 2.2s +All GPU(s): step 8780: loss 4.1406, lr 6.9e-05, dt 2.0s +All GPU(s): step 8781: loss 4.1250, lr 6.9e-05, dt 2.0s +All GPU(s): step 8782: loss 4.1367, lr 6.9e-05, dt 2.0s +All GPU(s): step 8783: loss 4.1484, lr 6.9e-05, dt 2.1s +All GPU(s): step 8784: loss 4.1406, lr 6.9e-05, dt 2.3s +All GPU(s): step 8785: loss 4.1523, lr 6.9e-05, dt 2.1s +All GPU(s): step 8786: loss 4.1328, lr 6.9e-05, dt 2.0s +All GPU(s): step 8787: loss 4.1367, lr 6.9e-05, dt 2.1s +All GPU(s): step 8788: loss 4.1523, lr 6.9e-05, dt 2.0s +All GPU(s): step 8789: loss 4.1406, lr 6.9e-05, dt 2.1s +All GPU(s): step 8790: loss 4.1367, lr 6.9e-05, dt 2.0s +All GPU(s): step 8791: loss 4.1328, lr 6.9e-05, dt 2.0s +All GPU(s): step 8792: loss 4.1445, lr 6.9e-05, dt 2.0s +All GPU(s): step 8793: loss 4.1406, lr 6.9e-05, dt 2.1s +All GPU(s): step 8794: loss 4.1406, lr 6.9e-05, dt 2.1s +All GPU(s): step 8795: loss 4.1328, lr 6.9e-05, dt 2.0s +All GPU(s): step 8796: loss 4.1406, lr 6.9e-05, dt 2.0s +All GPU(s): step 8797: loss 4.1445, lr 6.9e-05, dt 2.0s +All GPU(s): step 8798: loss 4.1523, lr 6.9e-05, dt 2.1s +All GPU(s): step 8799: loss 4.1367, lr 6.9e-05, dt 2.1s +All GPU(s): step 8800: loss 4.1445, lr 6.9e-05, dt 2.0s +All GPU(s): step 8801: loss 4.1406, lr 6.8e-05, dt 2.0s +All GPU(s): step 8802: loss 4.1367, lr 6.8e-05, dt 2.1s +All GPU(s): step 8803: loss 4.1445, lr 6.8e-05, dt 2.2s +All GPU(s): step 8804: loss 4.1328, lr 6.8e-05, dt 2.0s +All GPU(s): step 8805: loss 4.1328, lr 6.8e-05, dt 2.0s +All GPU(s): step 8806: loss 4.1406, lr 6.8e-05, dt 2.0s +All GPU(s): step 8807: loss 4.1367, lr 6.8e-05, dt 2.1s +All GPU(s): step 8808: loss 4.1328, lr 6.8e-05, dt 2.1s +All GPU(s): step 8809: loss 4.1484, lr 6.8e-05, dt 2.0s +All GPU(s): step 8810: loss 4.1367, lr 6.8e-05, dt 2.0s +All GPU(s): step 8811: loss 4.1406, lr 6.8e-05, dt 2.0s +All GPU(s): step 8812: loss 4.1367, lr 6.8e-05, dt 2.1s +All GPU(s): step 8813: loss 4.1289, lr 6.8e-05, dt 2.1s +All GPU(s): step 8814: loss 4.1406, lr 6.8e-05, dt 2.1s +All GPU(s): step 8815: loss 4.1367, lr 6.8e-05, dt 2.0s +All GPU(s): step 8816: loss 4.1406, lr 6.8e-05, dt 2.1s +All GPU(s): step 8817: loss 4.1406, lr 6.8e-05, dt 2.1s +All GPU(s): step 8818: loss 4.1328, lr 6.8e-05, dt 2.1s +All GPU(s): step 8819: loss 4.1367, lr 6.8e-05, dt 2.1s +All GPU(s): step 8820: loss 4.1367, lr 6.8e-05, dt 2.0s +All GPU(s): step 8821: loss 4.1367, lr 6.8e-05, dt 2.0s +All GPU(s): step 8822: loss 4.1406, lr 6.8e-05, dt 2.1s +All GPU(s): step 8823: loss 4.1445, lr 6.8e-05, dt 2.1s +All GPU(s): step 8824: loss 4.1484, lr 6.8e-05, dt 2.0s +All GPU(s): step 8825: loss 4.1367, lr 6.8e-05, dt 2.1s +All GPU(s): step 8826: loss 4.1406, lr 6.8e-05, dt 2.0s +All GPU(s): step 8827: loss 4.1367, lr 6.8e-05, dt 2.1s +All GPU(s): step 8828: loss 4.1289, lr 6.8e-05, dt 2.1s +All GPU(s): step 8829: loss 4.1484, lr 6.8e-05, dt 2.0s +All GPU(s): step 8830: loss 4.1328, lr 6.8e-05, dt 2.1s +All GPU(s): step 8831: loss 4.1445, lr 6.8e-05, dt 2.1s +All GPU(s): step 8832: loss 4.1445, lr 6.8e-05, dt 2.1s +All GPU(s): step 8833: loss 4.1406, lr 6.8e-05, dt 2.0s +All GPU(s): step 8834: loss 4.1406, lr 6.8e-05, dt 2.0s +All GPU(s): step 8835: loss 4.1328, lr 6.8e-05, dt 2.0s +All GPU(s): step 8836: loss 4.1406, lr 6.8e-05, dt 2.0s +All GPU(s): step 8837: loss 4.1250, lr 6.7e-05, dt 2.1s +All GPU(s): step 8838: loss 4.1484, lr 6.7e-05, dt 2.0s +All GPU(s): step 8839: loss 4.1230, lr 6.7e-05, dt 2.1s +All GPU(s): step 8840: loss 4.1328, lr 6.7e-05, dt 2.0s +All GPU(s): step 8841: loss 4.1367, lr 6.7e-05, dt 2.0s +All GPU(s): step 8842: loss 4.1406, lr 6.7e-05, dt 2.1s +All GPU(s): step 8843: loss 4.1289, lr 6.7e-05, dt 2.1s +All GPU(s): step 8844: loss 4.1484, lr 6.7e-05, dt 2.1s +All GPU(s): step 8845: loss 4.1562, lr 6.7e-05, dt 2.0s +All GPU(s): step 8846: loss 4.1367, lr 6.7e-05, dt 2.1s +All GPU(s): step 8847: loss 4.1445, lr 6.7e-05, dt 2.1s +All GPU(s): step 8848: loss 4.1445, lr 6.7e-05, dt 2.0s +All GPU(s): step 8849: loss 4.1445, lr 6.7e-05, dt 2.1s +All GPU(s): step 8850: loss 4.1484, lr 6.7e-05, dt 2.0s +All GPU(s): step 8851: loss 4.1367, lr 6.7e-05, dt 2.1s +All GPU(s): step 8852: loss 4.1367, lr 6.7e-05, dt 2.0s +All GPU(s): step 8853: loss 4.1289, lr 6.7e-05, dt 2.1s +All GPU(s): step 8854: loss 4.1250, lr 6.7e-05, dt 2.1s +All GPU(s): step 8855: loss 4.1406, lr 6.7e-05, dt 2.1s +All GPU(s): step 8856: loss 4.1367, lr 6.7e-05, dt 2.1s +All GPU(s): step 8857: loss 4.1328, lr 6.7e-05, dt 2.0s +All GPU(s): step 8858: loss 4.1406, lr 6.7e-05, dt 2.0s +All GPU(s): step 8859: loss 4.1406, lr 6.7e-05, dt 2.0s +All GPU(s): step 8860: loss 4.1367, lr 6.7e-05, dt 2.1s +All GPU(s): step 8861: loss 4.1289, lr 6.7e-05, dt 2.1s +All GPU(s): step 8862: loss 4.1484, lr 6.7e-05, dt 2.0s +All GPU(s): step 8863: loss 4.1406, lr 6.7e-05, dt 2.0s +All GPU(s): step 8864: loss 4.1484, lr 6.7e-05, dt 2.1s +All GPU(s): step 8865: loss 4.1445, lr 6.7e-05, dt 2.0s +All GPU(s): step 8866: loss 4.1406, lr 6.7e-05, dt 2.1s +All GPU(s): step 8867: loss 4.1406, lr 6.7e-05, dt 2.0s +All GPU(s): step 8868: loss 4.1328, lr 6.7e-05, dt 2.0s +All GPU(s): step 8869: loss 4.1328, lr 6.7e-05, dt 2.0s +All GPU(s): step 8870: loss 4.1445, lr 6.7e-05, dt 2.0s +All GPU(s): step 8871: loss 4.1445, lr 6.7e-05, dt 2.1s +All GPU(s): step 8872: loss 4.1328, lr 6.7e-05, dt 2.0s +All GPU(s): step 8873: loss 4.1445, lr 6.7e-05, dt 2.0s +All GPU(s): step 8874: loss 4.1445, lr 6.6e-05, dt 2.0s +All GPU(s): step 8875: loss 4.1445, lr 6.6e-05, dt 2.1s +All GPU(s): step 8876: loss 4.1406, lr 6.6e-05, dt 2.1s +All GPU(s): step 8877: loss 4.1367, lr 6.6e-05, dt 2.0s +All GPU(s): step 8878: loss 4.1562, lr 6.6e-05, dt 2.0s +All GPU(s): step 8879: loss 4.1367, lr 6.6e-05, dt 2.0s +All GPU(s): step 8880: loss 4.1328, lr 6.6e-05, dt 2.1s +All GPU(s): step 8881: loss 4.1289, lr 6.6e-05, dt 2.0s +All GPU(s): step 8882: loss 4.1367, lr 6.6e-05, dt 2.0s +All GPU(s): step 8883: loss 4.1406, lr 6.6e-05, dt 2.0s +All GPU(s): step 8884: loss 4.1367, lr 6.6e-05, dt 2.1s +All GPU(s): step 8885: loss 4.1445, lr 6.6e-05, dt 2.1s +All GPU(s): step 8886: loss 4.1406, lr 6.6e-05, dt 2.1s +All GPU(s): step 8887: loss 4.1328, lr 6.6e-05, dt 2.0s +All GPU(s): step 8888: loss 4.1289, lr 6.6e-05, dt 2.0s +All GPU(s): step 8889: loss 4.1328, lr 6.6e-05, dt 2.1s +All GPU(s): step 8890: loss 4.1406, lr 6.6e-05, dt 2.1s +All GPU(s): step 8891: loss 4.1328, lr 6.6e-05, dt 2.0s +All GPU(s): step 8892: loss 4.1367, lr 6.6e-05, dt 2.0s +All GPU(s): step 8893: loss 4.1367, lr 6.6e-05, dt 2.0s +All GPU(s): step 8894: loss 4.1367, lr 6.6e-05, dt 2.0s +All GPU(s): step 8895: loss 4.1367, lr 6.6e-05, dt 2.2s +All GPU(s): step 8896: loss 4.1523, lr 6.6e-05, dt 2.0s +All GPU(s): step 8897: loss 4.1367, lr 6.6e-05, dt 2.0s +All GPU(s): step 8898: loss 4.1367, lr 6.6e-05, dt 2.1s +All GPU(s): step 8899: loss 4.1367, lr 6.6e-05, dt 2.1s +All GPU(s): step 8900: loss 4.1367, lr 6.6e-05, dt 2.1s +All GPU(s): step 8901: loss 4.1367, lr 6.6e-05, dt 2.0s +All GPU(s): step 8902: loss 4.1484, lr 6.6e-05, dt 2.0s +All GPU(s): step 8903: loss 4.1289, lr 6.6e-05, dt 2.0s +All GPU(s): step 8904: loss 4.1406, lr 6.6e-05, dt 2.1s +All GPU(s): step 8905: loss 4.1406, lr 6.6e-05, dt 2.1s +All GPU(s): step 8906: loss 4.1445, lr 6.6e-05, dt 2.1s +All GPU(s): step 8907: loss 4.1484, lr 6.6e-05, dt 2.1s +All GPU(s): step 8908: loss 4.1367, lr 6.6e-05, dt 2.1s +All GPU(s): step 8909: loss 4.1250, lr 6.6e-05, dt 2.1s +All GPU(s): step 8910: loss 4.1367, lr 6.6e-05, dt 2.1s +All GPU(s): step 8911: loss 4.1328, lr 6.6e-05, dt 2.1s +All GPU(s): step 8912: loss 4.1445, lr 6.5e-05, dt 2.1s +All GPU(s): step 8913: loss 4.1328, lr 6.5e-05, dt 2.1s +All GPU(s): step 8914: loss 4.1484, lr 6.5e-05, dt 2.2s +All GPU(s): step 8915: loss 4.1406, lr 6.5e-05, dt 2.0s +All GPU(s): step 8916: loss 4.1445, lr 6.5e-05, dt 2.0s +All GPU(s): step 8917: loss 4.1289, lr 6.5e-05, dt 2.0s +All GPU(s): step 8918: loss 4.1445, lr 6.5e-05, dt 2.0s +All GPU(s): step 8919: loss 4.1484, lr 6.5e-05, dt 2.1s +All GPU(s): step 8920: loss 4.1328, lr 6.5e-05, dt 2.0s +All GPU(s): step 8921: loss 4.1484, lr 6.5e-05, dt 2.0s +All GPU(s): step 8922: loss 4.1445, lr 6.5e-05, dt 2.0s +All GPU(s): step 8923: loss 4.1367, lr 6.5e-05, dt 2.0s +All GPU(s): step 8924: loss 4.1406, lr 6.5e-05, dt 2.1s +All GPU(s): step 8925: loss 4.1250, lr 6.5e-05, dt 2.1s +All GPU(s): step 8926: loss 4.1445, lr 6.5e-05, dt 2.1s +All GPU(s): step 8927: loss 4.1367, lr 6.5e-05, dt 2.1s +All GPU(s): step 8928: loss 4.1406, lr 6.5e-05, dt 2.1s +All GPU(s): step 8929: loss 4.1484, lr 6.5e-05, dt 2.1s +All GPU(s): step 8930: loss 4.1289, lr 6.5e-05, dt 2.1s +All GPU(s): step 8931: loss 4.1484, lr 6.5e-05, dt 2.1s +All GPU(s): step 8932: loss 4.1484, lr 6.5e-05, dt 2.0s +All GPU(s): step 8933: loss 4.1289, lr 6.5e-05, dt 2.1s +All GPU(s): step 8934: loss 4.1328, lr 6.5e-05, dt 2.0s +All GPU(s): step 8935: loss 4.1367, lr 6.5e-05, dt 2.0s +All GPU(s): step 8936: loss 4.1367, lr 6.5e-05, dt 2.1s +All GPU(s): step 8937: loss 4.1367, lr 6.5e-05, dt 2.0s +All GPU(s): step 8938: loss 4.1445, lr 6.5e-05, dt 2.1s +All GPU(s): step 8939: loss 4.1367, lr 6.5e-05, dt 2.0s +All GPU(s): step 8940: loss 4.1367, lr 6.5e-05, dt 2.0s +All GPU(s): step 8941: loss 4.1484, lr 6.5e-05, dt 2.1s +All GPU(s): step 8942: loss 4.1367, lr 6.5e-05, dt 2.0s +All GPU(s): step 8943: loss 4.1328, lr 6.5e-05, dt 2.1s +All GPU(s): step 8944: loss 4.1406, lr 6.5e-05, dt 2.0s +All GPU(s): step 8945: loss 4.1328, lr 6.5e-05, dt 2.0s +All GPU(s): step 8946: loss 4.1406, lr 6.5e-05, dt 2.0s +All GPU(s): step 8947: loss 4.1406, lr 6.5e-05, dt 2.1s +All GPU(s): step 8948: loss 4.1406, lr 6.5e-05, dt 2.1s +All GPU(s): step 8949: loss 4.1445, lr 6.5e-05, dt 2.0s +All GPU(s): step 8950: loss 4.1406, lr 6.5e-05, dt 2.0s +All GPU(s): step 8951: loss 4.1523, lr 6.5e-05, dt 2.1s +All GPU(s): step 8952: loss 4.1289, lr 6.4e-05, dt 2.0s +All GPU(s): step 8953: loss 4.1406, lr 6.4e-05, dt 2.1s +All GPU(s): step 8954: loss 4.1367, lr 6.4e-05, dt 2.1s +All GPU(s): step 8955: loss 4.1406, lr 6.4e-05, dt 2.0s +All GPU(s): step 8956: loss 4.1445, lr 6.4e-05, dt 2.0s +All GPU(s): step 8957: loss 4.1445, lr 6.4e-05, dt 2.1s +All GPU(s): step 8958: loss 4.1289, lr 6.4e-05, dt 2.1s +All GPU(s): step 8959: loss 4.1406, lr 6.4e-05, dt 2.0s +All GPU(s): step 8960: loss 4.1289, lr 6.4e-05, dt 2.0s +All GPU(s): step 8961: loss 4.1406, lr 6.4e-05, dt 2.1s +All GPU(s): step 8962: loss 4.1328, lr 6.4e-05, dt 2.1s +All GPU(s): step 8963: loss 4.1445, lr 6.4e-05, dt 2.1s +All GPU(s): step 8964: loss 4.1484, lr 6.4e-05, dt 2.0s +All GPU(s): step 8965: loss 4.1328, lr 6.4e-05, dt 2.1s +All GPU(s): step 8966: loss 4.1367, lr 6.4e-05, dt 2.0s +All GPU(s): step 8967: loss 4.1367, lr 6.4e-05, dt 2.1s +All GPU(s): step 8968: loss 4.1367, lr 6.4e-05, dt 2.1s +All GPU(s): step 8969: loss 4.1367, lr 6.4e-05, dt 2.0s +All GPU(s): step 8970: loss 4.1250, lr 6.4e-05, dt 2.0s +All GPU(s): step 8971: loss 4.1406, lr 6.4e-05, dt 2.0s +All GPU(s): step 8972: loss 4.1367, lr 6.4e-05, dt 2.2s +All GPU(s): step 8973: loss 4.1484, lr 6.4e-05, dt 2.1s +All GPU(s): step 8974: loss 4.1367, lr 6.4e-05, dt 2.1s +All GPU(s): step 8975: loss 4.1406, lr 6.4e-05, dt 2.1s +All GPU(s): step 8976: loss 4.1406, lr 6.4e-05, dt 2.1s +All GPU(s): step 8977: loss 4.1289, lr 6.4e-05, dt 2.2s +All GPU(s): step 8978: loss 4.1328, lr 6.4e-05, dt 2.1s +All GPU(s): step 8979: loss 4.1289, lr 6.4e-05, dt 2.1s +All GPU(s): step 8980: loss 4.1445, lr 6.4e-05, dt 2.0s +All GPU(s): step 8981: loss 4.1367, lr 6.4e-05, dt 2.1s +All GPU(s): step 8982: loss 4.1367, lr 6.4e-05, dt 2.1s +All GPU(s): step 8983: loss 4.1328, lr 6.4e-05, dt 2.1s +All GPU(s): step 8984: loss 4.1484, lr 6.4e-05, dt 2.0s +All GPU(s): step 8985: loss 4.1367, lr 6.4e-05, dt 2.0s +All GPU(s): step 8986: loss 4.1367, lr 6.4e-05, dt 2.1s +All GPU(s): step 8987: loss 4.1328, lr 6.4e-05, dt 2.1s +All GPU(s): step 8988: loss 4.1328, lr 6.4e-05, dt 2.0s +All GPU(s): step 8989: loss 4.1445, lr 6.4e-05, dt 2.0s +All GPU(s): step 8990: loss 4.1250, lr 6.4e-05, dt 2.1s +All GPU(s): step 8991: loss 4.1328, lr 6.4e-05, dt 2.1s +All GPU(s): step 8992: loss 4.1250, lr 6.3e-05, dt 2.1s +All GPU(s): step 8993: loss 4.1289, lr 6.3e-05, dt 2.0s +All GPU(s): step 8994: loss 4.1406, lr 6.3e-05, dt 2.0s +All GPU(s): step 8995: loss 4.1445, lr 6.3e-05, dt 2.1s +All GPU(s): step 8996: loss 4.1445, lr 6.3e-05, dt 2.2s +All GPU(s): step 8997: loss 4.1367, lr 6.3e-05, dt 2.0s +All GPU(s): step 8998: loss 4.1406, lr 6.3e-05, dt 2.0s +All GPU(s): step 8999: loss 4.1445, lr 6.3e-05, dt 2.0s +saving checkpoint to checkpoints/ckpt_9000.pt +All GPU(s): step 9000: loss 4.1445, lr 6.3e-05, dt 2.1s +All GPU(s): step 9001: loss 4.1445, lr 6.3e-05, dt 2.1s +All GPU(s): step 9002: loss 4.1406, lr 6.3e-05, dt 2.1s +All GPU(s): step 9003: loss 4.1445, lr 6.3e-05, dt 2.0s +All GPU(s): step 9004: loss 4.1445, lr 6.3e-05, dt 2.1s +All GPU(s): step 9005: loss 4.1445, lr 6.3e-05, dt 2.0s +All GPU(s): step 9006: loss 4.1406, lr 6.3e-05, dt 2.1s +All GPU(s): step 9007: loss 4.1523, lr 6.3e-05, dt 2.1s +All GPU(s): step 9008: loss 4.1367, lr 6.3e-05, dt 2.1s +All GPU(s): step 9009: loss 4.1328, lr 6.3e-05, dt 2.1s +All GPU(s): step 9010: loss 4.1367, lr 6.3e-05, dt 2.1s +All GPU(s): step 9011: loss 4.1367, lr 6.3e-05, dt 2.1s +All GPU(s): step 9012: loss 4.1406, lr 6.3e-05, dt 2.0s +All GPU(s): step 9013: loss 4.1328, lr 6.3e-05, dt 2.0s +All GPU(s): step 9014: loss 4.1484, lr 6.3e-05, dt 2.0s +All GPU(s): step 9015: loss 4.1445, lr 6.3e-05, dt 2.1s +All GPU(s): step 9016: loss 4.1250, lr 6.3e-05, dt 2.0s +All GPU(s): step 9017: loss 4.1250, lr 6.3e-05, dt 2.0s +All GPU(s): step 9018: loss 4.1484, lr 6.3e-05, dt 2.0s +All GPU(s): step 9019: loss 4.1328, lr 6.3e-05, dt 2.0s +All GPU(s): step 9020: loss 4.1328, lr 6.3e-05, dt 2.2s +All GPU(s): step 9021: loss 4.1445, lr 6.3e-05, dt 2.0s +All GPU(s): step 9022: loss 4.1445, lr 6.3e-05, dt 2.0s +All GPU(s): step 9023: loss 4.1406, lr 6.3e-05, dt 2.0s +All GPU(s): step 9024: loss 4.1367, lr 6.3e-05, dt 2.1s +All GPU(s): step 9025: loss 4.1406, lr 6.3e-05, dt 2.1s +All GPU(s): step 9026: loss 4.1367, lr 6.3e-05, dt 2.1s +All GPU(s): step 9027: loss 4.1406, lr 6.3e-05, dt 2.0s +All GPU(s): step 9028: loss 4.1445, lr 6.3e-05, dt 2.1s +All GPU(s): step 9029: loss 4.1367, lr 6.3e-05, dt 2.1s +All GPU(s): step 9030: loss 4.1445, lr 6.3e-05, dt 2.2s +All GPU(s): step 9031: loss 4.1445, lr 6.3e-05, dt 2.0s +All GPU(s): step 9032: loss 4.1406, lr 6.3e-05, dt 2.0s +All GPU(s): step 9033: loss 4.1289, lr 6.3e-05, dt 2.0s +All GPU(s): step 9034: loss 4.1406, lr 6.2e-05, dt 2.0s +All GPU(s): step 9035: loss 4.1406, lr 6.2e-05, dt 2.1s +All GPU(s): step 9036: loss 4.1523, lr 6.2e-05, dt 2.0s +All GPU(s): step 9037: loss 4.1289, lr 6.2e-05, dt 2.0s +All GPU(s): step 9038: loss 4.1406, lr 6.2e-05, dt 2.0s +All GPU(s): step 9039: loss 4.1289, lr 6.2e-05, dt 2.1s +All GPU(s): step 9040: loss 4.1367, lr 6.2e-05, dt 2.1s +All GPU(s): step 9041: loss 4.1406, lr 6.2e-05, dt 2.1s +All GPU(s): step 9042: loss 4.1484, lr 6.2e-05, dt 2.0s +All GPU(s): step 9043: loss 4.1367, lr 6.2e-05, dt 2.1s +All GPU(s): step 9044: loss 4.1406, lr 6.2e-05, dt 2.1s +All GPU(s): step 9045: loss 4.1328, lr 6.2e-05, dt 2.0s +All GPU(s): step 9046: loss 4.1523, lr 6.2e-05, dt 2.0s +All GPU(s): step 9047: loss 4.1367, lr 6.2e-05, dt 2.0s +All GPU(s): step 9048: loss 4.1445, lr 6.2e-05, dt 2.0s +All GPU(s): step 9049: loss 4.1367, lr 6.2e-05, dt 2.1s +All GPU(s): step 9050: loss 4.1367, lr 6.2e-05, dt 2.0s +All GPU(s): step 9051: loss 4.1289, lr 6.2e-05, dt 2.1s +All GPU(s): step 9052: loss 4.1406, lr 6.2e-05, dt 2.0s +All GPU(s): step 9053: loss 4.1367, lr 6.2e-05, dt 2.0s +All GPU(s): step 9054: loss 4.1406, lr 6.2e-05, dt 2.2s +All GPU(s): step 9055: loss 4.1211, lr 6.2e-05, dt 2.1s +All GPU(s): step 9056: loss 4.1367, lr 6.2e-05, dt 2.0s +All GPU(s): step 9057: loss 4.1406, lr 6.2e-05, dt 2.1s +All GPU(s): step 9058: loss 4.1406, lr 6.2e-05, dt 2.1s +All GPU(s): step 9059: loss 4.1367, lr 6.2e-05, dt 2.2s +All GPU(s): step 9060: loss 4.1367, lr 6.2e-05, dt 2.1s +All GPU(s): step 9061: loss 4.1133, lr 6.2e-05, dt 2.1s +All GPU(s): step 9062: loss 4.1406, lr 6.2e-05, dt 2.1s +All GPU(s): step 9063: loss 4.1328, lr 6.2e-05, dt 2.1s +All GPU(s): step 9064: loss 4.1367, lr 6.2e-05, dt 2.1s +All GPU(s): step 9065: loss 4.1445, lr 6.2e-05, dt 2.1s +All GPU(s): step 9066: loss 4.1484, lr 6.2e-05, dt 2.1s +All GPU(s): step 9067: loss 4.1445, lr 6.2e-05, dt 2.1s +All GPU(s): step 9068: loss 4.1445, lr 6.2e-05, dt 2.1s +All GPU(s): step 9069: loss 4.1289, lr 6.2e-05, dt 2.1s +All GPU(s): step 9070: loss 4.1289, lr 6.2e-05, dt 2.1s +All GPU(s): step 9071: loss 4.1445, lr 6.2e-05, dt 2.1s +All GPU(s): step 9072: loss 4.1328, lr 6.2e-05, dt 2.0s +All GPU(s): step 9073: loss 4.1445, lr 6.2e-05, dt 2.1s +All GPU(s): step 9074: loss 4.1328, lr 6.2e-05, dt 2.1s +All GPU(s): step 9075: loss 4.1328, lr 6.2e-05, dt 2.1s +All GPU(s): step 9076: loss 4.1523, lr 6.2e-05, dt 2.1s +All GPU(s): step 9077: loss 4.1484, lr 6.2e-05, dt 2.1s +All GPU(s): step 9078: loss 4.1250, lr 6.1e-05, dt 2.2s +All GPU(s): step 9079: loss 4.1406, lr 6.1e-05, dt 2.1s +All GPU(s): step 9080: loss 4.1445, lr 6.1e-05, dt 2.0s +All GPU(s): step 9081: loss 4.1211, lr 6.1e-05, dt 2.1s +All GPU(s): step 9082: loss 4.1445, lr 6.1e-05, dt 2.1s +All GPU(s): step 9083: loss 4.1328, lr 6.1e-05, dt 2.1s +All GPU(s): step 9084: loss 4.1484, lr 6.1e-05, dt 2.1s +All GPU(s): step 9085: loss 4.1406, lr 6.1e-05, dt 2.1s +All GPU(s): step 9086: loss 4.1445, lr 6.1e-05, dt 2.1s +All GPU(s): step 9087: loss 4.1406, lr 6.1e-05, dt 2.1s +All GPU(s): step 9088: loss 4.1445, lr 6.1e-05, dt 2.1s +All GPU(s): step 9089: loss 4.1406, lr 6.1e-05, dt 2.0s +All GPU(s): step 9090: loss 4.1562, lr 6.1e-05, dt 2.0s +All GPU(s): step 9091: loss 4.1367, lr 6.1e-05, dt 2.0s +All GPU(s): step 9092: loss 4.1289, lr 6.1e-05, dt 2.1s +All GPU(s): step 9093: loss 4.1445, lr 6.1e-05, dt 2.1s +All GPU(s): step 9094: loss 4.1445, lr 6.1e-05, dt 2.0s +All GPU(s): step 9095: loss 4.1484, lr 6.1e-05, dt 2.1s +All GPU(s): step 9096: loss 4.1289, lr 6.1e-05, dt 2.0s +All GPU(s): step 9097: loss 4.1406, lr 6.1e-05, dt 2.1s +All GPU(s): step 9098: loss 4.1445, lr 6.1e-05, dt 2.1s +All GPU(s): step 9099: loss 4.1406, lr 6.1e-05, dt 2.0s +All GPU(s): step 9100: loss 4.1367, lr 6.1e-05, dt 2.0s +All GPU(s): step 9101: loss 4.1367, lr 6.1e-05, dt 2.0s +All GPU(s): step 9102: loss 4.1445, lr 6.1e-05, dt 2.2s +All GPU(s): step 9103: loss 4.1289, lr 6.1e-05, dt 2.0s +All GPU(s): step 9104: loss 4.1406, lr 6.1e-05, dt 2.0s +All GPU(s): step 9105: loss 4.1289, lr 6.1e-05, dt 2.0s +All GPU(s): step 9106: loss 4.1445, lr 6.1e-05, dt 2.0s +All GPU(s): step 9107: loss 4.1406, lr 6.1e-05, dt 2.1s +All GPU(s): step 9108: loss 4.1367, lr 6.1e-05, dt 2.1s +All GPU(s): step 9109: loss 4.1289, lr 6.1e-05, dt 2.0s +All GPU(s): step 9110: loss 4.1406, lr 6.1e-05, dt 2.1s +All GPU(s): step 9111: loss 4.1289, lr 6.1e-05, dt 2.1s +All GPU(s): step 9112: loss 4.1484, lr 6.1e-05, dt 2.1s +All GPU(s): step 9113: loss 4.1445, lr 6.1e-05, dt 2.0s +All GPU(s): step 9114: loss 4.1406, lr 6.1e-05, dt 2.1s +All GPU(s): step 9115: loss 4.1406, lr 6.1e-05, dt 2.0s +All GPU(s): step 9116: loss 4.1484, lr 6.1e-05, dt 2.1s +All GPU(s): step 9117: loss 4.1445, lr 6.1e-05, dt 2.1s +All GPU(s): step 9118: loss 4.1328, lr 6.1e-05, dt 2.1s +All GPU(s): step 9119: loss 4.1367, lr 6.1e-05, dt 2.0s +All GPU(s): step 9120: loss 4.1289, lr 6.1e-05, dt 2.1s +All GPU(s): step 9121: loss 4.1445, lr 6.1e-05, dt 2.1s +All GPU(s): step 9122: loss 4.1523, lr 6.1e-05, dt 2.1s +All GPU(s): step 9123: loss 4.1289, lr 6.1e-05, dt 2.1s +All GPU(s): step 9124: loss 4.1406, lr 6.0e-05, dt 2.0s +All GPU(s): step 9125: loss 4.1367, lr 6.0e-05, dt 2.0s +All GPU(s): step 9126: loss 4.1406, lr 6.0e-05, dt 2.1s +All GPU(s): step 9127: loss 4.1445, lr 6.0e-05, dt 2.1s +All GPU(s): step 9128: loss 4.1445, lr 6.0e-05, dt 2.1s +All GPU(s): step 9129: loss 4.1328, lr 6.0e-05, dt 2.0s +All GPU(s): step 9130: loss 4.1406, lr 6.0e-05, dt 2.1s +All GPU(s): step 9131: loss 4.1367, lr 6.0e-05, dt 2.1s +All GPU(s): step 9132: loss 4.1445, lr 6.0e-05, dt 2.1s +All GPU(s): step 9133: loss 4.1406, lr 6.0e-05, dt 2.0s +All GPU(s): step 9134: loss 4.1328, lr 6.0e-05, dt 2.0s +All GPU(s): step 9135: loss 4.1367, lr 6.0e-05, dt 2.1s +All GPU(s): step 9136: loss 4.1406, lr 6.0e-05, dt 2.1s +All GPU(s): step 9137: loss 4.1289, lr 6.0e-05, dt 2.0s +All GPU(s): step 9138: loss 4.1406, lr 6.0e-05, dt 2.1s +All GPU(s): step 9139: loss 4.1406, lr 6.0e-05, dt 2.1s +All GPU(s): step 9140: loss 4.1484, lr 6.0e-05, dt 2.1s +All GPU(s): step 9141: loss 4.1328, lr 6.0e-05, dt 2.1s +All GPU(s): step 9142: loss 4.1289, lr 6.0e-05, dt 2.0s +All GPU(s): step 9143: loss 4.1484, lr 6.0e-05, dt 2.0s +All GPU(s): step 9144: loss 4.1406, lr 6.0e-05, dt 2.0s +All GPU(s): step 9145: loss 4.1289, lr 6.0e-05, dt 2.0s +All GPU(s): step 9146: loss 4.1523, lr 6.0e-05, dt 2.1s +All GPU(s): step 9147: loss 4.1445, lr 6.0e-05, dt 2.0s +All GPU(s): step 9148: loss 4.1445, lr 6.0e-05, dt 2.0s +All GPU(s): step 9149: loss 4.1445, lr 6.0e-05, dt 2.0s +All GPU(s): step 9150: loss 4.1484, lr 6.0e-05, dt 2.1s +All GPU(s): step 9151: loss 4.1445, lr 6.0e-05, dt 2.1s +All GPU(s): step 9152: loss 4.1445, lr 6.0e-05, dt 2.1s +All GPU(s): step 9153: loss 4.1406, lr 6.0e-05, dt 2.0s +All GPU(s): step 9154: loss 4.1328, lr 6.0e-05, dt 2.1s +All GPU(s): step 9155: loss 4.1445, lr 6.0e-05, dt 2.1s +All GPU(s): step 9156: loss 4.1367, lr 6.0e-05, dt 2.1s +All GPU(s): step 9157: loss 4.1328, lr 6.0e-05, dt 2.1s +All GPU(s): step 9158: loss 4.1406, lr 6.0e-05, dt 2.0s +All GPU(s): step 9159: loss 4.1406, lr 6.0e-05, dt 2.0s +All GPU(s): step 9160: loss 4.1406, lr 6.0e-05, dt 2.2s +All GPU(s): step 9161: loss 4.1523, lr 6.0e-05, dt 2.1s +All GPU(s): step 9162: loss 4.1250, lr 6.0e-05, dt 2.0s +All GPU(s): step 9163: loss 4.1406, lr 6.0e-05, dt 2.1s +All GPU(s): step 9164: loss 4.1367, lr 6.0e-05, dt 2.1s +All GPU(s): step 9165: loss 4.1406, lr 6.0e-05, dt 2.1s +All GPU(s): step 9166: loss 4.1289, lr 6.0e-05, dt 2.1s +All GPU(s): step 9167: loss 4.1406, lr 6.0e-05, dt 2.1s +All GPU(s): step 9168: loss 4.1289, lr 6.0e-05, dt 2.0s +All GPU(s): step 9169: loss 4.1445, lr 6.0e-05, dt 2.1s +All GPU(s): step 9170: loss 4.1250, lr 6.0e-05, dt 2.1s +All GPU(s): step 9171: loss 4.1367, lr 6.0e-05, dt 2.0s +All GPU(s): step 9172: loss 4.1328, lr 5.9e-05, dt 2.0s +All GPU(s): step 9173: loss 4.1328, lr 5.9e-05, dt 2.0s +All GPU(s): step 9174: loss 4.1328, lr 5.9e-05, dt 2.1s +All GPU(s): step 9175: loss 4.1367, lr 5.9e-05, dt 2.1s +All GPU(s): step 9176: loss 4.1328, lr 5.9e-05, dt 2.0s +All GPU(s): step 9177: loss 4.1406, lr 5.9e-05, dt 2.0s +All GPU(s): step 9178: loss 4.1406, lr 5.9e-05, dt 2.1s +All GPU(s): step 9179: loss 4.1367, lr 5.9e-05, dt 2.1s +All GPU(s): step 9180: loss 4.1406, lr 5.9e-05, dt 2.0s +All GPU(s): step 9181: loss 4.1328, lr 5.9e-05, dt 2.0s +All GPU(s): step 9182: loss 4.1289, lr 5.9e-05, dt 2.1s +All GPU(s): step 9183: loss 4.1289, lr 5.9e-05, dt 2.0s +All GPU(s): step 9184: loss 4.1367, lr 5.9e-05, dt 2.2s +All GPU(s): step 9185: loss 4.1367, lr 5.9e-05, dt 2.1s +All GPU(s): step 9186: loss 4.1367, lr 5.9e-05, dt 2.0s +All GPU(s): step 9187: loss 4.1367, lr 5.9e-05, dt 2.0s +All GPU(s): step 9188: loss 4.1250, lr 5.9e-05, dt 2.0s +All GPU(s): step 9189: loss 4.1406, lr 5.9e-05, dt 2.1s +All GPU(s): step 9190: loss 4.1445, lr 5.9e-05, dt 2.1s +All GPU(s): step 9191: loss 4.1289, lr 5.9e-05, dt 2.0s +All GPU(s): step 9192: loss 4.1484, lr 5.9e-05, dt 2.0s +All GPU(s): step 9193: loss 4.1484, lr 5.9e-05, dt 2.1s +All GPU(s): step 9194: loss 4.1289, lr 5.9e-05, dt 2.1s +All GPU(s): step 9195: loss 4.1406, lr 5.9e-05, dt 2.1s +All GPU(s): step 9196: loss 4.1367, lr 5.9e-05, dt 2.1s +All GPU(s): step 9197: loss 4.1484, lr 5.9e-05, dt 2.0s +All GPU(s): step 9198: loss 4.1445, lr 5.9e-05, dt 2.0s +All GPU(s): step 9199: loss 4.1367, lr 5.9e-05, dt 2.1s +All GPU(s): step 9200: loss 4.1367, lr 5.9e-05, dt 2.0s +All GPU(s): step 9201: loss 4.1484, lr 5.9e-05, dt 2.0s +All GPU(s): step 9202: loss 4.1328, lr 5.9e-05, dt 2.1s +All GPU(s): step 9203: loss 4.1406, lr 5.9e-05, dt 2.1s +All GPU(s): step 9204: loss 4.1328, lr 5.9e-05, dt 2.1s +All GPU(s): step 9205: loss 4.1328, lr 5.9e-05, dt 2.0s +All GPU(s): step 9206: loss 4.1367, lr 5.9e-05, dt 2.0s +All GPU(s): step 9207: loss 4.1406, lr 5.9e-05, dt 2.0s +All GPU(s): step 9208: loss 4.1445, lr 5.9e-05, dt 2.1s +All GPU(s): step 9209: loss 4.1367, lr 5.9e-05, dt 2.1s +All GPU(s): step 9210: loss 4.1406, lr 5.9e-05, dt 2.0s +All GPU(s): step 9211: loss 4.1367, lr 5.9e-05, dt 2.1s +All GPU(s): step 9212: loss 4.1445, lr 5.9e-05, dt 2.0s +All GPU(s): step 9213: loss 4.1328, lr 5.9e-05, dt 2.1s +All GPU(s): step 9214: loss 4.1523, lr 5.9e-05, dt 2.0s +All GPU(s): step 9215: loss 4.1445, lr 5.9e-05, dt 2.0s +All GPU(s): step 9216: loss 4.1406, lr 5.9e-05, dt 2.0s +All GPU(s): step 9217: loss 4.1367, lr 5.9e-05, dt 2.0s +All GPU(s): step 9218: loss 4.1445, lr 5.9e-05, dt 2.2s +All GPU(s): step 9219: loss 4.1445, lr 5.9e-05, dt 2.0s +All GPU(s): step 9220: loss 4.1367, lr 5.9e-05, dt 2.0s +All GPU(s): step 9221: loss 4.1289, lr 5.9e-05, dt 2.0s +All GPU(s): step 9222: loss 4.1484, lr 5.9e-05, dt 2.1s +All GPU(s): step 9223: loss 4.1367, lr 5.8e-05, dt 2.2s +All GPU(s): step 9224: loss 4.1406, lr 5.8e-05, dt 2.0s +All GPU(s): step 9225: loss 4.1445, lr 5.8e-05, dt 2.0s +All GPU(s): step 9226: loss 4.1367, lr 5.8e-05, dt 2.0s +All GPU(s): step 9227: loss 4.1523, lr 5.8e-05, dt 2.1s +All GPU(s): step 9228: loss 4.1367, lr 5.8e-05, dt 2.1s +All GPU(s): step 9229: loss 4.1484, lr 5.8e-05, dt 2.1s +All GPU(s): step 9230: loss 4.1289, lr 5.8e-05, dt 2.0s +All GPU(s): step 9231: loss 4.1445, lr 5.8e-05, dt 2.0s +All GPU(s): step 9232: loss 4.1562, lr 5.8e-05, dt 2.1s +All GPU(s): step 9233: loss 4.1328, lr 5.8e-05, dt 2.1s +All GPU(s): step 9234: loss 4.1562, lr 5.8e-05, dt 2.1s +All GPU(s): step 9235: loss 4.1406, lr 5.8e-05, dt 2.1s +All GPU(s): step 9236: loss 4.1484, lr 5.8e-05, dt 2.0s +All GPU(s): step 9237: loss 4.1406, lr 5.8e-05, dt 2.1s +All GPU(s): step 9238: loss 4.1289, lr 5.8e-05, dt 2.0s +All GPU(s): step 9239: loss 4.1523, lr 5.8e-05, dt 2.0s +All GPU(s): step 9240: loss 4.1523, lr 5.8e-05, dt 2.0s +All GPU(s): step 9241: loss 4.1289, lr 5.8e-05, dt 2.0s +All GPU(s): step 9242: loss 4.1445, lr 5.8e-05, dt 2.1s +All GPU(s): step 9243: loss 4.1367, lr 5.8e-05, dt 2.1s +All GPU(s): step 9244: loss 4.1445, lr 5.8e-05, dt 2.0s +All GPU(s): step 9245: loss 4.1445, lr 5.8e-05, dt 2.1s +All GPU(s): step 9246: loss 4.1367, lr 5.8e-05, dt 2.0s +All GPU(s): step 9247: loss 4.1406, lr 5.8e-05, dt 2.1s +All GPU(s): step 9248: loss 4.1406, lr 5.8e-05, dt 2.0s +All GPU(s): step 9249: loss 4.1367, lr 5.8e-05, dt 2.0s +All GPU(s): step 9250: loss 4.1445, lr 5.8e-05, dt 2.0s +All GPU(s): step 9251: loss 4.1445, lr 5.8e-05, dt 2.0s +All GPU(s): step 9252: loss 4.1367, lr 5.8e-05, dt 2.1s +All GPU(s): step 9253: loss 4.1328, lr 5.8e-05, dt 2.0s +All GPU(s): step 9254: loss 4.1445, lr 5.8e-05, dt 2.0s +All GPU(s): step 9255: loss 4.1367, lr 5.8e-05, dt 2.0s +All GPU(s): step 9256: loss 4.1328, lr 5.8e-05, dt 2.1s +All GPU(s): step 9257: loss 4.1133, lr 5.8e-05, dt 2.1s +All GPU(s): step 9258: loss 4.1445, lr 5.8e-05, dt 2.0s +All GPU(s): step 9259: loss 4.1523, lr 5.8e-05, dt 2.0s +All GPU(s): step 9260: loss 4.1406, lr 5.8e-05, dt 2.0s +All GPU(s): step 9261: loss 4.1289, lr 5.8e-05, dt 2.1s +All GPU(s): step 9262: loss 4.1367, lr 5.8e-05, dt 2.1s +All GPU(s): step 9263: loss 4.1367, lr 5.8e-05, dt 2.0s +All GPU(s): step 9264: loss 4.1406, lr 5.8e-05, dt 2.0s +All GPU(s): step 9265: loss 4.1445, lr 5.8e-05, dt 2.0s +All GPU(s): step 9266: loss 4.1328, lr 5.8e-05, dt 2.1s +All GPU(s): step 9267: loss 4.1484, lr 5.8e-05, dt 2.0s +All GPU(s): step 9268: loss 4.1367, lr 5.8e-05, dt 2.0s +All GPU(s): step 9269: loss 4.1406, lr 5.8e-05, dt 2.1s +All GPU(s): step 9270: loss 4.1484, lr 5.8e-05, dt 2.1s +All GPU(s): step 9271: loss 4.1523, lr 5.8e-05, dt 2.2s +All GPU(s): step 9272: loss 4.1445, lr 5.8e-05, dt 2.1s +All GPU(s): step 9273: loss 4.1406, lr 5.8e-05, dt 2.0s +All GPU(s): step 9274: loss 4.1445, lr 5.8e-05, dt 2.0s +All GPU(s): step 9275: loss 4.1445, lr 5.8e-05, dt 2.1s +All GPU(s): step 9276: loss 4.1367, lr 5.7e-05, dt 2.1s +All GPU(s): step 9277: loss 4.1406, lr 5.7e-05, dt 2.0s +All GPU(s): step 9278: loss 4.1445, lr 5.7e-05, dt 2.0s +All GPU(s): step 9279: loss 4.1406, lr 5.7e-05, dt 2.0s +All GPU(s): step 9280: loss 4.1289, lr 5.7e-05, dt 2.0s +All GPU(s): step 9281: loss 4.1328, lr 5.7e-05, dt 2.1s +All GPU(s): step 9282: loss 4.1250, lr 5.7e-05, dt 2.0s +All GPU(s): step 9283: loss 4.1367, lr 5.7e-05, dt 2.0s +All GPU(s): step 9284: loss 4.1406, lr 5.7e-05, dt 2.0s +All GPU(s): step 9285: loss 4.1289, lr 5.7e-05, dt 2.1s +All GPU(s): step 9286: loss 4.1367, lr 5.7e-05, dt 2.1s +All GPU(s): step 9287: loss 4.1406, lr 5.7e-05, dt 2.0s +All GPU(s): step 9288: loss 4.1367, lr 5.7e-05, dt 2.0s +All GPU(s): step 9289: loss 4.1445, lr 5.7e-05, dt 2.0s +All GPU(s): step 9290: loss 4.1406, lr 5.7e-05, dt 2.1s +All GPU(s): step 9291: loss 4.1406, lr 5.7e-05, dt 2.0s +All GPU(s): step 9292: loss 4.1328, lr 5.7e-05, dt 2.0s +All GPU(s): step 9293: loss 4.1367, lr 5.7e-05, dt 2.0s +All GPU(s): step 9294: loss 4.1367, lr 5.7e-05, dt 2.0s +All GPU(s): step 9295: loss 4.1406, lr 5.7e-05, dt 2.1s +All GPU(s): step 9296: loss 4.1289, lr 5.7e-05, dt 2.1s +All GPU(s): step 9297: loss 4.1406, lr 5.7e-05, dt 2.0s +All GPU(s): step 9298: loss 4.1211, lr 5.7e-05, dt 2.0s +All GPU(s): step 9299: loss 4.1445, lr 5.7e-05, dt 2.1s +All GPU(s): step 9300: loss 4.1406, lr 5.7e-05, dt 2.1s +All GPU(s): step 9301: loss 4.1367, lr 5.7e-05, dt 2.1s +All GPU(s): step 9302: loss 4.1406, lr 5.7e-05, dt 2.0s +All GPU(s): step 9303: loss 4.1445, lr 5.7e-05, dt 2.0s +All GPU(s): step 9304: loss 4.1445, lr 5.7e-05, dt 2.0s +All GPU(s): step 9305: loss 4.1406, lr 5.7e-05, dt 2.1s +All GPU(s): step 9306: loss 4.1406, lr 5.7e-05, dt 2.0s +All GPU(s): step 9307: loss 4.1523, lr 5.7e-05, dt 2.0s +All GPU(s): step 9308: loss 4.1445, lr 5.7e-05, dt 2.0s +All GPU(s): step 9309: loss 4.1406, lr 5.7e-05, dt 2.1s +All GPU(s): step 9310: loss 4.1484, lr 5.7e-05, dt 2.1s +All GPU(s): step 9311: loss 4.1289, lr 5.7e-05, dt 2.1s +All GPU(s): step 9312: loss 4.1445, lr 5.7e-05, dt 2.0s +All GPU(s): step 9313: loss 4.1484, lr 5.7e-05, dt 2.0s +All GPU(s): step 9314: loss 4.1445, lr 5.7e-05, dt 2.1s +All GPU(s): step 9315: loss 4.1406, lr 5.7e-05, dt 2.1s +All GPU(s): step 9316: loss 4.1445, lr 5.7e-05, dt 2.0s +All GPU(s): step 9317: loss 4.1250, lr 5.7e-05, dt 2.0s +All GPU(s): step 9318: loss 4.1289, lr 5.7e-05, dt 2.0s +All GPU(s): step 9319: loss 4.1328, lr 5.7e-05, dt 2.1s +All GPU(s): step 9320: loss 4.1289, lr 5.7e-05, dt 2.1s +All GPU(s): step 9321: loss 4.1367, lr 5.7e-05, dt 2.1s +All GPU(s): step 9322: loss 4.1328, lr 5.7e-05, dt 2.1s +All GPU(s): step 9323: loss 4.1406, lr 5.7e-05, dt 2.0s +All GPU(s): step 9324: loss 4.1445, lr 5.7e-05, dt 2.1s +All GPU(s): step 9325: loss 4.1328, lr 5.7e-05, dt 2.0s +All GPU(s): step 9326: loss 4.1367, lr 5.7e-05, dt 2.0s +All GPU(s): step 9327: loss 4.1367, lr 5.7e-05, dt 2.0s +All GPU(s): step 9328: loss 4.1406, lr 5.7e-05, dt 2.0s +All GPU(s): step 9329: loss 4.1367, lr 5.7e-05, dt 2.2s +All GPU(s): step 9330: loss 4.1406, lr 5.7e-05, dt 2.0s +All GPU(s): step 9331: loss 4.1523, lr 5.7e-05, dt 2.0s +All GPU(s): step 9332: loss 4.1328, lr 5.7e-05, dt 2.0s +All GPU(s): step 9333: loss 4.1367, lr 5.7e-05, dt 2.1s +All GPU(s): step 9334: loss 4.1367, lr 5.6e-05, dt 2.2s +All GPU(s): step 9335: loss 4.1445, lr 5.6e-05, dt 2.1s +All GPU(s): step 9336: loss 4.1445, lr 5.6e-05, dt 2.1s +All GPU(s): step 9337: loss 4.1445, lr 5.6e-05, dt 2.1s +All GPU(s): step 9338: loss 4.1367, lr 5.6e-05, dt 2.1s +All GPU(s): step 9339: loss 4.1328, lr 5.6e-05, dt 2.1s +All GPU(s): step 9340: loss 4.1328, lr 5.6e-05, dt 2.0s +All GPU(s): step 9341: loss 4.1406, lr 5.6e-05, dt 2.1s +All GPU(s): step 9342: loss 4.1406, lr 5.6e-05, dt 2.0s +All GPU(s): step 9343: loss 4.1445, lr 5.6e-05, dt 2.1s +All GPU(s): step 9344: loss 4.1445, lr 5.6e-05, dt 2.1s +All GPU(s): step 9345: loss 4.1328, lr 5.6e-05, dt 2.0s +All GPU(s): step 9346: loss 4.1367, lr 5.6e-05, dt 2.0s +All GPU(s): step 9347: loss 4.1367, lr 5.6e-05, dt 2.0s +All GPU(s): step 9348: loss 4.1367, lr 5.6e-05, dt 2.1s +All GPU(s): step 9349: loss 4.1406, lr 5.6e-05, dt 2.0s +All GPU(s): step 9350: loss 4.1367, lr 5.6e-05, dt 2.0s +All GPU(s): step 9351: loss 4.1406, lr 5.6e-05, dt 2.0s +All GPU(s): step 9352: loss 4.1406, lr 5.6e-05, dt 2.0s +All GPU(s): step 9353: loss 4.1328, lr 5.6e-05, dt 2.1s +All GPU(s): step 9354: loss 4.1406, lr 5.6e-05, dt 2.0s +All GPU(s): step 9355: loss 4.1484, lr 5.6e-05, dt 2.0s +All GPU(s): step 9356: loss 4.1328, lr 5.6e-05, dt 2.0s +All GPU(s): step 9357: loss 4.1445, lr 5.6e-05, dt 2.1s +All GPU(s): step 9358: loss 4.1484, lr 5.6e-05, dt 2.2s +All GPU(s): step 9359: loss 4.1328, lr 5.6e-05, dt 2.0s +All GPU(s): step 9360: loss 4.1445, lr 5.6e-05, dt 2.1s +All GPU(s): step 9361: loss 4.1406, lr 5.6e-05, dt 2.0s +All GPU(s): step 9362: loss 4.1484, lr 5.6e-05, dt 2.1s +All GPU(s): step 9363: loss 4.1445, lr 5.6e-05, dt 2.1s +All GPU(s): step 9364: loss 4.1406, lr 5.6e-05, dt 2.0s +All GPU(s): step 9365: loss 4.1445, lr 5.6e-05, dt 2.0s +All GPU(s): step 9366: loss 4.1406, lr 5.6e-05, dt 2.0s +All GPU(s): step 9367: loss 4.1562, lr 5.6e-05, dt 2.0s +All GPU(s): step 9368: loss 4.1484, lr 5.6e-05, dt 2.1s +All GPU(s): step 9369: loss 4.1406, lr 5.6e-05, dt 2.0s +All GPU(s): step 9370: loss 4.1406, lr 5.6e-05, dt 2.0s +All GPU(s): step 9371: loss 4.1484, lr 5.6e-05, dt 2.0s +All GPU(s): step 9372: loss 4.1406, lr 5.6e-05, dt 2.1s +All GPU(s): step 9373: loss 4.1250, lr 5.6e-05, dt 2.1s +All GPU(s): step 9374: loss 4.1445, lr 5.6e-05, dt 2.0s +All GPU(s): step 9375: loss 4.1367, lr 5.6e-05, dt 2.0s +All GPU(s): step 9376: loss 4.1289, lr 5.6e-05, dt 2.0s +All GPU(s): step 9377: loss 4.1406, lr 5.6e-05, dt 2.1s +All GPU(s): step 9378: loss 4.1367, lr 5.6e-05, dt 2.0s diff --git a/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/files/wandb-metadata.json b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..18391e3452ecd122a77dc1de5212eb5693265d9a --- /dev/null +++ b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/files/wandb-metadata.json @@ -0,0 +1,88 @@ +{ + "os": "Linux-5.15.0-117-generic-x86_64-with-glibc2.31", + "python": "3.10.14", + "startedAt": "2024-09-23T09:34:07.253424Z", + "args": [ + "--config-name", + "experimental/byte_autoencoder_1" + ], + "program": "/root/SuperTinyLanguageModels/train.py", + "codePath": "train.py", + "git": { + "remote": "https://github.com/LeonGuertler/SuperTinyLanguageModels.git", + "commit": "c36bf6b78927d4d365c52a835f0e178edacbab29" + }, + "email": "calvin14@gmail.com", + "root": "/root/SuperTinyLanguageModels/outputs/2024-09-23/09-33-58", + "host": "11c6e13f6a55", + "username": "root", + "executable": "/root/SuperTinyLanguageModels/.conda/bin/python3", + "cpu_count": 128, + "cpu_count_logical": 256, + "gpu": "[NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090]", + "gpu_count": 8, + "disk": { + "/": { + "total": "1123133947904", + "used": "551794671616" + } + }, + "memory": { + "total": "540812599296" + }, + "cpu": { + "count": 128, + "countLogical": 256 + }, + "gpu_nvidia": [ + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + } + ], + "cudaVersion": "12.5" +} \ No newline at end of file diff --git a/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/files/wandb-summary.json b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..866d41172c22e820fb019a479760154d73777bcb --- /dev/null +++ b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":19419},"_runtime":19419.42144769,"_timestamp":1.7271034620132892e+09,"additional_info/BCE-loss":0.0011798107298091054,"iter":9378,"additional_info/chunk_len_penalty_loss":0,"token_num":460947456,"additional_info/chunk_len_loss":4.104955673217773,"loss":4.13671875,"lr":5.576320243468443e-05,"_step":460947456,"additional_info/total-loss":4.106135368347168,"additional_info/average_chunk_length":1.5930068492889404} \ No newline at end of file diff --git a/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug-core.log b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..0818e6702aa3aa0fda29dfa42a7b3615ded3829a --- /dev/null +++ b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug-core.log @@ -0,0 +1,13 @@ +{"time":"2024-09-23T09:34:06.551174015Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpn1evznij/port-85375.txt","pid":85375,"debug":false,"disable-analytics":false} +{"time":"2024-09-23T09:34:06.551216045Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false} +{"time":"2024-09-23T09:34:06.564461097Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":85375} +{"time":"2024-09-23T09:34:06.564448897Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":46013,"Zone":""}} +{"time":"2024-09-23T09:34:06.732455149Z","level":"INFO","msg":"created new connection","id":"127.0.0.1:50404"} +{"time":"2024-09-23T09:34:07.257780383Z","level":"INFO","msg":"connection init received","streamId":"jnzzkcth","id":"127.0.0.1:50404"} +{"time":"2024-09-23T09:34:07.258146398Z","level":"ERROR","msg":"error creating symlink","error":"symlink /root/.cache/wandb/logs/core-debug-20240923_093406.log /root/SuperTinyLanguageModels/outputs/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug-core.log: file exists"} +{"time":"2024-09-23T09:34:07.260935175Z","level":"INFO","msg":"connection init completed","streamId":"jnzzkcth","id":"127.0.0.1:50404"} +{"time":"2024-09-23T14:57:46.674656037Z","level":"INFO","msg":"connection: teardown","id":"127.0.0.1:50404"} +{"time":"2024-09-23T14:57:46.674922221Z","level":"INFO","msg":"server is shutting down"} +{"time":"2024-09-23T14:57:46.674939631Z","level":"INFO","msg":"closed connection","id":"127.0.0.1:50404"} +{"time":"2024-09-23T14:57:48.203675304Z","level":"INFO","msg":"connection closed","id":"127.0.0.1:50404"} +{"time":"2024-09-23T14:57:48.203707925Z","level":"INFO","msg":"server is closed"} diff --git a/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug-internal.log b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..50dd3985c96156de29fc7af1025242df965d3601 --- /dev/null +++ b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug-internal.log @@ -0,0 +1,18 @@ +{"time":"2024-09-23T09:34:07.258097267Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T09:34:07.258119398Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug-core.log"} +{"time":"2024-09-23T09:34:07.258175408Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T09:34:07.258180668Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug-core.log"} +{"time":"2024-09-23T09:34:07.260911834Z","level":"INFO","msg":"created new stream","id":"jnzzkcth"} +{"time":"2024-09-23T09:34:07.260929995Z","level":"INFO","msg":"stream: started","id":"jnzzkcth"} +{"time":"2024-09-23T09:34:07.260949385Z","level":"INFO","msg":"handler: started","stream_id":{"value":"jnzzkcth"}} +{"time":"2024-09-23T09:34:07.260966215Z","level":"INFO","msg":"sender: started","stream_id":{"value":"jnzzkcth"}} +{"time":"2024-09-23T09:34:07.260991135Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"jnzzkcth"}} +{"time":"2024-09-23T09:34:07.631920515Z","level":"INFO","msg":"wandb-core","!BADKEY":null} +{"time":"2024-09-23T09:34:07.634314975Z","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-09-23T14:57:46.67482666Z","level":"INFO","msg":"stream: closing","id":"jnzzkcth"} +{"time":"2024-09-23T14:57:46.674896121Z","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-09-23T14:57:46.678091073Z","level":"INFO","msg":"Stopped system monitor"} +{"time":"2024-09-23T14:57:48.203214047Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"jnzzkcth"}} +{"time":"2024-09-23T14:57:48.203305789Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"jnzzkcth"}} +{"time":"2024-09-23T14:57:48.203294698Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"jnzzkcth"}} +{"time":"2024-09-23T14:57:48.203550773Z","level":"INFO","msg":"stream: closed","id":"jnzzkcth"} diff --git a/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug.log b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e640057693b0eae0c84abea9a0f6fac9caaae9bc --- /dev/null +++ b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug.log @@ -0,0 +1,26 @@ +2024-09-23 09:34:07,248 INFO MainThread:85375 [wandb_setup.py:_flush():77] Current SDK version is 0.18.1 +2024-09-23 09:34:07,248 INFO MainThread:85375 [wandb_setup.py:_flush():77] Configure stats pid to 85375 +2024-09-23 09:34:07,248 INFO MainThread:85375 [wandb_setup.py:_flush():77] Loading settings from /root/.config/wandb/settings +2024-09-23 09:34:07,248 INFO MainThread:85375 [wandb_setup.py:_flush():77] Loading settings from /root/SuperTinyLanguageModels/outputs/2024-09-23/09-33-58/wandb/settings +2024-09-23 09:34:07,248 INFO MainThread:85375 [wandb_setup.py:_flush():77] Loading settings from environment variables: {} +2024-09-23 09:34:07,248 INFO MainThread:85375 [wandb_setup.py:_flush():77] Applying setup settings: {'mode': None, '_disable_service': None} +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/root/SuperTinyLanguageModels/train.py', 'program': '/root/SuperTinyLanguageModels/train.py'} +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_setup.py:_flush():77] Applying login settings: {} +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_init.py:_log_setup():532] Logging user logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug.log +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_init.py:_log_setup():533] Logging internal logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/logs/debug-internal.log +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_init.py:init():616] calling init triggers +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_init.py:init():623] wandb.init called with sweep_config: {} +config: {'model': {'core_model_type': 'pass_through', 'hidden_dim': 384, 'byte_hidden': 128, 'max_chunk_length': 12, 'max_num_chunks': 1024, 'num_delimiter_layers': 3, 'num_byte_decoder_layers': 5, 'target_chunk_len': 8.0, 'chunk_len_loss_weight': 0.1, 'chunk_len_penalty': 0.1, 'context_window': 8192, 'embedding_model_type': 'byte_level', 'tokenizer_type': 'bpe', 'tokenizer_dataset_name': 'simple_en_wiki', 'tokenizer_simplify_data': True, 'vocab_size': 259, 'lm_head_type': 'byte_level', 'lm_head_normalization': 'rms_norm', 'lm_head_bias': False, 'lm_head_dropout': 0.0, 'model_shell_type': 'byte_autoencoder_shell', 'embedding_weight_tying': True, 'ffn_weight_tying': False, 'cproj_weight_tying': False, 'positional_encoding_type': 'rope'}, 'trainer': {'trainer_type': 'base_trainer', 'dataset': 'fineweb_edu_10B', 'batch_size': 6, 'gradient_accumulation_steps': 8, 'max_iters': 10000, 'eval_interval': 50000000, 'log_interval': 1, 'checkpoint_interval': 1000, 'eval_iters': 1000, 'run_eval': False, 'eval': {'mcq_benchmarks': None, 'mcq_num_samples': 1000, 'eval_byte_metrics': False, 'text_modeling_eval': False, 'text_generation_eval': False}, 'optimizer': {'optimizer_name': 'adamW', 'lr': 0.0005, 'min_lr': 5e-05, 'weight_decay': 0.01, 'beta1': 0.9, 'beta2': 0.95, 'grad_clip': 1.0}, 'lr_scheduler': {'name': 'cosine', 'warmup_iters': 100}, 'dataloader': {'name': 'autoencoder'}, 'datasampling': {'name': 'standard'}, 'loss_fn': {'name': 'pass_through'}}, 'general': {'logging': {'wandb_log': True, 'wandb_project': 'SuperTinyLanguageModels', 'wandb_run_name': None, 'group_name': 'experimental_byte_level'}, 'paths': {'output_dir': 'outputs', 'data_dir': '/root/SuperTinyLanguageModels/data', 'checkpoint_dir': 'checkpoints', 'eval_dir': '/root/SuperTinyLanguageModels/evals'}, 'seed': 489, 'device': 'cuda'}} +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_init.py:init():666] starting backend +2024-09-23 09:34:07,249 INFO MainThread:85375 [wandb_init.py:init():670] setting up manager +2024-09-23 09:34:07,251 INFO MainThread:85375 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-09-23 09:34:07,253 INFO MainThread:85375 [wandb_init.py:init():678] backend started and connected +2024-09-23 09:34:07,256 INFO MainThread:85375 [wandb_init.py:init():773] updated telemetry +2024-09-23 09:34:07,262 INFO MainThread:85375 [wandb_init.py:init():806] communicating run to backend with 90.0 second timeout +2024-09-23 09:34:07,628 INFO MainThread:85375 [wandb_init.py:init():857] starting run threads in backend +2024-09-23 09:34:07,798 INFO MainThread:85375 [wandb_run.py:_console_start():2459] atexit reg +2024-09-23 09:34:07,798 INFO MainThread:85375 [wandb_run.py:_redirect():2307] redirect: wrap_raw +2024-09-23 09:34:07,798 INFO MainThread:85375 [wandb_run.py:_redirect():2372] Wrapping output streams. +2024-09-23 09:34:07,798 INFO MainThread:85375 [wandb_run.py:_redirect():2397] Redirects installed. +2024-09-23 09:34:07,802 INFO MainThread:85375 [wandb_init.py:init():900] run started, returning control to user process +2024-09-23 14:57:46,675 WARNING MsgRouterThr:85375 [router.py:message_loop():77] message_loop has been closed diff --git a/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/run-jnzzkcth.wandb b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/run-jnzzkcth.wandb new file mode 100644 index 0000000000000000000000000000000000000000..d1d378d53007027aa3c375d6848aee6fb8bb9890 --- /dev/null +++ b/2024-09-23/09-33-58/wandb/run-20240923_093407-jnzzkcth/run-jnzzkcth.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ecba4703ad3de4654f58e8bc85e22d3ae406fa762067f171ee69a11f13c5b2d +size 39377178 diff --git a/2024-09-23/15-02-55/.hydra/config.yaml b/2024-09-23/15-02-55/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8e46c1f4db5220a36d1691f7e5a67a6e09222a39 --- /dev/null +++ b/2024-09-23/15-02-55/.hydra/config.yaml @@ -0,0 +1,74 @@ +experimental: + model: + core_model_type: pass_through + hidden_dim: 384 + byte_hidden: 128 + max_chunk_length: 12 + max_num_chunks: 1024 + num_delimiter_layers: 3 + num_byte_decoder_layers: 5 + target_chunk_len: 8.0 + chunk_len_loss_weight: 1.0 + chunk_len_penalty: 0.1 + context_window: 8192 + embedding_model_type: byte_level + tokenizer_type: bpe + tokenizer_dataset_name: simple_en_wiki + tokenizer_simplify_data: true + vocab_size: 259 + lm_head_type: byte_level + lm_head_normalization: rms_norm + lm_head_bias: false + lm_head_dropout: 0.0 + model_shell_type: byte_autoencoder_shell + embedding_weight_tying: true + ffn_weight_tying: false + cproj_weight_tying: false + positional_encoding_type: rope + trainer: + trainer_type: base_trainer + dataset: fineweb_edu_10B + batch_size: 6 + gradient_accumulation_steps: 8 + max_iters: 10000 + eval_interval: 50000000 + log_interval: 1 + checkpoint_interval: 1000 + eval_iters: 1000 + run_eval: false + eval: + mcq_benchmarks: null + mcq_num_samples: 1000 + eval_byte_metrics: false + text_modeling_eval: false + text_generation_eval: false + optimizer: + optimizer_name: adamW + lr: 0.0005 + min_lr: 5.0e-05 + weight_decay: 0.01 + beta1: 0.9 + beta2: 0.95 + grad_clip: 1.0 + lr_scheduler: + name: cosine + warmup_iters: 100 + dataloader: + name: autoencoder + datasampling: + name: standard + loss_fn: + name: pass_through + general: + logging: + wandb_log: true + wandb_project: SuperTinyLanguageModels + wandb_run_name: null + group_name: experimental_byte_level + paths: + output_dir: outputs + data_dir: data + checkpoint_dir: checkpoints + eval_dir: evals + seed: 489 + device: cuda diff --git a/2024-09-23/15-02-55/.hydra/hydra.yaml b/2024-09-23/15-02-55/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4edd49803d1129ec40b3e609520be94cf507816f --- /dev/null +++ b/2024-09-23/15-02-55/.hydra/hydra.yaml @@ -0,0 +1,154 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: train + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: experimental/byte_autoencoder_1 + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.1' + cwd: /root/SuperTinyLanguageModels + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /root/SuperTinyLanguageModels/configs/train + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /root/SuperTinyLanguageModels/outputs/2024-09-23/15-02-55 + choices: + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/2024-09-23/15-02-55/.hydra/overrides.yaml b/2024-09-23/15-02-55/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/2024-09-23/15-02-55/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/2024-09-23/15-02-55/checkpoints/ckpt_1000.pt b/2024-09-23/15-02-55/checkpoints/ckpt_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..f04d6acb162f4a18053edbec52f15377bffcd133 --- /dev/null +++ b/2024-09-23/15-02-55/checkpoints/ckpt_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e46a5d8593ca456e0b497b42e174d2391e829ea215df3661daf313e0580abed +size 69377274 diff --git a/2024-09-23/15-02-55/train.log b/2024-09-23/15-02-55/train.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/2024-09-23/15-02-55/wandb/debug-internal.log b/2024-09-23/15-02-55/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..6a4f240ee358516782023a72997984dfcc0896fe --- /dev/null +++ b/2024-09-23/15-02-55/wandb/debug-internal.log @@ -0,0 +1,18 @@ +{"time":"2024-09-23T15:03:04.833628134Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T15:03:04.833650595Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug-core.log"} +{"time":"2024-09-23T15:03:04.833707186Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T15:03:04.833713256Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug-core.log"} +{"time":"2024-09-23T15:03:04.837118723Z","level":"INFO","msg":"created new stream","id":"bbl5fd2u"} +{"time":"2024-09-23T15:03:04.837137053Z","level":"INFO","msg":"stream: started","id":"bbl5fd2u"} +{"time":"2024-09-23T15:03:04.837160104Z","level":"INFO","msg":"handler: started","stream_id":{"value":"bbl5fd2u"}} +{"time":"2024-09-23T15:03:04.837187104Z","level":"INFO","msg":"sender: started","stream_id":{"value":"bbl5fd2u"}} +{"time":"2024-09-23T15:03:04.837235035Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"bbl5fd2u"}} +{"time":"2024-09-23T15:03:05.197982746Z","level":"INFO","msg":"wandb-core","!BADKEY":null} +{"time":"2024-09-23T15:03:05.20002754Z","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-09-23T15:27:48.548880321Z","level":"INFO","msg":"stream: closing","id":"bbl5fd2u"} +{"time":"2024-09-23T15:27:48.548925082Z","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-09-23T15:27:48.549887675Z","level":"INFO","msg":"Stopped system monitor"} +{"time":"2024-09-23T15:27:53.078953404Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"bbl5fd2u"}} +{"time":"2024-09-23T15:27:53.079066405Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"bbl5fd2u"}} +{"time":"2024-09-23T15:27:53.079081806Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"bbl5fd2u"}} +{"time":"2024-09-23T15:27:53.079280268Z","level":"INFO","msg":"stream: closed","id":"bbl5fd2u"} diff --git a/2024-09-23/15-02-55/wandb/debug.log b/2024-09-23/15-02-55/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..3046e4424228a5d4185d2ef8b632877b8fbb529e --- /dev/null +++ b/2024-09-23/15-02-55/wandb/debug.log @@ -0,0 +1,26 @@ +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Current SDK version is 0.18.1 +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Configure stats pid to 123542 +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Loading settings from /root/.config/wandb/settings +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Loading settings from /root/SuperTinyLanguageModels/outputs/2024-09-23/15-02-55/wandb/settings +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Loading settings from environment variables: {} +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Applying setup settings: {'mode': None, '_disable_service': None} +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/root/SuperTinyLanguageModels/train.py', 'program': '/root/SuperTinyLanguageModels/train.py'} +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Applying login settings: {} +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_init.py:_log_setup():532] Logging user logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug.log +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_init.py:_log_setup():533] Logging internal logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug-internal.log +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_init.py:init():616] calling init triggers +2024-09-23 15:03:04,827 INFO MainThread:123542 [wandb_init.py:init():623] wandb.init called with sweep_config: {} +config: {'model': {'core_model_type': 'pass_through', 'hidden_dim': 384, 'byte_hidden': 128, 'max_chunk_length': 12, 'max_num_chunks': 1024, 'num_delimiter_layers': 3, 'num_byte_decoder_layers': 5, 'target_chunk_len': 8.0, 'chunk_len_loss_weight': 1.0, 'chunk_len_penalty': 0.1, 'context_window': 8192, 'embedding_model_type': 'byte_level', 'tokenizer_type': 'bpe', 'tokenizer_dataset_name': 'simple_en_wiki', 'tokenizer_simplify_data': True, 'vocab_size': 259, 'lm_head_type': 'byte_level', 'lm_head_normalization': 'rms_norm', 'lm_head_bias': False, 'lm_head_dropout': 0.0, 'model_shell_type': 'byte_autoencoder_shell', 'embedding_weight_tying': True, 'ffn_weight_tying': False, 'cproj_weight_tying': False, 'positional_encoding_type': 'rope'}, 'trainer': {'trainer_type': 'base_trainer', 'dataset': 'fineweb_edu_10B', 'batch_size': 6, 'gradient_accumulation_steps': 8, 'max_iters': 10000, 'eval_interval': 50000000, 'log_interval': 1, 'checkpoint_interval': 1000, 'eval_iters': 1000, 'run_eval': False, 'eval': {'mcq_benchmarks': None, 'mcq_num_samples': 1000, 'eval_byte_metrics': False, 'text_modeling_eval': False, 'text_generation_eval': False}, 'optimizer': {'optimizer_name': 'adamW', 'lr': 0.0005, 'min_lr': 5e-05, 'weight_decay': 0.01, 'beta1': 0.9, 'beta2': 0.95, 'grad_clip': 1.0}, 'lr_scheduler': {'name': 'cosine', 'warmup_iters': 100}, 'dataloader': {'name': 'autoencoder'}, 'datasampling': {'name': 'standard'}, 'loss_fn': {'name': 'pass_through'}}, 'general': {'logging': {'wandb_log': True, 'wandb_project': 'SuperTinyLanguageModels', 'wandb_run_name': None, 'group_name': 'experimental_byte_level'}, 'paths': {'output_dir': 'outputs', 'data_dir': '/root/SuperTinyLanguageModels/data', 'checkpoint_dir': 'checkpoints', 'eval_dir': '/root/SuperTinyLanguageModels/evals'}, 'seed': 489, 'device': 'cuda'}} +2024-09-23 15:03:04,827 INFO MainThread:123542 [wandb_init.py:init():666] starting backend +2024-09-23 15:03:04,827 INFO MainThread:123542 [wandb_init.py:init():670] setting up manager +2024-09-23 15:03:04,829 INFO MainThread:123542 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-09-23 15:03:04,830 INFO MainThread:123542 [wandb_init.py:init():678] backend started and connected +2024-09-23 15:03:04,833 INFO MainThread:123542 [wandb_init.py:init():773] updated telemetry +2024-09-23 15:03:04,840 INFO MainThread:123542 [wandb_init.py:init():806] communicating run to backend with 90.0 second timeout +2024-09-23 15:03:05,195 INFO MainThread:123542 [wandb_init.py:init():857] starting run threads in backend +2024-09-23 15:03:05,391 INFO MainThread:123542 [wandb_run.py:_console_start():2459] atexit reg +2024-09-23 15:03:05,391 INFO MainThread:123542 [wandb_run.py:_redirect():2307] redirect: wrap_raw +2024-09-23 15:03:05,391 INFO MainThread:123542 [wandb_run.py:_redirect():2372] Wrapping output streams. +2024-09-23 15:03:05,391 INFO MainThread:123542 [wandb_run.py:_redirect():2397] Redirects installed. +2024-09-23 15:03:05,394 INFO MainThread:123542 [wandb_init.py:init():900] run started, returning control to user process +2024-09-23 15:27:48,549 WARNING MsgRouterThr:123542 [router.py:message_loop():77] message_loop has been closed diff --git a/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/files/config.yaml b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..465ac053e6a16e16e270d94da5901ac8c1afa202 --- /dev/null +++ b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/files/config.yaml @@ -0,0 +1,115 @@ +_wandb: + value: + cli_version: 0.18.1 + m: [] + python_version: 3.10.14 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 50 + - 51 + - 53 + - 55 + "2": + - 1 + - 5 + - 11 + - 49 + - 50 + - 51 + - 53 + - 55 + "3": + - 13 + - 15 + - 16 + - 23 + - 55 + - 61 + "4": 3.10.14 + "5": 0.18.1 + "6": 4.44.2 + "8": + - 5 + - 9 + "12": 0.18.1 + "13": linux-x86_64 +general: + value: + device: cuda + logging: + group_name: experimental_byte_level + wandb_log: true + wandb_project: SuperTinyLanguageModels + wandb_run_name: null + paths: + checkpoint_dir: checkpoints + data_dir: /root/SuperTinyLanguageModels/data + eval_dir: /root/SuperTinyLanguageModels/evals + output_dir: outputs + seed: 489 +model: + value: + byte_hidden: 128 + chunk_len_loss_weight: 1 + chunk_len_penalty: 0.1 + context_window: 8192 + core_model_type: pass_through + cproj_weight_tying: false + embedding_model_type: byte_level + embedding_weight_tying: true + ffn_weight_tying: false + hidden_dim: 384 + lm_head_bias: false + lm_head_dropout: 0 + lm_head_normalization: rms_norm + lm_head_type: byte_level + max_chunk_length: 12 + max_num_chunks: 1024 + model_shell_type: byte_autoencoder_shell + num_byte_decoder_layers: 5 + num_delimiter_layers: 3 + positional_encoding_type: rope + target_chunk_len: 8 + tokenizer_dataset_name: simple_en_wiki + tokenizer_simplify_data: true + tokenizer_type: bpe + vocab_size: 259 +trainer: + value: + batch_size: 6 + checkpoint_interval: 1000 + dataloader: + name: autoencoder + datasampling: + name: standard + dataset: fineweb_edu_10B + eval: + eval_byte_metrics: false + mcq_benchmarks: null + mcq_num_samples: 1000 + text_generation_eval: false + text_modeling_eval: false + eval_interval: 50000000 + eval_iters: 1000 + gradient_accumulation_steps: 8 + log_interval: 1 + loss_fn: + name: pass_through + lr_scheduler: + name: cosine + warmup_iters: 100 + max_iters: 10000 + optimizer: + beta1: 0.9 + beta2: 0.95 + grad_clip: 1 + lr: 0.0005 + min_lr: 5e-05 + optimizer_name: adamW + weight_decay: 0.01 + run_eval: false + trainer_type: base_trainer diff --git a/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/files/output.log b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..9ef5516f3baecfed7be7d202afe9848f40afa58e --- /dev/null +++ b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/files/output.log @@ -0,0 +1,1065 @@ +Weight and Biases Initialized +Rank0 Trainer built +Training loop is starting +All GPU(s): step 1: loss 33.7500, lr 5.0e-06, dt 2.1s +All GPU(s): step 2: loss 33.7500, lr 1.0e-05, dt 2.1s +All GPU(s): step 3: loss 33.5312, lr 1.5e-05, dt 2.1s +All GPU(s): step 4: loss 33.4062, lr 2.0e-05, dt 2.1s +All GPU(s): step 5: loss 33.3750, lr 2.5e-05, dt 2.1s +All GPU(s): step 6: loss 33.3125, lr 3.0e-05, dt 2.1s +All GPU(s): step 7: loss 33.2500, lr 3.5e-05, dt 2.1s +All GPU(s): step 8: loss 32.9219, lr 4.0e-05, dt 2.2s +All GPU(s): step 9: loss 32.2500, lr 4.5e-05, dt 2.3s +All GPU(s): step 10: loss 32.4219, lr 5.0e-05, dt 2.1s +All GPU(s): step 11: loss 32.4062, lr 5.5e-05, dt 2.1s +All GPU(s): step 12: loss 32.2969, lr 6.0e-05, dt 2.1s +All GPU(s): step 13: loss 32.9062, lr 6.5e-05, dt 2.1s +All GPU(s): step 14: loss 32.2500, lr 7.0e-05, dt 2.1s +All GPU(s): step 15: loss 32.5625, lr 7.5e-05, dt 2.1s +All GPU(s): step 16: loss 31.7031, lr 8.0e-05, dt 2.0s +All GPU(s): step 17: loss 31.9219, lr 8.5e-05, dt 2.0s +All GPU(s): step 18: loss 30.9531, lr 9.0e-05, dt 2.1s +All GPU(s): step 19: loss 30.5781, lr 9.5e-05, dt 2.1s +All GPU(s): step 20: loss 29.8906, lr 1.0e-04, dt 2.1s +All GPU(s): step 21: loss 29.4219, lr 1.1e-04, dt 2.1s +All GPU(s): step 22: loss 28.9375, lr 1.1e-04, dt 2.1s +All GPU(s): step 23: loss 29.0469, lr 1.2e-04, dt 2.1s +All GPU(s): step 24: loss 28.8125, lr 1.2e-04, dt 2.0s +All GPU(s): step 25: loss 28.8125, lr 1.3e-04, dt 2.1s +All GPU(s): step 26: loss 27.9531, lr 1.3e-04, dt 2.0s +All GPU(s): step 27: loss 28.2188, lr 1.4e-04, dt 2.1s +All GPU(s): step 28: loss 27.4531, lr 1.4e-04, dt 2.2s +All GPU(s): step 29: loss 27.6875, lr 1.5e-04, dt 2.1s +All GPU(s): step 30: loss 27.4375, lr 1.5e-04, dt 2.1s +All GPU(s): step 31: loss 26.7188, lr 1.5e-04, dt 2.0s +All GPU(s): step 32: loss 26.5781, lr 1.6e-04, dt 2.1s +All GPU(s): step 33: loss 26.3750, lr 1.6e-04, dt 2.2s +All GPU(s): step 34: loss 26.5000, lr 1.7e-04, dt 2.1s +All GPU(s): step 35: loss 25.9844, lr 1.8e-04, dt 2.1s +All GPU(s): step 36: loss 25.7812, lr 1.8e-04, dt 2.1s +All GPU(s): step 37: loss 25.8438, lr 1.8e-04, dt 2.1s +All GPU(s): step 38: loss 25.2812, lr 1.9e-04, dt 2.2s +All GPU(s): step 39: loss 24.2812, lr 1.9e-04, dt 2.1s +All GPU(s): step 40: loss 24.4688, lr 2.0e-04, dt 2.1s +All GPU(s): step 41: loss 24.4844, lr 2.0e-04, dt 2.1s +All GPU(s): step 42: loss 24.1875, lr 2.1e-04, dt 2.1s +All GPU(s): step 43: loss 23.2031, lr 2.2e-04, dt 2.0s +All GPU(s): step 44: loss 23.9375, lr 2.2e-04, dt 2.1s +All GPU(s): step 45: loss 23.4531, lr 2.2e-04, dt 2.1s +All GPU(s): step 46: loss 23.8125, lr 2.3e-04, dt 2.1s +All GPU(s): step 47: loss 23.3750, lr 2.3e-04, dt 2.2s +All GPU(s): step 48: loss 23.2500, lr 2.4e-04, dt 2.1s +All GPU(s): step 49: loss 24.0312, lr 2.4e-04, dt 2.0s +All GPU(s): step 50: loss 23.3281, lr 2.5e-04, dt 2.1s +All GPU(s): step 51: loss 23.6406, lr 2.6e-04, dt 2.1s +All GPU(s): step 52: loss 23.3125, lr 2.6e-04, dt 2.2s +All GPU(s): step 53: loss 23.0312, lr 2.6e-04, dt 2.0s +All GPU(s): step 54: loss 23.4688, lr 2.7e-04, dt 2.0s +All GPU(s): step 55: loss 23.8438, lr 2.8e-04, dt 2.0s +All GPU(s): step 56: loss 24.2188, lr 2.8e-04, dt 2.1s +All GPU(s): step 57: loss 23.3281, lr 2.8e-04, dt 2.1s +All GPU(s): step 58: loss 23.7500, lr 2.9e-04, dt 2.0s +All GPU(s): step 59: loss 22.8750, lr 3.0e-04, dt 2.0s +All GPU(s): step 60: loss 24.0156, lr 3.0e-04, dt 2.0s +All GPU(s): step 61: loss 24.0469, lr 3.0e-04, dt 2.0s +All GPU(s): step 62: loss 23.6719, lr 3.1e-04, dt 2.1s +All GPU(s): step 63: loss 23.7344, lr 3.2e-04, dt 2.0s +All GPU(s): step 64: loss 23.7031, lr 3.2e-04, dt 2.1s +All GPU(s): step 65: loss 23.8906, lr 3.2e-04, dt 2.0s +All GPU(s): step 66: loss 23.3281, lr 3.3e-04, dt 2.1s +All GPU(s): step 67: loss 23.2031, lr 3.4e-04, dt 2.1s +All GPU(s): step 68: loss 23.0781, lr 3.4e-04, dt 2.1s +All GPU(s): step 69: loss 23.7969, lr 3.5e-04, dt 2.0s +All GPU(s): step 70: loss 23.6875, lr 3.5e-04, dt 2.0s +All GPU(s): step 71: loss 23.3438, lr 3.6e-04, dt 2.1s +All GPU(s): step 72: loss 23.4531, lr 3.6e-04, dt 2.0s +All GPU(s): step 73: loss 23.8125, lr 3.6e-04, dt 2.0s +All GPU(s): step 74: loss 24.0000, lr 3.7e-04, dt 2.0s +All GPU(s): step 75: loss 23.5781, lr 3.8e-04, dt 2.0s +All GPU(s): step 76: loss 23.9531, lr 3.8e-04, dt 2.1s +All GPU(s): step 77: loss 24.1406, lr 3.8e-04, dt 2.1s +All GPU(s): step 78: loss 24.0000, lr 3.9e-04, dt 2.0s +All GPU(s): step 79: loss 23.9219, lr 4.0e-04, dt 2.0s +All GPU(s): step 80: loss 23.3438, lr 4.0e-04, dt 2.1s +All GPU(s): step 81: loss 23.8281, lr 4.1e-04, dt 2.1s +All GPU(s): step 82: loss 24.5781, lr 4.1e-04, dt 2.0s +All GPU(s): step 83: loss 23.5469, lr 4.2e-04, dt 2.1s +All GPU(s): step 84: loss 23.7812, lr 4.2e-04, dt 2.0s +All GPU(s): step 85: loss 24.4375, lr 4.3e-04, dt 2.1s +All GPU(s): step 86: loss 23.7344, lr 4.3e-04, dt 2.1s +All GPU(s): step 87: loss 24.1719, lr 4.4e-04, dt 2.0s +All GPU(s): step 88: loss 24.4844, lr 4.4e-04, dt 2.0s +All GPU(s): step 89: loss 23.7344, lr 4.4e-04, dt 2.0s +All GPU(s): step 90: loss 24.3594, lr 4.5e-04, dt 2.1s +All GPU(s): step 91: loss 23.9688, lr 4.6e-04, dt 2.1s +All GPU(s): step 92: loss 23.8438, lr 4.6e-04, dt 2.0s +All GPU(s): step 93: loss 24.0625, lr 4.6e-04, dt 2.0s +All GPU(s): step 94: loss 24.0938, lr 4.7e-04, dt 2.0s +All GPU(s): step 95: loss 24.2969, lr 4.7e-04, dt 2.2s +All GPU(s): step 96: loss 23.4219, lr 4.8e-04, dt 2.1s +All GPU(s): step 97: loss 24.0625, lr 4.9e-04, dt 2.1s +All GPU(s): step 98: loss 23.7969, lr 4.9e-04, dt 2.1s +All GPU(s): step 99: loss 24.5000, lr 4.9e-04, dt 2.1s +All GPU(s): step 100: loss 23.5156, lr 5.0e-04, dt 2.2s +All GPU(s): step 101: loss 24.1562, lr 5.0e-04, dt 2.1s +All GPU(s): step 102: loss 24.4375, lr 5.0e-04, dt 2.0s +All GPU(s): step 103: loss 23.7031, lr 5.0e-04, dt 2.0s +All GPU(s): step 104: loss 24.3750, lr 5.0e-04, dt 2.1s +All GPU(s): step 105: loss 24.8125, lr 5.0e-04, dt 2.2s +All GPU(s): step 106: loss 24.5938, lr 5.0e-04, dt 2.1s +All GPU(s): step 107: loss 24.1250, lr 5.0e-04, dt 2.0s +All GPU(s): step 108: loss 23.9375, lr 5.0e-04, dt 2.0s +All GPU(s): step 109: loss 25.0781, lr 5.0e-04, dt 2.1s +All GPU(s): step 110: loss 24.6719, lr 5.0e-04, dt 2.1s +All GPU(s): step 111: loss 24.7500, lr 5.0e-04, dt 2.0s +All GPU(s): step 112: loss 24.3750, lr 5.0e-04, dt 2.0s +All GPU(s): step 113: loss 24.2344, lr 5.0e-04, dt 2.0s +All GPU(s): step 114: loss 24.0781, lr 5.0e-04, dt 2.1s +All GPU(s): step 115: loss 23.4531, lr 5.0e-04, dt 2.1s +All GPU(s): step 116: loss 23.6250, lr 5.0e-04, dt 2.0s +All GPU(s): step 117: loss 23.7969, lr 5.0e-04, dt 2.0s +All GPU(s): step 118: loss 23.3594, lr 5.0e-04, dt 2.0s +All GPU(s): step 119: loss 24.2969, lr 5.0e-04, dt 2.1s +All GPU(s): step 120: loss 23.6250, lr 5.0e-04, dt 2.0s +All GPU(s): step 121: loss 22.6406, lr 5.0e-04, dt 2.0s +All GPU(s): step 122: loss 22.6719, lr 5.0e-04, dt 2.0s +All GPU(s): step 123: loss 22.5156, lr 5.0e-04, dt 2.0s +All GPU(s): step 124: loss 21.6875, lr 5.0e-04, dt 2.1s +All GPU(s): step 125: loss 21.1094, lr 5.0e-04, dt 2.1s +All GPU(s): step 126: loss 21.1250, lr 5.0e-04, dt 2.1s +All GPU(s): step 127: loss 19.4062, lr 5.0e-04, dt 2.0s +All GPU(s): step 128: loss 19.5469, lr 5.0e-04, dt 2.1s +All GPU(s): step 129: loss 18.0000, lr 5.0e-04, dt 2.2s +All GPU(s): step 130: loss 17.1953, lr 5.0e-04, dt 2.0s +All GPU(s): step 131: loss 15.0078, lr 5.0e-04, dt 2.0s +All GPU(s): step 132: loss 14.5781, lr 5.0e-04, dt 2.0s +All GPU(s): step 133: loss 12.4766, lr 5.0e-04, dt 2.1s +All GPU(s): step 134: loss 10.0156, lr 5.0e-04, dt 2.1s +All GPU(s): step 135: loss 8.3516, lr 5.0e-04, dt 2.0s +All GPU(s): step 136: loss 8.0430, lr 5.0e-04, dt 2.1s +All GPU(s): step 137: loss 7.4453, lr 5.0e-04, dt 2.0s +All GPU(s): step 138: loss 8.2812, lr 5.0e-04, dt 2.1s +All GPU(s): step 139: loss 13.0469, lr 5.0e-04, dt 2.1s +All GPU(s): step 140: loss 8.3398, lr 5.0e-04, dt 2.1s +All GPU(s): step 141: loss 12.3594, lr 5.0e-04, dt 2.0s +All GPU(s): step 142: loss 18.4844, lr 5.0e-04, dt 2.0s +All GPU(s): step 143: loss 12.1016, lr 5.0e-04, dt 2.2s +All GPU(s): step 144: loss 10.5703, lr 5.0e-04, dt 2.1s +All GPU(s): step 145: loss 14.6719, lr 5.0e-04, dt 2.1s +All GPU(s): step 146: loss 13.9297, lr 5.0e-04, dt 2.0s +All GPU(s): step 147: loss 9.9062, lr 5.0e-04, dt 2.1s +All GPU(s): step 148: loss 11.5547, lr 5.0e-04, dt 2.2s +All GPU(s): step 149: loss 16.1953, lr 5.0e-04, dt 2.1s +All GPU(s): step 150: loss 13.7891, lr 5.0e-04, dt 2.1s +All GPU(s): step 151: loss 9.9609, lr 5.0e-04, dt 2.1s +All GPU(s): step 152: loss 8.6250, lr 5.0e-04, dt 2.1s +All GPU(s): step 153: loss 11.9609, lr 5.0e-04, dt 2.1s +All GPU(s): step 154: loss 10.7500, lr 5.0e-04, dt 2.0s +All GPU(s): step 155: loss 8.5078, lr 5.0e-04, dt 2.0s +All GPU(s): step 156: loss 7.1094, lr 5.0e-04, dt 2.0s +All GPU(s): step 157: loss 8.8047, lr 5.0e-04, dt 2.1s +All GPU(s): step 158: loss 8.5117, lr 5.0e-04, dt 2.1s +All GPU(s): step 159: loss 6.3750, lr 5.0e-04, dt 2.1s +All GPU(s): step 160: loss 5.1621, lr 5.0e-04, dt 2.0s +All GPU(s): step 161: loss 7.3242, lr 5.0e-04, dt 2.1s +All GPU(s): step 162: loss 5.6914, lr 5.0e-04, dt 2.1s +All GPU(s): step 163: loss 4.1406, lr 5.0e-04, dt 2.1s +All GPU(s): step 164: loss 4.3398, lr 5.0e-04, dt 2.1s +All GPU(s): step 165: loss 4.3535, lr 5.0e-04, dt 2.1s +All GPU(s): step 166: loss 3.2207, lr 5.0e-04, dt 2.1s +All GPU(s): step 167: loss 3.3711, lr 5.0e-04, dt 2.2s +All GPU(s): step 168: loss 3.2188, lr 5.0e-04, dt 2.0s +All GPU(s): step 169: loss 3.5391, lr 5.0e-04, dt 2.0s +All GPU(s): step 170: loss 3.6074, lr 5.0e-04, dt 2.0s +All GPU(s): step 171: loss 3.1758, lr 5.0e-04, dt 2.0s +All GPU(s): step 172: loss 3.2539, lr 5.0e-04, dt 2.2s +All GPU(s): step 173: loss 3.1836, lr 5.0e-04, dt 2.0s +All GPU(s): step 174: loss 3.3008, lr 5.0e-04, dt 2.1s +All GPU(s): step 175: loss 3.7988, lr 5.0e-04, dt 2.0s +All GPU(s): step 176: loss 4.0859, lr 5.0e-04, dt 2.1s +All GPU(s): step 177: loss 3.3809, lr 5.0e-04, dt 2.2s +All GPU(s): step 178: loss 3.8730, lr 5.0e-04, dt 2.1s +All GPU(s): step 179: loss 5.3164, lr 5.0e-04, dt 2.1s +All GPU(s): step 180: loss 4.8398, lr 5.0e-04, dt 2.1s +All GPU(s): step 181: loss 3.2090, lr 5.0e-04, dt 2.0s +All GPU(s): step 182: loss 3.1992, lr 5.0e-04, dt 2.1s +All GPU(s): step 183: loss 3.1621, lr 5.0e-04, dt 2.0s +All GPU(s): step 184: loss 3.1406, lr 5.0e-04, dt 2.0s +All GPU(s): step 185: loss 4.5254, lr 5.0e-04, dt 1.9s +All GPU(s): step 186: loss 4.9512, lr 5.0e-04, dt 1.9s +All GPU(s): step 187: loss 3.7832, lr 5.0e-04, dt 2.0s +All GPU(s): step 188: loss 3.1113, lr 5.0e-04, dt 2.0s +All GPU(s): step 189: loss 3.9102, lr 5.0e-04, dt 1.9s +All GPU(s): step 190: loss 4.0195, lr 5.0e-04, dt 1.9s +All GPU(s): step 191: loss 4.0996, lr 5.0e-04, dt 1.9s +All GPU(s): step 192: loss 4.8379, lr 5.0e-04, dt 1.9s +All GPU(s): step 193: loss 5.5625, lr 5.0e-04, dt 1.8s +All GPU(s): step 194: loss 4.4551, lr 5.0e-04, dt 1.9s +All GPU(s): step 195: loss 4.9824, lr 5.0e-04, dt 1.9s +All GPU(s): step 196: loss 9.1953, lr 5.0e-04, dt 1.7s +All GPU(s): step 197: loss 8.0078, lr 5.0e-04, dt 1.8s +All GPU(s): step 198: loss 7.3477, lr 5.0e-04, dt 1.7s +All GPU(s): step 199: loss 9.3125, lr 5.0e-04, dt 1.7s +All GPU(s): step 200: loss 11.5273, lr 5.0e-04, dt 1.6s +All GPU(s): step 201: loss 13.5547, lr 5.0e-04, dt 1.6s +All GPU(s): step 202: loss 19.1797, lr 5.0e-04, dt 1.6s +All GPU(s): step 203: loss 12.5977, lr 5.0e-04, dt 1.8s +All GPU(s): step 204: loss 23.8125, lr 5.0e-04, dt 1.5s +All GPU(s): step 205: loss 28.8438, lr 5.0e-04, dt 1.4s +All GPU(s): step 206: loss 28.4688, lr 5.0e-04, dt 1.4s +All GPU(s): step 207: loss 26.6719, lr 5.0e-04, dt 1.4s +All GPU(s): step 208: loss 26.1094, lr 5.0e-04, dt 1.4s +All GPU(s): step 209: loss 29.4062, lr 5.0e-04, dt 1.4s +All GPU(s): step 210: loss 24.2812, lr 5.0e-04, dt 1.5s +All GPU(s): step 211: loss 27.3594, lr 5.0e-04, dt 1.4s +All GPU(s): step 212: loss 32.9219, lr 5.0e-04, dt 1.4s +All GPU(s): step 213: loss 25.2656, lr 5.0e-04, dt 1.4s +All GPU(s): step 214: loss 27.4844, lr 5.0e-04, dt 1.4s +All GPU(s): step 215: loss 25.0156, lr 5.0e-04, dt 1.4s +All GPU(s): step 216: loss 25.9531, lr 5.0e-04, dt 1.4s +All GPU(s): step 217: loss 25.3281, lr 5.0e-04, dt 1.5s +All GPU(s): step 218: loss 26.8438, lr 5.0e-04, dt 1.4s +All GPU(s): step 219: loss 25.8906, lr 5.0e-04, dt 1.4s +All GPU(s): step 220: loss 25.4219, lr 5.0e-04, dt 1.4s +All GPU(s): step 221: loss 27.6875, lr 5.0e-04, dt 1.4s +All GPU(s): step 222: loss 32.0156, lr 5.0e-04, dt 1.4s +All GPU(s): step 223: loss 29.8750, lr 5.0e-04, dt 1.4s +All GPU(s): step 224: loss 33.0000, lr 5.0e-04, dt 1.5s +All GPU(s): step 225: loss 35.1562, lr 5.0e-04, dt 1.4s +All GPU(s): step 226: loss 34.7188, lr 5.0e-04, dt 1.3s +All GPU(s): step 227: loss 39.6875, lr 5.0e-04, dt 1.3s +All GPU(s): step 228: loss 39.8750, lr 5.0e-04, dt 1.3s +All GPU(s): step 229: loss 51.0000, lr 5.0e-04, dt 1.2s +All GPU(s): step 230: loss 49.2188, lr 5.0e-04, dt 1.2s +All GPU(s): step 231: loss 48.9375, lr 5.0e-04, dt 1.3s +All GPU(s): step 232: loss 59.7812, lr 5.0e-04, dt 1.2s +All GPU(s): step 233: loss 57.8438, lr 5.0e-04, dt 1.2s +All GPU(s): step 234: loss 65.3438, lr 5.0e-04, dt 1.2s +All GPU(s): step 235: loss 80.3125, lr 5.0e-04, dt 1.1s +All GPU(s): step 236: loss 87.6875, lr 5.0e-04, dt 1.1s +All GPU(s): step 237: loss 112.0000, lr 5.0e-04, dt 1.0s +All GPU(s): step 238: loss 117.1875, lr 5.0e-04, dt 1.1s +All GPU(s): step 239: loss 125.8750, lr 5.0e-04, dt 1.0s +All GPU(s): step 240: loss 175.4375, lr 5.0e-04, dt 1.0s +All GPU(s): step 241: loss 248.2500, lr 5.0e-04, dt 0.9s +All GPU(s): step 242: loss 211.6250, lr 5.0e-04, dt 0.9s +All GPU(s): step 243: loss 269.7500, lr 5.0e-04, dt 0.9s +All GPU(s): step 244: loss 360.0000, lr 5.0e-04, dt 0.8s +All GPU(s): step 245: loss 706.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 246: loss 720.7500, lr 5.0e-04, dt 0.7s +All GPU(s): step 247: loss 1128.0000, lr 5.0e-04, dt 0.6s +All GPU(s): step 248: loss 1771.0000, lr 5.0e-04, dt 0.5s +All GPU(s): step 249: loss 1724.0000, lr 5.0e-04, dt 0.5s +All GPU(s): step 250: loss 761.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 251: loss 731.5000, lr 5.0e-04, dt 0.7s +All GPU(s): step 252: loss 1013.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 253: loss 1547.0000, lr 5.0e-04, dt 0.5s +All GPU(s): step 254: loss 1250.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 255: loss 799.5000, lr 5.0e-04, dt 0.8s +All GPU(s): step 256: loss 426.5000, lr 5.0e-04, dt 0.8s +All GPU(s): step 257: loss 579.0000, lr 5.0e-04, dt 0.7s +All GPU(s): step 258: loss 784.0000, lr 5.0e-04, dt 0.7s +All GPU(s): step 259: loss 942.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 260: loss 964.0000, lr 5.0e-04, dt 0.6s +All GPU(s): step 261: loss 1295.0000, lr 5.0e-04, dt 0.6s +All GPU(s): step 262: loss 2029.0000, lr 5.0e-04, dt 0.5s +All GPU(s): step 263: loss 3250.0000, lr 5.0e-04, dt 0.4s +All GPU(s): step 264: loss 7720.0000, lr 5.0e-04, dt 0.4s +All GPU(s): step 265: loss 10256.0000, lr 5.0e-04, dt 0.3s +All GPU(s): step 266: loss 16336.0000, lr 5.0e-04, dt 0.3s +All GPU(s): step 267: loss 15268.0000, lr 5.0e-04, dt 0.3s +All GPU(s): step 268: loss 12752.0000, lr 5.0e-04, dt 0.3s +All GPU(s): step 269: loss 14576.0000, lr 5.0e-04, dt 0.3s +All GPU(s): step 270: loss 7888.0000, lr 5.0e-04, dt 0.4s +All GPU(s): step 271: loss 4930.0000, lr 5.0e-04, dt 0.4s +All GPU(s): step 272: loss 4326.0000, lr 5.0e-04, dt 0.4s +All GPU(s): step 273: loss 4878.0000, lr 5.0e-04, dt 0.4s +All GPU(s): step 274: loss 4416.0000, lr 5.0e-04, dt 0.4s +All GPU(s): step 275: loss 4776.0000, lr 5.0e-04, dt 0.4s +All GPU(s): step 276: loss 5944.0000, lr 5.0e-04, dt 0.4s +All GPU(s): step 277: loss 7436.0000, lr 5.0e-04, dt 0.4s +All GPU(s): step 278: loss 6892.0000, lr 5.0e-04, dt 0.3s +All GPU(s): step 279: loss 7896.0000, lr 5.0e-04, dt 0.3s +All GPU(s): step 280: loss 11428.0000, lr 5.0e-04, dt 0.3s +All GPU(s): step 281: loss 9752.0000, lr 5.0e-04, dt 0.3s +All GPU(s): step 282: loss 4916.0000, lr 5.0e-04, dt 0.4s +All GPU(s): step 283: loss 5088.0000, lr 5.0e-04, dt 0.4s +All GPU(s): step 284: loss 4740.0000, lr 5.0e-04, dt 0.4s +All GPU(s): step 285: loss 4344.0000, lr 5.0e-04, dt 0.4s +All GPU(s): step 286: loss 3128.0000, lr 5.0e-04, dt 0.4s +All GPU(s): step 287: loss 2027.0000, lr 5.0e-04, dt 0.5s +All GPU(s): step 288: loss 1679.0000, lr 5.0e-04, dt 0.5s +All GPU(s): step 289: loss 2036.0000, lr 5.0e-04, dt 0.5s +All GPU(s): step 290: loss 1625.0000, lr 5.0e-04, dt 0.5s +All GPU(s): step 291: loss 1625.0000, lr 5.0e-04, dt 0.5s +All GPU(s): step 292: loss 967.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 293: loss 547.7500, lr 5.0e-04, dt 0.7s +All GPU(s): step 294: loss 503.0000, lr 5.0e-04, dt 0.8s +All GPU(s): step 295: loss 461.0000, lr 5.0e-04, dt 0.8s +All GPU(s): step 296: loss 417.5000, lr 5.0e-04, dt 0.8s +All GPU(s): step 297: loss 341.7500, lr 5.0e-04, dt 0.9s +All GPU(s): step 298: loss 284.0000, lr 5.0e-04, dt 0.8s +All GPU(s): step 299: loss 272.8750, lr 5.0e-04, dt 0.9s +All GPU(s): step 300: loss 441.7500, lr 5.0e-04, dt 0.8s +All GPU(s): step 301: loss 483.0000, lr 5.0e-04, dt 0.7s +All GPU(s): step 302: loss 478.2500, lr 5.0e-04, dt 0.7s +All GPU(s): step 303: loss 484.5000, lr 5.0e-04, dt 0.7s +All GPU(s): step 304: loss 554.7500, lr 5.0e-04, dt 0.8s +All GPU(s): step 305: loss 343.1250, lr 5.0e-04, dt 0.8s +All GPU(s): step 306: loss 339.5000, lr 5.0e-04, dt 0.8s +All GPU(s): step 307: loss 325.7500, lr 5.0e-04, dt 0.8s +All GPU(s): step 308: loss 568.0000, lr 5.0e-04, dt 0.7s +All GPU(s): step 309: loss 756.7500, lr 5.0e-04, dt 0.7s +All GPU(s): step 310: loss 988.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 311: loss 907.0000, lr 5.0e-04, dt 0.6s +All GPU(s): step 312: loss 818.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 313: loss 502.5000, lr 5.0e-04, dt 0.7s +All GPU(s): step 314: loss 386.2500, lr 5.0e-04, dt 0.8s +All GPU(s): step 315: loss 446.5000, lr 5.0e-04, dt 0.8s +All GPU(s): step 316: loss 402.0000, lr 5.0e-04, dt 0.8s +All GPU(s): step 317: loss 594.2500, lr 5.0e-04, dt 0.8s +All GPU(s): step 318: loss 757.0000, lr 5.0e-04, dt 0.7s +All GPU(s): step 319: loss 1101.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 320: loss 940.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 321: loss 1193.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 322: loss 800.0000, lr 5.0e-04, dt 0.7s +All GPU(s): step 323: loss 560.7500, lr 5.0e-04, dt 0.7s +All GPU(s): step 324: loss 576.0000, lr 5.0e-04, dt 0.8s +All GPU(s): step 325: loss 495.7500, lr 5.0e-04, dt 0.7s +All GPU(s): step 326: loss 476.7500, lr 5.0e-04, dt 0.7s +All GPU(s): step 327: loss 583.2500, lr 5.0e-04, dt 0.7s +All GPU(s): step 328: loss 679.5000, lr 5.0e-04, dt 0.7s +All GPU(s): step 329: loss 893.0000, lr 5.0e-04, dt 0.6s +All GPU(s): step 330: loss 798.2500, lr 5.0e-04, dt 0.7s +All GPU(s): step 331: loss 670.0000, lr 5.0e-04, dt 0.7s +All GPU(s): step 332: loss 556.0000, lr 5.0e-04, dt 0.8s +All GPU(s): step 333: loss 513.0000, lr 5.0e-04, dt 0.7s +All GPU(s): step 334: loss 587.0000, lr 5.0e-04, dt 0.7s +All GPU(s): step 335: loss 793.2500, lr 5.0e-04, dt 0.7s +All GPU(s): step 336: loss 899.0000, lr 5.0e-04, dt 0.7s +All GPU(s): step 337: loss 1074.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 338: loss 1143.0000, lr 5.0e-04, dt 0.6s +All GPU(s): step 339: loss 1712.0000, lr 5.0e-04, dt 0.5s +All GPU(s): step 340: loss 2363.0000, lr 5.0e-04, dt 0.5s +All GPU(s): step 341: loss 3006.0000, lr 5.0e-04, dt 0.5s +All GPU(s): step 342: loss 1445.0000, lr 5.0e-04, dt 0.5s +All GPU(s): step 343: loss 1268.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 344: loss 976.0000, lr 5.0e-04, dt 0.6s +All GPU(s): step 345: loss 1224.0000, lr 5.0e-04, dt 0.6s +All GPU(s): step 346: loss 1422.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 347: loss 969.0000, lr 5.0e-04, dt 0.6s +All GPU(s): step 348: loss 845.0000, lr 5.0e-04, dt 0.6s +All GPU(s): step 349: loss 1149.0000, lr 5.0e-04, dt 0.6s +All GPU(s): step 350: loss 1001.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 351: loss 554.5000, lr 5.0e-04, dt 0.7s +All GPU(s): step 352: loss 428.7500, lr 5.0e-04, dt 0.8s +All GPU(s): step 353: loss 440.0000, lr 5.0e-04, dt 0.8s +All GPU(s): step 354: loss 413.2500, lr 5.0e-04, dt 0.8s +All GPU(s): step 355: loss 374.7500, lr 5.0e-04, dt 0.9s +All GPU(s): step 356: loss 547.2500, lr 5.0e-04, dt 0.7s +All GPU(s): step 357: loss 945.0000, lr 5.0e-04, dt 0.6s +All GPU(s): step 358: loss 969.5000, lr 5.0e-04, dt 0.6s +All GPU(s): step 359: loss 938.0000, lr 5.0e-04, dt 0.6s +All GPU(s): step 360: loss 909.0000, lr 5.0e-04, dt 0.6s +All GPU(s): step 361: loss 645.5000, lr 5.0e-04, dt 0.7s +All GPU(s): step 362: loss 518.5000, lr 5.0e-04, dt 0.7s +All GPU(s): step 363: loss 543.2500, lr 5.0e-04, dt 0.7s +All GPU(s): step 364: loss 434.7500, lr 5.0e-04, dt 0.7s +All GPU(s): step 365: loss 497.7500, lr 5.0e-04, dt 0.7s +All GPU(s): step 366: loss 418.7500, lr 5.0e-04, dt 0.8s +All GPU(s): step 367: loss 455.0000, lr 5.0e-04, dt 0.8s +All GPU(s): step 368: loss 353.0000, lr 5.0e-04, dt 0.8s +All GPU(s): step 369: loss 292.3750, lr 5.0e-04, dt 0.8s +All GPU(s): step 370: loss 269.1250, lr 5.0e-04, dt 0.9s +All GPU(s): step 371: loss 173.2500, lr 5.0e-04, dt 1.0s +All GPU(s): step 372: loss 158.6875, lr 5.0e-04, dt 1.0s +All GPU(s): step 373: loss 130.7500, lr 5.0e-04, dt 1.0s +All GPU(s): step 374: loss 100.6250, lr 5.0e-04, dt 1.1s +All GPU(s): step 375: loss 111.5625, lr 5.0e-04, dt 1.1s +All GPU(s): step 376: loss 119.2500, lr 5.0e-04, dt 1.0s +All GPU(s): step 377: loss 125.4375, lr 5.0e-04, dt 1.0s +All GPU(s): step 378: loss 169.0000, lr 5.0e-04, dt 1.0s +All GPU(s): step 379: loss 169.3125, lr 5.0e-04, dt 1.0s +All GPU(s): step 380: loss 165.1250, lr 5.0e-04, dt 1.0s +All GPU(s): step 381: loss 164.6875, lr 5.0e-04, dt 1.0s +All GPU(s): step 382: loss 183.1250, lr 5.0e-04, dt 0.9s +All GPU(s): step 383: loss 155.8750, lr 5.0e-04, dt 1.0s +All GPU(s): step 384: loss 168.0000, lr 5.0e-04, dt 0.9s +All GPU(s): step 385: loss 175.5000, lr 5.0e-04, dt 0.9s +All GPU(s): step 386: loss 165.1250, lr 5.0e-04, dt 0.9s +All GPU(s): step 387: loss 150.6875, lr 5.0e-04, dt 1.0s +All GPU(s): step 388: loss 157.9375, lr 5.0e-04, dt 1.0s +All GPU(s): step 389: loss 152.7500, lr 5.0e-04, dt 0.9s +All GPU(s): step 390: loss 157.6250, lr 5.0e-04, dt 0.9s +All GPU(s): step 391: loss 188.1250, lr 5.0e-04, dt 0.9s +All GPU(s): step 392: loss 173.6250, lr 5.0e-04, dt 0.9s +All GPU(s): step 393: loss 153.0000, lr 5.0e-04, dt 0.9s +All GPU(s): step 394: loss 155.2500, lr 5.0e-04, dt 1.0s +All GPU(s): step 395: loss 116.8750, lr 5.0e-04, dt 1.0s +All GPU(s): step 396: loss 100.3750, lr 5.0e-04, dt 1.0s +All GPU(s): step 397: loss 93.6250, lr 5.0e-04, dt 1.1s +All GPU(s): step 398: loss 79.5000, lr 5.0e-04, dt 1.2s +All GPU(s): step 399: loss 65.0312, lr 5.0e-04, dt 1.2s +All GPU(s): step 400: loss 53.0938, lr 5.0e-04, dt 1.2s +All GPU(s): step 401: loss 53.2188, lr 5.0e-04, dt 1.3s +All GPU(s): step 402: loss 39.9219, lr 5.0e-04, dt 1.3s +All GPU(s): step 403: loss 52.0625, lr 5.0e-04, dt 1.2s +All GPU(s): step 404: loss 54.7500, lr 5.0e-04, dt 1.3s +All GPU(s): step 405: loss 42.5938, lr 5.0e-04, dt 1.3s +All GPU(s): step 406: loss 32.0469, lr 5.0e-04, dt 1.4s +All GPU(s): step 407: loss 24.5625, lr 5.0e-04, dt 1.4s +All GPU(s): step 408: loss 28.0156, lr 5.0e-04, dt 1.4s +All GPU(s): step 409: loss 33.4375, lr 5.0e-04, dt 1.3s +All GPU(s): step 410: loss 41.3125, lr 5.0e-04, dt 1.3s +All GPU(s): step 411: loss 38.4062, lr 5.0e-04, dt 1.3s +All GPU(s): step 412: loss 32.9531, lr 5.0e-04, dt 1.4s +All GPU(s): step 413: loss 39.1094, lr 5.0e-04, dt 1.4s +All GPU(s): step 414: loss 56.0312, lr 5.0e-04, dt 1.3s +All GPU(s): step 415: loss 49.9062, lr 5.0e-04, dt 1.3s +All GPU(s): step 416: loss 49.2812, lr 5.0e-04, dt 1.2s +All GPU(s): step 417: loss 53.8125, lr 5.0e-04, dt 1.2s +All GPU(s): step 418: loss 70.9688, lr 5.0e-04, dt 1.1s +All GPU(s): step 419: loss 76.4062, lr 5.0e-04, dt 1.1s +All GPU(s): step 420: loss 93.1250, lr 5.0e-04, dt 1.1s +All GPU(s): step 421: loss 90.8125, lr 5.0e-04, dt 1.1s +All GPU(s): step 422: loss 89.7188, lr 5.0e-04, dt 1.1s +All GPU(s): step 423: loss 122.3750, lr 5.0e-04, dt 1.1s +All GPU(s): step 424: loss 134.0625, lr 5.0e-04, dt 1.0s +All GPU(s): step 425: loss 135.6250, lr 5.0e-04, dt 1.0s +All GPU(s): step 426: loss 131.3750, lr 5.0e-04, dt 1.0s +All GPU(s): step 427: loss 153.5625, lr 5.0e-04, dt 1.0s +All GPU(s): step 428: loss 185.7500, lr 5.0e-04, dt 0.9s +All GPU(s): step 429: loss 216.2500, lr 5.0e-04, dt 0.9s +All GPU(s): step 430: loss 196.6250, lr 5.0e-04, dt 0.9s +All GPU(s): step 431: loss 254.6250, lr 5.0e-04, dt 0.9s +All GPU(s): step 432: loss 311.7500, lr 5.0e-04, dt 0.9s +All GPU(s): step 433: loss 217.2500, lr 5.0e-04, dt 0.9s +All GPU(s): step 434: loss 233.1250, lr 5.0e-04, dt 0.9s +All GPU(s): step 435: loss 226.3750, lr 5.0e-04, dt 0.9s +All GPU(s): step 436: loss 238.3750, lr 5.0e-04, dt 0.9s +All GPU(s): step 437: loss 231.3750, lr 5.0e-04, dt 0.9s +All GPU(s): step 438: loss 217.8750, lr 5.0e-04, dt 0.8s +All GPU(s): step 439: loss 239.1250, lr 5.0e-04, dt 0.9s +All GPU(s): step 440: loss 266.7500, lr 5.0e-04, dt 0.8s +All GPU(s): step 441: loss 288.0000, lr 5.0e-04, dt 0.9s +All GPU(s): step 442: loss 271.8750, lr 5.0e-04, dt 0.9s +All GPU(s): step 443: loss 260.0000, lr 5.0e-04, dt 0.9s +All GPU(s): step 444: loss 298.5000, lr 5.0e-04, dt 0.8s +All GPU(s): step 445: loss 288.6250, lr 5.0e-04, dt 0.8s +All GPU(s): step 446: loss 261.8750, lr 5.0e-04, dt 0.8s +All GPU(s): step 447: loss 256.1250, lr 5.0e-04, dt 0.8s +All GPU(s): step 448: loss 224.1250, lr 5.0e-04, dt 0.9s +All GPU(s): step 449: loss 179.2500, lr 5.0e-04, dt 0.9s +All GPU(s): step 450: loss 122.0000, lr 5.0e-04, dt 1.0s +All GPU(s): step 451: loss 88.6250, lr 5.0e-04, dt 1.1s +All GPU(s): step 452: loss 72.7188, lr 5.0e-04, dt 1.2s +All GPU(s): step 453: loss 52.4688, lr 5.0e-04, dt 1.3s +All GPU(s): step 454: loss 57.9062, lr 5.0e-04, dt 1.2s +All GPU(s): step 455: loss 55.9688, lr 5.0e-04, dt 1.2s +All GPU(s): step 456: loss 46.7500, lr 5.0e-04, dt 1.2s +All GPU(s): step 457: loss 35.2812, lr 5.0e-04, dt 1.4s +All GPU(s): step 458: loss 35.0938, lr 5.0e-04, dt 1.3s +All GPU(s): step 459: loss 32.0000, lr 5.0e-04, dt 1.3s +All GPU(s): step 460: loss 37.0938, lr 5.0e-04, dt 1.3s +All GPU(s): step 461: loss 53.3125, lr 5.0e-04, dt 1.3s +All GPU(s): step 462: loss 61.2188, lr 5.0e-04, dt 1.2s +All GPU(s): step 463: loss 84.6875, lr 5.0e-04, dt 1.1s +All GPU(s): step 464: loss 100.1875, lr 5.0e-04, dt 1.1s +All GPU(s): step 465: loss 110.1250, lr 5.0e-04, dt 1.1s +All GPU(s): step 466: loss 161.8750, lr 5.0e-04, dt 0.9s +All GPU(s): step 467: loss 178.2500, lr 5.0e-04, dt 0.9s +All GPU(s): step 468: loss 175.5000, lr 5.0e-04, dt 0.9s +All GPU(s): step 469: loss 197.7500, lr 5.0e-04, dt 1.0s +All GPU(s): step 470: loss 189.5000, lr 5.0e-04, dt 1.0s +All GPU(s): step 471: loss 152.0000, lr 5.0e-04, dt 1.0s +All GPU(s): step 472: loss 140.7500, lr 5.0e-04, dt 1.0s +All GPU(s): step 473: loss 133.0000, lr 5.0e-04, dt 1.0s +All GPU(s): step 474: loss 159.5000, lr 5.0e-04, dt 1.0s +All GPU(s): step 475: loss 1223.3125, lr 5.0e-04, dt 1.0s +All GPU(s): step 476: loss 155.4375, lr 5.0e-04, dt 1.0s +All GPU(s): step 477: loss 178.6250, lr 5.0e-04, dt 0.9s +All GPU(s): step 478: loss 160.0625, lr 5.0e-04, dt 1.0s +All GPU(s): step 479: loss 136.0000, lr 5.0e-04, dt 1.0s +All GPU(s): step 480: loss 124.9375, lr 5.0e-04, dt 1.1s +All GPU(s): step 481: loss 142.4375, lr 5.0e-04, dt 1.1s +All GPU(s): step 482: loss 110.8125, lr 5.0e-04, dt 1.1s +All GPU(s): step 483: loss 66.4688, lr 5.0e-04, dt 1.2s +All GPU(s): step 484: loss 56.5000, lr 5.0e-04, dt 1.2s +All GPU(s): step 485: loss 50.5312, lr 5.0e-04, dt 1.2s +All GPU(s): step 486: loss 47.2188, lr 5.0e-04, dt 1.3s +All GPU(s): step 487: loss 33.2500, lr 5.0e-04, dt 1.3s +All GPU(s): step 488: loss 35.8750, lr 5.0e-04, dt 1.5s +All GPU(s): step 489: loss 19.8281, lr 5.0e-04, dt 1.5s +All GPU(s): step 490: loss 17.9062, lr 5.0e-04, dt 1.4s +All GPU(s): step 491: loss 18.3828, lr 5.0e-04, dt 1.5s +All GPU(s): step 492: loss 19.4844, lr 5.0e-04, dt 1.5s +All GPU(s): step 493: loss 14.9141, lr 5.0e-04, dt 1.6s +All GPU(s): step 494: loss 11.8594, lr 5.0e-04, dt 1.6s +All GPU(s): step 495: loss 11.8516, lr 5.0e-04, dt 1.7s +All GPU(s): step 496: loss 11.6484, lr 5.0e-04, dt 1.6s +All GPU(s): step 497: loss 12.6094, lr 5.0e-04, dt 1.6s +All GPU(s): step 498: loss 14.6641, lr 5.0e-04, dt 1.6s +All GPU(s): step 499: loss 13.0859, lr 5.0e-04, dt 1.5s +All GPU(s): step 500: loss 10.3359, lr 5.0e-04, dt 1.6s +All GPU(s): step 501: loss 11.5312, lr 5.0e-04, dt 1.7s +All GPU(s): step 502: loss 11.7812, lr 5.0e-04, dt 1.6s +All GPU(s): step 503: loss 12.5078, lr 5.0e-04, dt 1.6s +All GPU(s): step 504: loss 10.4219, lr 5.0e-04, dt 1.6s +All GPU(s): step 505: loss 11.1367, lr 5.0e-04, dt 1.6s +All GPU(s): step 506: loss 11.4766, lr 5.0e-04, dt 1.6s +All GPU(s): step 507: loss 12.0703, lr 5.0e-04, dt 1.7s +All GPU(s): step 508: loss 13.6797, lr 5.0e-04, dt 1.6s +All GPU(s): step 509: loss 13.9062, lr 5.0e-04, dt 1.6s +All GPU(s): step 510: loss 19.4609, lr 5.0e-04, dt 1.5s +All GPU(s): step 511: loss 22.5000, lr 5.0e-04, dt 1.5s +All GPU(s): step 512: loss 31.1406, lr 5.0e-04, dt 1.4s +All GPU(s): step 513: loss 38.3281, lr 5.0e-04, dt 1.4s +All GPU(s): step 514: loss 41.2969, lr 5.0e-04, dt 1.4s +All GPU(s): step 515: loss 31.1094, lr 5.0e-04, dt 1.4s +All GPU(s): step 516: loss 27.4062, lr 5.0e-04, dt 1.4s +All GPU(s): step 517: loss 29.9531, lr 5.0e-04, dt 1.4s +All GPU(s): step 518: loss 29.2969, lr 5.0e-04, dt 1.4s +All GPU(s): step 519: loss 21.9375, lr 5.0e-04, dt 1.4s +All GPU(s): step 520: loss 18.2891, lr 5.0e-04, dt 1.5s +All GPU(s): step 521: loss 19.5938, lr 5.0e-04, dt 1.6s +All GPU(s): step 522: loss 16.6797, lr 5.0e-04, dt 1.5s +All GPU(s): step 523: loss 12.0156, lr 5.0e-04, dt 1.6s +All GPU(s): step 524: loss 12.5469, lr 5.0e-04, dt 1.6s +All GPU(s): step 525: loss 11.0703, lr 5.0e-04, dt 1.6s +All GPU(s): step 526: loss 10.5508, lr 5.0e-04, dt 1.6s +All GPU(s): step 527: loss 10.2031, lr 5.0e-04, dt 1.7s +All GPU(s): step 528: loss 10.5781, lr 5.0e-04, dt 1.6s +All GPU(s): step 529: loss 9.7188, lr 5.0e-04, dt 1.6s +All GPU(s): step 530: loss 10.5391, lr 5.0e-04, dt 1.6s +All GPU(s): step 531: loss 10.8438, lr 5.0e-04, dt 1.7s +All GPU(s): step 532: loss 10.8008, lr 5.0e-04, dt 1.6s +All GPU(s): step 533: loss 10.5938, lr 5.0e-04, dt 1.7s +All GPU(s): step 534: loss 10.7461, lr 5.0e-04, dt 1.6s +All GPU(s): step 535: loss 10.8984, lr 5.0e-04, dt 1.6s +All GPU(s): step 536: loss 10.6406, lr 5.0e-04, dt 1.6s +All GPU(s): step 537: loss 11.0234, lr 5.0e-04, dt 1.6s +All GPU(s): step 538: loss 13.1719, lr 5.0e-04, dt 1.6s +All GPU(s): step 539: loss 14.5938, lr 5.0e-04, dt 1.6s +All GPU(s): step 540: loss 13.0234, lr 5.0e-04, dt 1.7s +All GPU(s): step 541: loss 13.8828, lr 5.0e-04, dt 1.6s +All GPU(s): step 542: loss 15.0469, lr 5.0e-04, dt 1.5s +All GPU(s): step 543: loss 19.6016, lr 5.0e-04, dt 1.5s +All GPU(s): step 544: loss 21.4219, lr 5.0e-04, dt 1.5s +All GPU(s): step 545: loss 21.1562, lr 5.0e-04, dt 1.5s +All GPU(s): step 546: loss 21.2500, lr 5.0e-04, dt 1.5s +All GPU(s): step 547: loss 23.3672, lr 5.0e-04, dt 1.5s +All GPU(s): step 548: loss 17.9141, lr 5.0e-04, dt 1.5s +All GPU(s): step 549: loss 24.5938, lr 5.0e-04, dt 1.5s +All GPU(s): step 550: loss 30.0938, lr 5.0e-04, dt 1.4s +All GPU(s): step 551: loss 29.0000, lr 5.0e-04, dt 1.4s +All GPU(s): step 552: loss 28.3281, lr 5.0e-04, dt 1.5s +All GPU(s): step 553: loss 30.0938, lr 5.0e-04, dt 1.4s +All GPU(s): step 554: loss 37.5938, lr 5.0e-04, dt 1.3s +All GPU(s): step 555: loss 41.9531, lr 5.0e-04, dt 1.3s +All GPU(s): step 556: loss 32.6875, lr 5.0e-04, dt 1.3s +All GPU(s): step 557: loss 29.9062, lr 5.0e-04, dt 1.3s +All GPU(s): step 558: loss 39.8125, lr 5.0e-04, dt 1.3s +All GPU(s): step 559: loss 43.9062, lr 5.0e-04, dt 1.3s +All GPU(s): step 560: loss 55.5000, lr 5.0e-04, dt 1.3s +All GPU(s): step 561: loss 50.2812, lr 5.0e-04, dt 1.3s +All GPU(s): step 562: loss 62.7812, lr 5.0e-04, dt 1.2s +All GPU(s): step 563: loss 54.1875, lr 5.0e-04, dt 1.2s +All GPU(s): step 564: loss 51.2812, lr 5.0e-04, dt 1.3s +All GPU(s): step 565: loss 46.2188, lr 5.0e-04, dt 1.3s +All GPU(s): step 566: loss 53.3438, lr 5.0e-04, dt 1.3s +All GPU(s): step 567: loss 58.2188, lr 5.0e-04, dt 1.2s +All GPU(s): step 568: loss 40.3750, lr 5.0e-04, dt 1.4s +All GPU(s): step 569: loss 33.2812, lr 5.0e-04, dt 1.4s +All GPU(s): step 570: loss 42.9062, lr 5.0e-04, dt 1.3s +All GPU(s): step 571: loss 39.0312, lr 5.0e-04, dt 1.3s +All GPU(s): step 572: loss 29.6094, lr 5.0e-04, dt 1.4s +All GPU(s): step 573: loss 23.5469, lr 5.0e-04, dt 1.4s +All GPU(s): step 574: loss 37.2031, lr 5.0e-04, dt 1.3s +All GPU(s): step 575: loss 51.9375, lr 5.0e-04, dt 1.3s +All GPU(s): step 576: loss 53.8125, lr 5.0e-04, dt 1.3s +All GPU(s): step 577: loss 46.6250, lr 5.0e-04, dt 1.3s +All GPU(s): step 578: loss 77.3750, lr 5.0e-04, dt 1.1s +All GPU(s): step 579: loss 94.5000, lr 5.0e-04, dt 1.1s +All GPU(s): step 580: loss 115.5000, lr 5.0e-04, dt 1.0s +All GPU(s): step 581: loss 145.3750, lr 5.0e-04, dt 1.0s +All GPU(s): step 582: loss 162.6875, lr 5.0e-04, dt 1.0s +All GPU(s): step 583: loss 208.6250, lr 5.0e-04, dt 0.9s +All GPU(s): step 584: loss 241.8750, lr 5.0e-04, dt 0.9s +All GPU(s): step 585: loss 245.2500, lr 5.0e-04, dt 0.9s +All GPU(s): step 586: loss 231.3750, lr 5.0e-04, dt 0.9s +All GPU(s): step 587: loss 239.2500, lr 5.0e-04, dt 0.8s +All GPU(s): step 588: loss 234.1250, lr 5.0e-04, dt 0.9s +All GPU(s): step 589: loss 245.6250, lr 5.0e-04, dt 0.8s +All GPU(s): step 590: loss 226.2500, lr 5.0e-04, dt 0.9s +All GPU(s): step 591: loss 214.2500, lr 5.0e-04, dt 0.9s +All GPU(s): step 592: loss 211.8750, lr 5.0e-04, dt 0.9s +All GPU(s): step 593: loss 220.5000, lr 5.0e-04, dt 0.9s +All GPU(s): step 594: loss 198.0000, lr 5.0e-04, dt 0.9s +All GPU(s): step 595: loss 197.6250, lr 5.0e-04, dt 0.9s +All GPU(s): step 596: loss 205.2500, lr 5.0e-04, dt 1.0s +All GPU(s): step 597: loss 177.7500, lr 5.0e-04, dt 0.9s +All GPU(s): step 598: loss 156.7500, lr 5.0e-04, dt 1.0s +All GPU(s): step 599: loss 152.1875, lr 5.0e-04, dt 1.0s +All GPU(s): step 600: loss 124.6875, lr 5.0e-04, dt 1.0s +All GPU(s): step 601: loss 129.6250, lr 5.0e-04, dt 1.0s +All GPU(s): step 602: loss 104.2500, lr 5.0e-04, dt 1.0s +All GPU(s): step 603: loss 109.0000, lr 5.0e-04, dt 1.1s +All GPU(s): step 604: loss 86.2812, lr 5.0e-04, dt 1.2s +All GPU(s): step 605: loss 98.2500, lr 5.0e-04, dt 1.1s +All GPU(s): step 606: loss 95.1875, lr 5.0e-04, dt 1.1s +All GPU(s): step 607: loss 90.8125, lr 5.0e-04, dt 1.1s +All GPU(s): step 608: loss 73.0312, lr 5.0e-04, dt 1.1s +All GPU(s): step 609: loss 100.7500, lr 5.0e-04, dt 1.1s +All GPU(s): step 610: loss 102.1875, lr 5.0e-04, dt 1.1s +All GPU(s): step 611: loss 136.1875, lr 5.0e-04, dt 1.0s +All GPU(s): step 612: loss 135.8125, lr 5.0e-04, dt 1.0s +All GPU(s): step 613: loss 142.5625, lr 5.0e-04, dt 1.0s +All GPU(s): step 614: loss 184.1250, lr 5.0e-04, dt 0.9s +All GPU(s): step 615: loss 208.2500, lr 5.0e-04, dt 1.0s +All GPU(s): step 616: loss 211.2500, lr 5.0e-04, dt 1.0s +All GPU(s): step 617: loss 176.0000, lr 5.0e-04, dt 0.9s +All GPU(s): step 618: loss 164.8125, lr 5.0e-04, dt 1.0s +All GPU(s): step 619: loss 113.8750, lr 5.0e-04, dt 1.0s +All GPU(s): step 620: loss 89.5625, lr 5.0e-04, dt 1.1s +All GPU(s): step 621: loss 84.5625, lr 5.0e-04, dt 1.1s +All GPU(s): step 622: loss 76.1250, lr 5.0e-04, dt 1.1s +All GPU(s): step 623: loss 73.0312, lr 5.0e-04, dt 1.1s +All GPU(s): step 624: loss 76.6250, lr 5.0e-04, dt 1.2s +All GPU(s): step 625: loss 88.1875, lr 5.0e-04, dt 1.2s +All GPU(s): step 626: loss 68.7500, lr 5.0e-04, dt 1.2s +All GPU(s): step 627: loss 62.9062, lr 5.0e-04, dt 1.2s +All GPU(s): step 628: loss 74.6250, lr 5.0e-04, dt 1.2s +All GPU(s): step 629: loss 80.4688, lr 5.0e-04, dt 1.1s +All GPU(s): step 630: loss 76.9375, lr 5.0e-04, dt 1.1s +All GPU(s): step 631: loss 86.1250, lr 5.0e-04, dt 1.1s +All GPU(s): step 632: loss 93.5312, lr 5.0e-04, dt 1.1s +All GPU(s): step 633: loss 88.5625, lr 5.0e-04, dt 1.2s +All GPU(s): step 634: loss 85.1875, lr 5.0e-04, dt 1.2s +All GPU(s): step 635: loss 79.0938, lr 5.0e-04, dt 1.2s +All GPU(s): step 636: loss 80.3438, lr 5.0e-04, dt 1.1s +All GPU(s): step 637: loss 88.3125, lr 5.0e-04, dt 1.1s +All GPU(s): step 638: loss 87.8125, lr 5.0e-04, dt 1.1s +All GPU(s): step 639: loss 87.8438, lr 5.0e-04, dt 1.2s +All GPU(s): step 640: loss 98.9375, lr 5.0e-04, dt 1.1s +All GPU(s): step 641: loss 89.3125, lr 5.0e-04, dt 1.1s +All GPU(s): step 642: loss 91.8125, lr 5.0e-04, dt 1.1s +All GPU(s): step 643: loss 106.4375, lr 5.0e-04, dt 1.1s +All GPU(s): step 644: loss 130.8750, lr 5.0e-04, dt 1.0s +All GPU(s): step 645: loss 97.3125, lr 5.0e-04, dt 1.1s +All GPU(s): step 646: loss 69.7188, lr 5.0e-04, dt 1.2s +All GPU(s): step 647: loss 78.7812, lr 5.0e-04, dt 1.1s +All GPU(s): step 648: loss 99.0625, lr 5.0e-04, dt 1.1s +All GPU(s): step 649: loss 75.2812, lr 5.0e-04, dt 1.2s +All GPU(s): step 650: loss 70.3750, lr 5.0e-04, dt 1.2s +All GPU(s): step 651: loss 101.3125, lr 5.0e-04, dt 1.1s +All GPU(s): step 652: loss 107.1250, lr 5.0e-04, dt 1.1s +All GPU(s): step 653: loss 107.8125, lr 5.0e-04, dt 1.1s +All GPU(s): step 654: loss 76.6562, lr 5.0e-04, dt 1.1s +All GPU(s): step 655: loss 87.2812, lr 5.0e-04, dt 1.1s +All GPU(s): step 656: loss 102.0000, lr 5.0e-04, dt 1.1s +All GPU(s): step 657: loss 87.0625, lr 5.0e-04, dt 1.1s +All GPU(s): step 658: loss 92.3750, lr 5.0e-04, dt 1.1s +All GPU(s): step 659: loss 95.6875, lr 5.0e-04, dt 1.1s +All GPU(s): step 660: loss 94.5000, lr 5.0e-04, dt 1.1s +All GPU(s): step 661: loss 91.0625, lr 5.0e-04, dt 1.1s +All GPU(s): step 662: loss 71.8125, lr 5.0e-04, dt 1.2s +All GPU(s): step 663: loss 66.3125, lr 5.0e-04, dt 1.2s +All GPU(s): step 664: loss 86.9688, lr 5.0e-04, dt 1.1s +All GPU(s): step 665: loss 87.8750, lr 5.0e-04, dt 1.1s +All GPU(s): step 666: loss 68.5312, lr 5.0e-04, dt 1.2s +All GPU(s): step 667: loss 104.2500, lr 5.0e-04, dt 1.1s +All GPU(s): step 668: loss 104.8750, lr 5.0e-04, dt 1.1s +All GPU(s): step 669: loss 119.1250, lr 5.0e-04, dt 1.0s +All GPU(s): step 670: loss 98.7500, lr 5.0e-04, dt 1.1s +All GPU(s): step 671: loss 89.0000, lr 5.0e-04, dt 1.1s +All GPU(s): step 672: loss 109.3750, lr 5.0e-04, dt 1.1s +All GPU(s): step 673: loss 2807.8125, lr 5.0e-04, dt 1.0s +All GPU(s): step 674: loss 137.0625, lr 5.0e-04, dt 1.1s +All GPU(s): step 675: loss 133.5625, lr 5.0e-04, dt 1.0s +All GPU(s): step 676: loss 122.8750, lr 5.0e-04, dt 1.0s +All GPU(s): step 677: loss 145.3125, lr 5.0e-04, dt 1.0s +All GPU(s): step 678: loss 132.0000, lr 5.0e-04, dt 1.0s +All GPU(s): step 679: loss 112.0000, lr 5.0e-04, dt 1.1s +All GPU(s): step 680: loss 132.3750, lr 5.0e-04, dt 1.1s +All GPU(s): step 681: loss 146.1875, lr 5.0e-04, dt 1.0s +All GPU(s): step 682: loss 139.0000, lr 5.0e-04, dt 1.0s +All GPU(s): step 683: loss 127.0625, lr 5.0e-04, dt 1.0s +All GPU(s): step 684: loss 116.5625, lr 5.0e-04, dt 1.1s +All GPU(s): step 685: loss 121.6875, lr 5.0e-04, dt 1.0s +All GPU(s): step 686: loss 122.5625, lr 5.0e-04, dt 1.0s +All GPU(s): step 687: loss 120.3750, lr 5.0e-04, dt 1.1s +All GPU(s): step 688: loss 103.3125, lr 5.0e-04, dt 1.1s +All GPU(s): step 689: loss 94.6250, lr 5.0e-04, dt 1.1s +All GPU(s): step 690: loss 95.8750, lr 5.0e-04, dt 1.1s +All GPU(s): step 691: loss 103.7500, lr 5.0e-04, dt 1.0s +All GPU(s): step 692: loss 116.1250, lr 5.0e-04, dt 1.1s +All GPU(s): step 693: loss 113.1875, lr 5.0e-04, dt 1.0s +All GPU(s): step 694: loss 119.9375, lr 5.0e-04, dt 1.0s +All GPU(s): step 695: loss 136.8750, lr 5.0e-04, dt 1.0s +All GPU(s): step 696: loss 127.3750, lr 5.0e-04, dt 1.0s +All GPU(s): step 697: loss 92.2500, lr 5.0e-04, dt 1.2s +All GPU(s): step 698: loss 98.6250, lr 5.0e-04, dt 1.1s +All GPU(s): step 699: loss 88.1875, lr 5.0e-04, dt 1.1s +All GPU(s): step 700: loss 71.9062, lr 5.0e-04, dt 1.1s +All GPU(s): step 701: loss 62.3125, lr 5.0e-04, dt 1.2s +All GPU(s): step 702: loss 64.1562, lr 5.0e-04, dt 1.3s +All GPU(s): step 703: loss 63.7188, lr 5.0e-04, dt 1.2s +All GPU(s): step 704: loss 74.8750, lr 5.0e-04, dt 1.2s +All GPU(s): step 705: loss 103.0625, lr 5.0e-04, dt 1.1s +All GPU(s): step 706: loss 123.4375, lr 5.0e-04, dt 1.1s +All GPU(s): step 707: loss 144.5625, lr 5.0e-04, dt 1.0s +All GPU(s): step 708: loss 183.1250, lr 5.0e-04, dt 1.0s +All GPU(s): step 709: loss 173.2500, lr 5.0e-04, dt 0.9s +All GPU(s): step 710: loss 183.7500, lr 5.0e-04, dt 0.9s +All GPU(s): step 711: loss 185.0000, lr 5.0e-04, dt 0.9s +All GPU(s): step 712: loss 164.7500, lr 5.0e-04, dt 1.0s +All GPU(s): step 713: loss 171.0625, lr 5.0e-04, dt 1.0s +All GPU(s): step 714: loss 125.5625, lr 5.0e-04, dt 1.0s +All GPU(s): step 715: loss 105.6875, lr 5.0e-04, dt 1.1s +All GPU(s): step 716: loss 130.5625, lr 5.0e-04, dt 1.0s +All GPU(s): step 717: loss 102.3750, lr 5.0e-04, dt 1.1s +All GPU(s): step 718: loss 59.5312, lr 5.0e-04, dt 1.2s +All GPU(s): step 719: loss 70.2812, lr 5.0e-04, dt 1.2s +All GPU(s): step 720: loss 92.1250, lr 5.0e-04, dt 1.1s +All GPU(s): step 721: loss 78.8438, lr 5.0e-04, dt 1.2s +All GPU(s): step 722: loss 74.4688, lr 5.0e-04, dt 1.1s +All GPU(s): step 723: loss 103.7500, lr 5.0e-04, dt 1.1s +All GPU(s): step 724: loss 84.0000, lr 5.0e-04, dt 1.2s +All GPU(s): step 725: loss 91.6250, lr 5.0e-04, dt 1.1s +All GPU(s): step 726: loss 114.6250, lr 5.0e-04, dt 1.0s +All GPU(s): step 727: loss 142.0000, lr 5.0e-04, dt 1.0s +All GPU(s): step 728: loss 109.5000, lr 5.0e-04, dt 1.0s +All GPU(s): step 729: loss 72.0938, lr 5.0e-04, dt 1.2s +All GPU(s): step 730: loss 54.6875, lr 5.0e-04, dt 1.2s +All GPU(s): step 731: loss 58.4062, lr 5.0e-04, dt 1.2s +All GPU(s): step 732: loss 45.3438, lr 5.0e-04, dt 1.3s +All GPU(s): step 733: loss 34.7656, lr 5.0e-04, dt 1.4s +All GPU(s): step 734: loss 20.4844, lr 5.0e-04, dt 1.5s +All GPU(s): step 735: loss 20.2188, lr 5.0e-04, dt 1.5s +All GPU(s): step 736: loss 17.3438, lr 5.0e-04, dt 1.5s +All GPU(s): step 737: loss 23.3672, lr 5.0e-04, dt 1.5s +All GPU(s): step 738: loss 20.4688, lr 5.0e-04, dt 1.4s +All GPU(s): step 739: loss 21.2188, lr 5.0e-04, dt 1.5s +All GPU(s): step 740: loss 18.7188, lr 5.0e-04, dt 1.6s +All GPU(s): step 741: loss 20.0234, lr 5.0e-04, dt 1.5s +All GPU(s): step 742: loss 16.5000, lr 5.0e-04, dt 1.5s +All GPU(s): step 743: loss 16.5547, lr 5.0e-04, dt 1.5s +All GPU(s): step 744: loss 22.8750, lr 5.0e-04, dt 1.5s +All GPU(s): step 745: loss 22.8906, lr 5.0e-04, dt 1.4s +All GPU(s): step 746: loss 30.4062, lr 5.0e-04, dt 1.5s +All GPU(s): step 747: loss 37.9531, lr 5.0e-04, dt 1.3s +All GPU(s): step 748: loss 57.6875, lr 5.0e-04, dt 1.2s +All GPU(s): step 749: loss 78.7188, lr 5.0e-04, dt 1.1s +All GPU(s): step 750: loss 91.2500, lr 5.0e-04, dt 1.1s +All GPU(s): step 751: loss 94.3125, lr 5.0e-04, dt 1.1s +All GPU(s): step 752: loss 74.9375, lr 5.0e-04, dt 1.1s +All GPU(s): step 753: loss 67.0312, lr 5.0e-04, dt 1.1s +All GPU(s): step 754: loss 69.2188, lr 5.0e-04, dt 1.2s +All GPU(s): step 755: loss 63.9062, lr 5.0e-04, dt 1.3s +All GPU(s): step 756: loss 46.2812, lr 5.0e-04, dt 1.2s +All GPU(s): step 757: loss 54.3438, lr 5.0e-04, dt 1.2s +All GPU(s): step 758: loss 57.0938, lr 5.0e-04, dt 1.3s +All GPU(s): step 759: loss 66.3125, lr 5.0e-04, dt 1.2s +All GPU(s): step 760: loss 61.9375, lr 5.0e-04, dt 1.2s +All GPU(s): step 761: loss 66.1562, lr 5.0e-04, dt 1.2s +All GPU(s): step 762: loss 75.0312, lr 5.0e-04, dt 1.2s +All GPU(s): step 763: loss 68.4062, lr 5.0e-04, dt 1.2s +All GPU(s): step 764: loss 62.4375, lr 5.0e-04, dt 1.2s +All GPU(s): step 765: loss 70.9062, lr 5.0e-04, dt 1.2s +All GPU(s): step 766: loss 85.6875, lr 5.0e-04, dt 1.1s +All GPU(s): step 767: loss 99.2500, lr 5.0e-04, dt 1.1s +All GPU(s): step 768: loss 86.0938, lr 5.0e-04, dt 1.1s +All GPU(s): step 769: loss 90.7500, lr 5.0e-04, dt 1.1s +All GPU(s): step 770: loss 87.6250, lr 5.0e-04, dt 1.1s +All GPU(s): step 771: loss 77.1250, lr 5.0e-04, dt 1.2s +All GPU(s): step 772: loss 74.7500, lr 5.0e-04, dt 1.2s +All GPU(s): step 773: loss 69.7812, lr 4.9e-04, dt 1.3s +All GPU(s): step 774: loss 73.5312, lr 4.9e-04, dt 1.2s +All GPU(s): step 775: loss 45.3438, lr 4.9e-04, dt 1.3s +All GPU(s): step 776: loss 38.5312, lr 4.9e-04, dt 1.3s +All GPU(s): step 777: loss 43.9375, lr 4.9e-04, dt 1.3s +All GPU(s): step 778: loss 36.1250, lr 4.9e-04, dt 1.3s +All GPU(s): step 779: loss 32.4844, lr 4.9e-04, dt 1.4s +All GPU(s): step 780: loss 36.3125, lr 4.9e-04, dt 1.4s +All GPU(s): step 781: loss 35.5156, lr 4.9e-04, dt 1.4s +All GPU(s): step 782: loss 32.8594, lr 4.9e-04, dt 1.3s +All GPU(s): step 783: loss 32.2031, lr 4.9e-04, dt 1.3s +All GPU(s): step 784: loss 32.8281, lr 4.9e-04, dt 1.3s +All GPU(s): step 785: loss 40.0938, lr 4.9e-04, dt 1.3s +All GPU(s): step 786: loss 55.5938, lr 4.9e-04, dt 1.2s +All GPU(s): step 787: loss 64.0938, lr 4.9e-04, dt 1.2s +All GPU(s): step 788: loss 71.6875, lr 4.9e-04, dt 1.2s +All GPU(s): step 789: loss 77.0000, lr 4.9e-04, dt 1.1s +All GPU(s): step 790: loss 72.0938, lr 4.9e-04, dt 1.2s +All GPU(s): step 791: loss 60.0938, lr 4.9e-04, dt 1.2s +All GPU(s): step 792: loss 49.0312, lr 4.9e-04, dt 1.3s +All GPU(s): step 793: loss 48.8438, lr 4.9e-04, dt 1.3s +All GPU(s): step 794: loss 60.4375, lr 4.9e-04, dt 1.2s +All GPU(s): step 795: loss 62.4375, lr 4.9e-04, dt 1.3s +All GPU(s): step 796: loss 63.8750, lr 4.9e-04, dt 1.2s +All GPU(s): step 797: loss 43.5156, lr 4.9e-04, dt 1.3s +All GPU(s): step 798: loss 39.0000, lr 4.9e-04, dt 1.3s +All GPU(s): step 799: loss 37.0625, lr 4.9e-04, dt 1.3s +All GPU(s): step 800: loss 50.9062, lr 4.9e-04, dt 1.3s +All GPU(s): step 801: loss 60.3125, lr 4.9e-04, dt 1.2s +All GPU(s): step 802: loss 85.1250, lr 4.9e-04, dt 1.2s +All GPU(s): step 803: loss 95.1250, lr 4.9e-04, dt 1.2s +All GPU(s): step 804: loss 95.0312, lr 4.9e-04, dt 1.2s +All GPU(s): step 805: loss 78.0000, lr 4.9e-04, dt 1.1s +All GPU(s): step 806: loss 65.0625, lr 4.9e-04, dt 1.2s +All GPU(s): step 807: loss 64.5938, lr 4.9e-04, dt 1.3s +All GPU(s): step 808: loss 61.2188, lr 4.9e-04, dt 1.2s +All GPU(s): step 809: loss 46.9688, lr 4.9e-04, dt 1.3s +All GPU(s): step 810: loss 38.4219, lr 4.9e-04, dt 1.4s +All GPU(s): step 811: loss 25.1875, lr 4.9e-04, dt 1.5s +All GPU(s): step 812: loss 22.8281, lr 4.9e-04, dt 1.5s +All GPU(s): step 813: loss 30.1094, lr 4.9e-04, dt 1.4s +All GPU(s): step 814: loss 24.0156, lr 4.9e-04, dt 1.4s +All GPU(s): step 815: loss 16.6641, lr 4.9e-04, dt 1.5s +All GPU(s): step 816: loss 19.1016, lr 4.9e-04, dt 1.5s +All GPU(s): step 817: loss 27.4688, lr 4.9e-04, dt 1.5s +All GPU(s): step 818: loss 29.6094, lr 4.9e-04, dt 1.4s +All GPU(s): step 819: loss 16.3906, lr 4.9e-04, dt 1.5s +All GPU(s): step 820: loss 11.7109, lr 4.9e-04, dt 1.6s +All GPU(s): step 821: loss 17.5078, lr 4.9e-04, dt 1.5s +All GPU(s): step 822: loss 14.5391, lr 4.9e-04, dt 1.5s +All GPU(s): step 823: loss 13.3594, lr 4.9e-04, dt 1.6s +All GPU(s): step 824: loss 10.2539, lr 4.9e-04, dt 1.7s +All GPU(s): step 825: loss 8.8906, lr 4.9e-04, dt 1.7s +All GPU(s): step 826: loss 9.9102, lr 4.9e-04, dt 1.7s +All GPU(s): step 827: loss 9.0273, lr 4.9e-04, dt 1.7s +All GPU(s): step 828: loss 9.4062, lr 4.9e-04, dt 1.6s +All GPU(s): step 829: loss 10.0000, lr 4.9e-04, dt 1.6s +All GPU(s): step 830: loss 9.4336, lr 4.9e-04, dt 1.8s +All GPU(s): step 831: loss 8.8750, lr 4.9e-04, dt 1.6s +All GPU(s): step 832: loss 8.9062, lr 4.9e-04, dt 1.6s +All GPU(s): step 833: loss 11.5938, lr 4.9e-04, dt 1.6s +All GPU(s): step 834: loss 10.1250, lr 4.9e-04, dt 1.6s +All GPU(s): step 835: loss 10.2891, lr 4.9e-04, dt 1.6s +All GPU(s): step 836: loss 12.7500, lr 4.9e-04, dt 1.7s +All GPU(s): step 837: loss 12.6328, lr 4.9e-04, dt 1.6s +All GPU(s): step 838: loss 11.3867, lr 4.9e-04, dt 1.6s +All GPU(s): step 839: loss 15.8750, lr 4.9e-04, dt 1.5s +All GPU(s): step 840: loss 20.1562, lr 4.9e-04, dt 1.5s +All GPU(s): step 841: loss 28.9844, lr 4.9e-04, dt 1.4s +All GPU(s): step 842: loss 28.5156, lr 4.9e-04, dt 1.4s +All GPU(s): step 843: loss 39.8281, lr 4.9e-04, dt 1.5s +All GPU(s): step 844: loss 47.1250, lr 4.9e-04, dt 1.3s +All GPU(s): step 845: loss 49.9688, lr 4.9e-04, dt 1.3s +All GPU(s): step 846: loss 59.6875, lr 4.9e-04, dt 1.3s +All GPU(s): step 847: loss 72.3750, lr 4.9e-04, dt 1.2s +All GPU(s): step 848: loss 46.6562, lr 4.9e-04, dt 1.3s +All GPU(s): step 849: loss 26.3906, lr 4.9e-04, dt 1.4s +All GPU(s): step 850: loss 20.2109, lr 4.9e-04, dt 1.6s +All GPU(s): step 851: loss 20.7578, lr 4.9e-04, dt 1.6s +All GPU(s): step 852: loss 15.5508, lr 4.9e-04, dt 1.7s +All GPU(s): step 853: loss 13.0078, lr 4.9e-04, dt 1.6s +All GPU(s): step 854: loss 11.6406, lr 4.9e-04, dt 1.6s +All GPU(s): step 855: loss 10.8828, lr 4.9e-04, dt 1.6s +All GPU(s): step 856: loss 8.2891, lr 4.9e-04, dt 1.7s +All GPU(s): step 857: loss 9.2734, lr 4.9e-04, dt 1.7s +All GPU(s): step 858: loss 8.9766, lr 4.9e-04, dt 1.6s +All GPU(s): step 859: loss 9.6133, lr 4.9e-04, dt 1.6s +All GPU(s): step 860: loss 10.9141, lr 4.9e-04, dt 1.6s +All GPU(s): step 861: loss 9.5156, lr 4.9e-04, dt 1.6s +All GPU(s): step 862: loss 8.0000, lr 4.9e-04, dt 1.7s +All GPU(s): step 863: loss 8.9492, lr 4.9e-04, dt 1.7s +All GPU(s): step 864: loss 10.3281, lr 4.9e-04, dt 1.6s +All GPU(s): step 865: loss 12.8750, lr 4.9e-04, dt 1.6s +All GPU(s): step 866: loss 13.8594, lr 4.9e-04, dt 1.5s +All GPU(s): step 867: loss 17.7188, lr 4.9e-04, dt 1.5s +All GPU(s): step 868: loss 34.2344, lr 4.9e-04, dt 1.4s +All GPU(s): step 869: loss 43.6250, lr 4.9e-04, dt 1.4s +All GPU(s): step 870: loss 60.5625, lr 4.9e-04, dt 1.2s +All GPU(s): step 871: loss 87.6250, lr 4.9e-04, dt 1.1s +All GPU(s): step 872: loss 94.4375, lr 4.9e-04, dt 1.1s +All GPU(s): step 873: loss 91.5625, lr 4.9e-04, dt 1.1s +All GPU(s): step 874: loss 66.7188, lr 4.9e-04, dt 1.2s +All GPU(s): step 875: loss 49.0625, lr 4.9e-04, dt 1.3s +All GPU(s): step 876: loss 41.2188, lr 4.9e-04, dt 1.3s +All GPU(s): step 877: loss 28.7656, lr 4.9e-04, dt 1.5s +All GPU(s): step 878: loss 21.6562, lr 4.9e-04, dt 1.5s +All GPU(s): step 879: loss 15.4297, lr 4.9e-04, dt 1.5s +All GPU(s): step 880: loss 13.0703, lr 4.9e-04, dt 1.6s +All GPU(s): step 881: loss 10.7695, lr 4.9e-04, dt 1.6s +All GPU(s): step 882: loss 11.3672, lr 4.9e-04, dt 1.6s +All GPU(s): step 883: loss 10.9062, lr 4.9e-04, dt 1.6s +All GPU(s): step 884: loss 12.3438, lr 4.9e-04, dt 1.6s +All GPU(s): step 885: loss 11.9219, lr 4.9e-04, dt 1.6s +All GPU(s): step 886: loss 10.7461, lr 4.9e-04, dt 1.6s +All GPU(s): step 887: loss 15.5938, lr 4.9e-04, dt 1.5s +All GPU(s): step 888: loss 21.5000, lr 4.9e-04, dt 1.4s +All GPU(s): step 889: loss 26.0000, lr 4.9e-04, dt 1.4s +All GPU(s): step 890: loss 30.1250, lr 4.9e-04, dt 1.4s +All GPU(s): step 891: loss 29.4219, lr 4.9e-04, dt 1.4s +All GPU(s): step 892: loss 25.0312, lr 4.9e-04, dt 1.5s +All GPU(s): step 893: loss 33.8906, lr 4.9e-04, dt 1.4s +All GPU(s): step 894: loss 34.3438, lr 4.9e-04, dt 1.4s +All GPU(s): step 895: loss 24.3438, lr 4.9e-04, dt 1.5s +All GPU(s): step 896: loss 22.6406, lr 4.9e-04, dt 1.4s +All GPU(s): step 897: loss 13.3672, lr 4.9e-04, dt 1.7s +All GPU(s): step 898: loss 11.7891, lr 4.9e-04, dt 1.6s +All GPU(s): step 899: loss 11.7891, lr 4.9e-04, dt 1.6s +All GPU(s): step 900: loss 9.3125, lr 4.9e-04, dt 1.6s +All GPU(s): step 901: loss 8.4883, lr 4.9e-04, dt 1.6s +All GPU(s): step 902: loss 8.3281, lr 4.9e-04, dt 1.6s +All GPU(s): step 903: loss 7.7461, lr 4.9e-04, dt 1.8s +All GPU(s): step 904: loss 7.4258, lr 4.9e-04, dt 1.7s +All GPU(s): step 905: loss 8.4844, lr 4.9e-04, dt 1.6s +All GPU(s): step 906: loss 8.6484, lr 4.9e-04, dt 1.7s +All GPU(s): step 907: loss 9.0977, lr 4.9e-04, dt 1.6s +All GPU(s): step 908: loss 8.6914, lr 4.9e-04, dt 1.6s +All GPU(s): step 909: loss 9.9180, lr 4.9e-04, dt 1.7s +All GPU(s): step 910: loss 10.3320, lr 4.9e-04, dt 1.6s +All GPU(s): step 911: loss 8.9609, lr 4.9e-04, dt 1.6s +All GPU(s): step 912: loss 11.8398, lr 4.9e-04, dt 1.6s +All GPU(s): step 913: loss 11.0469, lr 4.9e-04, dt 1.6s +All GPU(s): step 914: loss 13.2188, lr 4.9e-04, dt 1.5s +All GPU(s): step 915: loss 11.1211, lr 4.9e-04, dt 1.7s +All GPU(s): step 916: loss 13.2109, lr 4.9e-04, dt 1.6s +All GPU(s): step 917: loss 17.7500, lr 4.9e-04, dt 1.5s +All GPU(s): step 918: loss 20.5938, lr 4.9e-04, dt 1.4s +All GPU(s): step 919: loss 23.9375, lr 4.9e-04, dt 1.5s +All GPU(s): step 920: loss 23.3047, lr 4.9e-04, dt 1.4s +All GPU(s): step 921: loss 23.6328, lr 4.9e-04, dt 1.5s +All GPU(s): step 922: loss 18.1953, lr 4.9e-04, dt 1.6s +All GPU(s): step 923: loss 15.6172, lr 4.9e-04, dt 1.6s +All GPU(s): step 924: loss 14.0000, lr 4.9e-04, dt 1.5s +All GPU(s): step 925: loss 11.7969, lr 4.9e-04, dt 1.5s +All GPU(s): step 926: loss 9.1055, lr 4.9e-04, dt 1.6s +All GPU(s): step 927: loss 7.6875, lr 4.9e-04, dt 1.6s +All GPU(s): step 928: loss 7.9531, lr 4.9e-04, dt 1.8s +All GPU(s): step 929: loss 6.5586, lr 4.9e-04, dt 1.7s +All GPU(s): step 930: loss 6.5312, lr 4.9e-04, dt 1.7s +All GPU(s): step 931: loss 6.4961, lr 4.9e-04, dt 1.7s +All GPU(s): step 932: loss 6.7852, lr 4.9e-04, dt 1.7s +All GPU(s): step 933: loss 7.4648, lr 4.9e-04, dt 1.7s +All GPU(s): step 934: loss 7.8047, lr 4.9e-04, dt 1.8s +All GPU(s): step 935: loss 7.2461, lr 4.9e-04, dt 1.7s +All GPU(s): step 936: loss 7.6562, lr 4.9e-04, dt 1.7s +All GPU(s): step 937: loss 7.1055, lr 4.9e-04, dt 1.6s +All GPU(s): step 938: loss 7.4609, lr 4.9e-04, dt 1.7s +All GPU(s): step 939: loss 7.8867, lr 4.9e-04, dt 1.7s +All GPU(s): step 940: loss 7.0664, lr 4.9e-04, dt 1.7s +All GPU(s): step 941: loss 7.7461, lr 4.9e-04, dt 1.7s +All GPU(s): step 942: loss 7.5703, lr 4.9e-04, dt 1.7s +All GPU(s): step 943: loss 7.8203, lr 4.9e-04, dt 1.6s +All GPU(s): step 944: loss 9.8242, lr 4.9e-04, dt 1.7s +All GPU(s): step 945: loss 7.5625, lr 4.9e-04, dt 1.6s +All GPU(s): step 946: loss 6.9648, lr 4.9e-04, dt 1.7s +All GPU(s): step 947: loss 9.2812, lr 4.9e-04, dt 1.6s +All GPU(s): step 948: loss 9.4414, lr 4.9e-04, dt 1.6s +All GPU(s): step 949: loss 9.2891, lr 4.9e-04, dt 1.6s +All GPU(s): step 950: loss 8.7422, lr 4.9e-04, dt 1.6s +All GPU(s): step 951: loss 9.0547, lr 4.9e-04, dt 1.7s +All GPU(s): step 952: loss 8.6016, lr 4.9e-04, dt 1.7s +All GPU(s): step 953: loss 9.6484, lr 4.9e-04, dt 1.6s +All GPU(s): step 954: loss 12.3320, lr 4.9e-04, dt 1.6s +All GPU(s): step 955: loss 13.6172, lr 4.9e-04, dt 1.6s +All GPU(s): step 956: loss 12.5078, lr 4.9e-04, dt 1.6s +All GPU(s): step 957: loss 13.7188, lr 4.9e-04, dt 1.5s +All GPU(s): step 958: loss 15.5820, lr 4.9e-04, dt 1.6s +All GPU(s): step 959: loss 14.0781, lr 4.9e-04, dt 1.6s +All GPU(s): step 960: loss 14.6484, lr 4.9e-04, dt 1.5s +All GPU(s): step 961: loss 16.1328, lr 4.9e-04, dt 1.5s +All GPU(s): step 962: loss 12.3594, lr 4.9e-04, dt 1.6s +All GPU(s): step 963: loss 11.5820, lr 4.9e-04, dt 1.6s +All GPU(s): step 964: loss 8.9961, lr 4.9e-04, dt 1.7s +All GPU(s): step 965: loss 7.8984, lr 4.9e-04, dt 1.7s +All GPU(s): step 966: loss 8.3477, lr 4.9e-04, dt 1.6s +All GPU(s): step 967: loss 7.6484, lr 4.9e-04, dt 1.7s +All GPU(s): step 968: loss 6.7734, lr 4.9e-04, dt 1.6s +All GPU(s): step 969: loss 9.6484, lr 4.9e-04, dt 1.6s +All GPU(s): step 970: loss 7.5156, lr 4.9e-04, dt 1.7s +All GPU(s): step 971: loss 7.5508, lr 4.9e-04, dt 1.7s +All GPU(s): step 972: loss 6.7031, lr 4.9e-04, dt 1.7s +All GPU(s): step 973: loss 7.2344, lr 4.9e-04, dt 1.7s +All GPU(s): step 974: loss 7.9375, lr 4.9e-04, dt 1.6s +All GPU(s): step 975: loss 6.8242, lr 4.9e-04, dt 1.7s +All GPU(s): step 976: loss 7.2852, lr 4.9e-04, dt 1.8s +All GPU(s): step 977: loss 7.2461, lr 4.9e-04, dt 1.7s +All GPU(s): step 978: loss 8.0430, lr 4.9e-04, dt 1.7s +All GPU(s): step 979: loss 7.2695, lr 4.9e-04, dt 1.7s +All GPU(s): step 980: loss 6.7812, lr 4.9e-04, dt 1.7s +All GPU(s): step 981: loss 7.7969, lr 4.9e-04, dt 1.7s +All GPU(s): step 982: loss 7.2383, lr 4.9e-04, dt 1.7s +All GPU(s): step 983: loss 6.5508, lr 4.9e-04, dt 1.7s +All GPU(s): step 984: loss 7.1133, lr 4.9e-04, dt 1.7s +All GPU(s): step 985: loss 8.2461, lr 4.9e-04, dt 1.6s +All GPU(s): step 986: loss 7.5781, lr 4.9e-04, dt 1.7s +All GPU(s): step 987: loss 6.7695, lr 4.9e-04, dt 1.8s +All GPU(s): step 988: loss 7.5586, lr 4.9e-04, dt 1.8s +All GPU(s): step 989: loss 8.0664, lr 4.9e-04, dt 1.6s +All GPU(s): step 990: loss 7.8477, lr 4.9e-04, dt 1.6s +All GPU(s): step 991: loss 8.4219, lr 4.9e-04, dt 1.6s +All GPU(s): step 992: loss 9.2734, lr 4.9e-04, dt 1.6s +All GPU(s): step 993: loss 10.5547, lr 4.9e-04, dt 1.6s +All GPU(s): step 994: loss 12.0117, lr 4.9e-04, dt 1.7s +All GPU(s): step 995: loss 11.9531, lr 4.9e-04, dt 1.6s +All GPU(s): step 996: loss 14.1250, lr 4.9e-04, dt 1.6s +All GPU(s): step 997: loss 19.0859, lr 4.9e-04, dt 1.5s +All GPU(s): step 998: loss 16.9844, lr 4.9e-04, dt 1.5s +All GPU(s): step 999: loss 12.9844, lr 4.9e-04, dt 1.6s +saving checkpoint to checkpoints/ckpt_1000.pt +All GPU(s): step 1000: loss 13.3984, lr 4.9e-04, dt 1.7s +All GPU(s): step 1001: loss 13.1406, lr 4.9e-04, dt 1.6s +All GPU(s): step 1002: loss 10.4336, lr 4.9e-04, dt 1.6s +All GPU(s): step 1003: loss 8.9570, lr 4.9e-04, dt 1.7s +All GPU(s): step 1004: loss 7.5547, lr 4.9e-04, dt 1.7s +All GPU(s): step 1005: loss 6.8594, lr 4.9e-04, dt 1.7s +All GPU(s): step 1006: loss 7.6680, lr 4.9e-04, dt 1.7s +All GPU(s): step 1007: loss 6.9219, lr 4.9e-04, dt 1.7s +All GPU(s): step 1008: loss 6.7070, lr 4.9e-04, dt 1.7s +All GPU(s): step 1009: loss 7.9688, lr 4.9e-04, dt 1.6s +All GPU(s): step 1010: loss 7.4102, lr 4.9e-04, dt 1.7s +All GPU(s): step 1011: loss 7.5312, lr 4.9e-04, dt 1.7s +All GPU(s): step 1012: loss 7.7148, lr 4.9e-04, dt 1.8s +All GPU(s): step 1013: loss 6.6680, lr 4.9e-04, dt 1.7s +All GPU(s): step 1014: loss 7.6797, lr 4.9e-04, dt 1.6s +All GPU(s): step 1015: loss 6.9883, lr 4.9e-04, dt 1.7s +All GPU(s): step 1016: loss 6.2090, lr 4.9e-04, dt 1.8s +All GPU(s): step 1017: loss 7.6094, lr 4.9e-04, dt 1.7s +All GPU(s): step 1018: loss 6.6484, lr 4.9e-04, dt 1.8s +All GPU(s): step 1019: loss 7.8359, lr 4.9e-04, dt 1.7s +All GPU(s): step 1020: loss 7.0156, lr 4.9e-04, dt 1.7s +All GPU(s): step 1021: loss 7.7930, lr 4.9e-04, dt 1.6s +All GPU(s): step 1022: loss 10.6719, lr 4.9e-04, dt 1.6s +All GPU(s): step 1023: loss 12.0781, lr 4.9e-04, dt 1.6s +All GPU(s): step 1024: loss 11.2344, lr 4.9e-04, dt 1.6s +All GPU(s): step 1025: loss 14.0391, lr 4.9e-04, dt 1.5s +All GPU(s): step 1026: loss 17.8281, lr 4.9e-04, dt 1.5s +All GPU(s): step 1027: loss 28.0469, lr 4.9e-04, dt 1.4s +All GPU(s): step 1028: loss 30.9375, lr 4.9e-04, dt 1.4s +All GPU(s): step 1029: loss 36.9219, lr 4.9e-04, dt 1.3s +All GPU(s): step 1030: loss 27.9531, lr 4.9e-04, dt 1.5s +All GPU(s): step 1031: loss 22.8203, lr 4.9e-04, dt 1.5s +All GPU(s): step 1032: loss 22.2031, lr 4.9e-04, dt 1.5s +All GPU(s): step 1033: loss 15.0703, lr 4.9e-04, dt 1.5s +All GPU(s): step 1034: loss 12.4141, lr 4.9e-04, dt 1.6s +All GPU(s): step 1035: loss 10.7578, lr 4.9e-04, dt 1.6s +All GPU(s): step 1036: loss 8.6562, lr 4.9e-04, dt 1.6s +All GPU(s): step 1037: loss 8.2500, lr 4.9e-04, dt 1.7s +All GPU(s): step 1038: loss 7.8047, lr 4.9e-04, dt 1.7s +All GPU(s): step 1039: loss 8.9688, lr 4.9e-04, dt 1.7s +All GPU(s): step 1040: loss 7.1328, lr 4.9e-04, dt 1.7s +All GPU(s): step 1041: loss 7.5469, lr 4.9e-04, dt 1.7s +All GPU(s): step 1042: loss 7.5000, lr 4.9e-04, dt 1.6s +All GPU(s): step 1043: loss 6.7461, lr 4.9e-04, dt 1.8s +All GPU(s): step 1044: loss 6.5391, lr 4.9e-04, dt 1.7s +All GPU(s): step 1045: loss 8.4453, lr 4.9e-04, dt 1.7s +All GPU(s): step 1046: loss 7.1797, lr 4.9e-04, dt 1.7s +All GPU(s): step 1047: loss 6.4062, lr 4.9e-04, dt 1.7s +All GPU(s): step 1048: loss 7.2109, lr 4.9e-04, dt 1.7s +All GPU(s): step 1049: loss 6.0859, lr 4.9e-04, dt 1.8s +All GPU(s): step 1050: loss 6.3711, lr 4.9e-04, dt 1.7s +All GPU(s): step 1051: loss 7.1289, lr 4.9e-04, dt 1.7s +All GPU(s): step 1052: loss 6.3047, lr 4.9e-04, dt 1.7s +All GPU(s): step 1053: loss 6.8359, lr 4.9e-04, dt 1.7s +All GPU(s): step 1054: loss 7.0117, lr 4.9e-04, dt 1.7s +All GPU(s): step 1055: loss 7.4023, lr 4.9e-04, dt 1.7s +All GPU(s): step 1056: loss 10.0078, lr 4.9e-04, dt 1.6s +All GPU(s): step 1057: loss 7.8086, lr 4.9e-04, dt 1.7s +All GPU(s): step 1058: loss 7.2422, lr 4.9e-04, dt 1.7s +All GPU(s): step 1059: loss 7.4258, lr 4.9e-04, dt 1.7s +All GPU(s): step 1060: loss 7.1289, lr 4.9e-04, dt 1.7s +All GPU(s): step 1061: loss 7.1445, lr 4.9e-04, dt 1.7s diff --git a/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/files/wandb-metadata.json b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..52e125e896e173d7b70acfb0dd9c21b04f7535a9 --- /dev/null +++ b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/files/wandb-metadata.json @@ -0,0 +1,88 @@ +{ + "os": "Linux-5.15.0-117-generic-x86_64-with-glibc2.31", + "python": "3.10.14", + "startedAt": "2024-09-23T15:03:04.830865Z", + "args": [ + "--config-name", + "experimental/byte_autoencoder_1" + ], + "program": "/root/SuperTinyLanguageModels/train.py", + "codePath": "train.py", + "git": { + "remote": "https://github.com/LeonGuertler/SuperTinyLanguageModels.git", + "commit": "7b6e7767d3d2c8e69005f9debea4643e53335e50" + }, + "email": "calvin14@gmail.com", + "root": "/root/SuperTinyLanguageModels/outputs/2024-09-23/15-02-55", + "host": "11c6e13f6a55", + "username": "root", + "executable": "/root/SuperTinyLanguageModels/.conda/bin/python3", + "cpu_count": 128, + "cpu_count_logical": 256, + "gpu": "[NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090]", + "gpu_count": 8, + "disk": { + "/": { + "total": "1123133947904", + "used": "552459030528" + } + }, + "memory": { + "total": "540812599296" + }, + "cpu": { + "count": 128, + "countLogical": 256 + }, + "gpu_nvidia": [ + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + } + ], + "cudaVersion": "12.5" +} \ No newline at end of file diff --git a/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/files/wandb-summary.json b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9655fc584a195f386970ce9129f48902e439b24c --- /dev/null +++ b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/files/wandb-summary.json @@ -0,0 +1 @@ +{"loss":7.14453125,"lr":0.0004898235153340518,"_step":52150272,"additional_info/total-loss":7.369914531707764,"additional_info/average_chunk_length":10.460227966308594,"iter":1061,"token_num":52150272,"additional_info/BCE-loss":1.3171930313110352,"additional_info/chunk_len_penalty_loss":0,"_runtime":1483.718049982,"_wandb":{"runtime":1483},"_timestamp":1.7271052675829294e+09,"additional_info/chunk_len_loss":6.0527215003967285} \ No newline at end of file diff --git a/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug-core.log b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..9d606aa6958ec581fae460ab3c233cf0751a0b1a --- /dev/null +++ b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug-core.log @@ -0,0 +1,13 @@ +{"time":"2024-09-23T15:03:04.208454269Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp4mwkpvoj/port-123542.txt","pid":123542,"debug":false,"disable-analytics":false} +{"time":"2024-09-23T15:03:04.20851121Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false} +{"time":"2024-09-23T15:03:04.212397955Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":32773,"Zone":""}} +{"time":"2024-09-23T15:03:04.212484397Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":123542} +{"time":"2024-09-23T15:03:04.395934944Z","level":"INFO","msg":"created new connection","id":"127.0.0.1:45550"} +{"time":"2024-09-23T15:03:04.83338256Z","level":"INFO","msg":"connection init received","streamId":"bbl5fd2u","id":"127.0.0.1:45550"} +{"time":"2024-09-23T15:03:04.833672975Z","level":"ERROR","msg":"error creating symlink","error":"symlink /root/.cache/wandb/logs/core-debug-20240923_150304.log /root/SuperTinyLanguageModels/outputs/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug-core.log: file exists"} +{"time":"2024-09-23T15:03:04.837142564Z","level":"INFO","msg":"connection init completed","streamId":"bbl5fd2u","id":"127.0.0.1:45550"} +{"time":"2024-09-23T15:27:48.54875597Z","level":"INFO","msg":"connection: teardown","id":"127.0.0.1:45550"} +{"time":"2024-09-23T15:27:48.549055704Z","level":"INFO","msg":"closed connection","id":"127.0.0.1:45550"} +{"time":"2024-09-23T15:27:48.549096624Z","level":"INFO","msg":"server is shutting down"} +{"time":"2024-09-23T15:27:53.07942572Z","level":"INFO","msg":"connection closed","id":"127.0.0.1:45550"} +{"time":"2024-09-23T15:27:53.079479061Z","level":"INFO","msg":"server is closed"} diff --git a/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug-internal.log b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..6a4f240ee358516782023a72997984dfcc0896fe --- /dev/null +++ b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug-internal.log @@ -0,0 +1,18 @@ +{"time":"2024-09-23T15:03:04.833628134Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T15:03:04.833650595Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug-core.log"} +{"time":"2024-09-23T15:03:04.833707186Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T15:03:04.833713256Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug-core.log"} +{"time":"2024-09-23T15:03:04.837118723Z","level":"INFO","msg":"created new stream","id":"bbl5fd2u"} +{"time":"2024-09-23T15:03:04.837137053Z","level":"INFO","msg":"stream: started","id":"bbl5fd2u"} +{"time":"2024-09-23T15:03:04.837160104Z","level":"INFO","msg":"handler: started","stream_id":{"value":"bbl5fd2u"}} +{"time":"2024-09-23T15:03:04.837187104Z","level":"INFO","msg":"sender: started","stream_id":{"value":"bbl5fd2u"}} +{"time":"2024-09-23T15:03:04.837235035Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"bbl5fd2u"}} +{"time":"2024-09-23T15:03:05.197982746Z","level":"INFO","msg":"wandb-core","!BADKEY":null} +{"time":"2024-09-23T15:03:05.20002754Z","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-09-23T15:27:48.548880321Z","level":"INFO","msg":"stream: closing","id":"bbl5fd2u"} +{"time":"2024-09-23T15:27:48.548925082Z","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-09-23T15:27:48.549887675Z","level":"INFO","msg":"Stopped system monitor"} +{"time":"2024-09-23T15:27:53.078953404Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"bbl5fd2u"}} +{"time":"2024-09-23T15:27:53.079066405Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"bbl5fd2u"}} +{"time":"2024-09-23T15:27:53.079081806Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"bbl5fd2u"}} +{"time":"2024-09-23T15:27:53.079280268Z","level":"INFO","msg":"stream: closed","id":"bbl5fd2u"} diff --git a/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug.log b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..3046e4424228a5d4185d2ef8b632877b8fbb529e --- /dev/null +++ b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug.log @@ -0,0 +1,26 @@ +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Current SDK version is 0.18.1 +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Configure stats pid to 123542 +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Loading settings from /root/.config/wandb/settings +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Loading settings from /root/SuperTinyLanguageModels/outputs/2024-09-23/15-02-55/wandb/settings +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Loading settings from environment variables: {} +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Applying setup settings: {'mode': None, '_disable_service': None} +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/root/SuperTinyLanguageModels/train.py', 'program': '/root/SuperTinyLanguageModels/train.py'} +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_setup.py:_flush():77] Applying login settings: {} +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_init.py:_log_setup():532] Logging user logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug.log +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_init.py:_log_setup():533] Logging internal logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/logs/debug-internal.log +2024-09-23 15:03:04,826 INFO MainThread:123542 [wandb_init.py:init():616] calling init triggers +2024-09-23 15:03:04,827 INFO MainThread:123542 [wandb_init.py:init():623] wandb.init called with sweep_config: {} +config: {'model': {'core_model_type': 'pass_through', 'hidden_dim': 384, 'byte_hidden': 128, 'max_chunk_length': 12, 'max_num_chunks': 1024, 'num_delimiter_layers': 3, 'num_byte_decoder_layers': 5, 'target_chunk_len': 8.0, 'chunk_len_loss_weight': 1.0, 'chunk_len_penalty': 0.1, 'context_window': 8192, 'embedding_model_type': 'byte_level', 'tokenizer_type': 'bpe', 'tokenizer_dataset_name': 'simple_en_wiki', 'tokenizer_simplify_data': True, 'vocab_size': 259, 'lm_head_type': 'byte_level', 'lm_head_normalization': 'rms_norm', 'lm_head_bias': False, 'lm_head_dropout': 0.0, 'model_shell_type': 'byte_autoencoder_shell', 'embedding_weight_tying': True, 'ffn_weight_tying': False, 'cproj_weight_tying': False, 'positional_encoding_type': 'rope'}, 'trainer': {'trainer_type': 'base_trainer', 'dataset': 'fineweb_edu_10B', 'batch_size': 6, 'gradient_accumulation_steps': 8, 'max_iters': 10000, 'eval_interval': 50000000, 'log_interval': 1, 'checkpoint_interval': 1000, 'eval_iters': 1000, 'run_eval': False, 'eval': {'mcq_benchmarks': None, 'mcq_num_samples': 1000, 'eval_byte_metrics': False, 'text_modeling_eval': False, 'text_generation_eval': False}, 'optimizer': {'optimizer_name': 'adamW', 'lr': 0.0005, 'min_lr': 5e-05, 'weight_decay': 0.01, 'beta1': 0.9, 'beta2': 0.95, 'grad_clip': 1.0}, 'lr_scheduler': {'name': 'cosine', 'warmup_iters': 100}, 'dataloader': {'name': 'autoencoder'}, 'datasampling': {'name': 'standard'}, 'loss_fn': {'name': 'pass_through'}}, 'general': {'logging': {'wandb_log': True, 'wandb_project': 'SuperTinyLanguageModels', 'wandb_run_name': None, 'group_name': 'experimental_byte_level'}, 'paths': {'output_dir': 'outputs', 'data_dir': '/root/SuperTinyLanguageModels/data', 'checkpoint_dir': 'checkpoints', 'eval_dir': '/root/SuperTinyLanguageModels/evals'}, 'seed': 489, 'device': 'cuda'}} +2024-09-23 15:03:04,827 INFO MainThread:123542 [wandb_init.py:init():666] starting backend +2024-09-23 15:03:04,827 INFO MainThread:123542 [wandb_init.py:init():670] setting up manager +2024-09-23 15:03:04,829 INFO MainThread:123542 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-09-23 15:03:04,830 INFO MainThread:123542 [wandb_init.py:init():678] backend started and connected +2024-09-23 15:03:04,833 INFO MainThread:123542 [wandb_init.py:init():773] updated telemetry +2024-09-23 15:03:04,840 INFO MainThread:123542 [wandb_init.py:init():806] communicating run to backend with 90.0 second timeout +2024-09-23 15:03:05,195 INFO MainThread:123542 [wandb_init.py:init():857] starting run threads in backend +2024-09-23 15:03:05,391 INFO MainThread:123542 [wandb_run.py:_console_start():2459] atexit reg +2024-09-23 15:03:05,391 INFO MainThread:123542 [wandb_run.py:_redirect():2307] redirect: wrap_raw +2024-09-23 15:03:05,391 INFO MainThread:123542 [wandb_run.py:_redirect():2372] Wrapping output streams. +2024-09-23 15:03:05,391 INFO MainThread:123542 [wandb_run.py:_redirect():2397] Redirects installed. +2024-09-23 15:03:05,394 INFO MainThread:123542 [wandb_init.py:init():900] run started, returning control to user process +2024-09-23 15:27:48,549 WARNING MsgRouterThr:123542 [router.py:message_loop():77] message_loop has been closed diff --git a/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/run-bbl5fd2u.wandb b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/run-bbl5fd2u.wandb new file mode 100644 index 0000000000000000000000000000000000000000..3c9dcd0885aa4ea221bb5b9be88630fe8ca61528 --- /dev/null +++ b/2024-09-23/15-02-55/wandb/run-20240923_150304-bbl5fd2u/run-bbl5fd2u.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d51541b994fe9ae7303138091cfb2a5ee929e3de58f54d605ff9fe746a8d73b +size 3393727 diff --git a/2024-09-23/15-28-03/.hydra/config.yaml b/2024-09-23/15-28-03/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2952aa7ff8d053d754caf215984e9181f64276fd --- /dev/null +++ b/2024-09-23/15-28-03/.hydra/config.yaml @@ -0,0 +1,74 @@ +experimental: + model: + core_model_type: pass_through + hidden_dim: 384 + byte_hidden: 128 + max_chunk_length: 12 + max_num_chunks: 1024 + num_delimiter_layers: 3 + num_byte_decoder_layers: 5 + target_chunk_len: 8.0 + chunk_len_loss_weight: 1.0 + chunk_len_penalty: 0.1 + context_window: 8192 + embedding_model_type: byte_level + tokenizer_type: bpe + tokenizer_dataset_name: simple_en_wiki + tokenizer_simplify_data: true + vocab_size: 259 + lm_head_type: byte_level + lm_head_normalization: rms_norm + lm_head_bias: false + lm_head_dropout: 0.0 + model_shell_type: byte_autoencoder_shell + embedding_weight_tying: true + ffn_weight_tying: false + cproj_weight_tying: false + positional_encoding_type: rope + trainer: + trainer_type: base_trainer + dataset: fineweb_edu_10B + batch_size: 6 + gradient_accumulation_steps: 8 + max_iters: 10000 + eval_interval: 50000000 + log_interval: 1 + checkpoint_interval: 1000 + eval_iters: 1000 + run_eval: false + eval: + mcq_benchmarks: null + mcq_num_samples: 1000 + eval_byte_metrics: false + text_modeling_eval: false + text_generation_eval: false + optimizer: + optimizer_name: adamW + lr: 0.0004 + min_lr: 4.0e-05 + weight_decay: 0.01 + beta1: 0.9 + beta2: 0.95 + grad_clip: 1.0 + lr_scheduler: + name: cosine + warmup_iters: 1000 + dataloader: + name: autoencoder + datasampling: + name: standard + loss_fn: + name: pass_through + general: + logging: + wandb_log: true + wandb_project: SuperTinyLanguageModels + wandb_run_name: null + group_name: experimental_byte_level + paths: + output_dir: outputs + data_dir: data + checkpoint_dir: checkpoints + eval_dir: evals + seed: 489 + device: cuda diff --git a/2024-09-23/15-28-03/.hydra/hydra.yaml b/2024-09-23/15-28-03/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c2a1ac167afa737a500a7801bb45946138020109 --- /dev/null +++ b/2024-09-23/15-28-03/.hydra/hydra.yaml @@ -0,0 +1,154 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: train + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: experimental/byte_autoencoder_1 + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.1' + cwd: /root/SuperTinyLanguageModels + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /root/SuperTinyLanguageModels/configs/train + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /root/SuperTinyLanguageModels/outputs/2024-09-23/15-28-03 + choices: + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/2024-09-23/15-28-03/.hydra/overrides.yaml b/2024-09-23/15-28-03/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/2024-09-23/15-28-03/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/2024-09-23/15-28-03/checkpoints/ckpt_1000.pt b/2024-09-23/15-28-03/checkpoints/ckpt_1000.pt new file mode 100644 index 0000000000000000000000000000000000000000..8190dd0ef54e7d4b2d127113866c2f819bec3034 --- /dev/null +++ b/2024-09-23/15-28-03/checkpoints/ckpt_1000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da3c9f4a84abd06c349f735bc6ff1fe0550c345eb145ed15e24f96839f3c15fc +size 69377274 diff --git a/2024-09-23/15-28-03/checkpoints/ckpt_10000.pt b/2024-09-23/15-28-03/checkpoints/ckpt_10000.pt new file mode 100644 index 0000000000000000000000000000000000000000..5370bd53a33aa31b352981ba80b171f04719bfe6 --- /dev/null +++ b/2024-09-23/15-28-03/checkpoints/ckpt_10000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8c272953f8f4a1f4cbbd5e487832c25e94c4dcbd9ec0b1947c217ec2cae5528 +size 69377581 diff --git a/2024-09-23/15-28-03/checkpoints/ckpt_2000.pt b/2024-09-23/15-28-03/checkpoints/ckpt_2000.pt new file mode 100644 index 0000000000000000000000000000000000000000..3994eb18c50f391116fe939bd6ed71dbc32619bb --- /dev/null +++ b/2024-09-23/15-28-03/checkpoints/ckpt_2000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6083f5cbe4e365560fa28505ff417b0930fa4b8f3f5ebc9e1672bb49ece665dd +size 69377274 diff --git a/2024-09-23/15-28-03/checkpoints/ckpt_3000.pt b/2024-09-23/15-28-03/checkpoints/ckpt_3000.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc572eff79e7901cb2f13365dc8ec6f82c5eef0b --- /dev/null +++ b/2024-09-23/15-28-03/checkpoints/ckpt_3000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8154c22296002f9910875dc8c3fd0f6660507d162e92693c168e0aca8f87126 +size 69377274 diff --git a/2024-09-23/15-28-03/checkpoints/ckpt_4000.pt b/2024-09-23/15-28-03/checkpoints/ckpt_4000.pt new file mode 100644 index 0000000000000000000000000000000000000000..48770243e934043cb347a6f267202423d32f1cde --- /dev/null +++ b/2024-09-23/15-28-03/checkpoints/ckpt_4000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecee169ed8bb3c77682a51ff854e014bdf85eee2126582527fe4e7488df0f670 +size 69377274 diff --git a/2024-09-23/15-28-03/checkpoints/ckpt_5000.pt b/2024-09-23/15-28-03/checkpoints/ckpt_5000.pt new file mode 100644 index 0000000000000000000000000000000000000000..89c22267e727966c96268658ba4e90f98687c8f5 --- /dev/null +++ b/2024-09-23/15-28-03/checkpoints/ckpt_5000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01f3c53f4cde9f3c460b1394bb450e9de0efafbddb7f0965d46e300c8129a381 +size 69377274 diff --git a/2024-09-23/15-28-03/checkpoints/ckpt_6000.pt b/2024-09-23/15-28-03/checkpoints/ckpt_6000.pt new file mode 100644 index 0000000000000000000000000000000000000000..f334623339121089eb1c46124e8ef4d348cc85e3 --- /dev/null +++ b/2024-09-23/15-28-03/checkpoints/ckpt_6000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ee312bf808a4b5410e784c4283e842e475b67df83e8bdd1ea21b176672e5d2 +size 69377274 diff --git a/2024-09-23/15-28-03/checkpoints/ckpt_7000.pt b/2024-09-23/15-28-03/checkpoints/ckpt_7000.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b735ce2187ef1923c128bd14307c4efe22a9e0c --- /dev/null +++ b/2024-09-23/15-28-03/checkpoints/ckpt_7000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3662bb603dab04fad7f1ec0e760850321d15d60a26766a2bf6575663ef36dcb +size 69377274 diff --git a/2024-09-23/15-28-03/checkpoints/ckpt_8000.pt b/2024-09-23/15-28-03/checkpoints/ckpt_8000.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e2fa2911d9d806610a0b721712d81cbebd3705d --- /dev/null +++ b/2024-09-23/15-28-03/checkpoints/ckpt_8000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a1bf8da2b5843dade48c71bd79887f5a7edee6c44381a00ff8e593aeee0b1ce +size 69377274 diff --git a/2024-09-23/15-28-03/checkpoints/ckpt_9000.pt b/2024-09-23/15-28-03/checkpoints/ckpt_9000.pt new file mode 100644 index 0000000000000000000000000000000000000000..506b80bc1366ca737f4169af6aae17b7f306527c --- /dev/null +++ b/2024-09-23/15-28-03/checkpoints/ckpt_9000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:602f4b6751bdbd6dea60f0f3a00c5c2856d49b3756a2e51bf135aa6a8b00e703 +size 69377274 diff --git a/2024-09-23/15-28-03/train.log b/2024-09-23/15-28-03/train.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/2024-09-23/15-28-03/wandb/debug-internal.log b/2024-09-23/15-28-03/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..6d852068997579e77a0b39777cd2e8d8984c2756 --- /dev/null +++ b/2024-09-23/15-28-03/wandb/debug-internal.log @@ -0,0 +1,23 @@ +{"time":"2024-09-23T15:28:12.112481689Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T15:28:12.112519869Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug-core.log"} +{"time":"2024-09-23T15:28:12.112623061Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T15:28:12.112635331Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug-core.log"} +{"time":"2024-09-23T15:28:12.118142746Z","level":"INFO","msg":"created new stream","id":"jp82yqcj"} +{"time":"2024-09-23T15:28:12.118214317Z","level":"INFO","msg":"stream: started","id":"jp82yqcj"} +{"time":"2024-09-23T15:28:12.118265208Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"jp82yqcj"}} +{"time":"2024-09-23T15:28:12.118273418Z","level":"INFO","msg":"sender: started","stream_id":{"value":"jp82yqcj"}} +{"time":"2024-09-23T15:28:12.118314949Z","level":"INFO","msg":"handler: started","stream_id":{"value":"jp82yqcj"}} +{"time":"2024-09-23T15:28:12.538706106Z","level":"INFO","msg":"wandb-core","!BADKEY":null} +{"time":"2024-09-23T15:28:12.540526091Z","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-09-23T17:00:42.85305684Z","level":"INFO","msg":"api: retrying HTTP request, no error or response"} +{"time":"2024-09-23T17:52:17.918721149Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/stlm/SuperTinyLanguageModels/jp82yqcj/file_stream"} +{"time":"2024-09-23T20:07:58.162758248Z","level":"INFO","msg":"api: retrying HTTP request, no error or response"} +{"time":"2024-09-23T20:09:43.17329067Z","level":"INFO","msg":"api: retrying HTTP request, no error or response"} +{"time":"2024-09-23T20:42:28.235348476Z","level":"INFO","msg":"api: retrying HTTP request, no error or response"} +{"time":"2024-09-23T21:07:31.27732282Z","level":"INFO","msg":"stream: closing","id":"jp82yqcj"} +{"time":"2024-09-23T21:07:31.2773774Z","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-09-23T21:07:31.278475337Z","level":"INFO","msg":"Stopped system monitor"} +{"time":"2024-09-23T21:07:32.697344011Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"jp82yqcj"}} +{"time":"2024-09-23T21:07:32.697421022Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"jp82yqcj"}} +{"time":"2024-09-23T21:07:32.697407192Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"jp82yqcj"}} +{"time":"2024-09-23T21:07:32.697664785Z","level":"INFO","msg":"stream: closed","id":"jp82yqcj"} diff --git a/2024-09-23/15-28-03/wandb/debug.log b/2024-09-23/15-28-03/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..0764f36353def6e1b54e483ec7cd27e113f01844 --- /dev/null +++ b/2024-09-23/15-28-03/wandb/debug.log @@ -0,0 +1,26 @@ +2024-09-23 15:28:12,105 INFO MainThread:130651 [wandb_setup.py:_flush():77] Current SDK version is 0.18.1 +2024-09-23 15:28:12,105 INFO MainThread:130651 [wandb_setup.py:_flush():77] Configure stats pid to 130651 +2024-09-23 15:28:12,105 INFO MainThread:130651 [wandb_setup.py:_flush():77] Loading settings from /root/.config/wandb/settings +2024-09-23 15:28:12,105 INFO MainThread:130651 [wandb_setup.py:_flush():77] Loading settings from /root/SuperTinyLanguageModels/outputs/2024-09-23/15-28-03/wandb/settings +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_setup.py:_flush():77] Loading settings from environment variables: {} +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_setup.py:_flush():77] Applying setup settings: {'mode': None, '_disable_service': None} +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/root/SuperTinyLanguageModels/train.py', 'program': '/root/SuperTinyLanguageModels/train.py'} +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_setup.py:_flush():77] Applying login settings: {} +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_init.py:_log_setup():532] Logging user logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug.log +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_init.py:_log_setup():533] Logging internal logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug-internal.log +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_init.py:init():616] calling init triggers +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_init.py:init():623] wandb.init called with sweep_config: {} +config: {'model': {'core_model_type': 'pass_through', 'hidden_dim': 384, 'byte_hidden': 128, 'max_chunk_length': 12, 'max_num_chunks': 1024, 'num_delimiter_layers': 3, 'num_byte_decoder_layers': 5, 'target_chunk_len': 8.0, 'chunk_len_loss_weight': 1.0, 'chunk_len_penalty': 0.1, 'context_window': 8192, 'embedding_model_type': 'byte_level', 'tokenizer_type': 'bpe', 'tokenizer_dataset_name': 'simple_en_wiki', 'tokenizer_simplify_data': True, 'vocab_size': 259, 'lm_head_type': 'byte_level', 'lm_head_normalization': 'rms_norm', 'lm_head_bias': False, 'lm_head_dropout': 0.0, 'model_shell_type': 'byte_autoencoder_shell', 'embedding_weight_tying': True, 'ffn_weight_tying': False, 'cproj_weight_tying': False, 'positional_encoding_type': 'rope'}, 'trainer': {'trainer_type': 'base_trainer', 'dataset': 'fineweb_edu_10B', 'batch_size': 6, 'gradient_accumulation_steps': 8, 'max_iters': 10000, 'eval_interval': 50000000, 'log_interval': 1, 'checkpoint_interval': 1000, 'eval_iters': 1000, 'run_eval': False, 'eval': {'mcq_benchmarks': None, 'mcq_num_samples': 1000, 'eval_byte_metrics': False, 'text_modeling_eval': False, 'text_generation_eval': False}, 'optimizer': {'optimizer_name': 'adamW', 'lr': 0.0004, 'min_lr': 4e-05, 'weight_decay': 0.01, 'beta1': 0.9, 'beta2': 0.95, 'grad_clip': 1.0}, 'lr_scheduler': {'name': 'cosine', 'warmup_iters': 1000}, 'dataloader': {'name': 'autoencoder'}, 'datasampling': {'name': 'standard'}, 'loss_fn': {'name': 'pass_through'}}, 'general': {'logging': {'wandb_log': True, 'wandb_project': 'SuperTinyLanguageModels', 'wandb_run_name': None, 'group_name': 'experimental_byte_level'}, 'paths': {'output_dir': 'outputs', 'data_dir': '/root/SuperTinyLanguageModels/data', 'checkpoint_dir': 'checkpoints', 'eval_dir': '/root/SuperTinyLanguageModels/evals'}, 'seed': 489, 'device': 'cuda'}} +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_init.py:init():666] starting backend +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_init.py:init():670] setting up manager +2024-09-23 15:28:12,108 INFO MainThread:130651 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-09-23 15:28:12,110 INFO MainThread:130651 [wandb_init.py:init():678] backend started and connected +2024-09-23 15:28:12,113 INFO MainThread:130651 [wandb_init.py:init():773] updated telemetry +2024-09-23 15:28:12,120 INFO MainThread:130651 [wandb_init.py:init():806] communicating run to backend with 90.0 second timeout +2024-09-23 15:28:12,535 INFO MainThread:130651 [wandb_init.py:init():857] starting run threads in backend +2024-09-23 15:28:12,711 INFO MainThread:130651 [wandb_run.py:_console_start():2459] atexit reg +2024-09-23 15:28:12,712 INFO MainThread:130651 [wandb_run.py:_redirect():2307] redirect: wrap_raw +2024-09-23 15:28:12,712 INFO MainThread:130651 [wandb_run.py:_redirect():2372] Wrapping output streams. +2024-09-23 15:28:12,712 INFO MainThread:130651 [wandb_run.py:_redirect():2397] Redirects installed. +2024-09-23 15:28:12,714 INFO MainThread:130651 [wandb_init.py:init():900] run started, returning control to user process +2024-09-23 21:07:31,278 WARNING MsgRouterThr:130651 [router.py:message_loop():77] message_loop has been closed diff --git a/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/files/config.yaml b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..25bf7de259981bcd63227c2b9837fc067680bf81 --- /dev/null +++ b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/files/config.yaml @@ -0,0 +1,115 @@ +_wandb: + value: + cli_version: 0.18.1 + m: [] + python_version: 3.10.14 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 50 + - 51 + - 53 + - 55 + "2": + - 1 + - 5 + - 11 + - 49 + - 50 + - 51 + - 53 + - 55 + "3": + - 13 + - 15 + - 16 + - 23 + - 55 + - 61 + "4": 3.10.14 + "5": 0.18.1 + "6": 4.44.2 + "8": + - 5 + - 9 + "12": 0.18.1 + "13": linux-x86_64 +general: + value: + device: cuda + logging: + group_name: experimental_byte_level + wandb_log: true + wandb_project: SuperTinyLanguageModels + wandb_run_name: null + paths: + checkpoint_dir: checkpoints + data_dir: /root/SuperTinyLanguageModels/data + eval_dir: /root/SuperTinyLanguageModels/evals + output_dir: outputs + seed: 489 +model: + value: + byte_hidden: 128 + chunk_len_loss_weight: 1 + chunk_len_penalty: 0.1 + context_window: 8192 + core_model_type: pass_through + cproj_weight_tying: false + embedding_model_type: byte_level + embedding_weight_tying: true + ffn_weight_tying: false + hidden_dim: 384 + lm_head_bias: false + lm_head_dropout: 0 + lm_head_normalization: rms_norm + lm_head_type: byte_level + max_chunk_length: 12 + max_num_chunks: 1024 + model_shell_type: byte_autoencoder_shell + num_byte_decoder_layers: 5 + num_delimiter_layers: 3 + positional_encoding_type: rope + target_chunk_len: 8 + tokenizer_dataset_name: simple_en_wiki + tokenizer_simplify_data: true + tokenizer_type: bpe + vocab_size: 259 +trainer: + value: + batch_size: 6 + checkpoint_interval: 1000 + dataloader: + name: autoencoder + datasampling: + name: standard + dataset: fineweb_edu_10B + eval: + eval_byte_metrics: false + mcq_benchmarks: null + mcq_num_samples: 1000 + text_generation_eval: false + text_modeling_eval: false + eval_interval: 50000000 + eval_iters: 1000 + gradient_accumulation_steps: 8 + log_interval: 1 + loss_fn: + name: pass_through + lr_scheduler: + name: cosine + warmup_iters: 1000 + max_iters: 10000 + optimizer: + beta1: 0.9 + beta2: 0.95 + grad_clip: 1 + lr: 0.0004 + min_lr: 4e-05 + optimizer_name: adamW + weight_decay: 0.01 + run_eval: false + trainer_type: base_trainer diff --git a/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/files/output.log b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..ebd0b7a2515220da087b946412b09caec83b3053 --- /dev/null +++ b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/files/output.log @@ -0,0 +1,10012 @@ +Weight and Biases Initialized +Rank0 Trainer built +Training loop is starting +All GPU(s): step 1: loss 18.6250, lr 4.0e-07, dt 2.1s +All GPU(s): step 2: loss 17.7656, lr 8.0e-07, dt 2.1s +All GPU(s): step 3: loss 18.7812, lr 1.2e-06, dt 2.1s +All GPU(s): step 4: loss 18.4062, lr 1.6e-06, dt 2.1s +All GPU(s): step 5: loss 18.2969, lr 2.0e-06, dt 2.1s +All GPU(s): step 6: loss 18.0312, lr 2.4e-06, dt 2.1s +All GPU(s): step 7: loss 17.9531, lr 2.8e-06, dt 2.1s +All GPU(s): step 8: loss 18.4844, lr 3.2e-06, dt 2.1s +All GPU(s): step 9: loss 19.0781, lr 3.6e-06, dt 2.2s +All GPU(s): step 10: loss 18.4531, lr 4.0e-06, dt 2.1s +All GPU(s): step 11: loss 18.2344, lr 4.4e-06, dt 2.1s +All GPU(s): step 12: loss 18.2812, lr 4.8e-06, dt 2.1s +All GPU(s): step 13: loss 18.5938, lr 5.2e-06, dt 2.1s +All GPU(s): step 14: loss 18.5000, lr 5.6e-06, dt 2.2s +All GPU(s): step 15: loss 17.4844, lr 6.0e-06, dt 2.0s +All GPU(s): step 16: loss 18.5469, lr 6.4e-06, dt 2.1s +All GPU(s): step 17: loss 17.8750, lr 6.8e-06, dt 2.1s +All GPU(s): step 18: loss 18.0625, lr 7.2e-06, dt 2.1s +All GPU(s): step 19: loss 18.1250, lr 7.6e-06, dt 2.1s +All GPU(s): step 20: loss 17.5625, lr 8.0e-06, dt 2.1s +All GPU(s): step 21: loss 17.2344, lr 8.4e-06, dt 2.1s +All GPU(s): step 22: loss 18.5312, lr 8.8e-06, dt 2.0s +All GPU(s): step 23: loss 18.0625, lr 9.2e-06, dt 2.1s +All GPU(s): step 24: loss 17.2031, lr 9.6e-06, dt 2.1s +All GPU(s): step 25: loss 17.9844, lr 1.0e-05, dt 2.1s +All GPU(s): step 26: loss 16.9375, lr 1.0e-05, dt 2.1s +All GPU(s): step 27: loss 17.3828, lr 1.1e-05, dt 2.1s +All GPU(s): step 28: loss 17.1484, lr 1.1e-05, dt 2.2s +All GPU(s): step 29: loss 17.3594, lr 1.2e-05, dt 2.1s +All GPU(s): step 30: loss 16.5312, lr 1.2e-05, dt 2.1s +All GPU(s): step 31: loss 16.8125, lr 1.2e-05, dt 2.1s +All GPU(s): step 32: loss 16.5000, lr 1.3e-05, dt 2.1s +All GPU(s): step 33: loss 16.0312, lr 1.3e-05, dt 2.1s +All GPU(s): step 34: loss 15.5547, lr 1.4e-05, dt 2.1s +All GPU(s): step 35: loss 16.5312, lr 1.4e-05, dt 2.0s +All GPU(s): step 36: loss 15.9219, lr 1.4e-05, dt 2.1s +All GPU(s): step 37: loss 15.4688, lr 1.5e-05, dt 2.1s +All GPU(s): step 38: loss 16.1094, lr 1.5e-05, dt 2.2s +All GPU(s): step 39: loss 15.5391, lr 1.6e-05, dt 2.1s +All GPU(s): step 40: loss 15.8672, lr 1.6e-05, dt 2.0s +All GPU(s): step 41: loss 15.6094, lr 1.6e-05, dt 2.1s +All GPU(s): step 42: loss 15.1484, lr 1.7e-05, dt 2.1s +All GPU(s): step 43: loss 14.5078, lr 1.7e-05, dt 2.1s +All GPU(s): step 44: loss 14.7344, lr 1.8e-05, dt 2.0s +All GPU(s): step 45: loss 14.6250, lr 1.8e-05, dt 2.0s +All GPU(s): step 46: loss 15.0938, lr 1.8e-05, dt 2.1s +All GPU(s): step 47: loss 15.5938, lr 1.9e-05, dt 2.1s +All GPU(s): step 48: loss 15.2734, lr 1.9e-05, dt 2.1s +All GPU(s): step 49: loss 15.0078, lr 2.0e-05, dt 2.1s +All GPU(s): step 50: loss 15.9922, lr 2.0e-05, dt 2.1s +All GPU(s): step 51: loss 15.3281, lr 2.0e-05, dt 2.0s +All GPU(s): step 52: loss 15.7109, lr 2.1e-05, dt 2.2s +All GPU(s): step 53: loss 15.9531, lr 2.1e-05, dt 2.1s +All GPU(s): step 54: loss 15.6016, lr 2.2e-05, dt 2.1s +All GPU(s): step 55: loss 14.7109, lr 2.2e-05, dt 2.1s +All GPU(s): step 56: loss 15.9453, lr 2.2e-05, dt 2.1s +All GPU(s): step 57: loss 14.7656, lr 2.3e-05, dt 2.1s +All GPU(s): step 58: loss 15.1875, lr 2.3e-05, dt 2.0s +All GPU(s): step 59: loss 14.4844, lr 2.4e-05, dt 2.1s +All GPU(s): step 60: loss 14.5859, lr 2.4e-05, dt 2.1s +All GPU(s): step 61: loss 13.4141, lr 2.4e-05, dt 2.1s +All GPU(s): step 62: loss 14.3906, lr 2.5e-05, dt 2.1s +All GPU(s): step 63: loss 13.9766, lr 2.5e-05, dt 2.1s +All GPU(s): step 64: loss 13.5859, lr 2.6e-05, dt 2.1s +All GPU(s): step 65: loss 13.4922, lr 2.6e-05, dt 2.1s +All GPU(s): step 66: loss 13.7812, lr 2.6e-05, dt 2.1s +All GPU(s): step 67: loss 13.8047, lr 2.7e-05, dt 2.1s +All GPU(s): step 68: loss 13.6953, lr 2.7e-05, dt 2.1s +All GPU(s): step 69: loss 13.2344, lr 2.8e-05, dt 2.1s +All GPU(s): step 70: loss 13.2344, lr 2.8e-05, dt 2.1s +All GPU(s): step 71: loss 11.7266, lr 2.8e-05, dt 2.1s +All GPU(s): step 72: loss 12.7969, lr 2.9e-05, dt 2.1s +All GPU(s): step 73: loss 13.0312, lr 2.9e-05, dt 2.1s +All GPU(s): step 74: loss 11.9453, lr 3.0e-05, dt 2.1s +All GPU(s): step 75: loss 11.6484, lr 3.0e-05, dt 2.1s +All GPU(s): step 76: loss 11.5938, lr 3.0e-05, dt 2.1s +All GPU(s): step 77: loss 11.2891, lr 3.1e-05, dt 2.1s +All GPU(s): step 78: loss 10.5859, lr 3.1e-05, dt 2.1s +All GPU(s): step 79: loss 10.9375, lr 3.2e-05, dt 2.1s +All GPU(s): step 80: loss 10.8125, lr 3.2e-05, dt 2.1s +All GPU(s): step 81: loss 10.5391, lr 3.2e-05, dt 2.2s +All GPU(s): step 82: loss 9.3398, lr 3.3e-05, dt 2.1s +All GPU(s): step 83: loss 10.8281, lr 3.3e-05, dt 2.1s +All GPU(s): step 84: loss 10.5234, lr 3.4e-05, dt 2.1s +All GPU(s): step 85: loss 9.8125, lr 3.4e-05, dt 2.1s +All GPU(s): step 86: loss 10.1172, lr 3.4e-05, dt 2.1s +All GPU(s): step 87: loss 10.5000, lr 3.5e-05, dt 2.1s +All GPU(s): step 88: loss 11.0547, lr 3.5e-05, dt 2.1s +All GPU(s): step 89: loss 10.3906, lr 3.6e-05, dt 2.1s +All GPU(s): step 90: loss 10.5000, lr 3.6e-05, dt 2.1s +All GPU(s): step 91: loss 11.2969, lr 3.6e-05, dt 2.0s +All GPU(s): step 92: loss 11.1953, lr 3.7e-05, dt 2.1s +All GPU(s): step 93: loss 11.0078, lr 3.7e-05, dt 2.1s +All GPU(s): step 94: loss 10.4766, lr 3.8e-05, dt 2.1s +All GPU(s): step 95: loss 10.5078, lr 3.8e-05, dt 2.1s +All GPU(s): step 96: loss 11.2422, lr 3.8e-05, dt 2.0s +All GPU(s): step 97: loss 11.1172, lr 3.9e-05, dt 2.0s +All GPU(s): step 98: loss 11.1250, lr 3.9e-05, dt 2.0s +All GPU(s): step 99: loss 11.5625, lr 4.0e-05, dt 2.1s +All GPU(s): step 100: loss 11.3594, lr 4.0e-05, dt 2.1s +All GPU(s): step 101: loss 10.2266, lr 4.0e-05, dt 2.1s +All GPU(s): step 102: loss 11.1797, lr 4.1e-05, dt 2.1s +All GPU(s): step 103: loss 10.7422, lr 4.1e-05, dt 2.1s +All GPU(s): step 104: loss 10.7422, lr 4.2e-05, dt 2.1s +All GPU(s): step 105: loss 10.9219, lr 4.2e-05, dt 2.1s +All GPU(s): step 106: loss 10.4141, lr 4.2e-05, dt 2.0s +All GPU(s): step 107: loss 11.4609, lr 4.3e-05, dt 2.1s +All GPU(s): step 108: loss 10.5156, lr 4.3e-05, dt 2.1s +All GPU(s): step 109: loss 10.4922, lr 4.4e-05, dt 2.1s +All GPU(s): step 110: loss 10.3203, lr 4.4e-05, dt 2.1s +All GPU(s): step 111: loss 9.8438, lr 4.4e-05, dt 2.1s +All GPU(s): step 112: loss 9.5391, lr 4.5e-05, dt 2.1s +All GPU(s): step 113: loss 9.4922, lr 4.5e-05, dt 2.1s +All GPU(s): step 114: loss 10.2031, lr 4.6e-05, dt 2.1s +All GPU(s): step 115: loss 9.8281, lr 4.6e-05, dt 2.1s +All GPU(s): step 116: loss 9.1719, lr 4.6e-05, dt 2.1s +All GPU(s): step 117: loss 9.9375, lr 4.7e-05, dt 2.0s +All GPU(s): step 118: loss 9.3203, lr 4.7e-05, dt 2.1s +All GPU(s): step 119: loss 8.8906, lr 4.8e-05, dt 2.2s +All GPU(s): step 120: loss 8.8008, lr 4.8e-05, dt 2.1s +All GPU(s): step 121: loss 8.5898, lr 4.8e-05, dt 2.1s +All GPU(s): step 122: loss 8.2031, lr 4.9e-05, dt 2.0s +All GPU(s): step 123: loss 8.4727, lr 4.9e-05, dt 2.1s +All GPU(s): step 124: loss 8.5234, lr 5.0e-05, dt 2.2s +All GPU(s): step 125: loss 8.6641, lr 5.0e-05, dt 2.1s +All GPU(s): step 126: loss 8.5938, lr 5.0e-05, dt 2.0s +All GPU(s): step 127: loss 8.6680, lr 5.1e-05, dt 2.1s +All GPU(s): step 128: loss 8.4219, lr 5.1e-05, dt 2.1s +All GPU(s): step 129: loss 8.6133, lr 5.2e-05, dt 2.1s +All GPU(s): step 130: loss 8.6914, lr 5.2e-05, dt 2.1s +All GPU(s): step 131: loss 8.8203, lr 5.2e-05, dt 2.0s +All GPU(s): step 132: loss 8.3633, lr 5.3e-05, dt 2.1s +All GPU(s): step 133: loss 8.7070, lr 5.3e-05, dt 2.1s +All GPU(s): step 134: loss 8.7422, lr 5.4e-05, dt 2.1s +All GPU(s): step 135: loss 9.2266, lr 5.4e-05, dt 2.0s +All GPU(s): step 136: loss 8.0664, lr 5.4e-05, dt 2.0s +All GPU(s): step 137: loss 8.3398, lr 5.5e-05, dt 2.1s +All GPU(s): step 138: loss 7.8281, lr 5.5e-05, dt 2.1s +All GPU(s): step 139: loss 8.4453, lr 5.6e-05, dt 2.1s +All GPU(s): step 140: loss 8.4961, lr 5.6e-05, dt 2.1s +All GPU(s): step 141: loss 8.5586, lr 5.6e-05, dt 2.0s +All GPU(s): step 142: loss 8.0586, lr 5.7e-05, dt 2.1s +All GPU(s): step 143: loss 7.7969, lr 5.7e-05, dt 2.1s +All GPU(s): step 144: loss 8.3320, lr 5.8e-05, dt 2.1s +All GPU(s): step 145: loss 7.4219, lr 5.8e-05, dt 2.1s +All GPU(s): step 146: loss 7.5078, lr 5.8e-05, dt 2.1s +All GPU(s): step 147: loss 7.1484, lr 5.9e-05, dt 2.1s +All GPU(s): step 148: loss 8.0039, lr 5.9e-05, dt 2.2s +All GPU(s): step 149: loss 7.9531, lr 6.0e-05, dt 2.1s +All GPU(s): step 150: loss 6.7344, lr 6.0e-05, dt 2.1s +All GPU(s): step 151: loss 7.3398, lr 6.0e-05, dt 2.1s +All GPU(s): step 152: loss 7.1953, lr 6.1e-05, dt 2.1s +All GPU(s): step 153: loss 7.2305, lr 6.1e-05, dt 2.1s +All GPU(s): step 154: loss 6.9688, lr 6.2e-05, dt 2.1s +All GPU(s): step 155: loss 7.3828, lr 6.2e-05, dt 2.1s +All GPU(s): step 156: loss 7.0898, lr 6.2e-05, dt 2.1s +All GPU(s): step 157: loss 6.9141, lr 6.3e-05, dt 2.1s +All GPU(s): step 158: loss 7.0039, lr 6.3e-05, dt 2.1s +All GPU(s): step 159: loss 6.9609, lr 6.4e-05, dt 2.0s +All GPU(s): step 160: loss 7.2500, lr 6.4e-05, dt 2.1s +All GPU(s): step 161: loss 7.0664, lr 6.4e-05, dt 2.1s +All GPU(s): step 162: loss 7.2031, lr 6.5e-05, dt 2.1s +All GPU(s): step 163: loss 7.7227, lr 6.5e-05, dt 2.1s +All GPU(s): step 164: loss 7.3711, lr 6.6e-05, dt 2.1s +All GPU(s): step 165: loss 7.4844, lr 6.6e-05, dt 2.1s +All GPU(s): step 166: loss 7.7148, lr 6.6e-05, dt 2.1s +All GPU(s): step 167: loss 7.1406, lr 6.7e-05, dt 2.2s +All GPU(s): step 168: loss 7.6211, lr 6.7e-05, dt 2.1s +All GPU(s): step 169: loss 7.6992, lr 6.8e-05, dt 2.1s +All GPU(s): step 170: loss 8.1094, lr 6.8e-05, dt 2.1s +All GPU(s): step 171: loss 7.5938, lr 6.8e-05, dt 2.1s +All GPU(s): step 172: loss 7.6250, lr 6.9e-05, dt 2.1s +All GPU(s): step 173: loss 8.1094, lr 6.9e-05, dt 2.0s +All GPU(s): step 174: loss 7.5469, lr 7.0e-05, dt 2.0s +All GPU(s): step 175: loss 8.3711, lr 7.0e-05, dt 2.0s +All GPU(s): step 176: loss 8.8867, lr 7.0e-05, dt 2.1s +All GPU(s): step 177: loss 8.9648, lr 7.1e-05, dt 2.1s +All GPU(s): step 178: loss 8.5391, lr 7.1e-05, dt 2.1s +All GPU(s): step 179: loss 9.0742, lr 7.2e-05, dt 2.1s +All GPU(s): step 180: loss 9.3359, lr 7.2e-05, dt 2.1s +All GPU(s): step 181: loss 9.1953, lr 7.2e-05, dt 2.2s +All GPU(s): step 182: loss 10.4141, lr 7.3e-05, dt 2.1s +All GPU(s): step 183: loss 10.2383, lr 7.3e-05, dt 2.1s +All GPU(s): step 184: loss 10.4766, lr 7.4e-05, dt 2.1s +All GPU(s): step 185: loss 10.8594, lr 7.4e-05, dt 2.1s +All GPU(s): step 186: loss 11.0859, lr 7.4e-05, dt 2.2s +All GPU(s): step 187: loss 11.8984, lr 7.5e-05, dt 2.1s +All GPU(s): step 188: loss 12.2188, lr 7.5e-05, dt 2.1s +All GPU(s): step 189: loss 12.4219, lr 7.6e-05, dt 2.1s +All GPU(s): step 190: loss 12.7812, lr 7.6e-05, dt 2.1s +All GPU(s): step 191: loss 12.1094, lr 7.6e-05, dt 2.1s +All GPU(s): step 192: loss 13.1094, lr 7.7e-05, dt 2.1s +All GPU(s): step 193: loss 13.3906, lr 7.7e-05, dt 2.1s +All GPU(s): step 194: loss 13.5234, lr 7.8e-05, dt 2.1s +All GPU(s): step 195: loss 13.3672, lr 7.8e-05, dt 2.1s +All GPU(s): step 196: loss 13.4688, lr 7.8e-05, dt 2.1s +All GPU(s): step 197: loss 13.9531, lr 7.9e-05, dt 2.1s +All GPU(s): step 198: loss 13.4766, lr 7.9e-05, dt 2.1s +All GPU(s): step 199: loss 13.6562, lr 8.0e-05, dt 2.0s +All GPU(s): step 200: loss 13.2188, lr 8.0e-05, dt 2.1s +All GPU(s): step 201: loss 14.3047, lr 8.0e-05, dt 2.1s +All GPU(s): step 202: loss 14.1328, lr 8.1e-05, dt 2.0s +All GPU(s): step 203: loss 13.6328, lr 8.1e-05, dt 2.1s +All GPU(s): step 204: loss 14.4219, lr 8.2e-05, dt 2.1s +All GPU(s): step 205: loss 15.4766, lr 8.2e-05, dt 2.1s +All GPU(s): step 206: loss 13.9141, lr 8.2e-05, dt 2.0s +All GPU(s): step 207: loss 16.1172, lr 8.3e-05, dt 2.0s +All GPU(s): step 208: loss 15.5078, lr 8.3e-05, dt 2.1s +All GPU(s): step 209: loss 15.4062, lr 8.4e-05, dt 2.0s +All GPU(s): step 210: loss 16.7656, lr 8.4e-05, dt 2.2s +All GPU(s): step 211: loss 17.0625, lr 8.4e-05, dt 2.0s +All GPU(s): step 212: loss 16.8672, lr 8.5e-05, dt 2.0s +All GPU(s): step 213: loss 17.1094, lr 8.5e-05, dt 2.0s +All GPU(s): step 214: loss 17.3906, lr 8.6e-05, dt 2.0s +All GPU(s): step 215: loss 17.4922, lr 8.6e-05, dt 2.2s +All GPU(s): step 216: loss 18.0625, lr 8.6e-05, dt 2.0s +All GPU(s): step 217: loss 19.1719, lr 8.7e-05, dt 2.1s +All GPU(s): step 218: loss 19.4219, lr 8.7e-05, dt 2.0s +All GPU(s): step 219: loss 20.1719, lr 8.8e-05, dt 2.1s +All GPU(s): step 220: loss 21.1094, lr 8.8e-05, dt 2.1s +All GPU(s): step 221: loss 20.6719, lr 8.8e-05, dt 2.1s +All GPU(s): step 222: loss 22.1094, lr 8.9e-05, dt 2.1s +All GPU(s): step 223: loss 22.4219, lr 8.9e-05, dt 2.1s +All GPU(s): step 224: loss 23.7344, lr 9.0e-05, dt 2.1s +All GPU(s): step 225: loss 24.0625, lr 9.0e-05, dt 2.1s +All GPU(s): step 226: loss 24.5938, lr 9.0e-05, dt 2.0s +All GPU(s): step 227: loss 25.3438, lr 9.1e-05, dt 2.1s +All GPU(s): step 228: loss 26.4219, lr 9.1e-05, dt 2.1s +All GPU(s): step 229: loss 27.5469, lr 9.2e-05, dt 2.1s +All GPU(s): step 230: loss 27.9688, lr 9.2e-05, dt 2.0s +All GPU(s): step 231: loss 27.9375, lr 9.2e-05, dt 2.1s +All GPU(s): step 232: loss 28.1875, lr 9.3e-05, dt 2.0s +All GPU(s): step 233: loss 28.6406, lr 9.3e-05, dt 2.1s +All GPU(s): step 234: loss 29.0000, lr 9.4e-05, dt 2.1s +All GPU(s): step 235: loss 28.0156, lr 9.4e-05, dt 2.0s +All GPU(s): step 236: loss 29.1562, lr 9.4e-05, dt 2.0s +All GPU(s): step 237: loss 29.2812, lr 9.5e-05, dt 2.0s +All GPU(s): step 238: loss 28.1875, lr 9.5e-05, dt 2.0s +All GPU(s): step 239: loss 27.9688, lr 9.6e-05, dt 2.1s +All GPU(s): step 240: loss 27.6250, lr 9.6e-05, dt 2.1s +All GPU(s): step 241: loss 28.1250, lr 9.6e-05, dt 2.0s +All GPU(s): step 242: loss 27.8906, lr 9.7e-05, dt 2.0s +All GPU(s): step 243: loss 27.9062, lr 9.7e-05, dt 2.1s +All GPU(s): step 244: loss 28.1406, lr 9.8e-05, dt 2.1s +All GPU(s): step 245: loss 28.1719, lr 9.8e-05, dt 2.0s +All GPU(s): step 246: loss 28.2500, lr 9.8e-05, dt 2.0s +All GPU(s): step 247: loss 27.8438, lr 9.9e-05, dt 2.0s +All GPU(s): step 248: loss 28.0781, lr 9.9e-05, dt 2.1s +All GPU(s): step 249: loss 28.0781, lr 1.0e-04, dt 2.1s +All GPU(s): step 250: loss 28.3438, lr 1.0e-04, dt 2.0s +All GPU(s): step 251: loss 27.8125, lr 1.0e-04, dt 2.0s +All GPU(s): step 252: loss 27.4375, lr 1.0e-04, dt 2.0s +All GPU(s): step 253: loss 27.1562, lr 1.0e-04, dt 2.1s +All GPU(s): step 254: loss 27.0000, lr 1.0e-04, dt 2.1s +All GPU(s): step 255: loss 26.5469, lr 1.0e-04, dt 2.0s +All GPU(s): step 256: loss 25.5938, lr 1.0e-04, dt 2.0s +All GPU(s): step 257: loss 25.5781, lr 1.0e-04, dt 2.1s +All GPU(s): step 258: loss 25.3594, lr 1.0e-04, dt 2.1s +All GPU(s): step 259: loss 24.6250, lr 1.0e-04, dt 2.1s +All GPU(s): step 260: loss 24.0469, lr 1.0e-04, dt 2.1s +All GPU(s): step 261: loss 23.8281, lr 1.0e-04, dt 2.1s +All GPU(s): step 262: loss 23.3594, lr 1.0e-04, dt 2.1s +All GPU(s): step 263: loss 23.0469, lr 1.1e-04, dt 2.2s +All GPU(s): step 264: loss 22.8438, lr 1.1e-04, dt 2.1s +All GPU(s): step 265: loss 22.6094, lr 1.1e-04, dt 2.1s +All GPU(s): step 266: loss 22.0312, lr 1.1e-04, dt 2.0s +All GPU(s): step 267: loss 22.0625, lr 1.1e-04, dt 2.1s +All GPU(s): step 268: loss 22.0625, lr 1.1e-04, dt 2.2s +All GPU(s): step 269: loss 21.2500, lr 1.1e-04, dt 2.1s +All GPU(s): step 270: loss 22.0469, lr 1.1e-04, dt 2.1s +All GPU(s): step 271: loss 22.1562, lr 1.1e-04, dt 2.1s +All GPU(s): step 272: loss 22.2812, lr 1.1e-04, dt 2.1s +All GPU(s): step 273: loss 22.4531, lr 1.1e-04, dt 2.2s +All GPU(s): step 274: loss 22.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 275: loss 22.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 276: loss 21.2812, lr 1.1e-04, dt 2.1s +All GPU(s): step 277: loss 21.7344, lr 1.1e-04, dt 2.1s +All GPU(s): step 278: loss 21.5781, lr 1.1e-04, dt 2.1s +All GPU(s): step 279: loss 20.8750, lr 1.1e-04, dt 2.1s +All GPU(s): step 280: loss 20.5625, lr 1.1e-04, dt 2.1s +All GPU(s): step 281: loss 20.4062, lr 1.1e-04, dt 2.1s +All GPU(s): step 282: loss 19.5312, lr 1.1e-04, dt 2.1s +All GPU(s): step 283: loss 20.2031, lr 1.1e-04, dt 2.1s +All GPU(s): step 284: loss 20.1875, lr 1.1e-04, dt 2.1s +All GPU(s): step 285: loss 19.4531, lr 1.1e-04, dt 2.0s +All GPU(s): step 286: loss 20.0938, lr 1.1e-04, dt 2.1s +All GPU(s): step 287: loss 19.6719, lr 1.1e-04, dt 2.1s +All GPU(s): step 288: loss 18.8906, lr 1.2e-04, dt 2.1s +All GPU(s): step 289: loss 18.5625, lr 1.2e-04, dt 2.0s +All GPU(s): step 290: loss 18.7031, lr 1.2e-04, dt 2.1s +All GPU(s): step 291: loss 17.9219, lr 1.2e-04, dt 2.1s +All GPU(s): step 292: loss 18.9844, lr 1.2e-04, dt 2.1s +All GPU(s): step 293: loss 18.6875, lr 1.2e-04, dt 2.0s +All GPU(s): step 294: loss 18.7031, lr 1.2e-04, dt 2.0s +All GPU(s): step 295: loss 18.4375, lr 1.2e-04, dt 2.0s +All GPU(s): step 296: loss 18.3281, lr 1.2e-04, dt 2.1s +All GPU(s): step 297: loss 18.5156, lr 1.2e-04, dt 2.1s +All GPU(s): step 298: loss 18.4844, lr 1.2e-04, dt 2.0s +All GPU(s): step 299: loss 18.5469, lr 1.2e-04, dt 2.0s +All GPU(s): step 300: loss 18.3594, lr 1.2e-04, dt 2.1s +All GPU(s): step 301: loss 18.9844, lr 1.2e-04, dt 2.1s +All GPU(s): step 302: loss 18.7188, lr 1.2e-04, dt 2.1s +All GPU(s): step 303: loss 18.7188, lr 1.2e-04, dt 2.1s +All GPU(s): step 304: loss 18.9531, lr 1.2e-04, dt 2.1s +All GPU(s): step 305: loss 18.7969, lr 1.2e-04, dt 2.1s +All GPU(s): step 306: loss 18.5625, lr 1.2e-04, dt 2.1s +All GPU(s): step 307: loss 19.0781, lr 1.2e-04, dt 2.1s +All GPU(s): step 308: loss 19.2188, lr 1.2e-04, dt 2.1s +All GPU(s): step 309: loss 18.7500, lr 1.2e-04, dt 2.1s +All GPU(s): step 310: loss 18.9844, lr 1.2e-04, dt 2.1s +All GPU(s): step 311: loss 19.0156, lr 1.2e-04, dt 2.2s +All GPU(s): step 312: loss 18.8125, lr 1.2e-04, dt 2.0s +All GPU(s): step 313: loss 19.1562, lr 1.3e-04, dt 2.0s +All GPU(s): step 314: loss 18.8906, lr 1.3e-04, dt 2.0s +All GPU(s): step 315: loss 18.6875, lr 1.3e-04, dt 2.0s +All GPU(s): step 316: loss 19.1250, lr 1.3e-04, dt 2.1s +All GPU(s): step 317: loss 18.7656, lr 1.3e-04, dt 2.1s +All GPU(s): step 318: loss 19.2812, lr 1.3e-04, dt 2.0s +All GPU(s): step 319: loss 19.1875, lr 1.3e-04, dt 2.0s +All GPU(s): step 320: loss 19.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 321: loss 19.8281, lr 1.3e-04, dt 2.1s +All GPU(s): step 322: loss 19.2812, lr 1.3e-04, dt 2.1s +All GPU(s): step 323: loss 19.7344, lr 1.3e-04, dt 2.1s +All GPU(s): step 324: loss 19.6562, lr 1.3e-04, dt 2.0s +All GPU(s): step 325: loss 19.7969, lr 1.3e-04, dt 2.1s +All GPU(s): step 326: loss 20.0938, lr 1.3e-04, dt 2.1s +All GPU(s): step 327: loss 20.7031, lr 1.3e-04, dt 2.0s +All GPU(s): step 328: loss 20.4844, lr 1.3e-04, dt 2.0s +All GPU(s): step 329: loss 20.9375, lr 1.3e-04, dt 2.0s +All GPU(s): step 330: loss 21.3281, lr 1.3e-04, dt 2.1s +All GPU(s): step 331: loss 21.6094, lr 1.3e-04, dt 2.0s +All GPU(s): step 332: loss 21.7656, lr 1.3e-04, dt 2.0s +All GPU(s): step 333: loss 21.8906, lr 1.3e-04, dt 2.0s +All GPU(s): step 334: loss 22.0312, lr 1.3e-04, dt 2.0s +All GPU(s): step 335: loss 22.3438, lr 1.3e-04, dt 2.1s +All GPU(s): step 336: loss 22.2031, lr 1.3e-04, dt 2.1s +All GPU(s): step 337: loss 22.9219, lr 1.3e-04, dt 2.0s +All GPU(s): step 338: loss 22.3594, lr 1.4e-04, dt 2.0s +All GPU(s): step 339: loss 22.4688, lr 1.4e-04, dt 2.1s +All GPU(s): step 340: loss 22.4062, lr 1.4e-04, dt 2.1s +All GPU(s): step 341: loss 22.4062, lr 1.4e-04, dt 2.1s +All GPU(s): step 342: loss 23.1562, lr 1.4e-04, dt 2.1s +All GPU(s): step 343: loss 22.7344, lr 1.4e-04, dt 2.1s +All GPU(s): step 344: loss 22.6406, lr 1.4e-04, dt 2.1s +All GPU(s): step 345: loss 22.9531, lr 1.4e-04, dt 2.2s +All GPU(s): step 346: loss 23.1719, lr 1.4e-04, dt 2.0s +All GPU(s): step 347: loss 23.0000, lr 1.4e-04, dt 2.1s +All GPU(s): step 348: loss 23.0312, lr 1.4e-04, dt 2.0s +All GPU(s): step 349: loss 22.9375, lr 1.4e-04, dt 2.1s +All GPU(s): step 350: loss 23.0312, lr 1.4e-04, dt 2.1s +All GPU(s): step 351: loss 23.1719, lr 1.4e-04, dt 2.1s +All GPU(s): step 352: loss 23.2812, lr 1.4e-04, dt 2.0s +All GPU(s): step 353: loss 23.2812, lr 1.4e-04, dt 2.0s +All GPU(s): step 354: loss 22.7812, lr 1.4e-04, dt 2.1s +All GPU(s): step 355: loss 23.2812, lr 1.4e-04, dt 2.1s +All GPU(s): step 356: loss 23.5938, lr 1.4e-04, dt 2.1s +All GPU(s): step 357: loss 23.2344, lr 1.4e-04, dt 2.1s +All GPU(s): step 358: loss 23.4688, lr 1.4e-04, dt 2.1s +All GPU(s): step 359: loss 23.1875, lr 1.4e-04, dt 2.1s +All GPU(s): step 360: loss 23.2344, lr 1.4e-04, dt 2.1s +All GPU(s): step 361: loss 23.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 362: loss 23.2188, lr 1.4e-04, dt 2.0s +All GPU(s): step 363: loss 22.8125, lr 1.5e-04, dt 2.1s +All GPU(s): step 364: loss 23.1406, lr 1.5e-04, dt 2.2s +All GPU(s): step 365: loss 23.0312, lr 1.5e-04, dt 2.1s +All GPU(s): step 366: loss 22.6250, lr 1.5e-04, dt 2.1s +All GPU(s): step 367: loss 22.5938, lr 1.5e-04, dt 2.1s +All GPU(s): step 368: loss 22.8281, lr 1.5e-04, dt 2.1s +All GPU(s): step 369: loss 22.7031, lr 1.5e-04, dt 2.1s +All GPU(s): step 370: loss 22.5938, lr 1.5e-04, dt 2.0s +All GPU(s): step 371: loss 22.2656, lr 1.5e-04, dt 2.0s +All GPU(s): step 372: loss 22.3906, lr 1.5e-04, dt 2.0s +All GPU(s): step 373: loss 22.5312, lr 1.5e-04, dt 2.1s +All GPU(s): step 374: loss 21.8906, lr 1.5e-04, dt 2.1s +All GPU(s): step 375: loss 22.3750, lr 1.5e-04, dt 2.0s +All GPU(s): step 376: loss 22.4844, lr 1.5e-04, dt 2.0s +All GPU(s): step 377: loss 22.0312, lr 1.5e-04, dt 2.0s +All GPU(s): step 378: loss 22.2812, lr 1.5e-04, dt 2.1s +All GPU(s): step 379: loss 22.2188, lr 1.5e-04, dt 2.1s +All GPU(s): step 380: loss 22.3906, lr 1.5e-04, dt 2.0s +All GPU(s): step 381: loss 22.4062, lr 1.5e-04, dt 2.1s +All GPU(s): step 382: loss 21.9531, lr 1.5e-04, dt 2.1s +All GPU(s): step 383: loss 22.3125, lr 1.5e-04, dt 2.1s +All GPU(s): step 384: loss 22.4062, lr 1.5e-04, dt 2.1s +All GPU(s): step 385: loss 22.3594, lr 1.5e-04, dt 2.1s +All GPU(s): step 386: loss 21.9531, lr 1.5e-04, dt 2.1s +All GPU(s): step 387: loss 22.1719, lr 1.5e-04, dt 2.0s +All GPU(s): step 388: loss 21.9062, lr 1.6e-04, dt 2.1s +All GPU(s): step 389: loss 22.0156, lr 1.6e-04, dt 2.0s +All GPU(s): step 390: loss 22.1562, lr 1.6e-04, dt 2.1s +All GPU(s): step 391: loss 22.1562, lr 1.6e-04, dt 2.1s +All GPU(s): step 392: loss 22.0781, lr 1.6e-04, dt 2.1s +All GPU(s): step 393: loss 22.0000, lr 1.6e-04, dt 2.1s +All GPU(s): step 394: loss 21.7344, lr 1.6e-04, dt 2.1s +All GPU(s): step 395: loss 21.6406, lr 1.6e-04, dt 2.1s +All GPU(s): step 396: loss 21.8281, lr 1.6e-04, dt 2.0s +All GPU(s): step 397: loss 21.9531, lr 1.6e-04, dt 2.0s +All GPU(s): step 398: loss 21.7188, lr 1.6e-04, dt 2.1s +All GPU(s): step 399: loss 21.7344, lr 1.6e-04, dt 2.1s +All GPU(s): step 400: loss 22.0469, lr 1.6e-04, dt 2.0s +All GPU(s): step 401: loss 22.3125, lr 1.6e-04, dt 2.0s +All GPU(s): step 402: loss 22.5156, lr 1.6e-04, dt 2.1s +All GPU(s): step 403: loss 22.3438, lr 1.6e-04, dt 2.1s +All GPU(s): step 404: loss 22.4531, lr 1.6e-04, dt 2.0s +All GPU(s): step 405: loss 22.7188, lr 1.6e-04, dt 2.1s +All GPU(s): step 406: loss 22.9219, lr 1.6e-04, dt 2.0s +All GPU(s): step 407: loss 22.5781, lr 1.6e-04, dt 2.1s +All GPU(s): step 408: loss 22.4844, lr 1.6e-04, dt 2.1s +All GPU(s): step 409: loss 23.0469, lr 1.6e-04, dt 2.1s +All GPU(s): step 410: loss 22.6875, lr 1.6e-04, dt 2.0s +All GPU(s): step 411: loss 22.8906, lr 1.6e-04, dt 2.1s +All GPU(s): step 412: loss 23.0156, lr 1.6e-04, dt 2.2s +All GPU(s): step 413: loss 22.8281, lr 1.7e-04, dt 2.1s +All GPU(s): step 414: loss 23.0938, lr 1.7e-04, dt 2.1s +All GPU(s): step 415: loss 23.2188, lr 1.7e-04, dt 2.0s +All GPU(s): step 416: loss 23.0000, lr 1.7e-04, dt 2.0s +All GPU(s): step 417: loss 23.3750, lr 1.7e-04, dt 2.1s +All GPU(s): step 418: loss 22.9531, lr 1.7e-04, dt 2.1s +All GPU(s): step 419: loss 22.9844, lr 1.7e-04, dt 2.0s +All GPU(s): step 420: loss 23.1406, lr 1.7e-04, dt 2.1s +All GPU(s): step 421: loss 23.2656, lr 1.7e-04, dt 2.1s +All GPU(s): step 422: loss 23.2031, lr 1.7e-04, dt 2.1s +All GPU(s): step 423: loss 23.1875, lr 1.7e-04, dt 2.0s +All GPU(s): step 424: loss 23.0781, lr 1.7e-04, dt 2.1s +All GPU(s): step 425: loss 23.5469, lr 1.7e-04, dt 2.0s +All GPU(s): step 426: loss 23.3438, lr 1.7e-04, dt 2.1s +All GPU(s): step 427: loss 23.6250, lr 1.7e-04, dt 2.1s +All GPU(s): step 428: loss 23.9062, lr 1.7e-04, dt 2.0s +All GPU(s): step 429: loss 23.5000, lr 1.7e-04, dt 2.0s +All GPU(s): step 430: loss 23.5625, lr 1.7e-04, dt 2.0s +All GPU(s): step 431: loss 23.5625, lr 1.7e-04, dt 2.1s +All GPU(s): step 432: loss 23.7500, lr 1.7e-04, dt 2.0s +All GPU(s): step 433: loss 23.9062, lr 1.7e-04, dt 2.0s +All GPU(s): step 434: loss 23.8438, lr 1.7e-04, dt 2.0s +All GPU(s): step 435: loss 24.0156, lr 1.7e-04, dt 2.1s +All GPU(s): step 436: loss 23.7969, lr 1.7e-04, dt 2.1s +All GPU(s): step 437: loss 23.7656, lr 1.7e-04, dt 2.0s +All GPU(s): step 438: loss 23.7656, lr 1.8e-04, dt 2.0s +All GPU(s): step 439: loss 23.7344, lr 1.8e-04, dt 2.0s +All GPU(s): step 440: loss 24.0938, lr 1.8e-04, dt 2.1s +All GPU(s): step 441: loss 24.1719, lr 1.8e-04, dt 2.2s +All GPU(s): step 442: loss 23.7656, lr 1.8e-04, dt 2.1s +All GPU(s): step 443: loss 23.9531, lr 1.8e-04, dt 2.1s +All GPU(s): step 444: loss 24.1250, lr 1.8e-04, dt 2.0s +All GPU(s): step 445: loss 24.0938, lr 1.8e-04, dt 2.1s +All GPU(s): step 446: loss 24.2031, lr 1.8e-04, dt 2.1s +All GPU(s): step 447: loss 24.4062, lr 1.8e-04, dt 2.0s +All GPU(s): step 448: loss 24.2969, lr 1.8e-04, dt 2.1s +All GPU(s): step 449: loss 24.4531, lr 1.8e-04, dt 2.0s +All GPU(s): step 450: loss 24.1562, lr 1.8e-04, dt 2.1s +All GPU(s): step 451: loss 24.2344, lr 1.8e-04, dt 2.1s +All GPU(s): step 452: loss 24.5469, lr 1.8e-04, dt 2.0s +All GPU(s): step 453: loss 24.5000, lr 1.8e-04, dt 2.1s +All GPU(s): step 454: loss 24.4062, lr 1.8e-04, dt 2.0s +All GPU(s): step 455: loss 24.3750, lr 1.8e-04, dt 2.1s +All GPU(s): step 456: loss 24.6875, lr 1.8e-04, dt 2.1s +All GPU(s): step 457: loss 24.4062, lr 1.8e-04, dt 2.1s +All GPU(s): step 458: loss 24.6094, lr 1.8e-04, dt 2.0s +All GPU(s): step 459: loss 24.6562, lr 1.8e-04, dt 2.1s +All GPU(s): step 460: loss 24.6875, lr 1.8e-04, dt 2.1s +All GPU(s): step 461: loss 24.6562, lr 1.8e-04, dt 2.0s +All GPU(s): step 462: loss 24.9375, lr 1.8e-04, dt 2.0s +All GPU(s): step 463: loss 25.0781, lr 1.9e-04, dt 2.0s +All GPU(s): step 464: loss 25.0469, lr 1.9e-04, dt 2.1s +All GPU(s): step 465: loss 24.9219, lr 1.9e-04, dt 2.2s +All GPU(s): step 466: loss 25.0000, lr 1.9e-04, dt 2.1s +All GPU(s): step 467: loss 25.1562, lr 1.9e-04, dt 2.0s +All GPU(s): step 468: loss 24.8906, lr 1.9e-04, dt 2.1s +All GPU(s): step 469: loss 25.2500, lr 1.9e-04, dt 2.1s +All GPU(s): step 470: loss 25.3125, lr 1.9e-04, dt 2.1s +All GPU(s): step 471: loss 25.0000, lr 1.9e-04, dt 2.0s +All GPU(s): step 472: loss 25.3438, lr 1.9e-04, dt 2.0s +All GPU(s): step 473: loss 25.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 474: loss 25.2812, lr 1.9e-04, dt 2.0s +All GPU(s): step 475: loss 25.3594, lr 1.9e-04, dt 2.1s +All GPU(s): step 476: loss 25.1562, lr 1.9e-04, dt 2.0s +All GPU(s): step 477: loss 25.0938, lr 1.9e-04, dt 2.0s +All GPU(s): step 478: loss 25.4844, lr 1.9e-04, dt 2.0s +All GPU(s): step 479: loss 25.0938, lr 1.9e-04, dt 2.1s +All GPU(s): step 480: loss 25.4062, lr 1.9e-04, dt 2.0s +All GPU(s): step 481: loss 25.0781, lr 1.9e-04, dt 2.0s +All GPU(s): step 482: loss 25.2188, lr 1.9e-04, dt 2.0s +All GPU(s): step 483: loss 25.0781, lr 1.9e-04, dt 2.0s +All GPU(s): step 484: loss 24.9062, lr 1.9e-04, dt 2.1s +All GPU(s): step 485: loss 25.2188, lr 1.9e-04, dt 2.0s +All GPU(s): step 486: loss 25.3438, lr 1.9e-04, dt 2.0s +All GPU(s): step 487: loss 25.4844, lr 1.9e-04, dt 2.0s +All GPU(s): step 488: loss 25.3750, lr 2.0e-04, dt 2.0s +All GPU(s): step 489: loss 25.1562, lr 2.0e-04, dt 2.2s +All GPU(s): step 490: loss 25.1875, lr 2.0e-04, dt 2.0s +All GPU(s): step 491: loss 25.4219, lr 2.0e-04, dt 2.0s +All GPU(s): step 492: loss 25.5000, lr 2.0e-04, dt 2.0s +All GPU(s): step 493: loss 25.7031, lr 2.0e-04, dt 2.1s +All GPU(s): step 494: loss 25.6719, lr 2.0e-04, dt 2.2s +All GPU(s): step 495: loss 25.5000, lr 2.0e-04, dt 2.0s +All GPU(s): step 496: loss 25.5781, lr 2.0e-04, dt 2.1s +All GPU(s): step 497: loss 25.4375, lr 2.0e-04, dt 2.0s +All GPU(s): step 498: loss 25.3125, lr 2.0e-04, dt 2.0s +All GPU(s): step 499: loss 25.7969, lr 2.0e-04, dt 2.1s +All GPU(s): step 500: loss 25.4688, lr 2.0e-04, dt 2.0s +All GPU(s): step 501: loss 25.5469, lr 2.0e-04, dt 2.0s +All GPU(s): step 502: loss 25.6875, lr 2.0e-04, dt 2.0s +All GPU(s): step 503: loss 25.8750, lr 2.0e-04, dt 2.1s +All GPU(s): step 504: loss 25.6250, lr 2.0e-04, dt 2.1s +All GPU(s): step 505: loss 25.4844, lr 2.0e-04, dt 2.0s +All GPU(s): step 506: loss 25.5312, lr 2.0e-04, dt 2.0s +All GPU(s): step 507: loss 25.9375, lr 2.0e-04, dt 2.0s +All GPU(s): step 508: loss 25.8594, lr 2.0e-04, dt 2.1s +All GPU(s): step 509: loss 25.9219, lr 2.0e-04, dt 2.1s +All GPU(s): step 510: loss 25.7812, lr 2.0e-04, dt 2.0s +All GPU(s): step 511: loss 25.5469, lr 2.0e-04, dt 2.0s +All GPU(s): step 512: loss 25.7500, lr 2.0e-04, dt 2.1s +All GPU(s): step 513: loss 25.7656, lr 2.1e-04, dt 2.1s +All GPU(s): step 514: loss 25.5625, lr 2.1e-04, dt 2.0s +All GPU(s): step 515: loss 25.7812, lr 2.1e-04, dt 2.0s +All GPU(s): step 516: loss 25.4844, lr 2.1e-04, dt 2.0s +All GPU(s): step 517: loss 25.7656, lr 2.1e-04, dt 2.0s +All GPU(s): step 518: loss 25.6562, lr 2.1e-04, dt 2.1s +All GPU(s): step 519: loss 25.8906, lr 2.1e-04, dt 2.0s +All GPU(s): step 520: loss 25.9844, lr 2.1e-04, dt 2.0s +All GPU(s): step 521: loss 25.5781, lr 2.1e-04, dt 2.0s +All GPU(s): step 522: loss 25.7500, lr 2.1e-04, dt 2.0s +All GPU(s): step 523: loss 25.8594, lr 2.1e-04, dt 2.1s +All GPU(s): step 524: loss 25.4219, lr 2.1e-04, dt 2.1s +All GPU(s): step 525: loss 25.9844, lr 2.1e-04, dt 2.1s +All GPU(s): step 526: loss 25.8594, lr 2.1e-04, dt 2.1s +All GPU(s): step 527: loss 25.8125, lr 2.1e-04, dt 2.1s +All GPU(s): step 528: loss 26.1406, lr 2.1e-04, dt 2.1s +All GPU(s): step 529: loss 25.9688, lr 2.1e-04, dt 2.1s +All GPU(s): step 530: loss 26.0000, lr 2.1e-04, dt 2.1s +All GPU(s): step 531: loss 25.9844, lr 2.1e-04, dt 2.1s +All GPU(s): step 532: loss 25.9062, lr 2.1e-04, dt 2.1s +All GPU(s): step 533: loss 25.8594, lr 2.1e-04, dt 2.1s +All GPU(s): step 534: loss 26.0938, lr 2.1e-04, dt 2.0s +All GPU(s): step 535: loss 26.0156, lr 2.1e-04, dt 2.0s +All GPU(s): step 536: loss 26.0312, lr 2.1e-04, dt 2.0s +All GPU(s): step 537: loss 25.9375, lr 2.1e-04, dt 2.1s +All GPU(s): step 538: loss 26.1094, lr 2.2e-04, dt 2.1s +All GPU(s): step 539: loss 26.3281, lr 2.2e-04, dt 2.0s +All GPU(s): step 540: loss 26.1719, lr 2.2e-04, dt 2.1s +All GPU(s): step 541: loss 26.2812, lr 2.2e-04, dt 2.1s +All GPU(s): step 542: loss 26.1875, lr 2.2e-04, dt 2.1s +All GPU(s): step 543: loss 26.2188, lr 2.2e-04, dt 2.1s +All GPU(s): step 544: loss 26.1094, lr 2.2e-04, dt 2.0s +All GPU(s): step 545: loss 25.8750, lr 2.2e-04, dt 2.0s +All GPU(s): step 546: loss 26.2188, lr 2.2e-04, dt 2.1s +All GPU(s): step 547: loss 26.1719, lr 2.2e-04, dt 2.1s +All GPU(s): step 548: loss 26.0625, lr 2.2e-04, dt 2.0s +All GPU(s): step 549: loss 26.2656, lr 2.2e-04, dt 2.0s +All GPU(s): step 550: loss 26.2500, lr 2.2e-04, dt 2.0s +All GPU(s): step 551: loss 26.2656, lr 2.2e-04, dt 2.0s +All GPU(s): step 552: loss 26.4688, lr 2.2e-04, dt 2.1s +All GPU(s): step 553: loss 26.3281, lr 2.2e-04, dt 2.0s +All GPU(s): step 554: loss 26.8906, lr 2.2e-04, dt 2.1s +All GPU(s): step 555: loss 26.5469, lr 2.2e-04, dt 2.0s +All GPU(s): step 556: loss 26.5469, lr 2.2e-04, dt 2.1s +All GPU(s): step 557: loss 26.9844, lr 2.2e-04, dt 2.1s +All GPU(s): step 558: loss 26.7188, lr 2.2e-04, dt 2.0s +All GPU(s): step 559: loss 26.5469, lr 2.2e-04, dt 2.0s +All GPU(s): step 560: loss 26.9688, lr 2.2e-04, dt 2.0s +All GPU(s): step 561: loss 26.7188, lr 2.2e-04, dt 2.1s +All GPU(s): step 562: loss 26.9219, lr 2.2e-04, dt 2.1s +All GPU(s): step 563: loss 26.8750, lr 2.3e-04, dt 2.0s +All GPU(s): step 564: loss 26.8125, lr 2.3e-04, dt 2.0s +All GPU(s): step 565: loss 26.6719, lr 2.3e-04, dt 2.1s +All GPU(s): step 566: loss 26.7812, lr 2.3e-04, dt 2.2s +All GPU(s): step 567: loss 26.9688, lr 2.3e-04, dt 2.0s +All GPU(s): step 568: loss 26.9844, lr 2.3e-04, dt 2.0s +All GPU(s): step 569: loss 26.7031, lr 2.3e-04, dt 2.0s +All GPU(s): step 570: loss 26.9062, lr 2.3e-04, dt 2.1s +All GPU(s): step 571: loss 26.5156, lr 2.3e-04, dt 2.1s +All GPU(s): step 572: loss 26.8281, lr 2.3e-04, dt 2.1s +All GPU(s): step 573: loss 26.7344, lr 2.3e-04, dt 2.0s +All GPU(s): step 574: loss 26.6562, lr 2.3e-04, dt 2.1s +All GPU(s): step 575: loss 26.4688, lr 2.3e-04, dt 2.1s +All GPU(s): step 576: loss 26.4844, lr 2.3e-04, dt 2.1s +All GPU(s): step 577: loss 26.5312, lr 2.3e-04, dt 2.0s +All GPU(s): step 578: loss 26.1250, lr 2.3e-04, dt 2.0s +All GPU(s): step 579: loss 26.2812, lr 2.3e-04, dt 2.0s +All GPU(s): step 580: loss 26.1406, lr 2.3e-04, dt 2.1s +All GPU(s): step 581: loss 26.2344, lr 2.3e-04, dt 2.1s +All GPU(s): step 582: loss 26.1250, lr 2.3e-04, dt 2.0s +All GPU(s): step 583: loss 26.3594, lr 2.3e-04, dt 2.0s +All GPU(s): step 584: loss 26.3594, lr 2.3e-04, dt 2.0s +All GPU(s): step 585: loss 26.1719, lr 2.3e-04, dt 2.1s +All GPU(s): step 586: loss 26.0000, lr 2.3e-04, dt 2.0s +All GPU(s): step 587: loss 26.2344, lr 2.3e-04, dt 2.0s +All GPU(s): step 588: loss 26.3594, lr 2.4e-04, dt 2.0s +All GPU(s): step 589: loss 26.2031, lr 2.4e-04, dt 2.0s +All GPU(s): step 590: loss 26.4844, lr 2.4e-04, dt 2.1s +All GPU(s): step 591: loss 26.3750, lr 2.4e-04, dt 2.0s +All GPU(s): step 592: loss 26.1250, lr 2.4e-04, dt 2.0s +All GPU(s): step 593: loss 26.2344, lr 2.4e-04, dt 2.0s +All GPU(s): step 594: loss 26.3906, lr 2.4e-04, dt 2.0s +All GPU(s): step 595: loss 26.2812, lr 2.4e-04, dt 2.1s +All GPU(s): step 596: loss 26.2656, lr 2.4e-04, dt 2.1s +All GPU(s): step 597: loss 26.2031, lr 2.4e-04, dt 2.0s +All GPU(s): step 598: loss 26.5312, lr 2.4e-04, dt 2.0s +All GPU(s): step 599: loss 26.3281, lr 2.4e-04, dt 2.1s +All GPU(s): step 600: loss 26.3438, lr 2.4e-04, dt 2.1s +All GPU(s): step 601: loss 26.4375, lr 2.4e-04, dt 2.0s +All GPU(s): step 602: loss 26.5000, lr 2.4e-04, dt 2.0s +All GPU(s): step 603: loss 26.3594, lr 2.4e-04, dt 2.0s +All GPU(s): step 604: loss 26.2969, lr 2.4e-04, dt 2.1s +All GPU(s): step 605: loss 26.3438, lr 2.4e-04, dt 2.1s +All GPU(s): step 606: loss 25.9062, lr 2.4e-04, dt 2.0s +All GPU(s): step 607: loss 26.3125, lr 2.4e-04, dt 2.0s +All GPU(s): step 608: loss 26.3750, lr 2.4e-04, dt 2.1s +All GPU(s): step 609: loss 26.0312, lr 2.4e-04, dt 2.1s +All GPU(s): step 610: loss 26.0000, lr 2.4e-04, dt 2.1s +All GPU(s): step 611: loss 26.1094, lr 2.4e-04, dt 2.0s +All GPU(s): step 612: loss 26.0000, lr 2.4e-04, dt 2.0s +All GPU(s): step 613: loss 26.2188, lr 2.5e-04, dt 2.0s +All GPU(s): step 614: loss 25.8281, lr 2.5e-04, dt 2.1s +All GPU(s): step 615: loss 25.8281, lr 2.5e-04, dt 2.0s +All GPU(s): step 616: loss 26.0000, lr 2.5e-04, dt 2.0s +All GPU(s): step 617: loss 25.5938, lr 2.5e-04, dt 2.1s +All GPU(s): step 618: loss 25.8438, lr 2.5e-04, dt 2.1s +All GPU(s): step 619: loss 25.8750, lr 2.5e-04, dt 2.1s +All GPU(s): step 620: loss 25.7500, lr 2.5e-04, dt 2.1s +All GPU(s): step 621: loss 25.8281, lr 2.5e-04, dt 2.1s +All GPU(s): step 622: loss 25.8125, lr 2.5e-04, dt 2.0s +All GPU(s): step 623: loss 25.7344, lr 2.5e-04, dt 2.1s +All GPU(s): step 624: loss 25.8594, lr 2.5e-04, dt 2.2s +All GPU(s): step 625: loss 25.7188, lr 2.5e-04, dt 2.1s +All GPU(s): step 626: loss 25.2812, lr 2.5e-04, dt 2.0s +All GPU(s): step 627: loss 25.4531, lr 2.5e-04, dt 2.1s +All GPU(s): step 628: loss 25.6250, lr 2.5e-04, dt 2.1s +All GPU(s): step 629: loss 25.6562, lr 2.5e-04, dt 2.1s +All GPU(s): step 630: loss 25.5000, lr 2.5e-04, dt 2.1s +All GPU(s): step 631: loss 25.3594, lr 2.5e-04, dt 2.1s +All GPU(s): step 632: loss 25.3750, lr 2.5e-04, dt 2.1s +All GPU(s): step 633: loss 25.4688, lr 2.5e-04, dt 2.1s +All GPU(s): step 634: loss 25.5469, lr 2.5e-04, dt 2.1s +All GPU(s): step 635: loss 25.5781, lr 2.5e-04, dt 2.0s +All GPU(s): step 636: loss 25.5156, lr 2.5e-04, dt 2.0s +All GPU(s): step 637: loss 25.4375, lr 2.5e-04, dt 2.0s +All GPU(s): step 638: loss 25.5312, lr 2.6e-04, dt 2.1s +All GPU(s): step 639: loss 25.5625, lr 2.6e-04, dt 2.1s +All GPU(s): step 640: loss 25.3281, lr 2.6e-04, dt 2.0s +All GPU(s): step 641: loss 25.2656, lr 2.6e-04, dt 2.0s +All GPU(s): step 642: loss 25.5312, lr 2.6e-04, dt 2.1s +All GPU(s): step 643: loss 25.5625, lr 2.6e-04, dt 2.1s +All GPU(s): step 644: loss 25.6250, lr 2.6e-04, dt 2.0s +All GPU(s): step 645: loss 25.4531, lr 2.6e-04, dt 2.0s +All GPU(s): step 646: loss 25.7500, lr 2.6e-04, dt 2.0s +All GPU(s): step 647: loss 25.5312, lr 2.6e-04, dt 2.0s +All GPU(s): step 648: loss 25.8281, lr 2.6e-04, dt 2.2s +All GPU(s): step 649: loss 25.4375, lr 2.6e-04, dt 2.1s +All GPU(s): step 650: loss 25.5469, lr 2.6e-04, dt 2.1s +All GPU(s): step 651: loss 25.3750, lr 2.6e-04, dt 2.1s +All GPU(s): step 652: loss 25.2656, lr 2.6e-04, dt 2.1s +All GPU(s): step 653: loss 25.3438, lr 2.6e-04, dt 2.1s +All GPU(s): step 654: loss 25.1094, lr 2.6e-04, dt 2.1s +All GPU(s): step 655: loss 25.3438, lr 2.6e-04, dt 2.1s +All GPU(s): step 656: loss 24.9688, lr 2.6e-04, dt 2.1s +All GPU(s): step 657: loss 25.1562, lr 2.6e-04, dt 2.1s +All GPU(s): step 658: loss 25.0156, lr 2.6e-04, dt 2.1s +All GPU(s): step 659: loss 25.1406, lr 2.6e-04, dt 2.0s +All GPU(s): step 660: loss 24.9062, lr 2.6e-04, dt 2.0s +All GPU(s): step 661: loss 24.9531, lr 2.6e-04, dt 2.1s +All GPU(s): step 662: loss 25.0156, lr 2.6e-04, dt 2.1s +All GPU(s): step 663: loss 25.0469, lr 2.7e-04, dt 2.0s +All GPU(s): step 664: loss 25.1250, lr 2.7e-04, dt 2.1s +All GPU(s): step 665: loss 24.9844, lr 2.7e-04, dt 2.0s +All GPU(s): step 666: loss 24.9062, lr 2.7e-04, dt 2.1s +All GPU(s): step 667: loss 24.6562, lr 2.7e-04, dt 2.2s +All GPU(s): step 668: loss 24.6562, lr 2.7e-04, dt 2.0s +All GPU(s): step 669: loss 24.8125, lr 2.7e-04, dt 2.1s +All GPU(s): step 670: loss 24.9531, lr 2.7e-04, dt 2.0s +All GPU(s): step 671: loss 24.7344, lr 2.7e-04, dt 2.1s +All GPU(s): step 672: loss 24.7812, lr 2.7e-04, dt 2.2s +All GPU(s): step 673: loss 24.8594, lr 2.7e-04, dt 2.1s +All GPU(s): step 674: loss 24.6719, lr 2.7e-04, dt 2.1s +All GPU(s): step 675: loss 24.5000, lr 2.7e-04, dt 2.1s +All GPU(s): step 676: loss 24.6875, lr 2.7e-04, dt 2.1s +All GPU(s): step 677: loss 24.6406, lr 2.7e-04, dt 2.1s +All GPU(s): step 678: loss 24.6562, lr 2.7e-04, dt 2.0s +All GPU(s): step 679: loss 24.4531, lr 2.7e-04, dt 2.0s +All GPU(s): step 680: loss 24.5938, lr 2.7e-04, dt 2.0s +All GPU(s): step 681: loss 24.6719, lr 2.7e-04, dt 2.1s +All GPU(s): step 682: loss 24.6250, lr 2.7e-04, dt 2.1s +All GPU(s): step 683: loss 24.7500, lr 2.7e-04, dt 2.0s +All GPU(s): step 684: loss 24.7031, lr 2.7e-04, dt 2.1s +All GPU(s): step 685: loss 24.4219, lr 2.7e-04, dt 2.0s +All GPU(s): step 686: loss 24.6562, lr 2.7e-04, dt 2.1s +All GPU(s): step 687: loss 24.5625, lr 2.7e-04, dt 2.0s +All GPU(s): step 688: loss 24.5469, lr 2.8e-04, dt 2.1s +All GPU(s): step 689: loss 24.7500, lr 2.8e-04, dt 2.0s +All GPU(s): step 690: loss 24.6406, lr 2.8e-04, dt 2.0s +All GPU(s): step 691: loss 24.4375, lr 2.8e-04, dt 2.1s +All GPU(s): step 692: loss 24.4375, lr 2.8e-04, dt 2.0s +All GPU(s): step 693: loss 24.5312, lr 2.8e-04, dt 2.0s +All GPU(s): step 694: loss 24.3438, lr 2.8e-04, dt 2.1s +All GPU(s): step 695: loss 24.2031, lr 2.8e-04, dt 2.1s +All GPU(s): step 696: loss 24.0625, lr 2.8e-04, dt 2.1s +All GPU(s): step 697: loss 24.1875, lr 2.8e-04, dt 2.1s +All GPU(s): step 698: loss 24.1562, lr 2.8e-04, dt 2.0s +All GPU(s): step 699: loss 24.3281, lr 2.8e-04, dt 2.0s +All GPU(s): step 700: loss 24.2812, lr 2.8e-04, dt 2.1s +All GPU(s): step 701: loss 24.0625, lr 2.8e-04, dt 2.2s +All GPU(s): step 702: loss 24.2500, lr 2.8e-04, dt 2.1s +All GPU(s): step 703: loss 24.3438, lr 2.8e-04, dt 2.0s +All GPU(s): step 704: loss 24.1719, lr 2.8e-04, dt 2.0s +All GPU(s): step 705: loss 24.2969, lr 2.8e-04, dt 2.0s +All GPU(s): step 706: loss 24.0781, lr 2.8e-04, dt 2.1s +All GPU(s): step 707: loss 24.0156, lr 2.8e-04, dt 2.0s +All GPU(s): step 708: loss 24.0938, lr 2.8e-04, dt 2.0s +All GPU(s): step 709: loss 24.2812, lr 2.8e-04, dt 2.0s +All GPU(s): step 710: loss 24.2500, lr 2.8e-04, dt 2.1s +All GPU(s): step 711: loss 23.8438, lr 2.8e-04, dt 2.1s +All GPU(s): step 712: loss 23.9375, lr 2.8e-04, dt 2.1s +All GPU(s): step 713: loss 24.3594, lr 2.9e-04, dt 2.1s +All GPU(s): step 714: loss 24.1406, lr 2.9e-04, dt 2.1s +All GPU(s): step 715: loss 24.0000, lr 2.9e-04, dt 2.1s +All GPU(s): step 716: loss 24.0938, lr 2.9e-04, dt 2.1s +All GPU(s): step 717: loss 24.1719, lr 2.9e-04, dt 2.0s +All GPU(s): step 718: loss 23.9688, lr 2.9e-04, dt 2.1s +All GPU(s): step 719: loss 23.7656, lr 2.9e-04, dt 2.1s +All GPU(s): step 720: loss 23.7656, lr 2.9e-04, dt 2.2s +All GPU(s): step 721: loss 23.7500, lr 2.9e-04, dt 2.1s +All GPU(s): step 722: loss 23.9062, lr 2.9e-04, dt 2.1s +All GPU(s): step 723: loss 23.5781, lr 2.9e-04, dt 2.1s +All GPU(s): step 724: loss 23.9688, lr 2.9e-04, dt 2.1s +All GPU(s): step 725: loss 23.7812, lr 2.9e-04, dt 2.2s +All GPU(s): step 726: loss 23.7969, lr 2.9e-04, dt 2.1s +All GPU(s): step 727: loss 23.7188, lr 2.9e-04, dt 2.1s +All GPU(s): step 728: loss 23.1250, lr 2.9e-04, dt 2.0s +All GPU(s): step 729: loss 23.7500, lr 2.9e-04, dt 2.1s +All GPU(s): step 730: loss 23.5156, lr 2.9e-04, dt 2.1s +All GPU(s): step 731: loss 23.5625, lr 2.9e-04, dt 2.1s +All GPU(s): step 732: loss 23.3750, lr 2.9e-04, dt 2.0s +All GPU(s): step 733: loss 23.6094, lr 2.9e-04, dt 2.0s +All GPU(s): step 734: loss 23.9219, lr 2.9e-04, dt 2.1s +All GPU(s): step 735: loss 23.5781, lr 2.9e-04, dt 2.1s +All GPU(s): step 736: loss 23.3750, lr 2.9e-04, dt 2.1s +All GPU(s): step 737: loss 23.5938, lr 2.9e-04, dt 2.1s +All GPU(s): step 738: loss 23.5000, lr 3.0e-04, dt 2.1s +All GPU(s): step 739: loss 23.3438, lr 3.0e-04, dt 2.1s +All GPU(s): step 740: loss 23.7188, lr 3.0e-04, dt 2.0s +All GPU(s): step 741: loss 23.5781, lr 3.0e-04, dt 2.0s +All GPU(s): step 742: loss 23.6875, lr 3.0e-04, dt 2.1s +All GPU(s): step 743: loss 23.5000, lr 3.0e-04, dt 2.1s +All GPU(s): step 744: loss 23.6094, lr 3.0e-04, dt 2.1s +All GPU(s): step 745: loss 23.5312, lr 3.0e-04, dt 2.0s +All GPU(s): step 746: loss 23.7969, lr 3.0e-04, dt 2.0s +All GPU(s): step 747: loss 23.5312, lr 3.0e-04, dt 2.0s +All GPU(s): step 748: loss 23.5312, lr 3.0e-04, dt 2.1s +All GPU(s): step 749: loss 23.7031, lr 3.0e-04, dt 2.2s +All GPU(s): step 750: loss 23.6250, lr 3.0e-04, dt 2.0s +All GPU(s): step 751: loss 23.6719, lr 3.0e-04, dt 2.1s +All GPU(s): step 752: loss 23.8125, lr 3.0e-04, dt 2.1s +All GPU(s): step 753: loss 23.7656, lr 3.0e-04, dt 2.1s +All GPU(s): step 754: loss 23.5156, lr 3.0e-04, dt 2.2s +All GPU(s): step 755: loss 23.5625, lr 3.0e-04, dt 2.1s +All GPU(s): step 756: loss 23.5625, lr 3.0e-04, dt 2.1s +All GPU(s): step 757: loss 23.8125, lr 3.0e-04, dt 2.1s +All GPU(s): step 758: loss 23.8281, lr 3.0e-04, dt 2.1s +All GPU(s): step 759: loss 23.2344, lr 3.0e-04, dt 2.1s +All GPU(s): step 760: loss 23.5312, lr 3.0e-04, dt 2.1s +All GPU(s): step 761: loss 23.6250, lr 3.0e-04, dt 2.1s +All GPU(s): step 762: loss 23.7344, lr 3.0e-04, dt 2.1s +All GPU(s): step 763: loss 23.6719, lr 3.1e-04, dt 2.2s +All GPU(s): step 764: loss 23.5469, lr 3.1e-04, dt 2.1s +All GPU(s): step 765: loss 23.3438, lr 3.1e-04, dt 2.1s +All GPU(s): step 766: loss 23.6094, lr 3.1e-04, dt 2.1s +All GPU(s): step 767: loss 23.7812, lr 3.1e-04, dt 2.1s +All GPU(s): step 768: loss 23.3594, lr 3.1e-04, dt 2.2s +All GPU(s): step 769: loss 23.5781, lr 3.1e-04, dt 2.1s +All GPU(s): step 770: loss 23.3906, lr 3.1e-04, dt 2.1s +All GPU(s): step 771: loss 23.4688, lr 3.1e-04, dt 2.1s +All GPU(s): step 772: loss 23.3281, lr 3.1e-04, dt 2.1s +All GPU(s): step 773: loss 23.3125, lr 3.1e-04, dt 2.1s +All GPU(s): step 774: loss 23.5000, lr 3.1e-04, dt 2.0s +All GPU(s): step 775: loss 23.2500, lr 3.1e-04, dt 2.0s +All GPU(s): step 776: loss 23.0469, lr 3.1e-04, dt 2.1s +All GPU(s): step 777: loss 23.4688, lr 3.1e-04, dt 2.1s +All GPU(s): step 778: loss 23.6250, lr 3.1e-04, dt 2.1s +All GPU(s): step 779: loss 23.4688, lr 3.1e-04, dt 2.0s +All GPU(s): step 780: loss 23.1875, lr 3.1e-04, dt 2.1s +All GPU(s): step 781: loss 23.1562, lr 3.1e-04, dt 2.0s +All GPU(s): step 782: loss 23.0312, lr 3.1e-04, dt 2.1s +All GPU(s): step 783: loss 23.2656, lr 3.1e-04, dt 2.0s +All GPU(s): step 784: loss 23.1094, lr 3.1e-04, dt 2.1s +All GPU(s): step 785: loss 23.4219, lr 3.1e-04, dt 2.1s +All GPU(s): step 786: loss 23.1562, lr 3.1e-04, dt 2.1s +All GPU(s): step 787: loss 23.3281, lr 3.1e-04, dt 2.2s +All GPU(s): step 788: loss 23.4531, lr 3.2e-04, dt 2.1s +All GPU(s): step 789: loss 23.1562, lr 3.2e-04, dt 2.1s +All GPU(s): step 790: loss 23.3594, lr 3.2e-04, dt 2.0s +All GPU(s): step 791: loss 23.7500, lr 3.2e-04, dt 2.1s +All GPU(s): step 792: loss 23.3594, lr 3.2e-04, dt 2.2s +All GPU(s): step 793: loss 23.3906, lr 3.2e-04, dt 2.0s +All GPU(s): step 794: loss 23.2812, lr 3.2e-04, dt 2.0s +All GPU(s): step 795: loss 23.4688, lr 3.2e-04, dt 2.0s +All GPU(s): step 796: loss 23.4375, lr 3.2e-04, dt 2.1s +All GPU(s): step 797: loss 23.0312, lr 3.2e-04, dt 2.1s +All GPU(s): step 798: loss 23.2500, lr 3.2e-04, dt 2.0s +All GPU(s): step 799: loss 23.3281, lr 3.2e-04, dt 2.1s +All GPU(s): step 800: loss 23.7344, lr 3.2e-04, dt 2.0s +All GPU(s): step 801: loss 23.2500, lr 3.2e-04, dt 2.1s +All GPU(s): step 802: loss 23.3906, lr 3.2e-04, dt 2.1s +All GPU(s): step 803: loss 23.4688, lr 3.2e-04, dt 2.0s +All GPU(s): step 804: loss 23.2344, lr 3.2e-04, dt 2.0s +All GPU(s): step 805: loss 23.3906, lr 3.2e-04, dt 2.1s +All GPU(s): step 806: loss 23.3125, lr 3.2e-04, dt 2.1s +All GPU(s): step 807: loss 22.9062, lr 3.2e-04, dt 2.0s +All GPU(s): step 808: loss 23.3281, lr 3.2e-04, dt 2.0s +All GPU(s): step 809: loss 23.3125, lr 3.2e-04, dt 2.1s +All GPU(s): step 810: loss 23.1094, lr 3.2e-04, dt 2.0s +All GPU(s): step 811: loss 23.1094, lr 3.2e-04, dt 2.1s +All GPU(s): step 812: loss 22.9844, lr 3.2e-04, dt 2.0s +All GPU(s): step 813: loss 22.9062, lr 3.3e-04, dt 2.0s +All GPU(s): step 814: loss 22.7344, lr 3.3e-04, dt 2.1s +All GPU(s): step 815: loss 23.0156, lr 3.3e-04, dt 2.1s +All GPU(s): step 816: loss 22.8750, lr 3.3e-04, dt 2.1s +All GPU(s): step 817: loss 22.8750, lr 3.3e-04, dt 2.0s +All GPU(s): step 818: loss 22.9062, lr 3.3e-04, dt 2.0s +All GPU(s): step 819: loss 22.8750, lr 3.3e-04, dt 2.0s +All GPU(s): step 820: loss 22.8750, lr 3.3e-04, dt 2.0s +All GPU(s): step 821: loss 22.9062, lr 3.3e-04, dt 2.1s +All GPU(s): step 822: loss 23.0625, lr 3.3e-04, dt 2.0s +All GPU(s): step 823: loss 22.7031, lr 3.3e-04, dt 2.0s +All GPU(s): step 824: loss 22.4688, lr 3.3e-04, dt 2.0s +All GPU(s): step 825: loss 22.7500, lr 3.3e-04, dt 2.0s +All GPU(s): step 826: loss 22.8750, lr 3.3e-04, dt 2.1s +All GPU(s): step 827: loss 22.5938, lr 3.3e-04, dt 2.1s +All GPU(s): step 828: loss 22.4688, lr 3.3e-04, dt 2.0s +All GPU(s): step 829: loss 22.5781, lr 3.3e-04, dt 2.1s +All GPU(s): step 830: loss 22.7812, lr 3.3e-04, dt 2.1s +All GPU(s): step 831: loss 22.4375, lr 3.3e-04, dt 2.1s +All GPU(s): step 832: loss 22.2656, lr 3.3e-04, dt 2.1s +All GPU(s): step 833: loss 22.7188, lr 3.3e-04, dt 2.1s +All GPU(s): step 834: loss 22.2969, lr 3.3e-04, dt 2.0s +All GPU(s): step 835: loss 22.3906, lr 3.3e-04, dt 2.1s +All GPU(s): step 836: loss 21.9531, lr 3.3e-04, dt 2.1s +All GPU(s): step 837: loss 21.6719, lr 3.3e-04, dt 2.1s +All GPU(s): step 838: loss 21.8594, lr 3.4e-04, dt 2.0s +All GPU(s): step 839: loss 21.7656, lr 3.4e-04, dt 2.0s +All GPU(s): step 840: loss 21.7188, lr 3.4e-04, dt 2.1s +All GPU(s): step 841: loss 21.8594, lr 3.4e-04, dt 2.0s +All GPU(s): step 842: loss 21.7500, lr 3.4e-04, dt 2.0s +All GPU(s): step 843: loss 21.6719, lr 3.4e-04, dt 2.1s +All GPU(s): step 844: loss 21.2344, lr 3.4e-04, dt 2.1s +All GPU(s): step 845: loss 21.4531, lr 3.4e-04, dt 2.2s +All GPU(s): step 846: loss 21.3594, lr 3.4e-04, dt 2.0s +All GPU(s): step 847: loss 21.3594, lr 3.4e-04, dt 2.0s +All GPU(s): step 848: loss 20.8281, lr 3.4e-04, dt 2.0s +All GPU(s): step 849: loss 20.5469, lr 3.4e-04, dt 2.0s +All GPU(s): step 850: loss 20.8750, lr 3.4e-04, dt 2.1s +All GPU(s): step 851: loss 21.0312, lr 3.4e-04, dt 2.0s +All GPU(s): step 852: loss 21.2500, lr 3.4e-04, dt 2.0s +All GPU(s): step 853: loss 20.9219, lr 3.4e-04, dt 2.0s +All GPU(s): step 854: loss 20.9688, lr 3.4e-04, dt 2.1s +All GPU(s): step 855: loss 21.0625, lr 3.4e-04, dt 2.1s +All GPU(s): step 856: loss 21.0469, lr 3.4e-04, dt 2.0s +All GPU(s): step 857: loss 21.0312, lr 3.4e-04, dt 2.0s +All GPU(s): step 858: loss 20.9688, lr 3.4e-04, dt 2.0s +All GPU(s): step 859: loss 20.8125, lr 3.4e-04, dt 2.1s +All GPU(s): step 860: loss 21.0781, lr 3.4e-04, dt 2.1s +All GPU(s): step 861: loss 21.1875, lr 3.4e-04, dt 2.1s +All GPU(s): step 862: loss 21.0312, lr 3.4e-04, dt 2.0s +All GPU(s): step 863: loss 20.9844, lr 3.5e-04, dt 2.1s +All GPU(s): step 864: loss 20.9844, lr 3.5e-04, dt 2.1s +All GPU(s): step 865: loss 21.0781, lr 3.5e-04, dt 2.0s +All GPU(s): step 866: loss 21.0312, lr 3.5e-04, dt 2.1s +All GPU(s): step 867: loss 20.9062, lr 3.5e-04, dt 2.1s +All GPU(s): step 868: loss 21.1875, lr 3.5e-04, dt 2.1s +All GPU(s): step 869: loss 21.0938, lr 3.5e-04, dt 2.1s +All GPU(s): step 870: loss 21.0156, lr 3.5e-04, dt 2.1s +All GPU(s): step 871: loss 21.1562, lr 3.5e-04, dt 2.1s +All GPU(s): step 872: loss 21.2656, lr 3.5e-04, dt 2.1s +All GPU(s): step 873: loss 21.0156, lr 3.5e-04, dt 2.1s +All GPU(s): step 874: loss 21.1719, lr 3.5e-04, dt 2.1s +All GPU(s): step 875: loss 21.1875, lr 3.5e-04, dt 2.1s +All GPU(s): step 876: loss 21.3906, lr 3.5e-04, dt 2.1s +All GPU(s): step 877: loss 20.9531, lr 3.5e-04, dt 2.1s +All GPU(s): step 878: loss 20.8281, lr 3.5e-04, dt 2.1s +All GPU(s): step 879: loss 20.7969, lr 3.5e-04, dt 2.1s +All GPU(s): step 880: loss 20.6562, lr 3.5e-04, dt 2.0s +All GPU(s): step 881: loss 20.5938, lr 3.5e-04, dt 2.0s +All GPU(s): step 882: loss 21.4219, lr 3.5e-04, dt 2.0s +All GPU(s): step 883: loss 20.7188, lr 3.5e-04, dt 2.1s +All GPU(s): step 884: loss 20.7969, lr 3.5e-04, dt 2.0s +All GPU(s): step 885: loss 20.9062, lr 3.5e-04, dt 2.0s +All GPU(s): step 886: loss 21.0625, lr 3.5e-04, dt 2.0s +All GPU(s): step 887: loss 21.0938, lr 3.5e-04, dt 2.1s +All GPU(s): step 888: loss 20.8594, lr 3.6e-04, dt 2.1s +All GPU(s): step 889: loss 20.7969, lr 3.6e-04, dt 2.0s +All GPU(s): step 890: loss 20.9062, lr 3.6e-04, dt 2.0s +All GPU(s): step 891: loss 20.8594, lr 3.6e-04, dt 2.1s +All GPU(s): step 892: loss 21.0156, lr 3.6e-04, dt 2.0s +All GPU(s): step 893: loss 21.1875, lr 3.6e-04, dt 2.1s +All GPU(s): step 894: loss 20.9688, lr 3.6e-04, dt 2.1s +All GPU(s): step 895: loss 20.9375, lr 3.6e-04, dt 2.0s +All GPU(s): step 896: loss 20.7812, lr 3.6e-04, dt 2.1s +All GPU(s): step 897: loss 20.8438, lr 3.6e-04, dt 2.1s +All GPU(s): step 898: loss 20.7344, lr 3.6e-04, dt 2.1s +All GPU(s): step 899: loss 21.1562, lr 3.6e-04, dt 2.1s +All GPU(s): step 900: loss 21.0781, lr 3.6e-04, dt 2.1s +All GPU(s): step 901: loss 20.9219, lr 3.6e-04, dt 2.1s +All GPU(s): step 902: loss 20.6719, lr 3.6e-04, dt 2.1s +All GPU(s): step 903: loss 20.9688, lr 3.6e-04, dt 2.1s +All GPU(s): step 904: loss 21.1094, lr 3.6e-04, dt 2.0s +All GPU(s): step 905: loss 20.5781, lr 3.6e-04, dt 2.0s +All GPU(s): step 906: loss 20.8125, lr 3.6e-04, dt 2.1s +All GPU(s): step 907: loss 20.8281, lr 3.6e-04, dt 2.1s +All GPU(s): step 908: loss 20.5938, lr 3.6e-04, dt 2.1s +All GPU(s): step 909: loss 20.6875, lr 3.6e-04, dt 2.1s +All GPU(s): step 910: loss 20.9688, lr 3.6e-04, dt 2.1s +All GPU(s): step 911: loss 20.2812, lr 3.6e-04, dt 2.1s +All GPU(s): step 912: loss 20.2812, lr 3.6e-04, dt 2.1s +All GPU(s): step 913: loss 20.4531, lr 3.7e-04, dt 2.0s +All GPU(s): step 914: loss 20.4844, lr 3.7e-04, dt 2.0s +All GPU(s): step 915: loss 20.0938, lr 3.7e-04, dt 2.0s +All GPU(s): step 916: loss 19.4375, lr 3.7e-04, dt 2.1s +All GPU(s): step 917: loss 20.4062, lr 3.7e-04, dt 2.1s +All GPU(s): step 918: loss 20.7656, lr 3.7e-04, dt 2.0s +All GPU(s): step 919: loss 20.1719, lr 3.7e-04, dt 2.0s +All GPU(s): step 920: loss 20.2188, lr 3.7e-04, dt 2.1s +All GPU(s): step 921: loss 20.7500, lr 3.7e-04, dt 2.1s +All GPU(s): step 922: loss 20.5625, lr 3.7e-04, dt 2.2s +All GPU(s): step 923: loss 20.8125, lr 3.7e-04, dt 2.0s +All GPU(s): step 924: loss 21.0156, lr 3.7e-04, dt 2.0s +All GPU(s): step 925: loss 20.7812, lr 3.7e-04, dt 2.0s +All GPU(s): step 926: loss 20.6875, lr 3.7e-04, dt 2.1s +All GPU(s): step 927: loss 21.0312, lr 3.7e-04, dt 2.1s +All GPU(s): step 928: loss 20.8594, lr 3.7e-04, dt 2.1s +All GPU(s): step 929: loss 20.6094, lr 3.7e-04, dt 2.0s +All GPU(s): step 930: loss 20.7500, lr 3.7e-04, dt 2.0s +All GPU(s): step 931: loss 20.9688, lr 3.7e-04, dt 2.1s +All GPU(s): step 932: loss 20.3125, lr 3.7e-04, dt 2.1s +All GPU(s): step 933: loss 20.2812, lr 3.7e-04, dt 2.0s +All GPU(s): step 934: loss 20.9375, lr 3.7e-04, dt 2.0s +All GPU(s): step 935: loss 21.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 936: loss 20.7344, lr 3.7e-04, dt 2.1s +All GPU(s): step 937: loss 21.0781, lr 3.7e-04, dt 2.1s +All GPU(s): step 938: loss 21.3438, lr 3.8e-04, dt 2.1s +All GPU(s): step 939: loss 20.9531, lr 3.8e-04, dt 2.0s +All GPU(s): step 940: loss 20.9844, lr 3.8e-04, dt 2.1s +All GPU(s): step 941: loss 21.2969, lr 3.8e-04, dt 2.2s +All GPU(s): step 942: loss 21.1250, lr 3.8e-04, dt 2.1s +All GPU(s): step 943: loss 21.3281, lr 3.8e-04, dt 2.0s +All GPU(s): step 944: loss 21.2344, lr 3.8e-04, dt 2.0s +All GPU(s): step 945: loss 21.0000, lr 3.8e-04, dt 2.1s +All GPU(s): step 946: loss 21.0312, lr 3.8e-04, dt 2.2s +All GPU(s): step 947: loss 20.9688, lr 3.8e-04, dt 2.1s +All GPU(s): step 948: loss 21.2656, lr 3.8e-04, dt 2.0s +All GPU(s): step 949: loss 21.2188, lr 3.8e-04, dt 2.0s +All GPU(s): step 950: loss 21.0938, lr 3.8e-04, dt 2.1s +All GPU(s): step 951: loss 21.2969, lr 3.8e-04, dt 2.1s +All GPU(s): step 952: loss 21.2344, lr 3.8e-04, dt 2.0s +All GPU(s): step 953: loss 21.2656, lr 3.8e-04, dt 2.0s +All GPU(s): step 954: loss 20.7812, lr 3.8e-04, dt 2.0s +All GPU(s): step 955: loss 21.1094, lr 3.8e-04, dt 2.1s +All GPU(s): step 956: loss 21.3750, lr 3.8e-04, dt 2.1s +All GPU(s): step 957: loss 20.9688, lr 3.8e-04, dt 2.1s +All GPU(s): step 958: loss 21.1719, lr 3.8e-04, dt 2.1s +All GPU(s): step 959: loss 21.1719, lr 3.8e-04, dt 2.1s +All GPU(s): step 960: loss 21.3125, lr 3.8e-04, dt 2.1s +All GPU(s): step 961: loss 21.4219, lr 3.8e-04, dt 2.0s +All GPU(s): step 962: loss 21.4688, lr 3.8e-04, dt 2.0s +All GPU(s): step 963: loss 21.2031, lr 3.9e-04, dt 2.0s +All GPU(s): step 964: loss 21.1250, lr 3.9e-04, dt 2.0s +All GPU(s): step 965: loss 21.0625, lr 3.9e-04, dt 2.1s +All GPU(s): step 966: loss 21.4531, lr 3.9e-04, dt 2.0s +All GPU(s): step 967: loss 21.3594, lr 3.9e-04, dt 2.1s +All GPU(s): step 968: loss 21.4062, lr 3.9e-04, dt 2.1s +All GPU(s): step 969: loss 21.2969, lr 3.9e-04, dt 2.1s +All GPU(s): step 970: loss 21.0000, lr 3.9e-04, dt 2.2s +All GPU(s): step 971: loss 20.9062, lr 3.9e-04, dt 2.1s +All GPU(s): step 972: loss 21.0625, lr 3.9e-04, dt 2.0s +All GPU(s): step 973: loss 20.8594, lr 3.9e-04, dt 2.0s +All GPU(s): step 974: loss 20.5000, lr 3.9e-04, dt 2.0s +All GPU(s): step 975: loss 20.6250, lr 3.9e-04, dt 2.1s +All GPU(s): step 976: loss 21.0156, lr 3.9e-04, dt 2.1s +All GPU(s): step 977: loss 21.0312, lr 3.9e-04, dt 2.1s +All GPU(s): step 978: loss 20.4688, lr 3.9e-04, dt 2.1s +All GPU(s): step 979: loss 19.7031, lr 3.9e-04, dt 2.0s +All GPU(s): step 980: loss 19.9531, lr 3.9e-04, dt 2.1s +All GPU(s): step 981: loss 19.5781, lr 3.9e-04, dt 2.0s +All GPU(s): step 982: loss 19.6406, lr 3.9e-04, dt 2.1s +All GPU(s): step 983: loss 19.9531, lr 3.9e-04, dt 2.0s +All GPU(s): step 984: loss 19.8281, lr 3.9e-04, dt 2.1s +All GPU(s): step 985: loss 19.3906, lr 3.9e-04, dt 2.1s +All GPU(s): step 986: loss 19.8125, lr 3.9e-04, dt 2.0s +All GPU(s): step 987: loss 19.6406, lr 3.9e-04, dt 2.1s +All GPU(s): step 988: loss 19.8750, lr 4.0e-04, dt 2.0s +All GPU(s): step 989: loss 18.7188, lr 4.0e-04, dt 2.1s +All GPU(s): step 990: loss 19.6250, lr 4.0e-04, dt 2.1s +All GPU(s): step 991: loss 19.6719, lr 4.0e-04, dt 2.0s +All GPU(s): step 992: loss 19.2500, lr 4.0e-04, dt 2.0s +All GPU(s): step 993: loss 19.0938, lr 4.0e-04, dt 2.0s +All GPU(s): step 994: loss 18.0469, lr 4.0e-04, dt 2.1s +All GPU(s): step 995: loss 18.9688, lr 4.0e-04, dt 2.0s +All GPU(s): step 996: loss 18.7188, lr 4.0e-04, dt 2.0s +All GPU(s): step 997: loss 17.5781, lr 4.0e-04, dt 2.0s +All GPU(s): step 998: loss 17.1094, lr 4.0e-04, dt 2.1s +All GPU(s): step 999: loss 17.3750, lr 4.0e-04, dt 2.1s +saving checkpoint to checkpoints/ckpt_1000.pt +All GPU(s): step 1000: loss 17.4844, lr 4.0e-04, dt 2.1s +All GPU(s): step 1001: loss 16.6562, lr 4.0e-04, dt 2.0s +All GPU(s): step 1002: loss 16.1562, lr 4.0e-04, dt 2.0s +All GPU(s): step 1003: loss 15.9766, lr 4.0e-04, dt 2.1s +All GPU(s): step 1004: loss 15.4609, lr 4.0e-04, dt 2.1s +All GPU(s): step 1005: loss 15.6797, lr 4.0e-04, dt 2.1s +All GPU(s): step 1006: loss 15.8125, lr 4.0e-04, dt 2.0s +All GPU(s): step 1007: loss 15.2109, lr 4.0e-04, dt 2.0s +All GPU(s): step 1008: loss 14.9531, lr 4.0e-04, dt 2.1s +All GPU(s): step 1009: loss 15.2344, lr 4.0e-04, dt 2.1s +All GPU(s): step 1010: loss 15.0547, lr 4.0e-04, dt 2.0s +All GPU(s): step 1011: loss 15.1094, lr 4.0e-04, dt 2.0s +All GPU(s): step 1012: loss 15.1797, lr 4.0e-04, dt 2.0s +All GPU(s): step 1013: loss 15.1094, lr 4.0e-04, dt 2.1s +All GPU(s): step 1014: loss 15.2969, lr 4.0e-04, dt 2.0s +All GPU(s): step 1015: loss 15.1797, lr 4.0e-04, dt 2.0s +All GPU(s): step 1016: loss 15.1484, lr 4.0e-04, dt 2.0s +All GPU(s): step 1017: loss 15.2422, lr 4.0e-04, dt 2.0s +All GPU(s): step 1018: loss 15.4375, lr 4.0e-04, dt 2.1s +All GPU(s): step 1019: loss 15.1719, lr 4.0e-04, dt 2.0s +All GPU(s): step 1020: loss 15.1250, lr 4.0e-04, dt 2.0s +All GPU(s): step 1021: loss 15.2578, lr 4.0e-04, dt 2.0s +All GPU(s): step 1022: loss 15.2031, lr 4.0e-04, dt 2.1s +All GPU(s): step 1023: loss 15.2891, lr 4.0e-04, dt 2.1s +All GPU(s): step 1024: loss 14.8984, lr 4.0e-04, dt 2.0s +All GPU(s): step 1025: loss 15.0781, lr 4.0e-04, dt 2.1s +All GPU(s): step 1026: loss 15.5547, lr 4.0e-04, dt 2.1s +All GPU(s): step 1027: loss 15.3438, lr 4.0e-04, dt 2.1s +All GPU(s): step 1028: loss 15.0391, lr 4.0e-04, dt 2.1s +All GPU(s): step 1029: loss 15.5938, lr 4.0e-04, dt 2.1s +All GPU(s): step 1030: loss 15.3750, lr 4.0e-04, dt 2.0s +All GPU(s): step 1031: loss 15.2031, lr 4.0e-04, dt 2.1s +All GPU(s): step 1032: loss 15.0703, lr 4.0e-04, dt 2.1s +All GPU(s): step 1033: loss 14.9297, lr 4.0e-04, dt 2.1s +All GPU(s): step 1034: loss 15.2656, lr 4.0e-04, dt 2.0s +All GPU(s): step 1035: loss 15.2109, lr 4.0e-04, dt 2.0s +All GPU(s): step 1036: loss 15.0781, lr 4.0e-04, dt 2.0s +All GPU(s): step 1037: loss 15.0625, lr 4.0e-04, dt 2.1s +All GPU(s): step 1038: loss 15.6328, lr 4.0e-04, dt 2.0s +All GPU(s): step 1039: loss 14.8203, lr 4.0e-04, dt 2.0s +All GPU(s): step 1040: loss 14.8594, lr 4.0e-04, dt 2.0s +All GPU(s): step 1041: loss 15.2891, lr 4.0e-04, dt 2.0s +All GPU(s): step 1042: loss 15.1641, lr 4.0e-04, dt 2.1s +All GPU(s): step 1043: loss 15.1328, lr 4.0e-04, dt 2.0s +All GPU(s): step 1044: loss 14.9531, lr 4.0e-04, dt 2.0s +All GPU(s): step 1045: loss 15.1719, lr 4.0e-04, dt 2.0s +All GPU(s): step 1046: loss 14.6875, lr 4.0e-04, dt 2.0s +All GPU(s): step 1047: loss 15.2266, lr 4.0e-04, dt 2.2s +All GPU(s): step 1048: loss 15.2031, lr 4.0e-04, dt 2.0s +All GPU(s): step 1049: loss 14.9531, lr 4.0e-04, dt 2.1s +All GPU(s): step 1050: loss 15.4453, lr 4.0e-04, dt 2.0s +All GPU(s): step 1051: loss 14.9609, lr 4.0e-04, dt 2.1s +All GPU(s): step 1052: loss 14.7969, lr 4.0e-04, dt 2.1s +All GPU(s): step 1053: loss 15.1953, lr 4.0e-04, dt 2.1s +All GPU(s): step 1054: loss 15.0156, lr 4.0e-04, dt 2.1s +All GPU(s): step 1055: loss 15.1484, lr 4.0e-04, dt 2.0s +All GPU(s): step 1056: loss 14.9297, lr 4.0e-04, dt 2.1s +All GPU(s): step 1057: loss 14.8281, lr 4.0e-04, dt 2.1s +All GPU(s): step 1058: loss 14.9609, lr 4.0e-04, dt 2.1s +All GPU(s): step 1059: loss 14.8672, lr 4.0e-04, dt 2.0s +All GPU(s): step 1060: loss 14.7891, lr 4.0e-04, dt 2.0s +All GPU(s): step 1061: loss 15.7422, lr 4.0e-04, dt 2.1s +All GPU(s): step 1062: loss 14.9375, lr 4.0e-04, dt 2.0s +All GPU(s): step 1063: loss 15.2578, lr 4.0e-04, dt 2.0s +All GPU(s): step 1064: loss 14.5859, lr 4.0e-04, dt 2.0s +All GPU(s): step 1065: loss 14.9922, lr 4.0e-04, dt 2.0s +All GPU(s): step 1066: loss 15.4531, lr 4.0e-04, dt 2.1s +All GPU(s): step 1067: loss 14.9531, lr 4.0e-04, dt 2.0s +All GPU(s): step 1068: loss 14.6797, lr 4.0e-04, dt 2.0s +All GPU(s): step 1069: loss 15.0547, lr 4.0e-04, dt 2.0s +All GPU(s): step 1070: loss 15.0781, lr 4.0e-04, dt 2.1s +All GPU(s): step 1071: loss 14.8438, lr 4.0e-04, dt 2.1s +All GPU(s): step 1072: loss 14.6094, lr 4.0e-04, dt 2.1s +All GPU(s): step 1073: loss 15.4922, lr 4.0e-04, dt 2.1s +All GPU(s): step 1074: loss 14.5234, lr 4.0e-04, dt 2.1s +All GPU(s): step 1075: loss 15.0859, lr 4.0e-04, dt 2.1s +All GPU(s): step 1076: loss 14.8828, lr 4.0e-04, dt 2.2s +All GPU(s): step 1077: loss 15.2578, lr 4.0e-04, dt 2.1s +All GPU(s): step 1078: loss 14.7891, lr 4.0e-04, dt 2.1s +All GPU(s): step 1079: loss 15.1953, lr 4.0e-04, dt 2.0s +All GPU(s): step 1080: loss 14.9297, lr 4.0e-04, dt 2.1s +All GPU(s): step 1081: loss 14.9453, lr 4.0e-04, dt 2.1s +All GPU(s): step 1082: loss 14.9297, lr 4.0e-04, dt 2.1s +All GPU(s): step 1083: loss 14.7344, lr 4.0e-04, dt 2.1s +All GPU(s): step 1084: loss 15.0000, lr 4.0e-04, dt 2.1s +All GPU(s): step 1085: loss 14.5469, lr 4.0e-04, dt 2.1s +All GPU(s): step 1086: loss 15.0391, lr 4.0e-04, dt 2.1s +All GPU(s): step 1087: loss 15.0000, lr 4.0e-04, dt 2.0s +All GPU(s): step 1088: loss 14.8750, lr 4.0e-04, dt 2.1s +All GPU(s): step 1089: loss 15.0938, lr 4.0e-04, dt 2.0s +All GPU(s): step 1090: loss 14.7344, lr 4.0e-04, dt 2.1s +All GPU(s): step 1091: loss 14.9062, lr 4.0e-04, dt 2.0s +All GPU(s): step 1092: loss 15.0234, lr 4.0e-04, dt 2.0s +All GPU(s): step 1093: loss 14.7422, lr 4.0e-04, dt 2.0s +All GPU(s): step 1094: loss 14.7422, lr 4.0e-04, dt 2.1s +All GPU(s): step 1095: loss 14.8516, lr 4.0e-04, dt 2.2s +All GPU(s): step 1096: loss 14.7422, lr 4.0e-04, dt 2.0s +All GPU(s): step 1097: loss 14.5391, lr 4.0e-04, dt 2.0s +All GPU(s): step 1098: loss 14.8594, lr 4.0e-04, dt 2.0s +All GPU(s): step 1099: loss 14.8125, lr 4.0e-04, dt 2.0s +All GPU(s): step 1100: loss 15.0938, lr 4.0e-04, dt 2.1s +All GPU(s): step 1101: loss 15.1016, lr 4.0e-04, dt 2.1s +All GPU(s): step 1102: loss 14.7500, lr 4.0e-04, dt 2.0s +All GPU(s): step 1103: loss 14.9531, lr 4.0e-04, dt 2.0s +All GPU(s): step 1104: loss 15.1484, lr 4.0e-04, dt 2.1s +All GPU(s): step 1105: loss 14.7266, lr 4.0e-04, dt 2.1s +All GPU(s): step 1106: loss 14.4609, lr 4.0e-04, dt 2.0s +All GPU(s): step 1107: loss 14.6406, lr 4.0e-04, dt 2.0s +All GPU(s): step 1108: loss 14.8828, lr 4.0e-04, dt 2.0s +All GPU(s): step 1109: loss 15.2188, lr 4.0e-04, dt 2.1s +All GPU(s): step 1110: loss 14.7578, lr 4.0e-04, dt 2.1s +All GPU(s): step 1111: loss 14.8750, lr 4.0e-04, dt 2.0s +All GPU(s): step 1112: loss 14.6641, lr 4.0e-04, dt 2.0s +All GPU(s): step 1113: loss 14.8906, lr 4.0e-04, dt 2.0s +All GPU(s): step 1114: loss 14.9609, lr 4.0e-04, dt 2.1s +All GPU(s): step 1115: loss 14.8281, lr 4.0e-04, dt 2.0s +All GPU(s): step 1116: loss 14.8516, lr 4.0e-04, dt 2.0s +All GPU(s): step 1117: loss 14.9688, lr 4.0e-04, dt 2.0s +All GPU(s): step 1118: loss 14.7422, lr 4.0e-04, dt 2.1s +All GPU(s): step 1119: loss 15.0391, lr 4.0e-04, dt 2.1s +All GPU(s): step 1120: loss 14.6484, lr 4.0e-04, dt 2.0s +All GPU(s): step 1121: loss 14.7266, lr 4.0e-04, dt 2.0s +All GPU(s): step 1122: loss 14.4844, lr 4.0e-04, dt 2.0s +All GPU(s): step 1123: loss 14.8516, lr 4.0e-04, dt 2.1s +All GPU(s): step 1124: loss 14.9609, lr 4.0e-04, dt 2.1s +All GPU(s): step 1125: loss 15.2109, lr 4.0e-04, dt 2.0s +All GPU(s): step 1126: loss 15.0469, lr 4.0e-04, dt 2.0s +All GPU(s): step 1127: loss 14.7891, lr 4.0e-04, dt 2.0s +All GPU(s): step 1128: loss 14.7578, lr 4.0e-04, dt 2.1s +All GPU(s): step 1129: loss 14.5469, lr 4.0e-04, dt 2.1s +All GPU(s): step 1130: loss 15.1953, lr 4.0e-04, dt 2.0s +All GPU(s): step 1131: loss 14.6172, lr 4.0e-04, dt 2.0s +All GPU(s): step 1132: loss 14.6484, lr 4.0e-04, dt 2.0s +All GPU(s): step 1133: loss 14.8438, lr 4.0e-04, dt 2.1s +All GPU(s): step 1134: loss 14.8828, lr 4.0e-04, dt 2.1s +All GPU(s): step 1135: loss 14.6406, lr 4.0e-04, dt 2.1s +All GPU(s): step 1136: loss 14.9844, lr 4.0e-04, dt 2.0s +All GPU(s): step 1137: loss 15.3203, lr 4.0e-04, dt 2.0s +All GPU(s): step 1138: loss 14.9688, lr 4.0e-04, dt 2.1s +All GPU(s): step 1139: loss 14.9453, lr 4.0e-04, dt 2.1s +All GPU(s): step 1140: loss 14.6250, lr 4.0e-04, dt 2.0s +All GPU(s): step 1141: loss 14.6484, lr 4.0e-04, dt 2.0s +All GPU(s): step 1142: loss 15.0938, lr 4.0e-04, dt 2.1s +All GPU(s): step 1143: loss 15.2109, lr 4.0e-04, dt 2.2s +All GPU(s): step 1144: loss 15.0469, lr 4.0e-04, dt 2.0s +All GPU(s): step 1145: loss 14.8984, lr 4.0e-04, dt 2.1s +All GPU(s): step 1146: loss 14.6953, lr 4.0e-04, dt 2.0s +All GPU(s): step 1147: loss 14.9141, lr 4.0e-04, dt 2.0s +All GPU(s): step 1148: loss 14.6719, lr 4.0e-04, dt 2.2s +All GPU(s): step 1149: loss 14.5312, lr 4.0e-04, dt 2.1s +All GPU(s): step 1150: loss 14.8438, lr 4.0e-04, dt 2.0s +All GPU(s): step 1151: loss 14.5781, lr 4.0e-04, dt 2.1s +All GPU(s): step 1152: loss 14.8438, lr 4.0e-04, dt 2.1s +All GPU(s): step 1153: loss 14.6406, lr 4.0e-04, dt 2.1s +All GPU(s): step 1154: loss 14.5938, lr 4.0e-04, dt 2.0s +All GPU(s): step 1155: loss 14.8438, lr 4.0e-04, dt 2.0s +All GPU(s): step 1156: loss 14.7578, lr 4.0e-04, dt 2.0s +All GPU(s): step 1157: loss 14.6875, lr 4.0e-04, dt 2.1s +All GPU(s): step 1158: loss 14.9453, lr 4.0e-04, dt 2.1s +All GPU(s): step 1159: loss 14.6250, lr 4.0e-04, dt 2.1s +All GPU(s): step 1160: loss 14.7422, lr 4.0e-04, dt 2.0s +All GPU(s): step 1161: loss 14.7578, lr 4.0e-04, dt 2.0s +All GPU(s): step 1162: loss 14.8281, lr 4.0e-04, dt 2.1s +All GPU(s): step 1163: loss 14.7969, lr 4.0e-04, dt 2.1s +All GPU(s): step 1164: loss 14.9922, lr 4.0e-04, dt 2.0s +All GPU(s): step 1165: loss 15.0078, lr 4.0e-04, dt 2.1s +All GPU(s): step 1166: loss 14.7812, lr 4.0e-04, dt 2.0s +All GPU(s): step 1167: loss 14.8125, lr 4.0e-04, dt 2.1s +All GPU(s): step 1168: loss 14.9453, lr 4.0e-04, dt 2.1s +All GPU(s): step 1169: loss 14.8984, lr 4.0e-04, dt 2.1s +All GPU(s): step 1170: loss 14.6797, lr 4.0e-04, dt 2.1s +All GPU(s): step 1171: loss 14.8906, lr 4.0e-04, dt 2.1s +All GPU(s): step 1172: loss 14.8125, lr 4.0e-04, dt 2.1s +All GPU(s): step 1173: loss 14.8672, lr 4.0e-04, dt 2.0s +All GPU(s): step 1174: loss 14.8594, lr 4.0e-04, dt 2.0s +All GPU(s): step 1175: loss 14.8125, lr 4.0e-04, dt 2.0s +All GPU(s): step 1176: loss 14.6094, lr 4.0e-04, dt 2.1s +All GPU(s): step 1177: loss 14.5234, lr 4.0e-04, dt 2.1s +All GPU(s): step 1178: loss 14.6641, lr 4.0e-04, dt 2.1s +All GPU(s): step 1179: loss 15.0000, lr 4.0e-04, dt 2.1s +All GPU(s): step 1180: loss 14.9062, lr 4.0e-04, dt 2.0s +All GPU(s): step 1181: loss 14.5000, lr 4.0e-04, dt 2.0s +All GPU(s): step 1182: loss 14.8281, lr 4.0e-04, dt 2.1s +All GPU(s): step 1183: loss 14.8125, lr 4.0e-04, dt 2.1s +All GPU(s): step 1184: loss 14.8828, lr 4.0e-04, dt 2.1s +All GPU(s): step 1185: loss 14.8516, lr 4.0e-04, dt 2.0s +All GPU(s): step 1186: loss 14.8828, lr 4.0e-04, dt 2.1s +All GPU(s): step 1187: loss 14.7266, lr 4.0e-04, dt 2.1s +All GPU(s): step 1188: loss 14.7969, lr 4.0e-04, dt 2.0s +All GPU(s): step 1189: loss 14.7031, lr 4.0e-04, dt 2.0s +All GPU(s): step 1190: loss 14.7266, lr 4.0e-04, dt 2.0s +All GPU(s): step 1191: loss 14.8359, lr 4.0e-04, dt 2.1s +All GPU(s): step 1192: loss 14.3125, lr 4.0e-04, dt 2.0s +All GPU(s): step 1193: loss 14.6562, lr 4.0e-04, dt 2.1s +All GPU(s): step 1194: loss 14.9375, lr 4.0e-04, dt 2.1s +All GPU(s): step 1195: loss 14.8828, lr 4.0e-04, dt 2.1s +All GPU(s): step 1196: loss 14.5547, lr 4.0e-04, dt 2.1s +All GPU(s): step 1197: loss 14.7031, lr 4.0e-04, dt 2.0s +All GPU(s): step 1198: loss 14.8828, lr 4.0e-04, dt 2.0s +All GPU(s): step 1199: loss 15.0391, lr 4.0e-04, dt 2.0s +All GPU(s): step 1200: loss 14.7109, lr 4.0e-04, dt 2.0s +All GPU(s): step 1201: loss 14.9219, lr 4.0e-04, dt 2.2s +All GPU(s): step 1202: loss 15.2969, lr 4.0e-04, dt 2.1s +All GPU(s): step 1203: loss 15.0938, lr 4.0e-04, dt 2.1s +All GPU(s): step 1204: loss 14.8203, lr 4.0e-04, dt 2.1s +All GPU(s): step 1205: loss 15.0703, lr 4.0e-04, dt 2.1s +All GPU(s): step 1206: loss 15.4062, lr 4.0e-04, dt 2.1s +All GPU(s): step 1207: loss 14.9141, lr 4.0e-04, dt 2.0s +All GPU(s): step 1208: loss 15.3750, lr 4.0e-04, dt 2.0s +All GPU(s): step 1209: loss 15.8047, lr 4.0e-04, dt 2.0s +All GPU(s): step 1210: loss 15.2812, lr 4.0e-04, dt 2.0s +All GPU(s): step 1211: loss 15.6016, lr 4.0e-04, dt 2.1s +All GPU(s): step 1212: loss 16.0078, lr 4.0e-04, dt 2.0s +All GPU(s): step 1213: loss 15.4766, lr 4.0e-04, dt 2.1s +All GPU(s): step 1214: loss 15.7344, lr 4.0e-04, dt 2.0s +All GPU(s): step 1215: loss 16.0000, lr 4.0e-04, dt 2.1s +All GPU(s): step 1216: loss 15.3906, lr 4.0e-04, dt 2.0s +All GPU(s): step 1217: loss 15.2734, lr 4.0e-04, dt 2.0s +All GPU(s): step 1218: loss 15.3438, lr 4.0e-04, dt 2.0s +All GPU(s): step 1219: loss 15.2031, lr 4.0e-04, dt 2.1s +All GPU(s): step 1220: loss 15.2500, lr 4.0e-04, dt 2.1s +All GPU(s): step 1221: loss 15.2891, lr 4.0e-04, dt 2.0s +All GPU(s): step 1222: loss 15.2734, lr 4.0e-04, dt 2.0s +All GPU(s): step 1223: loss 15.5078, lr 4.0e-04, dt 2.0s +All GPU(s): step 1224: loss 15.3516, lr 4.0e-04, dt 2.0s +All GPU(s): step 1225: loss 14.9844, lr 4.0e-04, dt 2.1s +All GPU(s): step 1226: loss 15.0781, lr 4.0e-04, dt 2.0s +All GPU(s): step 1227: loss 14.3984, lr 4.0e-04, dt 2.0s +All GPU(s): step 1228: loss 14.5391, lr 4.0e-04, dt 2.0s +All GPU(s): step 1229: loss 14.9609, lr 4.0e-04, dt 2.0s +All GPU(s): step 1230: loss 14.6641, lr 4.0e-04, dt 2.1s +All GPU(s): step 1231: loss 14.3047, lr 4.0e-04, dt 2.0s +All GPU(s): step 1232: loss 15.0234, lr 4.0e-04, dt 2.0s +All GPU(s): step 1233: loss 15.0703, lr 4.0e-04, dt 2.0s +All GPU(s): step 1234: loss 14.9453, lr 4.0e-04, dt 2.0s +All GPU(s): step 1235: loss 14.6953, lr 4.0e-04, dt 2.1s +All GPU(s): step 1236: loss 14.5781, lr 4.0e-04, dt 2.0s +All GPU(s): step 1237: loss 13.4219, lr 4.0e-04, dt 2.0s +All GPU(s): step 1238: loss 12.9531, lr 4.0e-04, dt 2.0s +All GPU(s): step 1239: loss 13.5781, lr 4.0e-04, dt 2.1s +All GPU(s): step 1240: loss 12.6797, lr 4.0e-04, dt 2.1s +All GPU(s): step 1241: loss 10.6328, lr 4.0e-04, dt 2.1s +All GPU(s): step 1242: loss 7.2070, lr 4.0e-04, dt 2.1s +All GPU(s): step 1243: loss 3.1357, lr 4.0e-04, dt 2.1s +All GPU(s): step 1244: loss 1.0078, lr 4.0e-04, dt 2.1s +All GPU(s): step 1245: loss 1.6924, lr 4.0e-04, dt 1.9s +All GPU(s): step 1246: loss 5.6328, lr 4.0e-04, dt 1.8s +All GPU(s): step 1247: loss 8.5059, lr 4.0e-04, dt 1.7s +All GPU(s): step 1248: loss 9.2227, lr 4.0e-04, dt 1.6s +All GPU(s): step 1249: loss 9.9902, lr 4.0e-04, dt 1.6s +All GPU(s): step 1250: loss 7.9375, lr 4.0e-04, dt 1.7s +All GPU(s): step 1251: loss 8.0508, lr 4.0e-04, dt 1.6s +All GPU(s): step 1252: loss 8.3281, lr 4.0e-04, dt 1.7s +All GPU(s): step 1253: loss 11.1445, lr 4.0e-04, dt 1.6s +All GPU(s): step 1254: loss 11.6289, lr 4.0e-04, dt 1.6s +All GPU(s): step 1255: loss 8.3203, lr 4.0e-04, dt 1.7s +All GPU(s): step 1256: loss 10.0781, lr 4.0e-04, dt 1.8s +All GPU(s): step 1257: loss 9.7461, lr 4.0e-04, dt 1.7s +All GPU(s): step 1258: loss 9.8555, lr 4.0e-04, dt 1.6s +All GPU(s): step 1259: loss 8.9492, lr 4.0e-04, dt 1.6s +All GPU(s): step 1260: loss 7.6289, lr 4.0e-04, dt 1.7s +All GPU(s): step 1261: loss 9.6953, lr 4.0e-04, dt 1.7s +All GPU(s): step 1262: loss 6.6953, lr 4.0e-04, dt 1.7s +All GPU(s): step 1263: loss 5.0859, lr 4.0e-04, dt 1.8s +All GPU(s): step 1264: loss 1.8257, lr 4.0e-04, dt 2.0s +All GPU(s): step 1265: loss 2.1445, lr 4.0e-04, dt 1.9s +All GPU(s): step 1266: loss 2.1362, lr 4.0e-04, dt 1.9s +All GPU(s): step 1267: loss 2.2490, lr 4.0e-04, dt 2.0s +All GPU(s): step 1268: loss 1.3013, lr 4.0e-04, dt 1.9s +All GPU(s): step 1269: loss 1.5083, lr 4.0e-04, dt 1.9s +All GPU(s): step 1270: loss 1.0479, lr 4.0e-04, dt 2.0s +All GPU(s): step 1271: loss 0.9180, lr 4.0e-04, dt 2.0s +All GPU(s): step 1272: loss 0.8184, lr 4.0e-04, dt 2.1s +All GPU(s): step 1273: loss 0.8535, lr 4.0e-04, dt 2.0s +All GPU(s): step 1274: loss 0.7305, lr 4.0e-04, dt 2.0s +All GPU(s): step 1275: loss 0.8828, lr 4.0e-04, dt 2.1s +All GPU(s): step 1276: loss 1.7695, lr 4.0e-04, dt 2.1s +All GPU(s): step 1277: loss 1.1079, lr 4.0e-04, dt 2.1s +All GPU(s): step 1278: loss 0.7852, lr 4.0e-04, dt 2.0s +All GPU(s): step 1279: loss 1.3711, lr 4.0e-04, dt 2.0s +All GPU(s): step 1280: loss 2.3516, lr 4.0e-04, dt 2.1s +All GPU(s): step 1281: loss 2.4248, lr 4.0e-04, dt 2.1s +All GPU(s): step 1282: loss 3.8242, lr 4.0e-04, dt 2.2s +All GPU(s): step 1283: loss 4.0312, lr 4.0e-04, dt 2.0s +All GPU(s): step 1284: loss 3.9004, lr 4.0e-04, dt 2.0s +All GPU(s): step 1285: loss 6.2344, lr 4.0e-04, dt 2.0s +All GPU(s): step 1286: loss 7.7344, lr 4.0e-04, dt 2.1s +All GPU(s): step 1287: loss 9.1445, lr 4.0e-04, dt 2.1s +All GPU(s): step 1288: loss 10.5469, lr 4.0e-04, dt 2.1s +All GPU(s): step 1289: loss 9.2188, lr 4.0e-04, dt 2.1s +All GPU(s): step 1290: loss 9.0312, lr 4.0e-04, dt 2.1s +All GPU(s): step 1291: loss 10.6875, lr 4.0e-04, dt 2.1s +All GPU(s): step 1292: loss 11.6172, lr 4.0e-04, dt 2.1s +All GPU(s): step 1293: loss 11.7578, lr 4.0e-04, dt 2.1s +All GPU(s): step 1294: loss 11.3125, lr 4.0e-04, dt 2.1s +All GPU(s): step 1295: loss 8.8945, lr 4.0e-04, dt 2.0s +All GPU(s): step 1296: loss 9.8750, lr 4.0e-04, dt 2.1s +All GPU(s): step 1297: loss 9.7617, lr 4.0e-04, dt 2.0s +All GPU(s): step 1298: loss 7.8828, lr 4.0e-04, dt 2.1s +All GPU(s): step 1299: loss 6.4648, lr 4.0e-04, dt 2.1s +All GPU(s): step 1300: loss 4.8652, lr 4.0e-04, dt 2.1s +All GPU(s): step 1301: loss 3.5977, lr 4.0e-04, dt 2.1s +All GPU(s): step 1302: loss 3.5752, lr 4.0e-04, dt 2.1s +All GPU(s): step 1303: loss 5.3477, lr 4.0e-04, dt 2.1s +All GPU(s): step 1304: loss 4.4648, lr 4.0e-04, dt 2.1s +All GPU(s): step 1305: loss 1.6240, lr 4.0e-04, dt 2.0s +All GPU(s): step 1306: loss 0.7119, lr 4.0e-04, dt 2.1s +All GPU(s): step 1307: loss 1.8330, lr 4.0e-04, dt 1.9s +All GPU(s): step 1308: loss 8.0156, lr 4.0e-04, dt 1.6s +All GPU(s): step 1309: loss 8.4297, lr 4.0e-04, dt 1.7s +All GPU(s): step 1310: loss 8.1250, lr 4.0e-04, dt 1.7s +All GPU(s): step 1311: loss 7.9688, lr 4.0e-04, dt 1.7s +All GPU(s): step 1312: loss 9.9258, lr 4.0e-04, dt 1.7s +All GPU(s): step 1313: loss 12.1406, lr 4.0e-04, dt 1.5s +All GPU(s): step 1314: loss 10.9922, lr 4.0e-04, dt 1.6s +All GPU(s): step 1315: loss 8.8945, lr 4.0e-04, dt 1.6s +All GPU(s): step 1316: loss 10.8828, lr 4.0e-04, dt 1.6s +All GPU(s): step 1317: loss 10.6602, lr 4.0e-04, dt 1.6s +All GPU(s): step 1318: loss 12.0938, lr 4.0e-04, dt 1.6s +All GPU(s): step 1319: loss 10.9766, lr 4.0e-04, dt 1.6s +All GPU(s): step 1320: loss 9.1055, lr 4.0e-04, dt 1.6s +All GPU(s): step 1321: loss 9.8203, lr 4.0e-04, dt 1.6s +All GPU(s): step 1322: loss 11.4297, lr 4.0e-04, dt 1.6s +All GPU(s): step 1323: loss 11.0703, lr 4.0e-04, dt 1.6s +All GPU(s): step 1324: loss 10.5195, lr 4.0e-04, dt 1.8s +All GPU(s): step 1325: loss 9.8281, lr 4.0e-04, dt 1.6s +All GPU(s): step 1326: loss 11.9609, lr 4.0e-04, dt 1.6s +All GPU(s): step 1327: loss 12.7578, lr 4.0e-04, dt 1.6s +All GPU(s): step 1328: loss 8.9805, lr 4.0e-04, dt 1.6s +All GPU(s): step 1329: loss 10.9688, lr 4.0e-04, dt 1.6s +All GPU(s): step 1330: loss 10.6445, lr 4.0e-04, dt 1.6s +All GPU(s): step 1331: loss 11.5703, lr 4.0e-04, dt 1.5s +All GPU(s): step 1332: loss 11.5000, lr 4.0e-04, dt 1.6s +All GPU(s): step 1333: loss 12.3672, lr 4.0e-04, dt 1.5s +All GPU(s): step 1334: loss 11.0625, lr 4.0e-04, dt 1.5s +All GPU(s): step 1335: loss 10.6641, lr 4.0e-04, dt 1.6s +All GPU(s): step 1336: loss 12.3203, lr 4.0e-04, dt 1.6s +All GPU(s): step 1337: loss 9.5234, lr 4.0e-04, dt 1.6s +All GPU(s): step 1338: loss 10.5039, lr 4.0e-04, dt 1.6s +All GPU(s): step 1339: loss 11.0703, lr 4.0e-04, dt 1.6s +All GPU(s): step 1340: loss 10.5430, lr 4.0e-04, dt 1.6s +All GPU(s): step 1341: loss 7.8945, lr 4.0e-04, dt 1.6s +All GPU(s): step 1342: loss 7.9570, lr 4.0e-04, dt 1.6s +All GPU(s): step 1343: loss 7.2422, lr 4.0e-04, dt 1.7s +All GPU(s): step 1344: loss 7.0801, lr 4.0e-04, dt 1.7s +All GPU(s): step 1345: loss 8.2734, lr 4.0e-04, dt 1.7s +All GPU(s): step 1346: loss 11.1055, lr 4.0e-04, dt 1.6s +All GPU(s): step 1347: loss 11.3477, lr 4.0e-04, dt 1.6s +All GPU(s): step 1348: loss 8.8633, lr 4.0e-04, dt 1.6s +All GPU(s): step 1349: loss 10.3867, lr 4.0e-04, dt 1.7s +All GPU(s): step 1350: loss 9.2695, lr 4.0e-04, dt 1.6s +All GPU(s): step 1351: loss 8.5938, lr 4.0e-04, dt 1.6s +All GPU(s): step 1352: loss 10.6172, lr 4.0e-04, dt 1.6s +All GPU(s): step 1353: loss 10.1680, lr 4.0e-04, dt 1.6s +All GPU(s): step 1354: loss 11.3711, lr 4.0e-04, dt 1.6s +All GPU(s): step 1355: loss 10.6836, lr 4.0e-04, dt 1.6s +All GPU(s): step 1356: loss 12.1094, lr 4.0e-04, dt 1.5s +All GPU(s): step 1357: loss 12.1719, lr 4.0e-04, dt 1.5s +All GPU(s): step 1358: loss 10.7812, lr 4.0e-04, dt 1.6s +All GPU(s): step 1359: loss 10.8477, lr 4.0e-04, dt 1.6s +All GPU(s): step 1360: loss 9.1836, lr 4.0e-04, dt 1.6s +All GPU(s): step 1361: loss 6.6328, lr 4.0e-04, dt 1.7s +All GPU(s): step 1362: loss 8.4023, lr 4.0e-04, dt 1.6s +All GPU(s): step 1363: loss 7.9570, lr 4.0e-04, dt 1.6s +All GPU(s): step 1364: loss 9.6328, lr 4.0e-04, dt 1.6s +All GPU(s): step 1365: loss 9.7852, lr 4.0e-04, dt 1.6s +All GPU(s): step 1366: loss 7.6523, lr 4.0e-04, dt 1.6s +All GPU(s): step 1367: loss 8.3320, lr 4.0e-04, dt 1.7s +All GPU(s): step 1368: loss 5.0195, lr 4.0e-04, dt 1.8s +All GPU(s): step 1369: loss 3.5312, lr 4.0e-04, dt 1.8s +All GPU(s): step 1370: loss 6.4648, lr 4.0e-04, dt 1.7s +All GPU(s): step 1371: loss 7.7148, lr 4.0e-04, dt 1.7s +All GPU(s): step 1372: loss 9.7070, lr 4.0e-04, dt 1.7s +All GPU(s): step 1373: loss 9.9531, lr 4.0e-04, dt 1.6s +All GPU(s): step 1374: loss 7.9219, lr 4.0e-04, dt 1.6s +All GPU(s): step 1375: loss 9.7891, lr 4.0e-04, dt 1.6s +All GPU(s): step 1376: loss 10.1133, lr 4.0e-04, dt 1.6s +All GPU(s): step 1377: loss 10.4141, lr 4.0e-04, dt 1.5s +All GPU(s): step 1378: loss 10.1914, lr 4.0e-04, dt 1.6s +All GPU(s): step 1379: loss 8.9570, lr 4.0e-04, dt 1.7s +All GPU(s): step 1380: loss 10.0508, lr 4.0e-04, dt 1.6s +All GPU(s): step 1381: loss 11.1484, lr 4.0e-04, dt 1.5s +All GPU(s): step 1382: loss 7.9473, lr 4.0e-04, dt 1.7s +All GPU(s): step 1383: loss 8.1797, lr 4.0e-04, dt 1.7s +All GPU(s): step 1384: loss 8.4961, lr 4.0e-04, dt 1.6s +All GPU(s): step 1385: loss 7.2852, lr 4.0e-04, dt 1.7s +All GPU(s): step 1386: loss 6.2324, lr 4.0e-04, dt 1.7s +All GPU(s): step 1387: loss 8.0664, lr 4.0e-04, dt 1.6s +All GPU(s): step 1388: loss 7.8867, lr 4.0e-04, dt 1.6s +All GPU(s): step 1389: loss 8.0977, lr 4.0e-04, dt 1.7s +All GPU(s): step 1390: loss 8.9258, lr 4.0e-04, dt 1.6s +All GPU(s): step 1391: loss 9.2773, lr 4.0e-04, dt 1.6s +All GPU(s): step 1392: loss 8.2578, lr 4.0e-04, dt 1.7s +All GPU(s): step 1393: loss 8.0547, lr 4.0e-04, dt 1.7s +All GPU(s): step 1394: loss 6.5352, lr 4.0e-04, dt 1.7s +All GPU(s): step 1395: loss 6.5430, lr 4.0e-04, dt 1.7s +All GPU(s): step 1396: loss 7.4531, lr 4.0e-04, dt 1.6s +All GPU(s): step 1397: loss 7.3945, lr 4.0e-04, dt 1.7s +All GPU(s): step 1398: loss 8.1055, lr 4.0e-04, dt 1.7s +All GPU(s): step 1399: loss 8.2188, lr 4.0e-04, dt 1.6s +All GPU(s): step 1400: loss 7.2598, lr 4.0e-04, dt 1.7s +All GPU(s): step 1401: loss 8.2109, lr 4.0e-04, dt 1.6s +All GPU(s): step 1402: loss 8.5703, lr 4.0e-04, dt 1.6s +All GPU(s): step 1403: loss 7.3594, lr 4.0e-04, dt 1.8s +All GPU(s): step 1404: loss 6.9648, lr 4.0e-04, dt 1.7s +All GPU(s): step 1405: loss 5.3164, lr 4.0e-04, dt 1.7s +All GPU(s): step 1406: loss 6.8555, lr 4.0e-04, dt 1.7s +All GPU(s): step 1407: loss 8.5039, lr 4.0e-04, dt 1.7s +All GPU(s): step 1408: loss 6.5430, lr 4.0e-04, dt 1.7s +All GPU(s): step 1409: loss 8.9727, lr 4.0e-04, dt 1.7s +All GPU(s): step 1410: loss 9.4180, lr 4.0e-04, dt 1.6s +All GPU(s): step 1411: loss 8.5391, lr 4.0e-04, dt 1.6s +All GPU(s): step 1412: loss 11.7031, lr 4.0e-04, dt 1.6s +All GPU(s): step 1413: loss 10.2734, lr 4.0e-04, dt 1.6s +All GPU(s): step 1414: loss 8.8398, lr 4.0e-04, dt 1.6s +All GPU(s): step 1415: loss 10.5430, lr 4.0e-04, dt 1.7s +All GPU(s): step 1416: loss 10.4375, lr 4.0e-04, dt 1.6s +All GPU(s): step 1417: loss 9.1406, lr 4.0e-04, dt 1.6s +All GPU(s): step 1418: loss 9.4883, lr 4.0e-04, dt 1.6s +All GPU(s): step 1419: loss 11.5703, lr 4.0e-04, dt 1.5s +All GPU(s): step 1420: loss 9.7383, lr 4.0e-04, dt 1.6s +All GPU(s): step 1421: loss 9.6133, lr 4.0e-04, dt 1.6s +All GPU(s): step 1422: loss 7.9766, lr 4.0e-04, dt 1.7s +All GPU(s): step 1423: loss 8.7305, lr 4.0e-04, dt 1.6s +All GPU(s): step 1424: loss 11.5547, lr 4.0e-04, dt 1.6s +All GPU(s): step 1425: loss 10.3945, lr 4.0e-04, dt 1.6s +All GPU(s): step 1426: loss 11.2773, lr 4.0e-04, dt 1.6s +All GPU(s): step 1427: loss 10.9766, lr 4.0e-04, dt 1.6s +All GPU(s): step 1428: loss 9.6250, lr 4.0e-04, dt 1.7s +All GPU(s): step 1429: loss 10.5234, lr 4.0e-04, dt 1.6s +All GPU(s): step 1430: loss 12.9375, lr 4.0e-04, dt 1.5s +All GPU(s): step 1431: loss 10.6016, lr 4.0e-04, dt 1.6s +All GPU(s): step 1432: loss 11.1211, lr 4.0e-04, dt 1.6s +All GPU(s): step 1433: loss 9.1797, lr 4.0e-04, dt 1.6s +All GPU(s): step 1434: loss 8.6484, lr 4.0e-04, dt 1.8s +All GPU(s): step 1435: loss 9.5801, lr 4.0e-04, dt 1.7s +All GPU(s): step 1436: loss 8.9102, lr 4.0e-04, dt 1.6s +All GPU(s): step 1437: loss 6.9727, lr 4.0e-04, dt 1.6s +All GPU(s): step 1438: loss 6.3828, lr 4.0e-04, dt 1.7s +All GPU(s): step 1439: loss 4.8867, lr 4.0e-04, dt 1.7s +All GPU(s): step 1440: loss 5.6836, lr 4.0e-04, dt 1.8s +All GPU(s): step 1441: loss 3.8491, lr 4.0e-04, dt 1.9s +All GPU(s): step 1442: loss 6.7969, lr 4.0e-04, dt 1.7s +All GPU(s): step 1443: loss 9.8906, lr 4.0e-04, dt 1.6s +All GPU(s): step 1444: loss 7.9082, lr 4.0e-04, dt 1.7s +All GPU(s): step 1445: loss 7.2188, lr 4.0e-04, dt 1.7s +All GPU(s): step 1446: loss 6.9102, lr 4.0e-04, dt 1.7s +All GPU(s): step 1447: loss 8.6172, lr 4.0e-04, dt 1.6s +All GPU(s): step 1448: loss 6.8965, lr 4.0e-04, dt 1.7s +All GPU(s): step 1449: loss 5.0156, lr 4.0e-04, dt 1.8s +All GPU(s): step 1450: loss 4.1602, lr 4.0e-04, dt 1.7s +All GPU(s): step 1451: loss 0.5657, lr 4.0e-04, dt 1.9s +All GPU(s): step 1452: loss 0.8916, lr 4.0e-04, dt 2.0s +All GPU(s): step 1453: loss 1.8201, lr 4.0e-04, dt 1.9s +All GPU(s): step 1454: loss 0.8036, lr 4.0e-04, dt 1.9s +All GPU(s): step 1455: loss 1.0601, lr 4.0e-04, dt 2.0s +All GPU(s): step 1456: loss 8.3594, lr 4.0e-04, dt 2.1s +All GPU(s): step 1457: loss 10.4688, lr 4.0e-04, dt 2.1s +All GPU(s): step 1458: loss 11.6016, lr 4.0e-04, dt 2.1s +All GPU(s): step 1459: loss 12.4297, lr 4.0e-04, dt 2.1s +All GPU(s): step 1460: loss 12.1328, lr 4.0e-04, dt 2.0s +All GPU(s): step 1461: loss 13.3672, lr 4.0e-04, dt 2.1s +All GPU(s): step 1462: loss 14.9688, lr 4.0e-04, dt 2.1s +All GPU(s): step 1463: loss 15.5859, lr 4.0e-04, dt 2.1s +All GPU(s): step 1464: loss 15.7656, lr 4.0e-04, dt 2.1s +All GPU(s): step 1465: loss 15.7266, lr 4.0e-04, dt 2.1s +All GPU(s): step 1466: loss 16.3516, lr 4.0e-04, dt 2.1s +All GPU(s): step 1467: loss 15.8359, lr 4.0e-04, dt 2.0s +All GPU(s): step 1468: loss 16.2500, lr 4.0e-04, dt 2.0s +All GPU(s): step 1469: loss 16.2109, lr 4.0e-04, dt 2.0s +All GPU(s): step 1470: loss 16.4453, lr 4.0e-04, dt 2.1s +All GPU(s): step 1471: loss 16.2812, lr 4.0e-04, dt 2.1s +All GPU(s): step 1472: loss 16.7266, lr 4.0e-04, dt 2.0s +All GPU(s): step 1473: loss 16.1797, lr 4.0e-04, dt 2.1s +All GPU(s): step 1474: loss 16.6641, lr 4.0e-04, dt 2.0s +All GPU(s): step 1475: loss 16.7891, lr 4.0e-04, dt 2.1s +All GPU(s): step 1476: loss 16.1172, lr 4.0e-04, dt 2.2s +All GPU(s): step 1477: loss 16.4766, lr 4.0e-04, dt 2.1s +All GPU(s): step 1478: loss 16.0000, lr 4.0e-04, dt 2.0s +All GPU(s): step 1479: loss 15.9922, lr 4.0e-04, dt 2.0s +All GPU(s): step 1480: loss 15.9219, lr 4.0e-04, dt 2.1s +All GPU(s): step 1481: loss 15.9062, lr 4.0e-04, dt 2.1s +All GPU(s): step 1482: loss 15.6562, lr 4.0e-04, dt 2.0s +All GPU(s): step 1483: loss 16.1953, lr 4.0e-04, dt 2.0s +All GPU(s): step 1484: loss 15.7969, lr 4.0e-04, dt 2.0s +All GPU(s): step 1485: loss 16.0234, lr 4.0e-04, dt 2.1s +All GPU(s): step 1486: loss 16.2969, lr 4.0e-04, dt 2.1s +All GPU(s): step 1487: loss 16.2500, lr 4.0e-04, dt 2.0s +All GPU(s): step 1488: loss 15.9609, lr 4.0e-04, dt 2.0s +All GPU(s): step 1489: loss 16.8359, lr 4.0e-04, dt 2.0s +All GPU(s): step 1490: loss 16.3750, lr 4.0e-04, dt 2.1s +All GPU(s): step 1491: loss 16.5078, lr 4.0e-04, dt 2.1s +All GPU(s): step 1492: loss 16.3359, lr 4.0e-04, dt 2.0s +All GPU(s): step 1493: loss 16.4531, lr 4.0e-04, dt 2.1s +All GPU(s): step 1494: loss 16.7578, lr 4.0e-04, dt 2.1s +All GPU(s): step 1495: loss 16.2109, lr 4.0e-04, dt 2.1s +All GPU(s): step 1496: loss 16.6953, lr 4.0e-04, dt 2.1s +All GPU(s): step 1497: loss 16.0312, lr 4.0e-04, dt 2.1s +All GPU(s): step 1498: loss 15.6641, lr 4.0e-04, dt 2.0s +All GPU(s): step 1499: loss 15.6953, lr 4.0e-04, dt 2.1s +All GPU(s): step 1500: loss 15.0000, lr 4.0e-04, dt 2.3s +All GPU(s): step 1501: loss 15.1719, lr 4.0e-04, dt 2.0s +All GPU(s): step 1502: loss 15.5312, lr 4.0e-04, dt 2.0s +All GPU(s): step 1503: loss 15.3516, lr 4.0e-04, dt 2.0s +All GPU(s): step 1504: loss 15.4766, lr 4.0e-04, dt 2.0s +All GPU(s): step 1505: loss 15.5625, lr 4.0e-04, dt 2.2s +All GPU(s): step 1506: loss 15.3594, lr 4.0e-04, dt 2.1s +All GPU(s): step 1507: loss 15.2500, lr 4.0e-04, dt 2.0s +All GPU(s): step 1508: loss 14.7812, lr 4.0e-04, dt 2.0s +All GPU(s): step 1509: loss 11.7031, lr 4.0e-04, dt 2.1s +All GPU(s): step 1510: loss 9.3086, lr 4.0e-04, dt 2.1s +All GPU(s): step 1511: loss 6.1250, lr 4.0e-04, dt 2.0s +All GPU(s): step 1512: loss 3.7773, lr 4.0e-04, dt 2.0s +All GPU(s): step 1513: loss 3.3438, lr 4.0e-04, dt 2.0s +All GPU(s): step 1514: loss 1.7031, lr 4.0e-04, dt 2.1s +All GPU(s): step 1515: loss 0.4316, lr 4.0e-04, dt 2.1s +All GPU(s): step 1516: loss 0.9391, lr 4.0e-04, dt 1.9s +All GPU(s): step 1517: loss 0.4084, lr 4.0e-04, dt 2.1s +All GPU(s): step 1518: loss 2.0166, lr 4.0e-04, dt 2.1s +All GPU(s): step 1519: loss 6.1309, lr 4.0e-04, dt 2.1s +All GPU(s): step 1520: loss 3.6230, lr 4.0e-04, dt 2.1s +All GPU(s): step 1521: loss 0.7175, lr 4.0e-04, dt 2.0s +All GPU(s): step 1522: loss 1.2239, lr 4.0e-04, dt 1.9s +All GPU(s): step 1523: loss 3.6934, lr 4.0e-04, dt 1.8s +All GPU(s): step 1524: loss 5.9805, lr 4.0e-04, dt 1.8s +All GPU(s): step 1525: loss 7.2852, lr 4.0e-04, dt 1.6s +All GPU(s): step 1526: loss 5.6426, lr 4.0e-04, dt 1.7s +All GPU(s): step 1527: loss 6.8594, lr 4.0e-04, dt 1.7s +All GPU(s): step 1528: loss 5.9766, lr 4.0e-04, dt 1.7s +All GPU(s): step 1529: loss 6.2207, lr 4.0e-04, dt 1.7s +All GPU(s): step 1530: loss 6.5664, lr 4.0e-04, dt 1.7s +All GPU(s): step 1531: loss 7.7852, lr 4.0e-04, dt 1.7s +All GPU(s): step 1532: loss 6.7109, lr 4.0e-04, dt 1.7s +All GPU(s): step 1533: loss 5.9570, lr 4.0e-04, dt 1.7s +All GPU(s): step 1534: loss 7.8594, lr 4.0e-04, dt 1.7s +All GPU(s): step 1535: loss 6.8125, lr 4.0e-04, dt 1.7s +All GPU(s): step 1536: loss 6.8320, lr 4.0e-04, dt 1.7s +All GPU(s): step 1537: loss 7.3457, lr 4.0e-04, dt 1.6s +All GPU(s): step 1538: loss 7.4219, lr 4.0e-04, dt 1.7s +All GPU(s): step 1539: loss 8.0352, lr 4.0e-04, dt 1.6s +All GPU(s): step 1540: loss 6.6445, lr 4.0e-04, dt 1.7s +All GPU(s): step 1541: loss 7.1719, lr 4.0e-04, dt 1.6s +All GPU(s): step 1542: loss 5.0625, lr 4.0e-04, dt 1.7s +All GPU(s): step 1543: loss 5.0098, lr 4.0e-04, dt 1.7s +All GPU(s): step 1544: loss 7.2305, lr 4.0e-04, dt 1.6s +All GPU(s): step 1545: loss 5.1172, lr 4.0e-04, dt 1.7s +All GPU(s): step 1546: loss 7.1680, lr 4.0e-04, dt 1.7s +All GPU(s): step 1547: loss 5.3613, lr 4.0e-04, dt 1.7s +All GPU(s): step 1548: loss 5.9375, lr 4.0e-04, dt 1.8s +All GPU(s): step 1549: loss 7.2109, lr 4.0e-04, dt 1.6s +All GPU(s): step 1550: loss 7.6172, lr 4.0e-04, dt 1.7s +All GPU(s): step 1551: loss 5.9531, lr 4.0e-04, dt 1.7s +All GPU(s): step 1552: loss 7.0938, lr 4.0e-04, dt 1.7s +All GPU(s): step 1553: loss 6.5508, lr 4.0e-04, dt 1.6s +All GPU(s): step 1554: loss 7.4219, lr 4.0e-04, dt 1.7s +All GPU(s): step 1555: loss 6.5723, lr 4.0e-04, dt 1.7s +All GPU(s): step 1556: loss 6.3457, lr 4.0e-04, dt 1.6s +All GPU(s): step 1557: loss 7.0781, lr 4.0e-04, dt 1.6s +All GPU(s): step 1558: loss 6.8262, lr 4.0e-04, dt 1.7s +All GPU(s): step 1559: loss 5.3906, lr 4.0e-04, dt 1.7s +All GPU(s): step 1560: loss 8.4570, lr 4.0e-04, dt 1.7s +All GPU(s): step 1561: loss 5.8164, lr 4.0e-04, dt 1.6s +All GPU(s): step 1562: loss 6.6641, lr 4.0e-04, dt 1.6s +All GPU(s): step 1563: loss 6.1992, lr 4.0e-04, dt 1.7s +All GPU(s): step 1564: loss 8.4336, lr 4.0e-04, dt 1.6s +All GPU(s): step 1565: loss 7.7773, lr 4.0e-04, dt 1.7s +All GPU(s): step 1566: loss 6.7734, lr 4.0e-04, dt 1.7s +All GPU(s): step 1567: loss 6.4453, lr 4.0e-04, dt 1.7s +All GPU(s): step 1568: loss 7.0977, lr 4.0e-04, dt 1.7s +All GPU(s): step 1569: loss 7.2812, lr 4.0e-04, dt 1.6s +All GPU(s): step 1570: loss 6.7832, lr 4.0e-04, dt 1.6s +All GPU(s): step 1571: loss 7.3516, lr 4.0e-04, dt 1.7s +All GPU(s): step 1572: loss 7.0742, lr 4.0e-04, dt 1.8s +All GPU(s): step 1573: loss 8.1523, lr 4.0e-04, dt 1.6s +All GPU(s): step 1574: loss 6.8867, lr 4.0e-04, dt 1.7s +All GPU(s): step 1575: loss 6.6172, lr 4.0e-04, dt 1.6s +All GPU(s): step 1576: loss 6.2891, lr 4.0e-04, dt 1.7s +All GPU(s): step 1577: loss 6.3906, lr 4.0e-04, dt 1.6s +All GPU(s): step 1578: loss 6.0625, lr 4.0e-04, dt 1.8s +All GPU(s): step 1579: loss 6.7188, lr 4.0e-04, dt 1.6s +All GPU(s): step 1580: loss 6.3633, lr 4.0e-04, dt 1.6s +All GPU(s): step 1581: loss 4.9160, lr 4.0e-04, dt 1.7s +All GPU(s): step 1582: loss 5.7871, lr 4.0e-04, dt 1.7s +All GPU(s): step 1583: loss 6.0254, lr 4.0e-04, dt 1.7s +All GPU(s): step 1584: loss 6.8398, lr 4.0e-04, dt 1.8s +All GPU(s): step 1585: loss 5.6426, lr 4.0e-04, dt 1.7s +All GPU(s): step 1586: loss 6.0977, lr 4.0e-04, dt 1.7s +All GPU(s): step 1587: loss 4.8887, lr 4.0e-04, dt 1.7s +All GPU(s): step 1588: loss 5.6230, lr 4.0e-04, dt 1.7s +All GPU(s): step 1589: loss 5.6543, lr 4.0e-04, dt 1.7s +All GPU(s): step 1590: loss 6.3867, lr 4.0e-04, dt 1.8s +All GPU(s): step 1591: loss 5.8828, lr 4.0e-04, dt 1.7s +All GPU(s): step 1592: loss 5.7266, lr 4.0e-04, dt 1.7s +All GPU(s): step 1593: loss 6.1445, lr 4.0e-04, dt 1.7s +All GPU(s): step 1594: loss 5.8750, lr 4.0e-04, dt 1.7s +All GPU(s): step 1595: loss 6.1387, lr 4.0e-04, dt 1.7s +All GPU(s): step 1596: loss 6.0059, lr 4.0e-04, dt 1.8s +All GPU(s): step 1597: loss 5.5508, lr 4.0e-04, dt 1.6s +All GPU(s): step 1598: loss 6.2676, lr 4.0e-04, dt 1.7s +All GPU(s): step 1599: loss 5.5898, lr 4.0e-04, dt 1.6s +All GPU(s): step 1600: loss 6.8223, lr 4.0e-04, dt 1.7s +All GPU(s): step 1601: loss 4.4805, lr 4.0e-04, dt 1.8s +All GPU(s): step 1602: loss 3.9961, lr 4.0e-04, dt 1.7s +All GPU(s): step 1603: loss 4.2930, lr 4.0e-04, dt 1.7s +All GPU(s): step 1604: loss 3.8857, lr 4.0e-04, dt 1.7s +All GPU(s): step 1605: loss 3.2441, lr 4.0e-04, dt 1.7s +All GPU(s): step 1606: loss 3.0498, lr 4.0e-04, dt 1.8s +All GPU(s): step 1607: loss 5.4551, lr 4.0e-04, dt 1.7s +All GPU(s): step 1608: loss 6.2324, lr 4.0e-04, dt 1.7s +All GPU(s): step 1609: loss 5.7988, lr 4.0e-04, dt 1.7s +All GPU(s): step 1610: loss 5.1191, lr 4.0e-04, dt 1.7s +All GPU(s): step 1611: loss 5.8125, lr 4.0e-04, dt 1.7s +All GPU(s): step 1612: loss 5.7324, lr 4.0e-04, dt 1.7s +All GPU(s): step 1613: loss 6.4766, lr 4.0e-04, dt 1.7s +All GPU(s): step 1614: loss 6.1406, lr 4.0e-04, dt 1.7s +All GPU(s): step 1615: loss 5.7656, lr 4.0e-04, dt 1.7s +All GPU(s): step 1616: loss 6.5586, lr 4.0e-04, dt 1.7s +All GPU(s): step 1617: loss 5.9141, lr 4.0e-04, dt 1.7s +All GPU(s): step 1618: loss 7.3711, lr 4.0e-04, dt 1.6s +All GPU(s): step 1619: loss 6.6592, lr 4.0e-04, dt 1.8s +All GPU(s): step 1620: loss 7.0352, lr 4.0e-04, dt 1.7s +All GPU(s): step 1621: loss 6.7842, lr 4.0e-04, dt 1.7s +All GPU(s): step 1622: loss 7.2500, lr 4.0e-04, dt 1.6s +All GPU(s): step 1623: loss 7.5703, lr 4.0e-04, dt 1.6s +All GPU(s): step 1624: loss 7.1426, lr 4.0e-04, dt 1.6s +All GPU(s): step 1625: loss 6.6367, lr 4.0e-04, dt 1.8s +All GPU(s): step 1626: loss 11.0586, lr 4.0e-04, dt 1.6s +All GPU(s): step 1627: loss 11.9297, lr 4.0e-04, dt 1.5s +All GPU(s): step 1628: loss 12.3906, lr 4.0e-04, dt 1.5s +All GPU(s): step 1629: loss 10.3320, lr 4.0e-04, dt 1.6s +All GPU(s): step 1630: loss 12.3125, lr 4.0e-04, dt 1.5s +All GPU(s): step 1631: loss 11.7773, lr 4.0e-04, dt 1.7s +All GPU(s): step 1632: loss 10.2773, lr 4.0e-04, dt 1.6s +All GPU(s): step 1633: loss 9.3359, lr 4.0e-04, dt 1.6s +All GPU(s): step 1634: loss 10.0820, lr 4.0e-04, dt 1.6s +All GPU(s): step 1635: loss 8.0273, lr 4.0e-04, dt 1.6s +All GPU(s): step 1636: loss 6.6836, lr 4.0e-04, dt 1.6s +All GPU(s): step 1637: loss 7.8809, lr 4.0e-04, dt 1.8s +All GPU(s): step 1638: loss 6.5742, lr 4.0e-04, dt 1.7s +All GPU(s): step 1639: loss 7.8008, lr 4.0e-04, dt 1.7s +All GPU(s): step 1640: loss 6.6914, lr 4.0e-04, dt 1.7s +All GPU(s): step 1641: loss 7.0508, lr 4.0e-04, dt 1.7s +All GPU(s): step 1642: loss 8.8398, lr 4.0e-04, dt 1.6s +All GPU(s): step 1643: loss 8.5742, lr 4.0e-04, dt 1.7s +All GPU(s): step 1644: loss 12.2188, lr 4.0e-04, dt 1.6s +All GPU(s): step 1645: loss 9.6562, lr 4.0e-04, dt 1.6s +All GPU(s): step 1646: loss 15.6016, lr 4.0e-04, dt 1.5s +All GPU(s): step 1647: loss 14.6875, lr 4.0e-04, dt 1.5s +All GPU(s): step 1648: loss 20.2266, lr 4.0e-04, dt 1.5s +All GPU(s): step 1649: loss 33.1719, lr 4.0e-04, dt 1.5s +All GPU(s): step 1650: loss 39.6875, lr 4.0e-04, dt 1.3s +All GPU(s): step 1651: loss 35.9375, lr 4.0e-04, dt 1.5s +All GPU(s): step 1652: loss 46.0312, lr 4.0e-04, dt 1.3s +All GPU(s): step 1653: loss 63.1875, lr 4.0e-04, dt 1.2s +All GPU(s): step 1654: loss 59.0625, lr 4.0e-04, dt 1.2s +All GPU(s): step 1655: loss 64.0625, lr 4.0e-04, dt 1.2s +All GPU(s): step 1656: loss 61.3750, lr 4.0e-04, dt 1.3s +All GPU(s): step 1657: loss 68.3125, lr 4.0e-04, dt 1.3s +All GPU(s): step 1658: loss 64.2812, lr 4.0e-04, dt 1.2s +All GPU(s): step 1659: loss 70.0938, lr 4.0e-04, dt 1.2s +All GPU(s): step 1660: loss 75.4688, lr 4.0e-04, dt 1.1s +All GPU(s): step 1661: loss 64.7812, lr 4.0e-04, dt 1.2s +All GPU(s): step 1662: loss 63.3125, lr 4.0e-04, dt 1.2s +All GPU(s): step 1663: loss 69.1875, lr 4.0e-04, dt 1.2s +All GPU(s): step 1664: loss 66.5000, lr 4.0e-04, dt 1.1s +All GPU(s): step 1665: loss 40.0625, lr 4.0e-04, dt 1.4s +All GPU(s): step 1666: loss 48.1562, lr 4.0e-04, dt 1.3s +All GPU(s): step 1667: loss 37.6406, lr 4.0e-04, dt 1.3s +All GPU(s): step 1668: loss 25.9062, lr 4.0e-04, dt 1.4s +All GPU(s): step 1669: loss 24.4922, lr 4.0e-04, dt 1.4s +All GPU(s): step 1670: loss 23.9844, lr 4.0e-04, dt 1.5s +All GPU(s): step 1671: loss 17.7734, lr 4.0e-04, dt 1.6s +All GPU(s): step 1672: loss 23.5000, lr 4.0e-04, dt 1.5s +All GPU(s): step 1673: loss 15.5703, lr 4.0e-04, dt 1.6s +All GPU(s): step 1674: loss 14.9922, lr 4.0e-04, dt 1.5s +All GPU(s): step 1675: loss 7.7578, lr 4.0e-04, dt 1.6s +All GPU(s): step 1676: loss 8.2031, lr 4.0e-04, dt 1.7s +All GPU(s): step 1677: loss 7.6270, lr 4.0e-04, dt 1.7s +All GPU(s): step 1678: loss 13.6719, lr 4.0e-04, dt 1.7s +All GPU(s): step 1679: loss 25.0469, lr 4.0e-04, dt 1.5s +All GPU(s): step 1680: loss 33.0781, lr 4.0e-04, dt 1.4s +All GPU(s): step 1681: loss 46.6562, lr 4.0e-04, dt 1.3s +All GPU(s): step 1682: loss 53.5000, lr 4.0e-04, dt 1.2s +All GPU(s): step 1683: loss 52.8125, lr 4.0e-04, dt 1.2s +All GPU(s): step 1684: loss 67.0625, lr 4.0e-04, dt 1.2s +All GPU(s): step 1685: loss 51.4062, lr 4.0e-04, dt 1.3s +All GPU(s): step 1686: loss 59.4375, lr 4.0e-04, dt 1.2s +All GPU(s): step 1687: loss 60.5625, lr 4.0e-04, dt 1.2s +All GPU(s): step 1688: loss 62.9688, lr 4.0e-04, dt 1.2s +All GPU(s): step 1689: loss 57.1250, lr 4.0e-04, dt 1.2s +All GPU(s): step 1690: loss 61.8594, lr 4.0e-04, dt 1.3s +All GPU(s): step 1691: loss 64.5000, lr 4.0e-04, dt 1.2s +All GPU(s): step 1692: loss 64.0000, lr 4.0e-04, dt 1.2s +All GPU(s): step 1693: loss 65.5000, lr 4.0e-04, dt 1.2s +All GPU(s): step 1694: loss 60.6250, lr 4.0e-04, dt 1.2s +All GPU(s): step 1695: loss 66.3438, lr 4.0e-04, dt 1.2s +All GPU(s): step 1696: loss 69.1250, lr 4.0e-04, dt 1.1s +All GPU(s): step 1697: loss 60.9062, lr 4.0e-04, dt 1.2s +All GPU(s): step 1698: loss 57.1875, lr 4.0e-04, dt 1.2s +All GPU(s): step 1699: loss 68.3125, lr 4.0e-04, dt 1.1s +All GPU(s): step 1700: loss 55.9219, lr 4.0e-04, dt 1.3s +All GPU(s): step 1701: loss 61.7188, lr 4.0e-04, dt 1.3s +All GPU(s): step 1702: loss 55.2188, lr 4.0e-04, dt 1.4s +All GPU(s): step 1703: loss 56.6875, lr 4.0e-04, dt 1.2s +All GPU(s): step 1704: loss 57.0469, lr 4.0e-04, dt 1.3s +All GPU(s): step 1705: loss 55.7500, lr 4.0e-04, dt 1.3s +All GPU(s): step 1706: loss 57.1562, lr 4.0e-04, dt 1.4s +All GPU(s): step 1707: loss 49.8438, lr 4.0e-04, dt 1.4s +All GPU(s): step 1708: loss 59.6562, lr 4.0e-04, dt 1.2s +All GPU(s): step 1709: loss 53.4688, lr 4.0e-04, dt 1.3s +All GPU(s): step 1710: loss 62.0000, lr 4.0e-04, dt 1.2s +All GPU(s): step 1711: loss 60.0312, lr 4.0e-04, dt 1.2s +All GPU(s): step 1712: loss 54.3125, lr 4.0e-04, dt 1.3s +All GPU(s): step 1713: loss 57.7188, lr 4.0e-04, dt 1.3s +All GPU(s): step 1714: loss 52.1875, lr 4.0e-04, dt 1.3s +All GPU(s): step 1715: loss 49.0312, lr 4.0e-04, dt 1.3s +All GPU(s): step 1716: loss 46.7656, lr 4.0e-04, dt 1.3s +All GPU(s): step 1717: loss 37.0000, lr 4.0e-04, dt 1.4s +All GPU(s): step 1718: loss 46.9062, lr 4.0e-04, dt 1.3s +All GPU(s): step 1719: loss 49.6250, lr 4.0e-04, dt 1.3s +All GPU(s): step 1720: loss 40.4844, lr 4.0e-04, dt 1.3s +All GPU(s): step 1721: loss 50.8750, lr 4.0e-04, dt 1.3s +All GPU(s): step 1722: loss 62.1250, lr 4.0e-04, dt 1.2s +All GPU(s): step 1723: loss 59.1875, lr 4.0e-04, dt 1.4s +All GPU(s): step 1724: loss 56.7500, lr 4.0e-04, dt 1.3s +All GPU(s): step 1725: loss 65.3125, lr 4.0e-04, dt 1.2s +All GPU(s): step 1726: loss 63.5625, lr 4.0e-04, dt 1.3s +All GPU(s): step 1727: loss 63.4688, lr 4.0e-04, dt 1.2s +All GPU(s): step 1728: loss 55.1875, lr 4.0e-04, dt 1.2s +All GPU(s): step 1729: loss 59.9062, lr 4.0e-04, dt 1.2s +All GPU(s): step 1730: loss 63.0938, lr 4.0e-04, dt 1.2s +All GPU(s): step 1731: loss 62.7188, lr 4.0e-04, dt 1.2s +All GPU(s): step 1732: loss 59.7188, lr 4.0e-04, dt 1.3s +All GPU(s): step 1733: loss 57.6562, lr 4.0e-04, dt 1.2s +All GPU(s): step 1734: loss 63.1250, lr 4.0e-04, dt 1.2s +All GPU(s): step 1735: loss 54.1875, lr 4.0e-04, dt 1.3s +All GPU(s): step 1736: loss 58.3750, lr 4.0e-04, dt 1.2s +All GPU(s): step 1737: loss 64.3125, lr 4.0e-04, dt 1.2s +All GPU(s): step 1738: loss 60.1250, lr 4.0e-04, dt 1.2s +All GPU(s): step 1739: loss 61.5312, lr 4.0e-04, dt 1.2s +All GPU(s): step 1740: loss 66.6875, lr 4.0e-04, dt 1.2s +All GPU(s): step 1741: loss 61.2812, lr 4.0e-04, dt 1.3s +All GPU(s): step 1742: loss 54.6719, lr 4.0e-04, dt 1.3s +All GPU(s): step 1743: loss 56.0312, lr 4.0e-04, dt 1.3s +All GPU(s): step 1744: loss 56.4375, lr 4.0e-04, dt 1.2s +All GPU(s): step 1745: loss 47.8750, lr 4.0e-04, dt 1.3s +All GPU(s): step 1746: loss 62.7500, lr 4.0e-04, dt 1.1s +All GPU(s): step 1747: loss 51.2500, lr 4.0e-04, dt 1.2s +All GPU(s): step 1748: loss 52.2188, lr 4.0e-04, dt 1.3s +All GPU(s): step 1749: loss 55.4688, lr 4.0e-04, dt 1.3s +All GPU(s): step 1750: loss 44.1875, lr 4.0e-04, dt 1.2s +All GPU(s): step 1751: loss 44.9219, lr 4.0e-04, dt 1.3s +All GPU(s): step 1752: loss 43.8281, lr 4.0e-04, dt 1.3s +All GPU(s): step 1753: loss 29.2031, lr 3.9e-04, dt 1.4s +All GPU(s): step 1754: loss 42.3438, lr 3.9e-04, dt 1.3s +All GPU(s): step 1755: loss 37.0938, lr 3.9e-04, dt 1.4s +All GPU(s): step 1756: loss 39.4062, lr 3.9e-04, dt 1.3s +All GPU(s): step 1757: loss 45.8281, lr 3.9e-04, dt 1.4s +All GPU(s): step 1758: loss 54.4531, lr 3.9e-04, dt 1.3s +All GPU(s): step 1759: loss 50.7500, lr 3.9e-04, dt 1.3s +All GPU(s): step 1760: loss 59.1250, lr 3.9e-04, dt 1.2s +All GPU(s): step 1761: loss 61.0625, lr 3.9e-04, dt 1.2s +All GPU(s): step 1762: loss 64.4375, lr 3.9e-04, dt 1.2s +All GPU(s): step 1763: loss 67.3125, lr 3.9e-04, dt 1.1s +All GPU(s): step 1764: loss 67.4062, lr 3.9e-04, dt 1.2s +All GPU(s): step 1765: loss 64.0625, lr 3.9e-04, dt 1.3s +All GPU(s): step 1766: loss 64.4688, lr 3.9e-04, dt 1.2s +All GPU(s): step 1767: loss 63.1875, lr 3.9e-04, dt 1.2s +All GPU(s): step 1768: loss 77.6562, lr 3.9e-04, dt 1.2s +All GPU(s): step 1769: loss 73.9688, lr 3.9e-04, dt 1.2s +All GPU(s): step 1770: loss 75.0312, lr 3.9e-04, dt 1.2s +All GPU(s): step 1771: loss 78.5312, lr 3.9e-04, dt 1.2s +All GPU(s): step 1772: loss 80.1562, lr 3.9e-04, dt 1.1s +All GPU(s): step 1773: loss 74.7812, lr 3.9e-04, dt 1.4s +All GPU(s): step 1774: loss 88.1875, lr 3.9e-04, dt 1.1s +All GPU(s): step 1775: loss 80.1250, lr 3.9e-04, dt 1.2s +All GPU(s): step 1776: loss 64.4062, lr 3.9e-04, dt 1.2s +All GPU(s): step 1777: loss 71.6250, lr 3.9e-04, dt 1.2s +All GPU(s): step 1778: loss 59.6875, lr 3.9e-04, dt 1.3s +All GPU(s): step 1779: loss 53.4375, lr 3.9e-04, dt 1.3s +All GPU(s): step 1780: loss 49.0469, lr 3.9e-04, dt 1.3s +All GPU(s): step 1781: loss 56.3438, lr 3.9e-04, dt 1.3s +All GPU(s): step 1782: loss 52.1875, lr 3.9e-04, dt 1.3s +All GPU(s): step 1783: loss 47.7344, lr 3.9e-04, dt 1.3s +All GPU(s): step 1784: loss 49.7812, lr 3.9e-04, dt 1.3s +All GPU(s): step 1785: loss 48.0000, lr 3.9e-04, dt 1.4s +All GPU(s): step 1786: loss 51.6875, lr 3.9e-04, dt 1.2s +All GPU(s): step 1787: loss 52.4375, lr 3.9e-04, dt 1.3s +All GPU(s): step 1788: loss 50.0938, lr 3.9e-04, dt 1.3s +All GPU(s): step 1789: loss 46.1094, lr 3.9e-04, dt 1.5s +All GPU(s): step 1790: loss 48.4062, lr 3.9e-04, dt 1.2s +All GPU(s): step 1791: loss 41.5469, lr 3.9e-04, dt 1.3s +All GPU(s): step 1792: loss 25.1875, lr 3.9e-04, dt 1.5s +All GPU(s): step 1793: loss 16.2578, lr 3.9e-04, dt 1.5s +All GPU(s): step 1794: loss 12.1406, lr 3.9e-04, dt 1.5s +All GPU(s): step 1795: loss 11.0781, lr 3.9e-04, dt 1.6s +All GPU(s): step 1796: loss 12.9336, lr 3.9e-04, dt 1.6s +All GPU(s): step 1797: loss 13.6172, lr 3.9e-04, dt 1.5s +All GPU(s): step 1798: loss 9.2773, lr 3.9e-04, dt 1.6s +All GPU(s): step 1799: loss 8.7656, lr 3.9e-04, dt 1.6s +All GPU(s): step 1800: loss 6.3281, lr 3.9e-04, dt 1.7s +All GPU(s): step 1801: loss 4.7012, lr 3.9e-04, dt 1.7s +All GPU(s): step 1802: loss 2.9912, lr 3.9e-04, dt 1.8s +All GPU(s): step 1803: loss 3.5000, lr 3.9e-04, dt 1.8s +All GPU(s): step 1804: loss 3.5742, lr 3.9e-04, dt 1.7s +All GPU(s): step 1805: loss 4.5430, lr 3.9e-04, dt 1.7s +All GPU(s): step 1806: loss 10.5664, lr 3.9e-04, dt 1.6s +All GPU(s): step 1807: loss 12.8125, lr 3.9e-04, dt 1.6s +All GPU(s): step 1808: loss 24.0156, lr 3.9e-04, dt 1.5s +All GPU(s): step 1809: loss 50.2812, lr 3.9e-04, dt 1.3s +All GPU(s): step 1810: loss 27.7656, lr 3.9e-04, dt 1.4s +All GPU(s): step 1811: loss 35.9219, lr 3.9e-04, dt 1.3s +All GPU(s): step 1812: loss 21.2891, lr 3.9e-04, dt 1.4s +All GPU(s): step 1813: loss 22.3359, lr 3.9e-04, dt 1.5s +All GPU(s): step 1814: loss 17.3281, lr 3.9e-04, dt 1.5s +All GPU(s): step 1815: loss 15.9453, lr 3.9e-04, dt 1.5s +All GPU(s): step 1816: loss 15.9375, lr 3.9e-04, dt 1.4s +All GPU(s): step 1817: loss 6.8867, lr 3.9e-04, dt 1.7s +All GPU(s): step 1818: loss 4.8721, lr 3.9e-04, dt 1.8s +All GPU(s): step 1819: loss 4.2617, lr 3.9e-04, dt 1.7s +All GPU(s): step 1820: loss 1.5664, lr 3.9e-04, dt 1.9s +All GPU(s): step 1821: loss 3.3369, lr 3.9e-04, dt 1.8s +All GPU(s): step 1822: loss 2.3589, lr 3.9e-04, dt 1.8s +All GPU(s): step 1823: loss 3.7949, lr 3.9e-04, dt 1.7s +All GPU(s): step 1824: loss 6.5098, lr 3.9e-04, dt 1.6s +All GPU(s): step 1825: loss 3.6895, lr 3.9e-04, dt 1.8s +All GPU(s): step 1826: loss 1.1543, lr 3.9e-04, dt 1.9s +All GPU(s): step 1827: loss 0.9167, lr 3.9e-04, dt 1.9s +All GPU(s): step 1828: loss 0.9963, lr 3.9e-04, dt 1.9s +All GPU(s): step 1829: loss 1.1553, lr 3.9e-04, dt 1.9s +All GPU(s): step 1830: loss 1.1875, lr 3.9e-04, dt 1.9s +All GPU(s): step 1831: loss 0.9348, lr 3.9e-04, dt 2.0s +All GPU(s): step 1832: loss 0.5672, lr 3.9e-04, dt 2.0s +All GPU(s): step 1833: loss 0.4692, lr 3.9e-04, dt 1.9s +All GPU(s): step 1834: loss 0.5530, lr 3.9e-04, dt 1.9s +All GPU(s): step 1835: loss 0.7918, lr 3.9e-04, dt 2.0s +All GPU(s): step 1836: loss 0.6232, lr 3.9e-04, dt 2.0s +All GPU(s): step 1837: loss 0.4912, lr 3.9e-04, dt 1.9s +All GPU(s): step 1838: loss 0.5884, lr 3.9e-04, dt 1.9s +All GPU(s): step 1839: loss 0.9780, lr 3.9e-04, dt 1.9s +All GPU(s): step 1840: loss 0.6782, lr 3.9e-04, dt 1.9s +All GPU(s): step 1841: loss 1.0491, lr 3.9e-04, dt 1.9s +All GPU(s): step 1842: loss 1.3396, lr 3.9e-04, dt 1.9s +All GPU(s): step 1843: loss 1.4443, lr 3.9e-04, dt 1.9s +All GPU(s): step 1844: loss 2.4600, lr 3.9e-04, dt 1.8s +All GPU(s): step 1845: loss 6.2852, lr 3.9e-04, dt 1.8s +All GPU(s): step 1846: loss 12.1133, lr 3.9e-04, dt 1.5s +All GPU(s): step 1847: loss 13.0547, lr 3.9e-04, dt 1.6s +All GPU(s): step 1848: loss 22.8750, lr 3.9e-04, dt 1.4s +All GPU(s): step 1849: loss 23.1719, lr 3.9e-04, dt 1.4s +All GPU(s): step 1850: loss 30.5312, lr 3.9e-04, dt 1.4s +All GPU(s): step 1851: loss 27.3281, lr 3.9e-04, dt 1.3s +All GPU(s): step 1852: loss 25.8125, lr 3.9e-04, dt 1.4s +All GPU(s): step 1853: loss 19.8672, lr 3.9e-04, dt 1.5s +All GPU(s): step 1854: loss 23.8125, lr 3.9e-04, dt 1.5s +All GPU(s): step 1855: loss 25.8906, lr 3.9e-04, dt 1.5s +All GPU(s): step 1856: loss 27.8125, lr 3.9e-04, dt 1.4s +All GPU(s): step 1857: loss 34.5781, lr 3.9e-04, dt 1.3s +All GPU(s): step 1858: loss 36.4219, lr 3.9e-04, dt 1.4s +All GPU(s): step 1859: loss 35.0781, lr 3.9e-04, dt 1.3s +All GPU(s): step 1860: loss 29.3281, lr 3.9e-04, dt 1.4s +All GPU(s): step 1861: loss 33.6875, lr 3.9e-04, dt 1.4s +All GPU(s): step 1862: loss 38.6719, lr 3.9e-04, dt 1.3s +All GPU(s): step 1863: loss 42.4688, lr 3.9e-04, dt 1.2s +All GPU(s): step 1864: loss 43.3125, lr 3.9e-04, dt 1.3s +All GPU(s): step 1865: loss 43.4062, lr 3.9e-04, dt 1.2s +All GPU(s): step 1866: loss 37.8438, lr 3.9e-04, dt 1.3s +All GPU(s): step 1867: loss 37.2031, lr 3.9e-04, dt 1.4s +All GPU(s): step 1868: loss 37.8594, lr 3.9e-04, dt 1.3s +All GPU(s): step 1869: loss 30.9688, lr 3.9e-04, dt 1.5s +All GPU(s): step 1870: loss 26.6719, lr 3.9e-04, dt 1.4s +All GPU(s): step 1871: loss 35.6719, lr 3.9e-04, dt 1.4s +All GPU(s): step 1872: loss 42.5625, lr 3.9e-04, dt 1.3s +All GPU(s): step 1873: loss 50.2188, lr 3.9e-04, dt 1.3s +All GPU(s): step 1874: loss 61.7188, lr 3.9e-04, dt 1.3s +All GPU(s): step 1875: loss 57.4062, lr 3.9e-04, dt 1.2s +All GPU(s): step 1876: loss 55.2812, lr 3.9e-04, dt 1.3s +All GPU(s): step 1877: loss 61.9062, lr 3.9e-04, dt 1.3s +All GPU(s): step 1878: loss 62.3438, lr 3.9e-04, dt 1.3s +All GPU(s): step 1879: loss 61.8750, lr 3.9e-04, dt 1.2s +All GPU(s): step 1880: loss 66.6562, lr 3.9e-04, dt 1.2s +All GPU(s): step 1881: loss 50.3750, lr 3.9e-04, dt 1.3s +All GPU(s): step 1882: loss 58.6875, lr 3.9e-04, dt 1.2s +All GPU(s): step 1883: loss 51.3438, lr 3.9e-04, dt 1.2s +All GPU(s): step 1884: loss 54.1562, lr 3.9e-04, dt 1.3s +All GPU(s): step 1885: loss 41.7188, lr 3.9e-04, dt 1.4s +All GPU(s): step 1886: loss 46.5938, lr 3.9e-04, dt 1.4s +All GPU(s): step 1887: loss 67.0312, lr 3.9e-04, dt 1.2s +All GPU(s): step 1888: loss 51.2344, lr 3.9e-04, dt 1.3s +All GPU(s): step 1889: loss 58.1250, lr 3.9e-04, dt 1.2s +All GPU(s): step 1890: loss 56.8125, lr 3.9e-04, dt 1.2s +All GPU(s): step 1891: loss 56.0312, lr 3.9e-04, dt 1.3s +All GPU(s): step 1892: loss 59.3750, lr 3.9e-04, dt 1.3s +All GPU(s): step 1893: loss 49.9062, lr 3.9e-04, dt 1.3s +All GPU(s): step 1894: loss 43.3281, lr 3.9e-04, dt 1.4s +All GPU(s): step 1895: loss 44.4375, lr 3.9e-04, dt 1.2s +All GPU(s): step 1896: loss 40.9375, lr 3.9e-04, dt 1.4s +All GPU(s): step 1897: loss 36.9375, lr 3.9e-04, dt 1.4s +All GPU(s): step 1898: loss 49.8750, lr 3.9e-04, dt 1.3s +All GPU(s): step 1899: loss 49.3125, lr 3.9e-04, dt 1.3s +All GPU(s): step 1900: loss 58.7500, lr 3.9e-04, dt 1.3s +All GPU(s): step 1901: loss 60.3125, lr 3.9e-04, dt 1.2s +All GPU(s): step 1902: loss 45.5938, lr 3.9e-04, dt 1.3s +All GPU(s): step 1903: loss 45.1562, lr 3.9e-04, dt 1.3s +All GPU(s): step 1904: loss 48.5000, lr 3.9e-04, dt 1.3s +All GPU(s): step 1905: loss 47.6250, lr 3.9e-04, dt 1.4s +All GPU(s): step 1906: loss 50.3750, lr 3.9e-04, dt 1.2s +All GPU(s): step 1907: loss 40.1875, lr 3.9e-04, dt 1.3s +All GPU(s): step 1908: loss 46.5000, lr 3.9e-04, dt 1.3s +All GPU(s): step 1909: loss 36.8750, lr 3.9e-04, dt 1.4s +All GPU(s): step 1910: loss 27.9844, lr 3.9e-04, dt 1.4s +All GPU(s): step 1911: loss 33.5469, lr 3.9e-04, dt 1.5s +All GPU(s): step 1912: loss 38.0781, lr 3.9e-04, dt 1.4s +All GPU(s): step 1913: loss 40.4375, lr 3.9e-04, dt 1.3s +All GPU(s): step 1914: loss 40.6094, lr 3.9e-04, dt 1.4s +All GPU(s): step 1915: loss 46.2812, lr 3.9e-04, dt 1.3s +All GPU(s): step 1916: loss 52.8438, lr 3.9e-04, dt 1.2s +All GPU(s): step 1917: loss 46.0625, lr 3.9e-04, dt 1.3s +All GPU(s): step 1918: loss 53.6875, lr 3.9e-04, dt 1.3s +All GPU(s): step 1919: loss 50.2812, lr 3.9e-04, dt 1.2s +All GPU(s): step 1920: loss 51.0938, lr 3.9e-04, dt 1.2s +All GPU(s): step 1921: loss 49.8750, lr 3.9e-04, dt 1.3s +All GPU(s): step 1922: loss 51.5938, lr 3.9e-04, dt 1.3s +All GPU(s): step 1923: loss 52.8438, lr 3.9e-04, dt 1.3s +All GPU(s): step 1924: loss 51.7188, lr 3.9e-04, dt 1.2s +All GPU(s): step 1925: loss 52.7812, lr 3.9e-04, dt 1.2s +All GPU(s): step 1926: loss 58.2188, lr 3.9e-04, dt 1.2s +All GPU(s): step 1927: loss 50.8125, lr 3.9e-04, dt 1.3s +All GPU(s): step 1928: loss 65.2500, lr 3.9e-04, dt 1.2s +All GPU(s): step 1929: loss 68.3438, lr 3.9e-04, dt 1.2s +All GPU(s): step 1930: loss 66.2500, lr 3.9e-04, dt 1.3s +All GPU(s): step 1931: loss 64.6250, lr 3.9e-04, dt 1.2s +All GPU(s): step 1932: loss 64.1875, lr 3.9e-04, dt 1.2s +All GPU(s): step 1933: loss 53.7188, lr 3.9e-04, dt 1.2s +All GPU(s): step 1934: loss 63.7188, lr 3.9e-04, dt 1.2s +All GPU(s): step 1935: loss 65.9062, lr 3.9e-04, dt 1.3s +All GPU(s): step 1936: loss 56.8438, lr 3.9e-04, dt 1.3s +All GPU(s): step 1937: loss 55.9375, lr 3.9e-04, dt 1.2s +All GPU(s): step 1938: loss 35.6875, lr 3.9e-04, dt 1.4s +All GPU(s): step 1939: loss 34.3438, lr 3.9e-04, dt 1.4s +All GPU(s): step 1940: loss 41.7656, lr 3.9e-04, dt 1.3s +All GPU(s): step 1941: loss 40.6250, lr 3.9e-04, dt 1.3s +All GPU(s): step 1942: loss 7.3242, lr 3.9e-04, dt 1.6s +All GPU(s): step 1943: loss 1.9297, lr 3.9e-04, dt 1.8s +All GPU(s): step 1944: loss 0.6722, lr 3.9e-04, dt 2.0s +All GPU(s): step 1945: loss 0.4543, lr 3.9e-04, dt 2.0s +All GPU(s): step 1946: loss 0.3206, lr 3.9e-04, dt 1.9s +All GPU(s): step 1947: loss 0.5066, lr 3.9e-04, dt 1.9s +All GPU(s): step 1948: loss 0.4237, lr 3.9e-04, dt 1.9s +All GPU(s): step 1949: loss 0.5964, lr 3.9e-04, dt 1.9s +All GPU(s): step 1950: loss 0.9013, lr 3.9e-04, dt 2.0s +All GPU(s): step 1951: loss 0.3506, lr 3.9e-04, dt 2.0s +All GPU(s): step 1952: loss 0.5401, lr 3.9e-04, dt 1.9s +All GPU(s): step 1953: loss 0.8818, lr 3.9e-04, dt 1.9s +All GPU(s): step 1954: loss 0.6602, lr 3.9e-04, dt 1.9s +All GPU(s): step 1955: loss 0.6675, lr 3.9e-04, dt 2.0s +All GPU(s): step 1956: loss 0.5739, lr 3.9e-04, dt 1.9s +All GPU(s): step 1957: loss 0.9297, lr 3.9e-04, dt 1.9s +All GPU(s): step 1958: loss 1.2708, lr 3.9e-04, dt 1.9s +All GPU(s): step 1959: loss 1.0714, lr 3.9e-04, dt 1.9s +All GPU(s): step 1960: loss 1.3994, lr 3.9e-04, dt 1.9s +All GPU(s): step 1961: loss 1.8594, lr 3.9e-04, dt 1.8s +All GPU(s): step 1962: loss 3.4238, lr 3.9e-04, dt 1.8s +All GPU(s): step 1963: loss 5.0957, lr 3.9e-04, dt 1.7s +All GPU(s): step 1964: loss 9.8359, lr 3.9e-04, dt 1.7s +All GPU(s): step 1965: loss 12.7734, lr 3.9e-04, dt 1.6s +All GPU(s): step 1966: loss 17.8438, lr 3.9e-04, dt 1.6s +All GPU(s): step 1967: loss 14.0391, lr 3.9e-04, dt 1.5s +All GPU(s): step 1968: loss 12.9062, lr 3.9e-04, dt 1.5s +All GPU(s): step 1969: loss 8.2773, lr 3.9e-04, dt 1.6s +All GPU(s): step 1970: loss 7.2715, lr 3.9e-04, dt 1.7s +All GPU(s): step 1971: loss 4.9980, lr 3.9e-04, dt 1.8s +All GPU(s): step 1972: loss 3.5156, lr 3.9e-04, dt 1.9s +All GPU(s): step 1973: loss 2.2637, lr 3.9e-04, dt 1.8s +All GPU(s): step 1974: loss 2.4990, lr 3.9e-04, dt 1.8s +All GPU(s): step 1975: loss 3.0098, lr 3.9e-04, dt 1.8s +All GPU(s): step 1976: loss 2.3359, lr 3.9e-04, dt 1.9s +All GPU(s): step 1977: loss 3.1504, lr 3.9e-04, dt 1.8s +All GPU(s): step 1978: loss 4.8691, lr 3.9e-04, dt 1.8s +All GPU(s): step 1979: loss 6.2051, lr 3.9e-04, dt 1.7s +All GPU(s): step 1980: loss 5.1855, lr 3.9e-04, dt 1.7s +All GPU(s): step 1981: loss 8.4941, lr 3.9e-04, dt 1.7s +All GPU(s): step 1982: loss 26.8906, lr 3.9e-04, dt 1.4s +All GPU(s): step 1983: loss 22.8281, lr 3.9e-04, dt 1.5s +All GPU(s): step 1984: loss 25.8125, lr 3.9e-04, dt 1.4s +All GPU(s): step 1985: loss 32.6406, lr 3.9e-04, dt 1.3s +All GPU(s): step 1986: loss 47.5938, lr 3.9e-04, dt 1.2s +All GPU(s): step 1987: loss 37.9062, lr 3.9e-04, dt 1.3s +All GPU(s): step 1988: loss 30.3438, lr 3.9e-04, dt 1.4s +All GPU(s): step 1989: loss 18.5547, lr 3.9e-04, dt 1.5s +All GPU(s): step 1990: loss 15.3984, lr 3.9e-04, dt 1.5s +All GPU(s): step 1991: loss 22.0625, lr 3.9e-04, dt 1.5s +All GPU(s): step 1992: loss 16.8281, lr 3.9e-04, dt 1.5s +All GPU(s): step 1993: loss 10.0469, lr 3.9e-04, dt 1.6s +All GPU(s): step 1994: loss 3.4863, lr 3.9e-04, dt 1.7s +All GPU(s): step 1995: loss 2.0532, lr 3.9e-04, dt 1.8s +All GPU(s): step 1996: loss 1.3394, lr 3.9e-04, dt 1.9s +All GPU(s): step 1997: loss 1.0254, lr 3.9e-04, dt 2.0s +All GPU(s): step 1998: loss 0.7368, lr 3.9e-04, dt 1.9s +All GPU(s): step 1999: loss 0.6750, lr 3.9e-04, dt 1.9s +saving checkpoint to checkpoints/ckpt_2000.pt +All GPU(s): step 2000: loss 0.7449, lr 3.9e-04, dt 1.9s +All GPU(s): step 2001: loss 0.6642, lr 3.9e-04, dt 1.9s +All GPU(s): step 2002: loss 0.4331, lr 3.9e-04, dt 2.1s +All GPU(s): step 2003: loss 0.6630, lr 3.9e-04, dt 1.9s +All GPU(s): step 2004: loss 0.7063, lr 3.9e-04, dt 2.0s +All GPU(s): step 2005: loss 0.5591, lr 3.9e-04, dt 2.0s +All GPU(s): step 2006: loss 0.3931, lr 3.9e-04, dt 1.9s +All GPU(s): step 2007: loss 0.4597, lr 3.9e-04, dt 2.1s +All GPU(s): step 2008: loss 0.6232, lr 3.9e-04, dt 1.9s +All GPU(s): step 2009: loss 0.5874, lr 3.9e-04, dt 1.9s +All GPU(s): step 2010: loss 0.7842, lr 3.9e-04, dt 1.9s +All GPU(s): step 2011: loss 0.4307, lr 3.9e-04, dt 1.9s +All GPU(s): step 2012: loss 0.9035, lr 3.9e-04, dt 2.0s +All GPU(s): step 2013: loss 0.6973, lr 3.9e-04, dt 1.9s +All GPU(s): step 2014: loss 0.5836, lr 3.9e-04, dt 2.0s +All GPU(s): step 2015: loss 0.9908, lr 3.9e-04, dt 1.9s +All GPU(s): step 2016: loss 0.7085, lr 3.9e-04, dt 1.9s +All GPU(s): step 2017: loss 0.9941, lr 3.9e-04, dt 2.0s +All GPU(s): step 2018: loss 1.1294, lr 3.9e-04, dt 1.9s +All GPU(s): step 2019: loss 0.9573, lr 3.9e-04, dt 1.9s +All GPU(s): step 2020: loss 0.9695, lr 3.9e-04, dt 1.9s +All GPU(s): step 2021: loss 1.3174, lr 3.9e-04, dt 1.9s +All GPU(s): step 2022: loss 1.8350, lr 3.9e-04, dt 1.9s +All GPU(s): step 2023: loss 1.2113, lr 3.9e-04, dt 1.9s +All GPU(s): step 2024: loss 1.6069, lr 3.9e-04, dt 1.8s +All GPU(s): step 2025: loss 1.9268, lr 3.9e-04, dt 1.9s +All GPU(s): step 2026: loss 1.4443, lr 3.9e-04, dt 1.9s +All GPU(s): step 2027: loss 0.9073, lr 3.9e-04, dt 1.9s +All GPU(s): step 2028: loss 0.7024, lr 3.9e-04, dt 2.0s +All GPU(s): step 2029: loss 0.9016, lr 3.9e-04, dt 1.9s +All GPU(s): step 2030: loss 0.7609, lr 3.9e-04, dt 1.9s +All GPU(s): step 2031: loss 0.5729, lr 3.9e-04, dt 1.9s +All GPU(s): step 2032: loss 0.8008, lr 3.9e-04, dt 1.9s +All GPU(s): step 2033: loss 0.8260, lr 3.9e-04, dt 2.0s +All GPU(s): step 2034: loss 0.6712, lr 3.9e-04, dt 1.9s +All GPU(s): step 2035: loss 1.0836, lr 3.9e-04, dt 1.9s +All GPU(s): step 2036: loss 1.1162, lr 3.9e-04, dt 1.9s +All GPU(s): step 2037: loss 0.9136, lr 3.9e-04, dt 1.9s +All GPU(s): step 2038: loss 0.8611, lr 3.9e-04, dt 2.0s +All GPU(s): step 2039: loss 1.0029, lr 3.9e-04, dt 1.9s +All GPU(s): step 2040: loss 0.6007, lr 3.9e-04, dt 2.0s +All GPU(s): step 2041: loss 0.6353, lr 3.9e-04, dt 2.0s +All GPU(s): step 2042: loss 0.6660, lr 3.9e-04, dt 1.9s +All GPU(s): step 2043: loss 0.4684, lr 3.9e-04, dt 2.0s +All GPU(s): step 2044: loss 0.7370, lr 3.9e-04, dt 1.9s +All GPU(s): step 2045: loss 0.7710, lr 3.9e-04, dt 2.0s +All GPU(s): step 2046: loss 0.9087, lr 3.9e-04, dt 1.9s +All GPU(s): step 2047: loss 0.6578, lr 3.9e-04, dt 1.9s +All GPU(s): step 2048: loss 0.7024, lr 3.9e-04, dt 2.0s +All GPU(s): step 2049: loss 0.5554, lr 3.9e-04, dt 1.9s +All GPU(s): step 2050: loss 0.3158, lr 3.9e-04, dt 2.0s +All GPU(s): step 2051: loss 0.5432, lr 3.9e-04, dt 2.0s +All GPU(s): step 2052: loss 0.4281, lr 3.9e-04, dt 2.0s +All GPU(s): step 2053: loss 0.7693, lr 3.9e-04, dt 2.0s +All GPU(s): step 2054: loss 0.5220, lr 3.9e-04, dt 2.0s +All GPU(s): step 2055: loss 0.2614, lr 3.9e-04, dt 2.0s +All GPU(s): step 2056: loss 0.5285, lr 3.9e-04, dt 1.9s +All GPU(s): step 2057: loss 0.3246, lr 3.9e-04, dt 1.9s +All GPU(s): step 2058: loss 0.3527, lr 3.9e-04, dt 2.0s +All GPU(s): step 2059: loss 0.5851, lr 3.9e-04, dt 2.0s +All GPU(s): step 2060: loss 0.2699, lr 3.9e-04, dt 1.9s +All GPU(s): step 2061: loss 0.3557, lr 3.9e-04, dt 1.9s +All GPU(s): step 2062: loss 0.6470, lr 3.9e-04, dt 1.9s +All GPU(s): step 2063: loss 0.5146, lr 3.9e-04, dt 2.0s +All GPU(s): step 2064: loss 0.4337, lr 3.9e-04, dt 2.0s +All GPU(s): step 2065: loss 0.4478, lr 3.9e-04, dt 2.0s +All GPU(s): step 2066: loss 0.3764, lr 3.9e-04, dt 2.0s +All GPU(s): step 2067: loss 0.7192, lr 3.9e-04, dt 1.9s +All GPU(s): step 2068: loss 0.6716, lr 3.9e-04, dt 1.9s +All GPU(s): step 2069: loss 0.5876, lr 3.9e-04, dt 2.0s +All GPU(s): step 2070: loss 0.4711, lr 3.9e-04, dt 1.9s +All GPU(s): step 2071: loss 0.7526, lr 3.9e-04, dt 1.9s +All GPU(s): step 2072: loss 1.0376, lr 3.9e-04, dt 1.9s +All GPU(s): step 2073: loss 0.5510, lr 3.9e-04, dt 1.9s +All GPU(s): step 2074: loss 0.5337, lr 3.9e-04, dt 2.0s +All GPU(s): step 2075: loss 0.6011, lr 3.9e-04, dt 1.9s +All GPU(s): step 2076: loss 0.6208, lr 3.9e-04, dt 1.9s +All GPU(s): step 2077: loss 1.0764, lr 3.9e-04, dt 1.9s +All GPU(s): step 2078: loss 0.5839, lr 3.9e-04, dt 2.0s +All GPU(s): step 2079: loss 1.1917, lr 3.9e-04, dt 2.0s +All GPU(s): step 2080: loss 1.0009, lr 3.9e-04, dt 2.0s +All GPU(s): step 2081: loss 0.9370, lr 3.9e-04, dt 1.9s +All GPU(s): step 2082: loss 1.4912, lr 3.9e-04, dt 1.8s +All GPU(s): step 2083: loss 1.7344, lr 3.9e-04, dt 1.8s +All GPU(s): step 2084: loss 2.0527, lr 3.9e-04, dt 2.0s +All GPU(s): step 2085: loss 3.7373, lr 3.9e-04, dt 1.7s +All GPU(s): step 2086: loss 2.2627, lr 3.9e-04, dt 1.8s +All GPU(s): step 2087: loss 1.5581, lr 3.9e-04, dt 1.8s +All GPU(s): step 2088: loss 1.3638, lr 3.9e-04, dt 1.9s +All GPU(s): step 2089: loss 1.5547, lr 3.9e-04, dt 1.9s +All GPU(s): step 2090: loss 1.4778, lr 3.9e-04, dt 1.9s +All GPU(s): step 2091: loss 0.9560, lr 3.9e-04, dt 1.9s +All GPU(s): step 2092: loss 0.4852, lr 3.9e-04, dt 1.9s +All GPU(s): step 2093: loss 0.9095, lr 3.9e-04, dt 1.9s +All GPU(s): step 2094: loss 0.8936, lr 3.9e-04, dt 1.9s +All GPU(s): step 2095: loss 1.0756, lr 3.9e-04, dt 2.0s +All GPU(s): step 2096: loss 0.5762, lr 3.9e-04, dt 2.0s +All GPU(s): step 2097: loss 0.6849, lr 3.9e-04, dt 1.9s +All GPU(s): step 2098: loss 0.4966, lr 3.9e-04, dt 2.0s +All GPU(s): step 2099: loss 0.3988, lr 3.9e-04, dt 2.0s +All GPU(s): step 2100: loss 0.1244, lr 3.9e-04, dt 2.1s +All GPU(s): step 2101: loss 0.1874, lr 3.9e-04, dt 2.0s +All GPU(s): step 2102: loss 0.0897, lr 3.9e-04, dt 2.0s +All GPU(s): step 2103: loss 0.1313, lr 3.9e-04, dt 2.0s +All GPU(s): step 2104: loss 0.1134, lr 3.9e-04, dt 2.1s +All GPU(s): step 2105: loss 0.2163, lr 3.9e-04, dt 2.0s +All GPU(s): step 2106: loss 0.1798, lr 3.9e-04, dt 2.0s +All GPU(s): step 2107: loss 0.2941, lr 3.9e-04, dt 2.0s +All GPU(s): step 2108: loss 0.2220, lr 3.9e-04, dt 2.0s +All GPU(s): step 2109: loss 0.0950, lr 3.9e-04, dt 2.0s +All GPU(s): step 2110: loss 0.1788, lr 3.9e-04, dt 2.0s +All GPU(s): step 2111: loss 0.2151, lr 3.9e-04, dt 2.0s +All GPU(s): step 2112: loss 0.2715, lr 3.9e-04, dt 2.0s +All GPU(s): step 2113: loss 0.3066, lr 3.9e-04, dt 2.0s +All GPU(s): step 2114: loss 0.2414, lr 3.9e-04, dt 2.0s +All GPU(s): step 2115: loss 0.3545, lr 3.9e-04, dt 2.0s +All GPU(s): step 2116: loss 0.2610, lr 3.9e-04, dt 2.0s +All GPU(s): step 2117: loss 0.2955, lr 3.9e-04, dt 2.0s +All GPU(s): step 2118: loss 0.3179, lr 3.9e-04, dt 2.0s +All GPU(s): step 2119: loss 0.3793, lr 3.9e-04, dt 2.0s +All GPU(s): step 2120: loss 0.3620, lr 3.9e-04, dt 2.0s +All GPU(s): step 2121: loss 0.2958, lr 3.9e-04, dt 2.0s +All GPU(s): step 2122: loss 0.4440, lr 3.9e-04, dt 1.9s +All GPU(s): step 2123: loss 0.2238, lr 3.9e-04, dt 2.0s +All GPU(s): step 2124: loss 0.4534, lr 3.9e-04, dt 2.0s +All GPU(s): step 2125: loss 0.4851, lr 3.9e-04, dt 2.0s +All GPU(s): step 2126: loss 0.3761, lr 3.9e-04, dt 2.0s +All GPU(s): step 2127: loss 0.4713, lr 3.9e-04, dt 2.0s +All GPU(s): step 2128: loss 0.4254, lr 3.9e-04, dt 2.0s +All GPU(s): step 2129: loss 0.4483, lr 3.9e-04, dt 2.0s +All GPU(s): step 2130: loss 0.6013, lr 3.9e-04, dt 2.0s +All GPU(s): step 2131: loss 0.5463, lr 3.9e-04, dt 1.9s +All GPU(s): step 2132: loss 0.5768, lr 3.9e-04, dt 1.9s +All GPU(s): step 2133: loss 0.5403, lr 3.9e-04, dt 1.9s +All GPU(s): step 2134: loss 0.4899, lr 3.9e-04, dt 1.9s +All GPU(s): step 2135: loss 0.4371, lr 3.9e-04, dt 2.0s +All GPU(s): step 2136: loss 0.6753, lr 3.9e-04, dt 1.9s +All GPU(s): step 2137: loss 0.6479, lr 3.9e-04, dt 1.9s +All GPU(s): step 2138: loss 0.4104, lr 3.9e-04, dt 1.9s +All GPU(s): step 2139: loss 0.7095, lr 3.9e-04, dt 1.9s +All GPU(s): step 2140: loss 0.4270, lr 3.9e-04, dt 2.1s +All GPU(s): step 2141: loss 0.6224, lr 3.9e-04, dt 2.0s +All GPU(s): step 2142: loss 0.4491, lr 3.9e-04, dt 1.9s +All GPU(s): step 2143: loss 0.4971, lr 3.9e-04, dt 1.9s +All GPU(s): step 2144: loss 0.4559, lr 3.9e-04, dt 1.9s +All GPU(s): step 2145: loss 0.2155, lr 3.9e-04, dt 2.1s +All GPU(s): step 2146: loss 0.4543, lr 3.9e-04, dt 1.9s +All GPU(s): step 2147: loss 0.3564, lr 3.9e-04, dt 2.0s +All GPU(s): step 2148: loss 0.4490, lr 3.9e-04, dt 1.9s +All GPU(s): step 2149: loss 0.4063, lr 3.9e-04, dt 2.0s +All GPU(s): step 2150: loss 0.3941, lr 3.9e-04, dt 2.0s +All GPU(s): step 2151: loss 0.3328, lr 3.9e-04, dt 2.0s +All GPU(s): step 2152: loss 0.3688, lr 3.9e-04, dt 1.9s +All GPU(s): step 2153: loss 0.3392, lr 3.9e-04, dt 2.0s +All GPU(s): step 2154: loss 0.4649, lr 3.9e-04, dt 1.9s +All GPU(s): step 2155: loss 0.4120, lr 3.9e-04, dt 2.0s +All GPU(s): step 2156: loss 0.4073, lr 3.9e-04, dt 2.0s +All GPU(s): step 2157: loss 0.4863, lr 3.9e-04, dt 2.0s +All GPU(s): step 2158: loss 0.6733, lr 3.9e-04, dt 1.9s +All GPU(s): step 2159: loss 0.5092, lr 3.9e-04, dt 1.9s +All GPU(s): step 2160: loss 0.3829, lr 3.9e-04, dt 2.0s +All GPU(s): step 2161: loss 0.4426, lr 3.9e-04, dt 2.0s +All GPU(s): step 2162: loss 0.3367, lr 3.9e-04, dt 2.0s +All GPU(s): step 2163: loss 0.4916, lr 3.9e-04, dt 2.0s +All GPU(s): step 2164: loss 0.3000, lr 3.9e-04, dt 1.9s +All GPU(s): step 2165: loss 0.3030, lr 3.9e-04, dt 2.0s +All GPU(s): step 2166: loss 0.4591, lr 3.9e-04, dt 1.9s +All GPU(s): step 2167: loss 0.5734, lr 3.9e-04, dt 1.9s +All GPU(s): step 2168: loss 0.4263, lr 3.9e-04, dt 1.9s +All GPU(s): step 2169: loss 0.4814, lr 3.9e-04, dt 2.0s +All GPU(s): step 2170: loss 0.9849, lr 3.9e-04, dt 2.0s +All GPU(s): step 2171: loss 0.9565, lr 3.9e-04, dt 1.9s +All GPU(s): step 2172: loss 1.0169, lr 3.9e-04, dt 2.0s +All GPU(s): step 2173: loss 1.1343, lr 3.9e-04, dt 1.8s +All GPU(s): step 2174: loss 0.9348, lr 3.9e-04, dt 1.9s +All GPU(s): step 2175: loss 1.1984, lr 3.9e-04, dt 2.0s +All GPU(s): step 2176: loss 1.7056, lr 3.9e-04, dt 2.0s +All GPU(s): step 2177: loss 1.4446, lr 3.9e-04, dt 1.9s +All GPU(s): step 2178: loss 1.0493, lr 3.9e-04, dt 1.9s +All GPU(s): step 2179: loss 1.3346, lr 3.9e-04, dt 1.9s +All GPU(s): step 2180: loss 1.3960, lr 3.9e-04, dt 1.9s +All GPU(s): step 2181: loss 1.6436, lr 3.9e-04, dt 1.9s +All GPU(s): step 2182: loss 1.4253, lr 3.9e-04, dt 1.8s +All GPU(s): step 2183: loss 0.7327, lr 3.9e-04, dt 1.9s +All GPU(s): step 2184: loss 0.8939, lr 3.9e-04, dt 1.9s +All GPU(s): step 2185: loss 0.9548, lr 3.9e-04, dt 1.9s +All GPU(s): step 2186: loss 0.7713, lr 3.9e-04, dt 2.0s +All GPU(s): step 2187: loss 0.2583, lr 3.9e-04, dt 1.9s +All GPU(s): step 2188: loss 0.5620, lr 3.9e-04, dt 1.9s +All GPU(s): step 2189: loss 0.7031, lr 3.9e-04, dt 2.0s +All GPU(s): step 2190: loss 0.6555, lr 3.9e-04, dt 2.0s +All GPU(s): step 2191: loss 0.3665, lr 3.9e-04, dt 2.1s +All GPU(s): step 2192: loss 0.4703, lr 3.9e-04, dt 2.0s +All GPU(s): step 2193: loss 0.5710, lr 3.9e-04, dt 2.0s +All GPU(s): step 2194: loss 0.3033, lr 3.9e-04, dt 1.9s +All GPU(s): step 2195: loss 0.6147, lr 3.9e-04, dt 2.0s +All GPU(s): step 2196: loss 0.6273, lr 3.9e-04, dt 2.0s +All GPU(s): step 2197: loss 0.4018, lr 3.9e-04, dt 1.9s +All GPU(s): step 2198: loss 0.2809, lr 3.9e-04, dt 1.9s +All GPU(s): step 2199: loss 0.2211, lr 3.9e-04, dt 2.0s +All GPU(s): step 2200: loss 0.3747, lr 3.9e-04, dt 2.0s +All GPU(s): step 2201: loss 0.2734, lr 3.9e-04, dt 2.1s +All GPU(s): step 2202: loss 0.2405, lr 3.9e-04, dt 2.0s +All GPU(s): step 2203: loss 0.3492, lr 3.9e-04, dt 2.0s +All GPU(s): step 2204: loss 0.4863, lr 3.9e-04, dt 1.9s +All GPU(s): step 2205: loss 0.2937, lr 3.9e-04, dt 1.9s +All GPU(s): step 2206: loss 0.5121, lr 3.9e-04, dt 2.0s +All GPU(s): step 2207: loss 0.4563, lr 3.9e-04, dt 1.9s +All GPU(s): step 2208: loss 0.3860, lr 3.9e-04, dt 1.9s +All GPU(s): step 2209: loss 0.4332, lr 3.9e-04, dt 2.0s +All GPU(s): step 2210: loss 0.5533, lr 3.9e-04, dt 2.0s +All GPU(s): step 2211: loss 0.6628, lr 3.9e-04, dt 2.1s +All GPU(s): step 2212: loss 0.5552, lr 3.9e-04, dt 2.0s +All GPU(s): step 2213: loss 0.5287, lr 3.9e-04, dt 1.9s +All GPU(s): step 2214: loss 0.7727, lr 3.9e-04, dt 1.9s +All GPU(s): step 2215: loss 0.7346, lr 3.9e-04, dt 1.9s +All GPU(s): step 2216: loss 0.5243, lr 3.9e-04, dt 2.0s +All GPU(s): step 2217: loss 0.7886, lr 3.9e-04, dt 1.9s +All GPU(s): step 2218: loss 0.5398, lr 3.9e-04, dt 2.0s +All GPU(s): step 2219: loss 0.3977, lr 3.9e-04, dt 2.0s +All GPU(s): step 2220: loss 0.6501, lr 3.9e-04, dt 1.9s +All GPU(s): step 2221: loss 0.3175, lr 3.9e-04, dt 2.0s +All GPU(s): step 2222: loss 0.4254, lr 3.9e-04, dt 2.0s +All GPU(s): step 2223: loss 0.3766, lr 3.9e-04, dt 2.0s +All GPU(s): step 2224: loss 0.5591, lr 3.9e-04, dt 2.0s +All GPU(s): step 2225: loss 0.3237, lr 3.9e-04, dt 2.0s +All GPU(s): step 2226: loss 0.6432, lr 3.9e-04, dt 2.0s +All GPU(s): step 2227: loss 0.2699, lr 3.9e-04, dt 2.0s +All GPU(s): step 2228: loss 0.4356, lr 3.9e-04, dt 2.0s +All GPU(s): step 2229: loss 0.2765, lr 3.9e-04, dt 1.9s +All GPU(s): step 2230: loss 0.6394, lr 3.9e-04, dt 2.0s +All GPU(s): step 2231: loss 0.6385, lr 3.9e-04, dt 1.9s +All GPU(s): step 2232: loss 0.5524, lr 3.9e-04, dt 2.0s +All GPU(s): step 2233: loss 0.3233, lr 3.9e-04, dt 2.0s +All GPU(s): step 2234: loss 0.7028, lr 3.9e-04, dt 1.9s +All GPU(s): step 2235: loss 0.3158, lr 3.9e-04, dt 2.0s +All GPU(s): step 2236: loss 0.4956, lr 3.9e-04, dt 2.0s +All GPU(s): step 2237: loss 0.5388, lr 3.9e-04, dt 2.0s +All GPU(s): step 2238: loss 0.3706, lr 3.9e-04, dt 2.0s +All GPU(s): step 2239: loss 0.2843, lr 3.9e-04, dt 2.0s +All GPU(s): step 2240: loss 0.2663, lr 3.9e-04, dt 1.9s +All GPU(s): step 2241: loss 0.4111, lr 3.9e-04, dt 2.0s +All GPU(s): step 2242: loss 0.1222, lr 3.9e-04, dt 2.1s +All GPU(s): step 2243: loss 0.3659, lr 3.9e-04, dt 1.9s +All GPU(s): step 2244: loss 0.4758, lr 3.9e-04, dt 1.9s +All GPU(s): step 2245: loss 0.2732, lr 3.9e-04, dt 2.0s +All GPU(s): step 2246: loss 0.2839, lr 3.9e-04, dt 2.0s +All GPU(s): step 2247: loss 0.4006, lr 3.9e-04, dt 2.0s +All GPU(s): step 2248: loss 0.3306, lr 3.9e-04, dt 2.0s +All GPU(s): step 2249: loss 0.2907, lr 3.9e-04, dt 2.0s +All GPU(s): step 2250: loss 0.4486, lr 3.9e-04, dt 2.0s +All GPU(s): step 2251: loss 0.2487, lr 3.9e-04, dt 2.0s +All GPU(s): step 2252: loss 0.2506, lr 3.9e-04, dt 2.1s +All GPU(s): step 2253: loss 0.3878, lr 3.9e-04, dt 2.0s +All GPU(s): step 2254: loss 0.2278, lr 3.9e-04, dt 2.0s +All GPU(s): step 2255: loss 0.1840, lr 3.9e-04, dt 2.0s +All GPU(s): step 2256: loss 0.5940, lr 3.9e-04, dt 2.0s +All GPU(s): step 2257: loss 0.4732, lr 3.9e-04, dt 2.0s +All GPU(s): step 2258: loss 0.4562, lr 3.9e-04, dt 1.9s +All GPU(s): step 2259: loss 0.4355, lr 3.9e-04, dt 1.9s +All GPU(s): step 2260: loss 0.4712, lr 3.9e-04, dt 1.9s +All GPU(s): step 2261: loss 0.3414, lr 3.9e-04, dt 1.9s +All GPU(s): step 2262: loss 0.6009, lr 3.9e-04, dt 2.0s +All GPU(s): step 2263: loss 0.4628, lr 3.9e-04, dt 2.0s +All GPU(s): step 2264: loss 0.6827, lr 3.9e-04, dt 1.9s +All GPU(s): step 2265: loss 0.5945, lr 3.9e-04, dt 1.9s +All GPU(s): step 2266: loss 0.3753, lr 3.9e-04, dt 2.0s +All GPU(s): step 2267: loss 0.6216, lr 3.9e-04, dt 2.0s +All GPU(s): step 2268: loss 0.4663, lr 3.9e-04, dt 1.9s +All GPU(s): step 2269: loss 0.7249, lr 3.9e-04, dt 1.9s +All GPU(s): step 2270: loss 0.4437, lr 3.9e-04, dt 1.9s +All GPU(s): step 2271: loss 0.3956, lr 3.9e-04, dt 1.9s +All GPU(s): step 2272: loss 0.6788, lr 3.9e-04, dt 2.0s +All GPU(s): step 2273: loss 0.6415, lr 3.9e-04, dt 1.9s +All GPU(s): step 2274: loss 0.8618, lr 3.9e-04, dt 1.9s +All GPU(s): step 2275: loss 0.6824, lr 3.9e-04, dt 1.9s +All GPU(s): step 2276: loss 0.5384, lr 3.9e-04, dt 1.9s +All GPU(s): step 2277: loss 0.4524, lr 3.9e-04, dt 2.1s +All GPU(s): step 2278: loss 0.8208, lr 3.9e-04, dt 1.9s +All GPU(s): step 2279: loss 0.8521, lr 3.9e-04, dt 1.9s +All GPU(s): step 2280: loss 0.9425, lr 3.9e-04, dt 1.9s +All GPU(s): step 2281: loss 0.7064, lr 3.9e-04, dt 2.0s +All GPU(s): step 2282: loss 1.0714, lr 3.9e-04, dt 2.0s +All GPU(s): step 2283: loss 1.0176, lr 3.9e-04, dt 1.9s +All GPU(s): step 2284: loss 0.6340, lr 3.9e-04, dt 1.9s +All GPU(s): step 2285: loss 0.5626, lr 3.9e-04, dt 1.9s +All GPU(s): step 2286: loss 0.7368, lr 3.9e-04, dt 1.9s +All GPU(s): step 2287: loss 0.8777, lr 3.9e-04, dt 2.0s +All GPU(s): step 2288: loss 0.6677, lr 3.9e-04, dt 1.9s +All GPU(s): step 2289: loss 0.7003, lr 3.9e-04, dt 1.9s +All GPU(s): step 2290: loss 0.5381, lr 3.9e-04, dt 1.9s +All GPU(s): step 2291: loss 0.7976, lr 3.9e-04, dt 1.9s +All GPU(s): step 2292: loss 0.4079, lr 3.9e-04, dt 2.0s +All GPU(s): step 2293: loss 0.6193, lr 3.9e-04, dt 2.0s +All GPU(s): step 2294: loss 0.6451, lr 3.9e-04, dt 1.9s +All GPU(s): step 2295: loss 0.5723, lr 3.9e-04, dt 1.9s +All GPU(s): step 2296: loss 0.7228, lr 3.9e-04, dt 1.9s +All GPU(s): step 2297: loss 0.5707, lr 3.9e-04, dt 2.0s +All GPU(s): step 2298: loss 0.7771, lr 3.9e-04, dt 1.9s +All GPU(s): step 2299: loss 0.8850, lr 3.9e-04, dt 1.9s +All GPU(s): step 2300: loss 0.9932, lr 3.9e-04, dt 1.9s +All GPU(s): step 2301: loss 0.7776, lr 3.9e-04, dt 1.9s +All GPU(s): step 2302: loss 0.8213, lr 3.9e-04, dt 2.0s +All GPU(s): step 2303: loss 0.7298, lr 3.9e-04, dt 2.1s +All GPU(s): step 2304: loss 0.8008, lr 3.9e-04, dt 1.9s +All GPU(s): step 2305: loss 1.0557, lr 3.9e-04, dt 1.9s +All GPU(s): step 2306: loss 1.1675, lr 3.9e-04, dt 1.9s +All GPU(s): step 2307: loss 3.3965, lr 3.9e-04, dt 1.8s +All GPU(s): step 2308: loss 2.4834, lr 3.9e-04, dt 1.9s +All GPU(s): step 2309: loss 1.4368, lr 3.8e-04, dt 1.9s +All GPU(s): step 2310: loss 1.5312, lr 3.8e-04, dt 1.8s +All GPU(s): step 2311: loss 0.8662, lr 3.8e-04, dt 2.0s +All GPU(s): step 2312: loss 0.8289, lr 3.8e-04, dt 1.9s +All GPU(s): step 2313: loss 1.0352, lr 3.8e-04, dt 1.9s +All GPU(s): step 2314: loss 0.8613, lr 3.8e-04, dt 1.9s +All GPU(s): step 2315: loss 1.0929, lr 3.8e-04, dt 1.9s +All GPU(s): step 2316: loss 1.4805, lr 3.8e-04, dt 1.9s +All GPU(s): step 2317: loss 0.9980, lr 3.8e-04, dt 1.9s +All GPU(s): step 2318: loss 0.7651, lr 3.8e-04, dt 2.0s +All GPU(s): step 2319: loss 0.7844, lr 3.8e-04, dt 1.9s +All GPU(s): step 2320: loss 0.8590, lr 3.8e-04, dt 1.9s +All GPU(s): step 2321: loss 0.6454, lr 3.8e-04, dt 1.9s +All GPU(s): step 2322: loss 0.4622, lr 3.8e-04, dt 1.9s +All GPU(s): step 2323: loss 0.5645, lr 3.8e-04, dt 1.9s +All GPU(s): step 2324: loss 0.7882, lr 3.8e-04, dt 2.0s +All GPU(s): step 2325: loss 0.5403, lr 3.8e-04, dt 2.0s +All GPU(s): step 2326: loss 0.8970, lr 3.8e-04, dt 1.9s +All GPU(s): step 2327: loss 0.6132, lr 3.8e-04, dt 1.9s +All GPU(s): step 2328: loss 0.5618, lr 3.8e-04, dt 2.0s +All GPU(s): step 2329: loss 0.8157, lr 3.8e-04, dt 2.0s +All GPU(s): step 2330: loss 0.7095, lr 3.8e-04, dt 1.9s +All GPU(s): step 2331: loss 0.6458, lr 3.8e-04, dt 1.9s +All GPU(s): step 2332: loss 0.9060, lr 3.8e-04, dt 1.9s +All GPU(s): step 2333: loss 0.4855, lr 3.8e-04, dt 2.0s +All GPU(s): step 2334: loss 1.0249, lr 3.8e-04, dt 2.0s +All GPU(s): step 2335: loss 0.7676, lr 3.8e-04, dt 1.9s +All GPU(s): step 2336: loss 0.8468, lr 3.8e-04, dt 1.9s +All GPU(s): step 2337: loss 0.6338, lr 3.8e-04, dt 1.9s +All GPU(s): step 2338: loss 0.5988, lr 3.8e-04, dt 1.9s +All GPU(s): step 2339: loss 0.7878, lr 3.8e-04, dt 2.0s +All GPU(s): step 2340: loss 0.6907, lr 3.8e-04, dt 1.9s +All GPU(s): step 2341: loss 0.4352, lr 3.8e-04, dt 1.9s +All GPU(s): step 2342: loss 0.5587, lr 3.8e-04, dt 1.9s +All GPU(s): step 2343: loss 0.5453, lr 3.8e-04, dt 1.9s +All GPU(s): step 2344: loss 0.7240, lr 3.8e-04, dt 2.0s +All GPU(s): step 2345: loss 0.4947, lr 3.8e-04, dt 1.9s +All GPU(s): step 2346: loss 0.7217, lr 3.8e-04, dt 1.9s +All GPU(s): step 2347: loss 0.6675, lr 3.8e-04, dt 1.9s +All GPU(s): step 2348: loss 0.6377, lr 3.8e-04, dt 1.9s +All GPU(s): step 2349: loss 0.5626, lr 3.8e-04, dt 2.0s +All GPU(s): step 2350: loss 0.6543, lr 3.8e-04, dt 1.9s +All GPU(s): step 2351: loss 0.5956, lr 3.8e-04, dt 1.9s +All GPU(s): step 2352: loss 0.6753, lr 3.8e-04, dt 1.9s +All GPU(s): step 2353: loss 0.6880, lr 3.8e-04, dt 1.9s +All GPU(s): step 2354: loss 0.4727, lr 3.8e-04, dt 2.0s +All GPU(s): step 2355: loss 0.4211, lr 3.8e-04, dt 1.9s +All GPU(s): step 2356: loss 0.3579, lr 3.8e-04, dt 2.0s +All GPU(s): step 2357: loss 0.4195, lr 3.8e-04, dt 1.9s +All GPU(s): step 2358: loss 0.4406, lr 3.8e-04, dt 1.9s +All GPU(s): step 2359: loss 0.4527, lr 3.8e-04, dt 2.0s +All GPU(s): step 2360: loss 0.7534, lr 3.8e-04, dt 1.9s +All GPU(s): step 2361: loss 0.9265, lr 3.8e-04, dt 1.9s +All GPU(s): step 2362: loss 0.3310, lr 3.8e-04, dt 2.0s +All GPU(s): step 2363: loss 0.3119, lr 3.8e-04, dt 2.0s +All GPU(s): step 2364: loss 0.3425, lr 3.8e-04, dt 2.0s +All GPU(s): step 2365: loss 0.4145, lr 3.8e-04, dt 2.0s +All GPU(s): step 2366: loss 0.4470, lr 3.8e-04, dt 1.9s +All GPU(s): step 2367: loss 0.7810, lr 3.8e-04, dt 1.9s +All GPU(s): step 2368: loss 0.7275, lr 3.8e-04, dt 1.9s +All GPU(s): step 2369: loss 0.7507, lr 3.8e-04, dt 1.9s +All GPU(s): step 2370: loss 0.9163, lr 3.8e-04, dt 2.0s +All GPU(s): step 2371: loss 1.4370, lr 3.8e-04, dt 1.9s +All GPU(s): step 2372: loss 1.0640, lr 3.8e-04, dt 1.9s +All GPU(s): step 2373: loss 1.1772, lr 3.8e-04, dt 1.8s +All GPU(s): step 2374: loss 1.5991, lr 3.8e-04, dt 1.9s +All GPU(s): step 2375: loss 0.8730, lr 3.8e-04, dt 2.0s +All GPU(s): step 2376: loss 1.0209, lr 3.8e-04, dt 1.9s +All GPU(s): step 2377: loss 0.9468, lr 3.8e-04, dt 1.9s +All GPU(s): step 2378: loss 0.7697, lr 3.8e-04, dt 1.9s +All GPU(s): step 2379: loss 0.7982, lr 3.8e-04, dt 1.9s +All GPU(s): step 2380: loss 0.6445, lr 3.8e-04, dt 2.0s +All GPU(s): step 2381: loss 0.8923, lr 3.8e-04, dt 1.9s +All GPU(s): step 2382: loss 0.3868, lr 3.8e-04, dt 2.0s +All GPU(s): step 2383: loss 0.3139, lr 3.8e-04, dt 2.0s +All GPU(s): step 2384: loss 0.3376, lr 3.8e-04, dt 2.0s +All GPU(s): step 2385: loss 0.3323, lr 3.8e-04, dt 2.1s +All GPU(s): step 2386: loss 0.2858, lr 3.8e-04, dt 2.0s +All GPU(s): step 2387: loss 0.2281, lr 3.8e-04, dt 1.9s +All GPU(s): step 2388: loss 0.6326, lr 3.8e-04, dt 1.9s +All GPU(s): step 2389: loss 0.2358, lr 3.8e-04, dt 1.9s +All GPU(s): step 2390: loss 0.4873, lr 3.8e-04, dt 2.0s +All GPU(s): step 2391: loss 0.7624, lr 3.8e-04, dt 2.0s +All GPU(s): step 2392: loss 0.4677, lr 3.8e-04, dt 1.9s +All GPU(s): step 2393: loss 0.6624, lr 3.8e-04, dt 1.9s +All GPU(s): step 2394: loss 0.3430, lr 3.8e-04, dt 1.9s +All GPU(s): step 2395: loss 0.3300, lr 3.8e-04, dt 2.1s +All GPU(s): step 2396: loss 0.4853, lr 3.8e-04, dt 2.0s +All GPU(s): step 2397: loss 0.3602, lr 3.8e-04, dt 1.9s +All GPU(s): step 2398: loss 0.6598, lr 3.8e-04, dt 1.9s +All GPU(s): step 2399: loss 0.4323, lr 3.8e-04, dt 2.0s +All GPU(s): step 2400: loss 0.5287, lr 3.8e-04, dt 2.0s +All GPU(s): step 2401: loss 0.8496, lr 3.8e-04, dt 1.9s +All GPU(s): step 2402: loss 0.7648, lr 3.8e-04, dt 1.9s +All GPU(s): step 2403: loss 0.5640, lr 3.8e-04, dt 2.0s +All GPU(s): step 2404: loss 0.6477, lr 3.8e-04, dt 1.9s +All GPU(s): step 2405: loss 0.4102, lr 3.8e-04, dt 2.0s +All GPU(s): step 2406: loss 0.6401, lr 3.8e-04, dt 1.9s +All GPU(s): step 2407: loss 0.3825, lr 3.8e-04, dt 2.0s +All GPU(s): step 2408: loss 0.3819, lr 3.8e-04, dt 2.0s +All GPU(s): step 2409: loss 0.4006, lr 3.8e-04, dt 1.9s +All GPU(s): step 2410: loss 0.3884, lr 3.8e-04, dt 2.0s +All GPU(s): step 2411: loss 0.4661, lr 3.8e-04, dt 2.0s +All GPU(s): step 2412: loss 0.4215, lr 3.8e-04, dt 2.0s +All GPU(s): step 2413: loss 0.7417, lr 3.8e-04, dt 1.9s +All GPU(s): step 2414: loss 0.4921, lr 3.8e-04, dt 1.9s +All GPU(s): step 2415: loss 0.5383, lr 3.8e-04, dt 1.9s +All GPU(s): step 2416: loss 0.3257, lr 3.8e-04, dt 2.0s +All GPU(s): step 2417: loss 0.4678, lr 3.8e-04, dt 1.9s +All GPU(s): step 2418: loss 0.4385, lr 3.8e-04, dt 2.0s +All GPU(s): step 2419: loss 0.4479, lr 3.8e-04, dt 1.9s +All GPU(s): step 2420: loss 0.6315, lr 3.8e-04, dt 2.0s +All GPU(s): step 2421: loss 0.4590, lr 3.8e-04, dt 2.1s +All GPU(s): step 2422: loss 0.3211, lr 3.8e-04, dt 2.0s +All GPU(s): step 2423: loss 0.2407, lr 3.8e-04, dt 2.0s +All GPU(s): step 2424: loss 0.2983, lr 3.8e-04, dt 2.0s +All GPU(s): step 2425: loss 0.1466, lr 3.8e-04, dt 2.0s +All GPU(s): step 2426: loss 0.2630, lr 3.8e-04, dt 2.0s +All GPU(s): step 2427: loss 0.1817, lr 3.8e-04, dt 2.0s +All GPU(s): step 2428: loss 0.1934, lr 3.8e-04, dt 2.0s +All GPU(s): step 2429: loss 0.1180, lr 3.8e-04, dt 2.0s +All GPU(s): step 2430: loss 0.3237, lr 3.8e-04, dt 2.1s +All GPU(s): step 2431: loss 0.1649, lr 3.8e-04, dt 2.1s +All GPU(s): step 2432: loss 0.1937, lr 3.8e-04, dt 2.0s +All GPU(s): step 2433: loss 0.1594, lr 3.8e-04, dt 2.0s +All GPU(s): step 2434: loss 0.1190, lr 3.8e-04, dt 2.0s +All GPU(s): step 2435: loss 0.2478, lr 3.8e-04, dt 2.0s +All GPU(s): step 2436: loss 0.2714, lr 3.8e-04, dt 2.0s +All GPU(s): step 2437: loss 0.2902, lr 3.8e-04, dt 1.9s +All GPU(s): step 2438: loss 0.2296, lr 3.8e-04, dt 2.0s +All GPU(s): step 2439: loss 0.1601, lr 3.8e-04, dt 2.0s +All GPU(s): step 2440: loss 0.3450, lr 3.8e-04, dt 2.0s +All GPU(s): step 2441: loss 0.1479, lr 3.8e-04, dt 2.1s +All GPU(s): step 2442: loss 0.3426, lr 3.8e-04, dt 2.0s +All GPU(s): step 2443: loss 0.1878, lr 3.8e-04, dt 2.0s +All GPU(s): step 2444: loss 0.1600, lr 3.8e-04, dt 2.0s +All GPU(s): step 2445: loss 0.2071, lr 3.8e-04, dt 2.0s +All GPU(s): step 2446: loss 0.3213, lr 3.8e-04, dt 2.0s +All GPU(s): step 2447: loss 0.1819, lr 3.8e-04, dt 2.0s +All GPU(s): step 2448: loss 0.4239, lr 3.8e-04, dt 1.9s +All GPU(s): step 2449: loss 0.1740, lr 3.8e-04, dt 2.0s +All GPU(s): step 2450: loss 0.3681, lr 3.8e-04, dt 2.0s +All GPU(s): step 2451: loss 0.2899, lr 3.8e-04, dt 2.0s +All GPU(s): step 2452: loss 0.2031, lr 3.8e-04, dt 2.0s +All GPU(s): step 2453: loss 0.2611, lr 3.8e-04, dt 2.0s +All GPU(s): step 2454: loss 0.1324, lr 3.8e-04, dt 2.0s +All GPU(s): step 2455: loss 0.1310, lr 3.8e-04, dt 2.0s +All GPU(s): step 2456: loss 0.0598, lr 3.8e-04, dt 2.0s +All GPU(s): step 2457: loss 0.1208, lr 3.8e-04, dt 2.0s +All GPU(s): step 2458: loss 0.1013, lr 3.8e-04, dt 2.0s +All GPU(s): step 2459: loss 0.0634, lr 3.8e-04, dt 2.0s +All GPU(s): step 2460: loss 0.0779, lr 3.8e-04, dt 2.0s +All GPU(s): step 2461: loss 0.0910, lr 3.8e-04, dt 2.0s +All GPU(s): step 2462: loss 0.0457, lr 3.8e-04, dt 2.0s +All GPU(s): step 2463: loss 0.0650, lr 3.8e-04, dt 2.0s +All GPU(s): step 2464: loss 0.3099, lr 3.8e-04, dt 2.0s +All GPU(s): step 2465: loss 0.2053, lr 3.8e-04, dt 2.0s +All GPU(s): step 2466: loss 0.1540, lr 3.8e-04, dt 2.0s +All GPU(s): step 2467: loss 0.0988, lr 3.8e-04, dt 2.0s +All GPU(s): step 2468: loss 0.1155, lr 3.8e-04, dt 2.0s +All GPU(s): step 2469: loss 0.1815, lr 3.8e-04, dt 1.9s +All GPU(s): step 2470: loss 0.0543, lr 3.8e-04, dt 2.0s +All GPU(s): step 2471: loss 0.1672, lr 3.8e-04, dt 2.1s +All GPU(s): step 2472: loss 0.0993, lr 3.8e-04, dt 2.0s +All GPU(s): step 2473: loss 0.1566, lr 3.8e-04, dt 2.0s +All GPU(s): step 2474: loss 0.1987, lr 3.8e-04, dt 2.0s +All GPU(s): step 2475: loss 0.0982, lr 3.8e-04, dt 2.1s +All GPU(s): step 2476: loss 0.0542, lr 3.8e-04, dt 2.0s +All GPU(s): step 2477: loss 0.0612, lr 3.8e-04, dt 2.0s +All GPU(s): step 2478: loss 0.1219, lr 3.8e-04, dt 2.0s +All GPU(s): step 2479: loss 0.0480, lr 3.8e-04, dt 2.0s +All GPU(s): step 2480: loss 0.0948, lr 3.8e-04, dt 2.1s +All GPU(s): step 2481: loss 0.0819, lr 3.8e-04, dt 2.1s +All GPU(s): step 2482: loss 0.0801, lr 3.8e-04, dt 2.0s +All GPU(s): step 2483: loss 0.0641, lr 3.8e-04, dt 2.0s +All GPU(s): step 2484: loss 0.0637, lr 3.8e-04, dt 2.0s +All GPU(s): step 2485: loss 0.1786, lr 3.8e-04, dt 2.1s +All GPU(s): step 2486: loss 0.1599, lr 3.8e-04, dt 2.0s +All GPU(s): step 2487: loss 0.3264, lr 3.8e-04, dt 2.0s +All GPU(s): step 2488: loss 0.1666, lr 3.8e-04, dt 2.0s +All GPU(s): step 2489: loss 0.0854, lr 3.8e-04, dt 2.0s +All GPU(s): step 2490: loss 0.2414, lr 3.8e-04, dt 2.1s +All GPU(s): step 2491: loss 0.1927, lr 3.8e-04, dt 2.0s +All GPU(s): step 2492: loss 0.2872, lr 3.8e-04, dt 1.9s +All GPU(s): step 2493: loss 0.2277, lr 3.8e-04, dt 2.0s +All GPU(s): step 2494: loss 0.2201, lr 3.8e-04, dt 1.9s +All GPU(s): step 2495: loss 0.2684, lr 3.8e-04, dt 2.1s +All GPU(s): step 2496: loss 0.2228, lr 3.8e-04, dt 2.0s +All GPU(s): step 2497: loss 0.1251, lr 3.8e-04, dt 2.0s +All GPU(s): step 2498: loss 0.2681, lr 3.8e-04, dt 2.0s +All GPU(s): step 2499: loss 0.3387, lr 3.8e-04, dt 2.0s +All GPU(s): step 2500: loss 0.2416, lr 3.8e-04, dt 2.2s +All GPU(s): step 2501: loss 0.2333, lr 3.8e-04, dt 1.9s +All GPU(s): step 2502: loss 0.3040, lr 3.8e-04, dt 2.0s +All GPU(s): step 2503: loss 0.1634, lr 3.8e-04, dt 2.0s +All GPU(s): step 2504: loss 0.2885, lr 3.8e-04, dt 2.0s +All GPU(s): step 2505: loss 0.1631, lr 3.8e-04, dt 2.1s +All GPU(s): step 2506: loss 0.3173, lr 3.8e-04, dt 2.0s +All GPU(s): step 2507: loss 0.1308, lr 3.8e-04, dt 2.0s +All GPU(s): step 2508: loss 0.1629, lr 3.8e-04, dt 2.0s +All GPU(s): step 2509: loss 0.1513, lr 3.8e-04, dt 2.0s +All GPU(s): step 2510: loss 0.1842, lr 3.8e-04, dt 2.1s +All GPU(s): step 2511: loss 0.0922, lr 3.8e-04, dt 2.0s +All GPU(s): step 2512: loss 0.1519, lr 3.8e-04, dt 2.0s +All GPU(s): step 2513: loss 0.1010, lr 3.8e-04, dt 2.0s +All GPU(s): step 2514: loss 0.1417, lr 3.8e-04, dt 2.1s +All GPU(s): step 2515: loss 0.2454, lr 3.8e-04, dt 2.1s +All GPU(s): step 2516: loss 0.1569, lr 3.8e-04, dt 2.0s +All GPU(s): step 2517: loss 0.0809, lr 3.8e-04, dt 2.0s +All GPU(s): step 2518: loss 0.1567, lr 3.8e-04, dt 2.0s +All GPU(s): step 2519: loss 0.0814, lr 3.8e-04, dt 2.0s +All GPU(s): step 2520: loss 0.1796, lr 3.8e-04, dt 2.1s +All GPU(s): step 2521: loss 0.0956, lr 3.8e-04, dt 2.1s +All GPU(s): step 2522: loss 0.2399, lr 3.8e-04, dt 2.0s +All GPU(s): step 2523: loss 0.1621, lr 3.8e-04, dt 2.0s +All GPU(s): step 2524: loss 0.1700, lr 3.8e-04, dt 2.0s +All GPU(s): step 2525: loss 0.1687, lr 3.8e-04, dt 2.1s +All GPU(s): step 2526: loss 0.0374, lr 3.8e-04, dt 2.0s +All GPU(s): step 2527: loss 0.1033, lr 3.8e-04, dt 2.0s +All GPU(s): step 2528: loss 0.0537, lr 3.8e-04, dt 2.0s +All GPU(s): step 2529: loss 0.1731, lr 3.8e-04, dt 2.1s +All GPU(s): step 2530: loss 0.0325, lr 3.8e-04, dt 2.1s +All GPU(s): step 2531: loss 0.0541, lr 3.8e-04, dt 2.0s +All GPU(s): step 2532: loss 0.0233, lr 3.8e-04, dt 2.0s +All GPU(s): step 2533: loss 0.1655, lr 3.8e-04, dt 2.0s +All GPU(s): step 2534: loss 0.1806, lr 3.8e-04, dt 2.1s +All GPU(s): step 2535: loss 0.1251, lr 3.8e-04, dt 2.0s +All GPU(s): step 2536: loss 0.0766, lr 3.8e-04, dt 2.0s +All GPU(s): step 2537: loss 0.1120, lr 3.8e-04, dt 2.0s +All GPU(s): step 2538: loss 0.0890, lr 3.8e-04, dt 2.0s +All GPU(s): step 2539: loss 0.1585, lr 3.8e-04, dt 2.0s +All GPU(s): step 2540: loss 0.1037, lr 3.8e-04, dt 2.0s +All GPU(s): step 2541: loss 0.0933, lr 3.8e-04, dt 2.0s +All GPU(s): step 2542: loss 0.1224, lr 3.8e-04, dt 2.0s +All GPU(s): step 2543: loss 0.2566, lr 3.8e-04, dt 1.9s +All GPU(s): step 2544: loss 0.1473, lr 3.8e-04, dt 2.1s +All GPU(s): step 2545: loss 0.2328, lr 3.8e-04, dt 2.0s +All GPU(s): step 2546: loss 0.2129, lr 3.8e-04, dt 1.9s +All GPU(s): step 2547: loss 0.1683, lr 3.8e-04, dt 2.0s +All GPU(s): step 2548: loss 0.2991, lr 3.8e-04, dt 1.9s +All GPU(s): step 2549: loss 0.4072, lr 3.8e-04, dt 2.0s +All GPU(s): step 2550: loss 0.3838, lr 3.8e-04, dt 2.0s +All GPU(s): step 2551: loss 0.1982, lr 3.8e-04, dt 2.0s +All GPU(s): step 2552: loss 0.2321, lr 3.8e-04, dt 2.0s +All GPU(s): step 2553: loss 0.2164, lr 3.8e-04, dt 2.0s +All GPU(s): step 2554: loss 0.3430, lr 3.8e-04, dt 2.0s +All GPU(s): step 2555: loss 0.4535, lr 3.8e-04, dt 1.9s +All GPU(s): step 2556: loss 0.4596, lr 3.8e-04, dt 2.0s +All GPU(s): step 2557: loss 0.3309, lr 3.8e-04, dt 2.0s +All GPU(s): step 2558: loss 0.1888, lr 3.8e-04, dt 2.0s +All GPU(s): step 2559: loss 0.2084, lr 3.8e-04, dt 2.0s +All GPU(s): step 2560: loss 0.3298, lr 3.8e-04, dt 2.0s +All GPU(s): step 2561: loss 0.2454, lr 3.8e-04, dt 2.0s +All GPU(s): step 2562: loss 0.2485, lr 3.8e-04, dt 2.0s +All GPU(s): step 2563: loss 0.1509, lr 3.8e-04, dt 2.0s +All GPU(s): step 2564: loss 0.3228, lr 3.8e-04, dt 2.0s +All GPU(s): step 2565: loss 0.5183, lr 3.8e-04, dt 1.9s +All GPU(s): step 2566: loss 0.6728, lr 3.8e-04, dt 1.9s +All GPU(s): step 2567: loss 0.1821, lr 3.8e-04, dt 2.0s +All GPU(s): step 2568: loss 0.2400, lr 3.8e-04, dt 2.0s +All GPU(s): step 2569: loss 0.2618, lr 3.8e-04, dt 2.0s +All GPU(s): step 2570: loss 0.3546, lr 3.8e-04, dt 2.0s +All GPU(s): step 2571: loss 0.2473, lr 3.8e-04, dt 2.0s +All GPU(s): step 2572: loss 0.2580, lr 3.8e-04, dt 2.0s +All GPU(s): step 2573: loss 0.3871, lr 3.8e-04, dt 2.0s +All GPU(s): step 2574: loss 0.3039, lr 3.8e-04, dt 2.0s +All GPU(s): step 2575: loss 0.5507, lr 3.8e-04, dt 2.0s +All GPU(s): step 2576: loss 0.3397, lr 3.8e-04, dt 2.0s +All GPU(s): step 2577: loss 0.2888, lr 3.8e-04, dt 1.9s +All GPU(s): step 2578: loss 0.4386, lr 3.8e-04, dt 1.9s +All GPU(s): step 2579: loss 0.3476, lr 3.8e-04, dt 2.0s +All GPU(s): step 2580: loss 0.2423, lr 3.8e-04, dt 2.0s +All GPU(s): step 2581: loss 0.1500, lr 3.8e-04, dt 2.0s +All GPU(s): step 2582: loss 0.2448, lr 3.8e-04, dt 1.9s +All GPU(s): step 2583: loss 0.2614, lr 3.8e-04, dt 2.0s +All GPU(s): step 2584: loss 0.0997, lr 3.8e-04, dt 2.1s +All GPU(s): step 2585: loss 0.1419, lr 3.8e-04, dt 2.1s +All GPU(s): step 2586: loss 0.1015, lr 3.8e-04, dt 2.0s +All GPU(s): step 2587: loss 0.1198, lr 3.8e-04, dt 2.0s +All GPU(s): step 2588: loss 0.1049, lr 3.8e-04, dt 2.0s +All GPU(s): step 2589: loss 0.2291, lr 3.8e-04, dt 2.1s +All GPU(s): step 2590: loss 0.2857, lr 3.8e-04, dt 2.1s +All GPU(s): step 2591: loss 0.1926, lr 3.8e-04, dt 2.0s +All GPU(s): step 2592: loss 0.2407, lr 3.8e-04, dt 2.0s +All GPU(s): step 2593: loss 0.2336, lr 3.8e-04, dt 2.0s +All GPU(s): step 2594: loss 0.1654, lr 3.8e-04, dt 2.0s +All GPU(s): step 2595: loss 0.3293, lr 3.8e-04, dt 1.9s +All GPU(s): step 2596: loss 0.1662, lr 3.8e-04, dt 2.0s +All GPU(s): step 2597: loss 0.2558, lr 3.8e-04, dt 2.0s +All GPU(s): step 2598: loss 0.1311, lr 3.8e-04, dt 2.0s +All GPU(s): step 2599: loss 0.2726, lr 3.8e-04, dt 2.0s +All GPU(s): step 2600: loss 0.1282, lr 3.8e-04, dt 2.0s +All GPU(s): step 2601: loss 0.1265, lr 3.8e-04, dt 1.9s +All GPU(s): step 2602: loss 0.4246, lr 3.8e-04, dt 1.9s +All GPU(s): step 2603: loss 0.1646, lr 3.8e-04, dt 2.0s +All GPU(s): step 2604: loss 0.2141, lr 3.8e-04, dt 2.0s +All GPU(s): step 2605: loss 0.1928, lr 3.8e-04, dt 2.0s +All GPU(s): step 2606: loss 0.1073, lr 3.8e-04, dt 2.0s +All GPU(s): step 2607: loss 0.1574, lr 3.8e-04, dt 2.0s +All GPU(s): step 2608: loss 0.0939, lr 3.8e-04, dt 2.0s +All GPU(s): step 2609: loss 0.1281, lr 3.8e-04, dt 2.1s +All GPU(s): step 2610: loss 0.1929, lr 3.8e-04, dt 2.0s +All GPU(s): step 2611: loss 0.1432, lr 3.8e-04, dt 2.0s +All GPU(s): step 2612: loss 0.0272, lr 3.8e-04, dt 2.0s +All GPU(s): step 2613: loss 0.2785, lr 3.8e-04, dt 2.0s +All GPU(s): step 2614: loss 0.1065, lr 3.8e-04, dt 2.1s +All GPU(s): step 2615: loss 0.0734, lr 3.8e-04, dt 2.1s +All GPU(s): step 2616: loss 0.1751, lr 3.8e-04, dt 2.0s +All GPU(s): step 2617: loss 0.0693, lr 3.8e-04, dt 2.0s +All GPU(s): step 2618: loss 0.1187, lr 3.8e-04, dt 2.0s +All GPU(s): step 2619: loss 0.1265, lr 3.8e-04, dt 2.0s +All GPU(s): step 2620: loss 0.1776, lr 3.8e-04, dt 2.0s +All GPU(s): step 2621: loss 0.1243, lr 3.8e-04, dt 2.0s +All GPU(s): step 2622: loss 0.1269, lr 3.8e-04, dt 2.0s +All GPU(s): step 2623: loss 0.1260, lr 3.8e-04, dt 2.0s +All GPU(s): step 2624: loss 0.1165, lr 3.8e-04, dt 2.1s +All GPU(s): step 2625: loss 0.0955, lr 3.8e-04, dt 2.0s +All GPU(s): step 2626: loss 0.0408, lr 3.8e-04, dt 2.0s +All GPU(s): step 2627: loss 0.0886, lr 3.8e-04, dt 2.0s +All GPU(s): step 2628: loss 0.0768, lr 3.8e-04, dt 2.0s +All GPU(s): step 2629: loss 0.1875, lr 3.8e-04, dt 2.1s +All GPU(s): step 2630: loss 0.2803, lr 3.8e-04, dt 2.0s +All GPU(s): step 2631: loss 0.1296, lr 3.8e-04, dt 2.0s +All GPU(s): step 2632: loss 0.2246, lr 3.8e-04, dt 2.0s +All GPU(s): step 2633: loss 0.3308, lr 3.8e-04, dt 2.0s +All GPU(s): step 2634: loss 0.3098, lr 3.8e-04, dt 2.1s +All GPU(s): step 2635: loss 0.2302, lr 3.8e-04, dt 2.0s +All GPU(s): step 2636: loss 0.1755, lr 3.8e-04, dt 2.0s +All GPU(s): step 2637: loss 0.1854, lr 3.8e-04, dt 2.0s +All GPU(s): step 2638: loss 0.2075, lr 3.8e-04, dt 2.0s +All GPU(s): step 2639: loss 0.2927, lr 3.8e-04, dt 2.1s +All GPU(s): step 2640: loss 0.2852, lr 3.8e-04, dt 2.0s +All GPU(s): step 2641: loss 0.1256, lr 3.8e-04, dt 2.0s +All GPU(s): step 2642: loss 0.2864, lr 3.8e-04, dt 2.0s +All GPU(s): step 2643: loss 0.2117, lr 3.8e-04, dt 2.0s +All GPU(s): step 2644: loss 0.1105, lr 3.8e-04, dt 2.1s +All GPU(s): step 2645: loss 0.1574, lr 3.8e-04, dt 2.0s +All GPU(s): step 2646: loss 0.0522, lr 3.8e-04, dt 2.0s +All GPU(s): step 2647: loss 0.0685, lr 3.8e-04, dt 2.0s +All GPU(s): step 2648: loss 0.2252, lr 3.8e-04, dt 2.0s +All GPU(s): step 2649: loss 0.1814, lr 3.8e-04, dt 2.1s +All GPU(s): step 2650: loss 0.1256, lr 3.8e-04, dt 2.0s +All GPU(s): step 2651: loss 0.0999, lr 3.8e-04, dt 2.0s +All GPU(s): step 2652: loss 0.2304, lr 3.8e-04, dt 2.0s +All GPU(s): step 2653: loss 0.2354, lr 3.8e-04, dt 2.0s +All GPU(s): step 2654: loss 0.1884, lr 3.8e-04, dt 2.1s +All GPU(s): step 2655: loss 0.1807, lr 3.8e-04, dt 2.0s +All GPU(s): step 2656: loss 0.0371, lr 3.8e-04, dt 2.0s +All GPU(s): step 2657: loss 0.1609, lr 3.8e-04, dt 2.0s +All GPU(s): step 2658: loss 0.0706, lr 3.8e-04, dt 2.0s +All GPU(s): step 2659: loss 0.1247, lr 3.8e-04, dt 2.1s +All GPU(s): step 2660: loss 0.0938, lr 3.8e-04, dt 2.0s +All GPU(s): step 2661: loss 0.1301, lr 3.8e-04, dt 2.0s +All GPU(s): step 2662: loss 0.0839, lr 3.8e-04, dt 2.0s +All GPU(s): step 2663: loss 0.1849, lr 3.8e-04, dt 2.0s +All GPU(s): step 2664: loss 0.0813, lr 3.8e-04, dt 2.0s +All GPU(s): step 2665: loss 0.0828, lr 3.8e-04, dt 2.0s +All GPU(s): step 2666: loss 0.1463, lr 3.8e-04, dt 2.0s +All GPU(s): step 2667: loss 0.1182, lr 3.8e-04, dt 2.0s +All GPU(s): step 2668: loss 0.1759, lr 3.8e-04, dt 2.0s +All GPU(s): step 2669: loss 0.1481, lr 3.8e-04, dt 2.1s +All GPU(s): step 2670: loss 0.2117, lr 3.8e-04, dt 2.0s +All GPU(s): step 2671: loss 0.1698, lr 3.8e-04, dt 2.0s +All GPU(s): step 2672: loss 0.1763, lr 3.8e-04, dt 2.0s +All GPU(s): step 2673: loss 0.0946, lr 3.8e-04, dt 2.0s +All GPU(s): step 2674: loss 0.2372, lr 3.8e-04, dt 2.0s +All GPU(s): step 2675: loss 0.1437, lr 3.8e-04, dt 2.0s +All GPU(s): step 2676: loss 0.0874, lr 3.8e-04, dt 2.0s +All GPU(s): step 2677: loss 0.3051, lr 3.8e-04, dt 2.0s +All GPU(s): step 2678: loss 0.1224, lr 3.8e-04, dt 2.0s +All GPU(s): step 2679: loss 0.3080, lr 3.8e-04, dt 2.0s +All GPU(s): step 2680: loss 0.1178, lr 3.8e-04, dt 2.0s +All GPU(s): step 2681: loss 0.2696, lr 3.8e-04, dt 2.0s +All GPU(s): step 2682: loss 0.1310, lr 3.8e-04, dt 2.0s +All GPU(s): step 2683: loss 0.0987, lr 3.8e-04, dt 2.0s +All GPU(s): step 2684: loss 0.2403, lr 3.8e-04, dt 2.0s +All GPU(s): step 2685: loss 0.0875, lr 3.8e-04, dt 2.0s +All GPU(s): step 2686: loss 0.1277, lr 3.8e-04, dt 2.0s +All GPU(s): step 2687: loss 0.1482, lr 3.8e-04, dt 2.0s +All GPU(s): step 2688: loss 0.1354, lr 3.8e-04, dt 2.1s +All GPU(s): step 2689: loss 0.0688, lr 3.8e-04, dt 2.0s +All GPU(s): step 2690: loss 0.1658, lr 3.8e-04, dt 2.0s +All GPU(s): step 2691: loss 0.1207, lr 3.8e-04, dt 2.0s +All GPU(s): step 2692: loss 0.0504, lr 3.8e-04, dt 2.0s +All GPU(s): step 2693: loss 0.1812, lr 3.8e-04, dt 2.1s +All GPU(s): step 2694: loss 0.1604, lr 3.8e-04, dt 2.0s +All GPU(s): step 2695: loss 0.0399, lr 3.8e-04, dt 2.0s +All GPU(s): step 2696: loss 0.1150, lr 3.8e-04, dt 2.0s +All GPU(s): step 2697: loss 0.1702, lr 3.8e-04, dt 2.0s +All GPU(s): step 2698: loss 0.0508, lr 3.7e-04, dt 2.1s +All GPU(s): step 2699: loss 0.0780, lr 3.7e-04, dt 2.0s +All GPU(s): step 2700: loss 0.0898, lr 3.7e-04, dt 2.0s +All GPU(s): step 2701: loss 0.1562, lr 3.7e-04, dt 2.0s +All GPU(s): step 2702: loss 0.1739, lr 3.7e-04, dt 2.0s +All GPU(s): step 2703: loss 0.0572, lr 3.7e-04, dt 2.1s +All GPU(s): step 2704: loss 0.2162, lr 3.7e-04, dt 2.0s +All GPU(s): step 2705: loss 0.1855, lr 3.7e-04, dt 2.0s +All GPU(s): step 2706: loss 0.1040, lr 3.7e-04, dt 2.0s +All GPU(s): step 2707: loss 0.1447, lr 3.7e-04, dt 2.0s +All GPU(s): step 2708: loss 0.1227, lr 3.7e-04, dt 2.1s +All GPU(s): step 2709: loss 0.0785, lr 3.7e-04, dt 2.0s +All GPU(s): step 2710: loss 0.0890, lr 3.7e-04, dt 2.0s +All GPU(s): step 2711: loss 0.1570, lr 3.7e-04, dt 2.0s +All GPU(s): step 2712: loss 0.1609, lr 3.7e-04, dt 2.0s +All GPU(s): step 2713: loss 0.2088, lr 3.7e-04, dt 2.1s +All GPU(s): step 2714: loss 0.2239, lr 3.7e-04, dt 2.0s +All GPU(s): step 2715: loss 0.1218, lr 3.7e-04, dt 2.0s +All GPU(s): step 2716: loss 0.2673, lr 3.7e-04, dt 2.0s +All GPU(s): step 2717: loss 0.3832, lr 3.7e-04, dt 2.0s +All GPU(s): step 2718: loss 0.3674, lr 3.7e-04, dt 2.1s +All GPU(s): step 2719: loss 0.1972, lr 3.7e-04, dt 2.0s +All GPU(s): step 2720: loss 0.1808, lr 3.7e-04, dt 2.0s +All GPU(s): step 2721: loss 0.2211, lr 3.7e-04, dt 2.0s +All GPU(s): step 2722: loss 0.1035, lr 3.7e-04, dt 2.0s +All GPU(s): step 2723: loss 0.1616, lr 3.7e-04, dt 2.1s +All GPU(s): step 2724: loss 0.0871, lr 3.7e-04, dt 1.9s +All GPU(s): step 2725: loss 0.1340, lr 3.7e-04, dt 2.0s +All GPU(s): step 2726: loss 0.0595, lr 3.7e-04, dt 2.0s +All GPU(s): step 2727: loss 0.0825, lr 3.7e-04, dt 2.0s +All GPU(s): step 2728: loss 0.1018, lr 3.7e-04, dt 2.1s +All GPU(s): step 2729: loss 0.1829, lr 3.7e-04, dt 2.0s +All GPU(s): step 2730: loss 0.2603, lr 3.7e-04, dt 2.1s +All GPU(s): step 2731: loss 0.1202, lr 3.7e-04, dt 2.0s +All GPU(s): step 2732: loss 0.1169, lr 3.7e-04, dt 2.0s +All GPU(s): step 2733: loss 0.1088, lr 3.7e-04, dt 2.1s +All GPU(s): step 2734: loss 0.1743, lr 3.7e-04, dt 1.9s +All GPU(s): step 2735: loss 0.1329, lr 3.7e-04, dt 2.0s +All GPU(s): step 2736: loss 0.0924, lr 3.7e-04, dt 2.0s +All GPU(s): step 2737: loss 0.1507, lr 3.7e-04, dt 2.0s +All GPU(s): step 2738: loss 0.1024, lr 3.7e-04, dt 2.1s +All GPU(s): step 2739: loss 0.1099, lr 3.7e-04, dt 2.0s +All GPU(s): step 2740: loss 0.1644, lr 3.7e-04, dt 2.0s +All GPU(s): step 2741: loss 0.0864, lr 3.7e-04, dt 2.1s +All GPU(s): step 2742: loss 0.1033, lr 3.7e-04, dt 2.0s +All GPU(s): step 2743: loss 0.0460, lr 3.7e-04, dt 2.1s +All GPU(s): step 2744: loss 0.1530, lr 3.7e-04, dt 2.0s +All GPU(s): step 2745: loss 0.1654, lr 3.7e-04, dt 2.0s +All GPU(s): step 2746: loss 0.1214, lr 3.7e-04, dt 2.0s +All GPU(s): step 2747: loss 0.2749, lr 3.7e-04, dt 2.0s +All GPU(s): step 2748: loss 0.1650, lr 3.7e-04, dt 2.1s +All GPU(s): step 2749: loss 0.1735, lr 3.7e-04, dt 2.0s +All GPU(s): step 2750: loss 0.1749, lr 3.7e-04, dt 2.0s +All GPU(s): step 2751: loss 0.1787, lr 3.7e-04, dt 2.0s +All GPU(s): step 2752: loss 0.3451, lr 3.7e-04, dt 2.0s +All GPU(s): step 2753: loss 0.0946, lr 3.7e-04, dt 2.0s +All GPU(s): step 2754: loss 0.2131, lr 3.7e-04, dt 2.0s +All GPU(s): step 2755: loss 0.2189, lr 3.7e-04, dt 2.0s +All GPU(s): step 2756: loss 0.1354, lr 3.7e-04, dt 2.0s +All GPU(s): step 2757: loss 0.1279, lr 3.7e-04, dt 2.0s +All GPU(s): step 2758: loss 0.0776, lr 3.7e-04, dt 2.1s +All GPU(s): step 2759: loss 0.1134, lr 3.7e-04, dt 2.0s +All GPU(s): step 2760: loss 0.1906, lr 3.7e-04, dt 2.0s +All GPU(s): step 2761: loss 0.1431, lr 3.7e-04, dt 2.0s +All GPU(s): step 2762: loss 0.1275, lr 3.7e-04, dt 2.1s +All GPU(s): step 2763: loss 0.0599, lr 3.7e-04, dt 2.0s +All GPU(s): step 2764: loss 0.0547, lr 3.7e-04, dt 2.0s +All GPU(s): step 2765: loss 0.1683, lr 3.7e-04, dt 2.0s +All GPU(s): step 2766: loss 0.1699, lr 3.7e-04, dt 2.0s +All GPU(s): step 2767: loss 0.1275, lr 3.7e-04, dt 2.1s +All GPU(s): step 2768: loss 0.0688, lr 3.7e-04, dt 2.0s +All GPU(s): step 2769: loss 0.1024, lr 3.7e-04, dt 2.0s +All GPU(s): step 2770: loss 0.1653, lr 3.7e-04, dt 2.0s +All GPU(s): step 2771: loss 0.0365, lr 3.7e-04, dt 2.0s +All GPU(s): step 2772: loss 0.1078, lr 3.7e-04, dt 2.0s +All GPU(s): step 2773: loss 0.0936, lr 3.7e-04, dt 2.0s +All GPU(s): step 2774: loss 0.0743, lr 3.7e-04, dt 2.0s +All GPU(s): step 2775: loss 0.0595, lr 3.7e-04, dt 2.0s +All GPU(s): step 2776: loss 0.0821, lr 3.7e-04, dt 2.0s +All GPU(s): step 2777: loss 0.0477, lr 3.7e-04, dt 2.1s +All GPU(s): step 2778: loss 0.0896, lr 3.7e-04, dt 2.0s +All GPU(s): step 2779: loss 0.1628, lr 3.7e-04, dt 2.0s +All GPU(s): step 2780: loss 0.2314, lr 3.7e-04, dt 2.0s +All GPU(s): step 2781: loss 0.0783, lr 3.7e-04, dt 2.0s +All GPU(s): step 2782: loss 0.0712, lr 3.7e-04, dt 2.1s +All GPU(s): step 2783: loss 0.2223, lr 3.7e-04, dt 2.0s +All GPU(s): step 2784: loss 0.3901, lr 3.7e-04, dt 2.1s +All GPU(s): step 2785: loss 0.3208, lr 3.7e-04, dt 2.0s +All GPU(s): step 2786: loss 0.2010, lr 3.7e-04, dt 2.0s +All GPU(s): step 2787: loss 0.3400, lr 3.7e-04, dt 2.2s +All GPU(s): step 2788: loss 0.3815, lr 3.7e-04, dt 2.1s +All GPU(s): step 2789: loss 0.7336, lr 3.7e-04, dt 2.1s +All GPU(s): step 2790: loss 0.9448, lr 3.7e-04, dt 2.1s +All GPU(s): step 2791: loss 0.7689, lr 3.7e-04, dt 2.1s +All GPU(s): step 2792: loss 0.3793, lr 3.7e-04, dt 2.1s +All GPU(s): step 2793: loss 0.3002, lr 3.7e-04, dt 2.1s +All GPU(s): step 2794: loss 0.1367, lr 3.7e-04, dt 2.0s +All GPU(s): step 2795: loss 0.3718, lr 3.7e-04, dt 2.1s +All GPU(s): step 2796: loss 0.0933, lr 3.7e-04, dt 2.0s +All GPU(s): step 2797: loss 0.2127, lr 3.7e-04, dt 2.0s +All GPU(s): step 2798: loss 0.0949, lr 3.7e-04, dt 2.0s +All GPU(s): step 2799: loss 0.0798, lr 3.7e-04, dt 2.0s +All GPU(s): step 2800: loss 0.0570, lr 3.7e-04, dt 2.0s +All GPU(s): step 2801: loss 0.0621, lr 3.7e-04, dt 2.0s +All GPU(s): step 2802: loss 0.0785, lr 3.7e-04, dt 2.0s +All GPU(s): step 2803: loss 0.0721, lr 3.7e-04, dt 2.0s +All GPU(s): step 2804: loss 0.0601, lr 3.7e-04, dt 2.0s +All GPU(s): step 2805: loss 0.2083, lr 3.7e-04, dt 2.0s +All GPU(s): step 2806: loss 0.0585, lr 3.7e-04, dt 2.1s +All GPU(s): step 2807: loss 0.0294, lr 3.7e-04, dt 2.0s +All GPU(s): step 2808: loss 0.1333, lr 3.7e-04, dt 2.0s +All GPU(s): step 2809: loss 0.0837, lr 3.7e-04, dt 2.0s +All GPU(s): step 2810: loss 0.0790, lr 3.7e-04, dt 2.0s +All GPU(s): step 2811: loss 0.0694, lr 3.7e-04, dt 2.1s +All GPU(s): step 2812: loss 0.0422, lr 3.7e-04, dt 2.0s +All GPU(s): step 2813: loss 0.2082, lr 3.7e-04, dt 2.0s +All GPU(s): step 2814: loss 0.0995, lr 3.7e-04, dt 2.0s +All GPU(s): step 2815: loss 0.1644, lr 3.7e-04, dt 2.0s +All GPU(s): step 2816: loss 0.0736, lr 3.7e-04, dt 2.1s +All GPU(s): step 2817: loss 0.1302, lr 3.7e-04, dt 2.0s +All GPU(s): step 2818: loss 0.0522, lr 3.7e-04, dt 2.0s +All GPU(s): step 2819: loss 0.0963, lr 3.7e-04, dt 2.0s +All GPU(s): step 2820: loss 0.1084, lr 3.7e-04, dt 2.0s +All GPU(s): step 2821: loss 0.0463, lr 3.7e-04, dt 2.1s +All GPU(s): step 2822: loss 0.0274, lr 3.7e-04, dt 2.0s +All GPU(s): step 2823: loss 0.2135, lr 3.7e-04, dt 2.0s +All GPU(s): step 2824: loss 0.1365, lr 3.7e-04, dt 2.0s +All GPU(s): step 2825: loss 0.2243, lr 3.7e-04, dt 2.0s +All GPU(s): step 2826: loss 0.1910, lr 3.7e-04, dt 2.1s +All GPU(s): step 2827: loss 0.2392, lr 3.7e-04, dt 2.0s +All GPU(s): step 2828: loss 0.1713, lr 3.7e-04, dt 2.0s +All GPU(s): step 2829: loss 0.0875, lr 3.7e-04, dt 2.0s +All GPU(s): step 2830: loss 0.1743, lr 3.7e-04, dt 2.0s +All GPU(s): step 2831: loss 0.0424, lr 3.7e-04, dt 2.1s +All GPU(s): step 2832: loss 0.1194, lr 3.7e-04, dt 2.0s +All GPU(s): step 2833: loss 0.1404, lr 3.7e-04, dt 2.0s +All GPU(s): step 2834: loss 0.1129, lr 3.7e-04, dt 2.0s +All GPU(s): step 2835: loss 0.0410, lr 3.7e-04, dt 2.0s +All GPU(s): step 2836: loss 0.1144, lr 3.7e-04, dt 2.1s +All GPU(s): step 2837: loss 0.0976, lr 3.7e-04, dt 2.0s +All GPU(s): step 2838: loss 0.0487, lr 3.7e-04, dt 2.0s +All GPU(s): step 2839: loss 0.1488, lr 3.7e-04, dt 2.0s +All GPU(s): step 2840: loss 0.0918, lr 3.7e-04, dt 2.1s +All GPU(s): step 2841: loss 0.0901, lr 3.7e-04, dt 2.1s +All GPU(s): step 2842: loss 0.1630, lr 3.7e-04, dt 2.1s +All GPU(s): step 2843: loss 0.3325, lr 3.7e-04, dt 2.1s +All GPU(s): step 2844: loss 0.3086, lr 3.7e-04, dt 2.1s +All GPU(s): step 2845: loss 0.2606, lr 3.7e-04, dt 2.1s +All GPU(s): step 2846: loss 0.1929, lr 3.7e-04, dt 2.1s +All GPU(s): step 2847: loss 0.1011, lr 3.7e-04, dt 2.1s +All GPU(s): step 2848: loss 0.1745, lr 3.7e-04, dt 2.0s +All GPU(s): step 2849: loss 0.0760, lr 3.7e-04, dt 2.0s +All GPU(s): step 2850: loss 0.0774, lr 3.7e-04, dt 2.0s +All GPU(s): step 2851: loss 0.1009, lr 3.7e-04, dt 2.1s +All GPU(s): step 2852: loss 0.0642, lr 3.7e-04, dt 2.0s +All GPU(s): step 2853: loss 0.0458, lr 3.7e-04, dt 2.0s +All GPU(s): step 2854: loss 0.1274, lr 3.7e-04, dt 2.0s +All GPU(s): step 2855: loss 0.1147, lr 3.7e-04, dt 2.0s +All GPU(s): step 2856: loss 0.2808, lr 3.7e-04, dt 2.0s +All GPU(s): step 2857: loss 0.1469, lr 3.7e-04, dt 2.0s +All GPU(s): step 2858: loss 0.1356, lr 3.7e-04, dt 2.0s +All GPU(s): step 2859: loss 0.2697, lr 3.7e-04, dt 2.0s +All GPU(s): step 2860: loss 0.2184, lr 3.7e-04, dt 2.0s +All GPU(s): step 2861: loss 0.3979, lr 3.7e-04, dt 2.0s +All GPU(s): step 2862: loss 0.2006, lr 3.7e-04, dt 2.0s +All GPU(s): step 2863: loss 0.0949, lr 3.7e-04, dt 2.0s +All GPU(s): step 2864: loss 0.1708, lr 3.7e-04, dt 2.0s +All GPU(s): step 2865: loss 0.1784, lr 3.7e-04, dt 2.0s +All GPU(s): step 2866: loss 0.1406, lr 3.7e-04, dt 2.0s +All GPU(s): step 2867: loss 0.1486, lr 3.7e-04, dt 2.0s +All GPU(s): step 2868: loss 0.0951, lr 3.7e-04, dt 2.0s +All GPU(s): step 2869: loss 0.1982, lr 3.7e-04, dt 2.0s +All GPU(s): step 2870: loss 0.1286, lr 3.7e-04, dt 2.1s +All GPU(s): step 2871: loss 0.1948, lr 3.7e-04, dt 2.0s +All GPU(s): step 2872: loss 0.0712, lr 3.7e-04, dt 2.0s +All GPU(s): step 2873: loss 0.1856, lr 3.7e-04, dt 2.0s +All GPU(s): step 2874: loss 0.2233, lr 3.7e-04, dt 2.0s +All GPU(s): step 2875: loss 0.1498, lr 3.7e-04, dt 2.1s +All GPU(s): step 2876: loss 0.3510, lr 3.7e-04, dt 2.0s +All GPU(s): step 2877: loss 0.1428, lr 3.7e-04, dt 2.0s +All GPU(s): step 2878: loss 0.3198, lr 3.7e-04, dt 2.0s +All GPU(s): step 2879: loss 0.6624, lr 3.7e-04, dt 1.9s +All GPU(s): step 2880: loss 0.3124, lr 3.7e-04, dt 2.1s +All GPU(s): step 2881: loss 0.4062, lr 3.7e-04, dt 2.0s +All GPU(s): step 2882: loss 0.4613, lr 3.7e-04, dt 1.9s +All GPU(s): step 2883: loss 0.5153, lr 3.7e-04, dt 2.0s +All GPU(s): step 2884: loss 0.5395, lr 3.7e-04, dt 2.0s +All GPU(s): step 2885: loss 0.5075, lr 3.7e-04, dt 2.0s +All GPU(s): step 2886: loss 0.3480, lr 3.7e-04, dt 2.0s +All GPU(s): step 2887: loss 0.2305, lr 3.7e-04, dt 2.0s +All GPU(s): step 2888: loss 0.1932, lr 3.7e-04, dt 2.0s +All GPU(s): step 2889: loss 0.2679, lr 3.7e-04, dt 2.0s +All GPU(s): step 2890: loss 0.1241, lr 3.7e-04, dt 2.0s +All GPU(s): step 2891: loss 0.1655, lr 3.7e-04, dt 2.0s +All GPU(s): step 2892: loss 0.1797, lr 3.7e-04, dt 2.0s +All GPU(s): step 2893: loss 0.3127, lr 3.7e-04, dt 2.0s +All GPU(s): step 2894: loss 0.5159, lr 3.7e-04, dt 1.9s +All GPU(s): step 2895: loss 0.2842, lr 3.7e-04, dt 2.0s +All GPU(s): step 2896: loss 0.3112, lr 3.7e-04, dt 2.0s +All GPU(s): step 2897: loss 0.2342, lr 3.7e-04, dt 2.0s +All GPU(s): step 2898: loss 0.1636, lr 3.7e-04, dt 2.0s +All GPU(s): step 2899: loss 0.1932, lr 3.7e-04, dt 2.0s +All GPU(s): step 2900: loss 0.2435, lr 3.7e-04, dt 2.1s +All GPU(s): step 2901: loss 0.1917, lr 3.7e-04, dt 2.0s +All GPU(s): step 2902: loss 0.3270, lr 3.7e-04, dt 2.0s +All GPU(s): step 2903: loss 0.2634, lr 3.7e-04, dt 2.0s +All GPU(s): step 2904: loss 0.0488, lr 3.7e-04, dt 2.0s +All GPU(s): step 2905: loss 0.1191, lr 3.7e-04, dt 2.0s +All GPU(s): step 2906: loss 0.1885, lr 3.7e-04, dt 2.0s +All GPU(s): step 2907: loss 0.2230, lr 3.7e-04, dt 2.0s +All GPU(s): step 2908: loss 0.3865, lr 3.7e-04, dt 2.0s +All GPU(s): step 2909: loss 0.2549, lr 3.7e-04, dt 2.0s +All GPU(s): step 2910: loss 0.2372, lr 3.7e-04, dt 2.0s +All GPU(s): step 2911: loss 0.2129, lr 3.7e-04, dt 2.0s +All GPU(s): step 2912: loss 0.1493, lr 3.7e-04, dt 2.0s +All GPU(s): step 2913: loss 0.1105, lr 3.7e-04, dt 2.0s +All GPU(s): step 2914: loss 0.1513, lr 3.7e-04, dt 2.0s +All GPU(s): step 2915: loss 0.2080, lr 3.7e-04, dt 2.1s +All GPU(s): step 2916: loss 0.2148, lr 3.7e-04, dt 2.0s +All GPU(s): step 2917: loss 0.0819, lr 3.7e-04, dt 2.0s +All GPU(s): step 2918: loss 0.1301, lr 3.7e-04, dt 2.0s +All GPU(s): step 2919: loss 0.1251, lr 3.7e-04, dt 2.0s +All GPU(s): step 2920: loss 0.1766, lr 3.7e-04, dt 2.1s +All GPU(s): step 2921: loss 0.1339, lr 3.7e-04, dt 2.0s +All GPU(s): step 2922: loss 0.1940, lr 3.7e-04, dt 2.0s +All GPU(s): step 2923: loss 0.1244, lr 3.7e-04, dt 2.0s +All GPU(s): step 2924: loss 0.0453, lr 3.7e-04, dt 2.0s +All GPU(s): step 2925: loss 0.2014, lr 3.7e-04, dt 2.1s +All GPU(s): step 2926: loss 0.1232, lr 3.7e-04, dt 2.0s +All GPU(s): step 2927: loss 0.0773, lr 3.7e-04, dt 2.0s +All GPU(s): step 2928: loss 0.0356, lr 3.7e-04, dt 2.0s +All GPU(s): step 2929: loss 0.2311, lr 3.7e-04, dt 2.0s +All GPU(s): step 2930: loss 0.1164, lr 3.7e-04, dt 2.1s +All GPU(s): step 2931: loss 0.1325, lr 3.7e-04, dt 2.0s +All GPU(s): step 2932: loss 0.1308, lr 3.7e-04, dt 2.0s +All GPU(s): step 2933: loss 0.0831, lr 3.7e-04, dt 2.0s +All GPU(s): step 2934: loss 0.0860, lr 3.7e-04, dt 2.0s +All GPU(s): step 2935: loss 0.0889, lr 3.7e-04, dt 2.1s +All GPU(s): step 2936: loss 0.0765, lr 3.7e-04, dt 2.0s +All GPU(s): step 2937: loss 0.0780, lr 3.7e-04, dt 2.0s +All GPU(s): step 2938: loss 0.0915, lr 3.7e-04, dt 2.0s +All GPU(s): step 2939: loss 0.2403, lr 3.7e-04, dt 2.1s +All GPU(s): step 2940: loss 0.1499, lr 3.7e-04, dt 2.1s +All GPU(s): step 2941: loss 0.0663, lr 3.7e-04, dt 2.0s +All GPU(s): step 2942: loss 0.0506, lr 3.7e-04, dt 2.0s +All GPU(s): step 2943: loss 0.0767, lr 3.7e-04, dt 2.0s +All GPU(s): step 2944: loss 0.1695, lr 3.7e-04, dt 2.1s +All GPU(s): step 2945: loss 0.2077, lr 3.7e-04, dt 2.1s +All GPU(s): step 2946: loss 0.1974, lr 3.7e-04, dt 2.0s +All GPU(s): step 2947: loss 0.0915, lr 3.7e-04, dt 2.0s +All GPU(s): step 2948: loss 0.0955, lr 3.7e-04, dt 2.1s +All GPU(s): step 2949: loss 0.1643, lr 3.7e-04, dt 2.0s +All GPU(s): step 2950: loss 0.0458, lr 3.7e-04, dt 2.0s +All GPU(s): step 2951: loss 0.0725, lr 3.7e-04, dt 2.0s +All GPU(s): step 2952: loss 0.0428, lr 3.7e-04, dt 2.0s +All GPU(s): step 2953: loss 0.1419, lr 3.7e-04, dt 2.0s +All GPU(s): step 2954: loss 0.0847, lr 3.7e-04, dt 2.1s +All GPU(s): step 2955: loss 0.0591, lr 3.7e-04, dt 2.0s +All GPU(s): step 2956: loss 0.0838, lr 3.7e-04, dt 2.0s +All GPU(s): step 2957: loss 0.0758, lr 3.7e-04, dt 2.0s +All GPU(s): step 2958: loss 0.1290, lr 3.7e-04, dt 2.0s +All GPU(s): step 2959: loss 0.0855, lr 3.7e-04, dt 2.1s +All GPU(s): step 2960: loss 0.0444, lr 3.7e-04, dt 2.0s +All GPU(s): step 2961: loss 0.0578, lr 3.7e-04, dt 2.0s +All GPU(s): step 2962: loss 0.1165, lr 3.7e-04, dt 2.0s +All GPU(s): step 2963: loss 0.1527, lr 3.7e-04, dt 2.0s +All GPU(s): step 2964: loss 0.1260, lr 3.7e-04, dt 2.1s +All GPU(s): step 2965: loss 0.0654, lr 3.7e-04, dt 2.0s +All GPU(s): step 2966: loss 0.0577, lr 3.7e-04, dt 2.0s +All GPU(s): step 2967: loss 0.0769, lr 3.7e-04, dt 2.0s +All GPU(s): step 2968: loss 0.1911, lr 3.7e-04, dt 2.0s +All GPU(s): step 2969: loss 0.0658, lr 3.7e-04, dt 2.1s +All GPU(s): step 2970: loss 0.1515, lr 3.7e-04, dt 2.0s +All GPU(s): step 2971: loss 0.0791, lr 3.7e-04, dt 2.0s +All GPU(s): step 2972: loss 0.0341, lr 3.7e-04, dt 2.0s +All GPU(s): step 2973: loss 0.1890, lr 3.7e-04, dt 2.0s +All GPU(s): step 2974: loss 0.0976, lr 3.7e-04, dt 2.1s +All GPU(s): step 2975: loss 0.0870, lr 3.7e-04, dt 2.0s +All GPU(s): step 2976: loss 0.1286, lr 3.7e-04, dt 2.0s +All GPU(s): step 2977: loss 0.1505, lr 3.7e-04, dt 2.0s +All GPU(s): step 2978: loss 0.2078, lr 3.7e-04, dt 2.0s +All GPU(s): step 2979: loss 0.1378, lr 3.7e-04, dt 2.1s +All GPU(s): step 2980: loss 0.0624, lr 3.7e-04, dt 2.0s +All GPU(s): step 2981: loss 0.1738, lr 3.7e-04, dt 2.0s +All GPU(s): step 2982: loss 0.1926, lr 3.7e-04, dt 2.0s +All GPU(s): step 2983: loss 0.0540, lr 3.7e-04, dt 2.0s +All GPU(s): step 2984: loss 0.1258, lr 3.7e-04, dt 2.1s +All GPU(s): step 2985: loss 0.1759, lr 3.7e-04, dt 2.0s +All GPU(s): step 2986: loss 0.0612, lr 3.7e-04, dt 2.0s +All GPU(s): step 2987: loss 0.0432, lr 3.7e-04, dt 2.0s +All GPU(s): step 2988: loss 0.0150, lr 3.7e-04, dt 2.0s +All GPU(s): step 2989: loss 0.0539, lr 3.7e-04, dt 2.1s +All GPU(s): step 2990: loss 0.1405, lr 3.7e-04, dt 2.0s +All GPU(s): step 2991: loss 0.1369, lr 3.7e-04, dt 2.0s +All GPU(s): step 2992: loss 0.0776, lr 3.7e-04, dt 2.0s +All GPU(s): step 2993: loss 0.2645, lr 3.7e-04, dt 2.1s +All GPU(s): step 2994: loss 0.0958, lr 3.7e-04, dt 2.0s +All GPU(s): step 2995: loss 0.1904, lr 3.7e-04, dt 2.0s +All GPU(s): step 2996: loss 0.2131, lr 3.7e-04, dt 2.0s +All GPU(s): step 2997: loss 0.1697, lr 3.7e-04, dt 2.0s +All GPU(s): step 2998: loss 0.1217, lr 3.7e-04, dt 2.1s +All GPU(s): step 2999: loss 0.0769, lr 3.7e-04, dt 2.0s +saving checkpoint to checkpoints/ckpt_3000.pt +All GPU(s): step 3000: loss 0.3477, lr 3.7e-04, dt 2.1s +All GPU(s): step 3001: loss 1.4111, lr 3.7e-04, dt 2.1s +All GPU(s): step 3002: loss 3.9980, lr 3.7e-04, dt 2.0s +All GPU(s): step 3003: loss 7.5273, lr 3.7e-04, dt 2.1s +All GPU(s): step 3004: loss 13.1094, lr 3.7e-04, dt 2.0s +All GPU(s): step 3005: loss 16.2109, lr 3.7e-04, dt 2.0s +All GPU(s): step 3006: loss 15.4375, lr 3.7e-04, dt 2.0s +All GPU(s): step 3007: loss 13.7109, lr 3.7e-04, dt 2.1s +All GPU(s): step 3008: loss 9.4297, lr 3.7e-04, dt 2.2s +All GPU(s): step 3009: loss 5.7031, lr 3.7e-04, dt 2.0s +All GPU(s): step 3010: loss 2.6846, lr 3.7e-04, dt 2.1s +All GPU(s): step 3011: loss 2.2266, lr 3.7e-04, dt 2.0s +All GPU(s): step 3012: loss 1.2275, lr 3.7e-04, dt 2.1s +All GPU(s): step 3013: loss 0.5971, lr 3.7e-04, dt 2.2s +All GPU(s): step 3014: loss 1.0439, lr 3.7e-04, dt 2.1s +All GPU(s): step 3015: loss 1.0295, lr 3.7e-04, dt 2.1s +All GPU(s): step 3016: loss 0.5326, lr 3.7e-04, dt 2.1s +All GPU(s): step 3017: loss 0.3148, lr 3.7e-04, dt 2.1s +All GPU(s): step 3018: loss 0.2518, lr 3.7e-04, dt 2.1s +All GPU(s): step 3019: loss 0.1175, lr 3.6e-04, dt 2.0s +All GPU(s): step 3020: loss 0.0890, lr 3.6e-04, dt 2.0s +All GPU(s): step 3021: loss 0.0339, lr 3.6e-04, dt 2.0s +All GPU(s): step 3022: loss 0.0341, lr 3.6e-04, dt 2.1s +All GPU(s): step 3023: loss 0.0835, lr 3.6e-04, dt 2.0s +All GPU(s): step 3024: loss 0.1015, lr 3.6e-04, dt 2.0s +All GPU(s): step 3025: loss 0.0292, lr 3.6e-04, dt 2.0s +All GPU(s): step 3026: loss 0.0841, lr 3.6e-04, dt 2.0s +All GPU(s): step 3027: loss 0.0445, lr 3.6e-04, dt 2.1s +All GPU(s): step 3028: loss 0.0741, lr 3.6e-04, dt 2.0s +All GPU(s): step 3029: loss 0.0696, lr 3.6e-04, dt 2.0s +All GPU(s): step 3030: loss 0.0717, lr 3.6e-04, dt 2.0s +All GPU(s): step 3031: loss 0.0545, lr 3.6e-04, dt 2.0s +All GPU(s): step 3032: loss 0.0814, lr 3.6e-04, dt 2.1s +All GPU(s): step 3033: loss 0.0696, lr 3.6e-04, dt 2.0s +All GPU(s): step 3034: loss 0.0874, lr 3.6e-04, dt 2.0s +All GPU(s): step 3035: loss 0.1463, lr 3.6e-04, dt 2.0s +All GPU(s): step 3036: loss 0.1398, lr 3.6e-04, dt 2.0s +All GPU(s): step 3037: loss 0.0790, lr 3.6e-04, dt 2.1s +All GPU(s): step 3038: loss 0.0812, lr 3.6e-04, dt 2.0s +All GPU(s): step 3039: loss 0.0240, lr 3.6e-04, dt 2.0s +All GPU(s): step 3040: loss 0.0675, lr 3.6e-04, dt 2.0s +All GPU(s): step 3041: loss 0.0583, lr 3.6e-04, dt 2.0s +All GPU(s): step 3042: loss 0.0591, lr 3.6e-04, dt 2.2s +All GPU(s): step 3043: loss 0.0876, lr 3.6e-04, dt 2.0s +All GPU(s): step 3044: loss 0.0441, lr 3.6e-04, dt 2.0s +All GPU(s): step 3045: loss 0.0728, lr 3.6e-04, dt 2.0s +All GPU(s): step 3046: loss 0.0260, lr 3.6e-04, dt 2.0s +All GPU(s): step 3047: loss 0.0895, lr 3.6e-04, dt 2.1s +All GPU(s): step 3048: loss 0.0832, lr 3.6e-04, dt 2.0s +All GPU(s): step 3049: loss 0.0912, lr 3.6e-04, dt 2.0s +All GPU(s): step 3050: loss 0.1201, lr 3.6e-04, dt 2.0s +All GPU(s): step 3051: loss 0.0618, lr 3.6e-04, dt 2.0s +All GPU(s): step 3052: loss 0.0631, lr 3.6e-04, dt 2.1s +All GPU(s): step 3053: loss 0.1205, lr 3.6e-04, dt 2.0s +All GPU(s): step 3054: loss 0.0993, lr 3.6e-04, dt 2.0s +All GPU(s): step 3055: loss 0.0121, lr 3.6e-04, dt 2.0s +All GPU(s): step 3056: loss 0.0866, lr 3.6e-04, dt 2.1s +All GPU(s): step 3057: loss 0.0873, lr 3.6e-04, dt 2.1s +All GPU(s): step 3058: loss 0.1006, lr 3.6e-04, dt 2.0s +All GPU(s): step 3059: loss 0.1408, lr 3.6e-04, dt 2.0s +All GPU(s): step 3060: loss 0.0498, lr 3.6e-04, dt 2.0s +All GPU(s): step 3061: loss 0.0916, lr 3.6e-04, dt 2.0s +All GPU(s): step 3062: loss 0.0982, lr 3.6e-04, dt 2.1s +All GPU(s): step 3063: loss 0.0461, lr 3.6e-04, dt 2.0s +All GPU(s): step 3064: loss 0.1269, lr 3.6e-04, dt 2.1s +All GPU(s): step 3065: loss 0.1094, lr 3.6e-04, dt 2.0s +All GPU(s): step 3066: loss 0.1551, lr 3.6e-04, dt 2.1s +All GPU(s): step 3067: loss 0.1531, lr 3.6e-04, dt 2.0s +All GPU(s): step 3068: loss 0.1373, lr 3.6e-04, dt 2.0s +All GPU(s): step 3069: loss 0.1202, lr 3.6e-04, dt 2.0s +All GPU(s): step 3070: loss 0.0733, lr 3.6e-04, dt 2.0s +All GPU(s): step 3071: loss 0.1513, lr 3.6e-04, dt 2.1s +All GPU(s): step 3072: loss 0.1449, lr 3.6e-04, dt 2.0s +All GPU(s): step 3073: loss 0.1033, lr 3.6e-04, dt 2.0s +All GPU(s): step 3074: loss 0.0365, lr 3.6e-04, dt 2.0s +All GPU(s): step 3075: loss 0.1121, lr 3.6e-04, dt 2.0s +All GPU(s): step 3076: loss 0.0352, lr 3.6e-04, dt 2.1s +All GPU(s): step 3077: loss 0.0828, lr 3.6e-04, dt 2.0s +All GPU(s): step 3078: loss 0.1678, lr 3.6e-04, dt 2.0s +All GPU(s): step 3079: loss 0.1486, lr 3.6e-04, dt 2.0s +All GPU(s): step 3080: loss 0.0822, lr 3.6e-04, dt 2.0s +All GPU(s): step 3081: loss 0.0717, lr 3.6e-04, dt 2.1s +All GPU(s): step 3082: loss 0.1741, lr 3.6e-04, dt 1.9s +All GPU(s): step 3083: loss 0.0475, lr 3.6e-04, dt 2.0s +All GPU(s): step 3084: loss 0.0431, lr 3.6e-04, dt 2.0s +All GPU(s): step 3085: loss 0.0550, lr 3.6e-04, dt 2.0s +All GPU(s): step 3086: loss 0.0474, lr 3.6e-04, dt 2.1s +All GPU(s): step 3087: loss 0.0706, lr 3.6e-04, dt 2.0s +All GPU(s): step 3088: loss 0.1786, lr 3.6e-04, dt 2.0s +All GPU(s): step 3089: loss 0.0549, lr 3.6e-04, dt 2.0s +All GPU(s): step 3090: loss 0.0850, lr 3.6e-04, dt 2.0s +All GPU(s): step 3091: loss 0.0710, lr 3.6e-04, dt 2.1s +All GPU(s): step 3092: loss 0.0857, lr 3.6e-04, dt 2.0s +All GPU(s): step 3093: loss 0.0825, lr 3.6e-04, dt 2.0s +All GPU(s): step 3094: loss 0.0484, lr 3.6e-04, dt 2.0s +All GPU(s): step 3095: loss 0.0851, lr 3.6e-04, dt 2.0s +All GPU(s): step 3096: loss 0.0825, lr 3.6e-04, dt 2.1s +All GPU(s): step 3097: loss 0.0451, lr 3.6e-04, dt 2.0s +All GPU(s): step 3098: loss 0.0620, lr 3.6e-04, dt 2.0s +All GPU(s): step 3099: loss 0.0381, lr 3.6e-04, dt 2.0s +All GPU(s): step 3100: loss 0.0469, lr 3.6e-04, dt 2.0s +All GPU(s): step 3101: loss 0.0114, lr 3.6e-04, dt 2.1s +All GPU(s): step 3102: loss 0.1343, lr 3.6e-04, dt 2.1s +All GPU(s): step 3103: loss 0.0715, lr 3.6e-04, dt 2.0s +All GPU(s): step 3104: loss 0.0431, lr 3.6e-04, dt 2.0s +All GPU(s): step 3105: loss 0.0996, lr 3.6e-04, dt 2.0s +All GPU(s): step 3106: loss 0.0895, lr 3.6e-04, dt 2.1s +All GPU(s): step 3107: loss 0.0779, lr 3.6e-04, dt 2.0s +All GPU(s): step 3108: loss 0.0869, lr 3.6e-04, dt 2.0s +All GPU(s): step 3109: loss 0.0447, lr 3.6e-04, dt 2.0s +All GPU(s): step 3110: loss 0.0489, lr 3.6e-04, dt 2.0s +All GPU(s): step 3111: loss 0.0689, lr 3.6e-04, dt 2.0s +All GPU(s): step 3112: loss 0.0691, lr 3.6e-04, dt 2.0s +All GPU(s): step 3113: loss 0.1157, lr 3.6e-04, dt 2.0s +All GPU(s): step 3114: loss 0.0680, lr 3.6e-04, dt 2.1s +All GPU(s): step 3115: loss 0.0284, lr 3.6e-04, dt 2.1s +All GPU(s): step 3116: loss 0.1111, lr 3.6e-04, dt 2.1s +All GPU(s): step 3117: loss 0.2031, lr 3.6e-04, dt 2.1s +All GPU(s): step 3118: loss 0.0553, lr 3.6e-04, dt 2.0s +All GPU(s): step 3119: loss 0.1622, lr 3.6e-04, dt 2.0s +All GPU(s): step 3120: loss 0.0968, lr 3.6e-04, dt 2.1s +All GPU(s): step 3121: loss 0.2203, lr 3.6e-04, dt 2.0s +All GPU(s): step 3122: loss 0.1014, lr 3.6e-04, dt 2.0s +All GPU(s): step 3123: loss 0.0417, lr 3.6e-04, dt 2.0s +All GPU(s): step 3124: loss 0.1481, lr 3.6e-04, dt 2.0s +All GPU(s): step 3125: loss 0.0757, lr 3.6e-04, dt 2.1s +All GPU(s): step 3126: loss 0.0715, lr 3.6e-04, dt 2.0s +All GPU(s): step 3127: loss 0.0614, lr 3.6e-04, dt 2.0s +All GPU(s): step 3128: loss 0.1069, lr 3.6e-04, dt 2.0s +All GPU(s): step 3129: loss 0.0715, lr 3.6e-04, dt 2.0s +All GPU(s): step 3130: loss 0.0222, lr 3.6e-04, dt 2.1s +All GPU(s): step 3131: loss 0.0826, lr 3.6e-04, dt 2.0s +All GPU(s): step 3132: loss 0.0930, lr 3.6e-04, dt 2.0s +All GPU(s): step 3133: loss 0.0554, lr 3.6e-04, dt 2.0s +All GPU(s): step 3134: loss 0.0626, lr 3.6e-04, dt 2.0s +All GPU(s): step 3135: loss 0.0914, lr 3.6e-04, dt 2.2s +All GPU(s): step 3136: loss 0.2355, lr 3.6e-04, dt 2.1s +All GPU(s): step 3137: loss 0.0576, lr 3.6e-04, dt 2.0s +All GPU(s): step 3138: loss 0.0588, lr 3.6e-04, dt 2.0s +All GPU(s): step 3139: loss 0.1633, lr 3.6e-04, dt 2.1s +All GPU(s): step 3140: loss 0.0794, lr 3.6e-04, dt 2.1s +All GPU(s): step 3141: loss 0.0395, lr 3.6e-04, dt 2.0s +All GPU(s): step 3142: loss 0.1993, lr 3.6e-04, dt 2.1s +All GPU(s): step 3143: loss 0.2546, lr 3.6e-04, dt 2.1s +All GPU(s): step 3144: loss 0.3270, lr 3.6e-04, dt 2.1s +All GPU(s): step 3145: loss 0.0463, lr 3.6e-04, dt 2.1s +All GPU(s): step 3146: loss 0.2463, lr 3.6e-04, dt 2.0s +All GPU(s): step 3147: loss 0.0868, lr 3.6e-04, dt 2.0s +All GPU(s): step 3148: loss 0.0321, lr 3.6e-04, dt 2.0s +All GPU(s): step 3149: loss 0.1462, lr 3.6e-04, dt 2.0s +All GPU(s): step 3150: loss 0.1025, lr 3.6e-04, dt 2.0s +All GPU(s): step 3151: loss 0.0790, lr 3.6e-04, dt 2.0s +All GPU(s): step 3152: loss 0.1034, lr 3.6e-04, dt 2.0s +All GPU(s): step 3153: loss 0.2144, lr 3.6e-04, dt 2.0s +All GPU(s): step 3154: loss 0.0883, lr 3.6e-04, dt 2.1s +All GPU(s): step 3155: loss 0.1589, lr 3.6e-04, dt 2.0s +All GPU(s): step 3156: loss 0.0866, lr 3.6e-04, dt 2.0s +All GPU(s): step 3157: loss 0.1934, lr 3.6e-04, dt 2.0s +All GPU(s): step 3158: loss 0.1553, lr 3.6e-04, dt 2.1s +All GPU(s): step 3159: loss 0.0616, lr 3.6e-04, dt 2.1s +All GPU(s): step 3160: loss 0.0943, lr 3.6e-04, dt 2.1s +All GPU(s): step 3161: loss 0.2682, lr 3.6e-04, dt 2.1s +All GPU(s): step 3162: loss 1.2373, lr 3.6e-04, dt 2.0s +All GPU(s): step 3163: loss 2.6348, lr 3.6e-04, dt 2.1s +All GPU(s): step 3164: loss 5.0156, lr 3.6e-04, dt 2.2s +All GPU(s): step 3165: loss 9.1602, lr 3.6e-04, dt 2.1s +All GPU(s): step 3166: loss 13.1172, lr 3.6e-04, dt 2.1s +All GPU(s): step 3167: loss 15.7500, lr 3.6e-04, dt 2.0s +All GPU(s): step 3168: loss 17.2344, lr 3.6e-04, dt 2.1s +All GPU(s): step 3169: loss 18.3125, lr 3.6e-04, dt 2.2s +All GPU(s): step 3170: loss 18.8750, lr 3.6e-04, dt 2.0s +All GPU(s): step 3171: loss 19.4219, lr 3.6e-04, dt 2.0s +All GPU(s): step 3172: loss 19.5781, lr 3.6e-04, dt 2.0s +All GPU(s): step 3173: loss 19.2969, lr 3.6e-04, dt 2.0s +All GPU(s): step 3174: loss 19.1562, lr 3.6e-04, dt 2.1s +All GPU(s): step 3175: loss 18.5781, lr 3.6e-04, dt 2.0s +All GPU(s): step 3176: loss 17.4688, lr 3.6e-04, dt 2.0s +All GPU(s): step 3177: loss 17.9375, lr 3.6e-04, dt 2.0s +All GPU(s): step 3178: loss 17.6406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3179: loss 17.8906, lr 3.6e-04, dt 2.1s +All GPU(s): step 3180: loss 18.1562, lr 3.6e-04, dt 2.0s +All GPU(s): step 3181: loss 17.8203, lr 3.6e-04, dt 2.0s +All GPU(s): step 3182: loss 17.4375, lr 3.6e-04, dt 2.0s +All GPU(s): step 3183: loss 18.0000, lr 3.6e-04, dt 2.1s +All GPU(s): step 3184: loss 17.6719, lr 3.6e-04, dt 2.1s +All GPU(s): step 3185: loss 17.8906, lr 3.6e-04, dt 2.0s +All GPU(s): step 3186: loss 18.0625, lr 3.6e-04, dt 2.0s +All GPU(s): step 3187: loss 18.0469, lr 3.6e-04, dt 2.0s +All GPU(s): step 3188: loss 18.0469, lr 3.6e-04, dt 2.1s +All GPU(s): step 3189: loss 18.0781, lr 3.6e-04, dt 2.0s +All GPU(s): step 3190: loss 17.2344, lr 3.6e-04, dt 2.1s +All GPU(s): step 3191: loss 17.2969, lr 3.6e-04, dt 2.0s +All GPU(s): step 3192: loss 16.1094, lr 3.6e-04, dt 2.1s +All GPU(s): step 3193: loss 15.7812, lr 3.6e-04, dt 2.2s +All GPU(s): step 3194: loss 15.3594, lr 3.6e-04, dt 2.1s +All GPU(s): step 3195: loss 14.5000, lr 3.6e-04, dt 2.0s +All GPU(s): step 3196: loss 14.1719, lr 3.6e-04, dt 2.1s +All GPU(s): step 3197: loss 13.3438, lr 3.6e-04, dt 2.1s +All GPU(s): step 3198: loss 12.9219, lr 3.6e-04, dt 2.2s +All GPU(s): step 3199: loss 12.7422, lr 3.6e-04, dt 2.1s +All GPU(s): step 3200: loss 12.2344, lr 3.6e-04, dt 2.1s +All GPU(s): step 3201: loss 10.5781, lr 3.6e-04, dt 2.1s +All GPU(s): step 3202: loss 8.7266, lr 3.6e-04, dt 2.1s +All GPU(s): step 3203: loss 9.0664, lr 3.6e-04, dt 2.1s +All GPU(s): step 3204: loss 10.5234, lr 3.6e-04, dt 2.1s +All GPU(s): step 3205: loss 11.4062, lr 3.6e-04, dt 2.1s +All GPU(s): step 3206: loss 11.9375, lr 3.6e-04, dt 2.1s +All GPU(s): step 3207: loss 14.0547, lr 3.6e-04, dt 2.1s +All GPU(s): step 3208: loss 15.2031, lr 3.6e-04, dt 2.1s +All GPU(s): step 3209: loss 16.6875, lr 3.6e-04, dt 2.0s +All GPU(s): step 3210: loss 16.9297, lr 3.6e-04, dt 2.0s +All GPU(s): step 3211: loss 17.5625, lr 3.6e-04, dt 2.0s +All GPU(s): step 3212: loss 16.5938, lr 3.6e-04, dt 2.2s +All GPU(s): step 3213: loss 15.6406, lr 3.6e-04, dt 2.1s +All GPU(s): step 3214: loss 12.7656, lr 3.6e-04, dt 2.1s +All GPU(s): step 3215: loss 10.6172, lr 3.6e-04, dt 2.1s +All GPU(s): step 3216: loss 7.6250, lr 3.6e-04, dt 2.1s +All GPU(s): step 3217: loss 5.9004, lr 3.6e-04, dt 2.1s +All GPU(s): step 3218: loss 4.4688, lr 3.6e-04, dt 2.1s +All GPU(s): step 3219: loss 2.6729, lr 3.6e-04, dt 2.1s +All GPU(s): step 3220: loss 2.3291, lr 3.6e-04, dt 2.1s +All GPU(s): step 3221: loss 2.4365, lr 3.6e-04, dt 2.1s +All GPU(s): step 3222: loss 1.9590, lr 3.6e-04, dt 2.1s +All GPU(s): step 3223: loss 2.1113, lr 3.6e-04, dt 2.1s +All GPU(s): step 3224: loss 1.5078, lr 3.6e-04, dt 2.1s +All GPU(s): step 3225: loss 0.6069, lr 3.6e-04, dt 2.0s +All GPU(s): step 3226: loss 0.3480, lr 3.6e-04, dt 2.1s +All GPU(s): step 3227: loss 0.2482, lr 3.6e-04, dt 2.1s +All GPU(s): step 3228: loss 0.2429, lr 3.6e-04, dt 2.0s +All GPU(s): step 3229: loss 0.1465, lr 3.6e-04, dt 2.0s +All GPU(s): step 3230: loss 0.1220, lr 3.6e-04, dt 2.1s +All GPU(s): step 3231: loss 0.0596, lr 3.6e-04, dt 2.1s +All GPU(s): step 3232: loss 0.1095, lr 3.6e-04, dt 2.0s +All GPU(s): step 3233: loss 0.0898, lr 3.6e-04, dt 2.0s +All GPU(s): step 3234: loss 0.0734, lr 3.6e-04, dt 2.0s +All GPU(s): step 3235: loss 0.0489, lr 3.6e-04, dt 2.0s +All GPU(s): step 3236: loss 0.1700, lr 3.6e-04, dt 2.1s +All GPU(s): step 3237: loss 0.2267, lr 3.6e-04, dt 2.1s +All GPU(s): step 3238: loss 0.0799, lr 3.6e-04, dt 2.0s +All GPU(s): step 3239: loss 0.1079, lr 3.6e-04, dt 2.0s +All GPU(s): step 3240: loss 0.1123, lr 3.6e-04, dt 2.0s +All GPU(s): step 3241: loss 0.0669, lr 3.6e-04, dt 2.1s +All GPU(s): step 3242: loss 0.1161, lr 3.6e-04, dt 2.0s +All GPU(s): step 3243: loss 0.1713, lr 3.6e-04, dt 2.0s +All GPU(s): step 3244: loss 0.1677, lr 3.6e-04, dt 2.0s +All GPU(s): step 3245: loss 0.3094, lr 3.6e-04, dt 2.0s +All GPU(s): step 3246: loss 0.2627, lr 3.6e-04, dt 2.1s +All GPU(s): step 3247: loss 0.4546, lr 3.6e-04, dt 1.9s +All GPU(s): step 3248: loss 0.2593, lr 3.6e-04, dt 2.0s +All GPU(s): step 3249: loss 0.3802, lr 3.6e-04, dt 2.0s +All GPU(s): step 3250: loss 0.3887, lr 3.6e-04, dt 2.0s +All GPU(s): step 3251: loss 0.2650, lr 3.6e-04, dt 2.1s +All GPU(s): step 3252: loss 0.3850, lr 3.6e-04, dt 2.0s +All GPU(s): step 3253: loss 0.4162, lr 3.6e-04, dt 2.0s +All GPU(s): step 3254: loss 0.3325, lr 3.6e-04, dt 2.0s +All GPU(s): step 3255: loss 0.2553, lr 3.6e-04, dt 2.0s +All GPU(s): step 3256: loss 0.2357, lr 3.6e-04, dt 2.0s +All GPU(s): step 3257: loss 0.3946, lr 3.6e-04, dt 1.9s +All GPU(s): step 3258: loss 0.2909, lr 3.6e-04, dt 2.0s +All GPU(s): step 3259: loss 0.2920, lr 3.6e-04, dt 2.0s +All GPU(s): step 3260: loss 0.3245, lr 3.6e-04, dt 1.9s +All GPU(s): step 3261: loss 0.0997, lr 3.6e-04, dt 2.1s +All GPU(s): step 3262: loss 0.2029, lr 3.6e-04, dt 1.9s +All GPU(s): step 3263: loss 0.2267, lr 3.6e-04, dt 2.0s +All GPU(s): step 3264: loss 0.0921, lr 3.6e-04, dt 2.0s +All GPU(s): step 3265: loss 0.2223, lr 3.6e-04, dt 2.0s +All GPU(s): step 3266: loss 0.0550, lr 3.6e-04, dt 2.1s +All GPU(s): step 3267: loss 0.0388, lr 3.6e-04, dt 2.0s +All GPU(s): step 3268: loss 0.0972, lr 3.6e-04, dt 2.1s +All GPU(s): step 3269: loss 0.0251, lr 3.6e-04, dt 2.0s +All GPU(s): step 3270: loss 0.0854, lr 3.6e-04, dt 2.1s +All GPU(s): step 3271: loss 0.0735, lr 3.6e-04, dt 2.1s +All GPU(s): step 3272: loss 0.0711, lr 3.6e-04, dt 2.1s +All GPU(s): step 3273: loss 0.0295, lr 3.6e-04, dt 2.0s +All GPU(s): step 3274: loss 0.0419, lr 3.6e-04, dt 2.1s +All GPU(s): step 3275: loss 0.0755, lr 3.6e-04, dt 2.0s +All GPU(s): step 3276: loss 0.0353, lr 3.6e-04, dt 2.1s +All GPU(s): step 3277: loss 0.1807, lr 3.6e-04, dt 2.1s +All GPU(s): step 3278: loss 0.0436, lr 3.6e-04, dt 2.0s +All GPU(s): step 3279: loss 0.0362, lr 3.6e-04, dt 2.0s +All GPU(s): step 3280: loss 0.0344, lr 3.6e-04, dt 2.0s +All GPU(s): step 3281: loss 0.0456, lr 3.6e-04, dt 2.0s +All GPU(s): step 3282: loss 0.0815, lr 3.6e-04, dt 2.0s +All GPU(s): step 3283: loss 0.1212, lr 3.6e-04, dt 2.0s +All GPU(s): step 3284: loss 0.1324, lr 3.6e-04, dt 2.0s +All GPU(s): step 3285: loss 0.0474, lr 3.6e-04, dt 2.0s +All GPU(s): step 3286: loss 0.0497, lr 3.6e-04, dt 2.0s +All GPU(s): step 3287: loss 0.0894, lr 3.6e-04, dt 2.1s +All GPU(s): step 3288: loss 0.0775, lr 3.6e-04, dt 2.0s +All GPU(s): step 3289: loss 0.1178, lr 3.6e-04, dt 2.0s +All GPU(s): step 3290: loss 0.0249, lr 3.6e-04, dt 2.1s +All GPU(s): step 3291: loss 0.2210, lr 3.6e-04, dt 2.1s +All GPU(s): step 3292: loss 0.1308, lr 3.6e-04, dt 2.0s +All GPU(s): step 3293: loss 0.1341, lr 3.6e-04, dt 2.0s +All GPU(s): step 3294: loss 0.1659, lr 3.6e-04, dt 2.1s +All GPU(s): step 3295: loss 0.1760, lr 3.6e-04, dt 2.1s +All GPU(s): step 3296: loss 0.3986, lr 3.6e-04, dt 2.1s +All GPU(s): step 3297: loss 0.8162, lr 3.6e-04, dt 2.1s +All GPU(s): step 3298: loss 1.2183, lr 3.6e-04, dt 2.1s +All GPU(s): step 3299: loss 1.9248, lr 3.6e-04, dt 2.1s +All GPU(s): step 3300: loss 2.1318, lr 3.6e-04, dt 2.1s +All GPU(s): step 3301: loss 2.0244, lr 3.5e-04, dt 2.1s +All GPU(s): step 3302: loss 2.5957, lr 3.5e-04, dt 2.0s +All GPU(s): step 3303: loss 2.7988, lr 3.5e-04, dt 2.1s +All GPU(s): step 3304: loss 3.9131, lr 3.5e-04, dt 2.1s +All GPU(s): step 3305: loss 4.6191, lr 3.5e-04, dt 2.1s +All GPU(s): step 3306: loss 5.6953, lr 3.5e-04, dt 2.0s +All GPU(s): step 3307: loss 6.8359, lr 3.5e-04, dt 2.1s +All GPU(s): step 3308: loss 7.4883, lr 3.5e-04, dt 2.0s +All GPU(s): step 3309: loss 8.9453, lr 3.5e-04, dt 2.1s +All GPU(s): step 3310: loss 9.6719, lr 3.5e-04, dt 2.1s +All GPU(s): step 3311: loss 11.2969, lr 3.5e-04, dt 2.1s +All GPU(s): step 3312: loss 11.8359, lr 3.5e-04, dt 2.1s +All GPU(s): step 3313: loss 13.3359, lr 3.5e-04, dt 2.0s +All GPU(s): step 3314: loss 14.7500, lr 3.5e-04, dt 2.1s +All GPU(s): step 3315: loss 13.7500, lr 3.5e-04, dt 2.1s +All GPU(s): step 3316: loss 11.8750, lr 3.5e-04, dt 2.0s +All GPU(s): step 3317: loss 10.4844, lr 3.5e-04, dt 2.0s +All GPU(s): step 3318: loss 10.1406, lr 3.5e-04, dt 2.1s +All GPU(s): step 3319: loss 10.4766, lr 3.5e-04, dt 2.2s +All GPU(s): step 3320: loss 10.3984, lr 3.5e-04, dt 2.1s +All GPU(s): step 3321: loss 9.3750, lr 3.5e-04, dt 2.1s +All GPU(s): step 3322: loss 12.3984, lr 3.5e-04, dt 2.0s +All GPU(s): step 3323: loss 14.0469, lr 3.5e-04, dt 2.1s +All GPU(s): step 3324: loss 17.0469, lr 3.5e-04, dt 2.2s +All GPU(s): step 3325: loss 17.9688, lr 3.5e-04, dt 2.0s +All GPU(s): step 3326: loss 18.3906, lr 3.5e-04, dt 2.1s +All GPU(s): step 3327: loss 14.3828, lr 3.5e-04, dt 2.1s +All GPU(s): step 3328: loss 9.0156, lr 3.5e-04, dt 2.1s +All GPU(s): step 3329: loss 6.1836, lr 3.5e-04, dt 2.1s +All GPU(s): step 3330: loss 4.7949, lr 3.5e-04, dt 2.1s +All GPU(s): step 3331: loss 3.7402, lr 3.5e-04, dt 2.1s +All GPU(s): step 3332: loss 2.1699, lr 3.5e-04, dt 2.1s +All GPU(s): step 3333: loss 1.1404, lr 3.5e-04, dt 2.1s +All GPU(s): step 3334: loss 0.7285, lr 3.5e-04, dt 2.1s +All GPU(s): step 3335: loss 0.5461, lr 3.5e-04, dt 2.1s +All GPU(s): step 3336: loss 0.3887, lr 3.5e-04, dt 2.1s +All GPU(s): step 3337: loss 0.3243, lr 3.5e-04, dt 2.1s +All GPU(s): step 3338: loss 0.5220, lr 3.5e-04, dt 2.2s +All GPU(s): step 3339: loss 1.3496, lr 3.5e-04, dt 2.1s +All GPU(s): step 3340: loss 1.9863, lr 3.5e-04, dt 2.0s +All GPU(s): step 3341: loss 3.4902, lr 3.5e-04, dt 2.0s +All GPU(s): step 3342: loss 6.0508, lr 3.5e-04, dt 2.1s +All GPU(s): step 3343: loss 7.6641, lr 3.5e-04, dt 2.2s +All GPU(s): step 3344: loss 8.5117, lr 3.5e-04, dt 2.1s +All GPU(s): step 3345: loss 8.6445, lr 3.5e-04, dt 2.1s +All GPU(s): step 3346: loss 9.5156, lr 3.5e-04, dt 2.1s +All GPU(s): step 3347: loss 11.1250, lr 3.5e-04, dt 2.1s +All GPU(s): step 3348: loss 11.9844, lr 3.5e-04, dt 2.2s +All GPU(s): step 3349: loss 13.7422, lr 3.5e-04, dt 2.0s +All GPU(s): step 3350: loss 16.3672, lr 3.5e-04, dt 2.0s +All GPU(s): step 3351: loss 18.7656, lr 3.5e-04, dt 2.0s +All GPU(s): step 3352: loss 19.6719, lr 3.5e-04, dt 2.1s +All GPU(s): step 3353: loss 19.1250, lr 3.5e-04, dt 2.1s +All GPU(s): step 3354: loss 19.2812, lr 3.5e-04, dt 2.0s +All GPU(s): step 3355: loss 19.0000, lr 3.5e-04, dt 2.0s +All GPU(s): step 3356: loss 18.7188, lr 3.5e-04, dt 2.0s +All GPU(s): step 3357: loss 15.6172, lr 3.5e-04, dt 2.1s +All GPU(s): step 3358: loss 12.7500, lr 3.5e-04, dt 2.1s +All GPU(s): step 3359: loss 9.8984, lr 3.5e-04, dt 2.1s +All GPU(s): step 3360: loss 9.2969, lr 3.5e-04, dt 2.1s +All GPU(s): step 3361: loss 11.9531, lr 3.5e-04, dt 2.1s +All GPU(s): step 3362: loss 13.3281, lr 3.5e-04, dt 2.1s +All GPU(s): step 3363: loss 15.4609, lr 3.5e-04, dt 2.1s +All GPU(s): step 3364: loss 15.7109, lr 3.5e-04, dt 2.1s +All GPU(s): step 3365: loss 16.4688, lr 3.5e-04, dt 2.1s +All GPU(s): step 3366: loss 17.6562, lr 3.5e-04, dt 2.1s +All GPU(s): step 3367: loss 18.6250, lr 3.5e-04, dt 2.1s +All GPU(s): step 3368: loss 19.5469, lr 3.5e-04, dt 2.0s +All GPU(s): step 3369: loss 19.7031, lr 3.5e-04, dt 2.0s +All GPU(s): step 3370: loss 19.3281, lr 3.5e-04, dt 2.0s +All GPU(s): step 3371: loss 19.9062, lr 3.5e-04, dt 2.0s +All GPU(s): step 3372: loss 19.8125, lr 3.5e-04, dt 2.1s +All GPU(s): step 3373: loss 20.5938, lr 3.5e-04, dt 2.0s +All GPU(s): step 3374: loss 20.9062, lr 3.5e-04, dt 2.0s +All GPU(s): step 3375: loss 21.1094, lr 3.5e-04, dt 2.0s +All GPU(s): step 3376: loss 20.8750, lr 3.5e-04, dt 2.0s +All GPU(s): step 3377: loss 20.4844, lr 3.5e-04, dt 2.2s +All GPU(s): step 3378: loss 20.7344, lr 3.5e-04, dt 2.0s +All GPU(s): step 3379: loss 20.8750, lr 3.5e-04, dt 2.0s +All GPU(s): step 3380: loss 20.8438, lr 3.5e-04, dt 2.0s +All GPU(s): step 3381: loss 20.3750, lr 3.5e-04, dt 2.1s +All GPU(s): step 3382: loss 20.9844, lr 3.5e-04, dt 2.1s +All GPU(s): step 3383: loss 20.6562, lr 3.5e-04, dt 2.1s +All GPU(s): step 3384: loss 20.0625, lr 3.5e-04, dt 2.0s +All GPU(s): step 3385: loss 20.4062, lr 3.5e-04, dt 2.0s +All GPU(s): step 3386: loss 20.0625, lr 3.5e-04, dt 2.1s +All GPU(s): step 3387: loss 20.2188, lr 3.5e-04, dt 2.0s +All GPU(s): step 3388: loss 19.8438, lr 3.5e-04, dt 2.0s +All GPU(s): step 3389: loss 20.0938, lr 3.5e-04, dt 2.0s +All GPU(s): step 3390: loss 20.7188, lr 3.5e-04, dt 2.0s +All GPU(s): step 3391: loss 20.6719, lr 3.5e-04, dt 2.1s +All GPU(s): step 3392: loss 20.2656, lr 3.5e-04, dt 2.1s +All GPU(s): step 3393: loss 20.6875, lr 3.5e-04, dt 2.1s +All GPU(s): step 3394: loss 20.8438, lr 3.5e-04, dt 2.1s +All GPU(s): step 3395: loss 20.5156, lr 3.5e-04, dt 2.1s +All GPU(s): step 3396: loss 20.8906, lr 3.5e-04, dt 2.1s +All GPU(s): step 3397: loss 20.8438, lr 3.5e-04, dt 2.0s +All GPU(s): step 3398: loss 20.8125, lr 3.5e-04, dt 2.0s +All GPU(s): step 3399: loss 20.9844, lr 3.5e-04, dt 2.0s +All GPU(s): step 3400: loss 21.2031, lr 3.5e-04, dt 2.0s +All GPU(s): step 3401: loss 21.3281, lr 3.5e-04, dt 2.2s +All GPU(s): step 3402: loss 21.3125, lr 3.5e-04, dt 2.0s +All GPU(s): step 3403: loss 21.1875, lr 3.5e-04, dt 2.1s +All GPU(s): step 3404: loss 21.3438, lr 3.5e-04, dt 2.0s +All GPU(s): step 3405: loss 21.2656, lr 3.5e-04, dt 2.1s +All GPU(s): step 3406: loss 21.6719, lr 3.5e-04, dt 2.1s +All GPU(s): step 3407: loss 21.1406, lr 3.5e-04, dt 2.1s +All GPU(s): step 3408: loss 21.2656, lr 3.5e-04, dt 2.1s +All GPU(s): step 3409: loss 21.0156, lr 3.5e-04, dt 2.1s +All GPU(s): step 3410: loss 21.0625, lr 3.5e-04, dt 2.1s +All GPU(s): step 3411: loss 21.5312, lr 3.5e-04, dt 2.1s +All GPU(s): step 3412: loss 21.5781, lr 3.5e-04, dt 2.0s +All GPU(s): step 3413: loss 21.6406, lr 3.5e-04, dt 2.1s +All GPU(s): step 3414: loss 21.7031, lr 3.5e-04, dt 2.1s +All GPU(s): step 3415: loss 21.2188, lr 3.5e-04, dt 2.1s +All GPU(s): step 3416: loss 21.5625, lr 3.5e-04, dt 2.1s +All GPU(s): step 3417: loss 21.5000, lr 3.5e-04, dt 2.0s +All GPU(s): step 3418: loss 21.6406, lr 3.5e-04, dt 2.0s +All GPU(s): step 3419: loss 21.6094, lr 3.5e-04, dt 2.0s +All GPU(s): step 3420: loss 21.3906, lr 3.5e-04, dt 2.2s +All GPU(s): step 3421: loss 21.1094, lr 3.5e-04, dt 2.1s +All GPU(s): step 3422: loss 20.7188, lr 3.5e-04, dt 2.0s +All GPU(s): step 3423: loss 21.2500, lr 3.5e-04, dt 2.1s +All GPU(s): step 3424: loss 21.6250, lr 3.5e-04, dt 2.1s +All GPU(s): step 3425: loss 21.5156, lr 3.5e-04, dt 2.1s +All GPU(s): step 3426: loss 21.1094, lr 3.5e-04, dt 2.1s +All GPU(s): step 3427: loss 21.2031, lr 3.5e-04, dt 2.1s +All GPU(s): step 3428: loss 20.9844, lr 3.5e-04, dt 2.1s +All GPU(s): step 3429: loss 21.3594, lr 3.5e-04, dt 2.1s +All GPU(s): step 3430: loss 21.0469, lr 3.5e-04, dt 2.1s +All GPU(s): step 3431: loss 21.6875, lr 3.5e-04, dt 2.0s +All GPU(s): step 3432: loss 21.4844, lr 3.5e-04, dt 2.0s +All GPU(s): step 3433: loss 21.5781, lr 3.5e-04, dt 2.0s +All GPU(s): step 3434: loss 22.0000, lr 3.5e-04, dt 2.1s +All GPU(s): step 3435: loss 21.9531, lr 3.5e-04, dt 2.1s +All GPU(s): step 3436: loss 21.9531, lr 3.5e-04, dt 2.0s +All GPU(s): step 3437: loss 21.6875, lr 3.5e-04, dt 2.1s +All GPU(s): step 3438: loss 21.4844, lr 3.5e-04, dt 2.1s +All GPU(s): step 3439: loss 21.6406, lr 3.5e-04, dt 2.1s +All GPU(s): step 3440: loss 21.9375, lr 3.5e-04, dt 2.1s +All GPU(s): step 3441: loss 22.3438, lr 3.5e-04, dt 2.1s +All GPU(s): step 3442: loss 22.7344, lr 3.5e-04, dt 2.1s +All GPU(s): step 3443: loss 22.3750, lr 3.5e-04, dt 2.1s +All GPU(s): step 3444: loss 22.7812, lr 3.5e-04, dt 2.2s +All GPU(s): step 3445: loss 23.1250, lr 3.5e-04, dt 2.1s +All GPU(s): step 3446: loss 22.4688, lr 3.5e-04, dt 2.1s +All GPU(s): step 3447: loss 22.5781, lr 3.5e-04, dt 2.1s +All GPU(s): step 3448: loss 21.7969, lr 3.5e-04, dt 2.1s +All GPU(s): step 3449: loss 22.0469, lr 3.5e-04, dt 2.2s +All GPU(s): step 3450: loss 22.0938, lr 3.5e-04, dt 2.1s +All GPU(s): step 3451: loss 21.7500, lr 3.5e-04, dt 2.1s +All GPU(s): step 3452: loss 21.6719, lr 3.5e-04, dt 2.1s +All GPU(s): step 3453: loss 21.5781, lr 3.5e-04, dt 2.1s +All GPU(s): step 3454: loss 21.2500, lr 3.5e-04, dt 2.1s +All GPU(s): step 3455: loss 21.1094, lr 3.5e-04, dt 2.0s +All GPU(s): step 3456: loss 21.4375, lr 3.5e-04, dt 2.0s +All GPU(s): step 3457: loss 21.3906, lr 3.5e-04, dt 2.0s +All GPU(s): step 3458: loss 22.0312, lr 3.5e-04, dt 2.1s +All GPU(s): step 3459: loss 21.8906, lr 3.5e-04, dt 2.1s +All GPU(s): step 3460: loss 22.4219, lr 3.5e-04, dt 2.0s +All GPU(s): step 3461: loss 22.0000, lr 3.5e-04, dt 2.0s +All GPU(s): step 3462: loss 22.3125, lr 3.5e-04, dt 2.0s +All GPU(s): step 3463: loss 21.5938, lr 3.5e-04, dt 2.1s +All GPU(s): step 3464: loss 20.5781, lr 3.5e-04, dt 2.1s +All GPU(s): step 3465: loss 19.3906, lr 3.5e-04, dt 2.0s +All GPU(s): step 3466: loss 19.1562, lr 3.5e-04, dt 2.0s +All GPU(s): step 3467: loss 18.5156, lr 3.5e-04, dt 2.1s +All GPU(s): step 3468: loss 17.1406, lr 3.5e-04, dt 2.1s +All GPU(s): step 3469: loss 17.4219, lr 3.5e-04, dt 2.0s +All GPU(s): step 3470: loss 17.1641, lr 3.5e-04, dt 2.0s +All GPU(s): step 3471: loss 16.2578, lr 3.5e-04, dt 2.0s +All GPU(s): step 3472: loss 14.8906, lr 3.5e-04, dt 2.1s +All GPU(s): step 3473: loss 13.6641, lr 3.5e-04, dt 2.2s +All GPU(s): step 3474: loss 10.5391, lr 3.5e-04, dt 2.1s +All GPU(s): step 3475: loss 7.4922, lr 3.5e-04, dt 2.1s +All GPU(s): step 3476: loss 5.0547, lr 3.5e-04, dt 2.0s +All GPU(s): step 3477: loss 4.1621, lr 3.5e-04, dt 2.0s +All GPU(s): step 3478: loss 3.0078, lr 3.5e-04, dt 2.1s +All GPU(s): step 3479: loss 2.6504, lr 3.5e-04, dt 2.1s +All GPU(s): step 3480: loss 2.7090, lr 3.5e-04, dt 2.1s +All GPU(s): step 3481: loss 2.7051, lr 3.5e-04, dt 2.0s +All GPU(s): step 3482: loss 1.7974, lr 3.5e-04, dt 2.1s +All GPU(s): step 3483: loss 3.7324, lr 3.5e-04, dt 2.1s +All GPU(s): step 3484: loss 10.8516, lr 3.5e-04, dt 2.0s +All GPU(s): step 3485: loss 18.2812, lr 3.5e-04, dt 2.0s +All GPU(s): step 3486: loss 19.4531, lr 3.5e-04, dt 2.0s +All GPU(s): step 3487: loss 20.2969, lr 3.5e-04, dt 2.1s +All GPU(s): step 3488: loss 20.9688, lr 3.5e-04, dt 2.1s +All GPU(s): step 3489: loss 22.2188, lr 3.5e-04, dt 2.0s +All GPU(s): step 3490: loss 22.7500, lr 3.5e-04, dt 2.1s +All GPU(s): step 3491: loss 23.0000, lr 3.5e-04, dt 2.0s +All GPU(s): step 3492: loss 23.4062, lr 3.5e-04, dt 2.1s +All GPU(s): step 3493: loss 23.2969, lr 3.5e-04, dt 2.1s +All GPU(s): step 3494: loss 23.4219, lr 3.5e-04, dt 2.1s +All GPU(s): step 3495: loss 23.5469, lr 3.5e-04, dt 2.0s +All GPU(s): step 3496: loss 23.4688, lr 3.5e-04, dt 2.1s +All GPU(s): step 3497: loss 23.3906, lr 3.5e-04, dt 2.1s +All GPU(s): step 3498: loss 23.5312, lr 3.5e-04, dt 2.0s +All GPU(s): step 3499: loss 23.1406, lr 3.5e-04, dt 2.0s +All GPU(s): step 3500: loss 23.4531, lr 3.5e-04, dt 2.1s +All GPU(s): step 3501: loss 23.0312, lr 3.5e-04, dt 2.0s +All GPU(s): step 3502: loss 23.2969, lr 3.5e-04, dt 2.2s +All GPU(s): step 3503: loss 23.3750, lr 3.5e-04, dt 2.0s +All GPU(s): step 3504: loss 23.1719, lr 3.5e-04, dt 2.1s +All GPU(s): step 3505: loss 23.2812, lr 3.5e-04, dt 2.0s +All GPU(s): step 3506: loss 23.0625, lr 3.5e-04, dt 2.1s +All GPU(s): step 3507: loss 23.1250, lr 3.5e-04, dt 2.1s +All GPU(s): step 3508: loss 23.2500, lr 3.5e-04, dt 2.0s +All GPU(s): step 3509: loss 22.9844, lr 3.5e-04, dt 2.1s +All GPU(s): step 3510: loss 22.5469, lr 3.5e-04, dt 2.0s +All GPU(s): step 3511: loss 22.2656, lr 3.5e-04, dt 2.1s +All GPU(s): step 3512: loss 21.9531, lr 3.5e-04, dt 2.1s +All GPU(s): step 3513: loss 21.5000, lr 3.5e-04, dt 2.0s +All GPU(s): step 3514: loss 21.5938, lr 3.5e-04, dt 2.0s +All GPU(s): step 3515: loss 21.6875, lr 3.5e-04, dt 2.0s +All GPU(s): step 3516: loss 21.9062, lr 3.5e-04, dt 2.1s +All GPU(s): step 3517: loss 22.6406, lr 3.5e-04, dt 2.0s +All GPU(s): step 3518: loss 22.3906, lr 3.5e-04, dt 2.1s +All GPU(s): step 3519: loss 23.0781, lr 3.5e-04, dt 2.1s +All GPU(s): step 3520: loss 23.3281, lr 3.5e-04, dt 2.1s +All GPU(s): step 3521: loss 23.0938, lr 3.5e-04, dt 2.1s +All GPU(s): step 3522: loss 23.2812, lr 3.5e-04, dt 2.1s +All GPU(s): step 3523: loss 23.1719, lr 3.5e-04, dt 2.1s +All GPU(s): step 3524: loss 23.5312, lr 3.5e-04, dt 2.0s +All GPU(s): step 3525: loss 23.2500, lr 3.5e-04, dt 2.1s +All GPU(s): step 3526: loss 23.4531, lr 3.5e-04, dt 2.2s +All GPU(s): step 3527: loss 23.3125, lr 3.5e-04, dt 2.0s +All GPU(s): step 3528: loss 23.2656, lr 3.5e-04, dt 2.1s +All GPU(s): step 3529: loss 23.3750, lr 3.5e-04, dt 2.0s +All GPU(s): step 3530: loss 23.3750, lr 3.5e-04, dt 2.1s +All GPU(s): step 3531: loss 23.4219, lr 3.5e-04, dt 2.2s +All GPU(s): step 3532: loss 23.2031, lr 3.5e-04, dt 2.0s +All GPU(s): step 3533: loss 23.1250, lr 3.5e-04, dt 2.0s +All GPU(s): step 3534: loss 23.0625, lr 3.5e-04, dt 2.0s +All GPU(s): step 3535: loss 23.0312, lr 3.5e-04, dt 2.1s +All GPU(s): step 3536: loss 23.3438, lr 3.5e-04, dt 2.1s +All GPU(s): step 3537: loss 22.9688, lr 3.5e-04, dt 2.1s +All GPU(s): step 3538: loss 22.9844, lr 3.5e-04, dt 2.1s +All GPU(s): step 3539: loss 22.9688, lr 3.5e-04, dt 2.1s +All GPU(s): step 3540: loss 22.8125, lr 3.5e-04, dt 2.1s +All GPU(s): step 3541: loss 22.4688, lr 3.5e-04, dt 2.1s +All GPU(s): step 3542: loss 22.3281, lr 3.5e-04, dt 2.0s +All GPU(s): step 3543: loss 22.1562, lr 3.5e-04, dt 2.1s +All GPU(s): step 3544: loss 22.2500, lr 3.5e-04, dt 2.1s +All GPU(s): step 3545: loss 21.9062, lr 3.5e-04, dt 2.1s +All GPU(s): step 3546: loss 22.1250, lr 3.5e-04, dt 2.1s +All GPU(s): step 3547: loss 22.0156, lr 3.5e-04, dt 2.0s +All GPU(s): step 3548: loss 21.7656, lr 3.5e-04, dt 2.0s +All GPU(s): step 3549: loss 21.2344, lr 3.5e-04, dt 2.1s +All GPU(s): step 3550: loss 21.0000, lr 3.5e-04, dt 2.2s +All GPU(s): step 3551: loss 20.4844, lr 3.5e-04, dt 2.1s +All GPU(s): step 3552: loss 20.2031, lr 3.5e-04, dt 2.1s +All GPU(s): step 3553: loss 20.2500, lr 3.5e-04, dt 2.1s +All GPU(s): step 3554: loss 19.9844, lr 3.5e-04, dt 2.1s +All GPU(s): step 3555: loss 19.7812, lr 3.5e-04, dt 2.2s +All GPU(s): step 3556: loss 20.3750, lr 3.5e-04, dt 2.0s +All GPU(s): step 3557: loss 19.6719, lr 3.4e-04, dt 2.1s +All GPU(s): step 3558: loss 19.9062, lr 3.4e-04, dt 2.0s +All GPU(s): step 3559: loss 19.9844, lr 3.4e-04, dt 2.1s +All GPU(s): step 3560: loss 20.2500, lr 3.4e-04, dt 2.1s +All GPU(s): step 3561: loss 19.6406, lr 3.4e-04, dt 2.0s +All GPU(s): step 3562: loss 19.9375, lr 3.4e-04, dt 2.0s +All GPU(s): step 3563: loss 19.9219, lr 3.4e-04, dt 2.0s +All GPU(s): step 3564: loss 19.9375, lr 3.4e-04, dt 2.1s +All GPU(s): step 3565: loss 19.7031, lr 3.4e-04, dt 2.1s +All GPU(s): step 3566: loss 19.5469, lr 3.4e-04, dt 2.0s +All GPU(s): step 3567: loss 19.4844, lr 3.4e-04, dt 2.0s +All GPU(s): step 3568: loss 19.1719, lr 3.4e-04, dt 2.0s +All GPU(s): step 3569: loss 18.9688, lr 3.4e-04, dt 2.1s +All GPU(s): step 3570: loss 18.8438, lr 3.4e-04, dt 2.1s +All GPU(s): step 3571: loss 18.4688, lr 3.4e-04, dt 2.1s +All GPU(s): step 3572: loss 18.5312, lr 3.4e-04, dt 2.1s +All GPU(s): step 3573: loss 18.3125, lr 3.4e-04, dt 2.1s +All GPU(s): step 3574: loss 18.8281, lr 3.4e-04, dt 2.1s +All GPU(s): step 3575: loss 18.5000, lr 3.4e-04, dt 2.1s +All GPU(s): step 3576: loss 18.7812, lr 3.4e-04, dt 2.0s +All GPU(s): step 3577: loss 18.6562, lr 3.4e-04, dt 2.1s +All GPU(s): step 3578: loss 18.4531, lr 3.4e-04, dt 2.1s +All GPU(s): step 3579: loss 18.0781, lr 3.4e-04, dt 2.1s +All GPU(s): step 3580: loss 17.5625, lr 3.4e-04, dt 2.0s +All GPU(s): step 3581: loss 16.4922, lr 3.4e-04, dt 2.0s +All GPU(s): step 3582: loss 15.1875, lr 3.4e-04, dt 2.1s +All GPU(s): step 3583: loss 14.6016, lr 3.4e-04, dt 2.1s +All GPU(s): step 3584: loss 13.1562, lr 3.4e-04, dt 2.2s +All GPU(s): step 3585: loss 11.3672, lr 3.4e-04, dt 2.1s +All GPU(s): step 3586: loss 9.7812, lr 3.4e-04, dt 2.0s +All GPU(s): step 3587: loss 8.3516, lr 3.4e-04, dt 2.1s +All GPU(s): step 3588: loss 6.9688, lr 3.4e-04, dt 2.1s +All GPU(s): step 3589: loss 5.8438, lr 3.4e-04, dt 2.1s +All GPU(s): step 3590: loss 5.5234, lr 3.4e-04, dt 2.0s +All GPU(s): step 3591: loss 5.2500, lr 3.4e-04, dt 2.1s +All GPU(s): step 3592: loss 7.5664, lr 3.4e-04, dt 2.1s +All GPU(s): step 3593: loss 9.4453, lr 3.4e-04, dt 2.1s +All GPU(s): step 3594: loss 10.3750, lr 3.4e-04, dt 2.1s +All GPU(s): step 3595: loss 11.3672, lr 3.4e-04, dt 2.0s +All GPU(s): step 3596: loss 11.7969, lr 3.4e-04, dt 2.0s +All GPU(s): step 3597: loss 12.6016, lr 3.4e-04, dt 2.0s +All GPU(s): step 3598: loss 13.7422, lr 3.4e-04, dt 2.1s +All GPU(s): step 3599: loss 14.4609, lr 3.4e-04, dt 2.1s +All GPU(s): step 3600: loss 16.0781, lr 3.4e-04, dt 2.1s +All GPU(s): step 3601: loss 17.8906, lr 3.4e-04, dt 2.1s +All GPU(s): step 3602: loss 18.9062, lr 3.4e-04, dt 2.1s +All GPU(s): step 3603: loss 18.8750, lr 3.4e-04, dt 2.1s +All GPU(s): step 3604: loss 19.0156, lr 3.4e-04, dt 2.1s +All GPU(s): step 3605: loss 19.0781, lr 3.4e-04, dt 2.0s +All GPU(s): step 3606: loss 19.0000, lr 3.4e-04, dt 2.1s +All GPU(s): step 3607: loss 18.7969, lr 3.4e-04, dt 2.0s +All GPU(s): step 3608: loss 19.1250, lr 3.4e-04, dt 2.3s +All GPU(s): step 3609: loss 19.1562, lr 3.4e-04, dt 2.1s +All GPU(s): step 3610: loss 19.4219, lr 3.4e-04, dt 2.1s +All GPU(s): step 3611: loss 18.9531, lr 3.4e-04, dt 2.1s +All GPU(s): step 3612: loss 18.6250, lr 3.4e-04, dt 2.1s +All GPU(s): step 3613: loss 17.9062, lr 3.4e-04, dt 2.1s +All GPU(s): step 3614: loss 17.7188, lr 3.4e-04, dt 2.1s +All GPU(s): step 3615: loss 17.3438, lr 3.4e-04, dt 2.1s +All GPU(s): step 3616: loss 17.9688, lr 3.4e-04, dt 2.1s +All GPU(s): step 3617: loss 18.0938, lr 3.4e-04, dt 2.1s +All GPU(s): step 3618: loss 18.5625, lr 3.4e-04, dt 2.1s +All GPU(s): step 3619: loss 19.7031, lr 3.4e-04, dt 2.0s +All GPU(s): step 3620: loss 20.1875, lr 3.4e-04, dt 2.0s +All GPU(s): step 3621: loss 20.5625, lr 3.4e-04, dt 2.0s +All GPU(s): step 3622: loss 20.8281, lr 3.4e-04, dt 2.1s +All GPU(s): step 3623: loss 20.5312, lr 3.4e-04, dt 2.0s +All GPU(s): step 3624: loss 21.1250, lr 3.4e-04, dt 2.1s +All GPU(s): step 3625: loss 21.0312, lr 3.4e-04, dt 2.1s +All GPU(s): step 3626: loss 20.9531, lr 3.4e-04, dt 2.1s +All GPU(s): step 3627: loss 21.7812, lr 3.4e-04, dt 2.1s +All GPU(s): step 3628: loss 21.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 3629: loss 21.3594, lr 3.4e-04, dt 2.1s +All GPU(s): step 3630: loss 21.0312, lr 3.4e-04, dt 2.1s +All GPU(s): step 3631: loss 20.6406, lr 3.4e-04, dt 2.1s +All GPU(s): step 3632: loss 20.9531, lr 3.4e-04, dt 2.2s +All GPU(s): step 3633: loss 21.1250, lr 3.4e-04, dt 2.1s +All GPU(s): step 3634: loss 20.8750, lr 3.4e-04, dt 2.1s +All GPU(s): step 3635: loss 20.9219, lr 3.4e-04, dt 2.1s +All GPU(s): step 3636: loss 20.5781, lr 3.4e-04, dt 2.1s +All GPU(s): step 3637: loss 20.7812, lr 3.4e-04, dt 2.1s +All GPU(s): step 3638: loss 20.7656, lr 3.4e-04, dt 2.0s +All GPU(s): step 3639: loss 20.4844, lr 3.4e-04, dt 2.0s +All GPU(s): step 3640: loss 20.4688, lr 3.4e-04, dt 2.1s +All GPU(s): step 3641: loss 20.8594, lr 3.4e-04, dt 2.1s +All GPU(s): step 3642: loss 21.1250, lr 3.4e-04, dt 2.1s +All GPU(s): step 3643: loss 20.7812, lr 3.4e-04, dt 2.0s +All GPU(s): step 3644: loss 21.1250, lr 3.4e-04, dt 2.1s +All GPU(s): step 3645: loss 21.5000, lr 3.4e-04, dt 2.0s +All GPU(s): step 3646: loss 21.1719, lr 3.4e-04, dt 2.1s +All GPU(s): step 3647: loss 20.7812, lr 3.4e-04, dt 2.1s +All GPU(s): step 3648: loss 20.8750, lr 3.4e-04, dt 2.1s +All GPU(s): step 3649: loss 20.9844, lr 3.4e-04, dt 2.1s +All GPU(s): step 3650: loss 21.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 3651: loss 20.6406, lr 3.4e-04, dt 2.1s +All GPU(s): step 3652: loss 20.9375, lr 3.4e-04, dt 2.1s +All GPU(s): step 3653: loss 20.7656, lr 3.4e-04, dt 2.1s +All GPU(s): step 3654: loss 21.1875, lr 3.4e-04, dt 2.0s +All GPU(s): step 3655: loss 20.8594, lr 3.4e-04, dt 2.1s +All GPU(s): step 3656: loss 21.4688, lr 3.4e-04, dt 2.2s +All GPU(s): step 3657: loss 21.9844, lr 3.4e-04, dt 2.1s +All GPU(s): step 3658: loss 21.8281, lr 3.4e-04, dt 2.1s +All GPU(s): step 3659: loss 22.7344, lr 3.4e-04, dt 2.1s +All GPU(s): step 3660: loss 22.6406, lr 3.4e-04, dt 2.1s +All GPU(s): step 3661: loss 23.0312, lr 3.4e-04, dt 2.1s +All GPU(s): step 3662: loss 23.1250, lr 3.4e-04, dt 2.1s +All GPU(s): step 3663: loss 23.4219, lr 3.4e-04, dt 2.1s +All GPU(s): step 3664: loss 22.6562, lr 3.4e-04, dt 2.1s +All GPU(s): step 3665: loss 23.1406, lr 3.4e-04, dt 2.1s +All GPU(s): step 3666: loss 23.0625, lr 3.4e-04, dt 2.1s +All GPU(s): step 3667: loss 22.8438, lr 3.4e-04, dt 2.0s +All GPU(s): step 3668: loss 23.2812, lr 3.4e-04, dt 2.0s +All GPU(s): step 3669: loss 23.1250, lr 3.4e-04, dt 2.0s +All GPU(s): step 3670: loss 23.3594, lr 3.4e-04, dt 2.1s +All GPU(s): step 3671: loss 23.1875, lr 3.4e-04, dt 2.1s +All GPU(s): step 3672: loss 23.2969, lr 3.4e-04, dt 2.1s +All GPU(s): step 3673: loss 23.4219, lr 3.4e-04, dt 2.0s +All GPU(s): step 3674: loss 23.2188, lr 3.4e-04, dt 2.1s +All GPU(s): step 3675: loss 23.1406, lr 3.4e-04, dt 2.2s +All GPU(s): step 3676: loss 23.5156, lr 3.4e-04, dt 2.1s +All GPU(s): step 3677: loss 23.5000, lr 3.4e-04, dt 2.1s +All GPU(s): step 3678: loss 23.4531, lr 3.4e-04, dt 2.0s +All GPU(s): step 3679: loss 23.5000, lr 3.4e-04, dt 2.0s +All GPU(s): step 3680: loss 23.0625, lr 3.4e-04, dt 2.1s +All GPU(s): step 3681: loss 23.1875, lr 3.4e-04, dt 2.0s +All GPU(s): step 3682: loss 23.2500, lr 3.4e-04, dt 2.0s +All GPU(s): step 3683: loss 23.3438, lr 3.4e-04, dt 2.1s +All GPU(s): step 3684: loss 23.0312, lr 3.4e-04, dt 2.1s +All GPU(s): step 3685: loss 23.2656, lr 3.4e-04, dt 2.1s +All GPU(s): step 3686: loss 23.0156, lr 3.4e-04, dt 2.1s +All GPU(s): step 3687: loss 22.9844, lr 3.4e-04, dt 2.0s +All GPU(s): step 3688: loss 22.8906, lr 3.4e-04, dt 2.1s +All GPU(s): step 3689: loss 23.0312, lr 3.4e-04, dt 2.1s +All GPU(s): step 3690: loss 22.9062, lr 3.4e-04, dt 2.1s +All GPU(s): step 3691: loss 23.0000, lr 3.4e-04, dt 2.1s +All GPU(s): step 3692: loss 22.5469, lr 3.4e-04, dt 2.1s +All GPU(s): step 3693: loss 22.7812, lr 3.4e-04, dt 2.0s +All GPU(s): step 3694: loss 22.8750, lr 3.4e-04, dt 2.1s +All GPU(s): step 3695: loss 22.7812, lr 3.4e-04, dt 2.1s +All GPU(s): step 3696: loss 23.1406, lr 3.4e-04, dt 2.0s +All GPU(s): step 3697: loss 22.6719, lr 3.4e-04, dt 2.0s +All GPU(s): step 3698: loss 22.7500, lr 3.4e-04, dt 2.1s +All GPU(s): step 3699: loss 22.7656, lr 3.4e-04, dt 2.2s +All GPU(s): step 3700: loss 22.7656, lr 3.4e-04, dt 2.1s +All GPU(s): step 3701: loss 22.2812, lr 3.4e-04, dt 2.0s +All GPU(s): step 3702: loss 22.5625, lr 3.4e-04, dt 2.0s +All GPU(s): step 3703: loss 22.1250, lr 3.4e-04, dt 2.1s +All GPU(s): step 3704: loss 21.5781, lr 3.4e-04, dt 2.1s +All GPU(s): step 3705: loss 22.0312, lr 3.4e-04, dt 2.0s +All GPU(s): step 3706: loss 22.1562, lr 3.4e-04, dt 2.0s +All GPU(s): step 3707: loss 21.9688, lr 3.4e-04, dt 2.0s +All GPU(s): step 3708: loss 21.7812, lr 3.4e-04, dt 2.1s +All GPU(s): step 3709: loss 22.0156, lr 3.4e-04, dt 2.1s +All GPU(s): step 3710: loss 22.0000, lr 3.4e-04, dt 2.1s +All GPU(s): step 3711: loss 21.7031, lr 3.4e-04, dt 2.0s +All GPU(s): step 3712: loss 22.0156, lr 3.4e-04, dt 2.0s +All GPU(s): step 3713: loss 21.6875, lr 3.4e-04, dt 2.1s +All GPU(s): step 3714: loss 21.6875, lr 3.4e-04, dt 2.1s +All GPU(s): step 3715: loss 21.7500, lr 3.4e-04, dt 2.0s +All GPU(s): step 3716: loss 21.6094, lr 3.4e-04, dt 2.0s +All GPU(s): step 3717: loss 21.5469, lr 3.4e-04, dt 2.0s +All GPU(s): step 3718: loss 22.0156, lr 3.4e-04, dt 2.1s +All GPU(s): step 3719: loss 21.7500, lr 3.4e-04, dt 2.0s +All GPU(s): step 3720: loss 21.7344, lr 3.4e-04, dt 2.0s +All GPU(s): step 3721: loss 21.8125, lr 3.4e-04, dt 2.0s +All GPU(s): step 3722: loss 21.6250, lr 3.4e-04, dt 2.0s +All GPU(s): step 3723: loss 21.7969, lr 3.4e-04, dt 2.1s +All GPU(s): step 3724: loss 21.5625, lr 3.4e-04, dt 2.1s +All GPU(s): step 3725: loss 21.7656, lr 3.4e-04, dt 2.1s +All GPU(s): step 3726: loss 21.5469, lr 3.4e-04, dt 2.1s +All GPU(s): step 3727: loss 21.4688, lr 3.4e-04, dt 2.1s +All GPU(s): step 3728: loss 21.8438, lr 3.4e-04, dt 2.2s +All GPU(s): step 3729: loss 21.6875, lr 3.4e-04, dt 2.1s +All GPU(s): step 3730: loss 21.7188, lr 3.4e-04, dt 2.0s +All GPU(s): step 3731: loss 21.5938, lr 3.4e-04, dt 2.0s +All GPU(s): step 3732: loss 22.0469, lr 3.4e-04, dt 2.1s +All GPU(s): step 3733: loss 21.6094, lr 3.4e-04, dt 2.1s +All GPU(s): step 3734: loss 21.7031, lr 3.4e-04, dt 2.1s +All GPU(s): step 3735: loss 21.3594, lr 3.4e-04, dt 2.1s +All GPU(s): step 3736: loss 21.5156, lr 3.4e-04, dt 2.1s +All GPU(s): step 3737: loss 21.3438, lr 3.4e-04, dt 2.1s +All GPU(s): step 3738: loss 21.2344, lr 3.4e-04, dt 2.1s +All GPU(s): step 3739: loss 21.6562, lr 3.4e-04, dt 2.1s +All GPU(s): step 3740: loss 21.4688, lr 3.4e-04, dt 2.1s +All GPU(s): step 3741: loss 21.1875, lr 3.4e-04, dt 2.0s +All GPU(s): step 3742: loss 21.0469, lr 3.4e-04, dt 2.1s +All GPU(s): step 3743: loss 20.9219, lr 3.4e-04, dt 2.1s +All GPU(s): step 3744: loss 20.7344, lr 3.4e-04, dt 2.1s +All GPU(s): step 3745: loss 21.2500, lr 3.4e-04, dt 2.0s +All GPU(s): step 3746: loss 20.8438, lr 3.4e-04, dt 2.0s +All GPU(s): step 3747: loss 20.8125, lr 3.4e-04, dt 2.1s +All GPU(s): step 3748: loss 20.8125, lr 3.4e-04, dt 2.1s +All GPU(s): step 3749: loss 20.8281, lr 3.4e-04, dt 2.0s +All GPU(s): step 3750: loss 20.9375, lr 3.4e-04, dt 2.0s +All GPU(s): step 3751: loss 21.0312, lr 3.4e-04, dt 2.1s +All GPU(s): step 3752: loss 21.1719, lr 3.4e-04, dt 2.2s +All GPU(s): step 3753: loss 20.8906, lr 3.4e-04, dt 2.0s +All GPU(s): step 3754: loss 21.1719, lr 3.4e-04, dt 2.0s +All GPU(s): step 3755: loss 21.6562, lr 3.4e-04, dt 2.0s +All GPU(s): step 3756: loss 21.2188, lr 3.4e-04, dt 2.1s +All GPU(s): step 3757: loss 21.1719, lr 3.4e-04, dt 2.2s +All GPU(s): step 3758: loss 21.1250, lr 3.4e-04, dt 2.0s +All GPU(s): step 3759: loss 20.8281, lr 3.4e-04, dt 2.0s +All GPU(s): step 3760: loss 20.4531, lr 3.4e-04, dt 2.0s +All GPU(s): step 3761: loss 20.5625, lr 3.4e-04, dt 2.1s +All GPU(s): step 3762: loss 20.1562, lr 3.4e-04, dt 2.1s +All GPU(s): step 3763: loss 20.0156, lr 3.4e-04, dt 2.1s +All GPU(s): step 3764: loss 20.0781, lr 3.4e-04, dt 2.0s +All GPU(s): step 3765: loss 19.8594, lr 3.4e-04, dt 2.1s +All GPU(s): step 3766: loss 19.3594, lr 3.4e-04, dt 2.1s +All GPU(s): step 3767: loss 18.6406, lr 3.4e-04, dt 2.1s +All GPU(s): step 3768: loss 17.0625, lr 3.4e-04, dt 2.0s +All GPU(s): step 3769: loss 14.5625, lr 3.4e-04, dt 2.0s +All GPU(s): step 3770: loss 10.6562, lr 3.4e-04, dt 2.1s +All GPU(s): step 3771: loss 7.4531, lr 3.4e-04, dt 2.1s +All GPU(s): step 3772: loss 5.3242, lr 3.4e-04, dt 2.1s +All GPU(s): step 3773: loss 2.6230, lr 3.4e-04, dt 2.1s +All GPU(s): step 3774: loss 2.5361, lr 3.4e-04, dt 2.1s +All GPU(s): step 3775: loss 4.6367, lr 3.4e-04, dt 2.1s +All GPU(s): step 3776: loss 5.4238, lr 3.4e-04, dt 2.2s +All GPU(s): step 3777: loss 7.8516, lr 3.4e-04, dt 2.1s +All GPU(s): step 3778: loss 11.8906, lr 3.4e-04, dt 2.1s +All GPU(s): step 3779: loss 15.3750, lr 3.4e-04, dt 2.0s +All GPU(s): step 3780: loss 19.3125, lr 3.4e-04, dt 2.1s +All GPU(s): step 3781: loss 20.5156, lr 3.4e-04, dt 2.1s +All GPU(s): step 3782: loss 22.0312, lr 3.4e-04, dt 2.0s +All GPU(s): step 3783: loss 22.2188, lr 3.4e-04, dt 2.0s +All GPU(s): step 3784: loss 23.1250, lr 3.4e-04, dt 2.0s +All GPU(s): step 3785: loss 22.9375, lr 3.4e-04, dt 2.1s +All GPU(s): step 3786: loss 23.0156, lr 3.4e-04, dt 2.2s +All GPU(s): step 3787: loss 23.1875, lr 3.4e-04, dt 2.0s +All GPU(s): step 3788: loss 23.3594, lr 3.4e-04, dt 2.0s +All GPU(s): step 3789: loss 23.2188, lr 3.4e-04, dt 2.0s +All GPU(s): step 3790: loss 23.3750, lr 3.4e-04, dt 2.0s +All GPU(s): step 3791: loss 23.2344, lr 3.4e-04, dt 2.1s +All GPU(s): step 3792: loss 23.2500, lr 3.4e-04, dt 2.0s +All GPU(s): step 3793: loss 23.1250, lr 3.4e-04, dt 2.0s +All GPU(s): step 3794: loss 23.0156, lr 3.3e-04, dt 2.0s +All GPU(s): step 3795: loss 23.3125, lr 3.3e-04, dt 2.1s +All GPU(s): step 3796: loss 22.9219, lr 3.3e-04, dt 2.0s +All GPU(s): step 3797: loss 23.4688, lr 3.3e-04, dt 2.0s +All GPU(s): step 3798: loss 23.4062, lr 3.3e-04, dt 2.0s +All GPU(s): step 3799: loss 23.2344, lr 3.3e-04, dt 2.0s +All GPU(s): step 3800: loss 23.0469, lr 3.3e-04, dt 2.1s +All GPU(s): step 3801: loss 23.3125, lr 3.3e-04, dt 2.0s +All GPU(s): step 3802: loss 22.8438, lr 3.3e-04, dt 2.1s +All GPU(s): step 3803: loss 22.9375, lr 3.3e-04, dt 2.0s +All GPU(s): step 3804: loss 22.8906, lr 3.3e-04, dt 2.1s +All GPU(s): step 3805: loss 22.7656, lr 3.3e-04, dt 2.1s +All GPU(s): step 3806: loss 22.7656, lr 3.3e-04, dt 2.1s +All GPU(s): step 3807: loss 22.9688, lr 3.3e-04, dt 2.1s +All GPU(s): step 3808: loss 23.2969, lr 3.3e-04, dt 2.0s +All GPU(s): step 3809: loss 23.2656, lr 3.3e-04, dt 2.1s +All GPU(s): step 3810: loss 23.3281, lr 3.3e-04, dt 2.2s +All GPU(s): step 3811: loss 23.7031, lr 3.3e-04, dt 2.0s +All GPU(s): step 3812: loss 23.2656, lr 3.3e-04, dt 2.0s +All GPU(s): step 3813: loss 23.2188, lr 3.3e-04, dt 2.0s +All GPU(s): step 3814: loss 23.1875, lr 3.3e-04, dt 2.1s +All GPU(s): step 3815: loss 23.2812, lr 3.3e-04, dt 2.1s +All GPU(s): step 3816: loss 23.0000, lr 3.3e-04, dt 2.0s +All GPU(s): step 3817: loss 22.8594, lr 3.3e-04, dt 2.0s +All GPU(s): step 3818: loss 23.0625, lr 3.3e-04, dt 2.0s +All GPU(s): step 3819: loss 22.7656, lr 3.3e-04, dt 2.1s +All GPU(s): step 3820: loss 23.0156, lr 3.3e-04, dt 2.1s +All GPU(s): step 3821: loss 22.9531, lr 3.3e-04, dt 2.1s +All GPU(s): step 3822: loss 23.2188, lr 3.3e-04, dt 2.0s +All GPU(s): step 3823: loss 23.3125, lr 3.3e-04, dt 2.0s +All GPU(s): step 3824: loss 23.2344, lr 3.3e-04, dt 2.1s +All GPU(s): step 3825: loss 23.2031, lr 3.3e-04, dt 2.0s +All GPU(s): step 3826: loss 23.5156, lr 3.3e-04, dt 2.0s +All GPU(s): step 3827: loss 23.2812, lr 3.3e-04, dt 2.0s +All GPU(s): step 3828: loss 23.3438, lr 3.3e-04, dt 2.1s +All GPU(s): step 3829: loss 23.3125, lr 3.3e-04, dt 2.2s +All GPU(s): step 3830: loss 23.3125, lr 3.3e-04, dt 2.0s +All GPU(s): step 3831: loss 23.2969, lr 3.3e-04, dt 2.1s +All GPU(s): step 3832: loss 23.0312, lr 3.3e-04, dt 2.0s +All GPU(s): step 3833: loss 23.4062, lr 3.3e-04, dt 2.1s +All GPU(s): step 3834: loss 23.3125, lr 3.3e-04, dt 2.1s +All GPU(s): step 3835: loss 23.4062, lr 3.3e-04, dt 2.1s +All GPU(s): step 3836: loss 23.0938, lr 3.3e-04, dt 2.1s +All GPU(s): step 3837: loss 23.2812, lr 3.3e-04, dt 2.1s +All GPU(s): step 3838: loss 23.6875, lr 3.3e-04, dt 2.1s +All GPU(s): step 3839: loss 23.7344, lr 3.3e-04, dt 2.1s +All GPU(s): step 3840: loss 23.8438, lr 3.3e-04, dt 2.0s +All GPU(s): step 3841: loss 23.4844, lr 3.3e-04, dt 2.0s +All GPU(s): step 3842: loss 23.3594, lr 3.3e-04, dt 2.0s +All GPU(s): step 3843: loss 23.5312, lr 3.3e-04, dt 2.1s +All GPU(s): step 3844: loss 23.6562, lr 3.3e-04, dt 2.1s +All GPU(s): step 3845: loss 23.2812, lr 3.3e-04, dt 2.0s +All GPU(s): step 3846: loss 23.5312, lr 3.3e-04, dt 2.0s +All GPU(s): step 3847: loss 23.3594, lr 3.3e-04, dt 2.0s +All GPU(s): step 3848: loss 23.4375, lr 3.3e-04, dt 2.1s +All GPU(s): step 3849: loss 23.6875, lr 3.3e-04, dt 2.0s +All GPU(s): step 3850: loss 23.6094, lr 3.3e-04, dt 2.0s +All GPU(s): step 3851: loss 23.5625, lr 3.3e-04, dt 2.0s +All GPU(s): step 3852: loss 23.6719, lr 3.3e-04, dt 2.0s +All GPU(s): step 3853: loss 23.6250, lr 3.3e-04, dt 2.1s +All GPU(s): step 3854: loss 23.4375, lr 3.3e-04, dt 2.0s +All GPU(s): step 3855: loss 23.8125, lr 3.3e-04, dt 2.0s +All GPU(s): step 3856: loss 23.5156, lr 3.3e-04, dt 2.0s +All GPU(s): step 3857: loss 23.7344, lr 3.3e-04, dt 2.0s +All GPU(s): step 3858: loss 23.5469, lr 3.3e-04, dt 2.2s +All GPU(s): step 3859: loss 23.2188, lr 3.3e-04, dt 2.1s +All GPU(s): step 3860: loss 23.4844, lr 3.3e-04, dt 2.1s +All GPU(s): step 3861: loss 23.4219, lr 3.3e-04, dt 2.1s +All GPU(s): step 3862: loss 23.3906, lr 3.3e-04, dt 2.1s +All GPU(s): step 3863: loss 23.3750, lr 3.3e-04, dt 2.1s +All GPU(s): step 3864: loss 23.5625, lr 3.3e-04, dt 2.0s +All GPU(s): step 3865: loss 23.4062, lr 3.3e-04, dt 2.0s +All GPU(s): step 3866: loss 23.3438, lr 3.3e-04, dt 2.1s +All GPU(s): step 3867: loss 23.3125, lr 3.3e-04, dt 2.1s +All GPU(s): step 3868: loss 23.4531, lr 3.3e-04, dt 2.1s +All GPU(s): step 3869: loss 23.4375, lr 3.3e-04, dt 2.0s +All GPU(s): step 3870: loss 23.6562, lr 3.3e-04, dt 2.0s +All GPU(s): step 3871: loss 23.8125, lr 3.3e-04, dt 2.0s +All GPU(s): step 3872: loss 23.8125, lr 3.3e-04, dt 2.1s +All GPU(s): step 3873: loss 23.6875, lr 3.3e-04, dt 2.1s +All GPU(s): step 3874: loss 23.4844, lr 3.3e-04, dt 2.0s +All GPU(s): step 3875: loss 23.6562, lr 3.3e-04, dt 2.1s +All GPU(s): step 3876: loss 23.5000, lr 3.3e-04, dt 2.0s +All GPU(s): step 3877: loss 23.5625, lr 3.3e-04, dt 2.1s +All GPU(s): step 3878: loss 23.4062, lr 3.3e-04, dt 2.0s +All GPU(s): step 3879: loss 23.3906, lr 3.3e-04, dt 2.0s +All GPU(s): step 3880: loss 23.4219, lr 3.3e-04, dt 2.0s +All GPU(s): step 3881: loss 23.4375, lr 3.3e-04, dt 2.1s +All GPU(s): step 3882: loss 23.5312, lr 3.3e-04, dt 2.1s +All GPU(s): step 3883: loss 23.4531, lr 3.3e-04, dt 2.0s +All GPU(s): step 3884: loss 23.3750, lr 3.3e-04, dt 2.0s +All GPU(s): step 3885: loss 23.2344, lr 3.3e-04, dt 2.0s +All GPU(s): step 3886: loss 23.3594, lr 3.3e-04, dt 2.1s +All GPU(s): step 3887: loss 23.3750, lr 3.3e-04, dt 2.2s +All GPU(s): step 3888: loss 23.7188, lr 3.3e-04, dt 2.0s +All GPU(s): step 3889: loss 23.3125, lr 3.3e-04, dt 2.0s +All GPU(s): step 3890: loss 23.4688, lr 3.3e-04, dt 2.0s +All GPU(s): step 3891: loss 23.5156, lr 3.3e-04, dt 2.1s +All GPU(s): step 3892: loss 23.7031, lr 3.3e-04, dt 2.1s +All GPU(s): step 3893: loss 23.3750, lr 3.3e-04, dt 2.1s +All GPU(s): step 3894: loss 23.3438, lr 3.3e-04, dt 2.1s +All GPU(s): step 3895: loss 23.4375, lr 3.3e-04, dt 2.0s +All GPU(s): step 3896: loss 23.5156, lr 3.3e-04, dt 2.1s +All GPU(s): step 3897: loss 23.6562, lr 3.3e-04, dt 2.1s +All GPU(s): step 3898: loss 23.7969, lr 3.3e-04, dt 2.0s +All GPU(s): step 3899: loss 23.3594, lr 3.3e-04, dt 2.0s +All GPU(s): step 3900: loss 23.7188, lr 3.3e-04, dt 2.1s +All GPU(s): step 3901: loss 23.5781, lr 3.3e-04, dt 2.1s +All GPU(s): step 3902: loss 23.3125, lr 3.3e-04, dt 2.1s +All GPU(s): step 3903: loss 23.6875, lr 3.3e-04, dt 2.1s +All GPU(s): step 3904: loss 23.6875, lr 3.3e-04, dt 2.1s +All GPU(s): step 3905: loss 23.6562, lr 3.3e-04, dt 2.0s +All GPU(s): step 3906: loss 23.5938, lr 3.3e-04, dt 2.1s +All GPU(s): step 3907: loss 23.6406, lr 3.3e-04, dt 2.0s +All GPU(s): step 3908: loss 23.7344, lr 3.3e-04, dt 2.0s +All GPU(s): step 3909: loss 23.4531, lr 3.3e-04, dt 2.1s +All GPU(s): step 3910: loss 23.6406, lr 3.3e-04, dt 2.1s +All GPU(s): step 3911: loss 23.5156, lr 3.3e-04, dt 2.2s +All GPU(s): step 3912: loss 23.7031, lr 3.3e-04, dt 2.1s +All GPU(s): step 3913: loss 23.6719, lr 3.3e-04, dt 2.1s +All GPU(s): step 3914: loss 23.4531, lr 3.3e-04, dt 2.1s +All GPU(s): step 3915: loss 23.6719, lr 3.3e-04, dt 2.1s +All GPU(s): step 3916: loss 23.5938, lr 3.3e-04, dt 2.2s +All GPU(s): step 3917: loss 23.6719, lr 3.3e-04, dt 2.0s +All GPU(s): step 3918: loss 23.5625, lr 3.3e-04, dt 2.1s +All GPU(s): step 3919: loss 23.5156, lr 3.3e-04, dt 2.0s +All GPU(s): step 3920: loss 23.5312, lr 3.3e-04, dt 2.1s +All GPU(s): step 3921: loss 23.6406, lr 3.3e-04, dt 2.1s +All GPU(s): step 3922: loss 23.5312, lr 3.3e-04, dt 2.1s +All GPU(s): step 3923: loss 23.5312, lr 3.3e-04, dt 2.0s +All GPU(s): step 3924: loss 23.6250, lr 3.3e-04, dt 2.0s +All GPU(s): step 3925: loss 23.6562, lr 3.3e-04, dt 2.1s +All GPU(s): step 3926: loss 23.3594, lr 3.3e-04, dt 2.1s +All GPU(s): step 3927: loss 23.7656, lr 3.3e-04, dt 2.0s +All GPU(s): step 3928: loss 23.7812, lr 3.3e-04, dt 2.0s +All GPU(s): step 3929: loss 23.5625, lr 3.3e-04, dt 2.0s +All GPU(s): step 3930: loss 23.5781, lr 3.3e-04, dt 2.2s +All GPU(s): step 3931: loss 23.3750, lr 3.3e-04, dt 2.1s +All GPU(s): step 3932: loss 23.5625, lr 3.3e-04, dt 2.0s +All GPU(s): step 3933: loss 23.4219, lr 3.3e-04, dt 2.0s +All GPU(s): step 3934: loss 23.4531, lr 3.3e-04, dt 2.1s +All GPU(s): step 3935: loss 23.4375, lr 3.3e-04, dt 2.2s +All GPU(s): step 3936: loss 23.2969, lr 3.3e-04, dt 2.1s +All GPU(s): step 3937: loss 23.3750, lr 3.3e-04, dt 2.1s +All GPU(s): step 3938: loss 23.1562, lr 3.3e-04, dt 2.1s +All GPU(s): step 3939: loss 23.3125, lr 3.3e-04, dt 2.1s +All GPU(s): step 3940: loss 23.6875, lr 3.3e-04, dt 2.1s +All GPU(s): step 3941: loss 23.5000, lr 3.3e-04, dt 2.0s +All GPU(s): step 3942: loss 23.6250, lr 3.3e-04, dt 2.1s +All GPU(s): step 3943: loss 23.4219, lr 3.3e-04, dt 2.1s +All GPU(s): step 3944: loss 23.5781, lr 3.3e-04, dt 2.1s +All GPU(s): step 3945: loss 23.3594, lr 3.3e-04, dt 2.1s +All GPU(s): step 3946: loss 23.5625, lr 3.3e-04, dt 2.1s +All GPU(s): step 3947: loss 23.2812, lr 3.3e-04, dt 2.0s +All GPU(s): step 3948: loss 23.1719, lr 3.3e-04, dt 2.0s +All GPU(s): step 3949: loss 23.2812, lr 3.3e-04, dt 2.1s +All GPU(s): step 3950: loss 22.9688, lr 3.3e-04, dt 2.1s +All GPU(s): step 3951: loss 23.1719, lr 3.3e-04, dt 2.1s +All GPU(s): step 3952: loss 23.1094, lr 3.3e-04, dt 2.1s +All GPU(s): step 3953: loss 22.8438, lr 3.3e-04, dt 2.1s +All GPU(s): step 3954: loss 23.0781, lr 3.3e-04, dt 2.1s +All GPU(s): step 3955: loss 22.5781, lr 3.3e-04, dt 2.0s +All GPU(s): step 3956: loss 22.7812, lr 3.3e-04, dt 2.0s +All GPU(s): step 3957: loss 22.3906, lr 3.3e-04, dt 2.0s +All GPU(s): step 3958: loss 22.5938, lr 3.3e-04, dt 2.1s +All GPU(s): step 3959: loss 22.1250, lr 3.3e-04, dt 2.2s +All GPU(s): step 3960: loss 22.7812, lr 3.3e-04, dt 2.1s +All GPU(s): step 3961: loss 22.5156, lr 3.3e-04, dt 2.1s +All GPU(s): step 3962: loss 22.4219, lr 3.3e-04, dt 2.1s +All GPU(s): step 3963: loss 22.5469, lr 3.3e-04, dt 2.0s +All GPU(s): step 3964: loss 22.8125, lr 3.3e-04, dt 2.1s +All GPU(s): step 3965: loss 22.7812, lr 3.3e-04, dt 2.0s +All GPU(s): step 3966: loss 22.4062, lr 3.3e-04, dt 2.0s +All GPU(s): step 3967: loss 22.5312, lr 3.3e-04, dt 2.0s +All GPU(s): step 3968: loss 22.7344, lr 3.3e-04, dt 2.1s +All GPU(s): step 3969: loss 22.6094, lr 3.3e-04, dt 2.2s +All GPU(s): step 3970: loss 22.6250, lr 3.3e-04, dt 2.0s +All GPU(s): step 3971: loss 22.8125, lr 3.3e-04, dt 2.0s +All GPU(s): step 3972: loss 22.7500, lr 3.3e-04, dt 2.0s +All GPU(s): step 3973: loss 22.7031, lr 3.3e-04, dt 2.0s +All GPU(s): step 3974: loss 22.8125, lr 3.3e-04, dt 2.1s +All GPU(s): step 3975: loss 22.8125, lr 3.3e-04, dt 2.0s +All GPU(s): step 3976: loss 22.7031, lr 3.3e-04, dt 2.0s +All GPU(s): step 3977: loss 22.9062, lr 3.3e-04, dt 2.0s +All GPU(s): step 3978: loss 22.5156, lr 3.3e-04, dt 2.1s +All GPU(s): step 3979: loss 22.8594, lr 3.3e-04, dt 2.1s +All GPU(s): step 3980: loss 23.0156, lr 3.3e-04, dt 2.0s +All GPU(s): step 3981: loss 22.8438, lr 3.3e-04, dt 2.0s +All GPU(s): step 3982: loss 22.5312, lr 3.3e-04, dt 2.0s +All GPU(s): step 3983: loss 22.6250, lr 3.3e-04, dt 2.1s +All GPU(s): step 3984: loss 22.7656, lr 3.3e-04, dt 2.0s +All GPU(s): step 3985: loss 22.3125, lr 3.3e-04, dt 2.0s +All GPU(s): step 3986: loss 22.5781, lr 3.3e-04, dt 2.0s +All GPU(s): step 3987: loss 22.2344, lr 3.3e-04, dt 2.1s +All GPU(s): step 3988: loss 22.2031, lr 3.3e-04, dt 2.2s +All GPU(s): step 3989: loss 22.0625, lr 3.3e-04, dt 2.0s +All GPU(s): step 3990: loss 22.0469, lr 3.3e-04, dt 2.0s +All GPU(s): step 3991: loss 22.0781, lr 3.3e-04, dt 2.1s +All GPU(s): step 3992: loss 22.0312, lr 3.3e-04, dt 2.1s +All GPU(s): step 3993: loss 21.8281, lr 3.3e-04, dt 2.2s +All GPU(s): step 3994: loss 21.9531, lr 3.3e-04, dt 2.0s +All GPU(s): step 3995: loss 22.1250, lr 3.3e-04, dt 2.0s +All GPU(s): step 3996: loss 21.8281, lr 3.3e-04, dt 2.0s +All GPU(s): step 3997: loss 21.8125, lr 3.3e-04, dt 2.1s +All GPU(s): step 3998: loss 21.8438, lr 3.3e-04, dt 2.1s +All GPU(s): step 3999: loss 21.8906, lr 3.3e-04, dt 2.0s +saving checkpoint to checkpoints/ckpt_4000.pt +All GPU(s): step 4000: loss 21.3594, lr 3.3e-04, dt 2.1s +All GPU(s): step 4001: loss 21.1875, lr 3.3e-04, dt 2.1s +All GPU(s): step 4002: loss 21.1562, lr 3.3e-04, dt 2.1s +All GPU(s): step 4003: loss 20.6875, lr 3.3e-04, dt 2.0s +All GPU(s): step 4004: loss 19.8906, lr 3.3e-04, dt 2.0s +All GPU(s): step 4005: loss 19.8906, lr 3.3e-04, dt 2.0s +All GPU(s): step 4006: loss 19.1875, lr 3.3e-04, dt 2.1s +All GPU(s): step 4007: loss 19.0312, lr 3.3e-04, dt 2.1s +All GPU(s): step 4008: loss 18.0469, lr 3.3e-04, dt 2.1s +All GPU(s): step 4009: loss 17.4062, lr 3.3e-04, dt 2.0s +All GPU(s): step 4010: loss 16.7969, lr 3.3e-04, dt 2.1s +All GPU(s): step 4011: loss 17.2656, lr 3.3e-04, dt 2.1s +All GPU(s): step 4012: loss 17.0859, lr 3.3e-04, dt 2.2s +All GPU(s): step 4013: loss 17.4375, lr 3.3e-04, dt 2.0s +All GPU(s): step 4014: loss 18.1875, lr 3.3e-04, dt 2.0s +All GPU(s): step 4015: loss 18.6875, lr 3.3e-04, dt 2.0s +All GPU(s): step 4016: loss 19.7656, lr 3.3e-04, dt 2.1s +All GPU(s): step 4017: loss 19.4531, lr 3.3e-04, dt 2.3s +All GPU(s): step 4018: loss 20.0938, lr 3.2e-04, dt 2.0s +All GPU(s): step 4019: loss 20.1562, lr 3.2e-04, dt 2.0s +All GPU(s): step 4020: loss 20.3125, lr 3.2e-04, dt 2.1s +All GPU(s): step 4021: loss 20.3438, lr 3.2e-04, dt 2.1s +All GPU(s): step 4022: loss 20.4219, lr 3.2e-04, dt 2.1s +All GPU(s): step 4023: loss 20.4531, lr 3.2e-04, dt 2.0s +All GPU(s): step 4024: loss 20.7344, lr 3.2e-04, dt 2.1s +All GPU(s): step 4025: loss 21.0156, lr 3.2e-04, dt 2.0s +All GPU(s): step 4026: loss 20.6406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4027: loss 20.7031, lr 3.2e-04, dt 2.1s +All GPU(s): step 4028: loss 20.4844, lr 3.2e-04, dt 2.0s +All GPU(s): step 4029: loss 20.3906, lr 3.2e-04, dt 2.0s +All GPU(s): step 4030: loss 18.9844, lr 3.2e-04, dt 2.1s +All GPU(s): step 4031: loss 18.2500, lr 3.2e-04, dt 2.1s +All GPU(s): step 4032: loss 17.7188, lr 3.2e-04, dt 2.0s +All GPU(s): step 4033: loss 17.4062, lr 3.2e-04, dt 2.0s +All GPU(s): step 4034: loss 16.3984, lr 3.2e-04, dt 2.0s +All GPU(s): step 4035: loss 14.5938, lr 3.2e-04, dt 2.1s +All GPU(s): step 4036: loss 14.4219, lr 3.2e-04, dt 2.2s +All GPU(s): step 4037: loss 12.2422, lr 3.2e-04, dt 2.1s +All GPU(s): step 4038: loss 10.7656, lr 3.2e-04, dt 2.1s +All GPU(s): step 4039: loss 9.3828, lr 3.2e-04, dt 2.1s +All GPU(s): step 4040: loss 7.4570, lr 3.2e-04, dt 2.1s +All GPU(s): step 4041: loss 4.8574, lr 3.2e-04, dt 2.2s +All GPU(s): step 4042: loss 3.6094, lr 3.2e-04, dt 2.1s +All GPU(s): step 4043: loss 1.7119, lr 3.2e-04, dt 2.1s +All GPU(s): step 4044: loss 1.4448, lr 3.2e-04, dt 2.1s +All GPU(s): step 4045: loss 0.8638, lr 3.2e-04, dt 2.1s +All GPU(s): step 4046: loss 0.4786, lr 3.2e-04, dt 2.1s +All GPU(s): step 4047: loss 0.3597, lr 3.2e-04, dt 2.1s +All GPU(s): step 4048: loss 0.2757, lr 3.2e-04, dt 2.1s +All GPU(s): step 4049: loss 0.1177, lr 3.2e-04, dt 2.1s +All GPU(s): step 4050: loss 0.0642, lr 3.2e-04, dt 2.1s +All GPU(s): step 4051: loss 0.0442, lr 3.2e-04, dt 2.1s +All GPU(s): step 4052: loss 0.0334, lr 3.2e-04, dt 2.0s +All GPU(s): step 4053: loss 0.0322, lr 3.2e-04, dt 2.0s +All GPU(s): step 4054: loss 0.0277, lr 3.2e-04, dt 2.0s +All GPU(s): step 4055: loss 0.0512, lr 3.2e-04, dt 2.1s +All GPU(s): step 4056: loss 0.0739, lr 3.2e-04, dt 2.0s +All GPU(s): step 4057: loss 0.0728, lr 3.2e-04, dt 2.0s +All GPU(s): step 4058: loss 0.1269, lr 3.2e-04, dt 2.0s +All GPU(s): step 4059: loss 0.1339, lr 3.2e-04, dt 2.0s +All GPU(s): step 4060: loss 0.2730, lr 3.2e-04, dt 2.1s +All GPU(s): step 4061: loss 0.0412, lr 3.2e-04, dt 2.0s +All GPU(s): step 4062: loss 0.0190, lr 3.2e-04, dt 2.0s +All GPU(s): step 4063: loss 0.0493, lr 3.2e-04, dt 2.0s +All GPU(s): step 4064: loss 0.1027, lr 3.2e-04, dt 2.1s +All GPU(s): step 4065: loss 0.1017, lr 3.2e-04, dt 2.2s +All GPU(s): step 4066: loss 0.0897, lr 3.2e-04, dt 2.0s +All GPU(s): step 4067: loss 0.1222, lr 3.2e-04, dt 2.1s +All GPU(s): step 4068: loss 0.0864, lr 3.2e-04, dt 2.1s +All GPU(s): step 4069: loss 0.1008, lr 3.2e-04, dt 2.0s +All GPU(s): step 4070: loss 0.1433, lr 3.2e-04, dt 2.2s +All GPU(s): step 4071: loss 0.2489, lr 3.2e-04, dt 2.1s +All GPU(s): step 4072: loss 0.1651, lr 3.2e-04, dt 2.0s +All GPU(s): step 4073: loss 0.1915, lr 3.2e-04, dt 2.1s +All GPU(s): step 4074: loss 0.1874, lr 3.2e-04, dt 2.1s +All GPU(s): step 4075: loss 0.3786, lr 3.2e-04, dt 2.1s +All GPU(s): step 4076: loss 0.1919, lr 3.2e-04, dt 2.0s +All GPU(s): step 4077: loss 0.4896, lr 3.2e-04, dt 2.1s +All GPU(s): step 4078: loss 0.3343, lr 3.2e-04, dt 2.0s +All GPU(s): step 4079: loss 0.2438, lr 3.2e-04, dt 2.1s +All GPU(s): step 4080: loss 0.4234, lr 3.2e-04, dt 2.1s +All GPU(s): step 4081: loss 0.3506, lr 3.2e-04, dt 2.1s +All GPU(s): step 4082: loss 0.4385, lr 3.2e-04, dt 2.1s +All GPU(s): step 4083: loss 0.6290, lr 3.2e-04, dt 2.0s +All GPU(s): step 4084: loss 0.3608, lr 3.2e-04, dt 2.1s +All GPU(s): step 4085: loss 0.3791, lr 3.2e-04, dt 2.1s +All GPU(s): step 4086: loss 0.2564, lr 3.2e-04, dt 2.1s +All GPU(s): step 4087: loss 0.3252, lr 3.2e-04, dt 2.0s +All GPU(s): step 4088: loss 0.2438, lr 3.2e-04, dt 2.1s +All GPU(s): step 4089: loss 0.3871, lr 3.2e-04, dt 2.2s +All GPU(s): step 4090: loss 0.5785, lr 3.2e-04, dt 2.1s +All GPU(s): step 4091: loss 0.4584, lr 3.2e-04, dt 2.1s +All GPU(s): step 4092: loss 0.6279, lr 3.2e-04, dt 2.1s +All GPU(s): step 4093: loss 0.7218, lr 3.2e-04, dt 2.1s +All GPU(s): step 4094: loss 0.7559, lr 3.2e-04, dt 2.2s +All GPU(s): step 4095: loss 0.7130, lr 3.2e-04, dt 2.1s +All GPU(s): step 4096: loss 0.8521, lr 3.2e-04, dt 2.1s +All GPU(s): step 4097: loss 1.0017, lr 3.2e-04, dt 2.1s +All GPU(s): step 4098: loss 1.2759, lr 3.2e-04, dt 2.1s +All GPU(s): step 4099: loss 1.2148, lr 3.2e-04, dt 2.1s +All GPU(s): step 4100: loss 1.4028, lr 3.2e-04, dt 2.1s +All GPU(s): step 4101: loss 1.6094, lr 3.2e-04, dt 2.1s +All GPU(s): step 4102: loss 1.5322, lr 3.2e-04, dt 2.0s +All GPU(s): step 4103: loss 1.7266, lr 3.2e-04, dt 2.1s +All GPU(s): step 4104: loss 1.9590, lr 3.2e-04, dt 2.1s +All GPU(s): step 4105: loss 2.4336, lr 3.2e-04, dt 2.1s +All GPU(s): step 4106: loss 2.8193, lr 3.2e-04, dt 2.1s +All GPU(s): step 4107: loss 3.0322, lr 3.2e-04, dt 2.1s +All GPU(s): step 4108: loss 2.6943, lr 3.2e-04, dt 2.2s +All GPU(s): step 4109: loss 2.9023, lr 3.2e-04, dt 2.1s +All GPU(s): step 4110: loss 3.1309, lr 3.2e-04, dt 2.1s +All GPU(s): step 4111: loss 3.6797, lr 3.2e-04, dt 2.1s +All GPU(s): step 4112: loss 3.1094, lr 3.2e-04, dt 2.1s +All GPU(s): step 4113: loss 3.2129, lr 3.2e-04, dt 2.1s +All GPU(s): step 4114: loss 3.3164, lr 3.2e-04, dt 2.1s +All GPU(s): step 4115: loss 3.4551, lr 3.2e-04, dt 2.1s +All GPU(s): step 4116: loss 3.2344, lr 3.2e-04, dt 2.1s +All GPU(s): step 4117: loss 2.7041, lr 3.2e-04, dt 2.1s +All GPU(s): step 4118: loss 3.0410, lr 3.2e-04, dt 2.2s +All GPU(s): step 4119: loss 1.8926, lr 3.2e-04, dt 2.1s +All GPU(s): step 4120: loss 2.0146, lr 3.2e-04, dt 2.1s +All GPU(s): step 4121: loss 1.7656, lr 3.2e-04, dt 2.1s +All GPU(s): step 4122: loss 1.5527, lr 3.2e-04, dt 2.1s +All GPU(s): step 4123: loss 1.4741, lr 3.2e-04, dt 2.1s +All GPU(s): step 4124: loss 1.4771, lr 3.2e-04, dt 2.1s +All GPU(s): step 4125: loss 1.2300, lr 3.2e-04, dt 2.1s +All GPU(s): step 4126: loss 1.2349, lr 3.2e-04, dt 2.1s +All GPU(s): step 4127: loss 1.0049, lr 3.2e-04, dt 2.1s +All GPU(s): step 4128: loss 1.1226, lr 3.2e-04, dt 2.1s +All GPU(s): step 4129: loss 1.0088, lr 3.2e-04, dt 2.1s +All GPU(s): step 4130: loss 1.2622, lr 3.2e-04, dt 2.1s +All GPU(s): step 4131: loss 1.6040, lr 3.2e-04, dt 2.1s +All GPU(s): step 4132: loss 2.4287, lr 3.2e-04, dt 2.2s +All GPU(s): step 4133: loss 2.7344, lr 3.2e-04, dt 2.1s +All GPU(s): step 4134: loss 3.4434, lr 3.2e-04, dt 2.1s +All GPU(s): step 4135: loss 3.2910, lr 3.2e-04, dt 2.1s +All GPU(s): step 4136: loss 3.7969, lr 3.2e-04, dt 2.1s +All GPU(s): step 4137: loss 4.2754, lr 3.2e-04, dt 2.1s +All GPU(s): step 4138: loss 6.7969, lr 3.2e-04, dt 2.1s +All GPU(s): step 4139: loss 8.0117, lr 3.2e-04, dt 2.1s +All GPU(s): step 4140: loss 9.8984, lr 3.2e-04, dt 2.1s +All GPU(s): step 4141: loss 12.2266, lr 3.2e-04, dt 2.1s +All GPU(s): step 4142: loss 15.0234, lr 3.2e-04, dt 2.1s +All GPU(s): step 4143: loss 17.2188, lr 3.2e-04, dt 2.0s +All GPU(s): step 4144: loss 18.6406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4145: loss 19.3438, lr 3.2e-04, dt 2.1s +All GPU(s): step 4146: loss 20.9531, lr 3.2e-04, dt 2.1s +All GPU(s): step 4147: loss 20.6719, lr 3.2e-04, dt 2.1s +All GPU(s): step 4148: loss 20.8594, lr 3.2e-04, dt 2.1s +All GPU(s): step 4149: loss 21.0000, lr 3.2e-04, dt 2.1s +All GPU(s): step 4150: loss 21.2188, lr 3.2e-04, dt 2.0s +All GPU(s): step 4151: loss 20.6094, lr 3.2e-04, dt 2.1s +All GPU(s): step 4152: loss 21.0938, lr 3.2e-04, dt 2.1s +All GPU(s): step 4153: loss 21.0625, lr 3.2e-04, dt 2.1s +All GPU(s): step 4154: loss 21.1094, lr 3.2e-04, dt 2.0s +All GPU(s): step 4155: loss 21.1406, lr 3.2e-04, dt 2.1s +All GPU(s): step 4156: loss 21.2812, lr 3.2e-04, dt 2.2s +All GPU(s): step 4157: loss 20.7812, lr 3.2e-04, dt 2.1s +All GPU(s): step 4158: loss 20.2812, lr 3.2e-04, dt 2.1s +All GPU(s): step 4159: loss 19.8906, lr 3.2e-04, dt 2.1s +All GPU(s): step 4160: loss 19.6094, lr 3.2e-04, dt 2.1s +All GPU(s): step 4161: loss 19.3438, lr 3.2e-04, dt 2.1s +All GPU(s): step 4162: loss 18.9062, lr 3.2e-04, dt 2.1s +All GPU(s): step 4163: loss 18.2188, lr 3.2e-04, dt 2.1s +All GPU(s): step 4164: loss 19.1875, lr 3.2e-04, dt 2.0s +All GPU(s): step 4165: loss 19.5156, lr 3.2e-04, dt 2.1s +All GPU(s): step 4166: loss 20.4062, lr 3.2e-04, dt 2.1s +All GPU(s): step 4167: loss 21.2812, lr 3.2e-04, dt 2.0s +All GPU(s): step 4168: loss 21.4219, lr 3.2e-04, dt 2.0s +All GPU(s): step 4169: loss 21.7500, lr 3.2e-04, dt 2.1s +All GPU(s): step 4170: loss 22.0156, lr 3.2e-04, dt 2.1s +All GPU(s): step 4171: loss 22.0000, lr 3.2e-04, dt 2.1s +All GPU(s): step 4172: loss 22.6094, lr 3.2e-04, dt 2.0s +All GPU(s): step 4173: loss 22.4688, lr 3.2e-04, dt 2.0s +All GPU(s): step 4174: loss 22.8125, lr 3.2e-04, dt 2.0s +All GPU(s): step 4175: loss 22.8906, lr 3.2e-04, dt 2.1s +All GPU(s): step 4176: loss 22.9375, lr 3.2e-04, dt 2.0s +All GPU(s): step 4177: loss 22.6562, lr 3.2e-04, dt 2.0s +All GPU(s): step 4178: loss 22.4844, lr 3.2e-04, dt 2.1s +All GPU(s): step 4179: loss 22.6875, lr 3.2e-04, dt 2.1s +All GPU(s): step 4180: loss 22.9844, lr 3.2e-04, dt 2.1s +All GPU(s): step 4181: loss 23.1719, lr 3.2e-04, dt 2.1s +All GPU(s): step 4182: loss 22.7969, lr 3.2e-04, dt 2.1s +All GPU(s): step 4183: loss 22.6562, lr 3.2e-04, dt 2.1s +All GPU(s): step 4184: loss 22.6719, lr 3.2e-04, dt 2.1s +All GPU(s): step 4185: loss 22.4688, lr 3.2e-04, dt 2.1s +All GPU(s): step 4186: loss 23.0938, lr 3.2e-04, dt 2.0s +All GPU(s): step 4187: loss 22.3594, lr 3.2e-04, dt 2.1s +All GPU(s): step 4188: loss 22.4062, lr 3.2e-04, dt 2.1s +All GPU(s): step 4189: loss 22.6719, lr 3.2e-04, dt 2.1s +All GPU(s): step 4190: loss 22.6094, lr 3.2e-04, dt 2.1s +All GPU(s): step 4191: loss 22.4219, lr 3.2e-04, dt 2.1s +All GPU(s): step 4192: loss 22.1562, lr 3.2e-04, dt 2.0s +All GPU(s): step 4193: loss 22.2812, lr 3.2e-04, dt 2.0s +All GPU(s): step 4194: loss 22.4688, lr 3.2e-04, dt 2.1s +All GPU(s): step 4195: loss 22.2969, lr 3.2e-04, dt 2.1s +All GPU(s): step 4196: loss 22.4375, lr 3.2e-04, dt 2.0s +All GPU(s): step 4197: loss 22.5625, lr 3.2e-04, dt 2.1s +All GPU(s): step 4198: loss 22.3281, lr 3.2e-04, dt 2.1s +All GPU(s): step 4199: loss 22.2656, lr 3.2e-04, dt 2.1s +All GPU(s): step 4200: loss 22.2188, lr 3.2e-04, dt 2.1s +All GPU(s): step 4201: loss 22.0781, lr 3.2e-04, dt 2.0s +All GPU(s): step 4202: loss 22.1562, lr 3.2e-04, dt 2.1s +All GPU(s): step 4203: loss 22.2812, lr 3.2e-04, dt 2.0s +All GPU(s): step 4204: loss 22.1250, lr 3.2e-04, dt 2.1s +All GPU(s): step 4205: loss 22.1406, lr 3.2e-04, dt 2.0s +All GPU(s): step 4206: loss 22.2031, lr 3.2e-04, dt 2.0s +All GPU(s): step 4207: loss 22.1250, lr 3.2e-04, dt 2.0s +All GPU(s): step 4208: loss 22.1719, lr 3.2e-04, dt 2.1s +All GPU(s): step 4209: loss 22.4219, lr 3.2e-04, dt 2.1s +All GPU(s): step 4210: loss 22.3125, lr 3.2e-04, dt 2.0s +All GPU(s): step 4211: loss 22.1094, lr 3.2e-04, dt 2.0s +All GPU(s): step 4212: loss 21.7031, lr 3.2e-04, dt 2.0s +All GPU(s): step 4213: loss 22.2031, lr 3.2e-04, dt 2.1s +All GPU(s): step 4214: loss 22.2500, lr 3.2e-04, dt 2.2s +All GPU(s): step 4215: loss 22.3906, lr 3.2e-04, dt 2.1s +All GPU(s): step 4216: loss 22.2344, lr 3.2e-04, dt 2.0s +All GPU(s): step 4217: loss 21.9375, lr 3.2e-04, dt 2.0s +All GPU(s): step 4218: loss 22.1562, lr 3.2e-04, dt 2.1s +All GPU(s): step 4219: loss 22.0938, lr 3.2e-04, dt 2.1s +All GPU(s): step 4220: loss 22.2812, lr 3.2e-04, dt 2.1s +All GPU(s): step 4221: loss 22.2656, lr 3.2e-04, dt 2.0s +All GPU(s): step 4222: loss 22.2031, lr 3.2e-04, dt 2.1s +All GPU(s): step 4223: loss 22.0938, lr 3.2e-04, dt 2.1s +All GPU(s): step 4224: loss 21.7500, lr 3.2e-04, dt 2.1s +All GPU(s): step 4225: loss 22.0781, lr 3.2e-04, dt 2.1s +All GPU(s): step 4226: loss 22.1875, lr 3.2e-04, dt 2.1s +All GPU(s): step 4227: loss 21.6562, lr 3.2e-04, dt 2.1s +All GPU(s): step 4228: loss 21.7500, lr 3.2e-04, dt 2.1s +All GPU(s): step 4229: loss 22.0312, lr 3.2e-04, dt 2.1s +All GPU(s): step 4230: loss 22.2188, lr 3.2e-04, dt 2.1s +All GPU(s): step 4231: loss 22.3125, lr 3.1e-04, dt 2.1s +All GPU(s): step 4232: loss 22.5625, lr 3.1e-04, dt 2.1s +All GPU(s): step 4233: loss 22.3906, lr 3.1e-04, dt 2.2s +All GPU(s): step 4234: loss 22.5312, lr 3.1e-04, dt 2.1s +All GPU(s): step 4235: loss 22.6719, lr 3.1e-04, dt 2.1s +All GPU(s): step 4236: loss 22.5625, lr 3.1e-04, dt 2.1s +All GPU(s): step 4237: loss 23.0938, lr 3.1e-04, dt 2.1s +All GPU(s): step 4238: loss 22.8906, lr 3.1e-04, dt 2.2s +All GPU(s): step 4239: loss 23.0156, lr 3.1e-04, dt 2.1s +All GPU(s): step 4240: loss 22.7031, lr 3.1e-04, dt 2.1s +All GPU(s): step 4241: loss 22.8594, lr 3.1e-04, dt 2.0s +All GPU(s): step 4242: loss 22.7812, lr 3.1e-04, dt 2.1s +All GPU(s): step 4243: loss 22.6875, lr 3.1e-04, dt 2.1s +All GPU(s): step 4244: loss 22.4375, lr 3.1e-04, dt 2.0s +All GPU(s): step 4245: loss 22.7500, lr 3.1e-04, dt 2.0s +All GPU(s): step 4246: loss 22.7188, lr 3.1e-04, dt 2.1s +All GPU(s): step 4247: loss 22.8281, lr 3.1e-04, dt 2.1s +All GPU(s): step 4248: loss 22.8438, lr 3.1e-04, dt 2.1s +All GPU(s): step 4249: loss 22.8125, lr 3.1e-04, dt 2.1s +All GPU(s): step 4250: loss 22.9688, lr 3.1e-04, dt 2.0s +All GPU(s): step 4251: loss 22.7969, lr 3.1e-04, dt 2.0s +All GPU(s): step 4252: loss 22.4844, lr 3.1e-04, dt 2.1s +All GPU(s): step 4253: loss 22.1875, lr 3.1e-04, dt 2.0s +All GPU(s): step 4254: loss 21.7031, lr 3.1e-04, dt 2.1s +All GPU(s): step 4255: loss 21.7344, lr 3.1e-04, dt 2.1s +All GPU(s): step 4256: loss 21.5625, lr 3.1e-04, dt 2.1s +All GPU(s): step 4257: loss 21.5625, lr 3.1e-04, dt 2.1s +All GPU(s): step 4258: loss 21.3594, lr 3.1e-04, dt 2.1s +All GPU(s): step 4259: loss 21.6562, lr 3.1e-04, dt 2.0s +All GPU(s): step 4260: loss 21.3750, lr 3.1e-04, dt 2.0s +All GPU(s): step 4261: loss 21.3906, lr 3.1e-04, dt 2.0s +All GPU(s): step 4262: loss 21.5625, lr 3.1e-04, dt 2.2s +All GPU(s): step 4263: loss 21.7344, lr 3.1e-04, dt 2.1s +All GPU(s): step 4264: loss 21.5156, lr 3.1e-04, dt 2.0s +All GPU(s): step 4265: loss 21.5625, lr 3.1e-04, dt 2.0s +All GPU(s): step 4266: loss 21.2344, lr 3.1e-04, dt 2.0s +All GPU(s): step 4267: loss 20.9062, lr 3.1e-04, dt 2.1s +All GPU(s): step 4268: loss 21.4531, lr 3.1e-04, dt 2.0s +All GPU(s): step 4269: loss 21.6250, lr 3.1e-04, dt 2.0s +All GPU(s): step 4270: loss 21.3594, lr 3.1e-04, dt 2.0s +All GPU(s): step 4271: loss 21.4844, lr 3.1e-04, dt 2.1s +All GPU(s): step 4272: loss 21.5781, lr 3.1e-04, dt 2.1s +All GPU(s): step 4273: loss 21.2500, lr 3.1e-04, dt 2.0s +All GPU(s): step 4274: loss 21.2656, lr 3.1e-04, dt 2.0s +All GPU(s): step 4275: loss 21.3438, lr 3.1e-04, dt 2.1s +All GPU(s): step 4276: loss 21.4062, lr 3.1e-04, dt 2.1s +All GPU(s): step 4277: loss 21.5469, lr 3.1e-04, dt 2.1s +All GPU(s): step 4278: loss 21.4688, lr 3.1e-04, dt 2.1s +All GPU(s): step 4279: loss 21.3906, lr 3.1e-04, dt 2.1s +All GPU(s): step 4280: loss 21.3594, lr 3.1e-04, dt 2.1s +All GPU(s): step 4281: loss 21.4375, lr 3.1e-04, dt 2.1s +All GPU(s): step 4282: loss 21.4688, lr 3.1e-04, dt 2.1s +All GPU(s): step 4283: loss 21.2500, lr 3.1e-04, dt 2.1s +All GPU(s): step 4284: loss 21.2969, lr 3.1e-04, dt 2.1s +All GPU(s): step 4285: loss 21.1250, lr 3.1e-04, dt 2.0s +All GPU(s): step 4286: loss 21.2656, lr 3.1e-04, dt 2.2s +All GPU(s): step 4287: loss 21.3125, lr 3.1e-04, dt 2.0s +All GPU(s): step 4288: loss 21.0625, lr 3.1e-04, dt 2.1s +All GPU(s): step 4289: loss 20.9844, lr 3.1e-04, dt 2.1s +All GPU(s): step 4290: loss 20.7969, lr 3.1e-04, dt 2.1s +All GPU(s): step 4291: loss 20.6719, lr 3.1e-04, dt 2.1s +All GPU(s): step 4292: loss 21.0000, lr 3.1e-04, dt 2.1s +All GPU(s): step 4293: loss 20.8281, lr 3.1e-04, dt 2.1s +All GPU(s): step 4294: loss 21.0625, lr 3.1e-04, dt 2.1s +All GPU(s): step 4295: loss 20.8594, lr 3.1e-04, dt 2.1s +All GPU(s): step 4296: loss 21.2500, lr 3.1e-04, dt 2.1s +All GPU(s): step 4297: loss 20.9219, lr 3.1e-04, dt 2.0s +All GPU(s): step 4298: loss 21.2656, lr 3.1e-04, dt 2.1s +All GPU(s): step 4299: loss 20.9688, lr 3.1e-04, dt 2.1s +All GPU(s): step 4300: loss 21.0312, lr 3.1e-04, dt 2.1s +All GPU(s): step 4301: loss 21.2656, lr 3.1e-04, dt 2.1s +All GPU(s): step 4302: loss 21.1406, lr 3.1e-04, dt 2.1s +All GPU(s): step 4303: loss 21.1250, lr 3.1e-04, dt 2.1s +All GPU(s): step 4304: loss 21.2500, lr 3.1e-04, dt 2.1s +All GPU(s): step 4305: loss 21.5000, lr 3.1e-04, dt 2.1s +All GPU(s): step 4306: loss 21.2969, lr 3.1e-04, dt 2.1s +All GPU(s): step 4307: loss 21.3906, lr 3.1e-04, dt 2.1s +All GPU(s): step 4308: loss 21.7812, lr 3.1e-04, dt 2.1s +All GPU(s): step 4309: loss 21.5625, lr 3.1e-04, dt 2.1s +All GPU(s): step 4310: loss 21.6250, lr 3.1e-04, dt 2.2s +All GPU(s): step 4311: loss 21.2031, lr 3.1e-04, dt 2.0s +All GPU(s): step 4312: loss 21.6094, lr 3.1e-04, dt 2.1s +All GPU(s): step 4313: loss 21.6875, lr 3.1e-04, dt 2.1s +All GPU(s): step 4314: loss 21.5938, lr 3.1e-04, dt 2.1s +All GPU(s): step 4315: loss 21.6250, lr 3.1e-04, dt 2.2s +All GPU(s): step 4316: loss 21.2969, lr 3.1e-04, dt 2.0s +All GPU(s): step 4317: loss 21.5000, lr 3.1e-04, dt 2.0s +All GPU(s): step 4318: loss 21.5000, lr 3.1e-04, dt 2.0s +All GPU(s): step 4319: loss 21.2656, lr 3.1e-04, dt 2.1s +All GPU(s): step 4320: loss 21.4531, lr 3.1e-04, dt 2.1s +All GPU(s): step 4321: loss 21.2969, lr 3.1e-04, dt 2.1s +All GPU(s): step 4322: loss 21.4219, lr 3.1e-04, dt 2.0s +All GPU(s): step 4323: loss 21.4375, lr 3.1e-04, dt 2.1s +All GPU(s): step 4324: loss 21.4062, lr 3.1e-04, dt 2.1s +All GPU(s): step 4325: loss 21.2031, lr 3.1e-04, dt 2.1s +All GPU(s): step 4326: loss 21.2656, lr 3.1e-04, dt 2.0s +All GPU(s): step 4327: loss 21.1875, lr 3.1e-04, dt 2.0s +All GPU(s): step 4328: loss 20.7500, lr 3.1e-04, dt 2.0s +All GPU(s): step 4329: loss 21.3750, lr 3.1e-04, dt 2.1s +All GPU(s): step 4330: loss 21.3594, lr 3.1e-04, dt 2.0s +All GPU(s): step 4331: loss 21.4375, lr 3.1e-04, dt 2.1s +All GPU(s): step 4332: loss 21.5938, lr 3.1e-04, dt 2.0s +All GPU(s): step 4333: loss 21.1250, lr 3.1e-04, dt 2.0s +All GPU(s): step 4334: loss 21.4219, lr 3.1e-04, dt 2.1s +All GPU(s): step 4335: loss 21.1562, lr 3.1e-04, dt 2.0s +All GPU(s): step 4336: loss 21.1250, lr 3.1e-04, dt 2.0s +All GPU(s): step 4337: loss 21.3594, lr 3.1e-04, dt 2.0s +All GPU(s): step 4338: loss 21.4219, lr 3.1e-04, dt 2.1s +All GPU(s): step 4339: loss 21.0781, lr 3.1e-04, dt 2.1s +All GPU(s): step 4340: loss 21.0938, lr 3.1e-04, dt 2.0s +All GPU(s): step 4341: loss 21.2812, lr 3.1e-04, dt 2.0s +All GPU(s): step 4342: loss 21.3594, lr 3.1e-04, dt 2.0s +All GPU(s): step 4343: loss 21.2188, lr 3.1e-04, dt 2.0s +All GPU(s): step 4344: loss 21.2969, lr 3.1e-04, dt 2.1s +All GPU(s): step 4345: loss 21.1719, lr 3.1e-04, dt 2.0s +All GPU(s): step 4346: loss 21.6250, lr 3.1e-04, dt 2.0s +All GPU(s): step 4347: loss 21.3125, lr 3.1e-04, dt 2.0s +All GPU(s): step 4348: loss 21.3750, lr 3.1e-04, dt 2.1s +All GPU(s): step 4349: loss 21.4375, lr 3.1e-04, dt 2.1s +All GPU(s): step 4350: loss 21.8594, lr 3.1e-04, dt 2.1s +All GPU(s): step 4351: loss 21.6094, lr 3.1e-04, dt 2.0s +All GPU(s): step 4352: loss 21.4688, lr 3.1e-04, dt 2.0s +All GPU(s): step 4353: loss 21.4531, lr 3.1e-04, dt 2.1s +All GPU(s): step 4354: loss 21.7188, lr 3.1e-04, dt 2.0s +All GPU(s): step 4355: loss 21.7188, lr 3.1e-04, dt 2.0s +All GPU(s): step 4356: loss 21.7812, lr 3.1e-04, dt 2.1s +All GPU(s): step 4357: loss 21.4844, lr 3.1e-04, dt 2.0s +All GPU(s): step 4358: loss 21.7344, lr 3.1e-04, dt 2.1s +All GPU(s): step 4359: loss 21.8281, lr 3.1e-04, dt 2.1s +All GPU(s): step 4360: loss 21.6406, lr 3.1e-04, dt 2.0s +All GPU(s): step 4361: loss 21.8750, lr 3.1e-04, dt 2.0s +All GPU(s): step 4362: loss 21.4844, lr 3.1e-04, dt 2.1s +All GPU(s): step 4363: loss 22.0469, lr 3.1e-04, dt 2.1s +All GPU(s): step 4364: loss 21.8438, lr 3.1e-04, dt 2.0s +All GPU(s): step 4365: loss 21.7500, lr 3.1e-04, dt 2.0s +All GPU(s): step 4366: loss 21.8594, lr 3.1e-04, dt 2.0s +All GPU(s): step 4367: loss 22.0938, lr 3.1e-04, dt 2.1s +All GPU(s): step 4368: loss 21.5469, lr 3.1e-04, dt 2.1s +All GPU(s): step 4369: loss 21.6719, lr 3.1e-04, dt 2.0s +All GPU(s): step 4370: loss 21.8281, lr 3.1e-04, dt 2.0s +All GPU(s): step 4371: loss 21.6719, lr 3.1e-04, dt 2.0s +All GPU(s): step 4372: loss 21.6094, lr 3.1e-04, dt 2.1s +All GPU(s): step 4373: loss 21.3906, lr 3.1e-04, dt 2.1s +All GPU(s): step 4374: loss 21.3438, lr 3.1e-04, dt 2.0s +All GPU(s): step 4375: loss 21.0625, lr 3.1e-04, dt 2.0s +All GPU(s): step 4376: loss 21.3594, lr 3.1e-04, dt 2.0s +All GPU(s): step 4377: loss 20.9531, lr 3.1e-04, dt 2.1s +All GPU(s): step 4378: loss 21.2500, lr 3.1e-04, dt 2.1s +All GPU(s): step 4379: loss 21.0312, lr 3.1e-04, dt 2.0s +All GPU(s): step 4380: loss 20.5312, lr 3.1e-04, dt 2.0s +All GPU(s): step 4381: loss 19.6875, lr 3.1e-04, dt 2.1s +All GPU(s): step 4382: loss 19.5625, lr 3.1e-04, dt 2.1s +All GPU(s): step 4383: loss 20.2969, lr 3.1e-04, dt 2.0s +All GPU(s): step 4384: loss 20.5469, lr 3.1e-04, dt 2.1s +All GPU(s): step 4385: loss 21.3125, lr 3.1e-04, dt 2.0s +All GPU(s): step 4386: loss 21.2500, lr 3.1e-04, dt 2.1s +All GPU(s): step 4387: loss 21.0781, lr 3.1e-04, dt 2.1s +All GPU(s): step 4388: loss 20.8281, lr 3.1e-04, dt 2.0s +All GPU(s): step 4389: loss 21.1719, lr 3.1e-04, dt 2.1s +All GPU(s): step 4390: loss 20.7500, lr 3.1e-04, dt 2.1s +All GPU(s): step 4391: loss 21.1250, lr 3.1e-04, dt 2.1s +All GPU(s): step 4392: loss 21.0156, lr 3.1e-04, dt 2.2s +All GPU(s): step 4393: loss 20.8906, lr 3.1e-04, dt 2.0s +All GPU(s): step 4394: loss 20.7500, lr 3.1e-04, dt 2.0s +All GPU(s): step 4395: loss 20.8750, lr 3.1e-04, dt 2.0s +All GPU(s): step 4396: loss 21.0625, lr 3.1e-04, dt 2.0s +All GPU(s): step 4397: loss 21.1875, lr 3.1e-04, dt 2.1s +All GPU(s): step 4398: loss 21.2344, lr 3.1e-04, dt 2.1s +All GPU(s): step 4399: loss 21.4219, lr 3.1e-04, dt 2.1s +All GPU(s): step 4400: loss 21.7812, lr 3.1e-04, dt 2.0s +All GPU(s): step 4401: loss 21.5312, lr 3.1e-04, dt 2.1s +All GPU(s): step 4402: loss 21.7656, lr 3.1e-04, dt 2.1s +All GPU(s): step 4403: loss 21.5469, lr 3.1e-04, dt 2.0s +All GPU(s): step 4404: loss 21.9062, lr 3.1e-04, dt 2.1s +All GPU(s): step 4405: loss 21.8906, lr 3.1e-04, dt 2.0s +All GPU(s): step 4406: loss 21.8906, lr 3.1e-04, dt 2.1s +All GPU(s): step 4407: loss 21.9219, lr 3.1e-04, dt 2.0s +All GPU(s): step 4408: loss 21.7656, lr 3.1e-04, dt 2.0s +All GPU(s): step 4409: loss 21.7031, lr 3.1e-04, dt 2.0s +All GPU(s): step 4410: loss 21.9062, lr 3.1e-04, dt 2.0s +All GPU(s): step 4411: loss 21.9688, lr 3.1e-04, dt 2.1s +All GPU(s): step 4412: loss 21.9844, lr 3.1e-04, dt 2.1s +All GPU(s): step 4413: loss 22.1250, lr 3.1e-04, dt 2.1s +All GPU(s): step 4414: loss 21.9062, lr 3.1e-04, dt 2.0s +All GPU(s): step 4415: loss 22.2344, lr 3.1e-04, dt 2.1s +All GPU(s): step 4416: loss 22.4531, lr 3.1e-04, dt 2.1s +All GPU(s): step 4417: loss 22.4219, lr 3.1e-04, dt 2.1s +All GPU(s): step 4418: loss 22.4688, lr 3.1e-04, dt 2.1s +All GPU(s): step 4419: loss 22.4688, lr 3.1e-04, dt 2.1s +All GPU(s): step 4420: loss 22.6250, lr 3.1e-04, dt 2.1s +All GPU(s): step 4421: loss 22.3281, lr 3.1e-04, dt 2.1s +All GPU(s): step 4422: loss 22.3281, lr 3.1e-04, dt 2.0s +All GPU(s): step 4423: loss 22.0156, lr 3.1e-04, dt 2.1s +All GPU(s): step 4424: loss 22.0469, lr 3.1e-04, dt 2.1s +All GPU(s): step 4425: loss 22.0781, lr 3.1e-04, dt 2.1s +All GPU(s): step 4426: loss 21.6094, lr 3.1e-04, dt 2.1s +All GPU(s): step 4427: loss 21.6094, lr 3.1e-04, dt 2.1s +All GPU(s): step 4428: loss 21.4219, lr 3.1e-04, dt 2.1s +All GPU(s): step 4429: loss 21.6250, lr 3.1e-04, dt 2.1s +All GPU(s): step 4430: loss 21.2656, lr 3.1e-04, dt 2.1s +All GPU(s): step 4431: loss 21.3750, lr 3.1e-04, dt 2.1s +All GPU(s): step 4432: loss 21.3438, lr 3.1e-04, dt 2.1s +All GPU(s): step 4433: loss 21.6406, lr 3.1e-04, dt 2.1s +All GPU(s): step 4434: loss 21.6250, lr 3.1e-04, dt 2.1s +All GPU(s): step 4435: loss 21.6719, lr 3.0e-04, dt 2.1s +All GPU(s): step 4436: loss 21.0469, lr 3.0e-04, dt 2.1s +All GPU(s): step 4437: loss 21.2969, lr 3.0e-04, dt 2.1s +All GPU(s): step 4438: loss 21.1875, lr 3.0e-04, dt 2.1s +All GPU(s): step 4439: loss 21.0312, lr 3.0e-04, dt 2.1s +All GPU(s): step 4440: loss 21.2188, lr 3.0e-04, dt 2.2s +All GPU(s): step 4441: loss 21.3906, lr 3.0e-04, dt 2.1s +All GPU(s): step 4442: loss 21.2969, lr 3.0e-04, dt 2.1s +All GPU(s): step 4443: loss 20.9062, lr 3.0e-04, dt 2.0s +All GPU(s): step 4444: loss 21.0000, lr 3.0e-04, dt 2.1s +All GPU(s): step 4445: loss 21.5000, lr 3.0e-04, dt 2.1s +All GPU(s): step 4446: loss 21.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4447: loss 21.6094, lr 3.0e-04, dt 2.0s +All GPU(s): step 4448: loss 21.4844, lr 3.0e-04, dt 2.0s +All GPU(s): step 4449: loss 21.1250, lr 3.0e-04, dt 2.1s +All GPU(s): step 4450: loss 21.6094, lr 3.0e-04, dt 2.1s +All GPU(s): step 4451: loss 21.4531, lr 3.0e-04, dt 2.0s +All GPU(s): step 4452: loss 21.2500, lr 3.0e-04, dt 2.0s +All GPU(s): step 4453: loss 21.3594, lr 3.0e-04, dt 2.0s +All GPU(s): step 4454: loss 21.0625, lr 3.0e-04, dt 2.1s +All GPU(s): step 4455: loss 21.5312, lr 3.0e-04, dt 2.1s +All GPU(s): step 4456: loss 21.4531, lr 3.0e-04, dt 2.0s +All GPU(s): step 4457: loss 21.3594, lr 3.0e-04, dt 2.0s +All GPU(s): step 4458: loss 21.1719, lr 3.0e-04, dt 2.0s +All GPU(s): step 4459: loss 21.1406, lr 3.0e-04, dt 2.1s +All GPU(s): step 4460: loss 21.3125, lr 3.0e-04, dt 2.0s +All GPU(s): step 4461: loss 21.4375, lr 3.0e-04, dt 2.0s +All GPU(s): step 4462: loss 21.2500, lr 3.0e-04, dt 2.0s +All GPU(s): step 4463: loss 21.5469, lr 3.0e-04, dt 2.0s +All GPU(s): step 4464: loss 21.0938, lr 3.0e-04, dt 2.1s +All GPU(s): step 4465: loss 21.2656, lr 3.0e-04, dt 2.1s +All GPU(s): step 4466: loss 21.0938, lr 3.0e-04, dt 2.0s +All GPU(s): step 4467: loss 20.8438, lr 3.0e-04, dt 2.0s +All GPU(s): step 4468: loss 20.1250, lr 3.0e-04, dt 2.0s +All GPU(s): step 4469: loss 18.8125, lr 3.0e-04, dt 2.1s +All GPU(s): step 4470: loss 18.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4471: loss 16.9219, lr 3.0e-04, dt 2.1s +All GPU(s): step 4472: loss 17.3906, lr 3.0e-04, dt 2.0s +All GPU(s): step 4473: loss 17.1562, lr 3.0e-04, dt 2.1s +All GPU(s): step 4474: loss 16.9062, lr 3.0e-04, dt 2.1s +All GPU(s): step 4475: loss 16.2656, lr 3.0e-04, dt 2.1s +All GPU(s): step 4476: loss 17.2812, lr 3.0e-04, dt 2.0s +All GPU(s): step 4477: loss 17.0938, lr 3.0e-04, dt 2.0s +All GPU(s): step 4478: loss 16.0000, lr 3.0e-04, dt 2.1s +All GPU(s): step 4479: loss 14.3281, lr 3.0e-04, dt 2.1s +All GPU(s): step 4480: loss 13.3203, lr 3.0e-04, dt 2.0s +All GPU(s): step 4481: loss 12.3828, lr 3.0e-04, dt 2.0s +All GPU(s): step 4482: loss 11.4297, lr 3.0e-04, dt 2.0s +All GPU(s): step 4483: loss 11.0234, lr 3.0e-04, dt 2.1s +All GPU(s): step 4484: loss 10.5547, lr 3.0e-04, dt 2.1s +All GPU(s): step 4485: loss 10.3047, lr 3.0e-04, dt 2.0s +All GPU(s): step 4486: loss 10.4688, lr 3.0e-04, dt 2.0s +All GPU(s): step 4487: loss 11.1484, lr 3.0e-04, dt 2.0s +All GPU(s): step 4488: loss 10.8516, lr 3.0e-04, dt 2.1s +All GPU(s): step 4489: loss 12.2734, lr 3.0e-04, dt 2.0s +All GPU(s): step 4490: loss 13.5781, lr 3.0e-04, dt 2.0s +All GPU(s): step 4491: loss 13.9922, lr 3.0e-04, dt 2.0s +All GPU(s): step 4492: loss 13.6328, lr 3.0e-04, dt 2.0s +All GPU(s): step 4493: loss 13.4375, lr 3.0e-04, dt 2.1s +All GPU(s): step 4494: loss 12.9609, lr 3.0e-04, dt 2.1s +All GPU(s): step 4495: loss 12.6172, lr 3.0e-04, dt 2.1s +All GPU(s): step 4496: loss 12.2266, lr 3.0e-04, dt 2.0s +All GPU(s): step 4497: loss 10.6094, lr 3.0e-04, dt 2.1s +All GPU(s): step 4498: loss 9.1797, lr 3.0e-04, dt 2.2s +All GPU(s): step 4499: loss 6.2695, lr 3.0e-04, dt 2.1s +All GPU(s): step 4500: loss 3.6719, lr 3.0e-04, dt 2.1s +All GPU(s): step 4501: loss 1.7236, lr 3.0e-04, dt 2.1s +All GPU(s): step 4502: loss 1.7637, lr 3.0e-04, dt 2.1s +All GPU(s): step 4503: loss 0.8745, lr 3.0e-04, dt 2.1s +All GPU(s): step 4504: loss 1.1667, lr 3.0e-04, dt 2.1s +All GPU(s): step 4505: loss 0.7778, lr 3.0e-04, dt 2.1s +All GPU(s): step 4506: loss 0.9355, lr 3.0e-04, dt 2.1s +All GPU(s): step 4507: loss 0.9243, lr 3.0e-04, dt 2.1s +All GPU(s): step 4508: loss 0.8083, lr 3.0e-04, dt 2.1s +All GPU(s): step 4509: loss 0.8613, lr 3.0e-04, dt 2.1s +All GPU(s): step 4510: loss 0.8210, lr 3.0e-04, dt 2.1s +All GPU(s): step 4511: loss 0.7954, lr 3.0e-04, dt 2.1s +All GPU(s): step 4512: loss 1.0415, lr 3.0e-04, dt 2.2s +All GPU(s): step 4513: loss 0.7432, lr 3.0e-04, dt 2.1s +All GPU(s): step 4514: loss 1.0887, lr 3.0e-04, dt 2.1s +All GPU(s): step 4515: loss 0.8733, lr 3.0e-04, dt 2.1s +All GPU(s): step 4516: loss 1.3350, lr 3.0e-04, dt 2.1s +All GPU(s): step 4517: loss 1.8936, lr 3.0e-04, dt 2.2s +All GPU(s): step 4518: loss 3.5586, lr 3.0e-04, dt 2.1s +All GPU(s): step 4519: loss 5.3691, lr 3.0e-04, dt 2.1s +All GPU(s): step 4520: loss 9.5469, lr 3.0e-04, dt 2.1s +All GPU(s): step 4521: loss 12.7734, lr 3.0e-04, dt 2.1s +All GPU(s): step 4522: loss 14.3750, lr 3.0e-04, dt 2.1s +All GPU(s): step 4523: loss 15.7109, lr 3.0e-04, dt 2.0s +All GPU(s): step 4524: loss 14.8672, lr 3.0e-04, dt 2.1s +All GPU(s): step 4525: loss 14.2266, lr 3.0e-04, dt 2.1s +All GPU(s): step 4526: loss 12.6094, lr 3.0e-04, dt 2.1s +All GPU(s): step 4527: loss 12.2031, lr 3.0e-04, dt 2.1s +All GPU(s): step 4528: loss 9.2383, lr 3.0e-04, dt 2.0s +All GPU(s): step 4529: loss 7.3438, lr 3.0e-04, dt 2.1s +All GPU(s): step 4530: loss 6.0938, lr 3.0e-04, dt 2.1s +All GPU(s): step 4531: loss 3.9941, lr 3.0e-04, dt 2.1s +All GPU(s): step 4532: loss 3.1523, lr 3.0e-04, dt 2.1s +All GPU(s): step 4533: loss 2.2949, lr 3.0e-04, dt 2.1s +All GPU(s): step 4534: loss 2.0576, lr 3.0e-04, dt 2.0s +All GPU(s): step 4535: loss 1.5581, lr 3.0e-04, dt 2.1s +All GPU(s): step 4536: loss 0.9331, lr 3.0e-04, dt 2.2s +All GPU(s): step 4537: loss 1.0449, lr 3.0e-04, dt 2.0s +All GPU(s): step 4538: loss 1.0469, lr 3.0e-04, dt 2.0s +All GPU(s): step 4539: loss 0.9800, lr 3.0e-04, dt 2.0s +All GPU(s): step 4540: loss 0.6743, lr 3.0e-04, dt 2.1s +All GPU(s): step 4541: loss 0.4324, lr 3.0e-04, dt 2.1s +All GPU(s): step 4542: loss 0.8348, lr 3.0e-04, dt 2.1s +All GPU(s): step 4543: loss 0.4479, lr 3.0e-04, dt 2.0s +All GPU(s): step 4544: loss 0.5102, lr 3.0e-04, dt 2.1s +All GPU(s): step 4545: loss 0.5393, lr 3.0e-04, dt 2.1s +All GPU(s): step 4546: loss 0.4255, lr 3.0e-04, dt 2.2s +All GPU(s): step 4547: loss 0.3146, lr 3.0e-04, dt 2.1s +All GPU(s): step 4548: loss 0.2717, lr 3.0e-04, dt 2.1s +All GPU(s): step 4549: loss 0.2730, lr 3.0e-04, dt 2.0s +All GPU(s): step 4550: loss 0.5686, lr 3.0e-04, dt 2.1s +All GPU(s): step 4551: loss 0.5066, lr 3.0e-04, dt 2.1s +All GPU(s): step 4552: loss 0.5089, lr 3.0e-04, dt 2.1s +All GPU(s): step 4553: loss 0.6194, lr 3.0e-04, dt 2.1s +All GPU(s): step 4554: loss 0.5007, lr 3.0e-04, dt 2.1s +All GPU(s): step 4555: loss 0.7644, lr 3.0e-04, dt 2.1s +All GPU(s): step 4556: loss 1.0015, lr 3.0e-04, dt 2.1s +All GPU(s): step 4557: loss 0.7542, lr 3.0e-04, dt 2.1s +All GPU(s): step 4558: loss 0.8599, lr 3.0e-04, dt 2.1s +All GPU(s): step 4559: loss 1.0659, lr 3.0e-04, dt 2.1s +All GPU(s): step 4560: loss 0.9409, lr 3.0e-04, dt 2.1s +All GPU(s): step 4561: loss 0.9656, lr 3.0e-04, dt 2.1s +All GPU(s): step 4562: loss 0.9663, lr 3.0e-04, dt 2.1s +All GPU(s): step 4563: loss 1.3433, lr 3.0e-04, dt 2.1s +All GPU(s): step 4564: loss 1.1597, lr 3.0e-04, dt 2.1s +All GPU(s): step 4565: loss 1.1299, lr 3.0e-04, dt 2.1s +All GPU(s): step 4566: loss 0.9146, lr 3.0e-04, dt 2.1s +All GPU(s): step 4567: loss 0.8560, lr 3.0e-04, dt 2.1s +All GPU(s): step 4568: loss 0.8264, lr 3.0e-04, dt 2.1s +All GPU(s): step 4569: loss 0.5449, lr 3.0e-04, dt 2.1s +All GPU(s): step 4570: loss 0.6132, lr 3.0e-04, dt 2.1s +All GPU(s): step 4571: loss 0.6350, lr 3.0e-04, dt 2.1s +All GPU(s): step 4572: loss 0.7676, lr 3.0e-04, dt 2.1s +All GPU(s): step 4573: loss 0.8667, lr 3.0e-04, dt 2.1s +All GPU(s): step 4574: loss 0.6772, lr 3.0e-04, dt 2.1s +All GPU(s): step 4575: loss 0.4390, lr 3.0e-04, dt 2.1s +All GPU(s): step 4576: loss 0.3237, lr 3.0e-04, dt 2.1s +All GPU(s): step 4577: loss 0.4135, lr 3.0e-04, dt 2.1s +All GPU(s): step 4578: loss 0.5822, lr 3.0e-04, dt 2.1s +All GPU(s): step 4579: loss 0.6567, lr 3.0e-04, dt 2.2s +All GPU(s): step 4580: loss 0.8154, lr 3.0e-04, dt 2.1s +All GPU(s): step 4581: loss 1.0444, lr 3.0e-04, dt 2.0s +All GPU(s): step 4582: loss 1.5508, lr 3.0e-04, dt 2.1s +All GPU(s): step 4583: loss 1.0103, lr 3.0e-04, dt 2.1s +All GPU(s): step 4584: loss 1.0054, lr 3.0e-04, dt 2.2s +All GPU(s): step 4585: loss 0.8165, lr 3.0e-04, dt 2.1s +All GPU(s): step 4586: loss 0.6615, lr 3.0e-04, dt 2.1s +All GPU(s): step 4587: loss 1.1821, lr 3.0e-04, dt 2.1s +All GPU(s): step 4588: loss 0.9351, lr 3.0e-04, dt 2.1s +All GPU(s): step 4589: loss 1.2886, lr 3.0e-04, dt 2.1s +All GPU(s): step 4590: loss 1.7754, lr 3.0e-04, dt 2.0s +All GPU(s): step 4591: loss 1.6787, lr 3.0e-04, dt 2.1s +All GPU(s): step 4592: loss 2.7529, lr 3.0e-04, dt 2.1s +All GPU(s): step 4593: loss 4.2480, lr 3.0e-04, dt 2.1s +All GPU(s): step 4594: loss 7.1445, lr 3.0e-04, dt 2.1s +All GPU(s): step 4595: loss 8.9727, lr 3.0e-04, dt 2.1s +All GPU(s): step 4596: loss 12.8594, lr 3.0e-04, dt 2.0s +All GPU(s): step 4597: loss 16.0625, lr 3.0e-04, dt 2.1s +All GPU(s): step 4598: loss 17.4688, lr 3.0e-04, dt 2.1s +All GPU(s): step 4599: loss 18.5781, lr 3.0e-04, dt 2.0s +All GPU(s): step 4600: loss 19.1406, lr 3.0e-04, dt 2.0s +All GPU(s): step 4601: loss 19.9531, lr 3.0e-04, dt 2.1s +All GPU(s): step 4602: loss 20.3906, lr 3.0e-04, dt 2.1s +All GPU(s): step 4603: loss 20.0938, lr 3.0e-04, dt 2.1s +All GPU(s): step 4604: loss 20.1250, lr 3.0e-04, dt 2.1s +All GPU(s): step 4605: loss 19.0625, lr 3.0e-04, dt 2.1s +All GPU(s): step 4606: loss 18.1719, lr 3.0e-04, dt 2.1s +All GPU(s): step 4607: loss 17.9219, lr 3.0e-04, dt 2.1s +All GPU(s): step 4608: loss 17.2656, lr 3.0e-04, dt 2.2s +All GPU(s): step 4609: loss 16.1484, lr 3.0e-04, dt 2.1s +All GPU(s): step 4610: loss 15.4531, lr 3.0e-04, dt 2.1s +All GPU(s): step 4611: loss 14.2578, lr 3.0e-04, dt 2.0s +All GPU(s): step 4612: loss 14.7578, lr 3.0e-04, dt 2.1s +All GPU(s): step 4613: loss 15.7422, lr 3.0e-04, dt 2.2s +All GPU(s): step 4614: loss 16.1797, lr 3.0e-04, dt 2.1s +All GPU(s): step 4615: loss 15.9141, lr 3.0e-04, dt 2.1s +All GPU(s): step 4616: loss 15.3672, lr 3.0e-04, dt 2.0s +All GPU(s): step 4617: loss 16.5625, lr 3.0e-04, dt 2.1s +All GPU(s): step 4618: loss 18.7969, lr 3.0e-04, dt 2.1s +All GPU(s): step 4619: loss 20.3750, lr 3.0e-04, dt 2.1s +All GPU(s): step 4620: loss 21.5781, lr 3.0e-04, dt 2.0s +All GPU(s): step 4621: loss 21.5469, lr 3.0e-04, dt 2.0s +All GPU(s): step 4622: loss 22.0938, lr 3.0e-04, dt 2.1s +All GPU(s): step 4623: loss 22.3125, lr 3.0e-04, dt 2.1s +All GPU(s): step 4624: loss 22.3125, lr 3.0e-04, dt 2.0s +All GPU(s): step 4625: loss 22.1875, lr 3.0e-04, dt 2.0s +All GPU(s): step 4626: loss 22.2969, lr 3.0e-04, dt 2.1s +All GPU(s): step 4627: loss 22.0781, lr 3.0e-04, dt 2.1s +All GPU(s): step 4628: loss 22.0781, lr 3.0e-04, dt 2.1s +All GPU(s): step 4629: loss 21.7344, lr 3.0e-04, dt 2.1s +All GPU(s): step 4630: loss 20.1562, lr 3.0e-04, dt 2.1s +All GPU(s): step 4631: loss 17.1250, lr 3.0e-04, dt 2.1s +All GPU(s): step 4632: loss 11.9375, lr 2.9e-04, dt 2.1s +All GPU(s): step 4633: loss 8.8242, lr 2.9e-04, dt 2.1s +All GPU(s): step 4634: loss 8.6406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4635: loss 8.7031, lr 2.9e-04, dt 2.1s +All GPU(s): step 4636: loss 9.0156, lr 2.9e-04, dt 2.1s +All GPU(s): step 4637: loss 10.1875, lr 2.9e-04, dt 2.2s +All GPU(s): step 4638: loss 13.8438, lr 2.9e-04, dt 2.1s +All GPU(s): step 4639: loss 15.6406, lr 2.9e-04, dt 2.1s +All GPU(s): step 4640: loss 17.2812, lr 2.9e-04, dt 2.1s +All GPU(s): step 4641: loss 18.9062, lr 2.9e-04, dt 2.1s +All GPU(s): step 4642: loss 19.2188, lr 2.9e-04, dt 2.1s +All GPU(s): step 4643: loss 19.7969, lr 2.9e-04, dt 2.0s +All GPU(s): step 4644: loss 20.0938, lr 2.9e-04, dt 2.0s +All GPU(s): step 4645: loss 20.8906, lr 2.9e-04, dt 2.0s +All GPU(s): step 4646: loss 21.0625, lr 2.9e-04, dt 2.1s +All GPU(s): step 4647: loss 21.3906, lr 2.9e-04, dt 2.1s +All GPU(s): step 4648: loss 21.3438, lr 2.9e-04, dt 2.0s +All GPU(s): step 4649: loss 21.4375, lr 2.9e-04, dt 2.0s +All GPU(s): step 4650: loss 21.6562, lr 2.9e-04, dt 2.0s +All GPU(s): step 4651: loss 21.5312, lr 2.9e-04, dt 2.1s +All GPU(s): step 4652: loss 21.4531, lr 2.9e-04, dt 2.0s +All GPU(s): step 4653: loss 21.5469, lr 2.9e-04, dt 2.0s +All GPU(s): step 4654: loss 21.6562, lr 2.9e-04, dt 2.0s +All GPU(s): step 4655: loss 21.2031, lr 2.9e-04, dt 2.0s +All GPU(s): step 4656: loss 21.4375, lr 2.9e-04, dt 2.1s +All GPU(s): step 4657: loss 21.3125, lr 2.9e-04, dt 2.0s +All GPU(s): step 4658: loss 21.0156, lr 2.9e-04, dt 2.0s +All GPU(s): step 4659: loss 21.2188, lr 2.9e-04, dt 2.0s +All GPU(s): step 4660: loss 21.6094, lr 2.9e-04, dt 2.1s +All GPU(s): step 4661: loss 22.3125, lr 2.9e-04, dt 2.2s +All GPU(s): step 4662: loss 22.0938, lr 2.9e-04, dt 2.1s +All GPU(s): step 4663: loss 22.3906, lr 2.9e-04, dt 2.1s +All GPU(s): step 4664: loss 22.4688, lr 2.9e-04, dt 2.1s +All GPU(s): step 4665: loss 22.2812, lr 2.9e-04, dt 2.1s +All GPU(s): step 4666: loss 22.3125, lr 2.9e-04, dt 2.2s +All GPU(s): step 4667: loss 22.5469, lr 2.9e-04, dt 2.0s +All GPU(s): step 4668: loss 22.3750, lr 2.9e-04, dt 2.0s +All GPU(s): step 4669: loss 23.0156, lr 2.9e-04, dt 2.0s +All GPU(s): step 4670: loss 22.9062, lr 2.9e-04, dt 2.1s +All GPU(s): step 4671: loss 22.5938, lr 2.9e-04, dt 2.1s +All GPU(s): step 4672: loss 22.8438, lr 2.9e-04, dt 2.0s +All GPU(s): step 4673: loss 23.1875, lr 2.9e-04, dt 2.0s +All GPU(s): step 4674: loss 22.8281, lr 2.9e-04, dt 2.0s +All GPU(s): step 4675: loss 22.7812, lr 2.9e-04, dt 2.1s +All GPU(s): step 4676: loss 22.5781, lr 2.9e-04, dt 2.1s +All GPU(s): step 4677: loss 22.7500, lr 2.9e-04, dt 2.0s +All GPU(s): step 4678: loss 22.5938, lr 2.9e-04, dt 2.0s +All GPU(s): step 4679: loss 22.5469, lr 2.9e-04, dt 2.0s +All GPU(s): step 4680: loss 22.6094, lr 2.9e-04, dt 2.2s +All GPU(s): step 4681: loss 22.3281, lr 2.9e-04, dt 2.0s +All GPU(s): step 4682: loss 22.4062, lr 2.9e-04, dt 2.1s +All GPU(s): step 4683: loss 22.5312, lr 2.9e-04, dt 2.1s +All GPU(s): step 4684: loss 22.0781, lr 2.9e-04, dt 2.0s +All GPU(s): step 4685: loss 21.9688, lr 2.9e-04, dt 2.2s +All GPU(s): step 4686: loss 21.3438, lr 2.9e-04, dt 2.1s +All GPU(s): step 4687: loss 21.4219, lr 2.9e-04, dt 2.0s +All GPU(s): step 4688: loss 21.4375, lr 2.9e-04, dt 2.0s +All GPU(s): step 4689: loss 21.4375, lr 2.9e-04, dt 2.1s +All GPU(s): step 4690: loss 21.5781, lr 2.9e-04, dt 2.1s +All GPU(s): step 4691: loss 21.3750, lr 2.9e-04, dt 2.0s +All GPU(s): step 4692: loss 21.6406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4693: loss 21.5938, lr 2.9e-04, dt 2.1s +All GPU(s): step 4694: loss 21.6406, lr 2.9e-04, dt 2.1s +All GPU(s): step 4695: loss 21.3906, lr 2.9e-04, dt 2.1s +All GPU(s): step 4696: loss 21.6250, lr 2.9e-04, dt 2.1s +All GPU(s): step 4697: loss 21.5156, lr 2.9e-04, dt 2.1s +All GPU(s): step 4698: loss 21.9375, lr 2.9e-04, dt 2.0s +All GPU(s): step 4699: loss 21.8281, lr 2.9e-04, dt 2.1s +All GPU(s): step 4700: loss 21.6719, lr 2.9e-04, dt 2.1s +All GPU(s): step 4701: loss 21.3594, lr 2.9e-04, dt 2.0s +All GPU(s): step 4702: loss 21.5000, lr 2.9e-04, dt 2.0s +All GPU(s): step 4703: loss 21.1875, lr 2.9e-04, dt 2.0s +All GPU(s): step 4704: loss 21.5312, lr 2.9e-04, dt 2.1s +All GPU(s): step 4705: loss 21.3750, lr 2.9e-04, dt 2.0s +All GPU(s): step 4706: loss 21.1875, lr 2.9e-04, dt 2.0s +All GPU(s): step 4707: loss 21.0469, lr 2.9e-04, dt 2.0s +All GPU(s): step 4708: loss 21.0312, lr 2.9e-04, dt 2.0s +All GPU(s): step 4709: loss 21.0000, lr 2.9e-04, dt 2.2s +All GPU(s): step 4710: loss 20.7500, lr 2.9e-04, dt 2.0s +All GPU(s): step 4711: loss 20.9688, lr 2.9e-04, dt 2.1s +All GPU(s): step 4712: loss 20.3594, lr 2.9e-04, dt 2.1s +All GPU(s): step 4713: loss 21.1875, lr 2.9e-04, dt 2.1s +All GPU(s): step 4714: loss 21.1719, lr 2.9e-04, dt 2.1s +All GPU(s): step 4715: loss 20.5469, lr 2.9e-04, dt 2.0s +All GPU(s): step 4716: loss 20.8750, lr 2.9e-04, dt 2.0s +All GPU(s): step 4717: loss 21.1406, lr 2.9e-04, dt 2.1s +All GPU(s): step 4718: loss 20.9531, lr 2.9e-04, dt 2.0s +All GPU(s): step 4719: loss 21.2500, lr 2.9e-04, dt 2.1s +All GPU(s): step 4720: loss 20.9688, lr 2.9e-04, dt 2.1s +All GPU(s): step 4721: loss 21.0156, lr 2.9e-04, dt 2.0s +All GPU(s): step 4722: loss 20.6875, lr 2.9e-04, dt 2.1s +All GPU(s): step 4723: loss 20.5625, lr 2.9e-04, dt 2.1s +All GPU(s): step 4724: loss 20.7500, lr 2.9e-04, dt 2.1s +All GPU(s): step 4725: loss 21.1406, lr 2.9e-04, dt 2.1s +All GPU(s): step 4726: loss 21.4375, lr 2.9e-04, dt 2.1s +All GPU(s): step 4727: loss 21.0625, lr 2.9e-04, dt 2.1s +All GPU(s): step 4728: loss 21.2969, lr 2.9e-04, dt 2.1s +All GPU(s): step 4729: loss 21.1562, lr 2.9e-04, dt 2.1s +All GPU(s): step 4730: loss 21.2500, lr 2.9e-04, dt 2.1s +All GPU(s): step 4731: loss 21.4375, lr 2.9e-04, dt 2.1s +All GPU(s): step 4732: loss 21.1562, lr 2.9e-04, dt 2.0s +All GPU(s): step 4733: loss 21.3750, lr 2.9e-04, dt 2.1s +All GPU(s): step 4734: loss 21.5000, lr 2.9e-04, dt 2.1s +All GPU(s): step 4735: loss 21.5156, lr 2.9e-04, dt 2.1s +All GPU(s): step 4736: loss 21.5312, lr 2.9e-04, dt 2.1s +All GPU(s): step 4737: loss 21.7344, lr 2.9e-04, dt 2.1s +All GPU(s): step 4738: loss 21.8750, lr 2.9e-04, dt 2.1s +All GPU(s): step 4739: loss 22.0781, lr 2.9e-04, dt 2.0s +All GPU(s): step 4740: loss 21.6875, lr 2.9e-04, dt 2.0s +All GPU(s): step 4741: loss 21.9375, lr 2.9e-04, dt 2.0s +All GPU(s): step 4742: loss 21.7969, lr 2.9e-04, dt 2.1s +All GPU(s): step 4743: loss 21.6875, lr 2.9e-04, dt 2.1s +All GPU(s): step 4744: loss 21.8281, lr 2.9e-04, dt 2.0s +All GPU(s): step 4745: loss 21.7500, lr 2.9e-04, dt 2.0s +All GPU(s): step 4746: loss 21.7656, lr 2.9e-04, dt 2.1s +All GPU(s): step 4747: loss 22.0312, lr 2.9e-04, dt 2.1s +All GPU(s): step 4748: loss 22.1094, lr 2.9e-04, dt 2.1s +All GPU(s): step 4749: loss 22.0000, lr 2.9e-04, dt 2.1s +All GPU(s): step 4750: loss 22.0469, lr 2.9e-04, dt 2.0s +All GPU(s): step 4751: loss 22.0312, lr 2.9e-04, dt 2.1s +All GPU(s): step 4752: loss 21.7812, lr 2.9e-04, dt 2.1s +All GPU(s): step 4753: loss 21.7031, lr 2.9e-04, dt 2.1s +All GPU(s): step 4754: loss 21.9062, lr 2.9e-04, dt 2.1s +All GPU(s): step 4755: loss 21.7500, lr 2.9e-04, dt 2.1s +All GPU(s): step 4756: loss 21.8594, lr 2.9e-04, dt 2.1s +All GPU(s): step 4757: loss 21.8594, lr 2.9e-04, dt 2.1s +All GPU(s): step 4758: loss 21.9219, lr 2.9e-04, dt 2.0s +All GPU(s): step 4759: loss 21.4219, lr 2.9e-04, dt 2.1s +All GPU(s): step 4760: loss 21.9219, lr 2.9e-04, dt 2.0s +All GPU(s): step 4761: loss 21.7812, lr 2.9e-04, dt 2.0s +All GPU(s): step 4762: loss 21.8281, lr 2.9e-04, dt 2.1s +All GPU(s): step 4763: loss 21.9688, lr 2.9e-04, dt 2.1s +All GPU(s): step 4764: loss 21.7031, lr 2.9e-04, dt 2.1s +All GPU(s): step 4765: loss 22.1250, lr 2.9e-04, dt 2.0s +All GPU(s): step 4766: loss 22.1562, lr 2.9e-04, dt 2.0s +All GPU(s): step 4767: loss 22.1875, lr 2.9e-04, dt 2.1s +All GPU(s): step 4768: loss 22.4531, lr 2.9e-04, dt 2.1s +All GPU(s): step 4769: loss 22.5156, lr 2.9e-04, dt 2.1s +All GPU(s): step 4770: loss 22.5156, lr 2.9e-04, dt 2.1s +All GPU(s): step 4771: loss 22.5938, lr 2.9e-04, dt 2.1s +All GPU(s): step 4772: loss 22.4531, lr 2.9e-04, dt 2.2s +All GPU(s): step 4773: loss 22.6875, lr 2.9e-04, dt 2.1s +All GPU(s): step 4774: loss 22.5625, lr 2.9e-04, dt 2.1s +All GPU(s): step 4775: loss 22.4062, lr 2.9e-04, dt 2.1s +All GPU(s): step 4776: loss 22.6562, lr 2.9e-04, dt 2.1s +All GPU(s): step 4777: loss 22.9844, lr 2.9e-04, dt 2.1s +All GPU(s): step 4778: loss 22.9844, lr 2.9e-04, dt 2.0s +All GPU(s): step 4779: loss 22.6094, lr 2.9e-04, dt 2.0s +All GPU(s): step 4780: loss 22.4062, lr 2.9e-04, dt 2.0s +All GPU(s): step 4781: loss 22.6250, lr 2.9e-04, dt 2.1s +All GPU(s): step 4782: loss 22.6250, lr 2.9e-04, dt 2.1s +All GPU(s): step 4783: loss 22.7188, lr 2.9e-04, dt 2.0s +All GPU(s): step 4784: loss 22.5469, lr 2.9e-04, dt 2.1s +All GPU(s): step 4785: loss 22.8438, lr 2.9e-04, dt 2.1s +All GPU(s): step 4786: loss 22.5000, lr 2.9e-04, dt 2.1s +All GPU(s): step 4787: loss 22.9844, lr 2.9e-04, dt 2.0s +All GPU(s): step 4788: loss 22.5938, lr 2.9e-04, dt 2.0s +All GPU(s): step 4789: loss 22.2500, lr 2.9e-04, dt 2.1s +All GPU(s): step 4790: loss 22.2812, lr 2.9e-04, dt 2.1s +All GPU(s): step 4791: loss 22.3438, lr 2.9e-04, dt 2.1s +All GPU(s): step 4792: loss 22.1406, lr 2.9e-04, dt 2.0s +All GPU(s): step 4793: loss 22.4688, lr 2.9e-04, dt 2.0s +All GPU(s): step 4794: loss 22.7188, lr 2.9e-04, dt 2.0s +All GPU(s): step 4795: loss 23.1094, lr 2.9e-04, dt 2.1s +All GPU(s): step 4796: loss 22.8281, lr 2.9e-04, dt 2.1s +All GPU(s): step 4797: loss 23.1562, lr 2.9e-04, dt 2.0s +All GPU(s): step 4798: loss 23.0312, lr 2.9e-04, dt 2.0s +All GPU(s): step 4799: loss 22.8594, lr 2.9e-04, dt 2.0s +All GPU(s): step 4800: loss 22.8125, lr 2.9e-04, dt 2.0s +All GPU(s): step 4801: loss 22.6562, lr 2.9e-04, dt 2.1s +All GPU(s): step 4802: loss 22.7031, lr 2.9e-04, dt 2.1s +All GPU(s): step 4803: loss 22.6250, lr 2.9e-04, dt 2.0s +All GPU(s): step 4804: loss 23.0938, lr 2.9e-04, dt 2.0s +All GPU(s): step 4805: loss 22.5938, lr 2.9e-04, dt 2.1s +All GPU(s): step 4806: loss 22.1094, lr 2.9e-04, dt 2.1s +All GPU(s): step 4807: loss 22.4531, lr 2.9e-04, dt 2.1s +All GPU(s): step 4808: loss 21.7812, lr 2.9e-04, dt 2.0s +All GPU(s): step 4809: loss 22.1875, lr 2.9e-04, dt 2.0s +All GPU(s): step 4810: loss 22.2656, lr 2.9e-04, dt 2.1s +All GPU(s): step 4811: loss 22.0625, lr 2.9e-04, dt 2.0s +All GPU(s): step 4812: loss 22.3281, lr 2.9e-04, dt 2.0s +All GPU(s): step 4813: loss 22.2031, lr 2.9e-04, dt 2.0s +All GPU(s): step 4814: loss 22.0000, lr 2.9e-04, dt 2.1s +All GPU(s): step 4815: loss 22.0312, lr 2.9e-04, dt 2.1s +All GPU(s): step 4816: loss 22.0000, lr 2.9e-04, dt 2.0s +All GPU(s): step 4817: loss 22.0312, lr 2.9e-04, dt 2.0s +All GPU(s): step 4818: loss 22.1250, lr 2.9e-04, dt 2.0s +All GPU(s): step 4819: loss 22.1250, lr 2.9e-04, dt 2.1s +All GPU(s): step 4820: loss 22.2656, lr 2.9e-04, dt 2.2s +All GPU(s): step 4821: loss 22.7656, lr 2.9e-04, dt 2.1s +All GPU(s): step 4822: loss 22.9375, lr 2.9e-04, dt 2.0s +All GPU(s): step 4823: loss 22.8438, lr 2.9e-04, dt 2.0s +All GPU(s): step 4824: loss 22.7500, lr 2.8e-04, dt 2.1s +All GPU(s): step 4825: loss 22.7344, lr 2.8e-04, dt 2.1s +All GPU(s): step 4826: loss 22.7500, lr 2.8e-04, dt 2.0s +All GPU(s): step 4827: loss 22.6719, lr 2.8e-04, dt 2.0s +All GPU(s): step 4828: loss 22.7500, lr 2.8e-04, dt 2.0s +All GPU(s): step 4829: loss 22.7656, lr 2.8e-04, dt 2.1s +All GPU(s): step 4830: loss 22.9688, lr 2.8e-04, dt 2.1s +All GPU(s): step 4831: loss 22.8281, lr 2.8e-04, dt 2.0s +All GPU(s): step 4832: loss 22.8906, lr 2.8e-04, dt 2.1s +All GPU(s): step 4833: loss 22.7188, lr 2.8e-04, dt 2.0s +All GPU(s): step 4834: loss 22.8125, lr 2.8e-04, dt 2.1s +All GPU(s): step 4835: loss 22.8750, lr 2.8e-04, dt 2.1s +All GPU(s): step 4836: loss 22.9531, lr 2.8e-04, dt 2.0s +All GPU(s): step 4837: loss 22.6406, lr 2.8e-04, dt 2.0s +All GPU(s): step 4838: loss 22.6250, lr 2.8e-04, dt 2.1s +All GPU(s): step 4839: loss 22.7031, lr 2.8e-04, dt 2.1s +All GPU(s): step 4840: loss 22.7656, lr 2.8e-04, dt 2.0s +All GPU(s): step 4841: loss 22.9062, lr 2.8e-04, dt 2.0s +All GPU(s): step 4842: loss 22.7969, lr 2.8e-04, dt 2.0s +All GPU(s): step 4843: loss 22.7344, lr 2.8e-04, dt 2.1s +All GPU(s): step 4844: loss 23.0156, lr 2.8e-04, dt 2.1s +All GPU(s): step 4845: loss 22.9844, lr 2.8e-04, dt 2.0s +All GPU(s): step 4846: loss 23.0000, lr 2.8e-04, dt 2.0s +All GPU(s): step 4847: loss 22.8594, lr 2.8e-04, dt 2.0s +All GPU(s): step 4848: loss 23.0000, lr 2.8e-04, dt 2.0s +All GPU(s): step 4849: loss 23.0312, lr 2.8e-04, dt 2.1s +All GPU(s): step 4850: loss 22.9844, lr 2.8e-04, dt 2.0s +All GPU(s): step 4851: loss 23.3438, lr 2.8e-04, dt 2.0s +All GPU(s): step 4852: loss 23.0156, lr 2.8e-04, dt 2.0s +All GPU(s): step 4853: loss 23.0469, lr 2.8e-04, dt 2.1s +All GPU(s): step 4854: loss 22.4844, lr 2.8e-04, dt 2.1s +All GPU(s): step 4855: loss 22.5938, lr 2.8e-04, dt 2.1s +All GPU(s): step 4856: loss 22.7188, lr 2.8e-04, dt 2.1s +All GPU(s): step 4857: loss 22.7188, lr 2.8e-04, dt 2.0s +All GPU(s): step 4858: loss 22.6875, lr 2.8e-04, dt 2.0s +All GPU(s): step 4859: loss 22.4219, lr 2.8e-04, dt 2.1s +All GPU(s): step 4860: loss 22.6406, lr 2.8e-04, dt 2.0s +All GPU(s): step 4861: loss 22.6094, lr 2.8e-04, dt 2.0s +All GPU(s): step 4862: loss 22.6719, lr 2.8e-04, dt 2.0s +All GPU(s): step 4863: loss 22.7188, lr 2.8e-04, dt 2.1s +All GPU(s): step 4864: loss 22.9062, lr 2.8e-04, dt 2.0s +All GPU(s): step 4865: loss 22.6562, lr 2.8e-04, dt 2.0s +All GPU(s): step 4866: loss 22.8281, lr 2.8e-04, dt 2.0s +All GPU(s): step 4867: loss 22.8594, lr 2.8e-04, dt 2.1s +All GPU(s): step 4868: loss 23.0781, lr 2.8e-04, dt 2.1s +All GPU(s): step 4869: loss 22.9375, lr 2.8e-04, dt 2.0s +All GPU(s): step 4870: loss 23.1562, lr 2.8e-04, dt 2.0s +All GPU(s): step 4871: loss 23.0938, lr 2.8e-04, dt 2.0s +All GPU(s): step 4872: loss 22.9844, lr 2.8e-04, dt 2.0s +All GPU(s): step 4873: loss 23.1875, lr 2.8e-04, dt 2.1s +All GPU(s): step 4874: loss 23.4375, lr 2.8e-04, dt 2.0s +All GPU(s): step 4875: loss 22.7969, lr 2.8e-04, dt 2.0s +All GPU(s): step 4876: loss 23.2344, lr 2.8e-04, dt 2.0s +All GPU(s): step 4877: loss 23.0000, lr 2.8e-04, dt 2.0s +All GPU(s): step 4878: loss 22.8594, lr 2.8e-04, dt 2.2s +All GPU(s): step 4879: loss 22.7969, lr 2.8e-04, dt 2.0s +All GPU(s): step 4880: loss 22.8281, lr 2.8e-04, dt 2.0s +All GPU(s): step 4881: loss 23.2344, lr 2.8e-04, dt 2.0s +All GPU(s): step 4882: loss 22.9688, lr 2.8e-04, dt 2.1s +All GPU(s): step 4883: loss 22.9531, lr 2.8e-04, dt 2.1s +All GPU(s): step 4884: loss 23.0781, lr 2.8e-04, dt 2.1s +All GPU(s): step 4885: loss 23.1094, lr 2.8e-04, dt 2.0s +All GPU(s): step 4886: loss 22.9531, lr 2.8e-04, dt 2.1s +All GPU(s): step 4887: loss 23.0781, lr 2.8e-04, dt 2.1s +All GPU(s): step 4888: loss 23.1562, lr 2.8e-04, dt 2.1s +All GPU(s): step 4889: loss 23.4688, lr 2.8e-04, dt 2.0s +All GPU(s): step 4890: loss 23.2188, lr 2.8e-04, dt 2.0s +All GPU(s): step 4891: loss 23.2500, lr 2.8e-04, dt 2.0s +All GPU(s): step 4892: loss 23.6250, lr 2.8e-04, dt 2.1s +All GPU(s): step 4893: loss 23.7188, lr 2.8e-04, dt 2.1s +All GPU(s): step 4894: loss 23.4688, lr 2.8e-04, dt 2.0s +All GPU(s): step 4895: loss 23.3438, lr 2.8e-04, dt 2.1s +All GPU(s): step 4896: loss 23.5469, lr 2.8e-04, dt 2.1s +All GPU(s): step 4897: loss 23.2188, lr 2.8e-04, dt 2.1s +All GPU(s): step 4898: loss 23.3594, lr 2.8e-04, dt 2.0s +All GPU(s): step 4899: loss 23.0000, lr 2.8e-04, dt 2.0s +All GPU(s): step 4900: loss 23.2500, lr 2.8e-04, dt 2.0s +All GPU(s): step 4901: loss 23.5156, lr 2.8e-04, dt 2.1s +All GPU(s): step 4902: loss 23.2812, lr 2.8e-04, dt 2.1s +All GPU(s): step 4903: loss 23.6250, lr 2.8e-04, dt 2.0s +All GPU(s): step 4904: loss 23.7969, lr 2.8e-04, dt 2.0s +All GPU(s): step 4905: loss 23.6562, lr 2.8e-04, dt 2.0s +All GPU(s): step 4906: loss 23.5000, lr 2.8e-04, dt 2.0s +All GPU(s): step 4907: loss 23.6719, lr 2.8e-04, dt 2.2s +All GPU(s): step 4908: loss 24.0312, lr 2.8e-04, dt 2.0s +All GPU(s): step 4909: loss 23.8125, lr 2.8e-04, dt 2.0s +All GPU(s): step 4910: loss 24.0000, lr 2.8e-04, dt 2.0s +All GPU(s): step 4911: loss 23.7656, lr 2.8e-04, dt 2.1s +All GPU(s): step 4912: loss 24.0156, lr 2.8e-04, dt 2.1s +All GPU(s): step 4913: loss 23.9688, lr 2.8e-04, dt 2.0s +All GPU(s): step 4914: loss 24.0312, lr 2.8e-04, dt 2.0s +All GPU(s): step 4915: loss 23.9688, lr 2.8e-04, dt 2.0s +All GPU(s): step 4916: loss 23.9844, lr 2.8e-04, dt 2.1s +All GPU(s): step 4917: loss 23.7500, lr 2.8e-04, dt 2.1s +All GPU(s): step 4918: loss 23.5312, lr 2.8e-04, dt 2.0s +All GPU(s): step 4919: loss 23.7188, lr 2.8e-04, dt 2.0s +All GPU(s): step 4920: loss 24.1875, lr 2.8e-04, dt 2.0s +All GPU(s): step 4921: loss 23.9219, lr 2.8e-04, dt 2.1s +All GPU(s): step 4922: loss 23.8906, lr 2.8e-04, dt 2.0s +All GPU(s): step 4923: loss 23.7969, lr 2.8e-04, dt 2.0s +All GPU(s): step 4924: loss 23.8438, lr 2.8e-04, dt 2.0s +All GPU(s): step 4925: loss 23.8906, lr 2.8e-04, dt 2.0s +All GPU(s): step 4926: loss 23.6875, lr 2.8e-04, dt 2.1s +All GPU(s): step 4927: loss 23.6406, lr 2.8e-04, dt 2.0s +All GPU(s): step 4928: loss 23.8906, lr 2.8e-04, dt 2.1s +All GPU(s): step 4929: loss 24.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 4930: loss 23.9844, lr 2.8e-04, dt 2.1s +All GPU(s): step 4931: loss 23.5781, lr 2.8e-04, dt 2.1s +All GPU(s): step 4932: loss 23.8281, lr 2.8e-04, dt 2.0s +All GPU(s): step 4933: loss 24.0156, lr 2.8e-04, dt 2.1s +All GPU(s): step 4934: loss 23.9844, lr 2.8e-04, dt 2.0s +All GPU(s): step 4935: loss 24.1094, lr 2.8e-04, dt 2.0s +All GPU(s): step 4936: loss 24.2812, lr 2.8e-04, dt 2.1s +All GPU(s): step 4937: loss 23.9375, lr 2.8e-04, dt 2.1s +All GPU(s): step 4938: loss 24.4531, lr 2.8e-04, dt 2.1s +All GPU(s): step 4939: loss 24.1875, lr 2.8e-04, dt 2.1s +All GPU(s): step 4940: loss 24.2188, lr 2.8e-04, dt 2.1s +All GPU(s): step 4941: loss 24.2656, lr 2.8e-04, dt 2.1s +All GPU(s): step 4942: loss 24.4219, lr 2.8e-04, dt 2.1s +All GPU(s): step 4943: loss 24.3750, lr 2.8e-04, dt 2.1s +All GPU(s): step 4944: loss 24.3281, lr 2.8e-04, dt 2.1s +All GPU(s): step 4945: loss 24.4375, lr 2.8e-04, dt 2.1s +All GPU(s): step 4946: loss 24.6719, lr 2.8e-04, dt 2.0s +All GPU(s): step 4947: loss 24.4062, lr 2.8e-04, dt 2.0s +All GPU(s): step 4948: loss 24.7188, lr 2.8e-04, dt 2.0s +All GPU(s): step 4949: loss 24.7656, lr 2.8e-04, dt 2.0s +All GPU(s): step 4950: loss 25.0000, lr 2.8e-04, dt 2.1s +All GPU(s): step 4951: loss 25.0781, lr 2.8e-04, dt 2.1s +All GPU(s): step 4952: loss 24.8906, lr 2.8e-04, dt 2.0s +All GPU(s): step 4953: loss 25.0781, lr 2.8e-04, dt 2.0s +All GPU(s): step 4954: loss 25.0938, lr 2.8e-04, dt 2.1s +All GPU(s): step 4955: loss 24.7656, lr 2.8e-04, dt 2.1s +All GPU(s): step 4956: loss 24.5312, lr 2.8e-04, dt 2.0s +All GPU(s): step 4957: loss 24.2031, lr 2.8e-04, dt 2.0s +All GPU(s): step 4958: loss 24.0781, lr 2.8e-04, dt 2.0s +All GPU(s): step 4959: loss 24.2031, lr 2.8e-04, dt 2.0s +All GPU(s): step 4960: loss 23.8125, lr 2.8e-04, dt 2.2s +All GPU(s): step 4961: loss 24.1406, lr 2.8e-04, dt 2.0s +All GPU(s): step 4962: loss 23.7656, lr 2.8e-04, dt 2.0s +All GPU(s): step 4963: loss 24.1250, lr 2.8e-04, dt 2.0s +All GPU(s): step 4964: loss 23.9531, lr 2.8e-04, dt 2.0s +All GPU(s): step 4965: loss 24.0469, lr 2.8e-04, dt 2.1s +All GPU(s): step 4966: loss 23.9062, lr 2.8e-04, dt 2.0s +All GPU(s): step 4967: loss 23.9375, lr 2.8e-04, dt 2.0s +All GPU(s): step 4968: loss 24.1719, lr 2.8e-04, dt 2.0s +All GPU(s): step 4969: loss 23.9688, lr 2.8e-04, dt 2.1s +All GPU(s): step 4970: loss 24.0469, lr 2.8e-04, dt 2.1s +All GPU(s): step 4971: loss 23.9219, lr 2.8e-04, dt 2.0s +All GPU(s): step 4972: loss 23.7656, lr 2.8e-04, dt 2.0s +All GPU(s): step 4973: loss 23.9531, lr 2.8e-04, dt 2.0s +All GPU(s): step 4974: loss 23.8438, lr 2.8e-04, dt 2.1s +All GPU(s): step 4975: loss 23.6719, lr 2.8e-04, dt 2.0s +All GPU(s): step 4976: loss 23.9688, lr 2.8e-04, dt 2.0s +All GPU(s): step 4977: loss 24.0781, lr 2.8e-04, dt 2.0s +All GPU(s): step 4978: loss 24.0938, lr 2.8e-04, dt 2.0s +All GPU(s): step 4979: loss 24.0781, lr 2.8e-04, dt 2.1s +All GPU(s): step 4980: loss 24.0156, lr 2.8e-04, dt 2.0s +All GPU(s): step 4981: loss 23.9062, lr 2.8e-04, dt 2.0s +All GPU(s): step 4982: loss 24.2188, lr 2.8e-04, dt 2.0s +All GPU(s): step 4983: loss 24.0000, lr 2.8e-04, dt 2.1s +All GPU(s): step 4984: loss 23.9844, lr 2.8e-04, dt 2.1s +All GPU(s): step 4985: loss 24.2969, lr 2.8e-04, dt 2.0s +All GPU(s): step 4986: loss 24.2031, lr 2.8e-04, dt 2.0s +All GPU(s): step 4987: loss 24.2812, lr 2.8e-04, dt 2.0s +All GPU(s): step 4988: loss 24.2969, lr 2.8e-04, dt 2.0s +All GPU(s): step 4989: loss 24.1719, lr 2.8e-04, dt 2.1s +All GPU(s): step 4990: loss 24.0156, lr 2.8e-04, dt 2.0s +All GPU(s): step 4991: loss 24.4062, lr 2.8e-04, dt 2.0s +All GPU(s): step 4992: loss 24.5781, lr 2.8e-04, dt 2.0s +All GPU(s): step 4993: loss 24.1875, lr 2.8e-04, dt 2.1s +All GPU(s): step 4994: loss 24.3125, lr 2.8e-04, dt 2.1s +All GPU(s): step 4995: loss 24.2188, lr 2.8e-04, dt 2.0s +All GPU(s): step 4996: loss 24.1562, lr 2.8e-04, dt 2.0s +All GPU(s): step 4997: loss 24.2500, lr 2.8e-04, dt 2.0s +All GPU(s): step 4998: loss 24.1719, lr 2.8e-04, dt 2.1s +All GPU(s): step 4999: loss 24.0625, lr 2.8e-04, dt 2.1s +saving checkpoint to checkpoints/ckpt_5000.pt +All GPU(s): step 5000: loss 24.3750, lr 2.8e-04, dt 2.1s +All GPU(s): step 5001: loss 24.2656, lr 2.8e-04, dt 2.0s +All GPU(s): step 5002: loss 24.0781, lr 2.8e-04, dt 2.0s +All GPU(s): step 5003: loss 24.2500, lr 2.8e-04, dt 2.1s +All GPU(s): step 5004: loss 24.3125, lr 2.8e-04, dt 2.0s +All GPU(s): step 5005: loss 24.2812, lr 2.8e-04, dt 2.0s +All GPU(s): step 5006: loss 24.4062, lr 2.8e-04, dt 2.0s +All GPU(s): step 5007: loss 24.5469, lr 2.8e-04, dt 2.1s +All GPU(s): step 5008: loss 24.5469, lr 2.8e-04, dt 2.2s +All GPU(s): step 5009: loss 24.3438, lr 2.8e-04, dt 2.1s +All GPU(s): step 5010: loss 24.2500, lr 2.8e-04, dt 2.0s +All GPU(s): step 5011: loss 24.2656, lr 2.8e-04, dt 2.1s +All GPU(s): step 5012: loss 24.1875, lr 2.7e-04, dt 2.1s +All GPU(s): step 5013: loss 24.4375, lr 2.7e-04, dt 2.1s +All GPU(s): step 5014: loss 24.1250, lr 2.7e-04, dt 2.0s +All GPU(s): step 5015: loss 24.3594, lr 2.7e-04, dt 2.0s +All GPU(s): step 5016: loss 24.0625, lr 2.7e-04, dt 2.1s +All GPU(s): step 5017: loss 24.4062, lr 2.7e-04, dt 2.1s +All GPU(s): step 5018: loss 24.2344, lr 2.7e-04, dt 2.1s +All GPU(s): step 5019: loss 24.0469, lr 2.7e-04, dt 2.0s +All GPU(s): step 5020: loss 24.0781, lr 2.7e-04, dt 2.0s +All GPU(s): step 5021: loss 23.8594, lr 2.7e-04, dt 2.0s +All GPU(s): step 5022: loss 24.0312, lr 2.7e-04, dt 2.1s +All GPU(s): step 5023: loss 23.9531, lr 2.7e-04, dt 2.1s +All GPU(s): step 5024: loss 23.7188, lr 2.7e-04, dt 2.0s +All GPU(s): step 5025: loss 24.0156, lr 2.7e-04, dt 2.1s +All GPU(s): step 5026: loss 24.0625, lr 2.7e-04, dt 2.1s +All GPU(s): step 5027: loss 24.0625, lr 2.7e-04, dt 2.1s +All GPU(s): step 5028: loss 23.9219, lr 2.7e-04, dt 2.0s +All GPU(s): step 5029: loss 24.4844, lr 2.7e-04, dt 2.1s +All GPU(s): step 5030: loss 24.4219, lr 2.7e-04, dt 2.0s +All GPU(s): step 5031: loss 24.1875, lr 2.7e-04, dt 2.0s +All GPU(s): step 5032: loss 24.2969, lr 2.7e-04, dt 2.1s +All GPU(s): step 5033: loss 24.1250, lr 2.7e-04, dt 2.0s +All GPU(s): step 5034: loss 24.1875, lr 2.7e-04, dt 2.0s +All GPU(s): step 5035: loss 24.3906, lr 2.7e-04, dt 2.0s +All GPU(s): step 5036: loss 24.3125, lr 2.7e-04, dt 2.1s +All GPU(s): step 5037: loss 24.5469, lr 2.7e-04, dt 2.2s +All GPU(s): step 5038: loss 24.3438, lr 2.7e-04, dt 2.1s +All GPU(s): step 5039: loss 24.5156, lr 2.7e-04, dt 2.0s +All GPU(s): step 5040: loss 24.3281, lr 2.7e-04, dt 2.0s +All GPU(s): step 5041: loss 24.6250, lr 2.7e-04, dt 2.0s +All GPU(s): step 5042: loss 24.4844, lr 2.7e-04, dt 2.1s +All GPU(s): step 5043: loss 24.4688, lr 2.7e-04, dt 2.0s +All GPU(s): step 5044: loss 24.5938, lr 2.7e-04, dt 2.1s +All GPU(s): step 5045: loss 24.3750, lr 2.7e-04, dt 2.1s +All GPU(s): step 5046: loss 24.4375, lr 2.7e-04, dt 2.1s +All GPU(s): step 5047: loss 24.5469, lr 2.7e-04, dt 2.1s +All GPU(s): step 5048: loss 24.5469, lr 2.7e-04, dt 2.0s +All GPU(s): step 5049: loss 24.3594, lr 2.7e-04, dt 2.0s +All GPU(s): step 5050: loss 24.4219, lr 2.7e-04, dt 2.0s +All GPU(s): step 5051: loss 24.6406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5052: loss 24.5625, lr 2.7e-04, dt 2.2s +All GPU(s): step 5053: loss 24.4688, lr 2.7e-04, dt 2.0s +All GPU(s): step 5054: loss 24.8125, lr 2.7e-04, dt 2.0s +All GPU(s): step 5055: loss 24.2188, lr 2.7e-04, dt 2.1s +All GPU(s): step 5056: loss 24.2656, lr 2.7e-04, dt 2.1s +All GPU(s): step 5057: loss 24.4062, lr 2.7e-04, dt 2.0s +All GPU(s): step 5058: loss 24.4531, lr 2.7e-04, dt 2.1s +All GPU(s): step 5059: loss 24.4531, lr 2.7e-04, dt 2.1s +All GPU(s): step 5060: loss 24.4375, lr 2.7e-04, dt 2.1s +All GPU(s): step 5061: loss 24.5312, lr 2.7e-04, dt 2.1s +All GPU(s): step 5062: loss 24.5781, lr 2.7e-04, dt 2.1s +All GPU(s): step 5063: loss 24.5781, lr 2.7e-04, dt 2.1s +All GPU(s): step 5064: loss 24.4219, lr 2.7e-04, dt 2.1s +All GPU(s): step 5065: loss 24.6250, lr 2.7e-04, dt 2.1s +All GPU(s): step 5066: loss 24.8125, lr 2.7e-04, dt 2.1s +All GPU(s): step 5067: loss 24.7031, lr 2.7e-04, dt 2.0s +All GPU(s): step 5068: loss 24.8125, lr 2.7e-04, dt 2.0s +All GPU(s): step 5069: loss 24.5781, lr 2.7e-04, dt 2.1s +All GPU(s): step 5070: loss 24.6875, lr 2.7e-04, dt 2.1s +All GPU(s): step 5071: loss 24.4531, lr 2.7e-04, dt 2.1s +All GPU(s): step 5072: loss 24.8125, lr 2.7e-04, dt 2.0s +All GPU(s): step 5073: loss 24.7812, lr 2.7e-04, dt 2.0s +All GPU(s): step 5074: loss 24.6406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5075: loss 24.6406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5076: loss 24.6094, lr 2.7e-04, dt 2.1s +All GPU(s): step 5077: loss 24.9375, lr 2.7e-04, dt 2.0s +All GPU(s): step 5078: loss 24.6562, lr 2.7e-04, dt 2.0s +All GPU(s): step 5079: loss 24.5469, lr 2.7e-04, dt 2.0s +All GPU(s): step 5080: loss 24.6719, lr 2.7e-04, dt 2.1s +All GPU(s): step 5081: loss 24.5312, lr 2.7e-04, dt 2.0s +All GPU(s): step 5082: loss 24.4688, lr 2.7e-04, dt 2.1s +All GPU(s): step 5083: loss 24.3750, lr 2.7e-04, dt 2.0s +All GPU(s): step 5084: loss 24.3750, lr 2.7e-04, dt 2.1s +All GPU(s): step 5085: loss 24.4219, lr 2.7e-04, dt 2.2s +All GPU(s): step 5086: loss 24.5938, lr 2.7e-04, dt 2.0s +All GPU(s): step 5087: loss 24.6562, lr 2.7e-04, dt 2.1s +All GPU(s): step 5088: loss 24.8281, lr 2.7e-04, dt 2.0s +All GPU(s): step 5089: loss 24.7031, lr 2.7e-04, dt 2.1s +All GPU(s): step 5090: loss 24.4844, lr 2.7e-04, dt 2.1s +All GPU(s): step 5091: loss 24.7031, lr 2.7e-04, dt 2.1s +All GPU(s): step 5092: loss 24.5312, lr 2.7e-04, dt 2.1s +All GPU(s): step 5093: loss 24.3594, lr 2.7e-04, dt 2.0s +All GPU(s): step 5094: loss 24.2969, lr 2.7e-04, dt 2.1s +All GPU(s): step 5095: loss 24.5938, lr 2.7e-04, dt 2.1s +All GPU(s): step 5096: loss 24.5938, lr 2.7e-04, dt 2.0s +All GPU(s): step 5097: loss 24.6406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5098: loss 24.6406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5099: loss 24.9219, lr 2.7e-04, dt 2.0s +All GPU(s): step 5100: loss 24.5625, lr 2.7e-04, dt 2.1s +All GPU(s): step 5101: loss 24.7500, lr 2.7e-04, dt 2.0s +All GPU(s): step 5102: loss 24.3906, lr 2.7e-04, dt 2.0s +All GPU(s): step 5103: loss 24.5312, lr 2.7e-04, dt 2.0s +All GPU(s): step 5104: loss 24.5469, lr 2.7e-04, dt 2.1s +All GPU(s): step 5105: loss 24.4219, lr 2.7e-04, dt 2.1s +All GPU(s): step 5106: loss 24.2344, lr 2.7e-04, dt 2.0s +All GPU(s): step 5107: loss 24.4375, lr 2.7e-04, dt 2.0s +All GPU(s): step 5108: loss 24.1719, lr 2.7e-04, dt 2.0s +All GPU(s): step 5109: loss 24.4688, lr 2.7e-04, dt 2.2s +All GPU(s): step 5110: loss 24.5000, lr 2.7e-04, dt 2.0s +All GPU(s): step 5111: loss 24.5000, lr 2.7e-04, dt 2.1s +All GPU(s): step 5112: loss 24.5781, lr 2.7e-04, dt 2.0s +All GPU(s): step 5113: loss 24.5781, lr 2.7e-04, dt 2.0s +All GPU(s): step 5114: loss 24.6094, lr 2.7e-04, dt 2.2s +All GPU(s): step 5115: loss 24.5000, lr 2.7e-04, dt 2.0s +All GPU(s): step 5116: loss 24.5469, lr 2.7e-04, dt 2.0s +All GPU(s): step 5117: loss 24.4844, lr 2.7e-04, dt 2.0s +All GPU(s): step 5118: loss 24.4062, lr 2.7e-04, dt 2.1s +All GPU(s): step 5119: loss 24.4219, lr 2.7e-04, dt 2.1s +All GPU(s): step 5120: loss 24.5781, lr 2.7e-04, dt 2.1s +All GPU(s): step 5121: loss 24.2031, lr 2.7e-04, dt 2.1s +All GPU(s): step 5122: loss 24.3750, lr 2.7e-04, dt 2.0s +All GPU(s): step 5123: loss 24.2500, lr 2.7e-04, dt 2.1s +All GPU(s): step 5124: loss 24.3594, lr 2.7e-04, dt 2.1s +All GPU(s): step 5125: loss 24.2500, lr 2.7e-04, dt 2.1s +All GPU(s): step 5126: loss 24.5000, lr 2.7e-04, dt 2.0s +All GPU(s): step 5127: loss 24.1094, lr 2.7e-04, dt 2.0s +All GPU(s): step 5128: loss 24.2969, lr 2.7e-04, dt 2.1s +All GPU(s): step 5129: loss 24.3750, lr 2.7e-04, dt 2.1s +All GPU(s): step 5130: loss 24.3750, lr 2.7e-04, dt 2.1s +All GPU(s): step 5131: loss 24.2969, lr 2.7e-04, dt 2.1s +All GPU(s): step 5132: loss 24.2969, lr 2.7e-04, dt 2.1s +All GPU(s): step 5133: loss 24.3281, lr 2.7e-04, dt 2.1s +All GPU(s): step 5134: loss 24.4688, lr 2.7e-04, dt 2.1s +All GPU(s): step 5135: loss 24.2188, lr 2.7e-04, dt 2.1s +All GPU(s): step 5136: loss 24.3438, lr 2.7e-04, dt 2.1s +All GPU(s): step 5137: loss 24.6094, lr 2.7e-04, dt 2.1s +All GPU(s): step 5138: loss 24.4219, lr 2.7e-04, dt 2.1s +All GPU(s): step 5139: loss 24.7344, lr 2.7e-04, dt 2.0s +All GPU(s): step 5140: loss 24.3750, lr 2.7e-04, dt 2.0s +All GPU(s): step 5141: loss 24.3594, lr 2.7e-04, dt 2.0s +All GPU(s): step 5142: loss 24.3125, lr 2.7e-04, dt 2.1s +All GPU(s): step 5143: loss 24.0625, lr 2.7e-04, dt 2.1s +All GPU(s): step 5144: loss 24.1250, lr 2.7e-04, dt 2.1s +All GPU(s): step 5145: loss 24.2031, lr 2.7e-04, dt 2.1s +All GPU(s): step 5146: loss 23.6562, lr 2.7e-04, dt 2.1s +All GPU(s): step 5147: loss 24.0469, lr 2.7e-04, dt 2.1s +All GPU(s): step 5148: loss 24.0625, lr 2.7e-04, dt 2.2s +All GPU(s): step 5149: loss 24.0000, lr 2.7e-04, dt 2.1s +All GPU(s): step 5150: loss 23.9531, lr 2.7e-04, dt 2.1s +All GPU(s): step 5151: loss 23.5156, lr 2.7e-04, dt 2.1s +All GPU(s): step 5152: loss 23.6250, lr 2.7e-04, dt 2.1s +All GPU(s): step 5153: loss 23.4844, lr 2.7e-04, dt 2.1s +All GPU(s): step 5154: loss 23.5156, lr 2.7e-04, dt 2.0s +All GPU(s): step 5155: loss 23.6406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5156: loss 23.2031, lr 2.7e-04, dt 2.0s +All GPU(s): step 5157: loss 22.9062, lr 2.7e-04, dt 2.1s +All GPU(s): step 5158: loss 22.7969, lr 2.7e-04, dt 2.0s +All GPU(s): step 5159: loss 23.1094, lr 2.7e-04, dt 2.1s +All GPU(s): step 5160: loss 23.1094, lr 2.7e-04, dt 2.0s +All GPU(s): step 5161: loss 23.2344, lr 2.7e-04, dt 2.1s +All GPU(s): step 5162: loss 23.4844, lr 2.7e-04, dt 2.1s +All GPU(s): step 5163: loss 23.1250, lr 2.7e-04, dt 2.0s +All GPU(s): step 5164: loss 23.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5165: loss 23.3125, lr 2.7e-04, dt 2.1s +All GPU(s): step 5166: loss 23.3750, lr 2.7e-04, dt 2.1s +All GPU(s): step 5167: loss 23.5469, lr 2.7e-04, dt 2.2s +All GPU(s): step 5168: loss 23.1250, lr 2.7e-04, dt 2.0s +All GPU(s): step 5169: loss 23.3125, lr 2.7e-04, dt 2.1s +All GPU(s): step 5170: loss 23.2812, lr 2.7e-04, dt 2.0s +All GPU(s): step 5171: loss 23.3906, lr 2.7e-04, dt 2.0s +All GPU(s): step 5172: loss 23.3438, lr 2.7e-04, dt 2.1s +All GPU(s): step 5173: loss 23.2188, lr 2.7e-04, dt 2.0s +All GPU(s): step 5174: loss 23.2500, lr 2.7e-04, dt 2.1s +All GPU(s): step 5175: loss 23.1406, lr 2.7e-04, dt 2.0s +All GPU(s): step 5176: loss 23.0938, lr 2.7e-04, dt 2.1s +All GPU(s): step 5177: loss 23.2031, lr 2.7e-04, dt 2.1s +All GPU(s): step 5178: loss 22.7500, lr 2.7e-04, dt 2.1s +All GPU(s): step 5179: loss 23.1250, lr 2.7e-04, dt 2.1s +All GPU(s): step 5180: loss 23.0312, lr 2.7e-04, dt 2.1s +All GPU(s): step 5181: loss 23.0781, lr 2.7e-04, dt 2.1s +All GPU(s): step 5182: loss 23.0000, lr 2.7e-04, dt 2.1s +All GPU(s): step 5183: loss 23.3750, lr 2.7e-04, dt 2.0s +All GPU(s): step 5184: loss 23.1406, lr 2.7e-04, dt 2.1s +All GPU(s): step 5185: loss 23.1094, lr 2.7e-04, dt 2.1s +All GPU(s): step 5186: loss 22.9531, lr 2.7e-04, dt 2.2s +All GPU(s): step 5187: loss 22.9375, lr 2.7e-04, dt 2.1s +All GPU(s): step 5188: loss 23.1094, lr 2.7e-04, dt 2.0s +All GPU(s): step 5189: loss 23.3438, lr 2.7e-04, dt 2.1s +All GPU(s): step 5190: loss 23.2969, lr 2.7e-04, dt 2.1s +All GPU(s): step 5191: loss 23.1094, lr 2.7e-04, dt 2.1s +All GPU(s): step 5192: loss 23.2344, lr 2.7e-04, dt 2.1s +All GPU(s): step 5193: loss 23.3750, lr 2.7e-04, dt 2.0s +All GPU(s): step 5194: loss 23.4688, lr 2.7e-04, dt 2.1s +All GPU(s): step 5195: loss 23.7188, lr 2.7e-04, dt 2.1s +All GPU(s): step 5196: loss 23.5000, lr 2.6e-04, dt 2.1s +All GPU(s): step 5197: loss 23.5000, lr 2.6e-04, dt 2.1s +All GPU(s): step 5198: loss 23.5469, lr 2.6e-04, dt 2.0s +All GPU(s): step 5199: loss 23.5781, lr 2.6e-04, dt 2.0s +All GPU(s): step 5200: loss 23.6406, lr 2.6e-04, dt 2.1s +All GPU(s): step 5201: loss 23.4219, lr 2.6e-04, dt 2.1s +All GPU(s): step 5202: loss 23.6406, lr 2.6e-04, dt 2.0s +All GPU(s): step 5203: loss 23.7188, lr 2.6e-04, dt 2.0s +All GPU(s): step 5204: loss 23.6250, lr 2.6e-04, dt 2.1s +All GPU(s): step 5205: loss 23.8438, lr 2.6e-04, dt 2.1s +All GPU(s): step 5206: loss 23.9062, lr 2.6e-04, dt 2.0s +All GPU(s): step 5207: loss 23.6406, lr 2.6e-04, dt 2.0s +All GPU(s): step 5208: loss 23.6094, lr 2.6e-04, dt 2.0s +All GPU(s): step 5209: loss 23.8125, lr 2.6e-04, dt 2.0s +All GPU(s): step 5210: loss 23.5312, lr 2.6e-04, dt 2.1s +All GPU(s): step 5211: loss 23.9375, lr 2.6e-04, dt 2.0s +All GPU(s): step 5212: loss 23.7500, lr 2.6e-04, dt 2.0s +All GPU(s): step 5213: loss 23.6562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5214: loss 23.5625, lr 2.6e-04, dt 2.0s +All GPU(s): step 5215: loss 23.8125, lr 2.6e-04, dt 2.1s +All GPU(s): step 5216: loss 23.2500, lr 2.6e-04, dt 2.0s +All GPU(s): step 5217: loss 23.6094, lr 2.6e-04, dt 2.0s +All GPU(s): step 5218: loss 24.0312, lr 2.6e-04, dt 2.0s +All GPU(s): step 5219: loss 23.9219, lr 2.6e-04, dt 2.1s +All GPU(s): step 5220: loss 23.7656, lr 2.6e-04, dt 2.1s +All GPU(s): step 5221: loss 23.9844, lr 2.6e-04, dt 2.1s +All GPU(s): step 5222: loss 23.6250, lr 2.6e-04, dt 2.1s +All GPU(s): step 5223: loss 24.0156, lr 2.6e-04, dt 2.1s +All GPU(s): step 5224: loss 24.0000, lr 2.6e-04, dt 2.1s +All GPU(s): step 5225: loss 23.9688, lr 2.6e-04, dt 2.1s +All GPU(s): step 5226: loss 23.7344, lr 2.6e-04, dt 2.0s +All GPU(s): step 5227: loss 24.0000, lr 2.6e-04, dt 2.1s +All GPU(s): step 5228: loss 23.9531, lr 2.6e-04, dt 2.1s +All GPU(s): step 5229: loss 24.0000, lr 2.6e-04, dt 2.1s +All GPU(s): step 5230: loss 23.8594, lr 2.6e-04, dt 2.1s +All GPU(s): step 5231: loss 23.4688, lr 2.6e-04, dt 2.0s +All GPU(s): step 5232: loss 23.4844, lr 2.6e-04, dt 2.0s +All GPU(s): step 5233: loss 23.8906, lr 2.6e-04, dt 2.1s +All GPU(s): step 5234: loss 23.3906, lr 2.6e-04, dt 2.1s +All GPU(s): step 5235: loss 23.6094, lr 2.6e-04, dt 2.0s +All GPU(s): step 5236: loss 23.4375, lr 2.6e-04, dt 2.1s +All GPU(s): step 5237: loss 23.5625, lr 2.6e-04, dt 2.1s +All GPU(s): step 5238: loss 23.6250, lr 2.6e-04, dt 2.0s +All GPU(s): step 5239: loss 23.4219, lr 2.6e-04, dt 2.2s +All GPU(s): step 5240: loss 23.4844, lr 2.6e-04, dt 2.0s +All GPU(s): step 5241: loss 23.2031, lr 2.6e-04, dt 2.1s +All GPU(s): step 5242: loss 23.1094, lr 2.6e-04, dt 2.1s +All GPU(s): step 5243: loss 23.2500, lr 2.6e-04, dt 2.0s +All GPU(s): step 5244: loss 23.2031, lr 2.6e-04, dt 2.1s +All GPU(s): step 5245: loss 23.3750, lr 2.6e-04, dt 2.1s +All GPU(s): step 5246: loss 23.3594, lr 2.6e-04, dt 2.0s +All GPU(s): step 5247: loss 23.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5248: loss 23.3594, lr 2.6e-04, dt 2.1s +All GPU(s): step 5249: loss 23.1406, lr 2.6e-04, dt 2.1s +All GPU(s): step 5250: loss 23.1094, lr 2.6e-04, dt 2.0s +All GPU(s): step 5251: loss 23.0938, lr 2.6e-04, dt 2.0s +All GPU(s): step 5252: loss 23.0469, lr 2.6e-04, dt 2.1s +All GPU(s): step 5253: loss 23.0781, lr 2.6e-04, dt 2.1s +All GPU(s): step 5254: loss 22.5625, lr 2.6e-04, dt 2.1s +All GPU(s): step 5255: loss 22.3281, lr 2.6e-04, dt 2.0s +All GPU(s): step 5256: loss 22.2500, lr 2.6e-04, dt 2.1s +All GPU(s): step 5257: loss 22.0000, lr 2.6e-04, dt 2.0s +All GPU(s): step 5258: loss 21.7812, lr 2.6e-04, dt 2.1s +All GPU(s): step 5259: loss 22.4531, lr 2.6e-04, dt 2.0s +All GPU(s): step 5260: loss 21.9844, lr 2.6e-04, dt 2.1s +All GPU(s): step 5261: loss 21.5625, lr 2.6e-04, dt 2.1s +All GPU(s): step 5262: loss 21.6875, lr 2.6e-04, dt 2.1s +All GPU(s): step 5263: loss 21.4062, lr 2.6e-04, dt 2.2s +All GPU(s): step 5264: loss 21.4219, lr 2.6e-04, dt 2.1s +All GPU(s): step 5265: loss 21.0156, lr 2.6e-04, dt 2.1s +All GPU(s): step 5266: loss 20.9531, lr 2.6e-04, dt 2.1s +All GPU(s): step 5267: loss 20.8750, lr 2.6e-04, dt 2.1s +All GPU(s): step 5268: loss 21.1094, lr 2.6e-04, dt 2.1s +All GPU(s): step 5269: loss 21.2031, lr 2.6e-04, dt 2.1s +All GPU(s): step 5270: loss 21.0156, lr 2.6e-04, dt 2.0s +All GPU(s): step 5271: loss 20.7344, lr 2.6e-04, dt 2.0s +All GPU(s): step 5272: loss 20.9219, lr 2.6e-04, dt 2.1s +All GPU(s): step 5273: loss 21.4219, lr 2.6e-04, dt 2.2s +All GPU(s): step 5274: loss 20.7969, lr 2.6e-04, dt 2.1s +All GPU(s): step 5275: loss 20.5625, lr 2.6e-04, dt 2.1s +All GPU(s): step 5276: loss 21.1562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5277: loss 21.0781, lr 2.6e-04, dt 2.0s +All GPU(s): step 5278: loss 21.3438, lr 2.6e-04, dt 2.1s +All GPU(s): step 5279: loss 21.2812, lr 2.6e-04, dt 2.0s +All GPU(s): step 5280: loss 21.4688, lr 2.6e-04, dt 2.0s +All GPU(s): step 5281: loss 21.3594, lr 2.6e-04, dt 2.0s +All GPU(s): step 5282: loss 21.4844, lr 2.6e-04, dt 2.1s +All GPU(s): step 5283: loss 21.3438, lr 2.6e-04, dt 2.1s +All GPU(s): step 5284: loss 21.5938, lr 2.6e-04, dt 2.0s +All GPU(s): step 5285: loss 21.4375, lr 2.6e-04, dt 2.0s +All GPU(s): step 5286: loss 21.7969, lr 2.6e-04, dt 2.0s +All GPU(s): step 5287: loss 21.6250, lr 2.6e-04, dt 2.1s +All GPU(s): step 5288: loss 21.4688, lr 2.6e-04, dt 2.1s +All GPU(s): step 5289: loss 21.6719, lr 2.6e-04, dt 2.0s +All GPU(s): step 5290: loss 21.7500, lr 2.6e-04, dt 2.0s +All GPU(s): step 5291: loss 21.7188, lr 2.6e-04, dt 2.1s +All GPU(s): step 5292: loss 21.6562, lr 2.6e-04, dt 2.1s +All GPU(s): step 5293: loss 21.9219, lr 2.6e-04, dt 2.0s +All GPU(s): step 5294: loss 21.5469, lr 2.6e-04, dt 2.1s +All GPU(s): step 5295: loss 21.6562, lr 2.6e-04, dt 2.1s +All GPU(s): step 5296: loss 21.4844, lr 2.6e-04, dt 2.1s +All GPU(s): step 5297: loss 21.6562, lr 2.6e-04, dt 2.2s +All GPU(s): step 5298: loss 21.3125, lr 2.6e-04, dt 2.0s +All GPU(s): step 5299: loss 21.2188, lr 2.6e-04, dt 2.0s +All GPU(s): step 5300: loss 21.5625, lr 2.6e-04, dt 2.0s +All GPU(s): step 5301: loss 21.5312, lr 2.6e-04, dt 2.1s +All GPU(s): step 5302: loss 21.1562, lr 2.6e-04, dt 2.1s +All GPU(s): step 5303: loss 21.7031, lr 2.6e-04, dt 2.0s +All GPU(s): step 5304: loss 21.5000, lr 2.6e-04, dt 2.1s +All GPU(s): step 5305: loss 21.3906, lr 2.6e-04, dt 2.0s +All GPU(s): step 5306: loss 21.5938, lr 2.6e-04, dt 2.1s +All GPU(s): step 5307: loss 21.8125, lr 2.6e-04, dt 2.1s +All GPU(s): step 5308: loss 21.5781, lr 2.6e-04, dt 2.0s +All GPU(s): step 5309: loss 21.7500, lr 2.6e-04, dt 2.1s +All GPU(s): step 5310: loss 21.8281, lr 2.6e-04, dt 2.0s +All GPU(s): step 5311: loss 21.8594, lr 2.6e-04, dt 2.1s +All GPU(s): step 5312: loss 22.1250, lr 2.6e-04, dt 2.1s +All GPU(s): step 5313: loss 21.9531, lr 2.6e-04, dt 2.0s +All GPU(s): step 5314: loss 22.0625, lr 2.6e-04, dt 2.0s +All GPU(s): step 5315: loss 22.0781, lr 2.6e-04, dt 2.1s +All GPU(s): step 5316: loss 22.4062, lr 2.6e-04, dt 2.2s +All GPU(s): step 5317: loss 21.8594, lr 2.6e-04, dt 2.0s +All GPU(s): step 5318: loss 21.9531, lr 2.6e-04, dt 2.0s +All GPU(s): step 5319: loss 22.0938, lr 2.6e-04, dt 2.0s +All GPU(s): step 5320: loss 21.9062, lr 2.6e-04, dt 2.1s +All GPU(s): step 5321: loss 21.9531, lr 2.6e-04, dt 2.1s +All GPU(s): step 5322: loss 21.7656, lr 2.6e-04, dt 2.1s +All GPU(s): step 5323: loss 21.6562, lr 2.6e-04, dt 2.0s +All GPU(s): step 5324: loss 21.9531, lr 2.6e-04, dt 2.0s +All GPU(s): step 5325: loss 21.8281, lr 2.6e-04, dt 2.1s +All GPU(s): step 5326: loss 21.7656, lr 2.6e-04, dt 2.1s +All GPU(s): step 5327: loss 21.7031, lr 2.6e-04, dt 2.1s +All GPU(s): step 5328: loss 22.0781, lr 2.6e-04, dt 2.1s +All GPU(s): step 5329: loss 21.7344, lr 2.6e-04, dt 2.1s +All GPU(s): step 5330: loss 22.3125, lr 2.6e-04, dt 2.1s +All GPU(s): step 5331: loss 21.9375, lr 2.6e-04, dt 2.1s +All GPU(s): step 5332: loss 21.6875, lr 2.6e-04, dt 2.0s +All GPU(s): step 5333: loss 21.7812, lr 2.6e-04, dt 2.0s +All GPU(s): step 5334: loss 21.9062, lr 2.6e-04, dt 2.0s +All GPU(s): step 5335: loss 21.8594, lr 2.6e-04, dt 2.1s +All GPU(s): step 5336: loss 22.1719, lr 2.6e-04, dt 2.0s +All GPU(s): step 5337: loss 22.1719, lr 2.6e-04, dt 2.1s +All GPU(s): step 5338: loss 22.1094, lr 2.6e-04, dt 2.0s +All GPU(s): step 5339: loss 21.9844, lr 2.6e-04, dt 2.1s +All GPU(s): step 5340: loss 22.0000, lr 2.6e-04, dt 2.1s +All GPU(s): step 5341: loss 21.8906, lr 2.6e-04, dt 2.0s +All GPU(s): step 5342: loss 22.1094, lr 2.6e-04, dt 2.0s +All GPU(s): step 5343: loss 21.8750, lr 2.6e-04, dt 2.1s +All GPU(s): step 5344: loss 22.1719, lr 2.6e-04, dt 2.1s +All GPU(s): step 5345: loss 22.2656, lr 2.6e-04, dt 2.2s +All GPU(s): step 5346: loss 22.2969, lr 2.6e-04, dt 2.0s +All GPU(s): step 5347: loss 22.2031, lr 2.6e-04, dt 2.0s +All GPU(s): step 5348: loss 21.7656, lr 2.6e-04, dt 2.0s +All GPU(s): step 5349: loss 21.9219, lr 2.6e-04, dt 2.0s +All GPU(s): step 5350: loss 22.2500, lr 2.6e-04, dt 2.1s +All GPU(s): step 5351: loss 21.8906, lr 2.6e-04, dt 2.0s +All GPU(s): step 5352: loss 22.2031, lr 2.6e-04, dt 2.0s +All GPU(s): step 5353: loss 22.4531, lr 2.6e-04, dt 2.0s +All GPU(s): step 5354: loss 22.4688, lr 2.6e-04, dt 2.1s +All GPU(s): step 5355: loss 22.5781, lr 2.6e-04, dt 2.1s +All GPU(s): step 5356: loss 22.3750, lr 2.6e-04, dt 2.0s +All GPU(s): step 5357: loss 22.5156, lr 2.6e-04, dt 2.0s +All GPU(s): step 5358: loss 22.3438, lr 2.6e-04, dt 2.1s +All GPU(s): step 5359: loss 22.4844, lr 2.6e-04, dt 2.1s +All GPU(s): step 5360: loss 22.2969, lr 2.6e-04, dt 2.1s +All GPU(s): step 5361: loss 21.9688, lr 2.6e-04, dt 2.0s +All GPU(s): step 5362: loss 22.0781, lr 2.6e-04, dt 2.0s +All GPU(s): step 5363: loss 21.8125, lr 2.6e-04, dt 2.0s +All GPU(s): step 5364: loss 22.3906, lr 2.6e-04, dt 2.1s +All GPU(s): step 5365: loss 22.0000, lr 2.6e-04, dt 2.1s +All GPU(s): step 5366: loss 22.4062, lr 2.6e-04, dt 2.1s +All GPU(s): step 5367: loss 22.7812, lr 2.6e-04, dt 2.0s +All GPU(s): step 5368: loss 22.6562, lr 2.6e-04, dt 2.1s +All GPU(s): step 5369: loss 22.7344, lr 2.6e-04, dt 2.1s +All GPU(s): step 5370: loss 22.2188, lr 2.6e-04, dt 2.0s +All GPU(s): step 5371: loss 22.9531, lr 2.6e-04, dt 2.1s +All GPU(s): step 5372: loss 22.7656, lr 2.6e-04, dt 2.1s +All GPU(s): step 5373: loss 23.0781, lr 2.6e-04, dt 2.1s +All GPU(s): step 5374: loss 23.1406, lr 2.6e-04, dt 2.2s +All GPU(s): step 5375: loss 22.9219, lr 2.6e-04, dt 2.0s +All GPU(s): step 5376: loss 22.8750, lr 2.6e-04, dt 2.0s +All GPU(s): step 5377: loss 22.9844, lr 2.6e-04, dt 2.0s +All GPU(s): step 5378: loss 22.6875, lr 2.5e-04, dt 2.1s +All GPU(s): step 5379: loss 22.6719, lr 2.5e-04, dt 2.1s +All GPU(s): step 5380: loss 22.6094, lr 2.5e-04, dt 2.1s +All GPU(s): step 5381: loss 22.7188, lr 2.5e-04, dt 2.1s +All GPU(s): step 5382: loss 22.4844, lr 2.5e-04, dt 2.1s +All GPU(s): step 5383: loss 22.2812, lr 2.5e-04, dt 2.1s +All GPU(s): step 5384: loss 22.4844, lr 2.5e-04, dt 2.1s +All GPU(s): step 5385: loss 22.6250, lr 2.5e-04, dt 2.0s +All GPU(s): step 5386: loss 21.9531, lr 2.5e-04, dt 2.0s +All GPU(s): step 5387: loss 21.9375, lr 2.5e-04, dt 2.0s +All GPU(s): step 5388: loss 22.1562, lr 2.5e-04, dt 2.1s +All GPU(s): step 5389: loss 22.0000, lr 2.5e-04, dt 2.1s +All GPU(s): step 5390: loss 22.4219, lr 2.5e-04, dt 2.1s +All GPU(s): step 5391: loss 22.6406, lr 2.5e-04, dt 2.1s +All GPU(s): step 5392: loss 22.7656, lr 2.5e-04, dt 2.1s +All GPU(s): step 5393: loss 22.4844, lr 2.5e-04, dt 2.1s +All GPU(s): step 5394: loss 22.4844, lr 2.5e-04, dt 2.0s +All GPU(s): step 5395: loss 22.0781, lr 2.5e-04, dt 2.0s +All GPU(s): step 5396: loss 21.9219, lr 2.5e-04, dt 2.0s +All GPU(s): step 5397: loss 22.0156, lr 2.5e-04, dt 2.0s +All GPU(s): step 5398: loss 21.6562, lr 2.5e-04, dt 2.2s +All GPU(s): step 5399: loss 21.7812, lr 2.5e-04, dt 2.1s +All GPU(s): step 5400: loss 21.9219, lr 2.5e-04, dt 2.0s +All GPU(s): step 5401: loss 21.7969, lr 2.5e-04, dt 2.1s +All GPU(s): step 5402: loss 21.5156, lr 2.5e-04, dt 2.1s +All GPU(s): step 5403: loss 21.6719, lr 2.5e-04, dt 2.1s +All GPU(s): step 5404: loss 21.5781, lr 2.5e-04, dt 2.1s +All GPU(s): step 5405: loss 21.8125, lr 2.5e-04, dt 2.0s +All GPU(s): step 5406: loss 21.7812, lr 2.5e-04, dt 2.0s +All GPU(s): step 5407: loss 21.9844, lr 2.5e-04, dt 2.1s +All GPU(s): step 5408: loss 21.6719, lr 2.5e-04, dt 2.1s +All GPU(s): step 5409: loss 21.8281, lr 2.5e-04, dt 2.0s +All GPU(s): step 5410: loss 21.8281, lr 2.5e-04, dt 2.0s +All GPU(s): step 5411: loss 21.8906, lr 2.5e-04, dt 2.0s +All GPU(s): step 5412: loss 22.1250, lr 2.5e-04, dt 2.1s +All GPU(s): step 5413: loss 22.1094, lr 2.5e-04, dt 2.1s +All GPU(s): step 5414: loss 21.6562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5415: loss 21.5625, lr 2.5e-04, dt 2.0s +All GPU(s): step 5416: loss 21.9688, lr 2.5e-04, dt 2.0s +All GPU(s): step 5417: loss 22.0312, lr 2.5e-04, dt 2.1s +All GPU(s): step 5418: loss 22.0312, lr 2.5e-04, dt 2.0s +All GPU(s): step 5419: loss 21.9688, lr 2.5e-04, dt 2.0s +All GPU(s): step 5420: loss 21.8281, lr 2.5e-04, dt 2.0s +All GPU(s): step 5421: loss 21.9219, lr 2.5e-04, dt 2.1s +All GPU(s): step 5422: loss 21.7969, lr 2.5e-04, dt 2.1s +All GPU(s): step 5423: loss 22.0000, lr 2.5e-04, dt 2.0s +All GPU(s): step 5424: loss 21.7656, lr 2.5e-04, dt 2.0s +All GPU(s): step 5425: loss 21.7969, lr 2.5e-04, dt 2.0s +All GPU(s): step 5426: loss 21.9375, lr 2.5e-04, dt 2.0s +All GPU(s): step 5427: loss 22.0625, lr 2.5e-04, dt 2.1s +All GPU(s): step 5428: loss 21.8438, lr 2.5e-04, dt 2.0s +All GPU(s): step 5429: loss 21.8906, lr 2.5e-04, dt 2.0s +All GPU(s): step 5430: loss 21.8125, lr 2.5e-04, dt 2.0s +All GPU(s): step 5431: loss 21.6406, lr 2.5e-04, dt 2.1s +All GPU(s): step 5432: loss 21.9375, lr 2.5e-04, dt 2.1s +All GPU(s): step 5433: loss 21.9219, lr 2.5e-04, dt 2.0s +All GPU(s): step 5434: loss 22.4844, lr 2.5e-04, dt 2.0s +All GPU(s): step 5435: loss 22.0469, lr 2.5e-04, dt 2.0s +All GPU(s): step 5436: loss 22.4531, lr 2.5e-04, dt 2.1s +All GPU(s): step 5437: loss 22.1875, lr 2.5e-04, dt 2.0s +All GPU(s): step 5438: loss 22.7031, lr 2.5e-04, dt 2.1s +All GPU(s): step 5439: loss 22.4688, lr 2.5e-04, dt 2.1s +All GPU(s): step 5440: loss 22.5938, lr 2.5e-04, dt 2.0s +All GPU(s): step 5441: loss 22.6562, lr 2.5e-04, dt 2.1s +All GPU(s): step 5442: loss 22.4062, lr 2.5e-04, dt 2.0s +All GPU(s): step 5443: loss 22.6719, lr 2.5e-04, dt 2.0s +All GPU(s): step 5444: loss 22.3125, lr 2.5e-04, dt 2.0s +All GPU(s): step 5445: loss 22.3438, lr 2.5e-04, dt 2.0s +All GPU(s): step 5446: loss 22.5000, lr 2.5e-04, dt 2.1s +All GPU(s): step 5447: loss 22.6562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5448: loss 22.1094, lr 2.5e-04, dt 2.0s +All GPU(s): step 5449: loss 22.4844, lr 2.5e-04, dt 2.0s +All GPU(s): step 5450: loss 22.4062, lr 2.5e-04, dt 2.0s +All GPU(s): step 5451: loss 22.2812, lr 2.5e-04, dt 2.2s +All GPU(s): step 5452: loss 22.4531, lr 2.5e-04, dt 2.0s +All GPU(s): step 5453: loss 22.2188, lr 2.5e-04, dt 2.1s +All GPU(s): step 5454: loss 22.3438, lr 2.5e-04, dt 2.1s +All GPU(s): step 5455: loss 21.9375, lr 2.5e-04, dt 2.1s +All GPU(s): step 5456: loss 21.9688, lr 2.5e-04, dt 2.1s +All GPU(s): step 5457: loss 22.0781, lr 2.5e-04, dt 2.1s +All GPU(s): step 5458: loss 21.7969, lr 2.5e-04, dt 2.0s +All GPU(s): step 5459: loss 21.5469, lr 2.5e-04, dt 2.0s +All GPU(s): step 5460: loss 21.5625, lr 2.5e-04, dt 2.1s +All GPU(s): step 5461: loss 21.0000, lr 2.5e-04, dt 2.1s +All GPU(s): step 5462: loss 20.9688, lr 2.5e-04, dt 2.1s +All GPU(s): step 5463: loss 21.1562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5464: loss 21.2656, lr 2.5e-04, dt 2.0s +All GPU(s): step 5465: loss 21.1875, lr 2.5e-04, dt 2.1s +All GPU(s): step 5466: loss 20.9531, lr 2.5e-04, dt 2.0s +All GPU(s): step 5467: loss 20.6719, lr 2.5e-04, dt 2.0s +All GPU(s): step 5468: loss 21.2188, lr 2.5e-04, dt 2.0s +All GPU(s): step 5469: loss 21.0000, lr 2.5e-04, dt 2.0s +All GPU(s): step 5470: loss 20.8438, lr 2.5e-04, dt 2.1s +All GPU(s): step 5471: loss 20.7500, lr 2.5e-04, dt 2.1s +All GPU(s): step 5472: loss 20.8750, lr 2.5e-04, dt 2.0s +All GPU(s): step 5473: loss 20.4062, lr 2.5e-04, dt 2.1s +All GPU(s): step 5474: loss 20.2656, lr 2.5e-04, dt 2.1s +All GPU(s): step 5475: loss 20.5156, lr 2.5e-04, dt 2.2s +All GPU(s): step 5476: loss 20.3281, lr 2.5e-04, dt 2.0s +All GPU(s): step 5477: loss 20.2812, lr 2.5e-04, dt 2.0s +All GPU(s): step 5478: loss 20.3125, lr 2.5e-04, dt 2.1s +All GPU(s): step 5479: loss 20.8125, lr 2.5e-04, dt 2.0s +All GPU(s): step 5480: loss 20.7031, lr 2.5e-04, dt 2.1s +All GPU(s): step 5481: loss 20.5625, lr 2.5e-04, dt 2.0s +All GPU(s): step 5482: loss 20.9531, lr 2.5e-04, dt 2.0s +All GPU(s): step 5483: loss 20.4375, lr 2.5e-04, dt 2.0s +All GPU(s): step 5484: loss 20.9219, lr 2.5e-04, dt 2.0s +All GPU(s): step 5485: loss 20.6094, lr 2.5e-04, dt 2.1s +All GPU(s): step 5486: loss 20.7031, lr 2.5e-04, dt 2.0s +All GPU(s): step 5487: loss 20.7656, lr 2.5e-04, dt 2.0s +All GPU(s): step 5488: loss 20.2656, lr 2.5e-04, dt 2.0s +All GPU(s): step 5489: loss 20.6562, lr 2.5e-04, dt 2.1s +All GPU(s): step 5490: loss 20.6562, lr 2.5e-04, dt 2.0s +All GPU(s): step 5491: loss 20.7656, lr 2.5e-04, dt 2.0s +All GPU(s): step 5492: loss 20.6875, lr 2.5e-04, dt 2.0s +All GPU(s): step 5493: loss 21.0312, lr 2.5e-04, dt 2.0s +All GPU(s): step 5494: loss 21.1406, lr 2.5e-04, dt 2.1s +All GPU(s): step 5495: loss 20.8438, lr 2.5e-04, dt 2.1s +All GPU(s): step 5496: loss 20.7969, lr 2.5e-04, dt 2.0s +All GPU(s): step 5497: loss 20.5312, lr 2.5e-04, dt 2.0s +All GPU(s): step 5498: loss 20.4688, lr 2.5e-04, dt 2.1s +All GPU(s): step 5499: loss 20.7344, lr 2.5e-04, dt 2.2s +All GPU(s): step 5500: loss 20.7031, lr 2.5e-04, dt 2.1s +All GPU(s): step 5501: loss 21.0000, lr 2.5e-04, dt 2.1s +All GPU(s): step 5502: loss 21.0469, lr 2.5e-04, dt 2.1s +All GPU(s): step 5503: loss 21.4375, lr 2.5e-04, dt 2.0s +All GPU(s): step 5504: loss 21.7031, lr 2.5e-04, dt 2.1s +All GPU(s): step 5505: loss 21.7500, lr 2.5e-04, dt 2.0s +All GPU(s): step 5506: loss 21.5312, lr 2.5e-04, dt 2.0s +All GPU(s): step 5507: loss 21.7031, lr 2.5e-04, dt 2.0s +All GPU(s): step 5508: loss 21.6094, lr 2.5e-04, dt 2.1s +All GPU(s): step 5509: loss 21.3750, lr 2.5e-04, dt 2.1s +All GPU(s): step 5510: loss 21.7500, lr 2.5e-04, dt 2.1s +All GPU(s): step 5511: loss 22.1406, lr 2.5e-04, dt 2.1s +All GPU(s): step 5512: loss 21.9688, lr 2.5e-04, dt 2.1s +All GPU(s): step 5513: loss 21.7188, lr 2.5e-04, dt 2.1s +All GPU(s): step 5514: loss 22.0156, lr 2.5e-04, dt 2.1s +All GPU(s): step 5515: loss 22.1094, lr 2.5e-04, dt 2.0s +All GPU(s): step 5516: loss 22.2188, lr 2.5e-04, dt 2.1s +All GPU(s): step 5517: loss 22.1719, lr 2.5e-04, dt 2.1s +All GPU(s): step 5518: loss 22.2969, lr 2.5e-04, dt 2.1s +All GPU(s): step 5519: loss 21.9375, lr 2.5e-04, dt 2.0s +All GPU(s): step 5520: loss 22.5781, lr 2.5e-04, dt 2.1s +All GPU(s): step 5521: loss 22.3125, lr 2.5e-04, dt 2.0s +All GPU(s): step 5522: loss 22.4219, lr 2.5e-04, dt 2.0s +All GPU(s): step 5523: loss 22.0781, lr 2.5e-04, dt 2.1s +All GPU(s): step 5524: loss 22.2031, lr 2.5e-04, dt 2.0s +All GPU(s): step 5525: loss 22.4375, lr 2.5e-04, dt 2.1s +All GPU(s): step 5526: loss 22.3906, lr 2.5e-04, dt 2.1s +All GPU(s): step 5527: loss 22.3438, lr 2.5e-04, dt 2.1s +All GPU(s): step 5528: loss 22.3906, lr 2.5e-04, dt 2.2s +All GPU(s): step 5529: loss 22.2969, lr 2.5e-04, dt 2.1s +All GPU(s): step 5530: loss 22.3594, lr 2.5e-04, dt 2.1s +All GPU(s): step 5531: loss 22.2969, lr 2.5e-04, dt 2.1s +All GPU(s): step 5532: loss 22.0156, lr 2.5e-04, dt 2.1s +All GPU(s): step 5533: loss 22.5000, lr 2.5e-04, dt 2.1s +All GPU(s): step 5534: loss 22.4688, lr 2.5e-04, dt 2.1s +All GPU(s): step 5535: loss 22.3750, lr 2.5e-04, dt 2.0s +All GPU(s): step 5536: loss 22.1562, lr 2.5e-04, dt 2.1s +All GPU(s): step 5537: loss 22.5156, lr 2.5e-04, dt 2.1s +All GPU(s): step 5538: loss 22.3906, lr 2.5e-04, dt 2.1s +All GPU(s): step 5539: loss 22.6250, lr 2.5e-04, dt 2.0s +All GPU(s): step 5540: loss 22.7656, lr 2.5e-04, dt 2.0s +All GPU(s): step 5541: loss 22.7031, lr 2.5e-04, dt 2.0s +All GPU(s): step 5542: loss 22.3906, lr 2.5e-04, dt 2.1s +All GPU(s): step 5543: loss 22.3906, lr 2.5e-04, dt 2.0s +All GPU(s): step 5544: loss 22.6250, lr 2.5e-04, dt 2.0s +All GPU(s): step 5545: loss 22.7812, lr 2.5e-04, dt 2.0s +All GPU(s): step 5546: loss 22.5000, lr 2.5e-04, dt 2.0s +All GPU(s): step 5547: loss 22.8125, lr 2.5e-04, dt 2.2s +All GPU(s): step 5548: loss 22.7188, lr 2.5e-04, dt 2.0s +All GPU(s): step 5549: loss 22.6250, lr 2.5e-04, dt 2.0s +All GPU(s): step 5550: loss 22.8594, lr 2.5e-04, dt 2.1s +All GPU(s): step 5551: loss 22.4688, lr 2.5e-04, dt 2.1s +All GPU(s): step 5552: loss 22.6406, lr 2.5e-04, dt 2.1s +All GPU(s): step 5553: loss 22.8750, lr 2.5e-04, dt 2.1s +All GPU(s): step 5554: loss 22.8594, lr 2.5e-04, dt 2.0s +All GPU(s): step 5555: loss 22.7812, lr 2.5e-04, dt 2.0s +All GPU(s): step 5556: loss 23.1094, lr 2.5e-04, dt 2.1s +All GPU(s): step 5557: loss 23.1875, lr 2.4e-04, dt 2.1s +All GPU(s): step 5558: loss 23.3750, lr 2.4e-04, dt 2.1s +All GPU(s): step 5559: loss 23.5469, lr 2.4e-04, dt 2.1s +All GPU(s): step 5560: loss 24.2969, lr 2.4e-04, dt 2.0s +All GPU(s): step 5561: loss 24.2344, lr 2.4e-04, dt 2.0s +All GPU(s): step 5562: loss 24.5156, lr 2.4e-04, dt 2.1s +All GPU(s): step 5563: loss 24.2656, lr 2.4e-04, dt 2.1s +All GPU(s): step 5564: loss 24.2500, lr 2.4e-04, dt 2.0s +All GPU(s): step 5565: loss 24.5312, lr 2.4e-04, dt 2.0s +All GPU(s): step 5566: loss 24.3906, lr 2.4e-04, dt 2.1s +All GPU(s): step 5567: loss 24.3594, lr 2.4e-04, dt 2.0s +All GPU(s): step 5568: loss 24.4219, lr 2.4e-04, dt 2.1s +All GPU(s): step 5569: loss 24.4688, lr 2.4e-04, dt 2.0s +All GPU(s): step 5570: loss 24.4375, lr 2.4e-04, dt 2.1s +All GPU(s): step 5571: loss 24.4375, lr 2.4e-04, dt 2.1s +All GPU(s): step 5572: loss 24.4531, lr 2.4e-04, dt 2.1s +All GPU(s): step 5573: loss 24.3125, lr 2.4e-04, dt 2.0s +All GPU(s): step 5574: loss 24.4219, lr 2.4e-04, dt 2.0s +All GPU(s): step 5575: loss 24.5625, lr 2.4e-04, dt 2.1s +All GPU(s): step 5576: loss 24.4375, lr 2.4e-04, dt 2.1s +All GPU(s): step 5577: loss 24.3281, lr 2.4e-04, dt 2.1s +All GPU(s): step 5578: loss 24.5156, lr 2.4e-04, dt 2.1s +All GPU(s): step 5579: loss 24.5625, lr 2.4e-04, dt 2.1s +All GPU(s): step 5580: loss 24.5312, lr 2.4e-04, dt 2.1s +All GPU(s): step 5581: loss 24.8750, lr 2.4e-04, dt 2.1s +All GPU(s): step 5582: loss 24.2969, lr 2.4e-04, dt 2.0s +All GPU(s): step 5583: loss 24.2969, lr 2.4e-04, dt 2.0s +All GPU(s): step 5584: loss 24.4531, lr 2.4e-04, dt 2.0s +All GPU(s): step 5585: loss 24.6875, lr 2.4e-04, dt 2.1s +All GPU(s): step 5586: loss 24.5938, lr 2.4e-04, dt 2.1s +All GPU(s): step 5587: loss 24.6094, lr 2.4e-04, dt 2.1s +All GPU(s): step 5588: loss 24.6406, lr 2.4e-04, dt 2.0s +All GPU(s): step 5589: loss 24.4531, lr 2.4e-04, dt 2.1s +All GPU(s): step 5590: loss 24.5625, lr 2.4e-04, dt 2.1s +All GPU(s): step 5591: loss 24.3125, lr 2.4e-04, dt 2.1s +All GPU(s): step 5592: loss 24.3594, lr 2.4e-04, dt 2.0s +All GPU(s): step 5593: loss 24.4688, lr 2.4e-04, dt 2.0s +All GPU(s): step 5594: loss 24.7031, lr 2.4e-04, dt 2.0s +All GPU(s): step 5595: loss 24.3750, lr 2.4e-04, dt 2.1s +All GPU(s): step 5596: loss 24.4688, lr 2.4e-04, dt 2.0s +All GPU(s): step 5597: loss 24.5781, lr 2.4e-04, dt 2.0s +All GPU(s): step 5598: loss 24.4375, lr 2.4e-04, dt 2.0s +All GPU(s): step 5599: loss 24.3906, lr 2.4e-04, dt 2.1s +All GPU(s): step 5600: loss 24.3906, lr 2.4e-04, dt 2.1s +All GPU(s): step 5601: loss 24.3594, lr 2.4e-04, dt 2.1s +All GPU(s): step 5602: loss 24.3594, lr 2.4e-04, dt 2.1s +All GPU(s): step 5603: loss 24.3594, lr 2.4e-04, dt 2.0s +All GPU(s): step 5604: loss 24.4375, lr 2.4e-04, dt 2.0s +All GPU(s): step 5605: loss 24.6719, lr 2.4e-04, dt 2.1s +All GPU(s): step 5606: loss 24.3906, lr 2.4e-04, dt 2.1s +All GPU(s): step 5607: loss 24.5781, lr 2.4e-04, dt 2.0s +All GPU(s): step 5608: loss 24.4219, lr 2.4e-04, dt 2.1s +All GPU(s): step 5609: loss 24.3750, lr 2.4e-04, dt 2.1s +All GPU(s): step 5610: loss 24.3594, lr 2.4e-04, dt 2.1s +All GPU(s): step 5611: loss 24.2031, lr 2.4e-04, dt 2.1s +All GPU(s): step 5612: loss 24.1094, lr 2.4e-04, dt 2.1s +All GPU(s): step 5613: loss 24.0312, lr 2.4e-04, dt 2.1s +All GPU(s): step 5614: loss 24.0781, lr 2.4e-04, dt 2.1s +All GPU(s): step 5615: loss 23.9531, lr 2.4e-04, dt 2.1s +All GPU(s): step 5616: loss 23.9062, lr 2.4e-04, dt 2.1s +All GPU(s): step 5617: loss 24.0938, lr 2.4e-04, dt 2.1s +All GPU(s): step 5618: loss 23.7500, lr 2.4e-04, dt 2.1s +All GPU(s): step 5619: loss 23.8906, lr 2.4e-04, dt 2.1s +All GPU(s): step 5620: loss 23.8906, lr 2.4e-04, dt 2.0s +All GPU(s): step 5621: loss 23.5312, lr 2.4e-04, dt 2.1s +All GPU(s): step 5622: loss 23.4688, lr 2.4e-04, dt 2.1s +All GPU(s): step 5623: loss 23.7500, lr 2.4e-04, dt 2.1s +All GPU(s): step 5624: loss 23.4531, lr 2.4e-04, dt 2.1s +All GPU(s): step 5625: loss 23.6875, lr 2.4e-04, dt 2.0s +All GPU(s): step 5626: loss 23.5000, lr 2.4e-04, dt 2.0s +All GPU(s): step 5627: loss 23.1250, lr 2.4e-04, dt 2.0s +All GPU(s): step 5628: loss 23.1094, lr 2.4e-04, dt 2.1s +All GPU(s): step 5629: loss 22.7500, lr 2.4e-04, dt 2.2s +All GPU(s): step 5630: loss 23.2500, lr 2.4e-04, dt 2.1s +All GPU(s): step 5631: loss 22.9219, lr 2.4e-04, dt 2.1s +All GPU(s): step 5632: loss 22.3125, lr 2.4e-04, dt 2.0s +All GPU(s): step 5633: loss 22.3750, lr 2.4e-04, dt 2.0s +All GPU(s): step 5634: loss 22.7656, lr 2.4e-04, dt 2.1s +All GPU(s): step 5635: loss 22.6094, lr 2.4e-04, dt 2.0s +All GPU(s): step 5636: loss 22.5156, lr 2.4e-04, dt 2.0s +All GPU(s): step 5637: loss 22.1875, lr 2.4e-04, dt 2.1s +All GPU(s): step 5638: loss 22.2031, lr 2.4e-04, dt 2.1s +All GPU(s): step 5639: loss 22.2031, lr 2.4e-04, dt 2.1s +All GPU(s): step 5640: loss 22.4688, lr 2.4e-04, dt 2.0s +All GPU(s): step 5641: loss 22.2969, lr 2.4e-04, dt 2.0s +All GPU(s): step 5642: loss 22.5312, lr 2.4e-04, dt 2.1s +All GPU(s): step 5643: loss 22.5469, lr 2.4e-04, dt 2.1s +All GPU(s): step 5644: loss 22.7344, lr 2.4e-04, dt 2.1s +All GPU(s): step 5645: loss 22.6719, lr 2.4e-04, dt 2.1s +All GPU(s): step 5646: loss 22.7188, lr 2.4e-04, dt 2.1s +All GPU(s): step 5647: loss 22.8125, lr 2.4e-04, dt 2.1s +All GPU(s): step 5648: loss 22.4531, lr 2.4e-04, dt 2.1s +All GPU(s): step 5649: loss 22.9688, lr 2.4e-04, dt 2.1s +All GPU(s): step 5650: loss 22.4062, lr 2.4e-04, dt 2.1s +All GPU(s): step 5651: loss 22.5312, lr 2.4e-04, dt 2.1s +All GPU(s): step 5652: loss 22.7344, lr 2.4e-04, dt 2.1s +All GPU(s): step 5653: loss 23.0469, lr 2.4e-04, dt 2.1s +All GPU(s): step 5654: loss 22.6875, lr 2.4e-04, dt 2.1s +All GPU(s): step 5655: loss 22.8438, lr 2.4e-04, dt 2.1s +All GPU(s): step 5656: loss 22.9375, lr 2.4e-04, dt 2.0s +All GPU(s): step 5657: loss 22.7656, lr 2.4e-04, dt 2.1s +All GPU(s): step 5658: loss 23.0781, lr 2.4e-04, dt 2.1s +All GPU(s): step 5659: loss 22.8594, lr 2.4e-04, dt 2.0s +All GPU(s): step 5660: loss 23.4844, lr 2.4e-04, dt 2.1s +All GPU(s): step 5661: loss 23.3750, lr 2.4e-04, dt 2.0s +All GPU(s): step 5662: loss 23.5469, lr 2.4e-04, dt 2.0s +All GPU(s): step 5663: loss 23.4219, lr 2.4e-04, dt 2.1s +All GPU(s): step 5664: loss 23.4844, lr 2.4e-04, dt 2.1s +All GPU(s): step 5665: loss 23.5312, lr 2.4e-04, dt 2.1s +All GPU(s): step 5666: loss 23.8594, lr 2.4e-04, dt 2.0s +All GPU(s): step 5667: loss 23.6875, lr 2.4e-04, dt 2.1s +All GPU(s): step 5668: loss 23.5469, lr 2.4e-04, dt 2.1s +All GPU(s): step 5669: loss 23.7656, lr 2.4e-04, dt 2.1s +All GPU(s): step 5670: loss 23.6562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5671: loss 23.7031, lr 2.4e-04, dt 2.1s +All GPU(s): step 5672: loss 23.7812, lr 2.4e-04, dt 2.2s +All GPU(s): step 5673: loss 23.7969, lr 2.4e-04, dt 2.1s +All GPU(s): step 5674: loss 23.8438, lr 2.4e-04, dt 2.1s +All GPU(s): step 5675: loss 23.9219, lr 2.4e-04, dt 2.0s +All GPU(s): step 5676: loss 24.0469, lr 2.4e-04, dt 2.1s +All GPU(s): step 5677: loss 23.9688, lr 2.4e-04, dt 2.2s +All GPU(s): step 5678: loss 24.3438, lr 2.4e-04, dt 2.0s +All GPU(s): step 5679: loss 24.1719, lr 2.4e-04, dt 2.1s +All GPU(s): step 5680: loss 24.3125, lr 2.4e-04, dt 2.0s +All GPU(s): step 5681: loss 24.2812, lr 2.4e-04, dt 2.0s +All GPU(s): step 5682: loss 24.4688, lr 2.4e-04, dt 2.2s +All GPU(s): step 5683: loss 24.3281, lr 2.4e-04, dt 2.1s +All GPU(s): step 5684: loss 24.2031, lr 2.4e-04, dt 2.1s +All GPU(s): step 5685: loss 23.9844, lr 2.4e-04, dt 2.0s +All GPU(s): step 5686: loss 23.8281, lr 2.4e-04, dt 2.1s +All GPU(s): step 5687: loss 23.6875, lr 2.4e-04, dt 2.1s +All GPU(s): step 5688: loss 23.9219, lr 2.4e-04, dt 2.1s +All GPU(s): step 5689: loss 23.9375, lr 2.4e-04, dt 2.1s +All GPU(s): step 5690: loss 23.8906, lr 2.4e-04, dt 2.1s +All GPU(s): step 5691: loss 23.9062, lr 2.4e-04, dt 2.1s +All GPU(s): step 5692: loss 24.0156, lr 2.4e-04, dt 2.0s +All GPU(s): step 5693: loss 24.0000, lr 2.4e-04, dt 2.0s +All GPU(s): step 5694: loss 23.8750, lr 2.4e-04, dt 2.0s +All GPU(s): step 5695: loss 23.7812, lr 2.4e-04, dt 2.1s +All GPU(s): step 5696: loss 23.6719, lr 2.4e-04, dt 2.1s +All GPU(s): step 5697: loss 23.7344, lr 2.4e-04, dt 2.0s +All GPU(s): step 5698: loss 23.6719, lr 2.4e-04, dt 2.0s +All GPU(s): step 5699: loss 23.5781, lr 2.4e-04, dt 2.0s +All GPU(s): step 5700: loss 23.4219, lr 2.4e-04, dt 2.1s +All GPU(s): step 5701: loss 23.1719, lr 2.4e-04, dt 2.2s +All GPU(s): step 5702: loss 22.8594, lr 2.4e-04, dt 2.0s +All GPU(s): step 5703: loss 22.6250, lr 2.4e-04, dt 2.1s +All GPU(s): step 5704: loss 23.1562, lr 2.4e-04, dt 2.1s +All GPU(s): step 5705: loss 23.4062, lr 2.4e-04, dt 2.1s +All GPU(s): step 5706: loss 23.3281, lr 2.4e-04, dt 2.1s +All GPU(s): step 5707: loss 23.4531, lr 2.4e-04, dt 2.0s +All GPU(s): step 5708: loss 23.1562, lr 2.4e-04, dt 2.0s +All GPU(s): step 5709: loss 23.1094, lr 2.4e-04, dt 2.0s +All GPU(s): step 5710: loss 22.9531, lr 2.4e-04, dt 2.1s +All GPU(s): step 5711: loss 22.7812, lr 2.4e-04, dt 2.1s +All GPU(s): step 5712: loss 22.7031, lr 2.4e-04, dt 2.0s +All GPU(s): step 5713: loss 22.7656, lr 2.4e-04, dt 2.0s +All GPU(s): step 5714: loss 22.7969, lr 2.4e-04, dt 2.1s +All GPU(s): step 5715: loss 22.7344, lr 2.4e-04, dt 2.1s +All GPU(s): step 5716: loss 22.4375, lr 2.4e-04, dt 2.1s +All GPU(s): step 5717: loss 22.2969, lr 2.4e-04, dt 2.1s +All GPU(s): step 5718: loss 22.2031, lr 2.4e-04, dt 2.1s +All GPU(s): step 5719: loss 22.1562, lr 2.4e-04, dt 2.1s +All GPU(s): step 5720: loss 22.1875, lr 2.4e-04, dt 2.1s +All GPU(s): step 5721: loss 22.0781, lr 2.4e-04, dt 2.0s +All GPU(s): step 5722: loss 22.2031, lr 2.4e-04, dt 2.0s +All GPU(s): step 5723: loss 21.9531, lr 2.4e-04, dt 2.1s +All GPU(s): step 5724: loss 21.8906, lr 2.4e-04, dt 2.1s +All GPU(s): step 5725: loss 21.8125, lr 2.4e-04, dt 2.2s +All GPU(s): step 5726: loss 21.9531, lr 2.4e-04, dt 2.0s +All GPU(s): step 5727: loss 22.0781, lr 2.4e-04, dt 2.0s +All GPU(s): step 5728: loss 22.3750, lr 2.4e-04, dt 2.0s +All GPU(s): step 5729: loss 22.2031, lr 2.4e-04, dt 2.0s +All GPU(s): step 5730: loss 22.2500, lr 2.4e-04, dt 2.2s +All GPU(s): step 5731: loss 22.2656, lr 2.4e-04, dt 2.1s +All GPU(s): step 5732: loss 22.3125, lr 2.4e-04, dt 2.1s +All GPU(s): step 5733: loss 22.2344, lr 2.4e-04, dt 2.1s +All GPU(s): step 5734: loss 22.1719, lr 2.4e-04, dt 2.1s +All GPU(s): step 5735: loss 21.5625, lr 2.3e-04, dt 2.1s +All GPU(s): step 5736: loss 21.6875, lr 2.3e-04, dt 2.0s +All GPU(s): step 5737: loss 21.8125, lr 2.3e-04, dt 2.0s +All GPU(s): step 5738: loss 21.9688, lr 2.3e-04, dt 2.1s +All GPU(s): step 5739: loss 22.0781, lr 2.3e-04, dt 2.1s +All GPU(s): step 5740: loss 22.1719, lr 2.3e-04, dt 2.0s +All GPU(s): step 5741: loss 22.0781, lr 2.3e-04, dt 2.1s +All GPU(s): step 5742: loss 22.2188, lr 2.3e-04, dt 2.1s +All GPU(s): step 5743: loss 22.1094, lr 2.3e-04, dt 2.0s +All GPU(s): step 5744: loss 21.8750, lr 2.3e-04, dt 2.1s +All GPU(s): step 5745: loss 21.7344, lr 2.3e-04, dt 2.0s +All GPU(s): step 5746: loss 22.0625, lr 2.3e-04, dt 2.0s +All GPU(s): step 5747: loss 21.8594, lr 2.3e-04, dt 2.0s +All GPU(s): step 5748: loss 21.5781, lr 2.3e-04, dt 2.1s +All GPU(s): step 5749: loss 22.1875, lr 2.3e-04, dt 2.2s +All GPU(s): step 5750: loss 21.9688, lr 2.3e-04, dt 2.1s +All GPU(s): step 5751: loss 21.9688, lr 2.3e-04, dt 2.0s +All GPU(s): step 5752: loss 21.8750, lr 2.3e-04, dt 2.0s +All GPU(s): step 5753: loss 21.7656, lr 2.3e-04, dt 2.1s +All GPU(s): step 5754: loss 21.4219, lr 2.3e-04, dt 2.2s +All GPU(s): step 5755: loss 21.4375, lr 2.3e-04, dt 2.1s +All GPU(s): step 5756: loss 20.9531, lr 2.3e-04, dt 2.1s +All GPU(s): step 5757: loss 20.9219, lr 2.3e-04, dt 2.0s +All GPU(s): step 5758: loss 21.3906, lr 2.3e-04, dt 2.1s +All GPU(s): step 5759: loss 21.0000, lr 2.3e-04, dt 2.1s +All GPU(s): step 5760: loss 21.0781, lr 2.3e-04, dt 2.0s +All GPU(s): step 5761: loss 21.4062, lr 2.3e-04, dt 2.0s +All GPU(s): step 5762: loss 21.2656, lr 2.3e-04, dt 2.0s +All GPU(s): step 5763: loss 21.0938, lr 2.3e-04, dt 2.1s +All GPU(s): step 5764: loss 21.0000, lr 2.3e-04, dt 2.1s +All GPU(s): step 5765: loss 21.2344, lr 2.3e-04, dt 2.0s +All GPU(s): step 5766: loss 20.9531, lr 2.3e-04, dt 2.0s +All GPU(s): step 5767: loss 20.9844, lr 2.3e-04, dt 2.0s +All GPU(s): step 5768: loss 20.5938, lr 2.3e-04, dt 2.1s +All GPU(s): step 5769: loss 20.6875, lr 2.3e-04, dt 2.0s +All GPU(s): step 5770: loss 20.7656, lr 2.3e-04, dt 2.1s +All GPU(s): step 5771: loss 20.5469, lr 2.3e-04, dt 2.0s +All GPU(s): step 5772: loss 20.8125, lr 2.3e-04, dt 2.0s +All GPU(s): step 5773: loss 20.4531, lr 2.3e-04, dt 2.1s +All GPU(s): step 5774: loss 20.7344, lr 2.3e-04, dt 2.1s +All GPU(s): step 5775: loss 20.4219, lr 2.3e-04, dt 2.1s +All GPU(s): step 5776: loss 20.9375, lr 2.3e-04, dt 2.1s +All GPU(s): step 5777: loss 20.7500, lr 2.3e-04, dt 2.1s +All GPU(s): step 5778: loss 20.4062, lr 2.3e-04, dt 2.1s +All GPU(s): step 5779: loss 20.9531, lr 2.3e-04, dt 2.0s +All GPU(s): step 5780: loss 21.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5781: loss 20.6250, lr 2.3e-04, dt 2.0s +All GPU(s): step 5782: loss 21.1875, lr 2.3e-04, dt 2.0s +All GPU(s): step 5783: loss 20.9375, lr 2.3e-04, dt 2.1s +All GPU(s): step 5784: loss 20.8750, lr 2.3e-04, dt 2.0s +All GPU(s): step 5785: loss 21.0781, lr 2.3e-04, dt 2.0s +All GPU(s): step 5786: loss 20.6094, lr 2.3e-04, dt 2.0s +All GPU(s): step 5787: loss 20.7969, lr 2.3e-04, dt 2.1s +All GPU(s): step 5788: loss 20.8438, lr 2.3e-04, dt 2.1s +All GPU(s): step 5789: loss 20.9219, lr 2.3e-04, dt 2.1s +All GPU(s): step 5790: loss 20.9219, lr 2.3e-04, dt 2.0s +All GPU(s): step 5791: loss 21.1250, lr 2.3e-04, dt 2.0s +All GPU(s): step 5792: loss 21.2656, lr 2.3e-04, dt 2.1s +All GPU(s): step 5793: loss 21.7344, lr 2.3e-04, dt 2.0s +All GPU(s): step 5794: loss 21.4219, lr 2.3e-04, dt 2.0s +All GPU(s): step 5795: loss 21.6719, lr 2.3e-04, dt 2.0s +All GPU(s): step 5796: loss 21.4844, lr 2.3e-04, dt 2.0s +All GPU(s): step 5797: loss 21.9375, lr 2.3e-04, dt 2.1s +All GPU(s): step 5798: loss 22.1250, lr 2.3e-04, dt 2.1s +All GPU(s): step 5799: loss 22.0625, lr 2.3e-04, dt 2.1s +All GPU(s): step 5800: loss 21.9219, lr 2.3e-04, dt 2.0s +All GPU(s): step 5801: loss 22.2031, lr 2.3e-04, dt 2.1s +All GPU(s): step 5802: loss 22.2969, lr 2.3e-04, dt 2.1s +All GPU(s): step 5803: loss 21.8906, lr 2.3e-04, dt 2.1s +All GPU(s): step 5804: loss 21.9219, lr 2.3e-04, dt 2.0s +All GPU(s): step 5805: loss 22.2812, lr 2.3e-04, dt 2.0s +All GPU(s): step 5806: loss 22.5000, lr 2.3e-04, dt 2.1s +All GPU(s): step 5807: loss 22.5469, lr 2.3e-04, dt 2.2s +All GPU(s): step 5808: loss 22.5625, lr 2.3e-04, dt 2.0s +All GPU(s): step 5809: loss 22.7188, lr 2.3e-04, dt 2.0s +All GPU(s): step 5810: loss 22.5000, lr 2.3e-04, dt 2.0s +All GPU(s): step 5811: loss 22.4531, lr 2.3e-04, dt 2.0s +All GPU(s): step 5812: loss 22.1719, lr 2.3e-04, dt 2.1s +All GPU(s): step 5813: loss 22.2344, lr 2.3e-04, dt 2.1s +All GPU(s): step 5814: loss 22.3750, lr 2.3e-04, dt 2.1s +All GPU(s): step 5815: loss 22.3125, lr 2.3e-04, dt 2.0s +All GPU(s): step 5816: loss 22.6875, lr 2.3e-04, dt 2.1s +All GPU(s): step 5817: loss 22.4688, lr 2.3e-04, dt 2.1s +All GPU(s): step 5818: loss 22.4219, lr 2.3e-04, dt 2.0s +All GPU(s): step 5819: loss 22.6250, lr 2.3e-04, dt 2.0s +All GPU(s): step 5820: loss 22.5938, lr 2.3e-04, dt 2.0s +All GPU(s): step 5821: loss 22.8594, lr 2.3e-04, dt 2.1s +All GPU(s): step 5822: loss 22.5938, lr 2.3e-04, dt 2.1s +All GPU(s): step 5823: loss 22.7656, lr 2.3e-04, dt 2.0s +All GPU(s): step 5824: loss 22.8750, lr 2.3e-04, dt 2.1s +All GPU(s): step 5825: loss 22.2031, lr 2.3e-04, dt 2.1s +All GPU(s): step 5826: loss 21.9844, lr 2.3e-04, dt 2.1s +All GPU(s): step 5827: loss 22.7031, lr 2.3e-04, dt 2.0s +All GPU(s): step 5828: loss 21.8750, lr 2.3e-04, dt 2.0s +All GPU(s): step 5829: loss 21.7969, lr 2.3e-04, dt 2.0s +All GPU(s): step 5830: loss 21.7031, lr 2.3e-04, dt 2.0s +All GPU(s): step 5831: loss 21.9375, lr 2.3e-04, dt 2.1s +All GPU(s): step 5832: loss 21.9375, lr 2.3e-04, dt 2.0s +All GPU(s): step 5833: loss 22.1875, lr 2.3e-04, dt 2.0s +All GPU(s): step 5834: loss 21.7812, lr 2.3e-04, dt 2.0s +All GPU(s): step 5835: loss 22.1094, lr 2.3e-04, dt 2.1s +All GPU(s): step 5836: loss 21.8281, lr 2.3e-04, dt 2.1s +All GPU(s): step 5837: loss 21.8125, lr 2.3e-04, dt 2.0s +All GPU(s): step 5838: loss 21.7188, lr 2.3e-04, dt 2.0s +All GPU(s): step 5839: loss 22.0000, lr 2.3e-04, dt 2.0s +All GPU(s): step 5840: loss 21.7656, lr 2.3e-04, dt 2.1s +All GPU(s): step 5841: loss 21.5938, lr 2.3e-04, dt 2.1s +All GPU(s): step 5842: loss 21.9844, lr 2.3e-04, dt 2.1s +All GPU(s): step 5843: loss 21.7188, lr 2.3e-04, dt 2.0s +All GPU(s): step 5844: loss 21.9531, lr 2.3e-04, dt 2.0s +All GPU(s): step 5845: loss 21.8438, lr 2.3e-04, dt 2.1s +All GPU(s): step 5846: loss 22.1719, lr 2.3e-04, dt 2.0s +All GPU(s): step 5847: loss 22.0938, lr 2.3e-04, dt 2.0s +All GPU(s): step 5848: loss 21.9531, lr 2.3e-04, dt 2.1s +All GPU(s): step 5849: loss 21.4062, lr 2.3e-04, dt 2.1s +All GPU(s): step 5850: loss 21.4844, lr 2.3e-04, dt 2.2s +All GPU(s): step 5851: loss 21.9062, lr 2.3e-04, dt 2.0s +All GPU(s): step 5852: loss 21.4531, lr 2.3e-04, dt 2.1s +All GPU(s): step 5853: loss 21.9062, lr 2.3e-04, dt 2.1s +All GPU(s): step 5854: loss 21.9219, lr 2.3e-04, dt 2.1s +All GPU(s): step 5855: loss 21.2969, lr 2.3e-04, dt 2.2s +All GPU(s): step 5856: loss 21.5312, lr 2.3e-04, dt 2.0s +All GPU(s): step 5857: loss 21.6719, lr 2.3e-04, dt 2.0s +All GPU(s): step 5858: loss 21.6094, lr 2.3e-04, dt 2.0s +All GPU(s): step 5859: loss 21.6406, lr 2.3e-04, dt 2.1s +All GPU(s): step 5860: loss 21.3125, lr 2.3e-04, dt 2.1s +All GPU(s): step 5861: loss 21.4844, lr 2.3e-04, dt 2.0s +All GPU(s): step 5862: loss 21.4219, lr 2.3e-04, dt 2.1s +All GPU(s): step 5863: loss 21.7031, lr 2.3e-04, dt 2.1s +All GPU(s): step 5864: loss 21.5781, lr 2.3e-04, dt 2.1s +All GPU(s): step 5865: loss 21.2656, lr 2.3e-04, dt 2.1s +All GPU(s): step 5866: loss 21.6406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5867: loss 22.1094, lr 2.3e-04, dt 2.0s +All GPU(s): step 5868: loss 21.7344, lr 2.3e-04, dt 2.0s +All GPU(s): step 5869: loss 21.6406, lr 2.3e-04, dt 2.1s +All GPU(s): step 5870: loss 21.7344, lr 2.3e-04, dt 2.0s +All GPU(s): step 5871: loss 21.7656, lr 2.3e-04, dt 2.0s +All GPU(s): step 5872: loss 21.7344, lr 2.3e-04, dt 2.0s +All GPU(s): step 5873: loss 21.6719, lr 2.3e-04, dt 2.0s +All GPU(s): step 5874: loss 21.5938, lr 2.3e-04, dt 2.1s +All GPU(s): step 5875: loss 21.9375, lr 2.3e-04, dt 2.0s +All GPU(s): step 5876: loss 21.6406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5877: loss 21.6719, lr 2.3e-04, dt 2.0s +All GPU(s): step 5878: loss 21.8750, lr 2.3e-04, dt 2.1s +All GPU(s): step 5879: loss 21.8750, lr 2.3e-04, dt 2.1s +All GPU(s): step 5880: loss 21.9062, lr 2.3e-04, dt 2.0s +All GPU(s): step 5881: loss 21.5625, lr 2.3e-04, dt 2.1s +All GPU(s): step 5882: loss 21.6719, lr 2.3e-04, dt 2.0s +All GPU(s): step 5883: loss 21.9219, lr 2.3e-04, dt 2.0s +All GPU(s): step 5884: loss 21.6875, lr 2.3e-04, dt 2.1s +All GPU(s): step 5885: loss 22.0312, lr 2.3e-04, dt 2.0s +All GPU(s): step 5886: loss 21.7188, lr 2.3e-04, dt 2.1s +All GPU(s): step 5887: loss 21.9688, lr 2.3e-04, dt 2.1s +All GPU(s): step 5888: loss 21.9844, lr 2.3e-04, dt 2.1s +All GPU(s): step 5889: loss 22.2031, lr 2.3e-04, dt 2.1s +All GPU(s): step 5890: loss 21.7656, lr 2.3e-04, dt 2.0s +All GPU(s): step 5891: loss 21.9375, lr 2.3e-04, dt 2.1s +All GPU(s): step 5892: loss 22.3438, lr 2.3e-04, dt 2.0s +All GPU(s): step 5893: loss 21.9062, lr 2.3e-04, dt 2.1s +All GPU(s): step 5894: loss 22.0312, lr 2.3e-04, dt 2.1s +All GPU(s): step 5895: loss 22.1562, lr 2.3e-04, dt 2.1s +All GPU(s): step 5896: loss 22.3438, lr 2.3e-04, dt 2.0s +All GPU(s): step 5897: loss 22.5469, lr 2.3e-04, dt 2.1s +All GPU(s): step 5898: loss 22.4688, lr 2.3e-04, dt 2.1s +All GPU(s): step 5899: loss 22.3906, lr 2.3e-04, dt 2.0s +All GPU(s): step 5900: loss 22.2656, lr 2.3e-04, dt 2.0s +All GPU(s): step 5901: loss 22.4531, lr 2.3e-04, dt 2.0s +All GPU(s): step 5902: loss 22.5000, lr 2.3e-04, dt 2.0s +All GPU(s): step 5903: loss 22.3438, lr 2.3e-04, dt 2.2s +All GPU(s): step 5904: loss 22.4375, lr 2.3e-04, dt 2.0s +All GPU(s): step 5905: loss 22.1406, lr 2.3e-04, dt 2.0s +All GPU(s): step 5906: loss 22.0312, lr 2.3e-04, dt 2.0s +All GPU(s): step 5907: loss 22.2500, lr 2.3e-04, dt 2.0s +All GPU(s): step 5908: loss 22.3750, lr 2.3e-04, dt 2.1s +All GPU(s): step 5909: loss 22.4844, lr 2.3e-04, dt 2.0s +All GPU(s): step 5910: loss 22.0938, lr 2.3e-04, dt 2.0s +All GPU(s): step 5911: loss 22.0000, lr 2.3e-04, dt 2.0s +All GPU(s): step 5912: loss 22.3438, lr 2.2e-04, dt 2.1s +All GPU(s): step 5913: loss 22.2500, lr 2.2e-04, dt 2.2s +All GPU(s): step 5914: loss 22.6250, lr 2.2e-04, dt 2.0s +All GPU(s): step 5915: loss 22.8281, lr 2.2e-04, dt 2.0s +All GPU(s): step 5916: loss 22.6875, lr 2.2e-04, dt 2.0s +All GPU(s): step 5917: loss 22.9531, lr 2.2e-04, dt 2.0s +All GPU(s): step 5918: loss 22.9062, lr 2.2e-04, dt 2.1s +All GPU(s): step 5919: loss 22.8125, lr 2.2e-04, dt 2.0s +All GPU(s): step 5920: loss 22.6719, lr 2.2e-04, dt 2.0s +All GPU(s): step 5921: loss 22.7500, lr 2.2e-04, dt 2.0s +All GPU(s): step 5922: loss 22.9219, lr 2.2e-04, dt 2.1s +All GPU(s): step 5923: loss 22.6094, lr 2.2e-04, dt 2.1s +All GPU(s): step 5924: loss 22.4219, lr 2.2e-04, dt 2.0s +All GPU(s): step 5925: loss 22.2500, lr 2.2e-04, dt 2.0s +All GPU(s): step 5926: loss 22.4219, lr 2.2e-04, dt 2.0s +All GPU(s): step 5927: loss 22.2031, lr 2.2e-04, dt 2.1s +All GPU(s): step 5928: loss 22.3438, lr 2.2e-04, dt 2.0s +All GPU(s): step 5929: loss 22.2031, lr 2.2e-04, dt 2.0s +All GPU(s): step 5930: loss 22.1250, lr 2.2e-04, dt 2.0s +All GPU(s): step 5931: loss 21.9688, lr 2.2e-04, dt 2.0s +All GPU(s): step 5932: loss 21.8594, lr 2.2e-04, dt 2.1s +All GPU(s): step 5933: loss 22.0938, lr 2.2e-04, dt 2.1s +All GPU(s): step 5934: loss 22.3594, lr 2.2e-04, dt 2.0s +All GPU(s): step 5935: loss 22.4375, lr 2.2e-04, dt 2.0s +All GPU(s): step 5936: loss 22.1875, lr 2.2e-04, dt 2.0s +All GPU(s): step 5937: loss 22.4844, lr 2.2e-04, dt 2.2s +All GPU(s): step 5938: loss 22.5312, lr 2.2e-04, dt 2.1s +All GPU(s): step 5939: loss 22.4375, lr 2.2e-04, dt 2.1s +All GPU(s): step 5940: loss 22.4219, lr 2.2e-04, dt 2.1s +All GPU(s): step 5941: loss 22.6875, lr 2.2e-04, dt 2.1s +All GPU(s): step 5942: loss 23.1094, lr 2.2e-04, dt 2.1s +All GPU(s): step 5943: loss 22.8906, lr 2.2e-04, dt 2.0s +All GPU(s): step 5944: loss 23.5000, lr 2.2e-04, dt 2.0s +All GPU(s): step 5945: loss 23.2969, lr 2.2e-04, dt 2.0s +All GPU(s): step 5946: loss 23.1562, lr 2.2e-04, dt 2.1s +All GPU(s): step 5947: loss 22.8594, lr 2.2e-04, dt 2.1s +All GPU(s): step 5948: loss 22.8281, lr 2.2e-04, dt 2.1s +All GPU(s): step 5949: loss 23.3750, lr 2.2e-04, dt 2.0s +All GPU(s): step 5950: loss 23.1719, lr 2.2e-04, dt 2.0s +All GPU(s): step 5951: loss 23.0469, lr 2.2e-04, dt 2.1s +All GPU(s): step 5952: loss 23.0312, lr 2.2e-04, dt 2.1s +All GPU(s): step 5953: loss 23.2656, lr 2.2e-04, dt 2.0s +All GPU(s): step 5954: loss 23.3750, lr 2.2e-04, dt 2.0s +All GPU(s): step 5955: loss 23.4062, lr 2.2e-04, dt 2.0s +All GPU(s): step 5956: loss 23.1250, lr 2.2e-04, dt 2.1s +All GPU(s): step 5957: loss 23.2969, lr 2.2e-04, dt 2.0s +All GPU(s): step 5958: loss 23.4844, lr 2.2e-04, dt 2.1s +All GPU(s): step 5959: loss 23.3750, lr 2.2e-04, dt 2.0s +All GPU(s): step 5960: loss 23.2188, lr 2.2e-04, dt 2.0s +All GPU(s): step 5961: loss 23.1875, lr 2.2e-04, dt 2.1s +All GPU(s): step 5962: loss 23.3281, lr 2.2e-04, dt 2.1s +All GPU(s): step 5963: loss 23.3438, lr 2.2e-04, dt 2.0s +All GPU(s): step 5964: loss 23.1406, lr 2.2e-04, dt 2.1s +All GPU(s): step 5965: loss 23.6250, lr 2.2e-04, dt 2.0s +All GPU(s): step 5966: loss 23.5469, lr 2.2e-04, dt 2.2s +All GPU(s): step 5967: loss 23.4219, lr 2.2e-04, dt 2.0s +All GPU(s): step 5968: loss 23.6250, lr 2.2e-04, dt 2.0s +All GPU(s): step 5969: loss 23.3750, lr 2.2e-04, dt 2.0s +All GPU(s): step 5970: loss 23.5469, lr 2.2e-04, dt 2.0s +All GPU(s): step 5971: loss 23.4688, lr 2.2e-04, dt 2.1s +All GPU(s): step 5972: loss 23.5000, lr 2.2e-04, dt 2.0s +All GPU(s): step 5973: loss 23.5312, lr 2.2e-04, dt 2.0s +All GPU(s): step 5974: loss 23.6875, lr 2.2e-04, dt 2.0s +All GPU(s): step 5975: loss 23.7188, lr 2.2e-04, dt 2.1s +All GPU(s): step 5976: loss 23.6562, lr 2.2e-04, dt 2.1s +All GPU(s): step 5977: loss 23.6562, lr 2.2e-04, dt 2.1s +All GPU(s): step 5978: loss 23.5469, lr 2.2e-04, dt 2.0s +All GPU(s): step 5979: loss 23.5469, lr 2.2e-04, dt 2.0s +All GPU(s): step 5980: loss 23.4062, lr 2.2e-04, dt 2.1s +All GPU(s): step 5981: loss 23.8125, lr 2.2e-04, dt 2.0s +All GPU(s): step 5982: loss 23.4688, lr 2.2e-04, dt 2.0s +All GPU(s): step 5983: loss 23.7031, lr 2.2e-04, dt 2.0s +All GPU(s): step 5984: loss 23.7344, lr 2.2e-04, dt 2.1s +All GPU(s): step 5985: loss 23.4688, lr 2.2e-04, dt 2.2s +All GPU(s): step 5986: loss 23.8125, lr 2.2e-04, dt 2.0s +All GPU(s): step 5987: loss 23.6094, lr 2.2e-04, dt 2.1s +All GPU(s): step 5988: loss 23.3906, lr 2.2e-04, dt 2.0s +All GPU(s): step 5989: loss 23.4375, lr 2.2e-04, dt 2.0s +All GPU(s): step 5990: loss 23.3125, lr 2.2e-04, dt 2.1s +All GPU(s): step 5991: loss 23.5469, lr 2.2e-04, dt 2.0s +All GPU(s): step 5992: loss 23.5000, lr 2.2e-04, dt 2.0s +All GPU(s): step 5993: loss 23.4219, lr 2.2e-04, dt 2.0s +All GPU(s): step 5994: loss 23.3906, lr 2.2e-04, dt 2.1s +All GPU(s): step 5995: loss 23.1094, lr 2.2e-04, dt 2.1s +All GPU(s): step 5996: loss 22.7656, lr 2.2e-04, dt 2.0s +All GPU(s): step 5997: loss 22.8906, lr 2.2e-04, dt 2.0s +All GPU(s): step 5998: loss 22.8594, lr 2.2e-04, dt 2.0s +All GPU(s): step 5999: loss 22.9219, lr 2.2e-04, dt 2.1s +saving checkpoint to checkpoints/ckpt_6000.pt +All GPU(s): step 6000: loss 22.9375, lr 2.2e-04, dt 2.2s +All GPU(s): step 6001: loss 23.1094, lr 2.2e-04, dt 2.0s +All GPU(s): step 6002: loss 22.8750, lr 2.2e-04, dt 2.1s +All GPU(s): step 6003: loss 22.9531, lr 2.2e-04, dt 2.1s +All GPU(s): step 6004: loss 23.0000, lr 2.2e-04, dt 2.2s +All GPU(s): step 6005: loss 23.0781, lr 2.2e-04, dt 2.0s +All GPU(s): step 6006: loss 23.1562, lr 2.2e-04, dt 2.1s +All GPU(s): step 6007: loss 23.6250, lr 2.2e-04, dt 2.0s +All GPU(s): step 6008: loss 23.5000, lr 2.2e-04, dt 2.0s +All GPU(s): step 6009: loss 23.1875, lr 2.2e-04, dt 2.2s +All GPU(s): step 6010: loss 23.3281, lr 2.2e-04, dt 2.1s +All GPU(s): step 6011: loss 23.7500, lr 2.2e-04, dt 2.1s +All GPU(s): step 6012: loss 23.8281, lr 2.2e-04, dt 2.0s +All GPU(s): step 6013: loss 23.6406, lr 2.2e-04, dt 2.0s +All GPU(s): step 6014: loss 23.5938, lr 2.2e-04, dt 2.2s +All GPU(s): step 6015: loss 23.7344, lr 2.2e-04, dt 2.1s +All GPU(s): step 6016: loss 23.4688, lr 2.2e-04, dt 2.1s +All GPU(s): step 6017: loss 23.9219, lr 2.2e-04, dt 2.1s +All GPU(s): step 6018: loss 23.9688, lr 2.2e-04, dt 2.1s +All GPU(s): step 6019: loss 24.5781, lr 2.2e-04, dt 2.1s +All GPU(s): step 6020: loss 24.3438, lr 2.2e-04, dt 2.1s +All GPU(s): step 6021: loss 24.3125, lr 2.2e-04, dt 2.1s +All GPU(s): step 6022: loss 24.6875, lr 2.2e-04, dt 2.1s +All GPU(s): step 6023: loss 24.4688, lr 2.2e-04, dt 2.1s +All GPU(s): step 6024: loss 24.5156, lr 2.2e-04, dt 2.2s +All GPU(s): step 6025: loss 24.9531, lr 2.2e-04, dt 2.0s +All GPU(s): step 6026: loss 24.9844, lr 2.2e-04, dt 2.1s +All GPU(s): step 6027: loss 25.1875, lr 2.2e-04, dt 2.1s +All GPU(s): step 6028: loss 25.4219, lr 2.2e-04, dt 2.1s +All GPU(s): step 6029: loss 25.4375, lr 2.2e-04, dt 2.1s +All GPU(s): step 6030: loss 25.5312, lr 2.2e-04, dt 2.1s +All GPU(s): step 6031: loss 25.6406, lr 2.2e-04, dt 2.0s +All GPU(s): step 6032: loss 25.6250, lr 2.2e-04, dt 2.0s +All GPU(s): step 6033: loss 25.5469, lr 2.2e-04, dt 2.1s +All GPU(s): step 6034: loss 25.6250, lr 2.2e-04, dt 2.0s +All GPU(s): step 6035: loss 25.0938, lr 2.2e-04, dt 2.0s +All GPU(s): step 6036: loss 25.2188, lr 2.2e-04, dt 2.1s +All GPU(s): step 6037: loss 24.8438, lr 2.2e-04, dt 2.1s +All GPU(s): step 6038: loss 24.7344, lr 2.2e-04, dt 2.1s +All GPU(s): step 6039: loss 24.7188, lr 2.2e-04, dt 2.0s +All GPU(s): step 6040: loss 24.4375, lr 2.2e-04, dt 2.0s +All GPU(s): step 6041: loss 24.0625, lr 2.2e-04, dt 2.1s +All GPU(s): step 6042: loss 23.9844, lr 2.2e-04, dt 2.1s +All GPU(s): step 6043: loss 24.2969, lr 2.2e-04, dt 2.1s +All GPU(s): step 6044: loss 24.5156, lr 2.2e-04, dt 2.0s +All GPU(s): step 6045: loss 24.0469, lr 2.2e-04, dt 2.0s +All GPU(s): step 6046: loss 24.2344, lr 2.2e-04, dt 2.0s +All GPU(s): step 6047: loss 24.2031, lr 2.2e-04, dt 2.1s +All GPU(s): step 6048: loss 24.3906, lr 2.2e-04, dt 2.1s +All GPU(s): step 6049: loss 24.7656, lr 2.2e-04, dt 2.1s +All GPU(s): step 6050: loss 25.3125, lr 2.2e-04, dt 2.1s +All GPU(s): step 6051: loss 25.4531, lr 2.2e-04, dt 2.1s +All GPU(s): step 6052: loss 25.4531, lr 2.2e-04, dt 2.1s +All GPU(s): step 6053: loss 25.6562, lr 2.2e-04, dt 2.0s +All GPU(s): step 6054: loss 25.6875, lr 2.2e-04, dt 2.0s +All GPU(s): step 6055: loss 26.0312, lr 2.2e-04, dt 2.0s +All GPU(s): step 6056: loss 26.0938, lr 2.2e-04, dt 2.0s +All GPU(s): step 6057: loss 26.0938, lr 2.2e-04, dt 2.1s +All GPU(s): step 6058: loss 26.1094, lr 2.2e-04, dt 2.0s +All GPU(s): step 6059: loss 26.1250, lr 2.2e-04, dt 2.0s +All GPU(s): step 6060: loss 26.2500, lr 2.2e-04, dt 2.1s +All GPU(s): step 6061: loss 26.1562, lr 2.2e-04, dt 2.0s +All GPU(s): step 6062: loss 25.9062, lr 2.2e-04, dt 2.1s +All GPU(s): step 6063: loss 26.0156, lr 2.2e-04, dt 2.0s +All GPU(s): step 6064: loss 25.6562, lr 2.2e-04, dt 2.1s +All GPU(s): step 6065: loss 25.5781, lr 2.2e-04, dt 2.0s +All GPU(s): step 6066: loss 25.7656, lr 2.2e-04, dt 2.0s +All GPU(s): step 6067: loss 25.5000, lr 2.2e-04, dt 2.2s +All GPU(s): step 6068: loss 25.1875, lr 2.2e-04, dt 2.0s +All GPU(s): step 6069: loss 25.2188, lr 2.2e-04, dt 2.0s +All GPU(s): step 6070: loss 25.3594, lr 2.2e-04, dt 2.0s +All GPU(s): step 6071: loss 25.3281, lr 2.2e-04, dt 2.1s +All GPU(s): step 6072: loss 25.1250, lr 2.2e-04, dt 2.1s +All GPU(s): step 6073: loss 24.5938, lr 2.2e-04, dt 2.0s +All GPU(s): step 6074: loss 24.9844, lr 2.2e-04, dt 2.0s +All GPU(s): step 6075: loss 24.9219, lr 2.2e-04, dt 2.0s +All GPU(s): step 6076: loss 24.9375, lr 2.2e-04, dt 2.1s +All GPU(s): step 6077: loss 25.2031, lr 2.2e-04, dt 2.1s +All GPU(s): step 6078: loss 25.1250, lr 2.2e-04, dt 2.0s +All GPU(s): step 6079: loss 25.1875, lr 2.2e-04, dt 2.0s +All GPU(s): step 6080: loss 25.5156, lr 2.2e-04, dt 2.0s +All GPU(s): step 6081: loss 25.2656, lr 2.2e-04, dt 2.1s +All GPU(s): step 6082: loss 25.3125, lr 2.2e-04, dt 2.0s +All GPU(s): step 6083: loss 25.1875, lr 2.2e-04, dt 2.0s +All GPU(s): step 6084: loss 25.4531, lr 2.2e-04, dt 2.0s +All GPU(s): step 6085: loss 25.2812, lr 2.2e-04, dt 2.0s +All GPU(s): step 6086: loss 25.6875, lr 2.2e-04, dt 2.1s +All GPU(s): step 6087: loss 25.7344, lr 2.2e-04, dt 2.1s +All GPU(s): step 6088: loss 25.9219, lr 2.2e-04, dt 2.0s +All GPU(s): step 6089: loss 26.2031, lr 2.1e-04, dt 2.0s +All GPU(s): step 6090: loss 25.8438, lr 2.1e-04, dt 2.1s +All GPU(s): step 6091: loss 26.3750, lr 2.1e-04, dt 2.1s +All GPU(s): step 6092: loss 25.9062, lr 2.1e-04, dt 2.0s +All GPU(s): step 6093: loss 26.2344, lr 2.1e-04, dt 2.1s +All GPU(s): step 6094: loss 25.9219, lr 2.1e-04, dt 2.0s +All GPU(s): step 6095: loss 26.2812, lr 2.1e-04, dt 2.1s +All GPU(s): step 6096: loss 26.4531, lr 2.1e-04, dt 2.1s +All GPU(s): step 6097: loss 26.3438, lr 2.1e-04, dt 2.0s +All GPU(s): step 6098: loss 26.4219, lr 2.1e-04, dt 2.0s +All GPU(s): step 6099: loss 26.5000, lr 2.1e-04, dt 2.0s +All GPU(s): step 6100: loss 26.6250, lr 2.1e-04, dt 2.1s +All GPU(s): step 6101: loss 26.3594, lr 2.1e-04, dt 2.1s +All GPU(s): step 6102: loss 26.5625, lr 2.1e-04, dt 2.1s +All GPU(s): step 6103: loss 26.2969, lr 2.1e-04, dt 2.1s +All GPU(s): step 6104: loss 26.5781, lr 2.1e-04, dt 2.0s +All GPU(s): step 6105: loss 26.5469, lr 2.1e-04, dt 2.1s +All GPU(s): step 6106: loss 26.3594, lr 2.1e-04, dt 2.1s +All GPU(s): step 6107: loss 26.2500, lr 2.1e-04, dt 2.0s +All GPU(s): step 6108: loss 26.3438, lr 2.1e-04, dt 2.0s +All GPU(s): step 6109: loss 26.4531, lr 2.1e-04, dt 2.0s +All GPU(s): step 6110: loss 26.6250, lr 2.1e-04, dt 2.1s +All GPU(s): step 6111: loss 26.5469, lr 2.1e-04, dt 2.0s +All GPU(s): step 6112: loss 26.6406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6113: loss 26.4531, lr 2.1e-04, dt 2.0s +All GPU(s): step 6114: loss 26.5312, lr 2.1e-04, dt 2.0s +All GPU(s): step 6115: loss 26.5000, lr 2.1e-04, dt 2.1s +All GPU(s): step 6116: loss 26.7344, lr 2.1e-04, dt 2.1s +All GPU(s): step 6117: loss 26.4844, lr 2.1e-04, dt 2.1s +All GPU(s): step 6118: loss 26.5156, lr 2.1e-04, dt 2.0s +All GPU(s): step 6119: loss 26.7500, lr 2.1e-04, dt 2.0s +All GPU(s): step 6120: loss 26.4688, lr 2.1e-04, dt 2.2s +All GPU(s): step 6121: loss 26.7812, lr 2.1e-04, dt 2.0s +All GPU(s): step 6122: loss 26.7188, lr 2.1e-04, dt 2.0s +All GPU(s): step 6123: loss 26.5469, lr 2.1e-04, dt 2.0s +All GPU(s): step 6124: loss 26.5938, lr 2.1e-04, dt 2.1s +All GPU(s): step 6125: loss 26.7031, lr 2.1e-04, dt 2.1s +All GPU(s): step 6126: loss 26.4531, lr 2.1e-04, dt 2.0s +All GPU(s): step 6127: loss 26.3281, lr 2.1e-04, dt 2.1s +All GPU(s): step 6128: loss 26.7344, lr 2.1e-04, dt 2.1s +All GPU(s): step 6129: loss 26.4375, lr 2.1e-04, dt 2.1s +All GPU(s): step 6130: loss 26.4375, lr 2.1e-04, dt 2.1s +All GPU(s): step 6131: loss 26.5312, lr 2.1e-04, dt 2.0s +All GPU(s): step 6132: loss 26.4375, lr 2.1e-04, dt 2.0s +All GPU(s): step 6133: loss 26.5156, lr 2.1e-04, dt 2.0s +All GPU(s): step 6134: loss 26.1719, lr 2.1e-04, dt 2.1s +All GPU(s): step 6135: loss 26.5625, lr 2.1e-04, dt 2.0s +All GPU(s): step 6136: loss 26.4062, lr 2.1e-04, dt 2.0s +All GPU(s): step 6137: loss 26.2812, lr 2.1e-04, dt 2.0s +All GPU(s): step 6138: loss 26.1562, lr 2.1e-04, dt 2.1s +All GPU(s): step 6139: loss 26.6875, lr 2.1e-04, dt 2.1s +All GPU(s): step 6140: loss 26.4219, lr 2.1e-04, dt 2.1s +All GPU(s): step 6141: loss 26.1719, lr 2.1e-04, dt 2.1s +All GPU(s): step 6142: loss 26.7500, lr 2.1e-04, dt 2.0s +All GPU(s): step 6143: loss 26.3125, lr 2.1e-04, dt 2.1s +All GPU(s): step 6144: loss 26.3750, lr 2.1e-04, dt 2.2s +All GPU(s): step 6145: loss 26.2656, lr 2.1e-04, dt 2.1s +All GPU(s): step 6146: loss 26.2500, lr 2.1e-04, dt 2.0s +All GPU(s): step 6147: loss 26.1250, lr 2.1e-04, dt 2.1s +All GPU(s): step 6148: loss 26.3594, lr 2.1e-04, dt 2.1s +All GPU(s): step 6149: loss 26.2656, lr 2.1e-04, dt 2.1s +All GPU(s): step 6150: loss 26.6094, lr 2.1e-04, dt 2.0s +All GPU(s): step 6151: loss 26.4531, lr 2.1e-04, dt 2.0s +All GPU(s): step 6152: loss 26.2500, lr 2.1e-04, dt 2.0s +All GPU(s): step 6153: loss 26.1719, lr 2.1e-04, dt 2.0s +All GPU(s): step 6154: loss 26.0312, lr 2.1e-04, dt 2.1s +All GPU(s): step 6155: loss 25.6719, lr 2.1e-04, dt 2.1s +All GPU(s): step 6156: loss 25.4688, lr 2.1e-04, dt 2.0s +All GPU(s): step 6157: loss 25.5469, lr 2.1e-04, dt 2.1s +All GPU(s): step 6158: loss 25.4219, lr 2.1e-04, dt 2.1s +All GPU(s): step 6159: loss 25.6406, lr 2.1e-04, dt 2.1s +All GPU(s): step 6160: loss 25.8594, lr 2.1e-04, dt 2.1s +All GPU(s): step 6161: loss 25.8438, lr 2.1e-04, dt 2.0s +All GPU(s): step 6162: loss 26.2812, lr 2.1e-04, dt 2.0s +All GPU(s): step 6163: loss 26.0156, lr 2.1e-04, dt 2.2s +All GPU(s): step 6164: loss 25.9375, lr 2.1e-04, dt 2.1s +All GPU(s): step 6165: loss 25.9219, lr 2.1e-04, dt 2.1s +All GPU(s): step 6166: loss 25.6719, lr 2.1e-04, dt 2.1s +All GPU(s): step 6167: loss 25.4688, lr 2.1e-04, dt 2.1s +All GPU(s): step 6168: loss 24.9531, lr 2.1e-04, dt 2.1s +All GPU(s): step 6169: loss 24.1250, lr 2.1e-04, dt 2.1s +All GPU(s): step 6170: loss 23.7969, lr 2.1e-04, dt 2.0s +All GPU(s): step 6171: loss 23.8125, lr 2.1e-04, dt 2.1s +All GPU(s): step 6172: loss 23.1094, lr 2.1e-04, dt 2.1s +All GPU(s): step 6173: loss 22.5625, lr 2.1e-04, dt 2.2s +All GPU(s): step 6174: loss 22.4531, lr 2.1e-04, dt 2.1s +All GPU(s): step 6175: loss 22.2969, lr 2.1e-04, dt 2.1s +All GPU(s): step 6176: loss 22.2500, lr 2.1e-04, dt 2.1s +All GPU(s): step 6177: loss 22.1094, lr 2.1e-04, dt 2.1s +All GPU(s): step 6178: loss 21.8906, lr 2.1e-04, dt 2.1s +All GPU(s): step 6179: loss 22.2031, lr 2.1e-04, dt 2.0s +All GPU(s): step 6180: loss 21.4844, lr 2.1e-04, dt 2.0s +All GPU(s): step 6181: loss 21.9531, lr 2.1e-04, dt 2.1s +All GPU(s): step 6182: loss 21.6719, lr 2.1e-04, dt 2.1s +All GPU(s): step 6183: loss 21.7031, lr 2.1e-04, dt 2.0s +All GPU(s): step 6184: loss 21.8281, lr 2.1e-04, dt 2.1s +All GPU(s): step 6185: loss 21.7031, lr 2.1e-04, dt 2.0s +All GPU(s): step 6186: loss 21.8594, lr 2.1e-04, dt 2.0s +All GPU(s): step 6187: loss 21.7656, lr 2.1e-04, dt 2.1s +All GPU(s): step 6188: loss 21.8906, lr 2.1e-04, dt 2.1s +All GPU(s): step 6189: loss 21.8281, lr 2.1e-04, dt 2.1s +All GPU(s): step 6190: loss 21.6719, lr 2.1e-04, dt 2.0s +All GPU(s): step 6191: loss 22.1094, lr 2.1e-04, dt 2.0s +All GPU(s): step 6192: loss 21.5938, lr 2.1e-04, dt 2.1s +All GPU(s): step 6193: loss 21.7344, lr 2.1e-04, dt 2.0s +All GPU(s): step 6194: loss 21.6250, lr 2.1e-04, dt 2.0s +All GPU(s): step 6195: loss 21.7344, lr 2.1e-04, dt 2.0s +All GPU(s): step 6196: loss 21.6094, lr 2.1e-04, dt 2.1s +All GPU(s): step 6197: loss 21.8594, lr 2.1e-04, dt 2.1s +All GPU(s): step 6198: loss 21.4844, lr 2.1e-04, dt 2.1s +All GPU(s): step 6199: loss 21.7031, lr 2.1e-04, dt 2.0s +All GPU(s): step 6200: loss 21.4844, lr 2.1e-04, dt 2.0s +All GPU(s): step 6201: loss 21.7500, lr 2.1e-04, dt 2.1s +All GPU(s): step 6202: loss 21.5000, lr 2.1e-04, dt 2.2s +All GPU(s): step 6203: loss 21.5469, lr 2.1e-04, dt 2.0s +All GPU(s): step 6204: loss 21.7344, lr 2.1e-04, dt 2.1s +All GPU(s): step 6205: loss 21.5625, lr 2.1e-04, dt 2.0s +All GPU(s): step 6206: loss 21.7812, lr 2.1e-04, dt 2.1s +All GPU(s): step 6207: loss 21.4219, lr 2.1e-04, dt 2.0s +All GPU(s): step 6208: loss 21.4688, lr 2.1e-04, dt 2.0s +All GPU(s): step 6209: loss 21.7188, lr 2.1e-04, dt 2.0s +All GPU(s): step 6210: loss 21.5781, lr 2.1e-04, dt 2.0s +All GPU(s): step 6211: loss 21.9219, lr 2.1e-04, dt 2.2s +All GPU(s): step 6212: loss 21.2812, lr 2.1e-04, dt 2.0s +All GPU(s): step 6213: loss 21.6875, lr 2.1e-04, dt 2.0s +All GPU(s): step 6214: loss 21.7344, lr 2.1e-04, dt 2.0s +All GPU(s): step 6215: loss 21.6250, lr 2.1e-04, dt 2.0s +All GPU(s): step 6216: loss 21.9688, lr 2.1e-04, dt 2.2s +All GPU(s): step 6217: loss 21.7344, lr 2.1e-04, dt 2.0s +All GPU(s): step 6218: loss 21.3281, lr 2.1e-04, dt 2.0s +All GPU(s): step 6219: loss 21.4219, lr 2.1e-04, dt 2.0s +All GPU(s): step 6220: loss 21.6719, lr 2.1e-04, dt 2.0s +All GPU(s): step 6221: loss 21.5781, lr 2.1e-04, dt 2.2s +All GPU(s): step 6222: loss 21.8906, lr 2.1e-04, dt 2.0s +All GPU(s): step 6223: loss 21.6250, lr 2.1e-04, dt 2.0s +All GPU(s): step 6224: loss 22.0625, lr 2.1e-04, dt 2.0s +All GPU(s): step 6225: loss 21.7500, lr 2.1e-04, dt 2.1s +All GPU(s): step 6226: loss 21.8906, lr 2.1e-04, dt 2.1s +All GPU(s): step 6227: loss 21.7500, lr 2.1e-04, dt 2.0s +All GPU(s): step 6228: loss 21.5938, lr 2.1e-04, dt 2.0s +All GPU(s): step 6229: loss 21.7969, lr 2.1e-04, dt 2.0s +All GPU(s): step 6230: loss 21.6406, lr 2.1e-04, dt 2.1s +All GPU(s): step 6231: loss 21.8594, lr 2.1e-04, dt 2.1s +All GPU(s): step 6232: loss 21.8594, lr 2.1e-04, dt 2.1s +All GPU(s): step 6233: loss 21.8906, lr 2.1e-04, dt 2.0s +All GPU(s): step 6234: loss 22.1875, lr 2.1e-04, dt 2.1s +All GPU(s): step 6235: loss 21.9844, lr 2.1e-04, dt 2.2s +All GPU(s): step 6236: loss 22.1250, lr 2.1e-04, dt 2.1s +All GPU(s): step 6237: loss 22.0938, lr 2.1e-04, dt 2.1s +All GPU(s): step 6238: loss 21.6406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6239: loss 21.7031, lr 2.1e-04, dt 2.1s +All GPU(s): step 6240: loss 21.6562, lr 2.1e-04, dt 2.2s +All GPU(s): step 6241: loss 21.5625, lr 2.1e-04, dt 2.1s +All GPU(s): step 6242: loss 21.7656, lr 2.1e-04, dt 2.1s +All GPU(s): step 6243: loss 21.6406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6244: loss 21.7500, lr 2.1e-04, dt 2.1s +All GPU(s): step 6245: loss 21.9531, lr 2.1e-04, dt 2.2s +All GPU(s): step 6246: loss 21.4375, lr 2.1e-04, dt 2.0s +All GPU(s): step 6247: loss 21.1875, lr 2.1e-04, dt 2.1s +All GPU(s): step 6248: loss 21.6250, lr 2.1e-04, dt 2.0s +All GPU(s): step 6249: loss 21.8125, lr 2.1e-04, dt 2.1s +All GPU(s): step 6250: loss 21.5000, lr 2.1e-04, dt 2.1s +All GPU(s): step 6251: loss 21.3125, lr 2.1e-04, dt 2.0s +All GPU(s): step 6252: loss 21.1406, lr 2.1e-04, dt 2.0s +All GPU(s): step 6253: loss 21.1719, lr 2.1e-04, dt 2.0s +All GPU(s): step 6254: loss 21.4375, lr 2.1e-04, dt 2.1s +All GPU(s): step 6255: loss 21.2969, lr 2.1e-04, dt 2.0s +All GPU(s): step 6256: loss 21.5156, lr 2.1e-04, dt 2.0s +All GPU(s): step 6257: loss 21.5469, lr 2.1e-04, dt 2.0s +All GPU(s): step 6258: loss 21.2812, lr 2.1e-04, dt 2.0s +All GPU(s): step 6259: loss 21.6875, lr 2.1e-04, dt 2.1s +All GPU(s): step 6260: loss 21.4688, lr 2.1e-04, dt 2.0s +All GPU(s): step 6261: loss 21.4062, lr 2.1e-04, dt 2.0s +All GPU(s): step 6262: loss 21.5156, lr 2.1e-04, dt 2.0s +All GPU(s): step 6263: loss 21.6406, lr 2.1e-04, dt 2.1s +All GPU(s): step 6264: loss 21.4531, lr 2.1e-04, dt 2.1s +All GPU(s): step 6265: loss 21.4375, lr 2.1e-04, dt 2.1s +All GPU(s): step 6266: loss 21.5938, lr 2.0e-04, dt 2.0s +All GPU(s): step 6267: loss 21.6719, lr 2.0e-04, dt 2.1s +All GPU(s): step 6268: loss 21.5781, lr 2.0e-04, dt 2.1s +All GPU(s): step 6269: loss 21.5781, lr 2.0e-04, dt 2.2s +All GPU(s): step 6270: loss 21.5312, lr 2.0e-04, dt 2.0s +All GPU(s): step 6271: loss 21.2969, lr 2.0e-04, dt 2.0s +All GPU(s): step 6272: loss 21.2031, lr 2.0e-04, dt 2.0s +All GPU(s): step 6273: loss 21.3906, lr 2.0e-04, dt 2.1s +All GPU(s): step 6274: loss 21.2812, lr 2.0e-04, dt 2.1s +All GPU(s): step 6275: loss 21.5938, lr 2.0e-04, dt 2.0s +All GPU(s): step 6276: loss 21.5781, lr 2.0e-04, dt 2.1s +All GPU(s): step 6277: loss 21.0000, lr 2.0e-04, dt 2.0s +All GPU(s): step 6278: loss 21.5156, lr 2.0e-04, dt 2.1s +All GPU(s): step 6279: loss 21.5312, lr 2.0e-04, dt 2.1s +All GPU(s): step 6280: loss 21.7500, lr 2.0e-04, dt 2.1s +All GPU(s): step 6281: loss 21.8750, lr 2.0e-04, dt 2.0s +All GPU(s): step 6282: loss 21.5938, lr 2.0e-04, dt 2.0s +All GPU(s): step 6283: loss 21.7969, lr 2.0e-04, dt 2.1s +All GPU(s): step 6284: loss 21.6406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6285: loss 21.3438, lr 2.0e-04, dt 2.1s +All GPU(s): step 6286: loss 21.7656, lr 2.0e-04, dt 2.1s +All GPU(s): step 6287: loss 21.6875, lr 2.0e-04, dt 2.1s +All GPU(s): step 6288: loss 21.6562, lr 2.0e-04, dt 2.1s +All GPU(s): step 6289: loss 21.4062, lr 2.0e-04, dt 2.0s +All GPU(s): step 6290: loss 21.1562, lr 2.0e-04, dt 2.1s +All GPU(s): step 6291: loss 21.6250, lr 2.0e-04, dt 2.1s +All GPU(s): step 6292: loss 21.6562, lr 2.0e-04, dt 2.1s +All GPU(s): step 6293: loss 21.5469, lr 2.0e-04, dt 2.1s +All GPU(s): step 6294: loss 21.6094, lr 2.0e-04, dt 2.1s +All GPU(s): step 6295: loss 21.6875, lr 2.0e-04, dt 2.1s +All GPU(s): step 6296: loss 21.7344, lr 2.0e-04, dt 2.1s +All GPU(s): step 6297: loss 21.7031, lr 2.0e-04, dt 2.1s +All GPU(s): step 6298: loss 21.5469, lr 2.0e-04, dt 2.1s +All GPU(s): step 6299: loss 21.8125, lr 2.0e-04, dt 2.0s +All GPU(s): step 6300: loss 21.7031, lr 2.0e-04, dt 2.1s +All GPU(s): step 6301: loss 21.7188, lr 2.0e-04, dt 2.1s +All GPU(s): step 6302: loss 21.8750, lr 2.0e-04, dt 2.1s +All GPU(s): step 6303: loss 21.6719, lr 2.0e-04, dt 2.1s +All GPU(s): step 6304: loss 21.8125, lr 2.0e-04, dt 2.1s +All GPU(s): step 6305: loss 21.5000, lr 2.0e-04, dt 2.1s +All GPU(s): step 6306: loss 21.7656, lr 2.0e-04, dt 2.1s +All GPU(s): step 6307: loss 21.8438, lr 2.0e-04, dt 2.1s +All GPU(s): step 6308: loss 21.6875, lr 2.0e-04, dt 2.1s +All GPU(s): step 6309: loss 21.5469, lr 2.0e-04, dt 2.1s +All GPU(s): step 6310: loss 21.3438, lr 2.0e-04, dt 2.1s +All GPU(s): step 6311: loss 21.5000, lr 2.0e-04, dt 2.1s +All GPU(s): step 6312: loss 21.4375, lr 2.0e-04, dt 2.2s +All GPU(s): step 6313: loss 21.4688, lr 2.0e-04, dt 2.0s +All GPU(s): step 6314: loss 21.6875, lr 2.0e-04, dt 2.0s +All GPU(s): step 6315: loss 21.2656, lr 2.0e-04, dt 2.0s +All GPU(s): step 6316: loss 21.2969, lr 2.0e-04, dt 2.1s +All GPU(s): step 6317: loss 21.2344, lr 2.0e-04, dt 2.2s +All GPU(s): step 6318: loss 21.1250, lr 2.0e-04, dt 2.1s +All GPU(s): step 6319: loss 21.1406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6320: loss 21.0000, lr 2.0e-04, dt 2.1s +All GPU(s): step 6321: loss 21.1875, lr 2.0e-04, dt 2.1s +All GPU(s): step 6322: loss 20.8125, lr 2.0e-04, dt 2.1s +All GPU(s): step 6323: loss 20.7969, lr 2.0e-04, dt 2.0s +All GPU(s): step 6324: loss 20.3750, lr 2.0e-04, dt 2.0s +All GPU(s): step 6325: loss 20.9531, lr 2.0e-04, dt 2.1s +All GPU(s): step 6326: loss 20.1562, lr 2.0e-04, dt 2.1s +All GPU(s): step 6327: loss 20.6719, lr 2.0e-04, dt 2.0s +All GPU(s): step 6328: loss 20.5625, lr 2.0e-04, dt 2.0s +All GPU(s): step 6329: loss 20.7812, lr 2.0e-04, dt 2.0s +All GPU(s): step 6330: loss 20.3594, lr 2.0e-04, dt 2.0s +All GPU(s): step 6331: loss 20.4062, lr 2.0e-04, dt 2.1s +All GPU(s): step 6332: loss 20.3906, lr 2.0e-04, dt 2.1s +All GPU(s): step 6333: loss 20.6094, lr 2.0e-04, dt 2.1s +All GPU(s): step 6334: loss 20.4062, lr 2.0e-04, dt 2.1s +All GPU(s): step 6335: loss 20.7969, lr 2.0e-04, dt 2.0s +All GPU(s): step 6336: loss 20.6094, lr 2.0e-04, dt 2.2s +All GPU(s): step 6337: loss 20.1406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6338: loss 20.4062, lr 2.0e-04, dt 2.0s +All GPU(s): step 6339: loss 20.3125, lr 2.0e-04, dt 2.1s +All GPU(s): step 6340: loss 20.6875, lr 2.0e-04, dt 2.1s +All GPU(s): step 6341: loss 20.6406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6342: loss 20.8281, lr 2.0e-04, dt 2.0s +All GPU(s): step 6343: loss 20.8125, lr 2.0e-04, dt 2.1s +All GPU(s): step 6344: loss 20.2500, lr 2.0e-04, dt 2.0s +All GPU(s): step 6345: loss 20.3438, lr 2.0e-04, dt 2.1s +All GPU(s): step 6346: loss 20.2188, lr 2.0e-04, dt 2.1s +All GPU(s): step 6347: loss 20.2188, lr 2.0e-04, dt 2.0s +All GPU(s): step 6348: loss 19.9688, lr 2.0e-04, dt 2.0s +All GPU(s): step 6349: loss 20.6094, lr 2.0e-04, dt 2.1s +All GPU(s): step 6350: loss 20.6562, lr 2.0e-04, dt 2.1s +All GPU(s): step 6351: loss 20.2031, lr 2.0e-04, dt 2.1s +All GPU(s): step 6352: loss 20.3438, lr 2.0e-04, dt 2.0s +All GPU(s): step 6353: loss 20.4375, lr 2.0e-04, dt 2.0s +All GPU(s): step 6354: loss 20.3906, lr 2.0e-04, dt 2.0s +All GPU(s): step 6355: loss 20.4844, lr 2.0e-04, dt 2.2s +All GPU(s): step 6356: loss 19.2188, lr 2.0e-04, dt 2.1s +All GPU(s): step 6357: loss 20.1406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6358: loss 20.4062, lr 2.0e-04, dt 2.1s +All GPU(s): step 6359: loss 20.5000, lr 2.0e-04, dt 2.0s +All GPU(s): step 6360: loss 21.0625, lr 2.0e-04, dt 2.1s +All GPU(s): step 6361: loss 20.9531, lr 2.0e-04, dt 2.0s +All GPU(s): step 6362: loss 21.1406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6363: loss 21.2812, lr 2.0e-04, dt 2.1s +All GPU(s): step 6364: loss 21.5312, lr 2.0e-04, dt 2.1s +All GPU(s): step 6365: loss 21.0156, lr 2.0e-04, dt 2.2s +All GPU(s): step 6366: loss 21.0312, lr 2.0e-04, dt 2.1s +All GPU(s): step 6367: loss 21.2031, lr 2.0e-04, dt 2.1s +All GPU(s): step 6368: loss 21.0469, lr 2.0e-04, dt 2.0s +All GPU(s): step 6369: loss 21.5781, lr 2.0e-04, dt 2.0s +All GPU(s): step 6370: loss 21.1406, lr 2.0e-04, dt 2.2s +All GPU(s): step 6371: loss 21.4062, lr 2.0e-04, dt 2.1s +All GPU(s): step 6372: loss 21.4531, lr 2.0e-04, dt 2.0s +All GPU(s): step 6373: loss 21.5625, lr 2.0e-04, dt 2.1s +All GPU(s): step 6374: loss 21.6719, lr 2.0e-04, dt 2.1s +All GPU(s): step 6375: loss 21.2969, lr 2.0e-04, dt 2.1s +All GPU(s): step 6376: loss 21.5625, lr 2.0e-04, dt 2.1s +All GPU(s): step 6377: loss 21.7969, lr 2.0e-04, dt 2.1s +All GPU(s): step 6378: loss 21.8750, lr 2.0e-04, dt 2.1s +All GPU(s): step 6379: loss 21.5625, lr 2.0e-04, dt 2.1s +All GPU(s): step 6380: loss 21.8125, lr 2.0e-04, dt 2.0s +All GPU(s): step 6381: loss 21.9375, lr 2.0e-04, dt 2.1s +All GPU(s): step 6382: loss 21.8906, lr 2.0e-04, dt 2.1s +All GPU(s): step 6383: loss 21.5781, lr 2.0e-04, dt 2.0s +All GPU(s): step 6384: loss 21.6719, lr 2.0e-04, dt 2.1s +All GPU(s): step 6385: loss 21.6094, lr 2.0e-04, dt 2.1s +All GPU(s): step 6386: loss 21.5312, lr 2.0e-04, dt 2.0s +All GPU(s): step 6387: loss 21.5156, lr 2.0e-04, dt 2.0s +All GPU(s): step 6388: loss 21.6250, lr 2.0e-04, dt 2.1s +All GPU(s): step 6389: loss 21.7500, lr 2.0e-04, dt 2.2s +All GPU(s): step 6390: loss 21.7500, lr 2.0e-04, dt 2.1s +All GPU(s): step 6391: loss 21.8125, lr 2.0e-04, dt 2.0s +All GPU(s): step 6392: loss 21.5000, lr 2.0e-04, dt 2.0s +All GPU(s): step 6393: loss 21.8906, lr 2.0e-04, dt 2.1s +All GPU(s): step 6394: loss 22.0625, lr 2.0e-04, dt 2.1s +All GPU(s): step 6395: loss 21.6250, lr 2.0e-04, dt 2.1s +All GPU(s): step 6396: loss 21.5000, lr 2.0e-04, dt 2.1s +All GPU(s): step 6397: loss 21.8438, lr 2.0e-04, dt 2.0s +All GPU(s): step 6398: loss 21.8281, lr 2.0e-04, dt 2.1s +All GPU(s): step 6399: loss 21.7812, lr 2.0e-04, dt 2.1s +All GPU(s): step 6400: loss 21.7500, lr 2.0e-04, dt 2.1s +All GPU(s): step 6401: loss 21.8125, lr 2.0e-04, dt 2.1s +All GPU(s): step 6402: loss 21.7656, lr 2.0e-04, dt 2.0s +All GPU(s): step 6403: loss 21.8750, lr 2.0e-04, dt 2.1s +All GPU(s): step 6404: loss 21.5938, lr 2.0e-04, dt 2.1s +All GPU(s): step 6405: loss 21.7031, lr 2.0e-04, dt 2.0s +All GPU(s): step 6406: loss 21.5781, lr 2.0e-04, dt 2.0s +All GPU(s): step 6407: loss 21.5312, lr 2.0e-04, dt 2.1s +All GPU(s): step 6408: loss 21.6875, lr 2.0e-04, dt 2.1s +All GPU(s): step 6409: loss 21.5469, lr 2.0e-04, dt 2.1s +All GPU(s): step 6410: loss 21.7031, lr 2.0e-04, dt 2.1s +All GPU(s): step 6411: loss 21.5625, lr 2.0e-04, dt 2.0s +All GPU(s): step 6412: loss 21.7500, lr 2.0e-04, dt 2.0s +All GPU(s): step 6413: loss 22.0469, lr 2.0e-04, dt 2.1s +All GPU(s): step 6414: loss 21.9844, lr 2.0e-04, dt 2.0s +All GPU(s): step 6415: loss 21.7656, lr 2.0e-04, dt 2.1s +All GPU(s): step 6416: loss 22.0000, lr 2.0e-04, dt 2.1s +All GPU(s): step 6417: loss 21.4688, lr 2.0e-04, dt 2.1s +All GPU(s): step 6418: loss 21.9219, lr 2.0e-04, dt 2.1s +All GPU(s): step 6419: loss 21.3906, lr 2.0e-04, dt 2.0s +All GPU(s): step 6420: loss 21.7344, lr 2.0e-04, dt 2.0s +All GPU(s): step 6421: loss 21.7344, lr 2.0e-04, dt 2.0s +All GPU(s): step 6422: loss 21.7656, lr 2.0e-04, dt 2.1s +All GPU(s): step 6423: loss 21.6719, lr 2.0e-04, dt 2.1s +All GPU(s): step 6424: loss 21.5469, lr 2.0e-04, dt 2.1s +All GPU(s): step 6425: loss 21.7344, lr 2.0e-04, dt 2.0s +All GPU(s): step 6426: loss 21.6875, lr 2.0e-04, dt 2.0s +All GPU(s): step 6427: loss 21.8281, lr 2.0e-04, dt 2.1s +All GPU(s): step 6428: loss 21.6562, lr 2.0e-04, dt 2.0s +All GPU(s): step 6429: loss 21.7812, lr 2.0e-04, dt 2.0s +All GPU(s): step 6430: loss 21.7812, lr 2.0e-04, dt 2.0s +All GPU(s): step 6431: loss 21.5312, lr 2.0e-04, dt 2.1s +All GPU(s): step 6432: loss 21.6406, lr 2.0e-04, dt 2.1s +All GPU(s): step 6433: loss 21.4688, lr 2.0e-04, dt 2.0s +All GPU(s): step 6434: loss 21.6406, lr 2.0e-04, dt 2.0s +All GPU(s): step 6435: loss 22.1094, lr 2.0e-04, dt 2.0s +All GPU(s): step 6436: loss 21.3281, lr 2.0e-04, dt 2.1s +All GPU(s): step 6437: loss 21.2188, lr 2.0e-04, dt 2.1s +All GPU(s): step 6438: loss 21.5312, lr 2.0e-04, dt 2.0s +All GPU(s): step 6439: loss 21.5000, lr 2.0e-04, dt 2.1s +All GPU(s): step 6440: loss 21.5938, lr 2.0e-04, dt 2.0s +All GPU(s): step 6441: loss 21.5000, lr 2.0e-04, dt 2.1s +All GPU(s): step 6442: loss 21.7031, lr 2.0e-04, dt 2.1s +All GPU(s): step 6443: loss 21.8750, lr 2.0e-04, dt 2.0s +All GPU(s): step 6444: loss 21.9844, lr 1.9e-04, dt 2.0s +All GPU(s): step 6445: loss 21.8594, lr 1.9e-04, dt 2.0s +All GPU(s): step 6446: loss 21.5938, lr 1.9e-04, dt 2.1s +All GPU(s): step 6447: loss 21.5469, lr 1.9e-04, dt 2.1s +All GPU(s): step 6448: loss 21.5469, lr 1.9e-04, dt 2.0s +All GPU(s): step 6449: loss 21.6719, lr 1.9e-04, dt 2.0s +All GPU(s): step 6450: loss 21.4062, lr 1.9e-04, dt 2.0s +All GPU(s): step 6451: loss 21.6719, lr 1.9e-04, dt 2.1s +All GPU(s): step 6452: loss 21.4062, lr 1.9e-04, dt 2.1s +All GPU(s): step 6453: loss 21.5312, lr 1.9e-04, dt 2.0s +All GPU(s): step 6454: loss 21.5781, lr 1.9e-04, dt 2.0s +All GPU(s): step 6455: loss 21.5156, lr 1.9e-04, dt 2.0s +All GPU(s): step 6456: loss 21.7344, lr 1.9e-04, dt 2.1s +All GPU(s): step 6457: loss 21.4844, lr 1.9e-04, dt 2.0s +All GPU(s): step 6458: loss 21.3125, lr 1.9e-04, dt 2.0s +All GPU(s): step 6459: loss 21.5781, lr 1.9e-04, dt 2.0s +All GPU(s): step 6460: loss 21.6250, lr 1.9e-04, dt 2.1s +All GPU(s): step 6461: loss 21.3594, lr 1.9e-04, dt 2.1s +All GPU(s): step 6462: loss 21.8906, lr 1.9e-04, dt 2.1s +All GPU(s): step 6463: loss 21.6094, lr 1.9e-04, dt 2.1s +All GPU(s): step 6464: loss 21.7188, lr 1.9e-04, dt 2.1s +All GPU(s): step 6465: loss 21.8281, lr 1.9e-04, dt 2.1s +All GPU(s): step 6466: loss 21.5938, lr 1.9e-04, dt 2.1s +All GPU(s): step 6467: loss 21.8438, lr 1.9e-04, dt 2.1s +All GPU(s): step 6468: loss 21.6562, lr 1.9e-04, dt 2.0s +All GPU(s): step 6469: loss 21.7344, lr 1.9e-04, dt 2.0s +All GPU(s): step 6470: loss 21.7500, lr 1.9e-04, dt 2.1s +All GPU(s): step 6471: loss 21.9375, lr 1.9e-04, dt 2.1s +All GPU(s): step 6472: loss 21.5781, lr 1.9e-04, dt 2.0s +All GPU(s): step 6473: loss 21.6406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6474: loss 21.5938, lr 1.9e-04, dt 2.0s +All GPU(s): step 6475: loss 21.6875, lr 1.9e-04, dt 2.1s +All GPU(s): step 6476: loss 21.8594, lr 1.9e-04, dt 2.1s +All GPU(s): step 6477: loss 21.3125, lr 1.9e-04, dt 2.0s +All GPU(s): step 6478: loss 21.7188, lr 1.9e-04, dt 2.0s +All GPU(s): step 6479: loss 21.7656, lr 1.9e-04, dt 2.0s +All GPU(s): step 6480: loss 21.7031, lr 1.9e-04, dt 2.1s +All GPU(s): step 6481: loss 21.5938, lr 1.9e-04, dt 2.0s +All GPU(s): step 6482: loss 21.5781, lr 1.9e-04, dt 2.0s +All GPU(s): step 6483: loss 21.7188, lr 1.9e-04, dt 2.0s +All GPU(s): step 6484: loss 21.5781, lr 1.9e-04, dt 2.1s +All GPU(s): step 6485: loss 21.5000, lr 1.9e-04, dt 2.1s +All GPU(s): step 6486: loss 21.5312, lr 1.9e-04, dt 2.0s +All GPU(s): step 6487: loss 21.7031, lr 1.9e-04, dt 2.0s +All GPU(s): step 6488: loss 21.6250, lr 1.9e-04, dt 2.1s +All GPU(s): step 6489: loss 22.0469, lr 1.9e-04, dt 2.0s +All GPU(s): step 6490: loss 22.1719, lr 1.9e-04, dt 2.1s +All GPU(s): step 6491: loss 21.9219, lr 1.9e-04, dt 2.0s +All GPU(s): step 6492: loss 21.9219, lr 1.9e-04, dt 2.0s +All GPU(s): step 6493: loss 21.9219, lr 1.9e-04, dt 2.0s +All GPU(s): step 6494: loss 21.7656, lr 1.9e-04, dt 2.1s +All GPU(s): step 6495: loss 22.0000, lr 1.9e-04, dt 2.1s +All GPU(s): step 6496: loss 21.9219, lr 1.9e-04, dt 2.0s +All GPU(s): step 6497: loss 21.8906, lr 1.9e-04, dt 2.0s +All GPU(s): step 6498: loss 21.8438, lr 1.9e-04, dt 2.1s +All GPU(s): step 6499: loss 21.9688, lr 1.9e-04, dt 2.1s +All GPU(s): step 6500: loss 21.7656, lr 1.9e-04, dt 2.1s +All GPU(s): step 6501: loss 21.8438, lr 1.9e-04, dt 2.0s +All GPU(s): step 6502: loss 21.8906, lr 1.9e-04, dt 2.1s +All GPU(s): step 6503: loss 21.9531, lr 1.9e-04, dt 2.0s +All GPU(s): step 6504: loss 22.0312, lr 1.9e-04, dt 2.1s +All GPU(s): step 6505: loss 21.8750, lr 1.9e-04, dt 2.1s +All GPU(s): step 6506: loss 22.0781, lr 1.9e-04, dt 2.1s +All GPU(s): step 6507: loss 21.8438, lr 1.9e-04, dt 2.0s +All GPU(s): step 6508: loss 21.9844, lr 1.9e-04, dt 2.0s +All GPU(s): step 6509: loss 21.8906, lr 1.9e-04, dt 2.2s +All GPU(s): step 6510: loss 21.8281, lr 1.9e-04, dt 2.0s +All GPU(s): step 6511: loss 22.3438, lr 1.9e-04, dt 2.0s +All GPU(s): step 6512: loss 21.7969, lr 1.9e-04, dt 2.0s +All GPU(s): step 6513: loss 22.3281, lr 1.9e-04, dt 2.1s +All GPU(s): step 6514: loss 22.1250, lr 1.9e-04, dt 2.1s +All GPU(s): step 6515: loss 21.8750, lr 1.9e-04, dt 2.1s +All GPU(s): step 6516: loss 22.2656, lr 1.9e-04, dt 2.0s +All GPU(s): step 6517: loss 22.0938, lr 1.9e-04, dt 2.0s +All GPU(s): step 6518: loss 22.1562, lr 1.9e-04, dt 2.1s +All GPU(s): step 6519: loss 21.7500, lr 1.9e-04, dt 2.2s +All GPU(s): step 6520: loss 22.0000, lr 1.9e-04, dt 2.0s +All GPU(s): step 6521: loss 21.7969, lr 1.9e-04, dt 2.0s +All GPU(s): step 6522: loss 22.0312, lr 1.9e-04, dt 2.0s +All GPU(s): step 6523: loss 21.7500, lr 1.9e-04, dt 2.0s +All GPU(s): step 6524: loss 21.9062, lr 1.9e-04, dt 2.1s +All GPU(s): step 6525: loss 21.7500, lr 1.9e-04, dt 2.1s +All GPU(s): step 6526: loss 21.9531, lr 1.9e-04, dt 2.1s +All GPU(s): step 6527: loss 21.6094, lr 1.9e-04, dt 2.1s +All GPU(s): step 6528: loss 21.2188, lr 1.9e-04, dt 2.1s +All GPU(s): step 6529: loss 21.7031, lr 1.9e-04, dt 2.1s +All GPU(s): step 6530: loss 21.5938, lr 1.9e-04, dt 2.1s +All GPU(s): step 6531: loss 21.5625, lr 1.9e-04, dt 2.0s +All GPU(s): step 6532: loss 21.8125, lr 1.9e-04, dt 2.0s +All GPU(s): step 6533: loss 21.5312, lr 1.9e-04, dt 2.1s +All GPU(s): step 6534: loss 21.7656, lr 1.9e-04, dt 2.0s +All GPU(s): step 6535: loss 21.9375, lr 1.9e-04, dt 2.0s +All GPU(s): step 6536: loss 21.4688, lr 1.9e-04, dt 2.0s +All GPU(s): step 6537: loss 21.6406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6538: loss 21.4844, lr 1.9e-04, dt 2.2s +All GPU(s): step 6539: loss 20.8438, lr 1.9e-04, dt 2.0s +All GPU(s): step 6540: loss 21.0312, lr 1.9e-04, dt 2.0s +All GPU(s): step 6541: loss 21.2969, lr 1.9e-04, dt 2.1s +All GPU(s): step 6542: loss 21.5000, lr 1.9e-04, dt 2.0s +All GPU(s): step 6543: loss 21.0469, lr 1.9e-04, dt 2.1s +All GPU(s): step 6544: loss 21.0625, lr 1.9e-04, dt 2.0s +All GPU(s): step 6545: loss 21.2656, lr 1.9e-04, dt 2.0s +All GPU(s): step 6546: loss 21.4844, lr 1.9e-04, dt 2.1s +All GPU(s): step 6547: loss 21.0156, lr 1.9e-04, dt 2.1s +All GPU(s): step 6548: loss 21.0781, lr 1.9e-04, dt 2.1s +All GPU(s): step 6549: loss 21.2500, lr 1.9e-04, dt 2.0s +All GPU(s): step 6550: loss 21.1875, lr 1.9e-04, dt 2.1s +All GPU(s): step 6551: loss 20.9688, lr 1.9e-04, dt 2.0s +All GPU(s): step 6552: loss 20.7969, lr 1.9e-04, dt 2.1s +All GPU(s): step 6553: loss 20.4531, lr 1.9e-04, dt 2.1s +All GPU(s): step 6554: loss 20.0469, lr 1.9e-04, dt 2.0s +All GPU(s): step 6555: loss 20.1875, lr 1.9e-04, dt 2.0s +All GPU(s): step 6556: loss 20.4062, lr 1.9e-04, dt 2.0s +All GPU(s): step 6557: loss 20.6719, lr 1.9e-04, dt 2.1s +All GPU(s): step 6558: loss 20.7188, lr 1.9e-04, dt 2.0s +All GPU(s): step 6559: loss 21.1094, lr 1.9e-04, dt 2.0s +All GPU(s): step 6560: loss 21.0938, lr 1.9e-04, dt 2.1s +All GPU(s): step 6561: loss 21.1875, lr 1.9e-04, dt 2.1s +All GPU(s): step 6562: loss 21.0000, lr 1.9e-04, dt 2.1s +All GPU(s): step 6563: loss 21.1406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6564: loss 21.6094, lr 1.9e-04, dt 2.0s +All GPU(s): step 6565: loss 21.6406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6566: loss 21.7969, lr 1.9e-04, dt 2.1s +All GPU(s): step 6567: loss 21.7031, lr 1.9e-04, dt 2.2s +All GPU(s): step 6568: loss 21.3438, lr 1.9e-04, dt 2.1s +All GPU(s): step 6569: loss 21.6875, lr 1.9e-04, dt 2.0s +All GPU(s): step 6570: loss 21.7969, lr 1.9e-04, dt 2.1s +All GPU(s): step 6571: loss 21.9688, lr 1.9e-04, dt 2.1s +All GPU(s): step 6572: loss 21.7812, lr 1.9e-04, dt 2.1s +All GPU(s): step 6573: loss 21.7500, lr 1.9e-04, dt 2.1s +All GPU(s): step 6574: loss 21.6719, lr 1.9e-04, dt 2.0s +All GPU(s): step 6575: loss 21.7500, lr 1.9e-04, dt 2.1s +All GPU(s): step 6576: loss 21.7500, lr 1.9e-04, dt 2.1s +All GPU(s): step 6577: loss 21.6250, lr 1.9e-04, dt 2.1s +All GPU(s): step 6578: loss 21.6094, lr 1.9e-04, dt 2.0s +All GPU(s): step 6579: loss 21.5312, lr 1.9e-04, dt 2.0s +All GPU(s): step 6580: loss 21.5469, lr 1.9e-04, dt 2.0s +All GPU(s): step 6581: loss 21.6094, lr 1.9e-04, dt 2.1s +All GPU(s): step 6582: loss 21.6094, lr 1.9e-04, dt 2.0s +All GPU(s): step 6583: loss 22.0312, lr 1.9e-04, dt 2.0s +All GPU(s): step 6584: loss 21.5938, lr 1.9e-04, dt 2.0s +All GPU(s): step 6585: loss 21.8125, lr 1.9e-04, dt 2.1s +All GPU(s): step 6586: loss 21.6406, lr 1.9e-04, dt 2.1s +All GPU(s): step 6587: loss 21.5000, lr 1.9e-04, dt 2.0s +All GPU(s): step 6588: loss 21.5312, lr 1.9e-04, dt 2.0s +All GPU(s): step 6589: loss 21.7188, lr 1.9e-04, dt 2.0s +All GPU(s): step 6590: loss 21.8281, lr 1.9e-04, dt 2.0s +All GPU(s): step 6591: loss 21.8281, lr 1.9e-04, dt 2.1s +All GPU(s): step 6592: loss 21.8906, lr 1.9e-04, dt 2.0s +All GPU(s): step 6593: loss 21.6875, lr 1.9e-04, dt 2.0s +All GPU(s): step 6594: loss 21.8281, lr 1.9e-04, dt 2.0s +All GPU(s): step 6595: loss 21.5469, lr 1.9e-04, dt 2.0s +All GPU(s): step 6596: loss 21.5469, lr 1.9e-04, dt 2.1s +All GPU(s): step 6597: loss 22.0156, lr 1.9e-04, dt 2.0s +All GPU(s): step 6598: loss 22.0000, lr 1.9e-04, dt 2.1s +All GPU(s): step 6599: loss 21.8906, lr 1.9e-04, dt 2.1s +All GPU(s): step 6600: loss 21.7500, lr 1.9e-04, dt 2.1s +All GPU(s): step 6601: loss 21.6406, lr 1.9e-04, dt 2.1s +All GPU(s): step 6602: loss 21.5781, lr 1.9e-04, dt 2.0s +All GPU(s): step 6603: loss 21.7031, lr 1.9e-04, dt 2.0s +All GPU(s): step 6604: loss 21.7656, lr 1.9e-04, dt 2.0s +All GPU(s): step 6605: loss 21.3125, lr 1.9e-04, dt 2.1s +All GPU(s): step 6606: loss 21.6094, lr 1.9e-04, dt 2.0s +All GPU(s): step 6607: loss 21.9688, lr 1.9e-04, dt 2.0s +All GPU(s): step 6608: loss 21.6719, lr 1.9e-04, dt 2.0s +All GPU(s): step 6609: loss 21.6250, lr 1.9e-04, dt 2.0s +All GPU(s): step 6610: loss 21.7188, lr 1.9e-04, dt 2.1s +All GPU(s): step 6611: loss 21.7969, lr 1.9e-04, dt 2.0s +All GPU(s): step 6612: loss 21.6562, lr 1.9e-04, dt 2.0s +All GPU(s): step 6613: loss 21.5781, lr 1.9e-04, dt 2.0s +All GPU(s): step 6614: loss 21.6719, lr 1.9e-04, dt 2.0s +All GPU(s): step 6615: loss 22.0312, lr 1.9e-04, dt 2.1s +All GPU(s): step 6616: loss 21.7188, lr 1.9e-04, dt 2.0s +All GPU(s): step 6617: loss 21.6562, lr 1.9e-04, dt 2.0s +All GPU(s): step 6618: loss 21.7031, lr 1.9e-04, dt 2.0s +All GPU(s): step 6619: loss 21.6406, lr 1.9e-04, dt 2.0s +All GPU(s): step 6620: loss 21.5156, lr 1.9e-04, dt 2.2s +All GPU(s): step 6621: loss 21.6094, lr 1.9e-04, dt 2.1s +All GPU(s): step 6622: loss 21.2969, lr 1.9e-04, dt 2.1s +All GPU(s): step 6623: loss 21.5625, lr 1.8e-04, dt 2.0s +All GPU(s): step 6624: loss 21.9062, lr 1.8e-04, dt 2.1s +All GPU(s): step 6625: loss 21.7969, lr 1.8e-04, dt 2.1s +All GPU(s): step 6626: loss 21.7656, lr 1.8e-04, dt 2.0s +All GPU(s): step 6627: loss 21.7812, lr 1.8e-04, dt 2.0s +All GPU(s): step 6628: loss 22.0000, lr 1.8e-04, dt 2.1s +All GPU(s): step 6629: loss 21.8594, lr 1.8e-04, dt 2.1s +All GPU(s): step 6630: loss 21.4688, lr 1.8e-04, dt 2.2s +All GPU(s): step 6631: loss 22.0781, lr 1.8e-04, dt 2.1s +All GPU(s): step 6632: loss 21.5312, lr 1.8e-04, dt 2.1s +All GPU(s): step 6633: loss 21.9062, lr 1.8e-04, dt 2.0s +All GPU(s): step 6634: loss 21.8125, lr 1.8e-04, dt 2.1s +All GPU(s): step 6635: loss 21.6719, lr 1.8e-04, dt 2.1s +All GPU(s): step 6636: loss 21.6719, lr 1.8e-04, dt 2.0s +All GPU(s): step 6637: loss 21.6250, lr 1.8e-04, dt 2.1s +All GPU(s): step 6638: loss 21.4844, lr 1.8e-04, dt 2.1s +All GPU(s): step 6639: loss 21.6719, lr 1.8e-04, dt 2.2s +All GPU(s): step 6640: loss 21.6562, lr 1.8e-04, dt 2.1s +All GPU(s): step 6641: loss 21.5000, lr 1.8e-04, dt 2.0s +All GPU(s): step 6642: loss 21.7188, lr 1.8e-04, dt 2.0s +All GPU(s): step 6643: loss 21.6562, lr 1.8e-04, dt 2.1s +All GPU(s): step 6644: loss 21.4531, lr 1.8e-04, dt 2.2s +All GPU(s): step 6645: loss 21.3281, lr 1.8e-04, dt 2.0s +All GPU(s): step 6646: loss 21.5000, lr 1.8e-04, dt 2.0s +All GPU(s): step 6647: loss 21.5781, lr 1.8e-04, dt 2.0s +All GPU(s): step 6648: loss 21.4062, lr 1.8e-04, dt 2.1s +All GPU(s): step 6649: loss 21.5156, lr 1.8e-04, dt 2.1s +All GPU(s): step 6650: loss 21.7656, lr 1.8e-04, dt 2.0s +All GPU(s): step 6651: loss 21.5625, lr 1.8e-04, dt 2.0s +All GPU(s): step 6652: loss 21.2500, lr 1.8e-04, dt 2.1s +All GPU(s): step 6653: loss 21.7188, lr 1.8e-04, dt 2.1s +All GPU(s): step 6654: loss 21.7656, lr 1.8e-04, dt 2.1s +All GPU(s): step 6655: loss 21.7656, lr 1.8e-04, dt 2.1s +All GPU(s): step 6656: loss 21.2969, lr 1.8e-04, dt 2.0s +All GPU(s): step 6657: loss 21.1875, lr 1.8e-04, dt 2.1s +All GPU(s): step 6658: loss 21.4062, lr 1.8e-04, dt 2.1s +All GPU(s): step 6659: loss 21.6406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6660: loss 21.7031, lr 1.8e-04, dt 2.1s +All GPU(s): step 6661: loss 21.7031, lr 1.8e-04, dt 2.1s +All GPU(s): step 6662: loss 21.3906, lr 1.8e-04, dt 2.0s +All GPU(s): step 6663: loss 21.5781, lr 1.8e-04, dt 2.2s +All GPU(s): step 6664: loss 21.5000, lr 1.8e-04, dt 2.0s +All GPU(s): step 6665: loss 21.4531, lr 1.8e-04, dt 2.0s +All GPU(s): step 6666: loss 21.5156, lr 1.8e-04, dt 2.0s +All GPU(s): step 6667: loss 21.3594, lr 1.8e-04, dt 2.0s +All GPU(s): step 6668: loss 21.2812, lr 1.8e-04, dt 2.1s +All GPU(s): step 6669: loss 21.4531, lr 1.8e-04, dt 2.1s +All GPU(s): step 6670: loss 21.7969, lr 1.8e-04, dt 2.1s +All GPU(s): step 6671: loss 21.5938, lr 1.8e-04, dt 2.0s +All GPU(s): step 6672: loss 21.5156, lr 1.8e-04, dt 2.1s +All GPU(s): step 6673: loss 21.3125, lr 1.8e-04, dt 2.1s +All GPU(s): step 6674: loss 21.8281, lr 1.8e-04, dt 2.0s +All GPU(s): step 6675: loss 21.3125, lr 1.8e-04, dt 2.0s +All GPU(s): step 6676: loss 21.7188, lr 1.8e-04, dt 2.1s +All GPU(s): step 6677: loss 21.5469, lr 1.8e-04, dt 2.1s +All GPU(s): step 6678: loss 21.5625, lr 1.8e-04, dt 2.1s +All GPU(s): step 6679: loss 21.4844, lr 1.8e-04, dt 2.0s +All GPU(s): step 6680: loss 21.5938, lr 1.8e-04, dt 2.0s +All GPU(s): step 6681: loss 21.7031, lr 1.8e-04, dt 2.0s +All GPU(s): step 6682: loss 21.7656, lr 1.8e-04, dt 2.1s +All GPU(s): step 6683: loss 21.5938, lr 1.8e-04, dt 2.0s +All GPU(s): step 6684: loss 21.4375, lr 1.8e-04, dt 2.0s +All GPU(s): step 6685: loss 21.4375, lr 1.8e-04, dt 2.0s +All GPU(s): step 6686: loss 21.3125, lr 1.8e-04, dt 2.0s +All GPU(s): step 6687: loss 21.6250, lr 1.8e-04, dt 2.1s +All GPU(s): step 6688: loss 21.2188, lr 1.8e-04, dt 2.0s +All GPU(s): step 6689: loss 21.6406, lr 1.8e-04, dt 2.0s +All GPU(s): step 6690: loss 21.1875, lr 1.8e-04, dt 2.0s +All GPU(s): step 6691: loss 21.2656, lr 1.8e-04, dt 2.1s +All GPU(s): step 6692: loss 21.5156, lr 1.8e-04, dt 2.2s +All GPU(s): step 6693: loss 21.3281, lr 1.8e-04, dt 2.1s +All GPU(s): step 6694: loss 20.9531, lr 1.8e-04, dt 2.1s +All GPU(s): step 6695: loss 21.3594, lr 1.8e-04, dt 2.0s +All GPU(s): step 6696: loss 21.5312, lr 1.8e-04, dt 2.1s +All GPU(s): step 6697: loss 21.5781, lr 1.8e-04, dt 2.1s +All GPU(s): step 6698: loss 21.2500, lr 1.8e-04, dt 2.0s +All GPU(s): step 6699: loss 21.5000, lr 1.8e-04, dt 2.1s +All GPU(s): step 6700: loss 21.5156, lr 1.8e-04, dt 2.0s +All GPU(s): step 6701: loss 21.8594, lr 1.8e-04, dt 2.1s +All GPU(s): step 6702: loss 21.7812, lr 1.8e-04, dt 2.1s +All GPU(s): step 6703: loss 21.7500, lr 1.8e-04, dt 2.0s +All GPU(s): step 6704: loss 21.7188, lr 1.8e-04, dt 2.0s +All GPU(s): step 6705: loss 21.7031, lr 1.8e-04, dt 2.0s +All GPU(s): step 6706: loss 22.0312, lr 1.8e-04, dt 2.1s +All GPU(s): step 6707: loss 21.5000, lr 1.8e-04, dt 2.0s +All GPU(s): step 6708: loss 21.8125, lr 1.8e-04, dt 2.0s +All GPU(s): step 6709: loss 21.8594, lr 1.8e-04, dt 2.0s +All GPU(s): step 6710: loss 21.5938, lr 1.8e-04, dt 2.0s +All GPU(s): step 6711: loss 21.7500, lr 1.8e-04, dt 2.1s +All GPU(s): step 6712: loss 21.9531, lr 1.8e-04, dt 2.0s +All GPU(s): step 6713: loss 21.8594, lr 1.8e-04, dt 2.0s +All GPU(s): step 6714: loss 21.5938, lr 1.8e-04, dt 2.0s +All GPU(s): step 6715: loss 21.3750, lr 1.8e-04, dt 2.1s +All GPU(s): step 6716: loss 21.6094, lr 1.8e-04, dt 2.1s +All GPU(s): step 6717: loss 21.4375, lr 1.8e-04, dt 2.1s +All GPU(s): step 6718: loss 21.7656, lr 1.8e-04, dt 2.0s +All GPU(s): step 6719: loss 21.5000, lr 1.8e-04, dt 2.0s +All GPU(s): step 6720: loss 21.7969, lr 1.8e-04, dt 2.0s +All GPU(s): step 6721: loss 21.2812, lr 1.8e-04, dt 2.2s +All GPU(s): step 6722: loss 21.3750, lr 1.8e-04, dt 2.1s +All GPU(s): step 6723: loss 21.7812, lr 1.8e-04, dt 2.0s +All GPU(s): step 6724: loss 21.3438, lr 1.8e-04, dt 2.0s +All GPU(s): step 6725: loss 21.5781, lr 1.8e-04, dt 2.1s +All GPU(s): step 6726: loss 21.3906, lr 1.8e-04, dt 2.1s +All GPU(s): step 6727: loss 21.4375, lr 1.8e-04, dt 2.0s +All GPU(s): step 6728: loss 21.3281, lr 1.8e-04, dt 2.0s +All GPU(s): step 6729: loss 21.5312, lr 1.8e-04, dt 2.0s +All GPU(s): step 6730: loss 21.6406, lr 1.8e-04, dt 2.1s +All GPU(s): step 6731: loss 21.7344, lr 1.8e-04, dt 2.1s +All GPU(s): step 6732: loss 21.5469, lr 1.8e-04, dt 2.0s +All GPU(s): step 6733: loss 21.7188, lr 1.8e-04, dt 2.0s +All GPU(s): step 6734: loss 21.5469, lr 1.8e-04, dt 2.0s +All GPU(s): step 6735: loss 21.7031, lr 1.8e-04, dt 2.1s +All GPU(s): step 6736: loss 21.7656, lr 1.8e-04, dt 2.0s +All GPU(s): step 6737: loss 21.4219, lr 1.8e-04, dt 2.0s +All GPU(s): step 6738: loss 21.7344, lr 1.8e-04, dt 2.0s +All GPU(s): step 6739: loss 21.8594, lr 1.8e-04, dt 2.1s +All GPU(s): step 6740: loss 22.0312, lr 1.8e-04, dt 2.1s +All GPU(s): step 6741: loss 22.0156, lr 1.8e-04, dt 2.0s +All GPU(s): step 6742: loss 21.5781, lr 1.8e-04, dt 2.0s +All GPU(s): step 6743: loss 21.9375, lr 1.8e-04, dt 2.0s +All GPU(s): step 6744: loss 21.9062, lr 1.8e-04, dt 2.0s +All GPU(s): step 6745: loss 21.9844, lr 1.8e-04, dt 2.1s +All GPU(s): step 6746: loss 21.5938, lr 1.8e-04, dt 2.0s +All GPU(s): step 6747: loss 21.7969, lr 1.8e-04, dt 2.0s +All GPU(s): step 6748: loss 21.7812, lr 1.8e-04, dt 2.0s +All GPU(s): step 6749: loss 21.9062, lr 1.8e-04, dt 2.1s +All GPU(s): step 6750: loss 22.0469, lr 1.8e-04, dt 2.1s +All GPU(s): step 6751: loss 21.7344, lr 1.8e-04, dt 2.0s +All GPU(s): step 6752: loss 21.7500, lr 1.8e-04, dt 2.0s +All GPU(s): step 6753: loss 21.9375, lr 1.8e-04, dt 2.0s +All GPU(s): step 6754: loss 21.7344, lr 1.8e-04, dt 2.1s +All GPU(s): step 6755: loss 21.7344, lr 1.8e-04, dt 2.1s +All GPU(s): step 6756: loss 21.7812, lr 1.8e-04, dt 2.1s +All GPU(s): step 6757: loss 21.8594, lr 1.8e-04, dt 2.1s +All GPU(s): step 6758: loss 21.7812, lr 1.8e-04, dt 2.1s +All GPU(s): step 6759: loss 22.1562, lr 1.8e-04, dt 2.1s +All GPU(s): step 6760: loss 21.9062, lr 1.8e-04, dt 2.1s +All GPU(s): step 6761: loss 21.8438, lr 1.8e-04, dt 2.1s +All GPU(s): step 6762: loss 22.0156, lr 1.8e-04, dt 2.1s +All GPU(s): step 6763: loss 21.6562, lr 1.8e-04, dt 2.1s +All GPU(s): step 6764: loss 21.8750, lr 1.8e-04, dt 2.1s +All GPU(s): step 6765: loss 21.8438, lr 1.8e-04, dt 2.0s +All GPU(s): step 6766: loss 21.9219, lr 1.8e-04, dt 2.0s +All GPU(s): step 6767: loss 22.0469, lr 1.8e-04, dt 2.0s +All GPU(s): step 6768: loss 22.0469, lr 1.8e-04, dt 2.1s +All GPU(s): step 6769: loss 21.5000, lr 1.8e-04, dt 2.2s +All GPU(s): step 6770: loss 21.9062, lr 1.8e-04, dt 2.1s +All GPU(s): step 6771: loss 21.9688, lr 1.8e-04, dt 2.1s +All GPU(s): step 6772: loss 21.8125, lr 1.8e-04, dt 2.0s +All GPU(s): step 6773: loss 21.6250, lr 1.8e-04, dt 2.1s +All GPU(s): step 6774: loss 22.0156, lr 1.8e-04, dt 2.1s +All GPU(s): step 6775: loss 21.8750, lr 1.8e-04, dt 2.0s +All GPU(s): step 6776: loss 22.1094, lr 1.8e-04, dt 2.0s +All GPU(s): step 6777: loss 22.0469, lr 1.8e-04, dt 2.0s +All GPU(s): step 6778: loss 21.7812, lr 1.8e-04, dt 2.0s +All GPU(s): step 6779: loss 22.0000, lr 1.8e-04, dt 2.1s +All GPU(s): step 6780: loss 22.1719, lr 1.8e-04, dt 2.0s +All GPU(s): step 6781: loss 22.2188, lr 1.8e-04, dt 2.0s +All GPU(s): step 6782: loss 22.2656, lr 1.8e-04, dt 2.0s +All GPU(s): step 6783: loss 22.3438, lr 1.8e-04, dt 2.1s +All GPU(s): step 6784: loss 21.9531, lr 1.8e-04, dt 2.1s +All GPU(s): step 6785: loss 21.7656, lr 1.8e-04, dt 2.1s +All GPU(s): step 6786: loss 22.0312, lr 1.8e-04, dt 2.0s +All GPU(s): step 6787: loss 22.3594, lr 1.8e-04, dt 2.0s +All GPU(s): step 6788: loss 22.5938, lr 1.8e-04, dt 2.1s +All GPU(s): step 6789: loss 22.5000, lr 1.8e-04, dt 2.0s +All GPU(s): step 6790: loss 22.6250, lr 1.8e-04, dt 2.0s +All GPU(s): step 6791: loss 22.5469, lr 1.8e-04, dt 2.1s +All GPU(s): step 6792: loss 22.8594, lr 1.8e-04, dt 2.1s +All GPU(s): step 6793: loss 23.1875, lr 1.8e-04, dt 2.2s +All GPU(s): step 6794: loss 22.6875, lr 1.8e-04, dt 2.1s +All GPU(s): step 6795: loss 22.8750, lr 1.8e-04, dt 2.1s +All GPU(s): step 6796: loss 22.9375, lr 1.8e-04, dt 2.0s +All GPU(s): step 6797: loss 23.0469, lr 1.8e-04, dt 2.1s +All GPU(s): step 6798: loss 22.6875, lr 1.8e-04, dt 2.1s +All GPU(s): step 6799: loss 23.0000, lr 1.8e-04, dt 2.0s +All GPU(s): step 6800: loss 23.3438, lr 1.8e-04, dt 2.0s +All GPU(s): step 6801: loss 23.3438, lr 1.8e-04, dt 2.1s +All GPU(s): step 6802: loss 23.0938, lr 1.8e-04, dt 2.1s +All GPU(s): step 6803: loss 23.0156, lr 1.8e-04, dt 2.1s +All GPU(s): step 6804: loss 23.1562, lr 1.8e-04, dt 2.1s +All GPU(s): step 6805: loss 22.8125, lr 1.7e-04, dt 2.0s +All GPU(s): step 6806: loss 22.8594, lr 1.7e-04, dt 2.0s +All GPU(s): step 6807: loss 22.7812, lr 1.7e-04, dt 2.1s +All GPU(s): step 6808: loss 23.0625, lr 1.7e-04, dt 2.1s +All GPU(s): step 6809: loss 23.2031, lr 1.7e-04, dt 2.0s +All GPU(s): step 6810: loss 23.3438, lr 1.7e-04, dt 2.0s +All GPU(s): step 6811: loss 23.7656, lr 1.7e-04, dt 2.0s +All GPU(s): step 6812: loss 23.3906, lr 1.7e-04, dt 2.1s +All GPU(s): step 6813: loss 23.5781, lr 1.7e-04, dt 2.0s +All GPU(s): step 6814: loss 23.3906, lr 1.7e-04, dt 2.1s +All GPU(s): step 6815: loss 23.3438, lr 1.7e-04, dt 2.0s +All GPU(s): step 6816: loss 23.3125, lr 1.7e-04, dt 2.1s +All GPU(s): step 6817: loss 23.2969, lr 1.7e-04, dt 2.1s +All GPU(s): step 6818: loss 23.4844, lr 1.7e-04, dt 2.1s +All GPU(s): step 6819: loss 23.5156, lr 1.7e-04, dt 2.1s +All GPU(s): step 6820: loss 23.9375, lr 1.7e-04, dt 2.0s +All GPU(s): step 6821: loss 23.7344, lr 1.7e-04, dt 2.1s +All GPU(s): step 6822: loss 24.2031, lr 1.7e-04, dt 2.1s +All GPU(s): step 6823: loss 24.4531, lr 1.7e-04, dt 2.0s +All GPU(s): step 6824: loss 24.9531, lr 1.7e-04, dt 2.0s +All GPU(s): step 6825: loss 24.8281, lr 1.7e-04, dt 2.0s +All GPU(s): step 6826: loss 24.9531, lr 1.7e-04, dt 2.1s +All GPU(s): step 6827: loss 24.8906, lr 1.7e-04, dt 2.1s +All GPU(s): step 6828: loss 25.2656, lr 1.7e-04, dt 2.0s +All GPU(s): step 6829: loss 25.3125, lr 1.7e-04, dt 2.0s +All GPU(s): step 6830: loss 25.6406, lr 1.7e-04, dt 2.1s +All GPU(s): step 6831: loss 25.4062, lr 1.7e-04, dt 2.1s +All GPU(s): step 6832: loss 25.3906, lr 1.7e-04, dt 2.1s +All GPU(s): step 6833: loss 25.4688, lr 1.7e-04, dt 2.1s +All GPU(s): step 6834: loss 25.7188, lr 1.7e-04, dt 2.0s +All GPU(s): step 6835: loss 25.7188, lr 1.7e-04, dt 2.0s +All GPU(s): step 6836: loss 25.7812, lr 1.7e-04, dt 2.1s +All GPU(s): step 6837: loss 25.7188, lr 1.7e-04, dt 2.1s +All GPU(s): step 6838: loss 25.6562, lr 1.7e-04, dt 2.0s +All GPU(s): step 6839: loss 25.5469, lr 1.7e-04, dt 2.0s +All GPU(s): step 6840: loss 25.8281, lr 1.7e-04, dt 2.0s +All GPU(s): step 6841: loss 25.5938, lr 1.7e-04, dt 2.1s +All GPU(s): step 6842: loss 26.0781, lr 1.7e-04, dt 2.0s +All GPU(s): step 6843: loss 25.8438, lr 1.7e-04, dt 2.0s +All GPU(s): step 6844: loss 25.8906, lr 1.7e-04, dt 2.0s +All GPU(s): step 6845: loss 26.1719, lr 1.7e-04, dt 2.0s +All GPU(s): step 6846: loss 26.4531, lr 1.7e-04, dt 2.1s +All GPU(s): step 6847: loss 26.2500, lr 1.7e-04, dt 2.0s +All GPU(s): step 6848: loss 26.3125, lr 1.7e-04, dt 2.0s +All GPU(s): step 6849: loss 26.3906, lr 1.7e-04, dt 2.0s +All GPU(s): step 6850: loss 26.1875, lr 1.7e-04, dt 2.1s +All GPU(s): step 6851: loss 26.4062, lr 1.7e-04, dt 2.1s +All GPU(s): step 6852: loss 25.7188, lr 1.7e-04, dt 2.0s +All GPU(s): step 6853: loss 25.4219, lr 1.7e-04, dt 2.0s +All GPU(s): step 6854: loss 24.7812, lr 1.7e-04, dt 2.0s +All GPU(s): step 6855: loss 24.9219, lr 1.7e-04, dt 2.1s +All GPU(s): step 6856: loss 24.5000, lr 1.7e-04, dt 2.1s +All GPU(s): step 6857: loss 24.0625, lr 1.7e-04, dt 2.1s +All GPU(s): step 6858: loss 23.7656, lr 1.7e-04, dt 2.1s +All GPU(s): step 6859: loss 23.5625, lr 1.7e-04, dt 2.1s +All GPU(s): step 6860: loss 23.0781, lr 1.7e-04, dt 2.1s +All GPU(s): step 6861: loss 23.3594, lr 1.7e-04, dt 2.1s +All GPU(s): step 6862: loss 23.2031, lr 1.7e-04, dt 2.1s +All GPU(s): step 6863: loss 23.0000, lr 1.7e-04, dt 2.1s +All GPU(s): step 6864: loss 22.8594, lr 1.7e-04, dt 2.1s +All GPU(s): step 6865: loss 22.8125, lr 1.7e-04, dt 2.2s +All GPU(s): step 6866: loss 22.5469, lr 1.7e-04, dt 2.0s +All GPU(s): step 6867: loss 22.4219, lr 1.7e-04, dt 2.1s +All GPU(s): step 6868: loss 22.3125, lr 1.7e-04, dt 2.1s +All GPU(s): step 6869: loss 22.4688, lr 1.7e-04, dt 2.1s +All GPU(s): step 6870: loss 22.4062, lr 1.7e-04, dt 2.2s +All GPU(s): step 6871: loss 22.5781, lr 1.7e-04, dt 2.0s +All GPU(s): step 6872: loss 22.5000, lr 1.7e-04, dt 2.0s +All GPU(s): step 6873: loss 22.3750, lr 1.7e-04, dt 2.0s +All GPU(s): step 6874: loss 22.0781, lr 1.7e-04, dt 2.0s +All GPU(s): step 6875: loss 22.4375, lr 1.7e-04, dt 2.1s +All GPU(s): step 6876: loss 22.2344, lr 1.7e-04, dt 2.0s +All GPU(s): step 6877: loss 22.1094, lr 1.7e-04, dt 2.0s +All GPU(s): step 6878: loss 22.1875, lr 1.7e-04, dt 2.0s +All GPU(s): step 6879: loss 22.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6880: loss 22.2812, lr 1.7e-04, dt 2.1s +All GPU(s): step 6881: loss 21.7812, lr 1.7e-04, dt 2.0s +All GPU(s): step 6882: loss 22.0625, lr 1.7e-04, dt 2.1s +All GPU(s): step 6883: loss 22.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6884: loss 22.1719, lr 1.7e-04, dt 2.1s +All GPU(s): step 6885: loss 21.7656, lr 1.7e-04, dt 2.0s +All GPU(s): step 6886: loss 21.7344, lr 1.7e-04, dt 2.1s +All GPU(s): step 6887: loss 21.9219, lr 1.7e-04, dt 2.0s +All GPU(s): step 6888: loss 22.1094, lr 1.7e-04, dt 2.0s +All GPU(s): step 6889: loss 21.9688, lr 1.7e-04, dt 2.1s +All GPU(s): step 6890: loss 21.7344, lr 1.7e-04, dt 2.0s +All GPU(s): step 6891: loss 21.8438, lr 1.7e-04, dt 2.0s +All GPU(s): step 6892: loss 22.2344, lr 1.7e-04, dt 2.1s +All GPU(s): step 6893: loss 21.8594, lr 1.7e-04, dt 2.1s +All GPU(s): step 6894: loss 21.9375, lr 1.7e-04, dt 2.1s +All GPU(s): step 6895: loss 22.3125, lr 1.7e-04, dt 2.0s +All GPU(s): step 6896: loss 21.8906, lr 1.7e-04, dt 2.0s +All GPU(s): step 6897: loss 21.9062, lr 1.7e-04, dt 2.0s +All GPU(s): step 6898: loss 21.6406, lr 1.7e-04, dt 2.1s +All GPU(s): step 6899: loss 21.7344, lr 1.7e-04, dt 2.2s +All GPU(s): step 6900: loss 22.1719, lr 1.7e-04, dt 2.1s +All GPU(s): step 6901: loss 21.9688, lr 1.7e-04, dt 2.1s +All GPU(s): step 6902: loss 21.9219, lr 1.7e-04, dt 2.1s +All GPU(s): step 6903: loss 21.7969, lr 1.7e-04, dt 2.1s +All GPU(s): step 6904: loss 21.9688, lr 1.7e-04, dt 2.1s +All GPU(s): step 6905: loss 22.0625, lr 1.7e-04, dt 2.0s +All GPU(s): step 6906: loss 22.0312, lr 1.7e-04, dt 2.0s +All GPU(s): step 6907: loss 22.1250, lr 1.7e-04, dt 2.0s +All GPU(s): step 6908: loss 22.0000, lr 1.7e-04, dt 2.1s +All GPU(s): step 6909: loss 22.0781, lr 1.7e-04, dt 2.1s +All GPU(s): step 6910: loss 21.9375, lr 1.7e-04, dt 2.0s +All GPU(s): step 6911: loss 22.0000, lr 1.7e-04, dt 2.1s +All GPU(s): step 6912: loss 21.7656, lr 1.7e-04, dt 2.1s +All GPU(s): step 6913: loss 21.8281, lr 1.7e-04, dt 2.1s +All GPU(s): step 6914: loss 21.9688, lr 1.7e-04, dt 2.0s +All GPU(s): step 6915: loss 21.4844, lr 1.7e-04, dt 2.0s +All GPU(s): step 6916: loss 22.2969, lr 1.7e-04, dt 2.1s +All GPU(s): step 6917: loss 22.0625, lr 1.7e-04, dt 2.0s +All GPU(s): step 6918: loss 21.9062, lr 1.7e-04, dt 2.2s +All GPU(s): step 6919: loss 21.7969, lr 1.7e-04, dt 2.1s +All GPU(s): step 6920: loss 22.3281, lr 1.7e-04, dt 2.0s +All GPU(s): step 6921: loss 22.1719, lr 1.7e-04, dt 2.0s +All GPU(s): step 6922: loss 22.0469, lr 1.7e-04, dt 2.1s +All GPU(s): step 6923: loss 22.1719, lr 1.7e-04, dt 2.2s +All GPU(s): step 6924: loss 22.4688, lr 1.7e-04, dt 2.0s +All GPU(s): step 6925: loss 22.4375, lr 1.7e-04, dt 2.0s +All GPU(s): step 6926: loss 21.8438, lr 1.7e-04, dt 2.0s +All GPU(s): step 6927: loss 22.0000, lr 1.7e-04, dt 2.0s +All GPU(s): step 6928: loss 22.0312, lr 1.7e-04, dt 2.1s +All GPU(s): step 6929: loss 21.8438, lr 1.7e-04, dt 2.0s +All GPU(s): step 6930: loss 21.9688, lr 1.7e-04, dt 2.0s +All GPU(s): step 6931: loss 22.2812, lr 1.7e-04, dt 2.0s +All GPU(s): step 6932: loss 21.9219, lr 1.7e-04, dt 2.1s +All GPU(s): step 6933: loss 21.8906, lr 1.7e-04, dt 2.1s +All GPU(s): step 6934: loss 22.2188, lr 1.7e-04, dt 2.1s +All GPU(s): step 6935: loss 22.4062, lr 1.7e-04, dt 2.1s +All GPU(s): step 6936: loss 21.9844, lr 1.7e-04, dt 2.1s +All GPU(s): step 6937: loss 21.6719, lr 1.7e-04, dt 2.1s +All GPU(s): step 6938: loss 21.9844, lr 1.7e-04, dt 2.0s +All GPU(s): step 6939: loss 22.2969, lr 1.7e-04, dt 2.0s +All GPU(s): step 6940: loss 21.9688, lr 1.7e-04, dt 2.0s +All GPU(s): step 6941: loss 21.8594, lr 1.7e-04, dt 2.0s +All GPU(s): step 6942: loss 21.9219, lr 1.7e-04, dt 2.1s +All GPU(s): step 6943: loss 21.9688, lr 1.7e-04, dt 2.1s +All GPU(s): step 6944: loss 22.0469, lr 1.7e-04, dt 2.0s +All GPU(s): step 6945: loss 21.7969, lr 1.7e-04, dt 2.1s +All GPU(s): step 6946: loss 22.1094, lr 1.7e-04, dt 2.1s +All GPU(s): step 6947: loss 22.2031, lr 1.7e-04, dt 2.2s +All GPU(s): step 6948: loss 22.1875, lr 1.7e-04, dt 2.1s +All GPU(s): step 6949: loss 21.9531, lr 1.7e-04, dt 2.0s +All GPU(s): step 6950: loss 21.9531, lr 1.7e-04, dt 2.0s +All GPU(s): step 6951: loss 21.6875, lr 1.7e-04, dt 2.0s +All GPU(s): step 6952: loss 21.7656, lr 1.7e-04, dt 2.1s +All GPU(s): step 6953: loss 21.7344, lr 1.7e-04, dt 2.1s +All GPU(s): step 6954: loss 21.6875, lr 1.7e-04, dt 2.0s +All GPU(s): step 6955: loss 21.7969, lr 1.7e-04, dt 2.1s +All GPU(s): step 6956: loss 21.9375, lr 1.7e-04, dt 2.0s +All GPU(s): step 6957: loss 21.9531, lr 1.7e-04, dt 2.1s +All GPU(s): step 6958: loss 21.9062, lr 1.7e-04, dt 2.1s +All GPU(s): step 6959: loss 21.9688, lr 1.7e-04, dt 2.0s +All GPU(s): step 6960: loss 21.7031, lr 1.7e-04, dt 2.1s +All GPU(s): step 6961: loss 21.6719, lr 1.7e-04, dt 2.1s +All GPU(s): step 6962: loss 21.8906, lr 1.7e-04, dt 2.1s +All GPU(s): step 6963: loss 21.8125, lr 1.7e-04, dt 2.0s +All GPU(s): step 6964: loss 21.7344, lr 1.7e-04, dt 2.1s +All GPU(s): step 6965: loss 21.9688, lr 1.7e-04, dt 2.0s +All GPU(s): step 6966: loss 22.0625, lr 1.7e-04, dt 2.1s +All GPU(s): step 6967: loss 21.8594, lr 1.7e-04, dt 2.0s +All GPU(s): step 6968: loss 21.8906, lr 1.7e-04, dt 2.0s +All GPU(s): step 6969: loss 21.8125, lr 1.7e-04, dt 2.0s +All GPU(s): step 6970: loss 21.9688, lr 1.7e-04, dt 2.1s +All GPU(s): step 6971: loss 21.9062, lr 1.7e-04, dt 2.1s +All GPU(s): step 6972: loss 22.1406, lr 1.7e-04, dt 2.0s +All GPU(s): step 6973: loss 21.9062, lr 1.7e-04, dt 2.0s +All GPU(s): step 6974: loss 22.0469, lr 1.7e-04, dt 2.0s +All GPU(s): step 6975: loss 21.8594, lr 1.7e-04, dt 2.1s +All GPU(s): step 6976: loss 22.0312, lr 1.7e-04, dt 2.2s +All GPU(s): step 6977: loss 22.0000, lr 1.7e-04, dt 2.1s +All GPU(s): step 6978: loss 21.7812, lr 1.7e-04, dt 2.0s +All GPU(s): step 6979: loss 21.9219, lr 1.7e-04, dt 2.0s +All GPU(s): step 6980: loss 21.8281, lr 1.7e-04, dt 2.1s +All GPU(s): step 6981: loss 22.0625, lr 1.7e-04, dt 2.1s +All GPU(s): step 6982: loss 21.7812, lr 1.7e-04, dt 2.1s +All GPU(s): step 6983: loss 22.0625, lr 1.7e-04, dt 2.0s +All GPU(s): step 6984: loss 22.0625, lr 1.7e-04, dt 2.1s +All GPU(s): step 6985: loss 22.0625, lr 1.7e-04, dt 2.1s +All GPU(s): step 6986: loss 21.7188, lr 1.7e-04, dt 2.1s +All GPU(s): step 6987: loss 21.9375, lr 1.7e-04, dt 2.0s +All GPU(s): step 6988: loss 22.0312, lr 1.7e-04, dt 2.1s +All GPU(s): step 6989: loss 22.1875, lr 1.6e-04, dt 2.0s +All GPU(s): step 6990: loss 21.8438, lr 1.6e-04, dt 2.1s +All GPU(s): step 6991: loss 21.9062, lr 1.6e-04, dt 2.1s +All GPU(s): step 6992: loss 22.0469, lr 1.6e-04, dt 2.1s +All GPU(s): step 6993: loss 21.9062, lr 1.6e-04, dt 2.0s +All GPU(s): step 6994: loss 21.9062, lr 1.6e-04, dt 2.1s +All GPU(s): step 6995: loss 21.7188, lr 1.6e-04, dt 2.1s +All GPU(s): step 6996: loss 21.8438, lr 1.6e-04, dt 2.1s +All GPU(s): step 6997: loss 21.8906, lr 1.6e-04, dt 2.1s +All GPU(s): step 6998: loss 21.7500, lr 1.6e-04, dt 2.1s +All GPU(s): step 6999: loss 21.7812, lr 1.6e-04, dt 2.1s +saving checkpoint to checkpoints/ckpt_7000.pt +All GPU(s): step 7000: loss 21.5312, lr 1.6e-04, dt 2.2s +All GPU(s): step 7001: loss 22.0469, lr 1.6e-04, dt 2.1s +All GPU(s): step 7002: loss 21.7656, lr 1.6e-04, dt 2.0s +All GPU(s): step 7003: loss 22.2500, lr 1.6e-04, dt 2.0s +All GPU(s): step 7004: loss 22.0156, lr 1.6e-04, dt 2.1s +All GPU(s): step 7005: loss 22.1875, lr 1.6e-04, dt 2.1s +All GPU(s): step 7006: loss 22.0469, lr 1.6e-04, dt 2.1s +All GPU(s): step 7007: loss 22.6875, lr 1.6e-04, dt 2.0s +All GPU(s): step 7008: loss 22.0625, lr 1.6e-04, dt 2.1s +All GPU(s): step 7009: loss 22.1094, lr 1.6e-04, dt 2.1s +All GPU(s): step 7010: loss 22.5469, lr 1.6e-04, dt 2.1s +All GPU(s): step 7011: loss 21.9844, lr 1.6e-04, dt 2.0s +All GPU(s): step 7012: loss 22.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 7013: loss 22.2656, lr 1.6e-04, dt 2.0s +All GPU(s): step 7014: loss 21.9375, lr 1.6e-04, dt 2.1s +All GPU(s): step 7015: loss 22.2188, lr 1.6e-04, dt 2.1s +All GPU(s): step 7016: loss 22.3125, lr 1.6e-04, dt 2.1s +All GPU(s): step 7017: loss 22.5000, lr 1.6e-04, dt 2.1s +All GPU(s): step 7018: loss 22.4375, lr 1.6e-04, dt 2.1s +All GPU(s): step 7019: loss 22.3594, lr 1.6e-04, dt 2.2s +All GPU(s): step 7020: loss 22.6250, lr 1.6e-04, dt 2.1s +All GPU(s): step 7021: loss 22.4219, lr 1.6e-04, dt 2.1s +All GPU(s): step 7022: loss 22.8125, lr 1.6e-04, dt 2.1s +All GPU(s): step 7023: loss 22.8281, lr 1.6e-04, dt 2.1s +All GPU(s): step 7024: loss 23.0938, lr 1.6e-04, dt 2.2s +All GPU(s): step 7025: loss 22.8125, lr 1.6e-04, dt 2.1s +All GPU(s): step 7026: loss 23.1875, lr 1.6e-04, dt 2.1s +All GPU(s): step 7027: loss 23.0781, lr 1.6e-04, dt 2.1s +All GPU(s): step 7028: loss 23.5000, lr 1.6e-04, dt 2.1s +All GPU(s): step 7029: loss 23.4062, lr 1.6e-04, dt 2.2s +All GPU(s): step 7030: loss 23.6875, lr 1.6e-04, dt 2.1s +All GPU(s): step 7031: loss 24.2344, lr 1.6e-04, dt 2.1s +All GPU(s): step 7032: loss 24.0469, lr 1.6e-04, dt 2.0s +All GPU(s): step 7033: loss 24.0000, lr 1.6e-04, dt 2.1s +All GPU(s): step 7034: loss 24.3906, lr 1.6e-04, dt 2.1s +All GPU(s): step 7035: loss 23.9531, lr 1.6e-04, dt 2.1s +All GPU(s): step 7036: loss 23.8906, lr 1.6e-04, dt 2.0s +All GPU(s): step 7037: loss 23.1719, lr 1.6e-04, dt 2.1s +All GPU(s): step 7038: loss 23.5469, lr 1.6e-04, dt 2.1s +All GPU(s): step 7039: loss 23.2344, lr 1.6e-04, dt 2.1s +All GPU(s): step 7040: loss 23.3906, lr 1.6e-04, dt 2.1s +All GPU(s): step 7041: loss 23.5625, lr 1.6e-04, dt 2.1s +All GPU(s): step 7042: loss 23.2812, lr 1.6e-04, dt 2.1s +All GPU(s): step 7043: loss 23.5312, lr 1.6e-04, dt 2.2s +All GPU(s): step 7044: loss 23.4844, lr 1.6e-04, dt 2.0s +All GPU(s): step 7045: loss 23.7969, lr 1.6e-04, dt 2.0s +All GPU(s): step 7046: loss 23.9219, lr 1.6e-04, dt 2.0s +All GPU(s): step 7047: loss 24.2969, lr 1.6e-04, dt 2.1s +All GPU(s): step 7048: loss 24.2969, lr 1.6e-04, dt 2.2s +All GPU(s): step 7049: loss 24.7500, lr 1.6e-04, dt 2.1s +All GPU(s): step 7050: loss 24.9062, lr 1.6e-04, dt 2.0s +All GPU(s): step 7051: loss 25.1562, lr 1.6e-04, dt 2.0s +All GPU(s): step 7052: loss 24.6562, lr 1.6e-04, dt 2.1s +All GPU(s): step 7053: loss 25.2500, lr 1.6e-04, dt 2.1s +All GPU(s): step 7054: loss 25.1719, lr 1.6e-04, dt 2.0s +All GPU(s): step 7055: loss 25.0625, lr 1.6e-04, dt 2.0s +All GPU(s): step 7056: loss 24.5000, lr 1.6e-04, dt 2.1s +All GPU(s): step 7057: loss 24.1250, lr 1.6e-04, dt 2.1s +All GPU(s): step 7058: loss 24.2188, lr 1.6e-04, dt 2.1s +All GPU(s): step 7059: loss 24.0000, lr 1.6e-04, dt 2.1s +All GPU(s): step 7060: loss 23.9844, lr 1.6e-04, dt 2.0s +All GPU(s): step 7061: loss 24.1094, lr 1.6e-04, dt 2.0s +All GPU(s): step 7062: loss 24.7031, lr 1.6e-04, dt 2.1s +All GPU(s): step 7063: loss 24.5938, lr 1.6e-04, dt 2.0s +All GPU(s): step 7064: loss 24.9062, lr 1.6e-04, dt 2.0s +All GPU(s): step 7065: loss 25.5000, lr 1.6e-04, dt 2.0s +All GPU(s): step 7066: loss 25.1250, lr 1.6e-04, dt 2.1s +All GPU(s): step 7067: loss 24.9688, lr 1.6e-04, dt 2.1s +All GPU(s): step 7068: loss 25.0781, lr 1.6e-04, dt 2.1s +All GPU(s): step 7069: loss 24.5000, lr 1.6e-04, dt 2.0s +All GPU(s): step 7070: loss 24.6094, lr 1.6e-04, dt 2.0s +All GPU(s): step 7071: loss 24.3906, lr 1.6e-04, dt 2.1s +All GPU(s): step 7072: loss 24.3594, lr 1.6e-04, dt 2.1s +All GPU(s): step 7073: loss 24.0156, lr 1.6e-04, dt 2.1s +All GPU(s): step 7074: loss 23.7344, lr 1.6e-04, dt 2.1s +All GPU(s): step 7075: loss 23.8281, lr 1.6e-04, dt 2.0s +All GPU(s): step 7076: loss 24.0938, lr 1.6e-04, dt 2.1s +All GPU(s): step 7077: loss 23.4531, lr 1.6e-04, dt 2.1s +All GPU(s): step 7078: loss 23.3750, lr 1.6e-04, dt 2.0s +All GPU(s): step 7079: loss 23.3906, lr 1.6e-04, dt 2.1s +All GPU(s): step 7080: loss 23.7500, lr 1.6e-04, dt 2.0s +All GPU(s): step 7081: loss 23.1094, lr 1.6e-04, dt 2.1s +All GPU(s): step 7082: loss 23.2188, lr 1.6e-04, dt 2.1s +All GPU(s): step 7083: loss 23.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 7084: loss 23.2344, lr 1.6e-04, dt 2.0s +All GPU(s): step 7085: loss 23.4688, lr 1.6e-04, dt 2.1s +All GPU(s): step 7086: loss 23.3594, lr 1.6e-04, dt 2.1s +All GPU(s): step 7087: loss 23.2031, lr 1.6e-04, dt 2.1s +All GPU(s): step 7088: loss 23.3906, lr 1.6e-04, dt 2.0s +All GPU(s): step 7089: loss 23.2031, lr 1.6e-04, dt 2.0s +All GPU(s): step 7090: loss 23.8594, lr 1.6e-04, dt 2.0s +All GPU(s): step 7091: loss 23.5469, lr 1.6e-04, dt 2.1s +All GPU(s): step 7092: loss 24.0312, lr 1.6e-04, dt 2.1s +All GPU(s): step 7093: loss 23.8281, lr 1.6e-04, dt 2.0s +All GPU(s): step 7094: loss 23.7500, lr 1.6e-04, dt 2.0s +All GPU(s): step 7095: loss 23.9062, lr 1.6e-04, dt 2.1s +All GPU(s): step 7096: loss 23.9844, lr 1.6e-04, dt 2.2s +All GPU(s): step 7097: loss 23.7344, lr 1.6e-04, dt 2.0s +All GPU(s): step 7098: loss 23.6875, lr 1.6e-04, dt 2.0s +All GPU(s): step 7099: loss 23.6406, lr 1.6e-04, dt 2.1s +All GPU(s): step 7100: loss 23.7812, lr 1.6e-04, dt 2.1s +All GPU(s): step 7101: loss 23.7969, lr 1.6e-04, dt 2.2s +All GPU(s): step 7102: loss 23.8281, lr 1.6e-04, dt 2.0s +All GPU(s): step 7103: loss 23.7344, lr 1.6e-04, dt 2.0s +All GPU(s): step 7104: loss 24.0625, lr 1.6e-04, dt 2.0s +All GPU(s): step 7105: loss 24.2188, lr 1.6e-04, dt 2.1s +All GPU(s): step 7106: loss 24.0469, lr 1.6e-04, dt 2.1s +All GPU(s): step 7107: loss 24.0312, lr 1.6e-04, dt 2.1s +All GPU(s): step 7108: loss 23.9219, lr 1.6e-04, dt 2.1s +All GPU(s): step 7109: loss 23.5000, lr 1.6e-04, dt 2.1s +All GPU(s): step 7110: loss 23.4531, lr 1.6e-04, dt 2.1s +All GPU(s): step 7111: loss 23.1719, lr 1.6e-04, dt 2.1s +All GPU(s): step 7112: loss 23.0781, lr 1.6e-04, dt 2.0s +All GPU(s): step 7113: loss 23.2656, lr 1.6e-04, dt 2.0s +All GPU(s): step 7114: loss 22.9531, lr 1.6e-04, dt 2.1s +All GPU(s): step 7115: loss 22.9688, lr 1.6e-04, dt 2.1s +All GPU(s): step 7116: loss 22.9688, lr 1.6e-04, dt 2.1s +All GPU(s): step 7117: loss 23.2344, lr 1.6e-04, dt 2.1s +All GPU(s): step 7118: loss 23.0781, lr 1.6e-04, dt 2.1s +All GPU(s): step 7119: loss 23.2500, lr 1.6e-04, dt 2.0s +All GPU(s): step 7120: loss 23.5781, lr 1.6e-04, dt 2.2s +All GPU(s): step 7121: loss 23.3125, lr 1.6e-04, dt 2.0s +All GPU(s): step 7122: loss 23.2656, lr 1.6e-04, dt 2.1s +All GPU(s): step 7123: loss 23.2344, lr 1.6e-04, dt 2.1s +All GPU(s): step 7124: loss 23.3125, lr 1.6e-04, dt 2.1s +All GPU(s): step 7125: loss 23.0625, lr 1.6e-04, dt 2.1s +All GPU(s): step 7126: loss 22.7500, lr 1.6e-04, dt 2.0s +All GPU(s): step 7127: loss 22.9844, lr 1.6e-04, dt 2.1s +All GPU(s): step 7128: loss 22.8906, lr 1.6e-04, dt 2.1s +All GPU(s): step 7129: loss 22.9844, lr 1.6e-04, dt 2.0s +All GPU(s): step 7130: loss 22.5156, lr 1.6e-04, dt 2.1s +All GPU(s): step 7131: loss 22.5000, lr 1.6e-04, dt 2.1s +All GPU(s): step 7132: loss 22.6875, lr 1.6e-04, dt 2.0s +All GPU(s): step 7133: loss 22.6562, lr 1.6e-04, dt 2.1s +All GPU(s): step 7134: loss 22.4219, lr 1.6e-04, dt 2.1s +All GPU(s): step 7135: loss 22.3281, lr 1.6e-04, dt 2.1s +All GPU(s): step 7136: loss 22.4219, lr 1.6e-04, dt 2.0s +All GPU(s): step 7137: loss 22.5000, lr 1.6e-04, dt 2.0s +All GPU(s): step 7138: loss 22.5312, lr 1.6e-04, dt 2.0s +All GPU(s): step 7139: loss 22.4375, lr 1.6e-04, dt 2.1s +All GPU(s): step 7140: loss 22.2031, lr 1.6e-04, dt 2.0s +All GPU(s): step 7141: loss 22.4062, lr 1.6e-04, dt 2.0s +All GPU(s): step 7142: loss 22.6719, lr 1.6e-04, dt 2.0s +All GPU(s): step 7143: loss 22.4531, lr 1.6e-04, dt 2.0s +All GPU(s): step 7144: loss 22.4844, lr 1.6e-04, dt 2.1s +All GPU(s): step 7145: loss 22.6719, lr 1.6e-04, dt 2.1s +All GPU(s): step 7146: loss 22.7812, lr 1.6e-04, dt 2.0s +All GPU(s): step 7147: loss 22.7188, lr 1.6e-04, dt 2.0s +All GPU(s): step 7148: loss 22.9531, lr 1.6e-04, dt 2.0s +All GPU(s): step 7149: loss 23.2188, lr 1.6e-04, dt 2.1s +All GPU(s): step 7150: loss 22.8438, lr 1.6e-04, dt 2.1s +All GPU(s): step 7151: loss 22.7188, lr 1.6e-04, dt 2.0s +All GPU(s): step 7152: loss 22.9219, lr 1.6e-04, dt 2.1s +All GPU(s): step 7153: loss 22.9219, lr 1.6e-04, dt 2.0s +All GPU(s): step 7154: loss 23.0000, lr 1.6e-04, dt 2.1s +All GPU(s): step 7155: loss 22.6406, lr 1.6e-04, dt 2.0s +All GPU(s): step 7156: loss 22.2500, lr 1.6e-04, dt 2.0s +All GPU(s): step 7157: loss 22.3438, lr 1.6e-04, dt 2.0s +All GPU(s): step 7158: loss 22.4219, lr 1.6e-04, dt 2.1s +All GPU(s): step 7159: loss 22.2969, lr 1.6e-04, dt 2.1s +All GPU(s): step 7160: loss 22.0000, lr 1.6e-04, dt 2.0s +All GPU(s): step 7161: loss 22.1875, lr 1.6e-04, dt 2.0s +All GPU(s): step 7162: loss 22.1406, lr 1.6e-04, dt 2.1s +All GPU(s): step 7163: loss 22.2656, lr 1.6e-04, dt 2.1s +All GPU(s): step 7164: loss 22.1406, lr 1.6e-04, dt 2.0s +All GPU(s): step 7165: loss 22.4531, lr 1.6e-04, dt 2.0s +All GPU(s): step 7166: loss 22.1094, lr 1.6e-04, dt 2.1s +All GPU(s): step 7167: loss 22.2969, lr 1.6e-04, dt 2.0s +All GPU(s): step 7168: loss 21.9844, lr 1.6e-04, dt 2.1s +All GPU(s): step 7169: loss 22.3906, lr 1.6e-04, dt 2.0s +All GPU(s): step 7170: loss 22.2031, lr 1.6e-04, dt 2.0s +All GPU(s): step 7171: loss 22.1562, lr 1.6e-04, dt 2.0s +All GPU(s): step 7172: loss 22.2031, lr 1.6e-04, dt 2.1s +All GPU(s): step 7173: loss 22.2656, lr 1.6e-04, dt 2.1s +All GPU(s): step 7174: loss 22.6250, lr 1.6e-04, dt 2.0s +All GPU(s): step 7175: loss 22.9219, lr 1.6e-04, dt 2.1s +All GPU(s): step 7176: loss 22.6562, lr 1.6e-04, dt 2.0s +All GPU(s): step 7177: loss 23.0000, lr 1.5e-04, dt 2.1s +All GPU(s): step 7178: loss 22.7812, lr 1.5e-04, dt 2.1s +All GPU(s): step 7179: loss 22.4844, lr 1.5e-04, dt 2.0s +All GPU(s): step 7180: loss 22.4844, lr 1.5e-04, dt 2.0s +All GPU(s): step 7181: loss 22.1719, lr 1.5e-04, dt 2.1s +All GPU(s): step 7182: loss 22.0469, lr 1.5e-04, dt 2.1s +All GPU(s): step 7183: loss 22.0938, lr 1.5e-04, dt 2.1s +All GPU(s): step 7184: loss 22.1250, lr 1.5e-04, dt 2.0s +All GPU(s): step 7185: loss 21.9531, lr 1.5e-04, dt 2.0s +All GPU(s): step 7186: loss 21.9688, lr 1.5e-04, dt 2.0s +All GPU(s): step 7187: loss 21.9844, lr 1.5e-04, dt 2.1s +All GPU(s): step 7188: loss 22.2656, lr 1.5e-04, dt 2.1s +All GPU(s): step 7189: loss 21.9844, lr 1.5e-04, dt 2.1s +All GPU(s): step 7190: loss 22.0781, lr 1.5e-04, dt 2.0s +All GPU(s): step 7191: loss 22.0938, lr 1.5e-04, dt 2.0s +All GPU(s): step 7192: loss 22.1250, lr 1.5e-04, dt 2.1s +All GPU(s): step 7193: loss 22.2188, lr 1.5e-04, dt 2.0s +All GPU(s): step 7194: loss 22.1875, lr 1.5e-04, dt 2.0s +All GPU(s): step 7195: loss 22.2031, lr 1.5e-04, dt 2.0s +All GPU(s): step 7196: loss 22.4219, lr 1.5e-04, dt 2.0s +All GPU(s): step 7197: loss 22.3125, lr 1.5e-04, dt 2.1s +All GPU(s): step 7198: loss 22.0938, lr 1.5e-04, dt 2.0s +All GPU(s): step 7199: loss 22.1250, lr 1.5e-04, dt 2.0s +All GPU(s): step 7200: loss 22.0938, lr 1.5e-04, dt 2.0s +All GPU(s): step 7201: loss 22.1875, lr 1.5e-04, dt 2.0s +All GPU(s): step 7202: loss 22.0938, lr 1.5e-04, dt 2.1s +All GPU(s): step 7203: loss 22.2344, lr 1.5e-04, dt 2.0s +All GPU(s): step 7204: loss 22.5625, lr 1.5e-04, dt 2.0s +All GPU(s): step 7205: loss 22.0781, lr 1.5e-04, dt 2.0s +All GPU(s): step 7206: loss 22.0156, lr 1.5e-04, dt 2.1s +All GPU(s): step 7207: loss 22.1406, lr 1.5e-04, dt 2.2s +All GPU(s): step 7208: loss 22.1094, lr 1.5e-04, dt 2.1s +All GPU(s): step 7209: loss 22.1875, lr 1.5e-04, dt 2.0s +All GPU(s): step 7210: loss 22.1094, lr 1.5e-04, dt 2.0s +All GPU(s): step 7211: loss 22.1250, lr 1.5e-04, dt 2.1s +All GPU(s): step 7212: loss 22.0000, lr 1.5e-04, dt 2.1s +All GPU(s): step 7213: loss 22.0781, lr 1.5e-04, dt 2.1s +All GPU(s): step 7214: loss 22.3125, lr 1.5e-04, dt 2.1s +All GPU(s): step 7215: loss 21.9219, lr 1.5e-04, dt 2.1s +All GPU(s): step 7216: loss 22.1250, lr 1.5e-04, dt 2.1s +All GPU(s): step 7217: loss 22.1250, lr 1.5e-04, dt 2.1s +All GPU(s): step 7218: loss 22.1250, lr 1.5e-04, dt 2.1s +All GPU(s): step 7219: loss 21.9531, lr 1.5e-04, dt 2.0s +All GPU(s): step 7220: loss 22.0625, lr 1.5e-04, dt 2.1s +All GPU(s): step 7221: loss 22.1562, lr 1.5e-04, dt 2.1s +All GPU(s): step 7222: loss 21.9688, lr 1.5e-04, dt 2.0s +All GPU(s): step 7223: loss 22.2812, lr 1.5e-04, dt 2.0s +All GPU(s): step 7224: loss 22.0312, lr 1.5e-04, dt 2.0s +All GPU(s): step 7225: loss 21.9062, lr 1.5e-04, dt 2.0s +All GPU(s): step 7226: loss 21.7969, lr 1.5e-04, dt 2.2s +All GPU(s): step 7227: loss 21.9375, lr 1.5e-04, dt 2.1s +All GPU(s): step 7228: loss 22.0156, lr 1.5e-04, dt 2.1s +All GPU(s): step 7229: loss 21.7656, lr 1.5e-04, dt 2.0s +All GPU(s): step 7230: loss 22.0312, lr 1.5e-04, dt 2.1s +All GPU(s): step 7231: loss 21.9844, lr 1.5e-04, dt 2.1s +All GPU(s): step 7232: loss 22.0469, lr 1.5e-04, dt 2.0s +All GPU(s): step 7233: loss 22.2188, lr 1.5e-04, dt 2.0s +All GPU(s): step 7234: loss 22.0469, lr 1.5e-04, dt 2.0s +All GPU(s): step 7235: loss 21.9219, lr 1.5e-04, dt 2.1s +All GPU(s): step 7236: loss 21.8750, lr 1.5e-04, dt 2.1s +All GPU(s): step 7237: loss 22.2344, lr 1.5e-04, dt 2.0s +All GPU(s): step 7238: loss 22.0156, lr 1.5e-04, dt 2.0s +All GPU(s): step 7239: loss 22.0781, lr 1.5e-04, dt 2.0s +All GPU(s): step 7240: loss 21.9062, lr 1.5e-04, dt 2.1s +All GPU(s): step 7241: loss 22.1250, lr 1.5e-04, dt 2.1s +All GPU(s): step 7242: loss 21.9375, lr 1.5e-04, dt 2.0s +All GPU(s): step 7243: loss 22.0000, lr 1.5e-04, dt 2.0s +All GPU(s): step 7244: loss 22.1719, lr 1.5e-04, dt 2.0s +All GPU(s): step 7245: loss 21.7656, lr 1.5e-04, dt 2.1s +All GPU(s): step 7246: loss 22.0156, lr 1.5e-04, dt 2.0s +All GPU(s): step 7247: loss 21.7500, lr 1.5e-04, dt 2.0s +All GPU(s): step 7248: loss 22.1562, lr 1.5e-04, dt 2.0s +All GPU(s): step 7249: loss 22.2500, lr 1.5e-04, dt 2.0s +All GPU(s): step 7250: loss 22.3125, lr 1.5e-04, dt 2.1s +All GPU(s): step 7251: loss 21.9844, lr 1.5e-04, dt 2.0s +All GPU(s): step 7252: loss 21.9688, lr 1.5e-04, dt 2.0s +All GPU(s): step 7253: loss 22.0156, lr 1.5e-04, dt 2.0s +All GPU(s): step 7254: loss 22.1719, lr 1.5e-04, dt 2.0s +All GPU(s): step 7255: loss 21.6875, lr 1.5e-04, dt 2.1s +All GPU(s): step 7256: loss 22.1719, lr 1.5e-04, dt 2.0s +All GPU(s): step 7257: loss 21.8125, lr 1.5e-04, dt 2.0s +All GPU(s): step 7258: loss 22.0781, lr 1.5e-04, dt 2.0s +All GPU(s): step 7259: loss 22.2188, lr 1.5e-04, dt 2.1s +All GPU(s): step 7260: loss 22.0000, lr 1.5e-04, dt 2.1s +All GPU(s): step 7261: loss 22.1719, lr 1.5e-04, dt 2.0s +All GPU(s): step 7262: loss 21.8438, lr 1.5e-04, dt 2.0s +All GPU(s): step 7263: loss 22.0781, lr 1.5e-04, dt 2.0s +All GPU(s): step 7264: loss 22.1719, lr 1.5e-04, dt 2.1s +All GPU(s): step 7265: loss 21.9531, lr 1.5e-04, dt 2.1s +All GPU(s): step 7266: loss 22.0312, lr 1.5e-04, dt 2.1s +All GPU(s): step 7267: loss 22.1094, lr 1.5e-04, dt 2.0s +All GPU(s): step 7268: loss 21.9062, lr 1.5e-04, dt 2.0s +All GPU(s): step 7269: loss 22.3125, lr 1.5e-04, dt 2.1s +All GPU(s): step 7270: loss 22.2344, lr 1.5e-04, dt 2.1s +All GPU(s): step 7271: loss 21.9844, lr 1.5e-04, dt 2.1s +All GPU(s): step 7272: loss 22.0000, lr 1.5e-04, dt 2.1s +All GPU(s): step 7273: loss 21.7500, lr 1.5e-04, dt 2.1s +All GPU(s): step 7274: loss 21.9219, lr 1.5e-04, dt 2.1s +All GPU(s): step 7275: loss 22.0156, lr 1.5e-04, dt 2.0s +All GPU(s): step 7276: loss 21.7344, lr 1.5e-04, dt 2.1s +All GPU(s): step 7277: loss 21.8750, lr 1.5e-04, dt 2.0s +All GPU(s): step 7278: loss 22.0312, lr 1.5e-04, dt 2.1s +All GPU(s): step 7279: loss 21.6562, lr 1.5e-04, dt 2.2s +All GPU(s): step 7280: loss 21.9375, lr 1.5e-04, dt 2.0s +All GPU(s): step 7281: loss 21.9375, lr 1.5e-04, dt 2.1s +All GPU(s): step 7282: loss 21.9219, lr 1.5e-04, dt 2.0s +All GPU(s): step 7283: loss 22.1875, lr 1.5e-04, dt 2.1s +All GPU(s): step 7284: loss 22.0156, lr 1.5e-04, dt 2.2s +All GPU(s): step 7285: loss 21.9688, lr 1.5e-04, dt 2.1s +All GPU(s): step 7286: loss 21.5938, lr 1.5e-04, dt 2.0s +All GPU(s): step 7287: loss 21.7969, lr 1.5e-04, dt 2.0s +All GPU(s): step 7288: loss 21.9531, lr 1.5e-04, dt 2.1s +All GPU(s): step 7289: loss 21.7031, lr 1.5e-04, dt 2.1s +All GPU(s): step 7290: loss 21.9062, lr 1.5e-04, dt 2.0s +All GPU(s): step 7291: loss 22.1094, lr 1.5e-04, dt 2.0s +All GPU(s): step 7292: loss 22.0000, lr 1.5e-04, dt 2.1s +All GPU(s): step 7293: loss 22.1406, lr 1.5e-04, dt 2.1s +All GPU(s): step 7294: loss 22.0781, lr 1.5e-04, dt 2.1s +All GPU(s): step 7295: loss 21.6562, lr 1.5e-04, dt 2.0s +All GPU(s): step 7296: loss 22.0156, lr 1.5e-04, dt 2.0s +All GPU(s): step 7297: loss 21.7969, lr 1.5e-04, dt 2.0s +All GPU(s): step 7298: loss 22.0156, lr 1.5e-04, dt 2.1s +All GPU(s): step 7299: loss 21.9531, lr 1.5e-04, dt 2.0s +All GPU(s): step 7300: loss 22.0312, lr 1.5e-04, dt 2.0s +All GPU(s): step 7301: loss 22.1250, lr 1.5e-04, dt 2.0s +All GPU(s): step 7302: loss 22.0781, lr 1.5e-04, dt 2.1s +All GPU(s): step 7303: loss 22.1719, lr 1.5e-04, dt 2.1s +All GPU(s): step 7304: loss 22.2812, lr 1.5e-04, dt 2.0s +All GPU(s): step 7305: loss 21.8281, lr 1.5e-04, dt 2.1s +All GPU(s): step 7306: loss 22.0781, lr 1.5e-04, dt 2.0s +All GPU(s): step 7307: loss 21.9062, lr 1.5e-04, dt 2.0s +All GPU(s): step 7308: loss 22.0781, lr 1.5e-04, dt 2.1s +All GPU(s): step 7309: loss 22.1094, lr 1.5e-04, dt 2.0s +All GPU(s): step 7310: loss 22.1719, lr 1.5e-04, dt 2.0s +All GPU(s): step 7311: loss 21.9688, lr 1.5e-04, dt 2.0s +All GPU(s): step 7312: loss 21.8750, lr 1.5e-04, dt 2.1s +All GPU(s): step 7313: loss 22.1562, lr 1.5e-04, dt 2.1s +All GPU(s): step 7314: loss 22.1406, lr 1.5e-04, dt 2.0s +All GPU(s): step 7315: loss 22.0312, lr 1.5e-04, dt 2.1s +All GPU(s): step 7316: loss 21.9375, lr 1.5e-04, dt 2.1s +All GPU(s): step 7317: loss 22.1094, lr 1.5e-04, dt 2.1s +All GPU(s): step 7318: loss 21.8594, lr 1.5e-04, dt 2.1s +All GPU(s): step 7319: loss 22.1562, lr 1.5e-04, dt 2.0s +All GPU(s): step 7320: loss 22.4531, lr 1.5e-04, dt 2.0s +All GPU(s): step 7321: loss 22.1719, lr 1.5e-04, dt 2.1s +All GPU(s): step 7322: loss 22.0312, lr 1.5e-04, dt 2.1s +All GPU(s): step 7323: loss 22.5312, lr 1.5e-04, dt 2.1s +All GPU(s): step 7324: loss 22.0625, lr 1.5e-04, dt 2.1s +All GPU(s): step 7325: loss 22.2500, lr 1.5e-04, dt 2.1s +All GPU(s): step 7326: loss 22.3125, lr 1.5e-04, dt 2.1s +All GPU(s): step 7327: loss 22.4531, lr 1.5e-04, dt 2.2s +All GPU(s): step 7328: loss 22.5312, lr 1.5e-04, dt 2.1s +All GPU(s): step 7329: loss 22.4531, lr 1.5e-04, dt 2.0s +All GPU(s): step 7330: loss 22.8281, lr 1.5e-04, dt 2.1s +All GPU(s): step 7331: loss 22.2656, lr 1.5e-04, dt 2.1s +All GPU(s): step 7332: loss 22.6719, lr 1.5e-04, dt 2.1s +All GPU(s): step 7333: loss 22.8594, lr 1.5e-04, dt 2.0s +All GPU(s): step 7334: loss 22.7031, lr 1.5e-04, dt 2.0s +All GPU(s): step 7335: loss 22.8594, lr 1.5e-04, dt 2.0s +All GPU(s): step 7336: loss 22.3750, lr 1.5e-04, dt 2.1s +All GPU(s): step 7337: loss 22.4219, lr 1.5e-04, dt 2.1s +All GPU(s): step 7338: loss 22.5938, lr 1.5e-04, dt 2.1s +All GPU(s): step 7339: loss 22.8125, lr 1.5e-04, dt 2.0s +All GPU(s): step 7340: loss 22.5781, lr 1.5e-04, dt 2.1s +All GPU(s): step 7341: loss 22.7812, lr 1.5e-04, dt 2.1s +All GPU(s): step 7342: loss 22.7344, lr 1.5e-04, dt 2.1s +All GPU(s): step 7343: loss 22.9688, lr 1.5e-04, dt 2.0s +All GPU(s): step 7344: loss 23.0625, lr 1.5e-04, dt 2.0s +All GPU(s): step 7345: loss 23.1719, lr 1.5e-04, dt 2.0s +All GPU(s): step 7346: loss 23.1562, lr 1.5e-04, dt 2.1s +All GPU(s): step 7347: loss 23.1094, lr 1.5e-04, dt 2.0s +All GPU(s): step 7348: loss 23.0469, lr 1.5e-04, dt 2.0s +All GPU(s): step 7349: loss 23.3125, lr 1.5e-04, dt 2.0s +All GPU(s): step 7350: loss 23.7188, lr 1.5e-04, dt 2.0s +All GPU(s): step 7351: loss 23.4844, lr 1.5e-04, dt 2.1s +All GPU(s): step 7352: loss 23.4062, lr 1.5e-04, dt 2.0s +All GPU(s): step 7353: loss 23.9688, lr 1.5e-04, dt 2.0s +All GPU(s): step 7354: loss 23.9844, lr 1.5e-04, dt 2.0s +All GPU(s): step 7355: loss 24.2344, lr 1.5e-04, dt 2.0s +All GPU(s): step 7356: loss 24.3594, lr 1.5e-04, dt 2.1s +All GPU(s): step 7357: loss 24.6250, lr 1.5e-04, dt 2.1s +All GPU(s): step 7358: loss 24.5469, lr 1.5e-04, dt 2.1s +All GPU(s): step 7359: loss 24.7969, lr 1.5e-04, dt 2.1s +All GPU(s): step 7360: loss 24.5938, lr 1.5e-04, dt 2.1s +All GPU(s): step 7361: loss 24.6719, lr 1.5e-04, dt 2.1s +All GPU(s): step 7362: loss 24.8750, lr 1.5e-04, dt 2.0s +All GPU(s): step 7363: loss 24.5156, lr 1.5e-04, dt 2.1s +All GPU(s): step 7364: loss 24.7812, lr 1.5e-04, dt 2.0s +All GPU(s): step 7365: loss 24.4688, lr 1.5e-04, dt 2.1s +All GPU(s): step 7366: loss 24.8281, lr 1.5e-04, dt 2.1s +All GPU(s): step 7367: loss 24.6250, lr 1.5e-04, dt 2.0s +All GPU(s): step 7368: loss 24.6406, lr 1.5e-04, dt 2.0s +All GPU(s): step 7369: loss 24.2969, lr 1.4e-04, dt 2.0s +All GPU(s): step 7370: loss 24.6094, lr 1.4e-04, dt 2.1s +All GPU(s): step 7371: loss 24.4844, lr 1.4e-04, dt 2.1s +All GPU(s): step 7372: loss 24.8438, lr 1.4e-04, dt 2.0s +All GPU(s): step 7373: loss 24.5000, lr 1.4e-04, dt 2.0s +All GPU(s): step 7374: loss 24.2656, lr 1.4e-04, dt 2.0s +All GPU(s): step 7375: loss 24.8125, lr 1.4e-04, dt 2.1s +All GPU(s): step 7376: loss 24.2500, lr 1.4e-04, dt 2.1s +All GPU(s): step 7377: loss 24.3594, lr 1.4e-04, dt 2.1s +All GPU(s): step 7378: loss 24.1562, lr 1.4e-04, dt 2.1s +All GPU(s): step 7379: loss 24.3750, lr 1.4e-04, dt 2.1s +All GPU(s): step 7380: loss 24.3438, lr 1.4e-04, dt 2.2s +All GPU(s): step 7381: loss 24.6094, lr 1.4e-04, dt 2.0s +All GPU(s): step 7382: loss 24.7812, lr 1.4e-04, dt 2.1s +All GPU(s): step 7383: loss 24.8750, lr 1.4e-04, dt 2.1s +All GPU(s): step 7384: loss 24.9844, lr 1.4e-04, dt 2.1s +All GPU(s): step 7385: loss 24.8594, lr 1.4e-04, dt 2.2s +All GPU(s): step 7386: loss 25.1250, lr 1.4e-04, dt 2.1s +All GPU(s): step 7387: loss 25.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7388: loss 24.9844, lr 1.4e-04, dt 2.0s +All GPU(s): step 7389: loss 24.8906, lr 1.4e-04, dt 2.1s +All GPU(s): step 7390: loss 24.7656, lr 1.4e-04, dt 2.1s +All GPU(s): step 7391: loss 24.8125, lr 1.4e-04, dt 2.0s +All GPU(s): step 7392: loss 24.5625, lr 1.4e-04, dt 2.0s +All GPU(s): step 7393: loss 24.8281, lr 1.4e-04, dt 2.0s +All GPU(s): step 7394: loss 24.6406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7395: loss 24.9219, lr 1.4e-04, dt 2.1s +All GPU(s): step 7396: loss 25.0000, lr 1.4e-04, dt 2.0s +All GPU(s): step 7397: loss 24.6250, lr 1.4e-04, dt 2.1s +All GPU(s): step 7398: loss 24.4062, lr 1.4e-04, dt 2.0s +All GPU(s): step 7399: loss 24.5156, lr 1.4e-04, dt 2.1s +All GPU(s): step 7400: loss 23.7188, lr 1.4e-04, dt 2.1s +All GPU(s): step 7401: loss 23.9219, lr 1.4e-04, dt 2.1s +All GPU(s): step 7402: loss 23.6562, lr 1.4e-04, dt 2.1s +All GPU(s): step 7403: loss 23.2344, lr 1.4e-04, dt 2.1s +All GPU(s): step 7404: loss 23.4375, lr 1.4e-04, dt 2.1s +All GPU(s): step 7405: loss 23.2969, lr 1.4e-04, dt 2.1s +All GPU(s): step 7406: loss 22.7969, lr 1.4e-04, dt 2.1s +All GPU(s): step 7407: loss 23.0625, lr 1.4e-04, dt 2.1s +All GPU(s): step 7408: loss 22.8594, lr 1.4e-04, dt 2.1s +All GPU(s): step 7409: loss 22.7656, lr 1.4e-04, dt 2.1s +All GPU(s): step 7410: loss 22.8125, lr 1.4e-04, dt 2.0s +All GPU(s): step 7411: loss 22.3594, lr 1.4e-04, dt 2.0s +All GPU(s): step 7412: loss 22.1562, lr 1.4e-04, dt 2.0s +All GPU(s): step 7413: loss 22.4375, lr 1.4e-04, dt 2.0s +All GPU(s): step 7414: loss 22.8281, lr 1.4e-04, dt 2.1s +All GPU(s): step 7415: loss 22.4531, lr 1.4e-04, dt 2.0s +All GPU(s): step 7416: loss 22.7031, lr 1.4e-04, dt 2.0s +All GPU(s): step 7417: loss 22.7188, lr 1.4e-04, dt 2.0s +All GPU(s): step 7418: loss 22.6094, lr 1.4e-04, dt 2.1s +All GPU(s): step 7419: loss 22.3906, lr 1.4e-04, dt 2.1s +All GPU(s): step 7420: loss 22.9219, lr 1.4e-04, dt 2.0s +All GPU(s): step 7421: loss 22.6094, lr 1.4e-04, dt 2.0s +All GPU(s): step 7422: loss 22.4531, lr 1.4e-04, dt 2.1s +All GPU(s): step 7423: loss 22.6406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7424: loss 23.0156, lr 1.4e-04, dt 2.0s +All GPU(s): step 7425: loss 22.7656, lr 1.4e-04, dt 2.1s +All GPU(s): step 7426: loss 23.0781, lr 1.4e-04, dt 2.0s +All GPU(s): step 7427: loss 23.1562, lr 1.4e-04, dt 2.0s +All GPU(s): step 7428: loss 23.0938, lr 1.4e-04, dt 2.1s +All GPU(s): step 7429: loss 22.9844, lr 1.4e-04, dt 2.0s +All GPU(s): step 7430: loss 23.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7431: loss 23.0938, lr 1.4e-04, dt 2.0s +All GPU(s): step 7432: loss 23.0000, lr 1.4e-04, dt 2.1s +All GPU(s): step 7433: loss 22.4531, lr 1.4e-04, dt 2.1s +All GPU(s): step 7434: loss 23.1250, lr 1.4e-04, dt 2.0s +All GPU(s): step 7435: loss 23.0781, lr 1.4e-04, dt 2.1s +All GPU(s): step 7436: loss 22.9531, lr 1.4e-04, dt 2.0s +All GPU(s): step 7437: loss 23.2969, lr 1.4e-04, dt 2.1s +All GPU(s): step 7438: loss 23.1875, lr 1.4e-04, dt 2.1s +All GPU(s): step 7439: loss 23.3594, lr 1.4e-04, dt 2.0s +All GPU(s): step 7440: loss 23.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7441: loss 23.4062, lr 1.4e-04, dt 2.0s +All GPU(s): step 7442: loss 23.4531, lr 1.4e-04, dt 2.1s +All GPU(s): step 7443: loss 23.4688, lr 1.4e-04, dt 2.1s +All GPU(s): step 7444: loss 23.5000, lr 1.4e-04, dt 2.0s +All GPU(s): step 7445: loss 23.4844, lr 1.4e-04, dt 2.0s +All GPU(s): step 7446: loss 23.5938, lr 1.4e-04, dt 2.0s +All GPU(s): step 7447: loss 23.7500, lr 1.4e-04, dt 2.1s +All GPU(s): step 7448: loss 23.8750, lr 1.4e-04, dt 2.1s +All GPU(s): step 7449: loss 23.8438, lr 1.4e-04, dt 2.0s +All GPU(s): step 7450: loss 24.3906, lr 1.4e-04, dt 2.0s +All GPU(s): step 7451: loss 24.0781, lr 1.4e-04, dt 2.0s +All GPU(s): step 7452: loss 24.2656, lr 1.4e-04, dt 2.1s +All GPU(s): step 7453: loss 24.2969, lr 1.4e-04, dt 2.0s +All GPU(s): step 7454: loss 24.3281, lr 1.4e-04, dt 2.1s +All GPU(s): step 7455: loss 24.7344, lr 1.4e-04, dt 2.0s +All GPU(s): step 7456: loss 24.4844, lr 1.4e-04, dt 2.0s +All GPU(s): step 7457: loss 24.6875, lr 1.4e-04, dt 2.1s +All GPU(s): step 7458: loss 24.5312, lr 1.4e-04, dt 2.1s +All GPU(s): step 7459: loss 24.4375, lr 1.4e-04, dt 2.1s +All GPU(s): step 7460: loss 24.6719, lr 1.4e-04, dt 2.0s +All GPU(s): step 7461: loss 24.6562, lr 1.4e-04, dt 2.1s +All GPU(s): step 7462: loss 24.9688, lr 1.4e-04, dt 2.1s +All GPU(s): step 7463: loss 24.5469, lr 1.4e-04, dt 2.0s +All GPU(s): step 7464: loss 24.3125, lr 1.4e-04, dt 2.0s +All GPU(s): step 7465: loss 24.1719, lr 1.4e-04, dt 2.0s +All GPU(s): step 7466: loss 23.7500, lr 1.4e-04, dt 2.0s +All GPU(s): step 7467: loss 23.2344, lr 1.4e-04, dt 2.1s +All GPU(s): step 7468: loss 23.4219, lr 1.4e-04, dt 2.0s +All GPU(s): step 7469: loss 23.1094, lr 1.4e-04, dt 2.0s +All GPU(s): step 7470: loss 22.7812, lr 1.4e-04, dt 2.0s +All GPU(s): step 7471: loss 22.8438, lr 1.4e-04, dt 2.1s +All GPU(s): step 7472: loss 22.5781, lr 1.4e-04, dt 2.1s +All GPU(s): step 7473: loss 22.5156, lr 1.4e-04, dt 2.0s +All GPU(s): step 7474: loss 22.5312, lr 1.4e-04, dt 2.1s +All GPU(s): step 7475: loss 22.3594, lr 1.4e-04, dt 2.0s +All GPU(s): step 7476: loss 22.3281, lr 1.4e-04, dt 2.1s +All GPU(s): step 7477: loss 22.2188, lr 1.4e-04, dt 2.0s +All GPU(s): step 7478: loss 22.1875, lr 1.4e-04, dt 2.0s +All GPU(s): step 7479: loss 22.1875, lr 1.4e-04, dt 2.0s +All GPU(s): step 7480: loss 21.8594, lr 1.4e-04, dt 2.1s +All GPU(s): step 7481: loss 22.2656, lr 1.4e-04, dt 2.1s +All GPU(s): step 7482: loss 21.9531, lr 1.4e-04, dt 2.0s +All GPU(s): step 7483: loss 22.0781, lr 1.4e-04, dt 2.0s +All GPU(s): step 7484: loss 21.8438, lr 1.4e-04, dt 2.0s +All GPU(s): step 7485: loss 22.3281, lr 1.4e-04, dt 2.0s +All GPU(s): step 7486: loss 21.9375, lr 1.4e-04, dt 2.1s +All GPU(s): step 7487: loss 22.0938, lr 1.4e-04, dt 2.0s +All GPU(s): step 7488: loss 22.2188, lr 1.4e-04, dt 2.0s +All GPU(s): step 7489: loss 22.4844, lr 1.4e-04, dt 2.1s +All GPU(s): step 7490: loss 21.9062, lr 1.4e-04, dt 2.1s +All GPU(s): step 7491: loss 22.5156, lr 1.4e-04, dt 2.1s +All GPU(s): step 7492: loss 21.8750, lr 1.4e-04, dt 2.0s +All GPU(s): step 7493: loss 22.0781, lr 1.4e-04, dt 2.0s +All GPU(s): step 7494: loss 22.3125, lr 1.4e-04, dt 2.1s +All GPU(s): step 7495: loss 22.4844, lr 1.4e-04, dt 2.1s +All GPU(s): step 7496: loss 21.8750, lr 1.4e-04, dt 2.1s +All GPU(s): step 7497: loss 21.8906, lr 1.4e-04, dt 2.0s +All GPU(s): step 7498: loss 22.1406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7499: loss 22.3438, lr 1.4e-04, dt 2.0s +All GPU(s): step 7500: loss 22.1719, lr 1.4e-04, dt 2.1s +All GPU(s): step 7501: loss 22.3594, lr 1.4e-04, dt 2.1s +All GPU(s): step 7502: loss 22.0469, lr 1.4e-04, dt 2.0s +All GPU(s): step 7503: loss 22.1094, lr 1.4e-04, dt 2.0s +All GPU(s): step 7504: loss 22.0469, lr 1.4e-04, dt 2.0s +All GPU(s): step 7505: loss 22.0469, lr 1.4e-04, dt 2.1s +All GPU(s): step 7506: loss 22.4219, lr 1.4e-04, dt 2.0s +All GPU(s): step 7507: loss 22.2656, lr 1.4e-04, dt 2.0s +All GPU(s): step 7508: loss 22.1875, lr 1.4e-04, dt 2.0s +All GPU(s): step 7509: loss 22.4062, lr 1.4e-04, dt 2.1s +All GPU(s): step 7510: loss 22.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7511: loss 22.5312, lr 1.4e-04, dt 2.0s +All GPU(s): step 7512: loss 22.4844, lr 1.4e-04, dt 2.1s +All GPU(s): step 7513: loss 22.3125, lr 1.4e-04, dt 2.0s +All GPU(s): step 7514: loss 22.0938, lr 1.4e-04, dt 2.1s +All GPU(s): step 7515: loss 22.0312, lr 1.4e-04, dt 2.1s +All GPU(s): step 7516: loss 22.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7517: loss 22.0469, lr 1.4e-04, dt 2.0s +All GPU(s): step 7518: loss 22.5469, lr 1.4e-04, dt 2.0s +All GPU(s): step 7519: loss 22.1406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7520: loss 22.2031, lr 1.4e-04, dt 2.1s +All GPU(s): step 7521: loss 22.2031, lr 1.4e-04, dt 2.1s +All GPU(s): step 7522: loss 22.5312, lr 1.4e-04, dt 2.1s +All GPU(s): step 7523: loss 22.4844, lr 1.4e-04, dt 2.1s +All GPU(s): step 7524: loss 22.6406, lr 1.4e-04, dt 2.1s +All GPU(s): step 7525: loss 22.3281, lr 1.4e-04, dt 2.1s +All GPU(s): step 7526: loss 22.1562, lr 1.4e-04, dt 2.0s +All GPU(s): step 7527: loss 22.5625, lr 1.4e-04, dt 2.0s +All GPU(s): step 7528: loss 21.8750, lr 1.4e-04, dt 2.0s +All GPU(s): step 7529: loss 22.2812, lr 1.4e-04, dt 2.1s +All GPU(s): step 7530: loss 22.4219, lr 1.4e-04, dt 2.1s +All GPU(s): step 7531: loss 22.4062, lr 1.4e-04, dt 2.0s +All GPU(s): step 7532: loss 22.5156, lr 1.4e-04, dt 2.0s +All GPU(s): step 7533: loss 22.2969, lr 1.4e-04, dt 2.0s +All GPU(s): step 7534: loss 22.4531, lr 1.4e-04, dt 2.1s +All GPU(s): step 7535: loss 22.3281, lr 1.4e-04, dt 2.1s +All GPU(s): step 7536: loss 22.3906, lr 1.4e-04, dt 2.1s +All GPU(s): step 7537: loss 22.5938, lr 1.4e-04, dt 2.1s +All GPU(s): step 7538: loss 22.6094, lr 1.4e-04, dt 2.1s +All GPU(s): step 7539: loss 22.3750, lr 1.4e-04, dt 2.1s +All GPU(s): step 7540: loss 22.3281, lr 1.4e-04, dt 2.1s +All GPU(s): step 7541: loss 22.3438, lr 1.4e-04, dt 2.0s +All GPU(s): step 7542: loss 22.4219, lr 1.4e-04, dt 2.1s +All GPU(s): step 7543: loss 22.5625, lr 1.4e-04, dt 2.1s +All GPU(s): step 7544: loss 22.5469, lr 1.4e-04, dt 2.1s +All GPU(s): step 7545: loss 22.7188, lr 1.4e-04, dt 2.0s +All GPU(s): step 7546: loss 22.9531, lr 1.4e-04, dt 2.0s +All GPU(s): step 7547: loss 22.7656, lr 1.4e-04, dt 2.1s +All GPU(s): step 7548: loss 22.5625, lr 1.4e-04, dt 2.1s +All GPU(s): step 7549: loss 22.7812, lr 1.4e-04, dt 2.2s +All GPU(s): step 7550: loss 22.6562, lr 1.4e-04, dt 2.0s +All GPU(s): step 7551: loss 22.5312, lr 1.4e-04, dt 2.0s +All GPU(s): step 7552: loss 22.3438, lr 1.4e-04, dt 2.0s +All GPU(s): step 7553: loss 22.9844, lr 1.4e-04, dt 2.1s +All GPU(s): step 7554: loss 22.5938, lr 1.4e-04, dt 2.1s +All GPU(s): step 7555: loss 22.6406, lr 1.4e-04, dt 2.0s +All GPU(s): step 7556: loss 22.5156, lr 1.4e-04, dt 2.1s +All GPU(s): step 7557: loss 22.6719, lr 1.4e-04, dt 2.1s +All GPU(s): step 7558: loss 22.7031, lr 1.4e-04, dt 2.1s +All GPU(s): step 7559: loss 22.5000, lr 1.4e-04, dt 2.1s +All GPU(s): step 7560: loss 22.5156, lr 1.4e-04, dt 2.0s +All GPU(s): step 7561: loss 22.2812, lr 1.4e-04, dt 2.0s +All GPU(s): step 7562: loss 22.5312, lr 1.4e-04, dt 2.1s +All GPU(s): step 7563: loss 21.9688, lr 1.4e-04, dt 2.1s +All GPU(s): step 7564: loss 22.3125, lr 1.4e-04, dt 2.0s +All GPU(s): step 7565: loss 22.2344, lr 1.4e-04, dt 2.1s +All GPU(s): step 7566: loss 22.3750, lr 1.3e-04, dt 2.0s +All GPU(s): step 7567: loss 22.1875, lr 1.3e-04, dt 2.1s +All GPU(s): step 7568: loss 21.9219, lr 1.3e-04, dt 2.1s +All GPU(s): step 7569: loss 22.1250, lr 1.3e-04, dt 2.0s +All GPU(s): step 7570: loss 22.3594, lr 1.3e-04, dt 2.1s +All GPU(s): step 7571: loss 22.4375, lr 1.3e-04, dt 2.0s +All GPU(s): step 7572: loss 22.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7573: loss 22.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7574: loss 22.3125, lr 1.3e-04, dt 2.0s +All GPU(s): step 7575: loss 21.8281, lr 1.3e-04, dt 2.0s +All GPU(s): step 7576: loss 22.2969, lr 1.3e-04, dt 2.1s +All GPU(s): step 7577: loss 22.1719, lr 1.3e-04, dt 2.1s +All GPU(s): step 7578: loss 22.0781, lr 1.3e-04, dt 2.0s +All GPU(s): step 7579: loss 22.2500, lr 1.3e-04, dt 2.0s +All GPU(s): step 7580: loss 22.0000, lr 1.3e-04, dt 2.1s +All GPU(s): step 7581: loss 22.2031, lr 1.3e-04, dt 2.0s +All GPU(s): step 7582: loss 21.8125, lr 1.3e-04, dt 2.1s +All GPU(s): step 7583: loss 21.8906, lr 1.3e-04, dt 2.0s +All GPU(s): step 7584: loss 22.2812, lr 1.3e-04, dt 2.0s +All GPU(s): step 7585: loss 21.7188, lr 1.3e-04, dt 2.0s +All GPU(s): step 7586: loss 22.4219, lr 1.3e-04, dt 2.0s +All GPU(s): step 7587: loss 22.3906, lr 1.3e-04, dt 2.2s +All GPU(s): step 7588: loss 22.1094, lr 1.3e-04, dt 2.0s +All GPU(s): step 7589: loss 21.9219, lr 1.3e-04, dt 2.1s +All GPU(s): step 7590: loss 22.4844, lr 1.3e-04, dt 2.0s +All GPU(s): step 7591: loss 21.6094, lr 1.3e-04, dt 2.1s +All GPU(s): step 7592: loss 22.0312, lr 1.3e-04, dt 2.2s +All GPU(s): step 7593: loss 21.8438, lr 1.3e-04, dt 2.1s +All GPU(s): step 7594: loss 21.8906, lr 1.3e-04, dt 2.1s +All GPU(s): step 7595: loss 22.1719, lr 1.3e-04, dt 2.0s +All GPU(s): step 7596: loss 22.0938, lr 1.3e-04, dt 2.1s +All GPU(s): step 7597: loss 22.1250, lr 1.3e-04, dt 2.1s +All GPU(s): step 7598: loss 22.5938, lr 1.3e-04, dt 2.0s +All GPU(s): step 7599: loss 21.7188, lr 1.3e-04, dt 2.0s +All GPU(s): step 7600: loss 22.1719, lr 1.3e-04, dt 2.1s +All GPU(s): step 7601: loss 21.8750, lr 1.3e-04, dt 2.1s +All GPU(s): step 7602: loss 22.1875, lr 1.3e-04, dt 2.0s +All GPU(s): step 7603: loss 22.0156, lr 1.3e-04, dt 2.0s +All GPU(s): step 7604: loss 21.7500, lr 1.3e-04, dt 2.0s +All GPU(s): step 7605: loss 21.8750, lr 1.3e-04, dt 2.1s +All GPU(s): step 7606: loss 22.0938, lr 1.3e-04, dt 2.2s +All GPU(s): step 7607: loss 21.9844, lr 1.3e-04, dt 2.0s +All GPU(s): step 7608: loss 21.7969, lr 1.3e-04, dt 2.0s +All GPU(s): step 7609: loss 21.9375, lr 1.3e-04, dt 2.0s +All GPU(s): step 7610: loss 22.0156, lr 1.3e-04, dt 2.0s +All GPU(s): step 7611: loss 21.8750, lr 1.3e-04, dt 2.1s +All GPU(s): step 7612: loss 21.9062, lr 1.3e-04, dt 2.0s +All GPU(s): step 7613: loss 22.1562, lr 1.3e-04, dt 2.0s +All GPU(s): step 7614: loss 21.8281, lr 1.3e-04, dt 2.0s +All GPU(s): step 7615: loss 21.9531, lr 1.3e-04, dt 2.0s +All GPU(s): step 7616: loss 21.7031, lr 1.3e-04, dt 2.1s +All GPU(s): step 7617: loss 22.0469, lr 1.3e-04, dt 2.0s +All GPU(s): step 7618: loss 21.8438, lr 1.3e-04, dt 2.0s +All GPU(s): step 7619: loss 22.2031, lr 1.3e-04, dt 2.0s +All GPU(s): step 7620: loss 21.8438, lr 1.3e-04, dt 2.0s +All GPU(s): step 7621: loss 21.9688, lr 1.3e-04, dt 2.1s +All GPU(s): step 7622: loss 22.1094, lr 1.3e-04, dt 2.1s +All GPU(s): step 7623: loss 22.1719, lr 1.3e-04, dt 2.0s +All GPU(s): step 7624: loss 22.0938, lr 1.3e-04, dt 2.0s +All GPU(s): step 7625: loss 21.9531, lr 1.3e-04, dt 2.1s +All GPU(s): step 7626: loss 22.2344, lr 1.3e-04, dt 2.1s +All GPU(s): step 7627: loss 22.3125, lr 1.3e-04, dt 2.1s +All GPU(s): step 7628: loss 22.1562, lr 1.3e-04, dt 2.0s +All GPU(s): step 7629: loss 21.8438, lr 1.3e-04, dt 2.0s +All GPU(s): step 7630: loss 22.2500, lr 1.3e-04, dt 2.1s +All GPU(s): step 7631: loss 21.8125, lr 1.3e-04, dt 2.0s +All GPU(s): step 7632: loss 22.0938, lr 1.3e-04, dt 2.0s +All GPU(s): step 7633: loss 21.9844, lr 1.3e-04, dt 2.0s +All GPU(s): step 7634: loss 22.0781, lr 1.3e-04, dt 2.1s +All GPU(s): step 7635: loss 21.8750, lr 1.3e-04, dt 2.2s +All GPU(s): step 7636: loss 22.1875, lr 1.3e-04, dt 2.0s +All GPU(s): step 7637: loss 22.1406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7638: loss 21.9688, lr 1.3e-04, dt 2.0s +All GPU(s): step 7639: loss 22.3281, lr 1.3e-04, dt 2.0s +All GPU(s): step 7640: loss 22.2969, lr 1.3e-04, dt 2.1s +All GPU(s): step 7641: loss 22.1719, lr 1.3e-04, dt 2.1s +All GPU(s): step 7642: loss 22.2031, lr 1.3e-04, dt 2.1s +All GPU(s): step 7643: loss 22.5000, lr 1.3e-04, dt 2.0s +All GPU(s): step 7644: loss 22.4062, lr 1.3e-04, dt 2.0s +All GPU(s): step 7645: loss 21.9062, lr 1.3e-04, dt 2.4s +All GPU(s): step 7646: loss 22.4062, lr 1.3e-04, dt 2.1s +All GPU(s): step 7647: loss 22.0156, lr 1.3e-04, dt 2.1s +All GPU(s): step 7648: loss 22.0938, lr 1.3e-04, dt 2.0s +All GPU(s): step 7649: loss 22.2656, lr 1.3e-04, dt 2.1s +All GPU(s): step 7650: loss 22.2188, lr 1.3e-04, dt 2.1s +All GPU(s): step 7651: loss 22.3750, lr 1.3e-04, dt 2.0s +All GPU(s): step 7652: loss 22.3281, lr 1.3e-04, dt 2.0s +All GPU(s): step 7653: loss 22.0625, lr 1.3e-04, dt 2.0s +All GPU(s): step 7654: loss 22.2500, lr 1.3e-04, dt 2.1s +All GPU(s): step 7655: loss 22.1875, lr 1.3e-04, dt 2.1s +All GPU(s): step 7656: loss 22.2500, lr 1.3e-04, dt 2.0s +All GPU(s): step 7657: loss 21.8906, lr 1.3e-04, dt 2.1s +All GPU(s): step 7658: loss 22.0781, lr 1.3e-04, dt 2.1s +All GPU(s): step 7659: loss 21.9375, lr 1.3e-04, dt 2.1s +All GPU(s): step 7660: loss 21.8906, lr 1.3e-04, dt 2.0s +All GPU(s): step 7661: loss 22.2344, lr 1.3e-04, dt 2.1s +All GPU(s): step 7662: loss 22.0938, lr 1.3e-04, dt 2.0s +All GPU(s): step 7663: loss 21.8906, lr 1.3e-04, dt 2.1s +All GPU(s): step 7664: loss 22.0000, lr 1.3e-04, dt 2.2s +All GPU(s): step 7665: loss 22.3281, lr 1.3e-04, dt 2.0s +All GPU(s): step 7666: loss 22.3438, lr 1.3e-04, dt 2.0s +All GPU(s): step 7667: loss 22.2188, lr 1.3e-04, dt 2.0s +All GPU(s): step 7668: loss 22.1406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7669: loss 22.2344, lr 1.3e-04, dt 2.1s +All GPU(s): step 7670: loss 22.3750, lr 1.3e-04, dt 2.0s +All GPU(s): step 7671: loss 22.3906, lr 1.3e-04, dt 2.0s +All GPU(s): step 7672: loss 22.4062, lr 1.3e-04, dt 2.0s +All GPU(s): step 7673: loss 22.0938, lr 1.3e-04, dt 2.0s +All GPU(s): step 7674: loss 22.3750, lr 1.3e-04, dt 2.1s +All GPU(s): step 7675: loss 22.1719, lr 1.3e-04, dt 2.0s +All GPU(s): step 7676: loss 22.5781, lr 1.3e-04, dt 2.0s +All GPU(s): step 7677: loss 22.3281, lr 1.3e-04, dt 2.0s +All GPU(s): step 7678: loss 22.3281, lr 1.3e-04, dt 2.1s +All GPU(s): step 7679: loss 22.7031, lr 1.3e-04, dt 2.1s +All GPU(s): step 7680: loss 22.3750, lr 1.3e-04, dt 2.0s +All GPU(s): step 7681: loss 22.0938, lr 1.3e-04, dt 2.0s +All GPU(s): step 7682: loss 22.1250, lr 1.3e-04, dt 2.1s +All GPU(s): step 7683: loss 22.3438, lr 1.3e-04, dt 2.1s +All GPU(s): step 7684: loss 22.3281, lr 1.3e-04, dt 2.0s +All GPU(s): step 7685: loss 22.6094, lr 1.3e-04, dt 2.0s +All GPU(s): step 7686: loss 22.2188, lr 1.3e-04, dt 2.0s +All GPU(s): step 7687: loss 22.5781, lr 1.3e-04, dt 2.1s +All GPU(s): step 7688: loss 22.3750, lr 1.3e-04, dt 2.2s +All GPU(s): step 7689: loss 23.0000, lr 1.3e-04, dt 2.0s +All GPU(s): step 7690: loss 22.6250, lr 1.3e-04, dt 2.0s +All GPU(s): step 7691: loss 22.6406, lr 1.3e-04, dt 2.0s +All GPU(s): step 7692: loss 22.3281, lr 1.3e-04, dt 2.1s +All GPU(s): step 7693: loss 22.5312, lr 1.3e-04, dt 2.1s +All GPU(s): step 7694: loss 22.3281, lr 1.3e-04, dt 2.0s +All GPU(s): step 7695: loss 22.9375, lr 1.3e-04, dt 2.0s +All GPU(s): step 7696: loss 22.2031, lr 1.3e-04, dt 2.1s +All GPU(s): step 7697: loss 22.3594, lr 1.3e-04, dt 2.1s +All GPU(s): step 7698: loss 22.3438, lr 1.3e-04, dt 2.1s +All GPU(s): step 7699: loss 22.3594, lr 1.3e-04, dt 2.0s +All GPU(s): step 7700: loss 22.6406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7701: loss 22.5000, lr 1.3e-04, dt 2.0s +All GPU(s): step 7702: loss 22.7500, lr 1.3e-04, dt 2.1s +All GPU(s): step 7703: loss 22.9062, lr 1.3e-04, dt 2.1s +All GPU(s): step 7704: loss 22.5312, lr 1.3e-04, dt 2.0s +All GPU(s): step 7705: loss 23.2188, lr 1.3e-04, dt 2.0s +All GPU(s): step 7706: loss 23.3125, lr 1.3e-04, dt 2.1s +All GPU(s): step 7707: loss 23.0156, lr 1.3e-04, dt 2.1s +All GPU(s): step 7708: loss 23.0469, lr 1.3e-04, dt 2.0s +All GPU(s): step 7709: loss 23.3281, lr 1.3e-04, dt 2.0s +All GPU(s): step 7710: loss 23.3281, lr 1.3e-04, dt 2.1s +All GPU(s): step 7711: loss 23.1094, lr 1.3e-04, dt 2.0s +All GPU(s): step 7712: loss 23.5938, lr 1.3e-04, dt 2.1s +All GPU(s): step 7713: loss 23.5312, lr 1.3e-04, dt 2.0s +All GPU(s): step 7714: loss 23.2656, lr 1.3e-04, dt 2.0s +All GPU(s): step 7715: loss 23.2500, lr 1.3e-04, dt 2.1s +All GPU(s): step 7716: loss 23.3125, lr 1.3e-04, dt 2.1s +All GPU(s): step 7717: loss 23.3438, lr 1.3e-04, dt 2.2s +All GPU(s): step 7718: loss 23.7969, lr 1.3e-04, dt 2.0s +All GPU(s): step 7719: loss 23.5938, lr 1.3e-04, dt 2.1s +All GPU(s): step 7720: loss 23.7188, lr 1.3e-04, dt 2.0s +All GPU(s): step 7721: loss 23.4688, lr 1.3e-04, dt 2.1s +All GPU(s): step 7722: loss 23.3906, lr 1.3e-04, dt 2.1s +All GPU(s): step 7723: loss 23.3438, lr 1.3e-04, dt 2.1s +All GPU(s): step 7724: loss 23.2969, lr 1.3e-04, dt 2.0s +All GPU(s): step 7725: loss 23.9219, lr 1.3e-04, dt 2.1s +All GPU(s): step 7726: loss 24.0938, lr 1.3e-04, dt 2.1s +All GPU(s): step 7727: loss 23.8594, lr 1.3e-04, dt 2.1s +All GPU(s): step 7728: loss 23.9375, lr 1.3e-04, dt 2.1s +All GPU(s): step 7729: loss 23.5938, lr 1.3e-04, dt 2.0s +All GPU(s): step 7730: loss 23.5469, lr 1.3e-04, dt 2.1s +All GPU(s): step 7731: loss 23.8594, lr 1.3e-04, dt 2.1s +All GPU(s): step 7732: loss 23.4844, lr 1.3e-04, dt 2.1s +All GPU(s): step 7733: loss 23.4375, lr 1.3e-04, dt 2.0s +All GPU(s): step 7734: loss 23.2969, lr 1.3e-04, dt 2.0s +All GPU(s): step 7735: loss 23.3594, lr 1.3e-04, dt 2.0s +All GPU(s): step 7736: loss 22.9688, lr 1.3e-04, dt 2.1s +All GPU(s): step 7737: loss 23.7031, lr 1.3e-04, dt 2.0s +All GPU(s): step 7738: loss 23.3438, lr 1.3e-04, dt 2.0s +All GPU(s): step 7739: loss 23.1719, lr 1.3e-04, dt 2.0s +All GPU(s): step 7740: loss 23.3594, lr 1.3e-04, dt 2.0s +All GPU(s): step 7741: loss 23.2969, lr 1.3e-04, dt 2.1s +All GPU(s): step 7742: loss 23.1562, lr 1.3e-04, dt 2.1s +All GPU(s): step 7743: loss 23.4375, lr 1.3e-04, dt 2.0s +All GPU(s): step 7744: loss 23.5781, lr 1.3e-04, dt 2.0s +All GPU(s): step 7745: loss 23.4531, lr 1.3e-04, dt 2.1s +All GPU(s): step 7746: loss 23.4062, lr 1.3e-04, dt 2.2s +All GPU(s): step 7747: loss 23.4062, lr 1.3e-04, dt 2.0s +All GPU(s): step 7748: loss 23.4844, lr 1.3e-04, dt 2.0s +All GPU(s): step 7749: loss 23.5625, lr 1.3e-04, dt 2.0s +All GPU(s): step 7750: loss 23.9688, lr 1.3e-04, dt 2.1s +All GPU(s): step 7751: loss 24.0938, lr 1.3e-04, dt 2.1s +All GPU(s): step 7752: loss 24.2344, lr 1.3e-04, dt 2.0s +All GPU(s): step 7753: loss 24.2188, lr 1.3e-04, dt 2.0s +All GPU(s): step 7754: loss 24.1562, lr 1.3e-04, dt 2.1s +All GPU(s): step 7755: loss 24.3594, lr 1.3e-04, dt 2.1s +All GPU(s): step 7756: loss 24.6094, lr 1.3e-04, dt 2.1s +All GPU(s): step 7757: loss 24.6406, lr 1.3e-04, dt 2.1s +All GPU(s): step 7758: loss 24.4219, lr 1.3e-04, dt 2.0s +All GPU(s): step 7759: loss 24.4219, lr 1.3e-04, dt 2.1s +All GPU(s): step 7760: loss 24.5312, lr 1.3e-04, dt 2.1s +All GPU(s): step 7761: loss 24.2344, lr 1.3e-04, dt 2.1s +All GPU(s): step 7762: loss 24.1719, lr 1.3e-04, dt 2.0s +All GPU(s): step 7763: loss 24.2500, lr 1.3e-04, dt 2.0s +All GPU(s): step 7764: loss 24.3125, lr 1.3e-04, dt 2.0s +All GPU(s): step 7765: loss 23.9219, lr 1.3e-04, dt 2.2s +All GPU(s): step 7766: loss 24.2656, lr 1.3e-04, dt 2.1s +All GPU(s): step 7767: loss 24.2656, lr 1.3e-04, dt 2.0s +All GPU(s): step 7768: loss 24.5312, lr 1.3e-04, dt 2.0s +All GPU(s): step 7769: loss 24.4219, lr 1.3e-04, dt 2.1s +All GPU(s): step 7770: loss 24.5625, lr 1.2e-04, dt 2.1s +All GPU(s): step 7771: loss 24.3125, lr 1.2e-04, dt 2.0s +All GPU(s): step 7772: loss 24.1562, lr 1.2e-04, dt 2.0s +All GPU(s): step 7773: loss 24.3750, lr 1.2e-04, dt 2.0s +All GPU(s): step 7774: loss 24.0312, lr 1.2e-04, dt 2.1s +All GPU(s): step 7775: loss 24.2500, lr 1.2e-04, dt 2.2s +All GPU(s): step 7776: loss 24.0938, lr 1.2e-04, dt 2.1s +All GPU(s): step 7777: loss 23.9688, lr 1.2e-04, dt 2.1s +All GPU(s): step 7778: loss 24.2031, lr 1.2e-04, dt 2.0s +All GPU(s): step 7779: loss 24.0000, lr 1.2e-04, dt 2.1s +All GPU(s): step 7780: loss 24.0312, lr 1.2e-04, dt 2.1s +All GPU(s): step 7781: loss 24.2031, lr 1.2e-04, dt 2.1s +All GPU(s): step 7782: loss 24.2344, lr 1.2e-04, dt 2.1s +All GPU(s): step 7783: loss 24.4062, lr 1.2e-04, dt 2.0s +All GPU(s): step 7784: loss 24.4844, lr 1.2e-04, dt 2.1s +All GPU(s): step 7785: loss 24.7344, lr 1.2e-04, dt 2.0s +All GPU(s): step 7786: loss 24.7656, lr 1.2e-04, dt 2.0s +All GPU(s): step 7787: loss 24.8438, lr 1.2e-04, dt 2.0s +All GPU(s): step 7788: loss 24.8281, lr 1.2e-04, dt 2.0s +All GPU(s): step 7789: loss 24.7969, lr 1.2e-04, dt 2.2s +All GPU(s): step 7790: loss 25.1719, lr 1.2e-04, dt 2.0s +All GPU(s): step 7791: loss 25.0938, lr 1.2e-04, dt 2.0s +All GPU(s): step 7792: loss 25.0938, lr 1.2e-04, dt 2.0s +All GPU(s): step 7793: loss 25.3281, lr 1.2e-04, dt 2.1s +All GPU(s): step 7794: loss 25.2500, lr 1.2e-04, dt 2.1s +All GPU(s): step 7795: loss 25.4062, lr 1.2e-04, dt 2.0s +All GPU(s): step 7796: loss 25.2812, lr 1.2e-04, dt 2.1s +All GPU(s): step 7797: loss 24.9219, lr 1.2e-04, dt 2.0s +All GPU(s): step 7798: loss 25.2656, lr 1.2e-04, dt 2.1s +All GPU(s): step 7799: loss 25.3594, lr 1.2e-04, dt 2.1s +All GPU(s): step 7800: loss 25.2500, lr 1.2e-04, dt 2.0s +All GPU(s): step 7801: loss 25.5000, lr 1.2e-04, dt 2.1s +All GPU(s): step 7802: loss 25.6094, lr 1.2e-04, dt 2.0s +All GPU(s): step 7803: loss 25.2031, lr 1.2e-04, dt 2.1s +All GPU(s): step 7804: loss 24.9688, lr 1.2e-04, dt 2.1s +All GPU(s): step 7805: loss 25.4375, lr 1.2e-04, dt 2.0s +All GPU(s): step 7806: loss 25.3125, lr 1.2e-04, dt 2.0s +All GPU(s): step 7807: loss 25.5625, lr 1.2e-04, dt 2.0s +All GPU(s): step 7808: loss 25.5000, lr 1.2e-04, dt 2.1s +All GPU(s): step 7809: loss 25.0156, lr 1.2e-04, dt 2.1s +All GPU(s): step 7810: loss 25.3281, lr 1.2e-04, dt 2.1s +All GPU(s): step 7811: loss 25.2969, lr 1.2e-04, dt 2.1s +All GPU(s): step 7812: loss 25.3438, lr 1.2e-04, dt 2.1s +All GPU(s): step 7813: loss 25.3438, lr 1.2e-04, dt 2.1s +All GPU(s): step 7814: loss 25.5312, lr 1.2e-04, dt 2.1s +All GPU(s): step 7815: loss 25.2656, lr 1.2e-04, dt 2.0s +All GPU(s): step 7816: loss 25.2031, lr 1.2e-04, dt 2.0s +All GPU(s): step 7817: loss 25.1094, lr 1.2e-04, dt 2.1s +All GPU(s): step 7818: loss 25.4844, lr 1.2e-04, dt 2.1s +All GPU(s): step 7819: loss 25.4531, lr 1.2e-04, dt 2.0s +All GPU(s): step 7820: loss 25.1094, lr 1.2e-04, dt 2.0s +All GPU(s): step 7821: loss 25.4062, lr 1.2e-04, dt 2.0s +All GPU(s): step 7822: loss 25.7969, lr 1.2e-04, dt 2.0s +All GPU(s): step 7823: loss 25.4688, lr 1.2e-04, dt 2.1s +All GPU(s): step 7824: loss 25.3906, lr 1.2e-04, dt 2.0s +All GPU(s): step 7825: loss 25.3125, lr 1.2e-04, dt 2.0s +All GPU(s): step 7826: loss 25.4219, lr 1.2e-04, dt 2.0s +All GPU(s): step 7827: loss 25.4375, lr 1.2e-04, dt 2.0s +All GPU(s): step 7828: loss 25.3750, lr 1.2e-04, dt 2.1s +All GPU(s): step 7829: loss 25.3906, lr 1.2e-04, dt 2.0s +All GPU(s): step 7830: loss 25.4375, lr 1.2e-04, dt 2.0s +All GPU(s): step 7831: loss 25.1719, lr 1.2e-04, dt 2.0s +All GPU(s): step 7832: loss 25.2188, lr 1.2e-04, dt 2.1s +All GPU(s): step 7833: loss 25.4219, lr 1.2e-04, dt 2.1s +All GPU(s): step 7834: loss 25.2500, lr 1.2e-04, dt 2.0s +All GPU(s): step 7835: loss 25.3594, lr 1.2e-04, dt 2.1s +All GPU(s): step 7836: loss 25.1562, lr 1.2e-04, dt 2.0s +All GPU(s): step 7837: loss 25.3125, lr 1.2e-04, dt 2.1s +All GPU(s): step 7838: loss 25.5781, lr 1.2e-04, dt 2.0s +All GPU(s): step 7839: loss 25.5156, lr 1.2e-04, dt 2.0s +All GPU(s): step 7840: loss 25.5312, lr 1.2e-04, dt 2.0s +All GPU(s): step 7841: loss 25.7188, lr 1.2e-04, dt 2.0s +All GPU(s): step 7842: loss 25.6094, lr 1.2e-04, dt 2.1s +All GPU(s): step 7843: loss 25.2969, lr 1.2e-04, dt 2.1s +All GPU(s): step 7844: loss 25.4219, lr 1.2e-04, dt 2.0s +All GPU(s): step 7845: loss 25.4688, lr 1.2e-04, dt 2.1s +All GPU(s): step 7846: loss 25.5938, lr 1.2e-04, dt 2.0s +All GPU(s): step 7847: loss 25.5469, lr 1.2e-04, dt 2.1s +All GPU(s): step 7848: loss 25.4375, lr 1.2e-04, dt 2.1s +All GPU(s): step 7849: loss 25.8438, lr 1.2e-04, dt 2.1s +All GPU(s): step 7850: loss 25.5000, lr 1.2e-04, dt 2.1s +All GPU(s): step 7851: loss 25.4062, lr 1.2e-04, dt 2.1s +All GPU(s): step 7852: loss 25.3438, lr 1.2e-04, dt 2.1s +All GPU(s): step 7853: loss 25.4688, lr 1.2e-04, dt 2.0s +All GPU(s): step 7854: loss 25.3438, lr 1.2e-04, dt 2.0s +All GPU(s): step 7855: loss 25.4531, lr 1.2e-04, dt 2.0s +All GPU(s): step 7856: loss 25.2500, lr 1.2e-04, dt 2.1s +All GPU(s): step 7857: loss 25.5000, lr 1.2e-04, dt 2.1s +All GPU(s): step 7858: loss 25.5781, lr 1.2e-04, dt 2.1s +All GPU(s): step 7859: loss 25.5781, lr 1.2e-04, dt 2.1s +All GPU(s): step 7860: loss 25.6719, lr 1.2e-04, dt 2.1s +All GPU(s): step 7861: loss 25.4531, lr 1.2e-04, dt 2.1s +All GPU(s): step 7862: loss 25.5312, lr 1.2e-04, dt 2.0s +All GPU(s): step 7863: loss 25.7344, lr 1.2e-04, dt 2.1s +All GPU(s): step 7864: loss 25.5469, lr 1.2e-04, dt 2.1s +All GPU(s): step 7865: loss 25.1875, lr 1.2e-04, dt 2.1s +All GPU(s): step 7866: loss 25.5781, lr 1.2e-04, dt 2.1s +All GPU(s): step 7867: loss 25.5469, lr 1.2e-04, dt 2.1s +All GPU(s): step 7868: loss 25.4844, lr 1.2e-04, dt 2.0s +All GPU(s): step 7869: loss 25.3750, lr 1.2e-04, dt 2.1s +All GPU(s): step 7870: loss 25.6094, lr 1.2e-04, dt 2.1s +All GPU(s): step 7871: loss 25.6406, lr 1.2e-04, dt 2.2s +All GPU(s): step 7872: loss 25.5312, lr 1.2e-04, dt 2.1s +All GPU(s): step 7873: loss 25.7188, lr 1.2e-04, dt 2.1s +All GPU(s): step 7874: loss 25.6094, lr 1.2e-04, dt 2.1s +All GPU(s): step 7875: loss 25.2188, lr 1.2e-04, dt 2.1s +All GPU(s): step 7876: loss 25.5156, lr 1.2e-04, dt 2.1s +All GPU(s): step 7877: loss 25.6250, lr 1.2e-04, dt 2.0s +All GPU(s): step 7878: loss 25.6406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7879: loss 25.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7880: loss 25.4688, lr 1.2e-04, dt 2.1s +All GPU(s): step 7881: loss 25.8438, lr 1.2e-04, dt 2.1s +All GPU(s): step 7882: loss 25.4531, lr 1.2e-04, dt 2.1s +All GPU(s): step 7883: loss 25.4062, lr 1.2e-04, dt 2.0s +All GPU(s): step 7884: loss 25.1250, lr 1.2e-04, dt 2.1s +All GPU(s): step 7885: loss 25.6875, lr 1.2e-04, dt 2.1s +All GPU(s): step 7886: loss 25.4844, lr 1.2e-04, dt 2.1s +All GPU(s): step 7887: loss 25.0625, lr 1.2e-04, dt 2.1s +All GPU(s): step 7888: loss 25.5781, lr 1.2e-04, dt 2.0s +All GPU(s): step 7889: loss 25.6094, lr 1.2e-04, dt 2.1s +All GPU(s): step 7890: loss 25.3594, lr 1.2e-04, dt 2.1s +All GPU(s): step 7891: loss 25.2969, lr 1.2e-04, dt 2.1s +All GPU(s): step 7892: loss 25.4531, lr 1.2e-04, dt 2.1s +All GPU(s): step 7893: loss 25.6250, lr 1.2e-04, dt 2.1s +All GPU(s): step 7894: loss 25.1875, lr 1.2e-04, dt 2.1s +All GPU(s): step 7895: loss 25.4375, lr 1.2e-04, dt 2.2s +All GPU(s): step 7896: loss 25.2656, lr 1.2e-04, dt 2.1s +All GPU(s): step 7897: loss 24.9688, lr 1.2e-04, dt 2.1s +All GPU(s): step 7898: loss 25.0156, lr 1.2e-04, dt 2.1s +All GPU(s): step 7899: loss 25.3125, lr 1.2e-04, dt 2.1s +All GPU(s): step 7900: loss 25.6562, lr 1.2e-04, dt 2.2s +All GPU(s): step 7901: loss 25.1250, lr 1.2e-04, dt 2.1s +All GPU(s): step 7902: loss 25.4375, lr 1.2e-04, dt 2.1s +All GPU(s): step 7903: loss 25.2031, lr 1.2e-04, dt 2.1s +All GPU(s): step 7904: loss 25.0938, lr 1.2e-04, dt 2.1s +All GPU(s): step 7905: loss 25.0469, lr 1.2e-04, dt 2.1s +All GPU(s): step 7906: loss 25.3750, lr 1.2e-04, dt 2.1s +All GPU(s): step 7907: loss 25.4375, lr 1.2e-04, dt 2.1s +All GPU(s): step 7908: loss 25.4219, lr 1.2e-04, dt 2.1s +All GPU(s): step 7909: loss 25.3125, lr 1.2e-04, dt 2.1s +All GPU(s): step 7910: loss 25.1250, lr 1.2e-04, dt 2.0s +All GPU(s): step 7911: loss 25.3594, lr 1.2e-04, dt 2.1s +All GPU(s): step 7912: loss 25.6562, lr 1.2e-04, dt 2.1s +All GPU(s): step 7913: loss 25.2812, lr 1.2e-04, dt 2.0s +All GPU(s): step 7914: loss 25.1875, lr 1.2e-04, dt 2.1s +All GPU(s): step 7915: loss 25.4062, lr 1.2e-04, dt 2.0s +All GPU(s): step 7916: loss 25.4062, lr 1.2e-04, dt 2.0s +All GPU(s): step 7917: loss 25.0312, lr 1.2e-04, dt 2.0s +All GPU(s): step 7918: loss 25.2188, lr 1.2e-04, dt 2.1s +All GPU(s): step 7919: loss 25.0156, lr 1.2e-04, dt 2.1s +All GPU(s): step 7920: loss 25.2969, lr 1.2e-04, dt 2.1s +All GPU(s): step 7921: loss 25.2656, lr 1.2e-04, dt 2.1s +All GPU(s): step 7922: loss 25.3750, lr 1.2e-04, dt 2.1s +All GPU(s): step 7923: loss 25.4062, lr 1.2e-04, dt 2.1s +All GPU(s): step 7924: loss 25.1250, lr 1.2e-04, dt 2.2s +All GPU(s): step 7925: loss 25.1562, lr 1.2e-04, dt 2.1s +All GPU(s): step 7926: loss 25.4688, lr 1.2e-04, dt 2.1s +All GPU(s): step 7927: loss 25.0469, lr 1.2e-04, dt 2.0s +All GPU(s): step 7928: loss 25.2188, lr 1.2e-04, dt 2.1s +All GPU(s): step 7929: loss 25.0469, lr 1.2e-04, dt 2.1s +All GPU(s): step 7930: loss 25.2500, lr 1.2e-04, dt 2.1s +All GPU(s): step 7931: loss 24.9688, lr 1.2e-04, dt 2.1s +All GPU(s): step 7932: loss 25.2969, lr 1.2e-04, dt 2.1s +All GPU(s): step 7933: loss 25.1562, lr 1.2e-04, dt 2.1s +All GPU(s): step 7934: loss 25.2812, lr 1.2e-04, dt 2.1s +All GPU(s): step 7935: loss 25.2812, lr 1.2e-04, dt 2.1s +All GPU(s): step 7936: loss 25.5625, lr 1.2e-04, dt 2.1s +All GPU(s): step 7937: loss 25.5156, lr 1.2e-04, dt 2.1s +All GPU(s): step 7938: loss 25.3594, lr 1.2e-04, dt 2.1s +All GPU(s): step 7939: loss 25.4375, lr 1.2e-04, dt 2.1s +All GPU(s): step 7940: loss 25.2188, lr 1.2e-04, dt 2.0s +All GPU(s): step 7941: loss 25.2969, lr 1.2e-04, dt 2.0s +All GPU(s): step 7942: loss 25.4219, lr 1.2e-04, dt 2.1s +All GPU(s): step 7943: loss 25.3438, lr 1.2e-04, dt 2.2s +All GPU(s): step 7944: loss 25.2031, lr 1.2e-04, dt 2.1s +All GPU(s): step 7945: loss 25.2188, lr 1.2e-04, dt 2.1s +All GPU(s): step 7946: loss 24.9219, lr 1.2e-04, dt 2.0s +All GPU(s): step 7947: loss 25.4062, lr 1.2e-04, dt 2.1s +All GPU(s): step 7948: loss 25.2188, lr 1.2e-04, dt 2.1s +All GPU(s): step 7949: loss 25.0312, lr 1.2e-04, dt 2.1s +All GPU(s): step 7950: loss 25.4219, lr 1.2e-04, dt 2.0s +All GPU(s): step 7951: loss 25.0781, lr 1.2e-04, dt 2.1s +All GPU(s): step 7952: loss 25.3125, lr 1.2e-04, dt 2.1s +All GPU(s): step 7953: loss 25.4531, lr 1.2e-04, dt 2.1s +All GPU(s): step 7954: loss 25.1406, lr 1.2e-04, dt 2.1s +All GPU(s): step 7955: loss 25.4375, lr 1.2e-04, dt 2.0s +All GPU(s): step 7956: loss 25.2656, lr 1.2e-04, dt 2.0s +All GPU(s): step 7957: loss 25.4688, lr 1.2e-04, dt 2.1s +All GPU(s): step 7958: loss 25.3750, lr 1.2e-04, dt 2.1s +All GPU(s): step 7959: loss 25.1094, lr 1.2e-04, dt 2.1s +All GPU(s): step 7960: loss 25.2344, lr 1.2e-04, dt 2.1s +All GPU(s): step 7961: loss 25.1719, lr 1.2e-04, dt 2.0s +All GPU(s): step 7962: loss 25.3750, lr 1.2e-04, dt 2.1s +All GPU(s): step 7963: loss 25.0625, lr 1.2e-04, dt 2.0s +All GPU(s): step 7964: loss 25.2969, lr 1.2e-04, dt 2.0s +All GPU(s): step 7965: loss 25.4062, lr 1.2e-04, dt 2.0s +All GPU(s): step 7966: loss 25.1094, lr 1.2e-04, dt 2.0s +All GPU(s): step 7967: loss 25.3906, lr 1.2e-04, dt 2.1s +All GPU(s): step 7968: loss 25.0156, lr 1.2e-04, dt 2.0s +All GPU(s): step 7969: loss 25.0469, lr 1.2e-04, dt 2.1s +All GPU(s): step 7970: loss 25.0000, lr 1.2e-04, dt 2.0s +All GPU(s): step 7971: loss 25.1250, lr 1.2e-04, dt 2.0s +All GPU(s): step 7972: loss 25.1094, lr 1.2e-04, dt 2.2s +All GPU(s): step 7973: loss 24.9688, lr 1.2e-04, dt 2.1s +All GPU(s): step 7974: loss 25.0938, lr 1.2e-04, dt 2.0s +All GPU(s): step 7975: loss 24.9531, lr 1.2e-04, dt 2.0s +All GPU(s): step 7976: loss 25.1406, lr 1.2e-04, dt 2.0s +All GPU(s): step 7977: loss 25.0781, lr 1.2e-04, dt 2.1s +All GPU(s): step 7978: loss 25.3750, lr 1.2e-04, dt 2.0s +All GPU(s): step 7979: loss 25.1562, lr 1.2e-04, dt 2.0s +All GPU(s): step 7980: loss 25.0156, lr 1.2e-04, dt 2.0s +All GPU(s): step 7981: loss 25.3125, lr 1.2e-04, dt 2.1s +All GPU(s): step 7982: loss 25.2812, lr 1.2e-04, dt 2.1s +All GPU(s): step 7983: loss 25.1875, lr 1.1e-04, dt 2.1s +All GPU(s): step 7984: loss 25.2031, lr 1.1e-04, dt 2.0s +All GPU(s): step 7985: loss 25.3594, lr 1.1e-04, dt 2.0s +All GPU(s): step 7986: loss 25.1875, lr 1.1e-04, dt 2.1s +All GPU(s): step 7987: loss 25.0625, lr 1.1e-04, dt 2.1s +All GPU(s): step 7988: loss 24.8594, lr 1.1e-04, dt 2.0s +All GPU(s): step 7989: loss 25.2188, lr 1.1e-04, dt 2.0s +All GPU(s): step 7990: loss 25.2031, lr 1.1e-04, dt 2.1s +All GPU(s): step 7991: loss 25.2031, lr 1.1e-04, dt 2.1s +All GPU(s): step 7992: loss 25.2031, lr 1.1e-04, dt 2.1s +All GPU(s): step 7993: loss 25.5156, lr 1.1e-04, dt 2.1s +All GPU(s): step 7994: loss 25.2812, lr 1.1e-04, dt 2.1s +All GPU(s): step 7995: loss 25.3750, lr 1.1e-04, dt 2.1s +All GPU(s): step 7996: loss 25.2031, lr 1.1e-04, dt 2.1s +All GPU(s): step 7997: loss 25.1719, lr 1.1e-04, dt 2.0s +All GPU(s): step 7998: loss 24.9688, lr 1.1e-04, dt 2.0s +All GPU(s): step 7999: loss 25.2031, lr 1.1e-04, dt 2.0s +saving checkpoint to checkpoints/ckpt_8000.pt +All GPU(s): step 8000: loss 25.0469, lr 1.1e-04, dt 2.1s +All GPU(s): step 8001: loss 25.2031, lr 1.1e-04, dt 2.1s +All GPU(s): step 8002: loss 25.4219, lr 1.1e-04, dt 2.0s +All GPU(s): step 8003: loss 25.3281, lr 1.1e-04, dt 2.0s +All GPU(s): step 8004: loss 25.0938, lr 1.1e-04, dt 2.0s +All GPU(s): step 8005: loss 25.5469, lr 1.1e-04, dt 2.1s +All GPU(s): step 8006: loss 25.7812, lr 1.1e-04, dt 2.1s +All GPU(s): step 8007: loss 25.2031, lr 1.1e-04, dt 2.1s +All GPU(s): step 8008: loss 24.9375, lr 1.1e-04, dt 2.0s +All GPU(s): step 8009: loss 25.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 8010: loss 25.2188, lr 1.1e-04, dt 2.1s +All GPU(s): step 8011: loss 25.1250, lr 1.1e-04, dt 2.1s +All GPU(s): step 8012: loss 25.2344, lr 1.1e-04, dt 2.1s +All GPU(s): step 8013: loss 25.0938, lr 1.1e-04, dt 2.0s +All GPU(s): step 8014: loss 25.3125, lr 1.1e-04, dt 2.0s +All GPU(s): step 8015: loss 25.1875, lr 1.1e-04, dt 2.2s +All GPU(s): step 8016: loss 25.3125, lr 1.1e-04, dt 2.0s +All GPU(s): step 8017: loss 24.9219, lr 1.1e-04, dt 2.0s +All GPU(s): step 8018: loss 25.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 8019: loss 25.4062, lr 1.1e-04, dt 2.1s +All GPU(s): step 8020: loss 25.3594, lr 1.1e-04, dt 2.1s +All GPU(s): step 8021: loss 25.4531, lr 1.1e-04, dt 2.0s +All GPU(s): step 8022: loss 25.2344, lr 1.1e-04, dt 2.0s +All GPU(s): step 8023: loss 25.2500, lr 1.1e-04, dt 2.1s +All GPU(s): step 8024: loss 25.0781, lr 1.1e-04, dt 2.1s +All GPU(s): step 8025: loss 25.2188, lr 1.1e-04, dt 2.1s +All GPU(s): step 8026: loss 25.1250, lr 1.1e-04, dt 2.0s +All GPU(s): step 8027: loss 25.0000, lr 1.1e-04, dt 2.0s +All GPU(s): step 8028: loss 25.1875, lr 1.1e-04, dt 2.0s +All GPU(s): step 8029: loss 25.1719, lr 1.1e-04, dt 2.0s +All GPU(s): step 8030: loss 25.3125, lr 1.1e-04, dt 2.1s +All GPU(s): step 8031: loss 25.0469, lr 1.1e-04, dt 2.0s +All GPU(s): step 8032: loss 25.3438, lr 1.1e-04, dt 2.0s +All GPU(s): step 8033: loss 25.4688, lr 1.1e-04, dt 2.1s +All GPU(s): step 8034: loss 25.1406, lr 1.1e-04, dt 2.1s +All GPU(s): step 8035: loss 25.0156, lr 1.1e-04, dt 2.1s +All GPU(s): step 8036: loss 25.0781, lr 1.1e-04, dt 2.0s +All GPU(s): step 8037: loss 25.3750, lr 1.1e-04, dt 2.0s +All GPU(s): step 8038: loss 25.2812, lr 1.1e-04, dt 2.0s +All GPU(s): step 8039: loss 25.1719, lr 1.1e-04, dt 2.1s +All GPU(s): step 8040: loss 25.3281, lr 1.1e-04, dt 2.0s +All GPU(s): step 8041: loss 25.3125, lr 1.1e-04, dt 2.0s +All GPU(s): step 8042: loss 25.5312, lr 1.1e-04, dt 2.0s +All GPU(s): step 8043: loss 25.1719, lr 1.1e-04, dt 2.1s +All GPU(s): step 8044: loss 25.3750, lr 1.1e-04, dt 2.1s +All GPU(s): step 8045: loss 25.0000, lr 1.1e-04, dt 2.0s +All GPU(s): step 8046: loss 25.4062, lr 1.1e-04, dt 2.0s +All GPU(s): step 8047: loss 25.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 8048: loss 25.1094, lr 1.1e-04, dt 2.0s +All GPU(s): step 8049: loss 25.1719, lr 1.1e-04, dt 2.1s +All GPU(s): step 8050: loss 25.1562, lr 1.1e-04, dt 2.0s +All GPU(s): step 8051: loss 25.3906, lr 1.1e-04, dt 2.0s +All GPU(s): step 8052: loss 25.2500, lr 1.1e-04, dt 2.0s +All GPU(s): step 8053: loss 25.1562, lr 1.1e-04, dt 2.0s +All GPU(s): step 8054: loss 25.0469, lr 1.1e-04, dt 2.2s +All GPU(s): step 8055: loss 25.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 8056: loss 25.2188, lr 1.1e-04, dt 2.0s +All GPU(s): step 8057: loss 25.3438, lr 1.1e-04, dt 2.0s +All GPU(s): step 8058: loss 25.3125, lr 1.1e-04, dt 2.1s +All GPU(s): step 8059: loss 25.0000, lr 1.1e-04, dt 2.1s +All GPU(s): step 8060: loss 25.8281, lr 1.1e-04, dt 2.0s +All GPU(s): step 8061: loss 25.1562, lr 1.1e-04, dt 2.0s +All GPU(s): step 8062: loss 25.4219, lr 1.1e-04, dt 2.0s +All GPU(s): step 8063: loss 25.2656, lr 1.1e-04, dt 2.1s +All GPU(s): step 8064: loss 25.1719, lr 1.1e-04, dt 2.1s +All GPU(s): step 8065: loss 24.9375, lr 1.1e-04, dt 2.0s +All GPU(s): step 8066: loss 24.9375, lr 1.1e-04, dt 2.0s +All GPU(s): step 8067: loss 25.0781, lr 1.1e-04, dt 2.0s +All GPU(s): step 8068: loss 24.7656, lr 1.1e-04, dt 2.1s +All GPU(s): step 8069: loss 25.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 8070: loss 24.9531, lr 1.1e-04, dt 2.0s +All GPU(s): step 8071: loss 24.7656, lr 1.1e-04, dt 2.0s +All GPU(s): step 8072: loss 24.6719, lr 1.1e-04, dt 2.0s +All GPU(s): step 8073: loss 24.6094, lr 1.1e-04, dt 2.1s +All GPU(s): step 8074: loss 24.6406, lr 1.1e-04, dt 2.0s +All GPU(s): step 8075: loss 24.3594, lr 1.1e-04, dt 2.0s +All GPU(s): step 8076: loss 24.2656, lr 1.1e-04, dt 2.1s +All GPU(s): step 8077: loss 24.6250, lr 1.1e-04, dt 2.1s +All GPU(s): step 8078: loss 24.0625, lr 1.1e-04, dt 2.1s +All GPU(s): step 8079: loss 24.0938, lr 1.1e-04, dt 2.0s +All GPU(s): step 8080: loss 23.6875, lr 1.1e-04, dt 2.0s +All GPU(s): step 8081: loss 23.5312, lr 1.1e-04, dt 2.0s +All GPU(s): step 8082: loss 23.8594, lr 1.1e-04, dt 2.1s +All GPU(s): step 8083: loss 23.8125, lr 1.1e-04, dt 2.1s +All GPU(s): step 8084: loss 23.8594, lr 1.1e-04, dt 2.1s +All GPU(s): step 8085: loss 23.7656, lr 1.1e-04, dt 2.1s +All GPU(s): step 8086: loss 23.8594, lr 1.1e-04, dt 2.1s +All GPU(s): step 8087: loss 24.1250, lr 1.1e-04, dt 2.1s +All GPU(s): step 8088: loss 24.0156, lr 1.1e-04, dt 2.1s +All GPU(s): step 8089: loss 24.1094, lr 1.1e-04, dt 2.0s +All GPU(s): step 8090: loss 24.0469, lr 1.1e-04, dt 2.0s +All GPU(s): step 8091: loss 24.3281, lr 1.1e-04, dt 2.0s +All GPU(s): step 8092: loss 24.5000, lr 1.1e-04, dt 2.1s +All GPU(s): step 8093: loss 24.6406, lr 1.1e-04, dt 2.1s +All GPU(s): step 8094: loss 24.5625, lr 1.1e-04, dt 2.1s +All GPU(s): step 8095: loss 24.3281, lr 1.1e-04, dt 2.0s +All GPU(s): step 8096: loss 24.5938, lr 1.1e-04, dt 2.1s +All GPU(s): step 8097: loss 24.4688, lr 1.1e-04, dt 2.2s +All GPU(s): step 8098: loss 24.8906, lr 1.1e-04, dt 2.1s +All GPU(s): step 8099: loss 25.0625, lr 1.1e-04, dt 2.0s +All GPU(s): step 8100: loss 24.5156, lr 1.1e-04, dt 2.1s +All GPU(s): step 8101: loss 24.5312, lr 1.1e-04, dt 2.1s +All GPU(s): step 8102: loss 24.3281, lr 1.1e-04, dt 2.2s +All GPU(s): step 8103: loss 24.6406, lr 1.1e-04, dt 2.0s +All GPU(s): step 8104: loss 24.5469, lr 1.1e-04, dt 2.0s +All GPU(s): step 8105: loss 24.3281, lr 1.1e-04, dt 2.0s +All GPU(s): step 8106: loss 24.5625, lr 1.1e-04, dt 2.1s +All GPU(s): step 8107: loss 24.4531, lr 1.1e-04, dt 2.1s +All GPU(s): step 8108: loss 24.0625, lr 1.1e-04, dt 2.1s +All GPU(s): step 8109: loss 24.2656, lr 1.1e-04, dt 2.0s +All GPU(s): step 8110: loss 24.2344, lr 1.1e-04, dt 2.0s +All GPU(s): step 8111: loss 24.0625, lr 1.1e-04, dt 2.1s +All GPU(s): step 8112: loss 23.9688, lr 1.1e-04, dt 2.1s +All GPU(s): step 8113: loss 24.0625, lr 1.1e-04, dt 2.0s +All GPU(s): step 8114: loss 23.8438, lr 1.1e-04, dt 2.0s +All GPU(s): step 8115: loss 23.9531, lr 1.1e-04, dt 2.1s +All GPU(s): step 8116: loss 23.5000, lr 1.1e-04, dt 2.1s +All GPU(s): step 8117: loss 23.9375, lr 1.1e-04, dt 2.1s +All GPU(s): step 8118: loss 24.0312, lr 1.1e-04, dt 2.1s +All GPU(s): step 8119: loss 24.1250, lr 1.1e-04, dt 2.0s +All GPU(s): step 8120: loss 24.0625, lr 1.1e-04, dt 2.0s +All GPU(s): step 8121: loss 24.2344, lr 1.1e-04, dt 2.2s +All GPU(s): step 8122: loss 24.1719, lr 1.1e-04, dt 2.1s +All GPU(s): step 8123: loss 24.2188, lr 1.1e-04, dt 2.1s +All GPU(s): step 8124: loss 23.9219, lr 1.1e-04, dt 2.0s +All GPU(s): step 8125: loss 24.0938, lr 1.1e-04, dt 2.1s +All GPU(s): step 8126: loss 24.1875, lr 1.1e-04, dt 2.2s +All GPU(s): step 8127: loss 24.0000, lr 1.1e-04, dt 2.1s +All GPU(s): step 8128: loss 24.3281, lr 1.1e-04, dt 2.1s +All GPU(s): step 8129: loss 24.3750, lr 1.1e-04, dt 2.0s +All GPU(s): step 8130: loss 24.0469, lr 1.1e-04, dt 2.1s +All GPU(s): step 8131: loss 24.2969, lr 1.1e-04, dt 2.1s +All GPU(s): step 8132: loss 24.3906, lr 1.1e-04, dt 2.0s +All GPU(s): step 8133: loss 24.2969, lr 1.1e-04, dt 2.0s +All GPU(s): step 8134: loss 24.5312, lr 1.1e-04, dt 2.0s +All GPU(s): step 8135: loss 25.0312, lr 1.1e-04, dt 2.1s +All GPU(s): step 8136: loss 24.4219, lr 1.1e-04, dt 2.1s +All GPU(s): step 8137: loss 24.8125, lr 1.1e-04, dt 2.1s +All GPU(s): step 8138: loss 24.5781, lr 1.1e-04, dt 2.1s +All GPU(s): step 8139: loss 24.5469, lr 1.1e-04, dt 2.1s +All GPU(s): step 8140: loss 24.4375, lr 1.1e-04, dt 2.1s +All GPU(s): step 8141: loss 25.0156, lr 1.1e-04, dt 2.1s +All GPU(s): step 8142: loss 24.8594, lr 1.1e-04, dt 2.1s +All GPU(s): step 8143: loss 24.6094, lr 1.1e-04, dt 2.1s +All GPU(s): step 8144: loss 24.8281, lr 1.1e-04, dt 2.1s +All GPU(s): step 8145: loss 24.8594, lr 1.1e-04, dt 2.1s +All GPU(s): step 8146: loss 25.0781, lr 1.1e-04, dt 2.1s +All GPU(s): step 8147: loss 24.6094, lr 1.1e-04, dt 2.0s +All GPU(s): step 8148: loss 25.1250, lr 1.1e-04, dt 2.0s +All GPU(s): step 8149: loss 24.9375, lr 1.1e-04, dt 2.1s +All GPU(s): step 8150: loss 24.9688, lr 1.1e-04, dt 2.1s +All GPU(s): step 8151: loss 24.7500, lr 1.1e-04, dt 2.0s +All GPU(s): step 8152: loss 24.5312, lr 1.1e-04, dt 2.0s +All GPU(s): step 8153: loss 24.8750, lr 1.1e-04, dt 2.0s +All GPU(s): step 8154: loss 24.5312, lr 1.1e-04, dt 2.1s +All GPU(s): step 8155: loss 24.6875, lr 1.1e-04, dt 2.1s +All GPU(s): step 8156: loss 24.3750, lr 1.1e-04, dt 2.0s +All GPU(s): step 8157: loss 24.1875, lr 1.1e-04, dt 2.0s +All GPU(s): step 8158: loss 24.4531, lr 1.1e-04, dt 2.0s +All GPU(s): step 8159: loss 24.6719, lr 1.1e-04, dt 2.0s +All GPU(s): step 8160: loss 24.6562, lr 1.1e-04, dt 2.1s +All GPU(s): step 8161: loss 24.4219, lr 1.1e-04, dt 2.0s +All GPU(s): step 8162: loss 24.5312, lr 1.1e-04, dt 2.0s +All GPU(s): step 8163: loss 24.4531, lr 1.1e-04, dt 2.1s +All GPU(s): step 8164: loss 24.8750, lr 1.1e-04, dt 2.1s +All GPU(s): step 8165: loss 24.6094, lr 1.1e-04, dt 2.1s +All GPU(s): step 8166: loss 24.6875, lr 1.1e-04, dt 2.0s +All GPU(s): step 8167: loss 24.6875, lr 1.1e-04, dt 2.0s +All GPU(s): step 8168: loss 24.4688, lr 1.1e-04, dt 2.0s +All GPU(s): step 8169: loss 24.7031, lr 1.1e-04, dt 2.1s +All GPU(s): step 8170: loss 24.6562, lr 1.1e-04, dt 2.0s +All GPU(s): step 8171: loss 24.7656, lr 1.1e-04, dt 2.1s +All GPU(s): step 8172: loss 24.4375, lr 1.1e-04, dt 2.0s +All GPU(s): step 8173: loss 24.5938, lr 1.1e-04, dt 2.0s +All GPU(s): step 8174: loss 24.4219, lr 1.1e-04, dt 2.1s +All GPU(s): step 8175: loss 24.5938, lr 1.1e-04, dt 2.0s +All GPU(s): step 8176: loss 24.3594, lr 1.1e-04, dt 2.0s +All GPU(s): step 8177: loss 24.2812, lr 1.1e-04, dt 2.0s +All GPU(s): step 8178: loss 24.1719, lr 1.1e-04, dt 2.1s +All GPU(s): step 8179: loss 24.2188, lr 1.1e-04, dt 2.2s +All GPU(s): step 8180: loss 24.2969, lr 1.1e-04, dt 2.0s +All GPU(s): step 8181: loss 24.4375, lr 1.1e-04, dt 2.1s +All GPU(s): step 8182: loss 24.0781, lr 1.1e-04, dt 2.1s +All GPU(s): step 8183: loss 24.5938, lr 1.1e-04, dt 2.0s +All GPU(s): step 8184: loss 24.5312, lr 1.1e-04, dt 2.1s +All GPU(s): step 8185: loss 24.6094, lr 1.1e-04, dt 2.0s +All GPU(s): step 8186: loss 24.6875, lr 1.1e-04, dt 2.1s +All GPU(s): step 8187: loss 24.6562, lr 1.1e-04, dt 2.1s +All GPU(s): step 8188: loss 24.8281, lr 1.1e-04, dt 2.1s +All GPU(s): step 8189: loss 25.0938, lr 1.1e-04, dt 2.1s +All GPU(s): step 8190: loss 25.0469, lr 1.1e-04, dt 2.0s +All GPU(s): step 8191: loss 25.0781, lr 1.1e-04, dt 2.0s +All GPU(s): step 8192: loss 25.2500, lr 1.1e-04, dt 2.0s +All GPU(s): step 8193: loss 25.2344, lr 1.1e-04, dt 2.1s +All GPU(s): step 8194: loss 25.2500, lr 1.1e-04, dt 2.1s +All GPU(s): step 8195: loss 25.1406, lr 1.1e-04, dt 2.0s +All GPU(s): step 8196: loss 24.9688, lr 1.1e-04, dt 2.0s +All GPU(s): step 8197: loss 25.0469, lr 1.1e-04, dt 2.1s +All GPU(s): step 8198: loss 25.1719, lr 1.1e-04, dt 2.1s +All GPU(s): step 8199: loss 25.3750, lr 1.1e-04, dt 2.1s +All GPU(s): step 8200: loss 25.1094, lr 1.1e-04, dt 2.1s +All GPU(s): step 8201: loss 25.1250, lr 1.1e-04, dt 2.1s +All GPU(s): step 8202: loss 25.1094, lr 1.1e-04, dt 2.1s +All GPU(s): step 8203: loss 25.0312, lr 1.1e-04, dt 2.1s +All GPU(s): step 8204: loss 25.1094, lr 1.1e-04, dt 2.0s +All GPU(s): step 8205: loss 25.4219, lr 1.1e-04, dt 2.0s +All GPU(s): step 8206: loss 25.0781, lr 1.1e-04, dt 2.0s +All GPU(s): step 8207: loss 25.0312, lr 1.0e-04, dt 2.1s +All GPU(s): step 8208: loss 25.1406, lr 1.0e-04, dt 2.2s +All GPU(s): step 8209: loss 25.2656, lr 1.0e-04, dt 2.0s +All GPU(s): step 8210: loss 25.0312, lr 1.0e-04, dt 2.0s +All GPU(s): step 8211: loss 25.3594, lr 1.0e-04, dt 2.0s +All GPU(s): step 8212: loss 25.0781, lr 1.0e-04, dt 2.0s +All GPU(s): step 8213: loss 25.5781, lr 1.0e-04, dt 2.1s +All GPU(s): step 8214: loss 25.0000, lr 1.0e-04, dt 2.0s +All GPU(s): step 8215: loss 25.2188, lr 1.0e-04, dt 2.0s +All GPU(s): step 8216: loss 25.3125, lr 1.0e-04, dt 2.0s +All GPU(s): step 8217: loss 25.1562, lr 1.0e-04, dt 2.1s +All GPU(s): step 8218: loss 24.9062, lr 1.0e-04, dt 2.1s +All GPU(s): step 8219: loss 25.2969, lr 1.0e-04, dt 2.1s +All GPU(s): step 8220: loss 24.9062, lr 1.0e-04, dt 2.0s +All GPU(s): step 8221: loss 24.9688, lr 1.0e-04, dt 2.0s +All GPU(s): step 8222: loss 24.9531, lr 1.0e-04, dt 2.2s +All GPU(s): step 8223: loss 24.8594, lr 1.0e-04, dt 2.1s +All GPU(s): step 8224: loss 25.0469, lr 1.0e-04, dt 2.0s +All GPU(s): step 8225: loss 24.8750, lr 1.0e-04, dt 2.0s +All GPU(s): step 8226: loss 25.0625, lr 1.0e-04, dt 2.0s +All GPU(s): step 8227: loss 25.0625, lr 1.0e-04, dt 2.2s +All GPU(s): step 8228: loss 24.6250, lr 1.0e-04, dt 2.0s +All GPU(s): step 8229: loss 25.0938, lr 1.0e-04, dt 2.0s +All GPU(s): step 8230: loss 25.0781, lr 1.0e-04, dt 2.0s +All GPU(s): step 8231: loss 24.7812, lr 1.0e-04, dt 2.0s +All GPU(s): step 8232: loss 24.8906, lr 1.0e-04, dt 2.1s +All GPU(s): step 8233: loss 25.2500, lr 1.0e-04, dt 2.0s +All GPU(s): step 8234: loss 24.9375, lr 1.0e-04, dt 2.0s +All GPU(s): step 8235: loss 25.0938, lr 1.0e-04, dt 2.0s +All GPU(s): step 8236: loss 25.2188, lr 1.0e-04, dt 2.1s +All GPU(s): step 8237: loss 25.0781, lr 1.0e-04, dt 2.1s +All GPU(s): step 8238: loss 24.8750, lr 1.0e-04, dt 2.1s +All GPU(s): step 8239: loss 25.2188, lr 1.0e-04, dt 2.0s +All GPU(s): step 8240: loss 25.0312, lr 1.0e-04, dt 2.0s +All GPU(s): step 8241: loss 25.4688, lr 1.0e-04, dt 2.1s +All GPU(s): step 8242: loss 24.8594, lr 1.0e-04, dt 2.1s +All GPU(s): step 8243: loss 24.9219, lr 1.0e-04, dt 2.1s +All GPU(s): step 8244: loss 25.0625, lr 1.0e-04, dt 2.0s +All GPU(s): step 8245: loss 25.0781, lr 1.0e-04, dt 2.1s +All GPU(s): step 8246: loss 25.2344, lr 1.0e-04, dt 2.1s +All GPU(s): step 8247: loss 24.9375, lr 1.0e-04, dt 2.1s +All GPU(s): step 8248: loss 25.2656, lr 1.0e-04, dt 2.0s +All GPU(s): step 8249: loss 25.0469, lr 1.0e-04, dt 2.0s +All GPU(s): step 8250: loss 25.2031, lr 1.0e-04, dt 2.1s +All GPU(s): step 8251: loss 24.9219, lr 1.0e-04, dt 2.1s +All GPU(s): step 8252: loss 25.1094, lr 1.0e-04, dt 2.1s +All GPU(s): step 8253: loss 24.8594, lr 1.0e-04, dt 2.1s +All GPU(s): step 8254: loss 24.7500, lr 1.0e-04, dt 2.1s +All GPU(s): step 8255: loss 24.9219, lr 1.0e-04, dt 2.0s +All GPU(s): step 8256: loss 24.5312, lr 1.0e-04, dt 2.2s +All GPU(s): step 8257: loss 24.6406, lr 1.0e-04, dt 2.1s +All GPU(s): step 8258: loss 24.5625, lr 1.0e-04, dt 2.1s +All GPU(s): step 8259: loss 24.2500, lr 1.0e-04, dt 2.1s +All GPU(s): step 8260: loss 24.6562, lr 1.0e-04, dt 2.1s +All GPU(s): step 8261: loss 24.4531, lr 1.0e-04, dt 2.2s +All GPU(s): step 8262: loss 24.3750, lr 1.0e-04, dt 2.1s +All GPU(s): step 8263: loss 24.5156, lr 1.0e-04, dt 2.0s +All GPU(s): step 8264: loss 24.6250, lr 1.0e-04, dt 2.0s +All GPU(s): step 8265: loss 24.6406, lr 1.0e-04, dt 2.1s +All GPU(s): step 8266: loss 24.6094, lr 1.0e-04, dt 2.1s +All GPU(s): step 8267: loss 24.6562, lr 1.0e-04, dt 2.0s +All GPU(s): step 8268: loss 24.7656, lr 1.0e-04, dt 2.0s +All GPU(s): step 8269: loss 24.4531, lr 1.0e-04, dt 2.0s +All GPU(s): step 8270: loss 24.7812, lr 1.0e-04, dt 2.1s +All GPU(s): step 8271: loss 24.6094, lr 1.0e-04, dt 2.1s +All GPU(s): step 8272: loss 25.0156, lr 1.0e-04, dt 2.1s +All GPU(s): step 8273: loss 25.0938, lr 1.0e-04, dt 2.1s +All GPU(s): step 8274: loss 25.1562, lr 1.0e-04, dt 2.1s +All GPU(s): step 8275: loss 25.2812, lr 1.0e-04, dt 2.1s +All GPU(s): step 8276: loss 25.2969, lr 1.0e-04, dt 2.0s +All GPU(s): step 8277: loss 25.2656, lr 1.0e-04, dt 2.0s +All GPU(s): step 8278: loss 25.0625, lr 1.0e-04, dt 2.0s +All GPU(s): step 8279: loss 25.2344, lr 1.0e-04, dt 2.1s +All GPU(s): step 8280: loss 25.0156, lr 1.0e-04, dt 2.1s +All GPU(s): step 8281: loss 25.0781, lr 1.0e-04, dt 2.1s +All GPU(s): step 8282: loss 25.2656, lr 1.0e-04, dt 2.0s +All GPU(s): step 8283: loss 25.1250, lr 1.0e-04, dt 2.1s +All GPU(s): step 8284: loss 25.2500, lr 1.0e-04, dt 2.1s +All GPU(s): step 8285: loss 25.0156, lr 1.0e-04, dt 2.1s +All GPU(s): step 8286: loss 25.2656, lr 1.0e-04, dt 2.0s +All GPU(s): step 8287: loss 25.0625, lr 1.0e-04, dt 2.0s +All GPU(s): step 8288: loss 24.7812, lr 1.0e-04, dt 2.0s +All GPU(s): step 8289: loss 24.8281, lr 1.0e-04, dt 2.1s +All GPU(s): step 8290: loss 24.8594, lr 1.0e-04, dt 2.1s +All GPU(s): step 8291: loss 25.0312, lr 1.0e-04, dt 2.1s +All GPU(s): step 8292: loss 24.9531, lr 1.0e-04, dt 2.1s +All GPU(s): step 8293: loss 25.2812, lr 1.0e-04, dt 2.0s +All GPU(s): step 8294: loss 24.7188, lr 1.0e-04, dt 2.1s +All GPU(s): step 8295: loss 24.9375, lr 1.0e-04, dt 2.0s +All GPU(s): step 8296: loss 24.7969, lr 1.0e-04, dt 2.1s +All GPU(s): step 8297: loss 24.9844, lr 1.0e-04, dt 2.0s +All GPU(s): step 8298: loss 24.7812, lr 1.0e-04, dt 2.0s +All GPU(s): step 8299: loss 24.4844, lr 1.0e-04, dt 2.1s +All GPU(s): step 8300: loss 24.7656, lr 1.0e-04, dt 2.0s +All GPU(s): step 8301: loss 24.6094, lr 1.0e-04, dt 2.0s +All GPU(s): step 8302: loss 24.7656, lr 1.0e-04, dt 2.0s +All GPU(s): step 8303: loss 24.9062, lr 1.0e-04, dt 2.0s +All GPU(s): step 8304: loss 25.0156, lr 1.0e-04, dt 2.1s +All GPU(s): step 8305: loss 24.8594, lr 1.0e-04, dt 2.1s +All GPU(s): step 8306: loss 24.7500, lr 1.0e-04, dt 2.1s +All GPU(s): step 8307: loss 24.7969, lr 1.0e-04, dt 2.1s +All GPU(s): step 8308: loss 25.0625, lr 1.0e-04, dt 2.1s +All GPU(s): step 8309: loss 24.7500, lr 1.0e-04, dt 2.2s +All GPU(s): step 8310: loss 24.8281, lr 1.0e-04, dt 2.1s +All GPU(s): step 8311: loss 24.9062, lr 1.0e-04, dt 2.1s +All GPU(s): step 8312: loss 24.8594, lr 1.0e-04, dt 2.1s +All GPU(s): step 8313: loss 25.0000, lr 1.0e-04, dt 2.1s +All GPU(s): step 8314: loss 24.5938, lr 1.0e-04, dt 2.1s +All GPU(s): step 8315: loss 24.9219, lr 1.0e-04, dt 2.1s +All GPU(s): step 8316: loss 24.9219, lr 1.0e-04, dt 2.1s +All GPU(s): step 8317: loss 24.8438, lr 1.0e-04, dt 2.1s +All GPU(s): step 8318: loss 25.0938, lr 1.0e-04, dt 2.1s +All GPU(s): step 8319: loss 24.9219, lr 1.0e-04, dt 2.0s +All GPU(s): step 8320: loss 25.2344, lr 1.0e-04, dt 2.0s +All GPU(s): step 8321: loss 24.9219, lr 1.0e-04, dt 2.1s +All GPU(s): step 8322: loss 25.2344, lr 1.0e-04, dt 2.0s +All GPU(s): step 8323: loss 24.9688, lr 1.0e-04, dt 2.1s +All GPU(s): step 8324: loss 24.7344, lr 1.0e-04, dt 2.0s +All GPU(s): step 8325: loss 24.7031, lr 1.0e-04, dt 2.1s +All GPU(s): step 8326: loss 24.9375, lr 1.0e-04, dt 2.1s +All GPU(s): step 8327: loss 25.0781, lr 1.0e-04, dt 2.1s +All GPU(s): step 8328: loss 24.7969, lr 1.0e-04, dt 2.1s +All GPU(s): step 8329: loss 24.7031, lr 1.0e-04, dt 2.1s +All GPU(s): step 8330: loss 25.0469, lr 1.0e-04, dt 2.1s +All GPU(s): step 8331: loss 24.6562, lr 1.0e-04, dt 2.1s +All GPU(s): step 8332: loss 24.5469, lr 1.0e-04, dt 2.1s +All GPU(s): step 8333: loss 24.7500, lr 1.0e-04, dt 2.2s +All GPU(s): step 8334: loss 24.9219, lr 1.0e-04, dt 2.0s +All GPU(s): step 8335: loss 24.6250, lr 9.9e-05, dt 2.0s +All GPU(s): step 8336: loss 24.9219, lr 9.9e-05, dt 2.0s +All GPU(s): step 8337: loss 24.6094, lr 9.9e-05, dt 2.1s +All GPU(s): step 8338: loss 24.6250, lr 9.9e-05, dt 2.1s +All GPU(s): step 8339: loss 24.9844, lr 9.9e-05, dt 2.1s +All GPU(s): step 8340: loss 24.7969, lr 9.9e-05, dt 2.1s +All GPU(s): step 8341: loss 24.7500, lr 9.9e-05, dt 2.1s +All GPU(s): step 8342: loss 24.9531, lr 9.9e-05, dt 2.1s +All GPU(s): step 8343: loss 25.1406, lr 9.9e-05, dt 2.0s +All GPU(s): step 8344: loss 25.2344, lr 9.9e-05, dt 2.0s +All GPU(s): step 8345: loss 25.2969, lr 9.9e-05, dt 2.0s +All GPU(s): step 8346: loss 25.1562, lr 9.9e-05, dt 2.0s +All GPU(s): step 8347: loss 25.2500, lr 9.9e-05, dt 2.1s +All GPU(s): step 8348: loss 24.9688, lr 9.9e-05, dt 2.0s +All GPU(s): step 8349: loss 24.8750, lr 9.9e-05, dt 2.0s +All GPU(s): step 8350: loss 24.6406, lr 9.9e-05, dt 2.0s +All GPU(s): step 8351: loss 24.8281, lr 9.9e-05, dt 2.0s +All GPU(s): step 8352: loss 24.2031, lr 9.9e-05, dt 2.2s +All GPU(s): step 8353: loss 24.3438, lr 9.9e-05, dt 2.0s +All GPU(s): step 8354: loss 23.8594, lr 9.9e-05, dt 2.0s +All GPU(s): step 8355: loss 23.9844, lr 9.9e-05, dt 2.0s +All GPU(s): step 8356: loss 24.0469, lr 9.9e-05, dt 2.0s +All GPU(s): step 8357: loss 23.8281, lr 9.9e-05, dt 2.2s +All GPU(s): step 8358: loss 23.9844, lr 9.9e-05, dt 2.1s +All GPU(s): step 8359: loss 24.0625, lr 9.8e-05, dt 2.1s +All GPU(s): step 8360: loss 24.0312, lr 9.8e-05, dt 2.1s +All GPU(s): step 8361: loss 24.1094, lr 9.8e-05, dt 2.0s +All GPU(s): step 8362: loss 23.8281, lr 9.8e-05, dt 2.1s +All GPU(s): step 8363: loss 23.9844, lr 9.8e-05, dt 2.0s +All GPU(s): step 8364: loss 23.8438, lr 9.8e-05, dt 2.0s +All GPU(s): step 8365: loss 24.0000, lr 9.8e-05, dt 2.1s +All GPU(s): step 8366: loss 24.1875, lr 9.8e-05, dt 2.1s +All GPU(s): step 8367: loss 24.0156, lr 9.8e-05, dt 2.1s +All GPU(s): step 8368: loss 24.3281, lr 9.8e-05, dt 2.1s +All GPU(s): step 8369: loss 24.5938, lr 9.8e-05, dt 2.1s +All GPU(s): step 8370: loss 24.8281, lr 9.8e-05, dt 2.1s +All GPU(s): step 8371: loss 24.6875, lr 9.8e-05, dt 2.2s +All GPU(s): step 8372: loss 24.1719, lr 9.8e-05, dt 2.1s +All GPU(s): step 8373: loss 24.8750, lr 9.8e-05, dt 2.1s +All GPU(s): step 8374: loss 24.9062, lr 9.8e-05, dt 2.1s +All GPU(s): step 8375: loss 24.4688, lr 9.8e-05, dt 2.1s +All GPU(s): step 8376: loss 24.7344, lr 9.8e-05, dt 2.2s +All GPU(s): step 8377: loss 24.5469, lr 9.8e-05, dt 2.1s +All GPU(s): step 8378: loss 24.4062, lr 9.8e-05, dt 2.1s +All GPU(s): step 8379: loss 23.7656, lr 9.8e-05, dt 2.1s +All GPU(s): step 8380: loss 23.5312, lr 9.8e-05, dt 2.1s +All GPU(s): step 8381: loss 24.2188, lr 9.8e-05, dt 2.2s +All GPU(s): step 8382: loss 23.5781, lr 9.8e-05, dt 2.1s +All GPU(s): step 8383: loss 23.8750, lr 9.7e-05, dt 2.0s +All GPU(s): step 8384: loss 23.2969, lr 9.7e-05, dt 2.0s +All GPU(s): step 8385: loss 23.4219, lr 9.7e-05, dt 2.1s +All GPU(s): step 8386: loss 23.1094, lr 9.7e-05, dt 2.1s +All GPU(s): step 8387: loss 23.2812, lr 9.7e-05, dt 2.1s +All GPU(s): step 8388: loss 23.3281, lr 9.7e-05, dt 2.0s +All GPU(s): step 8389: loss 23.2188, lr 9.7e-05, dt 2.0s +All GPU(s): step 8390: loss 23.3125, lr 9.7e-05, dt 2.1s +All GPU(s): step 8391: loss 23.4844, lr 9.7e-05, dt 2.1s +All GPU(s): step 8392: loss 23.2500, lr 9.7e-05, dt 2.0s +All GPU(s): step 8393: loss 23.1562, lr 9.7e-05, dt 2.0s +All GPU(s): step 8394: loss 23.2031, lr 9.7e-05, dt 2.0s +All GPU(s): step 8395: loss 23.3438, lr 9.7e-05, dt 2.1s +All GPU(s): step 8396: loss 23.0781, lr 9.7e-05, dt 2.1s +All GPU(s): step 8397: loss 23.0312, lr 9.7e-05, dt 2.1s +All GPU(s): step 8398: loss 23.7344, lr 9.7e-05, dt 2.1s +All GPU(s): step 8399: loss 23.5469, lr 9.7e-05, dt 2.0s +All GPU(s): step 8400: loss 23.3750, lr 9.7e-05, dt 2.1s +All GPU(s): step 8401: loss 23.0000, lr 9.7e-05, dt 2.1s +All GPU(s): step 8402: loss 23.2344, lr 9.7e-05, dt 2.0s +All GPU(s): step 8403: loss 23.2812, lr 9.7e-05, dt 2.0s +All GPU(s): step 8404: loss 23.3281, lr 9.7e-05, dt 2.1s +All GPU(s): step 8405: loss 23.2500, lr 9.7e-05, dt 2.2s +All GPU(s): step 8406: loss 23.1562, lr 9.7e-05, dt 2.1s +All GPU(s): step 8407: loss 22.8750, lr 9.6e-05, dt 2.0s +All GPU(s): step 8408: loss 23.0000, lr 9.6e-05, dt 2.0s +All GPU(s): step 8409: loss 22.7656, lr 9.6e-05, dt 2.1s +All GPU(s): step 8410: loss 22.5156, lr 9.6e-05, dt 2.2s +All GPU(s): step 8411: loss 22.7656, lr 9.6e-05, dt 2.1s +All GPU(s): step 8412: loss 22.5312, lr 9.6e-05, dt 2.1s +All GPU(s): step 8413: loss 22.3750, lr 9.6e-05, dt 2.1s +All GPU(s): step 8414: loss 22.2656, lr 9.6e-05, dt 2.1s +All GPU(s): step 8415: loss 22.4062, lr 9.6e-05, dt 2.1s +All GPU(s): step 8416: loss 22.9844, lr 9.6e-05, dt 2.1s +All GPU(s): step 8417: loss 22.5781, lr 9.6e-05, dt 2.0s +All GPU(s): step 8418: loss 22.6406, lr 9.6e-05, dt 2.0s +All GPU(s): step 8419: loss 22.7188, lr 9.6e-05, dt 2.1s +All GPU(s): step 8420: loss 22.2812, lr 9.6e-05, dt 2.0s +All GPU(s): step 8421: loss 22.6094, lr 9.6e-05, dt 2.0s +All GPU(s): step 8422: loss 22.6875, lr 9.6e-05, dt 2.0s +All GPU(s): step 8423: loss 22.5469, lr 9.6e-05, dt 2.1s +All GPU(s): step 8424: loss 22.7188, lr 9.6e-05, dt 2.2s +All GPU(s): step 8425: loss 22.5938, lr 9.6e-05, dt 2.0s +All GPU(s): step 8426: loss 22.6562, lr 9.6e-05, dt 2.1s +All GPU(s): step 8427: loss 22.7031, lr 9.6e-05, dt 2.0s +All GPU(s): step 8428: loss 23.0625, lr 9.6e-05, dt 2.1s +All GPU(s): step 8429: loss 22.8125, lr 9.6e-05, dt 2.2s +All GPU(s): step 8430: loss 22.8750, lr 9.6e-05, dt 2.1s +All GPU(s): step 8431: loss 22.5625, lr 9.6e-05, dt 2.1s +All GPU(s): step 8432: loss 22.6250, lr 9.5e-05, dt 2.0s +All GPU(s): step 8433: loss 22.8906, lr 9.5e-05, dt 2.0s +All GPU(s): step 8434: loss 22.6875, lr 9.5e-05, dt 2.1s +All GPU(s): step 8435: loss 23.0000, lr 9.5e-05, dt 2.0s +All GPU(s): step 8436: loss 23.0781, lr 9.5e-05, dt 2.0s +All GPU(s): step 8437: loss 23.0469, lr 9.5e-05, dt 2.1s +All GPU(s): step 8438: loss 23.2344, lr 9.5e-05, dt 2.1s +All GPU(s): step 8439: loss 23.0781, lr 9.5e-05, dt 2.1s +All GPU(s): step 8440: loss 23.3438, lr 9.5e-05, dt 2.1s +All GPU(s): step 8441: loss 22.7812, lr 9.5e-05, dt 2.0s +All GPU(s): step 8442: loss 22.9219, lr 9.5e-05, dt 2.0s +All GPU(s): step 8443: loss 22.9688, lr 9.5e-05, dt 2.2s +All GPU(s): step 8444: loss 22.7188, lr 9.5e-05, dt 2.1s +All GPU(s): step 8445: loss 22.4688, lr 9.5e-05, dt 2.1s +All GPU(s): step 8446: loss 22.7812, lr 9.5e-05, dt 2.0s +All GPU(s): step 8447: loss 22.7812, lr 9.5e-05, dt 2.1s +All GPU(s): step 8448: loss 22.6250, lr 9.5e-05, dt 2.2s +All GPU(s): step 8449: loss 22.8438, lr 9.5e-05, dt 2.1s +All GPU(s): step 8450: loss 22.6094, lr 9.5e-05, dt 2.1s +All GPU(s): step 8451: loss 22.9688, lr 9.5e-05, dt 2.1s +All GPU(s): step 8452: loss 23.0312, lr 9.5e-05, dt 2.1s +All GPU(s): step 8453: loss 23.0938, lr 9.5e-05, dt 2.2s +All GPU(s): step 8454: loss 22.6562, lr 9.5e-05, dt 2.0s +All GPU(s): step 8455: loss 22.9219, lr 9.5e-05, dt 2.0s +All GPU(s): step 8456: loss 23.3125, lr 9.4e-05, dt 2.0s +All GPU(s): step 8457: loss 23.0625, lr 9.4e-05, dt 2.1s +All GPU(s): step 8458: loss 23.3438, lr 9.4e-05, dt 2.1s +All GPU(s): step 8459: loss 23.4062, lr 9.4e-05, dt 2.0s +All GPU(s): step 8460: loss 23.2656, lr 9.4e-05, dt 2.0s +All GPU(s): step 8461: loss 23.2656, lr 9.4e-05, dt 2.0s +All GPU(s): step 8462: loss 23.2969, lr 9.4e-05, dt 2.1s +All GPU(s): step 8463: loss 23.5938, lr 9.4e-05, dt 2.1s +All GPU(s): step 8464: loss 23.6250, lr 9.4e-05, dt 2.1s +All GPU(s): step 8465: loss 24.0625, lr 9.4e-05, dt 2.0s +All GPU(s): step 8466: loss 23.8594, lr 9.4e-05, dt 2.0s +All GPU(s): step 8467: loss 23.6562, lr 9.4e-05, dt 2.1s +All GPU(s): step 8468: loss 23.9375, lr 9.4e-05, dt 2.0s +All GPU(s): step 8469: loss 24.1094, lr 9.4e-05, dt 2.1s +All GPU(s): step 8470: loss 23.8906, lr 9.4e-05, dt 2.0s +All GPU(s): step 8471: loss 23.9219, lr 9.4e-05, dt 2.1s +All GPU(s): step 8472: loss 23.8281, lr 9.4e-05, dt 2.2s +All GPU(s): step 8473: loss 23.7500, lr 9.4e-05, dt 2.0s +All GPU(s): step 8474: loss 23.8594, lr 9.4e-05, dt 2.0s +All GPU(s): step 8475: loss 23.4375, lr 9.4e-05, dt 2.0s +All GPU(s): step 8476: loss 23.6719, lr 9.4e-05, dt 2.0s +All GPU(s): step 8477: loss 23.6250, lr 9.4e-05, dt 2.2s +All GPU(s): step 8478: loss 23.6094, lr 9.4e-05, dt 2.0s +All GPU(s): step 8479: loss 23.9375, lr 9.4e-05, dt 2.0s +All GPU(s): step 8480: loss 23.7344, lr 9.4e-05, dt 2.0s +All GPU(s): step 8481: loss 23.3906, lr 9.3e-05, dt 2.1s +All GPU(s): step 8482: loss 23.7656, lr 9.3e-05, dt 2.1s +All GPU(s): step 8483: loss 23.6719, lr 9.3e-05, dt 2.0s +All GPU(s): step 8484: loss 23.8125, lr 9.3e-05, dt 2.0s +All GPU(s): step 8485: loss 23.9375, lr 9.3e-05, dt 2.0s +All GPU(s): step 8486: loss 23.9375, lr 9.3e-05, dt 2.1s +All GPU(s): step 8487: loss 24.0312, lr 9.3e-05, dt 2.1s +All GPU(s): step 8488: loss 24.3281, lr 9.3e-05, dt 2.0s +All GPU(s): step 8489: loss 24.3594, lr 9.3e-05, dt 2.1s +All GPU(s): step 8490: loss 24.3125, lr 9.3e-05, dt 2.0s +All GPU(s): step 8491: loss 24.3281, lr 9.3e-05, dt 2.1s +All GPU(s): step 8492: loss 24.2500, lr 9.3e-05, dt 2.0s +All GPU(s): step 8493: loss 24.3281, lr 9.3e-05, dt 2.1s +All GPU(s): step 8494: loss 24.5312, lr 9.3e-05, dt 2.1s +All GPU(s): step 8495: loss 24.5625, lr 9.3e-05, dt 2.0s +All GPU(s): step 8496: loss 24.7656, lr 9.3e-05, dt 2.2s +All GPU(s): step 8497: loss 24.9844, lr 9.3e-05, dt 2.1s +All GPU(s): step 8498: loss 24.9062, lr 9.3e-05, dt 2.1s +All GPU(s): step 8499: loss 25.1250, lr 9.3e-05, dt 2.1s +All GPU(s): step 8500: loss 25.0156, lr 9.3e-05, dt 2.1s +All GPU(s): step 8501: loss 25.3438, lr 9.3e-05, dt 2.2s +All GPU(s): step 8502: loss 25.4062, lr 9.3e-05, dt 2.0s +All GPU(s): step 8503: loss 25.4688, lr 9.3e-05, dt 2.1s +All GPU(s): step 8504: loss 25.5625, lr 9.3e-05, dt 2.1s +All GPU(s): step 8505: loss 25.7812, lr 9.3e-05, dt 2.1s +All GPU(s): step 8506: loss 26.0000, lr 9.2e-05, dt 2.1s +All GPU(s): step 8507: loss 25.6875, lr 9.2e-05, dt 2.0s +All GPU(s): step 8508: loss 25.8281, lr 9.2e-05, dt 2.0s +All GPU(s): step 8509: loss 25.8125, lr 9.2e-05, dt 2.0s +All GPU(s): step 8510: loss 26.0625, lr 9.2e-05, dt 2.1s +All GPU(s): step 8511: loss 26.3125, lr 9.2e-05, dt 2.1s +All GPU(s): step 8512: loss 26.1406, lr 9.2e-05, dt 2.1s +All GPU(s): step 8513: loss 26.4844, lr 9.2e-05, dt 2.0s +All GPU(s): step 8514: loss 26.5156, lr 9.2e-05, dt 2.0s +All GPU(s): step 8515: loss 26.1719, lr 9.2e-05, dt 2.1s +All GPU(s): step 8516: loss 26.1875, lr 9.2e-05, dt 2.0s +All GPU(s): step 8517: loss 25.6406, lr 9.2e-05, dt 2.0s +All GPU(s): step 8518: loss 25.9062, lr 9.2e-05, dt 2.0s +All GPU(s): step 8519: loss 25.5938, lr 9.2e-05, dt 2.0s +All GPU(s): step 8520: loss 25.4531, lr 9.2e-05, dt 2.2s +All GPU(s): step 8521: loss 25.3906, lr 9.2e-05, dt 2.0s +All GPU(s): step 8522: loss 25.0938, lr 9.2e-05, dt 2.1s +All GPU(s): step 8523: loss 25.2344, lr 9.2e-05, dt 2.0s +All GPU(s): step 8524: loss 25.2969, lr 9.2e-05, dt 2.1s +All GPU(s): step 8525: loss 25.0312, lr 9.2e-05, dt 2.2s +All GPU(s): step 8526: loss 24.9375, lr 9.2e-05, dt 2.0s +All GPU(s): step 8527: loss 24.7656, lr 9.2e-05, dt 2.0s +All GPU(s): step 8528: loss 25.0469, lr 9.2e-05, dt 2.0s +All GPU(s): step 8529: loss 24.7344, lr 9.2e-05, dt 2.1s +All GPU(s): step 8530: loss 24.6406, lr 9.2e-05, dt 2.1s +All GPU(s): step 8531: loss 24.7188, lr 9.1e-05, dt 2.0s +All GPU(s): step 8532: loss 24.4844, lr 9.1e-05, dt 2.0s +All GPU(s): step 8533: loss 24.5781, lr 9.1e-05, dt 2.1s +All GPU(s): step 8534: loss 24.4688, lr 9.1e-05, dt 2.1s +All GPU(s): step 8535: loss 24.5781, lr 9.1e-05, dt 2.1s +All GPU(s): step 8536: loss 24.6094, lr 9.1e-05, dt 2.0s +All GPU(s): step 8537: loss 24.6094, lr 9.1e-05, dt 2.0s +All GPU(s): step 8538: loss 24.8281, lr 9.1e-05, dt 2.1s +All GPU(s): step 8539: loss 24.8594, lr 9.1e-05, dt 2.1s +All GPU(s): step 8540: loss 24.6094, lr 9.1e-05, dt 2.1s +All GPU(s): step 8541: loss 24.7656, lr 9.1e-05, dt 2.1s +All GPU(s): step 8542: loss 25.0156, lr 9.1e-05, dt 2.1s +All GPU(s): step 8543: loss 24.6250, lr 9.1e-05, dt 2.1s +All GPU(s): step 8544: loss 24.9219, lr 9.1e-05, dt 2.1s +All GPU(s): step 8545: loss 24.6094, lr 9.1e-05, dt 2.1s +All GPU(s): step 8546: loss 24.9844, lr 9.1e-05, dt 2.1s +All GPU(s): step 8547: loss 25.0156, lr 9.1e-05, dt 2.0s +All GPU(s): step 8548: loss 24.8906, lr 9.1e-05, dt 2.1s +All GPU(s): step 8549: loss 25.2344, lr 9.1e-05, dt 2.2s +All GPU(s): step 8550: loss 25.3906, lr 9.1e-05, dt 2.1s +All GPU(s): step 8551: loss 24.9844, lr 9.1e-05, dt 2.0s +All GPU(s): step 8552: loss 25.1406, lr 9.1e-05, dt 2.0s +All GPU(s): step 8553: loss 25.0938, lr 9.1e-05, dt 2.1s +All GPU(s): step 8554: loss 25.0625, lr 9.1e-05, dt 2.1s +All GPU(s): step 8555: loss 25.2500, lr 9.1e-05, dt 2.1s +All GPU(s): step 8556: loss 24.9844, lr 9.1e-05, dt 2.0s +All GPU(s): step 8557: loss 25.1406, lr 9.0e-05, dt 2.0s +All GPU(s): step 8558: loss 25.2500, lr 9.0e-05, dt 2.1s +All GPU(s): step 8559: loss 24.9844, lr 9.0e-05, dt 2.1s +All GPU(s): step 8560: loss 24.9844, lr 9.0e-05, dt 2.1s +All GPU(s): step 8561: loss 25.0156, lr 9.0e-05, dt 2.0s +All GPU(s): step 8562: loss 25.1562, lr 9.0e-05, dt 2.1s +All GPU(s): step 8563: loss 25.0938, lr 9.0e-05, dt 2.1s +All GPU(s): step 8564: loss 24.7656, lr 9.0e-05, dt 2.1s +All GPU(s): step 8565: loss 24.8750, lr 9.0e-05, dt 2.1s +All GPU(s): step 8566: loss 25.0625, lr 9.0e-05, dt 2.1s +All GPU(s): step 8567: loss 25.0312, lr 9.0e-05, dt 2.1s +All GPU(s): step 8568: loss 24.9219, lr 9.0e-05, dt 2.2s +All GPU(s): step 8569: loss 25.0000, lr 9.0e-05, dt 2.0s +All GPU(s): step 8570: loss 24.8438, lr 9.0e-05, dt 2.0s +All GPU(s): step 8571: loss 25.1250, lr 9.0e-05, dt 2.0s +All GPU(s): step 8572: loss 25.0938, lr 9.0e-05, dt 2.0s +All GPU(s): step 8573: loss 25.3281, lr 9.0e-05, dt 2.2s +All GPU(s): step 8574: loss 25.0000, lr 9.0e-05, dt 2.0s +All GPU(s): step 8575: loss 25.1250, lr 9.0e-05, dt 2.0s +All GPU(s): step 8576: loss 25.1406, lr 9.0e-05, dt 2.1s +All GPU(s): step 8577: loss 25.0000, lr 9.0e-05, dt 2.1s +All GPU(s): step 8578: loss 24.9531, lr 9.0e-05, dt 2.1s +All GPU(s): step 8579: loss 25.0938, lr 9.0e-05, dt 2.0s +All GPU(s): step 8580: loss 25.2500, lr 9.0e-05, dt 2.0s +All GPU(s): step 8581: loss 25.1250, lr 9.0e-05, dt 2.0s +All GPU(s): step 8582: loss 25.2812, lr 8.9e-05, dt 2.1s +All GPU(s): step 8583: loss 25.3594, lr 8.9e-05, dt 2.1s +All GPU(s): step 8584: loss 25.2812, lr 8.9e-05, dt 2.1s +All GPU(s): step 8585: loss 25.2344, lr 8.9e-05, dt 2.1s +All GPU(s): step 8586: loss 25.6094, lr 8.9e-05, dt 2.1s +All GPU(s): step 8587: loss 25.8125, lr 8.9e-05, dt 2.1s +All GPU(s): step 8588: loss 25.6719, lr 8.9e-05, dt 2.1s +All GPU(s): step 8589: loss 25.7188, lr 8.9e-05, dt 2.1s +All GPU(s): step 8590: loss 25.5156, lr 8.9e-05, dt 2.0s +All GPU(s): step 8591: loss 25.7812, lr 8.9e-05, dt 2.0s +All GPU(s): step 8592: loss 26.0000, lr 8.9e-05, dt 2.1s +All GPU(s): step 8593: loss 26.2969, lr 8.9e-05, dt 2.0s +All GPU(s): step 8594: loss 25.7188, lr 8.9e-05, dt 2.0s +All GPU(s): step 8595: loss 26.0625, lr 8.9e-05, dt 2.0s +All GPU(s): step 8596: loss 25.9219, lr 8.9e-05, dt 2.0s +All GPU(s): step 8597: loss 25.8281, lr 8.9e-05, dt 2.1s +All GPU(s): step 8598: loss 25.9688, lr 8.9e-05, dt 2.0s +All GPU(s): step 8599: loss 25.8438, lr 8.9e-05, dt 2.0s +All GPU(s): step 8600: loss 25.9375, lr 8.9e-05, dt 2.1s +All GPU(s): step 8601: loss 25.9219, lr 8.9e-05, dt 2.1s +All GPU(s): step 8602: loss 26.0781, lr 8.9e-05, dt 2.1s +All GPU(s): step 8603: loss 25.8125, lr 8.9e-05, dt 2.0s +All GPU(s): step 8604: loss 25.6562, lr 8.9e-05, dt 2.0s +All GPU(s): step 8605: loss 25.9219, lr 8.9e-05, dt 2.0s +All GPU(s): step 8606: loss 25.9219, lr 8.9e-05, dt 2.1s +All GPU(s): step 8607: loss 25.7344, lr 8.9e-05, dt 2.1s +All GPU(s): step 8608: loss 25.7969, lr 8.8e-05, dt 2.0s +All GPU(s): step 8609: loss 26.0156, lr 8.8e-05, dt 2.0s +All GPU(s): step 8610: loss 25.7344, lr 8.8e-05, dt 2.0s +All GPU(s): step 8611: loss 25.9531, lr 8.8e-05, dt 2.1s +All GPU(s): step 8612: loss 25.8438, lr 8.8e-05, dt 2.1s +All GPU(s): step 8613: loss 26.0781, lr 8.8e-05, dt 2.1s +All GPU(s): step 8614: loss 25.9062, lr 8.8e-05, dt 2.0s +All GPU(s): step 8615: loss 26.1094, lr 8.8e-05, dt 2.0s +All GPU(s): step 8616: loss 26.2031, lr 8.8e-05, dt 2.2s +All GPU(s): step 8617: loss 26.5156, lr 8.8e-05, dt 2.0s +All GPU(s): step 8618: loss 26.3125, lr 8.8e-05, dt 2.0s +All GPU(s): step 8619: loss 26.2656, lr 8.8e-05, dt 2.1s +All GPU(s): step 8620: loss 25.9688, lr 8.8e-05, dt 2.1s +All GPU(s): step 8621: loss 26.2656, lr 8.8e-05, dt 2.1s +All GPU(s): step 8622: loss 26.1094, lr 8.8e-05, dt 2.0s +All GPU(s): step 8623: loss 26.0781, lr 8.8e-05, dt 2.0s +All GPU(s): step 8624: loss 26.2656, lr 8.8e-05, dt 2.0s +All GPU(s): step 8625: loss 25.9062, lr 8.8e-05, dt 2.1s +All GPU(s): step 8626: loss 26.0781, lr 8.8e-05, dt 2.1s +All GPU(s): step 8627: loss 25.9219, lr 8.8e-05, dt 2.0s +All GPU(s): step 8628: loss 25.9688, lr 8.8e-05, dt 2.1s +All GPU(s): step 8629: loss 25.6719, lr 8.8e-05, dt 2.0s +All GPU(s): step 8630: loss 25.6406, lr 8.8e-05, dt 2.1s +All GPU(s): step 8631: loss 26.0469, lr 8.8e-05, dt 2.1s +All GPU(s): step 8632: loss 25.7500, lr 8.8e-05, dt 2.0s +All GPU(s): step 8633: loss 25.6250, lr 8.8e-05, dt 2.0s +All GPU(s): step 8634: loss 25.7500, lr 8.7e-05, dt 2.0s +All GPU(s): step 8635: loss 25.9531, lr 8.7e-05, dt 2.1s +All GPU(s): step 8636: loss 25.8438, lr 8.7e-05, dt 2.1s +All GPU(s): step 8637: loss 25.9219, lr 8.7e-05, dt 2.1s +All GPU(s): step 8638: loss 25.7188, lr 8.7e-05, dt 2.0s +All GPU(s): step 8639: loss 25.8438, lr 8.7e-05, dt 2.0s +All GPU(s): step 8640: loss 26.1719, lr 8.7e-05, dt 2.1s +All GPU(s): step 8641: loss 25.7500, lr 8.7e-05, dt 2.1s +All GPU(s): step 8642: loss 25.8281, lr 8.7e-05, dt 2.0s +All GPU(s): step 8643: loss 25.7031, lr 8.7e-05, dt 2.0s +All GPU(s): step 8644: loss 26.0000, lr 8.7e-05, dt 2.0s +All GPU(s): step 8645: loss 25.7969, lr 8.7e-05, dt 2.1s +All GPU(s): step 8646: loss 26.0000, lr 8.7e-05, dt 2.0s +All GPU(s): step 8647: loss 25.9219, lr 8.7e-05, dt 2.0s +All GPU(s): step 8648: loss 25.6719, lr 8.7e-05, dt 2.0s +All GPU(s): step 8649: loss 26.0469, lr 8.7e-05, dt 2.0s +All GPU(s): step 8650: loss 25.8438, lr 8.7e-05, dt 2.2s +All GPU(s): step 8651: loss 25.5938, lr 8.7e-05, dt 2.1s +All GPU(s): step 8652: loss 25.6094, lr 8.7e-05, dt 2.1s +All GPU(s): step 8653: loss 25.6094, lr 8.7e-05, dt 2.0s +All GPU(s): step 8654: loss 25.5000, lr 8.7e-05, dt 2.0s +All GPU(s): step 8655: loss 25.5156, lr 8.7e-05, dt 2.1s +All GPU(s): step 8656: loss 25.5312, lr 8.7e-05, dt 2.0s +All GPU(s): step 8657: loss 25.4688, lr 8.7e-05, dt 2.0s +All GPU(s): step 8658: loss 25.4375, lr 8.7e-05, dt 2.0s +All GPU(s): step 8659: loss 25.1406, lr 8.7e-05, dt 2.1s +All GPU(s): step 8660: loss 25.5312, lr 8.6e-05, dt 2.1s +All GPU(s): step 8661: loss 25.3750, lr 8.6e-05, dt 2.0s +All GPU(s): step 8662: loss 25.5469, lr 8.6e-05, dt 2.0s +All GPU(s): step 8663: loss 25.2031, lr 8.6e-05, dt 2.0s +All GPU(s): step 8664: loss 25.0938, lr 8.6e-05, dt 2.1s +All GPU(s): step 8665: loss 25.4219, lr 8.6e-05, dt 2.1s +All GPU(s): step 8666: loss 25.1562, lr 8.6e-05, dt 2.0s +All GPU(s): step 8667: loss 25.2188, lr 8.6e-05, dt 2.1s +All GPU(s): step 8668: loss 25.0312, lr 8.6e-05, dt 2.0s +All GPU(s): step 8669: loss 25.0469, lr 8.6e-05, dt 2.1s +All GPU(s): step 8670: loss 25.2031, lr 8.6e-05, dt 2.1s +All GPU(s): step 8671: loss 25.1250, lr 8.6e-05, dt 2.1s +All GPU(s): step 8672: loss 24.9219, lr 8.6e-05, dt 2.1s +All GPU(s): step 8673: loss 25.0000, lr 8.6e-05, dt 2.1s +All GPU(s): step 8674: loss 25.0625, lr 8.6e-05, dt 2.1s +All GPU(s): step 8675: loss 24.6719, lr 8.6e-05, dt 2.0s +All GPU(s): step 8676: loss 24.4531, lr 8.6e-05, dt 2.0s +All GPU(s): step 8677: loss 25.1562, lr 8.6e-05, dt 2.0s +All GPU(s): step 8678: loss 24.6406, lr 8.6e-05, dt 2.0s +All GPU(s): step 8679: loss 24.5312, lr 8.6e-05, dt 2.1s +All GPU(s): step 8680: loss 24.5000, lr 8.6e-05, dt 2.0s +All GPU(s): step 8681: loss 24.4531, lr 8.6e-05, dt 2.1s +All GPU(s): step 8682: loss 24.2188, lr 8.6e-05, dt 2.1s +All GPU(s): step 8683: loss 24.5156, lr 8.6e-05, dt 2.0s +All GPU(s): step 8684: loss 23.9531, lr 8.6e-05, dt 2.1s +All GPU(s): step 8685: loss 24.1562, lr 8.6e-05, dt 2.0s +All GPU(s): step 8686: loss 24.0938, lr 8.6e-05, dt 2.0s +All GPU(s): step 8687: loss 24.1406, lr 8.5e-05, dt 2.1s +All GPU(s): step 8688: loss 23.9688, lr 8.5e-05, dt 2.1s +All GPU(s): step 8689: loss 24.1406, lr 8.5e-05, dt 2.1s +All GPU(s): step 8690: loss 24.2656, lr 8.5e-05, dt 2.1s +All GPU(s): step 8691: loss 24.2812, lr 8.5e-05, dt 2.0s +All GPU(s): step 8692: loss 24.1719, lr 8.5e-05, dt 2.0s +All GPU(s): step 8693: loss 24.2500, lr 8.5e-05, dt 2.1s +All GPU(s): step 8694: loss 24.1562, lr 8.5e-05, dt 2.0s +All GPU(s): step 8695: loss 24.8750, lr 8.5e-05, dt 2.0s +All GPU(s): step 8696: loss 24.4844, lr 8.5e-05, dt 2.0s +All GPU(s): step 8697: loss 24.5156, lr 8.5e-05, dt 2.0s +All GPU(s): step 8698: loss 24.8750, lr 8.5e-05, dt 2.1s +All GPU(s): step 8699: loss 24.7500, lr 8.5e-05, dt 2.1s +All GPU(s): step 8700: loss 24.5156, lr 8.5e-05, dt 2.1s +All GPU(s): step 8701: loss 24.8438, lr 8.5e-05, dt 2.0s +All GPU(s): step 8702: loss 24.7656, lr 8.5e-05, dt 2.1s +All GPU(s): step 8703: loss 24.8125, lr 8.5e-05, dt 2.1s +All GPU(s): step 8704: loss 24.9219, lr 8.5e-05, dt 2.1s +All GPU(s): step 8705: loss 24.6719, lr 8.5e-05, dt 2.1s +All GPU(s): step 8706: loss 25.0781, lr 8.5e-05, dt 2.0s +All GPU(s): step 8707: loss 24.8281, lr 8.5e-05, dt 2.1s +All GPU(s): step 8708: loss 25.0156, lr 8.5e-05, dt 2.2s +All GPU(s): step 8709: loss 25.4531, lr 8.5e-05, dt 2.0s +All GPU(s): step 8710: loss 25.3594, lr 8.5e-05, dt 2.0s +All GPU(s): step 8711: loss 25.5156, lr 8.5e-05, dt 2.0s +All GPU(s): step 8712: loss 25.8750, lr 8.5e-05, dt 2.1s +All GPU(s): step 8713: loss 25.7188, lr 8.4e-05, dt 2.1s +All GPU(s): step 8714: loss 25.7500, lr 8.4e-05, dt 2.0s +All GPU(s): step 8715: loss 25.8594, lr 8.4e-05, dt 2.0s +All GPU(s): step 8716: loss 26.0312, lr 8.4e-05, dt 2.1s +All GPU(s): step 8717: loss 25.9219, lr 8.4e-05, dt 2.1s +All GPU(s): step 8718: loss 25.8750, lr 8.4e-05, dt 2.1s +All GPU(s): step 8719: loss 26.5000, lr 8.4e-05, dt 2.0s +All GPU(s): step 8720: loss 26.4219, lr 8.4e-05, dt 2.1s +All GPU(s): step 8721: loss 26.5000, lr 8.4e-05, dt 2.1s +All GPU(s): step 8722: loss 26.3125, lr 8.4e-05, dt 2.1s +All GPU(s): step 8723: loss 26.4219, lr 8.4e-05, dt 2.1s +All GPU(s): step 8724: loss 26.8125, lr 8.4e-05, dt 2.1s +All GPU(s): step 8725: loss 26.7500, lr 8.4e-05, dt 2.0s +All GPU(s): step 8726: loss 27.0469, lr 8.4e-05, dt 2.1s +All GPU(s): step 8727: loss 26.5312, lr 8.4e-05, dt 2.1s +All GPU(s): step 8728: loss 26.7656, lr 8.4e-05, dt 2.0s +All GPU(s): step 8729: loss 26.9219, lr 8.4e-05, dt 2.1s +All GPU(s): step 8730: loss 27.2500, lr 8.4e-05, dt 2.1s +All GPU(s): step 8731: loss 27.2656, lr 8.4e-05, dt 2.0s +All GPU(s): step 8732: loss 27.0625, lr 8.4e-05, dt 2.1s +All GPU(s): step 8733: loss 27.2656, lr 8.4e-05, dt 2.0s +All GPU(s): step 8734: loss 27.3750, lr 8.4e-05, dt 2.0s +All GPU(s): step 8735: loss 27.1719, lr 8.4e-05, dt 2.1s +All GPU(s): step 8736: loss 27.1406, lr 8.4e-05, dt 2.1s +All GPU(s): step 8737: loss 26.7969, lr 8.4e-05, dt 2.2s +All GPU(s): step 8738: loss 27.0156, lr 8.4e-05, dt 2.1s +All GPU(s): step 8739: loss 26.9375, lr 8.4e-05, dt 2.1s +All GPU(s): step 8740: loss 26.9375, lr 8.3e-05, dt 2.1s +All GPU(s): step 8741: loss 26.6250, lr 8.3e-05, dt 2.1s +All GPU(s): step 8742: loss 26.6562, lr 8.3e-05, dt 2.1s +All GPU(s): step 8743: loss 26.5625, lr 8.3e-05, dt 2.0s +All GPU(s): step 8744: loss 26.5781, lr 8.3e-05, dt 2.1s +All GPU(s): step 8745: loss 26.7812, lr 8.3e-05, dt 2.0s +All GPU(s): step 8746: loss 26.7656, lr 8.3e-05, dt 2.1s +All GPU(s): step 8747: loss 26.8906, lr 8.3e-05, dt 2.1s +All GPU(s): step 8748: loss 26.6719, lr 8.3e-05, dt 2.0s +All GPU(s): step 8749: loss 26.7656, lr 8.3e-05, dt 2.0s +All GPU(s): step 8750: loss 26.7969, lr 8.3e-05, dt 2.0s +All GPU(s): step 8751: loss 26.9688, lr 8.3e-05, dt 2.1s +All GPU(s): step 8752: loss 27.0312, lr 8.3e-05, dt 2.0s +All GPU(s): step 8753: loss 27.1406, lr 8.3e-05, dt 2.0s +All GPU(s): step 8754: loss 27.4219, lr 8.3e-05, dt 2.0s +All GPU(s): step 8755: loss 27.3125, lr 8.3e-05, dt 2.0s +All GPU(s): step 8756: loss 27.4219, lr 8.3e-05, dt 2.1s +All GPU(s): step 8757: loss 27.5469, lr 8.3e-05, dt 2.0s +All GPU(s): step 8758: loss 27.7188, lr 8.3e-05, dt 2.0s +All GPU(s): step 8759: loss 27.6094, lr 8.3e-05, dt 2.0s +All GPU(s): step 8760: loss 27.7031, lr 8.3e-05, dt 2.0s +All GPU(s): step 8761: loss 27.6719, lr 8.3e-05, dt 2.1s +All GPU(s): step 8762: loss 27.8125, lr 8.3e-05, dt 2.1s +All GPU(s): step 8763: loss 27.8906, lr 8.3e-05, dt 2.0s +All GPU(s): step 8764: loss 28.0938, lr 8.3e-05, dt 2.0s +All GPU(s): step 8765: loss 28.1406, lr 8.3e-05, dt 2.1s +All GPU(s): step 8766: loss 28.2969, lr 8.3e-05, dt 2.1s +All GPU(s): step 8767: loss 28.3281, lr 8.3e-05, dt 2.0s +All GPU(s): step 8768: loss 28.3438, lr 8.2e-05, dt 2.0s +All GPU(s): step 8769: loss 28.1094, lr 8.2e-05, dt 2.0s +All GPU(s): step 8770: loss 28.1719, lr 8.2e-05, dt 2.1s +All GPU(s): step 8771: loss 28.2500, lr 8.2e-05, dt 2.2s +All GPU(s): step 8772: loss 28.4531, lr 8.2e-05, dt 2.0s +All GPU(s): step 8773: loss 28.3281, lr 8.2e-05, dt 2.0s +All GPU(s): step 8774: loss 27.9375, lr 8.2e-05, dt 2.0s +All GPU(s): step 8775: loss 28.0156, lr 8.2e-05, dt 2.1s +All GPU(s): step 8776: loss 28.0781, lr 8.2e-05, dt 2.0s +All GPU(s): step 8777: loss 28.2656, lr 8.2e-05, dt 2.0s +All GPU(s): step 8778: loss 28.1094, lr 8.2e-05, dt 2.0s +All GPU(s): step 8779: loss 27.8438, lr 8.2e-05, dt 2.0s +All GPU(s): step 8780: loss 27.8438, lr 8.2e-05, dt 2.1s +All GPU(s): step 8781: loss 27.7656, lr 8.2e-05, dt 2.1s +All GPU(s): step 8782: loss 27.7969, lr 8.2e-05, dt 2.1s +All GPU(s): step 8783: loss 27.3125, lr 8.2e-05, dt 2.0s +All GPU(s): step 8784: loss 27.3594, lr 8.2e-05, dt 2.1s +All GPU(s): step 8785: loss 26.9062, lr 8.2e-05, dt 2.2s +All GPU(s): step 8786: loss 27.0781, lr 8.2e-05, dt 2.1s +All GPU(s): step 8787: loss 27.2031, lr 8.2e-05, dt 2.0s +All GPU(s): step 8788: loss 26.8125, lr 8.2e-05, dt 2.0s +All GPU(s): step 8789: loss 26.6094, lr 8.2e-05, dt 2.1s +All GPU(s): step 8790: loss 26.4688, lr 8.2e-05, dt 2.1s +All GPU(s): step 8791: loss 26.2500, lr 8.2e-05, dt 2.0s +All GPU(s): step 8792: loss 26.1875, lr 8.2e-05, dt 2.0s +All GPU(s): step 8793: loss 26.1875, lr 8.2e-05, dt 2.0s +All GPU(s): step 8794: loss 26.1250, lr 8.2e-05, dt 2.1s +All GPU(s): step 8795: loss 25.8281, lr 8.1e-05, dt 2.1s +All GPU(s): step 8796: loss 26.0938, lr 8.1e-05, dt 2.0s +All GPU(s): step 8797: loss 26.0938, lr 8.1e-05, dt 2.0s +All GPU(s): step 8798: loss 26.2344, lr 8.1e-05, dt 2.0s +All GPU(s): step 8799: loss 26.3281, lr 8.1e-05, dt 2.1s +All GPU(s): step 8800: loss 26.5625, lr 8.1e-05, dt 2.0s +All GPU(s): step 8801: loss 26.5938, lr 8.1e-05, dt 2.0s +All GPU(s): step 8802: loss 26.3906, lr 8.1e-05, dt 2.1s +All GPU(s): step 8803: loss 26.5781, lr 8.1e-05, dt 2.0s +All GPU(s): step 8804: loss 26.7344, lr 8.1e-05, dt 2.1s +All GPU(s): step 8805: loss 26.3125, lr 8.1e-05, dt 2.0s +All GPU(s): step 8806: loss 26.6094, lr 8.1e-05, dt 2.0s +All GPU(s): step 8807: loss 26.6094, lr 8.1e-05, dt 2.0s +All GPU(s): step 8808: loss 26.8906, lr 8.1e-05, dt 2.0s +All GPU(s): step 8809: loss 26.6406, lr 8.1e-05, dt 2.3s +All GPU(s): step 8810: loss 26.7500, lr 8.1e-05, dt 2.1s +All GPU(s): step 8811: loss 26.9375, lr 8.1e-05, dt 2.0s +All GPU(s): step 8812: loss 26.6875, lr 8.1e-05, dt 2.0s +All GPU(s): step 8813: loss 26.6562, lr 8.1e-05, dt 2.1s +All GPU(s): step 8814: loss 26.4844, lr 8.1e-05, dt 2.1s +All GPU(s): step 8815: loss 26.1250, lr 8.1e-05, dt 2.1s +All GPU(s): step 8816: loss 26.1875, lr 8.1e-05, dt 2.1s +All GPU(s): step 8817: loss 25.8906, lr 8.1e-05, dt 2.0s +All GPU(s): step 8818: loss 26.1250, lr 8.1e-05, dt 2.0s +All GPU(s): step 8819: loss 26.0625, lr 8.1e-05, dt 2.1s +All GPU(s): step 8820: loss 25.9062, lr 8.1e-05, dt 2.1s +All GPU(s): step 8821: loss 25.9844, lr 8.1e-05, dt 2.1s +All GPU(s): step 8822: loss 25.9688, lr 8.1e-05, dt 2.1s +All GPU(s): step 8823: loss 25.5781, lr 8.0e-05, dt 2.1s +All GPU(s): step 8824: loss 25.9375, lr 8.0e-05, dt 2.1s +All GPU(s): step 8825: loss 25.9062, lr 8.0e-05, dt 2.0s +All GPU(s): step 8826: loss 25.4531, lr 8.0e-05, dt 2.0s +All GPU(s): step 8827: loss 25.8906, lr 8.0e-05, dt 2.0s +All GPU(s): step 8828: loss 25.8750, lr 8.0e-05, dt 2.2s +All GPU(s): step 8829: loss 26.2344, lr 8.0e-05, dt 2.0s +All GPU(s): step 8830: loss 26.1094, lr 8.0e-05, dt 2.0s +All GPU(s): step 8831: loss 26.5469, lr 8.0e-05, dt 2.1s +All GPU(s): step 8832: loss 26.7031, lr 8.0e-05, dt 2.0s +All GPU(s): step 8833: loss 26.7500, lr 8.0e-05, dt 2.2s +All GPU(s): step 8834: loss 26.7188, lr 8.0e-05, dt 2.0s +All GPU(s): step 8835: loss 26.8906, lr 8.0e-05, dt 2.1s +All GPU(s): step 8836: loss 26.8438, lr 8.0e-05, dt 2.1s +All GPU(s): step 8837: loss 27.1719, lr 8.0e-05, dt 2.1s +All GPU(s): step 8838: loss 26.8750, lr 8.0e-05, dt 2.2s +All GPU(s): step 8839: loss 27.1406, lr 8.0e-05, dt 2.0s +All GPU(s): step 8840: loss 27.0469, lr 8.0e-05, dt 2.0s +All GPU(s): step 8841: loss 27.0625, lr 8.0e-05, dt 2.0s +All GPU(s): step 8842: loss 27.1250, lr 8.0e-05, dt 2.1s +All GPU(s): step 8843: loss 27.1250, lr 8.0e-05, dt 2.1s +All GPU(s): step 8844: loss 27.1406, lr 8.0e-05, dt 2.0s +All GPU(s): step 8845: loss 27.3594, lr 8.0e-05, dt 2.1s +All GPU(s): step 8846: loss 27.2969, lr 8.0e-05, dt 2.1s +All GPU(s): step 8847: loss 27.3438, lr 8.0e-05, dt 2.1s +All GPU(s): step 8848: loss 27.2812, lr 8.0e-05, dt 2.1s +All GPU(s): step 8849: loss 27.8281, lr 8.0e-05, dt 2.0s +All GPU(s): step 8850: loss 27.8125, lr 8.0e-05, dt 2.0s +All GPU(s): step 8851: loss 28.0000, lr 7.9e-05, dt 2.0s +All GPU(s): step 8852: loss 28.2656, lr 7.9e-05, dt 2.2s +All GPU(s): step 8853: loss 28.4844, lr 7.9e-05, dt 2.0s +All GPU(s): step 8854: loss 28.2969, lr 7.9e-05, dt 2.1s +All GPU(s): step 8855: loss 28.4688, lr 7.9e-05, dt 2.0s +All GPU(s): step 8856: loss 28.5156, lr 7.9e-05, dt 2.1s +All GPU(s): step 8857: loss 28.7969, lr 7.9e-05, dt 2.2s +All GPU(s): step 8858: loss 28.6094, lr 7.9e-05, dt 2.1s +All GPU(s): step 8859: loss 28.4375, lr 7.9e-05, dt 2.1s +All GPU(s): step 8860: loss 28.8281, lr 7.9e-05, dt 2.0s +All GPU(s): step 8861: loss 28.8594, lr 7.9e-05, dt 2.0s +All GPU(s): step 8862: loss 28.5156, lr 7.9e-05, dt 2.1s +All GPU(s): step 8863: loss 28.5156, lr 7.9e-05, dt 2.0s +All GPU(s): step 8864: loss 28.0938, lr 7.9e-05, dt 2.0s +All GPU(s): step 8865: loss 28.3125, lr 7.9e-05, dt 2.0s +All GPU(s): step 8866: loss 28.0156, lr 7.9e-05, dt 2.1s +All GPU(s): step 8867: loss 28.2031, lr 7.9e-05, dt 2.1s +All GPU(s): step 8868: loss 28.1406, lr 7.9e-05, dt 2.1s +All GPU(s): step 8869: loss 27.8750, lr 7.9e-05, dt 2.0s +All GPU(s): step 8870: loss 28.0000, lr 7.9e-05, dt 2.0s +All GPU(s): step 8871: loss 28.0000, lr 7.9e-05, dt 2.1s +All GPU(s): step 8872: loss 27.8438, lr 7.9e-05, dt 2.1s +All GPU(s): step 8873: loss 27.6094, lr 7.9e-05, dt 2.1s +All GPU(s): step 8874: loss 27.4688, lr 7.9e-05, dt 2.1s +All GPU(s): step 8875: loss 27.7344, lr 7.9e-05, dt 2.0s +All GPU(s): step 8876: loss 27.4219, lr 7.9e-05, dt 2.1s +All GPU(s): step 8877: loss 27.3906, lr 7.9e-05, dt 2.0s +All GPU(s): step 8878: loss 26.8594, lr 7.9e-05, dt 2.0s +All GPU(s): step 8879: loss 27.0781, lr 7.9e-05, dt 2.1s +All GPU(s): step 8880: loss 27.0625, lr 7.8e-05, dt 2.1s +All GPU(s): step 8881: loss 27.2969, lr 7.8e-05, dt 2.2s +All GPU(s): step 8882: loss 26.9375, lr 7.8e-05, dt 2.0s +All GPU(s): step 8883: loss 26.9688, lr 7.8e-05, dt 2.1s +All GPU(s): step 8884: loss 26.9688, lr 7.8e-05, dt 2.0s +All GPU(s): step 8885: loss 26.7031, lr 7.8e-05, dt 2.0s +All GPU(s): step 8886: loss 26.6875, lr 7.8e-05, dt 2.2s +All GPU(s): step 8887: loss 26.5625, lr 7.8e-05, dt 2.0s +All GPU(s): step 8888: loss 26.5156, lr 7.8e-05, dt 2.1s +All GPU(s): step 8889: loss 26.7344, lr 7.8e-05, dt 2.1s +All GPU(s): step 8890: loss 26.9375, lr 7.8e-05, dt 2.1s +All GPU(s): step 8891: loss 26.7500, lr 7.8e-05, dt 2.1s +All GPU(s): step 8892: loss 26.9062, lr 7.8e-05, dt 2.1s +All GPU(s): step 8893: loss 26.8750, lr 7.8e-05, dt 2.0s +All GPU(s): step 8894: loss 26.8281, lr 7.8e-05, dt 2.0s +All GPU(s): step 8895: loss 26.5781, lr 7.8e-05, dt 2.1s +All GPU(s): step 8896: loss 26.9219, lr 7.8e-05, dt 2.1s +All GPU(s): step 8897: loss 26.8281, lr 7.8e-05, dt 2.0s +All GPU(s): step 8898: loss 27.2969, lr 7.8e-05, dt 2.1s +All GPU(s): step 8899: loss 27.3125, lr 7.8e-05, dt 2.0s +All GPU(s): step 8900: loss 27.5312, lr 7.8e-05, dt 2.1s +All GPU(s): step 8901: loss 27.4531, lr 7.8e-05, dt 2.0s +All GPU(s): step 8902: loss 27.8906, lr 7.8e-05, dt 2.1s +All GPU(s): step 8903: loss 27.7500, lr 7.8e-05, dt 2.1s +All GPU(s): step 8904: loss 28.2188, lr 7.8e-05, dt 2.1s +All GPU(s): step 8905: loss 27.9844, lr 7.8e-05, dt 2.1s +All GPU(s): step 8906: loss 28.1562, lr 7.8e-05, dt 2.0s +All GPU(s): step 8907: loss 28.3906, lr 7.8e-05, dt 2.1s +All GPU(s): step 8908: loss 28.6094, lr 7.7e-05, dt 2.1s +All GPU(s): step 8909: loss 28.6406, lr 7.7e-05, dt 2.1s +All GPU(s): step 8910: loss 28.8438, lr 7.7e-05, dt 2.2s +All GPU(s): step 8911: loss 29.1094, lr 7.7e-05, dt 2.1s +All GPU(s): step 8912: loss 28.6562, lr 7.7e-05, dt 2.0s +All GPU(s): step 8913: loss 28.9062, lr 7.7e-05, dt 2.1s +All GPU(s): step 8914: loss 28.6250, lr 7.7e-05, dt 2.1s +All GPU(s): step 8915: loss 28.5156, lr 7.7e-05, dt 2.1s +All GPU(s): step 8916: loss 28.5000, lr 7.7e-05, dt 2.1s +All GPU(s): step 8917: loss 28.2344, lr 7.7e-05, dt 2.0s +All GPU(s): step 8918: loss 28.2969, lr 7.7e-05, dt 2.0s +All GPU(s): step 8919: loss 28.4531, lr 7.7e-05, dt 2.0s +All GPU(s): step 8920: loss 28.4844, lr 7.7e-05, dt 2.1s +All GPU(s): step 8921: loss 28.3281, lr 7.7e-05, dt 2.1s +All GPU(s): step 8922: loss 28.4375, lr 7.7e-05, dt 2.1s +All GPU(s): step 8923: loss 28.4844, lr 7.7e-05, dt 2.1s +All GPU(s): step 8924: loss 28.4844, lr 7.7e-05, dt 2.1s +All GPU(s): step 8925: loss 28.5781, lr 7.7e-05, dt 2.1s +All GPU(s): step 8926: loss 28.5156, lr 7.7e-05, dt 2.0s +All GPU(s): step 8927: loss 28.4688, lr 7.7e-05, dt 2.0s +All GPU(s): step 8928: loss 28.5781, lr 7.7e-05, dt 2.1s +All GPU(s): step 8929: loss 29.0781, lr 7.7e-05, dt 2.1s +All GPU(s): step 8930: loss 28.9375, lr 7.7e-05, dt 2.1s +All GPU(s): step 8931: loss 29.0156, lr 7.7e-05, dt 2.1s +All GPU(s): step 8932: loss 29.0781, lr 7.7e-05, dt 2.0s +All GPU(s): step 8933: loss 29.1562, lr 7.7e-05, dt 2.1s +All GPU(s): step 8934: loss 29.2500, lr 7.7e-05, dt 2.2s +All GPU(s): step 8935: loss 29.1406, lr 7.7e-05, dt 2.0s +All GPU(s): step 8936: loss 29.2969, lr 7.7e-05, dt 2.0s +All GPU(s): step 8937: loss 29.2188, lr 7.6e-05, dt 2.0s +All GPU(s): step 8938: loss 29.0000, lr 7.6e-05, dt 2.1s +All GPU(s): step 8939: loss 29.0469, lr 7.6e-05, dt 2.1s +All GPU(s): step 8940: loss 28.8906, lr 7.6e-05, dt 2.1s +All GPU(s): step 8941: loss 29.0156, lr 7.6e-05, dt 2.0s +All GPU(s): step 8942: loss 28.9688, lr 7.6e-05, dt 2.0s +All GPU(s): step 8943: loss 29.1719, lr 7.6e-05, dt 2.0s +All GPU(s): step 8944: loss 29.1250, lr 7.6e-05, dt 2.1s +All GPU(s): step 8945: loss 28.9688, lr 7.6e-05, dt 2.0s +All GPU(s): step 8946: loss 29.1250, lr 7.6e-05, dt 2.0s +All GPU(s): step 8947: loss 29.3125, lr 7.6e-05, dt 2.0s +All GPU(s): step 8948: loss 29.2344, lr 7.6e-05, dt 2.1s +All GPU(s): step 8949: loss 29.2812, lr 7.6e-05, dt 2.1s +All GPU(s): step 8950: loss 29.0938, lr 7.6e-05, dt 2.0s +All GPU(s): step 8951: loss 29.2500, lr 7.6e-05, dt 2.0s +All GPU(s): step 8952: loss 29.2500, lr 7.6e-05, dt 2.0s +All GPU(s): step 8953: loss 29.3906, lr 7.6e-05, dt 2.1s +All GPU(s): step 8954: loss 29.5781, lr 7.6e-05, dt 2.1s +All GPU(s): step 8955: loss 29.3906, lr 7.6e-05, dt 2.0s +All GPU(s): step 8956: loss 29.6250, lr 7.6e-05, dt 2.0s +All GPU(s): step 8957: loss 29.4062, lr 7.6e-05, dt 2.1s +All GPU(s): step 8958: loss 29.4062, lr 7.6e-05, dt 2.1s +All GPU(s): step 8959: loss 29.6250, lr 7.6e-05, dt 2.1s +All GPU(s): step 8960: loss 29.6719, lr 7.6e-05, dt 2.0s +All GPU(s): step 8961: loss 29.4375, lr 7.6e-05, dt 2.0s +All GPU(s): step 8962: loss 29.7031, lr 7.6e-05, dt 2.1s +All GPU(s): step 8963: loss 29.5938, lr 7.6e-05, dt 2.1s +All GPU(s): step 8964: loss 29.7031, lr 7.6e-05, dt 2.0s +All GPU(s): step 8965: loss 29.7812, lr 7.6e-05, dt 2.0s +All GPU(s): step 8966: loss 29.6719, lr 7.6e-05, dt 2.0s +All GPU(s): step 8967: loss 29.7500, lr 7.5e-05, dt 2.0s +All GPU(s): step 8968: loss 29.7188, lr 7.5e-05, dt 2.1s +All GPU(s): step 8969: loss 29.8125, lr 7.5e-05, dt 2.1s +All GPU(s): step 8970: loss 29.9375, lr 7.5e-05, dt 2.0s +All GPU(s): step 8971: loss 29.6562, lr 7.5e-05, dt 2.0s +All GPU(s): step 8972: loss 29.7188, lr 7.5e-05, dt 2.1s +All GPU(s): step 8973: loss 29.7344, lr 7.5e-05, dt 2.1s +All GPU(s): step 8974: loss 29.7500, lr 7.5e-05, dt 2.0s +All GPU(s): step 8975: loss 29.7656, lr 7.5e-05, dt 2.0s +All GPU(s): step 8976: loss 29.6094, lr 7.5e-05, dt 2.0s +All GPU(s): step 8977: loss 29.7969, lr 7.5e-05, dt 2.1s +All GPU(s): step 8978: loss 29.6094, lr 7.5e-05, dt 2.1s +All GPU(s): step 8979: loss 29.6719, lr 7.5e-05, dt 2.1s +All GPU(s): step 8980: loss 29.6250, lr 7.5e-05, dt 2.1s +All GPU(s): step 8981: loss 29.5000, lr 7.5e-05, dt 2.0s +All GPU(s): step 8982: loss 29.8281, lr 7.5e-05, dt 2.2s +All GPU(s): step 8983: loss 29.6094, lr 7.5e-05, dt 2.0s +All GPU(s): step 8984: loss 29.7188, lr 7.5e-05, dt 2.1s +All GPU(s): step 8985: loss 29.7031, lr 7.5e-05, dt 2.0s +All GPU(s): step 8986: loss 29.8906, lr 7.5e-05, dt 2.0s +All GPU(s): step 8987: loss 29.6562, lr 7.5e-05, dt 2.2s +All GPU(s): step 8988: loss 29.4375, lr 7.5e-05, dt 2.1s +All GPU(s): step 8989: loss 29.5938, lr 7.5e-05, dt 2.1s +All GPU(s): step 8990: loss 29.6250, lr 7.5e-05, dt 2.0s +All GPU(s): step 8991: loss 29.6094, lr 7.5e-05, dt 2.1s +All GPU(s): step 8992: loss 29.5312, lr 7.5e-05, dt 2.1s +All GPU(s): step 8993: loss 29.4844, lr 7.5e-05, dt 2.0s +All GPU(s): step 8994: loss 29.4375, lr 7.5e-05, dt 2.0s +All GPU(s): step 8995: loss 29.5469, lr 7.5e-05, dt 2.0s +All GPU(s): step 8996: loss 29.6250, lr 7.5e-05, dt 2.1s +All GPU(s): step 8997: loss 29.6562, lr 7.4e-05, dt 2.1s +All GPU(s): step 8998: loss 29.7031, lr 7.4e-05, dt 2.1s +All GPU(s): step 8999: loss 29.5469, lr 7.4e-05, dt 2.0s +saving checkpoint to checkpoints/ckpt_9000.pt +All GPU(s): step 9000: loss 29.5469, lr 7.4e-05, dt 2.1s +All GPU(s): step 9001: loss 29.9844, lr 7.4e-05, dt 2.1s +All GPU(s): step 9002: loss 29.7031, lr 7.4e-05, dt 2.0s +All GPU(s): step 9003: loss 29.7500, lr 7.4e-05, dt 2.1s +All GPU(s): step 9004: loss 29.7812, lr 7.4e-05, dt 2.0s +All GPU(s): step 9005: loss 29.7344, lr 7.4e-05, dt 2.0s +All GPU(s): step 9006: loss 29.7656, lr 7.4e-05, dt 2.1s +All GPU(s): step 9007: loss 29.7969, lr 7.4e-05, dt 2.1s +All GPU(s): step 9008: loss 29.7656, lr 7.4e-05, dt 2.1s +All GPU(s): step 9009: loss 29.6094, lr 7.4e-05, dt 2.1s +All GPU(s): step 9010: loss 29.7344, lr 7.4e-05, dt 2.0s +All GPU(s): step 9011: loss 29.9219, lr 7.4e-05, dt 2.2s +All GPU(s): step 9012: loss 29.7188, lr 7.4e-05, dt 2.1s +All GPU(s): step 9013: loss 29.6719, lr 7.4e-05, dt 2.1s +All GPU(s): step 9014: loss 29.7812, lr 7.4e-05, dt 2.1s +All GPU(s): step 9015: loss 29.7344, lr 7.4e-05, dt 2.1s +All GPU(s): step 9016: loss 29.7344, lr 7.4e-05, dt 2.1s +All GPU(s): step 9017: loss 29.5469, lr 7.4e-05, dt 2.1s +All GPU(s): step 9018: loss 29.6406, lr 7.4e-05, dt 2.1s +All GPU(s): step 9019: loss 29.8281, lr 7.4e-05, dt 2.1s +All GPU(s): step 9020: loss 29.7969, lr 7.4e-05, dt 2.1s +All GPU(s): step 9021: loss 29.8125, lr 7.4e-05, dt 2.1s +All GPU(s): step 9022: loss 29.7188, lr 7.4e-05, dt 2.0s +All GPU(s): step 9023: loss 29.7188, lr 7.4e-05, dt 2.1s +All GPU(s): step 9024: loss 29.7344, lr 7.4e-05, dt 2.0s +All GPU(s): step 9025: loss 29.8594, lr 7.4e-05, dt 2.1s +All GPU(s): step 9026: loss 29.6562, lr 7.4e-05, dt 2.1s +All GPU(s): step 9027: loss 29.8125, lr 7.3e-05, dt 2.0s +All GPU(s): step 9028: loss 29.6875, lr 7.3e-05, dt 2.0s +All GPU(s): step 9029: loss 29.8750, lr 7.3e-05, dt 2.0s +All GPU(s): step 9030: loss 29.7969, lr 7.3e-05, dt 2.1s +All GPU(s): step 9031: loss 29.6719, lr 7.3e-05, dt 2.0s +All GPU(s): step 9032: loss 29.8125, lr 7.3e-05, dt 2.1s +All GPU(s): step 9033: loss 29.6875, lr 7.3e-05, dt 2.0s +All GPU(s): step 9034: loss 29.7969, lr 7.3e-05, dt 2.0s +All GPU(s): step 9035: loss 29.8281, lr 7.3e-05, dt 2.2s +All GPU(s): step 9036: loss 29.9844, lr 7.3e-05, dt 2.1s +All GPU(s): step 9037: loss 29.8125, lr 7.3e-05, dt 2.1s +All GPU(s): step 9038: loss 29.8594, lr 7.3e-05, dt 2.1s +All GPU(s): step 9039: loss 30.0938, lr 7.3e-05, dt 2.0s +All GPU(s): step 9040: loss 30.0938, lr 7.3e-05, dt 2.2s +All GPU(s): step 9041: loss 29.8438, lr 7.3e-05, dt 2.1s +All GPU(s): step 9042: loss 30.3125, lr 7.3e-05, dt 2.0s +All GPU(s): step 9043: loss 30.2344, lr 7.3e-05, dt 2.0s +All GPU(s): step 9044: loss 30.0625, lr 7.3e-05, dt 2.0s +All GPU(s): step 9045: loss 30.3750, lr 7.3e-05, dt 2.3s +All GPU(s): step 9046: loss 30.3750, lr 7.3e-05, dt 2.0s +All GPU(s): step 9047: loss 30.3281, lr 7.3e-05, dt 2.0s +All GPU(s): step 9048: loss 30.1406, lr 7.3e-05, dt 2.0s +All GPU(s): step 9049: loss 30.1406, lr 7.3e-05, dt 2.1s +All GPU(s): step 9050: loss 30.0156, lr 7.3e-05, dt 2.1s +All GPU(s): step 9051: loss 30.1250, lr 7.3e-05, dt 2.0s +All GPU(s): step 9052: loss 30.2031, lr 7.3e-05, dt 2.0s +All GPU(s): step 9053: loss 30.2031, lr 7.3e-05, dt 2.0s +All GPU(s): step 9054: loss 30.1719, lr 7.3e-05, dt 2.1s +All GPU(s): step 9055: loss 30.1875, lr 7.3e-05, dt 2.0s +All GPU(s): step 9056: loss 30.2500, lr 7.3e-05, dt 2.1s +All GPU(s): step 9057: loss 30.2344, lr 7.3e-05, dt 2.0s +All GPU(s): step 9058: loss 30.0000, lr 7.2e-05, dt 2.0s +All GPU(s): step 9059: loss 30.2969, lr 7.2e-05, dt 2.2s +All GPU(s): step 9060: loss 30.3125, lr 7.2e-05, dt 2.1s +All GPU(s): step 9061: loss 30.2656, lr 7.2e-05, dt 2.1s +All GPU(s): step 9062: loss 30.0938, lr 7.2e-05, dt 2.1s +All GPU(s): step 9063: loss 30.2344, lr 7.2e-05, dt 2.1s +All GPU(s): step 9064: loss 30.3281, lr 7.2e-05, dt 2.2s +All GPU(s): step 9065: loss 30.2656, lr 7.2e-05, dt 2.0s +All GPU(s): step 9066: loss 30.2344, lr 7.2e-05, dt 2.0s +All GPU(s): step 9067: loss 30.0156, lr 7.2e-05, dt 2.1s +All GPU(s): step 9068: loss 30.1250, lr 7.2e-05, dt 2.0s +All GPU(s): step 9069: loss 30.2031, lr 7.2e-05, dt 2.1s +All GPU(s): step 9070: loss 30.0625, lr 7.2e-05, dt 2.0s +All GPU(s): step 9071: loss 30.0938, lr 7.2e-05, dt 2.0s +All GPU(s): step 9072: loss 29.9375, lr 7.2e-05, dt 2.0s +All GPU(s): step 9073: loss 30.0625, lr 7.2e-05, dt 2.0s +All GPU(s): step 9074: loss 30.0625, lr 7.2e-05, dt 2.1s +All GPU(s): step 9075: loss 30.0625, lr 7.2e-05, dt 2.1s +All GPU(s): step 9076: loss 30.1719, lr 7.2e-05, dt 2.1s +All GPU(s): step 9077: loss 30.1875, lr 7.2e-05, dt 2.1s +All GPU(s): step 9078: loss 30.2031, lr 7.2e-05, dt 2.1s +All GPU(s): step 9079: loss 30.2812, lr 7.2e-05, dt 2.1s +All GPU(s): step 9080: loss 30.2500, lr 7.2e-05, dt 2.1s +All GPU(s): step 9081: loss 29.9219, lr 7.2e-05, dt 2.1s +All GPU(s): step 9082: loss 30.0469, lr 7.2e-05, dt 2.1s +All GPU(s): step 9083: loss 30.0312, lr 7.2e-05, dt 2.2s +All GPU(s): step 9084: loss 30.0469, lr 7.2e-05, dt 2.0s +All GPU(s): step 9085: loss 29.7812, lr 7.2e-05, dt 2.0s +All GPU(s): step 9086: loss 29.9531, lr 7.2e-05, dt 2.0s +All GPU(s): step 9087: loss 29.8281, lr 7.2e-05, dt 2.0s +All GPU(s): step 9088: loss 30.0625, lr 7.2e-05, dt 2.1s +All GPU(s): step 9089: loss 29.8594, lr 7.1e-05, dt 2.1s +All GPU(s): step 9090: loss 30.0000, lr 7.1e-05, dt 2.0s +All GPU(s): step 9091: loss 30.0312, lr 7.1e-05, dt 2.0s +All GPU(s): step 9092: loss 30.1094, lr 7.1e-05, dt 2.0s +All GPU(s): step 9093: loss 30.0000, lr 7.1e-05, dt 2.1s +All GPU(s): step 9094: loss 29.9531, lr 7.1e-05, dt 2.0s +All GPU(s): step 9095: loss 30.1875, lr 7.1e-05, dt 2.0s +All GPU(s): step 9096: loss 30.0000, lr 7.1e-05, dt 2.0s +All GPU(s): step 9097: loss 30.2188, lr 7.1e-05, dt 2.1s +All GPU(s): step 9098: loss 30.0938, lr 7.1e-05, dt 2.1s +All GPU(s): step 9099: loss 30.0156, lr 7.1e-05, dt 2.1s +All GPU(s): step 9100: loss 29.8281, lr 7.1e-05, dt 2.1s +All GPU(s): step 9101: loss 30.1562, lr 7.1e-05, dt 2.1s +All GPU(s): step 9102: loss 29.8125, lr 7.1e-05, dt 2.1s +All GPU(s): step 9103: loss 30.0781, lr 7.1e-05, dt 2.1s +All GPU(s): step 9104: loss 29.9375, lr 7.1e-05, dt 2.0s +All GPU(s): step 9105: loss 30.0156, lr 7.1e-05, dt 2.1s +All GPU(s): step 9106: loss 30.0625, lr 7.1e-05, dt 2.1s +All GPU(s): step 9107: loss 29.9375, lr 7.1e-05, dt 2.1s +All GPU(s): step 9108: loss 30.0469, lr 7.1e-05, dt 2.0s +All GPU(s): step 9109: loss 29.9844, lr 7.1e-05, dt 2.0s +All GPU(s): step 9110: loss 29.8438, lr 7.1e-05, dt 2.0s +All GPU(s): step 9111: loss 30.0469, lr 7.1e-05, dt 2.1s +All GPU(s): step 9112: loss 29.8594, lr 7.1e-05, dt 2.2s +All GPU(s): step 9113: loss 30.0312, lr 7.1e-05, dt 2.1s +All GPU(s): step 9114: loss 29.9062, lr 7.1e-05, dt 2.0s +All GPU(s): step 9115: loss 30.2344, lr 7.1e-05, dt 2.1s +All GPU(s): step 9116: loss 29.9844, lr 7.1e-05, dt 2.1s +All GPU(s): step 9117: loss 30.0781, lr 7.1e-05, dt 2.1s +All GPU(s): step 9118: loss 30.1250, lr 7.1e-05, dt 2.1s +All GPU(s): step 9119: loss 30.1094, lr 7.1e-05, dt 2.1s +All GPU(s): step 9120: loss 30.1250, lr 7.0e-05, dt 2.1s +All GPU(s): step 9121: loss 29.9531, lr 7.0e-05, dt 2.1s +All GPU(s): step 9122: loss 30.2969, lr 7.0e-05, dt 2.1s +All GPU(s): step 9123: loss 30.0312, lr 7.0e-05, dt 2.1s +All GPU(s): step 9124: loss 30.3281, lr 7.0e-05, dt 2.0s +All GPU(s): step 9125: loss 30.1094, lr 7.0e-05, dt 2.0s +All GPU(s): step 9126: loss 30.0312, lr 7.0e-05, dt 2.1s +All GPU(s): step 9127: loss 29.8750, lr 7.0e-05, dt 2.1s +All GPU(s): step 9128: loss 29.8750, lr 7.0e-05, dt 2.0s +All GPU(s): step 9129: loss 29.9375, lr 7.0e-05, dt 2.1s +All GPU(s): step 9130: loss 29.6406, lr 7.0e-05, dt 2.1s +All GPU(s): step 9131: loss 29.7188, lr 7.0e-05, dt 2.1s +All GPU(s): step 9132: loss 29.6875, lr 7.0e-05, dt 2.1s +All GPU(s): step 9133: loss 29.7812, lr 7.0e-05, dt 2.0s +All GPU(s): step 9134: loss 29.7188, lr 7.0e-05, dt 2.1s +All GPU(s): step 9135: loss 29.6250, lr 7.0e-05, dt 2.1s +All GPU(s): step 9136: loss 29.7188, lr 7.0e-05, dt 2.1s +All GPU(s): step 9137: loss 29.4375, lr 7.0e-05, dt 2.1s +All GPU(s): step 9138: loss 29.2344, lr 7.0e-05, dt 2.0s +All GPU(s): step 9139: loss 29.4375, lr 7.0e-05, dt 2.0s +All GPU(s): step 9140: loss 29.2344, lr 7.0e-05, dt 2.1s +All GPU(s): step 9141: loss 29.3594, lr 7.0e-05, dt 2.1s +All GPU(s): step 9142: loss 29.2188, lr 7.0e-05, dt 2.1s +All GPU(s): step 9143: loss 29.3594, lr 7.0e-05, dt 2.0s +All GPU(s): step 9144: loss 29.7188, lr 7.0e-05, dt 2.0s +All GPU(s): step 9145: loss 29.5156, lr 7.0e-05, dt 2.0s +All GPU(s): step 9146: loss 29.8594, lr 7.0e-05, dt 2.1s +All GPU(s): step 9147: loss 29.8750, lr 7.0e-05, dt 2.0s +All GPU(s): step 9148: loss 29.9688, lr 7.0e-05, dt 2.0s +All GPU(s): step 9149: loss 29.8281, lr 7.0e-05, dt 2.0s +All GPU(s): step 9150: loss 29.8281, lr 7.0e-05, dt 2.0s +All GPU(s): step 9151: loss 30.0156, lr 7.0e-05, dt 2.1s +All GPU(s): step 9152: loss 30.1094, lr 6.9e-05, dt 2.0s +All GPU(s): step 9153: loss 30.0312, lr 6.9e-05, dt 2.0s +All GPU(s): step 9154: loss 30.1562, lr 6.9e-05, dt 2.0s +All GPU(s): step 9155: loss 30.2969, lr 6.9e-05, dt 2.1s +All GPU(s): step 9156: loss 30.3750, lr 6.9e-05, dt 2.1s +All GPU(s): step 9157: loss 30.1094, lr 6.9e-05, dt 2.1s +All GPU(s): step 9158: loss 30.1875, lr 6.9e-05, dt 2.1s +All GPU(s): step 9159: loss 30.5312, lr 6.9e-05, dt 2.0s +All GPU(s): step 9160: loss 30.1719, lr 6.9e-05, dt 2.1s +All GPU(s): step 9161: loss 30.4531, lr 6.9e-05, dt 2.0s +All GPU(s): step 9162: loss 30.5156, lr 6.9e-05, dt 2.0s +All GPU(s): step 9163: loss 30.2812, lr 6.9e-05, dt 2.0s +All GPU(s): step 9164: loss 30.1250, lr 6.9e-05, dt 2.0s +All GPU(s): step 9165: loss 30.5312, lr 6.9e-05, dt 2.2s +All GPU(s): step 9166: loss 30.5312, lr 6.9e-05, dt 2.1s +All GPU(s): step 9167: loss 30.2500, lr 6.9e-05, dt 2.1s +All GPU(s): step 9168: loss 30.1875, lr 6.9e-05, dt 2.1s +All GPU(s): step 9169: loss 30.2812, lr 6.9e-05, dt 2.1s +All GPU(s): step 9170: loss 30.3438, lr 6.9e-05, dt 2.2s +All GPU(s): step 9171: loss 30.2500, lr 6.9e-05, dt 2.1s +All GPU(s): step 9172: loss 30.5312, lr 6.9e-05, dt 2.1s +All GPU(s): step 9173: loss 30.2344, lr 6.9e-05, dt 2.1s +All GPU(s): step 9174: loss 30.2188, lr 6.9e-05, dt 2.1s +All GPU(s): step 9175: loss 30.0156, lr 6.9e-05, dt 2.1s +All GPU(s): step 9176: loss 30.3438, lr 6.9e-05, dt 2.1s +All GPU(s): step 9177: loss 30.2969, lr 6.9e-05, dt 2.0s +All GPU(s): step 9178: loss 30.4062, lr 6.9e-05, dt 2.1s +All GPU(s): step 9179: loss 30.0625, lr 6.9e-05, dt 2.1s +All GPU(s): step 9180: loss 30.3281, lr 6.9e-05, dt 2.0s +All GPU(s): step 9181: loss 30.1094, lr 6.9e-05, dt 2.0s +All GPU(s): step 9182: loss 30.1406, lr 6.9e-05, dt 2.0s +All GPU(s): step 9183: loss 29.9688, lr 6.9e-05, dt 2.0s +All GPU(s): step 9184: loss 29.8438, lr 6.9e-05, dt 2.1s +All GPU(s): step 9185: loss 30.0938, lr 6.8e-05, dt 2.1s +All GPU(s): step 9186: loss 29.9688, lr 6.8e-05, dt 2.1s +All GPU(s): step 9187: loss 30.0312, lr 6.8e-05, dt 2.0s +All GPU(s): step 9188: loss 30.0469, lr 6.8e-05, dt 2.0s +All GPU(s): step 9189: loss 29.7969, lr 6.8e-05, dt 2.2s +All GPU(s): step 9190: loss 29.8281, lr 6.8e-05, dt 2.1s +All GPU(s): step 9191: loss 29.9219, lr 6.8e-05, dt 2.0s +All GPU(s): step 9192: loss 29.9219, lr 6.8e-05, dt 2.0s +All GPU(s): step 9193: loss 29.9531, lr 6.8e-05, dt 2.1s +All GPU(s): step 9194: loss 29.8281, lr 6.8e-05, dt 2.1s +All GPU(s): step 9195: loss 30.0156, lr 6.8e-05, dt 2.1s +All GPU(s): step 9196: loss 29.7500, lr 6.8e-05, dt 2.1s +All GPU(s): step 9197: loss 30.0000, lr 6.8e-05, dt 2.0s +All GPU(s): step 9198: loss 29.9844, lr 6.8e-05, dt 2.1s +All GPU(s): step 9199: loss 29.8750, lr 6.8e-05, dt 2.1s +All GPU(s): step 9200: loss 30.0156, lr 6.8e-05, dt 2.1s +All GPU(s): step 9201: loss 30.1719, lr 6.8e-05, dt 2.0s +All GPU(s): step 9202: loss 30.0000, lr 6.8e-05, dt 2.0s +All GPU(s): step 9203: loss 30.0938, lr 6.8e-05, dt 2.1s +All GPU(s): step 9204: loss 29.9531, lr 6.8e-05, dt 2.0s +All GPU(s): step 9205: loss 29.9844, lr 6.8e-05, dt 2.0s +All GPU(s): step 9206: loss 29.8281, lr 6.8e-05, dt 2.0s +All GPU(s): step 9207: loss 29.9531, lr 6.8e-05, dt 2.1s +All GPU(s): step 9208: loss 30.2500, lr 6.8e-05, dt 2.2s +All GPU(s): step 9209: loss 30.0625, lr 6.8e-05, dt 2.1s +All GPU(s): step 9210: loss 30.1094, lr 6.8e-05, dt 2.1s +All GPU(s): step 9211: loss 30.2188, lr 6.8e-05, dt 2.1s +All GPU(s): step 9212: loss 30.0625, lr 6.8e-05, dt 2.1s +All GPU(s): step 9213: loss 30.2969, lr 6.8e-05, dt 2.2s +All GPU(s): step 9214: loss 30.1562, lr 6.8e-05, dt 2.1s +All GPU(s): step 9215: loss 30.2812, lr 6.8e-05, dt 2.0s +All GPU(s): step 9216: loss 30.1250, lr 6.8e-05, dt 2.0s +All GPU(s): step 9217: loss 30.2031, lr 6.8e-05, dt 2.1s +All GPU(s): step 9218: loss 30.0000, lr 6.7e-05, dt 2.1s +All GPU(s): step 9219: loss 30.1406, lr 6.7e-05, dt 2.0s +All GPU(s): step 9220: loss 30.3281, lr 6.7e-05, dt 2.0s +All GPU(s): step 9221: loss 30.0000, lr 6.7e-05, dt 2.0s +All GPU(s): step 9222: loss 30.2031, lr 6.7e-05, dt 2.1s +All GPU(s): step 9223: loss 30.1719, lr 6.7e-05, dt 2.1s +All GPU(s): step 9224: loss 29.9062, lr 6.7e-05, dt 2.0s +All GPU(s): step 9225: loss 29.9844, lr 6.7e-05, dt 2.1s +All GPU(s): step 9226: loss 30.0781, lr 6.7e-05, dt 2.1s +All GPU(s): step 9227: loss 29.9375, lr 6.7e-05, dt 2.1s +All GPU(s): step 9228: loss 30.0156, lr 6.7e-05, dt 2.1s +All GPU(s): step 9229: loss 29.9531, lr 6.7e-05, dt 2.0s +All GPU(s): step 9230: loss 29.8750, lr 6.7e-05, dt 2.0s +All GPU(s): step 9231: loss 29.7969, lr 6.7e-05, dt 2.1s +All GPU(s): step 9232: loss 29.7656, lr 6.7e-05, dt 2.1s +All GPU(s): step 9233: loss 30.0312, lr 6.7e-05, dt 2.0s +All GPU(s): step 9234: loss 29.7500, lr 6.7e-05, dt 2.0s +All GPU(s): step 9235: loss 29.8438, lr 6.7e-05, dt 2.1s +All GPU(s): step 9236: loss 29.9688, lr 6.7e-05, dt 2.1s +All GPU(s): step 9237: loss 29.9531, lr 6.7e-05, dt 2.2s +All GPU(s): step 9238: loss 29.8281, lr 6.7e-05, dt 2.0s +All GPU(s): step 9239: loss 29.6406, lr 6.7e-05, dt 2.0s +All GPU(s): step 9240: loss 29.6875, lr 6.7e-05, dt 2.0s +All GPU(s): step 9241: loss 29.7188, lr 6.7e-05, dt 2.1s +All GPU(s): step 9242: loss 29.7344, lr 6.7e-05, dt 2.2s +All GPU(s): step 9243: loss 29.5938, lr 6.7e-05, dt 2.0s +All GPU(s): step 9244: loss 29.5938, lr 6.7e-05, dt 2.0s +All GPU(s): step 9245: loss 29.5469, lr 6.7e-05, dt 2.0s +All GPU(s): step 9246: loss 29.5156, lr 6.7e-05, dt 2.1s +All GPU(s): step 9247: loss 29.3750, lr 6.7e-05, dt 2.1s +All GPU(s): step 9248: loss 29.2188, lr 6.7e-05, dt 2.0s +All GPU(s): step 9249: loss 29.2969, lr 6.7e-05, dt 2.0s +All GPU(s): step 9250: loss 29.2812, lr 6.7e-05, dt 2.0s +All GPU(s): step 9251: loss 29.2344, lr 6.6e-05, dt 2.1s +All GPU(s): step 9252: loss 29.2656, lr 6.6e-05, dt 2.1s +All GPU(s): step 9253: loss 29.0156, lr 6.6e-05, dt 2.1s +All GPU(s): step 9254: loss 29.3281, lr 6.6e-05, dt 2.1s +All GPU(s): step 9255: loss 29.2500, lr 6.6e-05, dt 2.0s +All GPU(s): step 9256: loss 29.2812, lr 6.6e-05, dt 2.1s +All GPU(s): step 9257: loss 29.2812, lr 6.6e-05, dt 2.1s +All GPU(s): step 9258: loss 29.3281, lr 6.6e-05, dt 2.0s +All GPU(s): step 9259: loss 29.4219, lr 6.6e-05, dt 2.0s +All GPU(s): step 9260: loss 29.2344, lr 6.6e-05, dt 2.1s +All GPU(s): step 9261: loss 29.6562, lr 6.6e-05, dt 2.2s +All GPU(s): step 9262: loss 29.3438, lr 6.6e-05, dt 2.0s +All GPU(s): step 9263: loss 29.3594, lr 6.6e-05, dt 2.1s +All GPU(s): step 9264: loss 29.0000, lr 6.6e-05, dt 2.1s +All GPU(s): step 9265: loss 29.3281, lr 6.6e-05, dt 2.0s +All GPU(s): step 9266: loss 29.4531, lr 6.6e-05, dt 2.2s +All GPU(s): step 9267: loss 29.4844, lr 6.6e-05, dt 2.0s +All GPU(s): step 9268: loss 29.5156, lr 6.6e-05, dt 2.0s +All GPU(s): step 9269: loss 29.4531, lr 6.6e-05, dt 2.0s +All GPU(s): step 9270: loss 29.7656, lr 6.6e-05, dt 2.1s +All GPU(s): step 9271: loss 29.7344, lr 6.6e-05, dt 2.1s +All GPU(s): step 9272: loss 29.7969, lr 6.6e-05, dt 2.1s +All GPU(s): step 9273: loss 29.4219, lr 6.6e-05, dt 2.0s +All GPU(s): step 9274: loss 29.6094, lr 6.6e-05, dt 2.0s +All GPU(s): step 9275: loss 29.5156, lr 6.6e-05, dt 2.1s +All GPU(s): step 9276: loss 29.5469, lr 6.6e-05, dt 2.0s +All GPU(s): step 9277: loss 29.7031, lr 6.6e-05, dt 2.1s +All GPU(s): step 9278: loss 29.5312, lr 6.6e-05, dt 2.0s +All GPU(s): step 9279: loss 29.6875, lr 6.6e-05, dt 2.0s +All GPU(s): step 9280: loss 29.5625, lr 6.6e-05, dt 2.1s +All GPU(s): step 9281: loss 29.5000, lr 6.6e-05, dt 2.0s +All GPU(s): step 9282: loss 29.5000, lr 6.6e-05, dt 2.1s +All GPU(s): step 9283: loss 29.4531, lr 6.6e-05, dt 2.0s +All GPU(s): step 9284: loss 29.1250, lr 6.6e-05, dt 2.0s +All GPU(s): step 9285: loss 29.2500, lr 6.5e-05, dt 2.1s +All GPU(s): step 9286: loss 29.1719, lr 6.5e-05, dt 2.0s +All GPU(s): step 9287: loss 29.0938, lr 6.5e-05, dt 2.0s +All GPU(s): step 9288: loss 28.8594, lr 6.5e-05, dt 2.0s +All GPU(s): step 9289: loss 28.5938, lr 6.5e-05, dt 2.1s +All GPU(s): step 9290: loss 28.9062, lr 6.5e-05, dt 2.1s +All GPU(s): step 9291: loss 28.8594, lr 6.5e-05, dt 2.0s +All GPU(s): step 9292: loss 28.8594, lr 6.5e-05, dt 2.0s +All GPU(s): step 9293: loss 29.0000, lr 6.5e-05, dt 2.0s +All GPU(s): step 9294: loss 29.0469, lr 6.5e-05, dt 2.0s +All GPU(s): step 9295: loss 28.8750, lr 6.5e-05, dt 2.1s +All GPU(s): step 9296: loss 28.8750, lr 6.5e-05, dt 2.0s +All GPU(s): step 9297: loss 28.9531, lr 6.5e-05, dt 2.0s +All GPU(s): step 9298: loss 29.0781, lr 6.5e-05, dt 2.0s +All GPU(s): step 9299: loss 28.8281, lr 6.5e-05, dt 2.1s +All GPU(s): step 9300: loss 29.0469, lr 6.5e-05, dt 2.1s +All GPU(s): step 9301: loss 29.2188, lr 6.5e-05, dt 2.0s +All GPU(s): step 9302: loss 29.2969, lr 6.5e-05, dt 2.0s +All GPU(s): step 9303: loss 29.2656, lr 6.5e-05, dt 2.0s +All GPU(s): step 9304: loss 29.1094, lr 6.5e-05, dt 2.1s +All GPU(s): step 9305: loss 29.1094, lr 6.5e-05, dt 2.0s +All GPU(s): step 9306: loss 29.3281, lr 6.5e-05, dt 2.0s +All GPU(s): step 9307: loss 29.5000, lr 6.5e-05, dt 2.0s +All GPU(s): step 9308: loss 29.4531, lr 6.5e-05, dt 2.1s +All GPU(s): step 9309: loss 29.4062, lr 6.5e-05, dt 2.1s +All GPU(s): step 9310: loss 29.6094, lr 6.5e-05, dt 2.0s +All GPU(s): step 9311: loss 29.6250, lr 6.5e-05, dt 2.0s +All GPU(s): step 9312: loss 29.5000, lr 6.5e-05, dt 2.0s +All GPU(s): step 9313: loss 29.2656, lr 6.5e-05, dt 2.1s +All GPU(s): step 9314: loss 29.2500, lr 6.5e-05, dt 2.1s +All GPU(s): step 9315: loss 29.3906, lr 6.5e-05, dt 2.0s +All GPU(s): step 9316: loss 29.1719, lr 6.5e-05, dt 2.0s +All GPU(s): step 9317: loss 29.1562, lr 6.5e-05, dt 2.0s +All GPU(s): step 9318: loss 29.0000, lr 6.5e-05, dt 2.1s +All GPU(s): step 9319: loss 29.2188, lr 6.5e-05, dt 2.2s +All GPU(s): step 9320: loss 29.1562, lr 6.4e-05, dt 2.1s +All GPU(s): step 9321: loss 29.4531, lr 6.4e-05, dt 2.1s +All GPU(s): step 9322: loss 29.0938, lr 6.4e-05, dt 2.0s +All GPU(s): step 9323: loss 28.7969, lr 6.4e-05, dt 2.1s +All GPU(s): step 9324: loss 28.9844, lr 6.4e-05, dt 2.1s +All GPU(s): step 9325: loss 29.1250, lr 6.4e-05, dt 2.1s +All GPU(s): step 9326: loss 28.9844, lr 6.4e-05, dt 2.0s +All GPU(s): step 9327: loss 28.8281, lr 6.4e-05, dt 2.0s +All GPU(s): step 9328: loss 28.8125, lr 6.4e-05, dt 2.1s +All GPU(s): step 9329: loss 28.9844, lr 6.4e-05, dt 2.1s +All GPU(s): step 9330: loss 28.9375, lr 6.4e-05, dt 2.0s +All GPU(s): step 9331: loss 28.6094, lr 6.4e-05, dt 2.1s +All GPU(s): step 9332: loss 28.6406, lr 6.4e-05, dt 2.1s +All GPU(s): step 9333: loss 28.8281, lr 6.4e-05, dt 2.1s +All GPU(s): step 9334: loss 28.8906, lr 6.4e-05, dt 2.0s +All GPU(s): step 9335: loss 28.8594, lr 6.4e-05, dt 2.0s +All GPU(s): step 9336: loss 28.7812, lr 6.4e-05, dt 2.1s +All GPU(s): step 9337: loss 28.7344, lr 6.4e-05, dt 2.1s +All GPU(s): step 9338: loss 28.7969, lr 6.4e-05, dt 2.2s +All GPU(s): step 9339: loss 28.7812, lr 6.4e-05, dt 2.1s +All GPU(s): step 9340: loss 28.9062, lr 6.4e-05, dt 2.1s +All GPU(s): step 9341: loss 28.9219, lr 6.4e-05, dt 2.1s +All GPU(s): step 9342: loss 28.9688, lr 6.4e-05, dt 2.1s +All GPU(s): step 9343: loss 28.7969, lr 6.4e-05, dt 2.2s +All GPU(s): step 9344: loss 28.9219, lr 6.4e-05, dt 2.1s +All GPU(s): step 9345: loss 28.9219, lr 6.4e-05, dt 2.0s +All GPU(s): step 9346: loss 29.1094, lr 6.4e-05, dt 2.1s +All GPU(s): step 9347: loss 28.9844, lr 6.4e-05, dt 2.1s +All GPU(s): step 9348: loss 28.9375, lr 6.4e-05, dt 2.1s +All GPU(s): step 9349: loss 28.9062, lr 6.4e-05, dt 2.1s +All GPU(s): step 9350: loss 28.7344, lr 6.4e-05, dt 2.0s +All GPU(s): step 9351: loss 28.7031, lr 6.4e-05, dt 2.1s +All GPU(s): step 9352: loss 28.7969, lr 6.4e-05, dt 2.1s +All GPU(s): step 9353: loss 28.4062, lr 6.4e-05, dt 2.1s +All GPU(s): step 9354: loss 28.6094, lr 6.4e-05, dt 2.1s +All GPU(s): step 9355: loss 28.4844, lr 6.4e-05, dt 2.1s +All GPU(s): step 9356: loss 28.5469, lr 6.3e-05, dt 2.0s +All GPU(s): step 9357: loss 28.4062, lr 6.3e-05, dt 2.1s +All GPU(s): step 9358: loss 28.4688, lr 6.3e-05, dt 2.0s +All GPU(s): step 9359: loss 28.5781, lr 6.3e-05, dt 2.0s +All GPU(s): step 9360: loss 28.4531, lr 6.3e-05, dt 2.0s +All GPU(s): step 9361: loss 28.3750, lr 6.3e-05, dt 2.1s +All GPU(s): step 9362: loss 28.0781, lr 6.3e-05, dt 2.1s +All GPU(s): step 9363: loss 28.1875, lr 6.3e-05, dt 2.1s +All GPU(s): step 9364: loss 27.9219, lr 6.3e-05, dt 2.1s +All GPU(s): step 9365: loss 28.2656, lr 6.3e-05, dt 2.0s +All GPU(s): step 9366: loss 28.1250, lr 6.3e-05, dt 2.1s +All GPU(s): step 9367: loss 27.7344, lr 6.3e-05, dt 2.1s +All GPU(s): step 9368: loss 27.7969, lr 6.3e-05, dt 2.0s +All GPU(s): step 9369: loss 27.8906, lr 6.3e-05, dt 2.0s +All GPU(s): step 9370: loss 27.8750, lr 6.3e-05, dt 2.0s +All GPU(s): step 9371: loss 28.0000, lr 6.3e-05, dt 2.1s +All GPU(s): step 9372: loss 27.7031, lr 6.3e-05, dt 2.1s +All GPU(s): step 9373: loss 27.6562, lr 6.3e-05, dt 2.1s +All GPU(s): step 9374: loss 27.9688, lr 6.3e-05, dt 2.1s +All GPU(s): step 9375: loss 27.8594, lr 6.3e-05, dt 2.1s +All GPU(s): step 9376: loss 27.7812, lr 6.3e-05, dt 2.1s +All GPU(s): step 9377: loss 28.1094, lr 6.3e-05, dt 2.1s +All GPU(s): step 9378: loss 28.1562, lr 6.3e-05, dt 2.0s +All GPU(s): step 9379: loss 27.8906, lr 6.3e-05, dt 2.0s +All GPU(s): step 9380: loss 28.1094, lr 6.3e-05, dt 2.0s +All GPU(s): step 9381: loss 27.7188, lr 6.3e-05, dt 2.1s +All GPU(s): step 9382: loss 27.9375, lr 6.3e-05, dt 2.0s +All GPU(s): step 9383: loss 27.9531, lr 6.3e-05, dt 2.1s +All GPU(s): step 9384: loss 27.9375, lr 6.3e-05, dt 2.0s +All GPU(s): step 9385: loss 27.8594, lr 6.3e-05, dt 2.1s +All GPU(s): step 9386: loss 27.7656, lr 6.3e-05, dt 2.1s +All GPU(s): step 9387: loss 27.8125, lr 6.3e-05, dt 2.0s +All GPU(s): step 9388: loss 27.8594, lr 6.3e-05, dt 2.0s +All GPU(s): step 9389: loss 27.7812, lr 6.3e-05, dt 2.0s +All GPU(s): step 9390: loss 27.8125, lr 6.3e-05, dt 2.1s +All GPU(s): step 9391: loss 27.8438, lr 6.3e-05, dt 2.1s +All GPU(s): step 9392: loss 27.7969, lr 6.2e-05, dt 2.0s +All GPU(s): step 9393: loss 27.8906, lr 6.2e-05, dt 2.0s +All GPU(s): step 9394: loss 27.8906, lr 6.2e-05, dt 2.0s +All GPU(s): step 9395: loss 27.8125, lr 6.2e-05, dt 2.0s +All GPU(s): step 9396: loss 27.9531, lr 6.2e-05, dt 2.1s +All GPU(s): step 9397: loss 27.8438, lr 6.2e-05, dt 2.1s +All GPU(s): step 9398: loss 27.5938, lr 6.2e-05, dt 2.1s +All GPU(s): step 9399: loss 27.8906, lr 6.2e-05, dt 2.1s +All GPU(s): step 9400: loss 27.4375, lr 6.2e-05, dt 2.1s +All GPU(s): step 9401: loss 27.7031, lr 6.2e-05, dt 2.1s +All GPU(s): step 9402: loss 27.2812, lr 6.2e-05, dt 2.0s +All GPU(s): step 9403: loss 27.5000, lr 6.2e-05, dt 2.0s +All GPU(s): step 9404: loss 27.3125, lr 6.2e-05, dt 2.0s +All GPU(s): step 9405: loss 27.2344, lr 6.2e-05, dt 2.1s +All GPU(s): step 9406: loss 27.1406, lr 6.2e-05, dt 2.0s +All GPU(s): step 9407: loss 27.4062, lr 6.2e-05, dt 2.0s +All GPU(s): step 9408: loss 27.3750, lr 6.2e-05, dt 2.1s +All GPU(s): step 9409: loss 27.3906, lr 6.2e-05, dt 2.0s +All GPU(s): step 9410: loss 27.2812, lr 6.2e-05, dt 2.1s +All GPU(s): step 9411: loss 27.4062, lr 6.2e-05, dt 2.1s +All GPU(s): step 9412: loss 27.2500, lr 6.2e-05, dt 2.1s +All GPU(s): step 9413: loss 27.3438, lr 6.2e-05, dt 2.1s +All GPU(s): step 9414: loss 27.3750, lr 6.2e-05, dt 2.1s +All GPU(s): step 9415: loss 27.4219, lr 6.2e-05, dt 2.2s +All GPU(s): step 9416: loss 27.4688, lr 6.2e-05, dt 2.0s +All GPU(s): step 9417: loss 27.7031, lr 6.2e-05, dt 2.1s +All GPU(s): step 9418: loss 27.4531, lr 6.2e-05, dt 2.0s +All GPU(s): step 9419: loss 27.3125, lr 6.2e-05, dt 2.0s +All GPU(s): step 9420: loss 27.4531, lr 6.2e-05, dt 2.2s +All GPU(s): step 9421: loss 27.4375, lr 6.2e-05, dt 2.0s +All GPU(s): step 9422: loss 27.5156, lr 6.2e-05, dt 2.0s +All GPU(s): step 9423: loss 27.5781, lr 6.2e-05, dt 2.0s +All GPU(s): step 9424: loss 27.3906, lr 6.2e-05, dt 2.0s +All GPU(s): step 9425: loss 27.4688, lr 6.2e-05, dt 2.1s +All GPU(s): step 9426: loss 27.2969, lr 6.2e-05, dt 2.0s +All GPU(s): step 9427: loss 27.1719, lr 6.2e-05, dt 2.0s +All GPU(s): step 9428: loss 27.2188, lr 6.2e-05, dt 2.0s +All GPU(s): step 9429: loss 27.3906, lr 6.1e-05, dt 2.1s +All GPU(s): step 9430: loss 27.0938, lr 6.1e-05, dt 2.1s +All GPU(s): step 9431: loss 27.2500, lr 6.1e-05, dt 2.0s +All GPU(s): step 9432: loss 27.2656, lr 6.1e-05, dt 2.0s +All GPU(s): step 9433: loss 27.3438, lr 6.1e-05, dt 2.0s +All GPU(s): step 9434: loss 27.1875, lr 6.1e-05, dt 2.1s +All GPU(s): step 9435: loss 27.7188, lr 6.1e-05, dt 2.0s +All GPU(s): step 9436: loss 27.3281, lr 6.1e-05, dt 2.0s +All GPU(s): step 9437: loss 27.3438, lr 6.1e-05, dt 2.0s +All GPU(s): step 9438: loss 27.3906, lr 6.1e-05, dt 2.0s +All GPU(s): step 9439: loss 27.3281, lr 6.1e-05, dt 2.2s +All GPU(s): step 9440: loss 27.3750, lr 6.1e-05, dt 2.0s +All GPU(s): step 9441: loss 27.2188, lr 6.1e-05, dt 2.0s +All GPU(s): step 9442: loss 27.3906, lr 6.1e-05, dt 2.0s +All GPU(s): step 9443: loss 27.5469, lr 6.1e-05, dt 2.1s +All GPU(s): step 9444: loss 27.5625, lr 6.1e-05, dt 2.2s +All GPU(s): step 9445: loss 27.6094, lr 6.1e-05, dt 2.1s +All GPU(s): step 9446: loss 27.8594, lr 6.1e-05, dt 2.1s +All GPU(s): step 9447: loss 27.6719, lr 6.1e-05, dt 2.1s +All GPU(s): step 9448: loss 27.9219, lr 6.1e-05, dt 2.1s +All GPU(s): step 9449: loss 27.5781, lr 6.1e-05, dt 2.2s +All GPU(s): step 9450: loss 27.6250, lr 6.1e-05, dt 2.0s +All GPU(s): step 9451: loss 27.3594, lr 6.1e-05, dt 2.0s +All GPU(s): step 9452: loss 27.7656, lr 6.1e-05, dt 2.1s +All GPU(s): step 9453: loss 27.7031, lr 6.1e-05, dt 2.1s +All GPU(s): step 9454: loss 27.6875, lr 6.1e-05, dt 2.1s +All GPU(s): step 9455: loss 27.4062, lr 6.1e-05, dt 2.0s +All GPU(s): step 9456: loss 27.6719, lr 6.1e-05, dt 2.0s +All GPU(s): step 9457: loss 27.2031, lr 6.1e-05, dt 2.0s +All GPU(s): step 9458: loss 27.0938, lr 6.1e-05, dt 2.1s +All GPU(s): step 9459: loss 27.1406, lr 6.1e-05, dt 2.0s +All GPU(s): step 9460: loss 27.0312, lr 6.1e-05, dt 2.0s +All GPU(s): step 9461: loss 27.1094, lr 6.1e-05, dt 2.1s +All GPU(s): step 9462: loss 27.0156, lr 6.1e-05, dt 2.1s +All GPU(s): step 9463: loss 26.8906, lr 6.1e-05, dt 2.2s +All GPU(s): step 9464: loss 26.7969, lr 6.1e-05, dt 2.1s +All GPU(s): step 9465: loss 27.3438, lr 6.1e-05, dt 2.0s +All GPU(s): step 9466: loss 26.4844, lr 6.1e-05, dt 2.0s +All GPU(s): step 9467: loss 26.6562, lr 6.0e-05, dt 2.1s +All GPU(s): step 9468: loss 26.7656, lr 6.0e-05, dt 2.2s +All GPU(s): step 9469: loss 27.0156, lr 6.0e-05, dt 2.0s +All GPU(s): step 9470: loss 27.0000, lr 6.0e-05, dt 2.0s +All GPU(s): step 9471: loss 26.9844, lr 6.0e-05, dt 2.0s +All GPU(s): step 9472: loss 26.9062, lr 6.0e-05, dt 2.0s +All GPU(s): step 9473: loss 26.8594, lr 6.0e-05, dt 2.1s +All GPU(s): step 9474: loss 26.5156, lr 6.0e-05, dt 2.0s +All GPU(s): step 9475: loss 26.6094, lr 6.0e-05, dt 2.0s +All GPU(s): step 9476: loss 26.5938, lr 6.0e-05, dt 2.0s +All GPU(s): step 9477: loss 26.7031, lr 6.0e-05, dt 2.0s +All GPU(s): step 9478: loss 26.4844, lr 6.0e-05, dt 2.1s +All GPU(s): step 9479: loss 26.4375, lr 6.0e-05, dt 2.0s +All GPU(s): step 9480: loss 26.0000, lr 6.0e-05, dt 2.1s +All GPU(s): step 9481: loss 26.0469, lr 6.0e-05, dt 2.0s +All GPU(s): step 9482: loss 25.7031, lr 6.0e-05, dt 2.1s +All GPU(s): step 9483: loss 25.5938, lr 6.0e-05, dt 2.1s +All GPU(s): step 9484: loss 25.1250, lr 6.0e-05, dt 2.0s +All GPU(s): step 9485: loss 25.3906, lr 6.0e-05, dt 2.0s +All GPU(s): step 9486: loss 25.3594, lr 6.0e-05, dt 2.0s +All GPU(s): step 9487: loss 25.0156, lr 6.0e-05, dt 2.1s +All GPU(s): step 9488: loss 25.1719, lr 6.0e-05, dt 2.0s +All GPU(s): step 9489: loss 25.2188, lr 6.0e-05, dt 2.0s +All GPU(s): step 9490: loss 25.1094, lr 6.0e-05, dt 2.0s +All GPU(s): step 9491: loss 25.2031, lr 6.0e-05, dt 2.0s +All GPU(s): step 9492: loss 24.9062, lr 6.0e-05, dt 2.1s +All GPU(s): step 9493: loss 25.1875, lr 6.0e-05, dt 2.1s +All GPU(s): step 9494: loss 25.1719, lr 6.0e-05, dt 2.0s +All GPU(s): step 9495: loss 25.2188, lr 6.0e-05, dt 2.1s +All GPU(s): step 9496: loss 25.2344, lr 6.0e-05, dt 2.1s +All GPU(s): step 9497: loss 25.0156, lr 6.0e-05, dt 2.1s +All GPU(s): step 9498: loss 25.3281, lr 6.0e-05, dt 2.1s +All GPU(s): step 9499: loss 25.2812, lr 6.0e-05, dt 2.1s +All GPU(s): step 9500: loss 25.2344, lr 6.0e-05, dt 2.1s +All GPU(s): step 9501: loss 25.1719, lr 6.0e-05, dt 2.1s +All GPU(s): step 9502: loss 25.3750, lr 6.0e-05, dt 2.1s +All GPU(s): step 9503: loss 25.5938, lr 6.0e-05, dt 2.1s +All GPU(s): step 9504: loss 25.6406, lr 6.0e-05, dt 2.1s +All GPU(s): step 9505: loss 25.4531, lr 5.9e-05, dt 2.1s +All GPU(s): step 9506: loss 25.5000, lr 5.9e-05, dt 2.1s +All GPU(s): step 9507: loss 25.8125, lr 5.9e-05, dt 2.1s +All GPU(s): step 9508: loss 25.7656, lr 5.9e-05, dt 2.0s +All GPU(s): step 9509: loss 25.8438, lr 5.9e-05, dt 2.0s +All GPU(s): step 9510: loss 26.4062, lr 5.9e-05, dt 2.1s +All GPU(s): step 9511: loss 26.1094, lr 5.9e-05, dt 2.1s +All GPU(s): step 9512: loss 26.3125, lr 5.9e-05, dt 2.1s +All GPU(s): step 9513: loss 26.3906, lr 5.9e-05, dt 2.1s +All GPU(s): step 9514: loss 26.6094, lr 5.9e-05, dt 2.1s +All GPU(s): step 9515: loss 26.6719, lr 5.9e-05, dt 2.0s +All GPU(s): step 9516: loss 26.8750, lr 5.9e-05, dt 2.1s +All GPU(s): step 9517: loss 26.5156, lr 5.9e-05, dt 2.0s +All GPU(s): step 9518: loss 26.8125, lr 5.9e-05, dt 2.1s +All GPU(s): step 9519: loss 26.7344, lr 5.9e-05, dt 2.0s +All GPU(s): step 9520: loss 26.5938, lr 5.9e-05, dt 2.1s +All GPU(s): step 9521: loss 26.8281, lr 5.9e-05, dt 2.1s +All GPU(s): step 9522: loss 26.4219, lr 5.9e-05, dt 2.0s +All GPU(s): step 9523: loss 26.5938, lr 5.9e-05, dt 2.0s +All GPU(s): step 9524: loss 26.4844, lr 5.9e-05, dt 2.0s +All GPU(s): step 9525: loss 26.5469, lr 5.9e-05, dt 2.1s +All GPU(s): step 9526: loss 26.6562, lr 5.9e-05, dt 2.1s +All GPU(s): step 9527: loss 26.6094, lr 5.9e-05, dt 2.1s +All GPU(s): step 9528: loss 26.5625, lr 5.9e-05, dt 2.1s +All GPU(s): step 9529: loss 26.4531, lr 5.9e-05, dt 2.1s +All GPU(s): step 9530: loss 26.5938, lr 5.9e-05, dt 2.1s +All GPU(s): step 9531: loss 26.6250, lr 5.9e-05, dt 2.1s +All GPU(s): step 9532: loss 26.6562, lr 5.9e-05, dt 2.1s +All GPU(s): step 9533: loss 26.5312, lr 5.9e-05, dt 2.1s +All GPU(s): step 9534: loss 26.9219, lr 5.9e-05, dt 2.1s +All GPU(s): step 9535: loss 26.5938, lr 5.9e-05, dt 2.1s +All GPU(s): step 9536: loss 26.6562, lr 5.9e-05, dt 2.0s +All GPU(s): step 9537: loss 26.6094, lr 5.9e-05, dt 2.1s +All GPU(s): step 9538: loss 26.5781, lr 5.9e-05, dt 2.1s +All GPU(s): step 9539: loss 26.4531, lr 5.9e-05, dt 2.0s +All GPU(s): step 9540: loss 26.1406, lr 5.9e-05, dt 2.2s +All GPU(s): step 9541: loss 26.3438, lr 5.9e-05, dt 2.1s +All GPU(s): step 9542: loss 26.2812, lr 5.9e-05, dt 2.1s +All GPU(s): step 9543: loss 26.5312, lr 5.9e-05, dt 2.1s +All GPU(s): step 9544: loss 25.9219, lr 5.9e-05, dt 2.1s +All GPU(s): step 9545: loss 26.2656, lr 5.8e-05, dt 2.2s +All GPU(s): step 9546: loss 26.4688, lr 5.8e-05, dt 2.1s +All GPU(s): step 9547: loss 26.2500, lr 5.8e-05, dt 2.1s +All GPU(s): step 9548: loss 26.0625, lr 5.8e-05, dt 2.1s +All GPU(s): step 9549: loss 25.9531, lr 5.8e-05, dt 2.1s +All GPU(s): step 9550: loss 26.2656, lr 5.8e-05, dt 2.1s +All GPU(s): step 9551: loss 25.9062, lr 5.8e-05, dt 2.1s +All GPU(s): step 9552: loss 26.1562, lr 5.8e-05, dt 2.1s +All GPU(s): step 9553: loss 25.9219, lr 5.8e-05, dt 2.0s +All GPU(s): step 9554: loss 25.5312, lr 5.8e-05, dt 2.1s +All GPU(s): step 9555: loss 25.5938, lr 5.8e-05, dt 2.1s +All GPU(s): step 9556: loss 25.7344, lr 5.8e-05, dt 2.0s +All GPU(s): step 9557: loss 25.6250, lr 5.8e-05, dt 2.0s +All GPU(s): step 9558: loss 25.7969, lr 5.8e-05, dt 2.0s +All GPU(s): step 9559: loss 25.8750, lr 5.8e-05, dt 2.1s +All GPU(s): step 9560: loss 25.6719, lr 5.8e-05, dt 2.0s +All GPU(s): step 9561: loss 25.4688, lr 5.8e-05, dt 2.1s +All GPU(s): step 9562: loss 25.4062, lr 5.8e-05, dt 2.1s +All GPU(s): step 9563: loss 25.8906, lr 5.8e-05, dt 2.0s +All GPU(s): step 9564: loss 25.5156, lr 5.8e-05, dt 2.1s +All GPU(s): step 9565: loss 26.0312, lr 5.8e-05, dt 2.0s +All GPU(s): step 9566: loss 26.0312, lr 5.8e-05, dt 2.1s +All GPU(s): step 9567: loss 25.6719, lr 5.8e-05, dt 2.1s +All GPU(s): step 9568: loss 26.3125, lr 5.8e-05, dt 2.1s +All GPU(s): step 9569: loss 26.0938, lr 5.8e-05, dt 2.2s +All GPU(s): step 9570: loss 26.1562, lr 5.8e-05, dt 2.0s +All GPU(s): step 9571: loss 26.3906, lr 5.8e-05, dt 2.0s +All GPU(s): step 9572: loss 26.3594, lr 5.8e-05, dt 2.0s +All GPU(s): step 9573: loss 26.3750, lr 5.8e-05, dt 2.0s +All GPU(s): step 9574: loss 26.4688, lr 5.8e-05, dt 2.2s +All GPU(s): step 9575: loss 26.5312, lr 5.8e-05, dt 2.1s +All GPU(s): step 9576: loss 26.9219, lr 5.8e-05, dt 2.0s +All GPU(s): step 9577: loss 26.5625, lr 5.8e-05, dt 2.0s +All GPU(s): step 9578: loss 27.0000, lr 5.8e-05, dt 2.0s +All GPU(s): step 9579: loss 26.8594, lr 5.8e-05, dt 2.1s +All GPU(s): step 9580: loss 26.8438, lr 5.8e-05, dt 2.0s +All GPU(s): step 9581: loss 27.0625, lr 5.8e-05, dt 2.0s +All GPU(s): step 9582: loss 27.0312, lr 5.8e-05, dt 2.0s +All GPU(s): step 9583: loss 27.1250, lr 5.8e-05, dt 2.1s +All GPU(s): step 9584: loss 26.8906, lr 5.8e-05, dt 2.0s +All GPU(s): step 9585: loss 27.1562, lr 5.7e-05, dt 2.1s +All GPU(s): step 9586: loss 27.1094, lr 5.7e-05, dt 2.0s +All GPU(s): step 9587: loss 26.7344, lr 5.7e-05, dt 2.0s +All GPU(s): step 9588: loss 26.8906, lr 5.7e-05, dt 2.1s +All GPU(s): step 9589: loss 26.8125, lr 5.7e-05, dt 2.0s +All GPU(s): step 9590: loss 26.8906, lr 5.7e-05, dt 2.1s +All GPU(s): step 9591: loss 26.8594, lr 5.7e-05, dt 2.0s +All GPU(s): step 9592: loss 26.7969, lr 5.7e-05, dt 2.0s +All GPU(s): step 9593: loss 27.1719, lr 5.7e-05, dt 2.2s +All GPU(s): step 9594: loss 27.0625, lr 5.7e-05, dt 2.1s +All GPU(s): step 9595: loss 27.2969, lr 5.7e-05, dt 2.1s +All GPU(s): step 9596: loss 27.2812, lr 5.7e-05, dt 2.0s +All GPU(s): step 9597: loss 26.9219, lr 5.7e-05, dt 2.0s +All GPU(s): step 9598: loss 27.4219, lr 5.7e-05, dt 2.1s +All GPU(s): step 9599: loss 27.7500, lr 5.7e-05, dt 2.0s +All GPU(s): step 9600: loss 27.6719, lr 5.7e-05, dt 2.0s +All GPU(s): step 9601: loss 27.4062, lr 5.7e-05, dt 2.0s +All GPU(s): step 9602: loss 27.4844, lr 5.7e-05, dt 2.1s +All GPU(s): step 9603: loss 27.2812, lr 5.7e-05, dt 2.1s +All GPU(s): step 9604: loss 27.4688, lr 5.7e-05, dt 2.1s +All GPU(s): step 9605: loss 27.3906, lr 5.7e-05, dt 2.0s +All GPU(s): step 9606: loss 27.3125, lr 5.7e-05, dt 2.0s +All GPU(s): step 9607: loss 27.1562, lr 5.7e-05, dt 2.1s +All GPU(s): step 9608: loss 27.2812, lr 5.7e-05, dt 2.1s +All GPU(s): step 9609: loss 27.2969, lr 5.7e-05, dt 2.1s +All GPU(s): step 9610: loss 27.2812, lr 5.7e-05, dt 2.0s +All GPU(s): step 9611: loss 27.1875, lr 5.7e-05, dt 2.0s +All GPU(s): step 9612: loss 27.1719, lr 5.7e-05, dt 2.1s +All GPU(s): step 9613: loss 27.1719, lr 5.7e-05, dt 2.1s +All GPU(s): step 9614: loss 27.3281, lr 5.7e-05, dt 2.0s +All GPU(s): step 9615: loss 27.0312, lr 5.7e-05, dt 2.0s +All GPU(s): step 9616: loss 27.0938, lr 5.7e-05, dt 2.1s +All GPU(s): step 9617: loss 27.3750, lr 5.7e-05, dt 2.1s +All GPU(s): step 9618: loss 27.1875, lr 5.7e-05, dt 2.0s +All GPU(s): step 9619: loss 27.4375, lr 5.7e-05, dt 2.0s +All GPU(s): step 9620: loss 27.1875, lr 5.7e-05, dt 2.0s +All GPU(s): step 9621: loss 27.2969, lr 5.7e-05, dt 2.1s +All GPU(s): step 9622: loss 27.2188, lr 5.7e-05, dt 2.1s +All GPU(s): step 9623: loss 27.0781, lr 5.7e-05, dt 2.0s +All GPU(s): step 9624: loss 27.1094, lr 5.7e-05, dt 2.0s +All GPU(s): step 9625: loss 27.3438, lr 5.7e-05, dt 2.0s +All GPU(s): step 9626: loss 26.9375, lr 5.7e-05, dt 2.1s +All GPU(s): step 9627: loss 26.7656, lr 5.6e-05, dt 2.1s +All GPU(s): step 9628: loss 26.6875, lr 5.6e-05, dt 2.0s +All GPU(s): step 9629: loss 26.8906, lr 5.6e-05, dt 2.0s +All GPU(s): step 9630: loss 26.7969, lr 5.6e-05, dt 2.0s +All GPU(s): step 9631: loss 26.5781, lr 5.6e-05, dt 2.1s +All GPU(s): step 9632: loss 26.5312, lr 5.6e-05, dt 2.1s +All GPU(s): step 9633: loss 26.2969, lr 5.6e-05, dt 2.0s +All GPU(s): step 9634: loss 26.3281, lr 5.6e-05, dt 2.0s +All GPU(s): step 9635: loss 26.2344, lr 5.6e-05, dt 2.1s +All GPU(s): step 9636: loss 26.6875, lr 5.6e-05, dt 2.1s +All GPU(s): step 9637: loss 26.3281, lr 5.6e-05, dt 2.1s +All GPU(s): step 9638: loss 26.7656, lr 5.6e-05, dt 2.1s +All GPU(s): step 9639: loss 26.4844, lr 5.6e-05, dt 2.1s +All GPU(s): step 9640: loss 26.1094, lr 5.6e-05, dt 2.1s +All GPU(s): step 9641: loss 26.5938, lr 5.6e-05, dt 2.1s +All GPU(s): step 9642: loss 26.7500, lr 5.6e-05, dt 2.0s +All GPU(s): step 9643: loss 26.4375, lr 5.6e-05, dt 2.1s +All GPU(s): step 9644: loss 26.4062, lr 5.6e-05, dt 2.1s +All GPU(s): step 9645: loss 26.4219, lr 5.6e-05, dt 2.1s +All GPU(s): step 9646: loss 26.2812, lr 5.6e-05, dt 2.1s +All GPU(s): step 9647: loss 26.4844, lr 5.6e-05, dt 2.1s +All GPU(s): step 9648: loss 26.2656, lr 5.6e-05, dt 2.1s +All GPU(s): step 9649: loss 26.0938, lr 5.6e-05, dt 2.1s +All GPU(s): step 9650: loss 26.3281, lr 5.6e-05, dt 2.1s +All GPU(s): step 9651: loss 26.0156, lr 5.6e-05, dt 2.1s +All GPU(s): step 9652: loss 26.2500, lr 5.6e-05, dt 2.1s +All GPU(s): step 9653: loss 25.9844, lr 5.6e-05, dt 2.0s +All GPU(s): step 9654: loss 25.9219, lr 5.6e-05, dt 2.1s +All GPU(s): step 9655: loss 25.9219, lr 5.6e-05, dt 2.1s +All GPU(s): step 9656: loss 25.9844, lr 5.6e-05, dt 2.1s +All GPU(s): step 9657: loss 25.7812, lr 5.6e-05, dt 2.0s +All GPU(s): step 9658: loss 25.8438, lr 5.6e-05, dt 2.1s +All GPU(s): step 9659: loss 25.7188, lr 5.6e-05, dt 2.0s +All GPU(s): step 9660: loss 25.9219, lr 5.6e-05, dt 2.1s +All GPU(s): step 9661: loss 26.2969, lr 5.6e-05, dt 2.1s +All GPU(s): step 9662: loss 25.8750, lr 5.6e-05, dt 2.0s +All GPU(s): step 9663: loss 26.0000, lr 5.6e-05, dt 2.0s +All GPU(s): step 9664: loss 25.7656, lr 5.6e-05, dt 2.0s +All GPU(s): step 9665: loss 25.8594, lr 5.6e-05, dt 2.1s +All GPU(s): step 9666: loss 25.7969, lr 5.6e-05, dt 2.0s +All GPU(s): step 9667: loss 26.2656, lr 5.6e-05, dt 2.0s +All GPU(s): step 9668: loss 25.8438, lr 5.6e-05, dt 2.0s +All GPU(s): step 9669: loss 26.0469, lr 5.6e-05, dt 2.1s +All GPU(s): step 9670: loss 25.6719, lr 5.5e-05, dt 2.2s +All GPU(s): step 9671: loss 25.9375, lr 5.5e-05, dt 2.1s +All GPU(s): step 9672: loss 25.9375, lr 5.5e-05, dt 2.1s +All GPU(s): step 9673: loss 25.8281, lr 5.5e-05, dt 2.1s +All GPU(s): step 9674: loss 25.8750, lr 5.5e-05, dt 2.1s +All GPU(s): step 9675: loss 25.6875, lr 5.5e-05, dt 2.1s +All GPU(s): step 9676: loss 25.6250, lr 5.5e-05, dt 2.0s +All GPU(s): step 9677: loss 25.7188, lr 5.5e-05, dt 2.0s +All GPU(s): step 9678: loss 25.7969, lr 5.5e-05, dt 2.0s +All GPU(s): step 9679: loss 25.2344, lr 5.5e-05, dt 2.0s +All GPU(s): step 9680: loss 25.7188, lr 5.5e-05, dt 2.1s +All GPU(s): step 9681: loss 25.6406, lr 5.5e-05, dt 2.0s +All GPU(s): step 9682: loss 25.4844, lr 5.5e-05, dt 2.0s +All GPU(s): step 9683: loss 25.2812, lr 5.5e-05, dt 2.1s +All GPU(s): step 9684: loss 25.8750, lr 5.5e-05, dt 2.1s +All GPU(s): step 9685: loss 25.9219, lr 5.5e-05, dt 2.1s +All GPU(s): step 9686: loss 25.5469, lr 5.5e-05, dt 2.1s +All GPU(s): step 9687: loss 25.6719, lr 5.5e-05, dt 2.1s +All GPU(s): step 9688: loss 25.8594, lr 5.5e-05, dt 2.1s +All GPU(s): step 9689: loss 25.8906, lr 5.5e-05, dt 2.1s +All GPU(s): step 9690: loss 25.7031, lr 5.5e-05, dt 2.0s +All GPU(s): step 9691: loss 25.5469, lr 5.5e-05, dt 2.0s +All GPU(s): step 9692: loss 25.7344, lr 5.5e-05, dt 2.0s +All GPU(s): step 9693: loss 25.5469, lr 5.5e-05, dt 2.1s +All GPU(s): step 9694: loss 25.6406, lr 5.5e-05, dt 2.1s +All GPU(s): step 9695: loss 25.7344, lr 5.5e-05, dt 2.0s +All GPU(s): step 9696: loss 25.5938, lr 5.5e-05, dt 2.0s +All GPU(s): step 9697: loss 25.4219, lr 5.5e-05, dt 2.0s +All GPU(s): step 9698: loss 25.6406, lr 5.5e-05, dt 2.1s +All GPU(s): step 9699: loss 25.2188, lr 5.5e-05, dt 2.1s +All GPU(s): step 9700: loss 25.4688, lr 5.5e-05, dt 2.0s +All GPU(s): step 9701: loss 25.3281, lr 5.5e-05, dt 2.1s +All GPU(s): step 9702: loss 25.3438, lr 5.5e-05, dt 2.1s +All GPU(s): step 9703: loss 24.9375, lr 5.5e-05, dt 2.1s +All GPU(s): step 9704: loss 25.0312, lr 5.5e-05, dt 2.1s +All GPU(s): step 9705: loss 24.7500, lr 5.5e-05, dt 2.1s +All GPU(s): step 9706: loss 24.9688, lr 5.5e-05, dt 2.1s +All GPU(s): step 9707: loss 24.9062, lr 5.5e-05, dt 2.1s +All GPU(s): step 9708: loss 24.6250, lr 5.5e-05, dt 2.1s +All GPU(s): step 9709: loss 24.9375, lr 5.5e-05, dt 2.1s +All GPU(s): step 9710: loss 24.7031, lr 5.5e-05, dt 2.1s +All GPU(s): step 9711: loss 24.8281, lr 5.5e-05, dt 2.0s +All GPU(s): step 9712: loss 24.6719, lr 5.5e-05, dt 2.0s +All GPU(s): step 9713: loss 24.9219, lr 5.5e-05, dt 2.1s +All GPU(s): step 9714: loss 25.0938, lr 5.4e-05, dt 2.0s +All GPU(s): step 9715: loss 24.7969, lr 5.4e-05, dt 2.0s +All GPU(s): step 9716: loss 25.3281, lr 5.4e-05, dt 2.0s +All GPU(s): step 9717: loss 25.0000, lr 5.4e-05, dt 2.1s +All GPU(s): step 9718: loss 25.8125, lr 5.4e-05, dt 2.1s +All GPU(s): step 9719: loss 25.6719, lr 5.4e-05, dt 2.0s +All GPU(s): step 9720: loss 25.9062, lr 5.4e-05, dt 2.1s +All GPU(s): step 9721: loss 25.9844, lr 5.4e-05, dt 2.1s +All GPU(s): step 9722: loss 26.1094, lr 5.4e-05, dt 2.1s +All GPU(s): step 9723: loss 25.9375, lr 5.4e-05, dt 2.1s +All GPU(s): step 9724: loss 26.0781, lr 5.4e-05, dt 2.0s +All GPU(s): step 9725: loss 26.1719, lr 5.4e-05, dt 2.0s +All GPU(s): step 9726: loss 25.9219, lr 5.4e-05, dt 2.0s +All GPU(s): step 9727: loss 26.1406, lr 5.4e-05, dt 2.1s +All GPU(s): step 9728: loss 26.0000, lr 5.4e-05, dt 2.1s +All GPU(s): step 9729: loss 25.8750, lr 5.4e-05, dt 2.0s +All GPU(s): step 9730: loss 25.6094, lr 5.4e-05, dt 2.0s +All GPU(s): step 9731: loss 26.0000, lr 5.4e-05, dt 2.0s +All GPU(s): step 9732: loss 25.5156, lr 5.4e-05, dt 2.1s +All GPU(s): step 9733: loss 25.2656, lr 5.4e-05, dt 2.1s +All GPU(s): step 9734: loss 24.9844, lr 5.4e-05, dt 2.0s +All GPU(s): step 9735: loss 25.1250, lr 5.4e-05, dt 2.1s +All GPU(s): step 9736: loss 24.9219, lr 5.4e-05, dt 2.1s +All GPU(s): step 9737: loss 24.8125, lr 5.4e-05, dt 2.1s +All GPU(s): step 9738: loss 24.6094, lr 5.4e-05, dt 2.0s +All GPU(s): step 9739: loss 24.5781, lr 5.4e-05, dt 2.0s +All GPU(s): step 9740: loss 24.3750, lr 5.4e-05, dt 2.0s +All GPU(s): step 9741: loss 24.3438, lr 5.4e-05, dt 2.0s +All GPU(s): step 9742: loss 24.2812, lr 5.4e-05, dt 2.3s +All GPU(s): step 9743: loss 24.7188, lr 5.4e-05, dt 2.0s +All GPU(s): step 9744: loss 24.5156, lr 5.4e-05, dt 2.0s +All GPU(s): step 9745: loss 23.9219, lr 5.4e-05, dt 2.0s +All GPU(s): step 9746: loss 24.2344, lr 5.4e-05, dt 2.0s +All GPU(s): step 9747: loss 24.1250, lr 5.4e-05, dt 2.2s +All GPU(s): step 9748: loss 24.2812, lr 5.4e-05, dt 2.0s +All GPU(s): step 9749: loss 24.1562, lr 5.4e-05, dt 2.0s +All GPU(s): step 9750: loss 24.4375, lr 5.4e-05, dt 2.0s +All GPU(s): step 9751: loss 24.1875, lr 5.4e-05, dt 2.0s +All GPU(s): step 9752: loss 24.0625, lr 5.4e-05, dt 2.1s +All GPU(s): step 9753: loss 24.1250, lr 5.4e-05, dt 2.0s +All GPU(s): step 9754: loss 24.0000, lr 5.4e-05, dt 2.1s +All GPU(s): step 9755: loss 23.7812, lr 5.4e-05, dt 2.0s +All GPU(s): step 9756: loss 23.8438, lr 5.4e-05, dt 2.1s +All GPU(s): step 9757: loss 24.0469, lr 5.4e-05, dt 2.1s +All GPU(s): step 9758: loss 23.8594, lr 5.4e-05, dt 2.0s +All GPU(s): step 9759: loss 23.6562, lr 5.4e-05, dt 2.0s +All GPU(s): step 9760: loss 23.9688, lr 5.3e-05, dt 2.0s +All GPU(s): step 9761: loss 23.9062, lr 5.3e-05, dt 2.1s +All GPU(s): step 9762: loss 23.9062, lr 5.3e-05, dt 2.0s +All GPU(s): step 9763: loss 24.0156, lr 5.3e-05, dt 2.0s +All GPU(s): step 9764: loss 24.2969, lr 5.3e-05, dt 2.0s +All GPU(s): step 9765: loss 23.9062, lr 5.3e-05, dt 2.1s +All GPU(s): step 9766: loss 23.5625, lr 5.3e-05, dt 2.1s +All GPU(s): step 9767: loss 24.0000, lr 5.3e-05, dt 2.0s +All GPU(s): step 9768: loss 24.1406, lr 5.3e-05, dt 2.0s +All GPU(s): step 9769: loss 24.3594, lr 5.3e-05, dt 2.0s +All GPU(s): step 9770: loss 24.1250, lr 5.3e-05, dt 2.0s +All GPU(s): step 9771: loss 23.7656, lr 5.3e-05, dt 2.2s +All GPU(s): step 9772: loss 24.0469, lr 5.3e-05, dt 2.0s +All GPU(s): step 9773: loss 23.8125, lr 5.3e-05, dt 2.1s +All GPU(s): step 9774: loss 24.0156, lr 5.3e-05, dt 2.0s +All GPU(s): step 9775: loss 24.2969, lr 5.3e-05, dt 2.1s +All GPU(s): step 9776: loss 24.0781, lr 5.3e-05, dt 2.1s +All GPU(s): step 9777: loss 24.2812, lr 5.3e-05, dt 2.0s +All GPU(s): step 9778: loss 24.0938, lr 5.3e-05, dt 2.0s +All GPU(s): step 9779: loss 23.9062, lr 5.3e-05, dt 2.0s +All GPU(s): step 9780: loss 23.9688, lr 5.3e-05, dt 2.1s +All GPU(s): step 9781: loss 23.9375, lr 5.3e-05, dt 2.1s +All GPU(s): step 9782: loss 24.0156, lr 5.3e-05, dt 2.1s +All GPU(s): step 9783: loss 23.6719, lr 5.3e-05, dt 2.0s +All GPU(s): step 9784: loss 23.9219, lr 5.3e-05, dt 2.0s +All GPU(s): step 9785: loss 23.9062, lr 5.3e-05, dt 2.1s +All GPU(s): step 9786: loss 23.8750, lr 5.3e-05, dt 2.1s +All GPU(s): step 9787: loss 23.7969, lr 5.3e-05, dt 2.1s +All GPU(s): step 9788: loss 23.7969, lr 5.3e-05, dt 2.1s +All GPU(s): step 9789: loss 23.7812, lr 5.3e-05, dt 2.0s +All GPU(s): step 9790: loss 23.7656, lr 5.3e-05, dt 2.1s +All GPU(s): step 9791: loss 23.7188, lr 5.3e-05, dt 2.0s +All GPU(s): step 9792: loss 23.6875, lr 5.3e-05, dt 2.1s +All GPU(s): step 9793: loss 23.7500, lr 5.3e-05, dt 2.0s +All GPU(s): step 9794: loss 24.2812, lr 5.3e-05, dt 2.0s +All GPU(s): step 9795: loss 23.9062, lr 5.3e-05, dt 2.1s +All GPU(s): step 9796: loss 23.6562, lr 5.3e-05, dt 2.0s +All GPU(s): step 9797: loss 24.0625, lr 5.3e-05, dt 2.0s +All GPU(s): step 9798: loss 23.9375, lr 5.3e-05, dt 2.0s +All GPU(s): step 9799: loss 23.6250, lr 5.3e-05, dt 2.0s +All GPU(s): step 9800: loss 24.4375, lr 5.3e-05, dt 2.1s +All GPU(s): step 9801: loss 24.1406, lr 5.3e-05, dt 2.0s +All GPU(s): step 9802: loss 24.7031, lr 5.3e-05, dt 2.1s +All GPU(s): step 9803: loss 24.3125, lr 5.3e-05, dt 2.0s +All GPU(s): step 9804: loss 24.7969, lr 5.3e-05, dt 2.0s +All GPU(s): step 9805: loss 24.8281, lr 5.3e-05, dt 2.1s +All GPU(s): step 9806: loss 25.1719, lr 5.3e-05, dt 2.0s +All GPU(s): step 9807: loss 25.2031, lr 5.2e-05, dt 2.0s +All GPU(s): step 9808: loss 25.3125, lr 5.2e-05, dt 2.0s +All GPU(s): step 9809: loss 25.4062, lr 5.2e-05, dt 2.0s +All GPU(s): step 9810: loss 25.5781, lr 5.2e-05, dt 2.1s +All GPU(s): step 9811: loss 25.3594, lr 5.2e-05, dt 2.0s +All GPU(s): step 9812: loss 25.8594, lr 5.2e-05, dt 2.0s +All GPU(s): step 9813: loss 25.8281, lr 5.2e-05, dt 2.0s +All GPU(s): step 9814: loss 25.7969, lr 5.2e-05, dt 2.1s +All GPU(s): step 9815: loss 25.9688, lr 5.2e-05, dt 2.1s +All GPU(s): step 9816: loss 25.6875, lr 5.2e-05, dt 2.1s +All GPU(s): step 9817: loss 26.1406, lr 5.2e-05, dt 2.0s +All GPU(s): step 9818: loss 26.0938, lr 5.2e-05, dt 2.1s +All GPU(s): step 9819: loss 26.1094, lr 5.2e-05, dt 2.1s +All GPU(s): step 9820: loss 25.8125, lr 5.2e-05, dt 2.0s +All GPU(s): step 9821: loss 26.0156, lr 5.2e-05, dt 2.0s +All GPU(s): step 9822: loss 25.9375, lr 5.2e-05, dt 2.0s +All GPU(s): step 9823: loss 25.9219, lr 5.2e-05, dt 2.0s +All GPU(s): step 9824: loss 26.1719, lr 5.2e-05, dt 2.3s +All GPU(s): step 9825: loss 25.8438, lr 5.2e-05, dt 2.0s +All GPU(s): step 9826: loss 26.0156, lr 5.2e-05, dt 2.0s +All GPU(s): step 9827: loss 25.8594, lr 5.2e-05, dt 2.1s +All GPU(s): step 9828: loss 26.0469, lr 5.2e-05, dt 2.1s +All GPU(s): step 9829: loss 25.9219, lr 5.2e-05, dt 2.2s +All GPU(s): step 9830: loss 25.8281, lr 5.2e-05, dt 2.1s +All GPU(s): step 9831: loss 25.7969, lr 5.2e-05, dt 2.1s +All GPU(s): step 9832: loss 25.7500, lr 5.2e-05, dt 2.1s +All GPU(s): step 9833: loss 25.5469, lr 5.2e-05, dt 2.1s +All GPU(s): step 9834: loss 25.4531, lr 5.2e-05, dt 2.1s +All GPU(s): step 9835: loss 25.3438, lr 5.2e-05, dt 2.0s +All GPU(s): step 9836: loss 25.0625, lr 5.2e-05, dt 2.0s +All GPU(s): step 9837: loss 25.5469, lr 5.2e-05, dt 2.1s +All GPU(s): step 9838: loss 25.0938, lr 5.2e-05, dt 2.1s +All GPU(s): step 9839: loss 25.1719, lr 5.2e-05, dt 2.1s +All GPU(s): step 9840: loss 25.5000, lr 5.2e-05, dt 2.1s +All GPU(s): step 9841: loss 25.2344, lr 5.2e-05, dt 2.0s +All GPU(s): step 9842: loss 25.3750, lr 5.2e-05, dt 2.0s +All GPU(s): step 9843: loss 25.4531, lr 5.2e-05, dt 2.1s +All GPU(s): step 9844: loss 25.4062, lr 5.2e-05, dt 2.1s +All GPU(s): step 9845: loss 25.7031, lr 5.2e-05, dt 2.0s +All GPU(s): step 9846: loss 25.4375, lr 5.2e-05, dt 2.1s +All GPU(s): step 9847: loss 25.8438, lr 5.2e-05, dt 2.1s +All GPU(s): step 9848: loss 25.9844, lr 5.2e-05, dt 2.2s +All GPU(s): step 9849: loss 26.2500, lr 5.2e-05, dt 2.0s +All GPU(s): step 9850: loss 26.1562, lr 5.2e-05, dt 2.0s +All GPU(s): step 9851: loss 26.1719, lr 5.2e-05, dt 2.0s +All GPU(s): step 9852: loss 26.0938, lr 5.2e-05, dt 2.1s +All GPU(s): step 9853: loss 25.9688, lr 5.2e-05, dt 2.2s +All GPU(s): step 9854: loss 25.6719, lr 5.2e-05, dt 2.1s +All GPU(s): step 9855: loss 25.9219, lr 5.2e-05, dt 2.1s +All GPU(s): step 9856: loss 26.0000, lr 5.2e-05, dt 2.1s +All GPU(s): step 9857: loss 26.2031, lr 5.1e-05, dt 2.1s +All GPU(s): step 9858: loss 25.8594, lr 5.1e-05, dt 2.1s +All GPU(s): step 9859: loss 25.9219, lr 5.1e-05, dt 2.0s +All GPU(s): step 9860: loss 25.9219, lr 5.1e-05, dt 2.0s +All GPU(s): step 9861: loss 25.8594, lr 5.1e-05, dt 2.0s +All GPU(s): step 9862: loss 25.8750, lr 5.1e-05, dt 2.1s +All GPU(s): step 9863: loss 25.9219, lr 5.1e-05, dt 2.1s +All GPU(s): step 9864: loss 25.8125, lr 5.1e-05, dt 2.0s +All GPU(s): step 9865: loss 25.7500, lr 5.1e-05, dt 2.1s +All GPU(s): step 9866: loss 25.8281, lr 5.1e-05, dt 2.1s +All GPU(s): step 9867: loss 25.4219, lr 5.1e-05, dt 2.1s +All GPU(s): step 9868: loss 26.0000, lr 5.1e-05, dt 2.1s +All GPU(s): step 9869: loss 25.7500, lr 5.1e-05, dt 2.1s +All GPU(s): step 9870: loss 25.7656, lr 5.1e-05, dt 2.1s +All GPU(s): step 9871: loss 25.4219, lr 5.1e-05, dt 2.1s +All GPU(s): step 9872: loss 25.4688, lr 5.1e-05, dt 2.1s +All GPU(s): step 9873: loss 25.5156, lr 5.1e-05, dt 2.0s +All GPU(s): step 9874: loss 25.5469, lr 5.1e-05, dt 2.0s +All GPU(s): step 9875: loss 25.5000, lr 5.1e-05, dt 2.0s +All GPU(s): step 9876: loss 25.7500, lr 5.1e-05, dt 2.0s +All GPU(s): step 9877: loss 25.7031, lr 5.1e-05, dt 2.2s +All GPU(s): step 9878: loss 25.5000, lr 5.1e-05, dt 2.0s +All GPU(s): step 9879: loss 25.6875, lr 5.1e-05, dt 2.0s +All GPU(s): step 9880: loss 25.8125, lr 5.1e-05, dt 2.0s +All GPU(s): step 9881: loss 25.8594, lr 5.1e-05, dt 2.1s +All GPU(s): step 9882: loss 26.1562, lr 5.1e-05, dt 2.1s +All GPU(s): step 9883: loss 25.8750, lr 5.1e-05, dt 2.0s +All GPU(s): step 9884: loss 25.8594, lr 5.1e-05, dt 2.1s +All GPU(s): step 9885: loss 26.1406, lr 5.1e-05, dt 2.0s +All GPU(s): step 9886: loss 26.1250, lr 5.1e-05, dt 2.0s +All GPU(s): step 9887: loss 26.1250, lr 5.1e-05, dt 2.1s +All GPU(s): step 9888: loss 25.8906, lr 5.1e-05, dt 2.1s +All GPU(s): step 9889: loss 25.7344, lr 5.1e-05, dt 2.1s +All GPU(s): step 9890: loss 26.0938, lr 5.1e-05, dt 2.0s +All GPU(s): step 9891: loss 26.0312, lr 5.1e-05, dt 2.1s +All GPU(s): step 9892: loss 25.7812, lr 5.1e-05, dt 2.1s +All GPU(s): step 9893: loss 26.1562, lr 5.1e-05, dt 2.1s +All GPU(s): step 9894: loss 26.1875, lr 5.1e-05, dt 2.0s +All GPU(s): step 9895: loss 26.1406, lr 5.1e-05, dt 2.0s +All GPU(s): step 9896: loss 26.3281, lr 5.1e-05, dt 2.2s +All GPU(s): step 9897: loss 26.2344, lr 5.1e-05, dt 2.0s +All GPU(s): step 9898: loss 26.1875, lr 5.1e-05, dt 2.0s +All GPU(s): step 9899: loss 26.4219, lr 5.1e-05, dt 2.1s +All GPU(s): step 9900: loss 26.1562, lr 5.1e-05, dt 2.1s +All GPU(s): step 9901: loss 26.1094, lr 5.1e-05, dt 2.2s +All GPU(s): step 9902: loss 26.1094, lr 5.1e-05, dt 2.0s +All GPU(s): step 9903: loss 26.0156, lr 5.1e-05, dt 2.1s +All GPU(s): step 9904: loss 25.9844, lr 5.1e-05, dt 2.1s +All GPU(s): step 9905: loss 25.6406, lr 5.1e-05, dt 2.1s +All GPU(s): step 9906: loss 25.8438, lr 5.1e-05, dt 2.1s +All GPU(s): step 9907: loss 25.8438, lr 5.1e-05, dt 2.1s +All GPU(s): step 9908: loss 25.7344, lr 5.0e-05, dt 2.0s +All GPU(s): step 9909: loss 25.9688, lr 5.0e-05, dt 2.0s +All GPU(s): step 9910: loss 25.6250, lr 5.0e-05, dt 2.1s +All GPU(s): step 9911: loss 25.8438, lr 5.0e-05, dt 2.1s +All GPU(s): step 9912: loss 26.0469, lr 5.0e-05, dt 2.0s +All GPU(s): step 9913: loss 26.2500, lr 5.0e-05, dt 2.0s +All GPU(s): step 9914: loss 26.3594, lr 5.0e-05, dt 2.0s +All GPU(s): step 9915: loss 26.3594, lr 5.0e-05, dt 2.1s +All GPU(s): step 9916: loss 26.2812, lr 5.0e-05, dt 2.0s +All GPU(s): step 9917: loss 26.4531, lr 5.0e-05, dt 2.0s +All GPU(s): step 9918: loss 26.6250, lr 5.0e-05, dt 2.0s +All GPU(s): step 9919: loss 26.6562, lr 5.0e-05, dt 2.0s +All GPU(s): step 9920: loss 26.7656, lr 5.0e-05, dt 2.1s +All GPU(s): step 9921: loss 27.0625, lr 5.0e-05, dt 2.0s +All GPU(s): step 9922: loss 27.1094, lr 5.0e-05, dt 2.0s +All GPU(s): step 9923: loss 27.0938, lr 5.0e-05, dt 2.0s +All GPU(s): step 9924: loss 27.2188, lr 5.0e-05, dt 2.0s +All GPU(s): step 9925: loss 27.3281, lr 5.0e-05, dt 2.1s +All GPU(s): step 9926: loss 27.2812, lr 5.0e-05, dt 2.1s +All GPU(s): step 9927: loss 27.2812, lr 5.0e-05, dt 2.0s +All GPU(s): step 9928: loss 27.3281, lr 5.0e-05, dt 2.1s +All GPU(s): step 9929: loss 27.2969, lr 5.0e-05, dt 2.1s +All GPU(s): step 9930: loss 27.2500, lr 5.0e-05, dt 2.2s +All GPU(s): step 9931: loss 27.3125, lr 5.0e-05, dt 2.1s +All GPU(s): step 9932: loss 27.6562, lr 5.0e-05, dt 2.1s +All GPU(s): step 9933: loss 27.5625, lr 5.0e-05, dt 2.1s +All GPU(s): step 9934: loss 27.5312, lr 5.0e-05, dt 2.1s +All GPU(s): step 9935: loss 27.6406, lr 5.0e-05, dt 2.1s +All GPU(s): step 9936: loss 27.5625, lr 5.0e-05, dt 2.0s +All GPU(s): step 9937: loss 27.9062, lr 5.0e-05, dt 2.0s +All GPU(s): step 9938: loss 27.8125, lr 5.0e-05, dt 2.0s +All GPU(s): step 9939: loss 27.7031, lr 5.0e-05, dt 2.1s +All GPU(s): step 9940: loss 27.7812, lr 5.0e-05, dt 2.1s +All GPU(s): step 9941: loss 27.9062, lr 5.0e-05, dt 2.1s +All GPU(s): step 9942: loss 27.8438, lr 5.0e-05, dt 2.1s +All GPU(s): step 9943: loss 27.7656, lr 5.0e-05, dt 2.0s +All GPU(s): step 9944: loss 27.9531, lr 5.0e-05, dt 2.2s +All GPU(s): step 9945: loss 27.7188, lr 5.0e-05, dt 2.1s +All GPU(s): step 9946: loss 27.7969, lr 5.0e-05, dt 2.1s +All GPU(s): step 9947: loss 27.9219, lr 5.0e-05, dt 2.0s +All GPU(s): step 9948: loss 27.7812, lr 5.0e-05, dt 2.1s +All GPU(s): step 9949: loss 27.9688, lr 5.0e-05, dt 2.2s +All GPU(s): step 9950: loss 28.0312, lr 5.0e-05, dt 2.1s +All GPU(s): step 9951: loss 27.9688, lr 5.0e-05, dt 2.0s +All GPU(s): step 9952: loss 28.3281, lr 5.0e-05, dt 2.1s +All GPU(s): step 9953: loss 28.2969, lr 5.0e-05, dt 2.1s +All GPU(s): step 9954: loss 28.2969, lr 5.0e-05, dt 2.1s +All GPU(s): step 9955: loss 28.5469, lr 5.0e-05, dt 2.0s +All GPU(s): step 9956: loss 28.5156, lr 5.0e-05, dt 2.1s +All GPU(s): step 9957: loss 28.4688, lr 5.0e-05, dt 2.1s +All GPU(s): step 9958: loss 28.7969, lr 5.0e-05, dt 2.1s +All GPU(s): step 9959: loss 29.0156, lr 5.0e-05, dt 2.1s +All GPU(s): step 9960: loss 28.8281, lr 5.0e-05, dt 2.1s +All GPU(s): step 9961: loss 28.7969, lr 5.0e-05, dt 2.1s +All GPU(s): step 9962: loss 28.9219, lr 4.9e-05, dt 2.0s +All GPU(s): step 9963: loss 29.1719, lr 4.9e-05, dt 2.1s +All GPU(s): step 9964: loss 29.1562, lr 4.9e-05, dt 2.1s +All GPU(s): step 9965: loss 29.0938, lr 4.9e-05, dt 2.0s +All GPU(s): step 9966: loss 29.1719, lr 4.9e-05, dt 2.0s +All GPU(s): step 9967: loss 29.1406, lr 4.9e-05, dt 2.0s +All GPU(s): step 9968: loss 28.8281, lr 4.9e-05, dt 2.1s +All GPU(s): step 9969: loss 29.1562, lr 4.9e-05, dt 2.0s +All GPU(s): step 9970: loss 29.0312, lr 4.9e-05, dt 2.0s +All GPU(s): step 9971: loss 29.1094, lr 4.9e-05, dt 2.0s +All GPU(s): step 9972: loss 29.1094, lr 4.9e-05, dt 2.1s +All GPU(s): step 9973: loss 28.9531, lr 4.9e-05, dt 2.1s +All GPU(s): step 9974: loss 28.8281, lr 4.9e-05, dt 2.1s +All GPU(s): step 9975: loss 28.9688, lr 4.9e-05, dt 2.1s +All GPU(s): step 9976: loss 29.0469, lr 4.9e-05, dt 2.0s +All GPU(s): step 9977: loss 29.0781, lr 4.9e-05, dt 2.0s +All GPU(s): step 9978: loss 29.0000, lr 4.9e-05, dt 2.2s +All GPU(s): step 9979: loss 29.1250, lr 4.9e-05, dt 2.0s +All GPU(s): step 9980: loss 28.9844, lr 4.9e-05, dt 2.0s +All GPU(s): step 9981: loss 28.9219, lr 4.9e-05, dt 2.0s +All GPU(s): step 9982: loss 28.8906, lr 4.9e-05, dt 2.1s +All GPU(s): step 9983: loss 29.0000, lr 4.9e-05, dt 2.1s +All GPU(s): step 9984: loss 29.0781, lr 4.9e-05, dt 2.0s +All GPU(s): step 9985: loss 28.9531, lr 4.9e-05, dt 2.0s +All GPU(s): step 9986: loss 28.7969, lr 4.9e-05, dt 2.0s +All GPU(s): step 9987: loss 28.9531, lr 4.9e-05, dt 2.1s +All GPU(s): step 9988: loss 28.8750, lr 4.9e-05, dt 2.1s +All GPU(s): step 9989: loss 28.8125, lr 4.9e-05, dt 2.0s +All GPU(s): step 9990: loss 28.9219, lr 4.9e-05, dt 2.0s +All GPU(s): step 9991: loss 28.8438, lr 4.9e-05, dt 2.0s +All GPU(s): step 9992: loss 28.8750, lr 4.9e-05, dt 2.1s +All GPU(s): step 9993: loss 28.9844, lr 4.9e-05, dt 2.1s +All GPU(s): step 9994: loss 28.9688, lr 4.9e-05, dt 2.1s +All GPU(s): step 9995: loss 29.2500, lr 4.9e-05, dt 2.0s +All GPU(s): step 9996: loss 29.2812, lr 4.9e-05, dt 2.0s +All GPU(s): step 9997: loss 29.0469, lr 4.9e-05, dt 2.1s +All GPU(s): step 9998: loss 29.1406, lr 4.9e-05, dt 2.1s +All GPU(s): step 9999: loss 28.9219, lr 4.9e-05, dt 2.1s +saving checkpoint to checkpoints/ckpt_10000.pt diff --git a/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/files/wandb-metadata.json b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8c79e90c88ef7116133e1905d9f882f2667e547b --- /dev/null +++ b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/files/wandb-metadata.json @@ -0,0 +1,88 @@ +{ + "os": "Linux-5.15.0-117-generic-x86_64-with-glibc2.31", + "python": "3.10.14", + "startedAt": "2024-09-23T15:28:12.110629Z", + "args": [ + "--config-name", + "experimental/byte_autoencoder_1" + ], + "program": "/root/SuperTinyLanguageModels/train.py", + "codePath": "train.py", + "git": { + "remote": "https://github.com/LeonGuertler/SuperTinyLanguageModels.git", + "commit": "7b6e7767d3d2c8e69005f9debea4643e53335e50" + }, + "email": "calvin14@gmail.com", + "root": "/root/SuperTinyLanguageModels/outputs/2024-09-23/15-28-03", + "host": "11c6e13f6a55", + "username": "root", + "executable": "/root/SuperTinyLanguageModels/.conda/bin/python3", + "cpu_count": 128, + "cpu_count_logical": 256, + "gpu": "[NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090]", + "gpu_count": 8, + "disk": { + "/": { + "total": "1123133947904", + "used": "552531927040" + } + }, + "memory": { + "total": "540812599296" + }, + "cpu": { + "count": 128, + "countLogical": 256 + }, + "gpu_nvidia": [ + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + }, + { + "name": "NVIDIA GeForce RTX 4090", + "memoryTotal": "25757220864", + "cudaCores": 16384, + "architecture": "Ada" + } + ], + "cudaVersion": "12.5" +} \ No newline at end of file diff --git a/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/files/wandb-summary.json b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..c732c23d1e0fea5a2af79a544be757d6ba58f846 --- /dev/null +++ b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/files/wandb-summary.json @@ -0,0 +1 @@ +{"additional_info/chunk_len_penalty_loss":0,"_step":491470848,"loss":28.921875,"_wandb":{"runtime":20359},"additional_info/total-loss":29.242591857910156,"_timestamp":1.7271256490543854e+09,"lr":4.8827310013829805e-05,"additional_info/chunk_len_loss":29.242586135864258,"iter":9999,"_runtime":20359.16672919,"additional_info/average_chunk_length":2.5923585891723633,"additional_info/BCE-loss":6.630856205447344e-06,"token_num":491470848} \ No newline at end of file diff --git a/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug-core.log b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..96381044df5dcdf943bc052c1a9352dd0634af8c --- /dev/null +++ b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug-core.log @@ -0,0 +1,13 @@ +{"time":"2024-09-23T15:28:11.467867812Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpy08r19nv/port-130651.txt","pid":130651,"debug":false,"disable-analytics":false} +{"time":"2024-09-23T15:28:11.467933072Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false} +{"time":"2024-09-23T15:28:11.486175572Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":130651} +{"time":"2024-09-23T15:28:11.486146531Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":46569,"Zone":""}} +{"time":"2024-09-23T15:28:11.655290069Z","level":"INFO","msg":"created new connection","id":"127.0.0.1:53594"} +{"time":"2024-09-23T15:28:12.112154024Z","level":"INFO","msg":"connection init received","streamId":"jp82yqcj","id":"127.0.0.1:53594"} +{"time":"2024-09-23T15:28:12.11256317Z","level":"ERROR","msg":"error creating symlink","error":"symlink /root/.cache/wandb/logs/core-debug-20240923_152811.log /root/SuperTinyLanguageModels/outputs/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug-core.log: file exists"} +{"time":"2024-09-23T15:28:12.118226978Z","level":"INFO","msg":"connection init completed","streamId":"jp82yqcj","id":"127.0.0.1:53594"} +{"time":"2024-09-23T21:07:31.277204678Z","level":"INFO","msg":"connection: teardown","id":"127.0.0.1:53594"} +{"time":"2024-09-23T21:07:31.277398051Z","level":"INFO","msg":"server is shutting down"} +{"time":"2024-09-23T21:07:31.277722525Z","level":"INFO","msg":"closed connection","id":"127.0.0.1:53594"} +{"time":"2024-09-23T21:07:32.697785267Z","level":"INFO","msg":"connection closed","id":"127.0.0.1:53594"} +{"time":"2024-09-23T21:07:32.697808647Z","level":"INFO","msg":"server is closed"} diff --git a/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug-internal.log b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..6d852068997579e77a0b39777cd2e8d8984c2756 --- /dev/null +++ b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug-internal.log @@ -0,0 +1,23 @@ +{"time":"2024-09-23T15:28:12.112481689Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T15:28:12.112519869Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug-core.log"} +{"time":"2024-09-23T15:28:12.112623061Z","level":"INFO","msg":"using version","core version":"0.18.1"} +{"time":"2024-09-23T15:28:12.112635331Z","level":"INFO","msg":"created symlink","path":"/root/SuperTinyLanguageModels/outputs/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug-core.log"} +{"time":"2024-09-23T15:28:12.118142746Z","level":"INFO","msg":"created new stream","id":"jp82yqcj"} +{"time":"2024-09-23T15:28:12.118214317Z","level":"INFO","msg":"stream: started","id":"jp82yqcj"} +{"time":"2024-09-23T15:28:12.118265208Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"jp82yqcj"}} +{"time":"2024-09-23T15:28:12.118273418Z","level":"INFO","msg":"sender: started","stream_id":{"value":"jp82yqcj"}} +{"time":"2024-09-23T15:28:12.118314949Z","level":"INFO","msg":"handler: started","stream_id":{"value":"jp82yqcj"}} +{"time":"2024-09-23T15:28:12.538706106Z","level":"INFO","msg":"wandb-core","!BADKEY":null} +{"time":"2024-09-23T15:28:12.540526091Z","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-09-23T17:00:42.85305684Z","level":"INFO","msg":"api: retrying HTTP request, no error or response"} +{"time":"2024-09-23T17:52:17.918721149Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/stlm/SuperTinyLanguageModels/jp82yqcj/file_stream"} +{"time":"2024-09-23T20:07:58.162758248Z","level":"INFO","msg":"api: retrying HTTP request, no error or response"} +{"time":"2024-09-23T20:09:43.17329067Z","level":"INFO","msg":"api: retrying HTTP request, no error or response"} +{"time":"2024-09-23T20:42:28.235348476Z","level":"INFO","msg":"api: retrying HTTP request, no error or response"} +{"time":"2024-09-23T21:07:31.27732282Z","level":"INFO","msg":"stream: closing","id":"jp82yqcj"} +{"time":"2024-09-23T21:07:31.2773774Z","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-09-23T21:07:31.278475337Z","level":"INFO","msg":"Stopped system monitor"} +{"time":"2024-09-23T21:07:32.697344011Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"jp82yqcj"}} +{"time":"2024-09-23T21:07:32.697421022Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"jp82yqcj"}} +{"time":"2024-09-23T21:07:32.697407192Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"jp82yqcj"}} +{"time":"2024-09-23T21:07:32.697664785Z","level":"INFO","msg":"stream: closed","id":"jp82yqcj"} diff --git a/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug.log b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..0764f36353def6e1b54e483ec7cd27e113f01844 --- /dev/null +++ b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug.log @@ -0,0 +1,26 @@ +2024-09-23 15:28:12,105 INFO MainThread:130651 [wandb_setup.py:_flush():77] Current SDK version is 0.18.1 +2024-09-23 15:28:12,105 INFO MainThread:130651 [wandb_setup.py:_flush():77] Configure stats pid to 130651 +2024-09-23 15:28:12,105 INFO MainThread:130651 [wandb_setup.py:_flush():77] Loading settings from /root/.config/wandb/settings +2024-09-23 15:28:12,105 INFO MainThread:130651 [wandb_setup.py:_flush():77] Loading settings from /root/SuperTinyLanguageModels/outputs/2024-09-23/15-28-03/wandb/settings +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_setup.py:_flush():77] Loading settings from environment variables: {} +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_setup.py:_flush():77] Applying setup settings: {'mode': None, '_disable_service': None} +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/root/SuperTinyLanguageModels/train.py', 'program': '/root/SuperTinyLanguageModels/train.py'} +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_setup.py:_flush():77] Applying login settings: {} +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_init.py:_log_setup():532] Logging user logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug.log +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_init.py:_log_setup():533] Logging internal logs to /root/SuperTinyLanguageModels/outputs/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/logs/debug-internal.log +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_init.py:init():616] calling init triggers +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_init.py:init():623] wandb.init called with sweep_config: {} +config: {'model': {'core_model_type': 'pass_through', 'hidden_dim': 384, 'byte_hidden': 128, 'max_chunk_length': 12, 'max_num_chunks': 1024, 'num_delimiter_layers': 3, 'num_byte_decoder_layers': 5, 'target_chunk_len': 8.0, 'chunk_len_loss_weight': 1.0, 'chunk_len_penalty': 0.1, 'context_window': 8192, 'embedding_model_type': 'byte_level', 'tokenizer_type': 'bpe', 'tokenizer_dataset_name': 'simple_en_wiki', 'tokenizer_simplify_data': True, 'vocab_size': 259, 'lm_head_type': 'byte_level', 'lm_head_normalization': 'rms_norm', 'lm_head_bias': False, 'lm_head_dropout': 0.0, 'model_shell_type': 'byte_autoencoder_shell', 'embedding_weight_tying': True, 'ffn_weight_tying': False, 'cproj_weight_tying': False, 'positional_encoding_type': 'rope'}, 'trainer': {'trainer_type': 'base_trainer', 'dataset': 'fineweb_edu_10B', 'batch_size': 6, 'gradient_accumulation_steps': 8, 'max_iters': 10000, 'eval_interval': 50000000, 'log_interval': 1, 'checkpoint_interval': 1000, 'eval_iters': 1000, 'run_eval': False, 'eval': {'mcq_benchmarks': None, 'mcq_num_samples': 1000, 'eval_byte_metrics': False, 'text_modeling_eval': False, 'text_generation_eval': False}, 'optimizer': {'optimizer_name': 'adamW', 'lr': 0.0004, 'min_lr': 4e-05, 'weight_decay': 0.01, 'beta1': 0.9, 'beta2': 0.95, 'grad_clip': 1.0}, 'lr_scheduler': {'name': 'cosine', 'warmup_iters': 1000}, 'dataloader': {'name': 'autoencoder'}, 'datasampling': {'name': 'standard'}, 'loss_fn': {'name': 'pass_through'}}, 'general': {'logging': {'wandb_log': True, 'wandb_project': 'SuperTinyLanguageModels', 'wandb_run_name': None, 'group_name': 'experimental_byte_level'}, 'paths': {'output_dir': 'outputs', 'data_dir': '/root/SuperTinyLanguageModels/data', 'checkpoint_dir': 'checkpoints', 'eval_dir': '/root/SuperTinyLanguageModels/evals'}, 'seed': 489, 'device': 'cuda'}} +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_init.py:init():666] starting backend +2024-09-23 15:28:12,106 INFO MainThread:130651 [wandb_init.py:init():670] setting up manager +2024-09-23 15:28:12,108 INFO MainThread:130651 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-09-23 15:28:12,110 INFO MainThread:130651 [wandb_init.py:init():678] backend started and connected +2024-09-23 15:28:12,113 INFO MainThread:130651 [wandb_init.py:init():773] updated telemetry +2024-09-23 15:28:12,120 INFO MainThread:130651 [wandb_init.py:init():806] communicating run to backend with 90.0 second timeout +2024-09-23 15:28:12,535 INFO MainThread:130651 [wandb_init.py:init():857] starting run threads in backend +2024-09-23 15:28:12,711 INFO MainThread:130651 [wandb_run.py:_console_start():2459] atexit reg +2024-09-23 15:28:12,712 INFO MainThread:130651 [wandb_run.py:_redirect():2307] redirect: wrap_raw +2024-09-23 15:28:12,712 INFO MainThread:130651 [wandb_run.py:_redirect():2372] Wrapping output streams. +2024-09-23 15:28:12,712 INFO MainThread:130651 [wandb_run.py:_redirect():2397] Redirects installed. +2024-09-23 15:28:12,714 INFO MainThread:130651 [wandb_init.py:init():900] run started, returning control to user process +2024-09-23 21:07:31,278 WARNING MsgRouterThr:130651 [router.py:message_loop():77] message_loop has been closed diff --git a/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/run-jp82yqcj.wandb b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/run-jp82yqcj.wandb new file mode 100644 index 0000000000000000000000000000000000000000..d657ff256d533b4ab5b52fec6ab342d07c7334e1 --- /dev/null +++ b/2024-09-23/15-28-03/wandb/run-20240923_152812-jp82yqcj/run-jp82yqcj.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9ee0b58508c41ac51fc65504ae44710aedb4b7a06fbbb084ad25e5b157ab7f8 +size 41501177