Spaces:
Running
Running
feat(train): custom start_preconditioning_step
Browse files
- tools/train/train.py +5 -1
tools/train/train.py
CHANGED
|
@@ -248,6 +248,10 @@ class TrainingArguments:
|
|
| 248 |
default=1024,
|
| 249 |
metadata={"help": "Chunked size for large layers with Distributed Shampoo."},
|
| 250 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
preconditioning_compute_steps: int = field(
|
| 252 |
default=10, metadata={"help": "Number of steps to update preconditioner."}
|
| 253 |
)
|
|
@@ -608,7 +612,7 @@ def main():
|
|
| 608 |
beta2=training_args.beta2,
|
| 609 |
diagonal_epsilon=1e-10,
|
| 610 |
matrix_epsilon=1e-8,
|
| 611 |
-
start_preconditioning_step=training_args.[…],  (removed line; attribute name truncated in extraction)
|
| 612 |
preconditioning_compute_steps=training_args.preconditioning_compute_steps,
|
| 613 |
statistics_compute_steps=1,
|
| 614 |
best_effort_shape_interpretation=True,
|
|
|
|
| 248 |
default=1024,
|
| 249 |
metadata={"help": "Chunked size for large layers with Distributed Shampoo."},
|
| 250 |
)
|
| 251 |
+
start_preconditioning_step: int = field(
|
| 252 |
+
default=100,
|
| 253 |
+
metadata={"help": "Number of steps before starting to update preconditioner."},
|
| 254 |
+
)
|
| 255 |
preconditioning_compute_steps: int = field(
|
| 256 |
default=10, metadata={"help": "Number of steps to update preconditioner."}
|
| 257 |
)
|
|
|
|
| 612 |
beta2=training_args.beta2,
|
| 613 |
diagonal_epsilon=1e-10,
|
| 614 |
matrix_epsilon=1e-8,
|
| 615 |
+
start_preconditioning_step=training_args.start_preconditioning_step,
|
| 616 |
preconditioning_compute_steps=training_args.preconditioning_compute_steps,
|
| 617 |
statistics_compute_steps=1,
|
| 618 |
best_effort_shape_interpretation=True,
|