Spaces:
Running
Running
feat(train): update sweep config
Browse files
- tools/train/sweep.yaml +9 -9
tools/train/sweep.yaml
CHANGED
|
@@ -1,16 +1,17 @@
|
|
| 1 |
program: train.py
|
| 2 |
-
entity: dalle-mini
|
| 3 |
project: dalle-mini
|
| 4 |
method: random
|
| 5 |
metric:
|
| 6 |
name: eval/loss
|
| 7 |
goal: minimize
|
| 8 |
parameters:
|
|
|
|
|
|
|
| 9 |
learning_rate:
|
| 10 |
distribution: log_uniform
|
| 11 |
# from exp(min) to exp(max)
|
| 12 |
-
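min: -  # NOTE: digits of the removed value appear to be missing from this capture; the added side sets min: -9.2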
|
| 13 |
-
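max: -  # NOTE: digits of the removed value appear to be missing from this capture; the added side sets max: -6.9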
|
| 14 |
tokenizer_name:
|
| 15 |
value: boris/dalle-mini-tokenizer
|
| 16 |
config_name:
|
|
@@ -26,15 +27,15 @@ parameters:
|
|
| 26 |
gradient_accumulation_steps:
|
| 27 |
value: 1
|
| 28 |
warmup_steps:
|
| 29 |
-
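value:  # NOTE: the removed warmup_steps value appears to be missing from this capture; the added side sets value: 1000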
|
| 30 |
num_train_epochs:
|
| 31 |
value: 1
|
| 32 |
-
logging_steps:
|
| 33 |
-
value: 32
|
| 34 |
-
eval_steps:
|
| 35 |
-
value: 800
|
| 36 |
max_train_samples:
|
| 37 |
value: 1000000
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
command:
|
| 40 |
- python3
|
|
@@ -43,7 +44,6 @@ command:
|
|
| 43 |
- "--output_dir"
|
| 44 |
- "./output"
|
| 45 |
- "--overwrite_output_dir"
|
| 46 |
-
- "--adafactor"
|
| 47 |
- "--do_train"
|
| 48 |
- "--do_eval"
|
| 49 |
- ${args}
|
|
|
|
| 1 |
program: train.py
|
|
|
|
| 2 |
project: dalle-mini
|
| 3 |
method: random
|
| 4 |
metric:
|
| 5 |
name: eval/loss
|
| 6 |
goal: minimize
|
| 7 |
parameters:
|
| 8 |
+
optim:
|
| 9 |
+
value: distributed_shampoo
|
| 10 |
learning_rate:
|
| 11 |
distribution: log_uniform
|
| 12 |
# from exp(min) to exp(max)
|
| 13 |
+
min: -9.2
|
| 14 |
+
max: -6.9
|
| 15 |
tokenizer_name:
|
| 16 |
value: boris/dalle-mini-tokenizer
|
| 17 |
config_name:
|
|
|
|
| 27 |
gradient_accumulation_steps:
|
| 28 |
value: 1
|
| 29 |
warmup_steps:
|
| 30 |
+
value: 1000
|
| 31 |
num_train_epochs:
|
| 32 |
value: 1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
max_train_samples:
|
| 34 |
value: 1000000
|
| 35 |
+
logging_steps:
|
| 36 |
+
value: 40
|
| 37 |
+
eval_steps:
|
| 38 |
+
value: 200
|
| 39 |
|
| 40 |
command:
|
| 41 |
- python3
|
|
|
|
| 44 |
- "--output_dir"
|
| 45 |
- "./output"
|
| 46 |
- "--overwrite_output_dir"
|
|
|
|
| 47 |
- "--do_train"
|
| 48 |
- "--do_eval"
|
| 49 |
- ${args}
|