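# Web page header text captured along with the file; kept as comments so it
# is not parsed as YAML content.
# pythia_410m_tofu / cfg.yaml
# zhilif
# init
# d30087d
# raw
# history blame contribute delete
# 325 Bytes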
# Run configuration. The notes below are inferred from key names only;
# confirm them against the script that loads this file.

# Optimization settings (presumably consumed by the training loop).
epochs: 1
batch_size: 16
lr: 0.0003
weight_decay: 0.1
gradient_accumulation_steps: 1

# Model identifier and random seed.
model_id: EleutherAI/pythia-410m
seed: 42

# Absolute, machine-specific paths; adjust when running on another host.
dataset_path: /data/user_data/zhilif/soft_mem/pile_tofu_poly_repeated_2.zst
save_dir: /data/locus/llm_weights/zhilif/soft_mem

# Experiment-tracking settings. `project` and `group` must be indented under
# `wandb`: written at column 0 they become unrelated top-level keys and
# `wandb` itself parses as null.
wandb:
  project: soft-mem
  group: continual-finetune

# Looks like a maximum sequence length in tokens; verify against the loader.
max_length: 2048