---
# Run configuration, restored to block-style YAML (one key per line).
# The keys and values are unchanged; only line breaks and indentation
# were reintroduced so the document parses as a mapping.

# Optimisation settings (presumably consumed by the training script;
# verify names against the loader).
epochs: 1
batch_size: 16
lr: 0.0003
weight_decay: 0.1
gradient_accumulation_steps: 1

# Model identifier and random seed.
model_id: EleutherAI/pythia-410m
seed: 42

# Input dataset and output directory (absolute, machine-specific paths).
dataset_path: /data/user_data/zhilif/soft_mem/pile_tofu_poly_repeated_2.zst
save_dir: /data/locus/llm_weights/zhilif/soft_mem

# Experiment-tracking settings.
wandb:
  project: soft-mem
  group: continual-finetune

# NOTE(review): the flattened original did not show whether max_length
# was nested under `wandb` or a top-level key. It is placed at top level
# on the assumption that it is a sequence-length setting — confirm
# against the code that reads this file.
max_length: 2048