trainer:
  target: semanticist.engine.gpt_trainer.GPTTrainer
  params:
    num_epoch: 400
    blr: 1e-4
    cosine_lr: False
    warmup_epochs: 100
    batch_size: 16
    num_workers: 8
    pin_memory: True
    grad_accum_steps: 1
    precision: 'bf16'
    max_grad_norm: 1.0
    enable_ema: True
    save_every: 10000
    sample_every: 5000
    fid_every: 50000
    eval_fid: False
    result_folder: "./output/autoregressive"
    log_dir: "./output/autoregressive/logs"
    ae_cfg: 1.0
    cfg: 6.0
    cfg_schedule: "linear"
    train_num_slots: 32
    test_num_slots: 32
    compile: True
    enable_cache_latents: False
    ae_model:
      target: semanticist.stage1.diffuse_slot.DiffuseSlot
      params:
        encoder: 'vit_base_patch16'
        enc_img_size: 256
        enc_causal: True
        num_slots: 256
        slot_dim: 16
        norm_slots: True
        cond_method: 'token'
        dit_model: 'DiT-L-2'
        vae: 'xwen99/mar-vae-kl16'
        num_sampling_steps: '250'
        # ckpt_path: ./output/tokenizer/models_l/step250000/custom_checkpoint_1.pkl
        ckpt_path: /mnt/ceph_rbd/mnt_pvc_vid_data/zbc/cache/semanticist_tok_L.pkl
    gpt_model:
      target: GPT-L
      params:
        num_slots: 32
        slot_dim: 16
        num_classes: 1000
        cls_token_num: 1
        resid_dropout_p: 0.1
        ffn_dropout_p: 0.1
        diffloss_d: 12
        diffloss_w: 1536
        num_sampling_steps: '100'
        diffusion_batch_mul: 4
        use_si: True
        cond_method: 'concat'
        ckpt_path: None
    dataset:
      target: semanticist.utils.datasets.ImageNet
      params:
        root: ./dataset/imagenet/
        split: train
        # aug: tencrop_cached # or centercrop_cached
        aug: randcrop
        img_size: 256
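# ---------------------------------------------------------------------------
# Note on the target/params layout. Configs of this shape are typically
# consumed by an instantiate-from-config helper in the style of the CompVis
# latent-diffusion codebase: `target` names a class by its dotted import path
# and `params` supplies its constructor kwargs. The Python sketch below
# illustrates that convention under this assumption; it is not necessarily
# the exact helper this repo ships, and the config file name is hypothetical.
# One caveat: gpt_model.target above is a short model name (GPT-L), not a
# dotted path, so it is presumably resolved through the repo's own model
# registry rather than an importlib lookup.
#
#   import importlib
#   from omegaconf import OmegaConf
#
#   def instantiate_from_config(cfg):
#       """Import cfg.target as a dotted path and call it with cfg.params."""
#       module_name, cls_name = cfg["target"].rsplit(".", 1)
#       cls = getattr(importlib.import_module(module_name), cls_name)
#       return cls(**cfg.get("params", {}))
#
#   cfg = OmegaConf.load("configs/autoregressive.yaml")  # hypothetical path
#   trainer = instantiate_from_config(cfg.trainer)
# ---------------------------------------------------------------------------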