shoukanghu-sony committed · Commit 43f43ae · 1 Parent(s): 71ee9f0

initial commit

Files changed (23)
  1. eval_fvd/i3d_pretrained_400.pt +3 -0
  2. eval_fvd/resnet-50-kinetics.pth +3 -0
  3. pkgs/pretrained_models/DWPose/dw-ll_ucoco_384.onnx +3 -0
  4. pkgs/pretrained_models/DWPose/yolox_l.onnx +3 -0
  5. pkgs/pretrained_models/__init__.py +19 -0
  6. pkgs/pretrained_models/detectron2/model_final_f05665.pkl +3 -0
  7. pkgs/pretrained_models/download.py +60 -0
  8. pkgs/pretrained_models/hmr2/data/SMPL_to_J19.pkl +3 -0
  9. pkgs/pretrained_models/hmr2/data/smpl_mean_params.npz +3 -0
  10. pkgs/pretrained_models/hmr2/logs/train/multiruns/hmr2/0/checkpoints/epoch=35-step=1000000.ckpt +3 -0
  11. pkgs/pretrained_models/hmr2/logs/train/multiruns/hmr2/0/dataset_config.yaml +112 -0
  12. pkgs/pretrained_models/hmr2/logs/train/multiruns/hmr2/0/model_config.yaml +148 -0
  13. pkgs/pretrained_models/image_encoder/config.json +23 -0
  14. pkgs/pretrained_models/image_encoder/pytorch_model.bin +3 -0
  15. pkgs/pretrained_models/sd-vae-ft-mse/config.json +29 -0
  16. pkgs/pretrained_models/sd-vae-ft-mse/diffusion_pytorch_model.bin +3 -0
  17. pkgs/pretrained_models/sd-vae-ft-mse/diffusion_pytorch_model.safetensors +3 -0
  18. pkgs/pretrained_models/smpl/SMPL_NEUTRAL.pkl +3 -0
  19. pkgs/pretrained_models/stable-diffusion-v1-5/feature_extractor/preprocessor_config.json +20 -0
  20. pkgs/pretrained_models/stable-diffusion-v1-5/model_index.json +32 -0
  21. pkgs/pretrained_models/stable-diffusion-v1-5/unet/config.json +36 -0
  22. pkgs/pretrained_models/stable-diffusion-v1-5/unet/diffusion_pytorch_model.bin +3 -0
  23. pkgs/pretrained_models/stable-diffusion-v1-5/v1-inference.yaml +70 -0
eval_fvd/i3d_pretrained_400.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:55095f049e706479d48e221adcdb145b2b9dc930ba28b081ed72367ffaa32343
+ size 50939526
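Note: every binary in this commit is stored as a Git LFS pointer like the one above — three lines giving the pointer spec version, the sha256 of the real payload, and its size in bytes. After the payloads are fetched (git lfs pull), each file can be checked against the oid recorded here; a minimal verification sketch in Python (the hash below is the one from this hunk, everything else is generic stdlib):

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream in 1 MiB chunks so multi-GB checkpoints never sit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Compare against the oid line of the pointer shown above.
expected = "55095f049e706479d48e221adcdb145b2b9dc930ba28b081ed72367ffaa32343"
print(sha256_of("eval_fvd/i3d_pretrained_400.pt") == expected)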
eval_fvd/resnet-50-kinetics.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:69b6a070ed1004c3b28a1751d5bc60f7dfd24d145425db32719989544d6dafc9
+ size 376424414
pkgs/pretrained_models/DWPose/dw-ll_ucoco_384.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:724f4ff2439ed61afb86fb8a1951ec39c6220682803b4a8bd4f598cd913b1843
+ size 134399116
pkgs/pretrained_models/DWPose/yolox_l.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7860ae79de6c89a3c1eb72ae9a2756c0ccfbe04b7791bb5880afabd97855a411
+ size 216746733
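The two DWPose files are plain ONNX graphs (yolox_l.onnx for person detection, dw-ll_ucoco_384.onnx for whole-body keypoints), so once pulled they can be opened directly with ONNX Runtime. A hedged loading sketch — the provider choice is an assumption, not taken from this commit:

import onnxruntime as ort

# Open the detector committed above; swap in CUDAExecutionProvider if available.
session = ort.InferenceSession(
    "pkgs/pretrained_models/DWPose/yolox_l.onnx",
    providers=["CPUExecutionProvider"],
)
# Inspect the graph's declared inputs/outputs rather than hard-coding shapes.
print([(i.name, i.shape) for i in session.get_inputs()])
print([(o.name, o.shape) for o in session.get_outputs()])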
pkgs/pretrained_models/__init__.py ADDED
@@ -0,0 +1,19 @@
+ import os
+ from pathlib import Path
+
+ PRETRAIN_MODELS_DIR = os.path.join(
+     Path(__file__).parent.parent.parent, "pretrained_models"
+ )
+
+ HMR2_MODELS_DIR = f"{PRETRAIN_MODELS_DIR}/hmr2"
+ HMR2_DEFAULT_CKPT = f"{HMR2_MODELS_DIR}/logs/train/multiruns/hmr2/0/checkpoints/epoch=35-step=1000000.ckpt"
+
+ SMPL_MODEL_DIR = f"{PRETRAIN_MODELS_DIR}/smpl"
+ SMPL_MODEL_PATH = f"{SMPL_MODEL_DIR}/SMPL_NEUTRAL.pkl"
+
+ DETECTRON2_MODEL_DIR = f"{PRETRAIN_MODELS_DIR}/detectron2"
+ DETECTRON2_MODEL_PATH = f"{DETECTRON2_MODEL_DIR}/model_final_f05665.pkl"
+
+ DWPOSE_MODEL_DIR = f"{PRETRAIN_MODELS_DIR}/DWPose"
+ YOLO_L_MODEL_PATH = f"{DWPOSE_MODEL_DIR}/yolox_l.onnx"
+ DWPOSE_MODEL_PATH = f"{DWPOSE_MODEL_DIR}/dw-ll_ucoco_384.onnx"
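This __init__.py is a small path registry: downstream code resolves every checkpoint through these constants instead of hard-coding locations. Note that PRETRAIN_MODELS_DIR climbs three levels above this file, so it points at a pretrained_models directory beside pkgs/, not at this folder itself. A hedged usage sketch — the import path pkgs.pretrained_models is an assumption from the file location; download.py below imports the same names as scripts.pretrained_models, so the package may be aliased differently at runtime:

import os

# Hypothetical consumer: check the DWPose weights exist before building a session.
from pkgs.pretrained_models import DWPOSE_MODEL_PATH, YOLO_L_MODEL_PATH  # assumed import path

for path in (YOLO_L_MODEL_PATH, DWPOSE_MODEL_PATH):
    if not os.path.isfile(path):
        raise FileNotFoundError(f"missing pretrained model: {path}")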
pkgs/pretrained_models/detectron2/model_final_f05665.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8601bc52000c8a87960f3db6a9672596c5e06ce33bc30a3b8f96a96efe42ae60
+ size 2765948277
pkgs/pretrained_models/download.py ADDED
@@ -0,0 +1,60 @@
+ import os
+ import argparse
+ from pathlib import Path
+ from hmr2.models import download_models
+ from scripts.pretrained_models import (
+     DETECTRON2_MODEL_DIR,
+     DETECTRON2_MODEL_PATH,
+     HMR2_MODELS_DIR,
+     PRETRAIN_MODELS_DIR,
+     SMPL_MODEL_DIR,
+     SMPL_MODEL_PATH,
+ )
+
+ from utils.download import download
+
+
+ def download_hmr2_models():
+     if not os.path.exists(HMR2_MODELS_DIR):
+         os.makedirs(HMR2_MODELS_DIR)
+     download_models(HMR2_MODELS_DIR)
+
+
+ def download_smpl_model():
+     if not os.path.exists(SMPL_MODEL_DIR):
+         os.makedirs(SMPL_MODEL_DIR)
+     print(
+         f"Please download smpl model from https://smplify.is.tue.mpg.de/, and place it in {SMPL_MODEL_PATH}"
+     )
+
+
+ def download_detectron2_model():
+     if not os.path.exists(DETECTRON2_MODEL_DIR):
+         os.makedirs(DETECTRON2_MODEL_DIR)
+     download(
+         "https://dl.fbaipublicfiles.com/detectron2/ViTDet/COCO/cascade_mask_rcnn_vitdet_h/f328730692/model_final_f05665.pkl",
+         output=Path(DETECTRON2_MODEL_PATH),
+     )
+
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description="model downloader")
+     parser.add_argument("--all", type=bool, help="download all models")
+     parser.add_argument("--hmr2", type=bool, help="download hmr2 models only")
+     parser.add_argument("--smpl", type=bool, help="download smpl models only")
+     parser.add_argument(
+         "--detectron2", type=bool, help="download detectron2 models only"
+     )
+
+     args = parser.parse_args()
+
+     if args.hmr2:
+         download_hmr2_models()
+     if args.detectron2:
+         download_detectron2_model()
+     if args.smpl:
+         download_smpl_model()
+     if args.all:
+         download_hmr2_models()
+         download_smpl_model()
+         download_detectron2_model()
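One caveat worth flagging in the CLI above: argparse's type=bool calls bool() on the raw argument string, so --all False still parses as True (any non-empty string is truthy), and a bare --all with no value is an error. The conventional flag style is action="store_true"; a sketch of that alternative (not the committed code):

import argparse

parser = argparse.ArgumentParser(description="model downloader")
parser.add_argument("--all", action="store_true", help="download all models")
parser.add_argument("--hmr2", action="store_true", help="download hmr2 models only")

# A bare flag now works, and omitting it really means False.
args = parser.parse_args(["--all"])
assert args.all is True and args.hmr2 is False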
pkgs/pretrained_models/hmr2/data/SMPL_to_J19.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a66e340412ebd320a5c694bee5fb2674f700736709bbdd93859e970f0e59f47c
+ size 1047441
pkgs/pretrained_models/hmr2/data/smpl_mean_params.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6fd6dd687800da946d0a0492383f973b92ec20f166a0b829775882868c35fcdd
+ size 1310
pkgs/pretrained_models/hmr2/logs/train/multiruns/hmr2/0/checkpoints/epoch=35-step=1000000.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c9ea09c834161468638c9de7a782c614e300c622709bf52967274c543ec72fcf
+ size 2709521501
pkgs/pretrained_models/hmr2/logs/train/multiruns/hmr2/0/dataset_config.yaml ADDED
@@ -0,0 +1,112 @@
+ 3DPW-TEST:
+   KEYPOINT_LIST:
+   - 25
+   - 26
+   - 27
+   - 28
+   - 29
+   - 30
+   - 31
+   - 32
+   - 33
+   - 34
+   - 35
+   - 36
+   - 37
+   - 43
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled_new/3dpw-test/{000000..000012}.tar
+   USE_HIPS: false
+ AIC-TRAIN:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled/aic-train/{000000..000104}.tar
+   epoch_size: 200000
+ AIC-TRAIN-WMASK:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled_wmasks/aic-train-vitpose/{000000..000104}.tar
+   epoch_size: 200000
+ AVA-TRAIN-MIDFRAMES-1FPS:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled/ava-train-midframes-1fps/{000000..000092}.tar
+   epoch_size: 200000
+ AVA-TRAIN-MIDFRAMES-1FPS-WMASK:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled_wmasks/ava-train-midframes-1fps-vitpose/{000000..000092}.tar
+   epoch_size: 200000
+ CMU-MOCAP:
+   DATASET_FILE: /fsx/shubham/code/hmr2023/data/datasets/cmu_mocap.npz
+ COCO-TRAIN-2014:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled/coco-train-2014/{000000..000017}.tar
+   epoch_size: 360000
+ COCO-TRAIN-2014-VITPOSE-REPLICATE-PRUNED12:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled_wmasks/coco-train-2014-vitpose-replicate-pruned12/{000000..000044}.tar
+   epoch_size: 45000
+ COCO-TRAIN-2014-WMASK:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled_wmasks/coco-train-2014/{000000..000034}.tar
+   epoch_size: 35000
+ COCO-TRAIN-2014-WMASK-PRUNED:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled_wmasks/coco-train-2014-pruned/{000000..000017}.tar
+   epoch_size: 18000
+ COCO-VAL:
+   KEYPOINT_LIST:
+   - 0
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled/coco-val/{000000..000000}.tar
+ H36M-TRAIN:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled/h36m-train/{000000..000156}.tar
+   epoch_size: 314000
+ H36M-TRAIN-WMASK:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled_wmasks/h36m-train/{000000..000312}.tar
+   epoch_size: 314000
+ H36M-VAL-P2:
+   KEYPOINT_LIST:
+   - 25
+   - 26
+   - 27
+   - 28
+   - 29
+   - 30
+   - 31
+   - 32
+   - 33
+   - 34
+   - 35
+   - 36
+   - 37
+   - 43
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled_new/h36m-val-p2/{000000..000013}.tar
+   USE_HIPS: true
+ H36M-VAL-P2-OPENPOSE:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled_new/h36m-val-p2-openpose/{000000..000013}.tar
+ INSTA-TRAIN:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled/insta-train/{000000..001828}.tar
+   epoch_size: 4000000
+ INSTA-TRAIN-WMASK:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled_wmasks/insta-train-vitpose/{000000..001828}.tar
+   epoch_size: 4000000
+ MPI-INF-TRAIN:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled/mpi-inf-train/{000000..000048}.tar
+   epoch_size: 100000
+ MPI-INF-TRAIN-PRUNED:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled_new/mpi-inf-train-pruned/{000000..00006}.tar
+   epoch_size: 12000
+ MPII-TRAIN:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled/mpii-train/{000000..000004}.tar
+   epoch_size: 100000
+ MPII-TRAIN-WMASK:
+   TYPE: ImageDataset
+   URLS: /fsx/shubham/data/hmr2023_data_shuffled_wmasks/mpii-train/{000000..000009}.tar
+   epoch_size: 100000
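dataset_config.yaml documents the provenance of the HMR2 checkpoint rather than anything runnable from this repo: the URLS entries point at webdataset tar shards on the authors' /fsx cluster storage, with brace ranges enumerating shards. A hypothetical expander for that notation, to make the convention concrete:

import re

def expand_shards(pattern):
    # Expand a webdataset-style brace range like {000000..000012}, keeping zero padding.
    match = re.search(r"\{(\d+)\.\.(\d+)\}", pattern)
    lo, hi = match.group(1), match.group(2)
    width = len(lo)
    return [
        re.sub(r"\{\d+\.\.\d+\}", f"{i:0{width}d}", pattern)
        for i in range(int(lo), int(hi) + 1)
    ]

shards = expand_shards("3dpw-test/{000000..000012}.tar")
print(len(shards), shards[0], shards[-1])  # 13 3dpw-test/000000.tar 3dpw-test/000012.tar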
pkgs/pretrained_models/hmr2/logs/train/multiruns/hmr2/0/model_config.yaml ADDED
@@ -0,0 +1,148 @@
+ task_name: train
+ tags:
+ - dev
+ train: true
+ test: false
+ ckpt_path: null
+ seed: null
+ DATASETS:
+   SUPPRESS_KP_CONF_THRESH: 0.3
+   FILTER_NUM_KP: 4
+   FILTER_NUM_KP_THRESH: 0.0
+   FILTER_REPROJ_THRESH: 31000
+   SUPPRESS_BETAS_THRESH: 3.0
+   SUPPRESS_BAD_POSES: true
+   POSES_BETAS_SIMULTANEOUS: true
+   FILTER_NO_POSES: false
+   TRAIN:
+     H36M-TRAIN-WMASK:
+       WEIGHT: 0.1
+     MPII-TRAIN-WMASK:
+       WEIGHT: 0.1
+     COCO-TRAIN-2014-WMASK-PRUNED:
+       WEIGHT: 0.1
+     COCO-TRAIN-2014-VITPOSE-REPLICATE-PRUNED12:
+       WEIGHT: 0.1
+     MPI-INF-TRAIN-PRUNED:
+       WEIGHT: 0.02
+     AVA-TRAIN-MIDFRAMES-1FPS-WMASK:
+       WEIGHT: 0.19
+     AIC-TRAIN-WMASK:
+       WEIGHT: 0.19
+     INSTA-TRAIN-WMASK:
+       WEIGHT: 0.2
+   VAL:
+     COCO-VAL:
+       WEIGHT: 1.0
+   MOCAP: CMU-MOCAP
+   CONFIG:
+     SCALE_FACTOR: 0.3
+     ROT_FACTOR: 30
+     TRANS_FACTOR: 0.02
+     COLOR_SCALE: 0.2
+     ROT_AUG_RATE: 0.6
+     TRANS_AUG_RATE: 0.5
+     DO_FLIP: true
+     FLIP_AUG_RATE: 0.5
+     EXTREME_CROP_AUG_RATE: 0.1
+     EXTREME_CROP_AUG_LEVEL: 1
+   BETAS_REG: true
+ trainer:
+   _target_: pytorch_lightning.Trainer
+   default_root_dir: ${paths.output_dir}
+   accelerator: gpu
+   devices: 8
+   deterministic: false
+   num_sanity_val_steps: 0
+   log_every_n_steps: ${GENERAL.LOG_STEPS}
+   val_check_interval: ${GENERAL.VAL_STEPS}
+   precision: 16
+   max_steps: ${GENERAL.TOTAL_STEPS}
+   move_metrics_to_cpu: true
+   limit_val_batches: 1
+   track_grad_norm: -1
+   strategy: ddp
+   num_nodes: 1
+   sync_batchnorm: true
+ paths:
+   root_dir: ${oc.env:PROJECT_ROOT}
+   data_dir: ${paths.root_dir}/data/
+   log_dir: /fsx/shubham/code/hmr2023/logs_hydra/
+   output_dir: ${hydra:runtime.output_dir}
+   work_dir: ${hydra:runtime.cwd}
+ extras:
+   ignore_warnings: false
+   enforce_tags: true
+   print_config: true
+ exp_name: hmr2
+ SMPL:
+   MODEL_PATH: data/smpl
+   GENDER: neutral
+   NUM_BODY_JOINTS: 23
+   JOINT_REGRESSOR_EXTRA: data/SMPL_to_J19.pkl
+   MEAN_PARAMS: data/smpl_mean_params.npz
+ EXTRA:
+   FOCAL_LENGTH: 5000
+   NUM_LOG_IMAGES: 4
+   NUM_LOG_SAMPLES_PER_IMAGE: 8
+   PELVIS_IND: 39
+ MODEL:
+   IMAGE_SIZE: 256
+   IMAGE_MEAN:
+   - 0.485
+   - 0.456
+   - 0.406
+   IMAGE_STD:
+   - 0.229
+   - 0.224
+   - 0.225
+   BACKBONE:
+     TYPE: vit
+     NUM_LAYERS: 50
+     OUT_CHANNELS: 2048
+     ADD_NECK: false
+   FLOW:
+     DIM: 144
+     NUM_LAYERS: 4
+     CONTEXT_FEATURES: 2048
+     LAYER_HIDDEN_FEATURES: 1024
+     LAYER_DEPTH: 2
+   FC_HEAD:
+     NUM_FEATURES: 1024
+   SMPL_HEAD:
+     TYPE: transformer_decoder
+     IN_CHANNELS: 2048
+     TRANSFORMER_DECODER:
+       depth: 6
+       heads: 8
+       mlp_dim: 1024
+       dim_head: 64
+       dropout: 0.0
+       emb_dropout: 0.0
+       norm: layer
+       context_dim: 1280
+ GENERAL:
+   TOTAL_STEPS: 1000000
+   LOG_STEPS: 1000
+   VAL_STEPS: 1000
+   CHECKPOINT_STEPS: 10000
+   CHECKPOINT_SAVE_TOP_K: 1
+   NUM_WORKERS: 6
+   PREFETCH_FACTOR: 2
+ TRAIN:
+   LR: 1.0e-05
+   WEIGHT_DECAY: 0.0001
+   BATCH_SIZE: 48
+   LOSS_REDUCTION: mean
+   NUM_TRAIN_SAMPLES: 2
+   NUM_TEST_SAMPLES: 64
+   POSE_2D_NOISE_RATIO: 0.01
+   SMPL_PARAM_NOISE_RATIO: 0.005
+   LOSS_WEIGHTS:
+     KEYPOINTS_3D: 0.05
+     KEYPOINTS_2D: 0.01
+     GLOBAL_ORIENT: 0.001
+     BODY_POSE: 0.001
+     BETAS: 0.0005
+     ADVERSARIAL: 0.0005
+ local: {}
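model_config.yaml is the Hydra run config dumped alongside the HMR2 checkpoint above. Plain values load fine with OmegaConf, but the ${hydra:...} and ${paths....} entries are interpolations that only resolve inside the original Hydra run; a cautious reading sketch:

from omegaconf import OmegaConf

cfg = OmegaConf.load(
    "pkgs/pretrained_models/hmr2/logs/train/multiruns/hmr2/0/model_config.yaml"
)
print(cfg.MODEL.IMAGE_SIZE)  # 256: literal values resolve without Hydra
print(cfg.TRAIN.LR)          # 1e-05
# Accessing cfg.trainer.default_root_dir would raise: it chains to
# ${hydra:runtime.output_dir}, a resolver only registered inside a Hydra app.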
pkgs/pretrained_models/image_encoder/config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "_name_or_path": "/home/jpinkney/.cache/huggingface/diffusers/models--lambdalabs--sd-image-variations-diffusers/snapshots/ca6f97f838ae1b5bf764f31363a21f388f4d8f3e/image_encoder",
+   "architectures": [
+     "CLIPVisionModelWithProjection"
+   ],
+   "attention_dropout": 0.0,
+   "dropout": 0.0,
+   "hidden_act": "quick_gelu",
+   "hidden_size": 1024,
+   "image_size": 224,
+   "initializer_factor": 1.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-05,
+   "model_type": "clip_vision_model",
+   "num_attention_heads": 16,
+   "num_channels": 3,
+   "num_hidden_layers": 24,
+   "patch_size": 14,
+   "projection_dim": 768,
+   "torch_dtype": "float32",
+   "transformers_version": "4.25.1"
+ }
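image_encoder/ is the CLIP vision tower from lambdalabs/sd-image-variations-diffusers (per _name_or_path). Since config.json and pytorch_model.bin are both committed, it should load locally with transformers; a hedged sketch:

import torch
from transformers import CLIPVisionModelWithProjection

encoder = CLIPVisionModelWithProjection.from_pretrained(
    "pkgs/pretrained_models/image_encoder"
)
# Dummy batch matching image_size=224 / num_channels=3 from the config above.
pixel_values = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    image_embeds = encoder(pixel_values).image_embeds
print(image_embeds.shape)  # torch.Size([1, 768]), i.e. projection_dim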
pkgs/pretrained_models/image_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:89d2aa29b5fdf64f3ad4f45fb4227ea98bc45156bbae673b85be1af7783dbabb
+ size 1215993967
pkgs/pretrained_models/sd-vae-ft-mse/config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "_class_name": "AutoencoderKL",
+   "_diffusers_version": "0.4.2",
+   "act_fn": "silu",
+   "block_out_channels": [
+     128,
+     256,
+     512,
+     512
+   ],
+   "down_block_types": [
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D"
+   ],
+   "in_channels": 3,
+   "latent_channels": 4,
+   "layers_per_block": 2,
+   "norm_num_groups": 32,
+   "out_channels": 3,
+   "sample_size": 256,
+   "up_block_types": [
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D"
+   ]
+ }
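sd-vae-ft-mse ships its config plus both .bin and .safetensors weights, so diffusers can load it as a standalone AutoencoderKL. A sketch of a round trip through the latent space (shapes follow from sample_size=256 and latent_channels=4 above):

import torch
from diffusers import AutoencoderKL

vae = AutoencoderKL.from_pretrained("pkgs/pretrained_models/sd-vae-ft-mse")
image = torch.randn(1, 3, 256, 256)
with torch.no_grad():
    latents = vae.encode(image).latent_dist.sample()  # (1, 4, 32, 32): 8x spatial downsample
    recon = vae.decode(latents).sample                # back to (1, 3, 256, 256)
print(latents.shape, recon.shape)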
pkgs/pretrained_models/sd-vae-ft-mse/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1b4889b6b1d4ce7ae320a02dedaeff1780ad77d415ea0d744b476155c6377ddc
+ size 334707217
pkgs/pretrained_models/sd-vae-ft-mse/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a1d993488569e928462932c8c38a0760b874d166399b14414135bd9c42df5815
+ size 334643276
pkgs/pretrained_models/smpl/SMPL_NEUTRAL.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:98e65c74ad9b998783132f00880d1025a8d64b158e040e6ef13a557e5098bc42
+ size 39001280
pkgs/pretrained_models/stable-diffusion-v1-5/feature_extractor/preprocessor_config.json ADDED
@@ -0,0 +1,20 @@
+ {
+   "crop_size": 224,
+   "do_center_crop": true,
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_resize": true,
+   "feature_extractor_type": "CLIPFeatureExtractor",
+   "image_mean": [
+     0.48145466,
+     0.4578275,
+     0.40821073
+   ],
+   "image_std": [
+     0.26862954,
+     0.26130258,
+     0.27577711
+   ],
+   "resample": 3,
+   "size": 224
+ }
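The feature_extractor config declares a CLIPFeatureExtractor; in current transformers that class is deprecated in favor of CLIPImageProcessor, which reads the same config keys. A hedged loading sketch:

from PIL import Image
from transformers import CLIPImageProcessor

processor = CLIPImageProcessor.from_pretrained(
    "pkgs/pretrained_models/stable-diffusion-v1-5", subfolder="feature_extractor"
)
# Any RGB image gets resized to 224, center-cropped, and normalized per the config above.
inputs = processor(images=Image.new("RGB", (640, 480)), return_tensors="pt")
print(inputs["pixel_values"].shape)  # torch.Size([1, 3, 224, 224])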
pkgs/pretrained_models/stable-diffusion-v1-5/model_index.json ADDED
@@ -0,0 +1,32 @@
+ {
+   "_class_name": "StableDiffusionPipeline",
+   "_diffusers_version": "0.6.0",
+   "feature_extractor": [
+     "transformers",
+     "CLIPImageProcessor"
+   ],
+   "safety_checker": [
+     "stable_diffusion",
+     "StableDiffusionSafetyChecker"
+   ],
+   "scheduler": [
+     "diffusers",
+     "PNDMScheduler"
+   ],
+   "text_encoder": [
+     "transformers",
+     "CLIPTextModel"
+   ],
+   "tokenizer": [
+     "transformers",
+     "CLIPTokenizer"
+   ],
+   "unet": [
+     "diffusers",
+     "UNet2DConditionModel"
+   ],
+   "vae": [
+     "diffusers",
+     "AutoencoderKL"
+   ]
+ }
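model_index.json is the pipeline manifest: each entry maps a component name to the (library, class) pair diffusers should instantiate from the matching subfolder. Note that this commit only ships the feature_extractor and unet subfolders, so a full StableDiffusionPipeline.from_pretrained on this local path would fail until the remaining components are fetched. A small sketch that just reads the manifest:

import json

with open("pkgs/pretrained_models/stable-diffusion-v1-5/model_index.json") as f:
    index = json.load(f)

# Skip the _class_name / _diffusers_version metadata keys.
for name, spec in index.items():
    if not name.startswith("_"):
        library, cls = spec
        print(f"{name}: {library}.{cls}")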
pkgs/pretrained_models/stable-diffusion-v1-5/unet/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "_class_name": "UNet2DConditionModel",
+   "_diffusers_version": "0.6.0",
+   "act_fn": "silu",
+   "attention_head_dim": 8,
+   "block_out_channels": [
+     320,
+     640,
+     1280,
+     1280
+   ],
+   "center_input_sample": false,
+   "cross_attention_dim": 768,
+   "down_block_types": [
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D",
+     "DownBlock2D"
+   ],
+   "downsample_padding": 1,
+   "flip_sin_to_cos": true,
+   "freq_shift": 0,
+   "in_channels": 4,
+   "layers_per_block": 2,
+   "mid_block_scale_factor": 1,
+   "norm_eps": 1e-05,
+   "norm_num_groups": 32,
+   "out_channels": 4,
+   "sample_size": 64,
+   "up_block_types": [
+     "UpBlock2D",
+     "CrossAttnUpBlock2D",
+     "CrossAttnUpBlock2D",
+     "CrossAttnUpBlock2D"
+   ]
+ }
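Because only the unet subfolder carries weights in this commit, the natural way to use it is to load the UNet standalone. A hedged sketch; the dummy shapes follow from in_channels=4, sample_size=64, and cross_attention_dim=768 in the config above:

import torch
from diffusers import UNet2DConditionModel

unet = UNet2DConditionModel.from_pretrained(
    "pkgs/pretrained_models/stable-diffusion-v1-5", subfolder="unet"
)
sample = torch.randn(1, 4, 64, 64)  # noisy latents
timestep = torch.tensor([10])       # diffusion step index
context = torch.randn(1, 77, 768)   # stand-in for CLIP text embeddings
with torch.no_grad():
    noise_pred = unet(sample, timestep, encoder_hidden_states=context).sample
print(noise_pred.shape)  # torch.Size([1, 4, 64, 64])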
pkgs/pretrained_models/stable-diffusion-v1-5/unet/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c7da0e21ba7ea50637bee26e81c220844defdf01aafca02b2c42ecdadb813de4
+ size 3438354725
pkgs/pretrained_models/stable-diffusion-v1-5/v1-inference.yaml ADDED
@@ -0,0 +1,70 @@
+ model:
+   base_learning_rate: 1.0e-04
+   target: ldm.models.diffusion.ddpm.LatentDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.0120
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: "jpg"
+     cond_stage_key: "txt"
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: false # Note: different from the one we trained before
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: False
+
+     scheduler_config: # 10000 warmup steps
+       target: ldm.lr_scheduler.LambdaLinearScheduler
+       params:
+         warm_up_steps: [ 10000 ]
+         cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+         f_start: [ 1.e-6 ]
+         f_max: [ 1. ]
+         f_min: [ 1. ]
+
+     unet_config:
+       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 32 # unused
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions: [ 4, 2, 1 ]
+         num_res_blocks: 2
+         channel_mult: [ 1, 2, 4, 4 ]
+         num_heads: 8
+         use_spatial_transformer: True
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: True
+         legacy: False
+
+     first_stage_config:
+       target: ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+
+     cond_stage_config:
+       target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
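v1-inference.yaml is the original CompVis config for Stable Diffusion v1, kept alongside the diffusers-layout folders for tooling that builds the model through the ldm codebase instead. Reading it only requires omegaconf; instantiating model.target additionally needs the ldm package, which is an assumption here:

from omegaconf import OmegaConf

config = OmegaConf.load(
    "pkgs/pretrained_models/stable-diffusion-v1-5/v1-inference.yaml"
)
print(config.model.target)  # ldm.models.diffusion.ddpm.LatentDiffusion
print(config.model.params.unet_config.params.context_dim)  # 768, matching the unet config above
# With the CompVis ldm package installed, the usual next step would be:
#   from ldm.util import instantiate_from_config
#   model = instantiate_from_config(config.model)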