Sony /
Diffusers · ONNX · Safetensors
shoukanghu-sony committed
Commit 4eb3ee6 · 1 Parent(s): 857d5f0

upload checkpoint

Files changed (24)
  1. pretrained_models/humangif/DNA_Rendering/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/NeRF_renderer-150000.pth +3 -0
  2. pretrained_models/humangif/DNA_Rendering/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/denoising_unet-150000.pth +3 -0
  3. pretrained_models/humangif/DNA_Rendering/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/guidance_encoder_nerf-150000.pth +3 -0
  4. pretrained_models/humangif/DNA_Rendering/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/guidance_encoder_normal-150000.pth +3 -0
  5. pretrained_models/humangif/DNA_Rendering/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/reference_unet-150000.pth +3 -0
  6. pretrained_models/humangif/DNA_Rendering/stage2_w_normal_w_nerf_guid_w_img_loss_w_view_attention/saved_models/view_module.pth +3 -0
  7. pretrained_models/humangif/DNA_Rendering/stage3_w_normal_w_nerf_guid_w_img_loss_w_view_attention_w_motion_attention/saved_models/motion_module.pth +3 -0
  8. pretrained_models/humangif/RenderPeople/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/NeRF_renderer-140000.pth +3 -0
  9. pretrained_models/humangif/RenderPeople/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/denoising_unet-140000.pth +3 -0
  10. pretrained_models/humangif/RenderPeople/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/guidance_encoder_nerf-140000.pth +3 -0
  11. pretrained_models/humangif/RenderPeople/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/guidance_encoder_normal-140000.pth +3 -0
  12. pretrained_models/humangif/RenderPeople/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/reference_unet-140000.pth +3 -0
  13. pretrained_models/humangif/RenderPeople/stage2_w_normal_w_nerf_guid_w_img_loss_w_view_attention/saved_models/view_module.pth +3 -0
  14. pretrained_models/humangif/RenderPeople/stage3_w_normal_w_nerf_guid_w_img_loss_w_view_attention_w_motion_attention/saved_models/motion_module.pth +3 -0
  15. pretrained_models/image_encoder/config.json +23 -0
  16. pretrained_models/image_encoder/pytorch_model.bin +3 -0
  17. pretrained_models/sd-vae-ft-mse/config.json +29 -0
  18. pretrained_models/sd-vae-ft-mse/diffusion_pytorch_model.bin +3 -0
  19. pretrained_models/sd-vae-ft-mse/diffusion_pytorch_model.safetensors +3 -0
  20. pretrained_models/stable-diffusion-v1-5/feature_extractor/preprocessor_config.json +20 -0
  21. pretrained_models/stable-diffusion-v1-5/model_index.json +32 -0
  22. pretrained_models/stable-diffusion-v1-5/unet/config.json +36 -0
  23. pretrained_models/stable-diffusion-v1-5/unet/diffusion_pytorch_model.bin +3 -0
  24. pretrained_models/stable-diffusion-v1-5/v1-inference.yaml +70 -0
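
All 24 files above are tracked with Git LFS, so a plain clone without LFS yields only small pointer stubs. A minimal fetch sketch using huggingface_hub; the repo id below is a hypothetical placeholder, since the page header truncates the actual name:

# Sketch: download the checkpoint tree in one call via huggingface_hub.
# "Sony/<repo-name>" is a placeholder -- substitute the real repo id
# from the page header above.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="Sony/<repo-name>",
    allow_patterns=["pretrained_models/*"],   # restrict to the files listed above
)
print(local_dir)   # local cache path containing pretrained_models/...
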
pretrained_models/humangif/DNA_Rendering/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/NeRF_renderer-150000.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b48bec085529b2a479d6d6c40f7b66a4e1a4375a4a79566c8e51347bee595ff
+ size 56726004
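
Each .pth hunk in this commit adds a three-line Git LFS pointer (spec version, sha256 oid, byte size) rather than the weights themselves; `git lfs pull` swaps the stub for the real file. A small parsing sketch (hypothetical helper, standard library only):

def parse_lfs_pointer(path):
    # Read "key value" pairs from a pointer stub like the hunk above.
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    assert fields["version"] == "https://git-lfs.github.com/spec/v1"
    oid = fields["oid"].split(":", 1)[1]   # drop the "sha256:" prefix
    return oid, int(fields["size"])

# On a clone without LFS, the file above parses to
# ("3b48bec085529b2a479d6d6c40f7b66a4e1a4375a4a79566c8e51347bee595ff", 56726004)
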
pretrained_models/humangif/DNA_Rendering/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/denoising_unet-150000.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6a1146491f9f7ac36726fb2df4e7d68b79ba4e4f88aca1df81fdc524f7743755
+ size 3438375954
pretrained_models/humangif/DNA_Rendering/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/guidance_encoder_nerf-150000.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f1c6bd73995273bc6a3c625f3b4bf339ab83646100f0a1d9b3778d5652066d3
+ size 15394517
pretrained_models/humangif/DNA_Rendering/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/guidance_encoder_normal-150000.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:511875358266f0cf8367be8574cc44edaac639b5282782a1b9706301d455027b
+ size 15394823
pretrained_models/humangif/DNA_Rendering/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/reference_unet-150000.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:67213041fd6bc5c1c6dd9904e58d22ac227672c46fdaffdbdff8a45b2dccc0c3
+ size 3438325026
pretrained_models/humangif/DNA_Rendering/stage2_w_normal_w_nerf_guid_w_img_loss_w_view_attention/saved_models/view_module.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f4b6ea535030c4c1625b8ddbf48987a10da5ff74ccc281744dd9faf56634d53c
+ size 1767810138
pretrained_models/humangif/DNA_Rendering/stage3_w_normal_w_nerf_guid_w_img_loss_w_view_attention_w_motion_attention/saved_models/motion_module.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:530e47c7ab4ac50ef0148b75a3ce93eb71c882fdb03907aeb0d1a9282e0606bd
+ size 1772245854
pretrained_models/humangif/RenderPeople/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/NeRF_renderer-140000.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b3f72e93f4bb913872a2cbae5d267dd48fd610ec1da330c5473539ab83cf59c0
+ size 56726772
pretrained_models/humangif/RenderPeople/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/denoising_unet-140000.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a8342658e0d9a9520afc3db956993157f9933aeb162fde89acf4ae3c57ca51c8
+ size 3438375954
pretrained_models/humangif/RenderPeople/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/guidance_encoder_nerf-140000.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1cd08fd84911cf342dabc691af593cf9d023ee48a7e54ee82dee893ba5c37331
+ size 15396245
pretrained_models/humangif/RenderPeople/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/guidance_encoder_normal-140000.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a24c6a697b55da3021ae4ac1c7eb1c409b633cda91cfa2358483a8c93bfe4b99
+ size 15394823
pretrained_models/humangif/RenderPeople/stage1_w_normal_w_nerf_guid_w_img_loss/saved_models/reference_unet-140000.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f72fa4f732ed51a854a706d0c2158adb97747d92812a9aec4e5cdb8db2d27a6
+ size 3438325026
pretrained_models/humangif/RenderPeople/stage2_w_normal_w_nerf_guid_w_img_loss_w_view_attention/saved_models/view_module.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fb72ebf3a81bda48df49fe6a69f179e255fee9c3ea0a1872a5a8f26f2c8add41
+ size 1767809718
pretrained_models/humangif/RenderPeople/stage3_w_normal_w_nerf_guid_w_img_loss_w_view_attention_w_motion_attention/saved_models/motion_module.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:56faa17f65877283602b3d9c3253f5109a7cd5542306f0f5a8645c2530019d2b
+ size 1772245854
pretrained_models/image_encoder/config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "_name_or_path": "/home/jpinkney/.cache/huggingface/diffusers/models--lambdalabs--sd-image-variations-diffusers/snapshots/ca6f97f838ae1b5bf764f31363a21f388f4d8f3e/image_encoder",
+   "architectures": [
+     "CLIPVisionModelWithProjection"
+   ],
+   "attention_dropout": 0.0,
+   "dropout": 0.0,
+   "hidden_act": "quick_gelu",
+   "hidden_size": 1024,
+   "image_size": 224,
+   "initializer_factor": 1.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-05,
+   "model_type": "clip_vision_model",
+   "num_attention_heads": 16,
+   "num_channels": 3,
+   "num_hidden_layers": 24,
+   "patch_size": 14,
+   "projection_dim": 768,
+   "torch_dtype": "float32",
+   "transformers_version": "4.25.1"
+ }
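
The config above describes a CLIP vision tower (24 hidden layers, hidden size 1024, patch size 14) with a 768-d projection head, inherited from lambdalabs/sd-image-variations-diffusers. A loading sketch with transformers, assuming the folder has been fetched locally:

import torch
from transformers import CLIPVisionModelWithProjection

# Loads config.json plus the pytorch_model.bin added below.
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "pretrained_models/image_encoder"
)
with torch.no_grad():
    pixels = torch.randn(1, 3, 224, 224)        # image_size 224, num_channels 3
    out = image_encoder(pixel_values=pixels)
print(out.image_embeds.shape)                   # torch.Size([1, 768]) = projection_dim
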
pretrained_models/image_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:89d2aa29b5fdf64f3ad4f45fb4227ea98bc45156bbae673b85be1af7783dbabb
+ size 1215993967
pretrained_models/sd-vae-ft-mse/config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "_class_name": "AutoencoderKL",
+   "_diffusers_version": "0.4.2",
+   "act_fn": "silu",
+   "block_out_channels": [
+     128,
+     256,
+     512,
+     512
+   ],
+   "down_block_types": [
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D"
+   ],
+   "in_channels": 3,
+   "latent_channels": 4,
+   "layers_per_block": 2,
+   "norm_num_groups": 32,
+   "out_channels": 3,
+   "sample_size": 256,
+   "up_block_types": [
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D"
+   ]
+ }
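
This matches the stabilityai/sd-vae-ft-mse autoencoder: a KL VAE with 4 latent channels and an 8x spatial downsample. A usage sketch with diffusers:

import torch
from diffusers import AutoencoderKL

vae = AutoencoderKL.from_pretrained("pretrained_models/sd-vae-ft-mse")
with torch.no_grad():
    image = torch.randn(1, 3, 256, 256)              # in_channels 3, sample_size 256
    latents = vae.encode(image).latent_dist.sample()
print(latents.shape)                                 # [1, 4, 32, 32]: 8x downsample
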
pretrained_models/sd-vae-ft-mse/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1b4889b6b1d4ce7ae320a02dedaeff1780ad77d415ea0d744b476155c6377ddc
+ size 334707217
pretrained_models/sd-vae-ft-mse/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a1d993488569e928462932c8c38a0760b874d166399b14414135bd9c42df5815
+ size 334643276
pretrained_models/stable-diffusion-v1-5/feature_extractor/preprocessor_config.json ADDED
@@ -0,0 +1,20 @@
+ {
+   "crop_size": 224,
+   "do_center_crop": true,
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_resize": true,
+   "feature_extractor_type": "CLIPFeatureExtractor",
+   "image_mean": [
+     0.48145466,
+     0.4578275,
+     0.40821073
+   ],
+   "image_std": [
+     0.26862954,
+     0.26130258,
+     0.27577711
+   ],
+   "resample": 3,
+   "size": 224
+ }
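
The preprocessor resizes the shorter side to 224, center-crops to 224x224, and normalizes with the CLIP mean/std (resample 3 is PIL bicubic). A sketch with transformers; CLIPImageProcessor is the current class that transformers also resolves for the legacy CLIPFeatureExtractor type:

from PIL import Image
from transformers import CLIPImageProcessor

processor = CLIPImageProcessor.from_pretrained(
    "pretrained_models/stable-diffusion-v1-5/feature_extractor"
)
image = Image.new("RGB", (512, 512))        # stand-in input image
batch = processor(images=image, return_tensors="pt")
print(batch.pixel_values.shape)             # [1, 3, 224, 224] after resize + crop
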
pretrained_models/stable-diffusion-v1-5/model_index.json ADDED
@@ -0,0 +1,32 @@
+ {
+   "_class_name": "StableDiffusionPipeline",
+   "_diffusers_version": "0.6.0",
+   "feature_extractor": [
+     "transformers",
+     "CLIPImageProcessor"
+   ],
+   "safety_checker": [
+     "stable_diffusion",
+     "StableDiffusionSafetyChecker"
+   ],
+   "scheduler": [
+     "diffusers",
+     "PNDMScheduler"
+   ],
+   "text_encoder": [
+     "transformers",
+     "CLIPTextModel"
+   ],
+   "tokenizer": [
+     "transformers",
+     "CLIPTokenizer"
+   ],
+   "unet": [
+     "diffusers",
+     "UNet2DConditionModel"
+   ],
+   "vae": [
+     "diffusers",
+     "AutoencoderKL"
+   ]
+ }
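
model_index.json maps every pipeline component to the (library, class) pair diffusers should import for it. Note that this commit ships only the unet and feature_extractor subfolders, so instantiating the full StableDiffusionPipeline from this folder alone would fail; the sketch below merely inspects the index:

import json

with open("pretrained_models/stable-diffusion-v1-5/model_index.json") as f:
    index = json.load(f)

for name, spec in index.items():
    if isinstance(spec, list):              # skip _class_name / _diffusers_version
        library, cls = spec
        print(f"{name}: {cls} (from {library})")
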
pretrained_models/stable-diffusion-v1-5/unet/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "_class_name": "UNet2DConditionModel",
+   "_diffusers_version": "0.6.0",
+   "act_fn": "silu",
+   "attention_head_dim": 8,
+   "block_out_channels": [
+     320,
+     640,
+     1280,
+     1280
+   ],
+   "center_input_sample": false,
+   "cross_attention_dim": 768,
+   "down_block_types": [
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D",
+     "DownBlock2D"
+   ],
+   "downsample_padding": 1,
+   "flip_sin_to_cos": true,
+   "freq_shift": 0,
+   "in_channels": 4,
+   "layers_per_block": 2,
+   "mid_block_scale_factor": 1,
+   "norm_eps": 1e-05,
+   "norm_num_groups": 32,
+   "out_channels": 4,
+   "sample_size": 64,
+   "up_block_types": [
+     "UpBlock2D",
+     "CrossAttnUpBlock2D",
+     "CrossAttnUpBlock2D",
+     "CrossAttnUpBlock2D"
+   ]
+ }
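
This is the stock SD v1.5 denoising UNet: 4 latent channels at a 64x64 sample size with 768-d cross-attention. A sketch loading just this subfolder and running a single denoising call:

import torch
from diffusers import UNet2DConditionModel

unet = UNet2DConditionModel.from_pretrained(
    "pretrained_models/stable-diffusion-v1-5", subfolder="unet"
)
with torch.no_grad():
    latents = torch.randn(1, 4, 64, 64)         # in_channels 4, sample_size 64
    timestep = torch.tensor([10])
    text_ctx = torch.randn(1, 77, 768)          # cross_attention_dim 768
    noise_pred = unet(latents, timestep, encoder_hidden_states=text_ctx).sample
print(noise_pred.shape)                         # [1, 4, 64, 64]
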
pretrained_models/stable-diffusion-v1-5/unet/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c7da0e21ba7ea50637bee26e81c220844defdf01aafca02b2c42ecdadb813de4
+ size 3438354725
pretrained_models/stable-diffusion-v1-5/v1-inference.yaml ADDED
@@ -0,0 +1,70 @@
+ model:
+   base_learning_rate: 1.0e-04
+   target: ldm.models.diffusion.ddpm.LatentDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.0120
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: "jpg"
+     cond_stage_key: "txt"
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: false   # Note: different from the one we trained before
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: False
+
+     scheduler_config: # 10000 warmup steps
+       target: ldm.lr_scheduler.LambdaLinearScheduler
+       params:
+         warm_up_steps: [ 10000 ]
+         cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+         f_start: [ 1.e-6 ]
+         f_max: [ 1. ]
+         f_min: [ 1. ]
+
+     unet_config:
+       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 32 # unused
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions: [ 4, 2, 1 ]
+         num_res_blocks: 2
+         channel_mult: [ 1, 2, 4, 4 ]
+         num_heads: 8
+         use_spatial_transformer: True
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: True
+         legacy: False
+
+     first_stage_config:
+       target: ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+
+     cond_stage_config:
+       target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
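
Unlike the JSON configs, v1-inference.yaml is the original CompVis/latent-diffusion training config, consumed with OmegaConf rather than diffusers. A sketch, assuming the `ldm` package from the CompVis stable-diffusion repo is importable:

from omegaconf import OmegaConf
from ldm.util import instantiate_from_config   # requires the CompVis ldm package

config = OmegaConf.load("pretrained_models/stable-diffusion-v1-5/v1-inference.yaml")
model = instantiate_from_config(config.model)  # builds LatentDiffusion from `target`
print(type(model).__name__)                    # LatentDiffusion
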