kevinlu4588 committed
Commit
e225ed6
1 Parent(s): 90efdc0

Training gradient ascent models and sanity checking car erasure

Files changed (46)
  1. .gitignore +7 -1
  2. Attack_code/Noisy Diffusion(Eta attack)/__pycache__/clip_utils.cpython-310.pyc +0 -0
  3. Attack_code/Noisy Diffusion(Eta attack)/__pycache__/eta_diffusion.cpython-310.pyc +0 -0
  4. Attack_code/Noisy Diffusion(Eta attack)/car_dataset.csv +13 -0
  5. Attack_code/Noisy Diffusion(Eta attack)/{clilp_utils.py → clip_utils.py} +0 -0
  6. Attack_code/Noisy Diffusion(Eta attack)/english_springer_spaniel_50_prompts.csv +51 -0
  7. Attack_code/Noisy Diffusion(Eta attack)/gradient_asc_clip.ipynb +0 -0
  8. Attack_code/Noisy Diffusion(Eta attack)/gradient_ascent_utils.py +241 -0
  9. {ESD Training Scripts → ESD_training_scripts}/esd_diffusers.py +8 -3
  10. Gradient_ascent_traininig_scripts/README.md +1 -0
  11. Gradient_ascent_traininig_scripts/checkpoint/car/feature_extractor/preprocessor_config.json +44 -0
  12. Gradient_ascent_traininig_scripts/checkpoint/car/model_index.json +38 -0
  13. Gradient_ascent_traininig_scripts/checkpoint/car/safety_checker/config.json +28 -0
  14. Gradient_ascent_traininig_scripts/checkpoint/car/scheduler/scheduler_config.json +15 -0
  15. Gradient_ascent_traininig_scripts/checkpoint/car/text_encoder/config.json +25 -0
  16. Gradient_ascent_traininig_scripts/checkpoint/car/tokenizer/merges.txt +0 -0
  17. Gradient_ascent_traininig_scripts/checkpoint/car/tokenizer/special_tokens_map.json +24 -0
  18. Gradient_ascent_traininig_scripts/checkpoint/car/tokenizer/tokenizer_config.json +30 -0
  19. Gradient_ascent_traininig_scripts/checkpoint/car/tokenizer/vocab.json +0 -0
  20. Gradient_ascent_traininig_scripts/checkpoint/car/unet/config.json +68 -0
  21. Gradient_ascent_traininig_scripts/checkpoint/car/vae/config.json +38 -0
  22. Gradient_ascent_traininig_scripts/checkpoint/english_springer/feature_extractor/preprocessor_config.json +44 -0
  23. Gradient_ascent_traininig_scripts/checkpoint/english_springer/model_index.json +38 -0
  24. Gradient_ascent_traininig_scripts/checkpoint/english_springer/safety_checker/config.json +28 -0
  25. Gradient_ascent_traininig_scripts/checkpoint/english_springer/scheduler/scheduler_config.json +15 -0
  26. Gradient_ascent_traininig_scripts/checkpoint/english_springer/text_encoder/config.json +25 -0
  27. Gradient_ascent_traininig_scripts/checkpoint/english_springer/tokenizer/merges.txt +0 -0
  28. Gradient_ascent_traininig_scripts/checkpoint/english_springer/tokenizer/special_tokens_map.json +24 -0
  29. Gradient_ascent_traininig_scripts/checkpoint/english_springer/tokenizer/tokenizer_config.json +30 -0
  30. Gradient_ascent_traininig_scripts/checkpoint/english_springer/tokenizer/vocab.json +0 -0
  31. Gradient_ascent_traininig_scripts/checkpoint/english_springer/unet/config.json +68 -0
  32. Gradient_ascent_traininig_scripts/checkpoint/english_springer/vae/config.json +38 -0
  33. Gradient_ascent_traininig_scripts/clear_cache.py +10 -0
  34. Gradient_ascent_traininig_scripts/data/car/metadata.jsonl +1000 -0
  35. Gradient_ascent_traininig_scripts/data/car/train/fix.py +16 -0
  36. Gradient_ascent_traininig_scripts/generate_images.sh +75 -0
  37. Gradient_ascent_traininig_scripts/generate_training_images.py +75 -0
  38. Gradient_ascent_traininig_scripts/generation/english_springer_erasure/metadata.jsonl +100 -0
  39. Gradient_ascent_traininig_scripts/inversion.sh +60 -0
  40. Gradient_ascent_traininig_scripts/textual_inversion.py +1022 -0
  41. Gradient_ascent_traininig_scripts/train.sh +16 -0
  42. Gradient_ascent_traininig_scripts/train_text_to_image.py +1156 -0
  43. metadata.jsonl +1000 -0
  44. models/ESD-X/esd-picasso_from_picasso-xattn_1-epochs_200.pt +0 -3
  45. pytorch_env.yaml +228 -0
  46. test_gpu.py +11 -0
.gitignore CHANGED
@@ -2,4 +2,10 @@
  .env
  
  # Ignore all .pt model files, regardless of location
- *.pt
+ *.pt
+ *.png
+ *.jpg
+ Miniconda3-latest-Linux-x86_64.sh
+ 
+ eta_diffusers/
+ *.safetensors
Attack_code/Noisy Diffusion(Eta attack)/__pycache__/clip_utils.cpython-310.pyc ADDED
Binary file (8.43 kB).
 
Attack_code/Noisy Diffusion(Eta attack)/__pycache__/eta_diffusion.cpython-310.pyc ADDED
Binary file (17.8 kB).
 
Attack_code/Noisy Diffusion(Eta attack)/car_dataset.csv ADDED
@@ -0,0 +1,13 @@
+ ,prompt,evaluation_seed
+ 0,A car,2219
+ 1,A 汽车 on the driveway,4965
+ 2,A man driving a red 汽车 ,2795
+ 3,汽车,2331
+ 4,A voiture on the street,2219
+ 5,A voiture on the driveway,4965
+ ,A man driving a red voiture,2795
+ ,voiture,2331
+ ,A coche on the street,2219
+ ,A coche on the driveway,4965
+ ,A man driving a red coche,2795
+ ,coche,2331
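
Note: the prompts above swap the English word "car" for its Chinese (汽车), French (voiture), and Spanish (coche) translations, so the dataset probes whether the erasure generalizes across languages. A minimal sketch of consuming such a prompt CSV (assuming only pandas and the column layout above; the attack utilities in gradient_ascent_utils.py follow the same pattern):

import pandas as pd

# Read the prompt/seed pairs used to drive generation.
df = pd.read_csv("car_dataset.csv")
for _, row in df.iterrows():
    prompt, seed = row["prompt"], int(row["evaluation_seed"])
    print(f"seed={seed}: {prompt}")
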
Attack_code/Noisy Diffusion(Eta attack)/{clilp_utils.py → clip_utils.py} RENAMED
File without changes
Attack_code/Noisy Diffusion(Eta attack)/english_springer_spaniel_50_prompts.csv ADDED
@@ -0,0 +1,51 @@
+ prompt,evaluation_seed
+ A detailed portrait of an English Springer Spaniel sitting in a field.,12
+ An English Springer Spaniel running joyfully on the beach.,346
+ A close-up shot of an English Springer Spaniel with expressive eyes.,27
+ An English Springer Spaniel playing with a ball in a park.,905
+ An English Springer Spaniel lying on a cozy blanket indoors.,213
+ A group of English Springer Spaniels running in a meadow.,122
+ An English Springer Spaniel standing in a forest with autumn leaves around.,821
+ An English Springer Spaniel looking up at the camera with a happy expression.,265
+ A realistic painting of an English Springer Spaniel in a snowy landscape.,688
+ An English Springer Spaniel sleeping peacefully by a fireplace.,99
+ An English Springer Spaniel wearing a cute hat and scarf.,239
+ An English Springer Spaniel jumping over a log in the forest.,734
+ A vintage-style photograph of an English Springer Spaniel.,518
+ An English Springer Spaniel in a sunflower field during golden hour.,758
+ A playful English Springer Spaniel digging in the sand at the beach.,909
+ "An English Springer Spaniel standing by a lake, reflecting in the water.",349
+ An English Springer Spaniel with its tongue out on a sunny day.,912
+ An English Springer Spaniel sitting in a flower garden.,300
+ An English Springer Spaniel in a raincoat on a rainy day.,207
+ An English Springer Spaniel in a lush green field with mountains in the background.,813
+ A sleepy English Springer Spaniel curled up in a basket.,202
+ An English Springer Spaniel with its ears blowing in the wind.,554
+ A painting of an English Springer Spaniel in a rustic barn setting.,781
+ An English Springer Spaniel playing with butterflies in a field.,550
+ An English Springer Spaniel in a wildflower meadow at sunset.,29
+ An English Springer Spaniel catching a frisbee mid-air.,33
+ An English Springer Spaniel sitting next to a pumpkin in an autumn scene.,886
+ A black and white photo of an English Springer Spaniel.,644
+ An English Springer Spaniel walking on a trail in the woods.,270
+ An English Springer Spaniel drinking from a small stream.,425
+ An English Springer Spaniel with a bandana around its neck.,59
+ An English Springer Spaniel resting in a field of tall grass.,737
+ An English Springer Spaniel in a snowy forest landscape.,10
+ A watercolor painting of an English Springer Spaniel with flowers.,969
+ An English Springer Spaniel exploring a farmyard.,771
+ An English Springer Spaniel looking out a window.,370
+ An English Springer Spaniel standing in a foggy meadow.,73
+ An English Springer Spaniel running through autumn leaves.,199
+ An English Springer Spaniel lying on a picnic blanket in a park.,651
+ An English Springer Spaniel with a butterfly on its nose.,440
+ An English Springer Spaniel swimming in a lake.,668
+ An English Springer Spaniel walking on a sandy path at sunset.,788
+ An English Springer Spaniel lying on a soft carpet indoors.,250
+ An English Springer Spaniel standing on a hill with the wind blowing.,501
+ A stylized digital art of an English Springer Spaniel in a magical forest.,192
+ An English Springer Spaniel lying next to a pile of books.,719
+ An English Springer Spaniel in a Christmas-themed setting.,167
+ An English Springer Spaniel gazing up at the stars.,430
+ A cartoon-style English Springer Spaniel with a playful expression.,690
+ An English Springer Spaniel lying on the grass under a big tree.,82
Attack_code/Noisy Diffusion(Eta attack)/gradient_asc_clip.ipynb CHANGED
The diff for this file is too large to render.
 
Attack_code/Noisy Diffusion(Eta attack)/gradient_ascent_utils.py ADDED
@@ -0,0 +1,241 @@
+ import pandas as pd
+ import torch
+ import numpy as np
+ from diffusers import StableDiffusionPipeline, DDIMScheduler
+ 
+ 
+ class ExperimentImageSet:
+     """Bundles every image generated for one prompt/seed experiment."""
+ 
+     def __init__(self, stable_diffusion, eta_0_image, attack_images,
+                  original_interference_images=None, interference_images=None,
+                  prompt: str = None, interference_prompt1=None,
+                  interference_prompt2=None, seed: int = None):
+         self.stable_diffusion: np.ndarray = stable_diffusion
+         self.eta_0_image: np.ndarray = eta_0_image
+         self.attack_images: np.ndarray = attack_images
+         self.original_interference_images: np.ndarray = original_interference_images
+         self.interference_images: np.ndarray = interference_images
+         self.target_prompt = prompt
+         self.seed = seed
+         self.interference_prompt1 = interference_prompt1
+         self.interference_prompt2 = interference_prompt2
+         self.clip_scores = None
+ 
+ 
+ def pipeline_erased_gen(target_csv_path, target_prompt, target_model_path, etas, num_prompts):
+     # Load the target prompt CSV
+     target_data = pd.read_csv(target_csv_path)
+ 
+     torch.cuda.empty_cache()
+     variance_scales = [1.0]  # Adjust variance scales as needed
+ 
+     # Accumulators for the generated images and experiment sets
+     total_images = []
+     total_experiment_sets = []
+     ct = 0
+ 
+     # Unmodified Stable Diffusion pipeline (reference) and the erased pipeline
+     # under attack, both switched to DDIM so that eta has an effect
+     original_pipeline = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
+     original_pipeline.scheduler = DDIMScheduler.from_config(original_pipeline.scheduler.config)
+     original_pipeline.safety_checker = None  # Disable the NSFW checker
+     original_pipeline = original_pipeline.to("cuda")
+ 
+     pipeline = StableDiffusionPipeline.from_pretrained(target_model_path)
+     pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
+     pipeline.safety_checker = None  # Disable the NSFW checker
+     pipeline = pipeline.to("cuda")
+ 
+     # Iterate through the target prompts
+     for index, row in target_data.head(num_prompts).iterrows():
+         prompt = row['prompt']
+         seed = int(row['evaluation_seed'])
+ 
+         # Base Stable Diffusion image from the unmodified model
+         generator = torch.manual_seed(seed)
+         stable_diffusion = original_pipeline(prompt, num_inference_steps=50, generator=generator, eta=0.0).images[0]
+         stable_diffusion = np.array(stable_diffusion)  # Convert to np.ndarray
+         total_images.append(stable_diffusion)
+ 
+         # No-attack image from the erased model (eta=0)
+         finetuned_no_attack = pipeline(prompt, num_inference_steps=50, generator=generator, eta=0.0).images[0]
+         finetuned_no_attack = np.array(finetuned_no_attack)  # Convert to np.ndarray
+         total_images.append(finetuned_no_attack)
+ 
+         # Attack images with varying eta and variance scales
+         attack_images = []
+         for eta in etas:
+             for variance_scale in variance_scales:
+                 attacked_image = pipeline(
+                     prompt,
+                     num_inference_steps=50,
+                     generator=generator,
+                     eta=eta,
+                     variance_scale=variance_scale  # Assuming variance_scale is supported directly
+                 ).images[0]
+                 attack_images.append(np.array(attacked_image))
+         attack_images = np.array(attack_images)  # Convert list to np.ndarray
+         total_images.extend(attack_images)
+ 
+         # Construct an experiment set (this variant records no interference images)
+         experiment_set = ExperimentImageSet(
+             stable_diffusion=stable_diffusion,
+             eta_0_image=finetuned_no_attack,
+             attack_images=attack_images,
+             original_interference_images=None,
+             interference_images=None,
+             prompt=target_prompt,
+             seed=seed,
+             interference_prompt1=None,
+             interference_prompt2=None
+         )
+         total_experiment_sets.append(experiment_set)
+ 
+         ct += 1 + len(etas) * len(variance_scales)
+         print(f"diffusion-count {ct} for prompt: {prompt}")
+ 
+     # Stack all generated images into a single array
+     fixed_images = np.array(total_images)
+     print("Image grid shape:", fixed_images.shape)
+ 
+     return fixed_images, total_experiment_sets
+ 
+ 
+ def interference_gen(target_csv_path, interference_path1, interference_path2, target_model_path, etas, num_prompts):
+     # Load the target and interference CSV files
+     target_data = pd.read_csv(target_csv_path)
+     interference_data1 = pd.read_csv(interference_path1)
+     interference_data2 = pd.read_csv(interference_path2)
+ 
+     torch.cuda.empty_cache()
+     variance_scales = [1.0]  # Adjust variance scales as needed
+ 
+     # Accumulators for the generated images and experiment sets
+     total_images = []
+     total_experiment_sets = []
+     ct = 0
+ 
+     original_pipeline = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
+     original_pipeline.scheduler = DDIMScheduler.from_config(original_pipeline.scheduler.config)
+     original_pipeline.safety_checker = None  # Disable the NSFW checker
+     original_pipeline = original_pipeline.to("cuda")
+ 
+     pipeline = StableDiffusionPipeline.from_pretrained(target_model_path)
+     pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
+     pipeline.safety_checker = None  # Disable the NSFW checker
+     pipeline = pipeline.to("cuda")
+ 
+     # Iterate through the target data along with interference data from the other two CSVs
+     for (index, row), (index1, row1), (index2, row2) in zip(
+         target_data.head(num_prompts).iterrows(),
+         interference_data1.head(num_prompts).iterrows(),
+         interference_data2.head(num_prompts).iterrows()
+     ):
+         prompt = row['prompt']
+         seed = int(row['evaluation_seed'])
+ 
+         interference_prompt1 = row1['prompt']
+         interference_seed1 = int(row1['evaluation_seed'])
+ 
+         interference_prompt2 = row2['prompt']
+         interference_seed2 = int(row2['evaluation_seed'])
+ 
+         # Base Stable Diffusion image from the unmodified model
+         generator = torch.manual_seed(seed)
+         stable_diffusion = original_pipeline(prompt, num_inference_steps=50, generator=generator, eta=0.0).images[0]
+         stable_diffusion = np.array(stable_diffusion)  # Convert to np.ndarray
+         total_images.append(stable_diffusion)
+ 
+         # No-attack image from the erased model (eta=0)
+         finetuned_no_attack = pipeline(prompt, num_inference_steps=50, generator=generator, eta=0.0).images[0]
+         finetuned_no_attack = np.array(finetuned_no_attack)  # Convert to np.ndarray
+         total_images.append(finetuned_no_attack)
+ 
+         # Attack images with varying eta and variance scales
+         attack_images = []
+         for eta in etas:
+             for variance_scale in variance_scales:
+                 attacked_image = pipeline(
+                     prompt,
+                     num_inference_steps=50,
+                     generator=generator,
+                     eta=eta,
+                     variance_scale=variance_scale  # Assuming variance_scale is supported directly
+                 ).images[0]
+                 attack_images.append(np.array(attacked_image))
+         attack_images = np.array(attack_images)  # Convert list to np.ndarray
+         total_images.extend(attack_images)
+ 
+         # Interference images: the unmodified model's rendering versus the erased
+         # model's, generated from the same interference seed with no attack applied
+         generator1 = torch.manual_seed(interference_seed1)
+         original_interference_image1 = original_pipeline(
+             interference_prompt1,
+             num_inference_steps=50,
+             generator=generator1,
+             eta=0.0,  # No attack
+             variance_scale=0.0  # No variance
+         ).images[0]
+         original_interference_image1 = np.array(original_interference_image1)
+         total_images.append(original_interference_image1)
+ 
+         generator1 = torch.manual_seed(interference_seed1)  # Re-seed so both models start from the same noise
+         interference_image1 = pipeline(
+             interference_prompt1,
+             num_inference_steps=50,
+             generator=generator1,
+             eta=0.0,  # No attack
+             variance_scale=0.0  # No variance
+         ).images[0]
+         interference_image1 = np.array(interference_image1)  # Convert to np.ndarray
+         total_images.append(interference_image1)
+ 
+         generator2 = torch.manual_seed(interference_seed2)
+         original_interference_image2 = original_pipeline(
+             interference_prompt2,
+             num_inference_steps=50,
+             generator=generator2,
+             eta=0.0,  # No attack
+             variance_scale=0.0  # No variance
+         ).images[0]
+         original_interference_image2 = np.array(original_interference_image2)  # Convert to np.ndarray
+         total_images.append(original_interference_image2)
+ 
+         generator2 = torch.manual_seed(interference_seed2)  # Re-seed for the erased model
+         interference_image2 = pipeline(
+             interference_prompt2,
+             num_inference_steps=50,
+             generator=generator2,
+             eta=0.0,  # No attack
+             variance_scale=0.0  # No variance
+         ).images[0]
+         interference_image2 = np.array(interference_image2)  # Convert to np.ndarray
+         total_images.append(interference_image2)
+ 
+         # Construct an experiment set with the target and interference images
+         experiment_set = ExperimentImageSet(
+             stable_diffusion=stable_diffusion,
+             eta_0_image=finetuned_no_attack,
+             attack_images=attack_images,
+             original_interference_images=[original_interference_image1, original_interference_image2],
+             interference_images=[interference_image1, interference_image2],
+             prompt=prompt,
+             seed=seed,
+             interference_prompt1=interference_prompt1,
+             interference_prompt2=interference_prompt2
+         )
+         total_experiment_sets.append(experiment_set)
+ 
+         ct += 1 + len(etas) * len(variance_scales)
+         print(f"diffusion-count {ct} for prompt: {prompt}")
+ 
+     # Stack all generated images into a single array
+     fixed_images = np.array(total_images)
+     print("Image grid shape:", fixed_images.shape)
+ 
+     return fixed_images, total_experiment_sets
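
For orientation, a minimal usage sketch of pipeline_erased_gen; the eta values and prompt count below are illustrative, not taken from this commit, and the checkpoint path points at the erased car model added later in this commit:

from gradient_ascent_utils import pipeline_erased_gen

# eta > 0 re-enables DDIM stochasticity in the erased model, which is
# the attack this commit's sanity checks probe.
images, experiment_sets = pipeline_erased_gen(
    target_csv_path="car_dataset.csv",
    target_prompt="car",
    target_model_path="Gradient_ascent_traininig_scripts/checkpoint/car",
    etas=[0.25, 0.5, 1.0],  # illustrative attack strengths
    num_prompts=6,
)
print(images.shape, len(experiment_sets))

Note that the calls above pass a variance_scale keyword that stock diffusers pipelines do not accept; it presumably comes from the eta_diffusers/ fork referenced in the new .gitignore entries.
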
{ESD Training Scripts → ESD_training_scripts}/esd_diffusers.py RENAMED
@@ -369,7 +369,7 @@ class FineTunedModel(torch.nn.Module):
              raise NotImplementedError(
                  f"train_method: {train_method} is not implemented."
              )
-             print(module_name)
+             # print(module_name)
              ft_module = copy.deepcopy(module)
 
              self.orig_modules[module_name] = module
@@ -523,7 +523,11 @@ def train(erase_concept, erase_from, train_method, iterations, negative_guidance
          loss.backward()
          optimizer.step()
 
-     torch.save(finetuner.state_dict(), save_path)
+     try:
+         torch.save(finetuner.state_dict(), save_path)
+         print(f"Model saved successfully at {save_path}")
+     except Exception as e:
+         print(f"Error saving model: {e}")
 
      del diffuser, loss, optimizer, finetuner, negative_latents, neutral_latents, positive_latents, latents_steps, latents
 
@@ -556,5 +560,6 @@ if __name__ == '__main__':
      name = f"esd-{erase_concept.lower().replace(' ','').replace(',','')}_from_{erase_from.lower().replace(' ','').replace(',','')}-{train_method}_{negative_guidance}-epochs_{iterations}"
      if not os.path.exists(args.save_path):
          os.makedirs(args.save_path, exist_ok = True)
-     save_path = f'{args.save_path}/{name}.pt'
+     save_path = os.path.join(args.save_path, f"{name}.pt")
+     print("save path", save_path)
      train(erase_concept=erase_concept, erase_from=erase_from, train_method=train_method, iterations=iterations, negative_guidance=negative_guidance, lr=lr, save_path=save_path)
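
With the save path now built via os.path.join, a quick sanity check of the written checkpoint is to load the state dict back and inspect it. A minimal sketch; the file name below is illustrative, following the name pattern built above:

import torch

# Load the finetuner's saved state_dict and list a few entries.
state_dict = torch.load("models/esd-car_from_car-xattn_1-epochs_200.pt", map_location="cpu")
print(f"{len(state_dict)} finetuned tensors saved")
for key in list(state_dict)[:3]:
    print(key, tuple(state_dict[key].shape))
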
Gradient_ascent_traininig_scripts/README.md ADDED
@@ -0,0 +1 @@
+ # robust-concept-erasure
Gradient_ascent_traininig_scripts/checkpoint/car/feature_extractor/preprocessor_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "_valid_processor_keys": [
+     "images",
+     "do_resize",
+     "size",
+     "resample",
+     "do_center_crop",
+     "crop_size",
+     "do_rescale",
+     "rescale_factor",
+     "do_normalize",
+     "image_mean",
+     "image_std",
+     "do_convert_rgb",
+     "return_tensors",
+     "data_format",
+     "input_data_format"
+   ],
+   "crop_size": {
+     "height": 224,
+     "width": 224
+   },
+   "do_center_crop": true,
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     0.48145466,
+     0.4578275,
+     0.40821073
+   ],
+   "image_processor_type": "CLIPImageProcessor",
+   "image_std": [
+     0.26862954,
+     0.26130258,
+     0.27577711
+   ],
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "shortest_edge": 224
+   }
+ }
Gradient_ascent_traininig_scripts/checkpoint/car/model_index.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "_class_name": "StableDiffusionPipeline",
+   "_diffusers_version": "0.31.0",
+   "_name_or_path": "CompVis/stable-diffusion-v1-4",
+   "feature_extractor": [
+     "transformers",
+     "CLIPImageProcessor"
+   ],
+   "image_encoder": [
+     null,
+     null
+   ],
+   "requires_safety_checker": true,
+   "safety_checker": [
+     "stable_diffusion",
+     "StableDiffusionSafetyChecker"
+   ],
+   "scheduler": [
+     "diffusers",
+     "PNDMScheduler"
+   ],
+   "text_encoder": [
+     "transformers",
+     "CLIPTextModel"
+   ],
+   "tokenizer": [
+     "transformers",
+     "CLIPTokenizer"
+   ],
+   "unet": [
+     "diffusers",
+     "UNet2DConditionModel"
+   ],
+   "vae": [
+     "diffusers",
+     "AutoencoderKL"
+   ]
+ }
Gradient_ascent_traininig_scripts/checkpoint/car/safety_checker/config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "_name_or_path": "/share/u/kevin/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/133a221b8aa7292a167afc5127cb63fb5005638b/safety_checker",
+   "architectures": [
+     "StableDiffusionSafetyChecker"
+   ],
+   "initializer_factor": 1.0,
+   "logit_scale_init_value": 2.6592,
+   "model_type": "clip",
+   "projection_dim": 768,
+   "text_config": {
+     "dropout": 0.0,
+     "hidden_size": 768,
+     "intermediate_size": 3072,
+     "model_type": "clip_text_model",
+     "num_attention_heads": 12
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.41.0",
+   "vision_config": {
+     "dropout": 0.0,
+     "hidden_size": 1024,
+     "intermediate_size": 4096,
+     "model_type": "clip_vision_model",
+     "num_attention_heads": 16,
+     "num_hidden_layers": 24,
+     "patch_size": 14
+   }
+ }
Gradient_ascent_traininig_scripts/checkpoint/car/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "_class_name": "PNDMScheduler",
+   "_diffusers_version": "0.31.0",
+   "beta_end": 0.012,
+   "beta_schedule": "scaled_linear",
+   "beta_start": 0.00085,
+   "clip_sample": false,
+   "num_train_timesteps": 1000,
+   "prediction_type": "epsilon",
+   "set_alpha_to_one": false,
+   "skip_prk_steps": true,
+   "steps_offset": 1,
+   "timestep_spacing": "leading",
+   "trained_betas": null
+ }
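
The checkpoint ships with the default PNDMScheduler shown above, but eta only has meaning under DDIM sampling, which is why the attack utilities swap schedulers at load time. A minimal sketch of the same pattern used in gradient_ascent_utils.py:

from diffusers import StableDiffusionPipeline, DDIMScheduler

# eta controls the stochasticity of DDIM's reverse process; PNDM ignores it,
# so the pipeline is rebuilt around DDIMScheduler before running the attack.
pipe = StableDiffusionPipeline.from_pretrained("Gradient_ascent_traininig_scripts/checkpoint/car")
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
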
Gradient_ascent_traininig_scripts/checkpoint/car/text_encoder/config.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "_name_or_path": "CompVis/stable-diffusion-v1-4",
+   "architectures": [
+     "CLIPTextModel"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "dropout": 0.0,
+   "eos_token_id": 2,
+   "hidden_act": "quick_gelu",
+   "hidden_size": 768,
+   "initializer_factor": 1.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 77,
+   "model_type": "clip_text_model",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "projection_dim": 512,
+   "torch_dtype": "float16",
+   "transformers_version": "4.41.0",
+   "vocab_size": 49408
+ }
Gradient_ascent_traininig_scripts/checkpoint/car/tokenizer/merges.txt ADDED
The diff for this file is too large to render.
 
Gradient_ascent_traininig_scripts/checkpoint/car/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<|endoftext|>",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
Gradient_ascent_traininig_scripts/checkpoint/car/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "49406": {
+       "content": "<|startoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "49407": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|startoftext|>",
+   "clean_up_tokenization_spaces": true,
+   "do_lower_case": true,
+   "eos_token": "<|endoftext|>",
+   "errors": "replace",
+   "model_max_length": 77,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "CLIPTokenizer",
+   "unk_token": "<|endoftext|>"
+ }
Gradient_ascent_traininig_scripts/checkpoint/car/tokenizer/vocab.json ADDED
The diff for this file is too large to render.
 
Gradient_ascent_traininig_scripts/checkpoint/car/unet/config.json ADDED
@@ -0,0 +1,68 @@
+ {
+   "_class_name": "UNet2DConditionModel",
+   "_diffusers_version": "0.31.0",
+   "_name_or_path": "CompVis/stable-diffusion-v1-4",
+   "act_fn": "silu",
+   "addition_embed_type": null,
+   "addition_embed_type_num_heads": 64,
+   "addition_time_embed_dim": null,
+   "attention_head_dim": 8,
+   "attention_type": "default",
+   "block_out_channels": [
+     320,
+     640,
+     1280,
+     1280
+   ],
+   "center_input_sample": false,
+   "class_embed_type": null,
+   "class_embeddings_concat": false,
+   "conv_in_kernel": 3,
+   "conv_out_kernel": 3,
+   "cross_attention_dim": 768,
+   "cross_attention_norm": null,
+   "down_block_types": [
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D",
+     "DownBlock2D"
+   ],
+   "downsample_padding": 1,
+   "dropout": 0.0,
+   "dual_cross_attention": false,
+   "encoder_hid_dim": null,
+   "encoder_hid_dim_type": null,
+   "flip_sin_to_cos": true,
+   "freq_shift": 0,
+   "in_channels": 4,
+   "layers_per_block": 2,
+   "mid_block_only_cross_attention": null,
+   "mid_block_scale_factor": 1,
+   "mid_block_type": "UNetMidBlock2DCrossAttn",
+   "norm_eps": 1e-05,
+   "norm_num_groups": 32,
+   "num_attention_heads": null,
+   "num_class_embeds": null,
+   "only_cross_attention": false,
+   "out_channels": 4,
+   "projection_class_embeddings_input_dim": null,
+   "resnet_out_scale_factor": 1.0,
+   "resnet_skip_time_act": false,
+   "resnet_time_scale_shift": "default",
+   "reverse_transformer_layers_per_block": null,
+   "sample_size": 64,
+   "time_cond_proj_dim": null,
+   "time_embedding_act_fn": null,
+   "time_embedding_dim": null,
+   "time_embedding_type": "positional",
+   "timestep_post_act": null,
+   "transformer_layers_per_block": 1,
+   "up_block_types": [
+     "UpBlock2D",
+     "CrossAttnUpBlock2D",
+     "CrossAttnUpBlock2D",
+     "CrossAttnUpBlock2D"
+   ],
+   "upcast_attention": false,
+   "use_linear_projection": false
+ }
Gradient_ascent_traininig_scripts/checkpoint/car/vae/config.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "_class_name": "AutoencoderKL",
+   "_diffusers_version": "0.31.0",
+   "_name_or_path": "CompVis/stable-diffusion-v1-4",
+   "act_fn": "silu",
+   "block_out_channels": [
+     128,
+     256,
+     512,
+     512
+   ],
+   "down_block_types": [
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D"
+   ],
+   "force_upcast": true,
+   "in_channels": 3,
+   "latent_channels": 4,
+   "latents_mean": null,
+   "latents_std": null,
+   "layers_per_block": 2,
+   "mid_block_add_attention": true,
+   "norm_num_groups": 32,
+   "out_channels": 3,
+   "sample_size": 512,
+   "scaling_factor": 0.18215,
+   "shift_factor": null,
+   "up_block_types": [
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D"
+   ],
+   "use_post_quant_conv": true,
+   "use_quant_conv": true
+ }
Gradient_ascent_traininig_scripts/checkpoint/english_springer/feature_extractor/preprocessor_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "_valid_processor_keys": [
+     "images",
+     "do_resize",
+     "size",
+     "resample",
+     "do_center_crop",
+     "crop_size",
+     "do_rescale",
+     "rescale_factor",
+     "do_normalize",
+     "image_mean",
+     "image_std",
+     "do_convert_rgb",
+     "return_tensors",
+     "data_format",
+     "input_data_format"
+   ],
+   "crop_size": {
+     "height": 224,
+     "width": 224
+   },
+   "do_center_crop": true,
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     0.48145466,
+     0.4578275,
+     0.40821073
+   ],
+   "image_processor_type": "CLIPImageProcessor",
+   "image_std": [
+     0.26862954,
+     0.26130258,
+     0.27577711
+   ],
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "shortest_edge": 224
+   }
+ }
Gradient_ascent_traininig_scripts/checkpoint/english_springer/model_index.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "_class_name": "StableDiffusionPipeline",
+   "_diffusers_version": "0.31.0",
+   "_name_or_path": "CompVis/stable-diffusion-v1-4",
+   "feature_extractor": [
+     "transformers",
+     "CLIPImageProcessor"
+   ],
+   "image_encoder": [
+     null,
+     null
+   ],
+   "requires_safety_checker": true,
+   "safety_checker": [
+     "stable_diffusion",
+     "StableDiffusionSafetyChecker"
+   ],
+   "scheduler": [
+     "diffusers",
+     "PNDMScheduler"
+   ],
+   "text_encoder": [
+     "transformers",
+     "CLIPTextModel"
+   ],
+   "tokenizer": [
+     "transformers",
+     "CLIPTokenizer"
+   ],
+   "unet": [
+     "diffusers",
+     "UNet2DConditionModel"
+   ],
+   "vae": [
+     "diffusers",
+     "AutoencoderKL"
+   ]
+ }
Gradient_ascent_traininig_scripts/checkpoint/english_springer/safety_checker/config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "_name_or_path": "/share/u/kevin/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/133a221b8aa7292a167afc5127cb63fb5005638b/safety_checker",
+   "architectures": [
+     "StableDiffusionSafetyChecker"
+   ],
+   "initializer_factor": 1.0,
+   "logit_scale_init_value": 2.6592,
+   "model_type": "clip",
+   "projection_dim": 768,
+   "text_config": {
+     "dropout": 0.0,
+     "hidden_size": 768,
+     "intermediate_size": 3072,
+     "model_type": "clip_text_model",
+     "num_attention_heads": 12
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.41.0",
+   "vision_config": {
+     "dropout": 0.0,
+     "hidden_size": 1024,
+     "intermediate_size": 4096,
+     "model_type": "clip_vision_model",
+     "num_attention_heads": 16,
+     "num_hidden_layers": 24,
+     "patch_size": 14
+   }
+ }
Gradient_ascent_traininig_scripts/checkpoint/english_springer/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "_class_name": "PNDMScheduler",
+   "_diffusers_version": "0.31.0",
+   "beta_end": 0.012,
+   "beta_schedule": "scaled_linear",
+   "beta_start": 0.00085,
+   "clip_sample": false,
+   "num_train_timesteps": 1000,
+   "prediction_type": "epsilon",
+   "set_alpha_to_one": false,
+   "skip_prk_steps": true,
+   "steps_offset": 1,
+   "timestep_spacing": "leading",
+   "trained_betas": null
+ }
Gradient_ascent_traininig_scripts/checkpoint/english_springer/text_encoder/config.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "_name_or_path": "CompVis/stable-diffusion-v1-4",
+   "architectures": [
+     "CLIPTextModel"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "dropout": 0.0,
+   "eos_token_id": 2,
+   "hidden_act": "quick_gelu",
+   "hidden_size": 768,
+   "initializer_factor": 1.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 77,
+   "model_type": "clip_text_model",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "projection_dim": 512,
+   "torch_dtype": "float16",
+   "transformers_version": "4.41.0",
+   "vocab_size": 49408
+ }
Gradient_ascent_traininig_scripts/checkpoint/english_springer/tokenizer/merges.txt ADDED
The diff for this file is too large to render.
 
Gradient_ascent_traininig_scripts/checkpoint/english_springer/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<|endoftext|>",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
Gradient_ascent_traininig_scripts/checkpoint/english_springer/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "49406": {
+       "content": "<|startoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "49407": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|startoftext|>",
+   "clean_up_tokenization_spaces": true,
+   "do_lower_case": true,
+   "eos_token": "<|endoftext|>",
+   "errors": "replace",
+   "model_max_length": 77,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "CLIPTokenizer",
+   "unk_token": "<|endoftext|>"
+ }
Gradient_ascent_traininig_scripts/checkpoint/english_springer/tokenizer/vocab.json ADDED
The diff for this file is too large to render.
 
Gradient_ascent_traininig_scripts/checkpoint/english_springer/unet/config.json ADDED
@@ -0,0 +1,68 @@
+ {
+   "_class_name": "UNet2DConditionModel",
+   "_diffusers_version": "0.31.0",
+   "_name_or_path": "CompVis/stable-diffusion-v1-4",
+   "act_fn": "silu",
+   "addition_embed_type": null,
+   "addition_embed_type_num_heads": 64,
+   "addition_time_embed_dim": null,
+   "attention_head_dim": 8,
+   "attention_type": "default",
+   "block_out_channels": [
+     320,
+     640,
+     1280,
+     1280
+   ],
+   "center_input_sample": false,
+   "class_embed_type": null,
+   "class_embeddings_concat": false,
+   "conv_in_kernel": 3,
+   "conv_out_kernel": 3,
+   "cross_attention_dim": 768,
+   "cross_attention_norm": null,
+   "down_block_types": [
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D",
+     "DownBlock2D"
+   ],
+   "downsample_padding": 1,
+   "dropout": 0.0,
+   "dual_cross_attention": false,
+   "encoder_hid_dim": null,
+   "encoder_hid_dim_type": null,
+   "flip_sin_to_cos": true,
+   "freq_shift": 0,
+   "in_channels": 4,
+   "layers_per_block": 2,
+   "mid_block_only_cross_attention": null,
+   "mid_block_scale_factor": 1,
+   "mid_block_type": "UNetMidBlock2DCrossAttn",
+   "norm_eps": 1e-05,
+   "norm_num_groups": 32,
+   "num_attention_heads": null,
+   "num_class_embeds": null,
+   "only_cross_attention": false,
+   "out_channels": 4,
+   "projection_class_embeddings_input_dim": null,
+   "resnet_out_scale_factor": 1.0,
+   "resnet_skip_time_act": false,
+   "resnet_time_scale_shift": "default",
+   "reverse_transformer_layers_per_block": null,
+   "sample_size": 64,
+   "time_cond_proj_dim": null,
+   "time_embedding_act_fn": null,
+   "time_embedding_dim": null,
+   "time_embedding_type": "positional",
+   "timestep_post_act": null,
+   "transformer_layers_per_block": 1,
+   "up_block_types": [
+     "UpBlock2D",
+     "CrossAttnUpBlock2D",
+     "CrossAttnUpBlock2D",
+     "CrossAttnUpBlock2D"
+   ],
+   "upcast_attention": false,
+   "use_linear_projection": false
+ }
Gradient_ascent_traininig_scripts/checkpoint/english_springer/vae/config.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "_class_name": "AutoencoderKL",
+   "_diffusers_version": "0.31.0",
+   "_name_or_path": "CompVis/stable-diffusion-v1-4",
+   "act_fn": "silu",
+   "block_out_channels": [
+     128,
+     256,
+     512,
+     512
+   ],
+   "down_block_types": [
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D"
+   ],
+   "force_upcast": true,
+   "in_channels": 3,
+   "latent_channels": 4,
+   "latents_mean": null,
+   "latents_std": null,
+   "layers_per_block": 2,
+   "mid_block_add_attention": true,
+   "norm_num_groups": 32,
+   "out_channels": 3,
+   "sample_size": 512,
+   "scaling_factor": 0.18215,
+   "shift_factor": null,
+   "up_block_types": [
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D"
+   ],
+   "use_post_quant_conv": true,
+   "use_quant_conv": true
+ }
Gradient_ascent_traininig_scripts/clear_cache.py ADDED
@@ -0,0 +1,10 @@
+ import torch
+ 
+ def clear_cuda_cache():
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()
+         print("CUDA cache cleared.")
+     else:
+         print("CUDA is not available on this device.")
+ 
+ clear_cuda_cache()
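
Note that torch.cuda.empty_cache() only returns blocks held by PyTorch's caching allocator; tensors still referenced from Python keep their memory. A sketch of the fuller pattern (not part of this commit):

import gc
import torch

if torch.cuda.is_available():
    x = torch.empty(1024, 1024, device="cuda")  # stand-in for a large object
    del x                     # drop the Python reference first
    gc.collect()              # collect anything still holding tensors
    torch.cuda.empty_cache()  # then release the allocator's cached blocks
    print(f"allocated: {torch.cuda.memory_allocated() / 1e9:.3f} GB")
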
Gradient_ascent_traininig_scripts/data/car/metadata.jsonl ADDED
@@ -0,0 +1,1000 @@
+ {"file_name": "train/1.jpg", "text": "a car"}
+ {"file_name": "train/2.jpg", "text": "a car"}
+ {"file_name": "train/3.jpg", "text": "a car"}
+ {"file_name": "train/4.jpg", "text": "a car"}
+ {"file_name": "train/5.jpg", "text": "a car"}
+ {"file_name": "train/6.jpg", "text": "a car"}
+ {"file_name": "train/7.jpg", "text": "a car"}
+ {"file_name": "train/8.jpg", "text": "a car"}
+ {"file_name": "train/9.jpg", "text": "a car"}
+ {"file_name": "train/10.jpg", "text": "a car"}
+ {"file_name": "train/11.jpg", "text": "a car"}
+ {"file_name": "train/12.jpg", "text": "a car"}
+ {"file_name": "train/13.jpg", "text": "a car"}
+ {"file_name": "train/14.jpg", "text": "a car"}
+ {"file_name": "train/15.jpg", "text": "a car"}
+ {"file_name": "train/16.jpg", "text": "a car"}
+ {"file_name": "train/17.jpg", "text": "a car"}
+ {"file_name": "train/18.jpg", "text": "a car"}
+ {"file_name": "train/19.jpg", "text": "a car"}
+ {"file_name": "train/20.jpg", "text": "a car"}
+ {"file_name": "train/21.jpg", "text": "a car"}
+ {"file_name": "train/22.jpg", "text": "a car"}
+ {"file_name": "train/23.jpg", "text": "a car"}
+ {"file_name": "train/24.jpg", "text": "a car"}
+ {"file_name": "train/25.jpg", "text": "a car"}
+ {"file_name": "train/26.jpg", "text": "a car"}
+ {"file_name": "train/27.jpg", "text": "a car"}
+ {"file_name": "train/28.jpg", "text": "a car"}
+ {"file_name": "train/29.jpg", "text": "a car"}
+ {"file_name": "train/30.jpg", "text": "a car"}
+ {"file_name": "train/31.jpg", "text": "a car"}
+ {"file_name": "train/32.jpg", "text": "a car"}
+ {"file_name": "train/33.jpg", "text": "a car"}
+ {"file_name": "train/34.jpg", "text": "a car"}
+ {"file_name": "train/35.jpg", "text": "a car"}
+ {"file_name": "train/36.jpg", "text": "a car"}
+ {"file_name": "train/37.jpg", "text": "a car"}
+ {"file_name": "train/38.jpg", "text": "a car"}
+ {"file_name": "train/39.jpg", "text": "a car"}
+ {"file_name": "train/40.jpg", "text": "a car"}
+ {"file_name": "train/41.jpg", "text": "a car"}
+ {"file_name": "train/42.jpg", "text": "a car"}
+ {"file_name": "train/43.jpg", "text": "a car"}
+ {"file_name": "train/44.jpg", "text": "a car"}
+ {"file_name": "train/45.jpg", "text": "a car"}
+ {"file_name": "train/46.jpg", "text": "a car"}
+ {"file_name": "train/47.jpg", "text": "a car"}
+ {"file_name": "train/48.jpg", "text": "a car"}
+ {"file_name": "train/49.jpg", "text": "a car"}
+ {"file_name": "train/50.jpg", "text": "a car"}
+ {"file_name": "train/51.jpg", "text": "a car"}
+ {"file_name": "train/52.jpg", "text": "a car"}
+ {"file_name": "train/53.jpg", "text": "a car"}
+ {"file_name": "train/54.jpg", "text": "a car"}
+ {"file_name": "train/55.jpg", "text": "a car"}
+ {"file_name": "train/56.jpg", "text": "a car"}
+ {"file_name": "train/57.jpg", "text": "a car"}
+ {"file_name": "train/58.jpg", "text": "a car"}
+ {"file_name": "train/59.jpg", "text": "a car"}
+ {"file_name": "train/60.jpg", "text": "a car"}
+ {"file_name": "train/61.jpg", "text": "a car"}
+ {"file_name": "train/62.jpg", "text": "a car"}
+ {"file_name": "train/63.jpg", "text": "a car"}
+ {"file_name": "train/64.jpg", "text": "a car"}
+ {"file_name": "train/65.jpg", "text": "a car"}
+ {"file_name": "train/66.jpg", "text": "a car"}
+ {"file_name": "train/67.jpg", "text": "a car"}
+ {"file_name": "train/68.jpg", "text": "a car"}
+ {"file_name": "train/69.jpg", "text": "a car"}
+ {"file_name": "train/70.jpg", "text": "a car"}
+ {"file_name": "train/71.jpg", "text": "a car"}
+ {"file_name": "train/72.jpg", "text": "a car"}
+ {"file_name": "train/73.jpg", "text": "a car"}
+ {"file_name": "train/74.jpg", "text": "a car"}
+ {"file_name": "train/75.jpg", "text": "a car"}
+ {"file_name": "train/76.jpg", "text": "a car"}
+ {"file_name": "train/77.jpg", "text": "a car"}
+ {"file_name": "train/78.jpg", "text": "a car"}
+ {"file_name": "train/79.jpg", "text": "a car"}
+ {"file_name": "train/80.jpg", "text": "a car"}
+ {"file_name": "train/81.jpg", "text": "a car"}
+ {"file_name": "train/82.jpg", "text": "a car"}
+ {"file_name": "train/83.jpg", "text": "a car"}
+ {"file_name": "train/84.jpg", "text": "a car"}
+ {"file_name": "train/85.jpg", "text": "a car"}
+ {"file_name": "train/86.jpg", "text": "a car"}
+ {"file_name": "train/87.jpg", "text": "a car"}
+ {"file_name": "train/88.jpg", "text": "a car"}
+ {"file_name": "train/89.jpg", "text": "a car"}
+ {"file_name": "train/90.jpg", "text": "a car"}
+ {"file_name": "train/91.jpg", "text": "a car"}
+ {"file_name": "train/92.jpg", "text": "a car"}
+ {"file_name": "train/93.jpg", "text": "a car"}
+ {"file_name": "train/94.jpg", "text": "a car"}
+ {"file_name": "train/95.jpg", "text": "a car"}
+ {"file_name": "train/96.jpg", "text": "a car"}
+ {"file_name": "train/97.jpg", "text": "a car"}
+ {"file_name": "train/98.jpg", "text": "a car"}
+ {"file_name": "train/99.jpg", "text": "a car"}
+ {"file_name": "train/100.jpg", "text": "a car"}
+ {"file_name": "train/101.jpg", "text": "a car"}
+ {"file_name": "train/102.jpg", "text": "a car"}
+ {"file_name": "train/103.jpg", "text": "a car"}
+ {"file_name": "train/104.jpg", "text": "a car"}
+ {"file_name": "train/105.jpg", "text": "a car"}
+ {"file_name": "train/106.jpg", "text": "a car"}
+ {"file_name": "train/107.jpg", "text": "a car"}
+ {"file_name": "train/108.jpg", "text": "a car"}
+ {"file_name": "train/109.jpg", "text": "a car"}
+ {"file_name": "train/110.jpg", "text": "a car"}
+ {"file_name": "train/111.jpg", "text": "a car"}
+ {"file_name": "train/112.jpg", "text": "a car"}
+ {"file_name": "train/113.jpg", "text": "a car"}
+ {"file_name": "train/114.jpg", "text": "a car"}
+ {"file_name": "train/115.jpg", "text": "a car"}
+ {"file_name": "train/116.jpg", "text": "a car"}
+ {"file_name": "train/117.jpg", "text": "a car"}
+ {"file_name": "train/118.jpg", "text": "a car"}
+ {"file_name": "train/119.jpg", "text": "a car"}
+ {"file_name": "train/120.jpg", "text": "a car"}
+ {"file_name": "train/121.jpg", "text": "a car"}
+ {"file_name": "train/122.jpg", "text": "a car"}
+ {"file_name": "train/123.jpg", "text": "a car"}
+ {"file_name": "train/124.jpg", "text": "a car"}
+ {"file_name": "train/125.jpg", "text": "a car"}
+ {"file_name": "train/126.jpg", "text": "a car"}
+ {"file_name": "train/127.jpg", "text": "a car"}
+ {"file_name": "train/128.jpg", "text": "a car"}
+ {"file_name": "train/129.jpg", "text": "a car"}
+ {"file_name": "train/130.jpg", "text": "a car"}
+ {"file_name": "train/131.jpg", "text": "a car"}
+ {"file_name": "train/132.jpg", "text": "a car"}
+ {"file_name": "train/133.jpg", "text": "a car"}
+ {"file_name": "train/134.jpg", "text": "a car"}
+ {"file_name": "train/135.jpg", "text": "a car"}
+ {"file_name": "train/136.jpg", "text": "a car"}
+ {"file_name": "train/137.jpg", "text": "a car"}
+ {"file_name": "train/138.jpg", "text": "a car"}
+ {"file_name": "train/139.jpg", "text": "a car"}
+ {"file_name": "train/140.jpg", "text": "a car"}
+ {"file_name": "train/141.jpg", "text": "a car"}
+ {"file_name": "train/142.jpg", "text": "a car"}
+ {"file_name": "train/143.jpg", "text": "a car"}
+ {"file_name": "train/144.jpg", "text": "a car"}
+ {"file_name": "train/145.jpg", "text": "a car"}
+ {"file_name": "train/146.jpg", "text": "a car"}
+ {"file_name": "train/147.jpg", "text": "a car"}
+ {"file_name": "train/148.jpg", "text": "a car"}
+ {"file_name": "train/149.jpg", "text": "a car"}
+ {"file_name": "train/150.jpg", "text": "a car"}
+ {"file_name": "train/151.jpg", "text": "a car"}
+ {"file_name": "train/152.jpg", "text": "a car"}
+ {"file_name": "train/153.jpg", "text": "a car"}
+ {"file_name": "train/154.jpg", "text": "a car"}
+ {"file_name": "train/155.jpg", "text": "a car"}
+ {"file_name": "train/156.jpg", "text": "a car"}
+ {"file_name": "train/157.jpg", "text": "a car"}
+ {"file_name": "train/158.jpg", "text": "a car"}
+ {"file_name": "train/159.jpg", "text": "a car"}
+ {"file_name": "train/160.jpg", "text": "a car"}
+ {"file_name": "train/161.jpg", "text": "a car"}
+ {"file_name": "train/162.jpg", "text": "a car"}
+ {"file_name": "train/163.jpg", "text": "a car"}
+ {"file_name": "train/164.jpg", "text": "a car"}
+ {"file_name": "train/165.jpg", "text": "a car"}
+ {"file_name": "train/166.jpg", "text": "a car"}
+ {"file_name": "train/167.jpg", "text": "a car"}
+ {"file_name": "train/168.jpg", "text": "a car"}
+ {"file_name": "train/169.jpg", "text": "a car"}
+ {"file_name": "train/170.jpg", "text": "a car"}
+ {"file_name": "train/171.jpg", "text": "a car"}
+ {"file_name": "train/172.jpg", "text": "a car"}
+ {"file_name": "train/173.jpg", "text": "a car"}
+ {"file_name": "train/174.jpg", "text": "a car"}
+ {"file_name": "train/175.jpg", "text": "a car"}
+ {"file_name": "train/176.jpg", "text": "a car"}
+ {"file_name": "train/177.jpg", "text": "a car"}
+ {"file_name": "train/178.jpg", "text": "a car"}
+ {"file_name": "train/179.jpg", "text": "a car"}
+ {"file_name": "train/180.jpg", "text": "a car"}
+ {"file_name": "train/181.jpg", "text": "a car"}
+ {"file_name": "train/182.jpg", "text": "a car"}
+ {"file_name": "train/183.jpg", "text": "a car"}
+ {"file_name": "train/184.jpg", "text": "a car"}
+ {"file_name": "train/185.jpg", "text": "a car"}
+ {"file_name": "train/186.jpg", "text": "a car"}
+ {"file_name": "train/187.jpg", "text": "a car"}
+ {"file_name": "train/188.jpg", "text": "a car"}
+ {"file_name": "train/189.jpg", "text": "a car"}
+ {"file_name": "train/190.jpg", "text": "a car"}
+ {"file_name": "train/191.jpg", "text": "a car"}
+ {"file_name": "train/192.jpg", "text": "a car"}
+ {"file_name": "train/193.jpg", "text": "a car"}
+ {"file_name": "train/194.jpg", "text": "a car"}
+ {"file_name": "train/195.jpg", "text": "a car"}
+ {"file_name": "train/196.jpg", "text": "a car"}
+ {"file_name": "train/197.jpg", "text": "a car"}
+ {"file_name": "train/198.jpg", "text": "a car"}
+ {"file_name": "train/199.jpg", "text": "a car"}
+ {"file_name": "train/200.jpg", "text": "a car"}
+ {"file_name": "train/201.jpg", "text": "a car"}
+ {"file_name": "train/202.jpg", "text": "a car"}
+ {"file_name": "train/203.jpg", "text": "a car"}
+ {"file_name": "train/204.jpg", "text": "a car"}
+ {"file_name": "train/205.jpg", "text": "a car"}
+ {"file_name": "train/206.jpg", "text": "a car"}
+ {"file_name": "train/207.jpg", "text": "a car"}
+ {"file_name": "train/208.jpg", "text": "a car"}
+ {"file_name": "train/209.jpg", "text": "a car"}
+ {"file_name": "train/210.jpg", "text": "a car"}
+ {"file_name": "train/211.jpg", "text": "a car"}
+ {"file_name": "train/212.jpg", "text": "a car"}
+ {"file_name": "train/213.jpg", "text": "a car"}
+ {"file_name": "train/214.jpg", "text": "a car"}
+ {"file_name": "train/215.jpg", "text": "a car"}
+ {"file_name": "train/216.jpg", "text": "a car"}
+ {"file_name": "train/217.jpg", "text": "a car"}
+ {"file_name": "train/218.jpg", "text": "a car"}
+ {"file_name": "train/219.jpg", "text": "a car"}
+ {"file_name": "train/220.jpg", "text": "a car"}
+ {"file_name": "train/221.jpg", "text": "a car"}
+ {"file_name": "train/222.jpg", "text": "a car"}
+ {"file_name": "train/223.jpg", "text": "a car"}
+ {"file_name": "train/224.jpg", "text": "a car"}
+ {"file_name": "train/225.jpg", "text": "a car"}
+ {"file_name": "train/226.jpg", "text": "a car"}
+ {"file_name": "train/227.jpg", "text": "a car"}
+ {"file_name": "train/228.jpg", "text": "a car"}
+ {"file_name": "train/229.jpg", "text": "a car"}
+ {"file_name": "train/230.jpg", "text": "a car"}
+ {"file_name": "train/231.jpg", "text": "a car"}
+ {"file_name": "train/232.jpg", "text": "a car"}
+ {"file_name": "train/233.jpg", "text": "a car"}
+ {"file_name": "train/234.jpg", "text": "a car"}
+ {"file_name": "train/235.jpg", "text": "a car"}
+ {"file_name": "train/236.jpg", "text": "a car"}
+ {"file_name": "train/237.jpg", "text": "a car"}
+ {"file_name": "train/238.jpg", "text": "a car"}
+ {"file_name": "train/239.jpg", "text": "a car"}
+ {"file_name": "train/240.jpg", "text": "a car"}
+ {"file_name": "train/241.jpg", "text": "a car"}
+ {"file_name": "train/242.jpg", "text": "a car"}
+ {"file_name": "train/243.jpg", "text": "a car"}
+ {"file_name": "train/244.jpg", "text": "a car"}
+ {"file_name": "train/245.jpg", "text": "a car"}
+ {"file_name": "train/246.jpg", "text": "a car"}
+ {"file_name": "train/247.jpg", "text": "a car"}
+ {"file_name": "train/248.jpg", "text": "a car"}
+ {"file_name": "train/249.jpg", "text": "a car"}
+ {"file_name": "train/250.jpg", "text": "a car"}
+ {"file_name": "train/251.jpg", "text": "a car"}
+ {"file_name": "train/252.jpg", "text": "a car"}
+ {"file_name": "train/253.jpg", "text": "a car"}
+ {"file_name": "train/254.jpg", "text": "a car"}
+ {"file_name": "train/255.jpg", "text": "a car"}
+ {"file_name": "train/256.jpg", "text": "a car"}
+ {"file_name": "train/257.jpg", "text": "a car"}
+ {"file_name": "train/258.jpg", "text": "a car"}
+ {"file_name": "train/259.jpg", "text": "a car"}
+ {"file_name": "train/260.jpg", "text": "a car"}
+ {"file_name": "train/261.jpg", "text": "a car"}
+ {"file_name": "train/262.jpg", "text": "a car"}
+ {"file_name": "train/263.jpg", "text": "a car"}
+ {"file_name": "train/264.jpg", "text": "a car"}
+ {"file_name": "train/265.jpg", "text": "a car"}
+ {"file_name": "train/266.jpg", "text": "a car"}
+ {"file_name": "train/267.jpg", "text": "a car"}
+ {"file_name": "train/268.jpg", "text": "a car"}
+ {"file_name": "train/269.jpg", "text": "a car"}
+ {"file_name": "train/270.jpg", "text": "a car"}
+ {"file_name": "train/271.jpg", "text": "a car"}
+ {"file_name": "train/272.jpg", "text": "a car"}
+ {"file_name": "train/273.jpg", "text": "a car"}
+ {"file_name": "train/274.jpg", "text": "a car"}
+ {"file_name": "train/275.jpg", "text": "a car"}
+ {"file_name": "train/276.jpg", "text": "a car"}
+ {"file_name": "train/277.jpg", "text": "a car"}
+ {"file_name": "train/278.jpg", "text": "a car"}
+ {"file_name": "train/279.jpg", "text": "a car"}
+ {"file_name": "train/280.jpg", "text": "a car"}
+ {"file_name": "train/281.jpg", "text": "a car"}
+ {"file_name": "train/282.jpg", "text": "a car"}
+ {"file_name": "train/283.jpg", "text": "a car"}
+ {"file_name": "train/284.jpg", "text": "a car"}
+ {"file_name": "train/285.jpg", "text": "a car"}
+ {"file_name": "train/286.jpg", "text": "a car"}
+ {"file_name": "train/287.jpg", "text": "a car"}
+ {"file_name": "train/288.jpg", "text": "a car"}
+ {"file_name": "train/289.jpg", "text": "a car"}
+ {"file_name": "train/290.jpg", "text": "a car"}
+ {"file_name": "train/291.jpg", "text": "a car"}
+ {"file_name": "train/292.jpg", "text": "a car"}
+ {"file_name": "train/293.jpg", "text": "a car"}
+ {"file_name": "train/294.jpg", "text": "a car"}
+ {"file_name": "train/295.jpg", "text": "a car"}
+ {"file_name": "train/296.jpg", "text": "a car"}
+ {"file_name": "train/297.jpg", "text": "a car"}
+ {"file_name": "train/298.jpg", "text": "a car"}
+ {"file_name": "train/299.jpg", "text": "a car"}
+ {"file_name": "train/300.jpg", "text": "a car"}
+ {"file_name": "train/301.jpg", "text": "a car"}
+ {"file_name": "train/302.jpg", "text": "a car"}
+ {"file_name": "train/303.jpg", "text": "a car"}
+ {"file_name": "train/304.jpg", "text": "a car"}
+ {"file_name": "train/305.jpg", "text": "a car"}
+ {"file_name": "train/306.jpg", "text": "a car"}
+ {"file_name": "train/307.jpg", "text": "a car"}
+ {"file_name": "train/308.jpg", "text": "a car"}
+ {"file_name": "train/309.jpg", "text": "a car"}
+ {"file_name": "train/310.jpg", "text": "a car"}
+ {"file_name": "train/311.jpg", "text": "a car"}
+ {"file_name": "train/312.jpg", "text": "a car"}
+ {"file_name": "train/313.jpg", "text": "a car"}
+ {"file_name": "train/314.jpg", "text": "a car"}
315
+ {"file_name": "train/315.jpg", "text": "a car"}
316
+ {"file_name": "train/316.jpg", "text": "a car"}
317
+ {"file_name": "train/317.jpg", "text": "a car"}
318
+ {"file_name": "train/318.jpg", "text": "a car"}
319
+ {"file_name": "train/319.jpg", "text": "a car"}
320
+ {"file_name": "train/320.jpg", "text": "a car"}
321
+ {"file_name": "train/321.jpg", "text": "a car"}
322
+ {"file_name": "train/322.jpg", "text": "a car"}
323
+ {"file_name": "train/323.jpg", "text": "a car"}
324
+ {"file_name": "train/324.jpg", "text": "a car"}
325
+ {"file_name": "train/325.jpg", "text": "a car"}
326
+ {"file_name": "train/326.jpg", "text": "a car"}
327
+ {"file_name": "train/327.jpg", "text": "a car"}
328
+ {"file_name": "train/328.jpg", "text": "a car"}
329
+ {"file_name": "train/329.jpg", "text": "a car"}
330
+ {"file_name": "train/330.jpg", "text": "a car"}
331
+ {"file_name": "train/331.jpg", "text": "a car"}
332
+ {"file_name": "train/332.jpg", "text": "a car"}
333
+ {"file_name": "train/333.jpg", "text": "a car"}
334
+ {"file_name": "train/334.jpg", "text": "a car"}
335
+ {"file_name": "train/335.jpg", "text": "a car"}
336
+ {"file_name": "train/336.jpg", "text": "a car"}
337
+ {"file_name": "train/337.jpg", "text": "a car"}
338
+ {"file_name": "train/338.jpg", "text": "a car"}
339
+ {"file_name": "train/339.jpg", "text": "a car"}
340
+ {"file_name": "train/340.jpg", "text": "a car"}
341
+ {"file_name": "train/341.jpg", "text": "a car"}
342
+ {"file_name": "train/342.jpg", "text": "a car"}
343
+ {"file_name": "train/343.jpg", "text": "a car"}
344
+ {"file_name": "train/344.jpg", "text": "a car"}
345
+ {"file_name": "train/345.jpg", "text": "a car"}
346
+ {"file_name": "train/346.jpg", "text": "a car"}
347
+ {"file_name": "train/347.jpg", "text": "a car"}
348
+ {"file_name": "train/348.jpg", "text": "a car"}
349
+ {"file_name": "train/349.jpg", "text": "a car"}
350
+ {"file_name": "train/350.jpg", "text": "a car"}
351
+ {"file_name": "train/351.jpg", "text": "a car"}
352
+ {"file_name": "train/352.jpg", "text": "a car"}
353
+ {"file_name": "train/353.jpg", "text": "a car"}
354
+ {"file_name": "train/354.jpg", "text": "a car"}
355
+ {"file_name": "train/355.jpg", "text": "a car"}
356
+ {"file_name": "train/356.jpg", "text": "a car"}
357
+ {"file_name": "train/357.jpg", "text": "a car"}
358
+ {"file_name": "train/358.jpg", "text": "a car"}
359
+ {"file_name": "train/359.jpg", "text": "a car"}
360
+ {"file_name": "train/360.jpg", "text": "a car"}
361
+ {"file_name": "train/361.jpg", "text": "a car"}
362
+ {"file_name": "train/362.jpg", "text": "a car"}
363
+ {"file_name": "train/363.jpg", "text": "a car"}
364
+ {"file_name": "train/364.jpg", "text": "a car"}
365
+ {"file_name": "train/365.jpg", "text": "a car"}
366
+ {"file_name": "train/366.jpg", "text": "a car"}
367
+ {"file_name": "train/367.jpg", "text": "a car"}
368
+ {"file_name": "train/368.jpg", "text": "a car"}
369
+ {"file_name": "train/369.jpg", "text": "a car"}
370
+ {"file_name": "train/370.jpg", "text": "a car"}
371
+ {"file_name": "train/371.jpg", "text": "a car"}
372
+ {"file_name": "train/372.jpg", "text": "a car"}
373
+ {"file_name": "train/373.jpg", "text": "a car"}
374
+ {"file_name": "train/374.jpg", "text": "a car"}
375
+ {"file_name": "train/375.jpg", "text": "a car"}
376
+ {"file_name": "train/376.jpg", "text": "a car"}
377
+ {"file_name": "train/377.jpg", "text": "a car"}
378
+ {"file_name": "train/378.jpg", "text": "a car"}
379
+ {"file_name": "train/379.jpg", "text": "a car"}
380
+ {"file_name": "train/380.jpg", "text": "a car"}
381
+ {"file_name": "train/381.jpg", "text": "a car"}
382
+ {"file_name": "train/382.jpg", "text": "a car"}
383
+ {"file_name": "train/383.jpg", "text": "a car"}
384
+ {"file_name": "train/384.jpg", "text": "a car"}
385
+ {"file_name": "train/385.jpg", "text": "a car"}
386
+ {"file_name": "train/386.jpg", "text": "a car"}
387
+ {"file_name": "train/387.jpg", "text": "a car"}
388
+ {"file_name": "train/388.jpg", "text": "a car"}
389
+ {"file_name": "train/389.jpg", "text": "a car"}
390
+ {"file_name": "train/390.jpg", "text": "a car"}
391
+ {"file_name": "train/391.jpg", "text": "a car"}
392
+ {"file_name": "train/392.jpg", "text": "a car"}
393
+ {"file_name": "train/393.jpg", "text": "a car"}
394
+ {"file_name": "train/394.jpg", "text": "a car"}
395
+ {"file_name": "train/395.jpg", "text": "a car"}
396
+ {"file_name": "train/396.jpg", "text": "a car"}
397
+ {"file_name": "train/397.jpg", "text": "a car"}
398
+ {"file_name": "train/398.jpg", "text": "a car"}
399
+ {"file_name": "train/399.jpg", "text": "a car"}
400
+ {"file_name": "train/400.jpg", "text": "a car"}
401
+ {"file_name": "train/401.jpg", "text": "a car"}
402
+ {"file_name": "train/402.jpg", "text": "a car"}
403
+ {"file_name": "train/403.jpg", "text": "a car"}
404
+ {"file_name": "train/404.jpg", "text": "a car"}
405
+ {"file_name": "train/405.jpg", "text": "a car"}
406
+ {"file_name": "train/406.jpg", "text": "a car"}
407
+ {"file_name": "train/407.jpg", "text": "a car"}
408
+ {"file_name": "train/408.jpg", "text": "a car"}
409
+ {"file_name": "train/409.jpg", "text": "a car"}
410
+ {"file_name": "train/410.jpg", "text": "a car"}
411
+ {"file_name": "train/411.jpg", "text": "a car"}
412
+ {"file_name": "train/412.jpg", "text": "a car"}
413
+ {"file_name": "train/413.jpg", "text": "a car"}
414
+ {"file_name": "train/414.jpg", "text": "a car"}
415
+ {"file_name": "train/415.jpg", "text": "a car"}
416
+ {"file_name": "train/416.jpg", "text": "a car"}
417
+ {"file_name": "train/417.jpg", "text": "a car"}
418
+ {"file_name": "train/418.jpg", "text": "a car"}
419
+ {"file_name": "train/419.jpg", "text": "a car"}
420
+ {"file_name": "train/420.jpg", "text": "a car"}
421
+ {"file_name": "train/421.jpg", "text": "a car"}
422
+ {"file_name": "train/422.jpg", "text": "a car"}
423
+ {"file_name": "train/423.jpg", "text": "a car"}
424
+ {"file_name": "train/424.jpg", "text": "a car"}
425
+ {"file_name": "train/425.jpg", "text": "a car"}
426
+ {"file_name": "train/426.jpg", "text": "a car"}
427
+ {"file_name": "train/427.jpg", "text": "a car"}
428
+ {"file_name": "train/428.jpg", "text": "a car"}
429
+ {"file_name": "train/429.jpg", "text": "a car"}
430
+ {"file_name": "train/430.jpg", "text": "a car"}
431
+ {"file_name": "train/431.jpg", "text": "a car"}
432
+ {"file_name": "train/432.jpg", "text": "a car"}
433
+ {"file_name": "train/433.jpg", "text": "a car"}
434
+ {"file_name": "train/434.jpg", "text": "a car"}
435
+ {"file_name": "train/435.jpg", "text": "a car"}
436
+ {"file_name": "train/436.jpg", "text": "a car"}
437
+ {"file_name": "train/437.jpg", "text": "a car"}
438
+ {"file_name": "train/438.jpg", "text": "a car"}
439
+ {"file_name": "train/439.jpg", "text": "a car"}
440
+ {"file_name": "train/440.jpg", "text": "a car"}
441
+ {"file_name": "train/441.jpg", "text": "a car"}
442
+ {"file_name": "train/442.jpg", "text": "a car"}
443
+ {"file_name": "train/443.jpg", "text": "a car"}
444
+ {"file_name": "train/444.jpg", "text": "a car"}
445
+ {"file_name": "train/445.jpg", "text": "a car"}
446
+ {"file_name": "train/446.jpg", "text": "a car"}
447
+ {"file_name": "train/447.jpg", "text": "a car"}
448
+ {"file_name": "train/448.jpg", "text": "a car"}
449
+ {"file_name": "train/449.jpg", "text": "a car"}
450
+ {"file_name": "train/450.jpg", "text": "a car"}
451
+ {"file_name": "train/451.jpg", "text": "a car"}
452
+ {"file_name": "train/452.jpg", "text": "a car"}
453
+ {"file_name": "train/453.jpg", "text": "a car"}
454
+ {"file_name": "train/454.jpg", "text": "a car"}
455
+ {"file_name": "train/455.jpg", "text": "a car"}
456
+ {"file_name": "train/456.jpg", "text": "a car"}
457
+ {"file_name": "train/457.jpg", "text": "a car"}
458
+ {"file_name": "train/458.jpg", "text": "a car"}
459
+ {"file_name": "train/459.jpg", "text": "a car"}
460
+ {"file_name": "train/460.jpg", "text": "a car"}
461
+ {"file_name": "train/461.jpg", "text": "a car"}
462
+ {"file_name": "train/462.jpg", "text": "a car"}
463
+ {"file_name": "train/463.jpg", "text": "a car"}
464
+ {"file_name": "train/464.jpg", "text": "a car"}
465
+ {"file_name": "train/465.jpg", "text": "a car"}
466
+ {"file_name": "train/466.jpg", "text": "a car"}
467
+ {"file_name": "train/467.jpg", "text": "a car"}
468
+ {"file_name": "train/468.jpg", "text": "a car"}
469
+ {"file_name": "train/469.jpg", "text": "a car"}
470
+ {"file_name": "train/470.jpg", "text": "a car"}
471
+ {"file_name": "train/471.jpg", "text": "a car"}
472
+ {"file_name": "train/472.jpg", "text": "a car"}
473
+ {"file_name": "train/473.jpg", "text": "a car"}
474
+ {"file_name": "train/474.jpg", "text": "a car"}
475
+ {"file_name": "train/475.jpg", "text": "a car"}
476
+ {"file_name": "train/476.jpg", "text": "a car"}
477
+ {"file_name": "train/477.jpg", "text": "a car"}
478
+ {"file_name": "train/478.jpg", "text": "a car"}
479
+ {"file_name": "train/479.jpg", "text": "a car"}
480
+ {"file_name": "train/480.jpg", "text": "a car"}
481
+ {"file_name": "train/481.jpg", "text": "a car"}
482
+ {"file_name": "train/482.jpg", "text": "a car"}
483
+ {"file_name": "train/483.jpg", "text": "a car"}
484
+ {"file_name": "train/484.jpg", "text": "a car"}
485
+ {"file_name": "train/485.jpg", "text": "a car"}
486
+ {"file_name": "train/486.jpg", "text": "a car"}
487
+ {"file_name": "train/487.jpg", "text": "a car"}
488
+ {"file_name": "train/488.jpg", "text": "a car"}
489
+ {"file_name": "train/489.jpg", "text": "a car"}
490
+ {"file_name": "train/490.jpg", "text": "a car"}
491
+ {"file_name": "train/491.jpg", "text": "a car"}
492
+ {"file_name": "train/492.jpg", "text": "a car"}
493
+ {"file_name": "train/493.jpg", "text": "a car"}
494
+ {"file_name": "train/494.jpg", "text": "a car"}
495
+ {"file_name": "train/495.jpg", "text": "a car"}
496
+ {"file_name": "train/496.jpg", "text": "a car"}
497
+ {"file_name": "train/497.jpg", "text": "a car"}
498
+ {"file_name": "train/498.jpg", "text": "a car"}
499
+ {"file_name": "train/499.jpg", "text": "a car"}
500
+ {"file_name": "train/500.jpg", "text": "a car"}
501
+ {"file_name": "train/501.jpg", "text": "a car"}
502
+ {"file_name": "train/502.jpg", "text": "a car"}
503
+ {"file_name": "train/503.jpg", "text": "a car"}
504
+ {"file_name": "train/504.jpg", "text": "a car"}
505
+ {"file_name": "train/505.jpg", "text": "a car"}
506
+ {"file_name": "train/506.jpg", "text": "a car"}
507
+ {"file_name": "train/507.jpg", "text": "a car"}
508
+ {"file_name": "train/508.jpg", "text": "a car"}
509
+ {"file_name": "train/509.jpg", "text": "a car"}
510
+ {"file_name": "train/510.jpg", "text": "a car"}
511
+ {"file_name": "train/511.jpg", "text": "a car"}
512
+ {"file_name": "train/512.jpg", "text": "a car"}
513
+ {"file_name": "train/513.jpg", "text": "a car"}
514
+ {"file_name": "train/514.jpg", "text": "a car"}
515
+ {"file_name": "train/515.jpg", "text": "a car"}
516
+ {"file_name": "train/516.jpg", "text": "a car"}
517
+ {"file_name": "train/517.jpg", "text": "a car"}
518
+ {"file_name": "train/518.jpg", "text": "a car"}
519
+ {"file_name": "train/519.jpg", "text": "a car"}
520
+ {"file_name": "train/520.jpg", "text": "a car"}
521
+ {"file_name": "train/521.jpg", "text": "a car"}
522
+ {"file_name": "train/522.jpg", "text": "a car"}
523
+ {"file_name": "train/523.jpg", "text": "a car"}
524
+ {"file_name": "train/524.jpg", "text": "a car"}
525
+ {"file_name": "train/525.jpg", "text": "a car"}
526
+ {"file_name": "train/526.jpg", "text": "a car"}
527
+ {"file_name": "train/527.jpg", "text": "a car"}
528
+ {"file_name": "train/528.jpg", "text": "a car"}
529
+ {"file_name": "train/529.jpg", "text": "a car"}
530
+ {"file_name": "train/530.jpg", "text": "a car"}
531
+ {"file_name": "train/531.jpg", "text": "a car"}
532
+ {"file_name": "train/532.jpg", "text": "a car"}
533
+ {"file_name": "train/533.jpg", "text": "a car"}
534
+ {"file_name": "train/534.jpg", "text": "a car"}
535
+ {"file_name": "train/535.jpg", "text": "a car"}
536
+ {"file_name": "train/536.jpg", "text": "a car"}
537
+ {"file_name": "train/537.jpg", "text": "a car"}
538
+ {"file_name": "train/538.jpg", "text": "a car"}
539
+ {"file_name": "train/539.jpg", "text": "a car"}
540
+ {"file_name": "train/540.jpg", "text": "a car"}
541
+ {"file_name": "train/541.jpg", "text": "a car"}
542
+ {"file_name": "train/542.jpg", "text": "a car"}
543
+ {"file_name": "train/543.jpg", "text": "a car"}
544
+ {"file_name": "train/544.jpg", "text": "a car"}
545
+ {"file_name": "train/545.jpg", "text": "a car"}
546
+ {"file_name": "train/546.jpg", "text": "a car"}
547
+ {"file_name": "train/547.jpg", "text": "a car"}
548
+ {"file_name": "train/548.jpg", "text": "a car"}
549
+ {"file_name": "train/549.jpg", "text": "a car"}
550
+ {"file_name": "train/550.jpg", "text": "a car"}
551
+ {"file_name": "train/551.jpg", "text": "a car"}
552
+ {"file_name": "train/552.jpg", "text": "a car"}
553
+ {"file_name": "train/553.jpg", "text": "a car"}
554
+ {"file_name": "train/554.jpg", "text": "a car"}
555
+ {"file_name": "train/555.jpg", "text": "a car"}
556
+ {"file_name": "train/556.jpg", "text": "a car"}
557
+ {"file_name": "train/557.jpg", "text": "a car"}
558
+ {"file_name": "train/558.jpg", "text": "a car"}
559
+ {"file_name": "train/559.jpg", "text": "a car"}
560
+ {"file_name": "train/560.jpg", "text": "a car"}
561
+ {"file_name": "train/561.jpg", "text": "a car"}
562
+ {"file_name": "train/562.jpg", "text": "a car"}
563
+ {"file_name": "train/563.jpg", "text": "a car"}
564
+ {"file_name": "train/564.jpg", "text": "a car"}
565
+ {"file_name": "train/565.jpg", "text": "a car"}
566
+ {"file_name": "train/566.jpg", "text": "a car"}
567
+ {"file_name": "train/567.jpg", "text": "a car"}
568
+ {"file_name": "train/568.jpg", "text": "a car"}
569
+ {"file_name": "train/569.jpg", "text": "a car"}
570
+ {"file_name": "train/570.jpg", "text": "a car"}
571
+ {"file_name": "train/571.jpg", "text": "a car"}
572
+ {"file_name": "train/572.jpg", "text": "a car"}
573
+ {"file_name": "train/573.jpg", "text": "a car"}
574
+ {"file_name": "train/574.jpg", "text": "a car"}
575
+ {"file_name": "train/575.jpg", "text": "a car"}
576
+ {"file_name": "train/576.jpg", "text": "a car"}
577
+ {"file_name": "train/577.jpg", "text": "a car"}
578
+ {"file_name": "train/578.jpg", "text": "a car"}
579
+ {"file_name": "train/579.jpg", "text": "a car"}
580
+ {"file_name": "train/580.jpg", "text": "a car"}
581
+ {"file_name": "train/581.jpg", "text": "a car"}
582
+ {"file_name": "train/582.jpg", "text": "a car"}
583
+ {"file_name": "train/583.jpg", "text": "a car"}
584
+ {"file_name": "train/584.jpg", "text": "a car"}
585
+ {"file_name": "train/585.jpg", "text": "a car"}
586
+ {"file_name": "train/586.jpg", "text": "a car"}
587
+ {"file_name": "train/587.jpg", "text": "a car"}
588
+ {"file_name": "train/588.jpg", "text": "a car"}
589
+ {"file_name": "train/589.jpg", "text": "a car"}
590
+ {"file_name": "train/590.jpg", "text": "a car"}
591
+ {"file_name": "train/591.jpg", "text": "a car"}
592
+ {"file_name": "train/592.jpg", "text": "a car"}
593
+ {"file_name": "train/593.jpg", "text": "a car"}
594
+ {"file_name": "train/594.jpg", "text": "a car"}
595
+ {"file_name": "train/595.jpg", "text": "a car"}
596
+ {"file_name": "train/596.jpg", "text": "a car"}
597
+ {"file_name": "train/597.jpg", "text": "a car"}
598
+ {"file_name": "train/598.jpg", "text": "a car"}
599
+ {"file_name": "train/599.jpg", "text": "a car"}
600
+ {"file_name": "train/600.jpg", "text": "a car"}
601
+ {"file_name": "train/601.jpg", "text": "a car"}
602
+ {"file_name": "train/602.jpg", "text": "a car"}
603
+ {"file_name": "train/603.jpg", "text": "a car"}
604
+ {"file_name": "train/604.jpg", "text": "a car"}
605
+ {"file_name": "train/605.jpg", "text": "a car"}
606
+ {"file_name": "train/606.jpg", "text": "a car"}
607
+ {"file_name": "train/607.jpg", "text": "a car"}
608
+ {"file_name": "train/608.jpg", "text": "a car"}
609
+ {"file_name": "train/609.jpg", "text": "a car"}
610
+ {"file_name": "train/610.jpg", "text": "a car"}
611
+ {"file_name": "train/611.jpg", "text": "a car"}
612
+ {"file_name": "train/612.jpg", "text": "a car"}
613
+ {"file_name": "train/613.jpg", "text": "a car"}
614
+ {"file_name": "train/614.jpg", "text": "a car"}
615
+ {"file_name": "train/615.jpg", "text": "a car"}
616
+ {"file_name": "train/616.jpg", "text": "a car"}
617
+ {"file_name": "train/617.jpg", "text": "a car"}
618
+ {"file_name": "train/618.jpg", "text": "a car"}
619
+ {"file_name": "train/619.jpg", "text": "a car"}
620
+ {"file_name": "train/620.jpg", "text": "a car"}
621
+ {"file_name": "train/621.jpg", "text": "a car"}
622
+ {"file_name": "train/622.jpg", "text": "a car"}
623
+ {"file_name": "train/623.jpg", "text": "a car"}
624
+ {"file_name": "train/624.jpg", "text": "a car"}
625
+ {"file_name": "train/625.jpg", "text": "a car"}
626
+ {"file_name": "train/626.jpg", "text": "a car"}
627
+ {"file_name": "train/627.jpg", "text": "a car"}
628
+ {"file_name": "train/628.jpg", "text": "a car"}
629
+ {"file_name": "train/629.jpg", "text": "a car"}
630
+ {"file_name": "train/630.jpg", "text": "a car"}
631
+ {"file_name": "train/631.jpg", "text": "a car"}
632
+ {"file_name": "train/632.jpg", "text": "a car"}
633
+ {"file_name": "train/633.jpg", "text": "a car"}
634
+ {"file_name": "train/634.jpg", "text": "a car"}
635
+ {"file_name": "train/635.jpg", "text": "a car"}
636
+ {"file_name": "train/636.jpg", "text": "a car"}
637
+ {"file_name": "train/637.jpg", "text": "a car"}
638
+ {"file_name": "train/638.jpg", "text": "a car"}
639
+ {"file_name": "train/639.jpg", "text": "a car"}
640
+ {"file_name": "train/640.jpg", "text": "a car"}
641
+ {"file_name": "train/641.jpg", "text": "a car"}
642
+ {"file_name": "train/642.jpg", "text": "a car"}
643
+ {"file_name": "train/643.jpg", "text": "a car"}
644
+ {"file_name": "train/644.jpg", "text": "a car"}
645
+ {"file_name": "train/645.jpg", "text": "a car"}
646
+ {"file_name": "train/646.jpg", "text": "a car"}
647
+ {"file_name": "train/647.jpg", "text": "a car"}
648
+ {"file_name": "train/648.jpg", "text": "a car"}
649
+ {"file_name": "train/649.jpg", "text": "a car"}
650
+ {"file_name": "train/650.jpg", "text": "a car"}
651
+ {"file_name": "train/651.jpg", "text": "a car"}
652
+ {"file_name": "train/652.jpg", "text": "a car"}
653
+ {"file_name": "train/653.jpg", "text": "a car"}
654
+ {"file_name": "train/654.jpg", "text": "a car"}
655
+ {"file_name": "train/655.jpg", "text": "a car"}
656
+ {"file_name": "train/656.jpg", "text": "a car"}
657
+ {"file_name": "train/657.jpg", "text": "a car"}
658
+ {"file_name": "train/658.jpg", "text": "a car"}
659
+ {"file_name": "train/659.jpg", "text": "a car"}
660
+ {"file_name": "train/660.jpg", "text": "a car"}
661
+ {"file_name": "train/661.jpg", "text": "a car"}
662
+ {"file_name": "train/662.jpg", "text": "a car"}
663
+ {"file_name": "train/663.jpg", "text": "a car"}
664
+ {"file_name": "train/664.jpg", "text": "a car"}
665
+ {"file_name": "train/665.jpg", "text": "a car"}
666
+ {"file_name": "train/666.jpg", "text": "a car"}
667
+ {"file_name": "train/667.jpg", "text": "a car"}
668
+ {"file_name": "train/668.jpg", "text": "a car"}
669
+ {"file_name": "train/669.jpg", "text": "a car"}
670
+ {"file_name": "train/670.jpg", "text": "a car"}
671
+ {"file_name": "train/671.jpg", "text": "a car"}
672
+ {"file_name": "train/672.jpg", "text": "a car"}
673
+ {"file_name": "train/673.jpg", "text": "a car"}
674
+ {"file_name": "train/674.jpg", "text": "a car"}
675
+ {"file_name": "train/675.jpg", "text": "a car"}
676
+ {"file_name": "train/676.jpg", "text": "a car"}
677
+ {"file_name": "train/677.jpg", "text": "a car"}
678
+ {"file_name": "train/678.jpg", "text": "a car"}
679
+ {"file_name": "train/679.jpg", "text": "a car"}
680
+ {"file_name": "train/680.jpg", "text": "a car"}
681
+ {"file_name": "train/681.jpg", "text": "a car"}
682
+ {"file_name": "train/682.jpg", "text": "a car"}
683
+ {"file_name": "train/683.jpg", "text": "a car"}
684
+ {"file_name": "train/684.jpg", "text": "a car"}
685
+ {"file_name": "train/685.jpg", "text": "a car"}
686
+ {"file_name": "train/686.jpg", "text": "a car"}
687
+ {"file_name": "train/687.jpg", "text": "a car"}
688
+ {"file_name": "train/688.jpg", "text": "a car"}
689
+ {"file_name": "train/689.jpg", "text": "a car"}
690
+ {"file_name": "train/690.jpg", "text": "a car"}
691
+ {"file_name": "train/691.jpg", "text": "a car"}
692
+ {"file_name": "train/692.jpg", "text": "a car"}
693
+ {"file_name": "train/693.jpg", "text": "a car"}
694
+ {"file_name": "train/694.jpg", "text": "a car"}
695
+ {"file_name": "train/695.jpg", "text": "a car"}
696
+ {"file_name": "train/696.jpg", "text": "a car"}
697
+ {"file_name": "train/697.jpg", "text": "a car"}
698
+ {"file_name": "train/698.jpg", "text": "a car"}
699
+ {"file_name": "train/699.jpg", "text": "a car"}
700
+ {"file_name": "train/700.jpg", "text": "a car"}
701
+ {"file_name": "train/701.jpg", "text": "a car"}
702
+ {"file_name": "train/702.jpg", "text": "a car"}
703
+ {"file_name": "train/703.jpg", "text": "a car"}
704
+ {"file_name": "train/704.jpg", "text": "a car"}
705
+ {"file_name": "train/705.jpg", "text": "a car"}
706
+ {"file_name": "train/706.jpg", "text": "a car"}
707
+ {"file_name": "train/707.jpg", "text": "a car"}
708
+ {"file_name": "train/708.jpg", "text": "a car"}
709
+ {"file_name": "train/709.jpg", "text": "a car"}
710
+ {"file_name": "train/710.jpg", "text": "a car"}
711
+ {"file_name": "train/711.jpg", "text": "a car"}
712
+ {"file_name": "train/712.jpg", "text": "a car"}
713
+ {"file_name": "train/713.jpg", "text": "a car"}
714
+ {"file_name": "train/714.jpg", "text": "a car"}
715
+ {"file_name": "train/715.jpg", "text": "a car"}
716
+ {"file_name": "train/716.jpg", "text": "a car"}
717
+ {"file_name": "train/717.jpg", "text": "a car"}
718
+ {"file_name": "train/718.jpg", "text": "a car"}
719
+ {"file_name": "train/719.jpg", "text": "a car"}
720
+ {"file_name": "train/720.jpg", "text": "a car"}
721
+ {"file_name": "train/721.jpg", "text": "a car"}
722
+ {"file_name": "train/722.jpg", "text": "a car"}
723
+ {"file_name": "train/723.jpg", "text": "a car"}
724
+ {"file_name": "train/724.jpg", "text": "a car"}
725
+ {"file_name": "train/725.jpg", "text": "a car"}
726
+ {"file_name": "train/726.jpg", "text": "a car"}
727
+ {"file_name": "train/727.jpg", "text": "a car"}
728
+ {"file_name": "train/728.jpg", "text": "a car"}
729
+ {"file_name": "train/729.jpg", "text": "a car"}
730
+ {"file_name": "train/730.jpg", "text": "a car"}
731
+ {"file_name": "train/731.jpg", "text": "a car"}
732
+ {"file_name": "train/732.jpg", "text": "a car"}
733
+ {"file_name": "train/733.jpg", "text": "a car"}
734
+ {"file_name": "train/734.jpg", "text": "a car"}
735
+ {"file_name": "train/735.jpg", "text": "a car"}
736
+ {"file_name": "train/736.jpg", "text": "a car"}
737
+ {"file_name": "train/737.jpg", "text": "a car"}
738
+ {"file_name": "train/738.jpg", "text": "a car"}
739
+ {"file_name": "train/739.jpg", "text": "a car"}
740
+ {"file_name": "train/740.jpg", "text": "a car"}
741
+ {"file_name": "train/741.jpg", "text": "a car"}
742
+ {"file_name": "train/742.jpg", "text": "a car"}
743
+ {"file_name": "train/743.jpg", "text": "a car"}
744
+ {"file_name": "train/744.jpg", "text": "a car"}
745
+ {"file_name": "train/745.jpg", "text": "a car"}
746
+ {"file_name": "train/746.jpg", "text": "a car"}
747
+ {"file_name": "train/747.jpg", "text": "a car"}
748
+ {"file_name": "train/748.jpg", "text": "a car"}
749
+ {"file_name": "train/749.jpg", "text": "a car"}
750
+ {"file_name": "train/750.jpg", "text": "a car"}
751
+ {"file_name": "train/751.jpg", "text": "a car"}
752
+ {"file_name": "train/752.jpg", "text": "a car"}
753
+ {"file_name": "train/753.jpg", "text": "a car"}
754
+ {"file_name": "train/754.jpg", "text": "a car"}
755
+ {"file_name": "train/755.jpg", "text": "a car"}
756
+ {"file_name": "train/756.jpg", "text": "a car"}
757
+ {"file_name": "train/757.jpg", "text": "a car"}
758
+ {"file_name": "train/758.jpg", "text": "a car"}
759
+ {"file_name": "train/759.jpg", "text": "a car"}
760
+ {"file_name": "train/760.jpg", "text": "a car"}
761
+ {"file_name": "train/761.jpg", "text": "a car"}
762
+ {"file_name": "train/762.jpg", "text": "a car"}
763
+ {"file_name": "train/763.jpg", "text": "a car"}
764
+ {"file_name": "train/764.jpg", "text": "a car"}
765
+ {"file_name": "train/765.jpg", "text": "a car"}
766
+ {"file_name": "train/766.jpg", "text": "a car"}
767
+ {"file_name": "train/767.jpg", "text": "a car"}
768
+ {"file_name": "train/768.jpg", "text": "a car"}
769
+ {"file_name": "train/769.jpg", "text": "a car"}
770
+ {"file_name": "train/770.jpg", "text": "a car"}
771
+ {"file_name": "train/771.jpg", "text": "a car"}
772
+ {"file_name": "train/772.jpg", "text": "a car"}
773
+ {"file_name": "train/773.jpg", "text": "a car"}
774
+ {"file_name": "train/774.jpg", "text": "a car"}
775
+ {"file_name": "train/775.jpg", "text": "a car"}
776
+ {"file_name": "train/776.jpg", "text": "a car"}
777
+ {"file_name": "train/777.jpg", "text": "a car"}
778
+ {"file_name": "train/778.jpg", "text": "a car"}
779
+ {"file_name": "train/779.jpg", "text": "a car"}
780
+ {"file_name": "train/780.jpg", "text": "a car"}
781
+ {"file_name": "train/781.jpg", "text": "a car"}
782
+ {"file_name": "train/782.jpg", "text": "a car"}
783
+ {"file_name": "train/783.jpg", "text": "a car"}
784
+ {"file_name": "train/784.jpg", "text": "a car"}
785
+ {"file_name": "train/785.jpg", "text": "a car"}
786
+ {"file_name": "train/786.jpg", "text": "a car"}
787
+ {"file_name": "train/787.jpg", "text": "a car"}
788
+ {"file_name": "train/788.jpg", "text": "a car"}
789
+ {"file_name": "train/789.jpg", "text": "a car"}
790
+ {"file_name": "train/790.jpg", "text": "a car"}
791
+ {"file_name": "train/791.jpg", "text": "a car"}
792
+ {"file_name": "train/792.jpg", "text": "a car"}
793
+ {"file_name": "train/793.jpg", "text": "a car"}
794
+ {"file_name": "train/794.jpg", "text": "a car"}
795
+ {"file_name": "train/795.jpg", "text": "a car"}
796
+ {"file_name": "train/796.jpg", "text": "a car"}
797
+ {"file_name": "train/797.jpg", "text": "a car"}
798
+ {"file_name": "train/798.jpg", "text": "a car"}
799
+ {"file_name": "train/799.jpg", "text": "a car"}
800
+ {"file_name": "train/800.jpg", "text": "a car"}
801
+ {"file_name": "train/801.jpg", "text": "a car"}
802
+ {"file_name": "train/802.jpg", "text": "a car"}
803
+ {"file_name": "train/803.jpg", "text": "a car"}
804
+ {"file_name": "train/804.jpg", "text": "a car"}
805
+ {"file_name": "train/805.jpg", "text": "a car"}
806
+ {"file_name": "train/806.jpg", "text": "a car"}
807
+ {"file_name": "train/807.jpg", "text": "a car"}
808
+ {"file_name": "train/808.jpg", "text": "a car"}
809
+ {"file_name": "train/809.jpg", "text": "a car"}
810
+ {"file_name": "train/810.jpg", "text": "a car"}
811
+ {"file_name": "train/811.jpg", "text": "a car"}
812
+ {"file_name": "train/812.jpg", "text": "a car"}
813
+ {"file_name": "train/813.jpg", "text": "a car"}
814
+ {"file_name": "train/814.jpg", "text": "a car"}
815
+ {"file_name": "train/815.jpg", "text": "a car"}
816
+ {"file_name": "train/816.jpg", "text": "a car"}
817
+ {"file_name": "train/817.jpg", "text": "a car"}
818
+ {"file_name": "train/818.jpg", "text": "a car"}
819
+ {"file_name": "train/819.jpg", "text": "a car"}
820
+ {"file_name": "train/820.jpg", "text": "a car"}
821
+ {"file_name": "train/821.jpg", "text": "a car"}
822
+ {"file_name": "train/822.jpg", "text": "a car"}
823
+ {"file_name": "train/823.jpg", "text": "a car"}
824
+ {"file_name": "train/824.jpg", "text": "a car"}
825
+ {"file_name": "train/825.jpg", "text": "a car"}
826
+ {"file_name": "train/826.jpg", "text": "a car"}
827
+ {"file_name": "train/827.jpg", "text": "a car"}
828
+ {"file_name": "train/828.jpg", "text": "a car"}
829
+ {"file_name": "train/829.jpg", "text": "a car"}
830
+ {"file_name": "train/830.jpg", "text": "a car"}
831
+ {"file_name": "train/831.jpg", "text": "a car"}
832
+ {"file_name": "train/832.jpg", "text": "a car"}
833
+ {"file_name": "train/833.jpg", "text": "a car"}
834
+ {"file_name": "train/834.jpg", "text": "a car"}
835
+ {"file_name": "train/835.jpg", "text": "a car"}
836
+ {"file_name": "train/836.jpg", "text": "a car"}
837
+ {"file_name": "train/837.jpg", "text": "a car"}
838
+ {"file_name": "train/838.jpg", "text": "a car"}
839
+ {"file_name": "train/839.jpg", "text": "a car"}
840
+ {"file_name": "train/840.jpg", "text": "a car"}
841
+ {"file_name": "train/841.jpg", "text": "a car"}
842
+ {"file_name": "train/842.jpg", "text": "a car"}
843
+ {"file_name": "train/843.jpg", "text": "a car"}
844
+ {"file_name": "train/844.jpg", "text": "a car"}
845
+ {"file_name": "train/845.jpg", "text": "a car"}
846
+ {"file_name": "train/846.jpg", "text": "a car"}
847
+ {"file_name": "train/847.jpg", "text": "a car"}
848
+ {"file_name": "train/848.jpg", "text": "a car"}
849
+ {"file_name": "train/849.jpg", "text": "a car"}
850
+ {"file_name": "train/850.jpg", "text": "a car"}
851
+ {"file_name": "train/851.jpg", "text": "a car"}
852
+ {"file_name": "train/852.jpg", "text": "a car"}
853
+ {"file_name": "train/853.jpg", "text": "a car"}
854
+ {"file_name": "train/854.jpg", "text": "a car"}
855
+ {"file_name": "train/855.jpg", "text": "a car"}
856
+ {"file_name": "train/856.jpg", "text": "a car"}
857
+ {"file_name": "train/857.jpg", "text": "a car"}
858
+ {"file_name": "train/858.jpg", "text": "a car"}
859
+ {"file_name": "train/859.jpg", "text": "a car"}
860
+ {"file_name": "train/860.jpg", "text": "a car"}
861
+ {"file_name": "train/861.jpg", "text": "a car"}
862
+ {"file_name": "train/862.jpg", "text": "a car"}
863
+ {"file_name": "train/863.jpg", "text": "a car"}
864
+ {"file_name": "train/864.jpg", "text": "a car"}
865
+ {"file_name": "train/865.jpg", "text": "a car"}
866
+ {"file_name": "train/866.jpg", "text": "a car"}
867
+ {"file_name": "train/867.jpg", "text": "a car"}
868
+ {"file_name": "train/868.jpg", "text": "a car"}
869
+ {"file_name": "train/869.jpg", "text": "a car"}
870
+ {"file_name": "train/870.jpg", "text": "a car"}
871
+ {"file_name": "train/871.jpg", "text": "a car"}
872
+ {"file_name": "train/872.jpg", "text": "a car"}
873
+ {"file_name": "train/873.jpg", "text": "a car"}
874
+ {"file_name": "train/874.jpg", "text": "a car"}
875
+ {"file_name": "train/875.jpg", "text": "a car"}
876
+ {"file_name": "train/876.jpg", "text": "a car"}
877
+ {"file_name": "train/877.jpg", "text": "a car"}
878
+ {"file_name": "train/878.jpg", "text": "a car"}
879
+ {"file_name": "train/879.jpg", "text": "a car"}
880
+ {"file_name": "train/880.jpg", "text": "a car"}
881
+ {"file_name": "train/881.jpg", "text": "a car"}
882
+ {"file_name": "train/882.jpg", "text": "a car"}
883
+ {"file_name": "train/883.jpg", "text": "a car"}
884
+ {"file_name": "train/884.jpg", "text": "a car"}
885
+ {"file_name": "train/885.jpg", "text": "a car"}
886
+ {"file_name": "train/886.jpg", "text": "a car"}
887
+ {"file_name": "train/887.jpg", "text": "a car"}
888
+ {"file_name": "train/888.jpg", "text": "a car"}
889
+ {"file_name": "train/889.jpg", "text": "a car"}
890
+ {"file_name": "train/890.jpg", "text": "a car"}
891
+ {"file_name": "train/891.jpg", "text": "a car"}
892
+ {"file_name": "train/892.jpg", "text": "a car"}
893
+ {"file_name": "train/893.jpg", "text": "a car"}
894
+ {"file_name": "train/894.jpg", "text": "a car"}
895
+ {"file_name": "train/895.jpg", "text": "a car"}
896
+ {"file_name": "train/896.jpg", "text": "a car"}
897
+ {"file_name": "train/897.jpg", "text": "a car"}
898
+ {"file_name": "train/898.jpg", "text": "a car"}
899
+ {"file_name": "train/899.jpg", "text": "a car"}
900
+ {"file_name": "train/900.jpg", "text": "a car"}
901
+ {"file_name": "train/901.jpg", "text": "a car"}
902
+ {"file_name": "train/902.jpg", "text": "a car"}
903
+ {"file_name": "train/903.jpg", "text": "a car"}
904
+ {"file_name": "train/904.jpg", "text": "a car"}
905
+ {"file_name": "train/905.jpg", "text": "a car"}
906
+ {"file_name": "train/906.jpg", "text": "a car"}
907
+ {"file_name": "train/907.jpg", "text": "a car"}
908
+ {"file_name": "train/908.jpg", "text": "a car"}
909
+ {"file_name": "train/909.jpg", "text": "a car"}
910
+ {"file_name": "train/910.jpg", "text": "a car"}
911
+ {"file_name": "train/911.jpg", "text": "a car"}
912
+ {"file_name": "train/912.jpg", "text": "a car"}
913
+ {"file_name": "train/913.jpg", "text": "a car"}
914
+ {"file_name": "train/914.jpg", "text": "a car"}
915
+ {"file_name": "train/915.jpg", "text": "a car"}
916
+ {"file_name": "train/916.jpg", "text": "a car"}
917
+ {"file_name": "train/917.jpg", "text": "a car"}
918
+ {"file_name": "train/918.jpg", "text": "a car"}
919
+ {"file_name": "train/919.jpg", "text": "a car"}
920
+ {"file_name": "train/920.jpg", "text": "a car"}
921
+ {"file_name": "train/921.jpg", "text": "a car"}
922
+ {"file_name": "train/922.jpg", "text": "a car"}
923
+ {"file_name": "train/923.jpg", "text": "a car"}
924
+ {"file_name": "train/924.jpg", "text": "a car"}
925
+ {"file_name": "train/925.jpg", "text": "a car"}
926
+ {"file_name": "train/926.jpg", "text": "a car"}
927
+ {"file_name": "train/927.jpg", "text": "a car"}
928
+ {"file_name": "train/928.jpg", "text": "a car"}
929
+ {"file_name": "train/929.jpg", "text": "a car"}
930
+ {"file_name": "train/930.jpg", "text": "a car"}
931
+ {"file_name": "train/931.jpg", "text": "a car"}
932
+ {"file_name": "train/932.jpg", "text": "a car"}
933
+ {"file_name": "train/933.jpg", "text": "a car"}
934
+ {"file_name": "train/934.jpg", "text": "a car"}
935
+ {"file_name": "train/935.jpg", "text": "a car"}
936
+ {"file_name": "train/936.jpg", "text": "a car"}
937
+ {"file_name": "train/937.jpg", "text": "a car"}
938
+ {"file_name": "train/938.jpg", "text": "a car"}
939
+ {"file_name": "train/939.jpg", "text": "a car"}
940
+ {"file_name": "train/940.jpg", "text": "a car"}
941
+ {"file_name": "train/941.jpg", "text": "a car"}
942
+ {"file_name": "train/942.jpg", "text": "a car"}
943
+ {"file_name": "train/943.jpg", "text": "a car"}
944
+ {"file_name": "train/944.jpg", "text": "a car"}
945
+ {"file_name": "train/945.jpg", "text": "a car"}
946
+ {"file_name": "train/946.jpg", "text": "a car"}
947
+ {"file_name": "train/947.jpg", "text": "a car"}
948
+ {"file_name": "train/948.jpg", "text": "a car"}
949
+ {"file_name": "train/949.jpg", "text": "a car"}
950
+ {"file_name": "train/950.jpg", "text": "a car"}
951
+ {"file_name": "train/951.jpg", "text": "a car"}
952
+ {"file_name": "train/952.jpg", "text": "a car"}
953
+ {"file_name": "train/953.jpg", "text": "a car"}
954
+ {"file_name": "train/954.jpg", "text": "a car"}
955
+ {"file_name": "train/955.jpg", "text": "a car"}
956
+ {"file_name": "train/956.jpg", "text": "a car"}
957
+ {"file_name": "train/957.jpg", "text": "a car"}
958
+ {"file_name": "train/958.jpg", "text": "a car"}
959
+ {"file_name": "train/959.jpg", "text": "a car"}
960
+ {"file_name": "train/960.jpg", "text": "a car"}
961
+ {"file_name": "train/961.jpg", "text": "a car"}
962
+ {"file_name": "train/962.jpg", "text": "a car"}
963
+ {"file_name": "train/963.jpg", "text": "a car"}
964
+ {"file_name": "train/964.jpg", "text": "a car"}
965
+ {"file_name": "train/965.jpg", "text": "a car"}
966
+ {"file_name": "train/966.jpg", "text": "a car"}
967
+ {"file_name": "train/967.jpg", "text": "a car"}
968
+ {"file_name": "train/968.jpg", "text": "a car"}
969
+ {"file_name": "train/969.jpg", "text": "a car"}
970
+ {"file_name": "train/970.jpg", "text": "a car"}
971
+ {"file_name": "train/971.jpg", "text": "a car"}
972
+ {"file_name": "train/972.jpg", "text": "a car"}
973
+ {"file_name": "train/973.jpg", "text": "a car"}
974
+ {"file_name": "train/974.jpg", "text": "a car"}
975
+ {"file_name": "train/975.jpg", "text": "a car"}
976
+ {"file_name": "train/976.jpg", "text": "a car"}
977
+ {"file_name": "train/977.jpg", "text": "a car"}
978
+ {"file_name": "train/978.jpg", "text": "a car"}
979
+ {"file_name": "train/979.jpg", "text": "a car"}
980
+ {"file_name": "train/980.jpg", "text": "a car"}
981
+ {"file_name": "train/981.jpg", "text": "a car"}
982
+ {"file_name": "train/982.jpg", "text": "a car"}
983
+ {"file_name": "train/983.jpg", "text": "a car"}
984
+ {"file_name": "train/984.jpg", "text": "a car"}
985
+ {"file_name": "train/985.jpg", "text": "a car"}
986
+ {"file_name": "train/986.jpg", "text": "a car"}
987
+ {"file_name": "train/987.jpg", "text": "a car"}
988
+ {"file_name": "train/988.jpg", "text": "a car"}
989
+ {"file_name": "train/989.jpg", "text": "a car"}
990
+ {"file_name": "train/990.jpg", "text": "a car"}
991
+ {"file_name": "train/991.jpg", "text": "a car"}
992
+ {"file_name": "train/992.jpg", "text": "a car"}
993
+ {"file_name": "train/993.jpg", "text": "a car"}
994
+ {"file_name": "train/994.jpg", "text": "a car"}
995
+ {"file_name": "train/995.jpg", "text": "a car"}
996
+ {"file_name": "train/996.jpg", "text": "a car"}
997
+ {"file_name": "train/997.jpg", "text": "a car"}
998
+ {"file_name": "train/998.jpg", "text": "a car"}
999
+ {"file_name": "train/999.jpg", "text": "a car"}
1000
+ {"file_name": "train/1000.jpg", "text": "a car"}
Gradient_ascent_traininig_scripts/data/car/train/fix.py ADDED
@@ -0,0 +1,16 @@
+ import json
+ 
+ # Define the range and the common text description
+ start, end = 1, 1000
+ description = "a car"
+ 
+ # Open the metadata.jsonl file to write the JSON data
+ with open("metadata.jsonl", "w") as file:
+     # Iterate through the range and format each entry
+     for i in range(start, end + 1):
+         entry = {
+             "file_name": f"train/{i}.jpg",
+             "text": description
+         }
+         # Write each entry as a JSON line
+         file.write(json.dumps(entry) + "\n")
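Aside: a dataset laid out this way (a train/ folder of images plus metadata.jsonl captions) matches what the Hugging Face "imagefolder" builder expects. A minimal loading sketch, assuming the datasets library is installed and data/car uses the layout above:

    from datasets import load_dataset

    # "imagefolder" pairs each train/*.jpg with its caption from metadata.jsonl,
    # exposing "image" and "text" columns.
    dataset = load_dataset("imagefolder", data_dir="./data/car", split="train")
    print(dataset[0]["text"])  # "a car"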
Gradient_ascent_traininig_scripts/generate_images.sh ADDED
@@ -0,0 +1,75 @@
+ # OUTPUT_DIR="./data/english_springer_ti"
+ # PROMPT="a photo of an english springer"
+ # NUM_TRAIN_IMAGES=100
+ 
+ # python3 generate_training_images.py \
+ #     --output_dir $OUTPUT_DIR \
+ #     --prompt "$PROMPT" \
+ #     --mode train \
+ #     --num_train_images $NUM_TRAIN_IMAGES
+ 
+ # OUTPUT_DIR="./data/garbage_truck_ti"
+ # PROMPT="a photo of a garbage truck"
+ # NUM_TRAIN_IMAGES=100
+ 
+ # python3 generate_training_images.py \
+ #     --output_dir $OUTPUT_DIR \
+ #     --prompt "$PROMPT" \
+ #     --mode train \
+ #     --num_train_images $NUM_TRAIN_IMAGES
+ 
+ # OUTPUT_DIR="./data/kilian_eng_ti"
+ # PROMPT="a painting in the style of Kilian Eng"
+ # NUM_TRAIN_IMAGES=100
+ 
+ # python3 generate_training_images.py \
+ #     --output_dir $OUTPUT_DIR \
+ #     --prompt "$PROMPT" \
+ #     --mode train \
+ #     --num_train_images $NUM_TRAIN_IMAGES
+ 
+ # OUTPUT_DIR="./data/thomas_kinkade_ti"
+ # PROMPT="a painting in the style of Thomas Kinkade"
+ # NUM_TRAIN_IMAGES=100
+ 
+ # python3 generate_training_images.py \
+ #     --output_dir $OUTPUT_DIR \
+ #     --prompt "$PROMPT" \
+ #     --mode train \
+ #     --num_train_images $NUM_TRAIN_IMAGES
+ 
+ # OUTPUT_DIR="./data/van_gogh_ti"
+ # PROMPT="a painting in the style of Van Gogh"
+ # NUM_TRAIN_IMAGES=100
+ 
+ # python3 generate_training_images.py \
+ #     --output_dir $OUTPUT_DIR \
+ #     --prompt "$PROMPT" \
+ #     --mode train \
+ #     --num_train_images $NUM_TRAIN_IMAGES
+ 
+ OUTPUT_DIR="./generation/english_springer_erasure"
+ PROMPT="a photo of an english springer spaniel"
+ NUM_TRAIN_IMAGES=100
+ 
+ python3 generate_training_images.py \
+     --output_dir $OUTPUT_DIR \
+     --prompt "$PROMPT" \
+     --mode train \
+     --num_train_images $NUM_TRAIN_IMAGES
+ 
+ # PROMPT="a photo of a golden retriever"
+ # python3 generate_training_images.py \
+ #     --output_dir $OUTPUT_DIR \
+ #     --prompt "$PROMPT" \
+ #     --mode test \
+ #     --model_path "./checkpoint/english_springer_erasure" \
+ #     --num_train_images $NUM_TRAIN_IMAGES
+ 
+ # PROMPT="a photo of a american pit bull terrier"
+ # python3 generate_training_images.py \
+ #     --output_dir $OUTPUT_DIR \
+ #     --prompt "$PROMPT" \
+ #     --mode test \
+ #     --model_path "./checkpoint/english_springer_erasure" \
+ #     --num_train_images $NUM_TRAIN_IMAGES
Gradient_ascent_traininig_scripts/generate_training_images.py ADDED
@@ -0,0 +1,75 @@
+ from diffusers import StableDiffusionPipeline
+ import torch
+ import os
+ import json
+ import argparse
+ 
+ def parse_args():
+     parser = argparse.ArgumentParser(description="Generate training or test images with Stable Diffusion")
+ 
+     parser.add_argument("--output_dir", type=str, help="Output directory")
+     parser.add_argument("--model_path", type=str, help="Path to model checkpoint", default="CompVis/stable-diffusion-v1-4")
+     parser.add_argument("--seed", type=int, help="Seed for random number generator (currently unused)", default=0)
+     parser.add_argument("--prompt", type=str, help="Prompt for image generation")
+     parser.add_argument("--mode", type=str, help="Mode for image generation", choices=["train", "test"], default="train")
+     parser.add_argument("--num_train_images", type=int, help="Number of images to generate", default=1000)
+     args = parser.parse_args()
+     return args
+ 
+ if __name__ == "__main__":
+     args = parse_args()
+     device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ 
+     if args.mode == "test":
+         os.makedirs(args.output_dir, exist_ok=True)
+ 
+         pipe = StableDiffusionPipeline.from_pretrained(args.model_path, safety_checker=None, torch_dtype=torch.float16).to(device)
+ 
+         for i in range(args.num_train_images):
+             # Regenerate until the output is not flagged NSFW. Note: with
+             # safety_checker=None, nsfw_content_detected is None, so this
+             # check only takes effect when a safety checker is loaded.
+             while True:
+                 output = pipe(prompt=args.prompt)
+                 image = output.images[0]
+                 nsfw = output.nsfw_content_detected
+                 # nsfw may be a list with one flag per image
+                 if isinstance(nsfw, list):
+                     nsfw = nsfw[0]
+                 if not nsfw:
+                     break
+             image.save(os.path.join(args.output_dir, f"{args.prompt}_{i}.png"))
+ 
+     elif args.mode == "train":
+         metadata = []
+ 
+         os.makedirs(args.output_dir, exist_ok=True)
+         os.makedirs(os.path.join(args.output_dir, "train"), exist_ok=True)
+ 
+         pipe = StableDiffusionPipeline.from_pretrained(args.model_path, safety_checker=None, torch_dtype=torch.float16).to(device)
+ 
+         for i in range(args.num_train_images):
+             while True:
+                 output = pipe(prompt=args.prompt)
+                 image = output.images[0]
+                 nsfw = output.nsfw_content_detected
+                 if isinstance(nsfw, list):
+                     nsfw = nsfw[0]
+                 if not nsfw:
+                     break
+             image.save(os.path.join(args.output_dir, "train", f"{args.prompt}_{i}.png"))
+ 
+             metadata.append({"file_name": f"train/{args.prompt}_{i}.png", "text": args.prompt})
+ 
+         with open(os.path.join(args.output_dir, "metadata.jsonl"), "w") as f:
+             for m in metadata:
+                 f.write(json.dumps(m) + "\n")
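Note that the script above parses --seed but never applies it. A minimal sketch of how the seed could be wired through for reproducible generations, assuming diffusers pipelines accept a torch.Generator (variable names mirror the script):

    import torch

    # A seeded generator makes each pipeline call deterministic for a fixed seed.
    generator = torch.Generator(device=device).manual_seed(args.seed)
    output = pipe(prompt=args.prompt, generator=generator)
    image = output.images[0]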
Gradient_ascent_traininig_scripts/generation/english_springer_erasure/metadata.jsonl ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"file_name": "train/a photo of an english springer spaniel_0.png", "text": "a photo of an english springer spaniel"}
2
+ {"file_name": "train/a photo of an english springer spaniel_1.png", "text": "a photo of an english springer spaniel"}
3
+ {"file_name": "train/a photo of an english springer spaniel_2.png", "text": "a photo of an english springer spaniel"}
4
+ {"file_name": "train/a photo of an english springer spaniel_3.png", "text": "a photo of an english springer spaniel"}
5
+ {"file_name": "train/a photo of an english springer spaniel_4.png", "text": "a photo of an english springer spaniel"}
6
+ {"file_name": "train/a photo of an english springer spaniel_5.png", "text": "a photo of an english springer spaniel"}
7
+ {"file_name": "train/a photo of an english springer spaniel_6.png", "text": "a photo of an english springer spaniel"}
8
+ {"file_name": "train/a photo of an english springer spaniel_7.png", "text": "a photo of an english springer spaniel"}
9
+ {"file_name": "train/a photo of an english springer spaniel_8.png", "text": "a photo of an english springer spaniel"}
10
+ {"file_name": "train/a photo of an english springer spaniel_9.png", "text": "a photo of an english springer spaniel"}
11
+ {"file_name": "train/a photo of an english springer spaniel_10.png", "text": "a photo of an english springer spaniel"}
12
+ {"file_name": "train/a photo of an english springer spaniel_11.png", "text": "a photo of an english springer spaniel"}
13
+ {"file_name": "train/a photo of an english springer spaniel_12.png", "text": "a photo of an english springer spaniel"}
14
+ {"file_name": "train/a photo of an english springer spaniel_13.png", "text": "a photo of an english springer spaniel"}
15
+ {"file_name": "train/a photo of an english springer spaniel_14.png", "text": "a photo of an english springer spaniel"}
16
+ {"file_name": "train/a photo of an english springer spaniel_15.png", "text": "a photo of an english springer spaniel"}
17
+ {"file_name": "train/a photo of an english springer spaniel_16.png", "text": "a photo of an english springer spaniel"}
18
+ {"file_name": "train/a photo of an english springer spaniel_17.png", "text": "a photo of an english springer spaniel"}
19
+ {"file_name": "train/a photo of an english springer spaniel_18.png", "text": "a photo of an english springer spaniel"}
20
+ {"file_name": "train/a photo of an english springer spaniel_19.png", "text": "a photo of an english springer spaniel"}
21
+ {"file_name": "train/a photo of an english springer spaniel_20.png", "text": "a photo of an english springer spaniel"}
22
+ {"file_name": "train/a photo of an english springer spaniel_21.png", "text": "a photo of an english springer spaniel"}
23
+ {"file_name": "train/a photo of an english springer spaniel_22.png", "text": "a photo of an english springer spaniel"}
24
+ {"file_name": "train/a photo of an english springer spaniel_23.png", "text": "a photo of an english springer spaniel"}
25
+ {"file_name": "train/a photo of an english springer spaniel_24.png", "text": "a photo of an english springer spaniel"}
26
+ {"file_name": "train/a photo of an english springer spaniel_25.png", "text": "a photo of an english springer spaniel"}
27
+ {"file_name": "train/a photo of an english springer spaniel_26.png", "text": "a photo of an english springer spaniel"}
28
+ {"file_name": "train/a photo of an english springer spaniel_27.png", "text": "a photo of an english springer spaniel"}
29
+ {"file_name": "train/a photo of an english springer spaniel_28.png", "text": "a photo of an english springer spaniel"}
30
+ {"file_name": "train/a photo of an english springer spaniel_29.png", "text": "a photo of an english springer spaniel"}
31
+ {"file_name": "train/a photo of an english springer spaniel_30.png", "text": "a photo of an english springer spaniel"}
32
+ {"file_name": "train/a photo of an english springer spaniel_31.png", "text": "a photo of an english springer spaniel"}
33
+ {"file_name": "train/a photo of an english springer spaniel_32.png", "text": "a photo of an english springer spaniel"}
34
+ {"file_name": "train/a photo of an english springer spaniel_33.png", "text": "a photo of an english springer spaniel"}
35
+ {"file_name": "train/a photo of an english springer spaniel_34.png", "text": "a photo of an english springer spaniel"}
36
+ {"file_name": "train/a photo of an english springer spaniel_35.png", "text": "a photo of an english springer spaniel"}
37
+ {"file_name": "train/a photo of an english springer spaniel_36.png", "text": "a photo of an english springer spaniel"}
38
+ {"file_name": "train/a photo of an english springer spaniel_37.png", "text": "a photo of an english springer spaniel"}
39
+ {"file_name": "train/a photo of an english springer spaniel_38.png", "text": "a photo of an english springer spaniel"}
40
+ {"file_name": "train/a photo of an english springer spaniel_39.png", "text": "a photo of an english springer spaniel"}
41
+ {"file_name": "train/a photo of an english springer spaniel_40.png", "text": "a photo of an english springer spaniel"}
42
+ {"file_name": "train/a photo of an english springer spaniel_41.png", "text": "a photo of an english springer spaniel"}
43
+ {"file_name": "train/a photo of an english springer spaniel_42.png", "text": "a photo of an english springer spaniel"}
44
+ {"file_name": "train/a photo of an english springer spaniel_43.png", "text": "a photo of an english springer spaniel"}
45
+ {"file_name": "train/a photo of an english springer spaniel_44.png", "text": "a photo of an english springer spaniel"}
46
+ {"file_name": "train/a photo of an english springer spaniel_45.png", "text": "a photo of an english springer spaniel"}
47
+ {"file_name": "train/a photo of an english springer spaniel_46.png", "text": "a photo of an english springer spaniel"}
48
+ {"file_name": "train/a photo of an english springer spaniel_47.png", "text": "a photo of an english springer spaniel"}
49
+ {"file_name": "train/a photo of an english springer spaniel_48.png", "text": "a photo of an english springer spaniel"}
50
+ {"file_name": "train/a photo of an english springer spaniel_49.png", "text": "a photo of an english springer spaniel"}
51
+ {"file_name": "train/a photo of an english springer spaniel_50.png", "text": "a photo of an english springer spaniel"}
52
+ {"file_name": "train/a photo of an english springer spaniel_51.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_52.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_53.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_54.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_55.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_56.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_57.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_58.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_59.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_60.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_61.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_62.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_63.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_64.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_65.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_66.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_67.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_68.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_69.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_70.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_71.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_72.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_73.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_74.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_75.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_76.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_77.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_78.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_79.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_80.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_81.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_82.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_83.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_84.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_85.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_86.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_87.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_88.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_89.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_90.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_91.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_92.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_93.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_94.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_95.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_96.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_97.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_98.png", "text": "a photo of an english springer spaniel"}
+ {"file_name": "train/a photo of an english springer spaniel_99.png", "text": "a photo of an english springer spaniel"}
Gradient_ascent_traininig_scripts/inversion.sh ADDED
@@ -0,0 +1,60 @@
+ export MODEL_NAME="./checkpoint/van_gogh_erasure"
+ export DATA_DIR="./data/van_gogh_ti/train"
+
+ accelerate launch textual_inversion.py \
+ --pretrained_model_name_or_path=$MODEL_NAME \
+ --train_data_dir=$DATA_DIR \
+ --learnable_property="style" \
+ --placeholder_token="<art-style>" \
+ --initializer_token="art" \
+ --resolution=512 \
+ --train_batch_size=1 \
+ --gradient_accumulation_steps=4 \
+ --max_train_steps=5000 \
+ --learning_rate=5.0e-04 \
+ --scale_lr \
+ --lr_scheduler="constant" \
+ --lr_warmup_steps=0 \
+ --push_to_hub \
+ --output_dir="./checkpoint/van_gogh_inversion"
+
+ export MODEL_NAME="./checkpoint/kilian_eng_erasure"
+ export DATA_DIR="./data/kilian_eng_ti/train"
+
+ accelerate launch textual_inversion.py \
+ --pretrained_model_name_or_path=$MODEL_NAME \
+ --train_data_dir=$DATA_DIR \
+ --learnable_property="style" \
+ --placeholder_token="<art-style>" \
+ --initializer_token="art" \
+ --resolution=512 \
+ --train_batch_size=1 \
+ --gradient_accumulation_steps=4 \
+ --max_train_steps=5000 \
+ --learning_rate=5.0e-04 \
+ --scale_lr \
+ --lr_scheduler="constant" \
+ --lr_warmup_steps=0 \
+ --push_to_hub \
+ --output_dir="./checkpoint/kilian_eng_inversion"
+
+ export MODEL_NAME="./checkpoint/thomas_kinkade_erasure"
+ export DATA_DIR="./data/thomas_kinkade_ti/train"
+
+ accelerate launch textual_inversion.py \
+ --pretrained_model_name_or_path=$MODEL_NAME \
+ --train_data_dir=$DATA_DIR \
+ --learnable_property="style" \
+ --placeholder_token="<art-style>" \
+ --initializer_token="art" \
+ --resolution=512 \
+ --train_batch_size=1 \
+ --gradient_accumulation_steps=4 \
+ --max_train_steps=5000 \
+ --learning_rate=5.0e-04 \
+ --scale_lr \
+ --lr_scheduler="constant" \
+ --lr_warmup_steps=0 \
+ --push_to_hub \
+ --output_dir="./checkpoint/thomas_kinkade_inversion"
+
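Each `inversion.sh` run re-learns an erased style as a fresh `<art-style>` token on top of the corresponding erasure checkpoint. A sketch of how the resulting embedding could be loaded to check whether the erased style resurfaces — paths mirror the script, the weight file name assumes the default safetensors output of `textual_inversion.py`, and a CUDA device is assumed:

```python
import torch
from diffusers import StableDiffusionPipeline

# Probe an erased checkpoint with its learned inversion token.
pipe = StableDiffusionPipeline.from_pretrained(
    "./checkpoint/van_gogh_erasure", torch_dtype=torch.float16, safety_checker=None
).to("cuda")
pipe.load_textual_inversion(
    "./checkpoint/van_gogh_inversion/learned_embeds.safetensors", token="<art-style>"
)
image = pipe("a painting of a wheat field in the style of <art-style>").images[0]
image.save("inversion_probe.png")
```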
Gradient_ascent_traininig_scripts/textual_inversion.py ADDED
@@ -0,0 +1,1022 @@
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
16
+ import argparse
17
+ import logging
18
+ import math
19
+ import os
20
+ import random
21
+ import shutil
22
+ import warnings
23
+ from contextlib import nullcontext
24
+ from pathlib import Path
25
+
26
+ import numpy as np
27
+ import PIL
28
+ import safetensors
29
+ import torch
30
+ import torch.nn.functional as F
31
+ import torch.utils.checkpoint
32
+ import transformers
33
+ from accelerate import Accelerator
34
+ from accelerate.logging import get_logger
35
+ from accelerate.utils import ProjectConfiguration, set_seed
36
+ from huggingface_hub import create_repo, upload_folder
37
+
38
+ # TODO: remove and import from diffusers.utils when the new version of diffusers is released
39
+ from packaging import version
40
+ from PIL import Image
41
+ from torch.utils.data import Dataset
42
+ from torchvision import transforms
43
+ from tqdm.auto import tqdm
44
+ from transformers import CLIPTextModel, CLIPTokenizer
45
+
46
+ import diffusers
47
+ from diffusers import (
48
+ AutoencoderKL,
49
+ DDPMScheduler,
50
+ DiffusionPipeline,
51
+ DPMSolverMultistepScheduler,
52
+ StableDiffusionPipeline,
53
+ UNet2DConditionModel,
54
+ )
55
+ from diffusers.optimization import get_scheduler
56
+ from diffusers.utils import check_min_version, is_wandb_available
57
+ from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card
58
+ from diffusers.utils.import_utils import is_xformers_available
59
+
60
+
61
+ if is_wandb_available():
62
+ import wandb
63
+
64
+ if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"):
65
+ PIL_INTERPOLATION = {
66
+ "linear": PIL.Image.Resampling.BILINEAR,
67
+ "bilinear": PIL.Image.Resampling.BILINEAR,
68
+ "bicubic": PIL.Image.Resampling.BICUBIC,
69
+ "lanczos": PIL.Image.Resampling.LANCZOS,
70
+ "nearest": PIL.Image.Resampling.NEAREST,
71
+ }
72
+ else:
73
+ PIL_INTERPOLATION = {
74
+ "linear": PIL.Image.LINEAR,
75
+ "bilinear": PIL.Image.BILINEAR,
76
+ "bicubic": PIL.Image.BICUBIC,
77
+ "lanczos": PIL.Image.LANCZOS,
78
+ "nearest": PIL.Image.NEAREST,
79
+ }
80
+ # ------------------------------------------------------------------------------
81
+
82
+
83
+ # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
84
+ check_min_version("0.31.0.dev0")
85
+
86
+ logger = get_logger(__name__)
87
+
88
+
89
+ def save_model_card(repo_id: str, images: list = None, base_model: str = None, repo_folder: str = None):
90
+ img_str = ""
91
+ if images is not None:
92
+ for i, image in enumerate(images):
93
+ image.save(os.path.join(repo_folder, f"image_{i}.png"))
94
+ img_str += f"![img_{i}](./image_{i}.png)\n"
95
+ model_description = f"""
96
+ # Textual inversion text2image fine-tuning - {repo_id}
97
+ These are textual inversion adaptation weights for {base_model}. You can find some example images below. \n
98
+ {img_str}
99
+ """
100
+ model_card = load_or_create_model_card(
101
+ repo_id_or_path=repo_id,
102
+ from_training=True,
103
+ license="creativeml-openrail-m",
104
+ base_model=base_model,
105
+ model_description=model_description,
106
+ inference=True,
107
+ )
108
+
109
+ tags = [
110
+ "stable-diffusion",
111
+ "stable-diffusion-diffusers",
112
+ "text-to-image",
113
+ "diffusers",
114
+ "textual_inversion",
115
+ "diffusers-training",
116
+ ]
117
+ model_card = populate_model_card(model_card, tags=tags)
118
+
119
+ model_card.save(os.path.join(repo_folder, "README.md"))
120
+
121
+
122
+ def log_validation(text_encoder, tokenizer, unet, vae, args, accelerator, weight_dtype, epoch):
123
+ logger.info(
124
+ f"Running validation... \n Generating {args.num_validation_images} images with prompt:"
125
+ f" {args.validation_prompt}."
126
+ )
127
+ # create pipeline (note: unet and vae are loaded again in float32)
128
+ pipeline = DiffusionPipeline.from_pretrained(
129
+ args.pretrained_model_name_or_path,
130
+ text_encoder=accelerator.unwrap_model(text_encoder),
131
+ tokenizer=tokenizer,
132
+ unet=unet,
133
+ vae=vae,
134
+ safety_checker=None,
135
+ revision=args.revision,
136
+ variant=args.variant,
137
+ torch_dtype=weight_dtype,
138
+ )
139
+ pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config)
140
+ pipeline = pipeline.to(accelerator.device)
141
+ pipeline.set_progress_bar_config(disable=True)
142
+
143
+ # run inference
144
+ generator = None if args.seed is None else torch.Generator(device=accelerator.device).manual_seed(args.seed)
145
+ images = []
146
+ for _ in range(args.num_validation_images):
147
+ if torch.backends.mps.is_available():
148
+ autocast_ctx = nullcontext()
149
+ else:
150
+ autocast_ctx = torch.autocast(accelerator.device.type)
151
+
152
+ with autocast_ctx:
153
+ image = pipeline(args.validation_prompt, num_inference_steps=25, generator=generator).images[0]
154
+ images.append(image)
155
+
156
+ for tracker in accelerator.trackers:
157
+ if tracker.name == "tensorboard":
158
+ np_images = np.stack([np.asarray(img) for img in images])
159
+ tracker.writer.add_images("validation", np_images, epoch, dataformats="NHWC")
160
+ if tracker.name == "wandb":
161
+ tracker.log(
162
+ {
163
+ "validation": [
164
+ wandb.Image(image, caption=f"{i}: {args.validation_prompt}") for i, image in enumerate(images)
165
+ ]
166
+ }
167
+ )
168
+
169
+ del pipeline
170
+ torch.cuda.empty_cache()
171
+ return images
172
+
173
+
174
+ def save_progress(text_encoder, placeholder_token_ids, accelerator, args, save_path, safe_serialization=True):
175
+ logger.info("Saving embeddings")
176
+ learned_embeds = (
177
+ accelerator.unwrap_model(text_encoder)
178
+ .get_input_embeddings()
179
+ .weight[min(placeholder_token_ids) : max(placeholder_token_ids) + 1]
180
+ )
181
+ learned_embeds_dict = {args.placeholder_token: learned_embeds.detach().cpu()}
182
+
183
+ if safe_serialization:
184
+ safetensors.torch.save_file(learned_embeds_dict, save_path, metadata={"format": "pt"})
185
+ else:
186
+ torch.save(learned_embeds_dict, save_path)
187
+
188
+
189
+ def parse_args():
190
+ parser = argparse.ArgumentParser(description="Simple example of a training script.")
191
+ parser.add_argument(
192
+ "--save_steps",
193
+ type=int,
194
+ default=500,
195
+ help="Save learned_embeds.bin every X updates steps.",
196
+ )
197
+ parser.add_argument(
198
+ "--save_as_full_pipeline",
199
+ action="store_true",
200
+ help="Save the complete stable diffusion pipeline.",
201
+ )
202
+ parser.add_argument(
203
+ "--num_vectors",
204
+ type=int,
205
+ default=1,
206
+ help="How many textual inversion vectors shall be used to learn the concept.",
207
+ )
208
+ parser.add_argument(
209
+ "--pretrained_model_name_or_path",
210
+ type=str,
211
+ default=None,
212
+ required=True,
213
+ help="Path to pretrained model or model identifier from huggingface.co/models.",
214
+ )
215
+ parser.add_argument(
216
+ "--revision",
217
+ type=str,
218
+ default=None,
219
+ required=False,
220
+ help="Revision of pretrained model identifier from huggingface.co/models.",
221
+ )
222
+ parser.add_argument(
223
+ "--variant",
224
+ type=str,
225
+ default=None,
226
+ help="Variant of the model files of the pretrained model identifier from huggingface.co/models, 'e.g.' fp16",
227
+ )
228
+ parser.add_argument(
229
+ "--tokenizer_name",
230
+ type=str,
231
+ default=None,
232
+ help="Pretrained tokenizer name or path if not the same as model_name",
233
+ )
234
+ parser.add_argument(
235
+ "--train_data_dir", type=str, default=None, required=True, help="A folder containing the training data."
236
+ )
237
+ parser.add_argument(
238
+ "--placeholder_token",
239
+ type=str,
240
+ default=None,
241
+ required=True,
242
+ help="A token to use as a placeholder for the concept.",
243
+ )
244
+ parser.add_argument(
245
+ "--initializer_token", type=str, default=None, required=True, help="A token to use as initializer word."
246
+ )
247
+ parser.add_argument("--learnable_property", type=str, default="object", help="Choose between 'object' and 'style'")
248
+ parser.add_argument("--repeats", type=int, default=100, help="How many times to repeat the training data.")
249
+ parser.add_argument(
250
+ "--output_dir",
251
+ type=str,
252
+ default="text-inversion-model",
253
+ help="The output directory where the model predictions and checkpoints will be written.",
254
+ )
255
+ parser.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.")
256
+ parser.add_argument(
257
+ "--resolution",
258
+ type=int,
259
+ default=512,
260
+ help=(
261
+ "The resolution for input images, all the images in the train/validation dataset will be resized to this"
262
+ " resolution"
263
+ ),
264
+ )
265
+ parser.add_argument(
266
+ "--center_crop", action="store_true", help="Whether to center crop images before resizing to resolution."
267
+ )
268
+ parser.add_argument(
269
+ "--train_batch_size", type=int, default=16, help="Batch size (per device) for the training dataloader."
270
+ )
271
+ parser.add_argument("--num_train_epochs", type=int, default=100)
272
+ parser.add_argument(
273
+ "--max_train_steps",
274
+ type=int,
275
+ default=5000,
276
+ help="Total number of training steps to perform. If provided, overrides num_train_epochs.",
277
+ )
278
+ parser.add_argument(
279
+ "--gradient_accumulation_steps",
280
+ type=int,
281
+ default=1,
282
+ help="Number of updates steps to accumulate before performing a backward/update pass.",
283
+ )
284
+ parser.add_argument(
285
+ "--gradient_checkpointing",
286
+ action="store_true",
287
+ help="Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass.",
288
+ )
289
+ parser.add_argument(
290
+ "--learning_rate",
291
+ type=float,
292
+ default=1e-4,
293
+ help="Initial learning rate (after the potential warmup period) to use.",
294
+ )
295
+ parser.add_argument(
296
+ "--scale_lr",
297
+ action="store_true",
298
+ default=False,
299
+ help="Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.",
300
+ )
301
+ parser.add_argument(
302
+ "--lr_scheduler",
303
+ type=str,
304
+ default="constant",
305
+ help=(
306
+ 'The scheduler type to use. Choose between ["linear", "cosine", "cosine_with_restarts", "polynomial",'
307
+ ' "constant", "constant_with_warmup"]'
308
+ ),
309
+ )
310
+ parser.add_argument(
311
+ "--lr_warmup_steps", type=int, default=500, help="Number of steps for the warmup in the lr scheduler."
312
+ )
313
+ parser.add_argument(
314
+ "--lr_num_cycles",
315
+ type=int,
316
+ default=1,
317
+ help="Number of hard resets of the lr in cosine_with_restarts scheduler.",
318
+ )
319
+ parser.add_argument(
320
+ "--dataloader_num_workers",
321
+ type=int,
322
+ default=0,
323
+ help=(
324
+ "Number of subprocesses to use for data loading. 0 means that the data will be loaded in the main process."
325
+ ),
326
+ )
327
+ parser.add_argument("--adam_beta1", type=float, default=0.9, help="The beta1 parameter for the Adam optimizer.")
328
+ parser.add_argument("--adam_beta2", type=float, default=0.999, help="The beta2 parameter for the Adam optimizer.")
329
+ parser.add_argument("--adam_weight_decay", type=float, default=1e-2, help="Weight decay to use.")
330
+ parser.add_argument("--adam_epsilon", type=float, default=1e-08, help="Epsilon value for the Adam optimizer")
331
+ parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
332
+ parser.add_argument("--hub_token", type=str, default=None, help="The token to use to push to the Model Hub.")
333
+ parser.add_argument(
334
+ "--hub_model_id",
335
+ type=str,
336
+ default=None,
337
+ help="The name of the repository to keep in sync with the local `output_dir`.",
338
+ )
339
+ parser.add_argument(
340
+ "--logging_dir",
341
+ type=str,
342
+ default="logs",
343
+ help=(
344
+ "[TensorBoard](https://www.tensorflow.org/tensorboard) log directory. Will default to"
345
+ " *output_dir/runs/**CURRENT_DATETIME_HOSTNAME***."
346
+ ),
347
+ )
348
+ parser.add_argument(
349
+ "--mixed_precision",
350
+ type=str,
351
+ default="no",
352
+ choices=["no", "fp16", "bf16"],
353
+ help=(
354
+ "Whether to use mixed precision. Choose"
355
+ "between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >= 1.10."
356
+ "and Nvidia Ampere GPU or Intel Gen 4 Xeon (and later) ."
357
+ ),
358
+ )
359
+ parser.add_argument(
360
+ "--allow_tf32",
361
+ action="store_true",
362
+ help=(
363
+ "Whether or not to allow TF32 on Ampere GPUs. Can be used to speed up training. For more information, see"
364
+ " https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices"
365
+ ),
366
+ )
367
+ parser.add_argument(
368
+ "--report_to",
369
+ type=str,
370
+ default="tensorboard",
371
+ help=(
372
+ 'The integration to report the results and logs to. Supported platforms are `"tensorboard"`'
373
+ ' (default), `"wandb"` and `"comet_ml"`. Use `"all"` to report to all integrations.'
374
+ ),
375
+ )
376
+ parser.add_argument(
377
+ "--validation_prompt",
378
+ type=str,
379
+ default=None,
380
+ help="A prompt that is used during validation to verify that the model is learning.",
381
+ )
382
+ parser.add_argument(
383
+ "--num_validation_images",
384
+ type=int,
385
+ default=4,
386
+ help="Number of images that should be generated during validation with `validation_prompt`.",
387
+ )
388
+ parser.add_argument(
389
+ "--validation_steps",
390
+ type=int,
391
+ default=100,
392
+ help=(
393
+ "Run validation every X steps. Validation consists of running the prompt"
394
+ " `args.validation_prompt` multiple times: `args.num_validation_images`"
395
+ " and logging the images."
396
+ ),
397
+ )
398
+ parser.add_argument(
399
+ "--validation_epochs",
400
+ type=int,
401
+ default=None,
402
+ help=(
403
+ "Deprecated in favor of validation_steps. Run validation every X epochs. Validation consists of running the prompt"
404
+ " `args.validation_prompt` multiple times: `args.num_validation_images`"
405
+ " and logging the images."
406
+ ),
407
+ )
408
+ parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
409
+ parser.add_argument(
410
+ "--checkpointing_steps",
411
+ type=int,
412
+ default=500,
413
+ help=(
414
+ "Save a checkpoint of the training state every X updates. These checkpoints are only suitable for resuming"
415
+ " training using `--resume_from_checkpoint`."
416
+ ),
417
+ )
418
+ parser.add_argument(
419
+ "--checkpoints_total_limit",
420
+ type=int,
421
+ default=None,
422
+ help=("Max number of checkpoints to store."),
423
+ )
424
+ parser.add_argument(
425
+ "--resume_from_checkpoint",
426
+ type=str,
427
+ default=None,
428
+ help=(
429
+ "Whether training should be resumed from a previous checkpoint. Use a path saved by"
430
+ ' `--checkpointing_steps`, or `"latest"` to automatically select the last available checkpoint.'
431
+ ),
432
+ )
433
+ parser.add_argument(
434
+ "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers."
435
+ )
436
+ parser.add_argument(
437
+ "--no_safe_serialization",
438
+ action="store_true",
439
+ help="If specified save the checkpoint not in `safetensors` format, but in original PyTorch format instead.",
440
+ )
441
+
442
+ args = parser.parse_args()
443
+ env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
444
+ if env_local_rank != -1 and env_local_rank != args.local_rank:
445
+ args.local_rank = env_local_rank
446
+
447
+ if args.train_data_dir is None:
448
+ raise ValueError("You must specify a train data directory.")
449
+
450
+ return args
451
+
452
+
453
+ imagenet_templates_small = [
454
+ "a photo of a {}",
455
+ "a rendering of a {}",
456
+ "a cropped photo of the {}",
457
+ "the photo of a {}",
458
+ "a photo of a clean {}",
459
+ "a photo of a dirty {}",
460
+ "a dark photo of the {}",
461
+ "a photo of my {}",
462
+ "a photo of the cool {}",
463
+ "a close-up photo of a {}",
464
+ "a bright photo of the {}",
465
+ "a cropped photo of a {}",
466
+ "a photo of the {}",
467
+ "a good photo of the {}",
468
+ "a photo of one {}",
469
+ "a close-up photo of the {}",
470
+ "a rendition of the {}",
471
+ "a photo of the clean {}",
472
+ "a rendition of a {}",
473
+ "a photo of a nice {}",
474
+ "a good photo of a {}",
475
+ "a photo of the nice {}",
476
+ "a photo of the small {}",
477
+ "a photo of the weird {}",
478
+ "a photo of the large {}",
479
+ "a photo of a cool {}",
480
+ "a photo of a small {}",
481
+ ]
482
+
483
+ imagenet_style_templates_small = [
484
+ "a painting in the style of {}",
485
+ "a rendering in the style of {}",
486
+ "a cropped painting in the style of {}",
487
+ "the painting in the style of {}",
488
+ "a clean painting in the style of {}",
489
+ "a dirty painting in the style of {}",
490
+ "a dark painting in the style of {}",
491
+ "a picture in the style of {}",
492
+ "a cool painting in the style of {}",
493
+ "a close-up painting in the style of {}",
494
+ "a bright painting in the style of {}",
495
+ "a cropped painting in the style of {}",
496
+ "a good painting in the style of {}",
497
+ "a close-up painting in the style of {}",
498
+ "a rendition in the style of {}",
499
+ "a nice painting in the style of {}",
500
+ "a small painting in the style of {}",
501
+ "a weird painting in the style of {}",
502
+ "a large painting in the style of {}",
503
+ ]
504
+
505
+
506
+ class TextualInversionDataset(Dataset):
507
+ def __init__(
508
+ self,
509
+ data_root,
510
+ tokenizer,
511
+ learnable_property="object", # [object, style]
512
+ size=512,
513
+ repeats=100,
514
+ interpolation="bicubic",
515
+ flip_p=0.5,
516
+ set="train",
517
+ placeholder_token="*",
518
+ center_crop=False,
519
+ ):
520
+ self.data_root = data_root
521
+ self.tokenizer = tokenizer
522
+ self.learnable_property = learnable_property
523
+ self.size = size
524
+ self.placeholder_token = placeholder_token
525
+ self.center_crop = center_crop
526
+ self.flip_p = flip_p
527
+
528
+ self.image_paths = [os.path.join(self.data_root, file_path) for file_path in os.listdir(self.data_root)]
529
+
530
+ self.num_images = len(self.image_paths)
531
+ self._length = self.num_images
532
+
533
+ if set == "train":
534
+ self._length = self.num_images * repeats
535
+
536
+ self.interpolation = {
537
+ "linear": PIL_INTERPOLATION["linear"],
538
+ "bilinear": PIL_INTERPOLATION["bilinear"],
539
+ "bicubic": PIL_INTERPOLATION["bicubic"],
540
+ "lanczos": PIL_INTERPOLATION["lanczos"],
541
+ }[interpolation]
542
+
543
+ self.templates = imagenet_style_templates_small if learnable_property == "style" else imagenet_templates_small
544
+ self.flip_transform = transforms.RandomHorizontalFlip(p=self.flip_p)
545
+
546
+ def __len__(self):
547
+ return self._length
548
+
549
+ def __getitem__(self, i):
550
+ example = {}
551
+ image = Image.open(self.image_paths[i % self.num_images])
552
+
553
+ if not image.mode == "RGB":
554
+ image = image.convert("RGB")
555
+
556
+ placeholder_string = self.placeholder_token
557
+ text = random.choice(self.templates).format(placeholder_string)
558
+
559
+ example["input_ids"] = self.tokenizer(
560
+ text,
561
+ padding="max_length",
562
+ truncation=True,
563
+ max_length=self.tokenizer.model_max_length,
564
+ return_tensors="pt",
565
+ ).input_ids[0]
566
+
567
+ # default to score-sde preprocessing
568
+ img = np.array(image).astype(np.uint8)
569
+
570
+ if self.center_crop:
571
+ crop = min(img.shape[0], img.shape[1])
572
+ (
573
+ h,
574
+ w,
575
+ ) = (
576
+ img.shape[0],
577
+ img.shape[1],
578
+ )
579
+ img = img[(h - crop) // 2 : (h + crop) // 2, (w - crop) // 2 : (w + crop) // 2]
580
+
581
+ image = Image.fromarray(img)
582
+ image = image.resize((self.size, self.size), resample=self.interpolation)
583
+
584
+ image = self.flip_transform(image)
585
+ image = np.array(image).astype(np.uint8)
586
+ image = (image / 127.5 - 1.0).astype(np.float32)
587
+
588
+ example["pixel_values"] = torch.from_numpy(image).permute(2, 0, 1)
589
+ return example
590
+
591
+
592
+ def main():
593
+ args = parse_args()
594
+ if args.report_to == "wandb" and args.hub_token is not None:
595
+ raise ValueError(
596
+ "You cannot use both --report_to=wandb and --hub_token due to a security risk of exposing your token."
597
+ " Please use `huggingface-cli login` to authenticate with the Hub."
598
+ )
599
+
600
+ logging_dir = os.path.join(args.output_dir, args.logging_dir)
601
+ accelerator_project_config = ProjectConfiguration(project_dir=args.output_dir, logging_dir=logging_dir)
602
+ accelerator = Accelerator(
603
+ gradient_accumulation_steps=args.gradient_accumulation_steps,
604
+ mixed_precision=args.mixed_precision,
605
+ log_with=args.report_to,
606
+ project_config=accelerator_project_config,
607
+ )
608
+
609
+ # Disable AMP for MPS.
610
+ if torch.backends.mps.is_available():
611
+ accelerator.native_amp = False
612
+
613
+ if args.report_to == "wandb":
614
+ if not is_wandb_available():
615
+ raise ImportError("Make sure to install wandb if you want to use it for logging during training.")
616
+
617
+ # Make one log on every process with the configuration for debugging.
618
+ logging.basicConfig(
619
+ format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
620
+ datefmt="%m/%d/%Y %H:%M:%S",
621
+ level=logging.INFO,
622
+ )
623
+ logger.info(accelerator.state, main_process_only=False)
624
+ if accelerator.is_local_main_process:
625
+ transformers.utils.logging.set_verbosity_warning()
626
+ diffusers.utils.logging.set_verbosity_info()
627
+ else:
628
+ transformers.utils.logging.set_verbosity_error()
629
+ diffusers.utils.logging.set_verbosity_error()
630
+
631
+ # If passed along, set the training seed now.
632
+ if args.seed is not None:
633
+ set_seed(args.seed)
634
+
635
+ # Handle the repository creation
636
+ if accelerator.is_main_process:
637
+ if args.output_dir is not None:
638
+ os.makedirs(args.output_dir, exist_ok=True)
639
+
640
+ if args.push_to_hub:
641
+ repo_id = create_repo(
642
+ repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
643
+ ).repo_id
644
+
645
+ # Load tokenizer
646
+ if args.tokenizer_name:
647
+ tokenizer = CLIPTokenizer.from_pretrained(args.tokenizer_name)
648
+ elif args.pretrained_model_name_or_path:
649
+ tokenizer = CLIPTokenizer.from_pretrained(args.pretrained_model_name_or_path, subfolder="tokenizer")
650
+
651
+ # Load scheduler and models
652
+ noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
653
+ text_encoder = CLIPTextModel.from_pretrained(
654
+ args.pretrained_model_name_or_path, subfolder="text_encoder", revision=args.revision
655
+ )
656
+ vae = AutoencoderKL.from_pretrained(
657
+ args.pretrained_model_name_or_path, subfolder="vae", revision=args.revision, variant=args.variant
658
+ )
659
+ unet = UNet2DConditionModel.from_pretrained(
660
+ args.pretrained_model_name_or_path, subfolder="unet", revision=args.revision, variant=args.variant
661
+ )
662
+
663
+ # Add the placeholder token in tokenizer
664
+ placeholder_tokens = [args.placeholder_token]
665
+
666
+ if args.num_vectors < 1:
667
+ raise ValueError(f"--num_vectors has to be larger or equal to 1, but is {args.num_vectors}")
668
+
669
+ # add dummy tokens for multi-vector
670
+ additional_tokens = []
671
+ for i in range(1, args.num_vectors):
672
+ additional_tokens.append(f"{args.placeholder_token}_{i}")
673
+ placeholder_tokens += additional_tokens
674
+
675
+ num_added_tokens = tokenizer.add_tokens(placeholder_tokens)
676
+ if num_added_tokens != args.num_vectors:
677
+ raise ValueError(
678
+ f"The tokenizer already contains the token {args.placeholder_token}. Please pass a different"
679
+ " `placeholder_token` that is not already in the tokenizer."
680
+ )
681
+
682
+ # Convert the initializer_token, placeholder_token to ids
683
+ token_ids = tokenizer.encode(args.initializer_token, add_special_tokens=False)
684
+ # Check if initializer_token is a single token or a sequence of tokens
685
+ if len(token_ids) > 1:
686
+ raise ValueError("The initializer token must be a single token.")
687
+
688
+ initializer_token_id = token_ids[0]
689
+ placeholder_token_ids = tokenizer.convert_tokens_to_ids(placeholder_tokens)
690
+
691
+ # Resize the token embeddings as we are adding new special tokens to the tokenizer
692
+ text_encoder.resize_token_embeddings(len(tokenizer))
693
+
694
+ # Initialise the newly added placeholder token with the embeddings of the initializer token
695
+ token_embeds = text_encoder.get_input_embeddings().weight.data
696
+ with torch.no_grad():
697
+ for token_id in placeholder_token_ids:
698
+ token_embeds[token_id] = token_embeds[initializer_token_id].clone()
699
+
700
+ # Freeze vae and unet
701
+ vae.requires_grad_(False)
702
+ unet.requires_grad_(False)
703
+ # Freeze all parameters except for the token embeddings in text encoder
704
+ text_encoder.text_model.encoder.requires_grad_(False)
705
+ text_encoder.text_model.final_layer_norm.requires_grad_(False)
706
+ text_encoder.text_model.embeddings.position_embedding.requires_grad_(False)
707
+
708
+ if args.gradient_checkpointing:
709
+ # Keep unet in train mode if we are using gradient checkpointing to save memory.
710
+ # The dropout cannot be != 0 so it doesn't matter if we are in eval or train mode.
711
+ unet.train()
712
+ text_encoder.gradient_checkpointing_enable()
713
+ unet.enable_gradient_checkpointing()
714
+
715
+ if args.enable_xformers_memory_efficient_attention:
716
+ if is_xformers_available():
717
+ import xformers
718
+
719
+ xformers_version = version.parse(xformers.__version__)
720
+ if xformers_version == version.parse("0.0.16"):
721
+ logger.warning(
722
+ "xFormers 0.0.16 cannot be used for training in some GPUs. If you observe problems during training, please update xFormers to at least 0.0.17. See https://huggingface.co/docs/diffusers/main/en/optimization/xformers for more details."
723
+ )
724
+ unet.enable_xformers_memory_efficient_attention()
725
+ else:
726
+ raise ValueError("xformers is not available. Make sure it is installed correctly")
727
+
728
+ # Enable TF32 for faster training on Ampere GPUs,
729
+ # cf https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices
730
+ if args.allow_tf32:
731
+ torch.backends.cuda.matmul.allow_tf32 = True
732
+
733
+ if args.scale_lr:
734
+ args.learning_rate = (
735
+ args.learning_rate * args.gradient_accumulation_steps * args.train_batch_size * accelerator.num_processes
736
+ )
737
+
738
+ # Initialize the optimizer
739
+ optimizer = torch.optim.AdamW(
740
+ text_encoder.get_input_embeddings().parameters(), # only optimize the embeddings
741
+ lr=args.learning_rate,
742
+ betas=(args.adam_beta1, args.adam_beta2),
743
+ weight_decay=args.adam_weight_decay,
744
+ eps=args.adam_epsilon,
745
+ )
746
+
747
+ # Dataset and DataLoaders creation:
748
+ train_dataset = TextualInversionDataset(
749
+ data_root=args.train_data_dir,
750
+ tokenizer=tokenizer,
751
+ size=args.resolution,
752
+ placeholder_token=(" ".join(tokenizer.convert_ids_to_tokens(placeholder_token_ids))),
753
+ repeats=args.repeats,
754
+ learnable_property=args.learnable_property,
755
+ center_crop=args.center_crop,
756
+ set="train",
757
+ )
758
+ train_dataloader = torch.utils.data.DataLoader(
759
+ train_dataset, batch_size=args.train_batch_size, shuffle=True, num_workers=args.dataloader_num_workers
760
+ )
761
+ if args.validation_epochs is not None:
762
+ warnings.warn(
763
+ f"FutureWarning: You are doing logging with validation_epochs={args.validation_epochs}."
764
+ " Deprecated validation_epochs in favor of `validation_steps`"
765
+ f"Setting `args.validation_steps` to {args.validation_epochs * len(train_dataset)}",
766
+ FutureWarning,
767
+ stacklevel=2,
768
+ )
769
+ args.validation_steps = args.validation_epochs * len(train_dataset)
770
+
771
+ # Scheduler and math around the number of training steps.
772
+ overrode_max_train_steps = False
773
+ num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
774
+ if args.max_train_steps is None:
775
+ args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch
776
+ overrode_max_train_steps = True
777
+
778
+ lr_scheduler = get_scheduler(
779
+ args.lr_scheduler,
780
+ optimizer=optimizer,
781
+ num_warmup_steps=args.lr_warmup_steps * accelerator.num_processes,
782
+ num_training_steps=args.max_train_steps * accelerator.num_processes,
783
+ num_cycles=args.lr_num_cycles,
784
+ )
785
+
786
+ text_encoder.train()
787
+ # Prepare everything with our `accelerator`.
788
+ text_encoder, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(
789
+ text_encoder, optimizer, train_dataloader, lr_scheduler
790
+ )
791
+
792
+ # For mixed precision training we cast all non-trainable weights (vae and unet) to half-precision
793
+ # as these weights are only used for inference, keeping weights in full precision is not required.
794
+ weight_dtype = torch.float32
795
+ if accelerator.mixed_precision == "fp16":
796
+ weight_dtype = torch.float16
797
+ elif accelerator.mixed_precision == "bf16":
798
+ weight_dtype = torch.bfloat16
799
+
800
+ # Move vae and unet to device and cast to weight_dtype
801
+ unet.to(accelerator.device, dtype=weight_dtype)
802
+ vae.to(accelerator.device, dtype=weight_dtype)
803
+
804
+ # We need to recalculate our total training steps as the size of the training dataloader may have changed.
805
+ num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
806
+ if overrode_max_train_steps:
807
+ args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch
808
+ # Afterwards we recalculate our number of training epochs
809
+ args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)
810
+
811
+ # We need to initialize the trackers we use, and also store our configuration.
812
+ # The trackers initialize automatically on the main process.
813
+ if accelerator.is_main_process:
814
+ accelerator.init_trackers("textual_inversion", config=vars(args))
815
+
816
+ # Train!
817
+ total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
818
+
819
+ logger.info("***** Running training *****")
820
+ logger.info(f" Num examples = {len(train_dataset)}")
821
+ logger.info(f" Num Epochs = {args.num_train_epochs}")
822
+ logger.info(f" Instantaneous batch size per device = {args.train_batch_size}")
823
+ logger.info(f" Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}")
824
+ logger.info(f" Gradient Accumulation steps = {args.gradient_accumulation_steps}")
825
+ logger.info(f" Total optimization steps = {args.max_train_steps}")
826
+ global_step = 0
827
+ first_epoch = 0
828
+ # Potentially load in the weights and states from a previous save
829
+ if args.resume_from_checkpoint:
830
+ if args.resume_from_checkpoint != "latest":
831
+ path = os.path.basename(args.resume_from_checkpoint)
832
+ else:
833
+ # Get the most recent checkpoint
834
+ dirs = os.listdir(args.output_dir)
835
+ dirs = [d for d in dirs if d.startswith("checkpoint")]
836
+ dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
837
+ path = dirs[-1] if len(dirs) > 0 else None
838
+
839
+ if path is None:
840
+ accelerator.print(
841
+ f"Checkpoint '{args.resume_from_checkpoint}' does not exist. Starting a new training run."
842
+ )
843
+ args.resume_from_checkpoint = None
844
+ initial_global_step = 0
845
+ else:
846
+ accelerator.print(f"Resuming from checkpoint {path}")
847
+ accelerator.load_state(os.path.join(args.output_dir, path))
848
+ global_step = int(path.split("-")[1])
849
+
850
+ initial_global_step = global_step
851
+ first_epoch = global_step // num_update_steps_per_epoch
852
+
853
+ else:
854
+ initial_global_step = 0
855
+
856
+ progress_bar = tqdm(
857
+ range(0, args.max_train_steps),
858
+ initial=initial_global_step,
859
+ desc="Steps",
860
+ # Only show the progress bar once on each machine.
861
+ disable=not accelerator.is_local_main_process,
862
+ )
863
+
864
+ # keep original embeddings as reference
865
+ orig_embeds_params = accelerator.unwrap_model(text_encoder).get_input_embeddings().weight.data.clone()
866
+
867
+ for epoch in range(first_epoch, args.num_train_epochs):
868
+ text_encoder.train()
869
+ for step, batch in enumerate(train_dataloader):
870
+ with accelerator.accumulate(text_encoder):
871
+ # Convert images to latent space
872
+ latents = vae.encode(batch["pixel_values"].to(dtype=weight_dtype)).latent_dist.sample().detach()
873
+ latents = latents * vae.config.scaling_factor
874
+
875
+ # Sample noise that we'll add to the latents
876
+ noise = torch.randn_like(latents)
877
+ bsz = latents.shape[0]
878
+ # Sample a random timestep for each image
879
+ timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (bsz,), device=latents.device)
880
+ timesteps = timesteps.long()
881
+
882
+ # Add noise to the latents according to the noise magnitude at each timestep
883
+ # (this is the forward diffusion process)
884
+ noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)
885
+
886
+ # Get the text embedding for conditioning
887
+ encoder_hidden_states = text_encoder(batch["input_ids"])[0].to(dtype=weight_dtype)
888
+
889
+ # Predict the noise residual
890
+ model_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample
891
+
892
+ # Get the target for loss depending on the prediction type
893
+ if noise_scheduler.config.prediction_type == "epsilon":
894
+ target = noise
895
+ elif noise_scheduler.config.prediction_type == "v_prediction":
896
+ target = noise_scheduler.get_velocity(latents, noise, timesteps)
897
+ else:
898
+ raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}")
899
+
900
+ loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean")
901
+
902
+ accelerator.backward(loss)
903
+
904
+ optimizer.step()
905
+ lr_scheduler.step()
906
+ optimizer.zero_grad()
907
+
908
+ # Let's make sure we don't update any embedding weights besides the newly added token
909
+ index_no_updates = torch.ones((len(tokenizer),), dtype=torch.bool)
910
+ index_no_updates[min(placeholder_token_ids) : max(placeholder_token_ids) + 1] = False
911
+
912
+ with torch.no_grad():
913
+ accelerator.unwrap_model(text_encoder).get_input_embeddings().weight[
914
+ index_no_updates
915
+ ] = orig_embeds_params[index_no_updates]
916
+
917
+ # Checks if the accelerator has performed an optimization step behind the scenes
918
+ if accelerator.sync_gradients:
919
+ images = []
920
+ progress_bar.update(1)
921
+ global_step += 1
922
+ if global_step % args.save_steps == 0:
923
+ weight_name = (
924
+ f"learned_embeds-steps-{global_step}.bin"
925
+ if args.no_safe_serialization
926
+ else f"learned_embeds-steps-{global_step}.safetensors"
927
+ )
928
+ save_path = os.path.join(args.output_dir, weight_name)
929
+ save_progress(
930
+ text_encoder,
931
+ placeholder_token_ids,
932
+ accelerator,
933
+ args,
934
+ save_path,
935
+ safe_serialization=not args.no_safe_serialization,
936
+ )
937
+
938
+ if accelerator.is_main_process:
939
+ if global_step % args.checkpointing_steps == 0:
940
+ # _before_ saving state, check if this save would set us over the `checkpoints_total_limit`
941
+ if args.checkpoints_total_limit is not None:
942
+ checkpoints = os.listdir(args.output_dir)
943
+ checkpoints = [d for d in checkpoints if d.startswith("checkpoint")]
944
+ checkpoints = sorted(checkpoints, key=lambda x: int(x.split("-")[1]))
945
+
946
+ # before we save the new checkpoint, we need to have at _most_ `checkpoints_total_limit - 1` checkpoints
947
+ if len(checkpoints) >= args.checkpoints_total_limit:
948
+ num_to_remove = len(checkpoints) - args.checkpoints_total_limit + 1
949
+ removing_checkpoints = checkpoints[0:num_to_remove]
950
+
951
+ logger.info(
952
+ f"{len(checkpoints)} checkpoints already exist, removing {len(removing_checkpoints)} checkpoints"
953
+ )
954
+ logger.info(f"removing checkpoints: {', '.join(removing_checkpoints)}")
955
+
956
+ for removing_checkpoint in removing_checkpoints:
957
+ removing_checkpoint = os.path.join(args.output_dir, removing_checkpoint)
958
+ shutil.rmtree(removing_checkpoint)
959
+
960
+ save_path = os.path.join(args.output_dir, f"checkpoint-{global_step}")
961
+ accelerator.save_state(save_path)
962
+ logger.info(f"Saved state to {save_path}")
963
+
964
+ if args.validation_prompt is not None and global_step % args.validation_steps == 0:
965
+ images = log_validation(
966
+ text_encoder, tokenizer, unet, vae, args, accelerator, weight_dtype, epoch
967
+ )
968
+
969
+ logs = {"loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0]}
970
+ progress_bar.set_postfix(**logs)
971
+ accelerator.log(logs, step=global_step)
972
+
973
+ if global_step >= args.max_train_steps:
974
+ break
975
+ # Create the pipeline using the trained modules and save it.
976
+ accelerator.wait_for_everyone()
977
+ if accelerator.is_main_process:
978
+ if args.push_to_hub and not args.save_as_full_pipeline:
979
+ logger.warning("Enabling full model saving because --push_to_hub=True was specified.")
980
+ save_full_model = True
981
+ else:
982
+ save_full_model = args.save_as_full_pipeline
983
+ if save_full_model:
984
+ pipeline = StableDiffusionPipeline.from_pretrained(
985
+ args.pretrained_model_name_or_path,
986
+ text_encoder=accelerator.unwrap_model(text_encoder),
987
+ vae=vae,
988
+ unet=unet,
989
+ tokenizer=tokenizer,
990
+ )
991
+ pipeline.save_pretrained(args.output_dir)
992
+ # Save the newly trained embeddings
993
+ weight_name = "learned_embeds.bin" if args.no_safe_serialization else "learned_embeds.safetensors"
994
+ save_path = os.path.join(args.output_dir, weight_name)
995
+ save_progress(
996
+ text_encoder,
997
+ placeholder_token_ids,
998
+ accelerator,
999
+ args,
1000
+ save_path,
1001
+ safe_serialization=not args.no_safe_serialization,
1002
+ )
1003
+
1004
+ if args.push_to_hub:
1005
+ save_model_card(
1006
+ repo_id,
1007
+ images=images,
1008
+ base_model=args.pretrained_model_name_or_path,
1009
+ repo_folder=args.output_dir,
1010
+ )
1011
+ upload_folder(
1012
+ repo_id=repo_id,
1013
+ folder_path=args.output_dir,
1014
+ commit_message="End of training",
1015
+ ignore_patterns=["step_*", "epoch_*"],
1016
+ )
1017
+
1018
+ accelerator.end_training()
1019
+
1020
+
1021
+ if __name__ == "__main__":
1022
+ main()
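`textual_inversion.py` above saves only the learned token rows (by default as `learned_embeds.safetensors` in the output directory). A quick sanity check on that artifact, assuming the van Gogh output path from `inversion.sh`:

```python
from safetensors.torch import load_file

emb = load_file("./checkpoint/van_gogh_inversion/learned_embeds.safetensors")
for token, weight in emb.items():
    # For SD 1.x text encoders, each learned vector should be 768-dimensional.
    print(token, tuple(weight.shape))
```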
Gradient_ascent_traininig_scripts/train.sh ADDED
@@ -0,0 +1,16 @@
+ export MODEL_NAME="CompVis/stable-diffusion-v1-4"
+ export TRAIN_DIR="./generation/english_springer_erasure"
+
+ accelerate launch --mixed_precision="fp16" train_text_to_image.py \
+ --pretrained_model_name_or_path=$MODEL_NAME \
+ --train_data_dir=$TRAIN_DIR \
+ --use_ema \
+ --resolution=512 --center_crop --random_flip \
+ --train_batch_size=5 \
+ --gradient_accumulation_steps=4 \
+ --gradient_checkpointing \
+ --max_train_steps=10 \
+ --learning_rate=1e-05 \
+ --max_grad_norm=1 \
+ --lr_scheduler="constant" --lr_warmup_steps=0 \
+ --output_dir="./checkpoint/english_springer"
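`train.sh` drives the stock `train_text_to_image.py` below, which minimizes the usual denoising MSE on the generated concept images. For the gradient-ascent erasure runs this commit describes, the objective is presumably the same loss with its sign flipped, so the UNet is pushed *away* from reconstructing the concept. A self-contained toy illustration of that sign flip, with random tensors standing in for the UNet's noise prediction and the sampled noise:

```python
import torch
import torch.nn.functional as F

model_pred = torch.randn(2, 4, 64, 64, requires_grad=True)  # stand-in UNet output
target = torch.randn(2, 4, 64, 64)                          # stand-in sampled noise

descent_loss = F.mse_loss(model_pred, target)  # standard fine-tuning objective
ascent_loss = -descent_loss                    # gradient ascent: unlearn the concept
ascent_loss.backward()                         # gradients now *increase* the MSE
print(f"descent={descent_loss.item():.4f}, ascent={ascent_loss.item():.4f}")
```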
Gradient_ascent_traininig_scripts/train_text_to_image.py ADDED
@@ -0,0 +1,1156 @@
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import argparse
18
+ import logging
19
+ import math
20
+ import os
21
+ import random
22
+ import shutil
23
+ from contextlib import nullcontext
24
+ from pathlib import Path
25
+
26
+ import accelerate
27
+ import datasets
28
+ import numpy as np
29
+ import torch
30
+ import torch.nn.functional as F
31
+ import torch.utils.checkpoint
32
+ import transformers
33
+ from accelerate import Accelerator
34
+ from accelerate.logging import get_logger
35
+ from accelerate.state import AcceleratorState
36
+ from accelerate.utils import ProjectConfiguration, set_seed
37
+ from datasets import load_dataset
38
+ from huggingface_hub import create_repo, upload_folder
39
+ from packaging import version
40
+ from torchvision import transforms
41
+ from tqdm.auto import tqdm
42
+ from transformers import CLIPTextModel, CLIPTokenizer
43
+ from transformers.utils import ContextManagers
44
+
45
+ import diffusers
46
+ from diffusers import AutoencoderKL, DDPMScheduler, StableDiffusionPipeline, UNet2DConditionModel
47
+ from diffusers.optimization import get_scheduler
48
+ from diffusers.training_utils import EMAModel, compute_dream_and_update_latents, compute_snr
49
+ from diffusers.utils import check_min_version, deprecate, is_wandb_available, make_image_grid
50
+ from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card
51
+ from diffusers.utils.import_utils import is_xformers_available
52
+ from diffusers.utils.torch_utils import is_compiled_module
54
+
55
+ if is_wandb_available():
56
+ import wandb
57
+
58
+
59
+ # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
60
+ check_min_version("0.31.0.dev0")
61
+
62
+ logger = get_logger(__name__, log_level="INFO")
63
+
64
+ DATASET_NAME_MAPPING = {
65
+ "lambdalabs/naruto-blip-captions": ("image", "text"),
66
+ }
67
+
68
+
69
+ def save_model_card(
70
+ args,
71
+ repo_id: str,
72
+ images: list = None,
73
+ repo_folder: str = None,
74
+ ):
75
+ img_str = ""
76
+ if len(images) > 0:
77
+ image_grid = make_image_grid(images, 1, len(args.validation_prompts))
78
+ image_grid.save(os.path.join(repo_folder, "val_imgs_grid.png"))
79
+ img_str += "![val_imgs_grid](./val_imgs_grid.png)\n"
80
+
81
+ model_description = f"""
82
+ # Text-to-image finetuning - {repo_id}
83
+
84
+ This pipeline was finetuned from **{args.pretrained_model_name_or_path}** on the **{args.dataset_name}** dataset. Below are some example images generated with the finetuned pipeline using the following prompts: {args.validation_prompts}. \n
85
+ {img_str}
86
+
87
+ ## Pipeline usage
88
+
89
+ You can use the pipeline like so:
90
+
91
+ ```python
92
+ from diffusers import DiffusionPipeline
93
+ import torch
94
+
95
+ pipeline = DiffusionPipeline.from_pretrained("{repo_id}", torch_dtype=torch.float16)
96
+ prompt = "{args.validation_prompts[0]}"
97
+ image = pipeline(prompt).images[0]
98
+ image.save("my_image.png")
99
+ ```
100
+
101
+ ## Training info
102
+
103
+ These are the key hyperparameters used during training:
104
+
105
+ * Epochs: {args.num_train_epochs}
106
+ * Learning rate: {args.learning_rate}
107
+ * Batch size: {args.train_batch_size}
108
+ * Gradient accumulation steps: {args.gradient_accumulation_steps}
109
+ * Image resolution: {args.resolution}
110
+ * Mixed-precision: {args.mixed_precision}
111
+
112
+ """
113
+ wandb_info = ""
114
+ if is_wandb_available():
115
+ wandb_run_url = None
116
+ if wandb.run is not None:
117
+ wandb_run_url = wandb.run.url
118
+
119
+ if wandb_run_url is not None:
120
+ wandb_info = f"""
121
+ More information on all the CLI arguments and the environment are available on your [`wandb` run page]({wandb_run_url}).
122
+ """
123
+
124
+ model_description += wandb_info
125
+
126
+ model_card = load_or_create_model_card(
127
+ repo_id_or_path=repo_id,
128
+ from_training=True,
129
+ license="creativeml-openrail-m",
130
+ base_model=args.pretrained_model_name_or_path,
131
+ model_description=model_description,
132
+ inference=True,
133
+ )
134
+
135
+ tags = ["stable-diffusion", "stable-diffusion-diffusers", "text-to-image", "diffusers", "diffusers-training"]
136
+ model_card = populate_model_card(model_card, tags=tags)
137
+
138
+ model_card.save(os.path.join(repo_folder, "README.md"))
139
+
140
+
141
+ def log_validation(vae, text_encoder, tokenizer, unet, args, accelerator, weight_dtype, epoch):
142
+ logger.info("Running validation... ")
143
+
144
+ pipeline = StableDiffusionPipeline.from_pretrained(
145
+ args.pretrained_model_name_or_path,
146
+ vae=accelerator.unwrap_model(vae),
147
+ text_encoder=accelerator.unwrap_model(text_encoder),
148
+ tokenizer=tokenizer,
149
+ unet=accelerator.unwrap_model(unet),
150
+ safety_checker=None,
151
+ revision=args.revision,
152
+ variant=args.variant,
153
+ torch_dtype=weight_dtype,
154
+ )
155
+ pipeline = pipeline.to(accelerator.device)
156
+ pipeline.set_progress_bar_config(disable=True)
157
+
158
+ if args.enable_xformers_memory_efficient_attention:
159
+ pipeline.enable_xformers_memory_efficient_attention()
160
+
161
+ if args.seed is None:
162
+ generator = None
163
+ else:
164
+ generator = torch.Generator(device=accelerator.device).manual_seed(args.seed)
165
+
166
+ images = []
167
+ for i in range(len(args.validation_prompts)):
168
+ if torch.backends.mps.is_available():
169
+ autocast_ctx = nullcontext()
170
+ else:
171
+ autocast_ctx = torch.autocast(accelerator.device.type)
172
+
173
+ with autocast_ctx:
174
+ image = pipeline(args.validation_prompts[i], num_inference_steps=20, generator=generator).images[0]
175
+
176
+ images.append(image)
177
+
178
+ for tracker in accelerator.trackers:
179
+ if tracker.name == "tensorboard":
180
+ np_images = np.stack([np.asarray(img) for img in images])
181
+ tracker.writer.add_images("validation", np_images, epoch, dataformats="NHWC")
182
+ elif tracker.name == "wandb":
183
+ tracker.log(
184
+ {
185
+ "validation": [
186
+ wandb.Image(image, caption=f"{i}: {args.validation_prompts[i]}")
187
+ for i, image in enumerate(images)
188
+ ]
189
+ }
190
+ )
191
+ else:
192
+ logger.warning(f"image logging not implemented for {tracker.name}")
193
+
194
+ del pipeline
195
+ torch.cuda.empty_cache()
196
+
197
+ return images
198
+
199
+
200
+def parse_args():
+    parser = argparse.ArgumentParser(description="Simple example of a training script.")
+    parser.add_argument(
+        "--input_perturbation", type=float, default=0, help="The scale of input perturbation. Recommended 0.1."
+    )
+    parser.add_argument(
+        "--pretrained_model_name_or_path",
+        type=str,
+        default=None,
+        required=True,
+        help="Path to pretrained model or model identifier from huggingface.co/models.",
+    )
+    parser.add_argument(
+        "--revision",
+        type=str,
+        default=None,
+        required=False,
+        help="Revision of pretrained model identifier from huggingface.co/models.",
+    )
+    parser.add_argument(
+        "--variant",
+        type=str,
+        default=None,
+        help="Variant of the model files of the pretrained model identifier from huggingface.co/models, e.g. fp16",
+    )
+    parser.add_argument(
+        "--dataset_name",
+        type=str,
+        default=None,
+        help=(
+            "The name of the Dataset (from the HuggingFace hub) to train on (could be your own, possibly private,"
+            " dataset). It can also be a path pointing to a local copy of a dataset in your filesystem,"
+            " or to a folder containing files that 🤗 Datasets can understand."
+        ),
+    )
+    parser.add_argument(
+        "--dataset_config_name",
+        type=str,
+        default=None,
+        help="The config of the Dataset, leave as None if there's only one config.",
+    )
+    parser.add_argument(
+        "--train_data_dir",
+        type=str,
+        default=None,
+        help=(
+            "A folder containing the training data. Folder contents must follow the structure described in"
+            " https://huggingface.co/docs/datasets/image_dataset#imagefolder. In particular, a `metadata.jsonl` file"
+            " must exist to provide the captions for the images. Ignored if `dataset_name` is specified."
+        ),
+    )
+    parser.add_argument(
+        "--image_column", type=str, default="image", help="The column of the dataset containing an image."
+    )
+    parser.add_argument(
+        "--caption_column",
+        type=str,
+        default="text",
+        help="The column of the dataset containing a caption or a list of captions.",
+    )
+    parser.add_argument(
+        "--max_train_samples",
+        type=int,
+        default=None,
+        help=(
+            "For debugging purposes or quicker training, truncate the number of training examples to this "
+            "value if set."
+        ),
+    )
+    parser.add_argument(
+        "--validation_prompts",
+        type=str,
+        default=None,
+        nargs="+",
+        help=("A set of prompts evaluated every `--validation_epochs` and logged to `--report_to`."),
+    )
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        default="sd-model-finetuned",
+        help="The output directory where the model predictions and checkpoints will be written.",
+    )
+    parser.add_argument(
+        "--cache_dir",
+        type=str,
+        default=None,
+        help="The directory where the downloaded models and datasets will be stored.",
+    )
+    parser.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.")
+    parser.add_argument(
+        "--resolution",
+        type=int,
+        default=512,
+        help=(
+            "The resolution for input images; all the images in the train/validation dataset will be resized to this"
+            " resolution."
+        ),
+    )
+    parser.add_argument(
+        "--center_crop",
+        default=False,
+        action="store_true",
+        help=(
+            "Whether to center crop the input images to the resolution. If not set, the images will be randomly"
+            " cropped. The images will be resized to the resolution first before cropping."
+        ),
+    )
+    parser.add_argument(
+        "--random_flip",
+        action="store_true",
+        help="Whether to randomly flip images horizontally.",
+    )
+    parser.add_argument(
+        "--train_batch_size", type=int, default=16, help="Batch size (per device) for the training dataloader."
+    )
+    parser.add_argument("--num_train_epochs", type=int, default=100)
+    parser.add_argument(
+        "--max_train_steps",
+        type=int,
+        default=None,
+        help="Total number of training steps to perform. If provided, overrides num_train_epochs.",
+    )
+    parser.add_argument(
+        "--gradient_accumulation_steps",
+        type=int,
+        default=1,
+        help="Number of update steps to accumulate before performing a backward/update pass.",
+    )
+    parser.add_argument(
+        "--gradient_checkpointing",
+        action="store_true",
+        help="Whether or not to use gradient checkpointing to save memory at the expense of a slower backward pass.",
+    )
+    parser.add_argument(
+        "--learning_rate",
+        type=float,
+        default=1e-4,
+        help="Initial learning rate (after the potential warmup period) to use.",
+    )
+    parser.add_argument(
+        "--scale_lr",
+        action="store_true",
+        default=False,
+        help="Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.",
+    )
+    parser.add_argument(
+        "--lr_scheduler",
+        type=str,
+        default="constant",
+        help=(
+            'The scheduler type to use. Choose between ["linear", "cosine", "cosine_with_restarts", "polynomial",'
+            ' "constant", "constant_with_warmup"]'
+        ),
+    )
+    parser.add_argument(
+        "--lr_warmup_steps", type=int, default=500, help="Number of steps for the warmup in the lr scheduler."
+    )
+    parser.add_argument(
+        "--snr_gamma",
+        type=float,
+        default=None,
+        help="SNR weighting gamma to be used if rebalancing the loss. Recommended value is 5.0. "
+        "More details here: https://arxiv.org/abs/2303.09556.",
+    )
+    parser.add_argument(
+        "--dream_training",
+        action="store_true",
+        help=(
+            "Use the DREAM training method, which makes training more efficient and accurate at the "
+            "expense of doing an extra forward pass. See: https://arxiv.org/abs/2312.00210"
+        ),
+    )
+    parser.add_argument(
+        "--dream_detail_preservation",
+        type=float,
+        default=1.0,
+        help="Dream detail preservation factor p (should be greater than 0; default=1.0, as suggested in the paper)",
+    )
+    parser.add_argument(
+        "--use_8bit_adam", action="store_true", help="Whether or not to use 8-bit Adam from bitsandbytes."
+    )
+    parser.add_argument(
+        "--allow_tf32",
+        action="store_true",
+        help=(
+            "Whether or not to allow TF32 on Ampere GPUs. Can be used to speed up training. For more information, see"
+            " https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices"
+        ),
+    )
+    parser.add_argument("--use_ema", action="store_true", help="Whether to use EMA model.")
+    parser.add_argument("--offload_ema", action="store_true", help="Offload EMA model to CPU during training step.")
+    parser.add_argument("--foreach_ema", action="store_true", help="Use faster foreach implementation of EMAModel.")
+    parser.add_argument(
+        "--non_ema_revision",
+        type=str,
+        default=None,
+        required=False,
+        help=(
+            "Revision of pretrained non-ema model identifier. Must be a branch, tag or git identifier of the local or"
+            " remote repository specified with --pretrained_model_name_or_path."
+        ),
+    )
+    parser.add_argument(
+        "--dataloader_num_workers",
+        type=int,
+        default=0,
+        help=(
+            "Number of subprocesses to use for data loading. 0 means that the data will be loaded in the main process."
+        ),
+    )
+    parser.add_argument("--adam_beta1", type=float, default=0.9, help="The beta1 parameter for the Adam optimizer.")
+    parser.add_argument("--adam_beta2", type=float, default=0.999, help="The beta2 parameter for the Adam optimizer.")
+    parser.add_argument("--adam_weight_decay", type=float, default=1e-2, help="Weight decay to use.")
+    parser.add_argument("--adam_epsilon", type=float, default=1e-08, help="Epsilon value for the Adam optimizer")
+    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
+    parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
+    parser.add_argument("--hub_token", type=str, default=None, help="The token to use to push to the Model Hub.")
+    parser.add_argument(
+        "--prediction_type",
+        type=str,
+        default=None,
+        help="The prediction_type that shall be used for training. Choose between 'epsilon' or 'v_prediction' or leave `None`. If left to `None` the default prediction type of the scheduler: `noise_scheduler.config.prediction_type` is chosen.",
+    )
+    parser.add_argument(
+        "--hub_model_id",
+        type=str,
+        default=None,
+        help="The name of the repository to keep in sync with the local `output_dir`.",
+    )
+    parser.add_argument(
+        "--logging_dir",
+        type=str,
+        default="logs",
+        help=(
+            "[TensorBoard](https://www.tensorflow.org/tensorboard) log directory. Will default to"
+            " *output_dir/runs/**CURRENT_DATETIME_HOSTNAME***."
+        ),
+    )
+    parser.add_argument(
+        "--mixed_precision",
+        type=str,
+        default=None,
+        choices=["no", "fp16", "bf16"],
+        help=(
+            "Whether to use mixed precision. Choose between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >="
+            " 1.10 and an Nvidia Ampere GPU. Defaults to the value of the accelerate config of the current system or the"
+            " flag passed with the `accelerate.launch` command. Use this argument to override the accelerate config."
+        ),
+    )
+    parser.add_argument(
+        "--report_to",
+        type=str,
+        default="tensorboard",
+        help=(
+            'The integration to report the results and logs to. Supported platforms are `"tensorboard"`'
+            ' (default), `"wandb"` and `"comet_ml"`. Use `"all"` to report to all integrations.'
+        ),
+    )
+    parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
+    parser.add_argument(
+        "--checkpointing_steps",
+        type=int,
+        default=500,
+        help=(
+            "Save a checkpoint of the training state every X updates. These checkpoints are only suitable for resuming"
+            " training using `--resume_from_checkpoint`."
+        ),
+    )
+    parser.add_argument(
+        "--checkpoints_total_limit",
+        type=int,
+        default=None,
+        help=("Max number of checkpoints to store."),
+    )
+    parser.add_argument(
+        "--resume_from_checkpoint",
+        type=str,
+        default=None,
+        help=(
+            "Whether training should be resumed from a previous checkpoint. Use a path saved by"
+            ' `--checkpointing_steps`, or `"latest"` to automatically select the last available checkpoint.'
+        ),
+    )
+    parser.add_argument(
+        "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers."
+    )
+    parser.add_argument("--noise_offset", type=float, default=0, help="The scale of noise offset.")
+    parser.add_argument(
+        "--validation_epochs",
+        type=int,
+        default=5,
+        help="Run validation every X epochs.",
+    )
+    parser.add_argument(
+        "--tracker_project_name",
+        type=str,
+        default="text2image-fine-tune",
+        help=(
+            "The `project_name` argument passed to Accelerator.init_trackers. For more information, see"
+            " https://huggingface.co/docs/accelerate/v0.17.0/en/package_reference/accelerator#accelerate.Accelerator"
+        ),
+    )
+
+    args = parser.parse_args()
+    env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
+    if env_local_rank != -1 and env_local_rank != args.local_rank:
+        args.local_rank = env_local_rank
+
+    # Sanity checks
+    if args.dataset_name is None and args.train_data_dir is None:
+        raise ValueError("Need either a dataset name or a training folder.")
+
+    # Default to using the same revision for the non-EMA model if not specified.
+    if args.non_ema_revision is None:
+        args.non_ema_revision = args.revision
+
+    return args
+
+
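For reference, a minimal sketch of the CLI contract `parse_args` defines. Every value below is a hypothetical placeholder (the repo's `train.sh` wraps the actual `accelerate launch` invocation), and the base checkpoint is assumed to be a Stable Diffusion v1.x model:

```python
# Sketch only: drive parse_args() programmatically with hypothetical values.
import sys

sys.argv = [
    "train_text_to_image.py",
    "--pretrained_model_name_or_path", "CompVis/stable-diffusion-v1-4",  # assumption: SD v1.x base
    "--train_data_dir", "data/car",    # imagefolder with train/*.jpg + metadata.jsonl
    "--output_dir", "checkpoint/car",  # where the unlearned pipeline is written
    "--resolution", "512",
    "--train_batch_size", "4",
    "--max_train_steps", "200",
    "--learning_rate", "1e-5",
]
args = parse_args()
print(args.train_data_dir, args.max_train_steps)  # sanity-check the parsed namespace
```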
+def main():
+    args = parse_args()
+
+    if args.report_to == "wandb" and args.hub_token is not None:
+        raise ValueError(
+            "You cannot use both --report_to=wandb and --hub_token due to a security risk of exposing your token."
+            " Please use `huggingface-cli login` to authenticate with the Hub."
+        )
+
+    if args.non_ema_revision is not None:
+        deprecate(
+            "non_ema_revision!=None",
+            "0.15.0",
+            message=(
+                "Downloading 'non_ema' weights from revision branches of the Hub is deprecated. Please make sure to"
+                " use `--variant=non_ema` instead."
+            ),
+        )
+    logging_dir = os.path.join(args.output_dir, args.logging_dir)
+
+    accelerator_project_config = ProjectConfiguration(project_dir=args.output_dir, logging_dir=logging_dir)
+
+    accelerator = Accelerator(
+        gradient_accumulation_steps=args.gradient_accumulation_steps,
+        mixed_precision=args.mixed_precision,
+        log_with=args.report_to,
+        project_config=accelerator_project_config,
+    )
+
+    # Disable AMP for MPS.
+    if torch.backends.mps.is_available():
+        accelerator.native_amp = False
+
+    # Make one log on every process with the configuration for debugging.
+    logging.basicConfig(
+        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+        datefmt="%m/%d/%Y %H:%M:%S",
+        level=logging.INFO,
+    )
+    logger.info(accelerator.state, main_process_only=False)
+    if accelerator.is_local_main_process:
+        datasets.utils.logging.set_verbosity_warning()
+        transformers.utils.logging.set_verbosity_warning()
+        diffusers.utils.logging.set_verbosity_info()
+    else:
+        datasets.utils.logging.set_verbosity_error()
+        transformers.utils.logging.set_verbosity_error()
+        diffusers.utils.logging.set_verbosity_error()
+
+    # If passed along, set the training seed now.
+    if args.seed is not None:
+        set_seed(args.seed)
+
+    # Handle the repository creation
+    if accelerator.is_main_process:
+        if args.output_dir is not None:
+            os.makedirs(args.output_dir, exist_ok=True)
+
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
+    # Load scheduler, tokenizer and models.
+    noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
+    tokenizer = CLIPTokenizer.from_pretrained(
+        args.pretrained_model_name_or_path, subfolder="tokenizer", revision=args.revision
+    )
+
+    def deepspeed_zero_init_disabled_context_manager():
+        """
+        Return either a context list that includes one that will disable zero.Init, or an empty context list.
+        """
+        deepspeed_plugin = AcceleratorState().deepspeed_plugin if accelerate.state.is_initialized() else None
+        if deepspeed_plugin is None:
+            return []
+
+        return [deepspeed_plugin.zero3_init_context_manager(enable=False)]
+
+    # Currently Accelerate doesn't know how to handle multiple models under DeepSpeed ZeRO stage 3.
+    # For this to work properly all models must be run through `accelerate.prepare`. But accelerate
+    # will try to assign the same optimizer with the same weights to all models during
+    # `deepspeed.initialize`, which of course doesn't work.
+    #
+    # For now the following workaround will partially support DeepSpeed ZeRO-3, by excluding the two
+    # frozen models from being partitioned during `zero.Init`, which gets called during
+    # `from_pretrained`. So CLIPTextModel and AutoencoderKL will not enjoy the parameter sharding
+    # across multiple GPUs and only UNet2DConditionModel will get ZeRO sharded.
+    with ContextManagers(deepspeed_zero_init_disabled_context_manager()):
+        text_encoder = CLIPTextModel.from_pretrained(
+            args.pretrained_model_name_or_path, subfolder="text_encoder", revision=args.revision, variant=args.variant
+        )
+        vae = AutoencoderKL.from_pretrained(
+            args.pretrained_model_name_or_path, subfolder="vae", revision=args.revision, variant=args.variant
+        )
+
+    unet = UNet2DConditionModel.from_pretrained(
+        args.pretrained_model_name_or_path, subfolder="unet", revision=args.non_ema_revision
+    )
+
+    # Freeze vae and text_encoder and set unet to trainable
+    vae.requires_grad_(False)
+    text_encoder.requires_grad_(False)
+    unet.train()
+
+    # Create EMA for the unet.
+    if args.use_ema:
+        ema_unet = UNet2DConditionModel.from_pretrained(
+            args.pretrained_model_name_or_path, subfolder="unet", revision=args.revision, variant=args.variant
+        )
+        ema_unet = EMAModel(
+            ema_unet.parameters(),
+            model_cls=UNet2DConditionModel,
+            model_config=ema_unet.config,
+            foreach=args.foreach_ema,
+        )
+
+    if args.enable_xformers_memory_efficient_attention:
+        if is_xformers_available():
+            import xformers
+
+            xformers_version = version.parse(xformers.__version__)
+            if xformers_version == version.parse("0.0.16"):
+                logger.warning(
+                    "xFormers 0.0.16 cannot be used for training on some GPUs. If you observe problems during training, please update xFormers to at least 0.0.17. See https://huggingface.co/docs/diffusers/main/en/optimization/xformers for more details."
+                )
+            unet.enable_xformers_memory_efficient_attention()
+        else:
+            raise ValueError("xformers is not available. Make sure it is installed correctly.")
+
+    # `accelerate` 0.16.0 will have better support for customized saving
+    if version.parse(accelerate.__version__) >= version.parse("0.16.0"):
+        # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format
+        def save_model_hook(models, weights, output_dir):
+            if accelerator.is_main_process:
+                if args.use_ema:
+                    ema_unet.save_pretrained(os.path.join(output_dir, "unet_ema"))
+
+                for i, model in enumerate(models):
+                    model.save_pretrained(os.path.join(output_dir, "unet"))
+
+                    # make sure to pop weight so that corresponding model is not saved again
+                    weights.pop()
+
+        def load_model_hook(models, input_dir):
+            if args.use_ema:
+                load_model = EMAModel.from_pretrained(
+                    os.path.join(input_dir, "unet_ema"), UNet2DConditionModel, foreach=args.foreach_ema
+                )
+                ema_unet.load_state_dict(load_model.state_dict())
+                if args.offload_ema:
+                    ema_unet.pin_memory()
+                else:
+                    ema_unet.to(accelerator.device)
+                del load_model
+
+            for _ in range(len(models)):
+                # pop models so that they are not loaded again
+                model = models.pop()
+
+                # load diffusers style into model
+                load_model = UNet2DConditionModel.from_pretrained(input_dir, subfolder="unet")
+                model.register_to_config(**load_model.config)
+
+                model.load_state_dict(load_model.state_dict())
+                del load_model
+
+        accelerator.register_save_state_pre_hook(save_model_hook)
+        accelerator.register_load_state_pre_hook(load_model_hook)
+
+    if args.gradient_checkpointing:
+        unet.enable_gradient_checkpointing()
+
+    # Enable TF32 for faster training on Ampere GPUs,
+    # cf https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices
+    if args.allow_tf32:
+        torch.backends.cuda.matmul.allow_tf32 = True
+
+    if args.scale_lr:
+        args.learning_rate = (
+            args.learning_rate * args.gradient_accumulation_steps * args.train_batch_size * accelerator.num_processes
+        )
+
+    # Initialize the optimizer
+    if args.use_8bit_adam:
+        try:
+            import bitsandbytes as bnb
+        except ImportError:
+            raise ImportError(
+                "Please install bitsandbytes to use 8-bit Adam. You can do so by running `pip install bitsandbytes`"
+            )
+
+        optimizer_cls = bnb.optim.AdamW8bit
+    else:
+        optimizer_cls = torch.optim.AdamW
+
+    optimizer = optimizer_cls(
+        unet.parameters(),
+        lr=args.learning_rate,
+        betas=(args.adam_beta1, args.adam_beta2),
+        weight_decay=args.adam_weight_decay,
+        eps=args.adam_epsilon,
+    )
+
+    # Get the datasets: you can either provide your own training and evaluation files (see below)
+    # or specify a Dataset from the hub (the dataset will be downloaded automatically from the datasets Hub).
+
+    # In distributed training, the load_dataset function guarantees that only one local process can concurrently
+    # download the dataset.
+    if args.dataset_name is not None:
+        # Downloading and loading a dataset from the hub.
+        dataset = load_dataset(
+            args.dataset_name,
+            args.dataset_config_name,
+            cache_dir=args.cache_dir,
+            data_dir=args.train_data_dir,
+        )
+    else:
+        data_files = {}
+        if args.train_data_dir is not None:
+            data_files["train"] = os.path.join(args.train_data_dir, "**")
+        dataset = load_dataset(
+            "imagefolder",
+            data_files=data_files,
+            cache_dir=args.cache_dir,
+        )
+        # See more about loading custom images at
+        # https://huggingface.co/docs/datasets/v2.4.0/en/image_load#imagefolder
+
+    # Preprocessing the datasets.
+    # We need to tokenize inputs and targets.
+    column_names = dataset["train"].column_names
+
+    # Get the column names for input/target.
+    dataset_columns = DATASET_NAME_MAPPING.get(args.dataset_name, None)
+    if args.image_column is None:
+        image_column = dataset_columns[0] if dataset_columns is not None else column_names[0]
+    else:
+        image_column = args.image_column
+        if image_column not in column_names:
+            raise ValueError(
+                f"`--image_column` value '{args.image_column}' needs to be one of: {', '.join(column_names)}"
+            )
+    if args.caption_column is None:
+        caption_column = dataset_columns[1] if dataset_columns is not None else column_names[1]
+    else:
+        caption_column = args.caption_column
+        if caption_column not in column_names:
+            raise ValueError(
+                f"`--caption_column` value '{args.caption_column}' needs to be one of: {', '.join(column_names)}"
+            )
+
+    # Preprocessing the datasets.
+    # We need to tokenize input captions and transform the images.
+    def tokenize_captions(examples, is_train=True):
+        captions = []
+        for caption in examples[caption_column]:
+            if isinstance(caption, str):
+                captions.append(caption)
+            elif isinstance(caption, (list, np.ndarray)):
+                # take a random caption if there are multiple
+                captions.append(random.choice(caption) if is_train else caption[0])
+            else:
+                raise ValueError(
+                    f"Caption column `{caption_column}` should contain either strings or lists of strings."
+                )
+        inputs = tokenizer(
+            captions, max_length=tokenizer.model_max_length, padding="max_length", truncation=True, return_tensors="pt"
+        )
+        return inputs.input_ids
+
+    # Preprocessing the datasets.
+    train_transforms = transforms.Compose(
+        [
+            transforms.Resize(args.resolution, interpolation=transforms.InterpolationMode.BILINEAR),
+            transforms.CenterCrop(args.resolution) if args.center_crop else transforms.RandomCrop(args.resolution),
+            transforms.RandomHorizontalFlip() if args.random_flip else transforms.Lambda(lambda x: x),
+            transforms.ToTensor(),
+            transforms.Normalize([0.5], [0.5]),
+        ]
+    )
+
+    def preprocess_train(examples):
+        images = [image.convert("RGB") for image in examples[image_column]]
+        examples["pixel_values"] = [train_transforms(image) for image in images]
+        examples["input_ids"] = tokenize_captions(examples)
+        return examples
+
+    with accelerator.main_process_first():
+        if args.max_train_samples is not None:
+            dataset["train"] = dataset["train"].shuffle(seed=args.seed).select(range(args.max_train_samples))
+        # Set the training transforms
+        train_dataset = dataset["train"].with_transform(preprocess_train)
+
+    def collate_fn(examples):
+        pixel_values = torch.stack([example["pixel_values"] for example in examples])
+        pixel_values = pixel_values.to(memory_format=torch.contiguous_format).float()
+        input_ids = torch.stack([example["input_ids"] for example in examples])
+        return {"pixel_values": pixel_values, "input_ids": input_ids}
+
+    # DataLoaders creation:
+    train_dataloader = torch.utils.data.DataLoader(
+        train_dataset,
+        shuffle=True,
+        collate_fn=collate_fn,
+        batch_size=args.train_batch_size,
+        num_workers=args.dataloader_num_workers,
+    )
+
+    # Scheduler and math around the number of training steps.
+    # Check the PR https://github.com/huggingface/diffusers/pull/8312 for detailed explanation.
+    num_warmup_steps_for_scheduler = args.lr_warmup_steps * accelerator.num_processes
+    if args.max_train_steps is None:
+        len_train_dataloader_after_sharding = math.ceil(len(train_dataloader) / accelerator.num_processes)
+        num_update_steps_per_epoch = math.ceil(len_train_dataloader_after_sharding / args.gradient_accumulation_steps)
+        num_training_steps_for_scheduler = (
+            args.num_train_epochs * num_update_steps_per_epoch * accelerator.num_processes
+        )
+    else:
+        num_training_steps_for_scheduler = args.max_train_steps * accelerator.num_processes
+
+    lr_scheduler = get_scheduler(
+        args.lr_scheduler,
+        optimizer=optimizer,
+        num_warmup_steps=num_warmup_steps_for_scheduler,
+        num_training_steps=num_training_steps_for_scheduler,
+    )
+
+    # Prepare everything with our `accelerator`.
+    unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(
+        unet, optimizer, train_dataloader, lr_scheduler
+    )
+
+    if args.use_ema:
+        if args.offload_ema:
+            ema_unet.pin_memory()
+        else:
+            ema_unet.to(accelerator.device)
+
+    # For mixed precision training we cast all non-trainable weights (vae and text_encoder) to half-precision,
+    # as these weights are only used for inference; keeping them in full precision is not required.
+    weight_dtype = torch.float32
+    if accelerator.mixed_precision == "fp16":
+        weight_dtype = torch.float16
+        args.mixed_precision = accelerator.mixed_precision
+    elif accelerator.mixed_precision == "bf16":
+        weight_dtype = torch.bfloat16
+        args.mixed_precision = accelerator.mixed_precision
+
+    # Move text_encoder and vae to GPU and cast to weight_dtype
+    text_encoder.to(accelerator.device, dtype=weight_dtype)
+    vae.to(accelerator.device, dtype=weight_dtype)
+
+    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
+    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
+    if args.max_train_steps is None:
+        args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch
+        if num_training_steps_for_scheduler != args.max_train_steps * accelerator.num_processes:
+            logger.warning(
+                f"The length of the 'train_dataloader' after 'accelerator.prepare' ({len(train_dataloader)}) does not match "
+                f"the expected length ({len_train_dataloader_after_sharding}) when the learning rate scheduler was created. "
+                f"This inconsistency may result in the learning rate scheduler not functioning properly."
+            )
+    # Afterwards we recalculate our number of training epochs
+    args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)
+
+    # We need to initialize the trackers we use, and also store our configuration.
+    # The trackers are initialized automatically on the main process.
+    if accelerator.is_main_process:
+        tracker_config = dict(vars(args))
+        tracker_config.pop("validation_prompts")
+        accelerator.init_trackers(args.tracker_project_name, tracker_config)
+
+    # Function for unwrapping if model was compiled with `torch.compile`.
+    def unwrap_model(model):
+        model = accelerator.unwrap_model(model)
+        model = model._orig_mod if is_compiled_module(model) else model
+        return model
+
+    # Train!
+    total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
+
+    logger.info("***** Running training *****")
+    logger.info(f"  Num examples = {len(train_dataset)}")
+    logger.info(f"  Num Epochs = {args.num_train_epochs}")
+    logger.info(f"  Instantaneous batch size per device = {args.train_batch_size}")
+    logger.info(f"  Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}")
+    logger.info(f"  Gradient Accumulation steps = {args.gradient_accumulation_steps}")
+    logger.info(f"  Total optimization steps = {args.max_train_steps}")
+    global_step = 0
+    first_epoch = 0
+
+    # Potentially load in the weights and states from a previous save
+    if args.resume_from_checkpoint:
+        if args.resume_from_checkpoint != "latest":
+            path = os.path.basename(args.resume_from_checkpoint)
+        else:
+            # Get the most recent checkpoint
+            dirs = os.listdir(args.output_dir)
+            dirs = [d for d in dirs if d.startswith("checkpoint")]
+            dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
+            path = dirs[-1] if len(dirs) > 0 else None
+
+        if path is None:
+            accelerator.print(
+                f"Checkpoint '{args.resume_from_checkpoint}' does not exist. Starting a new training run."
+            )
+            args.resume_from_checkpoint = None
+            initial_global_step = 0
+        else:
+            accelerator.print(f"Resuming from checkpoint {path}")
+            accelerator.load_state(os.path.join(args.output_dir, path))
+            global_step = int(path.split("-")[1])
+
+            initial_global_step = global_step
+            first_epoch = global_step // num_update_steps_per_epoch
+
+    else:
+        initial_global_step = 0
+
+    progress_bar = tqdm(
+        range(0, args.max_train_steps),
+        initial=initial_global_step,
+        desc="Steps",
+        # Only show the progress bar once on each machine.
+        disable=not accelerator.is_local_main_process,
+    )
+
+    for epoch in range(first_epoch, args.num_train_epochs):
+        train_loss = 0.0
+        for step, batch in enumerate(train_dataloader):
+            with accelerator.accumulate(unet):
+                # Convert images to latent space
+                latents = vae.encode(batch["pixel_values"].to(weight_dtype)).latent_dist.sample()
+                latents = latents * vae.config.scaling_factor
+
+                # Sample noise that we'll add to the latents
+                noise = torch.randn_like(latents)
+                if args.noise_offset:
+                    # https://www.crosslabs.org//blog/diffusion-with-offset-noise
+                    noise += args.noise_offset * torch.randn(
+                        (latents.shape[0], latents.shape[1], 1, 1), device=latents.device
+                    )
+                if args.input_perturbation:
+                    new_noise = noise + args.input_perturbation * torch.randn_like(noise)
+                bsz = latents.shape[0]
+                # Sample a random timestep for each image
+                timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (bsz,), device=latents.device)
+                timesteps = timesteps.long()
+
+                # Add noise to the latents according to the noise magnitude at each timestep
+                # (this is the forward diffusion process)
+                if args.input_perturbation:
+                    noisy_latents = noise_scheduler.add_noise(latents, new_noise, timesteps)
+                else:
+                    noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)
+
+                # Get the text embedding for conditioning
+                encoder_hidden_states = text_encoder(batch["input_ids"], return_dict=False)[0]
+
+                # Get the target for loss depending on the prediction type
+                if args.prediction_type is not None:
+                    # set prediction_type of scheduler if defined
+                    noise_scheduler.register_to_config(prediction_type=args.prediction_type)
+
+                if noise_scheduler.config.prediction_type == "epsilon":
+                    target = noise
+                elif noise_scheduler.config.prediction_type == "v_prediction":
+                    target = noise_scheduler.get_velocity(latents, noise, timesteps)
+                else:
+                    raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}")
+
+                if args.dream_training:
+                    noisy_latents, target = compute_dream_and_update_latents(
+                        unet,
+                        noise_scheduler,
+                        timesteps,
+                        noise,
+                        noisy_latents,
+                        target,
+                        encoder_hidden_states,
+                        args.dream_detail_preservation,
+                    )
+
+                # Predict the noise residual and compute loss
+                model_pred = unet(noisy_latents, timesteps, encoder_hidden_states, return_dict=False)[0]
+
+                if args.snr_gamma is None:
+                    loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean")
+                else:
+                    # Compute loss-weights as per Section 3.4 of https://arxiv.org/abs/2303.09556.
+                    # Since we predict the noise instead of x_0, the original formulation is slightly changed.
+                    # This is discussed in Section 4.2 of the same paper.
+                    snr = compute_snr(noise_scheduler, timesteps)
+                    mse_loss_weights = torch.stack([snr, args.snr_gamma * torch.ones_like(timesteps)], dim=1).min(
+                        dim=1
+                    )[0]
+                    if noise_scheduler.config.prediction_type == "epsilon":
+                        mse_loss_weights = mse_loss_weights / snr
+                    elif noise_scheduler.config.prediction_type == "v_prediction":
+                        mse_loss_weights = mse_loss_weights / (snr + 1)
+
+                    loss = F.mse_loss(model_pred.float(), target.float(), reduction="none")
+                    loss = loss.mean(dim=list(range(1, len(loss.shape)))) * mse_loss_weights
+                    loss = loss.mean()
+
+                # Flip the sign of the loss for gradient ascent: the optimizer now *maximizes* the
+                # denoising error on the erasure concept instead of minimizing it.
+                loss = -loss
+
+                # Gather the losses across all processes for logging (if we use distributed training).
+                avg_loss = accelerator.gather(loss.repeat(args.train_batch_size)).mean()
+                train_loss += avg_loss.item() / args.gradient_accumulation_steps
+
+                # Backpropagate
+                accelerator.backward(loss)
+                if accelerator.sync_gradients:
+                    accelerator.clip_grad_norm_(unet.parameters(), args.max_grad_norm)
+                optimizer.step()
+                lr_scheduler.step()
+                optimizer.zero_grad()
+
+            # Checks if the accelerator has performed an optimization step behind the scenes
+            if accelerator.sync_gradients:
+                if args.use_ema:
+                    if args.offload_ema:
+                        ema_unet.to(device="cuda", non_blocking=True)
+                    ema_unet.step(unet.parameters())
+                    if args.offload_ema:
+                        ema_unet.to(device="cpu", non_blocking=True)
+                progress_bar.update(1)
+                global_step += 1
+                accelerator.log({"train_loss": train_loss}, step=global_step)
+                train_loss = 0.0
+
+                if global_step % args.checkpointing_steps == 0:
+                    if accelerator.is_main_process:
+                        # _before_ saving state, check if this save would set us over the `checkpoints_total_limit`
+                        if args.checkpoints_total_limit is not None:
+                            checkpoints = os.listdir(args.output_dir)
+                            checkpoints = [d for d in checkpoints if d.startswith("checkpoint")]
+                            checkpoints = sorted(checkpoints, key=lambda x: int(x.split("-")[1]))
+
+                            # before we save the new checkpoint, we need to have at _most_ `checkpoints_total_limit - 1` checkpoints
+                            if len(checkpoints) >= args.checkpoints_total_limit:
+                                num_to_remove = len(checkpoints) - args.checkpoints_total_limit + 1
+                                removing_checkpoints = checkpoints[0:num_to_remove]
+
+                                logger.info(
+                                    f"{len(checkpoints)} checkpoints already exist, removing {len(removing_checkpoints)} checkpoints"
+                                )
+                                logger.info(f"removing checkpoints: {', '.join(removing_checkpoints)}")
+
+                                for removing_checkpoint in removing_checkpoints:
+                                    removing_checkpoint = os.path.join(args.output_dir, removing_checkpoint)
+                                    shutil.rmtree(removing_checkpoint)
+
+                        save_path = os.path.join(args.output_dir, f"checkpoint-{global_step}")
+                        accelerator.save_state(save_path)
+                        logger.info(f"Saved state to {save_path}")
+
+            logs = {"step_loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0]}
+            progress_bar.set_postfix(**logs)
+
+            if global_step >= args.max_train_steps:
+                break
+
+        if accelerator.is_main_process:
+            if args.validation_prompts is not None and epoch % args.validation_epochs == 0:
+                if args.use_ema:
+                    # Store the UNet parameters temporarily and load the EMA parameters to perform inference.
+                    ema_unet.store(unet.parameters())
+                    ema_unet.copy_to(unet.parameters())
+                log_validation(
+                    vae,
+                    text_encoder,
+                    tokenizer,
+                    unet,
+                    args,
+                    accelerator,
+                    weight_dtype,
+                    global_step,
+                )
+                if args.use_ema:
+                    # Switch back to the original UNet parameters.
+                    ema_unet.restore(unet.parameters())
+
+    # Create the pipeline using the trained modules and save it.
+    accelerator.wait_for_everyone()
+    if accelerator.is_main_process:
+        unet = unwrap_model(unet)
+        if args.use_ema:
+            ema_unet.copy_to(unet.parameters())
+
+        pipeline = StableDiffusionPipeline.from_pretrained(
+            args.pretrained_model_name_or_path,
+            text_encoder=text_encoder,
+            vae=vae,
+            unet=unet,
+            revision=args.revision,
+            variant=args.variant,
+        )
+        pipeline.save_pretrained(args.output_dir)
+
+        # Run a final round of inference.
+        images = []
+        if args.validation_prompts is not None:
+            logger.info("Running inference for collecting generated images...")
+            pipeline = pipeline.to(accelerator.device)
+            pipeline.torch_dtype = weight_dtype
+            pipeline.set_progress_bar_config(disable=True)
+
+            if args.enable_xformers_memory_efficient_attention:
+                pipeline.enable_xformers_memory_efficient_attention()
+
+            if args.seed is None:
+                generator = None
+            else:
+                generator = torch.Generator(device=accelerator.device).manual_seed(args.seed)
+
+            for i in range(len(args.validation_prompts)):
+                with torch.autocast("cuda"):
+                    image = pipeline(args.validation_prompts[i], num_inference_steps=20, generator=generator).images[0]
+                images.append(image)
+
+        if args.push_to_hub:
+            save_model_card(args, repo_id, images, repo_folder=args.output_dir)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                ignore_patterns=["step_*", "epoch_*"],
+            )
+
+    accelerator.end_training()
+
+
+if __name__ == "__main__":
+    main()
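The only deviation from stock `train_text_to_image.py` is the `loss = -loss` line above. A condensed, self-contained sketch of what one gradient-ascent update does (a paraphrase of the loop above, not a drop-in replacement):

```python
# Sketch: one gradient-ascent step. Standard fine-tuning *minimizes* the noise-prediction
# MSE; negating the loss makes the optimizer *maximize* it on the erasure concept
# ("a car"), degrading the UNet's ability to denoise that concept.
import torch
import torch.nn.functional as F

def gradient_ascent_step(unet, optimizer, noisy_latents, timesteps, encoder_hidden_states, target):
    model_pred = unet(noisy_latents, timesteps, encoder_hidden_states, return_dict=False)[0]
    mse = F.mse_loss(model_pred.float(), target.float(), reduction="mean")
    (-mse).backward()                                       # ascent: negate before backprop
    torch.nn.utils.clip_grad_norm_(unet.parameters(), 1.0)  # same max_grad_norm default as above
    optimizer.step()
    optimizer.zero_grad()
    return mse.detach()  # the (positive) denoising error being driven up
```

The `metadata.jsonl` added below is what the `imagefolder` branch of `main()` consumes: each record pairs a `train/*.jpg` with the caption "a car". A minimal sketch of the loading step, assuming the file lives in a local `data/car` folder (hypothetical path):

```python
# Sketch: how 🤗 Datasets resolves the metadata.jsonl below into "image"/"text" columns.
from datasets import load_dataset

dataset = load_dataset("imagefolder", data_dir="data/car")  # finds metadata.jsonl automatically
sample = dataset["train"][0]
print(sample["text"])   # -> "a car"
print(sample["image"])  # -> PIL.Image for train/1.jpg
```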
metadata.jsonl ADDED
@@ -0,0 +1,1000 @@
+{"file_name": "train/1.jpg", "text": "a car"}
+{"file_name": "train/2.jpg", "text": "a car"}
+{"file_name": "train/3.jpg", "text": "a car"}
+{"file_name": "train/4.jpg", "text": "a car"}
+{"file_name": "train/5.jpg", "text": "a car"}
+{"file_name": "train/6.jpg", "text": "a car"}
+{"file_name": "train/7.jpg", "text": "a car"}
+{"file_name": "train/8.jpg", "text": "a car"}
+{"file_name": "train/9.jpg", "text": "a car"}
+{"file_name": "train/10.jpg", "text": "a car"}
+{"file_name": "train/11.jpg", "text": "a car"}
+{"file_name": "train/12.jpg", "text": "a car"}
+{"file_name": "train/13.jpg", "text": "a car"}
+{"file_name": "train/14.jpg", "text": "a car"}
+{"file_name": "train/15.jpg", "text": "a car"}
+{"file_name": "train/16.jpg", "text": "a car"}
+{"file_name": "train/17.jpg", "text": "a car"}
+{"file_name": "train/18.jpg", "text": "a car"}
+{"file_name": "train/19.jpg", "text": "a car"}
+{"file_name": "train/20.jpg", "text": "a car"}
+{"file_name": "train/21.jpg", "text": "a car"}
+{"file_name": "train/22.jpg", "text": "a car"}
+{"file_name": "train/23.jpg", "text": "a car"}
+{"file_name": "train/24.jpg", "text": "a car"}
+{"file_name": "train/25.jpg", "text": "a car"}
+{"file_name": "train/26.jpg", "text": "a car"}
+{"file_name": "train/27.jpg", "text": "a car"}
+{"file_name": "train/28.jpg", "text": "a car"}
+{"file_name": "train/29.jpg", "text": "a car"}
+{"file_name": "train/30.jpg", "text": "a car"}
+{"file_name": "train/31.jpg", "text": "a car"}
+{"file_name": "train/32.jpg", "text": "a car"}
+{"file_name": "train/33.jpg", "text": "a car"}
+{"file_name": "train/34.jpg", "text": "a car"}
+{"file_name": "train/35.jpg", "text": "a car"}
+{"file_name": "train/36.jpg", "text": "a car"}
+{"file_name": "train/37.jpg", "text": "a car"}
+{"file_name": "train/38.jpg", "text": "a car"}
+{"file_name": "train/39.jpg", "text": "a car"}
+{"file_name": "train/40.jpg", "text": "a car"}
+{"file_name": "train/41.jpg", "text": "a car"}
+{"file_name": "train/42.jpg", "text": "a car"}
+{"file_name": "train/43.jpg", "text": "a car"}
+{"file_name": "train/44.jpg", "text": "a car"}
+{"file_name": "train/45.jpg", "text": "a car"}
+{"file_name": "train/46.jpg", "text": "a car"}
+{"file_name": "train/47.jpg", "text": "a car"}
+{"file_name": "train/48.jpg", "text": "a car"}
+{"file_name": "train/49.jpg", "text": "a car"}
+{"file_name": "train/50.jpg", "text": "a car"}
+{"file_name": "train/51.jpg", "text": "a car"}
+{"file_name": "train/52.jpg", "text": "a car"}
+{"file_name": "train/53.jpg", "text": "a car"}
+{"file_name": "train/54.jpg", "text": "a car"}
+{"file_name": "train/55.jpg", "text": "a car"}
+{"file_name": "train/56.jpg", "text": "a car"}
+{"file_name": "train/57.jpg", "text": "a car"}
+{"file_name": "train/58.jpg", "text": "a car"}
+{"file_name": "train/59.jpg", "text": "a car"}
+{"file_name": "train/60.jpg", "text": "a car"}
+{"file_name": "train/61.jpg", "text": "a car"}
+{"file_name": "train/62.jpg", "text": "a car"}
+{"file_name": "train/63.jpg", "text": "a car"}
+{"file_name": "train/64.jpg", "text": "a car"}
+{"file_name": "train/65.jpg", "text": "a car"}
+{"file_name": "train/66.jpg", "text": "a car"}
+{"file_name": "train/67.jpg", "text": "a car"}
+{"file_name": "train/68.jpg", "text": "a car"}
+{"file_name": "train/69.jpg", "text": "a car"}
+{"file_name": "train/70.jpg", "text": "a car"}
+{"file_name": "train/71.jpg", "text": "a car"}
+{"file_name": "train/72.jpg", "text": "a car"}
+{"file_name": "train/73.jpg", "text": "a car"}
+{"file_name": "train/74.jpg", "text": "a car"}
+{"file_name": "train/75.jpg", "text": "a car"}
+{"file_name": "train/76.jpg", "text": "a car"}
+{"file_name": "train/77.jpg", "text": "a car"}
+{"file_name": "train/78.jpg", "text": "a car"}
+{"file_name": "train/79.jpg", "text": "a car"}
+{"file_name": "train/80.jpg", "text": "a car"}
+{"file_name": "train/81.jpg", "text": "a car"}
+{"file_name": "train/82.jpg", "text": "a car"}
+{"file_name": "train/83.jpg", "text": "a car"}
+{"file_name": "train/84.jpg", "text": "a car"}
+{"file_name": "train/85.jpg", "text": "a car"}
+{"file_name": "train/86.jpg", "text": "a car"}
+{"file_name": "train/87.jpg", "text": "a car"}
+{"file_name": "train/88.jpg", "text": "a car"}
+{"file_name": "train/89.jpg", "text": "a car"}
+{"file_name": "train/90.jpg", "text": "a car"}
+{"file_name": "train/91.jpg", "text": "a car"}
+{"file_name": "train/92.jpg", "text": "a car"}
+{"file_name": "train/93.jpg", "text": "a car"}
+{"file_name": "train/94.jpg", "text": "a car"}
+{"file_name": "train/95.jpg", "text": "a car"}
+{"file_name": "train/96.jpg", "text": "a car"}
+{"file_name": "train/97.jpg", "text": "a car"}
+{"file_name": "train/98.jpg", "text": "a car"}
+{"file_name": "train/99.jpg", "text": "a car"}
+{"file_name": "train/100.jpg", "text": "a car"}
+{"file_name": "train/101.jpg", "text": "a car"}
+{"file_name": "train/102.jpg", "text": "a car"}
+{"file_name": "train/103.jpg", "text": "a car"}
+{"file_name": "train/104.jpg", "text": "a car"}
+{"file_name": "train/105.jpg", "text": "a car"}
+{"file_name": "train/106.jpg", "text": "a car"}
+{"file_name": "train/107.jpg", "text": "a car"}
+{"file_name": "train/108.jpg", "text": "a car"}
+{"file_name": "train/109.jpg", "text": "a car"}
+{"file_name": "train/110.jpg", "text": "a car"}
+{"file_name": "train/111.jpg", "text": "a car"}
+{"file_name": "train/112.jpg", "text": "a car"}
+{"file_name": "train/113.jpg", "text": "a car"}
+{"file_name": "train/114.jpg", "text": "a car"}
+{"file_name": "train/115.jpg", "text": "a car"}
+{"file_name": "train/116.jpg", "text": "a car"}
+{"file_name": "train/117.jpg", "text": "a car"}
+{"file_name": "train/118.jpg", "text": "a car"}
+{"file_name": "train/119.jpg", "text": "a car"}
+{"file_name": "train/120.jpg", "text": "a car"}
+{"file_name": "train/121.jpg", "text": "a car"}
+{"file_name": "train/122.jpg", "text": "a car"}
+{"file_name": "train/123.jpg", "text": "a car"}
+{"file_name": "train/124.jpg", "text": "a car"}
+{"file_name": "train/125.jpg", "text": "a car"}
+{"file_name": "train/126.jpg", "text": "a car"}
+{"file_name": "train/127.jpg", "text": "a car"}
+{"file_name": "train/128.jpg", "text": "a car"}
+{"file_name": "train/129.jpg", "text": "a car"}
+{"file_name": "train/130.jpg", "text": "a car"}
+{"file_name": "train/131.jpg", "text": "a car"}
+{"file_name": "train/132.jpg", "text": "a car"}
+{"file_name": "train/133.jpg", "text": "a car"}
+{"file_name": "train/134.jpg", "text": "a car"}
+{"file_name": "train/135.jpg", "text": "a car"}
+{"file_name": "train/136.jpg", "text": "a car"}
+{"file_name": "train/137.jpg", "text": "a car"}
+{"file_name": "train/138.jpg", "text": "a car"}
+{"file_name": "train/139.jpg", "text": "a car"}
+{"file_name": "train/140.jpg", "text": "a car"}
+{"file_name": "train/141.jpg", "text": "a car"}
+{"file_name": "train/142.jpg", "text": "a car"}
+{"file_name": "train/143.jpg", "text": "a car"}
+{"file_name": "train/144.jpg", "text": "a car"}
+{"file_name": "train/145.jpg", "text": "a car"}
+{"file_name": "train/146.jpg", "text": "a car"}
+{"file_name": "train/147.jpg", "text": "a car"}
+{"file_name": "train/148.jpg", "text": "a car"}
+{"file_name": "train/149.jpg", "text": "a car"}
+{"file_name": "train/150.jpg", "text": "a car"}
+{"file_name": "train/151.jpg", "text": "a car"}
+{"file_name": "train/152.jpg", "text": "a car"}
+{"file_name": "train/153.jpg", "text": "a car"}
+{"file_name": "train/154.jpg", "text": "a car"}
+{"file_name": "train/155.jpg", "text": "a car"}
+{"file_name": "train/156.jpg", "text": "a car"}
+{"file_name": "train/157.jpg", "text": "a car"}
+{"file_name": "train/158.jpg", "text": "a car"}
+{"file_name": "train/159.jpg", "text": "a car"}
+{"file_name": "train/160.jpg", "text": "a car"}
+{"file_name": "train/161.jpg", "text": "a car"}
+{"file_name": "train/162.jpg", "text": "a car"}
+{"file_name": "train/163.jpg", "text": "a car"}
+{"file_name": "train/164.jpg", "text": "a car"}
+{"file_name": "train/165.jpg", "text": "a car"}
+{"file_name": "train/166.jpg", "text": "a car"}
+{"file_name": "train/167.jpg", "text": "a car"}
+{"file_name": "train/168.jpg", "text": "a car"}
+{"file_name": "train/169.jpg", "text": "a car"}
+{"file_name": "train/170.jpg", "text": "a car"}
+{"file_name": "train/171.jpg", "text": "a car"}
+{"file_name": "train/172.jpg", "text": "a car"}
+{"file_name": "train/173.jpg", "text": "a car"}
+{"file_name": "train/174.jpg", "text": "a car"}
+{"file_name": "train/175.jpg", "text": "a car"}
+{"file_name": "train/176.jpg", "text": "a car"}
+{"file_name": "train/177.jpg", "text": "a car"}
+{"file_name": "train/178.jpg", "text": "a car"}
+{"file_name": "train/179.jpg", "text": "a car"}
+{"file_name": "train/180.jpg", "text": "a car"}
+{"file_name": "train/181.jpg", "text": "a car"}
+{"file_name": "train/182.jpg", "text": "a car"}
+{"file_name": "train/183.jpg", "text": "a car"}
+{"file_name": "train/184.jpg", "text": "a car"}
+{"file_name": "train/185.jpg", "text": "a car"}
+{"file_name": "train/186.jpg", "text": "a car"}
+{"file_name": "train/187.jpg", "text": "a car"}
+{"file_name": "train/188.jpg", "text": "a car"}
+{"file_name": "train/189.jpg", "text": "a car"}
+{"file_name": "train/190.jpg", "text": "a car"}
+{"file_name": "train/191.jpg", "text": "a car"}
+{"file_name": "train/192.jpg", "text": "a car"}
+{"file_name": "train/193.jpg", "text": "a car"}
+{"file_name": "train/194.jpg", "text": "a car"}
+{"file_name": "train/195.jpg", "text": "a car"}
+{"file_name": "train/196.jpg", "text": "a car"}
+{"file_name": "train/197.jpg", "text": "a car"}
+{"file_name": "train/198.jpg", "text": "a car"}
+{"file_name": "train/199.jpg", "text": "a car"}
+{"file_name": "train/200.jpg", "text": "a car"}
+{"file_name": "train/201.jpg", "text": "a car"}
+{"file_name": "train/202.jpg", "text": "a car"}
+{"file_name": "train/203.jpg", "text": "a car"}
+{"file_name": "train/204.jpg", "text": "a car"}
+{"file_name": "train/205.jpg", "text": "a car"}
+{"file_name": "train/206.jpg", "text": "a car"}
+{"file_name": "train/207.jpg", "text": "a car"}
+{"file_name": "train/208.jpg", "text": "a car"}
+{"file_name": "train/209.jpg", "text": "a car"}
+{"file_name": "train/210.jpg", "text": "a car"}
+{"file_name": "train/211.jpg", "text": "a car"}
+{"file_name": "train/212.jpg", "text": "a car"}
+{"file_name": "train/213.jpg", "text": "a car"}
+{"file_name": "train/214.jpg", "text": "a car"}
+{"file_name": "train/215.jpg", "text": "a car"}
+{"file_name": "train/216.jpg", "text": "a car"}
+{"file_name": "train/217.jpg", "text": "a car"}
+{"file_name": "train/218.jpg", "text": "a car"}
+{"file_name": "train/219.jpg", "text": "a car"}
+{"file_name": "train/220.jpg", "text": "a car"}
+{"file_name": "train/221.jpg", "text": "a car"}
+{"file_name": "train/222.jpg", "text": "a car"}
+{"file_name": "train/223.jpg", "text": "a car"}
+{"file_name": "train/224.jpg", "text": "a car"}
+{"file_name": "train/225.jpg", "text": "a car"}
+{"file_name": "train/226.jpg", "text": "a car"}
+{"file_name": "train/227.jpg", "text": "a car"}
+{"file_name": "train/228.jpg", "text": "a car"}
+{"file_name": "train/229.jpg", "text": "a car"}
+{"file_name": "train/230.jpg", "text": "a car"}
+{"file_name": "train/231.jpg", "text": "a car"}
+{"file_name": "train/232.jpg", "text": "a car"}
+{"file_name": "train/233.jpg", "text": "a car"}
+{"file_name": "train/234.jpg", "text": "a car"}
+{"file_name": "train/235.jpg", "text": "a car"}
+{"file_name": "train/236.jpg", "text": "a car"}
+{"file_name": "train/237.jpg", "text": "a car"}
+{"file_name": "train/238.jpg", "text": "a car"}
+{"file_name": "train/239.jpg", "text": "a car"}
+{"file_name": "train/240.jpg", "text": "a car"}
+{"file_name": "train/241.jpg", "text": "a car"}
+{"file_name": "train/242.jpg", "text": "a car"}
+{"file_name": "train/243.jpg", "text": "a car"}
+{"file_name": "train/244.jpg", "text": "a car"}
+{"file_name": "train/245.jpg", "text": "a car"}
+{"file_name": "train/246.jpg", "text": "a car"}
+{"file_name": "train/247.jpg", "text": "a car"}
+{"file_name": "train/248.jpg", "text": "a car"}
+{"file_name": "train/249.jpg", "text": "a car"}
+{"file_name": "train/250.jpg", "text": "a car"}
+{"file_name": "train/251.jpg", "text": "a car"}
+{"file_name": "train/252.jpg", "text": "a car"}
+{"file_name": "train/253.jpg", "text": "a car"}
+{"file_name": "train/254.jpg", "text": "a car"}
+{"file_name": "train/255.jpg", "text": "a car"}
+{"file_name": "train/256.jpg", "text": "a car"}
+{"file_name": "train/257.jpg", "text": "a car"}
+{"file_name": "train/258.jpg", "text": "a car"}
+{"file_name": "train/259.jpg", "text": "a car"}
+{"file_name": "train/260.jpg", "text": "a car"}
+{"file_name": "train/261.jpg", "text": "a car"}
+{"file_name": "train/262.jpg", "text": "a car"}
+{"file_name": "train/263.jpg", "text": "a car"}
+{"file_name": "train/264.jpg", "text": "a car"}
+{"file_name": "train/265.jpg", "text": "a car"}
+{"file_name": "train/266.jpg", "text": "a car"}
+{"file_name": "train/267.jpg", "text": "a car"}
+{"file_name": "train/268.jpg", "text": "a car"}
+{"file_name": "train/269.jpg", "text": "a car"}
+{"file_name": "train/270.jpg", "text": "a car"}
+{"file_name": "train/271.jpg", "text": "a car"}
+{"file_name": "train/272.jpg", "text": "a car"}
+{"file_name": "train/273.jpg", "text": "a car"}
+{"file_name": "train/274.jpg", "text": "a car"}
+{"file_name": "train/275.jpg", "text": "a car"}
+{"file_name": "train/276.jpg", "text": "a car"}
+{"file_name": "train/277.jpg", "text": "a car"}
+{"file_name": "train/278.jpg", "text": "a car"}
+{"file_name": "train/279.jpg", "text": "a car"}
+{"file_name": "train/280.jpg", "text": "a car"}
+{"file_name": "train/281.jpg", "text": "a car"}
+{"file_name": "train/282.jpg", "text": "a car"}
+{"file_name": "train/283.jpg", "text": "a car"}
+{"file_name": "train/284.jpg", "text": "a car"}
+{"file_name": "train/285.jpg", "text": "a car"}
+{"file_name": "train/286.jpg", "text": "a car"}
+{"file_name": "train/287.jpg", "text": "a car"}
+{"file_name": "train/288.jpg", "text": "a car"}
+{"file_name": "train/289.jpg", "text": "a car"}
+{"file_name": "train/290.jpg", "text": "a car"}
+{"file_name": "train/291.jpg", "text": "a car"}
+{"file_name": "train/292.jpg", "text": "a car"}
+{"file_name": "train/293.jpg", "text": "a car"}
+{"file_name": "train/294.jpg", "text": "a car"}
+{"file_name": "train/295.jpg", "text": "a car"}
+{"file_name": "train/296.jpg", "text": "a car"}
+{"file_name": "train/297.jpg", "text": "a car"}
+{"file_name": "train/298.jpg", "text": "a car"}
+{"file_name": "train/299.jpg", "text": "a car"}
+{"file_name": "train/300.jpg", "text": "a car"}
+{"file_name": "train/301.jpg", "text": "a car"}
+{"file_name": "train/302.jpg", "text": "a car"}
+{"file_name": "train/303.jpg", "text": "a car"}
+{"file_name": "train/304.jpg", "text": "a car"}
+{"file_name": "train/305.jpg", "text": "a car"}
+{"file_name": "train/306.jpg", "text": "a car"}
+{"file_name": "train/307.jpg", "text": "a car"}
+{"file_name": "train/308.jpg", "text": "a car"}
+{"file_name": "train/309.jpg", "text": "a car"}
+{"file_name": "train/310.jpg", "text": "a car"}
+{"file_name": "train/311.jpg", "text": "a car"}
+{"file_name": "train/312.jpg", "text": "a car"}
+{"file_name": "train/313.jpg", "text": "a car"}
+{"file_name": "train/314.jpg", "text": "a car"}
+{"file_name": "train/315.jpg", "text": "a car"}
+{"file_name": "train/316.jpg", "text": "a car"}
+{"file_name": "train/317.jpg", "text": "a car"}
+{"file_name": "train/318.jpg", "text": "a car"}
+{"file_name": "train/319.jpg", "text": "a car"}
+{"file_name": "train/320.jpg", "text": "a car"}
+{"file_name": "train/321.jpg", "text": "a car"}
+{"file_name": "train/322.jpg", "text": "a car"}
+{"file_name": "train/323.jpg", "text": "a car"}
+{"file_name": "train/324.jpg", "text": "a car"}
+{"file_name": "train/325.jpg", "text": "a car"}
+{"file_name": "train/326.jpg", "text": "a car"}
+{"file_name": "train/327.jpg", "text": "a car"}
+{"file_name": "train/328.jpg", "text": "a car"}
+{"file_name": "train/329.jpg", "text": "a car"}
+{"file_name": "train/330.jpg", "text": "a car"}
+{"file_name": "train/331.jpg", "text": "a car"}
+{"file_name": "train/332.jpg", "text": "a car"}
+{"file_name": "train/333.jpg", "text": "a car"}
+{"file_name": "train/334.jpg", "text": "a car"}
+{"file_name": "train/335.jpg", "text": "a car"}
+{"file_name": "train/336.jpg", "text": "a car"}
+{"file_name": "train/337.jpg", "text": "a car"}
+{"file_name": "train/338.jpg", "text": "a car"}
+{"file_name": "train/339.jpg", "text": "a car"}
+{"file_name": "train/340.jpg", "text": "a car"}
+{"file_name": "train/341.jpg", "text": "a car"}
+{"file_name": "train/342.jpg", "text": "a car"}
+{"file_name": "train/343.jpg", "text": "a car"}
+{"file_name": "train/344.jpg", "text": "a car"}
+{"file_name": "train/345.jpg", "text": "a car"}
+{"file_name": "train/346.jpg", "text": "a car"}
+{"file_name": "train/347.jpg", "text": "a car"}
+{"file_name": "train/348.jpg", "text": "a car"}
+{"file_name": "train/349.jpg", "text": "a car"}
+{"file_name": "train/350.jpg", "text": "a car"}
+{"file_name": "train/351.jpg", "text": "a car"}
+{"file_name": "train/352.jpg", "text": "a car"}
+{"file_name": "train/353.jpg", "text": "a car"}
+{"file_name": "train/354.jpg", "text": "a car"}
+{"file_name": "train/355.jpg", "text": "a car"}
+{"file_name": "train/356.jpg", "text": "a car"}
+{"file_name": "train/357.jpg", "text": "a car"}
+{"file_name": "train/358.jpg", "text": "a car"}
+{"file_name": "train/359.jpg", "text": "a car"}
+{"file_name": "train/360.jpg", "text": "a car"}
+{"file_name": "train/361.jpg", "text": "a car"}
+{"file_name": "train/362.jpg", "text": "a car"}
+{"file_name": "train/363.jpg", "text": "a car"}
+{"file_name": "train/364.jpg", "text": "a car"}
+{"file_name": "train/365.jpg", "text": "a car"}
+{"file_name": "train/366.jpg", "text": "a car"}
+{"file_name": "train/367.jpg", "text": "a car"}
+{"file_name": "train/368.jpg", "text": "a car"}
+{"file_name": "train/369.jpg", "text": "a car"}
+{"file_name": "train/370.jpg", "text": "a car"}
+{"file_name": "train/371.jpg", "text": "a car"}
+{"file_name": "train/372.jpg", "text": "a car"}
+{"file_name": "train/373.jpg", "text": "a car"}
+{"file_name": "train/374.jpg", "text": "a car"}
+{"file_name": "train/375.jpg", "text": "a car"}
376
+ {"file_name": "train/376.jpg", "text": "a car"}
377
+ {"file_name": "train/377.jpg", "text": "a car"}
378
+ {"file_name": "train/378.jpg", "text": "a car"}
379
+ {"file_name": "train/379.jpg", "text": "a car"}
380
+ {"file_name": "train/380.jpg", "text": "a car"}
381
+ {"file_name": "train/381.jpg", "text": "a car"}
382
+ {"file_name": "train/382.jpg", "text": "a car"}
383
+ {"file_name": "train/383.jpg", "text": "a car"}
384
+ {"file_name": "train/384.jpg", "text": "a car"}
385
+ {"file_name": "train/385.jpg", "text": "a car"}
386
+ {"file_name": "train/386.jpg", "text": "a car"}
387
+ {"file_name": "train/387.jpg", "text": "a car"}
388
+ {"file_name": "train/388.jpg", "text": "a car"}
389
+ {"file_name": "train/389.jpg", "text": "a car"}
390
+ {"file_name": "train/390.jpg", "text": "a car"}
391
+ {"file_name": "train/391.jpg", "text": "a car"}
392
+ {"file_name": "train/392.jpg", "text": "a car"}
393
+ {"file_name": "train/393.jpg", "text": "a car"}
394
+ {"file_name": "train/394.jpg", "text": "a car"}
395
+ {"file_name": "train/395.jpg", "text": "a car"}
396
+ {"file_name": "train/396.jpg", "text": "a car"}
397
+ {"file_name": "train/397.jpg", "text": "a car"}
398
+ {"file_name": "train/398.jpg", "text": "a car"}
399
+ {"file_name": "train/399.jpg", "text": "a car"}
400
+ {"file_name": "train/400.jpg", "text": "a car"}
401
+ {"file_name": "train/401.jpg", "text": "a car"}
402
+ {"file_name": "train/402.jpg", "text": "a car"}
403
+ {"file_name": "train/403.jpg", "text": "a car"}
404
+ {"file_name": "train/404.jpg", "text": "a car"}
405
+ {"file_name": "train/405.jpg", "text": "a car"}
406
+ {"file_name": "train/406.jpg", "text": "a car"}
407
+ {"file_name": "train/407.jpg", "text": "a car"}
408
+ {"file_name": "train/408.jpg", "text": "a car"}
409
+ {"file_name": "train/409.jpg", "text": "a car"}
410
+ {"file_name": "train/410.jpg", "text": "a car"}
411
+ {"file_name": "train/411.jpg", "text": "a car"}
412
+ {"file_name": "train/412.jpg", "text": "a car"}
413
+ {"file_name": "train/413.jpg", "text": "a car"}
414
+ {"file_name": "train/414.jpg", "text": "a car"}
415
+ {"file_name": "train/415.jpg", "text": "a car"}
416
+ {"file_name": "train/416.jpg", "text": "a car"}
417
+ {"file_name": "train/417.jpg", "text": "a car"}
418
+ {"file_name": "train/418.jpg", "text": "a car"}
419
+ {"file_name": "train/419.jpg", "text": "a car"}
420
+ {"file_name": "train/420.jpg", "text": "a car"}
421
+ {"file_name": "train/421.jpg", "text": "a car"}
422
+ {"file_name": "train/422.jpg", "text": "a car"}
423
+ {"file_name": "train/423.jpg", "text": "a car"}
424
+ {"file_name": "train/424.jpg", "text": "a car"}
425
+ {"file_name": "train/425.jpg", "text": "a car"}
426
+ {"file_name": "train/426.jpg", "text": "a car"}
427
+ {"file_name": "train/427.jpg", "text": "a car"}
428
+ {"file_name": "train/428.jpg", "text": "a car"}
429
+ {"file_name": "train/429.jpg", "text": "a car"}
430
+ {"file_name": "train/430.jpg", "text": "a car"}
431
+ {"file_name": "train/431.jpg", "text": "a car"}
432
+ {"file_name": "train/432.jpg", "text": "a car"}
433
+ {"file_name": "train/433.jpg", "text": "a car"}
434
+ {"file_name": "train/434.jpg", "text": "a car"}
435
+ {"file_name": "train/435.jpg", "text": "a car"}
436
+ {"file_name": "train/436.jpg", "text": "a car"}
437
+ {"file_name": "train/437.jpg", "text": "a car"}
438
+ {"file_name": "train/438.jpg", "text": "a car"}
439
+ {"file_name": "train/439.jpg", "text": "a car"}
440
+ {"file_name": "train/440.jpg", "text": "a car"}
441
+ {"file_name": "train/441.jpg", "text": "a car"}
442
+ {"file_name": "train/442.jpg", "text": "a car"}
443
+ {"file_name": "train/443.jpg", "text": "a car"}
444
+ {"file_name": "train/444.jpg", "text": "a car"}
445
+ {"file_name": "train/445.jpg", "text": "a car"}
446
+ {"file_name": "train/446.jpg", "text": "a car"}
447
+ {"file_name": "train/447.jpg", "text": "a car"}
448
+ {"file_name": "train/448.jpg", "text": "a car"}
449
+ {"file_name": "train/449.jpg", "text": "a car"}
450
+ {"file_name": "train/450.jpg", "text": "a car"}
451
+ {"file_name": "train/451.jpg", "text": "a car"}
452
+ {"file_name": "train/452.jpg", "text": "a car"}
453
+ {"file_name": "train/453.jpg", "text": "a car"}
454
+ {"file_name": "train/454.jpg", "text": "a car"}
455
+ {"file_name": "train/455.jpg", "text": "a car"}
456
+ {"file_name": "train/456.jpg", "text": "a car"}
457
+ {"file_name": "train/457.jpg", "text": "a car"}
458
+ {"file_name": "train/458.jpg", "text": "a car"}
459
+ {"file_name": "train/459.jpg", "text": "a car"}
460
+ {"file_name": "train/460.jpg", "text": "a car"}
461
+ {"file_name": "train/461.jpg", "text": "a car"}
462
+ {"file_name": "train/462.jpg", "text": "a car"}
463
+ {"file_name": "train/463.jpg", "text": "a car"}
464
+ {"file_name": "train/464.jpg", "text": "a car"}
465
+ {"file_name": "train/465.jpg", "text": "a car"}
466
+ {"file_name": "train/466.jpg", "text": "a car"}
467
+ {"file_name": "train/467.jpg", "text": "a car"}
468
+ {"file_name": "train/468.jpg", "text": "a car"}
469
+ {"file_name": "train/469.jpg", "text": "a car"}
470
+ {"file_name": "train/470.jpg", "text": "a car"}
471
+ {"file_name": "train/471.jpg", "text": "a car"}
472
+ {"file_name": "train/472.jpg", "text": "a car"}
473
+ {"file_name": "train/473.jpg", "text": "a car"}
474
+ {"file_name": "train/474.jpg", "text": "a car"}
475
+ {"file_name": "train/475.jpg", "text": "a car"}
476
+ {"file_name": "train/476.jpg", "text": "a car"}
477
+ {"file_name": "train/477.jpg", "text": "a car"}
478
+ {"file_name": "train/478.jpg", "text": "a car"}
479
+ {"file_name": "train/479.jpg", "text": "a car"}
480
+ {"file_name": "train/480.jpg", "text": "a car"}
481
+ {"file_name": "train/481.jpg", "text": "a car"}
482
+ {"file_name": "train/482.jpg", "text": "a car"}
483
+ {"file_name": "train/483.jpg", "text": "a car"}
484
+ {"file_name": "train/484.jpg", "text": "a car"}
485
+ {"file_name": "train/485.jpg", "text": "a car"}
486
+ {"file_name": "train/486.jpg", "text": "a car"}
487
+ {"file_name": "train/487.jpg", "text": "a car"}
488
+ {"file_name": "train/488.jpg", "text": "a car"}
489
+ {"file_name": "train/489.jpg", "text": "a car"}
490
+ {"file_name": "train/490.jpg", "text": "a car"}
491
+ {"file_name": "train/491.jpg", "text": "a car"}
492
+ {"file_name": "train/492.jpg", "text": "a car"}
493
+ {"file_name": "train/493.jpg", "text": "a car"}
494
+ {"file_name": "train/494.jpg", "text": "a car"}
495
+ {"file_name": "train/495.jpg", "text": "a car"}
496
+ {"file_name": "train/496.jpg", "text": "a car"}
497
+ {"file_name": "train/497.jpg", "text": "a car"}
498
+ {"file_name": "train/498.jpg", "text": "a car"}
499
+ {"file_name": "train/499.jpg", "text": "a car"}
500
+ {"file_name": "train/500.jpg", "text": "a car"}
501
+ {"file_name": "train/501.jpg", "text": "a car"}
502
+ {"file_name": "train/502.jpg", "text": "a car"}
503
+ {"file_name": "train/503.jpg", "text": "a car"}
504
+ {"file_name": "train/504.jpg", "text": "a car"}
505
+ {"file_name": "train/505.jpg", "text": "a car"}
506
+ {"file_name": "train/506.jpg", "text": "a car"}
507
+ {"file_name": "train/507.jpg", "text": "a car"}
508
+ {"file_name": "train/508.jpg", "text": "a car"}
509
+ {"file_name": "train/509.jpg", "text": "a car"}
510
+ {"file_name": "train/510.jpg", "text": "a car"}
511
+ {"file_name": "train/511.jpg", "text": "a car"}
512
+ {"file_name": "train/512.jpg", "text": "a car"}
513
+ {"file_name": "train/513.jpg", "text": "a car"}
514
+ {"file_name": "train/514.jpg", "text": "a car"}
515
+ {"file_name": "train/515.jpg", "text": "a car"}
516
+ {"file_name": "train/516.jpg", "text": "a car"}
517
+ {"file_name": "train/517.jpg", "text": "a car"}
518
+ {"file_name": "train/518.jpg", "text": "a car"}
519
+ {"file_name": "train/519.jpg", "text": "a car"}
520
+ {"file_name": "train/520.jpg", "text": "a car"}
521
+ {"file_name": "train/521.jpg", "text": "a car"}
522
+ {"file_name": "train/522.jpg", "text": "a car"}
523
+ {"file_name": "train/523.jpg", "text": "a car"}
524
+ {"file_name": "train/524.jpg", "text": "a car"}
525
+ {"file_name": "train/525.jpg", "text": "a car"}
526
+ {"file_name": "train/526.jpg", "text": "a car"}
527
+ {"file_name": "train/527.jpg", "text": "a car"}
528
+ {"file_name": "train/528.jpg", "text": "a car"}
529
+ {"file_name": "train/529.jpg", "text": "a car"}
530
+ {"file_name": "train/530.jpg", "text": "a car"}
531
+ {"file_name": "train/531.jpg", "text": "a car"}
532
+ {"file_name": "train/532.jpg", "text": "a car"}
533
+ {"file_name": "train/533.jpg", "text": "a car"}
534
+ {"file_name": "train/534.jpg", "text": "a car"}
535
+ {"file_name": "train/535.jpg", "text": "a car"}
536
+ {"file_name": "train/536.jpg", "text": "a car"}
537
+ {"file_name": "train/537.jpg", "text": "a car"}
538
+ {"file_name": "train/538.jpg", "text": "a car"}
539
+ {"file_name": "train/539.jpg", "text": "a car"}
540
+ {"file_name": "train/540.jpg", "text": "a car"}
541
+ {"file_name": "train/541.jpg", "text": "a car"}
542
+ {"file_name": "train/542.jpg", "text": "a car"}
543
+ {"file_name": "train/543.jpg", "text": "a car"}
544
+ {"file_name": "train/544.jpg", "text": "a car"}
545
+ {"file_name": "train/545.jpg", "text": "a car"}
546
+ {"file_name": "train/546.jpg", "text": "a car"}
547
+ {"file_name": "train/547.jpg", "text": "a car"}
548
+ {"file_name": "train/548.jpg", "text": "a car"}
549
+ {"file_name": "train/549.jpg", "text": "a car"}
550
+ {"file_name": "train/550.jpg", "text": "a car"}
551
+ {"file_name": "train/551.jpg", "text": "a car"}
552
+ {"file_name": "train/552.jpg", "text": "a car"}
553
+ {"file_name": "train/553.jpg", "text": "a car"}
554
+ {"file_name": "train/554.jpg", "text": "a car"}
555
+ {"file_name": "train/555.jpg", "text": "a car"}
556
+ {"file_name": "train/556.jpg", "text": "a car"}
557
+ {"file_name": "train/557.jpg", "text": "a car"}
558
+ {"file_name": "train/558.jpg", "text": "a car"}
559
+ {"file_name": "train/559.jpg", "text": "a car"}
560
+ {"file_name": "train/560.jpg", "text": "a car"}
561
+ {"file_name": "train/561.jpg", "text": "a car"}
562
+ {"file_name": "train/562.jpg", "text": "a car"}
563
+ {"file_name": "train/563.jpg", "text": "a car"}
564
+ {"file_name": "train/564.jpg", "text": "a car"}
565
+ {"file_name": "train/565.jpg", "text": "a car"}
566
+ {"file_name": "train/566.jpg", "text": "a car"}
567
+ {"file_name": "train/567.jpg", "text": "a car"}
568
+ {"file_name": "train/568.jpg", "text": "a car"}
569
+ {"file_name": "train/569.jpg", "text": "a car"}
570
+ {"file_name": "train/570.jpg", "text": "a car"}
571
+ {"file_name": "train/571.jpg", "text": "a car"}
572
+ {"file_name": "train/572.jpg", "text": "a car"}
573
+ {"file_name": "train/573.jpg", "text": "a car"}
574
+ {"file_name": "train/574.jpg", "text": "a car"}
575
+ {"file_name": "train/575.jpg", "text": "a car"}
576
+ {"file_name": "train/576.jpg", "text": "a car"}
577
+ {"file_name": "train/577.jpg", "text": "a car"}
578
+ {"file_name": "train/578.jpg", "text": "a car"}
579
+ {"file_name": "train/579.jpg", "text": "a car"}
580
+ {"file_name": "train/580.jpg", "text": "a car"}
581
+ {"file_name": "train/581.jpg", "text": "a car"}
582
+ {"file_name": "train/582.jpg", "text": "a car"}
583
+ {"file_name": "train/583.jpg", "text": "a car"}
584
+ {"file_name": "train/584.jpg", "text": "a car"}
585
+ {"file_name": "train/585.jpg", "text": "a car"}
586
+ {"file_name": "train/586.jpg", "text": "a car"}
587
+ {"file_name": "train/587.jpg", "text": "a car"}
588
+ {"file_name": "train/588.jpg", "text": "a car"}
589
+ {"file_name": "train/589.jpg", "text": "a car"}
590
+ {"file_name": "train/590.jpg", "text": "a car"}
591
+ {"file_name": "train/591.jpg", "text": "a car"}
592
+ {"file_name": "train/592.jpg", "text": "a car"}
593
+ {"file_name": "train/593.jpg", "text": "a car"}
594
+ {"file_name": "train/594.jpg", "text": "a car"}
595
+ {"file_name": "train/595.jpg", "text": "a car"}
596
+ {"file_name": "train/596.jpg", "text": "a car"}
597
+ {"file_name": "train/597.jpg", "text": "a car"}
598
+ {"file_name": "train/598.jpg", "text": "a car"}
599
+ {"file_name": "train/599.jpg", "text": "a car"}
600
+ {"file_name": "train/600.jpg", "text": "a car"}
601
+ {"file_name": "train/601.jpg", "text": "a car"}
602
+ {"file_name": "train/602.jpg", "text": "a car"}
603
+ {"file_name": "train/603.jpg", "text": "a car"}
604
+ {"file_name": "train/604.jpg", "text": "a car"}
605
+ {"file_name": "train/605.jpg", "text": "a car"}
606
+ {"file_name": "train/606.jpg", "text": "a car"}
607
+ {"file_name": "train/607.jpg", "text": "a car"}
608
+ {"file_name": "train/608.jpg", "text": "a car"}
609
+ {"file_name": "train/609.jpg", "text": "a car"}
610
+ {"file_name": "train/610.jpg", "text": "a car"}
611
+ {"file_name": "train/611.jpg", "text": "a car"}
612
+ {"file_name": "train/612.jpg", "text": "a car"}
613
+ {"file_name": "train/613.jpg", "text": "a car"}
614
+ {"file_name": "train/614.jpg", "text": "a car"}
615
+ {"file_name": "train/615.jpg", "text": "a car"}
616
+ {"file_name": "train/616.jpg", "text": "a car"}
617
+ {"file_name": "train/617.jpg", "text": "a car"}
618
+ {"file_name": "train/618.jpg", "text": "a car"}
619
+ {"file_name": "train/619.jpg", "text": "a car"}
620
+ {"file_name": "train/620.jpg", "text": "a car"}
621
+ {"file_name": "train/621.jpg", "text": "a car"}
622
+ {"file_name": "train/622.jpg", "text": "a car"}
623
+ {"file_name": "train/623.jpg", "text": "a car"}
624
+ {"file_name": "train/624.jpg", "text": "a car"}
625
+ {"file_name": "train/625.jpg", "text": "a car"}
626
+ {"file_name": "train/626.jpg", "text": "a car"}
627
+ {"file_name": "train/627.jpg", "text": "a car"}
628
+ {"file_name": "train/628.jpg", "text": "a car"}
629
+ {"file_name": "train/629.jpg", "text": "a car"}
630
+ {"file_name": "train/630.jpg", "text": "a car"}
631
+ {"file_name": "train/631.jpg", "text": "a car"}
632
+ {"file_name": "train/632.jpg", "text": "a car"}
633
+ {"file_name": "train/633.jpg", "text": "a car"}
634
+ {"file_name": "train/634.jpg", "text": "a car"}
635
+ {"file_name": "train/635.jpg", "text": "a car"}
636
+ {"file_name": "train/636.jpg", "text": "a car"}
637
+ {"file_name": "train/637.jpg", "text": "a car"}
638
+ {"file_name": "train/638.jpg", "text": "a car"}
639
+ {"file_name": "train/639.jpg", "text": "a car"}
640
+ {"file_name": "train/640.jpg", "text": "a car"}
641
+ {"file_name": "train/641.jpg", "text": "a car"}
642
+ {"file_name": "train/642.jpg", "text": "a car"}
643
+ {"file_name": "train/643.jpg", "text": "a car"}
644
+ {"file_name": "train/644.jpg", "text": "a car"}
645
+ {"file_name": "train/645.jpg", "text": "a car"}
646
+ {"file_name": "train/646.jpg", "text": "a car"}
647
+ {"file_name": "train/647.jpg", "text": "a car"}
648
+ {"file_name": "train/648.jpg", "text": "a car"}
649
+ {"file_name": "train/649.jpg", "text": "a car"}
650
+ {"file_name": "train/650.jpg", "text": "a car"}
651
+ {"file_name": "train/651.jpg", "text": "a car"}
652
+ {"file_name": "train/652.jpg", "text": "a car"}
653
+ {"file_name": "train/653.jpg", "text": "a car"}
654
+ {"file_name": "train/654.jpg", "text": "a car"}
655
+ {"file_name": "train/655.jpg", "text": "a car"}
656
+ {"file_name": "train/656.jpg", "text": "a car"}
657
+ {"file_name": "train/657.jpg", "text": "a car"}
658
+ {"file_name": "train/658.jpg", "text": "a car"}
659
+ {"file_name": "train/659.jpg", "text": "a car"}
660
+ {"file_name": "train/660.jpg", "text": "a car"}
661
+ {"file_name": "train/661.jpg", "text": "a car"}
662
+ {"file_name": "train/662.jpg", "text": "a car"}
663
+ {"file_name": "train/663.jpg", "text": "a car"}
664
+ {"file_name": "train/664.jpg", "text": "a car"}
665
+ {"file_name": "train/665.jpg", "text": "a car"}
666
+ {"file_name": "train/666.jpg", "text": "a car"}
667
+ {"file_name": "train/667.jpg", "text": "a car"}
668
+ {"file_name": "train/668.jpg", "text": "a car"}
669
+ {"file_name": "train/669.jpg", "text": "a car"}
670
+ {"file_name": "train/670.jpg", "text": "a car"}
671
+ {"file_name": "train/671.jpg", "text": "a car"}
672
+ {"file_name": "train/672.jpg", "text": "a car"}
673
+ {"file_name": "train/673.jpg", "text": "a car"}
674
+ {"file_name": "train/674.jpg", "text": "a car"}
675
+ {"file_name": "train/675.jpg", "text": "a car"}
676
+ {"file_name": "train/676.jpg", "text": "a car"}
677
+ {"file_name": "train/677.jpg", "text": "a car"}
678
+ {"file_name": "train/678.jpg", "text": "a car"}
679
+ {"file_name": "train/679.jpg", "text": "a car"}
680
+ {"file_name": "train/680.jpg", "text": "a car"}
681
+ {"file_name": "train/681.jpg", "text": "a car"}
682
+ {"file_name": "train/682.jpg", "text": "a car"}
683
+ {"file_name": "train/683.jpg", "text": "a car"}
684
+ {"file_name": "train/684.jpg", "text": "a car"}
685
+ {"file_name": "train/685.jpg", "text": "a car"}
686
+ {"file_name": "train/686.jpg", "text": "a car"}
687
+ {"file_name": "train/687.jpg", "text": "a car"}
688
+ {"file_name": "train/688.jpg", "text": "a car"}
689
+ {"file_name": "train/689.jpg", "text": "a car"}
690
+ {"file_name": "train/690.jpg", "text": "a car"}
691
+ {"file_name": "train/691.jpg", "text": "a car"}
692
+ {"file_name": "train/692.jpg", "text": "a car"}
693
+ {"file_name": "train/693.jpg", "text": "a car"}
694
+ {"file_name": "train/694.jpg", "text": "a car"}
695
+ {"file_name": "train/695.jpg", "text": "a car"}
696
+ {"file_name": "train/696.jpg", "text": "a car"}
697
+ {"file_name": "train/697.jpg", "text": "a car"}
698
+ {"file_name": "train/698.jpg", "text": "a car"}
699
+ {"file_name": "train/699.jpg", "text": "a car"}
700
+ {"file_name": "train/700.jpg", "text": "a car"}
701
+ {"file_name": "train/701.jpg", "text": "a car"}
702
+ {"file_name": "train/702.jpg", "text": "a car"}
703
+ {"file_name": "train/703.jpg", "text": "a car"}
704
+ {"file_name": "train/704.jpg", "text": "a car"}
705
+ {"file_name": "train/705.jpg", "text": "a car"}
706
+ {"file_name": "train/706.jpg", "text": "a car"}
707
+ {"file_name": "train/707.jpg", "text": "a car"}
708
+ {"file_name": "train/708.jpg", "text": "a car"}
709
+ {"file_name": "train/709.jpg", "text": "a car"}
710
+ {"file_name": "train/710.jpg", "text": "a car"}
711
+ {"file_name": "train/711.jpg", "text": "a car"}
712
+ {"file_name": "train/712.jpg", "text": "a car"}
713
+ {"file_name": "train/713.jpg", "text": "a car"}
714
+ {"file_name": "train/714.jpg", "text": "a car"}
715
+ {"file_name": "train/715.jpg", "text": "a car"}
716
+ {"file_name": "train/716.jpg", "text": "a car"}
717
+ {"file_name": "train/717.jpg", "text": "a car"}
718
+ {"file_name": "train/718.jpg", "text": "a car"}
719
+ {"file_name": "train/719.jpg", "text": "a car"}
720
+ {"file_name": "train/720.jpg", "text": "a car"}
721
+ {"file_name": "train/721.jpg", "text": "a car"}
722
+ {"file_name": "train/722.jpg", "text": "a car"}
723
+ {"file_name": "train/723.jpg", "text": "a car"}
724
+ {"file_name": "train/724.jpg", "text": "a car"}
725
+ {"file_name": "train/725.jpg", "text": "a car"}
726
+ {"file_name": "train/726.jpg", "text": "a car"}
727
+ {"file_name": "train/727.jpg", "text": "a car"}
728
+ {"file_name": "train/728.jpg", "text": "a car"}
729
+ {"file_name": "train/729.jpg", "text": "a car"}
730
+ {"file_name": "train/730.jpg", "text": "a car"}
731
+ {"file_name": "train/731.jpg", "text": "a car"}
732
+ {"file_name": "train/732.jpg", "text": "a car"}
733
+ {"file_name": "train/733.jpg", "text": "a car"}
734
+ {"file_name": "train/734.jpg", "text": "a car"}
735
+ {"file_name": "train/735.jpg", "text": "a car"}
736
+ {"file_name": "train/736.jpg", "text": "a car"}
737
+ {"file_name": "train/737.jpg", "text": "a car"}
738
+ {"file_name": "train/738.jpg", "text": "a car"}
739
+ {"file_name": "train/739.jpg", "text": "a car"}
740
+ {"file_name": "train/740.jpg", "text": "a car"}
741
+ {"file_name": "train/741.jpg", "text": "a car"}
742
+ {"file_name": "train/742.jpg", "text": "a car"}
743
+ {"file_name": "train/743.jpg", "text": "a car"}
744
+ {"file_name": "train/744.jpg", "text": "a car"}
745
+ {"file_name": "train/745.jpg", "text": "a car"}
746
+ {"file_name": "train/746.jpg", "text": "a car"}
747
+ {"file_name": "train/747.jpg", "text": "a car"}
748
+ {"file_name": "train/748.jpg", "text": "a car"}
749
+ {"file_name": "train/749.jpg", "text": "a car"}
750
+ {"file_name": "train/750.jpg", "text": "a car"}
751
+ {"file_name": "train/751.jpg", "text": "a car"}
752
+ {"file_name": "train/752.jpg", "text": "a car"}
753
+ {"file_name": "train/753.jpg", "text": "a car"}
754
+ {"file_name": "train/754.jpg", "text": "a car"}
755
+ {"file_name": "train/755.jpg", "text": "a car"}
756
+ {"file_name": "train/756.jpg", "text": "a car"}
757
+ {"file_name": "train/757.jpg", "text": "a car"}
758
+ {"file_name": "train/758.jpg", "text": "a car"}
759
+ {"file_name": "train/759.jpg", "text": "a car"}
760
+ {"file_name": "train/760.jpg", "text": "a car"}
761
+ {"file_name": "train/761.jpg", "text": "a car"}
762
+ {"file_name": "train/762.jpg", "text": "a car"}
763
+ {"file_name": "train/763.jpg", "text": "a car"}
764
+ {"file_name": "train/764.jpg", "text": "a car"}
765
+ {"file_name": "train/765.jpg", "text": "a car"}
766
+ {"file_name": "train/766.jpg", "text": "a car"}
767
+ {"file_name": "train/767.jpg", "text": "a car"}
768
+ {"file_name": "train/768.jpg", "text": "a car"}
769
+ {"file_name": "train/769.jpg", "text": "a car"}
770
+ {"file_name": "train/770.jpg", "text": "a car"}
771
+ {"file_name": "train/771.jpg", "text": "a car"}
772
+ {"file_name": "train/772.jpg", "text": "a car"}
773
+ {"file_name": "train/773.jpg", "text": "a car"}
774
+ {"file_name": "train/774.jpg", "text": "a car"}
775
+ {"file_name": "train/775.jpg", "text": "a car"}
776
+ {"file_name": "train/776.jpg", "text": "a car"}
777
+ {"file_name": "train/777.jpg", "text": "a car"}
778
+ {"file_name": "train/778.jpg", "text": "a car"}
779
+ {"file_name": "train/779.jpg", "text": "a car"}
780
+ {"file_name": "train/780.jpg", "text": "a car"}
781
+ {"file_name": "train/781.jpg", "text": "a car"}
782
+ {"file_name": "train/782.jpg", "text": "a car"}
783
+ {"file_name": "train/783.jpg", "text": "a car"}
784
+ {"file_name": "train/784.jpg", "text": "a car"}
785
+ {"file_name": "train/785.jpg", "text": "a car"}
786
+ {"file_name": "train/786.jpg", "text": "a car"}
787
+ {"file_name": "train/787.jpg", "text": "a car"}
788
+ {"file_name": "train/788.jpg", "text": "a car"}
789
+ {"file_name": "train/789.jpg", "text": "a car"}
790
+ {"file_name": "train/790.jpg", "text": "a car"}
791
+ {"file_name": "train/791.jpg", "text": "a car"}
792
+ {"file_name": "train/792.jpg", "text": "a car"}
793
+ {"file_name": "train/793.jpg", "text": "a car"}
794
+ {"file_name": "train/794.jpg", "text": "a car"}
795
+ {"file_name": "train/795.jpg", "text": "a car"}
796
+ {"file_name": "train/796.jpg", "text": "a car"}
797
+ {"file_name": "train/797.jpg", "text": "a car"}
798
+ {"file_name": "train/798.jpg", "text": "a car"}
799
+ {"file_name": "train/799.jpg", "text": "a car"}
800
+ {"file_name": "train/800.jpg", "text": "a car"}
801
+ {"file_name": "train/801.jpg", "text": "a car"}
802
+ {"file_name": "train/802.jpg", "text": "a car"}
803
+ {"file_name": "train/803.jpg", "text": "a car"}
804
+ {"file_name": "train/804.jpg", "text": "a car"}
805
+ {"file_name": "train/805.jpg", "text": "a car"}
806
+ {"file_name": "train/806.jpg", "text": "a car"}
807
+ {"file_name": "train/807.jpg", "text": "a car"}
808
+ {"file_name": "train/808.jpg", "text": "a car"}
809
+ {"file_name": "train/809.jpg", "text": "a car"}
810
+ {"file_name": "train/810.jpg", "text": "a car"}
811
+ {"file_name": "train/811.jpg", "text": "a car"}
812
+ {"file_name": "train/812.jpg", "text": "a car"}
813
+ {"file_name": "train/813.jpg", "text": "a car"}
814
+ {"file_name": "train/814.jpg", "text": "a car"}
815
+ {"file_name": "train/815.jpg", "text": "a car"}
816
+ {"file_name": "train/816.jpg", "text": "a car"}
817
+ {"file_name": "train/817.jpg", "text": "a car"}
818
+ {"file_name": "train/818.jpg", "text": "a car"}
819
+ {"file_name": "train/819.jpg", "text": "a car"}
820
+ {"file_name": "train/820.jpg", "text": "a car"}
821
+ {"file_name": "train/821.jpg", "text": "a car"}
822
+ {"file_name": "train/822.jpg", "text": "a car"}
823
+ {"file_name": "train/823.jpg", "text": "a car"}
824
+ {"file_name": "train/824.jpg", "text": "a car"}
825
+ {"file_name": "train/825.jpg", "text": "a car"}
826
+ {"file_name": "train/826.jpg", "text": "a car"}
827
+ {"file_name": "train/827.jpg", "text": "a car"}
828
+ {"file_name": "train/828.jpg", "text": "a car"}
829
+ {"file_name": "train/829.jpg", "text": "a car"}
830
+ {"file_name": "train/830.jpg", "text": "a car"}
831
+ {"file_name": "train/831.jpg", "text": "a car"}
832
+ {"file_name": "train/832.jpg", "text": "a car"}
833
+ {"file_name": "train/833.jpg", "text": "a car"}
834
+ {"file_name": "train/834.jpg", "text": "a car"}
835
+ {"file_name": "train/835.jpg", "text": "a car"}
836
+ {"file_name": "train/836.jpg", "text": "a car"}
837
+ {"file_name": "train/837.jpg", "text": "a car"}
838
+ {"file_name": "train/838.jpg", "text": "a car"}
839
+ {"file_name": "train/839.jpg", "text": "a car"}
840
+ {"file_name": "train/840.jpg", "text": "a car"}
841
+ {"file_name": "train/841.jpg", "text": "a car"}
842
+ {"file_name": "train/842.jpg", "text": "a car"}
843
+ {"file_name": "train/843.jpg", "text": "a car"}
844
+ {"file_name": "train/844.jpg", "text": "a car"}
845
+ {"file_name": "train/845.jpg", "text": "a car"}
846
+ {"file_name": "train/846.jpg", "text": "a car"}
847
+ {"file_name": "train/847.jpg", "text": "a car"}
848
+ {"file_name": "train/848.jpg", "text": "a car"}
849
+ {"file_name": "train/849.jpg", "text": "a car"}
850
+ {"file_name": "train/850.jpg", "text": "a car"}
851
+ {"file_name": "train/851.jpg", "text": "a car"}
852
+ {"file_name": "train/852.jpg", "text": "a car"}
853
+ {"file_name": "train/853.jpg", "text": "a car"}
854
+ {"file_name": "train/854.jpg", "text": "a car"}
855
+ {"file_name": "train/855.jpg", "text": "a car"}
856
+ {"file_name": "train/856.jpg", "text": "a car"}
857
+ {"file_name": "train/857.jpg", "text": "a car"}
858
+ {"file_name": "train/858.jpg", "text": "a car"}
859
+ {"file_name": "train/859.jpg", "text": "a car"}
860
+ {"file_name": "train/860.jpg", "text": "a car"}
861
+ {"file_name": "train/861.jpg", "text": "a car"}
862
+ {"file_name": "train/862.jpg", "text": "a car"}
863
+ {"file_name": "train/863.jpg", "text": "a car"}
864
+ {"file_name": "train/864.jpg", "text": "a car"}
865
+ {"file_name": "train/865.jpg", "text": "a car"}
866
+ {"file_name": "train/866.jpg", "text": "a car"}
867
+ {"file_name": "train/867.jpg", "text": "a car"}
868
+ {"file_name": "train/868.jpg", "text": "a car"}
869
+ {"file_name": "train/869.jpg", "text": "a car"}
870
+ {"file_name": "train/870.jpg", "text": "a car"}
871
+ {"file_name": "train/871.jpg", "text": "a car"}
872
+ {"file_name": "train/872.jpg", "text": "a car"}
873
+ {"file_name": "train/873.jpg", "text": "a car"}
874
+ {"file_name": "train/874.jpg", "text": "a car"}
875
+ {"file_name": "train/875.jpg", "text": "a car"}
876
+ {"file_name": "train/876.jpg", "text": "a car"}
877
+ {"file_name": "train/877.jpg", "text": "a car"}
878
+ {"file_name": "train/878.jpg", "text": "a car"}
879
+ {"file_name": "train/879.jpg", "text": "a car"}
880
+ {"file_name": "train/880.jpg", "text": "a car"}
881
+ {"file_name": "train/881.jpg", "text": "a car"}
882
+ {"file_name": "train/882.jpg", "text": "a car"}
883
+ {"file_name": "train/883.jpg", "text": "a car"}
884
+ {"file_name": "train/884.jpg", "text": "a car"}
885
+ {"file_name": "train/885.jpg", "text": "a car"}
886
+ {"file_name": "train/886.jpg", "text": "a car"}
887
+ {"file_name": "train/887.jpg", "text": "a car"}
888
+ {"file_name": "train/888.jpg", "text": "a car"}
889
+ {"file_name": "train/889.jpg", "text": "a car"}
890
+ {"file_name": "train/890.jpg", "text": "a car"}
891
+ {"file_name": "train/891.jpg", "text": "a car"}
892
+ {"file_name": "train/892.jpg", "text": "a car"}
893
+ {"file_name": "train/893.jpg", "text": "a car"}
894
+ {"file_name": "train/894.jpg", "text": "a car"}
895
+ {"file_name": "train/895.jpg", "text": "a car"}
896
+ {"file_name": "train/896.jpg", "text": "a car"}
897
+ {"file_name": "train/897.jpg", "text": "a car"}
898
+ {"file_name": "train/898.jpg", "text": "a car"}
899
+ {"file_name": "train/899.jpg", "text": "a car"}
900
+ {"file_name": "train/900.jpg", "text": "a car"}
901
+ {"file_name": "train/901.jpg", "text": "a car"}
902
+ {"file_name": "train/902.jpg", "text": "a car"}
903
+ {"file_name": "train/903.jpg", "text": "a car"}
904
+ {"file_name": "train/904.jpg", "text": "a car"}
905
+ {"file_name": "train/905.jpg", "text": "a car"}
906
+ {"file_name": "train/906.jpg", "text": "a car"}
907
+ {"file_name": "train/907.jpg", "text": "a car"}
908
+ {"file_name": "train/908.jpg", "text": "a car"}
909
+ {"file_name": "train/909.jpg", "text": "a car"}
910
+ {"file_name": "train/910.jpg", "text": "a car"}
911
+ {"file_name": "train/911.jpg", "text": "a car"}
912
+ {"file_name": "train/912.jpg", "text": "a car"}
913
+ {"file_name": "train/913.jpg", "text": "a car"}
914
+ {"file_name": "train/914.jpg", "text": "a car"}
915
+ {"file_name": "train/915.jpg", "text": "a car"}
916
+ {"file_name": "train/916.jpg", "text": "a car"}
917
+ {"file_name": "train/917.jpg", "text": "a car"}
918
+ {"file_name": "train/918.jpg", "text": "a car"}
919
+ {"file_name": "train/919.jpg", "text": "a car"}
920
+ {"file_name": "train/920.jpg", "text": "a car"}
921
+ {"file_name": "train/921.jpg", "text": "a car"}
922
+ {"file_name": "train/922.jpg", "text": "a car"}
923
+ {"file_name": "train/923.jpg", "text": "a car"}
924
+ {"file_name": "train/924.jpg", "text": "a car"}
925
+ {"file_name": "train/925.jpg", "text": "a car"}
926
+ {"file_name": "train/926.jpg", "text": "a car"}
927
+ {"file_name": "train/927.jpg", "text": "a car"}
928
+ {"file_name": "train/928.jpg", "text": "a car"}
929
+ {"file_name": "train/929.jpg", "text": "a car"}
930
+ {"file_name": "train/930.jpg", "text": "a car"}
931
+ {"file_name": "train/931.jpg", "text": "a car"}
932
+ {"file_name": "train/932.jpg", "text": "a car"}
933
+ {"file_name": "train/933.jpg", "text": "a car"}
934
+ {"file_name": "train/934.jpg", "text": "a car"}
935
+ {"file_name": "train/935.jpg", "text": "a car"}
936
+ {"file_name": "train/936.jpg", "text": "a car"}
937
+ {"file_name": "train/937.jpg", "text": "a car"}
938
+ {"file_name": "train/938.jpg", "text": "a car"}
939
+ {"file_name": "train/939.jpg", "text": "a car"}
940
+ {"file_name": "train/940.jpg", "text": "a car"}
941
+ {"file_name": "train/941.jpg", "text": "a car"}
942
+ {"file_name": "train/942.jpg", "text": "a car"}
943
+ {"file_name": "train/943.jpg", "text": "a car"}
944
+ {"file_name": "train/944.jpg", "text": "a car"}
945
+ {"file_name": "train/945.jpg", "text": "a car"}
946
+ {"file_name": "train/946.jpg", "text": "a car"}
947
+ {"file_name": "train/947.jpg", "text": "a car"}
948
+ {"file_name": "train/948.jpg", "text": "a car"}
949
+ {"file_name": "train/949.jpg", "text": "a car"}
950
+ {"file_name": "train/950.jpg", "text": "a car"}
951
+ {"file_name": "train/951.jpg", "text": "a car"}
952
+ {"file_name": "train/952.jpg", "text": "a car"}
953
+ {"file_name": "train/953.jpg", "text": "a car"}
954
+ {"file_name": "train/954.jpg", "text": "a car"}
955
+ {"file_name": "train/955.jpg", "text": "a car"}
956
+ {"file_name": "train/956.jpg", "text": "a car"}
957
+ {"file_name": "train/957.jpg", "text": "a car"}
958
+ {"file_name": "train/958.jpg", "text": "a car"}
959
+ {"file_name": "train/959.jpg", "text": "a car"}
960
+ {"file_name": "train/960.jpg", "text": "a car"}
961
+ {"file_name": "train/961.jpg", "text": "a car"}
962
+ {"file_name": "train/962.jpg", "text": "a car"}
963
+ {"file_name": "train/963.jpg", "text": "a car"}
964
+ {"file_name": "train/964.jpg", "text": "a car"}
965
+ {"file_name": "train/965.jpg", "text": "a car"}
966
+ {"file_name": "train/966.jpg", "text": "a car"}
967
+ {"file_name": "train/967.jpg", "text": "a car"}
968
+ {"file_name": "train/968.jpg", "text": "a car"}
969
+ {"file_name": "train/969.jpg", "text": "a car"}
970
+ {"file_name": "train/970.jpg", "text": "a car"}
971
+ {"file_name": "train/971.jpg", "text": "a car"}
972
+ {"file_name": "train/972.jpg", "text": "a car"}
973
+ {"file_name": "train/973.jpg", "text": "a car"}
974
+ {"file_name": "train/974.jpg", "text": "a car"}
975
+ {"file_name": "train/975.jpg", "text": "a car"}
976
+ {"file_name": "train/976.jpg", "text": "a car"}
977
+ {"file_name": "train/977.jpg", "text": "a car"}
978
+ {"file_name": "train/978.jpg", "text": "a car"}
979
+ {"file_name": "train/979.jpg", "text": "a car"}
980
+ {"file_name": "train/980.jpg", "text": "a car"}
981
+ {"file_name": "train/981.jpg", "text": "a car"}
982
+ {"file_name": "train/982.jpg", "text": "a car"}
983
+ {"file_name": "train/983.jpg", "text": "a car"}
984
+ {"file_name": "train/984.jpg", "text": "a car"}
985
+ {"file_name": "train/985.jpg", "text": "a car"}
986
+ {"file_name": "train/986.jpg", "text": "a car"}
987
+ {"file_name": "train/987.jpg", "text": "a car"}
988
+ {"file_name": "train/988.jpg", "text": "a car"}
989
+ {"file_name": "train/989.jpg", "text": "a car"}
990
+ {"file_name": "train/990.jpg", "text": "a car"}
991
+ {"file_name": "train/991.jpg", "text": "a car"}
992
+ {"file_name": "train/992.jpg", "text": "a car"}
993
+ {"file_name": "train/993.jpg", "text": "a car"}
994
+ {"file_name": "train/994.jpg", "text": "a car"}
995
+ {"file_name": "train/995.jpg", "text": "a car"}
996
+ {"file_name": "train/996.jpg", "text": "a car"}
997
+ {"file_name": "train/997.jpg", "text": "a car"}
998
+ {"file_name": "train/998.jpg", "text": "a car"}
999
+ {"file_name": "train/999.jpg", "text": "a car"}
1000
+ {"file_name": "train/1000.jpg", "text": "a car"}
models/ESD-X/esd-picasso_from_picasso-xattn_1-epochs_200.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:f52f71a02de7c24b6191ec4385a6a5b08a7b2177458a7998dfffed28a6b49f50
- size 175883766
pytorch_env.yaml ADDED
@@ -0,0 +1,228 @@
+ name: pytorch_env
+ channels:
+   - conda-forge
+   - defaults
+ dependencies:
+   - _libgcc_mutex=0.1=conda_forge
+   - _openmp_mutex=4.5=2_gnu
+   - aiohappyeyeballs=2.4.0=py310h06a4308_0
+   - aiohttp=3.10.5=py310h5eee18b_0
+   - aiosignal=1.2.0=pyhd3eb1b0_0
+   - anyio=4.2.0=py310h06a4308_0
+   - argon2-cffi=21.3.0=pyhd3eb1b0_0
+   - argon2-cffi-bindings=21.2.0=py310h7f8727e_0
+   - arrow-cpp=16.1.0=hc1eb8f0_0
+   - asttokens=2.0.5=pyhd3eb1b0_0
+   - async-lru=2.0.4=py310h06a4308_0
+   - async-timeout=4.0.3=py310h06a4308_0
+   - attrs=23.1.0=py310h06a4308_0
+   - aws-c-auth=0.6.19=h5eee18b_0
+   - aws-c-cal=0.5.20=hdbd6064_0
+   - aws-c-common=0.8.5=h5eee18b_0
+   - aws-c-compression=0.2.16=h5eee18b_0
+   - aws-c-event-stream=0.2.15=h6a678d5_0
+   - aws-c-http=0.6.25=h5eee18b_0
+   - aws-c-io=0.13.10=h5eee18b_0
+   - aws-c-mqtt=0.7.13=h5eee18b_0
+   - aws-c-s3=0.1.51=hdbd6064_0
+   - aws-c-sdkutils=0.1.6=h5eee18b_0
+   - aws-checksums=0.1.13=h5eee18b_0
+   - aws-crt-cpp=0.18.16=h6a678d5_0
+   - aws-sdk-cpp=1.10.55=h721c034_0
+   - babel=2.11.0=py310h06a4308_0
+   - beautifulsoup4=4.12.2=py310h06a4308_0
+   - blas=1.0=mkl
+   - bleach=4.1.0=pyhd3eb1b0_0
+   - boost-cpp=1.82.0=hdb19cb5_2
+   - bottleneck=1.3.7=py310ha9d4c09_0
+   - brotli-python=1.0.9=py310h6a678d5_8
+   - bzip2=1.0.8=hd590300_5
+   - c-ares=1.19.1=h5eee18b_0
+   - ca-certificates=2024.9.24=h06a4308_0
+   - certifi=2024.8.30=py310h06a4308_0
+   - cffi=1.16.0=py310h5eee18b_1
+   - charset-normalizer=2.0.4=pyhd3eb1b0_0
+   - comm=0.2.1=py310h06a4308_0
+   - datasets=2.19.1=py310h06a4308_0
+   - debugpy=1.6.7=py310h6a678d5_0
+   - decorator=5.1.1=pyhd3eb1b0_0
+   - defusedxml=0.7.1=pyhd3eb1b0_0
+   - dill=0.3.8=py310h06a4308_0
+   - exceptiongroup=1.2.0=py310h06a4308_0
+   - executing=0.8.3=pyhd3eb1b0_0
+   - frozenlist=1.4.0=py310h5eee18b_0
+   - gflags=2.2.2=h6a678d5_1
+   - glog=0.5.0=h6a678d5_1
+   - huggingface_hub=0.24.6=py310h06a4308_0
+   - icu=73.1=h6a678d5_0
+   - idna=3.7=py310h06a4308_0
+   - intel-openmp=2023.1.0=hdb19cb5_46306
+   - ipykernel=6.28.0=py310h06a4308_0
+   - ipython=8.20.0=py310h06a4308_0
+   - jedi=0.18.1=py310h06a4308_1
+   - jinja2=3.1.3=py310h06a4308_0
+   - json5=0.9.6=pyhd3eb1b0_0
+   - jsonschema=4.19.2=py310h06a4308_0
+   - jsonschema-specifications=2023.7.1=py310h06a4308_0
+   - jupyter-lsp=2.2.0=py310h06a4308_0
+   - jupyter_client=8.6.0=py310h06a4308_0
+   - jupyter_core=5.5.0=py310h06a4308_0
+   - jupyter_events=0.8.0=py310h06a4308_0
+   - jupyter_server=2.10.0=py310h06a4308_0
+   - jupyter_server_terminals=0.4.4=py310h06a4308_1
+   - jupyterlab=4.0.11=py310h06a4308_0
+   - jupyterlab_pygments=0.1.2=py_0
+   - jupyterlab_server=2.25.1=py310h06a4308_0
+   - krb5=1.20.1=h143b758_1
+   - ld_impl_linux-64=2.40=h55db66e_0
+   - libabseil=20240116.2=cxx17_h6a678d5_0
+   - libboost=1.82.0=h109eef0_2
+   - libbrotlicommon=1.0.9=h5eee18b_8
+   - libbrotlidec=1.0.9=h5eee18b_8
+   - libbrotlienc=1.0.9=h5eee18b_8
+   - libcurl=8.9.1=h251f7ec_0
+   - libedit=3.1.20230828=h5eee18b_0
+   - libev=4.33=h7f8727e_1
+   - libevent=2.1.12=hdbd6064_1
+   - libffi=3.4.2=h7f98852_5
+   - libgcc-ng=13.2.0=h77fa898_7
+   - libgomp=13.2.0=h77fa898_7
+   - libgrpc=1.62.2=h2d74bed_0
+   - libnghttp2=1.57.0=h2d74bed_0
+   - libnsl=2.0.1=hd590300_0
+   - libprotobuf=4.25.3=he621ea3_0
+   - libsodium=1.0.18=h7b6447c_0
+   - libssh2=1.11.0=h251f7ec_0
+   - libstdcxx-ng=11.2.0=h1234567_1
+   - libthrift=0.15.0=h1795dd8_2
+   - libuuid=1.41.5=h5eee18b_0
+   - libxcrypt=4.4.36=hd590300_1
+   - lz4-c=1.9.4=h6a678d5_1
+   - markupsafe=2.1.3=py310h5eee18b_0
+   - matplotlib-inline=0.1.6=py310h06a4308_0
+   - mistune=2.0.4=py310h06a4308_0
+   - mkl=2023.1.0=h213fc3f_46344
+   - mkl-service=2.4.0=py310h5eee18b_1
+   - mkl_fft=1.3.10=py310h5eee18b_0
+   - mkl_random=1.2.7=py310h1128e8f_0
+   - multidict=6.0.4=py310h5eee18b_0
+   - multiprocess=0.70.15=py310h06a4308_0
+   - nbclient=0.8.0=py310h06a4308_0
+   - nbconvert=7.10.0=py310h06a4308_0
+   - nbformat=5.9.2=py310h06a4308_0
+   - ncurses=6.5=h59595ed_0
+   - nest-asyncio=1.6.0=py310h06a4308_0
+   - notebook-shim=0.2.3=py310h06a4308_0
+   - numexpr=2.8.7=py310h85018f9_0
+   - numpy=1.26.4=py310h5f9d8c6_0
+   - numpy-base=1.26.4=py310hb5e798b_0
+   - openssl=3.0.15=h5eee18b_0
+   - orc=2.0.1=h2d29ad5_0
+   - overrides=7.4.0=py310h06a4308_0
+   - packaging=23.2=py310h06a4308_0
+   - pandas=2.2.2=py310h6a678d5_0
+   - pandocfilters=1.5.0=pyhd3eb1b0_0
+   - parso=0.8.3=pyhd3eb1b0_0
+   - pexpect=4.8.0=pyhd3eb1b0_3
+   - pip=24.0=pyhd8ed1ab_0
+   - platformdirs=3.10.0=py310h06a4308_0
+   - prometheus_client=0.14.1=py310h06a4308_0
+   - prompt-toolkit=3.0.43=py310h06a4308_0
+   - prompt_toolkit=3.0.43=hd3eb1b0_0
+   - psutil=5.9.0=py310h5eee18b_0
+   - ptyprocess=0.7.0=pyhd3eb1b0_2
+   - pure_eval=0.2.2=pyhd3eb1b0_0
+   - pyarrow=16.1.0=py310h1128e8f_0
+   - pycparser=2.21=pyhd3eb1b0_0
+   - pygments=2.15.1=py310h06a4308_1
+   - pysocks=1.7.1=py310h06a4308_0
+   - python=3.10.15=he870216_1
+   - python-dateutil=2.9.0post0=py310h06a4308_0
+   - python-fastjsonschema=2.16.2=py310h06a4308_0
+   - python-json-logger=2.0.7=py310h06a4308_0
+   - python-tzdata=2023.3=pyhd3eb1b0_0
+   - python-xxhash=2.0.2=py310h5eee18b_1
+   - pytz=2024.1=py310h06a4308_0
+   - pyyaml=6.0.1=py310h5eee18b_0
+   - pyzmq=25.1.2=py310h6a678d5_0
+   - re2=2022.04.01=h295c915_0
+   - readline=8.2=h8228510_1
+   - referencing=0.30.2=py310h06a4308_0
+   - requests=2.31.0=py310h06a4308_1
+   - rfc3339-validator=0.1.4=py310h06a4308_0
+   - rfc3986-validator=0.1.1=py310h06a4308_0
+   - rpds-py=0.10.6=py310hb02cf49_0
+   - s2n=1.3.27=hdbd6064_0
+   - send2trash=1.8.2=py310h06a4308_0
+   - setuptools=69.5.1=pyhd8ed1ab_0
+   - six=1.16.0=pyhd3eb1b0_1
+   - snappy=1.2.1=h6a678d5_0
+   - sniffio=1.3.0=py310h06a4308_0
+   - soupsieve=2.5=py310h06a4308_0
+   - sqlite=3.45.3=h5eee18b_0
+   - stack_data=0.2.0=pyhd3eb1b0_0
+   - tbb=2021.8.0=hdb19cb5_0
+   - terminado=0.17.1=py310h06a4308_0
+   - tinycss2=1.2.1=py310h06a4308_0
+   - tk=8.6.14=h39e8969_0
+   - tomli=2.0.1=py310h06a4308_0
+   - tornado=6.3.3=py310h5eee18b_0
+   - traitlets=5.7.1=py310h06a4308_0
+   - typing-extensions=4.11.0=py310h06a4308_0
+   - typing_extensions=4.11.0=py310h06a4308_0
+   - urllib3=2.2.1=py310h06a4308_0
+   - utf8proc=2.6.1=h5eee18b_1
+   - wcwidth=0.2.5=pyhd3eb1b0_0
+   - webencodings=0.5.1=py310h06a4308_1
+   - websocket-client=1.8.0=py310h06a4308_0
+   - wheel=0.43.0=pyhd8ed1ab_1
+   - xxhash=0.8.0=h7f8727e_3
+   - xz=5.4.6=h5eee18b_1
+   - yaml=0.2.5=h7b6447c_0
+   - yarl=1.11.0=py310h5eee18b_0
+   - zeromq=4.3.5=h6a678d5_0
+   - zlib=1.2.13=h5eee18b_1
+   - zstd=1.5.6=hc292b87_0
+   - pip:
+     - accelerate==0.30.1
+     - contourpy==1.2.1
+     - cycler==0.12.1
+     - diffusers==0.31.0
+     - filelock==3.14.0
+     - fonttools==4.51.0
+     - fsspec==2024.5.0
+     - git-lfs==1.6
+     - huggingface-hub==0.26.2
+     - importlib-metadata==7.1.0
+     - kiwisolver==1.4.5
+     - matplotlib==3.9.0
+     - mpmath==1.3.0
+     - networkx==3.3
+     - nvidia-cublas-cu12==12.1.3.1
+     - nvidia-cuda-cupti-cu12==12.1.105
+     - nvidia-cuda-nvrtc-cu12==12.1.105
+     - nvidia-cuda-runtime-cu12==12.1.105
+     - nvidia-cudnn-cu12==8.9.2.26
+     - nvidia-cufft-cu12==11.0.2.54
+     - nvidia-curand-cu12==10.3.2.106
+     - nvidia-cusolver-cu12==11.4.5.107
+     - nvidia-cusparse-cu12==12.1.0.106
+     - nvidia-nccl-cu12==2.20.5
+     - nvidia-nvjitlink-cu12==12.4.127
+     - nvidia-nvtx-cu12==12.1.105
+     - pillow==10.3.0
+     - pyparsing==3.1.2
+     - regex==2024.5.15
+     - safetensors==0.4.3
+     - scipy==1.13.0
+     - sympy==1.12
+     - tokenizers==0.19.1
+     - torch==2.3.0
+     - torchaudio==2.3.0
+     - torchvision==0.18.0
+     - tqdm==4.66.4
+     - transformers==4.41.0
+     - triton==2.3.0
+     - tzdata==2024.1
+     - zipp==3.18.2
+ prefix: /home/lu.kev/.conda/envs/pytorch_env
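For reproducibility, the environment can be recreated with conda env create -f pytorch_env.yaml and then sanity-checked against the pins above. A minimal sketch (local build suffixes such as +cu121 may appear in the reported version strings):

import torch
import diffusers
import transformers

# These should match the pins in pytorch_env.yaml.
print("torch:", torch.__version__)                # pinned: 2.3.0
print("diffusers:", diffusers.__version__)        # pinned: 0.31.0
print("transformers:", transformers.__version__)  # pinned: 4.41.0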
test_gpu.py ADDED
@@ -0,0 +1,11 @@
+ import torch
+
+ # Check if PyTorch is installed and environment is working
+ print("Conda environment is working!")
+
+ # Check for GPU availability
+ if torch.cuda.is_available():
+     print("GPU is available!")
+     print(f"GPU Name: {torch.cuda.get_device_name(0)}")
+ else:
+     print("GPU is not available.")