Wendy-Fly committed on
Commit
656cc39
·
verified ·
1 Parent(s): 429af14

Upload generate_prompt.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. generate_prompt.py +26 -26
generate_prompt.py CHANGED
@@ -68,7 +68,7 @@ for batch_idx in tqdm(range(begin, end, batch_size)):
68
  sd_ans = []
69
  # while True:
70
  for idx, i in enumerate(batch):
71
- save_ = [{
72
  "role": "user",
73
  "content": [
74
  {
@@ -79,7 +79,7 @@ for batch_idx in tqdm(range(begin, end, batch_size)):
79
  "text": "Please help me write a prompt for image editing on this picture. The requirements are as follows: complex editing instructions should include two to five simple editing instructions involving spatial relationships (simple editing instructions such as ADD: add an object to the left of a certain object, DELETE: delete a certain object, MODIFY: change a certain object into another object). We hope that the editing instructions can have simple reasoning and can also include some abstract concept-based editing (such as making the atmosphere more romantic, or making the diet healthier, or making the boy more handsome and the girl more beautiful, etc.). Please give me clear editing instructions and also consider whether such editing instructions are reasonable."},
80
  ],
81
  "result":""
82
- }]
83
  #idx_real = batch_idx * batch_size + idx
84
  messages = batch[idx]
85
  save_[0]['content'][0]['image'] = messages['content'][0]['image']
@@ -88,30 +88,30 @@ for batch_idx in tqdm(range(begin, end, batch_size)):
88
  data_list.append(messages)
89
  save_list.append(save_)
90
  #print(len(data_list))
91
- text = processor.apply_chat_template([messages], tokenize=False, add_generation_prompt=True)
92
- #print(len(text))
93
- image_inputs, video_inputs = process_vision_info([messages])
94
- inputs = processor(
95
- text=[text],
96
- images=image_inputs,
97
- videos=video_inputs,
98
- padding=True,
99
- return_tensors="pt",
100
- )
101
- inputs = inputs.to(model.device)
102
-
103
- # Inference: Generation of the output
104
- generated_ids = model.generate(**inputs, max_new_tokens=128)
105
- #print(generated_ids.shape)
106
- generated_ids_trimmed = [
107
- out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
108
- ]
109
- output_text = processor.batch_decode(
110
- generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
111
- )
112
- #print(output_text)
113
- save_[0]['result'] = output_text
114
- save_data.append(save_)
115
 
116
  if batch_idx % 4 ==0:
117
  write_json(json_path,save_data)
 
68
  sd_ans = []
69
  # while True:
70
  for idx, i in enumerate(batch):
71
+ save_ = {
72
  "role": "user",
73
  "content": [
74
  {
 
79
  "text": "Please help me write a prompt for image editing on this picture. The requirements are as follows: complex editing instructions should include two to five simple editing instructions involving spatial relationships (simple editing instructions such as ADD: add an object to the left of a certain object, DELETE: delete a certain object, MODIFY: change a certain object into another object). We hope that the editing instructions can have simple reasoning and can also include some abstract concept-based editing (such as making the atmosphere more romantic, or making the diet healthier, or making the boy more handsome and the girl more beautiful, etc.). Please give me clear editing instructions and also consider whether such editing instructions are reasonable."},
80
  ],
81
  "result":""
82
+ }
83
  #idx_real = batch_idx * batch_size + idx
84
  messages = batch[idx]
85
  save_[0]['content'][0]['image'] = messages['content'][0]['image']
 
88
  data_list.append(messages)
89
  save_list.append(save_)
90
  #print(len(data_list))
91
+ text = processor.apply_chat_template([data_list], tokenize=False, add_generation_prompt=True)
92
+ #print(len(text))
93
+ image_inputs, video_inputs = process_vision_info([data_list])
94
+ inputs = processor(
95
+ text=[text],
96
+ images=image_inputs,
97
+ videos=video_inputs,
98
+ padding=True,
99
+ return_tensors="pt",
100
+ )
101
+ inputs = inputs.to(model.device)
102
+
103
+ # Inference: Generation of the output
104
+ generated_ids = model.generate(**inputs, max_new_tokens=128)
105
+ #print(generated_ids.shape)
106
+ generated_ids_trimmed = [
107
+ out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
108
+ ]
109
+ output_text = processor.batch_decode(
110
+ generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
111
+ )
112
+ #print(output_text)
113
+ save_[0]['result'] = output_text
114
+ save_data.append(save_)
115
 
116
  if batch_idx % 4 ==0:
117
  write_json(json_path,save_data)