Upload infer_3.py with huggingface_hub
infer_3.py CHANGED (+12 -12)
@@ -19,11 +19,7 @@ def write_json(file_path, data):
 # default: Load the model on the available device(s)
 print(torch.cuda.device_count())
 model_path = "/home/zbz5349/WorkSpace/aigeeks/Qwen2.5-VL/LLaMA-Factory/output/Qwen2.5-VL-3B_all"
-# model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-#     model_path, torch_dtype="auto", device_map="auto"
-# )
 
-# We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     model_path,
     torch_dtype=torch.bfloat16,
@@ -50,8 +46,8 @@ for batch_idx in tqdm(range(begin, end, batch_size)):
     image_list = []
     input_text_list = []
     data_list = []
-
-
+    save_list = []
+    sd_ans = []
     # while True:
     for idx, i in enumerate(batch):
         save_ = {
@@ -66,7 +62,8 @@ for batch_idx in tqdm(range(begin, end, batch_size)):
                 {"type": "image", "image": "file:///path/to/image2.jpg"},
                 {"type": "text", "text": "Describe this video."},
             ],
-            "answer":""
+            "answer":"None",
+            "result":"None",
         }
         messages = {
             "role": "user",
@@ -95,8 +92,9 @@ for batch_idx in tqdm(range(begin, end, batch_size)):
         save_['content'][1]['image'] = image_path
         save_['content'][2]['text'] = question
         save_['answer'] = answer
+        sd_ans.append(answer)
         data_list.append(messages)
-
+        save_list.append(save_)
 
     text = processor.apply_chat_template(data_list, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs, video_kwargs = process_vision_info(data_list, return_video_kwargs=True)
@@ -119,11 +117,13 @@ for batch_idx in tqdm(range(begin, end, batch_size)):
     output_text = processor.batch_decode(
         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
     )
-
-
-
-
+    for idx,x in enumerate(output_text):
+        save_list[idx]['result'] = x
+        save_data.append(save_list[idx])
+
     print("correct_num", correct_num)
     write_json("infer_answer_finetune.json",save_data)
 
+
+
 
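The net effect of the change is a small bookkeeping pattern: each batch builds a save_list of per-sample records (prompt content, gold answer, and a "result" placeholder) alongside the data_list of chat messages, and after batch_decode each decoded string is written back into its record before the whole batch is appended to save_data and flushed with write_json. A minimal, self-contained sketch of that pattern in isolation; the decode_batch stub and the sample data are illustrative stand-ins, not from the script:

import json

save_data = []  # accumulated across batches, as in the script

def write_json(file_path, data):
    # same role as the script's write_json helper: dump accumulated records to disk
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

def collect_batch_results(batch, decode_batch):
    # one record per sample, mirroring the diff's save_ dict with
    # "answer" (gold) and "result" (model output, filled in below)
    save_list, sd_ans, data_list = [], [], []
    for sample in batch:
        record = {"content": sample["content"],
                  "answer": sample["answer"],
                  "result": "None"}
        sd_ans.append(sample["answer"])
        data_list.append({"role": "user", "content": sample["content"]})
        save_list.append(record)
    # decode_batch stands in for the real generate() + processor.batch_decode() step
    output_text = decode_batch(data_list)
    for idx, x in enumerate(output_text):
        save_list[idx]["result"] = x          # pair each output with its record
        save_data.append(save_list[idx])

# illustrative usage with a stub decoder
batch = [{"content": [{"type": "text", "text": "Describe this video."}],
          "answer": "a cat"}]
collect_batch_results(batch, decode_batch=lambda msgs: ["a cat on a mat"] * len(msgs))
write_json("infer_answer_finetune.json", save_data)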
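One consequence of this design worth noting: since every saved record now carries both the gold "answer" and the decoded "result" (where the old version stored only "answer":""), the dumped infer_answer_finetune.json can be re-scored offline without rerunning inference.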