Issues running the model.
#1
by
SospiDev
- opened
I am trying to run Qwen2-VL-72B-Instruct-GPTQ-Int8-tpfix
For this I have rented GPU cloud (2xA100)
I tried to follow the instructions provided by Hugging face, but when adding the tensor parallel = 2, the model is not working. (Image doesn't seem to be recognized).
Some guidance and help will be quite appreciated.
import torch
from PIL import Image
from transformers import AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info
from vllm import LLM, SamplingParams
# Initialize the tokenizer and processor
tokenizer = AutoTokenizer.from_pretrained("CalamitousFelicitousness/Qwen2-VL-72B-Instruct-GPTQ-Int8-tpfix", do_rescale=False)
processor = AutoProcessor.from_pretrained("CalamitousFelicitousness/Qwen2-VL-72B-Instruct-GPTQ-Int8-tpfix", do_rescale=False)
# Define sampling parameters
sampling_params = SamplingParams(temperature=0.7, top_p=0.8, max_tokens=15000)
# Initialize the LLM with tensor parallelism
llm = LLM(model="CalamitousFelicitousness/Qwen2-VL-72B-Instruct-GPTQ-Int8-tpfix", tensor_parallel_size=2)
# Load the image
image_path = "k1_1065_test_form-1.png"
image = Image.open(image_path)
# Prepare messages
prompt = "Please extract the information from the uploaded form"
messages = [
{
"role": "user",
"content": [
{"type": "image", "image": image_path},
{"type": "text", "text": prompt }
],
}
]
# Prepare text
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
image_inputs, video_inputs = process_vision_info(messages)
# Generate outputs
outputs = llm.generate([text], sampling_params)
# Decode and print results
generated_ids = [output.outputs[0].token_ids for output in outputs]
generated_ids_trimmed = [
out_ids[len(in_ids):] for in_ids, out_ids in zip(text, generated_ids)
]
output_text = tokenizer.batch_decode(generated_ids_trimmed, skip_special_tokens=True)
print("Output Text:", output_text)