Spaces:

robot0820
/

VLM_Test

Runtime error

App Files Files Community

robot0820 committed on Aug 26

Commit

dcef8cb

verified ·

1 Parent(s): df33f8b

Create app.py

Browse files

Files changed (1) hide show

app.py +52 -0

app.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import torch
+from transformers import AutoModelForCausalLM
+from deepseek_vl.models import VLChatProcessor, MultiModalityCausalLM
+from deepseek_vl.utils.io import load_pil_images
+# specify the path to the model
+model_path = "deepseek-ai/deepseek-vl-7b-chat"
+vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
+tokenizer = vl_chat_processor.tokenizer
+vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
+vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()
+conversation = [
+    {
+        "role": "User",
+        "content": "<image_placeholder>Describe each stage of this image.",
+        "images": ["./images/training_pipelines.png"]
+    },
+    {
+        "role": "Assistant",
+        "content": ""
+    }
+]
+# load images and prepare for inputs
+pil_images = load_pil_images(conversation)
+prepare_inputs = vl_chat_processor(
+    conversations=conversation,
+    images=pil_images,
+    force_batchify=True
+).to(vl_gpt.device)
+# run image encoder to get the image embeddings
+inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)
+# run the model to get the response
+outputs = vl_gpt.language_model.generate(
+    inputs_embeds=inputs_embeds,
+    attention_mask=prepare_inputs.attention_mask,
+    pad_token_id=tokenizer.eos_token_id,
+    bos_token_id=tokenizer.bos_token_id,
+    eos_token_id=tokenizer.eos_token_id,
+    max_new_tokens=512,
+    do_sample=False,
+    use_cache=True
+)
+answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
+print(f"{prepare_inputs['sft_format'][0]}", answer)