memray committed on
Commit bb6d72b · verified · 1 Parent(s): 4e5802b

Update README.md

Files changed (1)
  1. README.md +14 -15
README.md CHANGED
@@ -43,31 +43,31 @@ pip -r requirements.txt
  ```python
  from src.model import MMEBModel
  from src.arguments import ModelArguments
- from src.utils import load_processor
-
- import torch
- from transformers import HfArgumentParser, AutoProcessor
+ from src.model_utils import load_processor, QWEN2_VL, vlm_image_tokens
  from PIL import Image
- import numpy as np
-
  
- model_args = (
-     model_name='TIGER-Lab/VLM2Vec-Qwen2VL',
+ model_args = ModelArguments(
+     model_name='Qwen/Qwen2-VL-7B-Instruct',
+     checkpoint_path='TIGER-Lab/VLM2Vec-Qwen2VL-7B',
      pooling='last',
      normalize=True,
-     model_backbone='qwen2_vl')
+     model_backbone='qwen2_vl',
+     lora=True
+ )
  
  processor = load_processor(model_args)
-
  model = MMEBModel.load(model_args)
- model.eval()
  model = model.to('cuda', dtype=torch.bfloat16)
+ model.eval()
  
  # Image + Text -> Text
- inputs = processor(text='<image> Represent the given image with the following question: What is in the image',
+ inputs = processor(text=f'{vlm_image_tokens[QWEN2_VL]} Represent the given image with the following question: What is in the image',
                     images=Image.open('figures/example.jpg'),
                     return_tensors="pt")
  inputs = {key: value.to('cuda') for key, value in inputs.items()}
+ inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
+ inputs['image_grid_thw'] = inputs['image_grid_thw'].unsqueeze(0)
  qry_output = model(qry=inputs)["qry_reps"]
  
  string = 'A cat and a dog'
@@ -77,7 +77,7 @@ inputs = processor(text=string,
  inputs = {key: value.to('cuda') for key, value in inputs.items()}
  tgt_output = model(tgt=inputs)["tgt_reps"]
  print(string, '=', model.compute_similarity(qry_output, tgt_output))
- ## A cat and a dog = tensor([[0.4414]], device='cuda:0', dtype=torch.bfloat16)
+ ## A cat and a dog = tensor([[0.3301]], device='cuda:0', dtype=torch.bfloat16)
  
  string = 'A cat and a tiger'
  inputs = processor(text=string,
@@ -86,8 +86,7 @@ inputs = processor(text=string,
  inputs = {key: value.to('cuda') for key, value in inputs.items()}
  tgt_output = model(tgt=inputs)["tgt_reps"]
  print(string, '=', model.compute_similarity(qry_output, tgt_output))
- ## A cat and a tiger = tensor([[0.3555]], device='cuda:0', dtype=torch.bfloat16)
-
+ ## A cat and a tiger = tensor([[0.2891]], device='cuda:0', dtype=torch.bfloat16)
  ```
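
For reference, the updated snippet encodes the image query once and then scores each candidate caption by encoding it as a target and calling `model.compute_similarity`. Below is a minimal sketch, not repository code, of wrapping that pattern in a loop to rank several captions at once; the `score_candidates` helper is an illustrative assumption that reuses only the `processor`, `model(tgt=...)`, and `model.compute_similarity` calls shown in the README snippet above.

```python
# Minimal sketch (assumption, not part of the repository): rank several
# candidate captions against the image query built in the snippet above.
def score_candidates(model, processor, qry_output, candidates):
    scores = {}
    for text in candidates:
        # Same target-encoding pattern as the README: text-only input,
        # moved to the GPU, then encoded into "tgt_reps".
        inputs = processor(text=text, return_tensors="pt")
        inputs = {key: value.to('cuda') for key, value in inputs.items()}
        tgt_output = model(tgt=inputs)["tgt_reps"]
        # compute_similarity returns a 1x1 tensor here (see the printed
        # outputs above), so .item() extracts the scalar score.
        scores[text] = model.compute_similarity(qry_output, tgt_output).item()
    return scores

# Usage, assuming `model`, `processor`, and `qry_output` from the snippet above:
# score_candidates(model, processor, qry_output,
#                  ['A cat and a dog', 'A cat and a tiger'])
```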