RE-N-Y committed on
Commit 1205833 · verified · 1 Parent(s): 17bf699

Update README.md

Files changed (1):
  1. README.md +105 -33
README.md CHANGED
@@ -33,51 +33,123 @@ The model has been finetuned using LoRA to generate python pseudocode outputs to

  ## Uses

- The inference method is identical to [LLaVA-1.5-13B](https://huggingface.co/llava-hf/llava-1.5-13b-hf).

  ```python
  import torch
- from transformers import AutoProcessor, LlavaForConditionalGeneration
  from PIL import Image

- image = Image.open("<path to image>")
- image = image.convert("RGB")
-
- question = "What material attribute do the stove, the oven behind the white and dirty wall and the tea_kettle have in common?"
-
- codes = """
- selected_wall = select(wall)
- filtered_wall = filter(selected_wall, ['white', 'dirty'])
- related_oven = relate(oven, behind, o, filtered_wall)
- selected_stove = select(stove)
- selected_tea_kettle = select(tea_kettle)
- materials = query_material(related_oven, selected_stove, selected_tea_kettle)
- material = common(materials)
- """

- prompt = """
- USER: <image>
- Executes the code and logs the results step-by-step to provide an answer to the question.
- Question
- {question}
- Code
- {codes}
- ASSISTANT:
- Log
  """

- prompt = prompt.format(question=question, codes=codes)
-
- model = LlavaForConditionalGeneration.from_pretrained("RE-N-Y/logic2vision", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True)

- processor = AutoProcessor.from_pretrained("RE-N-Y/logic2vision")
- processor.tokenizer.pad_token = processor.tokenizer.eos_token
- processor.tokenizer.padding_side = "left"

- prompts = processor(images=image, text=prompt, return_tensors="pt")

  generate_ids = model.generate(**inputs, max_new_tokens=256)
- processor.batch_decode(generate_ids, skip_special_tokens=True)
  ```

  ## Bias, Risks, and Limitations
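The removed snippet above was also not runnable as written: the processor output was bound to `prompts`, while `model.generate(**inputs, ...)` consumed an undefined `inputs`, and the decoded output was never captured. The rewritten version below fixes this; as a minimal sketch, the corrected call sequence alone (keeping the removed snippet's variable names) would have been:

```python
# Minimal fix for the removed snippet (illustrative only; the commit
# instead rewrites the whole example): keep the variable names consistent.
inputs = processor(images=image, text=prompt, return_tensors="pt")  # was `prompts`
generate_ids = model.generate(**inputs, max_new_tokens=256)
output = processor.batch_decode(generate_ids, skip_special_tokens=True)
```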
 

  ## Uses

+ The inference method is similar to [LLaVA-1.5-13B](https://huggingface.co/llava-hf/llava-1.5-13b-hf).
+
+ ### Example images
+
+ [zebras.jpg](https://huggingface.co/RE-N-Y/logic2vision/resolve/main/zebras.jpg)
+
+ [room.jpg](https://huggingface.co/RE-N-Y/logic2vision/resolve/main/room.jpg)

  ```python
  import torch
+ from transformers import LlavaProcessor, LlavaForConditionalGeneration
+ import requests
  from PIL import Image

+ class LLaVACodeTemplate:
+     prompt = """
+ USER: <image>
+ Executes the code and logs the results step-by-step to provide an answer to the question.
+ Question
+ {question}
+ Code
+ {codes}
+ ASSISTANT:
+ Log
+ """
+
+     answer = """
+ {logs}
+ Answer:
+ {answer}</s>
+ """
+
+ template = LLaVACodeTemplate()
+ model = LlavaForConditionalGeneration.from_pretrained("RE-N-Y/logic2vision", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True)
+ model.to("cuda")
+
+ processor = LlavaProcessor.from_pretrained("RE-N-Y/logic2vision")
+ processor.tokenizer.pad_token = processor.tokenizer.eos_token
+ processor.tokenizer.padding_side = "left"

+ image = Image.open(requests.get("https://huggingface.co/RE-N-Y/logic2vision/resolve/main/zebras.jpg", stream=True).raw)
+ question = "What else in the image is striped as the rope and the mane to the left of the white clouds?"
+ codes = """selected_clouds = select(clouds)
+ filtered_clouds = filter(selected_clouds, ['white'])
+ related_mane = relate(mane, to the left of, o, filtered_clouds)
+ selected_rope = select(rope)
+ pattern = query_pattern(['selected_rope', 'related_mane'])
+ result = select(objects, attr=pattern)
  """

+ prompt = template.prompt.format(question=question, codes=codes)
+ inputs = processor(images=image, text=prompt, return_tensors="pt")
+ inputs.to("cuda")

+ generate_ids = model.generate(**inputs, max_new_tokens=256)
+ output = processor.batch_decode(generate_ids, skip_special_tokens=True)
+ print(output[0])
+
+ # USER:
+ # Executes the code and logs the results step-by-step to provide an answer to the question.
+ # Question
+ # What else in the image is striped as the rope and the mane to the left of the white clouds?
+
+ # Code
+ # selected_clouds = select(clouds)
+ # filtered_clouds = filter(selected_clouds, ['white'])
+ # related_mane = relate(mane, to the left of, o, filtered_clouds)
+ # selected_rope = select(rope)
+ # pattern = query_pattern(['selected_rope', 'related_mane'])
+ # result = select(objects, attr=pattern)
+
+ # ASSISTANT:
+ # Log
+ # ('clouds', ['white'])
+ # ('clouds', ['white'])
+ # ('mane', ['striped'])
+ # ('rope', ['no object'])
+ # ['the question itself is problematic']
+ # ['the question itself is problematic']
+ # Answer:
+ # the question itself is problematic
+
+
+ image = Image.open(requests.get("https://huggingface.co/RE-N-Y/logic2vision/resolve/main/room.jpg", stream=True).raw)
+ question = "What material do the chair and the table have in common?"
+ codes = """selected_chair = select(chair)
+ selected_table = select(table)
+ materials = query_material([selected_chair, selected_table])
+ common_material = common(materials)
+ """

+ prompt = template.prompt.format(question=question, codes=codes)
+ inputs = processor(images=image, text=prompt, return_tensors="pt")
+ inputs.to("cuda")

  generate_ids = model.generate(**inputs, max_new_tokens=256)
+ output = processor.batch_decode(generate_ids, skip_special_tokens=True)
+ print(output[0])
+
+ # USER:
+ # Executes the code and logs the results step-by-step to provide an answer to the question.
+ # Question
+ # What material do the chair and the table have in common?
+ # Code
+ # selected_chair = select(chair)
+ # selected_table = select(table)
+ # materials = query_material([selected_chair, selected_table])
+ # common_material = common(materials)
+
+ # ASSISTANT:
+ # Log
+ # ('chair', ['wood'])
+ # ('table', ['wood'])
+ # [['wood'], ['wood']]
+ # ['wood']
+ # Answer:
+ # wood
  ```
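The `codes` strings above use a small visual-reasoning pseudocode vocabulary (`select`, `filter`, `relate`, `query_material`, `query_pattern`, `common`) that the README does not define. The ops are never executed as Python; the finetuned model reads the pseudocode as text and generates the step-by-step log itself. As a rough mental model only, here is a sketch of the semantics the worked examples imply (the signatures are assumptions, not the project's API):

```python
# Hypothetical stubs for the pseudocode ops, inferred from the worked
# examples above. logic2vision never calls these functions; the model
# interprets the pseudocode text directly and emits the log.

def select(name, attr=None):
    """Pick out scene objects by category (optionally by attribute),
    e.g. select(clouds) or select(objects, attr=pattern)."""

def filter(objects, attributes):
    """Keep only objects carrying all listed attributes, e.g. ['white']."""

def relate(name, relation, role, anchor):
    """Find a `name` object standing in `relation` to `anchor`,
    e.g. relate(mane, to the left of, o, filtered_clouds)."""

def query_material(objects):
    """Look up each object's material, e.g. [['wood'], ['wood']]."""

def query_pattern(objects):
    """Look up each object's pattern, e.g. ['striped']."""

def common(values):
    """Return the attribute shared by all inputs, e.g. ['wood'] -> wood."""
```

Note how the generated logs surface grounding failures: in the zebras example `select(rope)` logs `('rope', ['no object'])`, and the model answers that the question itself is problematic rather than guessing.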

  ## Bias, Risks, and Limitations