zainimam committed on
Commit
222fb60
·
verified ·
1 Parent(s): 1ecc1ca

Updated main.py

Browse files
Files changed (1) hide show
  1. main.py +17 -16
main.py CHANGED
@@ -1,21 +1,22 @@
"""Text-only generation demo using a 4-bit quantized Molmo checkpoint.

Loads the model and tokenizer from the Hugging Face Hub, runs a single
prompt through `generate`, and prints the decoded response.
"""
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the tokenizer and model from Hugging Face
model_name = "impactframes/molmo-7B-D-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Example input prompt
prompt = "What is the meaning of life?"

# Tokenize the input
inputs = tokenizer(prompt, return_tensors="pt")

# Generate output; no_grad() disables gradient tracking for inference
with torch.no_grad():
    outputs = model.generate(inputs.input_ids, max_length=100)

# Decode the output (includes the prompt tokens) and print it
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
"""Describe an image with the Molmo-7B-D vision-language model.

Downloads a sample image, runs it through Molmo with the prompt
"Describe this image.", and prints the generated description.

Requires: transformers, torch, Pillow, requests. The Molmo checkpoint
ships custom modeling/processing code, hence trust_remote_code=True.
"""
import requests
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig

MODEL_ID = 'allenai/Molmo-7B-D-0924'

# Load the processor and model. torch_dtype='auto' keeps the checkpoint's
# native precision (avoids blowing memory on fp32); device_map='auto'
# places weights on available devices.
processor = AutoProcessor.from_pretrained(
    MODEL_ID, trust_remote_code=True, torch_dtype='auto', device_map='auto'
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, trust_remote_code=True, torch_dtype='auto', device_map='auto'
)

# Download an image; stream=True lets PIL read directly from the response body
image_url = "https://picsum.photos/id/237/536/354"
image = Image.open(requests.get(image_url, stream=True).raw)

# Process the image with some input text. Molmo's custom processor is
# invoked via .process(), not the usual processor(...) __call__.
inputs = processor.process(images=[image], text="Describe this image.")
# Move tensors to the model's device and add a batch dimension
inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}

# Generate text: Molmo's generate_from_batch expects a GenerationConfig
# and needs the tokenizer so it can honor stop_strings
output = model.generate_from_batch(
    inputs,
    GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
    tokenizer=processor.tokenizer,
)

# The output sequence echoes the prompt; decode only the new tokens
generated_tokens = output[0, inputs['input_ids'].size(1):]
generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
print(generated_text)