bhavitvyamalik committed
Commit 0e55637 · 1 Parent(s): 6cf649a

update README

Files changed (1)
  1. README.md +15 -15
README.md CHANGED
@@ -13,22 +13,22 @@ Note that this model is primarily aimed at being fine-tuned on tasks like multi-
 ### How to use❓
 You will need to clone the model from [here](https://github.com/gchhablani/multilingual-image-captioning). An example of usage is shown below:
 ```python
- >>> from torchvision.io import read_image
- >>> import numpy as np
- >>> import os
- >>> from transformers import CLIPProcessor, MBart50TokenizerFast
- >>> from model.flax_clip_vision_mbart.modeling_clip_vision_mbart import FlaxCLIPVisionMBartForConditionalGeneration
- >>> image_path = os.path.join('images/val2014', os.listdir('images/val2014')[0])
- >>> img = read_image(image_path) # reading image
- >>> clip_processor = CLIPProcessor.from_pretrained('openai/clip-vit-base-patch32')
- >>> clip_outputs = clip_processor(images=img)
- >>> clip_outputs['pixel_values'][0] = clip_outputs['pixel_values'][0].transpose(1,2,0) # Need to transpose images as model expected channel last images.
- >>> tokenizer = MBart50TokenizerFast.from_pretrained('facebook/mbart-large-50"')
- >>> model = FlaxCLIPVisionBertForMaskedLM.from_pretrained('flax-community/clip-vit-base-patch32_mbart-large-50')
- >>> output_ids = model.generate(batch["pixel_values"], forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"], num_beams=4, max_length=64).sequences # "en_XX is the language code in which you want the translation
+ from torchvision.io import read_image
+ import numpy as np
+ import os, wget
+ from transformers import CLIPProcessor, MBart50TokenizerFast
+ from model.flax_clip_vision_mbart.modeling_clip_vision_mbart import FlaxCLIPVisionMBartForConditionalGeneration
+ img_path = wget.download("http://images.cocodataset.org/val2017/000000397133.jpg") # download the image; returns the local file path
+ img = read_image(img_path) # read the image into a tensor
+ clip_processor = CLIPProcessor.from_pretrained('openai/clip-vit-base-patch32')
+ clip_outputs = clip_processor(images=img)
+ clip_outputs['pixel_values'][0] = clip_outputs['pixel_values'][0].transpose(1,2,0) # transpose the image: the model expects channel-last inputs
+ tokenizer = MBart50TokenizerFast.from_pretrained('facebook/mbart-large-50')
+ model = FlaxCLIPVisionMBartForConditionalGeneration.from_pretrained('flax-community/clip-vit-base-patch32_mbart-large-50')
+ output_ids = model.generate(np.array(clip_outputs['pixel_values']), forced_bos_token_id=tokenizer.lang_code_to_id["es_XX"], num_beams=4, max_length=64).sequences # es_XX is the language code of the desired caption language
 # en_XX: English, fr_XX: French, es_XX: Spanish, de_DE: Deutsch
- >>> output_string = tokenizer.batch_decode(output_ids.reshape(-1, 64), skip_special_tokens=True, max_length=64)
- >>> output_string # relevant caption
+ output_string = tokenizer.batch_decode(output_ids.reshape(-1, 64), skip_special_tokens=True, max_length=64)
+ output_string # Un restaurante u otro lugar para comer en el Hotel ("A restaurant or another place to eat at the Hotel")
 ```
 
 ## Training data 🏋🏻‍♂️
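
The language codes listed in the comment above select the caption language. As a minimal sketch, assuming the `model`, `tokenizer`, `clip_outputs`, and `np` from the example are already defined, one caption per listed language could be generated like this:

```python
# Sketch: generate one caption for each language listed in the README comment.
# Assumes model, tokenizer, clip_outputs, and np from the example above.
pixel_values = np.array(clip_outputs['pixel_values'])  # batch of channel-last images
for lang_code in ["en_XX", "fr_XX", "es_XX", "de_DE"]:
    output_ids = model.generate(
        pixel_values,
        forced_bos_token_id=tokenizer.lang_code_to_id[lang_code],  # force the target language
        num_beams=4,
        max_length=64,
    ).sequences
    caption = tokenizer.batch_decode(output_ids.reshape(-1, 64), skip_special_tokens=True)[0]
    print(lang_code, caption)
```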