sourabhbargi11 committed
Commit 7e847dc · verified · 1 parent: fbf0945

Update app.py

Files changed (1)
  1. app.py +19 -11
app.py CHANGED
@@ -2,6 +2,14 @@ import streamlit as st
 from PIL import Image
 from transformers import GPT2TokenizerFast, ViTImageProcessor, VisionEncoderDecoderModel,RobertaTokenizerFast
 
+
+import requests
+from PIL import Image
+
+from transformers import GPT2TokenizerFast, ViTImageProcessor, VisionEncoderDecoderModel
+
+
+
 #import torch
 #from transformers import BlipProcessor, BlipForConditionalGeneration
 
@@ -12,22 +20,22 @@ def set_page_config():
         layout='wide',
     )
 
-#def initialize_model():
-#    hf_model = "Salesforce/blip-image-captioning-large"
-#    device = 'cuda' if torch.cuda.is_available() else 'cpu'
-#    processor = BlipProcessor.from_pretrained(hf_model)
-#    model = BlipForConditionalGeneration.from_pretrained(hf_model).to(device)  # type: ignore
-#    return processor, model, device
+def initialize_model():
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    # load a fine-tuned image captioning model and corresponding tokenizer and image processor
+    model = VisionEncoderDecoderModel.from_pretrained("sourabhbargi11/caption4").to(device)
+    tokenizer = GPT2TokenizerFast.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+    image_processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+    return image_processor, model, tokenizer, device
 
 def upload_image():
     return st.sidebar.file_uploader("Upload an image (we aren't storing anything)", type=["jpg", "jpeg", "png"])
 
 def resize_image(image, max_width):
-    width, height = image.size
-    if width > max_width:
-        ratio = max_width / width
-        height = int(height * ratio)
-        image = image.resize((max_width, height))
+
+    image = image.resize((max_width, height))
+    if image.mode == "L":
+        image = image.convert("RGB")
     return image
 
 def generate_caption(processor, model, device, image):
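
As committed, `initialize_model()` calls `torch.cuda.is_available()` while `import torch` is still commented out, and the added imports duplicate `PIL`/`transformers` lines already present at the top of the file. Below is a minimal sketch of how the loader and a matching caption step could fit together, assuming the `sourabhbargi11/caption4` checkpoint follows the standard `VisionEncoderDecoderModel` generate/decode flow; the extra `tokenizer` parameter and the `max_new_tokens=32` cap are illustrative choices, not part of this commit:

```python
import torch
from transformers import GPT2TokenizerFast, ViTImageProcessor, VisionEncoderDecoderModel

def initialize_model():
    # torch must actually be imported for this device check to work
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = VisionEncoderDecoderModel.from_pretrained("sourabhbargi11/caption4").to(device)
    tokenizer = GPT2TokenizerFast.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    image_processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    return image_processor, model, tokenizer, device

def generate_caption(processor, model, tokenizer, device, image):
    # encode the PIL image to pixel values, generate token ids, decode to text
    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
    output_ids = model.generate(pixel_values, max_new_tokens=32)  # illustrative length cap
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
```

The committed signature `generate_caption(processor, model, device, image)` takes no tokenizer argument; decoding the generated ids needs one, hence the extra parameter in the sketch.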
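The new `resize_image()` body also references `height`, whose definition (`width, height = image.size`) this commit deletes, so the function would raise a `NameError` as committed, and it now resizes unconditionally rather than only shrinking over-wide images. A sketch that keeps the old aspect-ratio logic alongside the new grayscale-to-RGB conversion:

```python
def resize_image(image, max_width):
    # keep the old behavior: only downscale images wider than max_width,
    # preserving the aspect ratio
    width, height = image.size
    if width > max_width:
        ratio = max_width / width
        image = image.resize((max_width, int(height * ratio)))
    # keep the new fix: the ViT image processor expects 3-channel input,
    # so convert grayscale ("L" mode) images to RGB
    if image.mode == "L":
        image = image.convert("RGB")
    return image
```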