# Spaces: Runtime error  (page-status text apparently captured alongside this source; not program code)
import streamlit as st
import torch
from PIL import Image, UnidentifiedImageError
from transformers import (
    AutoImageProcessor,
    AutoModelForCausalLM,
    AutoProcessor,
    AutoTokenizer,
    BlipForConditionalGeneration,
    VisionEncoderDecoderModel,
)
# Checkpoint shared by the image processor, the captioning model and the tokenizer.
_VITGPT_CHECKPOINT = "nlpconnect/vit-gpt2-image-captioning"

# Run on the GPU when torch can see one, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"


def _load_vitgpt(checkpoint, target_device):
    """Load the image processor, model and tokenizer for ``checkpoint``.

    The model is moved to ``target_device`` before being returned.

    Returns:
        A ``(processor, model, tokenizer)`` tuple.
    """
    processor = AutoImageProcessor.from_pretrained(checkpoint)
    model = VisionEncoderDecoderModel.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model.to(target_device)
    return processor, model, tokenizer


# Streamlit re-executes this script on every widget interaction. Caching the
# loader keeps one loaded copy alive instead of reloading the checkpoint on
# each rerun. Streamlit releases without ``st.cache_resource`` fall back to
# loading directly, which matches the previous behaviour.
if hasattr(st, "cache_resource"):
    _load_vitgpt = st.cache_resource(_load_vitgpt)

vitgpt_processor, vitgpt_model, vitgpt_tokenizer = _load_vitgpt(
    _VITGPT_CHECKPOINT, device
)
def generate_caption(processor, model, image, num_seq, tokenizer=None):
    """Generate ``num_seq`` candidate captions for ``image``.

    Args:
        processor: Image processor, called as
            ``processor(images=image, return_tensors="pt")``. It is also used
            for decoding when ``tokenizer`` is ``None``.
        model: Model exposing ``generate``; it receives the processor's
            ``pixel_values``.
        image: Image to caption. A PIL image whose mode is not ``"RGB"`` is
            converted to RGB before preprocessing; any other input is passed
            through untouched.
        num_seq: Number of captions to return.
        tokenizer: Optional object whose ``batch_decode`` turns the generated
            ids into text. When omitted, ``processor.batch_decode`` is used.

    Returns:
        The decoded captions, as returned by ``batch_decode`` with special
        tokens skipped.
    """
    # Uploaded files are frequently RGBA, palette or grayscale; normalise them
    # so the processor always receives a three-channel image.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")

    inputs = processor(images=image, return_tensors="pt").to(device)
    generated_ids = model.generate(
        pixel_values=inputs.pixel_values,
        max_length=50,
        # Beam-based generation cannot return more sequences than it has
        # beams, so widen the beam when more than five captions are requested.
        # For num_seq <= 5 this is the original fixed value of 5.
        num_beams=max(5, num_seq),
        do_sample=True,
        temperature=2.,
        top_k=20,
        no_repeat_ngram_size=5,
        num_return_sequences=num_seq,
    )

    decoder = tokenizer if tokenizer is not None else processor
    return decoder.batch_decode(generated_ids, skip_special_tokens=True)
def generate_captions(image, num_seq):
    """Caption ``image`` using the module-level ViT-GPT2 processor, model and tokenizer.

    Args:
        image: Image to caption; forwarded unchanged to ``generate_caption``.
        num_seq: Number of captions to request.

    Returns:
        Whatever ``generate_caption`` returns for these arguments.
    """
    return generate_caption(
        processor=vitgpt_processor,
        model=vitgpt_model,
        image=image,
        num_seq=num_seq,
        tokenizer=vitgpt_tokenizer,
    )
# --- Streamlit page: upload an image, pick how many captions, generate. ---
st.title('Generate text to your image')
uploaded_file = st.file_uploader("Upload your image")
num_seq = st.slider('Return sequences quantity', 1, 5, 2)

# The button is only rendered (and can only be pressed) once a file is uploaded.
if uploaded_file is not None and st.button('Generate!'):
    try:
        image = Image.open(uploaded_file)
    except UnidentifiedImageError:
        # The uploader accepts any file type, so the upload may not be an
        # image at all; report that instead of surfacing a traceback.
        st.error("The uploaded file could not be read as an image.")
    else:
        # Left column shows the uploaded picture, right column its captions.
        col1, col2 = st.columns(2)
        with col1:
            st.image(image)
        with col2:
            generated_caption = generate_caption(
                vitgpt_processor, vitgpt_model, image, num_seq, vitgpt_tokenizer
            )
            for caption in generated_caption:
                st.write(caption)