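# NOTE: The lines below are residue from the Hugging Face file-view page this
# script was copied from; they are kept as comments so the file stays valid Python.
# Author avatar: Shahabmoin's picture
# Commit message: Update app.py
# Commit: 8c4efdd (verified)
# Page controls: raw / history / blame / contribute / delete
# File size: 2.03 kB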
import streamlit as st
from PIL import Image
import torch
from transformers import CLIPProcessor, CLIPModel
import matplotlib.pyplot as plt
# Load the pre-trained CLIP model and processor.
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so loading is wrapped in a cached resource: the processor and
# model are instantiated once and reused across reruns.
@st.cache_resource
def _load_clip(checkpoint="openai/clip-vit-base-patch16"):
    """Load the CLIP processor and model for ``checkpoint``.

    Args:
        checkpoint: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(processor, model)`` tuple.
    """
    clip_processor = CLIPProcessor.from_pretrained(checkpoint)
    clip_model = CLIPModel.from_pretrained(checkpoint)
    return clip_processor, clip_model


# Keep the original module-level names so the rest of the script is unchanged.
processor, model = _load_clip()
# Candidate labels scored against the image; each one is turned into the
# text prompt "a photo of <label>".
_DEFAULT_LABELS = ("an animal", "a human", "a car", "a tree", "a house")


def predict_image_description(image, labels=None, top_k=3):
    """Score an image against candidate labels and return the best matches.

    Each label is expanded to the prompt ``"a photo of <label>"`` and passed,
    together with the image, through the module-level ``processor`` and
    ``model``. The image-to-text logits are softmaxed over the labels.

    Args:
        image: The image to classify, in a form accepted by ``processor``
            (the caller in this file passes a PIL image).
        labels: Optional iterable of label strings. Defaults to the five
            built-in labels (animal, human, car, tree, house).
        top_k: Maximum number of predictions to return. Clamped to the
            number of labels so ``torch.topk`` never receives an
            out-of-range ``k``.

    Returns:
        A list of strings formatted as ``"<label>: <percent>%"`` with two
        decimal places, ordered from most to least likely. Empty when there
        are no labels or ``top_k`` is not positive.
    """
    candidates = list(_DEFAULT_LABELS if labels is None else labels)
    k = min(top_k, len(candidates))
    if k <= 0:
        return []

    # Prompts are derived from the labels so the two can never drift apart.
    prompts = [f"a photo of {label}" for label in candidates]
    inputs = processor(
        text=prompts,
        images=image,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)

    # logits_per_image holds the image-text similarity scores; softmax over
    # the label dimension turns them into probabilities.
    probs = outputs.logits_per_image.softmax(dim=1)

    # A single image was passed in, so only row 0 is relevant.
    top_probs, top_indices = torch.topk(probs[0], k)
    return [
        f"{candidates[index]}: {probability * 100:.2f}%"
        for probability, index in zip(top_probs.tolist(), top_indices.tolist())
    ]
# Streamlit UI: title, upload widget, image preview and prediction list.
st.title("Real-Time Image-to-Text Generator")
st.markdown("Upload an image, and I will tell you what it is!")

# Image upload feature
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # The file extension does not guarantee a decodable image. Image.open is
    # lazy, so load() forces decoding here, where a failure can be reported
    # to the user instead of surfacing later as a traceback.
    try:
        image = Image.open(uploaded_file)
        image.load()
    except OSError:
        image = None
        st.error("The uploaded file could not be read as an image. Please try another file.")

    if image is not None:
        # Display the image
        st.image(image, caption="Uploaded Image", use_column_width=True)

        # Predict the description
        predictions = predict_image_description(image)

        # Display the predictions
        st.write("Predictions:")
        for prediction in predictions:
            st.write(prediction)