"""Streamlit app: classify an uploaded image as hot dog / not hot dog and caption it.

The page shows the uploaded image next to the classifier's per-label
probabilities, then generates a caption for the image with a GIT model and
prints that caption to stdout.
"""

import streamlit as st
import torch
import spaces
from transformers import pipeline, AutoModelForCausalLM, AutoProcessor
from PIL import Image

# Run the captioning model on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
image_model_id = "microsoft/git-large-coco"


@st.cache_resource
def _load_classifier():
    """Build the hot-dog image-classification pipeline once per server process."""
    return pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")


@st.cache_resource
def _load_captioner():
    """Load the caption processor and model once; the model is moved to ``device``."""
    processor = AutoProcessor.from_pretrained(image_model_id)
    model = AutoModelForCausalLM.from_pretrained(image_model_id).to(device)
    return processor, model


# Streamlit re-executes this script on every interaction; the cached loaders
# keep the model weights from being reloaded on each rerun.
# The classifier gets its own name so the imported ``pipeline`` factory is not
# shadowed by the instance it creates.
classifier = _load_classifier()

st.title("Hot Dog? Or Not?")

image_processor, image_model = _load_captioner()

file_name = st.file_uploader("Upload a hot dog candidate image")

if file_name is not None:
    col1, col2 = st.columns(2)

    image = Image.open(file_name)
    col1.image(image, use_column_width=True)

    predictions = classifier(image)
    col2.header("Probabilities")
    for p in predictions:
        col2.subheader(f"{p['label']}: {round(p['score'] * 100, 1)}%")

    # The inputs must live on the same device as the model; leaving them on
    # the CPU makes generate() fail whenever the model was moved to CUDA.
    pixel_values = image_processor(images=image, return_tensors="pt").pixel_values.to(device)
    generated_ids = image_model.generate(pixel_values=pixel_values, max_length=50)
    generated_caption = image_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print(generated_caption)