import streamlit as st
import torch
import spaces
from transformers import pipeline, AutoModelForCausalLM, AutoProcessor
from PIL import Image


@st.cache_resource
def _load_classifier():
    """Build the hot-dog image-classification pipeline once per server process.

    Streamlit re-executes this script on every user interaction; caching the
    pipeline as a resource avoids rebuilding it on each rerun.
    """
    # Imported under an alias because the module-level name `pipeline` is
    # rebound below to the classifier instance the rest of the script calls.
    from transformers import pipeline as build_pipeline

    return build_pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")


@st.cache_resource
def _load_captioner(model_id: str, device_name: str):
    """Load the captioning processor and model once and place the model on the device.

    Args:
        model_id: Hugging Face model identifier to load.
        device_name: Device string (e.g. "cpu" or "cuda") passed to ``.to()``.

    Returns:
        A ``(processor, model)`` tuple.
    """
    processor = AutoProcessor.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id).to(device_name)
    return processor, model


# Kept under the name `pipeline` because later code calls `pipeline(image)`.
pipeline = _load_classifier()

st.title("Hot Dog? Or Not?")

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

image_model_id = "microsoft/git-large-coco"
# The device is passed as a plain string so it can serve as a cache key.
image_processor, image_model = _load_captioner(image_model_id, str(device))

# Ask the user for an image; `file_name` stays None until something is uploaded.
file_name = st.file_uploader("Upload a hot dog candidate image")

if file_name is not None:
    try:
        image = Image.open(file_name)
    except OSError:
        # The uploader accepts any file type. Pillow raises
        # UnidentifiedImageError (an OSError subclass) for data it cannot
        # decode, so report it in the page and halt this run instead of
        # surfacing a raw traceback.
        st.error("Could not read that file as an image. Please upload a valid image.")
        st.stop()

    # Left column shows the upload, right column shows the classifier output.
    col1, col2 = st.columns(2)

    col1.image(image, use_column_width=True)
    predictions = pipeline(image)

    col2.header("Probabilities")
    # Each prediction is indexed by 'label' and 'score'; the score is shown
    # as a percentage rounded to one decimal place.
    for p in predictions:
        col2.subheader(f"{ p['label'] }: { round(p['score'] * 100, 1)}%")

# Caption the uploaded image. Guarded because `image` is only defined once a
# file has been uploaded; running this unconditionally raises NameError on
# the initial page load.
if file_name is not None:
    # The model was moved to `device`, so its inputs must be moved there too;
    # otherwise generate() fails with a device mismatch when CUDA is in use.
    pixel_values = image_processor(images=image, return_tensors="pt").pixel_values.to(device)

    generated_ids = image_model.generate(pixel_values=pixel_values, max_length=50)
    # batch_decode returns one string per sequence; a single image was passed,
    # so the first entry is taken as its caption.
    generated_caption = image_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # NOTE(review): this prints to the server's stdout, not the Streamlit page —
    # confirm whether the caption should also be rendered in the UI.
    print(generated_caption)