maahin committed on
Commit
baf8eca
·
verified ·
1 Parent(s): 75afbce

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -0
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import torch
4
+ from transformers import AutoProcessor, AutoModelForVision2Seq
5
+
6
+ # Load the PaliGemma model and processor
7
@st.cache_resource
def load_model():
    """Load the PaliGemma processor and model.

    Wrapped in ``st.cache_resource`` so the loaded objects are meant to be
    shared across Streamlit reruns rather than re-created on every
    widget interaction.

    Returns:
        A ``(processor, model)`` tuple, both loaded from the same
        pretrained checkpoint.
    """
    checkpoint = "google/paligemma2-3b-mix-224"
    # The processor is loaded first, then the model, from the same checkpoint.
    return (
        AutoProcessor.from_pretrained(checkpoint),
        AutoModelForVision2Seq.from_pretrained(checkpoint),
    )
13
+
14
processor, model = load_model()

# Streamlit UI
st.title("🖼️ Image Q&A using PaliGemma")

uploaded_file = st.file_uploader("Upload an Image", type=["png", "jpg", "jpeg"])

if uploaded_file:
    # Convert to RGB so every accepted upload format reaches the processor
    # in the same colour mode.
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Uploaded Image", use_column_width=True)

    question = st.text_input("Ask a question about the image:")
    if question:
        # Process the image and question
        inputs = processor(text=question, images=image, return_tensors="pt")
        # Remember how many tokens belong to the prompt so they can be
        # dropped from the generated sequence below.
        prompt_length = inputs["input_ids"].shape[-1]
        with torch.no_grad():
            output = model.generate(**inputs)

        # generate() returns the prompt tokens followed by the new tokens;
        # decode only the new ones so the answer does not echo the question.
        answer = processor.batch_decode(
            output[:, prompt_length:], skip_special_tokens=True
        )[0].strip()
        st.success(f"Answer: {answer}")