ved1beta committed on
Commit
0dc6935
·
1 Parent(s): 45f9b3d
Files changed (2) hide show
  1. app.py +34 -4
  2. requirements.txt +5 -0
app.py CHANGED
@@ -1,8 +1,38 @@
1
  import gradio as gr
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import torch
3
+ from PIL import Image
4
+ import requests
5
+ from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
6
 
7
+ # Load the PaliGemma model and processor
8
+ model_id = "google/paligemma-3b-mix-224"
9
+ model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
10
+ processor = AutoProcessor.from_pretrained(model_id)
11
 
12
def generate_response(image, prompt):
    """Answer a text prompt about an uploaded image with PaliGemma.

    Args:
        image: PIL image delivered by the Gradio image input, or ``None``
            when nothing has been uploaded.
        prompt: Question or instruction text for the model.

    Returns:
        The newly generated text (without the echoed prompt), a request to
        upload an image when ``image`` is ``None``, or an ``"Error: ..."``
        message when preprocessing or generation fails.
    """
    if image is None:
        return "Please upload an image."

    try:
        # Keyword arguments: the positional order of the processor's
        # text/images parameters differs between transformers releases.
        inputs = processor(text=prompt, images=image, return_tensors="pt")
        prompt_length = inputs["input_ids"].shape[-1]

        # Inference only, so skip autograd bookkeeping.
        with torch.inference_mode():
            output = model.generate(**inputs, max_new_tokens=50)

        # generate() returns the prompt tokens followed by the new ones. Drop
        # the prompt at the token level; slicing the decoded string by a
        # token count would cut at an unrelated character offset.
        new_tokens = output[0][prompt_length:]
        return processor.decode(new_tokens, skip_special_tokens=True)
    except Exception as e:
        # UI boundary: report the failure in the output box rather than
        # letting the exception propagate out of the callback.
        return f"Error: {str(e)}"
24
 
25
# Gradio interface: one image upload plus a free-text prompt go in, and the
# text returned by generate_response comes out.
demo = gr.Interface(
    title="PaliGemma Vision-Language Model",
    description="Ask questions about uploaded images",
    fn=generate_response,
    inputs=[
        # type="pil" makes Gradio pass the upload to the callback as a PIL image.
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Prompt", placeholder="What do you want to know?"),
    ],
    outputs=gr.Textbox(label="Model Response"),
)

# Start the web UI only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ torch>=2.0.0
2
+ transformers>=4.41.0
3
+ gradio>=4.0.0
4
+ pillow>=9.0.0
5
+ requests>=2.28.0