# paligamma / app.py — PaliGemma image-analysis demo (author: ved1beta, commit e0a390e "note")
import gradio as gr
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
from PIL import Image
import torch
import os
# Load the model and processor
# NOTE(review): runs at import time and downloads weights; the
# google/paligemma checkpoint is gated, so HF_TOKEN must be set in the
# environment — confirm the deployment has this secret configured.
model_id = "google/paligemma-3b-mix-224"
HF_TOKEN = os.getenv('HF_TOKEN')
# .eval() switches off training-only behavior (e.g. dropout) for inference.
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, token=HF_TOKEN).eval()
processor = AutoProcessor.from_pretrained(model_id, token=HF_TOKEN)
def generate_caption(image, prompt="What is in this image?", max_tokens=100):
    """Generate a text description of *image* with PaliGemma.

    Args:
        image: PIL image to analyze, or ``None``.
        prompt: Question/instruction for the model.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The decoded model output, or a hint string when no image is given.
    """
    if image is None:
        return "Please upload an image."
    # Surface progress in the Gradio UI while the model runs.
    gr.Info("Analysis starting. This may take up to 119 seconds.")
    # PaliGemma expects the special <image> token ahead of the text prompt.
    full_prompt = "<image> " + prompt
    # Preprocess, and keep inputs on the same device as the model so this
    # keeps working if the model is ever moved to GPU.
    model_inputs = processor(text=full_prompt, images=image, return_tensors="pt").to(model.device)
    input_len = model_inputs["input_ids"].shape[-1]
    # Greedy decoding; inference_mode avoids autograd bookkeeping.
    with torch.inference_mode():
        generation = model.generate(**model_inputs, max_new_tokens=max_tokens, do_sample=False)
    # Drop the echoed prompt tokens, keeping only the newly generated ones.
    generation = generation[0][input_len:]
    decoded = processor.decode(generation, skip_special_tokens=True)
    return decoded
# Load local example images
def load_local_images(image_files=('image1.jpg', 'image2.jpg', 'image3.jpg')):
    """Load example images bundled with the repository.

    Args:
        image_files: Filenames (relative to the working directory) to try;
            defaults to the three images shipped with the Space.

    Returns:
        List of PIL images for the files that exist and could be opened;
        missing or unreadable files are skipped.
    """
    local_images = []
    for img_file in image_files:
        try:
            img_path = os.path.join('.', img_file)
            if os.path.exists(img_path):
                local_images.append(Image.open(img_path))
        except Exception as e:
            # Best-effort: a bad example image must not break app startup.
            print(f"Could not load {img_file}: {e}")
    return local_images
# Prepare example images
# Evaluated at import time; an empty list when no example files are present.
EXAMPLE_IMAGES = load_local_images()
# Create Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# PaliGemma Image Analysis")
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload or Select Image")
            custom_prompt = gr.Textbox(label="Custom Prompt", value="What is in this image?")
            submit_btn = gr.Button("Analyze Image")
        with gr.Column():
            output_text = gr.Textbox(label="Image Description")
    # Connect components: button click runs the captioning function.
    submit_btn.click(
        fn=generate_caption,
        inputs=[input_image, custom_prompt],
        outputs=output_text,
    )
    # Clickable example rows (empty when no bundled images were found).
    gr.Examples(
        examples=[[img, "What is in this image?"] for img in EXAMPLE_IMAGES],
        inputs=[input_image, custom_prompt],
        fn=generate_caption,
        outputs=output_text,
    )

# Launch the app only when executed directly (not on import).
if __name__ == "__main__":
    demo.launch()