"""Gradio demo: upload an image and get a BLIP-generated caption for it."""

import time

import gradio as gr
from transformers import BlipForConditionalGeneration, BlipProcessor

# Single source of truth for the checkpoint so processor and model always match.
MODEL_NAME = "Salesforce/blip-image-captioning-base"

# Load the pre-trained BLIP model and processor once, at start-up, so each
# request only pays for inference.
processor = BlipProcessor.from_pretrained(MODEL_NAME)
model = BlipForConditionalGeneration.from_pretrained(MODEL_NAME)


def generate_caption(image):
    """Generate a caption for the uploaded image and report the time taken.

    Args:
        image: The image supplied by the ``gr.Image(type="pil")`` input, or
            ``None`` when the user submits without uploading anything.

    Returns:
        A ``(caption, timing_message)`` pair of strings, matching the two
        output textboxes wired to the Submit button.
    """
    # Submit can be clicked with an empty image input; without this guard the
    # processor call below would fail on ``None``.
    if image is None:
        return "Please upload an image before submitting.", ""

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() can.
    started = time.perf_counter()

    # Preprocess the image, run beam search, and decode the best sequence.
    inputs = processor(images=image, return_tensors="pt")
    out = model.generate(
        **inputs,
        num_beams=3,
        max_length=500,
        early_stopping=True,
    )
    caption = processor.decode(out[0], skip_special_tokens=True)

    processing_time = time.perf_counter() - started
    return caption, f"Time taken: {processing_time:.1f} seconds"


def cancel_action():
    """Clear both output textboxes (caption and processing time).

    Returns:
        ``(None, None)`` — one value per output component wired to the Cancel
        button. The uploaded image itself is not an output of this handler,
        so it is left untouched.
    """
    return None, None


# Gradio interface
with gr.Blocks(css="body {background-color: #f8f9fa;}") as iface:
    with gr.Column():
        gr.Markdown(
            "\nUpload an image to generate a detailed, beautiful description of it.\n",
            elem_id="intro",
        )

        with gr.Row():
            image_input = gr.Image(
                type="pil",
                label="Upload an image",
                elem_id="image_upload",
            )

        with gr.Row():
            output_caption = gr.Textbox(
                label="Generated Caption",
                interactive=False,
                elem_id="output_caption",
                lines=5,
            )
            output_time = gr.Textbox(
                label="Processing Time",
                interactive=False,
                elem_id="output_time",
                lines=1,
            )

        with gr.Row():
            submit_btn = gr.Button("Submit", elem_id="submit_btn", variant="primary")
            cancel_btn = gr.Button("Cancel", elem_id="cancel_btn", variant="secondary")

    # Event listeners must be registered inside the Blocks context.
    submit_btn.click(
        generate_caption,
        inputs=image_input,
        outputs=[output_caption, output_time],
    )
    cancel_btn.click(cancel_action, outputs=[output_caption, output_time])

# Launch the interface
iface.launch()