# 🖼️ BLIP Image Captioning - Fast, Accurate, CPU-Friendly

```python
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
import gradio as gr

# Load the BLIP base captioning model and its processor onto the CPU
device = "cpu"
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
model.eval()  # inference only

# Captioning function
def caption_image(image):
    if image is None:  # Gradio passes None when no image is uploaded
        return "Please upload an image first."
    try:
        inputs = processor(images=image, return_tensors="pt").to(device)
        with torch.no_grad():  # no gradients needed for generation
            out = model.generate(**inputs, max_length=30)
        caption = processor.tokenizer.decode(out[0], skip_special_tokens=True)
        return caption.capitalize()
    except Exception:
        return "Could not generate caption. Try a different image."

# Launch the Gradio interface
gr.Interface(
    fn=caption_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Textbox(label="Generated Caption"),
    title="🖼️ Image Caption Generator (BLIP)",
    description="Accurate, fast image captioning using BLIP. No API keys. CPU-friendly. Instant output.",
    examples=["example.jpg"],  # Optional: preload a sample image (the file must exist alongside the script)
    cache_examples=True,       # Optional: pre-computes example captions for a snappier first load
).launch()
```
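
To sanity-check the model without launching the web UI, a minimal sketch like the one below calls `caption_image` directly on a local file. Here `photo.jpg` is a placeholder path assumed for illustration, not a file shipped with the app.

```python
# Standalone check, assuming the definitions above are already in scope.
# "photo.jpg" is a placeholder path for any local image you want to test.
from PIL import Image

img = Image.open("photo.jpg").convert("RGB")  # BLIP expects an RGB image
print(caption_image(img))  # prints a short generated caption
```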