import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import gradio as gr

# Load model and processor (CPU keeps the demo portable; no GPU required)
device = torch.device("cpu")
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)

# Captioning function with fallback logic
def caption_image(image):
    try:
        inputs = processor(images=image, return_tensors="pt").to(device)
        output = model.generate(**inputs, max_length=30)  # cap captions at 30 tokens
        caption = processor.tokenizer.decode(output[0], skip_special_tokens=True)
        return caption.capitalize()
    except Exception as e:
        # Truncate long error messages so the UI shows a readable one-liner
        return f"⚠️ Error: {str(e)[:100]}"

# Gradio UI
gr.Interface(
    fn=caption_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Textbox(label="Generated Caption"),
    title="🖼️ BLIP Image Caption Generator",
    description="Image captioning with BLIP. No API keys required; runs locally on CPU.",
    allow_flagging="never",
).launch()
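
# Optional smoke test of caption_image without launching the UI (a minimal
# sketch; "example.jpg" is a hypothetical path, substitute any image on disk):
#
#   from PIL import Image
#   print(caption_image(Image.open("example.jpg").convert("RGB")))
#
# Dependencies, if not already installed: torch, transformers, pillow, gradio
# (e.g. `pip install torch transformers pillow gradio`).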