"""Gradio app that edits an uploaded image with a Gemini image model."""

import base64
import json
import os
import tempfile
import time
import uuid

import gradio as gr
from google import genai
from google.genai import types
from PIL import Image


def _remove_if_exists(path):
    """Delete a temporary file, ignoring the case where it cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        # Deliberate best-effort: a leftover temp file must never fail a request.
        pass


class ImageEditor:
    """Send an image plus a text prompt to Gemini and collect the result."""

    def __init__(self):
        self.model_name = "gemini-2.0-flash-exp"

    def save_file(self, file_path, data):
        """Save binary data to a file."""
        with open(file_path, "wb") as f:
            f.write(data)

    def get_client(self, api_key):
        """Initialize and return a Gemini client.

        Uses ``api_key`` when it contains non-whitespace characters; otherwise
        falls back to the ``GEMINI_API_KEY`` environment variable.
        """
        key = (api_key or "").strip() or os.environ.get("GEMINI_API_KEY")
        return genai.Client(api_key=key)

    def upload_file(self, client, file_path):
        """Upload a file to Gemini and return the uploaded-file handle."""
        return client.files.upload(file=file_path)

    def create_content(self, file_uri, file_mime_type, prompt_text):
        """Create the single user turn (image reference + prompt) for the request."""
        return [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=file_uri,
                        mime_type=file_mime_type,
                    ),
                    types.Part.from_text(text=prompt_text),
                ],
            ),
        ]

    def create_config(self):
        """Create the generation config requesting image and text modalities."""
        return types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=["image", "text"],
            response_mime_type="text/plain",
        )

    def process_response(self, response_stream, temp_path):
        """Consume the response stream until an image arrives.

        Args:
            response_stream: Iterable of streamed response chunks.
            temp_path: Path the first image payload is written to.

        Returns:
            ``(image_path, text_response)``. ``image_path`` is ``temp_path``
            when an image was received, else ``None``. ``text_response`` holds
            the text of every chunk seen before the image, one per line.
        """
        text_pieces = []
        image_path = None
        for chunk in response_stream:
            if (
                not chunk.candidates
                or not chunk.candidates[0].content
                or not chunk.candidates[0].content.parts
            ):
                continue
            parts = chunk.candidates[0].content.parts
            # Look at every part, not only the first: the image payload may
            # follow a text part inside the same chunk.
            image_part = next(
                (p for p in parts if p.inline_data and p.inline_data.data),
                None,
            )
            if image_part is not None:
                self.save_file(temp_path, image_part.inline_data.data)
                print(f"Image saved to: {temp_path}")
                image_path = temp_path
                break
            # ``chunk.text`` may be None; never concatenate None with str.
            text_pieces.append(chunk.text or "")
        text_response = "".join(f"{piece}\n" for piece in text_pieces)
        return image_path, text_response

    def _delete_uploaded_file(self, client, uploaded_file):
        """Best-effort removal of the remote upload once the request is done."""
        try:
            client.files.delete(name=uploaded_file.name)
        except Exception as exc:
            # Cleanup failure must not mask the generation result or error.
            print(f"Warning: could not delete uploaded file: {exc}")

    def generate_image(self, prompt_text, file_path, api_key):
        """Generate an image based on a prompt and an input image file.

        Returns:
            ``(image_path, text_response)`` as produced by
            :meth:`process_response`. When ``image_path`` is not ``None`` the
            caller owns that temporary file and should remove it after use.
        """
        client = self.get_client(api_key)
        uploaded_file = self.upload_file(client, file_path)
        try:
            contents = self.create_content(
                uploaded_file.uri, uploaded_file.mime_type, prompt_text
            )
            config = self.create_config()

            # Reserve a path for the output; the handle is closed immediately
            # so the file can be reopened for writing on every platform.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                temp_path = tmp.name

            image_path = None
            try:
                response_stream = client.models.generate_content_stream(
                    model=self.model_name,
                    contents=contents,
                    config=config,
                )
                image_path, text_response = self.process_response(
                    response_stream, temp_path
                )
            finally:
                # No image (or an exception): the reserved file is garbage.
                if image_path is None:
                    _remove_if_exists(temp_path)
            return image_path, text_response
        finally:
            # Dropping the local reference does not remove the remote upload,
            # so delete it explicitly.
            self._delete_uploaded_file(client, uploaded_file)

    def process_image_and_prompt(self, input_image, prompt, api_key):
        """Run one edit request for the Gradio UI.

        Args:
            input_image: PIL image from the upload widget (``None`` if empty).
            prompt: Text describing the desired edit.
            api_key: Gemini API key typed by the user (may be empty).

        Returns:
            ``([image], "")`` when an image was generated, otherwise
            ``(None, text_response)``.

        Raises:
            gr.Error: If no image was supplied or any step fails.
        """
        if input_image is None:
            raise gr.Error("Error: please upload an image first.", duration=5)

        image_path = None
        result_path = None
        try:
            # Save the input image to a temporary file for upload.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                image_path = tmp.name
            input_image.save(image_path)

            result_path, text_response = self.generate_image(
                prompt, image_path, api_key
            )
            if not result_path:
                # Return no image and the text response.
                return None, text_response

            # Image.open is lazy; convert()/copy() force a full in-memory
            # image so the temp file can be removed afterwards.
            with Image.open(result_path) as loaded:
                if loaded.mode == "RGBA":
                    result_img = loaded.convert("RGB")
                else:
                    result_img = loaded.copy()
            return [result_img], ""
        except Exception as e:
            raise gr.Error(f"Error: {e}", duration=5) from e
        finally:
            for path in (image_path, result_path):
                if path:
                    _remove_if_exists(path)


def create_interface():
    """Create the Gradio interface and return the ``gr.Blocks`` app."""
    image_editor = ImageEditor()

    with gr.Blocks(css="style.css") as app:
        # Header
        gr.HTML(
            "\n"
            "Gemini logo\n"
            "\n"
            "My Image Editing App\n"
            "\n"
            "Powered by Gradio⚡️ and Gemini | Get an API Key\n"
            "\n"
        )

        # API Configuration
        with gr.Accordion("⚠️ API Configuration ⚠️", open=False):
            gr.Markdown(
                "- **Note:** You need to provide a Gemini API key for image generation\n"
                "- Sometimes the model returns text instead of an image - try adjusting your prompt\n"
            )

        # Usage Instructions
        with gr.Accordion("📌 Usage Instructions", open=False):
            gr.Markdown(
                "### How to Use\n"
                "- Upload an image (PNG format recommended)\n"
                "- Enter a prompt describing the edit you want\n"
                "- Click Generate to create your output\n"
                "- If text is returned instead of an image, it will appear in the text output area\n"
                "- ❌ **Do not use NSFW images!**\n"
            )

        # Main Content
        with gr.Row():
            # Input Column
            with gr.Column():
                image_input = gr.Image(
                    type="pil",
                    label="Upload Image",
                    image_mode="RGBA",
                )
                api_key_input = gr.Textbox(
                    lines=1,
                    placeholder="Enter Gemini API Key",
                    label="Gemini API Key",
                    type="password",
                )
                prompt_input = gr.Textbox(
                    lines=2,
                    placeholder="Describe the edit you want...",
                    label="Edit Prompt",
                )
                generate_btn = gr.Button("Generate Edit")

            # Output Column
            with gr.Column():
                output_gallery = gr.Gallery(label="Edited Image")
                output_text = gr.Textbox(
                    label="Text Output",
                    placeholder="Text response will appear here if no image is generated.",
                )

        # Connect the interface
        generate_btn.click(
            fn=image_editor.process_image_and_prompt,
            inputs=[image_input, prompt_input, api_key_input],
            outputs=[output_gallery, output_text],
        )

        # Examples
        gr.Markdown("## Example Prompts")
        examples = [
            ["data/1.webp", 'change text to "MY TEXT"', ""],
            ["data/2.webp", "remove the spoon from the image", ""],
            ["data/3.webp", 'change text to "Custom Text"', ""],
            ["data/1.jpg", "add cartoon style to the face", ""],
        ]
        gr.Examples(
            examples=examples,
            inputs=[image_input, prompt_input],
        )

    return app


# Create and launch the app
if __name__ == "__main__":
    app = create_interface()
    app.queue(max_size=50).launch()