import json import os import time import uuid import tempfile from PIL import Image import gradio as gr import base64 from google import genai from google.genai import types class ImageEditor: def __init__(self): self.model_name = "gemini-2.0-flash-exp" def save_file(self, file_path, data): """Save binary data to a file""" with open(file_path, "wb") as f: f.write(data) def get_client(self, api_key): """Initialize and return a Gemini client""" key = api_key.strip() if api_key and api_key.strip() != "" else os.environ.get("GEMINI_API_KEY") return genai.Client(api_key=key) def upload_file(self, client, file_path): """Upload a file to Gemini""" return client.files.upload(file=file_path) def create_content(self, file_uri, file_mime_type, prompt_text): """Create content for the Gemini API request""" return [ types.Content( role="user", parts=[ types.Part.from_uri( file_uri=file_uri, mime_type=file_mime_type, ), types.Part.from_text(text=prompt_text), ], ), ] def create_config(self): """Create configuration for the Gemini API request""" return types.GenerateContentConfig( temperature=1, top_p=0.95, top_k=40, max_output_tokens=8192, response_modalities=["image", "text"], response_mime_type="text/plain", ) def process_response(self, response_stream, temp_path): """Process the response stream from Gemini""" text_response = "" image_path = None for chunk in response_stream: if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts: continue candidate = chunk.candidates[0].content.parts[0] if candidate.inline_data: self.save_file(temp_path, candidate.inline_data.data) print(f"Image saved to: {temp_path}") image_path = temp_path break else: text_response += chunk.text + "\n" return image_path, text_response def generate_image(self, prompt_text, file_path, api_key): """Generate an image based on prompt and input image""" client = self.get_client(api_key) # Upload the file uploaded_file = self.upload_file(client, file_path) # Create content and config contents = self.create_content(uploaded_file.uri, uploaded_file.mime_type, prompt_text) config = self.create_config() # Process the response with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: temp_path = tmp.name response_stream = client.models.generate_content_stream( model=self.model_name, contents=contents, config=config, ) image_path, text_response = self.process_response(response_stream, temp_path) # Clean up del uploaded_file return image_path, text_response def process_image_and_prompt(self, input_image, prompt, api_key): """Process the input image and prompt""" try: # Save the input image to a temporary file with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: image_path = tmp.name input_image.save(image_path) # Generate the image result_path, text_response = self.generate_image(prompt, image_path, api_key) if result_path: # Load and convert the image if needed result_img = Image.open(result_path) if result_img.mode == "RGBA": result_img = result_img.convert("RGB") return [result_img], "" else: # Return no image and the text response return None, text_response except Exception as e: raise gr.Error(f"Error: {e}", duration=5) def create_interface(): """Create the Gradio interface""" image_editor = ImageEditor() with gr.Blocks(css="style.css") as app: # Header gr.HTML( """
Powered by Gradio⚡️ and Gemini | Get an API Key