import base64
import io
import os
import tempfile

import fal_client
import gradio as gr
import requests
from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image

# Loads OPENAI_API_KEY (read by the OpenAI SDK) and FAL_KEY (read by fal_client) from .env
load_dotenv()

client = OpenAI()


# Function to encode the image
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def generate_description_openai(image, model_name):
    """Generate a description using OpenAI models."""
    if image is None:
        return "Please upload an image first."

    try:
        # Handle both PIL Image and file path:
        # if image is a string (file path), load it as a PIL Image
        if isinstance(image, str):
            image = Image.open(image)

        # Convert the PIL image to base64.
        # JPEG cannot encode an alpha channel, so normalize to RGB first.
        if image.mode != "RGB":
            image = image.convert("RGB")
        buffered = io.BytesIO()
        image.save(buffered, format="JPEG")
        base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")

        prompt_text = (
            "Describe the image as if it were a quick phone snapshot. Clearly note the person’s "
            "pose — how they’re sitting, standing, leaning, or holding something — and the angle "
            "of their body or head in everyday terms. Capture relaxed, imperfect details: casual "
            "clothing, slightly messy hair, natural or uneven makeup, and ordinary surroundings "
            "that might include clutter or background distractions. Emphasize the spontaneous "
            "feeling of the pose (mid-gesture, half-smile, looking away, or casually adjusting "
            "something) rather than a staged look. Mention natural or uneven lighting, possible "
            "grain, soft focus, or off-center framing to match a phone camera shot. Highlight "
            "whether it feels like a front-camera selfie or a back-camera candid, with the "
            "authentic vibe of being in the moment, not studio-quality."
        )

        if model_name == "gpt-4o":
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt_text},
                            {
                                "type": "image_url",
                                "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                            },
                        ],
                    }
                ],
                max_tokens=1000,
            )
            description = response.choices[0].message.content + "\n Photo taken with iPhone front camera in-the-moment."
        else:
            # For other OpenAI models, use the Responses API
            response = client.responses.create(
                model=model_name,
                input=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "input_text", "text": prompt_text},
                            {
                                "type": "input_image",
                                "image_url": f"data:image/jpeg;base64,{base64_image}",
                            },
                        ],
                    }
                ],
            )
            description = response.output_text + "\n Photo taken with iPhone front camera in-the-moment."

        return description
    except Exception as e:
        return f"Error generating description with {model_name}: {str(e)}"


def generate_description_fal(image, model_name):
    """Generate a description using FAL models."""
    if image is None:
        return "Please upload an image first."

    try:
        # Handle both PIL Image and file path:
        # if image is a string (file path), load it as a PIL Image
        if isinstance(image, str):
            image = Image.open(image)

        # Save the PIL image to a temporary file and upload it to FAL.
        # Normalize to RGB since JPEG cannot encode an alpha channel.
        if image.mode != "RGB":
            image = image.convert("RGB")
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
            image.save(tmp_file.name, format="JPEG")
            tmp_path = tmp_file.name

        try:
            # Upload the image to FAL storage. upload_file takes a local path
            # (fal_client.upload_image expects a PIL Image, not a path).
            image_url = fal_client.upload_file(tmp_path)
            print(f"Uploaded image to FAL: {image_url}")

            prompt_text = (
                "Describe the image in rich, natural detail for a txt2img model. Focus on the "
                "scene, the person, their casual clothes, accessories, makeup, the everyday "
                "environment, and what they're doing. Capture the emotions, expressions, mood, "
                "and candid vibe as if it were a real Instagram Reel, TikTok, or Pinterest-style "
                "snapshot. "
                "Mention the camera angle, framing, depth of field, shadows, colors, and "
                "textures in a way that feels spontaneous and unpolished, with natural light and "
                "subtle motion blur. Convey the overall feeling, atmosphere, and sense of being "
                "in that moment. Describe the angle of the camera and the pose of the model "
                "precisely. The result will be used to generate a similar image with a txt2img "
                "model, emphasizing realism, casualness, and lived-in authenticity rather than "
                "studio-perfect or commercialized style. Highlight the photo taken with iPhone "
                "front camera in-the-moment."
            )
            print(f"Trying to generate description with {model_name}")

            if model_name == "fal-ai/bagel/understand":
                result = fal_client.subscribe(
                    model_name,
                    arguments={"image_url": image_url, "prompt": prompt_text},
                )
                print(f"Bagel result: {result}")
                description = result.get("text", "")
                if not description:
                    description = "Unable to generate description with this model. Please try a different model."
                else:
                    description += "\n Photo taken with iPhone front camera in-the-moment."
            elif model_name == "perceptron/isaac-01":
                stream = fal_client.stream(
                    model_name,
                    arguments={
                        "image_url": image_url,
                        "prompt": prompt_text,
                        "response_style": "text",
                    },
                )
                description = ""
                for event in stream:
                    print(f"Isaac event: {event}")
                    # Stream events may arrive as dicts or objects depending on the
                    # fal_client version, so handle both shapes defensively.
                    if isinstance(event, dict):
                        description += event.get("content", "") or ""
                    elif hasattr(event, "content"):
                        description += event.content
                if not description:
                    description = "Unable to generate description with this model. Please try a different model."
                else:
                    description += "\n Photo taken with iPhone front camera in-the-moment."
            elif model_name in ["fal-ai/moondream2/visual-query", "fal-ai/moondream2"]:
                # The plain captioning endpoint takes no prompt; visual-query does.
                if model_name == "fal-ai/moondream2":
                    result = fal_client.subscribe(
                        model_name,
                        arguments={"image_url": image_url},
                    )
                else:
                    result = fal_client.subscribe(
                        model_name,
                        arguments={"image_url": image_url, "prompt": prompt_text},
                    )
                print(f"Moondream result: {result}")
                description = result.get("output", "")
                if not description:
                    description = "Unable to generate description with this model. Please try a different model."
                else:
                    description += "\n Photo taken with iPhone front camera in-the-moment."
            else:
                # Guard against an unmatched model name so `description` is always bound
                description = "Unable to generate description with this model. Please try a different model."

            return description
        finally:
            # Clean up the temporary file
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
    except Exception as e:
        print(f"Error with {model_name}: {str(e)}")
        return f"Error generating description with {model_name}: {str(e)}. Please try a different model."
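
# Note on the parsing above: the result keys ("text" for bagel, "output" for
# moondream2, "content" on isaac stream events) are the shapes this script
# already assumes, mirrored in its print() debugging; they are taken from the
# calls above rather than verified against the FAL API docs, so treat them as
# assumptions if a model's payload changes.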

def generate_description(image, model_name):
    """Generate a description for the uploaded image using the selected model."""
    if image is None:
        return "Please upload an image first."

    # Determine which function to use based on the model
    if model_name in ["gpt-4o", "gpt-4.1", "gpt-5"]:
        return generate_description_openai(image, model_name)
    else:
        # Try the FAL model first
        result = generate_description_fal(image, model_name)
        # If the FAL model fails or returns empty, fall back to GPT-4o
        if not result or "Unable to generate description" in result or "Error generating description" in result:
            print(f"FAL model {model_name} failed, falling back to GPT-4o")
            return generate_description_openai(image, "gpt-4o")
        return result


def on_queue_update(update):
    if isinstance(update, fal_client.InProgress):
        for log in update.logs:
            print(log["message"])


def generate_avatar_flux_srpo(prompt):
    """Generate an avatar using fal-ai/flux/srpo."""
    try:
        stream = fal_client.stream(
            "fal-ai/flux/srpo",
            arguments={
                "prompt": prompt,
                "image_size": "portrait_16_9",
                "num_inference_steps": 28,
                "guidance_scale": 4.5,
                "num_images": 1,
                "enable_safety_checker": True,
                "output_format": "jpeg",
                "acceleration": "none",
            },
        )
        for event in stream:
            if hasattr(event, "images") and event.images:
                image_url = event.images[0]["url"]
                return download_and_convert_image(image_url)
        return None, "No images generated"
    except Exception as e:
        return None, f"Error with flux/srpo: {str(e)}"


def generate_avatar_flux1_srpo(prompt):
    """Generate an avatar using fal-ai/flux-1/srpo."""
    try:
        stream = fal_client.stream(
            "fal-ai/flux-1/srpo",
            arguments={
                "prompt": prompt,
                "image_size": "portrait_16_9",
                "num_inference_steps": 28,
                "guidance_scale": 4.5,
                "num_images": 1,
                "enable_safety_checker": True,
                "output_format": "jpeg",
                "acceleration": "regular",
            },
        )
        for event in stream:
            if hasattr(event, "images") and event.images:
                image_url = event.images[0]["url"]
                return download_and_convert_image(image_url)
        return None, "No images generated"
    except Exception as e:
        return None, f"Error with flux-1/srpo: {str(e)}"


def generate_avatar_hunyuan(prompt):
    """Generate an avatar using fal-ai/hunyuan-image/v2.1/text-to-image."""
    try:
        result = fal_client.subscribe(
            "fal-ai/hunyuan-image/v2.1/text-to-image",
            arguments={
                "prompt": prompt,
                "negative_prompt": "blurry, low quality, watermark, signature",
                "image_size": "portrait_16_9",
                "num_images": 1,
                "num_inference_steps": 28,
                "guidance_scale": 3.5,
                "use_reprompt": True,
                "enable_safety_checker": True,
            },
            with_logs=True,
            on_queue_update=on_queue_update,
        )
        if "images" in result and len(result["images"]) > 0:
            image_url = result["images"][0]["url"]
            return download_and_convert_image(image_url)
        return None, "No images generated"
    except Exception as e:
        return None, f"Error with hunyuan: {str(e)}"


def generate_avatar_seedream(prompt):
    """Generate an avatar using fal-ai/bytedance/seedream/v4/text-to-image."""
    try:
        result = fal_client.subscribe(
            "fal-ai/bytedance/seedream/v4/text-to-image",
            arguments={
                "prompt": prompt,
                "image_size": "portrait_16_9",
                "num_images": 1,
                "max_images": 1,
                "enable_safety_checker": True,
            },
            with_logs=True,
            on_queue_update=on_queue_update,
        )
        if "images" in result and len(result["images"]) > 0:
            image_url = result["images"][0]["url"]
            return download_and_convert_image(image_url)
        return None, "No images generated"
    except Exception as e:
        return None, f"Error with seedream: {str(e)}"

def generate_avatar_nano_banana(prompt):
    """Generate an avatar using fal-ai/nano-banana."""
    try:
        result = fal_client.subscribe(
            "fal-ai/nano-banana",
            arguments={
                "prompt": prompt,
                "num_images": 1,
                "output_format": "jpeg",
            },
        )
        if "images" in result and len(result["images"]) > 0:
            image_url = result["images"][0]["url"]
            return download_and_convert_image(image_url)
        return None, "No images generated"
    except Exception as e:
        return None, f"Error with nano-banana: {str(e)}"


def generate_avatar_dreamina(prompt):
    """Generate an avatar using fal-ai/bytedance/dreamina/v3.1/text-to-image."""
    try:
        result = fal_client.subscribe(
            "fal-ai/bytedance/dreamina/v3.1/text-to-image",
            arguments={
                "prompt": prompt,
                "image_size": "portrait_16_9",
                "num_images": 1,
            },
            with_logs=True,
            on_queue_update=on_queue_update,
        )
        if "images" in result and len(result["images"]) > 0:
            image_url = result["images"][0]["url"]
            return download_and_convert_image(image_url)
        return None, "No images generated"
    except Exception as e:
        return None, f"Error with dreamina: {str(e)}"


def generate_avatar_wan(prompt):
    """Generate an avatar using fal-ai/wan/v2.2-a14b/text-to-image."""
    try:
        result = fal_client.subscribe(
            "fal-ai/wan/v2.2-a14b/text-to-image",
            arguments={
                "prompt": prompt,
                "num_inference_steps": 27,
                "enable_safety_checker": True,
                "enable_prompt_expansion": False,
                "acceleration": "regular",
                "guidance_scale": 3.5,
                "guidance_scale_2": 4,
                "shift": 2,
                "image_size": "portrait_16_9",
            },
            with_logs=True,
            on_queue_update=on_queue_update,
        )
        if "images" in result and len(result["images"]) > 0:
            image_url = result["images"][0]["url"]
            return download_and_convert_image(image_url)
        return None, "No images generated"
    except Exception as e:
        return None, f"Error with wan: {str(e)}"


def generate_avatar_flux1_krea(prompt):
    """Generate an avatar using fal-ai/flux-1/krea."""
    try:
        stream = fal_client.stream(
            "fal-ai/flux-1/krea",
            arguments={
                "prompt": prompt,
                "image_size": "portrait_16_9",
                "num_inference_steps": 28,
                "guidance_scale": 4.5,
                "num_images": 1,
                "enable_safety_checker": True,
                "output_format": "jpeg",
                "acceleration": "regular",
            },
        )
        for event in stream:
            if hasattr(event, "images") and event.images:
                image_url = event.images[0]["url"]
                return download_and_convert_image(image_url)
        return None, "No images generated"
    except Exception as e:
        return None, f"Error with flux-1/krea: {str(e)}"


def generate_avatar_imagen4(prompt):
    """Generate an avatar using fal-ai/imagen4/preview."""
    try:
        result = fal_client.subscribe(
            "fal-ai/imagen4/preview",
            arguments={
                "prompt": prompt,
                "aspect_ratio": "9:16",
                "num_images": 1,
                "resolution": "1K",
            },
        )
        if "images" in result and len(result["images"]) > 0:
            image_url = result["images"][0]["url"]
            return download_and_convert_image(image_url)
        return None, "No images generated"
    except Exception as e:
        return None, f"Error with imagen4: {str(e)}"


def generate_avatar_gemini(prompt):
    """Generate an avatar using fal-ai/gemini-25-flash-image."""
    try:
        result = fal_client.subscribe(
            "fal-ai/gemini-25-flash-image",
            arguments={
                "prompt": prompt,
                "num_images": 1,
                "output_format": "jpeg",
            },
        )
        if "images" in result and len(result["images"]) > 0:
            image_url = result["images"][0]["url"]
            return download_and_convert_image(image_url)
        return None, "No images generated"
    except Exception as e:
        return None, f"Error with gemini: {str(e)}"


def download_and_convert_image(image_url):
    """Helper function to download a generated image and convert it to a PIL Image."""
    try:
        response = requests.get(image_url)
        if response.status_code == 200:
            image = Image.open(io.BytesIO(response.content))
            return image, "Avatar generated successfully!"
        else:
            return None, f"Failed to download image: {response.status_code}"
    except Exception as e:
        return None, f"Error downloading image: {str(e)}"
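
# All generate_avatar_* helpers above share a (PIL.Image | None, status message)
# return contract via download_and_convert_image, which lets the router below
# pass their results straight through to the Gradio image and status outputs.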

def generate_avatar(prompt, selected_model):
    """Generate an avatar image with the given prompt and selected model."""
    if not prompt or prompt.strip() == "":
        return None, "Please provide a prompt for avatar generation."

    # Route to the appropriate function based on the model
    if selected_model == "fal-ai/flux/srpo":
        return generate_avatar_flux_srpo(prompt)
    elif selected_model == "fal-ai/flux-1/srpo":
        return generate_avatar_flux1_srpo(prompt)
    elif selected_model == "fal-ai/hunyuan-image/v2.1/text-to-image":
        return generate_avatar_hunyuan(prompt)
    elif selected_model == "fal-ai/bytedance/seedream/v4/text-to-image":
        return generate_avatar_seedream(prompt)
    elif selected_model == "fal-ai/nano-banana":
        return generate_avatar_nano_banana(prompt)
    elif selected_model == "fal-ai/bytedance/dreamina/v3.1/text-to-image":
        return generate_avatar_dreamina(prompt)
    elif selected_model == "fal-ai/wan/v2.2-a14b/text-to-image":
        return generate_avatar_wan(prompt)
    elif selected_model == "fal-ai/flux-1/krea":
        return generate_avatar_flux1_krea(prompt)
    elif selected_model == "fal-ai/imagen4/preview":
        return generate_avatar_imagen4(prompt)
    elif selected_model == "fal-ai/gemini-25-flash-image":
        return generate_avatar_gemini(prompt)
    else:
        return None, f"Unknown model: {selected_model}"


# Gradio Interface
def process_image_and_generate_avatar(image):
    """Process an uploaded image and generate an avatar in one step.

    Not wired into the UI below; kept as a one-shot pipeline helper that uses
    the same defaults as the interface (gpt-4o and fal-ai/flux/srpo).
    """
    if image is None:
        return None, "Please upload an image first.", "Upload an image to get started!"

    # Generate a description with the default description model
    description = generate_description(image, "gpt-4o")

    # Generate an avatar with the default generation model
    avatar_image, message = generate_avatar(description, "fal-ai/flux/srpo")

    return avatar_image, description, message


# Available description models
available_description_models = [
    "gpt-4o",
    "gpt-4.1",
    "gpt-5",
    "fal-ai/bagel/understand",
    "perceptron/isaac-01",
    "fal-ai/moondream2/visual-query",
    "fal-ai/moondream2",
]

# Available generation models
available_models = [
    "fal-ai/flux/srpo",
    "fal-ai/flux-1/srpo",
    "fal-ai/hunyuan-image/v2.1/text-to-image",
    "fal-ai/bytedance/seedream/v4/text-to-image",
    "fal-ai/nano-banana",
    "fal-ai/bytedance/dreamina/v3.1/text-to-image",
    "fal-ai/wan/v2.2-a14b/text-to-image",
    "fal-ai/flux-1/krea",
    "fal-ai/imagen4/preview",
    "fal-ai/gemini-25-flash-image",
]

# Create the Gradio interface
with gr.Blocks(title="Avatar Clone Generator") as demo:
    gr.Markdown("# Avatar Clone Generator")
    gr.Markdown("Upload a photo to generate a description and create an avatar!")

    with gr.Row():
        with gr.Column():
            # Image upload
            input_image = gr.Image(
                label="Upload Photo",
                type="pil",
                height=300,
            )

            # Description model selection
            description_model_dropdown = gr.Dropdown(
                choices=available_description_models,
                value="gpt-4o",
                label="Select Description Model",
                info="Choose the AI model for describing your image",
            )

            # Generate description button
            generate_desc_btn = gr.Button("Generate Description", variant="primary")

            # Description text area (editable)
            description_text = gr.Textbox(
                label="Generated Description (You can edit this)",
                lines=8,
                placeholder="Description will appear here after uploading an image...",
            )
            # Generation model selection
            generation_model_dropdown = gr.Dropdown(
                choices=available_models,
                value="fal-ai/flux/srpo",
                label="Select Generation Model",
                info="Choose the AI model for generating your avatar",
            )

            # Generate avatar button
            generate_avatar_btn = gr.Button("Generate Avatar", variant="secondary")

            # Status message
            status_text = gr.Textbox(
                label="Status",
                lines=2,
                interactive=False,
            )

        with gr.Column():
            # Generated avatar display
            output_image = gr.Image(
                label="Generated Avatar",
                height=400,
            )

    # Event handlers
    def on_image_upload(image):
        if image is not None:
            return "", "Image uploaded! Select a description model and click 'Generate Description' to proceed."
        return "", "Please upload an image."

    def on_generate_description(image, selected_description_model):
        if image is None:
            return "", "Please upload an image first."
        description = generate_description(image, selected_description_model)
        return description, f"Description generated using {selected_description_model}! You can edit it if needed."

    def on_generate_avatar(description, selected_generation_model):
        if not description or description.strip() == "":
            return None, "Please generate a description first."
        avatar_image, message = generate_avatar(description, selected_generation_model)
        return avatar_image, message

    # Connect events
    input_image.change(
        fn=on_image_upload,
        inputs=[input_image],
        outputs=[description_text, status_text],
    )

    generate_desc_btn.click(
        fn=on_generate_description,
        inputs=[input_image, description_model_dropdown],
        outputs=[description_text, status_text],
    )

    generate_avatar_btn.click(
        fn=on_generate_avatar,
        inputs=[description_text, generation_model_dropdown],
        outputs=[output_image, status_text],
    )

if __name__ == "__main__":
    demo.launch(share=True)
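
# Setup and quick smoke test (a sketch, not part of the app itself).
# The OpenAI SDK reads OPENAI_API_KEY and fal_client reads FAL_KEY from the
# environment, so a .env file like the following is assumed by load_dotenv():
#
#   OPENAI_API_KEY=sk-...
#   FAL_KEY=...
#
# The pipeline can also be exercised without the UI; "photo.jpg" below is a
# placeholder path, not a file shipped with this script:
#
#   from PIL import Image
#   desc = generate_description(Image.open("photo.jpg"), "gpt-4o")
#   avatar, status = generate_avatar(desc, "fal-ai/flux/srpo")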