import os import re from dataclasses import dataclass from typing import Dict, Any, Optional, Tuple, List import gradio as gr from PIL import Image, ImageStat import numpy as np # Optional imports for reverse-prompting try: import cv2 except Exception: cv2 = None try: from transformers import pipeline HAS_TRANSFORMERS = True except Exception: HAS_TRANSFORMERS = False APP_TITLE = "Ultra‑Realistic Prompt Builder" NEGATIVE_BASELINE = ( "cgi, 3d render, cartoon, illustration, plastic/waxy skin, overprocessed, oversharpened halos, " "lowres, noise, banding, posterization, watermark, text, logo, bad anatomy, extra fingers, " "deformed hands, blurry, depth map artifacts, harsh HDR, unrealistic colors" ) @dataclass class CameraSpec: cameraBody: str = "" focalLengthMm: Optional[int] = None aperture: str = "" iso: Optional[int] = None @dataclass class PromptFields: subject: str = "" environment: str = "" timeWeather: str = "" camera: CameraSpec = CameraSpec() composition: str = "" lighting: str = "" microDetails: str = "" motionAtmosphere: str = "" colorGrade: str = "" realismCues: bool = True aspectRatio: str = "4:5" negatives: str = NEGATIVE_BASELINE model: str = "sdxl" # "mj" | "sdxl" | "dalle" # MJ settings_mj_s: int = 100 settings_mj_chaos: int = 5 settings_mj_seed: int = 42 # SDXL settings_sdxl_steps: int = 34 settings_sdxl_cfg: int = 5 settings_sdxl_sampler: str = "DPM++ SDE Karras" settings_sdxl_resolution: str = "1024x1280" settings_sdxl_refiner: float = 0.25 # DALL·E settings_dalle_resolution: str = "1024x1024" def realism_string(enabled: bool) -> str: if not enabled: return "" return ( "Photorealistic, true-to-life colors, subsurface scattering, global illumination, " "soft shadows, accurate reflections, natural skin, shallow DOF, film grain 3–5%, " "subtle chromatic aberration, vignette." ) def safe_join(parts: List[str]) -> str: return " ".join([p.strip() for p in parts if p and str(p).strip()]).replace(" ", " ").strip() def build_universal(f: PromptFields) -> str: s1 = f"Photo of {f.subject}" if f.subject else "Photo" if f.environment: s1 += f" in/at {f.environment}" if f.timeWeather: s1 += f", {f.timeWeather}" s1 += "." cam_bits = [] if f.camera and f.camera.focalLengthMm: cam_bits.append(f"{f.camera.focalLengthMm}mm lens") if f.camera and f.camera.aperture: cam_bits.append(f"at {f.camera.aperture}") if f.camera and f.camera.iso: cam_bits.append(f"ISO {f.camera.iso}") s2 = "Shot with a " + ", ".join(cam_bits) + "." if cam_bits else "" s3 = f"{f.composition}." if f.composition else "" s4 = f"Lighting: {f.lighting}." if f.lighting else "" s5 = f"Materials & micro-detail: {f.microDetails}." if f.microDetails else "" s6 = f"Motion/atmosphere: {f.motionAtmosphere}." if f.motionAtmosphere else "" s7 = f"Color & grade: {f.colorGrade}." if f.colorGrade else "" s8 = realism_string(f.realismCues) return safe_join([s1, s2, s3, s4, s5, s6, s7, s8]) def format_midjourney(universal: str, f: PromptFields) -> str: return f"{universal} --style raw --ar {f.aspectRatio} --s {f.settings_mj_s} --chaos {f.settings_mj_chaos} --seed {f.settings_mj_seed}" def format_sdxl(universal: str, f: PromptFields) -> Dict[str, Any]: return { "positive": universal, "negative": f.negatives or NEGATIVE_BASELINE, "settings": { "steps": f.settings_sdxl_steps, "cfg": f.settings_sdxl_cfg, "sampler": f.settings_sdxl_sampler, "resolution": f.settings_sdxl_resolution, "refiner": f.settings_sdxl_refiner, "tips": "Use SDXL Refiner at 0.2–0.4 denoise; Upscale 1.5–2.0x for micro-detail" } } def format_dalle(universal: str, f: PromptFields) -> Dict[str, Any]: prose = f"A high‑resolution photograph. {universal}" return {"prompt": prose, "resolution": f.settings_dalle_resolution} def compose( subject, environment, timeWeather, cameraBody, focalLengthMm, aperture, iso, composition, lighting, microDetails, motionAtmosphere, colorGrade, realismCues, aspectRatio, negatives, mj_s, mj_chaos, mj_seed, sdxl_steps, sdxl_cfg, sdxl_sampler, sdxl_resolution, sdxl_refiner, dalle_resolution ) -> Tuple[str, str, Dict[str, Any], Dict[str, Any], str]: f = PromptFields( subject=subject or "", environment=environment or "", timeWeather=timeWeather or "", camera=CameraSpec( cameraBody=cameraBody or "", focalLengthMm=int(focalLengthMm) if str(focalLengthMm).strip() else None, aperture=aperture or "", iso=int(iso) if str(iso).strip() else None ), composition=composition or "", lighting=lighting or "", microDetails=microDetails or "", motionAtmosphere=motionAtmosphere or "", colorGrade=colorGrade or "", realismCues=bool(realismCues), aspectRatio=aspectRatio or "4:5", negatives=negatives or NEGATIVE_BASELINE, settings_mj_s=int(mj_s) if str(mj_s).strip() else 100, settings_mj_chaos=int(mj_chaos) if str(mj_chaos).strip() else 5, settings_mj_seed=int(mj_seed) if str(mj_seed).strip() else 42, settings_sdxl_steps=int(sdxl_steps) if str(sdxl_steps).strip() else 34, settings_sdxl_cfg=int(sdxl_cfg) if str(sdxl_cfg).strip() else 5, settings_sdxl_sampler=sdxl_sampler or "DPM++ SDE Karras", settings_sdxl_resolution=sdxl_resolution or "1024x1280", settings_sdxl_refiner=float(sdxl_refiner) if str(sdxl_refiner).strip() else 0.25, settings_dalle_resolution=dalle_resolution or "1024x1024", ) universal = build_universal(f) mj = format_midjourney(universal, f) sdxl = format_sdxl(universal, f) dalle = format_dalle(universal, f) return universal, mj, sdxl, dalle, (f.negatives or NEGATIVE_BASELINE) # ---------- Reverse prompt helpers ---------- CAPTIONER = None def init_captioner(): if not HAS_TRANSFORMERS: return None try: return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base") except Exception: return None CAPTIONER = init_captioner() OBJDET = None def init_objdet(): if not HAS_TRANSFORMERS: return None try: return pipeline("object-detection", model="facebook/detr-resnet-50") except Exception: return None OBJDET = init_objdet() def download_haarcascade() -> Optional[str]: if cv2 is None: return None fname = "haarcascade_frontalface_default.xml" if os.path.exists(fname): return fname import requests url = "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml" try: r = requests.get(url, timeout=15) r.raise_for_status() with open(fname, "wb") as f: f.write(r.content) return fname except Exception: return None def detect_faces(pil_img: Image.Image) -> int: if cv2 is None: return 0 path = download_haarcascade() if not path: return 0 try: img = np.array(pil_img.convert("RGB")) gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) face_cascade = cv2.CascadeClassifier(path) faces = face_cascade.detectMultiScale(gray, 1.1, 4) return 0 if faces is None else len(faces) except Exception: return 0 def avg_brightness(pil_img: Image.Image) -> float: stat = ImageStat.Stat(pil_img.convert("L")) return float(stat.mean[0]) def nearest_aspect(w: int, h: int) -> str: target = w / h candidates = { "1:1": 1.0, "4:5": 0.8, "5:4": 1.25, "4:3": 1.333, "3:2": 1.5, "16:9": 1.777 } return min(candidates.items(), key=lambda kv: abs(kv[1] - target))[0] def _article(word: str) -> str: return "an" if word and word[0].lower() in "aeiou" else "a" def _label_to_phrase(label: str) -> str: nice = {"tv": "television", "cell phone": "phone", "sports ball": "ball", "potted plant": "potted plant"} word = nice.get(label, label) return f"{_article(word)} {word}" def _centrality_score(cx, cy, W, H): dx = abs(cx - W/2) / (W/2) dy = abs(cy - H/2) / (H/2) dist = min(1.0, (dx*dx + dy*dy) ** 0.5) return 1.0 - dist def _detect_main_subject(img: Image.Image): if OBJDET is None: return None, [] try: dets = OBJDET(img) except Exception: return None, [] if not dets: return None, [] W, H = img.size scored = [] for d in dets: box = d.get("box", {}) xmin, ymin = box.get("xmin", 0), box.get("ymin", 0) xmax, ymax = box.get("xmax", 0), box.get("ymax", 0) w, h = max(1, xmax - xmin), max(1, ymax - ymin) area = (w * h) / float(W * H) cx, cy = xmin + w/2, ymin + h/2 central = _centrality_score(cx, cy, W, H) conf = float(d.get("score", 0.0)) label = d.get("label", "") score = conf * (0.6 * area + 0.4 * central) scored.append({"label": label, "score": score}) scored.sort(key=lambda x: x["score"], reverse=True) main_phrase = _label_to_phrase(scored[0]["label"]) suggestions, seen = [], set() for s in scored: p = _label_to_phrase(s["label"]) if p not in seen: suggestions.append(p) seen.add(p) if len(suggestions) >= 5: break return main_phrase, suggestions def _action_from_caption(caption: str) -> str: c = (caption or "").lower() for key in ["running", "sprinting", "walking", "standing", "jumping", "riding", "driving", "sitting"]: if key in c: return key return "" def extract_fields_from_image(img: Image.Image) -> Dict[str, Any]: caption = "" if CAPTIONER: try: out = CAPTIONER(img) if isinstance(out, list) and out: caption = out[0].get("generated_text", "") except Exception: caption = "" brightness = avg_brightness(img) if brightness > 140: timeWeather = "daylight" elif 100 < brightness <= 140: timeWeather = "overcast daylight" else: timeWeather = "night with ambient light" subject_phrase, subject_suggestions = _detect_main_subject(img) faces = detect_faces(img) if not subject_phrase and faces > 0: subject_phrase = "a person" if not subject_phrase: m = re.search(r"(a|an|the)\s+([^,.]+?)(?:\s+(on|in|at|by|with|near|amid|from)\b|[.,]|$)", (caption or "").lower()) subject_phrase = m.group(0).rstrip(",.") if m else ("a person" if faces > 0 else "a real-world subject") if subject_phrase.startswith(("a person", "an person")): act = _action_from_caption(caption) if act and act not in subject_phrase: subject_phrase = f"{subject_phrase} {act}" if subject_phrase.startswith(("a person", "an person")): focal = 35 aperture = "f/2.8" iso = 200 if "day" in timeWeather else 800 composition = "eye‑level, rear three‑quarter or profile, leading lines, shallow DOF" lighting = "soft natural light" if "day" in timeWeather else "mixed ambient light with practicals, soft shadows" micro = "skin pores, fabric textures, scuffs, dust in the air" motion = "slight motion blur on limbs if running" if "running" in subject_phrase else "no visible motion blur" color_grade = "neutral, true-to-life colors, gentle contrast, high micro‑contrast" else: focal = 35 aperture = "f/2.8" iso = 200 if "day" in timeWeather else 800 composition = "eye‑level, balanced framing, leading lines, shallow DOF" lighting = "soft natural light" if "day" in timeWeather else "mixed ambient light with practicals, soft shadows" micro = "texture of materials, dust, subtle scratches, specular highlights" motion = "slight motion blur if present, volumetric light if applicable" color_grade = "neutral, true-to-life colors, gentle contrast, high micro‑contrast" w, h = img.size aspect = nearest_aspect(w, h) return { "subject": subject_phrase, "subjectCandidates": subject_suggestions, "environment": "", "timeWeather": timeWeather, "camera": { "cameraBody": "", "focalLengthMm": focal, "aperture": aperture, "iso": iso }, "composition": composition, "lighting": lighting, "microDetails": micro, "motionAtmosphere": motion, "colorGrade": color_grade, "realismCues": True, "aspectRatio": aspect, "negatives": NEGATIVE_BASELINE, "model": "sdxl" } def reverse_prompt(image: Image.Image): if image is None: return {}, "", "", {"positive": "", "negative": "", "settings": {}}, {"prompt": "", "resolution": ""}, NEGATIVE_BASELINE, gr.update(choices=[], value=None) fields = extract_fields_from_image(image) f = PromptFields( subject=fields["subject"], environment=fields.get("environment", ""), timeWeather=fields.get("timeWeather", ""), camera=CameraSpec( cameraBody=fields["camera"].get("cameraBody", ""), focalLengthMm=fields["camera"].get("focalLengthMm", None), aperture=fields["camera"].get("aperture", ""), iso=fields["camera"].get("iso", None), ), composition=fields.get("composition", ""), lighting=fields.get("lighting", ""), microDetails=fields.get("microDetails", ""), motionAtmosphere=fields.get("motionAtmosphere", ""), colorGrade=fields.get("colorGrade", ""), realismCues=True, aspectRatio=fields.get("aspectRatio", "4:5"), negatives=fields.get("negatives", NEGATIVE_BASELINE), ) universal = build_universal(f) mj = format_midjourney(universal, f) sdxl = format_sdxl(universal, f) dalle = format_dalle(universal, f) cands = fields.get("subjectCandidates", []) or [] dd = gr.update(choices=cands, value=(cands[0] if cands else None)) return fields, universal, mj, sdxl, dalle, (fields.get("negatives") or NEGATIVE_BASELINE), dd # ---------- Presets ---------- PRESETS = { "Portrait (4:5)": PromptFields( subject="a 30‑year‑old person with freckles", environment="sunlit loft by a large window", timeWeather="golden hour", camera=CameraSpec(cameraBody="", focalLengthMm=85, aperture="f/1.8", iso=200), composition="eye‑level half‑body, rule of thirds, shallow DOF, circular bokeh", lighting="soft window key at 45°, reflector fill, subtle hair rim, 5400K", microDetails="skin pores, peach fuzz, flyaway hairs, natural imperfections", motionAtmosphere="gentle breeze, no visible blur", colorGrade="warm Portra‑like, soft contrast, high dynamic range", realismCues=True, aspectRatio="4:5", negatives=NEGATIVE_BASELINE ), "Product beverage (3:2)": PromptFields( subject="a cold amber beer bottle", environment="on a wet slate surface with ice, studio", timeWeather="controlled studio", camera=CameraSpec(cameraBody="", focalLengthMm=50, aperture="f/4.0", iso=100), composition="low angle, hero shot, product centered", lighting="large softbox key from 45°, strip rim from behind, black flags, 5000K", microDetails="condensation droplets, micro‑scratches on glass, label fibers, subtle fingerprints", motionAtmosphere="no motion, crisp detail", colorGrade="clean neutrals, high micro‑contrast", realismCues=True, aspectRatio="3:2", negatives=NEGATIVE_BASELINE ), "Architecture (16:9)": PromptFields( subject="a modern concrete house facade", environment="suburban street", timeWeather="overcast day", camera=CameraSpec(cameraBody="", focalLengthMm=24, aperture="f/8", iso=100), composition="straight‑on elevation, leading lines, no keystone distortion", lighting="soft diffuse skylight, no harsh shadows, 6000K", microDetails="concrete texture, subtle stains, window reflections", motionAtmosphere="static scene, no motion blur, crisp detail", colorGrade="neutral, low contrast, high DR", realismCues=True, aspectRatio="16:9", negatives=NEGATIVE_BASELINE ), "Night street (3:2)": PromptFields( subject="a rainy night city street with neon signs", environment="downtown alley", timeWeather="night, light rain", camera=CameraSpec(cameraBody="", focalLengthMm=35, aperture="f/1.8", iso=1600), composition="eye‑level, leading lines, reflections on wet pavement", lighting="neon signs as key, practicals for fill, 3200–4500K mix, glows and halos", microDetails="raindrops, puddle ripples, specular reflections, wet textures", motionAtmosphere="light motion blur on pedestrians, volumetric haze", colorGrade="cinematic teal‑magenta, gentle contrast", realismCues=True, aspectRatio="3:2", negatives=NEGATIVE_BASELINE ) } def load_preset(name: str): f = PRESETS.get(name) if not f: return [gr.update()] * 23 return ( f.subject, f.environment, f.timeWeather, f.camera.cameraBody, f.camera.focalLengthMm or "", f.camera.aperture, f.camera.iso or "", f.composition, f.lighting, f.microDetails, f.motionAtmosphere, f.colorGrade, f.realismCues, f.aspectRatio, f.negatives, f.settings_mj_s, f.settings_mj_chaos, f.settings_mj_seed, f.settings_sdxl_steps, f.settings_sdxl_cfg, f.settings_sdxl_sampler, f.settings_sdxl_resolution, f.settings_sdxl_refiner, f.settings_dalle_resolution ) # ---------- UI ---------- with gr.Blocks(title=APP_TITLE) as demo: gr.Markdown(f"# {APP_TITLE}\nCreate model‑ready, ultra‑realistic photo prompts. Reverse‑prompt from an image if you like. Free, no API keys.\n\nNote: Reverse analysis avoids identifying real people; it only describes general appearance/lighting/style.") with gr.Tab("Build"): with gr.Row(): with gr.Column(scale=1): preset = gr.Dropdown(choices=list(PRESETS.keys()), label="Presets") load_btn = gr.Button("Load preset") subject = gr.Textbox(label="Subject", placeholder="e.g., a person running") environment = gr.Textbox(label="Environment/Setting", placeholder="e.g., sunlit loft by a large window") timeWeather = gr.Textbox(label="Time & Weather", placeholder="e.g., golden hour") with gr.Accordion("Camera", open=False): cameraBody = gr.Textbox(label="Camera body (optional)", placeholder="e.g., Canon R5") focalLengthMm = gr.Textbox(label="Focal length (mm)", placeholder="e.g., 85") aperture = gr.Textbox(label="Aperture", placeholder="e.g., f/1.8") iso = gr.Textbox(label="ISO", placeholder="e.g., 200") composition = gr.Textbox(label="Composition & Perspective", placeholder="e.g., eye‑level, shallow DOF, rule of thirds") lighting = gr.Textbox(label="Lighting", placeholder="e.g., soft window key at 45°, reflector fill, rim, 5400K") microDetails = gr.Textbox(label="Materials & Micro‑detail", placeholder="e.g., skin pores, fabric weave, subtle scratches") motionAtmosphere = gr.Textbox(label="Motion/Atmosphere", placeholder="e.g., slight motion blur, volumetric light, haze") colorGrade = gr.Textbox(label="Color & Grade", placeholder="e.g., warm Portra‑like, soft contrast, high DR") realismCues = gr.Checkbox(value=True, label="Include realism cues") aspectRatio = gr.Textbox(label="Aspect ratio", value="4:5", placeholder="e.g., 4:5, 3:2, 16:9") negatives = gr.Textbox(label="Negative prompt", value=NEGATIVE_BASELINE) with gr.Accordion("Model settings", open=False): mj_s = gr.Slider(1, 1000, value=100, step=1, label="Midjourney --s") mj_chaos = gr.Slider(0, 100, value=5, step=1, label="Midjourney --chaos") mj_seed = gr.Slider(0, 999999, value=42, step=1, label="Midjourney --seed") sdxl_steps = gr.Slider(10, 100, value=34, step=1, label="SDXL steps") sdxl_cfg = gr.Slider(1, 20, value=5, step=1, label="SDXL CFG") sdxl_sampler = gr.Textbox(label="SDXL sampler", value="DPM++ SDE Karras") sdxl_resolution = gr.Textbox(label="SDXL resolution", value="1024x1280") sdxl_refiner = gr.Slider(0.0, 1.0, value=0.25, step=0.05, label="SDXL refiner denoise") dalle_resolution = gr.Textbox(label="DALL·E resolution", value="1024x1024") gen_btn = gr.Button("Generate prompts") with gr.Column(scale=1): universal_out = gr.Textbox(label="Universal prompt", lines=6) mj_out = gr.Textbox(label="Midjourney prompt", lines=6) sdxl_out = gr.JSON(label="SDXL prompt (positive, negative, settings)") dalle_out = gr.JSON(label="DALL·E 3 prompt") neg_out = gr.Textbox(label="Negative prompt (for SDXL)", value=NEGATIVE_BASELINE) load_btn.click(load_preset, inputs=[preset], outputs=[ subject, environment, timeWeather, cameraBody, focalLengthMm, aperture, iso, composition, lighting, microDetails, motionAtmosphere, colorGrade, realismCues, aspectRatio, negatives, mj_s, mj_chaos, mj_seed, sdxl_steps, sdxl_cfg, sdxl_sampler, sdxl_resolution, sdxl_refiner, dalle_resolution ]) gen_btn.click( compose, inputs=[ subject, environment, timeWeather, cameraBody, focalLengthMm, aperture, iso, composition, lighting, microDetails, motionAtmosphere, colorGrade, realismCues, aspectRatio, negatives, mj_s, mj_chaos, mj_seed, sdxl_steps, sdxl_cfg, sdxl_sampler, sdxl_resolution, sdxl_refiner, dalle_resolution ], outputs=[universal_out, mj_out, sdxl_out, dalle_out, neg_out] ) with gr.Tab("Reverse (Image → Prompt)"): gr.Markdown("Upload an image. The app will infer fields without identifying real people, then build prompts. Use the detected-subject dropdown to set the main subject.") image_in = gr.Image(type="pil", label="Upload image") analyze_btn = gr.Button("Analyze & Generate") subject_pick = gr.Dropdown(label="Detected subjects (pick one)", choices=[], value=None) fields_out = gr.JSON(label="Extracted fields (editable in Build tab if needed)") universal_out_r = gr.Textbox(label="Universal prompt", lines=6) mj_out_r = gr.Textbox(label="Midjourney prompt", lines=6) sdxl_out_r = gr.JSON(label="SDXL prompt (positive, negative, settings)") dalle_out_r = gr.JSON(label="DALL·E 3 prompt") neg_out_r = gr.Textbox(label="Negative prompt (for SDXL)", value=NEGATIVE_BASELINE) analyze_btn.click( reverse_prompt, inputs=[image_in], outputs=[fields_out, universal_out_r, mj_out_r, sdxl_out_r, dalle_out_r, neg_out_r, subject_pick] ) def use_picked_subject(picked): return picked or "" subject_pick.change(use_picked_subject, inputs=[subject_pick], outputs=[subject]) gr.Markdown( "Tips\n" "- For Midjourney, prepend 1–2 reference image URLs; keep --style raw.\n" "- For SDXL, use Refiner at 0.2–0.4 and upscale 1.5–2.0x for micro‑detail.\n" "- DALL·E 3 responds best to concise photographic prose with lens + lighting." ) if __name__ == "__main__": demo.launch()