Update app.py
app.py CHANGED
@@ -12,8 +12,6 @@ from PIL import Image, ImageEnhance, ImageOps
 device = "cpu"  # or "cuda" if GPU is available
 torch_dtype = torch.float32  # if using CPU or float16 for GPU

-# --- Load Base SDXL Model ---
-# (Large model, be sure you have enough memory or use fewer steps)
 print("Loading SDXL Base model...")
 pipe = StableDiffusionXLPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
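The `device`/`torch_dtype` pair above stays hard-coded for CPU. A common variant (an assumption on my part, not part of this commit) is to detect CUDA at startup instead of editing the file by hand:

import torch

# Hypothetical variant: pick device/dtype at runtime instead of hard-coding.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32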
@@ -21,86 +19,57 @@ pipe = StableDiffusionXLPipeline.from_pretrained(
 )
 pipe.to(device)

-# --- Load LoRA Weights from KappaNeuro/bas-relief ---
-# The safetensors file is named "BAS-RELIEF.safetensors"
-# This merges the LoRA into the pipeline so you can use it via the "BAS-RELIEF" token
 print("Loading bas-relief LoRA weights...")
+# IMPORTANT: Pass the first argument as a string to the repo or path,
+# and `weight_name` as a kwarg. That matches the actual function signature.
 pipe.load_lora_weights(
-
+    "KappaNeuro/bas-relief",  # repo / path
     weight_name="BAS-RELIEF.safetensors"
 )

-# --- Load Depth Estimation Model ---
-# We'll use Intel's DPT for depth. On CPU, it's also relatively large, so be cautious of performance.
 print("Loading DPT Depth model...")
 feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
 depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(device)


-############################################
-# 2. Depth Map Enhancement (PIL-based)
-############################################
 def enhance_depth_map(depth_arr: np.ndarray) -> Image.Image:
     """
-
-    - Auto-contrast to emphasize details
-    - Sharpen edges
+    Normalize depth to [0, 255], auto-contrast, and sharpen.
     """
     d_min, d_max = depth_arr.min(), depth_arr.max()
     depth_stretched = (depth_arr - d_min) / (d_max - d_min + 1e-8)
     depth_stretched = (depth_stretched * 255).astype(np.uint8)

     depth_pil = Image.fromarray(depth_stretched)
-
-    # Auto-contrast
     depth_pil = ImageOps.autocontrast(depth_pil)

-    # Sharpen
     enhancer = ImageEnhance.Sharpness(depth_pil)
     depth_pil = enhancer.enhance(2.0)

     return depth_pil


-############################################
-# 3. Generation + Depth Inference Function
-############################################
 def generate_bas_relief_and_depth(prompt: str):
-    """
-    1) Generate a 'bas-relief' style image using the LoRA from KappaNeuro/bas-relief.
-       - Must include "BAS-RELIEF" token in the prompt for the style to apply.
-    2) Compute a depth map using Intel/DPT-Large.
-    3) Return (image, depth_map).
-    """
-
-    # -- Step A: Merge the user's prompt with "BAS-RELIEF" instance token --
-    # You can experiment with different prompt styles:
-    # e.g. "BAS-RELIEF sculpture of a woman in shibari, marble, octane render..."
+    # We prepend "BAS-RELIEF" to ensure the LoRA style is triggered.
     full_prompt = f"BAS-RELIEF {prompt}"

-    # -- Step B: Generate the image with SDXL + LoRA
-    # Keep resolution modest to avoid timeouts on CPU
     print("Generating bas-relief image...")
     result = pipe(
         prompt=full_prompt,
-        num_inference_steps=15,  # Lower
+        num_inference_steps=15,  # Lower for speed on CPU
         guidance_scale=7.5,
-        height=512,
+        height=512,
         width=512
     )
-
-    # Extract image from pipeline result
     generated_image = result.images[0]

-    # -- Step C: Depth Estimation with DPT
     print("Running depth estimation...")
     inputs = feature_extractor(generated_image, return_tensors="pt").to(device)
-
     with torch.no_grad():
         outputs = depth_model(**inputs)
-    predicted_depth = outputs.predicted_depth
+    predicted_depth = outputs.predicted_depth

-    # Resize to match original image
+    # Resize depth map to match original image
     prediction = torch.nn.functional.interpolate(
         predicted_depth.unsqueeze(1),
         size=generated_image.size[::-1],
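The substantive fix in this hunk is the `pipe.load_lora_weights(...)` call: `diffusers` expects the repo id or local path as the first positional argument, with `weight_name` as a keyword; passing `weight_name` alone raises a `TypeError`. A minimal sketch of the corrected call in isolation (model ids taken from the diff; the surrounding setup is assumed):

import torch
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float32,  # float32 on CPU
)
# Repo id first, then the specific .safetensors file inside that repo.
pipe.load_lora_weights("KappaNeuro/bas-relief", weight_name="BAS-RELIEF.safetensors")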
@@ -109,31 +78,24 @@ def generate_bas_relief_and_depth(prompt: str):
     ).squeeze(0)

     depth_arr = prediction.cpu().numpy()
-
+    depth_pil = enhance_depth_map(depth_arr)

-    return generated_image,
+    return generated_image, depth_pil


-############################################
-# 4. Gradio Interface
-############################################
-title = "Bas-Relief with SDXL + LoRA + Depth Map"
+title = "Bas-Relief (SDXL + LoRA) + Depth Map"
 description = (
-    "
-    "
-    "Lower resolution or fewer steps if you get timeouts."
+    "Load SDXL base on CPU, apply 'BAS-RELIEF.safetensors' LoRA from KappaNeuro/bas-relief. "
+    "Then run DPT for depth estimation."
 )

 iface = gr.Interface(
     fn=generate_bas_relief_and_depth,
     inputs=gr.Textbox(
         label="Describe your scene/style",
-        placeholder="sculpture of a woman in shibari, marble, intricate details"
+        placeholder="e.g., 'sculpture of a woman in shibari, marble, intricate details'"
     ),
-    outputs=[
-        gr.Image(label="Bas-Relief Image"),
-        gr.Image(label="Depth Map"),
-    ],
+    outputs=[gr.Image(label="Bas-Relief Image"), gr.Image(label="Depth Map")],
     title=title,
     description=description
 )
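One subtlety in the depth-resize step is the `size=generated_image.size[::-1]` argument: PIL's `.size` is a `(width, height)` tuple, while `torch.nn.functional.interpolate` expects `(height, width)`, hence the reversal. A standalone sketch of just that step (shapes and interpolation flags are assumptions; the diff elides the lines carrying them):

import torch
import torch.nn.functional as F

predicted_depth = torch.rand(1, 384, 384)  # (batch, H, W), as DPT returns it
resized = F.interpolate(
    predicted_depth.unsqueeze(1),          # -> (batch, 1, H, W)
    size=(512, 512),                       # (H, W) of the 512x512 generated image
    mode="bicubic",
    align_corners=False,
)
print(resized.shape)                       # torch.Size([1, 1, 512, 512])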
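Note that the diff ends at the `gr.Interface(...)` construction; the launch call (typically `iface.launch()`) sits outside the changed region, so it does not appear here. A quick local smoke test of the updated function, assuming the file is importable as `app`:

from app import generate_bas_relief_and_depth

# One end-to-end run; on CPU this can take several minutes at 512x512 / 15 steps.
image, depth = generate_bas_relief_and_depth("lion head, weathered marble")
image.save("relief.png")
depth.save("depth.png")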