Spaces:

danube2024
/

text-to-image-depth-map

Running

App Files Files Community

text-to-image-depth-map / app.py

danube2024

Update app.py

3a64eb8 verified 3 months ago

raw

history blame contribute delete

2.96 kB

	import gradio as gr
	import torch
	import numpy as np
	from diffusers import StableDiffusionXLPipeline
	from transformers import DPTFeatureExtractor, DPTForDepthEstimation
	from PIL import Image, ImageEnhance, ImageOps

	# ---------------------------------------------------------------------------
	# Runtime configuration shared by every model loaded below.
	# ---------------------------------------------------------------------------
	device = "cpu"  # switch to "cuda" if you have a GPU
	torch_dtype = torch.float32

	# ---------------------------------------------------------------------------
	# Text-to-image: SDXL base pipeline plus the bas-relief LoRA.
	# ---------------------------------------------------------------------------
	print("Loading SDXL Base model...")
	pipe = StableDiffusionXLPipeline.from_pretrained(
	    "stabilityai/stable-diffusion-xl-base-1.0",
	    torch_dtype=torch_dtype,
	)
	pipe = pipe.to(device)

	print("Loading bas-relief LoRA weights with PEFT...")
	# NOTE(review): `peft_backend` is not a documented `load_lora_weights`
	# argument as far as this file shows -- confirm it is honoured rather than
	# silently ignored by the installed diffusers version.
	pipe.load_lora_weights(
	    "KappaNeuro/bas-relief",  # HF repo that hosts BAS-RELIEF.safetensors
	    weight_name="BAS-RELIEF.safetensors",
	    peft_backend="peft",
	)

	# ---------------------------------------------------------------------------
	# Depth estimation: DPT-large preprocessor and model.
	# ---------------------------------------------------------------------------
	print("Loading DPT Depth Model...")
	# NOTE(review): DPTFeatureExtractor is reported as deprecated in recent
	# transformers releases (DPTImageProcessor replaces it) -- verify against the
	# pinned version before upgrading.
	feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
	depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
	depth_model = depth_model.to(device)

	def enhance_depth_map(depth_arr: np.ndarray) -> Image.Image:
	    """Convert a raw depth array into a contrast-stretched, sharpened image.

	    The array is min-max normalised, scaled to 0..255 and cast to ``uint8``
	    before being handed to PIL, where it is auto-contrasted and sharpened.

	    Args:
	        depth_arr: Depth values to visualise (assumes a 2-D array so PIL can
	            build a single-channel image -- TODO confirm with the caller).

	    Returns:
	        The enhanced depth map as a PIL image.
	    """
	    lowest = depth_arr.min()
	    highest = depth_arr.max()
	    # The small epsilon keeps the division defined when the array is constant.
	    span = highest - lowest + 1e-8
	    normalised = (depth_arr - lowest) / span
	    as_bytes = (normalised * 255).astype(np.uint8)

	    stretched = ImageOps.autocontrast(Image.fromarray(as_bytes))

	    # A factor of 2.0 sharpens relative to the unmodified image (factor 1.0).
	    return ImageEnhance.Sharpness(stretched).enhance(2.0)

	def generate_bas_relief_and_depth(prompt):
	    """Generate a bas-relief styled image for ``prompt`` and its depth map.

	    The LoRA trigger token ``BAS-RELIEF`` is prepended to the prompt unless
	    the caller already included it, so following the UI hint to type the
	    token no longer duplicates it. A missing (``None``) or blank prompt is
	    treated as empty text instead of being formatted into the prompt.

	    Args:
	        prompt: Free-text description entered by the user; may be ``None``
	            or empty.

	    Returns:
	        A ``(image, depth_map)`` tuple: the first image produced by the SDXL
	        pipeline and the enhanced depth map derived from it.
	    """
	    trigger = "BAS-RELIEF"
	    user_prompt = (prompt or "").strip()
	    # Case-insensitive match: the UI placeholder spells the token in lower
	    # case. NOTE(review): assumes the text encoder's tokenizer lowercases its
	    # input, so both spellings trigger the LoRA -- confirm.
	    if trigger.lower() in user_prompt.lower():
	        full_prompt = user_prompt
	    else:
	        full_prompt = f"{trigger} {user_prompt}".strip()

	    print("Generating image with LoRA style...")
	    result = pipe(
	        prompt=full_prompt,
	        num_inference_steps=15,  # reduce if too slow
	        guidance_scale=7.5,
	        height=512,  # reduce if you still get timeouts
	        width=512,
	    )
	    image = result.images[0]

	    print("Running DPT Depth Estimation...")
	    inputs = feature_extractor(image, return_tensors="pt").to(device)
	    with torch.no_grad():
	        outputs = depth_model(**inputs)
	        predicted_depth = outputs.predicted_depth

	        # Resize the prediction back to the generated image's resolution.
	        # PIL reports (width, height) while interpolate expects
	        # (height, width), hence the reversed size tuple.
	        prediction = torch.nn.functional.interpolate(
	            predicted_depth.unsqueeze(1),
	            size=image.size[::-1],
	            mode="bicubic",
	            align_corners=False,
	        ).squeeze()

	    depth_map_pil = enhance_depth_map(prediction.cpu().numpy())

	    return image, depth_map_pil

	# ---------------------------------------------------------------------------
	# Gradio UI: one text prompt in, generated image and its depth map out.
	# ---------------------------------------------------------------------------
	title = "Bas-Relief (SDXL + LoRA) + Depth Map (with PEFT)"
	# The generation function adds the LoRA trigger token itself, so the
	# description must not ask users to type it (that would duplicate the token).
	description = (
	    "Loads stable-diffusion-xl-base-1.0 on CPU and loads the LoRA from 'KappaNeuro/bas-relief'. "
	    "The 'BAS-RELIEF' trigger token is added to your prompt automatically, "
	    "then a depth map is computed from the generated image."
	)

	iface = gr.Interface(
	    fn=generate_bas_relief_and_depth,
	    inputs=gr.Textbox(
	        label="Description",
	        placeholder="bas-relief with roman soldier, marble relief, intricately carved",
	    ),
	    # Output order matches the (image, depth_map) tuple returned by `fn`.
	    outputs=[gr.Image(label="Bas-Relief Image"), gr.Image(label="Depth Map")],
	    title=title,
	    description=description,
	)

	# Only start the web server when executed as a script, not on import.
	if __name__ == "__main__":
	    iface.launch()