Spaces:

xingyang1
/

Distill-Any-Depth

Running on Zero

App Files Files Community

Distill-Any-Depth / app.py

xingyang1

Update app.py

642c115 verified 16 days ago

raw

history blame contribute delete

5.18 kB

	import gradio as gr
	import torch
	from PIL import Image
	import numpy as np
	from distillanydepth.modeling.archs.dam.dam import DepthAnything
	from distillanydepth.utils.image_util import chw2hwc, colorize_depth_maps
	from distillanydepth.midas.transforms import Resize, NormalizeImage, PrepareForNet
	from torchvision.transforms import Compose
	import cv2
	from huggingface_hub import hf_hub_download
	from safetensors.torch import load_file
	from gradio_imageslider import ImageSlider
	import spaces
	import tempfile

	# Helper function to build a model and load its weights from a safetensors checkpoint
	def load_model_by_name(arch_name, checkpoint_path, device):
	    """Build a depth model and populate it from a safetensors checkpoint.

	    Args:
	        arch_name: Architecture identifier; only ``'depthanything'`` is
	            handled.
	        checkpoint_path: Path to a weight file readable by
	            ``safetensors.torch.load_file``.
	        device: Device the model is moved to.

	    Returns:
	        The ``DepthAnything`` model with the checkpoint weights loaded,
	        placed on ``device``.

	    Raises:
	        NotImplementedError: If ``arch_name`` is not ``'depthanything'``.
	    """
	    # Fail fast on unsupported architectures, before touching the checkpoint.
	    if arch_name != 'depthanything':
	        raise NotImplementedError(f"Unknown architecture: {arch_name}")

	    # Read the weights with safetensors.
	    model_weights = load_file(checkpoint_path)

	    # Instantiate the network on the target device, then apply the weights.
	    # One transfer is enough: load_state_dict copies values into the
	    # existing parameters and does not relocate them.
	    model = DepthAnything(checkpoint_path=None).to(device)
	    model.load_state_dict(model_weights)
	    return model

	# Image processing function
	def process_image(image, model, device):
	    """Run depth estimation on one image and build the display outputs.

	    Args:
	        image: Input image, converted with ``np.array``; assumed to be an
	            8-bit, 3-channel PIL image -- TODO confirm against callers.
	        model: Callable network returning ``(prediction, extra)`` for a
	            batched image tensor, or ``None``.
	        device: Device the input tensor is moved to before inference.

	    Returns:
	        A 4-tuple ``(image, depth_colored, depth_gray, depth_raw_path)``:
	        the untouched input, a colorized depth PIL image, a 3-channel
	        grayscale depth PIL image (both resized to the input size), and
	        the path of a ``.npy`` file holding the normalized prediction at
	        model resolution. Returns four ``None`` values when ``model`` is
	        ``None``.
	    """
	    if model is None:
	        return None, None, None, None

	    # Reverse the channel axis and scale 8-bit values to [0, 1].
	    # NOTE(review): for an RGB input this yields BGR, while the mean/std
	    # below look like the usual RGB ImageNet statistics -- confirm the
	    # channel order the model expects before changing anything here.
	    image_np = np.array(image)[..., ::-1] / 255

	    transform = Compose([
	        Resize(
	            756,
	            756,
	            resize_target=False,
	            keep_aspect_ratio=True,
	            ensure_multiple_of=14,
	            resize_method='lower_bound',
	            image_interpolation_method=cv2.INTER_CUBIC,
	        ),
	        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
	        PrepareForNet(),
	    ])

	    image_tensor = transform({'image': image_np})['image']
	    image_tensor = torch.from_numpy(image_tensor).unsqueeze(0).to(device)

	    with torch.no_grad():
	        pred_disp, _ = model(image_tensor)
	    torch.cuda.empty_cache()

	    # First sample, first channel of the prediction as a 2-D numpy array.
	    pred_disp_np = pred_disp.detach().cpu().numpy()[0, 0, :, :]

	    # Min-max normalize to [0, 1]. A constant prediction has zero range;
	    # map it to all zeros instead of dividing by zero (which would produce
	    # NaNs and an undefined uint8 cast further down).
	    disp_min = pred_disp_np.min()
	    disp_range = pred_disp_np.max() - disp_min
	    if disp_range > 0:
	        pred_disp_normalized = (pred_disp_np - disp_min) / disp_range
	    else:
	        pred_disp_normalized = np.zeros_like(pred_disp_np)

	    # Colorized depth map
	    cmap = "Spectral_r"
	    depth_colored = colorize_depth_maps(
	        pred_disp_normalized[None, ..., None], 0, 1, cmap=cmap
	    ).squeeze()
	    depth_colored = (depth_colored * 255).astype(np.uint8)
	    depth_colored_hwc = chw2hwc(depth_colored)

	    # Gray depth map, replicated to 3 channels
	    depth_gray = (pred_disp_normalized * 255).astype(np.uint8)
	    depth_gray_hwc = np.stack([depth_gray] * 3, axis=-1)

	    # Save the normalized depth map to a temporary .npy file. Write through
	    # the already-open handle rather than reopening the path, which fails
	    # on platforms that forbid opening a temp file twice. delete=False
	    # keeps the file on disk so the caller can hand the path on.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".npy") as temp_file:
	        np.save(temp_file, pred_disp_normalized)
	        depth_raw_path = temp_file.name

	    # Resize outputs to match the original image size. The interpolation
	    # flag must be passed by keyword: the third positional parameter of
	    # cv2.resize is ``dst``, not ``interpolation``.
	    h, w = image_np.shape[:2]
	    depth_colored_hwc = cv2.resize(
	        depth_colored_hwc, (w, h), interpolation=cv2.INTER_LINEAR
	    )
	    depth_gray_hwc = cv2.resize(
	        depth_gray_hwc, (w, h), interpolation=cv2.INTER_LINEAR
	    )

	    # Convert to PIL images
	    return (
	        image,
	        Image.fromarray(depth_colored_hwc),
	        Image.fromarray(depth_gray_hwc),
	        depth_raw_path,
	    )



	# Gradio interface function with GPU support
	@spaces.GPU
	def gradio_interface(image):
	    """Gradio callback: estimate depth for one uploaded image.

	    Builds the ViT-L ``DepthAnything`` model, loads the
	    ``large/model.safetensors`` weights from the
	    ``xingyang1/Distill-Any-Depth`` Hub repository, and delegates to
	    ``process_image``.

	    Args:
	        image: The uploaded image, or ``None`` when nothing was provided.

	    Returns:
	        A 3-tuple matching the interface outputs:
	        ``((image, depth_image), depth_gray, depth_raw)`` -- the
	        input/colorized-depth pair, the grayscale depth image, and the
	        path to the saved ``.npy`` depth file.

	    Raises:
	        gr.Error: If no image was supplied.
	    """
	    # Reject an empty submission up front, before paying for model loading.
	    if image is None:
	        raise gr.Error("Please upload an image first.")

	    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	    # Constructor arguments for the ViT-L variant, the only one used here.
	    vitl_config = dict(
	        encoder="vitl",
	        features=256,
	        out_channels=[256, 512, 1024, 1024],
	        use_bn=False,
	        use_clstoken=False,
	        max_depth=150.0,
	        mode='disparity',
	        pretrain_type='dinov2',
	        del_mask_token=False,
	    )

	    # Fetch the checkpoint from the Hub and load it with safetensors.
	    checkpoint_path = hf_hub_download(
	        repo_id="xingyang1/Distill-Any-Depth",
	        filename="large/model.safetensors",
	        repo_type="model",
	    )
	    model = DepthAnything(**vitl_config).to(device)
	    model.load_state_dict(load_file(checkpoint_path))
	    # Inference only: disable any training-mode layer behaviour.
	    model.eval()

	    # Process image and return output
	    image, depth_image, depth_gray, depth_raw = process_image(image, model, device)
	    return (image, depth_image), depth_gray, depth_raw

	# Create Gradio interface
	iface = gr.Interface(
	    fn=gradio_interface,
	    inputs=gr.Image(type="pil"),  # Only image input, no mode selection
	    outputs=[
	        # Slider comparing the input with the colorized depth image
	        ImageSlider(label="Depth slider", type="pil", slider_color="pink"),
	        gr.Image(type="pil", label="Gray Depth"),
	        gr.File(label="Raw Depth (NumPy File)"),
	    ],
	    title="Depth Estimation Demo",
	    description="Upload an image to see the depth estimation results. Our model is running on GPU for faster processing.",
	    examples=["1.jpg", "2.jpg", "4.png", "5.jpg", "6.jpg"],
	    cache_examples=True,
	)

	# Launch the Gradio interface only when run as a script, so importing this
	# module does not start a server as a side effect.
	if __name__ == "__main__":
	    iface.launch()