aliabd HF staff commited on
Commit
172af70
·
1 Parent(s): b7543c7

Upload with huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. run.ipynb +1 -1
  3. run.py +2 -3
README.md CHANGED
@@ -5,7 +5,7 @@ emoji: 🔥
5
  colorFrom: indigo
6
  colorTo: indigo
7
  sdk: gradio
8
- sdk_version: 3.24.1
9
  app_file: run.py
10
  pinned: false
11
  ---
 
5
  colorFrom: indigo
6
  colorTo: indigo
7
  sdk: gradio
8
+ sdk_version: 3.25.0
9
  app_file: run.py
10
  pinned: false
11
  ---
run.ipynb CHANGED
@@ -1 +1 @@
1
- {"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: depth_estimation\n", "### A demo for predicting the depth of an image and generating a 3D model of it.\n", " "]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio torch git+https://github.com/nielsrogge/transformers.git@add_dpt_redesign#egg=transformers numpy Pillow jinja2 open3d"]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "os.mkdir('examples')\n", "!wget -q -O examples/1-jonathan-borba-CgWTqYxHEkg-unsplash.jpg https://github.com/gradio-app/gradio/raw/main/demo/depth_estimation/examples/1-jonathan-borba-CgWTqYxHEkg-unsplash.jpg\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/depth_estimation/packages.txt"]}, {"cell_type": "code", "execution_count": null, "id": 44380577570523278879349135829904343037, "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from transformers import DPTFeatureExtractor, DPTForDepthEstimation\n", "import torch\n", "import numpy as np\n", "from PIL import Image\n", "import open3d as o3d\n", "from pathlib import Path\n", "import os\n", "\n", "feature_extractor = DPTFeatureExtractor.from_pretrained(\"Intel/dpt-large\")\n", "model = DPTForDepthEstimation.from_pretrained(\"Intel/dpt-large\")\n", "\n", "def process_image(image_path):\n", " image_path = Path(image_path)\n", " image_raw = Image.open(image_path)\n", " image = image_raw.resize(\n", " (800, int(800 * image_raw.size[1] / image_raw.size[0])),\n", " Image.Resampling.LANCZOS)\n", "\n", " # prepare image for the model\n", " encoding = feature_extractor(image, return_tensors=\"pt\")\n", "\n", " # forward pass\n", " with torch.no_grad():\n", " outputs = model(**encoding)\n", " predicted_depth = outputs.predicted_depth\n", "\n", " # interpolate to original size\n", " prediction = torch.nn.functional.interpolate(\n", " predicted_depth.unsqueeze(1),\n", " size=image.size[::-1],\n", " mode=\"bicubic\",\n", " align_corners=False,\n", " ).squeeze()\n", " output = prediction.cpu().numpy()\n", " depth_image = (output * 255 / np.max(output)).astype('uint8')\n", " try:\n", " gltf_path = create_3d_obj(np.array(image), depth_image, image_path)\n", " img = Image.fromarray(depth_image)\n", " return [img, gltf_path, gltf_path]\n", " except Exception as e:\n", " gltf_path = create_3d_obj(\n", " np.array(image), depth_image, image_path, depth=8)\n", " img = Image.fromarray(depth_image)\n", " return [img, gltf_path, gltf_path]\n", " except:\n", " print(\"Error reconstructing 3D model\")\n", " raise Exception(\"Error reconstructing 3D model\")\n", "\n", "\n", "def create_3d_obj(rgb_image, depth_image, image_path, depth=10):\n", " depth_o3d = o3d.geometry.Image(depth_image)\n", " image_o3d = o3d.geometry.Image(rgb_image)\n", " rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(\n", " image_o3d, depth_o3d, convert_rgb_to_intensity=False)\n", " w = int(depth_image.shape[1])\n", " h = int(depth_image.shape[0])\n", "\n", " camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()\n", " camera_intrinsic.set_intrinsics(w, h, 500, 500, w/2, h/2)\n", "\n", " pcd = o3d.geometry.PointCloud.create_from_rgbd_image(\n", " rgbd_image, camera_intrinsic)\n", "\n", " print('normals')\n", " pcd.normals = o3d.utility.Vector3dVector(\n", " np.zeros((1, 3))) # invalidate existing normals\n", " pcd.estimate_normals(\n", " search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30))\n", " pcd.orient_normals_towards_camera_location(\n", " camera_location=np.array([0., 0., 1000.]))\n", " pcd.transform([[1, 0, 0, 0],\n", " [0, -1, 0, 0],\n", " [0, 0, -1, 0],\n", " [0, 0, 0, 1]])\n", " pcd.transform([[-1, 0, 0, 0],\n", " [0, 1, 0, 0],\n", " [0, 0, 1, 0],\n", " [0, 0, 0, 1]])\n", "\n", " print('run Poisson surface reconstruction')\n", " with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug) as cm:\n", " mesh_raw, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(\n", " pcd, depth=depth, width=0, scale=1.1, linear_fit=True)\n", "\n", " voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / 256\n", " print(f'voxel_size = {voxel_size:e}')\n", " mesh = mesh_raw.simplify_vertex_clustering(\n", " voxel_size=voxel_size,\n", " contraction=o3d.geometry.SimplificationContraction.Average)\n", "\n", " # vertices_to_remove = densities < np.quantile(densities, 0.001)\n", " # mesh.remove_vertices_by_mask(vertices_to_remove)\n", " bbox = pcd.get_axis_aligned_bounding_box()\n", " mesh_crop = mesh.crop(bbox)\n", " gltf_path = f'./{image_path.stem}.gltf'\n", " o3d.io.write_triangle_mesh(\n", " gltf_path, mesh_crop, write_triangle_uvs=True)\n", " return gltf_path\n", "\n", "title = \"Demo: zero-shot depth estimation with DPT + 3D Point Cloud\"\n", "description = \"This demo is a variation from the original <a href='https://huggingface.co/spaces/nielsr/dpt-depth-estimation' target='_blank'>DPT Demo</a>. It uses the DPT model to predict the depth of an image and then uses 3D Point Cloud to create a 3D object.\"\n", "examples = [[\"examples/1-jonathan-borba-CgWTqYxHEkg-unsplash.jpg\"]]\n", "\n", "iface = gr.Interface(fn=process_image,\n", " inputs=[gr.Image(\n", " type=\"filepath\", label=\"Input Image\")],\n", " outputs=[gr.Image(label=\"predicted depth\", type=\"pil\"),\n", " gr.Model3D(label=\"3d mesh reconstruction\", clear_color=[\n", " 1.0, 1.0, 1.0, 1.0]),\n", " gr.File(label=\"3d gLTF\")],\n", " title=title,\n", " description=description,\n", " examples=examples,\n", " allow_flagging=\"never\",\n", " cache_examples=False)\n", "\n", "iface.launch(debug=True, enable_queue=False)"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
 
1
+ {"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: depth_estimation\n", "### A demo for predicting the depth of an image and generating a 3D model of it.\n", " "]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio torch git+https://github.com/nielsrogge/transformers.git@add_dpt_redesign#egg=transformers numpy Pillow jinja2 open3d"]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "os.mkdir('examples')\n", "!wget -q -O examples/1-jonathan-borba-CgWTqYxHEkg-unsplash.jpg https://github.com/gradio-app/gradio/raw/main/demo/depth_estimation/examples/1-jonathan-borba-CgWTqYxHEkg-unsplash.jpg\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/depth_estimation/packages.txt"]}, {"cell_type": "code", "execution_count": null, "id": 44380577570523278879349135829904343037, "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from transformers import DPTFeatureExtractor, DPTForDepthEstimation\n", "import torch\n", "import numpy as np\n", "from PIL import Image\n", "import open3d as o3d\n", "from pathlib import Path\n", "\n", "feature_extractor = DPTFeatureExtractor.from_pretrained(\"Intel/dpt-large\")\n", "model = DPTForDepthEstimation.from_pretrained(\"Intel/dpt-large\")\n", "\n", "def process_image(image_path):\n", " image_path = Path(image_path)\n", " image_raw = Image.open(image_path)\n", " image = image_raw.resize(\n", " (800, int(800 * image_raw.size[1] / image_raw.size[0])),\n", " Image.Resampling.LANCZOS)\n", "\n", " # prepare image for the model\n", " encoding = feature_extractor(image, return_tensors=\"pt\")\n", "\n", " # forward pass\n", " with torch.no_grad():\n", " outputs = model(**encoding)\n", " predicted_depth = outputs.predicted_depth\n", "\n", " # interpolate to original size\n", " prediction = torch.nn.functional.interpolate(\n", " predicted_depth.unsqueeze(1),\n", " size=image.size[::-1],\n", " mode=\"bicubic\",\n", " align_corners=False,\n", " ).squeeze()\n", " output = prediction.cpu().numpy()\n", " depth_image = (output * 255 / np.max(output)).astype('uint8')\n", " try:\n", " gltf_path = create_3d_obj(np.array(image), depth_image, image_path)\n", " img = Image.fromarray(depth_image)\n", " return [img, gltf_path, gltf_path]\n", " except Exception:\n", " gltf_path = create_3d_obj(\n", " np.array(image), depth_image, image_path, depth=8)\n", " img = Image.fromarray(depth_image)\n", " return [img, gltf_path, gltf_path]\n", " except:\n", " print(\"Error reconstructing 3D model\")\n", " raise Exception(\"Error reconstructing 3D model\")\n", "\n", "\n", "def create_3d_obj(rgb_image, depth_image, image_path, depth=10):\n", " depth_o3d = o3d.geometry.Image(depth_image)\n", " image_o3d = o3d.geometry.Image(rgb_image)\n", " rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(\n", " image_o3d, depth_o3d, convert_rgb_to_intensity=False)\n", " w = int(depth_image.shape[1])\n", " h = int(depth_image.shape[0])\n", "\n", " camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()\n", " camera_intrinsic.set_intrinsics(w, h, 500, 500, w/2, h/2)\n", "\n", " pcd = o3d.geometry.PointCloud.create_from_rgbd_image(\n", " rgbd_image, camera_intrinsic)\n", "\n", " print('normals')\n", " pcd.normals = o3d.utility.Vector3dVector(\n", " np.zeros((1, 3))) # invalidate existing normals\n", " pcd.estimate_normals(\n", " search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30))\n", " pcd.orient_normals_towards_camera_location(\n", " camera_location=np.array([0., 0., 1000.]))\n", " pcd.transform([[1, 0, 0, 0],\n", " [0, -1, 0, 0],\n", " [0, 0, -1, 0],\n", " [0, 0, 0, 1]])\n", " pcd.transform([[-1, 0, 0, 0],\n", " [0, 1, 0, 0],\n", " [0, 0, 1, 0],\n", " [0, 0, 0, 1]])\n", "\n", " print('run Poisson surface reconstruction')\n", " with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug):\n", " mesh_raw, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(\n", " pcd, depth=depth, width=0, scale=1.1, linear_fit=True)\n", "\n", " voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / 256\n", " print(f'voxel_size = {voxel_size:e}')\n", " mesh = mesh_raw.simplify_vertex_clustering(\n", " voxel_size=voxel_size,\n", " contraction=o3d.geometry.SimplificationContraction.Average)\n", "\n", " # vertices_to_remove = densities < np.quantile(densities, 0.001)\n", " # mesh.remove_vertices_by_mask(vertices_to_remove)\n", " bbox = pcd.get_axis_aligned_bounding_box()\n", " mesh_crop = mesh.crop(bbox)\n", " gltf_path = f'./{image_path.stem}.gltf'\n", " o3d.io.write_triangle_mesh(\n", " gltf_path, mesh_crop, write_triangle_uvs=True)\n", " return gltf_path\n", "\n", "title = \"Demo: zero-shot depth estimation with DPT + 3D Point Cloud\"\n", "description = \"This demo is a variation from the original <a href='https://huggingface.co/spaces/nielsr/dpt-depth-estimation' target='_blank'>DPT Demo</a>. It uses the DPT model to predict the depth of an image and then uses 3D Point Cloud to create a 3D object.\"\n", "examples = [[\"examples/1-jonathan-borba-CgWTqYxHEkg-unsplash.jpg\"]]\n", "\n", "iface = gr.Interface(fn=process_image,\n", " inputs=[gr.Image(\n", " type=\"filepath\", label=\"Input Image\")],\n", " outputs=[gr.Image(label=\"predicted depth\", type=\"pil\"),\n", " gr.Model3D(label=\"3d mesh reconstruction\", clear_color=[\n", " 1.0, 1.0, 1.0, 1.0]),\n", " gr.File(label=\"3d gLTF\")],\n", " title=title,\n", " description=description,\n", " examples=examples,\n", " allow_flagging=\"never\",\n", " cache_examples=False)\n", "\n", "iface.launch(debug=True, enable_queue=False)"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
run.py CHANGED
@@ -5,7 +5,6 @@ import numpy as np
5
  from PIL import Image
6
  import open3d as o3d
7
  from pathlib import Path
8
- import os
9
 
10
  feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
11
  model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
@@ -38,7 +37,7 @@ def process_image(image_path):
38
  gltf_path = create_3d_obj(np.array(image), depth_image, image_path)
39
  img = Image.fromarray(depth_image)
40
  return [img, gltf_path, gltf_path]
41
- except Exception as e:
42
  gltf_path = create_3d_obj(
43
  np.array(image), depth_image, image_path, depth=8)
44
  img = Image.fromarray(depth_image)
@@ -79,7 +78,7 @@ def create_3d_obj(rgb_image, depth_image, image_path, depth=10):
79
  [0, 0, 0, 1]])
80
 
81
  print('run Poisson surface reconstruction')
82
- with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug) as cm:
83
  mesh_raw, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
84
  pcd, depth=depth, width=0, scale=1.1, linear_fit=True)
85
 
 
5
  from PIL import Image
6
  import open3d as o3d
7
  from pathlib import Path
 
8
 
9
  feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
10
  model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
 
37
  gltf_path = create_3d_obj(np.array(image), depth_image, image_path)
38
  img = Image.fromarray(depth_image)
39
  return [img, gltf_path, gltf_path]
40
+ except Exception:
41
  gltf_path = create_3d_obj(
42
  np.array(image), depth_image, image_path, depth=8)
43
  img = Image.fromarray(depth_image)
 
78
  [0, 0, 0, 1]])
79
 
80
  print('run Poisson surface reconstruction')
81
+ with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug):
82
  mesh_raw, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
83
  pcd, depth=depth, width=0, scale=1.1, linear_fit=True)
84