JiantaoLin committed
Commit d346594 · 1 Parent(s): c8bf07b
Files changed (3):
  1. app.py +424 -322
  2. app_demo.py +384 -0
  3. app_demo_.py +0 -491
app.py CHANGED
@@ -1,10 +1,16 @@
- import gradio as gr
  import os
  import subprocess
- import shlex
  import spaces
  import torch
- access_token = os.getenv("HUGGINGFACE_TOKEN")
  subprocess.run(
      shlex.split(
          "pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt240/download.html"
@@ -41,6 +47,7 @@ def install_cuda_toolkit():
      os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
      print("==> finish install")
  install_cuda_toolkit()
  @spaces.GPU
  def check_gpu():
      os.environ['CUDA_HOME'] = '/usr/local/cuda-12.1'
@@ -51,334 +58,429 @@ def check_gpu():
      print(f"torch.cuda.is_available:{torch.cuda.is_available()}")
  check_gpu()

  from PIL import Image
- from einops import rearrange
- from diffusers import FluxPipeline
- from models.lrm.utils.camera_util import get_flux_input_cameras
- from models.lrm.utils.infer_util import save_video
- from models.lrm.utils.mesh_util import save_obj, save_obj_with_mtl
- from models.lrm.utils.render_utils import rotate_x, rotate_y
- from models.lrm.utils.train_util import instantiate_from_config
- from models.ISOMER.reconstruction_func import reconstruction
- from models.ISOMER.projection_func import projection
- import os
- from einops import rearrange
- from omegaconf import OmegaConf
- import torch
- import numpy as np
  import trimesh
- import torchvision
- import torch.nn.functional as F
- from PIL import Image
- from torchvision import transforms
- from torchvision.transforms import v2
- from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler, AutoencoderTiny, AutoencoderKL
- from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
- from diffusers import FluxPipeline
- from pytorch_lightning import seed_everything
- import os
- from huggingface_hub import hf_hub_download
-
-
- from utils.tool import NormalTransfer, get_background, get_render_cameras_video, load_mipmap, render_frames
-
- device_0 = "cuda"
- device_1 = "cuda"
- resolution = 512
- save_dir = "./outputs"
- normal_transfer = NormalTransfer()
- isomer_azimuths = torch.from_numpy(np.array([0, 90, 180, 270])).float().to(device_1)
- isomer_elevations = torch.from_numpy(np.array([5, 5, 5, 5])).float().to(device_1)
- isomer_radius = 4.5
- isomer_geo_weights = torch.from_numpy(np.array([1, 0.9, 1, 0.9])).float().to(device_1)
- isomer_color_weights = torch.from_numpy(np.array([1, 0.5, 1, 0.5])).float().to(device_1)
-
- # model initialization and loading
- # flux
- # # taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=torch.bfloat16).to(device_0)
- # # good_vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", torch_dtype=torch.bfloat16, token=access_token).to(device_0)
- # flux_pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, token=access_token).to(device=device_0, dtype=torch.bfloat16)
- # # flux_pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, vae=taef1, token=access_token).to(device_0)
- # flux_lora_ckpt_path = hf_hub_download(repo_id="LTT/xxx-ckpt", filename="rgb_normal_large.safetensors", repo_type="model", token=access_token)
- # flux_pipe.load_lora_weights(flux_lora_ckpt_path)
- # flux_pipe.to(device=device_0, dtype=torch.bfloat16)
- # torch.cuda.empty_cache()
- # flux_pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(flux_pipe)
-
-
- # lrm
- config = OmegaConf.load("./models/lrm/config/PRM_inference.yaml")
- model_config = config.model_config
- infer_config = config.infer_config
- model = instantiate_from_config(model_config)
- model_ckpt_path = hf_hub_download(repo_id="LTT/PRM", filename="final_ckpt.ckpt", repo_type="model")
- state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
- state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.')}
- model.load_state_dict(state_dict, strict=True)
- model = model.to(device_1)
- torch.cuda.empty_cache()
- @spaces.GPU
- def lrm_reconstructions(image, input_cameras, save_path=None, name="temp", export_texmap=False, if_save_video=False):
-     images = image.unsqueeze(0).to(device_1)
-     images = v2.functional.resize(images, 512, interpolation=3, antialias=True).clamp(0, 1)
-     # breakpoint()
-     with torch.no_grad():
-         # get triplane
-         planes = model.forward_planes(images, input_cameras)
-
-         mesh_path_idx = os.path.join(save_path, f'{name}.obj')
-
-         mesh_out = model.extract_mesh(
-             planes,
-             use_texture_map=export_texmap,
-             **infer_config,
-         )
-         if export_texmap:
-             vertices, faces, uvs, mesh_tex_idx, tex_map = mesh_out
-             save_obj_with_mtl(
-                 vertices.data.cpu().numpy(),
-                 uvs.data.cpu().numpy(),
-                 faces.data.cpu().numpy(),
-                 mesh_tex_idx.data.cpu().numpy(),
-                 tex_map.permute(1, 2, 0).data.cpu().numpy(),
-                 mesh_path_idx,
-             )
-         else:
-             vertices, faces, vertex_colors = mesh_out
-             save_obj(vertices, faces, vertex_colors, mesh_path_idx)
-         print(f"Mesh saved to {mesh_path_idx}")
-
-         render_size = 512
-         if if_save_video:
-             video_path_idx = os.path.join(save_path, f'{name}.mp4')
-             render_size = infer_config.render_resolution
-             ENV = load_mipmap("models/lrm/env_mipmap/6")
-             materials = (0.0, 0.9)
-
-             all_mv, all_mvp, all_campos = get_render_cameras_video(
-                 batch_size=1,
-                 M=24,
-                 radius=4.5,
-                 elevation=(90, 60.0),
-                 is_flexicubes=True,
-                 fov=30
-             )
-
-             frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals, alphas = render_frames(
-                 model,
-                 planes,
-                 render_cameras=all_mvp,
-                 camera_pos=all_campos,
-                 env=ENV,
-                 materials=materials,
-                 render_size=render_size,
-                 chunk_size=20,
-                 is_flexicubes=True,
-             )
-             normals = (torch.nn.functional.normalize(normals) + 1) / 2
-             normals = normals * alphas + (1 - alphas)
-             all_frames = torch.cat([frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals], dim=3)
-
-             save_video(
-                 all_frames,
-                 video_path_idx,
-                 fps=30,
-             )
-             print(f"Video saved to {video_path_idx}")
-
-     return vertices, faces
-
- def local_normal_global_transform(local_normal_images, azimuths_deg, elevations_deg):
-     if local_normal_images.min() >= 0:
-         local_normal = local_normal_images.float() * 2 - 1
-     else:
-         local_normal = local_normal_images.float()
-     global_normal = normal_transfer.trans_local_2_global(local_normal, azimuths_deg, elevations_deg, radius=4.5, for_lotus=False)
-     global_normal[..., 0] *= -1
-     global_normal = (global_normal + 1) / 2
-     global_normal = global_normal.permute(0, 3, 1, 2)
-     return global_normal
-
- # Generate the multi-view images
- @spaces.GPU(duration=120)
- def generate_multi_view_images(prompt, seed):
-     # torch.cuda.empty_cache()
-     # generator = torch.manual_seed(seed)
-     generator = torch.Generator().manual_seed(seed)
-     with torch.no_grad():
-         img = flux_pipe(
-             prompt=prompt,
-             num_inference_steps=5,
-             guidance_scale=3.5,
-             num_images_per_prompt=1,
-             width=resolution * 2,
-             height=resolution * 1,
-             output_type='np',
-             generator=generator,
-         ).images
-         # for img in flux_pipe.flux_pipe_call_that_returns_an_iterable_of_images(
-         #     prompt=prompt,
-         #     guidance_scale=3.5,
-         #     num_inference_steps=4,
-         #     width=resolution * 4,
-         #     height=resolution * 2,
-         #     generator=generator,
-         #     output_type="np",
-         #     good_vae=good_vae,
-         # ):
-         #     pass
-     # Return the final image; the seed is handled by the external caller
-     return img
-
- # Reconstruct the 3D model
  @spaces.GPU
- def reconstruct_3d_model(images, prompt):
-     global model
-     model.init_flexicubes_geometry(device_1, fovy=50.0)
-     model = model.eval()
-     rgb_normal_grid = images
-     save_dir_path = os.path.join(save_dir, prompt.replace(" ", "_"))
-     os.makedirs(save_dir_path, exist_ok=True)
-
-     images = torch.from_numpy(rgb_normal_grid).squeeze(0).permute(2, 0, 1).contiguous().float()  # (3, 1024, 2048)
-     images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=2, m=4)  # (8, 3, 512, 512)
-     rgb_multi_view = images[:4, :3, :, :]
-     normal_multi_view = images[4:, :3, :, :]
-     multi_view_mask = get_background(normal_multi_view)
-     rgb_multi_view = rgb_multi_view * multi_view_mask + (1 - multi_view_mask)
-     input_cameras = get_flux_input_cameras(batch_size=1, radius=4.2, fov=30).to(device_1)
-     vertices, faces = lrm_reconstructions(rgb_multi_view, input_cameras, save_path=save_dir_path, name='lrm', export_texmap=False, if_save_video=True)
-     # local normal to global normal
-     global_normal = local_normal_global_transform(normal_multi_view.permute(0, 2, 3, 1), isomer_azimuths, isomer_elevations)
-     global_normal = global_normal * multi_view_mask + (1 - multi_view_mask)
-
-     global_normal = global_normal.permute(0, 2, 3, 1)
-     rgb_multi_view = rgb_multi_view.permute(0, 2, 3, 1)
-     multi_view_mask = multi_view_mask.permute(0, 2, 3, 1).squeeze(-1)
-     vertices = torch.from_numpy(vertices).to(device_1)
-     faces = torch.from_numpy(faces).to(device_1)
-     vertices = vertices @ rotate_x(np.pi / 2, device=vertices.device)[:3, :3]
-     vertices = vertices @ rotate_y(np.pi / 2, device=vertices.device)[:3, :3]
-
-     # global_normal: B,H,W,3
-     # multi_view_mask: B,H,W
-     # rgb_multi_view: B,H,W,3
-
-     meshes = reconstruction(
-         normal_pils=global_normal,
-         masks=multi_view_mask,
-         weights=isomer_geo_weights,
-         fov=30,
-         radius=isomer_radius,
-         camera_angles_azi=isomer_azimuths,
-         camera_angles_ele=isomer_elevations,
-         expansion_weight_stage1=0.1,
-         init_type="file",
-         init_verts=vertices,
-         init_faces=faces,
-         stage1_steps=0,
-         stage2_steps=50,
-         start_edge_len_stage1=0.1,
-         end_edge_len_stage1=0.02,
-         start_edge_len_stage2=0.02,
-         end_edge_len_stage2=0.005,
-     )
-
-     save_glb_addr = projection(
-         meshes,
-         masks=multi_view_mask,
-         images=rgb_multi_view,
-         azimuths=isomer_azimuths,
-         elevations=isomer_elevations,
-         weights=isomer_color_weights,
-         fov=30,
-         radius=isomer_radius,
-         save_dir=f"{save_dir_path}/ISOMER/",
-     )
-
-     return save_glb_addr
-
- # Gradio interface function
  @spaces.GPU
- def gradio_pipeline(prompt, seed):
-     import ctypes
-     # Explicitly preload libnvrtc.so.12
-     cuda_lib_path = "/usr/local/cuda-12.1/lib64/libnvrtc.so.12"
-     try:
-         ctypes.CDLL(cuda_lib_path, mode=ctypes.RTLD_GLOBAL)
-         print(f"Successfully preloaded {cuda_lib_path}")
-     except OSError as e:
-         print(f"Failed to preload {cuda_lib_path}: {e}")
-     # Generate the multi-view images
-     # rgb_normal_grid = generate_multi_view_images(prompt, seed)
-     rgb_normal_grid = np.load("rgb_normal_grid.npy")
-     image_preview = Image.fromarray((rgb_normal_grid[0] * 255).astype(np.uint8))
-
-     # 3d reconstruction
-
-     # Reconstruct the 3D model and return the glb path
-     save_glb_addr = reconstruct_3d_model(rgb_normal_grid, prompt)
-     # save_glb_addr = None
-     return image_preview, save_glb_addr
-
- # Gradio Blocks app
- with gr.Blocks() as demo:
-     with gr.Row(variant="panel"):
-         # Left: input area
-         with gr.Column():
-             with gr.Row():
-                 prompt_input = gr.Textbox(
-                     label="Enter Prompt",
-                     placeholder="Describe your 3D model...",
-                     lines=2,
-                     elem_id="prompt_input"
-                 )
-
-             with gr.Row():
-                 sample_seed = gr.Number(value=42, label="Seed Value", precision=0)
-
-             with gr.Row():
-                 submit = gr.Button("Generate", elem_id="generate", variant="primary")
-
-             with gr.Row(variant="panel"):
-                 gr.Markdown("Examples:")
-                 gr.Examples(
-                     examples=[
-                         ["a castle on a hill"],
-                         ["an owl wearing a hat"],
-                         ["a futuristic car"]
-                     ],
-                     inputs=[prompt_input],
-                     label="Prompt Examples"
-                 )
-
-         # Right: output area
-         with gr.Column():
-             with gr.Row():
-                 rgb_normal_grid_image = gr.Image(
-                     label="RGB Normal Grid",
-                     type="pil",
-                     interactive=False
-                 )
-
-             with gr.Row():
-                 with gr.Tab("GLB"):
-                     output_glb_model = gr.Model3D(
-                         label="Generated 3D Model (GLB Format)",
-                         interactive=False
-                     )
-                     gr.Markdown("Download the model for proper visualization.")
-
-     # Event wiring
-     submit.click(
-         fn=gradio_pipeline, inputs=[prompt_input, sample_seed],
-         outputs=[rgb_normal_grid_image, output_glb_model]
-     )
-
- # Launch the app
- # demo.queue(max_size=10)
- demo.launch()
 
 
  import os
+ import gradio as gr
  import subprocess
  import spaces
+ import ctypes
+ import shlex
  import torch
+
+ subprocess.run(
+     shlex.split(
+         "pip install ./custom_diffusers --force-reinstall --no-deps"
+     )
+ )
  subprocess.run(
      shlex.split(
          "pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt240/download.html"
 
      os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
      print("==> finish install")
  install_cuda_toolkit()
+
  @spaces.GPU
  def check_gpu():
      os.environ['CUDA_HOME'] = '/usr/local/cuda-12.1'
 
      print(f"torch.cuda.is_available:{torch.cuda.is_available()}")
  check_gpu()

+
+ import base64
+ import re
+ import sys
+
+ sys.path.append(os.path.abspath(os.path.join(__file__, '../')))
+ if 'OMP_NUM_THREADS' not in os.environ:
+     os.environ['OMP_NUM_THREADS'] = '32'
+
+ import shutil
+ import json
+ import requests
+ import shutil
+ import threading
  from PIL import Image
+ import time
  import trimesh
+
+ import random
+ import time
+ import numpy as np
+ from video_render import render_video_from_obj
+
+ access_token = os.getenv("HUGGINGFACE_TOKEN")
+ from pipeline.kiss3d_wrapper import init_wrapper_from_config, run_text_to_3d, run_image_to_3d, image2mesh_preprocess, image2mesh_main
+
+
+ # Add logo file path and hyperlinks
+ LOGO_PATH = "app_assets/logo_temp_.png"  # Update this to the actual path of your logo
+ ARXIV_LINK = "https://arxiv.org/abs/example"
+ GITHUB_LINK = "https://github.com/example"
+
+
+ k3d_wrapper = init_wrapper_from_config('./pipeline/pipeline_config/default.yaml')
+
+
+ from models.ISOMER.scripts.utils import fix_vert_color_glb
+ torch.backends.cuda.matmul.allow_tf32 = True
+
+
+ TEMP_MESH_ADDRESS = ''
+
+ mesh_cache = None
+ preprocessed_input_image = None
+
+ def save_cached_mesh():
+     global mesh_cache
+     return mesh_cache
+     # if mesh_cache is None:
+     #     return None
+     # return save_py3dmesh_with_trimesh_fast(mesh_cache)
+
+ def save_py3dmesh_with_trimesh_fast(meshes, save_glb_path=TEMP_MESH_ADDRESS, apply_sRGB_to_LinearRGB=True):
+     from pytorch3d.structures import Meshes
+     import trimesh
+
+     # convert from pytorch3d meshes to trimesh mesh
+     vertices = meshes.verts_packed().cpu().float().numpy()
+     triangles = meshes.faces_packed().cpu().long().numpy()
+     np_color = meshes.textures.verts_features_packed().cpu().float().numpy()
+     if save_glb_path.endswith(".glb"):
+         # rotate 180 along +Y
+         vertices[:, [0, 2]] = -vertices[:, [0, 2]]
+
+     def srgb_to_linear(c_srgb):
+         c_linear = np.where(c_srgb <= 0.04045, c_srgb / 12.92, ((c_srgb + 0.055) / 1.055) ** 2.4)
+         return c_linear.clip(0, 1.)
+     if apply_sRGB_to_LinearRGB:
+         np_color = srgb_to_linear(np_color)
+     assert vertices.shape[0] == np_color.shape[0]
+     assert np_color.shape[1] == 3
+     assert 0 <= np_color.min() and np_color.max() <= 1, f"min={np_color.min()}, max={np_color.max()}"
+     mesh = trimesh.Trimesh(vertices=vertices, faces=triangles, vertex_colors=np_color)
+     mesh.remove_unreferenced_vertices()
+     # save mesh
+     mesh.export(save_glb_path)
+     if save_glb_path.endswith(".glb"):
+         fix_vert_color_glb(save_glb_path)
+     print(f"saving to {save_glb_path}")
+ #
+ #
+ # @spaces.GPU
+ def text_to_detailed(prompt, seed=None):
+     # print(torch.cuda.is_available())
+     # print(f"Before text_to_detailed: {torch.cuda.memory_allocated() / 1024**3} GB")
+     return k3d_wrapper.get_detailed_prompt(prompt, seed)
+
+ def text_to_image(prompt, seed=None, strength=1.0, lora_scale=1.0, num_inference_steps=30, redux_hparam=None, init_image=None, **kwargs):
+     # print(f"Before text_to_image: {torch.cuda.memory_allocated() / 1024**3} GB")
+     k3d_wrapper.renew_uuid()
+     init_image = None
+     # if init_image_path is not None:
+     #     init_image = Image.open(init_image_path)
+     result = k3d_wrapper.generate_3d_bundle_image_text(
+         prompt,
+         image=init_image,
+         strength=strength,
+         lora_scale=lora_scale,
+         num_inference_steps=num_inference_steps,
+         seed=int(seed) if seed is not None else None,
+         redux_hparam=redux_hparam,
+         save_intermediate_results=True,
+         **kwargs)
+     return result[-1]
+
+ def image2mesh_preprocess_(input_image_, seed, use_mv_rgb=True):
+     global preprocessed_input_image
+
+     seed = int(seed) if seed is not None else None
+
+     # TODO: delete this later
+     k3d_wrapper.del_llm_model()
+
+     input_image_save_path, reference_save_path, caption = image2mesh_preprocess(k3d_wrapper, input_image_, seed, use_mv_rgb)
+
+     preprocessed_input_image = Image.open(input_image_save_path)
+     return reference_save_path, caption
+
  @spaces.GPU
+ def image2mesh_main_(reference_3d_bundle_image, caption, seed, strength1=0.5, strength2=0.95, enable_redux=True, use_controlnet=True, if_video=True):
+     global mesh_cache
+     seed = int(seed) if seed is not None else None
+
+
+     # TODO: delete this later
+     k3d_wrapper.del_llm_model()
+
+     input_image = preprocessed_input_image
+
+     reference_3d_bundle_image = torch.tensor(reference_3d_bundle_image).permute(2, 0, 1) / 255
+
+     gen_save_path, recon_mesh_path = image2mesh_main(k3d_wrapper, input_image, reference_3d_bundle_image, caption=caption, seed=seed, strength1=strength1, strength2=strength2, enable_redux=enable_redux, use_controlnet=use_controlnet)
+     mesh_cache = recon_mesh_path
+
+
+     # gen_save_ = Image.open(gen_save_path)
+
+     if if_video:
+         video_path = recon_mesh_path.replace('.obj', '.mp4').replace('.glb', '.mp4')
+         render_video_from_obj(recon_mesh_path, video_path)
+         print(f"After bundle_image_to_mesh: {torch.cuda.memory_allocated() / 1024**3} GB")
+         return gen_save_path, video_path
+     else:
+         return gen_save_path, recon_mesh_path
+     # return gen_save_path, recon_mesh_path

  @spaces.GPU
+ def bundle_image_to_mesh(
+         gen_3d_bundle_image,
+         lrm_radius=4.15,
+         isomer_radius=4.5,
+         reconstruction_stage1_steps=10,
+         reconstruction_stage2_steps=50,
+         save_intermediate_results=True,
+         if_video=True
+     ):
+     global mesh_cache
+     print(f"Before bundle_image_to_mesh: {torch.cuda.memory_allocated() / 1024**3} GB")
+     k3d_wrapper.recon_model.init_flexicubes_geometry("cuda:0", fovy=50.0)
+     # TODO: delete this later
+     k3d_wrapper.del_llm_model()
+
+     print(f"Before bundle_image_to_mesh after deleting llm model: {torch.cuda.memory_allocated() / 1024**3} GB")
+
+     gen_3d_bundle_image = torch.tensor(gen_3d_bundle_image).permute(2, 0, 1) / 255
+     # recon from 3D Bundle image
+     recon_mesh_path = k3d_wrapper.reconstruct_3d_bundle_image(gen_3d_bundle_image, lrm_render_radius=lrm_radius, isomer_radius=isomer_radius, save_intermediate_results=save_intermediate_results, reconstruction_stage1_steps=int(reconstruction_stage1_steps), reconstruction_stage2_steps=int(reconstruction_stage2_steps))
+     mesh_cache = recon_mesh_path
+
+     if if_video:
+         video_path = recon_mesh_path.replace('.obj', '.mp4').replace('.glb', '.mp4')
+         # # If video_path is smaller than 50 KB, treat it as an empty file and re-render
+         # if os.path.exists(video_path):
+         #     print(f"file size:{os.path.getsize(video_path)}")
+         #     if os.path.getsize(video_path) > 50*1024:
+         #         print(f"video path:{video_path}")
+         #         return video_path
+         render_video_from_obj(recon_mesh_path, video_path)
+         print(f"After bundle_image_to_mesh: {torch.cuda.memory_allocated() / 1024**3} GB")
+         return video_path
+     else:
+         return recon_mesh_path
+
+ _HEADER_ = f"""
+ <img src="{LOGO_PATH}">
+ <h2><b>Official 🤗 Gradio Demo</b></h2><h2>
+ <b>Kiss3DGen: Repurposing Image Diffusion Models for 3D Asset Generation</b></h2>
+
+ <p>**Kiss3DGen** is xxxxxxxxx</p>
+
+ [![arXiv](https://img.shields.io/badge/arXiv-Link-red)]({ARXIV_LINK}) [![GitHub](https://img.shields.io/badge/GitHub-Repo-blue)]({GITHUB_LINK})
+ """
+
+ _CITE_ = r"""
+ <h2>If Kiss3DGen is helpful, please help to ⭐ the <a href='""" + GITHUB_LINK + r"""' target='_blank'>Github Repo</a>. Thanks!</h2>
+
+ 📝 **Citation**
+
+ If you find our work useful for your research or applications, please cite using this bibtex:
+ ```bibtex
+ @article{xxxx,
+     title={xxxx},
+     author={xxxx},
+     journal={xxxx},
+     year={xxxx}
+ }
+ ```
+
+ 📋 **License**
+
+ Apache-2.0 LICENSE. Please refer to the [LICENSE file](https://huggingface.co/spaces/TencentARC/InstantMesh/blob/main/LICENSE) for details.
+
+ 📧 **Contact**
+
+ If you have any questions, feel free to open a discussion or contact us at <b>xxx@xxxx</b>.
+ """
+
+ def image_to_base64(image_path):
+     """Converts an image file to a base64-encoded string."""
+     with open(image_path, "rb") as img_file:
+         return base64.b64encode(img_file.read()).decode('utf-8')
+
+ def main():
+
+     torch.set_grad_enabled(False)
+
+     # Convert the logo image to base64
+     logo_base64 = image_to_base64(LOGO_PATH)
+     # with gr.Blocks() as demo:
+     with gr.Blocks(css="""
+         body {
+             display: flex;
+             justify-content: center;
+             align-items: center;
+             min-height: 100vh;
+             margin: 0;
+             padding: 0;
+         }
+         #col-container { margin: 0px auto; max-width: 200px; }
+
+         .gradio-container {
+             max-width: 1000px;
+             margin: auto;
+             width: 100%;
+         }
+         #center-align-column {
+             display: flex;
+             justify-content: center;
+             align-items: center;
+         }
+         #right-align-column {
+             display: flex;
+             justify-content: flex-end;
+             align-items: center;
+         }
+         h1 {text-align: center;}
+         h2 {text-align: center;}
+         h3 {text-align: center;}
+         p {text-align: center;}
+         img {text-align: right;}
+         .right {
+             display: block;
+             margin-left: auto;
+         }
+         .center {
+             display: block;
+             margin-left: auto;
+             margin-right: auto;
+             width: 50%;
+         }
+         #content-container {
+             max-width: 1200px;
+             margin: 0 auto;
+         }
+         #example-container {
+             max-width: 300px;
+             margin: 0 auto;
+         }
+     """, elem_id="col-container") as demo:
+         # Header Section
+         # gr.Image(value=LOGO_PATH, width=64, height=64)
+         # gr.Markdown(_HEADER_)
+         with gr.Row(elem_id="content-container"):
+             # with gr.Column(scale=1):
+             #     pass
+             # with gr.Column(scale=1, elem_id="right-align-column"):
+             #     # gr.Image(value=LOGO_PATH, interactive=False, show_label=False, width=64, height=64, elem_id="logo-image")
+             #     # gr.Markdown(f"<img src='{LOGO_PATH}' alt='Logo' style='width:64px;height:64px;border:0;'>")
+             #     # gr.HTML(f"<img src='data:image/png;base64,{logo_base64}' alt='Logo' class='right' style='width:64px;height:64px;border:0;text-align:right;'>")
+             #     pass
+             with gr.Column(scale=7, elem_id="center-align-column"):
+                 gr.Markdown(f"""
+                 ## Official 🤗 Gradio Demo
+                 # Kiss3DGen: Repurposing Image Diffusion Models for 3D Asset Generation""")
+                 gr.HTML(f"<img src='data:image/png;base64,{logo_base64}' alt='Logo' class='center' style='width:64px;height:64px;border:0;text-align:center;'>")
+
+                 gr.HTML(f"""
+                 <div style="display: flex; justify-content: center; align-items: center; gap: 10px;">
+                     <a href="{ARXIV_LINK}" target="_blank">
+                         <img src="https://img.shields.io/badge/arXiv-Link-red" alt="arXiv">
+                     </a>
+                     <a href="{GITHUB_LINK}" target="_blank">
+                         <img src="https://img.shields.io/badge/GitHub-Repo-blue" alt="GitHub">
+                     </a>
+                 </div>
+                 """)
+
+             # gr.HTML(f"""
+             # <div style="display: flex; gap: 10px; align-items: center;"><a href="{ARXIV_LINK}" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/badge/arXiv-Link-red" alt="arXiv"></a> <a href="{GITHUB_LINK}" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/badge/GitHub-Repo-blue" alt="GitHub"></a></div>
+             # """)
+
+             # gr.Markdown(f"""
+             # [![arXiv](https://img.shields.io/badge/arXiv-Link-red)]({ARXIV_LINK}) [![GitHub](https://img.shields.io/badge/GitHub-Repo-blue)]({GITHUB_LINK})
+             # """, elem_id="title")
+             # with gr.Column(scale=1):
+             #     pass
+             # with gr.Row():
+             #     gr.Markdown(f"[![arXiv](https://img.shields.io/badge/arXiv-Link-red)]({ARXIV_LINK})")
+             #     gr.Markdown(f"[![GitHub](https://img.shields.io/badge/GitHub-Repo-blue)]({GITHUB_LINK})")
+
+         # Tabs Section
+         with gr.Tabs(selected='tab_text_to_3d', elem_id="content-container") as main_tabs:
+             with gr.TabItem('Text-to-3D', id='tab_text_to_3d'):
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         prompt = gr.Textbox(value="", label="Input Prompt", lines=4)
+                         seed1 = gr.Number(value=10, label="Seed")
+
+                         with gr.Row(elem_id="example-container"):
+                             gr.Examples(
+                                 examples=[
+                                     # ["A tree with red leaves"],
+                                     # ["A dragon with black texture"],
+                                     ["A girl with pink hair"],
+                                     ["A boy playing guitar"],
+                                     ["A dog wearing a hat"],
+                                     ["A boy playing basketball"],
+                                     # [""],
+                                     # [""],
+                                     # [""],
+                                 ],
+                                 inputs=[prompt],  # fill the selected example into the prompt textbox
+                                 label="Example Prompts"
+                             )
+                         btn_text2detailed = gr.Button("Refine to detailed prompt")
+                         detailed_prompt = gr.Textbox(value="", label="Detailed Prompt", placeholder="A detailed prompt will be generated here based on your input prompt. You can also edit this prompt.", lines=4, interactive=True)
+                         btn_text2img = gr.Button("Generate Images")
+
+                     with gr.Column(scale=1):
+                         output_image1 = gr.Image(label="Generated image", interactive=False)
+
+                         # lrm_radius = gr.Number(value=4.15, label="lrm_radius")
+                         # isomer_radius = gr.Number(value=4.5, label="isomer_radius")
+                         # reconstruction_stage1_steps = gr.Number(value=10, label="reconstruction_stage1_steps")
+                         # reconstruction_stage2_steps = gr.Number(value=50, label="reconstruction_stage2_steps")
+
+                         btn_gen_mesh = gr.Button("Generate Mesh")
+                         output_video1 = gr.Video(label="Generated Video", interactive=False, loop=True, autoplay=True)
+                         btn_download1 = gr.Button("Download Mesh")
+
+                         file_output1 = gr.File()
+
+             with gr.TabItem('Image-to-3D', id='tab_image_to_3d'):
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         image = gr.Image(label="Input Image", type="pil")
+
+                         seed2 = gr.Number(value=10, label="Seed (0 for random)")
+
+                         btn_img2mesh_preprocess = gr.Button("Preprocess Image")
+
+                         image_caption = gr.Textbox(value="", label="Image Caption", placeholder="A caption will be generated here based on your input image. You can also edit this caption.", lines=4, interactive=True)
+
+                         output_image2 = gr.Image(label="Generated image", interactive=False)
+                         strength1 = gr.Slider(minimum=0, maximum=1.0, step=0.01, value=0.5, label="strength1")
+                         strength2 = gr.Slider(minimum=0, maximum=1.0, step=0.01, value=0.95, label="strength2")
+                         enable_redux = gr.Checkbox(label="enable redux", value=True)
+                         use_controlnet = gr.Checkbox(label="use controlnet", value=True)
+
+                         btn_img2mesh_main = gr.Button("Generate Mesh")
+
+                     with gr.Column(scale=1):
+                         # output_mesh2 = gr.Model3D(label="Generated Mesh", interactive=False)
+                         output_image3 = gr.Image(label="gen save image", interactive=False)
+                         output_video2 = gr.Video(label="Generated Video", interactive=False, loop=True, autoplay=True)
+                         btn_download2 = gr.Button("Download Mesh")
+                         file_output2 = gr.File()
+
+                 # Image-to-3D events
+                 btn_img2mesh_preprocess.click(fn=image2mesh_preprocess_, inputs=[image, seed2], outputs=[output_image2, image_caption])
+                 btn_img2mesh_main.click(fn=image2mesh_main_, inputs=[output_image2, image_caption, seed2, strength1, strength2, enable_redux, use_controlnet], outputs=[output_image3, output_video2])
+
+                 btn_download2.click(fn=save_cached_mesh, inputs=[], outputs=file_output2)
+
+             # Button Click Events
+             # Text-to-3D events
+             btn_text2detailed.click(fn=text_to_detailed, inputs=[prompt, seed1], outputs=detailed_prompt)
+             btn_text2img.click(fn=text_to_image, inputs=[detailed_prompt, seed1], outputs=output_image1)
+             btn_gen_mesh.click(fn=bundle_image_to_mesh, inputs=[output_image1], outputs=output_video1)
+             # btn_gen_mesh.click(fn=bundle_image_to_mesh, inputs=[output_image1, lrm_radius, isomer_radius, reconstruction_stage1_steps, reconstruction_stage2_steps], outputs=output_video1)
+
+         with gr.Row():
+             pass
+         with gr.Row():
+             gr.Markdown(_CITE_)
+
+     # demo.queue(default_concurrency_limit=1)
+     # demo.launch(server_name="0.0.0.0", server_port=9239)
+     # subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
+     demo.launch()
+
+
+ if __name__ == "__main__":
+     main()
 
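The Text-to-3D tab above wires its three buttons to `text_to_detailed`, `text_to_image`, and `bundle_image_to_mesh` in sequence. As a reading aid, here is a minimal hypothetical driver for that chain; it is not part of the commit, and it assumes it runs inside this Space's repo, where importing `app` also executes the module-level setup (pip installs, loading `k3d_wrapper`):

```python
# Hypothetical sketch of the Text-to-3D chain behind the .click() wiring above.
# Assumption: run inside this Space's environment; `from app import ...`
# triggers app.py's module-level installs and model loading.
from app import text_to_detailed, text_to_image, bundle_image_to_mesh

prompt = "A dog wearing a hat"                # one of the UI example prompts
detailed = text_to_detailed(prompt, seed=10)  # btn_text2detailed: refine the prompt
bundle = text_to_image(detailed, seed=10)     # btn_text2img: generate the 3D bundle image
# In the UI, Gradio passes the displayed image onward as an HxWxC array, which
# bundle_image_to_mesh rescales to [0, 1] before reconstruction.
video_path = bundle_image_to_mesh(bundle)     # btn_gen_mesh: mesh + turntable .mp4
print(video_path)
```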
app_demo.py ADDED
@@ -0,0 +1,384 @@
+ import gradio as gr
+ import os
+ import subprocess
+ import shlex
+ import spaces
+ import torch
+ access_token = os.getenv("HUGGINGFACE_TOKEN")
+ subprocess.run(
+     shlex.split(
+         "pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt240/download.html"
+     )
+ )
+
+ subprocess.run(
+     shlex.split(
+         "pip install ./extension/nvdiffrast-0.3.1+torch-py3-none-any.whl --force-reinstall --no-deps"
+     )
+ )
+
+ subprocess.run(
+     shlex.split(
+         "pip install ./extension/renderutils_plugin-0.1.0-cp310-cp310-linux_x86_64.whl --force-reinstall --no-deps"
+     )
+ )
+ def install_cuda_toolkit():
+     # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
+     # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run"
+     CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run"
+     CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
+     subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
+     subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
+     subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])
+
+     os.environ["CUDA_HOME"] = "/usr/local/cuda"
+     os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
+     os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
+         os.environ["CUDA_HOME"],
+         "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
+     )
+     # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
+     os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
+     print("==> finish install")
+ install_cuda_toolkit()
+ @spaces.GPU
+ def check_gpu():
+     os.environ['CUDA_HOME'] = '/usr/local/cuda-12.1'
+     os.environ['PATH'] += ':/usr/local/cuda-12.1/bin'
+     # os.environ['LD_LIBRARY_PATH'] += ':/usr/local/cuda-12.1/lib64'
+     os.environ['LD_LIBRARY_PATH'] = "/usr/local/cuda-12.1/lib64:" + os.environ.get('LD_LIBRARY_PATH', '')
+     subprocess.run(['nvidia-smi'])  # test whether CUDA is available
+     print(f"torch.cuda.is_available:{torch.cuda.is_available()}")
+ check_gpu()
+
+ from PIL import Image
+ from einops import rearrange
+ from diffusers import FluxPipeline
+ from models.lrm.utils.camera_util import get_flux_input_cameras
+ from models.lrm.utils.infer_util import save_video
+ from models.lrm.utils.mesh_util import save_obj, save_obj_with_mtl
+ from models.lrm.utils.render_utils import rotate_x, rotate_y
+ from models.lrm.utils.train_util import instantiate_from_config
+ from models.ISOMER.reconstruction_func import reconstruction
+ from models.ISOMER.projection_func import projection
+ import os
+ from einops import rearrange
+ from omegaconf import OmegaConf
+ import torch
+ import numpy as np
+ import trimesh
+ import torchvision
+ import torch.nn.functional as F
+ from PIL import Image
+ from torchvision import transforms
+ from torchvision.transforms import v2
+ from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler, AutoencoderTiny, AutoencoderKL
+ from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
+ from diffusers import FluxPipeline
+ from pytorch_lightning import seed_everything
+ import os
+ from huggingface_hub import hf_hub_download
+
+
+ from utils.tool import NormalTransfer, get_background, get_render_cameras_video, load_mipmap, render_frames
+
+ device_0 = "cuda"
+ device_1 = "cuda"
+ resolution = 512
+ save_dir = "./outputs"
+ normal_transfer = NormalTransfer()
+ isomer_azimuths = torch.from_numpy(np.array([0, 90, 180, 270])).float().to(device_1)
+ isomer_elevations = torch.from_numpy(np.array([5, 5, 5, 5])).float().to(device_1)
+ isomer_radius = 4.5
+ isomer_geo_weights = torch.from_numpy(np.array([1, 0.9, 1, 0.9])).float().to(device_1)
+ isomer_color_weights = torch.from_numpy(np.array([1, 0.5, 1, 0.5])).float().to(device_1)
+
+ # model initialization and loading
+ # flux
+ # # taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=torch.bfloat16).to(device_0)
+ # # good_vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", torch_dtype=torch.bfloat16, token=access_token).to(device_0)
+ # flux_pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, token=access_token).to(device=device_0, dtype=torch.bfloat16)
+ # # flux_pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, vae=taef1, token=access_token).to(device_0)
+ # flux_lora_ckpt_path = hf_hub_download(repo_id="LTT/xxx-ckpt", filename="rgb_normal_large.safetensors", repo_type="model", token=access_token)
+ # flux_pipe.load_lora_weights(flux_lora_ckpt_path)
+ # flux_pipe.to(device=device_0, dtype=torch.bfloat16)
+ # torch.cuda.empty_cache()
+ # flux_pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(flux_pipe)
+
+
+ # lrm
+ config = OmegaConf.load("./models/lrm/config/PRM_inference.yaml")
+ model_config = config.model_config
+ infer_config = config.infer_config
+ model = instantiate_from_config(model_config)
+ model_ckpt_path = hf_hub_download(repo_id="LTT/PRM", filename="final_ckpt.ckpt", repo_type="model")
+ state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
+ state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.')}
+ model.load_state_dict(state_dict, strict=True)
+ model = model.to(device_1)
+ torch.cuda.empty_cache()
+ @spaces.GPU
+ def lrm_reconstructions(image, input_cameras, save_path=None, name="temp", export_texmap=False, if_save_video=False):
+     images = image.unsqueeze(0).to(device_1)
+     images = v2.functional.resize(images, 512, interpolation=3, antialias=True).clamp(0, 1)
+     # breakpoint()
+     with torch.no_grad():
+         # get triplane
+         planes = model.forward_planes(images, input_cameras)
+
+         mesh_path_idx = os.path.join(save_path, f'{name}.obj')
+
+         mesh_out = model.extract_mesh(
+             planes,
+             use_texture_map=export_texmap,
+             **infer_config,
+         )
+         if export_texmap:
+             vertices, faces, uvs, mesh_tex_idx, tex_map = mesh_out
+             save_obj_with_mtl(
+                 vertices.data.cpu().numpy(),
+                 uvs.data.cpu().numpy(),
+                 faces.data.cpu().numpy(),
+                 mesh_tex_idx.data.cpu().numpy(),
+                 tex_map.permute(1, 2, 0).data.cpu().numpy(),
+                 mesh_path_idx,
+             )
+         else:
+             vertices, faces, vertex_colors = mesh_out
+             save_obj(vertices, faces, vertex_colors, mesh_path_idx)
+         print(f"Mesh saved to {mesh_path_idx}")
+
+         render_size = 512
+         if if_save_video:
+             video_path_idx = os.path.join(save_path, f'{name}.mp4')
+             render_size = infer_config.render_resolution
+             ENV = load_mipmap("models/lrm/env_mipmap/6")
+             materials = (0.0, 0.9)
+
+             all_mv, all_mvp, all_campos = get_render_cameras_video(
+                 batch_size=1,
+                 M=24,
+                 radius=4.5,
+                 elevation=(90, 60.0),
+                 is_flexicubes=True,
+                 fov=30
+             )
+
+             frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals, alphas = render_frames(
+                 model,
+                 planes,
+                 render_cameras=all_mvp,
+                 camera_pos=all_campos,
+                 env=ENV,
+                 materials=materials,
+                 render_size=render_size,
+                 chunk_size=20,
+                 is_flexicubes=True,
+             )
+             normals = (torch.nn.functional.normalize(normals) + 1) / 2
+             normals = normals * alphas + (1 - alphas)
+             all_frames = torch.cat([frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals], dim=3)
+
+             save_video(
+                 all_frames,
+                 video_path_idx,
+                 fps=30,
+             )
+             print(f"Video saved to {video_path_idx}")
+
+     return vertices, faces
+
+
+ def local_normal_global_transform(local_normal_images, azimuths_deg, elevations_deg):
+     if local_normal_images.min() >= 0:
+         local_normal = local_normal_images.float() * 2 - 1
+     else:
+         local_normal = local_normal_images.float()
+     global_normal = normal_transfer.trans_local_2_global(local_normal, azimuths_deg, elevations_deg, radius=4.5, for_lotus=False)
+     global_normal[..., 0] *= -1
+     global_normal = (global_normal + 1) / 2
+     global_normal = global_normal.permute(0, 3, 1, 2)
+     return global_normal
+
+ # Generate the multi-view images
+ @spaces.GPU(duration=120)
+ def generate_multi_view_images(prompt, seed):
+     # torch.cuda.empty_cache()
+     # generator = torch.manual_seed(seed)
+     generator = torch.Generator().manual_seed(seed)
+     with torch.no_grad():
+         img = flux_pipe(
+             prompt=prompt,
+             num_inference_steps=5,
+             guidance_scale=3.5,
+             num_images_per_prompt=1,
+             width=resolution * 2,
+             height=resolution * 1,
+             output_type='np',
+             generator=generator,
+         ).images
+         # for img in flux_pipe.flux_pipe_call_that_returns_an_iterable_of_images(
+         #     prompt=prompt,
+         #     guidance_scale=3.5,
+         #     num_inference_steps=4,
+         #     width=resolution * 4,
+         #     height=resolution * 2,
+         #     generator=generator,
+         #     output_type="np",
+         #     good_vae=good_vae,
+         # ):
+         #     pass
+     # Return the final image; the seed is handled by the external caller
+     return img
+
+ # Reconstruct the 3D model
+ @spaces.GPU
+ def reconstruct_3d_model(images, prompt):
+     global model
+     model.init_flexicubes_geometry(device_1, fovy=50.0)
+     model = model.eval()
+     rgb_normal_grid = images
+     save_dir_path = os.path.join(save_dir, prompt.replace(" ", "_"))
+     os.makedirs(save_dir_path, exist_ok=True)
+
+     images = torch.from_numpy(rgb_normal_grid).squeeze(0).permute(2, 0, 1).contiguous().float()  # (3, 1024, 2048)
+     images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=2, m=4)  # (8, 3, 512, 512)
+     rgb_multi_view = images[:4, :3, :, :]
+     normal_multi_view = images[4:, :3, :, :]
+     multi_view_mask = get_background(normal_multi_view)
+     rgb_multi_view = rgb_multi_view * multi_view_mask + (1 - multi_view_mask)
+     input_cameras = get_flux_input_cameras(batch_size=1, radius=4.2, fov=30).to(device_1)
+     vertices, faces = lrm_reconstructions(rgb_multi_view, input_cameras, save_path=save_dir_path, name='lrm', export_texmap=False, if_save_video=True)
+     # local normal to global normal
+     global_normal = local_normal_global_transform(normal_multi_view.permute(0, 2, 3, 1), isomer_azimuths, isomer_elevations)
+     global_normal = global_normal * multi_view_mask + (1 - multi_view_mask)
+
+     global_normal = global_normal.permute(0, 2, 3, 1)
+     rgb_multi_view = rgb_multi_view.permute(0, 2, 3, 1)
+     multi_view_mask = multi_view_mask.permute(0, 2, 3, 1).squeeze(-1)
+     vertices = torch.from_numpy(vertices).to(device_1)
+     faces = torch.from_numpy(faces).to(device_1)
+     vertices = vertices @ rotate_x(np.pi / 2, device=vertices.device)[:3, :3]
+     vertices = vertices @ rotate_y(np.pi / 2, device=vertices.device)[:3, :3]
+
+     # global_normal: B,H,W,3
+     # multi_view_mask: B,H,W
+     # rgb_multi_view: B,H,W,3
+
+     meshes = reconstruction(
+         normal_pils=global_normal,
+         masks=multi_view_mask,
+         weights=isomer_geo_weights,
+         fov=30,
+         radius=isomer_radius,
+         camera_angles_azi=isomer_azimuths,
+         camera_angles_ele=isomer_elevations,
+         expansion_weight_stage1=0.1,
+         init_type="file",
+         init_verts=vertices,
+         init_faces=faces,
+         stage1_steps=0,
+         stage2_steps=50,
+         start_edge_len_stage1=0.1,
+         end_edge_len_stage1=0.02,
+         start_edge_len_stage2=0.02,
+         end_edge_len_stage2=0.005,
+     )
+
+     save_glb_addr = projection(
+         meshes,
+         masks=multi_view_mask,
+         images=rgb_multi_view,
+         azimuths=isomer_azimuths,
+         elevations=isomer_elevations,
+         weights=isomer_color_weights,
+         fov=30,
+         radius=isomer_radius,
+         save_dir=f"{save_dir_path}/ISOMER/",
+     )
+
+     return save_glb_addr
+
+ # Gradio interface function
+ @spaces.GPU
+ def gradio_pipeline(prompt, seed):
+     import ctypes
+     # Explicitly preload libnvrtc.so.12
+     cuda_lib_path = "/usr/local/cuda-12.1/lib64/libnvrtc.so.12"
+     try:
+         ctypes.CDLL(cuda_lib_path, mode=ctypes.RTLD_GLOBAL)
+         print(f"Successfully preloaded {cuda_lib_path}")
+     except OSError as e:
+         print(f"Failed to preload {cuda_lib_path}: {e}")
+     # Generate the multi-view images
+     # rgb_normal_grid = generate_multi_view_images(prompt, seed)
+     rgb_normal_grid = np.load("rgb_normal_grid.npy")
+     image_preview = Image.fromarray((rgb_normal_grid[0] * 255).astype(np.uint8))
+
+     # 3d reconstruction
+
+     # Reconstruct the 3D model and return the glb path
+     save_glb_addr = reconstruct_3d_model(rgb_normal_grid, prompt)
+     # save_glb_addr = None
+     return image_preview, save_glb_addr
+
+ # Gradio Blocks app
+ with gr.Blocks() as demo:
+     with gr.Row(variant="panel"):
+         # Left: input area
+         with gr.Column():
+             with gr.Row():
+                 prompt_input = gr.Textbox(
+                     label="Enter Prompt",
+                     placeholder="Describe your 3D model...",
+                     lines=2,
+                     elem_id="prompt_input"
+                 )
+
+             with gr.Row():
+                 sample_seed = gr.Number(value=42, label="Seed Value", precision=0)
+
+             with gr.Row():
+                 submit = gr.Button("Generate", elem_id="generate", variant="primary")
+
+             with gr.Row(variant="panel"):
+                 gr.Markdown("Examples:")
+                 gr.Examples(
+                     examples=[
+                         ["a castle on a hill"],
+                         ["an owl wearing a hat"],
+                         ["a futuristic car"]
+                     ],
+                     inputs=[prompt_input],
+                     label="Prompt Examples"
+                 )
+
+         # Right: output area
+         with gr.Column():
+             with gr.Row():
+                 rgb_normal_grid_image = gr.Image(
+                     label="RGB Normal Grid",
+                     type="pil",
+                     interactive=False
+                 )
+
+             with gr.Row():
+                 with gr.Tab("GLB"):
+                     output_glb_model = gr.Model3D(
+                         label="Generated 3D Model (GLB Format)",
+                         interactive=False
+                     )
+                     gr.Markdown("Download the model for proper visualization.")
+
+     # Event wiring
+     submit.click(
+         fn=gradio_pipeline, inputs=[prompt_input, sample_seed],
+         outputs=[rgb_normal_grid_image, output_glb_model]
+     )
+
+ # Launch the app
+ # demo.queue(max_size=10)
+ demo.launch()
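
One detail of `reconstruct_3d_model` above is worth isolating: the generated `rgb_normal_grid` is a 2x4 mosaic (four RGB views on the top row, four normal maps below), and a single `einops.rearrange` splits it into eight 512x512 views. A self-contained illustration with dummy data (editor's sketch; the shapes come from the comments in the code):

```python
# Standalone demo of the 2x4 bundle-grid split used in reconstruct_3d_model.
import torch
from einops import rearrange

grid = torch.rand(3, 1024, 2048)  # (c, 2*512, 4*512): 2 rows x 4 columns of views
views = rearrange(grid, 'c (n h) (m w) -> (n m) c h w', n=2, m=4)
print(views.shape)                # torch.Size([8, 3, 512, 512]), rows in order

rgb_multi_view = views[:4]        # top row: RGB views at azimuths 0/90/180/270
normal_multi_view = views[4:]     # bottom row: the corresponding normal maps
```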
app_demo_.py DELETED
@@ -1,491 +0,0 @@
- import os
- import gradio as gr
- import subprocess
- import spaces
- import ctypes
- import shlex
- import torch
-
- subprocess.run(
-     shlex.split(
-         "pip install ./custom_diffusers --force-reinstall --no-deps"
-     )
- )
- subprocess.run(
-     shlex.split(
-         "pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt240/download.html"
-     )
- )
-
- subprocess.run(
-     shlex.split(
-         "pip install ./extension/nvdiffrast-0.3.1+torch-py3-none-any.whl --force-reinstall --no-deps"
-     )
- )
-
- subprocess.run(
-     shlex.split(
-         "pip install ./extension/renderutils_plugin-0.1.0-cp310-cp310-linux_x86_64.whl --force-reinstall --no-deps"
-     )
- )
- # download cudatoolkit
- def install_cuda_toolkit():
-     # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
-     # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run"
-     CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run"
-     CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
-     subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
-     subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
-     subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])
-
-     os.environ["CUDA_HOME"] = "/usr/local/cuda"
-     os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
-     os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
-         os.environ["CUDA_HOME"],
-         "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
-     )
-     # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
-     os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
-     print("==> finish install")
- install_cuda_toolkit()
-
-
- import base64
- import re
- import sys
-
- sys.path.append(os.path.abspath(os.path.join(__file__, '../')))
- if 'OMP_NUM_THREADS' not in os.environ:
-     os.environ['OMP_NUM_THREADS'] = '32'
-
- import shutil
- import json
- import requests
- import shutil
- import threading
- from PIL import Image
- import time
- import trimesh
-
- import random
- import time
- import numpy as np
- from video_render import render_video_from_obj
-
- access_token = os.getenv("HUGGINGFACE_TOKEN")
- from pipeline.kiss3d_wrapper import init_wrapper_from_config, run_text_to_3d, run_image_to_3d, image2mesh_preprocess, image2mesh_main
-
-
- # Add logo file path and hyperlinks
- LOGO_PATH = "app_assets/logo_temp_.png"  # Update this to the actual path of your logo
- ARXIV_LINK = "https://arxiv.org/abs/example"
- GITHUB_LINK = "https://github.com/example"
-
-
- k3d_wrapper = init_wrapper_from_config('./pipeline/pipeline_config/default.yaml')
-
-
- from models.ISOMER.scripts.utils import fix_vert_color_glb
- torch.backends.cuda.matmul.allow_tf32 = True
-
- def check_gpu():
-     os.environ['CUDA_HOME'] = '/usr/local/cuda-12.1'
-     os.environ['PATH'] += ':/usr/local/cuda-12.1/bin'
-     # os.environ['LD_LIBRARY_PATH'] += ':/usr/local/cuda-12.1/lib64'
-     os.environ['LD_LIBRARY_PATH'] = "/usr/local/cuda-12.1/lib64:" + os.environ.get('LD_LIBRARY_PATH', '')
-     # Explicitly preload libnvrtc.so.12
-     cuda_lib_path = "/usr/local/cuda-12.1/lib64/libnvrtc.so.12"
-     try:
-         ctypes.CDLL(cuda_lib_path, mode=ctypes.RTLD_GLOBAL)
-         print(f"Successfully preloaded {cuda_lib_path}")
-     except OSError as e:
-         print(f"Failed to preload {cuda_lib_path}: {e}")
- check_gpu()
- print(f"GPU: {torch.cuda.is_available()}")
- subprocess.run(['nvidia-smi'])
-
- TEMP_MESH_ADDRESS = ''
-
- mesh_cache = None
- preprocessed_input_image = None
-
- def save_cached_mesh():
-     global mesh_cache
-     return mesh_cache
-     # if mesh_cache is None:
-     #     return None
-     # return save_py3dmesh_with_trimesh_fast(mesh_cache)
-
- def save_py3dmesh_with_trimesh_fast(meshes, save_glb_path=TEMP_MESH_ADDRESS, apply_sRGB_to_LinearRGB=True):
-     from pytorch3d.structures import Meshes
-     import trimesh
-
-     # convert from pytorch3d meshes to trimesh mesh
-     vertices = meshes.verts_packed().cpu().float().numpy()
-     triangles = meshes.faces_packed().cpu().long().numpy()
-     np_color = meshes.textures.verts_features_packed().cpu().float().numpy()
-     if save_glb_path.endswith(".glb"):
-         # rotate 180 along +Y
-         vertices[:, [0, 2]] = -vertices[:, [0, 2]]
-
-     def srgb_to_linear(c_srgb):
-         c_linear = np.where(c_srgb <= 0.04045, c_srgb / 12.92, ((c_srgb + 0.055) / 1.055) ** 2.4)
-         return c_linear.clip(0, 1.)
-     if apply_sRGB_to_LinearRGB:
-         np_color = srgb_to_linear(np_color)
-     assert vertices.shape[0] == np_color.shape[0]
-     assert np_color.shape[1] == 3
-     assert 0 <= np_color.min() and np_color.max() <= 1, f"min={np_color.min()}, max={np_color.max()}"
-     mesh = trimesh.Trimesh(vertices=vertices, faces=triangles, vertex_colors=np_color)
-     mesh.remove_unreferenced_vertices()
-     # save mesh
-     mesh.export(save_glb_path)
-     if save_glb_path.endswith(".glb"):
-         fix_vert_color_glb(save_glb_path)
-     print(f"saving to {save_glb_path}")
- #
- #
- # @spaces.GPU
- def text_to_detailed(prompt, seed=None):
-     # print(torch.cuda.is_available())
-     # print(f"Before text_to_detailed: {torch.cuda.memory_allocated() / 1024**3} GB")
-     return k3d_wrapper.get_detailed_prompt(prompt, seed)
-
- def text_to_image(prompt, seed=None, strength=1.0, lora_scale=1.0, num_inference_steps=30, redux_hparam=None, init_image=None, **kwargs):
-     # print(f"Before text_to_image: {torch.cuda.memory_allocated() / 1024**3} GB")
-     k3d_wrapper.renew_uuid()
-     init_image = None
-     # if init_image_path is not None:
-     #     init_image = Image.open(init_image_path)
-     result = k3d_wrapper.generate_3d_bundle_image_text(
-         prompt,
-         image=init_image,
-         strength=strength,
-         lora_scale=lora_scale,
-         num_inference_steps=num_inference_steps,
-         seed=int(seed) if seed is not None else None,
-         redux_hparam=redux_hparam,
-         save_intermediate_results=True,
-         **kwargs)
-     return result[-1]
-
- def image2mesh_preprocess_(input_image_, seed, use_mv_rgb=True):
-     global preprocessed_input_image
-
-     seed = int(seed) if seed is not None else None
-
-     # TODO: delete this later
-     k3d_wrapper.del_llm_model()
-
-     input_image_save_path, reference_save_path, caption = image2mesh_preprocess(k3d_wrapper, input_image_, seed, use_mv_rgb)
-
-     preprocessed_input_image = Image.open(input_image_save_path)
-     return reference_save_path, caption
-
- @spaces.GPU
- def image2mesh_main_(reference_3d_bundle_image, caption, seed, strength1=0.5, strength2=0.95, enable_redux=True, use_controlnet=True, if_video=True):
-     global mesh_cache
-     seed = int(seed) if seed is not None else None
-
-
-     # TODO: delete this later
-     k3d_wrapper.del_llm_model()
-
-     input_image = preprocessed_input_image
-
-     reference_3d_bundle_image = torch.tensor(reference_3d_bundle_image).permute(2, 0, 1) / 255
-
-     gen_save_path, recon_mesh_path = image2mesh_main(k3d_wrapper, input_image, reference_3d_bundle_image, caption=caption, seed=seed, strength1=strength1, strength2=strength2, enable_redux=enable_redux, use_controlnet=use_controlnet)
-     mesh_cache = recon_mesh_path
-
-
-     # gen_save_ = Image.open(gen_save_path)
-
-     if if_video:
-         video_path = recon_mesh_path.replace('.obj', '.mp4').replace('.glb', '.mp4')
-         render_video_from_obj(recon_mesh_path, video_path)
-         print(f"After bundle_image_to_mesh: {torch.cuda.memory_allocated() / 1024**3} GB")
-         return gen_save_path, video_path
-     else:
-         return gen_save_path, recon_mesh_path
-     # return gen_save_path, recon_mesh_path
-
213
- @spaces.GPU
214
- def bundle_image_to_mesh(
215
- gen_3d_bundle_image,
216
- lrm_radius = 4.15,
217
- isomer_radius = 4.5,
218
- reconstruction_stage1_steps = 10,
219
- reconstruction_stage2_steps = 50,
220
- save_intermediate_results=True,
221
- if_video=True
222
- ):
223
- global mesh_cache
224
- print(f"Before bundle_image_to_mesh: {torch.cuda.memory_allocated() / 1024**3} GB")
225
- k3d_wrapper.recon_model.init_flexicubes_geometry("cuda:0", fovy=50.0)
226
- # TODO: delete this later
227
- k3d_wrapper.del_llm_model()
228
-
229
- print(f"Before bundle_image_to_mesh after deleting llm model: {torch.cuda.memory_allocated() / 1024**3} GB")
230
-
231
- gen_3d_bundle_image = torch.tensor(gen_3d_bundle_image).permute(2,0,1)/255
232
- # recon from 3D Bundle image
233
- recon_mesh_path = k3d_wrapper.reconstruct_3d_bundle_image(gen_3d_bundle_image, lrm_render_radius=lrm_radius, isomer_radius=isomer_radius, save_intermediate_results=save_intermediate_results, reconstruction_stage1_steps=int(reconstruction_stage1_steps), reconstruction_stage2_steps=int(reconstruction_stage2_steps))
234
- mesh_cache = recon_mesh_path
235
-
236
- if if_video:
237
- video_path = recon_mesh_path.replace('.obj','.mp4').replace('.glb','.mp4')
238
- # # 检查这个video_path文件大小是是否超过50KB,不超过的话就认为是空文件,需要重新渲染
239
- # if os.path.exists(video_path):
240
- # print(f"file size:{os.path.getsize(video_path)}")
241
- # if os.path.getsize(video_path) > 50*1024:
242
- # print(f"video path:{video_path}")
243
- # return video_path
244
- render_video_from_obj(recon_mesh_path, video_path)
245
- print(f"After bundle_image_to_mesh: {torch.cuda.memory_allocated() / 1024**3} GB")
246
- return video_path
247
- else:
248
- return recon_mesh_path
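# Hedged alternative (sketch only) to the chained str.replace above for
# deriving the preview-video path: os.path.splitext handles '.obj' and
# '.glb' meshes uniformly and avoids accidental substring replacement.
import os

def mesh_to_video_path(mesh_path: str) -> str:
    base, _ext = os.path.splitext(mesh_path)  # drops '.obj' or '.glb'
    return base + '.mp4'

assert mesh_to_video_path('outputs/recon.obj') == 'outputs/recon.mp4'
assert mesh_to_video_path('outputs/recon.glb') == 'outputs/recon.mp4'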
- 
- _HEADER_ = f"""
- <img src="{LOGO_PATH}">
- <h2><b>Official 🤗 Gradio Demo</b></h2><h2>
- <b>Kiss3DGen: Repurposing Image Diffusion Models for 3D Asset Generation</b></h2>
- 
- <p>**Kiss3DGen** is xxxxxxxxx</p>
- 
- [![arXiv](https://img.shields.io/badge/arXiv-Link-red)]({ARXIV_LINK}) [![GitHub](https://img.shields.io/badge/GitHub-Repo-blue)]({GITHUB_LINK})
- """
- 
- _CITE_ = r"""
- <h2>If Kiss3DGen is helpful, please help to ⭐ the <a href='""" + GITHUB_LINK + r"""' target='_blank'>GitHub Repo</a>. Thanks!</h2>
- 
- 📝 **Citation**
- 
- If you find our work useful for your research or applications, please cite using this BibTeX:
- ```bibtex
- @article{xxxx,
-   title={xxxx},
-   author={xxxx},
-   journal={xxxx},
-   year={xxxx}
- }
- ```
- 
- 📋 **License**
- 
- Apache-2.0 license. Please refer to the [LICENSE file](https://huggingface.co/spaces/TencentARC/InstantMesh/blob/main/LICENSE) for details.
- 
- 📧 **Contact**
- 
- If you have any questions, feel free to open a discussion or contact us at <b>xxx@xxxx</b>.
- """
- 
- def image_to_base64(image_path):
-     """Convert an image file to a base64-encoded string."""
-     with open(image_path, "rb") as img_file:
-         return base64.b64encode(img_file.read()).decode('utf-8')
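# Usage sketch: the returned base64 string is meant for an inline data URI,
# as main() does for the logo below. 'figs/logo.png' is a hypothetical path
# standing in for LOGO_PATH.
#
#   logo_b64 = image_to_base64('figs/logo.png')
#   html = f"<img src='data:image/png;base64,{logo_b64}' alt='Logo'>"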
- 
- def main():
- 
-     torch.set_grad_enabled(False)
- 
-     # Convert the logo image to base64
-     logo_base64 = image_to_base64(LOGO_PATH)
-     # with gr.Blocks() as demo:
-     with gr.Blocks(css="""
-         body {
-             display: flex;
-             justify-content: center;
-             align-items: center;
-             min-height: 100vh;
-             margin: 0;
-             padding: 0;
-         }
-         #col-container { margin: 0px auto; max-width: 200px; }
- 
-         .gradio-container {
-             max-width: 1000px;
-             margin: auto;
-             width: 100%;
-         }
-         #center-align-column {
-             display: flex;
-             justify-content: center;
-             align-items: center;
-         }
-         #right-align-column {
-             display: flex;
-             justify-content: flex-end;
-             align-items: center;
-         }
-         h1 {text-align: center;}
-         h2 {text-align: center;}
-         h3 {text-align: center;}
-         p {text-align: center;}
-         img {text-align: right;}
-         .right {
-             display: block;
-             margin-left: auto;
-         }
-         .center {
-             display: block;
-             margin-left: auto;
-             margin-right: auto;
-             width: 50%;
-         }
-         #content-container {
-             max-width: 1200px;
-             margin: 0 auto;
-         }
-         #example-container {
-             max-width: 300px;
-             margin: 0 auto;
-         }
-     """, elem_id="col-container") as demo:
-         # Header Section
-         # gr.Image(value=LOGO_PATH, width=64, height=64)
-         # gr.Markdown(_HEADER_)
-         with gr.Row(elem_id="content-container"):
-             # with gr.Column(scale=1):
-             #     pass
-             # with gr.Column(scale=1, elem_id="right-align-column"):
-             #     # gr.Image(value=LOGO_PATH, interactive=False, show_label=False, width=64, height=64, elem_id="logo-image")
-             #     # gr.Markdown(f"<img src='{LOGO_PATH}' alt='Logo' style='width:64px;height:64px;border:0;'>")
-             #     # gr.HTML(f"<img src='data:image/png;base64,{logo_base64}' alt='Logo' class='right' style='width:64px;height:64px;border:0;text-align:right;'>")
-             #     pass
-             with gr.Column(scale=7, elem_id="center-align-column"):
-                 gr.Markdown(f"""
-                 ## Official 🤗 Gradio Demo
-                 # Kiss3DGen: Repurposing Image Diffusion Models for 3D Asset Generation""")
-                 gr.HTML(f"<img src='data:image/png;base64,{logo_base64}' alt='Logo' class='center' style='width:64px;height:64px;border:0;text-align:center;'>")
- 
-                 gr.HTML(f"""
-                 <div style="display: flex; justify-content: center; align-items: center; gap: 10px;">
-                     <a href="{ARXIV_LINK}" target="_blank">
-                         <img src="https://img.shields.io/badge/arXiv-Link-red" alt="arXiv">
-                     </a>
-                     <a href="{GITHUB_LINK}" target="_blank">
-                         <img src="https://img.shields.io/badge/GitHub-Repo-blue" alt="GitHub">
-                     </a>
-                 </div>
-                 """)
- 
-                 # gr.HTML(f"""
-                 # <div style="display: flex; gap: 10px; align-items: center;"><a href="{ARXIV_LINK}" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/badge/arXiv-Link-red" alt="arXiv"></a> <a href="{GITHUB_LINK}" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/badge/GitHub-Repo-blue" alt="GitHub"></a></div>
-                 # """)
- 
-                 # gr.Markdown(f"""
-                 # [![arXiv](https://img.shields.io/badge/arXiv-Link-red)]({ARXIV_LINK}) [![GitHub](https://img.shields.io/badge/GitHub-Repo-blue)]({GITHUB_LINK})
-                 # """, elem_id="title")
-         # with gr.Column(scale=1):
-         #     pass
-         # with gr.Row():
-         #     gr.Markdown(f"[![arXiv](https://img.shields.io/badge/arXiv-Link-red)]({ARXIV_LINK})")
-         #     gr.Markdown(f"[![GitHub](https://img.shields.io/badge/GitHub-Repo-blue)]({GITHUB_LINK})")
- 
-         # Tabs Section
-         with gr.Tabs(selected='tab_text_to_3d', elem_id="content-container") as main_tabs:
-             with gr.TabItem('Text-to-3D', id='tab_text_to_3d'):
-                 with gr.Row():
-                     with gr.Column(scale=1):
-                         prompt = gr.Textbox(value="", label="Input Prompt", lines=4)
-                         seed1 = gr.Number(value=10, label="Seed")
- 
-                         with gr.Row(elem_id="example-container"):
-                             gr.Examples(
-                                 examples=[
-                                     # ["A tree with red leaves"],
-                                     # ["A dragon with black texture"],
-                                     ["A girl with pink hair"],
-                                     ["A boy playing guitar"],
-                                     ["A dog wearing a hat"],
-                                     ["A boy playing basketball"],
-                                 ],
-                                 inputs=[prompt],  # fill the selected example into the prompt textbox
-                                 label="Example Prompts"
-                             )
-                         btn_text2detailed = gr.Button("Refine to detailed prompt")
-                         detailed_prompt = gr.Textbox(value="", label="Detailed Prompt", placeholder="A detailed prompt will be generated here based on your input prompt. You can also edit it.", lines=4, interactive=True)
-                         btn_text2img = gr.Button("Generate Images")
- 
-                     with gr.Column(scale=1):
-                         output_image1 = gr.Image(label="Generated image", interactive=False)
- 
-                         # lrm_radius = gr.Number(value=4.15, label="lrm_radius")
-                         # isomer_radius = gr.Number(value=4.5, label="isomer_radius")
-                         # reconstruction_stage1_steps = gr.Number(value=10, label="reconstruction_stage1_steps")
-                         # reconstruction_stage2_steps = gr.Number(value=50, label="reconstruction_stage2_steps")
- 
-                         btn_gen_mesh = gr.Button("Generate Mesh")
-                         output_video1 = gr.Video(label="Generated Video", interactive=False, loop=True, autoplay=True)
-                         btn_download1 = gr.Button("Download Mesh")
- 
-                         file_output1 = gr.File()
- 
-             with gr.TabItem('Image-to-3D', id='tab_image_to_3d'):
-                 with gr.Row():
-                     with gr.Column(scale=1):
-                         image = gr.Image(label="Input Image", type="pil")
- 
-                         seed2 = gr.Number(value=10, label="Seed (0 for random)")
- 
-                         btn_img2mesh_preprocess = gr.Button("Preprocess Image")
- 
-                         image_caption = gr.Textbox(value="", label="Image Caption", placeholder="A caption will be generated here based on your input image. You can also edit it.", lines=4, interactive=True)
- 
-                         output_image2 = gr.Image(label="Generated image", interactive=False)
-                         strength1 = gr.Slider(minimum=0, maximum=1.0, step=0.01, value=0.5, label="strength1")
-                         strength2 = gr.Slider(minimum=0, maximum=1.0, step=0.01, value=0.95, label="strength2")
-                         enable_redux = gr.Checkbox(label="enable redux", value=True)
-                         use_controlnet = gr.Checkbox(label="use controlnet", value=True)
- 
-                         btn_img2mesh_main = gr.Button("Generate Mesh")
- 
-                     with gr.Column(scale=1):
-                         # output_mesh2 = gr.Model3D(label="Generated Mesh", interactive=False)
-                         output_image3 = gr.Image(label="Generated bundle image", interactive=False)
-                         output_video2 = gr.Video(label="Generated Video", interactive=False, loop=True, autoplay=True)
-                         btn_download2 = gr.Button("Download Mesh")
-                         file_output2 = gr.File()
- 
-                 # Image2
-                 btn_img2mesh_preprocess.click(fn=image2mesh_preprocess_, inputs=[image, seed2], outputs=[output_image2, image_caption])
- 
-                 btn_img2mesh_main.click(fn=image2mesh_main_, inputs=[output_image2, image_caption, seed2, strength1, strength2, enable_redux, use_controlnet], outputs=[output_image3, output_video2])
- 
-                 btn_download2.click(fn=save_cached_mesh, inputs=[], outputs=file_output2)
- 
-         # Button Click Events
-         # Text2
-         btn_text2detailed.click(fn=text_to_detailed, inputs=[prompt, seed1], outputs=detailed_prompt)
-         btn_text2img.click(fn=text_to_image, inputs=[detailed_prompt, seed1], outputs=output_image1)
-         btn_gen_mesh.click(fn=bundle_image_to_mesh, inputs=[output_image1], outputs=output_video1)
-         # Download button for the Text-to-3D tab, mirroring btn_download2 above
-         btn_download1.click(fn=save_cached_mesh, inputs=[], outputs=file_output1)
-         # btn_gen_mesh.click(fn=bundle_image_to_mesh, inputs=[output_image1, lrm_radius, isomer_radius, reconstruction_stage1_steps, reconstruction_stage2_steps], outputs=output_video1)
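# Sketch (assumes Gradio's event chaining via .then(), available in recent
# Gradio releases) of how the two Text-to-3D steps could run from one click:
#
#   btn_text2detailed.click(fn=text_to_detailed, inputs=[prompt, seed1], outputs=detailed_prompt) \
#       .then(fn=text_to_image, inputs=[detailed_prompt, seed1], outputs=output_image1)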
- 
-         with gr.Row():
-             pass
-         with gr.Row():
-             gr.Markdown(_CITE_)
- 
-     # demo.queue(default_concurrency_limit=1)
-     # demo.launch(server_name="0.0.0.0", server_port=9239)
-     # subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
-     demo.launch()
- 
- 
- if __name__ == "__main__":
-     main()