import gradio as gr import os import subprocess import shlex import spaces import torch access_token = os.getenv("HUGGINGFACE_TOKEN") subprocess.run( shlex.split( "pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt240/download.html" ) ) subprocess.run( shlex.split( "pip install ./extension/nvdiffrast-0.3.1+torch-py3-none-any.whl --force-reinstall --no-deps" ) ) subprocess.run( shlex.split( "pip install ./extension/renderutils_plugin-0.1.0-cp310-cp310-linux_x86_64.whl --force-reinstall --no-deps" ) ) def install_cuda_toolkit(): # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run" # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run" CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run" CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL) subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE]) subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE]) subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"]) os.environ["CUDA_HOME"] = "/usr/local/cuda" os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"]) os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % ( os.environ["CUDA_HOME"], "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"], ) # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6" print("==> finfish install") install_cuda_toolkit() @spaces.GPU def check_gpu(): os.environ['CUDA_HOME'] = '/usr/local/cuda-12.1' os.environ['PATH'] += ':/usr/local/cuda-12.1/bin' # os.environ['LD_LIBRARY_PATH'] += ':/usr/local/cuda-12.1/lib64' os.environ['LD_LIBRARY_PATH'] = "/usr/local/cuda-12.1/lib64:" + os.environ.get('LD_LIBRARY_PATH', '') subprocess.run(['nvidia-smi']) # 测试 CUDA 是否可用 print(f"torch.cuda.is_available:{torch.cuda.is_available()}") check_gpu() from PIL import Image from einops import rearrange from diffusers import FluxPipeline from models.lrm.utils.camera_util import get_flux_input_cameras from models.lrm.utils.infer_util import save_video from models.lrm.utils.mesh_util import save_obj, save_obj_with_mtl from models.lrm.utils.render_utils import rotate_x, rotate_y from models.lrm.utils.train_util import instantiate_from_config from models.ISOMER.reconstruction_func import reconstruction from models.ISOMER.projection_func import projection import os from einops import rearrange from omegaconf import OmegaConf import torch import numpy as np import trimesh import torchvision import torch.nn.functional as F from PIL import Image from torchvision import transforms from torchvision.transforms import v2 from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler, AutoencoderTiny, AutoencoderKL from transformers import CLIPTextModel, CLIPTokenizer,T5EncoderModel, T5TokenizerFast from diffusers import FluxPipeline from pytorch_lightning import seed_everything import os from huggingface_hub import hf_hub_download from utils.tool import NormalTransfer, get_background, get_render_cameras_video, load_mipmap, render_frames device_0 = "cuda" device_1 = "cuda" resolution = 512 save_dir = "./outputs" normal_transfer = NormalTransfer() isomer_azimuths = torch.from_numpy(np.array([0, 90, 180, 270])).float().to(device_1) isomer_elevations = torch.from_numpy(np.array([5, 5, 5, 5])).float().to(device_1) isomer_radius = 4.5 isomer_geo_weights = torch.from_numpy(np.array([1, 0.9, 1, 0.9])).float().to(device_1) isomer_color_weights = torch.from_numpy(np.array([1, 0.5, 1, 0.5])).float().to(device_1) # model initialization and loading # flux # # taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=torch.bfloat16).to(device_0) # # good_vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", torch_dtype=torch.bfloat16, token=access_token).to(device_0) # flux_pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, token=access_token).to(device=device_0, dtype=torch.bfloat16) # # flux_pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, vae=taef1, token=access_token).to(device_0) # flux_lora_ckpt_path = hf_hub_download(repo_id="LTT/xxx-ckpt", filename="rgb_normal_large.safetensors", repo_type="model", token=access_token) # flux_pipe.load_lora_weights(flux_lora_ckpt_path) # flux_pipe.to(device=device_0, dtype=torch.bfloat16) # torch.cuda.empty_cache() # flux_pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(flux_pipe) # lrm config = OmegaConf.load("./models/lrm/config/PRM_inference.yaml") model_config = config.model_config infer_config = config.infer_config model = instantiate_from_config(model_config) model_ckpt_path = hf_hub_download(repo_id="LTT/PRM", filename="final_ckpt.ckpt", repo_type="model") state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict'] state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.')} model.load_state_dict(state_dict, strict=True) model = model.to(device_1) torch.cuda.empty_cache() @spaces.GPU def lrm_reconstructions(image, input_cameras, save_path=None, name="temp", export_texmap=False, if_save_video=False): images = image.unsqueeze(0).to(device_1) images = v2.functional.resize(images, 512, interpolation=3, antialias=True).clamp(0, 1) # breakpoint() with torch.no_grad(): # get triplane planes = model.forward_planes(images, input_cameras) mesh_path_idx = os.path.join(save_path, f'{name}.obj') mesh_out = model.extract_mesh( planes, use_texture_map=export_texmap, **infer_config, ) if export_texmap: vertices, faces, uvs, mesh_tex_idx, tex_map = mesh_out save_obj_with_mtl( vertices.data.cpu().numpy(), uvs.data.cpu().numpy(), faces.data.cpu().numpy(), mesh_tex_idx.data.cpu().numpy(), tex_map.permute(1, 2, 0).data.cpu().numpy(), mesh_path_idx, ) else: vertices, faces, vertex_colors = mesh_out save_obj(vertices, faces, vertex_colors, mesh_path_idx) print(f"Mesh saved to {mesh_path_idx}") render_size = 512 if if_save_video: video_path_idx = os.path.join(save_path, f'{name}.mp4') render_size = infer_config.render_resolution ENV = load_mipmap("models/lrm/env_mipmap/6") materials = (0.0,0.9) all_mv, all_mvp, all_campos = get_render_cameras_video( batch_size=1, M=24, radius=4.5, elevation=(90, 60.0), is_flexicubes=True, fov=30 ) frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals, alphas = render_frames( model, planes, render_cameras=all_mvp, camera_pos=all_campos, env=ENV, materials=materials, render_size=render_size, chunk_size=20, is_flexicubes=True, ) normals = (torch.nn.functional.normalize(normals) + 1) / 2 normals = normals * alphas + (1-alphas) all_frames = torch.cat([frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals], dim=3) save_video( all_frames, video_path_idx, fps=30, ) print(f"Video saved to {video_path_idx}") return vertices, faces def local_normal_global_transform(local_normal_images, azimuths_deg, elevations_deg): if local_normal_images.min() >= 0: local_normal = local_normal_images.float() * 2 - 1 else: local_normal = local_normal_images.float() global_normal = normal_transfer.trans_local_2_global(local_normal, azimuths_deg, elevations_deg, radius=4.5, for_lotus=False) global_normal[...,0] *= -1 global_normal = (global_normal + 1) / 2 global_normal = global_normal.permute(0, 3, 1, 2) return global_normal # 生成多视图图像 @spaces.GPU(duration=120) def generate_multi_view_images(prompt, seed): # torch.cuda.empty_cache() # generator = torch.manual_seed(seed) generator = torch.Generator().manual_seed(seed) with torch.no_grad(): img = flux_pipe( prompt=prompt, num_inference_steps=5, guidance_scale=3.5, num_images_per_prompt=1, width=resolution * 2, height=resolution * 1, output_type='np', generator=generator, ).images # for img in flux_pipe.flux_pipe_call_that_returns_an_iterable_of_images( # prompt=prompt, # guidance_scale=3.5, # num_inference_steps=4, # width=resolution * 4, # height=resolution * 2, # generator=generator, # output_type="np", # good_vae=good_vae, # ): # pass # 返回最终的图像和种子(通过外部调用处理) return img # 重建 3D 模型 @spaces.GPU def reconstruct_3d_model(images, prompt): global model model.init_flexicubes_geometry(device_1, fovy=50.0) model = model.eval() rgb_normal_grid = images save_dir_path = os.path.join(save_dir, prompt.replace(" ", "_")) os.makedirs(save_dir_path, exist_ok=True) images = torch.from_numpy(rgb_normal_grid).squeeze(0).permute(2, 0, 1).contiguous().float() # (3, 1024, 2048) images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=2, m=4) # (8, 3, 512, 512) rgb_multi_view = images[:4, :3, :, :] normal_multi_view = images[4:, :3, :, :] multi_view_mask = get_background(normal_multi_view) rgb_multi_view = rgb_multi_view * rgb_multi_view + (1-multi_view_mask) input_cameras = get_flux_input_cameras(batch_size=1, radius=4.2, fov=30).to(device_1) vertices, faces = lrm_reconstructions(rgb_multi_view, input_cameras, save_path=save_dir_path, name='lrm', export_texmap=False, if_save_video=True) # local normal to global normal global_normal = local_normal_global_transform(normal_multi_view.permute(0, 2, 3, 1), isomer_azimuths, isomer_elevations) global_normal = global_normal * multi_view_mask + (1-multi_view_mask) global_normal = global_normal.permute(0,2,3,1) rgb_multi_view = rgb_multi_view.permute(0,2,3,1) multi_view_mask = multi_view_mask.permute(0,2,3,1).squeeze(-1) vertices = torch.from_numpy(vertices).to(device_1) faces = torch.from_numpy(faces).to(device_1) vertices = vertices @ rotate_x(np.pi / 2, device=vertices.device)[:3, :3] vertices = vertices @ rotate_y(np.pi / 2, device=vertices.device)[:3, :3] # global_normal: B,H,W,3 # multi_view_mask: B,H,W # rgb_multi_view: B,H,W,3 meshes = reconstruction( normal_pils=global_normal, masks=multi_view_mask, weights=isomer_geo_weights, fov=30, radius=isomer_radius, camera_angles_azi=isomer_azimuths, camera_angles_ele=isomer_elevations, expansion_weight_stage1=0.1, init_type="file", init_verts=vertices, init_faces=faces, stage1_steps=0, stage2_steps=50, start_edge_len_stage1=0.1, end_edge_len_stage1=0.02, start_edge_len_stage2=0.02, end_edge_len_stage2=0.005, ) save_glb_addr = projection( meshes, masks=multi_view_mask, images=rgb_multi_view, azimuths=isomer_azimuths, elevations=isomer_elevations, weights=isomer_color_weights, fov=30, radius=isomer_radius, save_dir=f"{save_dir_path}/ISOMER/", ) return save_glb_addr # Gradio 接口函数 @spaces.GPU def gradio_pipeline(prompt, seed): import ctypes # 显式加载 libnvrtc.so.12 cuda_lib_path = "/usr/local/cuda-12.1/lib64/libnvrtc.so.12" try: ctypes.CDLL(cuda_lib_path, mode=ctypes.RTLD_GLOBAL) print(f"Successfully preloaded {cuda_lib_path}") except OSError as e: print(f"Failed to preload {cuda_lib_path}: {e}") # 生成多视图图像 # rgb_normal_grid = generate_multi_view_images(prompt, seed) rgb_normal_grid = np.load("rgb_normal_grid.npy") image_preview = Image.fromarray((rgb_normal_grid[0] * 255).astype(np.uint8)) # 3d reconstruction # 重建 3D 模型并返回 glb 路径 save_glb_addr = reconstruct_3d_model(rgb_normal_grid, prompt) # save_glb_addr = None return image_preview, save_glb_addr # Gradio Blocks 应用 with gr.Blocks() as demo: with gr.Row(variant="panel"): # 左侧输入区域 with gr.Column(): with gr.Row(): prompt_input = gr.Textbox( label="Enter Prompt", placeholder="Describe your 3D model...", lines=2, elem_id="prompt_input" ) with gr.Row(): sample_seed = gr.Number(value=42, label="Seed Value", precision=0) with gr.Row(): submit = gr.Button("Generate", elem_id="generate", variant="primary") with gr.Row(variant="panel"): gr.Markdown("Examples:") gr.Examples( examples=[ ["a castle on a hill"], ["an owl wearing a hat"], ["a futuristic car"] ], inputs=[prompt_input], label="Prompt Examples" ) # 右侧输出区域 with gr.Column(): with gr.Row(): rgb_normal_grid_image = gr.Image( label="RGB Normal Grid", type="pil", interactive=False ) with gr.Row(): with gr.Tab("GLB"): output_glb_model = gr.Model3D( label="Generated 3D Model (GLB Format)", interactive=False ) gr.Markdown("Download the model for proper visualization.") # 处理逻辑 submit.click( fn=gradio_pipeline, inputs=[prompt_input, sample_seed], outputs=[rgb_normal_grid_image, output_glb_model] ) # 启动应用 # demo.queue(max_size=10) demo.launch()