import gradio as gr
import os
import subprocess
import shlex
import spaces
import torch
import numpy as np

access_token = os.getenv("HUGGINGFACE_TOKEN")

# Install the compiled extensions at runtime (prebuilt wheels for py3.10 / cu121 / torch 2.1).
subprocess.run(
    shlex.split(
        "pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt210/download.html"
    )
)
subprocess.run(
    shlex.split(
        "pip install ./extension/nvdiffrast-0.3.1+torch-py3-none-any.whl --force-reinstall --no-deps"
    )
)
subprocess.run(
    shlex.split(
        "pip install ./extension/renderutils_plugin-1.0-cp310-cp310-linux_x86_64.whl --force-reinstall --no-deps"
    )
)


def install_cuda_toolkit():
    # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
    # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run"
    CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run"
    CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
    subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
    subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
    subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])
    os.environ["CUDA_HOME"] = "/usr/local/cuda"
    os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
    os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
        os.environ["CUDA_HOME"],
        "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
    )
    # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
    print("==> finish install")

# install_cuda_toolkit()


@spaces.GPU
def check_gpu():
    os.environ['CUDA_HOME'] = '/usr/local/cuda-12.1'
    os.environ['PATH'] += ':/usr/local/cuda-12.1/bin'
    # os.environ['LD_LIBRARY_PATH'] += ':/usr/local/cuda-12.1/lib64'
    os.environ['LD_LIBRARY_PATH'] = "/usr/local/cuda-12.1/lib64:" + os.environ.get('LD_LIBRARY_PATH', '')
    subprocess.run(['nvidia-smi'])
    # Test whether CUDA is available
    print(f"torch.cuda.is_available: {torch.cuda.is_available()}")

check_gpu()

# Heavy imports only after the runtime installs above have completed.
from PIL import Image
from einops import rearrange
from omegaconf import OmegaConf
import trimesh
import torchvision
import torch.nn.functional as F
from torchvision import transforms
from torchvision.transforms import v2
from diffusers import DiffusionPipeline, FluxPipeline, FlowMatchEulerDiscreteScheduler, AutoencoderTiny, AutoencoderKL
from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
from pytorch_lightning import seed_everything
from huggingface_hub import hf_hub_download

from models.lrm.utils.camera_util import get_flux_input_cameras
from models.lrm.utils.infer_util import save_video
from models.lrm.utils.mesh_util import save_obj, save_obj_with_mtl
from models.lrm.utils.render_utils import rotate_x, rotate_y
from models.lrm.utils.train_util import instantiate_from_config
from models.ISOMER.reconstruction_func import reconstruction
from models.ISOMER.projection_func import projection
from live_preview_helpers import calculate_shift, retrieve_timesteps, flux_pipe_call_that_returns_an_iterable_of_images
from utils.tool import NormalTransfer, get_background, get_render_cameras_video, load_mipmap, render_frames

device_0 = "cuda:0"
device_1 = "cuda:1"
resolution = 512
save_dir = "./outputs"
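# Layout assumed throughout (inferred from the rearrange in reconstruct_3d_model
# below, not an authoritative spec): the FLUX LoRA emits a single
# (2*resolution) x (4*resolution) grid image, i.e. a 2 x 4 arrangement of
# 512 px tiles:
#
#     [ rgb@0   rgb@90   rgb@180   rgb@270 ]   <- row 1: RGB views
#     [ nrm@0   nrm@90   nrm@180   nrm@270 ]   <- row 2: view-space normal maps
#
# The per-view azimuths match isomer_azimuths below; the elevation is a
# constant 5 degrees (isomer_elevations).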
normal_transfer = NormalTransfer()
isomer_azimuths = torch.from_numpy(np.array([0, 90, 180, 270])).float().to(device_1)
isomer_elevations = torch.from_numpy(np.array([5, 5, 5, 5])).float().to(device_1)
isomer_radius = 4.5
isomer_geo_weights = torch.from_numpy(np.array([1, 0.9, 1, 0.9])).float().to(device_1)
isomer_color_weights = torch.from_numpy(np.array([1, 0.5, 1, 0.5])).float().to(device_1)

# Model initialization and loading
# FLUX image generator (with the tiny VAE for fast previews and the full VAE for the final decode)
taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=torch.bfloat16).to(device_0)
good_vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", torch_dtype=torch.bfloat16, token=access_token).to(device_0)
# flux_pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, token=access_token).to(device=device_0, dtype=torch.bfloat16)
flux_pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, vae=taef1, token=access_token).to(device_0)
flux_lora_ckpt_path = hf_hub_download(repo_id="LTT/xxx-ckpt", filename="rgb_normal_large.safetensors", repo_type="model")
flux_pipe.load_lora_weights(flux_lora_ckpt_path)
# flux_pipe.to(device=device_0, dtype=torch.bfloat16)
torch.cuda.empty_cache()
flux_pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(flux_pipe)

# LRM reconstruction model (PRM)
config = OmegaConf.load("./models/lrm/config/PRM_inference.yaml")
model_config = config.model_config
infer_config = config.infer_config
model = instantiate_from_config(model_config)
model_ckpt_path = hf_hub_download(repo_id="LTT/PRM", filename="final_ckpt.ckpt", repo_type="model")
state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
# Keep only the generator weights and strip the 'lrm_generator.' prefix (14 chars).
state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.')}
model.load_state_dict(state_dict, strict=True)
model = model.to(device_1)
torch.cuda.empty_cache()
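# Note on the `.__get__` assignment above: plain Python functions are
# descriptors, so `fn.__get__(obj)` returns a method bound to `obj`. That is
# how the live-preview generator from live_preview_helpers gets attached to
# this one pipeline instance without subclassing DiffusionPipeline. A toy
# illustration (hypothetical names, shown here only as a comment):
#
#     class Toy:
#         pass
#
#     def describe(self):
#         return type(self).__name__
#
#     t = Toy()
#     t.describe = describe.__get__(t)  # bound method: `self` is now `t`
#     t.describe()                      # -> "Toy"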
@spaces.GPU
def lrm_reconstructions(image, input_cameras, save_path=None, name="temp", export_texmap=False, if_save_video=False):
    images = image.unsqueeze(0).to(device_1)
    images = v2.functional.resize(images, 512, interpolation=3, antialias=True).clamp(0, 1)
    with torch.no_grad():
        # Get the triplane representation
        planes = model.forward_planes(images, input_cameras)

        mesh_path_idx = os.path.join(save_path, f'{name}.obj')
        mesh_out = model.extract_mesh(
            planes,
            use_texture_map=export_texmap,
            **infer_config,
        )
        if export_texmap:
            vertices, faces, uvs, mesh_tex_idx, tex_map = mesh_out
            save_obj_with_mtl(
                vertices.data.cpu().numpy(),
                uvs.data.cpu().numpy(),
                faces.data.cpu().numpy(),
                mesh_tex_idx.data.cpu().numpy(),
                tex_map.permute(1, 2, 0).data.cpu().numpy(),
                mesh_path_idx,
            )
        else:
            vertices, faces, vertex_colors = mesh_out
            save_obj(vertices, faces, vertex_colors, mesh_path_idx)
        print(f"Mesh saved to {mesh_path_idx}")

        render_size = 512
        if if_save_video:
            video_path_idx = os.path.join(save_path, f'{name}.mp4')
            render_size = infer_config.render_resolution
            ENV = load_mipmap("models/lrm/env_mipmap/6")
            materials = (0.0, 0.9)
            all_mv, all_mvp, all_campos = get_render_cameras_video(
                batch_size=1,
                M=240,
                radius=4.5,
                elevation=(90, 60.0),
                is_flexicubes=True,
                fov=30,
            )
            frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals, alphas = render_frames(
                model,
                planes,
                render_cameras=all_mvp,
                camera_pos=all_campos,
                env=ENV,
                materials=materials,
                render_size=render_size,
                chunk_size=20,
                is_flexicubes=True,
            )
            normals = (torch.nn.functional.normalize(normals) + 1) / 2
            normals = normals * alphas + (1 - alphas)
            all_frames = torch.cat([frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals], dim=3)
            save_video(
                all_frames,
                video_path_idx,
                fps=30,
            )
            print(f"Video saved to {video_path_idx}")

    return vertices, faces


def local_normal_global_transform(local_normal_images, azimuths_deg, elevations_deg):
    if local_normal_images.min() >= 0:
        local_normal = local_normal_images.float() * 2 - 1
    else:
        local_normal = local_normal_images.float()
    global_normal = normal_transfer.trans_local_2_global(local_normal, azimuths_deg, elevations_deg, radius=4.5, for_lotus=False)
    global_normal[..., 0] *= -1
    global_normal = (global_normal + 1) / 2
    global_normal = global_normal.permute(0, 3, 1, 2)
    return global_normal
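# Convention sketch for local_normal_global_transform (inferred from the code
# above, not an authoritative spec): the input is a B x H x W x 3 batch of
# view-space normal images, either in [0, 1] (rescaled to [-1, 1]) or already
# signed; the output is B x 3 x H x W world-space normals mapped back to
# [0, 1], with the x component flipped (presumably a handedness fix).
# Hypothetical usage:
#
#     dummy = torch.rand(4, 512, 512, 3)  # four views, as in reconstruct_3d_model
#     out = local_normal_global_transform(dummy, isomer_azimuths, isomer_elevations)
#     # out.shape == (4, 3, 512, 512)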
# Generate multi-view images
@spaces.GPU(duration=120)
def generate_multi_view_images(prompt, seed):
    # torch.cuda.empty_cache()
    # generator = torch.manual_seed(seed)
    generator = torch.Generator().manual_seed(seed)
    with torch.no_grad():
        # images = flux_pipe(
        #     prompt=prompt,
        #     num_inference_steps=10,
        #     guidance_scale=3.5,
        #     num_images_per_prompt=1,
        #     width=resolution * 4,
        #     height=resolution * 2,
        #     output_type='np',
        #     generator=generator,
        #     good_vae=good_vae,
        # ).images
        for img in flux_pipe.flux_pipe_call_that_returns_an_iterable_of_images(
            prompt=prompt,
            guidance_scale=3.5,
            num_inference_steps=10,
            width=resolution * 4,
            height=resolution * 2,
            generator=generator,
            output_type="np",
            good_vae=good_vae,
        ):
            pass
    # Return the final image; the seed and intermediate previews are handled by the external caller
    return img


# Reconstruct the 3D model
@spaces.GPU
def reconstruct_3d_model(images, prompt):
    global model
    model.init_flexicubes_geometry(device_1, fovy=50.0)
    model = model.eval()
    rgb_normal_grid = images
    save_dir_path = os.path.join(save_dir, prompt.replace(" ", "_"))
    os.makedirs(save_dir_path, exist_ok=True)

    images = torch.from_numpy(rgb_normal_grid).squeeze(0).permute(2, 0, 1).contiguous().float()  # (3, 1024, 2048)
    images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=2, m=4)  # (8, 3, 512, 512)
    rgb_multi_view = images[:4, :3, :, :]
    normal_multi_view = images[4:, :3, :, :]
    multi_view_mask = get_background(normal_multi_view)
    # Composite the foreground onto a white background
    rgb_multi_view = rgb_multi_view * multi_view_mask + (1 - multi_view_mask)

    input_cameras = get_flux_input_cameras(batch_size=1, radius=4.2, fov=30).to(device_1)
    vertices, faces = lrm_reconstructions(rgb_multi_view, input_cameras, save_path=save_dir_path, name='lrm', export_texmap=False, if_save_video=False)

    # Local normals to global normals
    global_normal = local_normal_global_transform(normal_multi_view.permute(0, 2, 3, 1), isomer_azimuths, isomer_elevations)
    global_normal = global_normal * multi_view_mask + (1 - multi_view_mask)

    global_normal = global_normal.permute(0, 2, 3, 1)
    rgb_multi_view = rgb_multi_view.permute(0, 2, 3, 1)
    multi_view_mask = multi_view_mask.permute(0, 2, 3, 1).squeeze(-1)

    vertices = torch.from_numpy(vertices).to(device_1)
    faces = torch.from_numpy(faces).to(device_1)
    vertices = vertices @ rotate_x(np.pi / 2, device=vertices.device)[:3, :3]
    vertices = vertices @ rotate_y(np.pi / 2, device=vertices.device)[:3, :3]

    # global_normal: B,H,W,3
    # multi_view_mask: B,H,W
    # rgb_multi_view: B,H,W,3
    meshes = reconstruction(
        normal_pils=global_normal,
        masks=multi_view_mask,
        weights=isomer_geo_weights,
        fov=30,
        radius=isomer_radius,
        camera_angles_azi=isomer_azimuths,
        camera_angles_ele=isomer_elevations,
        expansion_weight_stage1=0.1,
        init_type="file",
        init_verts=vertices,
        init_faces=faces,
        stage1_steps=0,
        stage2_steps=50,
        start_edge_len_stage1=0.1,
        end_edge_len_stage1=0.02,
        start_edge_len_stage2=0.02,
        end_edge_len_stage2=0.005,
    )

    save_glb_addr = projection(
        meshes,
        masks=multi_view_mask,
        images=rgb_multi_view,
        azimuths=isomer_azimuths,
        elevations=isomer_elevations,
        weights=isomer_color_weights,
        fov=30,
        radius=isomer_radius,
        save_dir=f"{save_dir_path}/ISOMER/",
    )
    return save_glb_addr


# Gradio interface function
@spaces.GPU
def gradio_pipeline(prompt, seed):
    # Generate the multi-view image grid
    rgb_normal_grid = generate_multi_view_images(prompt, seed)
    image_preview = Image.fromarray((rgb_normal_grid * 255).astype(np.uint8))

    # Reconstruct the 3D model and return the glb path
    save_glb_addr = reconstruct_3d_model(rgb_normal_grid, prompt)
    return image_preview, save_glb_addr


if __name__ == "__main__":
    prompt_input = "an owl"
    sample_seed = 42
    gradio_pipeline(prompt_input, sample_seed)
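# Note: gradio is imported but the __main__ block above only runs the pipeline
# once as a smoke test. A minimal UI wiring, assuming the two outputs map to an
# Image and a Model3D component (a sketch, not part of the original app):
#
# with gr.Blocks() as demo:
#     prompt = gr.Textbox(label="Prompt")
#     seed = gr.Number(value=42, precision=0, label="Seed")
#     btn = gr.Button("Generate")
#     preview = gr.Image(label="Multi-view RGB/normal grid")
#     mesh = gr.Model3D(label="Reconstructed mesh (.glb)")
#     btn.click(gradio_pipeline, inputs=[prompt, seed], outputs=[preview, mesh])
# demo.launch()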