""" """ # Upgrade PyTorch import os os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 torch torchvision spaces') # CUDA toolkit install from utils.cuda_toolkit import install_cuda_toolkit; install_cuda_toolkit() # Actual app.py import os import gradio as gr import spaces import torch import torch._inductor from torch._inductor.package import package_aoti from torch.export.pt2_archive._package import AOTICompiledModel from torch.export.pt2_archive._package_weights import Weights from torchvision.models import ResNet18_Weights, resnet18 model = resnet18(weights=ResNet18_Weights.DEFAULT) model.eval() model.to('cuda') package_path = os.path.join(os.getcwd(), 'resnet18.pt2') inductor_configs = {'max_autotune': True} example_inputs = (torch.randn(2, 3, 224, 224, device='cuda'),) @spaces.GPU def compile_model(): with torch.inference_mode(): exported_program = torch.export.export( model, example_inputs, ) artifacts = torch._inductor.aot_compile(exported_program.module(), *exported_program.example_inputs, options={ 'aot_inductor.package_constants_in_so': False, 'aot_inductor.package_constants_on_disk': True, 'aot_inductor.package': True, 'max_autotune': True, }) files = [file for file in artifacts if isinstance(file, str)] package_aoti(package_path, files) weights, = (artifact for artifact in artifacts if isinstance(artifact, Weights)) weights_: dict[str, torch.Tensor] = {} for name in weights: tensor, _properties = weights.get_weight(name) tensor_ = torch.empty_like(tensor, device='cpu').pin_memory() weights_[name] = tensor_.copy_(tensor).detach().share_memory_() return weights_ weights = compile_model() weights = {name: tensor.to('cuda') for name, tensor in weights.items()} del model compiled_model: AOTICompiledModel | None = None @spaces.GPU def run_model(): global compiled_model if compiled_model is None: compiled_model = torch._inductor.aoti_load_package(package_path) compiled_model.load_constants(weights, check_full_update=True, user_managed=True) with torch.inference_mode(): compiled_model(example_inputs) with torch.inference_mode(): return str(compiled_model(example_inputs)) gr.Interface(run_model, [], 'text').launch(show_error=True)