cbensimon's picture
cbensimon HF Staff
Two calls in a row
04ce204
raw
history blame
2.44 kB
"""
"""
# Upgrade PyTorch
import os
# Install pre-release builds of torch/torchvision (nightly cu126 index) plus
# `spaces` at startup. The exit status returned by os.system is not checked,
# so a failed install does not stop the script at this point.
os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 torch torchvision spaces')
# CUDA toolkit install
# `install_cuda_toolkit` comes from the project's utils/cuda_toolkit module
# (not visible in this file); it is imported and invoked on the same line.
# NOTE(review): presumably both steps must complete before `import torch`
# further down so that the upgraded build is the one imported — confirm.
from utils.cuda_toolkit import install_cuda_toolkit; install_cuda_toolkit()
# Actual app.py
import os
import gradio as gr
import spaces
import torch
import torch._inductor
from torch._inductor.package import package_aoti
from torch.export.pt2_archive._package import AOTICompiledModel
from torch.export.pt2_archive._package_weights import Weights
from torchvision.models import ResNet18_Weights, resnet18
# Pretrained ResNet-18, switched to eval mode and moved to the GPU.
model = resnet18(weights=ResNet18_Weights.DEFAULT).eval().to('cuda')

# Where the packaged AOTInductor archive is written (current working directory).
package_path = os.path.join(os.getcwd(), 'resnet18.pt2')
inductor_configs = {'max_autotune': True}

# Single random CUDA tensor of shape (2, 3, 224, 224), wrapped in a tuple;
# used both as the export example and as the inference input.
example_inputs = (torch.randn(2, 3, 224, 224, device='cuda'),)
@spaces.GPU
def compile_model() -> dict[str, torch.Tensor]:
    """Export and AOT-compile the module-level ``model``; return its weights on CPU.

    The model is exported with ``example_inputs`` and compiled with
    ``torch._inductor.aot_compile`` using options that keep the constants out
    of the shared object. The string artifacts are packaged to
    ``package_path``; the weights are copied to CPU and returned separately.

    Returns:
        Mapping from weight name to a CPU copy of that weight whose storage
        has been moved to shared memory.

    Raises:
        ValueError: if the compile artifacts do not contain exactly one
            ``Weights`` entry (raised by the single-element unpacking).
    """
    compile_options = {
        'aot_inductor.package_constants_in_so': False,
        'aot_inductor.package_constants_on_disk': True,
        'aot_inductor.package': True,
        'max_autotune': True,
    }
    with torch.inference_mode():
        program = torch.export.export(model, example_inputs)
        artifacts = torch._inductor.aot_compile(
            program.module(),
            *program.example_inputs,
            options=compile_options,
        )

    # Only the string artifacts go into the package archive.
    package_aoti(package_path, [item for item in artifacts if isinstance(item, str)])

    # Exactly one Weights artifact is expected; unpacking enforces that.
    (weight_store,) = (item for item in artifacts if isinstance(item, Weights))

    host_weights: dict[str, torch.Tensor] = {}
    for key in weight_store:
        source, _properties = weight_store.get_weight(key)
        # Stage through a pinned CPU buffer, then move the copy to shared memory.
        staging = torch.empty_like(source, device='cpu').pin_memory()
        staging.copy_(source)
        host_weights[key] = staging.detach().share_memory_()
    return host_weights
# Compile once at import time and move every returned weight onto the GPU.
weights = {
    name: tensor.to('cuda')
    for name, tensor in compile_model().items()
}
# Drop the module-level reference to the eager model.
del model
# Filled in on first use by run_model().
compiled_model: AOTICompiledModel | None = None
@spaces.GPU
def run_model() -> str:
    """Run the AOT-compiled model on ``example_inputs`` and return the output as text.

    On first use in a process, the package at ``package_path`` is loaded and
    the module-level ``weights`` are attached as user-managed constants; the
    ready model is then cached in the ``compiled_model`` global. The model is
    called twice in a row and the second result is returned.

    Returns:
        ``str()`` of the output of the second call.
    """
    global compiled_model
    if compiled_model is None:
        # Publish to the global only after the constants are attached, so a
        # failing ``load_constants`` cannot leave a weightless model cached
        # for later calls to pick up.
        loaded = torch._inductor.aoti_load_package(package_path)
        loaded.load_constants(weights, check_full_update=True, user_managed=True)
        compiled_model = loaded
    # Two back-to-back calls (this appears intentional); the first result is
    # discarded and only the second is returned.
    with torch.inference_mode():
        compiled_model(example_inputs)
    with torch.inference_mode():
        return str(compiled_model(example_inputs))
# Serve run_model through a no-input Gradio interface with a text output.
gr.Interface(
    fn=run_model,
    inputs=[],
    outputs='text',
).launch(show_error=True)