"""
"""
# Upgrade to a PyTorch nightly build; the AOTInductor weight-packaging
# options used below are only available in recent builds
import os
os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 torch torchvision spaces')
# Install the CUDA toolkit, which AOTInductor needs at build time to compile
# the generated kernels (utils.cuda_toolkit is a helper bundled with this Space).
# Both setup steps run before `import torch` below, so the app loads the
# freshly installed build.
from utils.cuda_toolkit import install_cuda_toolkit; install_cuda_toolkit()
# Actual app.py
import os
import gradio as gr
import spaces
import torch
import torch._inductor
from torch._inductor.package import package_aoti
from torch.export.pt2_archive._package import AOTICompiledModel
from torch.export.pt2_archive._package_weights import Weights
from torchvision.models import ResNet18_Weights, resnet18
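# Note: torch._inductor and torch.export.pt2_archive are private PyTorch
# APIs, so the exact import paths may shift between nightly builds.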
# Plain eager torchvision ResNet-18 with pretrained weights
model = resnet18(weights=ResNet18_Weights.DEFAULT)
model.eval()
model.to('cuda')

# Destination of the packaged AOTInductor archive
package_path = os.path.join(os.getcwd(), 'resnet18.pt2')
inductor_configs = {'max_autotune': True}

# Dummy batch used both for export and as the sample input at inference time
example_inputs = (torch.randn(2, 3, 224, 224, device='cuda'),)
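# On ZeroGPU, CUDA is only attached inside functions decorated with
# @spaces.GPU, which run in short-lived worker processes. Compilation
# therefore happens inside such a function, and the weights are copied into
# shared CPU memory (share_memory_) so they outlive the worker.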
@spaces.GPU
def compile_model():
    with torch.inference_mode():
        exported_program = torch.export.export(
            model,
            example_inputs,
        )
        artifacts = torch._inductor.aot_compile(
            exported_program.module(),
            *exported_program.example_inputs,
            options={
                # Keep the weights out of the shared object and package them
                # on disk instead, so they can be attached later by the caller
                # (see load_constants in run_model)
                'aot_inductor.package_constants_in_so': False,
                'aot_inductor.package_constants_on_disk': True,
                'aot_inductor.package': True,
                **inductor_configs,
            },
        )
    # aot_compile returns a mix of file paths and a Weights object
    files = [file for file in artifacts if isinstance(file, str)]
    package_aoti(package_path, files)
    weights, = (artifact for artifact in artifacts if isinstance(artifact, Weights))
    # Copy each weight into pinned, shared CPU memory so it can be handed
    # back to the main process once this GPU worker exits
    weights_: dict[str, torch.Tensor] = {}
    for name in weights:
        tensor, _properties = weights.get_weight(name)
        tensor_ = torch.empty_like(tensor, device='cpu').pin_memory()
        weights_[name] = tensor_.copy_(tensor).detach().share_memory_()
    return weights_
# Compile once at startup; the weights come back in shared CPU memory
weights = compile_model()
weights = {name: tensor.to('cuda') for name, tensor in weights.items()}
# The eager model is no longer needed: inference uses the packaged archive
del model
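# The packaged model is loaded lazily inside the GPU context; if the same
# worker handles several requests, the module-level cache below avoids
# reloading the archive on each call.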
compiled_model: AOTICompiledModel | None = None

@spaces.GPU
def run_model():
    global compiled_model
    if compiled_model is None:
        compiled_model = torch._inductor.aoti_load_package(package_path)
        # Attach the weights prepared at startup (user_managed=True keeps
        # ownership of the tensors on our side instead of copying them)
        compiled_model.load_constants(weights, check_full_update=True, user_managed=True)
        # Warm-up run
        with torch.inference_mode():
            compiled_model(*example_inputs)
    with torch.inference_mode():
        return str(compiled_model(*example_inputs))
# Minimal UI: a single button that runs the compiled model and shows the
# raw output tensor as text
gr.Interface(run_model, [], 'text').launch(show_error=True)