Spaces:

zerogpu-aoti
/

FLUX.1-dev-fa3-aoti

Running on Zero

File size: 2,443 Bytes

8c2e0d0
 
 
ee7158f
 
4e717d6
ee7158f
 
8c2e0d0
ee7158f
4e717d6
 
 
 
8c2e0d0
4e717d6
 
495d59a
8012ef2
 
4e717d6
 
 
 
 
 
 
 
 
 
 
8c2e0d0
4e717d6
 
 
 
 
 
8012ef2
 
 
 
 
 
495d59a
 
8012ef2
 
 
 
9292d1e
 
8012ef2
 
 
060b6b6
 
8012ef2
8c2e0d0
4f27510
 
4e717d6
 
4f27510
 
 
 
04ce204
 
4e717d6
 
2e8adf4
 
8012ef2

"""
"""

# Upgrade PyTorch
# Shells out to pip at startup to upgrade torch, torchvision and spaces,
# allowing pre-releases (--pre) and adding the PyTorch nightly cu126 wheel
# index. This runs before the torch/spaces imports further down, presumably
# so that those imports pick up the upgraded packages.
# NOTE(review): the exit status returned by os.system is ignored, so a failed
# install is not reported here.
import os
os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 torch torchvision spaces')

# CUDA toolkit install
# NOTE(review): install_cuda_toolkit is a project-local helper that is not
# visible in this file; presumably it provides the CUDA toolchain needed by
# the ahead-of-time compilation below -- confirm against utils/cuda_toolkit.
from utils.cuda_toolkit import install_cuda_toolkit; install_cuda_toolkit()

# Actual app.py
import os

import gradio as gr
import spaces
import torch
import torch._inductor
from torch._inductor.package import package_aoti
from torch.export.pt2_archive._package import AOTICompiledModel
from torch.export.pt2_archive._package_weights import Weights
from torchvision.models import ResNet18_Weights, resnet18


# Where the packaged AOTInductor artifact is written and later loaded from:
# a file named 'resnet18.pt2' in the current working directory.
package_path = os.path.join(os.getcwd(), 'resnet18.pt2')

# NOTE(review): this dict is not referenced by any code visible in this file;
# compile_model() passes its own options (including 'max_autotune').
inductor_configs = {'max_autotune': True}

# Pretrained ResNet-18 switched to eval mode and moved to the GPU.
# Module.eval() and Module.to() both return the module itself, so the chain
# binds the same object the separate calls would have configured.
model = resnet18(weights=ResNet18_Weights.DEFAULT).eval().to('cuda')

# One random CUDA tensor of shape (2, 3, 224, 224), wrapped in a 1-tuple.
# Used both as the export example input and as the input fed to the compiled
# model in run_model().
example_inputs = (
    torch.randn(2, 3, 224, 224, device='cuda'),
)

@spaces.GPU
def compile_model() -> dict[str, torch.Tensor]:
    """Export and AOT-compile the module-level ``model`` and return its weights.

    The model is exported with ``torch.export.export`` using the module-level
    ``example_inputs`` and handed to ``torch._inductor.aot_compile`` together
    with the options defined below. Both steps run under
    ``torch.inference_mode()``.

    Side effects:
        Every string artifact returned by the compiler is bundled into
        ``package_path`` via ``package_aoti``.

    Returns:
        A dict mapping each weight name to a detached CPU copy of the weight
        tensor whose storage has been moved to shared memory.
        NOTE(review): presumably shared memory is used so the tensors can be
        handed back from the ``spaces.GPU`` worker to the caller -- confirm.

    Raises:
        ValueError: If the compile artifacts do not contain exactly one
            ``Weights`` object (the single-element unpacking fails).
    """
    aot_options = {
        'aot_inductor.package_constants_in_so': False,
        'aot_inductor.package_constants_on_disk': True,
        'aot_inductor.package': True,
        'max_autotune': True,
    }

    with torch.inference_mode():
        program = torch.export.export(model, example_inputs)
        # The exported program's recorded example inputs are unpacked into
        # the positional arguments that follow the module.
        artifacts = torch._inductor.aot_compile(
            program.module(),
            *program.example_inputs,
            options=aot_options,
        )

    # String artifacts go into the package file.
    package_aoti(
        package_path,
        [item for item in artifacts if isinstance(item, str)],
    )

    # Exactly one Weights artifact is expected; the unpacking raises otherwise.
    (constants,) = [item for item in artifacts if isinstance(item, Weights)]

    shared_weights: dict[str, torch.Tensor] = {}
    for name in constants:
        source, _ = constants.get_weight(name)
        # Copy into a pinned CPU buffer shaped like the source tensor, then
        # detach and move the storage to shared memory.
        staging = torch.empty_like(source, device='cpu').pin_memory()
        staging.copy_(source)
        shared_weights[name] = staging.detach().share_memory_()
    return shared_weights

# Compile once at import time. compile_model() returns CPU tensors keyed by
# weight name; each one is moved to the GPU for use by run_model().
weights = {
    key: cpu_tensor.to('cuda')
    for key, cpu_tensor in compile_model().items()
}

# The eager model is not used after compilation: run_model() only relies on
# the packaged artifact and `weights`.
del model

# Handle to the loaded AOTI package; stays None until run_model() first
# loads it.
compiled_model: AOTICompiledModel | None = None


@spaces.GPU
def run_model() -> str:
    """Run the AOT-compiled model on ``example_inputs`` and return the output as text.

    On the first call (while ``compiled_model`` is still ``None``) the package
    at ``package_path`` is loaded and the module-level ``weights`` are
    attached with ``load_constants``. The model is then called twice under
    ``torch.inference_mode()``; only the second output is returned.

    Returns:
        ``str()`` of the second call's output.
    """
    global compiled_model
    if compiled_model is None:
        compiled_model = torch._inductor.aoti_load_package(package_path)
        compiled_model.load_constants(
            weights,
            check_full_update=True,
            user_managed=True,
        )

    with torch.inference_mode():
        # First invocation: the output is discarded.
        # NOTE(review): presumably a warm-up run -- confirm.
        compiled_model(example_inputs)
        prediction = compiled_model(example_inputs)
        return str(prediction)


# Serve run_model through a Gradio interface with no input components and a
# single text output; show_error=True is passed to launch().
demo = gr.Interface(fn=run_model, inputs=[], outputs='text')
demo.launch(show_error=True)