Spaces:

zerogpu-aoti
/

FLUX.1-dev-fa3-aoti

Running on Zero

App Files Files Community

FLUX.1-dev-fa3-aoti / app.py

cbensimon HF Staff

Two calls in a row

04ce204 3 months ago

raw

history blame

2.44 kB

	"""
	"""

	# Upgrade PyTorch
	# Runs pip at startup to upgrade torch, torchvision and spaces, allowing
	# pre-releases (--pre) and adding the PyTorch nightly cu126 wheel index.
	# This sits ahead of the `import torch` further down, so the upgraded build
	# is the one that gets imported.
	# NOTE(review): os.system's exit status is ignored, so a failed install is
	# not detected here.
	import os
	os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 torch torchvision spaces')

	# CUDA toolkit install
	# Project-local helper, imported and invoked immediately; its behaviour is
	# defined in utils/cuda_toolkit.py (not visible here).
	from utils.cuda_toolkit import install_cuda_toolkit; install_cuda_toolkit()

	# Actual app.py
	import os

	import gradio as gr
	import spaces
	import torch
	import torch._inductor
	from torch._inductor.package import package_aoti
	from torch.export.pt2_archive._package import AOTICompiledModel
	from torch.export.pt2_archive._package_weights import Weights
	from torchvision.models import ResNet18_Weights, resnet18


	# Eager ResNet-18 with the default pretrained weights, switched to eval mode
	# and moved to the GPU (both calls return the module itself).
	model = resnet18(weights=ResNet18_Weights.DEFAULT).eval().to('cuda')

	# Example batch: one random tensor of shape (2, 3, 224, 224) on the GPU.
	example_inputs = (torch.randn(2, 3, 224, 224, device='cuda'),)

	# Inductor options (max-autotune enabled).
	inductor_configs = {'max_autotune': True}

	# The compiled package is written to / read from the current working directory.
	package_path = os.path.join(os.getcwd(), 'resnet18.pt2')

	@spaces.GPU
	def compile_model() -> dict[str, torch.Tensor]:
	    """Export, AOT-compile and package ``model``; return its weights on CPU.

	    The module-level ``model`` is exported with ``example_inputs`` and compiled
	    with ``torch._inductor.aot_compile``. The options ask Inductor to package
	    the result, to keep constants out of the shared object
	    (``package_constants_in_so=False``) and to emit them on disk instead.
	    The string artifacts are bundled into ``package_path`` with
	    ``package_aoti``.

	    Returns:
	        A dict with one CPU tensor per weight name. Each tensor is copied into
	        a pinned CPU buffer and then moved to shared memory.

	    Raises:
	        ValueError: if the compile artifacts do not contain exactly one
	            ``Weights`` entry (the single-element unpacking fails).
	    """
	    # Start from the shared module-level Inductor settings instead of repeating
	    # them here; the packaging flags are listed last so they always take
	    # precedence over anything set in inductor_configs.
	    compile_options = {
	        **inductor_configs,
	        'aot_inductor.package_constants_in_so': False,
	        'aot_inductor.package_constants_on_disk': True,
	        'aot_inductor.package': True,
	    }
	    with torch.inference_mode():
	        exported_program = torch.export.export(model, example_inputs)
	        # exported_program.example_inputs unpacks into the positional
	        # arguments that follow the module in aot_compile's signature.
	        artifacts = torch._inductor.aot_compile(
	            exported_program.module(),
	            *exported_program.example_inputs,
	            options=compile_options,
	        )

	        # String artifacts are file paths to package; the remaining artifact
	        # is the Weights container.
	        files = [artifact for artifact in artifacts if isinstance(artifact, str)]
	        package_aoti(package_path, files)
	        packaged_weights, = (
	            artifact for artifact in artifacts if isinstance(artifact, Weights)
	        )

	        host_weights: dict[str, torch.Tensor] = {}
	        for name in packaged_weights:
	            tensor, _properties = packaged_weights.get_weight(name)
	            pinned = torch.empty_like(tensor, device='cpu').pin_memory()
	            host_weights[name] = pinned.copy_(tensor).detach().share_memory_()
	        return host_weights

	# Compile once at import time, then move the returned CPU tensors to the GPU
	# so they can be handed to the compiled model as its constants.
	weights = compile_model()
	weights = {name: tensor.to('cuda') for name, tensor in weights.items()}

	# Drop the module-level reference to the eager model.
	del model

	# Populated by run_model() on its first call.
	compiled_model: AOTICompiledModel | None = None

	@spaces.GPU
	def run_model() -> str:
	    """Run the AOT-compiled package on ``example_inputs`` and return the output as text.

	    On first use the package at ``package_path`` is loaded with
	    ``torch._inductor.aoti_load_package`` and the module-level ``weights`` are
	    attached through ``load_constants``. The ready model is then cached in the
	    global ``compiled_model`` and reused by later calls.

	    Returns:
	        ``str()`` of the output of the second of two consecutive invocations.
	    """
	    global compiled_model
	    if compiled_model is None:
	        # Publish the global only once the constants are attached. If it were
	        # set first and load_constants raised, every later call would skip
	        # this branch and run a model whose weights were never loaded.
	        loaded = torch._inductor.aoti_load_package(package_path)
	        loaded.load_constants(weights, check_full_update=True, user_managed=True)
	        compiled_model = loaded
	    # NOTE(review): the first call's result is discarded; the model is invoked
	    # twice back to back on purpose - confirm intent before removing.
	    with torch.inference_mode():
	        compiled_model(example_inputs)
	    with torch.inference_mode():
	        return str(compiled_model(example_inputs))


	# Serve run_model through Gradio: no input components, one text output;
	# errors are surfaced in the UI.
	gr.Interface(fn=run_model, inputs=[], outputs='text').launch(show_error=True)