Spaces: Running on Zero
Further optimize + quantize
- app.py +8 -0
- requirements.txt +1 -0
app.py CHANGED
@@ -15,6 +15,8 @@ import gradio as gr
 import spaces
 import torch
 from diffusers import FluxPipeline
+from torchao.quantization import quantize_
+from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
 
 from zerogpu import aoti_compile
 
@@ -26,6 +28,12 @@ print('FluxPipeline.from_pretrained', -(t0 - (t0 := datetime.now())))
 @spaces.GPU(duration=1500)
 def compile_transformer():
 
+    pipeline.transformer.fuse_qkv_projections()
+    pipeline.vae.fuse_qkv_projections()
+    pipeline.vae.to(memory_format=torch.channels_last)
+
+    quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())
+
     def _example_tensor(*shape):
         return torch.randn(*shape, device='cuda', dtype=torch.bfloat16)
 
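For context, here is a minimal standalone sketch of what the added fusion and quantization lines do, assuming a FluxPipeline already loaded on CUDA. The checkpoint id, prompt, and step count below are placeholders (the Space's actual model and the surrounding aoti_compile plumbing are not part of this hunk):

import torch
from diffusers import FluxPipeline
from torchao.quantization import quantize_
from torchao.quantization import Float8DynamicActivationFloat8WeightConfig

# Placeholder checkpoint; the Space's actual model id is not shown in this diff.
pipeline = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
).to("cuda")

# Fuse the separate Q/K/V projections into single matmuls in both the
# transformer and the VAE, cutting kernel launches per attention block.
pipeline.transformer.fuse_qkv_projections()
pipeline.vae.fuse_qkv_projections()

# channels_last memory format generally speeds up the VAE's convolutions on GPU.
pipeline.vae.to(memory_format=torch.channels_last)

# Swap the transformer's linear layers to float8 dynamic-activation /
# float8-weight kernels via torchao (assumes a GPU with fp8 support, e.g. H100).
quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())

# Illustrative generation call; prompt and step count are arbitrary.
image = pipeline("a cat holding a sign", num_inference_steps=28).images[0]
image.save("flux_fp8.png")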
requirements.txt CHANGED
@@ -3,3 +3,4 @@ diffusers
 transformers
 sentencepiece
 protobuf
+torchao