cbensimon HF Staff committed on
Commit
ce8b907
·
1 Parent(s): 49c2af2

Further optimize + quantize

Browse files
Files changed (2) hide show
  1. app.py +8 -0
  2. requirements.txt +1 -0
app.py CHANGED
@@ -15,6 +15,8 @@ import gradio as gr
15
  import spaces
16
  import torch
17
  from diffusers import FluxPipeline
 
 
18
 
19
  from zerogpu import aoti_compile
20
 
@@ -26,6 +28,12 @@ print('FluxPipeline.from_pretrained', -(t0 - (t0 := datetime.now())))
26
  @spaces.GPU(duration=1500)
27
  def compile_transformer():
28
 
 
 
 
 
 
 
29
  def _example_tensor(*shape):
30
  return torch.randn(*shape, device='cuda', dtype=torch.bfloat16)
31
 
 
15
  import spaces
16
  import torch
17
  from diffusers import FluxPipeline
18
+ from torchao.quantization import quantize_
19
+ from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
20
 
21
  from zerogpu import aoti_compile
22
 
 
28
  @spaces.GPU(duration=1500)
29
  def compile_transformer():
30
 
31
+ pipeline.transformer.fuse_qkv_projections()
32
+ pipeline.vae.fuse_qkv_projections()
33
+ pipeline.vae.to(memory_format=torch.channels_last)
34
+
35
+ quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())
36
+
37
  def _example_tensor(*shape):
38
  return torch.randn(*shape, device='cuda', dtype=torch.bfloat16)
39
 
requirements.txt CHANGED
@@ -3,3 +3,4 @@ diffusers
3
  transformers
4
  sentencepiece
5
  protobuf
 
 
3
  transformers
4
  sentencepiece
5
  protobuf
6
+ torchao