jimmycarter committed on
Commit
9da6974
·
verified ·
1 Parent(s): d32b08f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +37 -3
README.md CHANGED
@@ -1,3 +1,37 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+
5
+ Use like:
6
+
7
+ ```py
8
+ CACHE_ROOT = pathlib.Path("qwen-image-int8-quanto")  # where we store INT8 modules
9
+ TRANSFORMER_DIR = CACHE_ROOT / "qwen_image_transformer_int8"
10
+ TEXT_ENCODER_DIR = CACHE_ROOT / "qwen_text_encoder_int8"
11
+
12
+ def load_quantized_modules(transformer_dir: pathlib.Path, text_encoder_dir: pathlib.Path):
13
+ """
14
+ Load quantized modules (we saved them with the exact filenames the loaders expect).
15
+ """
16
+ tr = torch.load(str(transformer_dir / 'pytorch_model.bin'), weights_only=False)
17
+ te = torch.load(str(text_encoder_dir / 'pytorch_model.bin'), weights_only=False)
18
+ return tr, te
19
+
20
+ def build_pipe(cls, transformer_dir: pathlib.Path, text_encoder_dir: pathlib.Path):
21
+ """
22
+ Build a pipeline of class `cls` by loading the quantized modules from disk.
23
+ Loading fresh module instances each time avoids hangs caused by reusing offload hooks and stale module state.
24
+ """
25
+ transformer, text_encoder = load_quantized_modules(transformer_dir, text_encoder_dir)
26
+ pipe = cls.from_pretrained(
27
+ BASE_MODEL_ID,
28
+ transformer=transformer,
29
+ text_encoder=text_encoder,
30
+ torch_dtype=torch.bfloat16,
31
+ use_safetensors=True,
32
+ low_cpu_mem_usage=True,
33
+ )
34
+ pipe.enable_model_cpu_offload()
35
+ pipe.set_progress_bar_config(disable=False)
36
+ return pipe
37
+ ```