Update README.md
Browse files
README.md
CHANGED
|
@@ -1,3 +1,37 @@
|
|
| 1 |
-
---
|
| 2 |
-
license: apache-2.0
|
| 3 |
-
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
---
|
| 4 |
+
|
| 5 |
+
Use it like this:
|
| 6 |
+
|
| 7 |
+
```py
|
| 8 |
+
# Where the INT8-quantized modules are cached on disk.
# NOTE(review): the original literal was "qwen-image-int8-quanto " with a
# trailing space — almost certainly a typo (trailing spaces are invalid in
# Windows directory names and easy to mistype elsewhere), so it is removed
# here. If modules were already saved under the space-suffixed name,
# rename that directory once.
CACHE_ROOT = pathlib.Path("qwen-image-int8-quanto")  # where we store INT8 modules
TRANSFORMER_DIR = CACHE_ROOT / "qwen_image_transformer_int8"
TEXT_ENCODER_DIR = CACHE_ROOT / "qwen_text_encoder_int8"
|
| 11 |
+
|
| 12 |
+
def load_quantized_modules(transformer_dir: pathlib.Path, text_encoder_dir: pathlib.Path):
    """
    Load the quantized transformer and text-encoder modules from disk.

    Each directory is expected to contain a ``pytorch_model.bin`` file —
    the exact filename we saved the modules under so the loaders find them.

    Parameters
    ----------
    transformer_dir : pathlib.Path
        Directory holding the saved quantized transformer module.
    text_encoder_dir : pathlib.Path
        Directory holding the saved quantized text-encoder module.

    Returns
    -------
    tuple
        ``(transformer, text_encoder)`` — the deserialized module objects.
    """
    # SECURITY: weights_only=False unpickles arbitrary Python objects. That is
    # required to restore full quantized module instances (not just tensors),
    # but is only safe for files you created and trust yourself.
    # torch.load accepts os.PathLike directly, so no str() wrapping is needed.
    tr = torch.load(transformer_dir / 'pytorch_model.bin', weights_only=False)
    te = torch.load(text_encoder_dir / 'pytorch_model.bin', weights_only=False)
    return tr, te
|
| 19 |
+
|
| 20 |
+
def build_pipe(cls, transformer_dir: pathlib.Path, text_encoder_dir: pathlib.Path):
    """
    Construct a pipeline of class `cls` around the quantized modules on disk.

    Deserializing brand-new module objects for every pipeline (instead of
    reusing instances attached to an earlier pipe) sidesteps hangs caused by
    stale offload hooks and reused device state.
    """
    int8_transformer, int8_text_encoder = load_quantized_modules(
        transformer_dir, text_encoder_dir
    )

    # Everything except the two quantized modules comes from the base repo.
    pipe = cls.from_pretrained(
        BASE_MODEL_ID,
        transformer=int8_transformer,
        text_encoder=int8_text_encoder,
        torch_dtype=torch.bfloat16,
        use_safetensors=True,
        low_cpu_mem_usage=True,
    )

    # Keep GPU memory low by paging modules in/out around each forward pass,
    # and keep the progress bar visible.
    pipe.enable_model_cpu_offload()
    pipe.set_progress_bar_config(disable=False)
    return pipe
|
| 37 |
+
```
|