Quantized with GPTQModel 4.0.0 (dev build) using the following code:
quantization code
import base64
from io import BytesIO
from random import seed, shuffle
from datasets import concatenate_datasets, load_dataset
from gptqmodel import GPTQModel, QuantizeConfig
from transformers import AutoTokenizer
# Seed Python's global `random` generator so that the `shuffle(...)` calls
# later in the script produce the same ordering on every run.
seed(0)
# Hugging Face model ID of the base model; used for both the tokenizer and
# the model that gets quantized.
MODEL_ID = "XiaomiMiMo/MiMo-VL-7B-RL-2508"
# Directory the quantized model is saved to.
SAVE_DIR = "MiMo-VL-7B-RL-2508-gptq-q4"
# Number of text-only calibration samples kept after shuffling.
NUM_TEXT_SAMPLES = 128
# Number of image+text calibration samples taken from the image dataset.
NUM_IMAGE_SAMPLES = 128
# Per-sample token cap applied when truncating text calibration samples.
MAX_TOKENS = 1024
def encode_pil_to_data_uri(pil_image) -> str:
    """Serialize an image to a base64-encoded PNG ``data:`` URI.

    Args:
        pil_image: Object exposing a PIL-style ``save(fp, format=...)``
            method (e.g. a ``PIL.Image.Image``).

    Returns:
        A string of the form ``data:image/png;base64,<payload>``.
    """
    buffer = BytesIO()
    pil_image.save(buffer, format="PNG")
    payload = base64.b64encode(buffer.getvalue()).decode("ascii")
    # Declare the full media type: the bytes are always PNG, and a bare
    # "image" without a subtype is not a valid media type for a data URI.
    # The result still starts with "data:image" and contains "base64,".
    return f"data:image/png;base64,{payload}"
def make_text_conversations(texts, tok, max_tokens=1024) -> list:
    """Build single-turn, text-only user conversations for calibration.

    Each usable text is stripped, tokenized without special tokens, cut to
    at most ``max_tokens`` token ids, and decoded back to a string that
    becomes the text of one user message.

    Args:
        texts: Iterable of candidate texts. Non-string and blank entries
            are skipped.
        tok: Tokenizer exposing ``encode(text, add_special_tokens=...)`` and
            ``decode(ids, skip_special_tokens=...)``.
        max_tokens: Maximum number of token ids kept per text.

    Returns:
        A list of conversations; each conversation is a list holding one
        ``{"role": "user", "content": [{"type": "text", "text": ...}]}``
        message.
    """
    conversations = []
    for raw in texts:
        if not isinstance(raw, str):
            continue
        stripped = raw.strip()
        if not stripped:
            continue
        token_ids = tok.encode(stripped, add_special_tokens=False)[:max_tokens]
        if not token_ids:
            continue
        truncated = tok.decode(token_ids, skip_special_tokens=True)
        # Decoding drops special tokens, so a text made only of them comes
        # back empty; an empty user turn is useless as calibration data.
        if not truncated.strip():
            continue
        message = {
            "role": "user",
            "content": [{"type": "text", "text": truncated}],
        }
        conversations.append([message])
    return conversations
def make_image_conversations(
    hf_dataset, num_samples=64, prompt="What does the image show?"
) -> list:
    """Build single-turn image+text user conversations for calibration.

    Takes the first ``num_samples`` rows of ``hf_dataset`` (or all of them
    if it is shorter), encodes each row's ``"image"`` field as a data URI
    via ``encode_pil_to_data_uri``, and pairs it with a fixed text prompt.

    Args:
        hf_dataset: Dataset supporting ``len()`` and ``select(indices)``
            whose rows have an ``"image"`` entry.
        num_samples: Maximum number of rows to use.
        prompt: Text placed after the image in every user message.

    Returns:
        A list of conversations; each conversation is a list holding one
        user message whose content is an image part followed by a text part.
    """
    sample_count = min(num_samples, len(hf_dataset))
    subset = hf_dataset.select(range(sample_count))
    return [
        [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": encode_pil_to_data_uri(row["image"])},
                    {"type": "text", "text": prompt},
                ],
            }
        ]
        for row in subset
    ]
# --- Text calibration data -------------------------------------------------
# Load one English shard and one Spanish shard of C4; each is shuffled with a
# fixed seed.
en_ds = load_dataset(
"allenai/c4", data_files="en/c4-train.00001-of-01024.json.gz", split="train"
).shuffle(seed=0)
es_ds = load_dataset(
"allenai/c4", data_files="multilingual/c4-es.tfrecord-00001-of-02048.json.gz", split="train"
).shuffle(seed=0)
# NOTE(review): this reads the "text" field of every row in both shards into
# a Python list before only NUM_TEXT_SAMPLES of them are kept below.
texts = [x["text"] for x in concatenate_datasets([en_ds, es_ds])]
# Keep only non-blank strings.
texts = [t for t in texts if isinstance(t, str) and t.strip()]
# Mix the English and Spanish texts using the seeded global RNG, then keep
# the first NUM_TEXT_SAMPLES.
shuffle(texts)
texts = texts[:NUM_TEXT_SAMPLES]
# Slow (non-fast) tokenizer of the base model; used only to truncate texts to
# MAX_TOKENS tokens.
tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)
text_conversations = make_text_conversations(texts, tok, max_tokens=MAX_TOKENS)
# --- Image calibration data ------------------------------------------------
# First 512 examples of the flickr30k test split, shuffled with a fixed seed;
# at most NUM_IMAGE_SAMPLES of them are turned into conversations.
img_ds = load_dataset("lmms-lab/flickr30k", split="test[:512]").shuffle(seed=42)
image_conversations = make_image_conversations(img_ds, num_samples=NUM_IMAGE_SAMPLES)
# Interleave text-only and image+text samples in random order.
calibration_conversations = text_conversations + image_conversations
shuffle(calibration_conversations)
print(
f"Prepared {len(text_conversations)} text-only and "
f"{len(image_conversations)} image+text conversations "
f"(total {len(calibration_conversations)})."
)
# --- Quantization ----------------------------------------------------------
# 4-bit quantization with group size 128 on the first CUDA device.
qconf = QuantizeConfig(
bits=4,
group_size=128,
device="cuda:0",
v2=False, # v2 is giving much worse results
)
# Load the base model with the quantization config, calibrate on the prepared
# conversations one sample per batch, and save the result to SAVE_DIR.
model = GPTQModel.load(MODEL_ID, qconf)
model.quantize(
calibration_conversations,
batch_size=1,
)
model.save(SAVE_DIR)
print(f"Saved quantized model to: {SAVE_DIR}")
- Downloads last month
- 112
Model tree for mediainbox/MiMo-VL-7B-RL-2508-gptq-q4
Unable to build the model tree, the base model loops to the model itself. Learn more.