#!/usr/bin/env python3 from __future__ import annotations import logging import argparse import os import sys import json from pathlib import Path from tqdm import tqdm from typing import Any, Sequence, NamedTuple # Necessary to load the local gguf package if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists(): sys.path.insert(0, str(Path(__file__).parent.parent.parent)) import gguf logger = logging.getLogger("gguf-mmproj-merge") class MetadataDetails(NamedTuple): type: gguf.GGUFValueType value: Any description: str = '' sub_type: gguf.GGUFValueType | None = None def get_field_data(reader: gguf.GGUFReader, key: str) -> Any: field = reader.get_field(key) return field.contents() if field else None def merge_multiple_ggufs(readers: Sequence[gguf.GGUFReader], writer: gguf.GGUFWriter) -> None: total_bytes = 0 seen_fields = set() for reader in readers: for field in reader.fields.values(): # Suppress virtual fields and fields written by GGUFWriter if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.') or "projector_type" in field.name: logger.debug(f'Suppressing {field.name}') continue if field.name in seen_fields: logger.debug(f'Skipping duplicate field {field.name}') continue seen_fields.add(field.name) val_type = field.types[0] sub_type = field.types[-1] if val_type == gguf.GGUFValueType.ARRAY else None old_val = MetadataDetails(val_type, field.contents(), sub_type=sub_type) val = old_val assert val.value is not None logger.debug(f'Copying {field.name}') writer.add_key_value(field.name, val.value, val.type, sub_type=sub_type if val.sub_type is None else val.sub_type) for tensor in reader.tensors: total_bytes += tensor.n_bytes writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type) bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True) writer.add_string("clip.vision.projector_type", "pixtral") writer.add_string("clip.audio.projector_type", "voxtral") writer.write_header_to_file() writer.write_kv_data_to_file() writer.write_ti_data_to_file() for reader in readers: for tensor in reader.tensors: writer.write_tensor_data(tensor.data) bar.update(tensor.n_bytes) writer.close() def main() -> None: reader0 = gguf.GGUFReader('audio.gguf', 'r') reader1 = gguf.GGUFReader('vision.gguf', 'r') output_path = 'mmproj-model.gguf' logger.info(f'* Writing: {output_path}') writer = gguf.GGUFWriter(output_path, arch='clip', endianess=reader0.endianess) merge_multiple_ggufs([reader0, reader1], writer) if __name__ == '__main__': main()