From 2161ee01cb397242ade2196e198465c03fe0e838 Mon Sep 17 00:00:00 2001
From: saood06
Date: Wed, 11 Mar 2026 09:41:17 -0500
Subject: [PATCH] Vibe coded script + constants from mainline + pip requirements (#1405)

---
 convert_imatrix_gguf_to_dat.py                | 247 ++++++++++++++++++
 gguf-py/gguf/constants.py                     |   6 +
 requirements.txt                              |   1 +
 ...quirements-convert_imatrix_gguf_to_dat.txt |   1 +
 4 files changed, 255 insertions(+)
 create mode 100644 convert_imatrix_gguf_to_dat.py
 create mode 100644 requirements/requirements-convert_imatrix_gguf_to_dat.txt

diff --git a/convert_imatrix_gguf_to_dat.py b/convert_imatrix_gguf_to_dat.py
new file mode 100644
index 00000000..fb729a77
--- /dev/null
+++ b/convert_imatrix_gguf_to_dat.py
@@ -0,0 +1,247 @@
+from __future__ import annotations
+
+import os
+import sys
+import logging
+import argparse
+
+from pathlib import Path
+from dataclasses import dataclass
+
+import numpy as np
+import numpy.typing as npt
+
+if 'NO_LOCAL_GGUF' not in os.environ:
+    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
+import gguf
+
+
+logger = logging.getLogger("gguf-to-imatrix")
+
+
+def _key_names(attr: str, fallback: str) -> set[str]:
+    """Get possible GGUF key names, tolerating missing attributes."""
+    names = {fallback}
+    try:
+        names.add(getattr(gguf.Keys.IMatrix, attr))
+    except AttributeError:
+        # gguf.Keys has no IMatrix (or no such attribute): the fallback suffices.
+        pass
+    return names
+
+
+CHUNK_COUNT_KEYS = _key_names('CHUNK_COUNT', 'imatrix.chunk_count')
+CHUNK_SIZE_KEYS = _key_names('CHUNK_SIZE', 'imatrix.chunk_size')
+DATASET_KEYS = _key_names('DATASETS', 'imatrix.datasets')
+
+
+@dataclass
+class IMatrixEntry:
+    """Sums tensor and matching counts tensor for one imatrix entry."""
+
+    values: npt.NDArray[np.float32]
+    counts: npt.NDArray[np.float32]
+
+
+class IMatrixDatWriter:
+    """Writes the old binary imatrix .dat format."""
+
+    def __init__(self, outfile: Path):
+        self.outfile = outfile
+        self.chunk_size: int = 512
+        self.chunk_count: int = 0
+        self.dataset: str = ""
+        self.entries: dict[str, IMatrixEntry] = {}
+
+    def _legacy_payload(self, entry: IMatrixEntry) -> tuple[int, npt.NDArray[np.float32]]:
+        """Return ``(ncall, values)`` for one entry as stored in the .dat file.
+
+        Sums and counts are flattened first, so ``values.size`` is the total
+        element count even for multi-dimensional tensors. ``ncall`` comes from
+        the largest count. With a single count (or equal counts) the result is
+        exactly ``sums / chunk_size``; rows with a smaller non-zero count are
+        scaled up by ``max_count / count`` so they are not under-weighted.
+        """
+        values = np.asarray(entry.values, dtype=np.float32).reshape(-1)
+        counts = np.asarray(entry.counts, dtype=np.float32).reshape(-1)
+
+        max_count = float(counts.max()) if counts.size else 0.0
+        ncall = int(max_count / self.chunk_size)
+        scaled = values / np.float32(self.chunk_size)
+
+        nmat = counts.size
+        if nmat > 1 and values.size and values.size % nmat == 0:
+            # Assumes the flattened sums hold nmat equal, consecutive rows, one
+            # per counts element. Zero-count rows keep a factor of 1.
+            factors = np.ones(nmat, dtype=np.float32)
+            np.divide(np.float32(max_count), counts, out=factors, where=counts > 0)
+            scaled = (scaled.reshape(nmat, -1) * factors[:, None]).reshape(-1)
+
+        return ncall, scaled.astype(np.float32, copy=False)
+
+    def write(self) -> None:
+        """Write the entry table, chunk count and dataset name to ``self.outfile``.
+
+        Raises:
+            ValueError: if ``chunk_size`` is 0.
+        """
+        if self.chunk_size == 0:
+            raise ValueError("chunk_size is 0, cannot write imatrix")
+
+        with open(self.outfile, "wb") as f:
+            np.array([len(self.entries)], dtype=np.int32).tofile(f)
+
+            for name, entry in self.entries.items():
+                name_bytes = name.encode("utf-8")
+                np.array([len(name_bytes)], dtype=np.int32).tofile(f)
+                f.write(name_bytes)
+
+                ncall, values = self._legacy_payload(entry)
+                np.array([ncall], dtype=np.int32).tofile(f)
+                # nval must match the number of floats written below.
+                np.array([values.size], dtype=np.int32).tofile(f)
+
+                values.tofile(f)
+
+                logger.debug(" %s: ncall=%d, nval=%d", name, ncall, values.size)
+
+            np.array([self.chunk_count], dtype=np.int32).tofile(f)
+
+            dataset_bytes = self.dataset.encode("utf-8")
+            np.array([len(dataset_bytes)], dtype=np.int32).tofile(f)
+            if dataset_bytes:
+                f.write(dataset_bytes)
+
+
+class GGUFIMatrixReader:
+    """Reads imatrix data from a GGUF file."""
+
+    SUMS_SUFFIXES = (".sums", ".in_sum2")
+    COUNTS_SUFFIX = ".counts"
+
+    def __init__(self, gguf_path: Path):
+        """Load KV metadata and sums/counts tensor pairs from ``gguf_path``."""
+        reader = gguf.GGUFReader(gguf_path)
+
+        self.chunk_count: int = 0
+        self.chunk_size: int = 512
+        self.dataset: str = ""
+        self.entries: dict[str, IMatrixEntry] = {}
+
+        # --- Read KV metadata ---
+        for field in reader.fields.values():
+            key = field.name
+            if key in CHUNK_COUNT_KEYS:
+                val = int(field.parts[field.data[0]][0])
+                self.chunk_count = val
+            elif key in CHUNK_SIZE_KEYS:
+                val = int(field.parts[field.data[0]][0])
+                self.chunk_size = val
+            elif key in DATASET_KEYS:
+                val = bytes(field.parts[field.data[0]]).decode("utf-8")
+                self.dataset = val
+
+        # --- Read all tensors (copy + ensure float32) ---
+        tensor_map: dict[str, npt.NDArray[np.float32]] = {}
+        for tensor in reader.tensors:
+            tensor_map[tensor.name] = np.array(tensor.data, dtype=np.float32)
+            logger.debug(" Tensor: %s shape=%s", tensor.name, tensor_map[tensor.name].shape)
+
+        # --- Match sums/counts pairs ---
+        sums_tensors: dict[str, npt.NDArray[np.float32]] = {}
+        counts_tensors: dict[str, npt.NDArray[np.float32]] = {}
+
+        for tname, tdata in tensor_map.items():
+            matched_sum = False
+            for suffix in self.SUMS_SUFFIXES:
+                if tname.endswith(suffix):
+                    sums_tensors[tname[:-len(suffix)]] = tdata
+                    matched_sum = True
+                    break
+            if not matched_sum and tname.endswith(self.COUNTS_SUFFIX):
+                counts_tensors[tname[:-len(self.COUNTS_SUFFIX)]] = tdata
+
+        for name, sums in sums_tensors.items():
+            counts = counts_tensors.get(name)
+            if counts is None:
+                logger.warning("No counts tensor for %r, assuming 0", name)
+                counts = np.array([0.0], dtype=np.float32)
+            self.entries[name] = IMatrixEntry(values=sums, counts=counts)
+
+        logger.info("Loaded %d imatrix entries from GGUF", len(self.entries))
+
+        # --- Diagnostic output if nothing matched ---
+        if not self.entries:
+            logger.error("No imatrix tensor pairs found!")
+            logger.error(
+                "Expected pairs like '%s' + '%s'",
+                self.SUMS_SUFFIXES[0], self.COUNTS_SUFFIX
+            )
+            if tensor_map:
+                logger.error("Tensors actually present in the file (%d):", len(tensor_map))
+                for n in sorted(tensor_map):
+                    logger.error(" %s", n)
+            else:
+                logger.error("The file contains no tensors at all.")
+            logger.error(
+                "This file may not be a GGUF imatrix, or it may use a "
+                "naming convention this script doesn't recognize yet."
+            )
+
+    def to_writer(self, outfile: Path) -> IMatrixDatWriter:
+        """Build a writer for ``outfile`` that shares this reader's metadata and entries."""
+        writer = IMatrixDatWriter(outfile)
+        writer.chunk_count = self.chunk_count
+        writer.chunk_size = self.chunk_size
+        writer.dataset = self.dataset
+        writer.entries = self.entries
+        return writer
+
+
+def parse_args():
+    """Parse the command-line arguments (input path, --outfile, --verbose)."""
+    parser = argparse.ArgumentParser(
+        description="Convert a GGUF imatrix file to the old imatrix.dat format")
+    parser.add_argument(
+        "--outfile", type=Path,
+        help="path to write to; default: based on input.",
+    )
+    parser.add_argument(
+        "--verbose", action="store_true",
+        help="increase output verbosity",
+    )
+    parser.add_argument(
+        "imatrix", type=Path,
+        help="path to a GGUF imatrix file",
+    )
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+
+    if args.outfile is None:
+        input_file: Path = args.imatrix
+        if input_file.suffix == ".gguf":
+            args.outfile = input_file.with_suffix(".dat")
+        else:
+            args.outfile = Path(str(input_file) + ".dat")
+
+        if args.outfile.exists():
+            logger.error(
+                "Default output already exists, use --outfile to overwrite: %s",
+                args.outfile
+            )
+            sys.exit(1)
+
+    reader = GGUFIMatrixReader(args.imatrix)
+
+    if not reader.entries:
+        logger.error("Nothing to write (no entries). Re-run with --verbose for details.")
+        sys.exit(1)
+
+    writer = reader.to_writer(args.outfile)
+    writer.write()
+
+    logger.info("Wrote %d entries to %s", len(writer.entries), args.outfile)
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index d4af03c0..f4e95d0e 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -186,6 +186,11 @@ class Keys:
         TYPE = "adapter.type"
         LORA_ALPHA = "adapter.lora.alpha"
 
+    class IMatrix:
+        CHUNK_COUNT = "imatrix.chunk_count"
+        CHUNK_SIZE = "imatrix.chunk_size"
+        DATASETS = "imatrix.datasets"
+
 #
 # recommended mapping of model tensor names for storage in gguf
 #
@@ -194,6 +199,7 @@ class Keys:
 class GGUFType:
     MODEL = "model"
     ADAPTER = "adapter"
+    IMATRIX = "imatrix"
 
 
 class MODEL_ARCH(IntEnum):
diff --git a/requirements.txt b/requirements.txt
index f2a18d62..61c83317 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,3 +11,4 @@
 -r ./requirements/requirements-convert_llama_ggml_to_gguf.txt
 -r ./requirements/requirements-convert_lora_to_gguf.txt
 -r ./requirements/requirements-tool_bench.txt
+-r ./requirements/requirements-convert_imatrix_gguf_to_dat.txt
diff --git a/requirements/requirements-convert_imatrix_gguf_to_dat.txt b/requirements/requirements-convert_imatrix_gguf_to_dat.txt
new file mode 100644
index 00000000..538dca30
--- /dev/null
+++ b/requirements/requirements-convert_imatrix_gguf_to_dat.txt
@@ -0,0 +1 @@
+numpy~=1.26.4
\ No newline at end of file