Make gguf-py stuff work with numpy 2.0 (#991)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
2026-04-30 19:31:48 +00:00 · 2025-11-20 10:20:55 +01:00
parent 187e37bad8
commit e919c00cc9
3 changed files with 339 additions and 35 deletions
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -6,6 +6,7 @@ from __future__ import annotations
 import logging
 import os
 import sys
 from collections import OrderedDict
 from typing import Any, Literal, NamedTuple, TypeVar, Union
@@ -15,7 +16,6 @@ import numpy.typing as npt
 from .quants import quant_shape_to_byte_shape
 if __name__ == "__main__":
    import sys
    from pathlib import Path
    # Allow running file in package as a script.
@@ -28,6 +28,7 @@ from gguf.constants import (
    GGUF_VERSION,
    GGMLQuantizationType,
    GGUFValueType,
    GGUFEndian,
 )
 logger = logging.getLogger(__name__)
@@ -53,6 +54,48 @@ class ReaderField(NamedTuple):
    types: list[GGUFValueType] = []
    def contents(self, index_or_slice: int | slice = slice(None)) -> Any:
        """Return this field's value converted to plain Python objects.

        Args:
            index_or_slice: Only used for array fields. An ``int`` selects a
                single element, a ``slice`` selects a list of elements. The
                default selects every element.

        Returns:
            ``None`` when the field carries no type information. For a string
            field, the last part decoded as UTF-8. For an array field, the
            selected element (``int`` index) or a list of the selected
            elements (``slice``); string elements are decoded as UTF-8.
            For any other field, the first value of the last part.
        """
        if not self.types:
            return None

        def decode(part: Any) -> str:
            # String payloads are held as raw bytes; interpret them as UTF-8.
            return str(part.tobytes(), encoding='utf-8')

        kind = self.types[0]

        # Non-array fields keep their payload in the last part.
        if kind != GGUFValueType.ARRAY:
            payload = self.parts[-1]
            if kind == GGUFValueType.STRING:
                return decode(payload)
            return payload.tolist()[0]

        # self.data holds indices into self.parts, one per array element.
        selected = self.data[index_or_slice]
        want_single = isinstance(index_or_slice, int)

        # The last entry of self.types is the element type of the array.
        if self.types[-1] == GGUFValueType.STRING:
            if want_single:
                return decode(self.parts[selected])  # type: ignore
            return [decode(self.parts[part_idx]) for part_idx in selected]  # type: ignore

        # FIXME: When/if _get_field_parts() support multi-dimensional arrays, this must do so too
        # Disabled slice optimization, kept for reference:
        # Check if it's unsafe to perform slice optimization on data
        # if any(True for idx in self.data if len(self.parts[idx]) != 1):
        #     optim_slice = slice(None)
        # else:
        #     optim_slice = index_or_slice
        #     index_or_slice = slice(None)
        # if isinstance(optim_slice, int):
        #     return self.parts[self.data[optim_slice]].tolist()[0]
        # else:
        #     return [pv for idx in self.data[optim_slice] for pv in self.parts[idx].tolist()][index_or_slice]
        if want_single:
            return self.parts[selected].tolist()[0]

        # Flatten the values of every selected part into one list.
        flattened = []
        for part_idx in selected:
            flattened.extend(self.parts[part_idx].tolist())
        return flattened
 class ReaderTensor(NamedTuple):
    name: str
@@ -101,10 +144,19 @@ class GGUFReader:
            # If we get 0 here that means it's (probably) a GGUF file created for
            # the opposite byte order of the machine this script is running on.
            self.byte_order = 'S'
-            temp_version = temp_version.newbyteorder(self.byte_order)
+            temp_version = temp_version.view(temp_version.dtype.newbyteorder(self.byte_order))
        version = temp_version[0]
        if version not in READER_SUPPORTED_VERSIONS:
            raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
        if sys.byteorder == "little":
            # Host is little endian
            host_endian = GGUFEndian.LITTLE
            swapped_endian = GGUFEndian.BIG
        else:
            # Sorry PDP or other weird systems that don't use BE or LE.
            host_endian = GGUFEndian.BIG
            swapped_endian = GGUFEndian.LITTLE
        self.endianess = swapped_endian if self.byte_order == "S" else host_endian
        self.fields: OrderedDict[str, ReaderField] = OrderedDict()
        self.tensors: list[ReaderTensor] = []
        offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
@@ -145,11 +197,8 @@ class GGUFReader:
        count = int(count)
        itemsize = int(np.empty([], dtype = dtype).itemsize)
        end_offs = offset + itemsize * count
-        return (
+        arr = self.data[offset:end_offs].view(dtype=dtype)[:count]
-            self.data[offset:end_offs]
+        return arr.view(arr.dtype.newbyteorder(self.byte_order if override_order is None else override_order))
            .view(dtype = dtype)[:count]
            .newbyteorder(override_order or self.byte_order)
        )
    def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
        if field.name in self.fields:
@@ -191,6 +240,7 @@ class GGUFReader:
            offs += int(alen.nbytes)
            aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
            data_idxs: list[int] = []
            # FIXME: Handle multi-dimensional arrays properly instead of flattening
            for idx in range(alen[0]):
                curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
                if idx == 0:
@@ -201,7 +251,7 @@ class GGUFReader:
                offs += curr_size
            return offs - orig_offs, aparts, data_idxs, types
        # We can't deal with this one.
-        raise ValueError('Unknown/unhandled field type {gtype}')
+        raise ValueError(f'Unknown/unhandled field type {gtype}')
    def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
        offs = orig_offs
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -49,6 +49,7 @@ class TensorInfo:
 class GGUFValue:
    value: Any
    type: GGUFValueType
    sub_type: GGUFValueType | None = None
 class WriterState(Enum):
@@ -137,8 +138,9 @@ class GGUFWriter:
                size = prod(shape)
                if "_exps." in name:
-                    expert_params += (size // shape[-3])
+                    expert_count = shape[-2 if ".bias" in name else -3]
-                    expert_sum += shape[-3]
+                    expert_params += (size // expert_count)
                    expert_sum += expert_count
                    n_expert_tensors += 1
                else:
                    shared_params += size
@@ -238,7 +240,7 @@ class GGUFWriter:
            for key, val in kv_data.items():
                kv_bytes += self._pack_val(key, GGUFValueType.STRING, add_vtype=False)
-                kv_bytes += self._pack_val(val.value, val.type, add_vtype=True)
+                kv_bytes += self._pack_val(val.value, val.type, add_vtype=True, sub_type=val.sub_type)
            fout.write(kv_bytes)
@@ -268,11 +270,11 @@ class GGUFWriter:
            fout.flush()
        self.state = WriterState.TI_DATA
-    def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
+    def add_key_value(self, key: str, val: Any, vtype: GGUFValueType, sub_type: GGUFValueType | None = None) -> None:
        if any(key in kv_data for kv_data in self.kv_data):
-            raise ValueError(f'Duplicated key name {key!r}')
+            logger.warning(f'Duplicated key name {key!r}, overwriting it with new value {val!r} of type {vtype.name}')
-        self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
+        self.kv_data[0][key] = GGUFValue(value=val, type=vtype, sub_type=sub_type)
    def add_uint8(self, key: str, val: int) -> None:
        """Register ``val`` under ``key`` with the GGUF value type ``UINT8``."""
        self.add_key_value(key, val, GGUFValueType.UINT8)
@@ -569,6 +571,9 @@ class GGUFWriter:
    def add_base_model_organization(self, source_id: int, organization: str) -> None:
        self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)
    def add_base_model_description(self, source_id: int, description: str) -> None:
        self.add_string(Keys.General.BASE_MODEL_DESCRIPTION.format(id=source_id), description)
    def add_base_model_url(self, source_id: int, url: str) -> None:
        self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
@@ -581,15 +586,42 @@ class GGUFWriter:
    def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
        self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)
    def add_dataset_count(self, source_count: int) -> None:
        self.add_uint32(Keys.General.DATASET_COUNT, source_count)
    def add_dataset_name(self, source_id: int, name: str) -> None:
        self.add_string(Keys.General.DATASET_NAME.format(id=source_id), name)
    def add_dataset_author(self, source_id: int, author: str) -> None:
        self.add_string(Keys.General.DATASET_AUTHOR.format(id=source_id), author)
    def add_dataset_version(self, source_id: int, version: str) -> None:
        self.add_string(Keys.General.DATASET_VERSION.format(id=source_id), version)
    def add_dataset_organization(self, source_id: int, organization: str) -> None:
        self.add_string(Keys.General.DATASET_ORGANIZATION.format(id=source_id), organization)
    def add_dataset_description(self, source_id: int, description: str) -> None:
        self.add_string(Keys.General.DATASET_DESCRIPTION.format(id=source_id), description)
    def add_dataset_url(self, source_id: int, url: str) -> None:
        self.add_string(Keys.General.DATASET_URL.format(id=source_id), url)
    def add_dataset_doi(self, source_id: int, doi: str) -> None:
        self.add_string(Keys.General.DATASET_DOI.format(id=source_id), doi)
    def add_dataset_uuid(self, source_id: int, uuid: str) -> None:
        self.add_string(Keys.General.DATASET_UUID.format(id=source_id), uuid)
    def add_dataset_repo_url(self, source_id: int, repo_url: str) -> None:
        self.add_string(Keys.General.DATASET_REPO_URL.format(id=source_id), repo_url)
    def add_tags(self, tags: Sequence[str]) -> None:
        self.add_array(Keys.General.TAGS, tags)
    def add_languages(self, languages: Sequence[str]) -> None:
        self.add_array(Keys.General.LANGUAGES, languages)
    def add_datasets(self, datasets: Sequence[str]) -> None:
        self.add_array(Keys.General.DATASETS, datasets)
    def add_tensor_data_layout(self, layout: str) -> None:
        self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
@@ -602,6 +634,24 @@ class GGUFWriter:
    def add_embedding_length(self, length: int) -> None:
        self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
    def add_features_length(self, length: int) -> None:
        self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)
    def add_posnet_embedding_length(self, length: int) -> None:
        self.add_uint32(Keys.PosNet.EMBEDDING_LENGTH.format(arch=self.arch), length)
    def add_posnet_block_count(self, length: int) -> None:
        self.add_uint32(Keys.PosNet.BLOCK_COUNT.format(arch=self.arch), length)
    def add_convnext_embedding_length(self, length: int) -> None:
        self.add_uint32(Keys.ConvNext.EMBEDDING_LENGTH.format(arch=self.arch), length)
    def add_convnext_block_count(self, length: int) -> None:
        self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)
    def add_shortconv_l_cache(self, length: int) -> None:
        self.add_uint32(Keys.ShortConv.L_CACHE.format(arch=self.arch), length)
    def add_block_count(self, length: int) -> None:
        self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
@@ -620,12 +670,30 @@ class GGUFWriter:
    def add_expert_shared_feed_forward_length(self, length: int) -> None:
        self.add_uint32(Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
    def add_expert_chunk_feed_forward_length(self, length: int) -> None:
        self.add_uint32(Keys.LLM.EXPERT_CHUNK_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
    def add_parallel_residual(self, use: bool) -> None:
        self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
    def add_decoder_start_token_id(self, id: int) -> None:
        self.add_uint32(Keys.LLM.DECODER_START_TOKEN_ID.format(arch=self.arch), id)
    def add_decoder_block_count(self, value: int) -> None:
        self.add_uint32(Keys.LLM.DECODER_BLOCK_COUNT.format(arch=self.arch), value)
    def add_embedding_length_per_layer_input(self, value: int) -> None:
        self.add_uint32(Keys.LLM.EMBD_LENGTH_PER_LAYER_INP.format(arch=self.arch), value)
    def add_altup_active_idx(self, val: int) -> None:
        self.add_uint32(Keys.LLM.ALTUP_ACTIVE_IDX.format(arch=self.arch), val)
    def add_altup_num_inputs(self, val: int) -> None:
        self.add_uint32(Keys.LLM.ALTUP_NUM_INPUTS.format(arch=self.arch), val)
    def add_activation_sparsity_scale(self, values: Sequence[float]) -> None:
        self.add_array(Keys.LLM.ACTIVATION_SPARSITY_SCALE.format(arch=self.arch), values)
    def add_head_count(self, count: int | Sequence[int]) -> None:
        if isinstance(count, int):
            self.add_uint32(Keys.Attention.HEAD_COUNT.format(arch=self.arch), count)
@@ -644,12 +712,28 @@ class GGUFWriter:
    def add_value_length(self, length: int) -> None:
        self.add_uint32(Keys.Attention.VALUE_LENGTH.format(arch=self.arch), length)
    def add_key_length_mla(self, length: int) -> None:
        self.add_uint32(Keys.Attention.KEY_LENGTH_MLA.format(arch=self.arch), length)
    def add_value_length_mla(self, length: int) -> None:
        self.add_uint32(Keys.Attention.VALUE_LENGTH_MLA.format(arch=self.arch), length)
    def add_max_alibi_bias(self, bias: float) -> None:
        self.add_float32(Keys.Attention.MAX_ALIBI_BIAS.format(arch=self.arch), bias)
    def add_clamp_kqv(self, value: float) -> None:
        self.add_float32(Keys.Attention.CLAMP_KQV.format(arch=self.arch), value)
    def add_shared_kv_layers(self, value: int) -> None:
        self.add_uint32(Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch), value)
    def add_sliding_window_pattern(self, value: Sequence[bool]) -> None:
        self.add_array(Keys.Attention.SLIDING_WINDOW_PATTERN.format(arch=self.arch), value)
    def add_dense_features_dims(self, dense: str, in_f: int, out_f: int) -> None:
        """Write the input and output sizes of a dense feature layer.

        Both keys are formatted with the writer's ``arch`` and the given
        ``dense`` name; ``in_f`` and ``out_f`` are stored as uint32 values.
        """
        key_args = {"arch": self.arch, "dense": dense}
        self.add_uint32(Keys.LLM.DENSE_FEAT_IN_SIZE.format(**key_args), in_f)
        self.add_uint32(Keys.LLM.DENSE_FEAT_OUT_SIZE.format(**key_args), out_f)
    def add_logit_scale(self, value: float) -> None:
        self.add_float32(Keys.LLM.LOGIT_SCALE.format(arch=self.arch), value)
@@ -686,15 +770,57 @@ class GGUFWriter:
    def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
        self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
    def add_expert_group_scale(self, value: float) -> None:
        self.add_float32(Keys.LLM.EXPERT_GROUP_SCALE.format(arch=self.arch), value)
    def add_experts_per_group(self, count: int) -> None:
        self.add_uint32(Keys.LLM.EXPERTS_PER_GROUP.format(arch=self.arch), count)
    def add_moe_every_n_layers(self, value: int) -> None:
        self.add_uint32(Keys.LLM.MOE_EVERY_N_LAYERS.format(arch=self.arch), value)
    def add_nextn_predict_layers(self, count: int) -> None:
        self.add_uint32(Keys.LLM.NEXTN_PREDICT_LAYERS.format(arch=self.arch), count)
    def add_swin_norm(self, value: bool) -> None:
        self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)
    def add_rescale_every_n_layers(self, count: int) -> None:
        self.add_uint32(Keys.LLM.RESCALE_EVERY_N_LAYERS.format(arch=self.arch), count)
    def add_time_mix_extra_dim(self, dim: int) -> None:
        self.add_uint32(Keys.LLM.TIME_MIX_EXTRA_DIM.format(arch=self.arch), dim)
    def add_time_decay_extra_dim(self, dim: int) -> None:
        self.add_uint32(Keys.LLM.TIME_DECAY_EXTRA_DIM.format(arch=self.arch), dim)
    def add_residual_scale(self, value: float) -> None:
        self.add_float32(Keys.LLM.RESIDUAL_SCALE.format(arch=self.arch), value)
    def add_embedding_scale(self, value: float) -> None:
        self.add_float32(Keys.LLM.EMBEDDING_SCALE.format(arch=self.arch), value)
    def add_wkv_head_size(self, size: int) -> None:
        self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
    def add_token_shift_count(self, count: int) -> None:
        self.add_uint32(Keys.LLM.TOKEN_SHIFT_COUNT.format(arch=self.arch), count)
    def add_interleave_moe_layer_step(self, value: int) -> None:
        self.add_uint32(Keys.LLM.INTERLEAVE_MOE_LAYER_STEP.format(arch=self.arch), value)
    def add_layer_norm_eps(self, value: float) -> None:
        self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
    def add_layer_norm_rms_eps(self, value: float) -> None:
        self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)
    def add_group_norm_eps(self, value: float) -> None:
        self.add_float32(Keys.Attention.GROUPNORM_EPS.format(arch=self.arch), value)
    def add_group_norm_groups(self, value: int) -> None:
        self.add_uint32(Keys.Attention.GROUPNORM_GROUPS.format(arch=self.arch), value)
    def add_causal_attention(self, value: bool) -> None:
        self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
@@ -704,12 +830,27 @@ class GGUFWriter:
    def add_kv_lora_rank(self, length: int) -> None:
        self.add_uint32(Keys.Attention.KV_LORA_RANK.format(arch=self.arch), length)
    def add_decay_lora_rank(self, length: int) -> None:
        self.add_uint32(Keys.Attention.DECAY_LORA_RANK.format(arch=self.arch), length)
    def add_iclr_lora_rank(self, length: int) -> None:
        self.add_uint32(Keys.Attention.ICLR_LORA_RANK.format(arch=self.arch), length)
    def add_value_residual_mix_lora_rank(self, length: int) -> None:
        self.add_uint32(Keys.Attention.VALUE_RESIDUAL_MIX_LORA_RANK.format(arch=self.arch), length)
    def add_gate_lora_rank(self, length: int) -> None:
        self.add_uint32(Keys.Attention.GATE_LORA_RANK.format(arch=self.arch), length)
    def add_relative_attn_buckets_count(self, value: int) -> None:
        self.add_uint32(Keys.Attention.REL_BUCKETS_COUNT.format(arch=self.arch), value)
    def add_sliding_window(self, value: int) -> None:
        self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)
    def add_attention_scale(self, value: float) -> None:
        self.add_float32(Keys.Attention.SCALE.format(arch=self.arch), value)
    def add_attn_output_scale(self, value: float) -> None:
        self.add_float32(Keys.Attention.OUTPUT_SCALE.format(arch=self.arch), value)
@@ -719,9 +860,15 @@ class GGUFWriter:
    def add_pooling_type(self, value: PoolingType) -> None:
        self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)
    def add_num_deepstack_layers(self, count: int) -> None:
        self.add_uint32(Keys.LLM.NUM_DEEPSTACK_LAYERS.format(arch=self.arch), count)
    def add_rope_dimension_count(self, count: int) -> None:
        self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
    def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
        self.add_array(Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch), dims)
    def add_rope_freq_base(self, value: float) -> None:
        self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
@@ -767,6 +914,12 @@ class GGUFWriter:
    def add_ssm_time_step_rank(self, value: int) -> None:
        self.add_uint32(Keys.SSM.TIME_STEP_RANK.format(arch=self.arch), value)
    def add_ssm_group_count(self, value: int) -> None:
        self.add_uint32(Keys.SSM.GROUP_COUNT.format(arch=self.arch), value)
    def add_ssm_dt_b_c_rms(self, value: bool) -> None:
        self.add_bool(Keys.SSM.DT_B_C_RMS.format(arch=self.arch), value)
    def add_tokenizer_model(self, model: str) -> None:
        self.add_string(Keys.Tokenizer.MODEL, model)
@@ -803,9 +956,6 @@ class GGUFWriter:
    def add_pad_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.PAD_ID, id)
    def add_cls_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.CLS_ID, id)
    def add_mask_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.MASK_ID, id)
@@ -815,13 +965,16 @@ class GGUFWriter:
    def add_add_eos_token(self, value: bool) -> None:
        self.add_bool(Keys.Tokenizer.ADD_EOS, value)
    def add_add_sep_token(self, value: bool) -> None:
        self.add_bool(Keys.Tokenizer.ADD_SEP, value)
    def add_add_space_prefix(self, value: bool) -> None:
        self.add_bool(Keys.Tokenizer.ADD_PREFIX, value)
    def add_remove_extra_whitespaces(self, value: bool) -> None:
        self.add_bool(Keys.Tokenizer.REMOVE_EXTRA_WS, value)
-    def add_precompiled_charsmap(self, charsmap: Sequence[bytes]) -> None:
+    def add_precompiled_charsmap(self, charsmap: bytes) -> None:
        self.add_array(Keys.Tokenizer.PRECOMPILED_CHARSMAP, charsmap)
    def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
@@ -853,28 +1006,127 @@ class GGUFWriter:
        self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
    def add_prefix_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)
    def add_suffix_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.SUFFIX_ID, id)
    def add_middle_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.MIDDLE_ID, id)
    def add_eot_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.EOT_ID, id)
    def add_eom_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.EOM_ID, id)
    def add_classifier_output_labels(self, labels: Sequence[str]) -> None:
        self.add_array(Keys.Classifier.OUTPUT_LABELS.format(arch=self.arch), labels)
    # for vision models
    def add_clip_has_vision_encoder(self, value: bool) -> None:
        self.add_bool(Keys.Clip.HAS_VISION_ENCODER, value)
    def add_clip_has_audio_encoder(self, value: bool) -> None:
        self.add_bool(Keys.Clip.HAS_AUDIO_ENCODER, value)
    def add_clip_projector_type(self, value: str) -> None:
        self.add_string(Keys.Clip.PROJECTOR_TYPE, value)
    def add_vision_projection_dim(self, value: int) -> None:
        self.add_uint32(Keys.ClipVision.PROJECTION_DIM, value)
    def add_vision_patch_size(self, value: int) -> None:
        self.add_uint32(Keys.ClipVision.PATCH_SIZE, value)
    def add_vision_embedding_length(self, value: int) -> None:
        self.add_uint32(Keys.ClipVision.EMBEDDING_LENGTH, value)
    def add_vision_feed_forward_length(self, value: int) -> None:
        self.add_uint32(Keys.ClipVision.FEED_FORWARD_LENGTH, value)
    def add_vision_block_count(self, value: int) -> None:
        self.add_uint32(Keys.ClipVision.BLOCK_COUNT, value)
    def add_vision_head_count(self, value: int) -> None:
        self.add_uint32(Keys.ClipVision.Attention.HEAD_COUNT, value)
    def add_vision_attention_layernorm_eps(self, value: float) -> None:
        self.add_float32(Keys.ClipVision.Attention.LAYERNORM_EPS, value)
    def add_vision_image_size(self, value: int) -> None:
        self.add_uint32(Keys.ClipVision.IMAGE_SIZE, value)
    def add_vision_preproc_image_size(self, value: int) -> None:
        self.add_uint32(Keys.ClipVision.PREPROC_IMAGE_SIZE, value)
    def add_vision_image_mean(self, values: Sequence[float]) -> None:
        self.add_array(Keys.ClipVision.IMAGE_MEAN, values)
    def add_vision_image_std(self, values: Sequence[float]) -> None:
        self.add_array(Keys.ClipVision.IMAGE_STD, values)
    def add_vision_spatial_merge_size(self, value: int) -> None:
        self.add_uint32(Keys.ClipVision.SPATIAL_MERGE_SIZE, value)
    def add_vision_use_gelu(self, value: bool) -> None:
        self.add_bool(Keys.ClipVision.USE_GELU, value)
    def add_vision_use_silu(self, value: bool) -> None:
        self.add_bool(Keys.ClipVision.USE_SILU, value)
    def add_vision_projector_scale_factor(self, value: int) -> None:
        self.add_uint32(Keys.ClipVision.Projector.SCALE_FACTOR, value)
    def add_vision_n_wa_pattern(self, value: int) -> None:
        self.add_uint32(Keys.ClipVision.N_WA_PATTERN, value)
    def add_vision_is_deepstack_layers(self, layers: Sequence[bool]) -> None:
        self.add_array(Keys.ClipVision.IS_DEEPSTACK_LAYERS, layers)
    # audio models
    def add_audio_projection_dim(self, value: int) -> None:
        self.add_uint32(Keys.ClipAudio.PROJECTION_DIM, value)
    def add_audio_embedding_length(self, value: int) -> None:
        self.add_uint32(Keys.ClipAudio.EMBEDDING_LENGTH, value)
    def add_audio_feed_forward_length(self, value: int) -> None:
        self.add_uint32(Keys.ClipAudio.FEED_FORWARD_LENGTH, value)
    def add_audio_block_count(self, value: int) -> None:
        self.add_uint32(Keys.ClipAudio.BLOCK_COUNT, value)
    def add_audio_head_count(self, value: int) -> None:
        self.add_uint32(Keys.ClipAudio.Attention.HEAD_COUNT, value)
    def add_audio_attention_layernorm_eps(self, value: float) -> None:
        self.add_float32(Keys.ClipAudio.Attention.LAYERNORM_EPS, value)
    def add_audio_num_mel_bins(self, value: int) -> None:
        self.add_uint32(Keys.ClipAudio.NUM_MEL_BINS, value)
    def add_audio_stack_factor(self, value: int) -> None:
        self.add_uint32(Keys.ClipAudio.Projector.STACK_FACTOR, value)
    def add_xielu_alpha_p(self, values: Sequence[float]) -> None:
        """Pass ``values`` to ``add_array`` under the key ``Keys.xIELU.ALPHA_P``."""
        self.add_array(Keys.xIELU.ALPHA_P, values)
    def add_xielu_alpha_n(self, values: Sequence[float]) -> None:
        """Pass ``values`` to ``add_array`` under the key ``Keys.xIELU.ALPHA_N``."""
        self.add_array(Keys.xIELU.ALPHA_N, values)
    def add_xielu_beta(self, values: Sequence[float]) -> None:
        """Pass ``values`` to ``add_array`` under the key ``Keys.xIELU.BETA``."""
        self.add_array(Keys.xIELU.BETA, values)
    def add_xielu_eps(self, values: Sequence[float]) -> None:
        """Pass ``values`` to ``add_array`` under the key ``Keys.xIELU.EPS``."""
        self.add_array(Keys.xIELU.EPS, values)
    # diffusion models
    def add_diffusion_shift_logits(self, value: bool) -> None:
        self.add_bool(Keys.Diffusion.SHIFT_LOGITS, value)
    def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
        """Pack ``value`` with ``struct.pack`` using the writer's byte order.

        Args:
            fmt: ``struct`` format string without a byte-order character.
            value: The single value to pack.
            skip_pack_prefix: When true, ``fmt`` is used as-is and no
                byte-order character is prepended.

        Returns:
            The packed bytes.
        """
        if skip_pack_prefix:
            byte_order = ''
        elif self.endianess == GGUFEndian.LITTLE:
            byte_order = '<'
        else:
            byte_order = '>'
        return struct.pack(f'{byte_order}{fmt}', value)
-    def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
+    def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool, sub_type: GGUFValueType | None = None) -> bytes:
        kv_data = bytearray()
        if add_vtype:
@@ -895,7 +1147,9 @@ class GGUFWriter:
            if len(val) == 0:
                raise ValueError("Invalid GGUF metadata array. Empty array")
-            if isinstance(val, bytes):
+            if sub_type is not None:
                ltype = sub_type
            elif isinstance(val, bytes):
                ltype = GGUFValueType.UINT8
            else:
                ltype = GGUFValueType.get_type(val[0])
--- a/gguf-py/scripts/gguf_dump.py
+++ b/gguf-py/scripts/gguf_dump.py
@@ -21,11 +21,11 @@ logger = logging.getLogger("gguf-dump")
 def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
-    host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG'
+    file_endian = reader.endianess.name
    if reader.byte_order == 'S':
-        file_endian = 'BIG' if host_endian == 'LITTLE' else 'LITTLE'
+        host_endian = 'BIG' if file_endian == 'LITTLE' else 'LITTLE'
    else:
-        file_endian = host_endian
+        host_endian = file_endian
    return (host_endian, file_endian)