Make gguf-py stuff work with numpy 2.0 (#991)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Author: Kawrakow
Date: 2025-11-20 10:20:55 +01:00 (committed by GitHub)
Parent: 187e37bad8
Commit: e919c00cc9
3 changed files with 339 additions and 35 deletions

File 1 of 3: GGUF reader (GGUFReader / ReaderField)

@@ -6,6 +6,7 @@ from __future__ import annotations
import logging
import os
import sys
from collections import OrderedDict
from typing import Any, Literal, NamedTuple, TypeVar, Union
@@ -15,7 +16,6 @@ import numpy.typing as npt
from .quants import quant_shape_to_byte_shape
if __name__ == "__main__":
import sys
from pathlib import Path
# Allow running file in package as a script.
@@ -28,6 +28,7 @@ from gguf.constants import (
GGUF_VERSION,
GGMLQuantizationType,
GGUFValueType,
GGUFEndian,
)
logger = logging.getLogger(__name__)
@@ -53,6 +54,48 @@ class ReaderField(NamedTuple):
types: list[GGUFValueType] = []
def contents(self, index_or_slice: int | slice = slice(None)) -> Any:
if self.types:
to_string = lambda x: str(x.tobytes(), encoding='utf-8') # noqa: E731
main_type = self.types[0]
if main_type == GGUFValueType.ARRAY:
sub_type = self.types[-1]
if sub_type == GGUFValueType.STRING:
indices = self.data[index_or_slice]
if isinstance(index_or_slice, int):
return to_string(self.parts[indices]) # type: ignore
else:
return [to_string(self.parts[idx]) for idx in indices] # type: ignore
else:
# FIXME: When/if _get_field_parts() support multi-dimensional arrays, this must do so too
# Check if it's unsafe to perform slice optimization on data
# if any(True for idx in self.data if len(self.parts[idx]) != 1):
# optim_slice = slice(None)
# else:
# optim_slice = index_or_slice
# index_or_slice = slice(None)
# if isinstance(optim_slice, int):
# return self.parts[self.data[optim_slice]].tolist()[0]
# else:
# return [pv for idx in self.data[optim_slice] for pv in self.parts[idx].tolist()][index_or_slice]
if isinstance(index_or_slice, int):
return self.parts[self.data[index_or_slice]].tolist()[0]
else:
return [pv for idx in self.data[index_or_slice] for pv in self.parts[idx].tolist()]
if main_type == GGUFValueType.STRING:
return to_string(self.parts[-1])
else:
return self.parts[-1].tolist()[0]
return None
class ReaderTensor(NamedTuple):
name: str
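The new ReaderField.contents() helper decodes a field's raw parts into ordinary Python values: strings are UTF-8 decoded, arrays of strings become lists (or a single string when indexed with an int), and scalar fields are unwrapped from their one-element arrays. A minimal usage sketch, assuming an existing GGUF file at an illustrative path:

    from gguf import GGUFReader

    reader = GGUFReader("model.gguf")  # hypothetical path
    # Scalar string field: contents() returns the decoded value directly.
    arch = reader.fields["general.architecture"].contents()        # e.g. "llama"
    # Array-of-strings field: a list by default, a single string for an int index.
    tokens = reader.fields["tokenizer.ggml.tokens"].contents()
    first_token = reader.fields["tokenizer.ggml.tokens"].contents(0)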
@@ -101,10 +144,19 @@ class GGUFReader:
# If we get 0 here that means it's (probably) a GGUF file created for
# the opposite byte order of the machine this script is running on.
self.byte_order = 'S'
temp_version = temp_version.newbyteorder(self.byte_order)
temp_version = temp_version.view(temp_version.dtype.newbyteorder(self.byte_order))
version = temp_version[0]
if version not in READER_SUPPORTED_VERSIONS:
raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
if sys.byteorder == "little":
# Host is little endian
host_endian = GGUFEndian.LITTLE
swapped_endian = GGUFEndian.BIG
else:
# Sorry PDP or other weird systems that don't use BE or LE.
host_endian = GGUFEndian.BIG
swapped_endian = GGUFEndian.LITTLE
self.endianess = swapped_endian if self.byte_order == "S" else host_endian
self.fields: OrderedDict[str, ReaderField] = OrderedDict()
self.tensors: list[ReaderTensor] = []
offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
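The reader now records the file's byte order as a GGUFEndian value: the host order comes from sys.byteorder, and a byte_order of 'S' (swapped) means the file uses the opposite order. A small standalone sketch of the same decision, with the helper name being illustrative:

    import sys
    from gguf.constants import GGUFEndian

    def file_endianness(byte_order: str) -> GGUFEndian:
        # Host order as reported by Python; mixed-endian (PDP-style) hosts are not handled.
        host = GGUFEndian.LITTLE if sys.byteorder == "little" else GGUFEndian.BIG
        swapped = GGUFEndian.BIG if host == GGUFEndian.LITTLE else GGUFEndian.LITTLE
        # 'S' marks a file written in the opposite byte order of the host machine.
        return swapped if byte_order == "S" else host

    # On a little-endian host, for example:
    #   file_endianness("I") -> GGUFEndian.LITTLE   (native-order file)
    #   file_endianness("S") -> GGUFEndian.BIG      (byte-swapped file)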
@@ -145,11 +197,8 @@ class GGUFReader:
count = int(count)
itemsize = int(np.empty([], dtype = dtype).itemsize)
end_offs = offset + itemsize * count
return (
self.data[offset:end_offs]
.view(dtype = dtype)[:count]
.newbyteorder(override_order or self.byte_order)
)
arr = self.data[offset:end_offs].view(dtype=dtype)[:count]
return arr.view(arr.dtype.newbyteorder(self.byte_order if override_order is None else override_order))
def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
if field.name in self.fields:
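Both byte-order rewrites in this file follow the same pattern: NumPy 2.0 removed ndarray.newbyteorder(), so the array is instead re-viewed through a dtype whose byte order has been flipped. A self-contained sketch of the equivalent operation:

    import numpy as np

    arr = np.arange(4, dtype=np.uint32)
    # Pre-2.0 spelling (removed in NumPy 2.0):
    #     swapped = arr.newbyteorder('S')
    # NumPy-2.0-compatible equivalent used above:
    swapped = arr.view(arr.dtype.newbyteorder('S'))
    # The underlying buffer is untouched; only the dtype's byte order changes,
    # so each element is reinterpreted (e.g. uint32 value 1 reads back as 16777216).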
@@ -191,6 +240,7 @@ class GGUFReader:
offs += int(alen.nbytes)
aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
data_idxs: list[int] = []
# FIXME: Handle multi-dimensional arrays properly instead of flattening
for idx in range(alen[0]):
curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
if idx == 0:
@@ -201,7 +251,7 @@ class GGUFReader:
offs += curr_size
return offs - orig_offs, aparts, data_idxs, types
# We can't deal with this one.
raise ValueError('Unknown/unhandled field type {gtype}')
raise ValueError(f'Unknown/unhandled field type {gtype}')
def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
offs = orig_offs

File 2 of 3: GGUF writer (GGUFWriter)

@@ -49,6 +49,7 @@ class TensorInfo:
class GGUFValue:
value: Any
type: GGUFValueType
sub_type: GGUFValueType | None = None
class WriterState(Enum):
@@ -137,8 +138,9 @@ class GGUFWriter:
size = prod(shape)
if "_exps." in name:
expert_params += (size // shape[-3])
expert_sum += shape[-3]
expert_count = shape[-2 if ".bias" in name else -3]
expert_params += (size // expert_count)
expert_sum += expert_count
n_expert_tensors += 1
else:
shared_params += size
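The per-expert parameter accounting now handles expert bias tensors, whose expert dimension sits one axis later than in the matching weight tensors: assuming the usual layout, "_exps." weights carry the expert count at shape[-3] while the corresponding ".bias" tensors carry it at shape[-2]. A hedged illustration with made-up shapes and tensor names:

    # Illustrative shapes only, assuming [n_expert, n_ff, n_embd] weights
    # and [n_expert, n_ff] biases:
    weight_shape = [64, 2048, 4096]   # blk.0.ffn_up_exps.weight (hypothetical)
    bias_shape   = [64, 2048]         # blk.0.ffn_up_exps.bias   (hypothetical)

    def expert_count(name: str, shape: list[int]) -> int:
        # Bias tensors lack the trailing embedding axis, so the expert
        # dimension moves from -3 to -2.
        return shape[-2 if ".bias" in name else -3]

    assert expert_count("blk.0.ffn_up_exps.weight", weight_shape) == 64
    assert expert_count("blk.0.ffn_up_exps.bias",   bias_shape)   == 64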
@@ -238,7 +240,7 @@ class GGUFWriter:
for key, val in kv_data.items():
kv_bytes += self._pack_val(key, GGUFValueType.STRING, add_vtype=False)
kv_bytes += self._pack_val(val.value, val.type, add_vtype=True)
kv_bytes += self._pack_val(val.value, val.type, add_vtype=True, sub_type=val.sub_type)
fout.write(kv_bytes)
@@ -268,11 +270,11 @@ class GGUFWriter:
fout.flush()
self.state = WriterState.TI_DATA
def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
def add_key_value(self, key: str, val: Any, vtype: GGUFValueType, sub_type: GGUFValueType | None = None) -> None:
if any(key in kv_data for kv_data in self.kv_data):
raise ValueError(f'Duplicated key name {key!r}')
logger.warning(f'Duplicated key name {key!r}, overwriting it with new value {val!r} of type {vtype.name}')
self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
self.kv_data[0][key] = GGUFValue(value=val, type=vtype, sub_type=sub_type)
def add_uint8(self, key: str, val: int) -> None:
self.add_key_value(key,val, GGUFValueType.UINT8)
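add_key_value() now takes an optional sub_type so callers can state an array's element type explicitly instead of letting the writer infer it from the first element, and a duplicated key is downgraded from a hard error to a warning plus overwrite. A hedged usage sketch with illustrative arguments and key name:

    from gguf import GGUFWriter, GGUFValueType

    writer = GGUFWriter("out.gguf", arch="llama")   # illustrative path and arch
    # Element type spelled out via sub_type rather than inferred from values[0]:
    writer.add_key_value("some.namespace.ids", [1, 2, 3],
                         GGUFValueType.ARRAY, sub_type=GGUFValueType.INT32)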
@@ -569,6 +571,9 @@ class GGUFWriter:
def add_base_model_organization(self, source_id: int, organization: str) -> None:
self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)
def add_base_model_description(self, source_id: int, description: str) -> None:
self.add_string(Keys.General.BASE_MODEL_DESCRIPTION.format(id=source_id), description)
def add_base_model_url(self, source_id: int, url: str) -> None:
self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
@@ -581,15 +586,42 @@ class GGUFWriter:
def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)
def add_dataset_count(self, source_count: int) -> None:
self.add_uint32(Keys.General.DATASET_COUNT, source_count)
def add_dataset_name(self, source_id: int, name: str) -> None:
self.add_string(Keys.General.DATASET_NAME.format(id=source_id), name)
def add_dataset_author(self, source_id: int, author: str) -> None:
self.add_string(Keys.General.DATASET_AUTHOR.format(id=source_id), author)
def add_dataset_version(self, source_id: int, version: str) -> None:
self.add_string(Keys.General.DATASET_VERSION.format(id=source_id), version)
def add_dataset_organization(self, source_id: int, organization: str) -> None:
self.add_string(Keys.General.DATASET_ORGANIZATION.format(id=source_id), organization)
def add_dataset_description(self, source_id: int, description: str) -> None:
self.add_string(Keys.General.DATASET_DESCRIPTION.format(id=source_id), description)
def add_dataset_url(self, source_id: int, url: str) -> None:
self.add_string(Keys.General.DATASET_URL.format(id=source_id), url)
def add_dataset_doi(self, source_id: int, doi: str) -> None:
self.add_string(Keys.General.DATASET_DOI.format(id=source_id), doi)
def add_dataset_uuid(self, source_id: int, uuid: str) -> None:
self.add_string(Keys.General.DATASET_UUID.format(id=source_id), uuid)
def add_dataset_repo_url(self, source_id: int, repo_url: str) -> None:
self.add_string(Keys.General.DATASET_REPO_URL.format(id=source_id), repo_url)
def add_tags(self, tags: Sequence[str]) -> None:
self.add_array(Keys.General.TAGS, tags)
def add_languages(self, languages: Sequence[str]) -> None:
self.add_array(Keys.General.LANGUAGES, languages)
def add_datasets(self, datasets: Sequence[str]) -> None:
self.add_array(Keys.General.DATASETS, datasets)
def add_tensor_data_layout(self, layout: str) -> None:
self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
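The new per-dataset setters mirror the existing base-model ones, keyed by a running source id. A brief hedged sketch of how a conversion script might record two datasets (all values are made up):

    from gguf import GGUFWriter

    writer = GGUFWriter("out.gguf", arch="llama")   # illustrative
    writer.add_dataset_count(2)
    writer.add_dataset_name(0, "example-corpus")
    writer.add_dataset_url(0, "https://example.org/corpus")
    writer.add_dataset_name(1, "another-set")
    writer.add_dataset_author(1, "Example Lab")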
@@ -602,6 +634,24 @@ class GGUFWriter:
def add_embedding_length(self, length: int) -> None:
self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
def add_features_length(self, length: int) -> None:
self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)
def add_posnet_embedding_length(self, length: int) -> None:
self.add_uint32(Keys.PosNet.EMBEDDING_LENGTH.format(arch=self.arch), length)
def add_posnet_block_count(self, length: int) -> None:
self.add_uint32(Keys.PosNet.BLOCK_COUNT.format(arch=self.arch), length)
def add_convnext_embedding_length(self, length: int) -> None:
self.add_uint32(Keys.ConvNext.EMBEDDING_LENGTH.format(arch=self.arch), length)
def add_convnext_block_count(self, length: int) -> None:
self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)
def add_shortconv_l_cache(self, length: int) -> None:
self.add_uint32(Keys.ShortConv.L_CACHE.format(arch=self.arch), length)
def add_block_count(self, length: int) -> None:
self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
@@ -620,12 +670,30 @@ class GGUFWriter:
def add_expert_shared_feed_forward_length(self, length: int) -> None:
self.add_uint32(Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
def add_expert_chunk_feed_forward_length(self, length: int) -> None:
self.add_uint32(Keys.LLM.EXPERT_CHUNK_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
def add_parallel_residual(self, use: bool) -> None:
self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
def add_decoder_start_token_id(self, id: int) -> None:
self.add_uint32(Keys.LLM.DECODER_START_TOKEN_ID.format(arch=self.arch), id)
def add_decoder_block_count(self, value: int) -> None:
self.add_uint32(Keys.LLM.DECODER_BLOCK_COUNT.format(arch=self.arch), value)
def add_embedding_length_per_layer_input(self, value: int) -> None:
self.add_uint32(Keys.LLM.EMBD_LENGTH_PER_LAYER_INP.format(arch=self.arch), value)
def add_altup_active_idx(self, val: int) -> None:
self.add_uint32(Keys.LLM.ALTUP_ACTIVE_IDX.format(arch=self.arch), val)
def add_altup_num_inputs(self, val: int) -> None:
self.add_uint32(Keys.LLM.ALTUP_NUM_INPUTS.format(arch=self.arch), val)
def add_activation_sparsity_scale(self, values: Sequence[float]) -> None:
self.add_array(Keys.LLM.ACTIVATION_SPARSITY_SCALE.format(arch=self.arch), values)
def add_head_count(self, count: int | Sequence[int]) -> None:
if isinstance(count, int):
self.add_uint32(Keys.Attention.HEAD_COUNT.format(arch=self.arch), count)
@@ -644,12 +712,28 @@ class GGUFWriter:
def add_value_length(self, length: int) -> None:
self.add_uint32(Keys.Attention.VALUE_LENGTH.format(arch=self.arch), length)
def add_key_length_mla(self, length: int) -> None:
self.add_uint32(Keys.Attention.KEY_LENGTH_MLA.format(arch=self.arch), length)
def add_value_length_mla(self, length: int) -> None:
self.add_uint32(Keys.Attention.VALUE_LENGTH_MLA.format(arch=self.arch), length)
def add_max_alibi_bias(self, bias: float) -> None:
self.add_float32(Keys.Attention.MAX_ALIBI_BIAS.format(arch=self.arch), bias)
def add_clamp_kqv(self, value: float) -> None:
self.add_float32(Keys.Attention.CLAMP_KQV.format(arch=self.arch), value)
def add_shared_kv_layers(self, value: int) -> None:
self.add_uint32(Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch), value)
def add_sliding_window_pattern(self, value: Sequence[bool]) -> None:
self.add_array(Keys.Attention.SLIDING_WINDOW_PATTERN.format(arch=self.arch), value)
def add_dense_features_dims(self, dense:str, in_f:int, out_f:int) -> None:
self.add_uint32(Keys.LLM.DENSE_FEAT_IN_SIZE.format(arch=self.arch, dense=dense), in_f)
self.add_uint32(Keys.LLM.DENSE_FEAT_OUT_SIZE.format(arch=self.arch, dense=dense), out_f)
def add_logit_scale(self, value: float) -> None:
self.add_float32(Keys.LLM.LOGIT_SCALE.format(arch=self.arch), value)
@@ -686,15 +770,57 @@ class GGUFWriter:
def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
def add_expert_group_scale(self, value: float) -> None:
self.add_float32(Keys.LLM.EXPERT_GROUP_SCALE.format(arch=self.arch), value)
def add_experts_per_group(self, count: int) -> None:
self.add_uint32(Keys.LLM.EXPERTS_PER_GROUP.format(arch=self.arch), count)
def add_moe_every_n_layers(self, value: int) -> None:
self.add_uint32(Keys.LLM.MOE_EVERY_N_LAYERS.format(arch=self.arch), value)
def add_nextn_predict_layers(self, count: int) -> None:
self.add_uint32(Keys.LLM.NEXTN_PREDICT_LAYERS.format(arch=self.arch), count)
def add_swin_norm(self, value: bool) -> None:
self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)
def add_rescale_every_n_layers(self, count: int) -> None:
self.add_uint32(Keys.LLM.RESCALE_EVERY_N_LAYERS.format(arch=self.arch), count)
def add_time_mix_extra_dim(self, dim: int) -> None:
self.add_uint32(Keys.LLM.TIME_MIX_EXTRA_DIM.format(arch=self.arch), dim)
def add_time_decay_extra_dim(self, dim: int) -> None:
self.add_uint32(Keys.LLM.TIME_DECAY_EXTRA_DIM.format(arch=self.arch), dim)
def add_residual_scale(self, value: float) -> None:
self.add_float32(Keys.LLM.RESIDUAL_SCALE.format(arch=self.arch), value)
def add_embedding_scale(self, value: float) -> None:
self.add_float32(Keys.LLM.EMBEDDING_SCALE.format(arch=self.arch), value)
def add_wkv_head_size(self, size: int) -> None:
self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
def add_token_shift_count(self, count: int) -> None:
self.add_uint32(Keys.LLM.TOKEN_SHIFT_COUNT.format(arch=self.arch), count)
def add_interleave_moe_layer_step(self, value: int) -> None:
self.add_uint32(Keys.LLM.INTERLEAVE_MOE_LAYER_STEP.format(arch=self.arch), value)
def add_layer_norm_eps(self, value: float) -> None:
self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
def add_layer_norm_rms_eps(self, value: float) -> None:
self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)
def add_group_norm_eps(self, value: float) -> None:
self.add_float32(Keys.Attention.GROUPNORM_EPS.format(arch=self.arch), value)
def add_group_norm_groups(self, value: int) -> None:
self.add_uint32(Keys.Attention.GROUPNORM_GROUPS.format(arch=self.arch), value)
def add_causal_attention(self, value: bool) -> None:
self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
@@ -704,12 +830,27 @@ class GGUFWriter:
def add_kv_lora_rank(self, length: int) -> None:
self.add_uint32(Keys.Attention.KV_LORA_RANK.format(arch=self.arch), length)
def add_decay_lora_rank(self, length: int) -> None:
self.add_uint32(Keys.Attention.DECAY_LORA_RANK.format(arch=self.arch), length)
def add_iclr_lora_rank(self, length: int) -> None:
self.add_uint32(Keys.Attention.ICLR_LORA_RANK.format(arch=self.arch), length)
def add_value_residual_mix_lora_rank(self, length: int) -> None:
self.add_uint32(Keys.Attention.VALUE_RESIDUAL_MIX_LORA_RANK.format(arch=self.arch), length)
def add_gate_lora_rank(self, length: int) -> None:
self.add_uint32(Keys.Attention.GATE_LORA_RANK.format(arch=self.arch), length)
def add_relative_attn_buckets_count(self, value: int) -> None:
self.add_uint32(Keys.Attention.REL_BUCKETS_COUNT.format(arch=self.arch), value)
def add_sliding_window(self, value: int) -> None:
self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)
def add_attention_scale(self, value: float) -> None:
self.add_float32(Keys.Attention.SCALE.format(arch=self.arch), value)
def add_attn_output_scale(self, value: float) -> None:
self.add_float32(Keys.Attention.OUTPUT_SCALE.format(arch=self.arch), value)
@@ -719,9 +860,15 @@ class GGUFWriter:
def add_pooling_type(self, value: PoolingType) -> None:
self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)
def add_num_deepstack_layers(self, count: int) -> None:
self.add_uint32(Keys.LLM.NUM_DEEPSTACK_LAYERS.format(arch=self.arch), count)
def add_rope_dimension_count(self, count: int) -> None:
self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
self.add_array(Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch), dims)
def add_rope_freq_base(self, value: float) -> None:
self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
@@ -767,6 +914,12 @@ class GGUFWriter:
def add_ssm_time_step_rank(self, value: int) -> None:
self.add_uint32(Keys.SSM.TIME_STEP_RANK.format(arch=self.arch), value)
def add_ssm_group_count(self, value: int) -> None:
self.add_uint32(Keys.SSM.GROUP_COUNT.format(arch=self.arch), value)
def add_ssm_dt_b_c_rms(self, value: bool) -> None:
self.add_bool(Keys.SSM.DT_B_C_RMS.format(arch=self.arch), value)
def add_tokenizer_model(self, model: str) -> None:
self.add_string(Keys.Tokenizer.MODEL, model)
@@ -803,9 +956,6 @@ class GGUFWriter:
def add_pad_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.PAD_ID, id)
def add_cls_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.CLS_ID, id)
def add_mask_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.MASK_ID, id)
@@ -815,13 +965,16 @@ class GGUFWriter:
def add_add_eos_token(self, value: bool) -> None:
self.add_bool(Keys.Tokenizer.ADD_EOS, value)
def add_add_sep_token(self, value: bool) -> None:
self.add_bool(Keys.Tokenizer.ADD_SEP, value)
def add_add_space_prefix(self, value: bool) -> None:
self.add_bool(Keys.Tokenizer.ADD_PREFIX, value)
def add_remove_extra_whitespaces(self, value: bool) -> None:
self.add_bool(Keys.Tokenizer.REMOVE_EXTRA_WS, value)
def add_precompiled_charsmap(self, charsmap: Sequence[bytes]) -> None:
def add_precompiled_charsmap(self, charsmap: bytes) -> None:
self.add_array(Keys.Tokenizer.PRECOMPILED_CHARSMAP, charsmap)
def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
@@ -853,28 +1006,127 @@ class GGUFWriter:
self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
def add_prefix_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)
def add_suffix_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.SUFFIX_ID, id)
def add_middle_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.MIDDLE_ID, id)
def add_eot_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.EOT_ID, id)
def add_eom_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.EOM_ID, id)
def add_classifier_output_labels(self, labels: Sequence[str]) -> None:
self.add_array(Keys.Classifier.OUTPUT_LABELS.format(arch=self.arch), labels)
# for vision models
def add_clip_has_vision_encoder(self, value: bool) -> None:
self.add_bool(Keys.Clip.HAS_VISION_ENCODER, value)
def add_clip_has_audio_encoder(self, value: bool) -> None:
self.add_bool(Keys.Clip.HAS_AUDIO_ENCODER, value)
def add_clip_projector_type(self, value: str) -> None:
self.add_string(Keys.Clip.PROJECTOR_TYPE, value)
def add_vision_projection_dim(self, value: int) -> None:
self.add_uint32(Keys.ClipVision.PROJECTION_DIM, value)
def add_vision_patch_size(self, value: int) -> None:
self.add_uint32(Keys.ClipVision.PATCH_SIZE, value)
def add_vision_embedding_length(self, value: int) -> None:
self.add_uint32(Keys.ClipVision.EMBEDDING_LENGTH, value)
def add_vision_feed_forward_length(self, value: int) -> None:
self.add_uint32(Keys.ClipVision.FEED_FORWARD_LENGTH, value)
def add_vision_block_count(self, value: int) -> None:
self.add_uint32(Keys.ClipVision.BLOCK_COUNT, value)
def add_vision_head_count(self, value: int) -> None:
self.add_uint32(Keys.ClipVision.Attention.HEAD_COUNT, value)
def add_vision_attention_layernorm_eps(self, value: float) -> None:
self.add_float32(Keys.ClipVision.Attention.LAYERNORM_EPS, value)
def add_vision_image_size(self, value: int) -> None:
self.add_uint32(Keys.ClipVision.IMAGE_SIZE, value)
def add_vision_preproc_image_size(self, value: int) -> None:
self.add_uint32(Keys.ClipVision.PREPROC_IMAGE_SIZE, value)
def add_vision_image_mean(self, values: Sequence[float]) -> None:
self.add_array(Keys.ClipVision.IMAGE_MEAN, values)
def add_vision_image_std(self, values: Sequence[float]) -> None:
self.add_array(Keys.ClipVision.IMAGE_STD, values)
def add_vision_spatial_merge_size(self, value: int) -> None:
self.add_uint32(Keys.ClipVision.SPATIAL_MERGE_SIZE, value)
def add_vision_use_gelu(self, value: bool) -> None:
self.add_bool(Keys.ClipVision.USE_GELU, value)
def add_vision_use_silu(self, value: bool) -> None:
self.add_bool(Keys.ClipVision.USE_SILU, value)
def add_vision_projector_scale_factor(self, value: int) -> None:
self.add_uint32(Keys.ClipVision.Projector.SCALE_FACTOR, value)
def add_vision_n_wa_pattern(self, value: int) -> None:
self.add_uint32(Keys.ClipVision.N_WA_PATTERN, value)
def add_vision_is_deepstack_layers(self, layers: Sequence[bool]) -> None:
self.add_array(Keys.ClipVision.IS_DEEPSTACK_LAYERS, layers)
# audio models
def add_audio_projection_dim(self, value: int) -> None:
self.add_uint32(Keys.ClipAudio.PROJECTION_DIM, value)
def add_audio_embedding_length(self, value: int) -> None:
self.add_uint32(Keys.ClipAudio.EMBEDDING_LENGTH, value)
def add_audio_feed_forward_length(self, value: int) -> None:
self.add_uint32(Keys.ClipAudio.FEED_FORWARD_LENGTH, value)
def add_audio_block_count(self, value: int) -> None:
self.add_uint32(Keys.ClipAudio.BLOCK_COUNT, value)
def add_audio_head_count(self, value: int) -> None:
self.add_uint32(Keys.ClipAudio.Attention.HEAD_COUNT, value)
def add_audio_attention_layernorm_eps(self, value: float) -> None:
self.add_float32(Keys.ClipAudio.Attention.LAYERNORM_EPS, value)
def add_audio_num_mel_bins(self, value: int) -> None:
self.add_uint32(Keys.ClipAudio.NUM_MEL_BINS, value)
def add_audio_stack_factor(self, value: int) -> None:
self.add_uint32(Keys.ClipAudio.Projector.STACK_FACTOR, value)
def add_xielu_alpha_p(self, values: Sequence[float]):
self.add_array(Keys.xIELU.ALPHA_P, values)
def add_xielu_alpha_n(self, values: Sequence[float]):
self.add_array(Keys.xIELU.ALPHA_N, values)
def add_xielu_beta(self, values: Sequence[float]):
self.add_array(Keys.xIELU.BETA, values)
def add_xielu_eps(self, values: Sequence[float]):
self.add_array(Keys.xIELU.EPS, values)
# diffusion models
def add_diffusion_shift_logits(self, value: bool) -> None:
self.add_bool(Keys.Diffusion.SHIFT_LOGITS, value)
def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
pack_prefix = ''
if not skip_pack_prefix:
pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
return struct.pack(f'{pack_prefix}{fmt}', value)
def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool, sub_type: GGUFValueType | None = None) -> bytes:
kv_data = bytearray()
if add_vtype:
@@ -895,7 +1147,9 @@ class GGUFWriter:
if len(val) == 0:
raise ValueError("Invalid GGUF metadata array. Empty array")
if isinstance(val, bytes):
if sub_type is not None:
ltype = sub_type
elif isinstance(val, bytes):
ltype = GGUFValueType.UINT8
else:
ltype = GGUFValueType.get_type(val[0])
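Within _pack_val(), the element type of an array is now resolved in priority order: an explicit sub_type wins, raw bytes fall back to UINT8, and anything else is inferred from the first element. A minimal sketch of that precedence as a standalone helper (the function name is illustrative):

    from typing import Any, Sequence
    from gguf import GGUFValueType

    def resolve_array_element_type(val: Sequence[Any] | bytes,
                                   sub_type: GGUFValueType | None) -> GGUFValueType:
        # Mirrors the new precedence used for GGUFValueType.ARRAY values.
        if sub_type is not None:
            return sub_type
        if isinstance(val, bytes):
            return GGUFValueType.UINT8
        return GGUFValueType.get_type(val[0])

    # e.g. resolve_array_element_type([1.0, 2.0], None)             -> GGUFValueType.FLOAT32
    #      resolve_array_element_type([1, 2], GGUFValueType.INT32)  -> GGUFValueType.INT32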

File 3 of 3: gguf-dump script

@@ -21,11 +21,11 @@ logger = logging.getLogger("gguf-dump")
def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG'
file_endian = reader.endianess.name
if reader.byte_order == 'S':
file_endian = 'BIG' if host_endian == 'LITTLE' else 'LITTLE'
host_endian = 'BIG' if file_endian == 'LITTLE' else 'LITTLE'
else:
file_endian = host_endian
host_endian = file_endian
return (host_endian, file_endian)
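get_file_host_endian() now derives both names from reader.endianess instead of probing a NumPy scalar with newbyteorder(), which NumPy 2.0 removed; when the file is byte-swapped ('S'), the host order is reported as the opposite of the file order. A hedged usage sketch, assuming the helper above is in scope and the path is hypothetical:

    from gguf import GGUFReader

    reader = GGUFReader("model.gguf")
    host_endian, file_endian = get_file_host_endian(reader)
    print(f"host={host_endian} file={file_endian}")   # e.g. host=LITTLE file=LITTLE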