diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py
index e8e61abf..d87e8f72 100644
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -6,6 +6,7 @@ from __future__ import annotations
 
 import logging
 import os
+import sys
 from collections import OrderedDict
 from typing import Any, Literal, NamedTuple, TypeVar, Union
 
@@ -15,7 +16,6 @@ import numpy.typing as npt
 from .quants import quant_shape_to_byte_shape
 
 if __name__ == "__main__":
-    import sys
     from pathlib import Path
 
     # Allow running file in package as a script.
@@ -28,6 +28,7 @@ from gguf.constants import (
     GGUF_VERSION,
     GGMLQuantizationType,
     GGUFValueType,
+    GGUFEndian,
 )
 
 logger = logging.getLogger(__name__)
@@ -53,6 +54,48 @@ class ReaderField(NamedTuple):
 
     types: list[GGUFValueType] = []
 
+    def contents(self, index_or_slice: int | slice = slice(None)) -> Any:
+        if self.types:
+            to_string = lambda x: str(x.tobytes(), encoding='utf-8')  # noqa: E731
+            main_type = self.types[0]
+
+            if main_type == GGUFValueType.ARRAY:
+                sub_type = self.types[-1]
+
+                if sub_type == GGUFValueType.STRING:
+                    indices = self.data[index_or_slice]
+
+                    if isinstance(index_or_slice, int):
+                        return to_string(self.parts[indices])  # type: ignore
+                    else:
+                        return [to_string(self.parts[idx]) for idx in indices]  # type: ignore
+                else:
+                    # FIXME: When/if _get_field_parts() support multi-dimensional arrays, this must do so too
+
+                    # Check if it's unsafe to perform slice optimization on data
+                    # if any(True for idx in self.data if len(self.parts[idx]) != 1):
+                    #     optim_slice = slice(None)
+                    # else:
+                    #     optim_slice = index_or_slice
+                    #     index_or_slice = slice(None)
+
+                    # if isinstance(optim_slice, int):
+                    #     return self.parts[self.data[optim_slice]].tolist()[0]
+                    # else:
+                    #     return [pv for idx in self.data[optim_slice] for pv in self.parts[idx].tolist()][index_or_slice]
+
+                    if isinstance(index_or_slice, int):
+                        return self.parts[self.data[index_or_slice]].tolist()[0]
+                    else:
+                        return [pv for idx in self.data[index_or_slice] for pv in self.parts[idx].tolist()]
+
+            if main_type == GGUFValueType.STRING:
+                return to_string(self.parts[-1])
+            else:
+                return self.parts[-1].tolist()[0]
+
+        return None
+
 
 class ReaderTensor(NamedTuple):
     name: str
@@ -101,10 +144,19 @@ class GGUFReader:
             # If we get 0 here that means it's (probably) a GGUF file created for
             # the opposite byte order of the machine this script is running on.
             self.byte_order = 'S'
-            temp_version = temp_version.newbyteorder(self.byte_order)
+            temp_version = temp_version.view(temp_version.dtype.newbyteorder(self.byte_order))
         version = temp_version[0]
         if version not in READER_SUPPORTED_VERSIONS:
             raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
+        if sys.byteorder == "little":
+            # Host is little endian
+            host_endian = GGUFEndian.LITTLE
+            swapped_endian = GGUFEndian.BIG
+        else:
+            # Sorry PDP or other weird systems that don't use BE or LE.
+            host_endian = GGUFEndian.BIG
+            swapped_endian = GGUFEndian.LITTLE
+        self.endianess = swapped_endian if self.byte_order == "S" else host_endian
         self.fields: OrderedDict[str, ReaderField] = OrderedDict()
         self.tensors: list[ReaderTensor] = []
         offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
@@ -145,11 +197,8 @@ class GGUFReader:
         count = int(count)
         itemsize = int(np.empty([], dtype = dtype).itemsize)
         end_offs = offset + itemsize * count
-        return (
-            self.data[offset:end_offs]
-            .view(dtype = dtype)[:count]
-            .newbyteorder(override_order or self.byte_order)
-        )
+        arr = self.data[offset:end_offs].view(dtype=dtype)[:count]
+        return arr.view(arr.dtype.newbyteorder(self.byte_order if override_order is None else override_order))
 
     def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
         if field.name in self.fields:
@@ -191,6 +240,7 @@ class GGUFReader:
             offs += int(alen.nbytes)
             aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
             data_idxs: list[int] = []
+            # FIXME: Handle multi-dimensional arrays properly instead of flattening
            for idx in range(alen[0]):
                 curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
                 if idx == 0:
@@ -201,7 +251,7 @@ class GGUFReader:
                 offs += curr_size
             return offs - orig_offs, aparts, data_idxs, types
         # We can't deal with this one.
-        raise ValueError('Unknown/unhandled field type {gtype}')
+        raise ValueError(f'Unknown/unhandled field type {gtype}')
 
     def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
         offs = orig_offs
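
The reader-side changes above add `ReaderField.contents()` and a `GGUFReader.endianess` attribute. A minimal sketch of how they fit together (the `model.gguf` path and the tokenizer key are placeholders, not part of this diff):

```python
from gguf.gguf_reader import GGUFReader

reader = GGUFReader("model.gguf")  # placeholder path

# New in this diff: the reader records the file's endianness as a GGUFEndian.
print("file endianness:", reader.endianess.name)

# New in this diff: contents() decodes strings and arrays, so callers no
# longer need to poke at field.parts / field.data by hand.
arch = reader.fields["general.architecture"].contents()
print("architecture:", arch)

# Array fields accept an int index or a slice.
tokens = reader.fields.get("tokenizer.ggml.tokens")
if tokens is not None:
    print("first tokens:", tokens.contents(slice(0, 5)))
```
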
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 8ad3d065..a051daee 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -49,6 +49,7 @@ class TensorInfo:
 class GGUFValue:
     value: Any
     type: GGUFValueType
+    sub_type: GGUFValueType | None = None
 
 
 class WriterState(Enum):
@@ -137,8 +138,9 @@ class GGUFWriter:
                 size = prod(shape)
 
                 if "_exps." in name:
-                    expert_params += (size // shape[-3])
-                    expert_sum += shape[-3]
+                    expert_count = shape[-2 if ".bias" in name else -3]
+                    expert_params += (size // expert_count)
+                    expert_sum += expert_count
                     n_expert_tensors += 1
                 else:
                     shared_params += size
@@ -238,7 +240,7 @@ class GGUFWriter:
 
             for key, val in kv_data.items():
                 kv_bytes += self._pack_val(key, GGUFValueType.STRING, add_vtype=False)
-                kv_bytes += self._pack_val(val.value, val.type, add_vtype=True)
+                kv_bytes += self._pack_val(val.value, val.type, add_vtype=True, sub_type=val.sub_type)
 
             fout.write(kv_bytes)
 
@@ -268,11 +270,11 @@ class GGUFWriter:
             fout.flush()
         self.state = WriterState.TI_DATA
 
-    def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
+    def add_key_value(self, key: str, val: Any, vtype: GGUFValueType, sub_type: GGUFValueType | None = None) -> None:
         if any(key in kv_data for kv_data in self.kv_data):
-            raise ValueError(f'Duplicated key name {key!r}')
+            logger.warning(f'Duplicated key name {key!r}, overwriting it with new value {val!r} of type {vtype.name}')
 
-        self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
+        self.kv_data[0][key] = GGUFValue(value=val, type=vtype, sub_type=sub_type)
 
     def add_uint8(self, key: str, val: int) -> None:
         self.add_key_value(key, val, GGUFValueType.UINT8)
@@ -569,6 +571,9 @@ class GGUFWriter:
     def add_base_model_organization(self, source_id: int, organization: str) -> None:
         self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)
 
+    def add_base_model_description(self, source_id: int, description: str) -> None:
+        self.add_string(Keys.General.BASE_MODEL_DESCRIPTION.format(id=source_id), description)
+
     def add_base_model_url(self, source_id: int, url: str) -> None:
         self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
 
@@ -581,15 +586,42 @@ class GGUFWriter:
     def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
         self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)
 
+    def add_dataset_count(self, source_count: int) -> None:
+        self.add_uint32(Keys.General.DATASET_COUNT, source_count)
+
+    def add_dataset_name(self, source_id: int, name: str) -> None:
+        self.add_string(Keys.General.DATASET_NAME.format(id=source_id), name)
+
+    def add_dataset_author(self, source_id: int, author: str) -> None:
+        self.add_string(Keys.General.DATASET_AUTHOR.format(id=source_id), author)
+
+    def add_dataset_version(self, source_id: int, version: str) -> None:
+        self.add_string(Keys.General.DATASET_VERSION.format(id=source_id), version)
+
+    def add_dataset_organization(self, source_id: int, organization: str) -> None:
+        self.add_string(Keys.General.DATASET_ORGANIZATION.format(id=source_id), organization)
+
+    def add_dataset_description(self, source_id: int, description: str) -> None:
+        self.add_string(Keys.General.DATASET_DESCRIPTION.format(id=source_id), description)
+
+    def add_dataset_url(self, source_id: int, url: str) -> None:
+        self.add_string(Keys.General.DATASET_URL.format(id=source_id), url)
+
+    def add_dataset_doi(self, source_id: int, doi: str) -> None:
+        self.add_string(Keys.General.DATASET_DOI.format(id=source_id), doi)
+
+    def add_dataset_uuid(self, source_id: int, uuid: str) -> None:
+        self.add_string(Keys.General.DATASET_UUID.format(id=source_id), uuid)
+
+    def add_dataset_repo_url(self, source_id: int, repo_url: str) -> None:
+        self.add_string(Keys.General.DATASET_REPO_URL.format(id=source_id), repo_url)
+
     def add_tags(self, tags: Sequence[str]) -> None:
         self.add_array(Keys.General.TAGS, tags)
 
     def add_languages(self, languages: Sequence[str]) -> None:
         self.add_array(Keys.General.LANGUAGES, languages)
 
-    def add_datasets(self, datasets: Sequence[str]) -> None:
-        self.add_array(Keys.General.DATASETS, datasets)
-
     def add_tensor_data_layout(self, layout: str) -> None:
         self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
 
@@ -602,6 +634,24 @@ class GGUFWriter:
     def add_embedding_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
 
+    def add_features_length(self, length: int) -> None:
+        self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)
+
+    def add_posnet_embedding_length(self, length: int) -> None:
+        self.add_uint32(Keys.PosNet.EMBEDDING_LENGTH.format(arch=self.arch), length)
+
+    def add_posnet_block_count(self, length: int) -> None:
+        self.add_uint32(Keys.PosNet.BLOCK_COUNT.format(arch=self.arch), length)
+
+    def add_convnext_embedding_length(self, length: int) -> None:
+        self.add_uint32(Keys.ConvNext.EMBEDDING_LENGTH.format(arch=self.arch), length)
+
+    def add_convnext_block_count(self, length: int) -> None:
+        self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)
+
+    def add_shortconv_l_cache(self, length: int) -> None:
+        self.add_uint32(Keys.ShortConv.L_CACHE.format(arch=self.arch), length)
+
     def add_block_count(self, length: int) -> None:
         self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
 
@@ -620,12 +670,30 @@ class GGUFWriter:
     def add_expert_shared_feed_forward_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
 
+    def add_expert_chunk_feed_forward_length(self, length: int) -> None:
+        self.add_uint32(Keys.LLM.EXPERT_CHUNK_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
+
     def add_parallel_residual(self, use: bool) -> None:
         self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
 
     def add_decoder_start_token_id(self, id: int) -> None:
         self.add_uint32(Keys.LLM.DECODER_START_TOKEN_ID.format(arch=self.arch), id)
 
+    def add_decoder_block_count(self, value: int) -> None:
+        self.add_uint32(Keys.LLM.DECODER_BLOCK_COUNT.format(arch=self.arch), value)
+
+    def add_embedding_length_per_layer_input(self, value: int) -> None:
+        self.add_uint32(Keys.LLM.EMBD_LENGTH_PER_LAYER_INP.format(arch=self.arch), value)
+
+    def add_altup_active_idx(self, val: int) -> None:
+        self.add_uint32(Keys.LLM.ALTUP_ACTIVE_IDX.format(arch=self.arch), val)
+
+    def add_altup_num_inputs(self, val: int) -> None:
+        self.add_uint32(Keys.LLM.ALTUP_NUM_INPUTS.format(arch=self.arch), val)
+
+    def add_activation_sparsity_scale(self, values: Sequence[float]) -> None:
+        self.add_array(Keys.LLM.ACTIVATION_SPARSITY_SCALE.format(arch=self.arch), values)
+
     def add_head_count(self, count: int | Sequence[int]) -> None:
         if isinstance(count, int):
             self.add_uint32(Keys.Attention.HEAD_COUNT.format(arch=self.arch), count)
@@ -644,12 +712,28 @@ class GGUFWriter:
     def add_value_length(self, length: int) -> None:
         self.add_uint32(Keys.Attention.VALUE_LENGTH.format(arch=self.arch), length)
 
+    def add_key_length_mla(self, length: int) -> None:
+        self.add_uint32(Keys.Attention.KEY_LENGTH_MLA.format(arch=self.arch), length)
+
+    def add_value_length_mla(self, length: int) -> None:
+        self.add_uint32(Keys.Attention.VALUE_LENGTH_MLA.format(arch=self.arch), length)
+
     def add_max_alibi_bias(self, bias: float) -> None:
         self.add_float32(Keys.Attention.MAX_ALIBI_BIAS.format(arch=self.arch), bias)
 
     def add_clamp_kqv(self, value: float) -> None:
         self.add_float32(Keys.Attention.CLAMP_KQV.format(arch=self.arch), value)
 
+    def add_shared_kv_layers(self, value: int) -> None:
+        self.add_uint32(Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch), value)
+
+    def add_sliding_window_pattern(self, value: Sequence[bool]) -> None:
+        self.add_array(Keys.Attention.SLIDING_WINDOW_PATTERN.format(arch=self.arch), value)
+
+    def add_dense_features_dims(self, dense: str, in_f: int, out_f: int) -> None:
+        self.add_uint32(Keys.LLM.DENSE_FEAT_IN_SIZE.format(arch=self.arch, dense=dense), in_f)
+        self.add_uint32(Keys.LLM.DENSE_FEAT_OUT_SIZE.format(arch=self.arch, dense=dense), out_f)
+
     def add_logit_scale(self, value: float) -> None:
         self.add_float32(Keys.LLM.LOGIT_SCALE.format(arch=self.arch), value)
 
@@ -686,15 +770,57 @@ class GGUFWriter:
     def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
         self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
 
+    def add_expert_group_scale(self, value: float) -> None:
+        self.add_float32(Keys.LLM.EXPERT_GROUP_SCALE.format(arch=self.arch), value)
+
+    def add_experts_per_group(self, count: int) -> None:
+        self.add_uint32(Keys.LLM.EXPERTS_PER_GROUP.format(arch=self.arch), count)
+
+    def add_moe_every_n_layers(self, value: int) -> None:
+        self.add_uint32(Keys.LLM.MOE_EVERY_N_LAYERS.format(arch=self.arch), value)
+
     def add_nextn_predict_layers(self, count: int) -> None:
         self.add_uint32(Keys.LLM.NEXTN_PREDICT_LAYERS.format(arch=self.arch), count)
 
+    def add_swin_norm(self, value: bool) -> None:
+        self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)
+
+    def add_rescale_every_n_layers(self, count: int) -> None:
+        self.add_uint32(Keys.LLM.RESCALE_EVERY_N_LAYERS.format(arch=self.arch), count)
+
+    def add_time_mix_extra_dim(self, dim: int) -> None:
+        self.add_uint32(Keys.LLM.TIME_MIX_EXTRA_DIM.format(arch=self.arch), dim)
+
+    def add_time_decay_extra_dim(self, dim: int) -> None:
+        self.add_uint32(Keys.LLM.TIME_DECAY_EXTRA_DIM.format(arch=self.arch), dim)
+
+    def add_residual_scale(self, value: float) -> None:
+        self.add_float32(Keys.LLM.RESIDUAL_SCALE.format(arch=self.arch), value)
+
+    def add_embedding_scale(self, value: float) -> None:
+        self.add_float32(Keys.LLM.EMBEDDING_SCALE.format(arch=self.arch), value)
+
+    def add_wkv_head_size(self, size: int) -> None:
+        self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
+
+    def add_token_shift_count(self, count: int) -> None:
+        self.add_uint32(Keys.LLM.TOKEN_SHIFT_COUNT.format(arch=self.arch), count)
+
+    def add_interleave_moe_layer_step(self, value: int) -> None:
+        self.add_uint32(Keys.LLM.INTERLEAVE_MOE_LAYER_STEP.format(arch=self.arch), value)
+
     def add_layer_norm_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
 
     def add_layer_norm_rms_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)
 
+    def add_group_norm_eps(self, value: float) -> None:
+        self.add_float32(Keys.Attention.GROUPNORM_EPS.format(arch=self.arch), value)
+
+    def add_group_norm_groups(self, value: int) -> None:
+        self.add_uint32(Keys.Attention.GROUPNORM_GROUPS.format(arch=self.arch), value)
+
     def add_causal_attention(self, value: bool) -> None:
         self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
 
@@ -704,12 +830,27 @@ class GGUFWriter:
     def add_kv_lora_rank(self, length: int) -> None:
         self.add_uint32(Keys.Attention.KV_LORA_RANK.format(arch=self.arch), length)
 
+    def add_decay_lora_rank(self, length: int) -> None:
+        self.add_uint32(Keys.Attention.DECAY_LORA_RANK.format(arch=self.arch), length)
+
+    def add_iclr_lora_rank(self, length: int) -> None:
+        self.add_uint32(Keys.Attention.ICLR_LORA_RANK.format(arch=self.arch), length)
+
+    def add_value_residual_mix_lora_rank(self, length: int) -> None:
+        self.add_uint32(Keys.Attention.VALUE_RESIDUAL_MIX_LORA_RANK.format(arch=self.arch), length)
+
+    def add_gate_lora_rank(self, length: int) -> None:
+        self.add_uint32(Keys.Attention.GATE_LORA_RANK.format(arch=self.arch), length)
+
     def add_relative_attn_buckets_count(self, value: int) -> None:
         self.add_uint32(Keys.Attention.REL_BUCKETS_COUNT.format(arch=self.arch), value)
 
     def add_sliding_window(self, value: int) -> None:
         self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)
 
+    def add_attention_scale(self, value: float) -> None:
+        self.add_float32(Keys.Attention.SCALE.format(arch=self.arch), value)
+
     def add_attn_output_scale(self, value: float) -> None:
         self.add_float32(Keys.Attention.OUTPUT_SCALE.format(arch=self.arch), value)
 
@@ -719,9 +860,15 @@ class GGUFWriter:
     def add_pooling_type(self, value: PoolingType) -> None:
         self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)
 
+    def add_num_deepstack_layers(self, count: int) -> None:
+        self.add_uint32(Keys.LLM.NUM_DEEPSTACK_LAYERS.format(arch=self.arch), count)
+
     def add_rope_dimension_count(self, count: int) -> None:
         self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
 
+    def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
+        self.add_array(Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch), dims)
+
     def add_rope_freq_base(self, value: float) -> None:
         self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
 
@@ -767,6 +914,12 @@ class GGUFWriter:
     def add_ssm_time_step_rank(self, value: int) -> None:
         self.add_uint32(Keys.SSM.TIME_STEP_RANK.format(arch=self.arch), value)
 
+    def add_ssm_group_count(self, value: int) -> None:
+        self.add_uint32(Keys.SSM.GROUP_COUNT.format(arch=self.arch), value)
+
+    def add_ssm_dt_b_c_rms(self, value: bool) -> None:
+        self.add_bool(Keys.SSM.DT_B_C_RMS.format(arch=self.arch), value)
+
     def add_tokenizer_model(self, model: str) -> None:
         self.add_string(Keys.Tokenizer.MODEL, model)
 
@@ -803,9 +956,6 @@ class GGUFWriter:
     def add_pad_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.PAD_ID, id)
 
-    def add_cls_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.Tokenizer.CLS_ID, id)
-
     def add_mask_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.MASK_ID, id)
 
@@ -815,13 +965,16 @@ class GGUFWriter:
     def add_add_eos_token(self, value: bool) -> None:
         self.add_bool(Keys.Tokenizer.ADD_EOS, value)
 
+    def add_add_sep_token(self, value: bool) -> None:
+        self.add_bool(Keys.Tokenizer.ADD_SEP, value)
+
     def add_add_space_prefix(self, value: bool) -> None:
         self.add_bool(Keys.Tokenizer.ADD_PREFIX, value)
 
     def add_remove_extra_whitespaces(self, value: bool) -> None:
         self.add_bool(Keys.Tokenizer.REMOVE_EXTRA_WS, value)
 
-    def add_precompiled_charsmap(self, charsmap: Sequence[bytes]) -> None:
+    def add_precompiled_charsmap(self, charsmap: bytes) -> None:
         self.add_array(Keys.Tokenizer.PRECOMPILED_CHARSMAP, charsmap)
 
     def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
@@ -853,28 +1006,127 @@ class GGUFWriter:
 
         self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
 
-    def add_prefix_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)
-
-    def add_suffix_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.Tokenizer.SUFFIX_ID, id)
-
-    def add_middle_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.Tokenizer.MIDDLE_ID, id)
-
     def add_eot_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.EOT_ID, id)
 
     def add_eom_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.EOM_ID, id)
 
+    def add_classifier_output_labels(self, labels: Sequence[str]) -> None:
+        self.add_array(Keys.Classifier.OUTPUT_LABELS.format(arch=self.arch), labels)
+
+    # for vision models
+
+    def add_clip_has_vision_encoder(self, value: bool) -> None:
+        self.add_bool(Keys.Clip.HAS_VISION_ENCODER, value)
+
+    def add_clip_has_audio_encoder(self, value: bool) -> None:
+        self.add_bool(Keys.Clip.HAS_AUDIO_ENCODER, value)
+
+    def add_clip_projector_type(self, value: str) -> None:
+        self.add_string(Keys.Clip.PROJECTOR_TYPE, value)
+
+    def add_vision_projection_dim(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.PROJECTION_DIM, value)
+
+    def add_vision_patch_size(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.PATCH_SIZE, value)
+
+    def add_vision_embedding_length(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.EMBEDDING_LENGTH, value)
+
+    def add_vision_feed_forward_length(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.FEED_FORWARD_LENGTH, value)
+
+    def add_vision_block_count(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.BLOCK_COUNT, value)
+
+    def add_vision_head_count(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.Attention.HEAD_COUNT, value)
+
+    def add_vision_attention_layernorm_eps(self, value: float) -> None:
+        self.add_float32(Keys.ClipVision.Attention.LAYERNORM_EPS, value)
+
+    def add_vision_image_size(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.IMAGE_SIZE, value)
+
+    def add_vision_preproc_image_size(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.PREPROC_IMAGE_SIZE, value)
+
+    def add_vision_image_mean(self, values: Sequence[float]) -> None:
+        self.add_array(Keys.ClipVision.IMAGE_MEAN, values)
+
+    def add_vision_image_std(self, values: Sequence[float]) -> None:
+        self.add_array(Keys.ClipVision.IMAGE_STD, values)
+
+    def add_vision_spatial_merge_size(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.SPATIAL_MERGE_SIZE, value)
+
+    def add_vision_use_gelu(self, value: bool) -> None:
+        self.add_bool(Keys.ClipVision.USE_GELU, value)
+
+    def add_vision_use_silu(self, value: bool) -> None:
+        self.add_bool(Keys.ClipVision.USE_SILU, value)
+
+    def add_vision_projector_scale_factor(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.Projector.SCALE_FACTOR, value)
+
+    def add_vision_n_wa_pattern(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.N_WA_PATTERN, value)
+
+    def add_vision_is_deepstack_layers(self, layers: Sequence[bool]) -> None:
+        self.add_array(Keys.ClipVision.IS_DEEPSTACK_LAYERS, layers)
+
+    # audio models
+
+    def add_audio_projection_dim(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.PROJECTION_DIM, value)
+
+    def add_audio_embedding_length(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.EMBEDDING_LENGTH, value)
+
+    def add_audio_feed_forward_length(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.FEED_FORWARD_LENGTH, value)
+
+    def add_audio_block_count(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.BLOCK_COUNT, value)
+
+    def add_audio_head_count(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.Attention.HEAD_COUNT, value)
+
+    def add_audio_attention_layernorm_eps(self, value: float) -> None:
+        self.add_float32(Keys.ClipAudio.Attention.LAYERNORM_EPS, value)
+
+    def add_audio_num_mel_bins(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.NUM_MEL_BINS, value)
+
+    def add_audio_stack_factor(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.Projector.STACK_FACTOR, value)
+
+    def add_xielu_alpha_p(self, values: Sequence[float]):
+        self.add_array(Keys.xIELU.ALPHA_P, values)
+
+    def add_xielu_alpha_n(self, values: Sequence[float]):
+        self.add_array(Keys.xIELU.ALPHA_N, values)
+
+    def add_xielu_beta(self, values: Sequence[float]):
+        self.add_array(Keys.xIELU.BETA, values)
+
+    def add_xielu_eps(self, values: Sequence[float]):
+        self.add_array(Keys.xIELU.EPS, values)
+
+    # diffusion models
+
+    def add_diffusion_shift_logits(self, value: bool) -> None:
+        self.add_bool(Keys.Diffusion.SHIFT_LOGITS, value)
+
     def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
         pack_prefix = ''
         if not skip_pack_prefix:
             pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
         return struct.pack(f'{pack_prefix}{fmt}', value)
 
-    def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
+    def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool, sub_type: GGUFValueType | None = None) -> bytes:
         kv_data = bytearray()
 
         if add_vtype:
@@ -895,7 +1147,9 @@ class GGUFWriter:
         if len(val) == 0:
             raise ValueError("Invalid GGUF metadata array. Empty array")
 
-        if isinstance(val, bytes):
+        if sub_type is not None:
+            ltype = sub_type
+        elif isinstance(val, bytes):
             ltype = GGUFValueType.UINT8
         else:
             ltype = GGUFValueType.get_type(val[0])
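
On the writer side, `GGUFValue` and `add_key_value()` gain an optional `sub_type`, duplicated keys now warn instead of raising, and the per-source `general.dataset.*` helpers replace `add_datasets()`. A hedged usage sketch (the output path, architecture, and values are illustrative only, and the write sequence follows the writer's existing header/KV flow):

```python
from gguf.constants import GGUFValueType
from gguf.gguf_writer import GGUFWriter

writer = GGUFWriter("out.gguf", arch="llama")  # placeholder path/arch

# New per-dataset metadata helpers, mirroring the base_model.* ones.
writer.add_dataset_count(1)
writer.add_dataset_name(0, "example-dataset")

# add_key_value() now carries an explicit element type for arrays, so a
# read-modify-write round trip preserves it; re-adding a key only warns.
writer.add_key_value("general.tags", ["demo"], GGUFValueType.ARRAY,
                     sub_type=GGUFValueType.STRING)

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()
```
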
diff --git a/gguf-py/scripts/gguf_dump.py b/gguf-py/scripts/gguf_dump.py
index 1b654654..28c728bd 100755
--- a/gguf-py/scripts/gguf_dump.py
+++ b/gguf-py/scripts/gguf_dump.py
@@ -21,11 +21,11 @@ logger = logging.getLogger("gguf-dump")
 
 
 def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
-    host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG'
+    file_endian = reader.endianess.name
     if reader.byte_order == 'S':
-        file_endian = 'BIG' if host_endian == 'LITTLE' else 'LITTLE'
+        host_endian = 'BIG' if file_endian == 'LITTLE' else 'LITTLE'
     else:
-        file_endian = host_endian
+        host_endian = file_endian
     return (host_endian, file_endian)
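
For `gguf_dump.py`, host endianness is now derived from the reader itself instead of re-probing numpy byte order. The equivalent logic, spelled out as a standalone sketch (placeholder path):

```python
from gguf.gguf_reader import GGUFReader

reader = GGUFReader("model.gguf")  # placeholder path

file_endian = reader.endianess.name
if reader.byte_order == 'S':
    # 'S' marks a file that is byte-swapped relative to the host.
    host_endian = 'BIG' if file_endian == 'LITTLE' else 'LITTLE'
else:
    host_endian = file_endian
print(f"host: {host_endian}, file: {file_endian}")
```
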