mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-30 19:31:48 +00:00
Make gguf-py stuff work with numpy 2.0 (#991)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -6,6 +6,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from typing import Any, Literal, NamedTuple, TypeVar, Union
|
from typing import Any, Literal, NamedTuple, TypeVar, Union
|
||||||
|
|
||||||
@@ -15,7 +16,6 @@ import numpy.typing as npt
|
|||||||
from .quants import quant_shape_to_byte_shape
|
from .quants import quant_shape_to_byte_shape
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# Allow running file in package as a script.
|
# Allow running file in package as a script.
|
||||||
@@ -28,6 +28,7 @@ from gguf.constants import (
|
|||||||
GGUF_VERSION,
|
GGUF_VERSION,
|
||||||
GGMLQuantizationType,
|
GGMLQuantizationType,
|
||||||
GGUFValueType,
|
GGUFValueType,
|
||||||
|
GGUFEndian,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -53,6 +54,48 @@ class ReaderField(NamedTuple):
|
|||||||
|
|
||||||
types: list[GGUFValueType] = []
|
types: list[GGUFValueType] = []
|
||||||
|
|
||||||
|
def contents(self, index_or_slice: int | slice = slice(None)) -> Any:
|
||||||
|
if self.types:
|
||||||
|
to_string = lambda x: str(x.tobytes(), encoding='utf-8') # noqa: E731
|
||||||
|
main_type = self.types[0]
|
||||||
|
|
||||||
|
if main_type == GGUFValueType.ARRAY:
|
||||||
|
sub_type = self.types[-1]
|
||||||
|
|
||||||
|
if sub_type == GGUFValueType.STRING:
|
||||||
|
indices = self.data[index_or_slice]
|
||||||
|
|
||||||
|
if isinstance(index_or_slice, int):
|
||||||
|
return to_string(self.parts[indices]) # type: ignore
|
||||||
|
else:
|
||||||
|
return [to_string(self.parts[idx]) for idx in indices] # type: ignore
|
||||||
|
else:
|
||||||
|
# FIXME: When/if _get_field_parts() support multi-dimensional arrays, this must do so too
|
||||||
|
|
||||||
|
# Check if it's unsafe to perform slice optimization on data
|
||||||
|
# if any(True for idx in self.data if len(self.parts[idx]) != 1):
|
||||||
|
# optim_slice = slice(None)
|
||||||
|
# else:
|
||||||
|
# optim_slice = index_or_slice
|
||||||
|
# index_or_slice = slice(None)
|
||||||
|
|
||||||
|
# if isinstance(optim_slice, int):
|
||||||
|
# return self.parts[self.data[optim_slice]].tolist()[0]
|
||||||
|
# else:
|
||||||
|
# return [pv for idx in self.data[optim_slice] for pv in self.parts[idx].tolist()][index_or_slice]
|
||||||
|
|
||||||
|
if isinstance(index_or_slice, int):
|
||||||
|
return self.parts[self.data[index_or_slice]].tolist()[0]
|
||||||
|
else:
|
||||||
|
return [pv for idx in self.data[index_or_slice] for pv in self.parts[idx].tolist()]
|
||||||
|
|
||||||
|
if main_type == GGUFValueType.STRING:
|
||||||
|
return to_string(self.parts[-1])
|
||||||
|
else:
|
||||||
|
return self.parts[-1].tolist()[0]
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
class ReaderTensor(NamedTuple):
|
class ReaderTensor(NamedTuple):
|
||||||
name: str
|
name: str
|
||||||
@@ -101,10 +144,19 @@ class GGUFReader:
|
|||||||
# If we get 0 here that means it's (probably) a GGUF file created for
|
# If we get 0 here that means it's (probably) a GGUF file created for
|
||||||
# the opposite byte order of the machine this script is running on.
|
# the opposite byte order of the machine this script is running on.
|
||||||
self.byte_order = 'S'
|
self.byte_order = 'S'
|
||||||
temp_version = temp_version.newbyteorder(self.byte_order)
|
temp_version = temp_version.view(temp_version.dtype.newbyteorder(self.byte_order))
|
||||||
version = temp_version[0]
|
version = temp_version[0]
|
||||||
if version not in READER_SUPPORTED_VERSIONS:
|
if version not in READER_SUPPORTED_VERSIONS:
|
||||||
raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
|
raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
|
||||||
|
if sys.byteorder == "little":
|
||||||
|
# Host is little endian
|
||||||
|
host_endian = GGUFEndian.LITTLE
|
||||||
|
swapped_endian = GGUFEndian.BIG
|
||||||
|
else:
|
||||||
|
# Sorry PDP or other weird systems that don't use BE or LE.
|
||||||
|
host_endian = GGUFEndian.BIG
|
||||||
|
swapped_endian = GGUFEndian.LITTLE
|
||||||
|
self.endianess = swapped_endian if self.byte_order == "S" else host_endian
|
||||||
self.fields: OrderedDict[str, ReaderField] = OrderedDict()
|
self.fields: OrderedDict[str, ReaderField] = OrderedDict()
|
||||||
self.tensors: list[ReaderTensor] = []
|
self.tensors: list[ReaderTensor] = []
|
||||||
offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
|
offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
|
||||||
@@ -145,11 +197,8 @@ class GGUFReader:
|
|||||||
count = int(count)
|
count = int(count)
|
||||||
itemsize = int(np.empty([], dtype = dtype).itemsize)
|
itemsize = int(np.empty([], dtype = dtype).itemsize)
|
||||||
end_offs = offset + itemsize * count
|
end_offs = offset + itemsize * count
|
||||||
return (
|
arr = self.data[offset:end_offs].view(dtype=dtype)[:count]
|
||||||
self.data[offset:end_offs]
|
return arr.view(arr.dtype.newbyteorder(self.byte_order if override_order is None else override_order))
|
||||||
.view(dtype = dtype)[:count]
|
|
||||||
.newbyteorder(override_order or self.byte_order)
|
|
||||||
)
|
|
||||||
|
|
||||||
def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
|
def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
|
||||||
if field.name in self.fields:
|
if field.name in self.fields:
|
||||||
@@ -191,6 +240,7 @@ class GGUFReader:
|
|||||||
offs += int(alen.nbytes)
|
offs += int(alen.nbytes)
|
||||||
aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
|
aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
|
||||||
data_idxs: list[int] = []
|
data_idxs: list[int] = []
|
||||||
|
# FIXME: Handle multi-dimensional arrays properly instead of flattening
|
||||||
for idx in range(alen[0]):
|
for idx in range(alen[0]):
|
||||||
curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
|
curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
|
||||||
if idx == 0:
|
if idx == 0:
|
||||||
@@ -201,7 +251,7 @@ class GGUFReader:
|
|||||||
offs += curr_size
|
offs += curr_size
|
||||||
return offs - orig_offs, aparts, data_idxs, types
|
return offs - orig_offs, aparts, data_idxs, types
|
||||||
# We can't deal with this one.
|
# We can't deal with this one.
|
||||||
raise ValueError('Unknown/unhandled field type {gtype}')
|
raise ValueError(f'Unknown/unhandled field type {gtype}')
|
||||||
|
|
||||||
def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
|
def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
|
||||||
offs = orig_offs
|
offs = orig_offs
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ class TensorInfo:
|
|||||||
class GGUFValue:
|
class GGUFValue:
|
||||||
value: Any
|
value: Any
|
||||||
type: GGUFValueType
|
type: GGUFValueType
|
||||||
|
sub_type: GGUFValueType | None = None
|
||||||
|
|
||||||
|
|
||||||
class WriterState(Enum):
|
class WriterState(Enum):
|
||||||
@@ -137,8 +138,9 @@ class GGUFWriter:
|
|||||||
size = prod(shape)
|
size = prod(shape)
|
||||||
|
|
||||||
if "_exps." in name:
|
if "_exps." in name:
|
||||||
expert_params += (size // shape[-3])
|
expert_count = shape[-2 if ".bias" in name else -3]
|
||||||
expert_sum += shape[-3]
|
expert_params += (size // expert_count)
|
||||||
|
expert_sum += expert_count
|
||||||
n_expert_tensors += 1
|
n_expert_tensors += 1
|
||||||
else:
|
else:
|
||||||
shared_params += size
|
shared_params += size
|
||||||
@@ -238,7 +240,7 @@ class GGUFWriter:
|
|||||||
|
|
||||||
for key, val in kv_data.items():
|
for key, val in kv_data.items():
|
||||||
kv_bytes += self._pack_val(key, GGUFValueType.STRING, add_vtype=False)
|
kv_bytes += self._pack_val(key, GGUFValueType.STRING, add_vtype=False)
|
||||||
kv_bytes += self._pack_val(val.value, val.type, add_vtype=True)
|
kv_bytes += self._pack_val(val.value, val.type, add_vtype=True, sub_type=val.sub_type)
|
||||||
|
|
||||||
fout.write(kv_bytes)
|
fout.write(kv_bytes)
|
||||||
|
|
||||||
@@ -268,11 +270,11 @@ class GGUFWriter:
|
|||||||
fout.flush()
|
fout.flush()
|
||||||
self.state = WriterState.TI_DATA
|
self.state = WriterState.TI_DATA
|
||||||
|
|
||||||
def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
|
def add_key_value(self, key: str, val: Any, vtype: GGUFValueType, sub_type: GGUFValueType | None = None) -> None:
|
||||||
if any(key in kv_data for kv_data in self.kv_data):
|
if any(key in kv_data for kv_data in self.kv_data):
|
||||||
raise ValueError(f'Duplicated key name {key!r}')
|
logger.warning(f'Duplicated key name {key!r}, overwriting it with new value {val!r} of type {vtype.name}')
|
||||||
|
|
||||||
self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
|
self.kv_data[0][key] = GGUFValue(value=val, type=vtype, sub_type=sub_type)
|
||||||
|
|
||||||
def add_uint8(self, key: str, val: int) -> None:
|
def add_uint8(self, key: str, val: int) -> None:
|
||||||
self.add_key_value(key,val, GGUFValueType.UINT8)
|
self.add_key_value(key,val, GGUFValueType.UINT8)
|
||||||
@@ -569,6 +571,9 @@ class GGUFWriter:
|
|||||||
def add_base_model_organization(self, source_id: int, organization: str) -> None:
|
def add_base_model_organization(self, source_id: int, organization: str) -> None:
|
||||||
self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)
|
self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)
|
||||||
|
|
||||||
|
def add_base_model_description(self, source_id: int, description: str) -> None:
|
||||||
|
self.add_string(Keys.General.BASE_MODEL_DESCRIPTION.format(id=source_id), description)
|
||||||
|
|
||||||
def add_base_model_url(self, source_id: int, url: str) -> None:
|
def add_base_model_url(self, source_id: int, url: str) -> None:
|
||||||
self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
|
self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
|
||||||
|
|
||||||
@@ -581,15 +586,42 @@ class GGUFWriter:
|
|||||||
def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
|
def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
|
||||||
self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)
|
self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)
|
||||||
|
|
||||||
|
def add_dataset_count(self, source_count: int) -> None:
|
||||||
|
self.add_uint32(Keys.General.DATASET_COUNT, source_count)
|
||||||
|
|
||||||
|
def add_dataset_name(self, source_id: int, name: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_NAME.format(id=source_id), name)
|
||||||
|
|
||||||
|
def add_dataset_author(self, source_id: int, author: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_AUTHOR.format(id=source_id), author)
|
||||||
|
|
||||||
|
def add_dataset_version(self, source_id: int, version: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_VERSION.format(id=source_id), version)
|
||||||
|
|
||||||
|
def add_dataset_organization(self, source_id: int, organization: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_ORGANIZATION.format(id=source_id), organization)
|
||||||
|
|
||||||
|
def add_dataset_description(self, source_id: int, description: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_DESCRIPTION.format(id=source_id), description)
|
||||||
|
|
||||||
|
def add_dataset_url(self, source_id: int, url: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_URL.format(id=source_id), url)
|
||||||
|
|
||||||
|
def add_dataset_doi(self, source_id: int, doi: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_DOI.format(id=source_id), doi)
|
||||||
|
|
||||||
|
def add_dataset_uuid(self, source_id: int, uuid: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_UUID.format(id=source_id), uuid)
|
||||||
|
|
||||||
|
def add_dataset_repo_url(self, source_id: int, repo_url: str) -> None:
|
||||||
|
self.add_string(Keys.General.DATASET_REPO_URL.format(id=source_id), repo_url)
|
||||||
|
|
||||||
def add_tags(self, tags: Sequence[str]) -> None:
|
def add_tags(self, tags: Sequence[str]) -> None:
|
||||||
self.add_array(Keys.General.TAGS, tags)
|
self.add_array(Keys.General.TAGS, tags)
|
||||||
|
|
||||||
def add_languages(self, languages: Sequence[str]) -> None:
|
def add_languages(self, languages: Sequence[str]) -> None:
|
||||||
self.add_array(Keys.General.LANGUAGES, languages)
|
self.add_array(Keys.General.LANGUAGES, languages)
|
||||||
|
|
||||||
def add_datasets(self, datasets: Sequence[str]) -> None:
|
|
||||||
self.add_array(Keys.General.DATASETS, datasets)
|
|
||||||
|
|
||||||
def add_tensor_data_layout(self, layout: str) -> None:
|
def add_tensor_data_layout(self, layout: str) -> None:
|
||||||
self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
|
self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
|
||||||
|
|
||||||
@@ -602,6 +634,24 @@ class GGUFWriter:
|
|||||||
def add_embedding_length(self, length: int) -> None:
|
def add_embedding_length(self, length: int) -> None:
|
||||||
self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
|
self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_features_length(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_posnet_embedding_length(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.PosNet.EMBEDDING_LENGTH.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_posnet_block_count(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.PosNet.BLOCK_COUNT.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_convnext_embedding_length(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.ConvNext.EMBEDDING_LENGTH.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_convnext_block_count(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_shortconv_l_cache(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.ShortConv.L_CACHE.format(arch=self.arch), length)
|
||||||
|
|
||||||
def add_block_count(self, length: int) -> None:
|
def add_block_count(self, length: int) -> None:
|
||||||
self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
|
self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
|
||||||
|
|
||||||
@@ -620,12 +670,30 @@ class GGUFWriter:
|
|||||||
def add_expert_shared_feed_forward_length(self, length: int) -> None:
|
def add_expert_shared_feed_forward_length(self, length: int) -> None:
|
||||||
self.add_uint32(Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
|
self.add_uint32(Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_expert_chunk_feed_forward_length(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.EXPERT_CHUNK_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
|
||||||
|
|
||||||
def add_parallel_residual(self, use: bool) -> None:
|
def add_parallel_residual(self, use: bool) -> None:
|
||||||
self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
|
self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
|
||||||
|
|
||||||
def add_decoder_start_token_id(self, id: int) -> None:
|
def add_decoder_start_token_id(self, id: int) -> None:
|
||||||
self.add_uint32(Keys.LLM.DECODER_START_TOKEN_ID.format(arch=self.arch), id)
|
self.add_uint32(Keys.LLM.DECODER_START_TOKEN_ID.format(arch=self.arch), id)
|
||||||
|
|
||||||
|
def add_decoder_block_count(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.DECODER_BLOCK_COUNT.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_embedding_length_per_layer_input(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.EMBD_LENGTH_PER_LAYER_INP.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_altup_active_idx(self, val: int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.ALTUP_ACTIVE_IDX.format(arch=self.arch), val)
|
||||||
|
|
||||||
|
def add_altup_num_inputs(self, val: int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.ALTUP_NUM_INPUTS.format(arch=self.arch), val)
|
||||||
|
|
||||||
|
def add_activation_sparsity_scale(self, values: Sequence[float]) -> None:
|
||||||
|
self.add_array(Keys.LLM.ACTIVATION_SPARSITY_SCALE.format(arch=self.arch), values)
|
||||||
|
|
||||||
def add_head_count(self, count: int | Sequence[int]) -> None:
|
def add_head_count(self, count: int | Sequence[int]) -> None:
|
||||||
if isinstance(count, int):
|
if isinstance(count, int):
|
||||||
self.add_uint32(Keys.Attention.HEAD_COUNT.format(arch=self.arch), count)
|
self.add_uint32(Keys.Attention.HEAD_COUNT.format(arch=self.arch), count)
|
||||||
@@ -644,12 +712,28 @@ class GGUFWriter:
|
|||||||
def add_value_length(self, length: int) -> None:
|
def add_value_length(self, length: int) -> None:
|
||||||
self.add_uint32(Keys.Attention.VALUE_LENGTH.format(arch=self.arch), length)
|
self.add_uint32(Keys.Attention.VALUE_LENGTH.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_key_length_mla(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.Attention.KEY_LENGTH_MLA.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_value_length_mla(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.Attention.VALUE_LENGTH_MLA.format(arch=self.arch), length)
|
||||||
|
|
||||||
def add_max_alibi_bias(self, bias: float) -> None:
|
def add_max_alibi_bias(self, bias: float) -> None:
|
||||||
self.add_float32(Keys.Attention.MAX_ALIBI_BIAS.format(arch=self.arch), bias)
|
self.add_float32(Keys.Attention.MAX_ALIBI_BIAS.format(arch=self.arch), bias)
|
||||||
|
|
||||||
def add_clamp_kqv(self, value: float) -> None:
|
def add_clamp_kqv(self, value: float) -> None:
|
||||||
self.add_float32(Keys.Attention.CLAMP_KQV.format(arch=self.arch), value)
|
self.add_float32(Keys.Attention.CLAMP_KQV.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_shared_kv_layers(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_sliding_window_pattern(self, value: Sequence[bool]) -> None:
|
||||||
|
self.add_array(Keys.Attention.SLIDING_WINDOW_PATTERN.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_dense_features_dims(self, dense:str, in_f:int, out_f:int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.DENSE_FEAT_IN_SIZE.format(arch=self.arch, dense=dense), in_f)
|
||||||
|
self.add_uint32(Keys.LLM.DENSE_FEAT_OUT_SIZE.format(arch=self.arch, dense=dense), out_f)
|
||||||
|
|
||||||
def add_logit_scale(self, value: float) -> None:
|
def add_logit_scale(self, value: float) -> None:
|
||||||
self.add_float32(Keys.LLM.LOGIT_SCALE.format(arch=self.arch), value)
|
self.add_float32(Keys.LLM.LOGIT_SCALE.format(arch=self.arch), value)
|
||||||
|
|
||||||
@@ -686,15 +770,57 @@ class GGUFWriter:
|
|||||||
def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
|
def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
|
||||||
self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
|
self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
|
||||||
|
|
||||||
|
def add_expert_group_scale(self, value: float) -> None:
|
||||||
|
self.add_float32(Keys.LLM.EXPERT_GROUP_SCALE.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_experts_per_group(self, count: int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.EXPERTS_PER_GROUP.format(arch=self.arch), count)
|
||||||
|
|
||||||
|
def add_moe_every_n_layers(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.MOE_EVERY_N_LAYERS.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_nextn_predict_layers(self, count: int) -> None:
|
def add_nextn_predict_layers(self, count: int) -> None:
|
||||||
self.add_uint32(Keys.LLM.NEXTN_PREDICT_LAYERS.format(arch=self.arch), count)
|
self.add_uint32(Keys.LLM.NEXTN_PREDICT_LAYERS.format(arch=self.arch), count)
|
||||||
|
|
||||||
|
def add_swin_norm(self, value: bool) -> None:
|
||||||
|
self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_rescale_every_n_layers(self, count: int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.RESCALE_EVERY_N_LAYERS.format(arch=self.arch), count)
|
||||||
|
|
||||||
|
def add_time_mix_extra_dim(self, dim: int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.TIME_MIX_EXTRA_DIM.format(arch=self.arch), dim)
|
||||||
|
|
||||||
|
def add_time_decay_extra_dim(self, dim: int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.TIME_DECAY_EXTRA_DIM.format(arch=self.arch), dim)
|
||||||
|
|
||||||
|
def add_residual_scale(self, value: float) -> None:
|
||||||
|
self.add_float32(Keys.LLM.RESIDUAL_SCALE.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_embedding_scale(self, value: float) -> None:
|
||||||
|
self.add_float32(Keys.LLM.EMBEDDING_SCALE.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_wkv_head_size(self, size: int) -> None:
|
||||||
|
self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
|
||||||
|
|
||||||
|
def add_token_shift_count(self, count: int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.TOKEN_SHIFT_COUNT.format(arch=self.arch), count)
|
||||||
|
|
||||||
|
def add_interleave_moe_layer_step(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.INTERLEAVE_MOE_LAYER_STEP.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_layer_norm_eps(self, value: float) -> None:
|
def add_layer_norm_eps(self, value: float) -> None:
|
||||||
self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
|
self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_layer_norm_rms_eps(self, value: float) -> None:
|
def add_layer_norm_rms_eps(self, value: float) -> None:
|
||||||
self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)
|
self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_group_norm_eps(self, value: float) -> None:
|
||||||
|
self.add_float32(Keys.Attention.GROUPNORM_EPS.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_group_norm_groups(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.Attention.GROUPNORM_GROUPS.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_causal_attention(self, value: bool) -> None:
|
def add_causal_attention(self, value: bool) -> None:
|
||||||
self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
|
self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
|
||||||
|
|
||||||
@@ -704,12 +830,27 @@ class GGUFWriter:
|
|||||||
def add_kv_lora_rank(self, length: int) -> None:
|
def add_kv_lora_rank(self, length: int) -> None:
|
||||||
self.add_uint32(Keys.Attention.KV_LORA_RANK.format(arch=self.arch), length)
|
self.add_uint32(Keys.Attention.KV_LORA_RANK.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_decay_lora_rank(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.Attention.DECAY_LORA_RANK.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_iclr_lora_rank(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.Attention.ICLR_LORA_RANK.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_value_residual_mix_lora_rank(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.Attention.VALUE_RESIDUAL_MIX_LORA_RANK.format(arch=self.arch), length)
|
||||||
|
|
||||||
|
def add_gate_lora_rank(self, length: int) -> None:
|
||||||
|
self.add_uint32(Keys.Attention.GATE_LORA_RANK.format(arch=self.arch), length)
|
||||||
|
|
||||||
def add_relative_attn_buckets_count(self, value: int) -> None:
|
def add_relative_attn_buckets_count(self, value: int) -> None:
|
||||||
self.add_uint32(Keys.Attention.REL_BUCKETS_COUNT.format(arch=self.arch), value)
|
self.add_uint32(Keys.Attention.REL_BUCKETS_COUNT.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_sliding_window(self, value: int) -> None:
|
def add_sliding_window(self, value: int) -> None:
|
||||||
self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)
|
self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_attention_scale(self, value: float) -> None:
|
||||||
|
self.add_float32(Keys.Attention.SCALE.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_attn_output_scale(self, value: float) -> None:
|
def add_attn_output_scale(self, value: float) -> None:
|
||||||
self.add_float32(Keys.Attention.OUTPUT_SCALE.format(arch=self.arch), value)
|
self.add_float32(Keys.Attention.OUTPUT_SCALE.format(arch=self.arch), value)
|
||||||
|
|
||||||
@@ -719,9 +860,15 @@ class GGUFWriter:
|
|||||||
def add_pooling_type(self, value: PoolingType) -> None:
|
def add_pooling_type(self, value: PoolingType) -> None:
|
||||||
self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)
|
self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)
|
||||||
|
|
||||||
|
def add_num_deepstack_layers(self, count: int) -> None:
|
||||||
|
self.add_uint32(Keys.LLM.NUM_DEEPSTACK_LAYERS.format(arch=self.arch), count)
|
||||||
|
|
||||||
def add_rope_dimension_count(self, count: int) -> None:
|
def add_rope_dimension_count(self, count: int) -> None:
|
||||||
self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
|
self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
|
||||||
|
|
||||||
|
def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
|
||||||
|
self.add_array(Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch), dims)
|
||||||
|
|
||||||
def add_rope_freq_base(self, value: float) -> None:
|
def add_rope_freq_base(self, value: float) -> None:
|
||||||
self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
|
self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
|
||||||
|
|
||||||
@@ -767,6 +914,12 @@ class GGUFWriter:
|
|||||||
def add_ssm_time_step_rank(self, value: int) -> None:
|
def add_ssm_time_step_rank(self, value: int) -> None:
|
||||||
self.add_uint32(Keys.SSM.TIME_STEP_RANK.format(arch=self.arch), value)
|
self.add_uint32(Keys.SSM.TIME_STEP_RANK.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_ssm_group_count(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.SSM.GROUP_COUNT.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_ssm_dt_b_c_rms(self, value: bool) -> None:
|
||||||
|
self.add_bool(Keys.SSM.DT_B_C_RMS.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_tokenizer_model(self, model: str) -> None:
|
def add_tokenizer_model(self, model: str) -> None:
|
||||||
self.add_string(Keys.Tokenizer.MODEL, model)
|
self.add_string(Keys.Tokenizer.MODEL, model)
|
||||||
|
|
||||||
@@ -803,9 +956,6 @@ class GGUFWriter:
|
|||||||
def add_pad_token_id(self, id: int) -> None:
|
def add_pad_token_id(self, id: int) -> None:
|
||||||
self.add_uint32(Keys.Tokenizer.PAD_ID, id)
|
self.add_uint32(Keys.Tokenizer.PAD_ID, id)
|
||||||
|
|
||||||
def add_cls_token_id(self, id: int) -> None:
|
|
||||||
self.add_uint32(Keys.Tokenizer.CLS_ID, id)
|
|
||||||
|
|
||||||
def add_mask_token_id(self, id: int) -> None:
|
def add_mask_token_id(self, id: int) -> None:
|
||||||
self.add_uint32(Keys.Tokenizer.MASK_ID, id)
|
self.add_uint32(Keys.Tokenizer.MASK_ID, id)
|
||||||
|
|
||||||
@@ -815,13 +965,16 @@ class GGUFWriter:
|
|||||||
def add_add_eos_token(self, value: bool) -> None:
|
def add_add_eos_token(self, value: bool) -> None:
|
||||||
self.add_bool(Keys.Tokenizer.ADD_EOS, value)
|
self.add_bool(Keys.Tokenizer.ADD_EOS, value)
|
||||||
|
|
||||||
|
def add_add_sep_token(self, value: bool) -> None:
|
||||||
|
self.add_bool(Keys.Tokenizer.ADD_SEP, value)
|
||||||
|
|
||||||
def add_add_space_prefix(self, value: bool) -> None:
|
def add_add_space_prefix(self, value: bool) -> None:
|
||||||
self.add_bool(Keys.Tokenizer.ADD_PREFIX, value)
|
self.add_bool(Keys.Tokenizer.ADD_PREFIX, value)
|
||||||
|
|
||||||
def add_remove_extra_whitespaces(self, value: bool) -> None:
|
def add_remove_extra_whitespaces(self, value: bool) -> None:
|
||||||
self.add_bool(Keys.Tokenizer.REMOVE_EXTRA_WS, value)
|
self.add_bool(Keys.Tokenizer.REMOVE_EXTRA_WS, value)
|
||||||
|
|
||||||
def add_precompiled_charsmap(self, charsmap: Sequence[bytes]) -> None:
|
def add_precompiled_charsmap(self, charsmap: bytes) -> None:
|
||||||
self.add_array(Keys.Tokenizer.PRECOMPILED_CHARSMAP, charsmap)
|
self.add_array(Keys.Tokenizer.PRECOMPILED_CHARSMAP, charsmap)
|
||||||
|
|
||||||
def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
|
def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
|
||||||
@@ -853,28 +1006,127 @@ class GGUFWriter:
|
|||||||
|
|
||||||
self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
|
self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
|
||||||
|
|
||||||
def add_prefix_token_id(self, id: int) -> None:
|
|
||||||
self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)
|
|
||||||
|
|
||||||
def add_suffix_token_id(self, id: int) -> None:
|
|
||||||
self.add_uint32(Keys.Tokenizer.SUFFIX_ID, id)
|
|
||||||
|
|
||||||
def add_middle_token_id(self, id: int) -> None:
|
|
||||||
self.add_uint32(Keys.Tokenizer.MIDDLE_ID, id)
|
|
||||||
|
|
||||||
def add_eot_token_id(self, id: int) -> None:
|
def add_eot_token_id(self, id: int) -> None:
|
||||||
self.add_uint32(Keys.Tokenizer.EOT_ID, id)
|
self.add_uint32(Keys.Tokenizer.EOT_ID, id)
|
||||||
|
|
||||||
def add_eom_token_id(self, id: int) -> None:
|
def add_eom_token_id(self, id: int) -> None:
|
||||||
self.add_uint32(Keys.Tokenizer.EOM_ID, id)
|
self.add_uint32(Keys.Tokenizer.EOM_ID, id)
|
||||||
|
|
||||||
|
def add_classifier_output_labels(self, labels: Sequence[str]) -> None:
|
||||||
|
self.add_array(Keys.Classifier.OUTPUT_LABELS.format(arch=self.arch), labels)
|
||||||
|
|
||||||
|
# for vision models
|
||||||
|
|
||||||
|
def add_clip_has_vision_encoder(self, value: bool) -> None:
|
||||||
|
self.add_bool(Keys.Clip.HAS_VISION_ENCODER, value)
|
||||||
|
|
||||||
|
def add_clip_has_audio_encoder(self, value: bool) -> None:
|
||||||
|
self.add_bool(Keys.Clip.HAS_AUDIO_ENCODER, value)
|
||||||
|
|
||||||
|
def add_clip_projector_type(self, value: str) -> None:
|
||||||
|
self.add_string(Keys.Clip.PROJECTOR_TYPE, value)
|
||||||
|
|
||||||
|
def add_vision_projection_dim(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.ClipVision.PROJECTION_DIM, value)
|
||||||
|
|
||||||
|
def add_vision_patch_size(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.ClipVision.PATCH_SIZE, value)
|
||||||
|
|
||||||
|
def add_vision_embedding_length(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.ClipVision.EMBEDDING_LENGTH, value)
|
||||||
|
|
||||||
|
def add_vision_feed_forward_length(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.ClipVision.FEED_FORWARD_LENGTH, value)
|
||||||
|
|
||||||
|
def add_vision_block_count(self, value: int) -> None:
    """Store the vision encoder block count as a uint32."""
    self.add_uint32(Keys.ClipVision.BLOCK_COUNT, value)
def add_vision_head_count(self, value: int) -> None:
    """Store the vision attention head count as a uint32."""
    self.add_uint32(Keys.ClipVision.Attention.HEAD_COUNT, value)
def add_vision_attention_layernorm_eps(self, value: float) -> None:
    """Store the vision attention layer-norm epsilon as a float32."""
    self.add_float32(Keys.ClipVision.Attention.LAYERNORM_EPS, value)
def add_vision_image_size(self, value: int) -> None:
    """Store the vision input image size as a uint32."""
    self.add_uint32(Keys.ClipVision.IMAGE_SIZE, value)
def add_vision_preproc_image_size(self, value: int) -> None:
    """Store the vision pre-processing image size as a uint32."""
    self.add_uint32(Keys.ClipVision.PREPROC_IMAGE_SIZE, value)
def add_vision_image_mean(self, values: Sequence[float]) -> None:
    """Store the per-channel image normalization means as an array."""
    self.add_array(Keys.ClipVision.IMAGE_MEAN, values)
def add_vision_image_std(self, values: Sequence[float]) -> None:
    """Store the per-channel image normalization standard deviations as an array."""
    self.add_array(Keys.ClipVision.IMAGE_STD, values)
def add_vision_spatial_merge_size(self, value: int) -> None:
    """Store the vision spatial merge size as a uint32."""
    self.add_uint32(Keys.ClipVision.SPATIAL_MERGE_SIZE, value)
def add_vision_use_gelu(self, value: bool) -> None:
    """Store whether the vision encoder uses GELU activation (boolean flag)."""
    self.add_bool(Keys.ClipVision.USE_GELU, value)
def add_vision_use_silu(self, value: bool) -> None:
    """Store whether the vision encoder uses SiLU activation (boolean flag)."""
    self.add_bool(Keys.ClipVision.USE_SILU, value)
def add_vision_projector_scale_factor(self, value: int) -> None:
    """Store the vision projector scale factor as a uint32."""
    self.add_uint32(Keys.ClipVision.Projector.SCALE_FACTOR, value)
def add_vision_n_wa_pattern(self, value: int) -> None:
    """Store the vision window-attention pattern period as a uint32."""
    self.add_uint32(Keys.ClipVision.N_WA_PATTERN, value)
def add_vision_is_deepstack_layers(self, layers: Sequence[bool]) -> None:
    """Store per-layer deepstack flags as a boolean array."""
    self.add_array(Keys.ClipVision.IS_DEEPSTACK_LAYERS, layers)
# audio models
def add_audio_projection_dim(self, value: int) -> None:
    """Store the audio projection dimension as a uint32."""
    self.add_uint32(Keys.ClipAudio.PROJECTION_DIM, value)
def add_audio_embedding_length(self, value: int) -> None:
    """Store the audio embedding length as a uint32."""
    self.add_uint32(Keys.ClipAudio.EMBEDDING_LENGTH, value)
def add_audio_feed_forward_length(self, value: int) -> None:
    """Store the audio feed-forward length as a uint32."""
    self.add_uint32(Keys.ClipAudio.FEED_FORWARD_LENGTH, value)
def add_audio_block_count(self, value: int) -> None:
    """Store the audio encoder block count as a uint32."""
    self.add_uint32(Keys.ClipAudio.BLOCK_COUNT, value)
def add_audio_head_count(self, value: int) -> None:
    """Store the audio attention head count as a uint32."""
    self.add_uint32(Keys.ClipAudio.Attention.HEAD_COUNT, value)
def add_audio_attention_layernorm_eps(self, value: float) -> None:
    """Store the audio attention layer-norm epsilon as a float32."""
    self.add_float32(Keys.ClipAudio.Attention.LAYERNORM_EPS, value)
def add_audio_num_mel_bins(self, value: int) -> None:
    """Store the number of mel-spectrogram bins as a uint32."""
    self.add_uint32(Keys.ClipAudio.NUM_MEL_BINS, value)
def add_audio_stack_factor(self, value: int) -> None:
    """Store the audio projector stack factor as a uint32."""
    self.add_uint32(Keys.ClipAudio.Projector.STACK_FACTOR, value)
def add_xielu_alpha_p(self, values: Sequence[float]) -> None:
    """Store the xIELU alpha_p coefficients as an array."""
    self.add_array(Keys.xIELU.ALPHA_P, values)
def add_xielu_alpha_n(self, values: Sequence[float]) -> None:
    """Store the xIELU alpha_n coefficients as an array."""
    self.add_array(Keys.xIELU.ALPHA_N, values)
def add_xielu_beta(self, values: Sequence[float]) -> None:
    """Store the xIELU beta coefficients as an array."""
    self.add_array(Keys.xIELU.BETA, values)
def add_xielu_eps(self, values: Sequence[float]) -> None:
    """Store the xIELU epsilon values as an array."""
    self.add_array(Keys.xIELU.EPS, values)
# diffusion models
def add_diffusion_shift_logits(self, value: bool) -> None:
    """Store whether diffusion logits are shifted (boolean flag)."""
    self.add_bool(Keys.Diffusion.SHIFT_LOGITS, value)
def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
    """Pack a single value with struct.

    Unless skip_pack_prefix is set, the struct format is prefixed with '<' or
    '>' according to the writer's configured endianness (self.endianess), so
    multi-byte values land in the file in the requested byte order.
    """
    pack_prefix = ''
    if not skip_pack_prefix:
        pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
    return struct.pack(f'{pack_prefix}{fmt}', value)
def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
|
def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool, sub_type: GGUFValueType | None = None) -> bytes:
|
||||||
kv_data = bytearray()
|
kv_data = bytearray()
|
||||||
|
|
||||||
if add_vtype:
|
if add_vtype:
|
||||||
@@ -895,7 +1147,9 @@ class GGUFWriter:
|
|||||||
if len(val) == 0:
|
if len(val) == 0:
|
||||||
raise ValueError("Invalid GGUF metadata array. Empty array")
|
raise ValueError("Invalid GGUF metadata array. Empty array")
|
||||||
|
|
||||||
if isinstance(val, bytes):
|
if sub_type is not None:
|
||||||
|
ltype = sub_type
|
||||||
|
elif isinstance(val, bytes):
|
||||||
ltype = GGUFValueType.UINT8
|
ltype = GGUFValueType.UINT8
|
||||||
else:
|
else:
|
||||||
ltype = GGUFValueType.get_type(val[0])
|
ltype = GGUFValueType.get_type(val[0])
|
||||||
|
|||||||
@@ -21,11 +21,11 @@ logger = logging.getLogger("gguf-dump")
|
|||||||
|
|
||||||
|
|
||||||
def get_file_host_endian(reader: "GGUFReader") -> tuple[str, str]:
    """Return (host_endian, file_endian) names for a loaded GGUF file.

    The file's endianness comes directly from the reader. If the reader had to
    byte-swap ('S' byte order), the host must be the opposite endianness of the
    file; otherwise host and file agree. (Deriving host endianness this way
    avoids numpy scalar tricks that broke under numpy 2.0.)
    """
    file_endian = reader.endianess.name
    if reader.byte_order == 'S':
        host_endian = 'BIG' if file_endian == 'LITTLE' else 'LITTLE'
    else:
        host_endian = file_endian
    return (host_endian, file_endian)
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user