mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-30 11:21:56 +00:00
Add support for bitnet2b_2501 model (#337)
* add support for bitnet2b_2501 model * Fixes * Support both model names --------- Co-authored-by: potassiummmm <zhou.hansong@outlook.com>
This commit is contained in:
@@ -219,6 +219,7 @@ class MODEL_ARCH(IntEnum):
|
||||
DEEPSEEK2 = auto()
|
||||
CHATGLM = auto()
|
||||
BITNET = auto()
|
||||
BITNET_25 = auto()
|
||||
T5 = auto()
|
||||
T5ENCODER = auto()
|
||||
JAIS = auto()
|
||||
@@ -351,6 +352,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||
MODEL_ARCH.DEEPSEEK2: "deepseek2",
|
||||
MODEL_ARCH.CHATGLM: "chatglm",
|
||||
MODEL_ARCH.BITNET: "bitnet",
|
||||
MODEL_ARCH.BITNET_25: "bitnet-25",
|
||||
MODEL_ARCH.T5: "t5",
|
||||
MODEL_ARCH.T5ENCODER: "t5encoder",
|
||||
MODEL_ARCH.JAIS: "jais",
|
||||
@@ -1019,6 +1021,28 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||
MODEL_TENSOR.ATTN_SUB_NORM,
|
||||
MODEL_TENSOR.FFN_SUB_NORM,
|
||||
],
|
||||
MODEL_ARCH.BITNET_25: [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
MODEL_TENSOR.OUTPUT_NORM,
|
||||
MODEL_TENSOR.OUTPUT,
|
||||
MODEL_TENSOR.ROPE_FREQS,
|
||||
MODEL_TENSOR.ATTN_NORM,
|
||||
MODEL_TENSOR.ATTN_Q,
|
||||
MODEL_TENSOR.ATTN_K,
|
||||
MODEL_TENSOR.ATTN_V,
|
||||
MODEL_TENSOR.ATTN_OUT,
|
||||
MODEL_TENSOR.ATTN_ROT_EMBD,
|
||||
MODEL_TENSOR.FFN_GATE_INP,
|
||||
MODEL_TENSOR.FFN_NORM,
|
||||
MODEL_TENSOR.FFN_GATE,
|
||||
MODEL_TENSOR.FFN_DOWN,
|
||||
MODEL_TENSOR.FFN_UP,
|
||||
MODEL_TENSOR.FFN_GATE_EXP,
|
||||
MODEL_TENSOR.FFN_DOWN_EXP,
|
||||
MODEL_TENSOR.FFN_UP_EXP,
|
||||
MODEL_TENSOR.ATTN_SUB_NORM,
|
||||
MODEL_TENSOR.FFN_SUB_NORM,
|
||||
],
|
||||
MODEL_ARCH.T5: [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
MODEL_TENSOR.OUTPUT,
|
||||
|
||||
@@ -131,6 +131,7 @@ class TensorNameMap:
|
||||
"model.layers.{bid}.self_attn.qkv_proj", # phi3
|
||||
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm
|
||||
"transformer.layers.{bid}.attn.qkv_proj", # openelm
|
||||
"layers.{bid}.attention.wqkv",
|
||||
),
|
||||
|
||||
# Attention query
|
||||
@@ -464,10 +465,14 @@ class TensorNameMap:
|
||||
|
||||
MODEL_TENSOR.ATTN_SUB_NORM: (
|
||||
"model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
|
||||
"layers.{bid}.attention.attn_sub_norm", # bitnet
|
||||
"model.layers.{bid}.self_attn.attn_sub_norm",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_SUB_NORM: (
|
||||
"model.layers.{bid}.mlp.ffn_layernorm", # bitnet
|
||||
"layers.{bid}.feed_forward.ffn_sub_norm", # bitnet
|
||||
"model.layers.{bid}.mlp.ffn_sub_norm",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.DEC_ATTN_NORM: (
|
||||
|
||||
Reference in New Issue
Block a user