Deepseek V3 support added (#176)

Co-authored-by: Stanisław Szymczyk <sszymczy@gmail.com>
This commit is contained in:
saood06
2025-01-23 10:24:10 -06:00
committed by GitHub
parent c2624b2fd3
commit 2195632581
9 changed files with 136 additions and 5 deletions

View File

@@ -251,6 +251,10 @@ class TensorNameMap:
"model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
),
MODEL_TENSOR.FFN_EXP_PROBS_B: (
"model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
),
# Feed-forward up
MODEL_TENSOR.FFN_UP: (
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox