Step-3.5-Flash support (#1231)

* WIP

* This works but is slow

* Turn off the up / gate clamps for now

* OK we need the clamping

* Fuse the clamp (CUDA)

* Fuse the clamp (CPU)

* WIP

* Be able to use merged q, k, v

* Be able to use merged up/gate experts

* Fuse the clamp (CUDA mmvq)

This commit is contained in:
Kawrakow
2026-02-05 08:13:22 +02:00
committed by GitHub
parent 8d952ff183
commit 9c1c74acda
22 changed files with 487 additions and 69 deletions

View File

@@ -5051,6 +5051,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
case LLM_ARCH_MINIMAX_M2:
case LLM_ARCH_MIMO2:
case LLM_ARCH_SEED_OSS:
case LLM_ARCH_STEP35:
return LLAMA_ROPE_TYPE_NEOX;
case LLM_ARCH_QWEN2VL: