Graph parallel for dense Qwen-3.5 models (#1331)

* Graph parallel for dense Qwen-3.5 models

* Cleanup
Author: Kawrakow
Date: 2026-02-27 07:03:25 +01:00
Committed by: GitHub
Parent: facc8fdc44
Commit: 1e6d36b1b4
3 changed files with 28 additions and 14 deletions


@@ -661,7 +661,7 @@ llama_context::~llama_context() {
 // kv cache helpers
 //
-static inline bool llama_qwen3next_is_recurrent_layer(const llama_hparams & hparams, uint32_t il) {
+static inline bool llama_is_recurrent_layer(const llama_hparams & hparams, uint32_t il) {
     return hparams.is_recurrent(il);
 }
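The rename drops the Qwen3-Next-specific prefix: the helper just forwards to `hparams.is_recurrent(il)`, so the same per-layer check now serves any hybrid architecture that mixes recurrent and attention layers, including the dense Qwen-3.5 models this commit targets. A minimal sketch of the idea, using a reduced stand-in for `llama_hparams` (the real struct loads per-layer metadata from the GGUF file):

```cpp
// Hedged sketch; llama_hparams here is a simplified stand-in, not the
// actual definition in the code base.
#include <cstdint>
#include <vector>

struct llama_hparams {
    std::vector<bool> recurrent_layer; // true -> linear-attention/SSM layer

    bool is_recurrent(uint32_t il) const {
        return il < recurrent_layer.size() && recurrent_layer[il];
    }
};

// Architecture-agnostic name: the check applies to any hybrid model
// (Qwen3-Next, Qwen-3.5, ...), not just Qwen3-Next.
static inline bool llama_is_recurrent_layer(const llama_hparams & hparams, uint32_t il) {
    return hparams.is_recurrent(il);
}
```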
@@ -836,7 +836,7 @@ static bool llama_kv_cache_init(
     int n_mla = 0;
     for (int i = 0; i < (int) n_layer; i++) {
-        const bool qnext_recurrent = llama_qwen3next_is_recurrent_layer(hparams, i);
+        const bool qnext_recurrent = llama_is_recurrent_layer(hparams, i);
         const uint32_t n_embd_v_row = llama_kv_v_row_embd(model, hparams, i);
         const uint32_t n_head_kv = hparams.n_head_kv(i);
         const uint32_t n_embd_head_k= hparams.n_embd_head_k;
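The KV-cache init loop consults the helper per layer because recurrent and attention layers are sized differently: attention layers cache K/V rows per token, while recurrent layers keep a fixed-size state per sequence. A toy sketch of that branching (the hparams fields and the alternating layer pattern are illustrative stand-ins, not the real helpers):

```cpp
// Hedged sketch of per-layer KV sizing; names and values are placeholders.
#include <cstdint>
#include <cstdio>

struct hparams_t {
    uint32_t n_layer       = 4;
    uint32_t n_embd_head_k = 128;
    uint32_t n_head_kv(uint32_t /*il*/) const { return 8; }
    bool     is_recurrent(uint32_t il) const { return il % 2 == 0; } // toy pattern
};

int main() {
    hparams_t hp;
    for (uint32_t il = 0; il < hp.n_layer; ++il) {
        const bool recurrent = hp.is_recurrent(il);
        // Attention layers need n_head_kv * n_embd_head_k elements per cached
        // token for K; recurrent layers are allocated separately as per-sequence
        // state, so no per-token K row is reserved here.
        const uint32_t k_row = recurrent ? 0 : hp.n_head_kv(il) * hp.n_embd_head_k;
        printf("layer %u: %s, K row size = %u\n",
               il, recurrent ? "recurrent" : "attention", k_row);
    }
    return 0;
}
```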
@@ -1937,6 +1937,7 @@ static bool is_model_split_supported(const llama_model & model) {
         LLM_ARCH_SEED_OSS,
         LLM_ARCH_STEP35,
         LLM_ARCH_QWEN3NEXT,
+        LLM_ARCH_QWEN35,
     };
     auto it = k_supported.find(model.arch);
     return it != k_supported.end();
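The final hunk is the actual enablement: graph-parallel model splitting is gated by an allow-list of validated architectures, so adding `LLM_ARCH_QWEN35` to the set is all that is needed once the per-layer plumbing above is architecture-agnostic. A self-contained sketch of the allow-list pattern (the enum values are placeholders standing in for the repository's full `llm_arch` enum):

```cpp
// Hedged sketch of the allow-list gate; only the lookup pattern is shown.
#include <set>

enum llm_arch { LLM_ARCH_SEED_OSS, LLM_ARCH_STEP35, LLM_ARCH_QWEN3NEXT, LLM_ARCH_QWEN35 };

static bool is_model_split_supported(llm_arch arch) {
    static const std::set<llm_arch> k_supported = {
        LLM_ARCH_SEED_OSS,
        LLM_ARCH_STEP35,
        LLM_ARCH_QWEN3NEXT,
        LLM_ARCH_QWEN35, // added by this commit: dense Qwen-3.5 models
    };
    return k_supported.find(arch) != k_supported.end();
}
```

An opt-in set like this keeps unvalidated architectures on the safe single-graph path by default; enabling a new model is a one-line change once its layer layout is known to split correctly.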