From 35da97d53ee016db0705e7344cf30803dbe3c68b Mon Sep 17 00:00:00 2001 From: Kawrakow Date: Mon, 23 Feb 2026 09:42:21 +0000 Subject: [PATCH] Minor --- src/llama-build-context.cpp | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/src/llama-build-context.cpp b/src/llama-build-context.cpp index f1d9df73..a7f9e785 100644 --- a/src/llama-build-context.cpp +++ b/src/llama-build-context.cpp @@ -4463,31 +4463,16 @@ ggml_cgraph * llm_build_context::build_qwen3next() { if (hparams.is_recurrent(il)) { int idx = model.default_layer_device[il]; if (inpL->op == GGML_OP_REDUCE) { - if (model.layers[il].wqkv) { - int idx_wqkv = ggml_backend_sched_get_backend_idx(lctx.sched, model.layers[il].wqkv->buffer); - //int idx_s_l = -1; - //if (kv_self.s_l[il]) idx_s_l = ggml_backend_sched_get_backend_idx(lctx.sched, kv_self.s_l[il]->buffer); - //printf("Layer %2d: %d, %d, %d\n", il, idx, idx_wqkv, idx_s_l); - if (idx_wqkv >= 0) idx = idx_wqkv; + if (kv_self.s_l[il]) { + // This shouldn't be necessary, but just in case. + int idx_s_l = ggml_backend_sched_get_backend_idx(lctx.sched, kv_self.s_l[il]->buffer); + if (idx_s_l >= 0) idx = idx_s_l; } if (inpL->src[idx]) { inpL->view_src = inpL->src[idx]; } } auto norm = model.layers[il].attn_norm->extra ? ((ggml_split_tensor_t *)model.layers[il].attn_norm->extra)->splits[idx] : model.layers[il].attn_norm; - //if (inpL->op == GGML_OP_REDUCE && inpL->src[model.default_layer_device[il]]) { - // if (model.layers[il].wqkv) { - // int idx = ggml_backend_sched_get_backend_idx(lctx.sched, model.layers[il].wqkv->buffer); - // printf("Layer %2d wqkv backend is %d\n", il, idx); - // //auto backend = ggml_backend_sched_get_tensor_backend(lctx.sched, model.layers[il].wqkv); - // //if (backend) printf("Layer %2d wqkv backend is %s\n", il, ggml_backend_name(backend)); - // //else printf("Backend for wqkv in layer %2d is not known\n", il); - // } - // inpL->view_src = inpL->src[model.default_layer_device[il]]; - // //printf("Using reduce result on device %d\n", model.default_layer_device[il]); - // //inpL = inpL->src[model.default_layer_device[il]]; - //} - //auto norm = model.layers[il].attn_norm->extra ? ((ggml_split_tensor_t *)model.layers[il].attn_norm->extra)->splits[model.default_layer_device[il]] : model.layers[il].attn_norm; cur = llm_build_norm(ctx0, inpL, hparams, norm, nullptr, LLM_NORM_RMS, cb, il); cb(cur, "attn_norm", il); cur = delta.build_layer_attn_linear(ctx0, gf, cur, causal_mask, identity, diag_mask, il, cb); @@ -4498,7 +4483,6 @@ ggml_cgraph * llm_build_context::build_qwen3next() { cur = ggml_add(ctx0, cur, inpSA); cb(cur, "attn_residual", il); } else { - //cur = build_layer_attn(cur, inp_pos, KQ_mask, il); cur = build_std_attention(gf, model.layers[il].attn_norm, inpL, inp_pos, il == n_layer - 1 ? inp_out_ids : nullptr, nullptr, KQ_mask, nullptr, nullptr, KQ_scale, 0.0f, 0, il, true, false, true, false, false); }