From 0e3891b348e50e5b6c2e92d85b8d26ad28fc556f Mon Sep 17 00:00:00 2001
From: yurko
Date: Sat, 7 Feb 2026 13:25:34 -0800
Subject: [PATCH] qwen3next: remove redundant v_conv cont in delta path

---
 src/llama-build-context.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/llama-build-context.cpp b/src/llama-build-context.cpp
index b3436460..b6f51c21 100644
--- a/src/llama-build-context.cpp
+++ b/src/llama-build-context.cpp
@@ -4711,12 +4711,14 @@ ggml_cgraph * llm_build_context::build_qwen3next() {
     ggml_tensor * q_conv = ggml_view_2d(ctx0, conv_output_silu, key_dim, n_tok, conv_output_silu->nb[1], 0);
     ggml_tensor * k_conv = ggml_view_2d(ctx0, conv_output_silu, key_dim, n_tok, conv_output_silu->nb[1],
             key_dim * ggml_element_size(conv_output_silu));
-    ggml_tensor * v_conv = ggml_view_2d(ctx0, conv_output_silu, value_dim, n_tok, conv_output_silu->nb[1],
+    ggml_tensor * v_conv = ggml_view_4d(ctx0, conv_output_silu, head_v_dim, num_v_heads, n_tok, 1,
+            ggml_row_size(conv_output_silu->type, head_v_dim),
+            conv_output_silu->nb[1],
+            conv_output_silu->nb[1] * n_tok,
             2 * key_dim * ggml_element_size(conv_output_silu));
 
     q_conv = ggml_cont_4d(ctx0, q_conv, head_k_dim, num_k_heads, n_tok, 1);
     k_conv = ggml_cont_4d(ctx0, k_conv, head_k_dim, num_k_heads, n_tok, 1);
-    v_conv = ggml_cont_4d(ctx0, v_conv, head_v_dim, num_v_heads, n_tok, 1);
 
     if (num_k_heads != num_v_heads) {
         GGML_ASSERT(num_v_heads % num_k_heads == 0);
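
For review context, a minimal standalone sketch of the stride math the new
ggml_view_4d relies on. The toy dimensions, the stand-in tensor, and the
main() scaffolding are assumptions for illustration; only the view arguments
mirror the patch. It builds the same strided view over a fused per-token
[q | k | v] row and reports that the result is non-contiguous, which is the
layout the downstream delta-path consumers must now accept in place of the
repacked copy that ggml_cont_4d used to produce.

#include "ggml.h"
#include <stdio.h>

int main(void) {
    // Toy shapes (hypothetical, for illustration only): each token's fused
    // conv row is laid out as [q | k | v].
    const int64_t head_k_dim = 4, num_k_heads = 2;
    const int64_t head_v_dim = 4, num_v_heads = 4;
    const int64_t key_dim    = head_k_dim * num_k_heads;
    const int64_t value_dim  = head_v_dim * num_v_heads;
    const int64_t n_tok      = 3;

    struct ggml_init_params params = { 16*1024*1024, NULL, false };
    struct ggml_context * ctx0 = ggml_init(params);

    // Stand-in for conv_output_silu: one fused [q | k | v] row per token.
    struct ggml_tensor * conv_output_silu =
        ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, 2*key_dim + value_dim, n_tok);

    // The patch's view: address the v slice directly as
    // (head_v_dim, num_v_heads, n_tok, 1) without copying.
    struct ggml_tensor * v_conv = ggml_view_4d(ctx0, conv_output_silu,
            head_v_dim, num_v_heads, n_tok, 1,
            ggml_row_size(conv_output_silu->type, head_v_dim),  // heads are contiguous inside a row
            conv_output_silu->nb[1],                            // token stride spans the whole fused row
            conv_output_silu->nb[1] * n_tok,                    // single-batch stride
            2 * key_dim * ggml_element_size(conv_output_silu)); // skip q and k at the row start

    // The view is strided, not contiguous: its token stride still covers the
    // q and k segments, which is exactly what the old ggml_cont_4d repacked.
    printf("v_conv ne = [%d, %d, %d, %d], contiguous = %d\n",
           (int) v_conv->ne[0], (int) v_conv->ne[1],
           (int) v_conv->ne[2], (int) v_conv->ne[3],
           ggml_is_contiguous(v_conv));

    ggml_free(ctx0);
    return 0;
}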