Minor delta-net tweak (#1308)

* Make sure we pick the reduced tensor from the right GPU

* Minor

* Minor delta-net tweak
This commit is contained in:
Kawrakow
2026-02-24 15:22:57 +01:00
committed by GitHub
parent 7065488135
commit 38ca19d828
2 changed files with 2 additions and 5 deletions

View File

@@ -3263,8 +3263,6 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
ggml_cuda_op_sum_rows_nc(ctx, cgraph->nodes[i+1]);
i += 2;
} else {
-//auto src = dst->src[0];
-//printf("cont(%s -> %s): %ld x %ld x %ld x %ld; %zu x %zu x %zu x %zu\n", src->name, dst->name, src->ne[0], src->ne[1], src->ne[2], src->ne[3], src->nb[0], src->nb[1], src->nb[2], src->nb[3]);
ggml_cuda_dup(ctx, dst);
}
break;

View File

@@ -636,9 +636,8 @@ ggml_tensor * delta_net::build_layer_attn_linear_core(ggml_context * ctx0, ggml_
ggml_tensor * z_2d = ggml_reshape_2d(ctx0, z, head_v_dim, num_v_heads * n_tok);
ggml_tensor * attn_out_norm = llm_build_context::llm_build_norm(ctx0, attn_out_2d, hparams, model.layers[il].ssm_norm, nullptr, LLM_NORM_RMS, cb, il);
-ggml_tensor * gated_silu = ggml_silu(ctx0, z_2d);
-cb(gated_silu, "gated_silu", il);
-attn_out_norm = ggml_mul(ctx0, attn_out_norm, gated_silu);
+cb(attn_out_norm, "attn_rms_norm", il);
+attn_out_norm = ggml_fused_mul_unary(ctx0, z_2d, attn_out_norm, GGML_UNARY_OP_SILU);
cb(attn_out_norm, "attn_out_norm", il);
ggml_tensor * final_output = ggml_reshape_2d(ctx0, attn_out_norm, value_dim, n_tok);