From 3c43fe37fa4b171d5d07a8bb6a858f3243742ed9 Mon Sep 17 00:00:00 2001 From: Kawrakow Date: Fri, 27 Feb 2026 13:23:54 +0000 Subject: [PATCH] Fix race --- ggml/src/ggml-cuda/delta-net.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ggml/src/ggml-cuda/delta-net.cu b/ggml/src/ggml-cuda/delta-net.cu index da96ba81..a0193b55 100644 --- a/ggml/src/ggml-cuda/delta-net.cu +++ b/ggml/src/ggml-cuda/delta-net.cu @@ -137,6 +137,9 @@ __global__ void delta_net_recurrent_f32( sum1 += all_sum1[i*WARP_SIZE_S + row]; sum2 += all_sum2[i*WARP_SIZE_S + row]; } + // This barrier is required: without it, results were observed to vary from run to run. Presumably all_sum1/all_sum2 are block-shared scratch buffers that get rewritten on the next token step while slower threads are still reading them here (TODO: confirm, e.g. with compute-sanitizer racecheck) + __syncthreads(); + float sv_new = beta_val * (v_ptr[t * qkv_stride_token + row_out] - sum1 * decay); if (col_idx_0 == 0) { out_base[t * out_token_stride + row_out] = sum2 * decay + sv_new * attn_score;