Revert "Fix race in the CUDA DeepSeek FA kernel (#406)"

This reverts commit 36e6e888b7.
I should have tested it: with that change applied, we get NaNs.
This commit is contained in:
Kawrakow
2025-05-11 12:22:19 +03:00
parent 0abcf0749e
commit 8f7bd74afb

View File

@@ -898,8 +898,6 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile(
KQ_crs += __shfl_xor_sync(0xFFFFFFFF, KQ_crs, offset, WARP_SIZE);
}
__syncthreads();
// Write back combined meta data:
#pragma unroll
for (int imeta = 0; imeta < nmeta; ++imeta) {