mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-06 06:20:08 +00:00
Revert "Fix race in the CUDA DeepSeek FA kernel (#406)"
This reverts commit 36e6e888b7.
I should have tested. We get NaNs.
This commit is contained in:
@@ -898,8 +898,6 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile(
|
||||
KQ_crs += __shfl_xor_sync(0xFFFFFFFF, KQ_crs, offset, WARP_SIZE);
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
// Write back combined meta data:
|
||||
#pragma unroll
|
||||
for (int imeta = 0; imeta < nmeta; ++imeta) {
|
||||
|
||||
Reference in New Issue
Block a user