Add -fno-slp-vectorize option for building hstu kernels on gfx950

This commit is contained in:
Qianfeng Zhang
2026-04-30 13:37:22 +00:00
parent 7883f52d9f
commit 4c583f0574

View File

@@ -17,7 +17,8 @@ if (DEFINED ENV{ASSUME_HIGHLY_VARIED_SEQLEN})
endif()
# gfx95-only build: taken when GPU_TARGETS contains "gfx95" and contains neither
# "gfx94" nor "gfx90" (MATCHES performs an unanchored regex search).
# In that case the example is compiled with BUILD_HSTU_FOR_GFX95_ONLY defined
# and with Clang's SLP vectorizer turned off.
if(GPU_TARGETS MATCHES "gfx95" AND NOT GPU_TARGETS MATCHES "gfx94" AND NOT GPU_TARGETS MATCHES "gfx90")
    # Disabling SLP vectorization improves pipeline performance on gfx950.
    # Both flags are appended once; the define is not repeated.
    list(APPEND EXAMPLE_HSTU_ATTENTION_COMPILE_OPTIONS
        -DBUILD_HSTU_FOR_GFX95_ONLY
        -fno-slp-vectorize)
endif()
# Attach the accumulated option list to the example target. PRIVATE keeps the
# options on this target's own compilation and off anything that links to it.
target_compile_options(
    ${EXAMPLE_HSTU_ATTENTION}
    PRIVATE
        ${EXAMPLE_HSTU_ATTENTION_COMPILE_OPTIONS}
)