From 4c583f0574c8a8973fb4c210c95e801bd82571d4 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Thu, 30 Apr 2026 13:37:22 +0000 Subject: [PATCH] Add -fno-slp-vectorize option for building hstu kernels on gfx950 --- example/ck_tile/18_hstu_attention/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/example/ck_tile/18_hstu_attention/CMakeLists.txt b/example/ck_tile/18_hstu_attention/CMakeLists.txt index 2a4c98bb8f..8d40e5903c 100644 --- a/example/ck_tile/18_hstu_attention/CMakeLists.txt +++ b/example/ck_tile/18_hstu_attention/CMakeLists.txt @@ -17,7 +17,8 @@ if (DEFINED ENV{ASSUME_HIGHLY_VARIED_SEQLEN}) endif() if(GPU_TARGETS MATCHES "gfx95" AND NOT GPU_TARGETS MATCHES "gfx94" AND NOT GPU_TARGETS MATCHES "gfx90") - list(APPEND EXAMPLE_HSTU_ATTENTION_COMPILE_OPTIONS -DBUILD_HSTU_FOR_GFX95_ONLY) + ## Disabling SLP vectorization (-fno-slp-vectorize) improves the performance of the pipelines on gfx950 + list(APPEND EXAMPLE_HSTU_ATTENTION_COMPILE_OPTIONS -DBUILD_HSTU_FOR_GFX95_ONLY -fno-slp-vectorize) endif() target_compile_options(${EXAMPLE_HSTU_ATTENTION} PRIVATE ${EXAMPLE_HSTU_ATTENTION_COMPILE_OPTIONS})