diff --git a/example/ck_tile/18_hstu_attention/CMakeLists.txt b/example/ck_tile/18_hstu_attention/CMakeLists.txt index d7690ec66f..5b53c79201 100644 --- a/example/ck_tile/18_hstu_attention/CMakeLists.txt +++ b/example/ck_tile/18_hstu_attention/CMakeLists.txt @@ -18,7 +18,7 @@ endif() if(GPU_TARGETS MATCHES "gfx95" AND NOT GPU_TARGETS MATCHES "gfx94" AND NOT GPU_TARGETS MATCHES "gfx90") ## disable slp-vectorize improve pipelines performance on gfx950 - list(APPEND EXAMPLE_HSTU_ATTENTION_COMPILE_OPTIONS -DBUILD_HSTU_FOR_GFX95_ONLY -fno-slp-vectorize) + list(APPEND EXAMPLE_HSTU_ATTENTION_COMPILE_OPTIONS -DBUILD_HSTU_FOR_GFX95 -fno-slp-vectorize) endif() target_compile_options(${EXAMPLE_HSTU_ATTENTION} PRIVATE ${EXAMPLE_HSTU_ATTENTION_COMPILE_OPTIONS}) diff --git a/example/ck_tile/18_hstu_attention/README.md b/example/ck_tile/18_hstu_attention/README.md index d7249ad9ca..be8a310ddf 100644 --- a/example/ck_tile/18_hstu_attention/README.md +++ b/example/ck_tile/18_hstu_attention/README.md @@ -206,7 +206,7 @@ The build target is `tile_example_hstu_attention` (excluded from `make all` by d |---------------------|--------| | `ASSUME_HIGHLY_VARIED_SEQLEN=1` | Schedules batch dimension as a non-leading grid dimension (trades occupancy for better load balance when sequence lengths vary widely) | -On `gfx950`-only builds (`-DBUILD_HSTU_FOR_GFX95_ONLY`), SLP vectorization is disabled to +On `gfx950`-only builds (`-DBUILD_HSTU_FOR_GFX95`), SLP vectorization is disabled to improve pipeline performance. --- diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_dispatch.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_dispatch.hpp index bec95c8c14..60318d884f 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_dispatch.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_dispatch.hpp @@ -39,7 +39,7 @@ struct batched_forward_dispatch HstuAttentionWithSoftmaxFwdTileSetting, HstuAttentionNoSoftmaxFwdTileSetting>::Type; -#ifdef BUILD_HSTU_FOR_GFX95_ONLY +#ifdef BUILD_HSTU_FOR_GFX95 static constexpr bool use_trload_pipeline = true; #else static constexpr bool use_trload_pipeline = false; diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_splitkv_dispatch.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_splitkv_dispatch.hpp index 7c3883cc67..5c1afb4523 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_splitkv_dispatch.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_splitkv_dispatch.hpp @@ -46,7 +46,7 @@ struct batched_forward_splitkv_dispatch HstuAttentionNoSoftmaxFwdTileSetting>::Type; using HstuAttentionCombineTileSetting = HstuAttentionFwdSplitKVCombineTileSetting::Type; -#ifdef BUILD_HSTU_FOR_GFX95_ONLY +#ifdef BUILD_HSTU_FOR_GFX95 static constexpr bool use_trload_pipeline = true; #else static constexpr bool use_trload_pipeline = false; diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_setting.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_setting.hpp index ce6dc897e1..a362b282a0 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_setting.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_setting.hpp @@ -13,7 +13,7 @@ using WarpTile_16x16x16 = ck_tile::sequence<16, 16, 16>; using WarpTile_16x16x32 = ck_tile::sequence<16, 16, 32>; using WarpTile_32x32x16 = ck_tile::sequence<32, 32, 16>; -#if !defined(BUILD_HSTU_FOR_GFX95_ONLY) +#if !defined(BUILD_HSTU_FOR_GFX95) template struct HstuAttentionNoSoftmaxFwdBlockTile; @@ -239,7 +239,7 @@ template struct HstuAttentionWithSoftmaxFwdTileSetting<256, 64>; template struct HstuAttentionWithSoftmaxFwdTileSetting<256, 128>; #endif -#if defined(BUILD_HSTU_FOR_GFX95_ONLY) +#if defined(BUILD_HSTU_FOR_GFX95) template struct HstuAttentionNoSoftmaxFwdBlockTile; diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_dispatch.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_dispatch.hpp index 941f600f6b..50e7622364 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_dispatch.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_dispatch.hpp @@ -39,7 +39,7 @@ struct group_forward_dispatch HstuAttentionWithSoftmaxFwdTileSetting, HstuAttentionNoSoftmaxFwdTileSetting>::Type; -#ifdef BUILD_HSTU_FOR_GFX95_ONLY +#ifdef BUILD_HSTU_FOR_GFX95 static constexpr bool use_trload_pipeline = true; #else static constexpr bool use_trload_pipeline = false; diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_splitkv_dispatch.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_splitkv_dispatch.hpp index a467591216..57b7aa71d4 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_splitkv_dispatch.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_splitkv_dispatch.hpp @@ -47,7 +47,7 @@ struct group_forward_splitkv_dispatch using HstuAttentionCombineTileSetting = typename HstuAttentionFwdSplitKVCombineTileSetting::Type; -#ifdef BUILD_HSTU_FOR_GFX95_ONLY +#ifdef BUILD_HSTU_FOR_GFX95 static constexpr bool use_trload_pipeline = true; #else static constexpr bool use_trload_pipeline = false; diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_dispatch.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_dispatch.hpp index 03469a2525..1092cf047c 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_dispatch.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_dispatch.hpp @@ -39,7 +39,7 @@ struct jagged_forward_dispatch HstuAttentionWithSoftmaxFwdTileSetting, HstuAttentionNoSoftmaxFwdTileSetting>::Type; -#ifdef BUILD_HSTU_FOR_GFX95_ONLY +#ifdef BUILD_HSTU_FOR_GFX95 static constexpr bool use_trload_pipeline = true; #else static constexpr bool use_trload_pipeline = false; diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_splitkv_dispatch.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_splitkv_dispatch.hpp index 98101b1a78..1806f4b186 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_splitkv_dispatch.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_splitkv_dispatch.hpp @@ -46,7 +46,7 @@ struct jagged_forward_splitkv_dispatch HstuAttentionNoSoftmaxFwdTileSetting>::Type; using HstuAttentionCombineTileSetting = HstuAttentionFwdSplitKVCombineTileSetting::Type; -#ifdef BUILD_HSTU_FOR_GFX95_ONLY +#ifdef BUILD_HSTU_FOR_GFX95 static constexpr bool use_trload_pipeline = true; #else static constexpr bool use_trload_pipeline = false;