mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-07-03 13:48:30 +00:00
Renaming BUILD_HSTU_FOR_GFX95_ONLY to BUILD_HSTU_FOR_GFX95
This commit is contained in:
@@ -18,7 +18,7 @@ endif()
|
||||
|
||||
if(GPU_TARGETS MATCHES "gfx95" AND NOT GPU_TARGETS MATCHES "gfx94" AND NOT GPU_TARGETS MATCHES "gfx90")
|
||||
## disabling slp-vectorize improves pipeline performance on gfx950
|
||||
list(APPEND EXAMPLE_HSTU_ATTENTION_COMPILE_OPTIONS -DBUILD_HSTU_FOR_GFX95_ONLY -fno-slp-vectorize)
|
||||
list(APPEND EXAMPLE_HSTU_ATTENTION_COMPILE_OPTIONS -DBUILD_HSTU_FOR_GFX95 -fno-slp-vectorize)
|
||||
endif()
|
||||
|
||||
target_compile_options(${EXAMPLE_HSTU_ATTENTION} PRIVATE ${EXAMPLE_HSTU_ATTENTION_COMPILE_OPTIONS})
|
||||
|
||||
@@ -206,7 +206,7 @@ The build target is `tile_example_hstu_attention` (excluded from `make all` by d
|
||||
|---------------------|--------|
|
||||
| `ASSUME_HIGHLY_VARIED_SEQLEN=1` | Schedules batch dimension as a non-leading grid dimension (trades occupancy for better load balance when sequence lengths vary widely) |
|
||||
|
||||
On `gfx950`-only builds (`-DBUILD_HSTU_FOR_GFX95_ONLY`), SLP vectorization is disabled to
|
||||
On `gfx950`-only builds (`-DBUILD_HSTU_FOR_GFX95`), SLP vectorization is disabled to
|
||||
improve pipeline performance.
|
||||
|
||||
---
|
||||
|
||||
@@ -39,7 +39,7 @@ struct batched_forward_dispatch
|
||||
HstuAttentionWithSoftmaxFwdTileSetting<MaxK, MTile>,
|
||||
HstuAttentionNoSoftmaxFwdTileSetting<MaxK, MTile>>::Type;
|
||||
|
||||
#ifdef BUILD_HSTU_FOR_GFX95_ONLY
|
||||
#ifdef BUILD_HSTU_FOR_GFX95
|
||||
static constexpr bool use_trload_pipeline = true;
|
||||
#else
|
||||
static constexpr bool use_trload_pipeline = false;
|
||||
|
||||
@@ -46,7 +46,7 @@ struct batched_forward_splitkv_dispatch
|
||||
HstuAttentionNoSoftmaxFwdTileSetting<MaxK, MTile>>::Type;
|
||||
using HstuAttentionCombineTileSetting = HstuAttentionFwdSplitKVCombineTileSetting<MaxK>::Type;
|
||||
|
||||
#ifdef BUILD_HSTU_FOR_GFX95_ONLY
|
||||
#ifdef BUILD_HSTU_FOR_GFX95
|
||||
static constexpr bool use_trload_pipeline = true;
|
||||
#else
|
||||
static constexpr bool use_trload_pipeline = false;
|
||||
|
||||
@@ -13,7 +13,7 @@ using WarpTile_16x16x16 = ck_tile::sequence<16, 16, 16>;
|
||||
using WarpTile_16x16x32 = ck_tile::sequence<16, 16, 32>;
|
||||
using WarpTile_32x32x16 = ck_tile::sequence<32, 32, 16>;
|
||||
|
||||
#if !defined(BUILD_HSTU_FOR_GFX95_ONLY)
|
||||
#if !defined(BUILD_HSTU_FOR_GFX95)
|
||||
template <ck_tile::index_t MaxK, ck_tile::index_t MTile = 0>
|
||||
struct HstuAttentionNoSoftmaxFwdBlockTile;
|
||||
|
||||
@@ -239,7 +239,7 @@ template struct HstuAttentionWithSoftmaxFwdTileSetting<256, 64>;
|
||||
template struct HstuAttentionWithSoftmaxFwdTileSetting<256, 128>;
|
||||
#endif
|
||||
|
||||
#if defined(BUILD_HSTU_FOR_GFX95_ONLY)
|
||||
#if defined(BUILD_HSTU_FOR_GFX95)
|
||||
template <ck_tile::index_t MaxK, ck_tile::index_t MTile = 0>
|
||||
struct HstuAttentionNoSoftmaxFwdBlockTile;
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ struct group_forward_dispatch
|
||||
HstuAttentionWithSoftmaxFwdTileSetting<MaxK, MTile>,
|
||||
HstuAttentionNoSoftmaxFwdTileSetting<MaxK, MTile>>::Type;
|
||||
|
||||
#ifdef BUILD_HSTU_FOR_GFX95_ONLY
|
||||
#ifdef BUILD_HSTU_FOR_GFX95
|
||||
static constexpr bool use_trload_pipeline = true;
|
||||
#else
|
||||
static constexpr bool use_trload_pipeline = false;
|
||||
|
||||
@@ -47,7 +47,7 @@ struct group_forward_splitkv_dispatch
|
||||
using HstuAttentionCombineTileSetting =
|
||||
typename HstuAttentionFwdSplitKVCombineTileSetting<MaxK>::Type;
|
||||
|
||||
#ifdef BUILD_HSTU_FOR_GFX95_ONLY
|
||||
#ifdef BUILD_HSTU_FOR_GFX95
|
||||
static constexpr bool use_trload_pipeline = true;
|
||||
#else
|
||||
static constexpr bool use_trload_pipeline = false;
|
||||
|
||||
@@ -39,7 +39,7 @@ struct jagged_forward_dispatch
|
||||
HstuAttentionWithSoftmaxFwdTileSetting<MaxK, MTile>,
|
||||
HstuAttentionNoSoftmaxFwdTileSetting<MaxK, MTile>>::Type;
|
||||
|
||||
#ifdef BUILD_HSTU_FOR_GFX95_ONLY
|
||||
#ifdef BUILD_HSTU_FOR_GFX95
|
||||
static constexpr bool use_trload_pipeline = true;
|
||||
#else
|
||||
static constexpr bool use_trload_pipeline = false;
|
||||
|
||||
@@ -46,7 +46,7 @@ struct jagged_forward_splitkv_dispatch
|
||||
HstuAttentionNoSoftmaxFwdTileSetting<MaxK, MTile>>::Type;
|
||||
using HstuAttentionCombineTileSetting = HstuAttentionFwdSplitKVCombineTileSetting<MaxK>::Type;
|
||||
|
||||
#ifdef BUILD_HSTU_FOR_GFX95_ONLY
|
||||
#ifdef BUILD_HSTU_FOR_GFX95
|
||||
static constexpr bool use_trload_pipeline = true;
|
||||
#else
|
||||
static constexpr bool use_trload_pipeline = false;
|
||||
|
||||
Reference in New Issue
Block a user