From 0a8c5f523a92b9b492e94d12e37b287d59e7ae01 Mon Sep 17 00:00:00 2001
From: Qianfeng Zhang
Date: Mon, 2 Feb 2026 15:59:38 +0000
Subject: [PATCH] [Performance] Use N0Sub=16 for trload with softmax pipeline to reduce vgpr spilling

---
 .../ck_tile/18_hstu_attention/hstu_attention_fwd_setting.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_setting.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_setting.hpp
index 2f6844c483..2d9c185a27 100644
--- a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_setting.hpp
+++ b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_setting.hpp
@@ -260,7 +260,7 @@ struct HstuAttentionWithSoftmaxFwdBlockTile<96>
 template <>
 struct HstuAttentionWithSoftmaxFwdBlockTile<128>
 {
-    using type = ck_tile::sequence<128, 64, 32, 128, 32, 128>;
+    using type = ck_tile::sequence<128, 64, 16, 128, 32, 128>;
     using gemm0_warps = ck_tile::sequence<4, 1, 1>;
     using gemm1_warps = ck_tile::sequence<4, 1, 1>;
 };
@@ -268,7 +268,7 @@ struct HstuAttentionWithSoftmaxFwdBlockTile<128>
 template <>
 struct HstuAttentionWithSoftmaxFwdBlockTile<256>
 {
-    using type = ck_tile::sequence<128, 64, 32, 256, 32, 256>;
+    using type = ck_tile::sequence<128, 64, 16, 256, 32, 256>;
     using gemm0_warps = ck_tile::sequence<4, 1, 1>;
     using gemm1_warps = ck_tile::sequence<4, 1, 1>;
 };