From b3d54477f18f973c42f04403f4395039f8c5cc59 Mon Sep 17 00:00:00 2001
From: Qianfeng Zhang
Date: Fri, 12 Dec 2025 14:54:11 +0000
Subject: [PATCH] Enable hdim96 instances

---
 .../18_hstu_attention/generate_instances.py   |   4 +-
 .../hstu_attention_fwd_setting.hpp            | 114 ++++++++++------
 .../hstu_attention_hdim_switch.hpp            |   5 +
 ...tention_no_softmax_fwd_trload_pipeline.hpp |   3 +-
 ...ntion_with_softmax_fwd_trload_pipeline.hpp |   3 +-
 ...max_false_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_false_no_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_true_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...oftmax_true_no_bias_no_dropout_maxk_96.cpp |  18 +++
 ...ion_batched_forward_bf16_instances_ref.hpp | 128 ++++++++++++++++++
 ...max_false_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_false_no_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_true_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...oftmax_true_no_bias_no_dropout_maxk_96.cpp |  18 +++
 ...max_false_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_false_no_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_true_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...oftmax_true_no_bias_no_dropout_maxk_96.cpp |  18 +++
 ...ion_batched_forward_fp16_instances_ref.hpp | 128 ++++++++++++++++++
 ...max_false_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_false_no_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_true_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...oftmax_true_no_bias_no_dropout_maxk_96.cpp |  18 +++
 ...max_false_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_false_no_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_true_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...oftmax_true_no_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tion_jagged_forward_bf16_instances_ref.hpp | 128 ++++++++++++++++++
 ...max_false_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_false_no_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_true_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...oftmax_true_no_bias_no_dropout_maxk_96.cpp |  18 +++
 ...max_false_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_false_no_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_true_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...oftmax_true_no_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tion_jagged_forward_fp16_instances_ref.hpp | 128 ++++++++++++++++++
 ...max_false_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_false_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_false_no_bias_no_dropout_maxk_96.cpp |  18 +++
 ...tmax_true_has_bias_has_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_has_bias_no_dropout_maxk_96.cpp |  18 +++
 ...ftmax_true_no_bias_has_dropout_maxk_96.cpp |  18 +++
 ...oftmax_true_no_bias_no_dropout_maxk_96.cpp |  18 +++
 73 files changed, 1751 insertions(+), 42 deletions(-)
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
 create mode 100644 example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
diff --git a/example/ck_tile/18_hstu_attention/generate_instances.py b/example/ck_tile/18_hstu_attention/generate_instances.py
index ca752cc3db..fb26f5e6db 100644
--- a/example/ck_tile/18_hstu_attention/generate_instances.py
+++ b/example/ck_tile/18_hstu_attention/generate_instances.py
@@ -64,7 +64,7 @@ BOOL_MAP_DROPOUT = {
     False: "no_dropout",
 }
 
-INT_MAP_MAX_K = {hd: f"maxk_{hd}" for hd in [64, 128, 256]}
+INT_MAP_MAX_K = {hd: f"maxk_{hd}" for hd in [64, 96, 128, 256]}
 
 TYPE_CTYPE_MAP = {
     "fp16": "ck_tile::fp16_t",
@@ -159,7 +159,7 @@ def create_forward_instances_ref(instance_dir: Path, headdims: List) -> None:
             file.write(forward_instance)
 
 if __name__ == "__main__":
-    headdims_fwd = [64, 128, 256]
+    headdims_fwd = [64, 96, 128, 256]
 
     this_dir = os.path.dirname(__file__)
     output_dir = Path(this_dir) / "instances"
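The two lists edited above are the only generator inputs that encode the supported head dims: adding 96 to INT_MAP_MAX_K and to headdims_fwd is what makes generate_instances.py emit every maxk_96 file in this patch. The 64 new .cpp files in the diffstat are exactly the flag combinations the generator enumerates:

    2 (batched/jagged) x 2 (bf16/fp16) x 2 (has/no causal) x 2 (softmax true/false)
      x 2 (has/no bias) x 2 (has/no dropout) = 64 instance files for maxk_96

plus one block of 16 extern-template declarations (8 lines each, hence the "128 ++..." entries) appended to each of the four *_instances_ref.hpp headers.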
diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_setting.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_setting.hpp
index f80d7df1b6..50889863d5 100644
--- a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_setting.hpp
+++ b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_setting.hpp
@@ -18,14 +18,6 @@ struct HstuAttentionNoSoftmaxFwdBlockTile;
 // Tile-sizes: M N0 N0Sub N1 K1 MaxK (MaxK % N1 == 0, N0 % K1 == 0)
 //
 
-template <>
-struct HstuAttentionNoSoftmaxFwdBlockTile<32>
-{
-    using type = ck_tile::sequence<64, 64, 32, 32, 32, 32>;
-    using gemm0_warps = ck_tile::sequence<2, 1, 1>;
-    using gemm1_warps = ck_tile::sequence<2, 1, 1>;
-};
-
 template <>
 struct HstuAttentionNoSoftmaxFwdBlockTile<64>
 {
@@ -34,6 +26,14 @@ struct HstuAttentionNoSoftmaxFwdBlockTile<64>
     using gemm1_warps = ck_tile::sequence<4, 1, 1>;
 };
 
+template <>
+struct HstuAttentionNoSoftmaxFwdBlockTile<96>
+{
+    using type = ck_tile::sequence<128, 64, 32, 128, 32, 96>;
+    using gemm0_warps = ck_tile::sequence<4, 1, 1>;
+    using gemm1_warps = ck_tile::sequence<4, 1, 1>;
+};
+
 template <>
 struct HstuAttentionNoSoftmaxFwdBlockTile<128>
 {
@@ -55,14 +55,6 @@ struct HstuAttentionWithSoftmaxFwdBlockTile;
 // Tile-sizes: M N0 N0Sub N1 K1 MaxK (MaxK % N1 == 0, N0 % K1 == 0)
 //
 
-template <>
-struct HstuAttentionWithSoftmaxFwdBlockTile<32>
-{
-    using type = ck_tile::sequence<64, 64, 32, 32, 32, 32>;
-    using gemm0_warps = ck_tile::sequence<2, 1, 1>;
-    using gemm1_warps = ck_tile::sequence<2, 1, 1>;
-};
-
 template <>
 struct HstuAttentionWithSoftmaxFwdBlockTile<64>
 {
@@ -71,6 +63,14 @@ struct HstuAttentionWithSoftmaxFwdBlockTile<64>
     using gemm1_warps = ck_tile::sequence<4, 1, 1>;
 };
 
+template <>
+struct HstuAttentionWithSoftmaxFwdBlockTile<96>
+{
+    using type = ck_tile::sequence<128, 64, 32, 128, 32, 96>;
+    using gemm0_warps = ck_tile::sequence<4, 1, 1>;
+    using gemm1_warps = ck_tile::sequence<4, 1, 1>;
+};
+
 template <>
 struct HstuAttentionWithSoftmaxFwdBlockTile<128>
 {
@@ -90,17 +90,6 @@ struct HstuAttentionWithSoftmaxFwdBlockTile<256>
 template <ck_tile::index_t MaxK>
 struct HstuAttentionNoSoftmaxFwdTileSetting;
 
-template <>
-struct HstuAttentionNoSoftmaxFwdTileSetting<32>
-{
-    using Type = ck_tile::HstuAttentionFwdTileSettingClass<
-        typename HstuAttentionNoSoftmaxFwdBlockTile<32>::type,
-        typename HstuAttentionNoSoftmaxFwdBlockTile<32>::gemm0_warps,
-        HstuAttentionFwdWarpTile1,
-        typename HstuAttentionNoSoftmaxFwdBlockTile<32>::gemm1_warps,
-        HstuAttentionFwdWarpTile1>;
-};
-
 template <>
 struct HstuAttentionNoSoftmaxFwdTileSetting<64>
 {
@@ -112,6 +101,17 @@ struct HstuAttentionNoSoftmaxFwdTileSetting<64>
         HstuAttentionFwdWarpTile1>;
 };
 
+template <>
+struct HstuAttentionNoSoftmaxFwdTileSetting<96>
+{
+    using Type = ck_tile::HstuAttentionFwdTileSettingClass<
+        typename HstuAttentionNoSoftmaxFwdBlockTile<96>::type,
+        typename HstuAttentionNoSoftmaxFwdBlockTile<96>::gemm0_warps,
+        HstuAttentionFwdWarpTile1,
+        typename HstuAttentionNoSoftmaxFwdBlockTile<96>::gemm1_warps,
+        HstuAttentionFwdWarpTile1>;
+};
+
 template <>
 struct HstuAttentionNoSoftmaxFwdTileSetting<128>
 {
@@ -137,17 +137,6 @@ struct HstuAttentionNoSoftmaxFwdTileSetting<256>
 template <ck_tile::index_t MaxK>
 struct HstuAttentionWithSoftmaxFwdTileSetting;
 
-template <>
-struct HstuAttentionWithSoftmaxFwdTileSetting<32>
-{
-    using Type = ck_tile::HstuAttentionFwdTileSettingClass<
-        typename HstuAttentionWithSoftmaxFwdBlockTile<32>::type,
-        typename HstuAttentionWithSoftmaxFwdBlockTile<32>::gemm0_warps,
-        HstuAttentionFwdWarpTile1,
-        typename HstuAttentionWithSoftmaxFwdBlockTile<32>::gemm1_warps,
-        HstuAttentionFwdWarpTile1>;
-};
-
 template <>
 struct HstuAttentionWithSoftmaxFwdTileSetting<64>
 {
@@ -159,6 +148,17 @@ struct HstuAttentionWithSoftmaxFwdTileSetting<64>
         HstuAttentionFwdWarpTile3>;
 };
 
+template <>
+struct HstuAttentionWithSoftmaxFwdTileSetting<96>
+{
+    using Type = ck_tile::HstuAttentionFwdTileSettingClass<
+        typename HstuAttentionWithSoftmaxFwdBlockTile<96>::type,
+        typename HstuAttentionWithSoftmaxFwdBlockTile<96>::gemm0_warps,
+        HstuAttentionFwdWarpTile2,
+        typename HstuAttentionWithSoftmaxFwdBlockTile<96>::gemm1_warps,
+        HstuAttentionFwdWarpTile2>;
+};
+
 template <>
 struct HstuAttentionWithSoftmaxFwdTileSetting<128>
 {
@@ -204,6 +204,14 @@ struct HstuAttentionNoSoftmaxFwdBlockTile<64>
     using gemm1_warps = ck_tile::sequence<4, 1, 1>;
 };
 
+template <>
+struct HstuAttentionNoSoftmaxFwdBlockTile<96>
+{
+    using type = ck_tile::sequence<128, 64, 32, 128, 32, 96>;
+    using gemm0_warps = ck_tile::sequence<4, 1, 1>;
+    using gemm1_warps = ck_tile::sequence<4, 1, 1>;
+};
+
 template <>
 struct HstuAttentionNoSoftmaxFwdBlockTile<128>
 {
@@ -241,6 +249,14 @@ struct HstuAttentionWithSoftmaxFwdBlockTile<64>
     using gemm1_warps = ck_tile::sequence<4, 1, 1>;
 };
 
+template <>
+struct HstuAttentionWithSoftmaxFwdBlockTile<96>
+{
+    using type = ck_tile::sequence<128, 64, 32, 128, 32, 96>;
+    using gemm0_warps = ck_tile::sequence<4, 1, 1>;
+    using gemm1_warps = ck_tile::sequence<4, 1, 1>;
+};
+
 template <>
 struct HstuAttentionWithSoftmaxFwdBlockTile<128>
 {
@@ -282,6 +298,17 @@ struct HstuAttentionNoSoftmaxFwdTileSetting<64>
         HstuAttentionFwdWarpTile2>;
 };
 
+template <>
+struct HstuAttentionNoSoftmaxFwdTileSetting<96>
+{
+    using Type = ck_tile::HstuAttentionFwdTileSettingClass<
+        typename HstuAttentionNoSoftmaxFwdBlockTile<96>::type,
+        typename HstuAttentionNoSoftmaxFwdBlockTile<96>::gemm0_warps,
+        HstuAttentionFwdWarpTile2,
+        typename HstuAttentionNoSoftmaxFwdBlockTile<96>::gemm1_warps,
+        HstuAttentionFwdWarpTile2>;
+};
+
 template <>
 struct HstuAttentionNoSoftmaxFwdTileSetting<128>
 {
@@ -329,6 +356,17 @@ struct HstuAttentionWithSoftmaxFwdTileSetting<64>
         HstuAttentionFwdWarpTile3>;
 };
 
+template <>
+struct HstuAttentionWithSoftmaxFwdTileSetting<96>
+{
+    using Type = ck_tile::HstuAttentionFwdTileSettingClass<
+        typename HstuAttentionWithSoftmaxFwdBlockTile<96>::type,
+        typename HstuAttentionWithSoftmaxFwdBlockTile<96>::gemm0_warps,
+        HstuAttentionFwdWarpTile3,
+        typename HstuAttentionWithSoftmaxFwdBlockTile<96>::gemm1_warps,
+        HstuAttentionFwdWarpTile3>;
+};
+
 template <>
 struct HstuAttentionWithSoftmaxFwdTileSetting<128>
 {
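In hstu_attention_fwd_setting.hpp the <32> specializations are dropped and <96> ones are added, in both of the tile tables the header carries (hence the duplicated hunks around lines 204-356, presumably one table per target architecture). Per the file's own comment the tile sequence reads M N0 N0Sub N1 K1 MaxK, so <128, 64, 32, 128, 32, 96> gives the hdim96 kernels a 128-row M tile over a 96-wide head dimension. A minimal sketch of the specialization pattern, with a stand-in sequence type so it compiles without the real ck_tile headers:

    // Minimal sketch of the MaxK-keyed trait pattern used above; "sequence"
    // and "FwdBlockTile" are stand-ins, not the real ck_tile types.
    #include <cstdio>

    template <int... Is>
    struct sequence
    {
        static constexpr int size = sizeof...(Is);
    };

    // The primary template is only declared, so every supported head dim
    // must supply a specialization; an unsupported MaxK fails to compile.
    template <int MaxK>
    struct FwdBlockTile;

    template <>
    struct FwdBlockTile<96>
    {
        // M N0 N0Sub N1 K1 MaxK, following the comment in the header above
        using type = sequence<128, 64, 32, 128, 32, 96>;
    };

    int main()
    {
        std::printf("hdim96 tile has %d entries\n", FwdBlockTile<96>::type::size);
    }

Leaving the primary template undefined means a head dim without an entry is rejected at compile time instead of silently picking a default tile.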
diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_hdim_switch.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_hdim_switch.hpp
index bc1a907305..41551b2956 100644
--- a/example/ck_tile/18_hstu_attention/hstu_attention_hdim_switch.hpp
+++ b/example/ck_tile/18_hstu_attention/hstu_attention_hdim_switch.hpp
@@ -13,6 +13,11 @@
         constexpr ck_tile::index_t CONST_NAME = 64;  \
         __VA_ARGS__();                               \
     }                                                \
+    else if(HDIM_1 <= 96 && HDIM_2 <= 96)            \
+    {                                                \
+        constexpr ck_tile::index_t CONST_NAME = 96;  \
+        __VA_ARGS__();                               \
+    }                                                \
     else if(HDIM_1 <= 128 && HDIM_2 <= 128)          \
     {                                                \
         constexpr ck_tile::index_t CONST_NAME = 128; \
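This switch maps a pair of runtime head dims onto the nearest supported compile-time MaxK; head dims in (64, 96] previously fell through to the 128 instances and now dispatch to the new 96 ones. A simplified, self-contained copy of the macro's shape (the real one uses ck_tile::index_t and lives in hstu_attention_hdim_switch.hpp; the trailing error branch here is an assumption, the patch does not show how the real macro ends):

    #include <cstdio>
    #include <stdexcept>

    #define HDIM_SWITCH(HDIM_1, HDIM_2, CONST_NAME, ...)      \
        if((HDIM_1) <= 64 && (HDIM_2) <= 64)                  \
        {                                                     \
            constexpr int CONST_NAME = 64;                    \
            __VA_ARGS__();                                    \
        }                                                     \
        else if((HDIM_1) <= 96 && (HDIM_2) <= 96)             \
        {                                                     \
            constexpr int CONST_NAME = 96;                    \
            __VA_ARGS__();                                    \
        }                                                     \
        else if((HDIM_1) <= 128 && (HDIM_2) <= 128)           \
        {                                                     \
            constexpr int CONST_NAME = 128;                   \
            __VA_ARGS__();                                    \
        }                                                     \
        else                                                  \
        {                                                     \
            throw std::runtime_error("unsupported head dim"); \
        }

    int main()
    {
        int hdim_q = 96, hdim_v = 80; // runtime head dims
        HDIM_SWITCH(hdim_q, hdim_v, kMaxK, [&] {
            std::printf("dispatching to MaxK = %d instances\n", kMaxK);
        });
    }

The callable is invoked inside each branch, so kMaxK is a usable constant expression there and can select a template instantiation.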
diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_trload_pipeline.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_trload_pipeline.hpp
index 85e7829552..9eed457527 100644
--- a/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_trload_pipeline.hpp
+++ b/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_trload_pipeline.hpp
@@ -150,7 +150,8 @@ struct HstuAttentionNoSoftmaxFwdPipelineQRKSVSTrLoad
         static_assert(kM0 == QDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
                           kN0 == KDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
-                          kQKHeaddim == KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
+                          kSubQKHeaddim ==
+                              KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
                           kN1 == VDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
                           kK1 == VDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
                           kM0 == BiasDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp
index b02944815b..23eb04f39c 100644
--- a/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp
+++ b/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp
@@ -152,7 +152,8 @@ struct HstuAttentionWithSoftmaxFwdPipelineQRKSVSTrLoad
         static_assert(kM0 == QDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
                           kN0 == KDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
-                          kQKHeaddim == KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
+                          kSubQKHeaddim ==
+                              KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
                           kN1 == VDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
                           kK1 == VDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
                           kM0 == BiasDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
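Both pipeline headers relax the same static_assert: the K DRAM block window's second extent is now checked against kSubQKHeaddim rather than the full kQKHeaddim. The likely motivation (an inference, not stated in the patch) is that for MaxK = 96 the head dimension is streamed in sub-tiles, so the K window is created with the sub-headdim width and the old full-headdim assertion would fire. A toy illustration of this kind of compile-time window check, with made-up names and sizes:

    // Illustrative only: BlockWindow, kSubQKHeaddim and the sizes below are
    // stand-ins, not the real ck_tile window types or the hdim96 tiling.
    #include <cstdio>

    template <int Rows, int Cols>
    struct BlockWindow
    {
        static constexpr int lengths[2] = {Rows, Cols};
    };

    constexpr int kN0           = 64;
    constexpr int kQKHeaddim    = 96; // full head dim
    constexpr int kSubQKHeaddim = 32; // K streamed in sub-headdim slices
    using KWindow               = BlockWindow<kN0, kSubQKHeaddim>;

    // After the patch the second extent is compared against the sub-tile;
    // comparing against kQKHeaddim (96) would fail here.
    static_assert(KWindow::lengths[1] == kSubQKHeaddim, "K window shape mismatch");

    int main() { std::printf("window shape checks pass\n"); }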
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..eb0044bee3
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <hip/hip_runtime.h>
+#include "hstu_attention_batched_forward_dispatch.hpp"
+
+template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
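Each generated .cpp holds a single explicit instantiation of the dispatch template, so every (dtype, causal, softmax, bias, dropout, MaxK) combination is compiled exactly once, in its own translation unit, and the 64 new kernels can be built in parallel. A self-contained sketch of the pattern with stand-in names (run_fwd and Params are not the real API):

    #include <cstdio>

    struct Params
    {
        int seqlen;
    };

    template <typename DataType, bool kHasCausal, bool kHasSoftmax, int kMaxK>
    void run_fwd(Params& p)
    {
        std::printf("maxk=%d causal=%d softmax=%d seqlen=%d\n",
                    kMaxK, int(kHasCausal), int(kHasSoftmax), p.seqlen);
    }

    // One generated .cpp boils down to one line like this: the template body
    // above is compiled exactly once for this parameter combination.
    template void run_fwd<float, true, false, 96>(Params& p);

    int main()
    {
        Params p{128};
        run_fwd<float, true, false, 96>(p); // uses the prebuilt instantiation
    }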
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..6f1fd2d017
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <hip/hip_runtime.h>
+#include "hstu_attention_batched_forward_dispatch.hpp"
+
+template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..de373620a3
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <hip/hip_runtime.h>
+#include "hstu_attention_batched_forward_dispatch.hpp"
+
+template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..6fbca8beb1
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <hip/hip_runtime.h>
+#include "hstu_attention_batched_forward_dispatch.hpp"
+
+template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..d04fcc50f6
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <hip/hip_runtime.h>
+#include "hstu_attention_batched_forward_dispatch.hpp"
+
+template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..6f6292aaa2
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <hip/hip_runtime.h>
+#include "hstu_attention_batched_forward_dispatch.hpp"
+
+template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..cd528f39ff
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <hip/hip_runtime.h>
+#include "hstu_attention_batched_forward_dispatch.hpp"
+
+template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..31bfc04989
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <hip/hip_runtime.h>
+#include "hstu_attention_batched_forward_dispatch.hpp"
+
+template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_instances_ref.hpp
index 92be40644d..1f5955fae1 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_instances_ref.hpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_instances_ref.hpp
@@ -137,6 +137,134 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     64>(HstuAttentionFwdParams& param, hipStream_t stream);
 
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
     true,
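The *_instances_ref.hpp headers mirror every instantiation with an extern template declaration, telling any including translation unit that the instantiation already exists in one of the generated .cpp files and must not be re-instantiated. A compressed sketch of the whole arrangement (declarations, definitions, and a runtime flag dispatch squeezed into one file for illustration; in the real layout they live in the ref header, the generated .cpp files, and the dispatch layer respectively, and run_fwd/Params remain stand-ins):

    #include <cstdio>

    struct Params { int seqlen; };

    template <typename DataType, bool kHasCausal, bool kHasSoftmax, int kMaxK>
    void run_fwd(Params& p)
    {
        std::printf("causal=%d softmax=%d maxk=%d\n",
                    int(kHasCausal), int(kHasSoftmax), kMaxK);
    }

    // What the ref header provides: "these instantiations exist elsewhere".
    extern template void run_fwd<float, true, true, 96>(Params&);
    extern template void run_fwd<float, true, false, 96>(Params&);
    extern template void run_fwd<float, false, true, 96>(Params&);
    extern template void run_fwd<float, false, false, 96>(Params&);

    // What the generated .cpp files provide (normally one per file).
    template void run_fwd<float, true, true, 96>(Params&);
    template void run_fwd<float, true, false, 96>(Params&);
    template void run_fwd<float, false, true, 96>(Params&);
    template void run_fwd<float, false, false, 96>(Params&);

    // Runtime flags pick one prebuilt instance, mirroring the dispatch layer.
    void dispatch_maxk96(Params& p, bool causal, bool softmax)
    {
        if(causal)
            softmax ? run_fwd<float, true, true, 96>(p)
                    : run_fwd<float, true, false, 96>(p);
        else
            softmax ? run_fwd<float, false, true, 96>(p)
                    : run_fwd<float, false, false, 96>(p);
    }

    int main()
    {
        Params p{256};
        dispatch_maxk96(p, true, false);
    }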
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + true, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..25daf7390d --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..7e9204d0fd --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..4586597463 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + true, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..4cccc9875f --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + true, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..baa6f85270 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + false, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..cfb7414899 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + false, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..3ab920e274 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..ac9e2d3157 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..3f50442686 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..9bbae70dc5 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..e9aed1a9ee --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..3061cd44dc --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..881be3dc25 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..78f31d7a45 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_instances_ref.hpp index 9b49db3cf3..6d801492ee 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_instances_ref.hpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_instances_ref.hpp @@ -137,6 +137,134 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, 64>(HstuAttentionFwdParams& param, hipStream_t stream); +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + +extern template void 
run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); + extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..303d3d4058 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + true, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..1e171e6353 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + false, + 96>(HstuAttentionFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..997f91754d --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_batched_forward_dispatch.hpp"
+
+template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..88a147f3c5
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_batched_forward_dispatch.hpp"
+
+template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..fc31e15e31
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_batched_forward_dispatch.hpp"
+
+template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..c4c84abfed
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_batched_forward_dispatch.hpp"
+
+template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..c3da869dd0
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_batched_forward_dispatch.hpp"
+
+template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..88a9e1549b
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_batched_forward_dispatch.hpp"
+
+template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..9fd80ca643
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..f614465337
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..3e49b95c64
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..b8c93970ae
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..6aacf56b48
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..dc258953b7
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..ae0ca3789a
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..ff0b7703be
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_instances_ref.hpp
index 9689dc395f..b4ac93a2e2 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_instances_ref.hpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_instances_ref.hpp
@@ -137,6 +137,134 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     64>(HstuAttentionFwdParams& param, hipStream_t stream);
 
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
     true,
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..f01be6c3f6
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..0adab11fa1
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..0e7308bc48
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..cacbe4e513
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..09226cde93
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..95664b6760
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..1512ed1595
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..69b328cea6
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..54a50c9196
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    false,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..966fda2c99
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    false,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..92ca10b816
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    false,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..068480af6a
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    false,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..ead9df6b70
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    true,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..fc2d38f690
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    true,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..aa3f82f899
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    true,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..bb645bb508
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    true,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_instances_ref.hpp
index 702ec002b0..027aad62c3 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_instances_ref.hpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_instances_ref.hpp
@@ -137,6 +137,134 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     64>(HstuAttentionFwdParams& param, hipStream_t stream);
 
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    true,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    false,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    true,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    false,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    true,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    false,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    true,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    false,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
+extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
     true,
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..17b2e02311
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..715adc5b69
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..ea6b65eb6f
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..e5100c1e83
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..9c73bd06bb
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    true,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..22c6a225d5
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    true,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..2a299dc1f3
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    false,
+    true,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..9471013544
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include <ck_tile/core.hpp>
+#include "hstu_attention_jagged_forward_dispatch.hpp"
+
+template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    false,
+    false,
+    96>(HstuAttentionFwdParams& param, hipStream_t stream);