diff --git a/example/ck_tile/18_hstu_attention/generate_instances.py b/example/ck_tile/18_hstu_attention/generate_instances.py index 11375c1293..47fa97b0bc 100644 --- a/example/ck_tile/18_hstu_attention/generate_instances.py +++ b/example/ck_tile/18_hstu_attention/generate_instances.py @@ -30,6 +30,7 @@ HSTU_FORWARD_INSTANCE_TEMPLATE = """ {dtype}, {has_causal}, {use_softmax}, + {store_lse}, {has_bias}, {has_dropout}, {max_k}>(HstuAttention{group_or_not}FwdParams& param, hipStream_t stream); @@ -37,7 +38,7 @@ HSTU_FORWARD_INSTANCE_TEMPLATE = """ HSTU_FORWARD_INSTANCE_FNAME = ( "hstu_attention_{mode}_forward_{dtype_str}_{has_or_no_causal_str}_{use_softmax_or_not_str}_" - "{has_or_no_bias_str}_{has_or_no_dropout_str}_{max_k_str}.cpp" + "{store_lse_or_not_str}_{has_or_no_bias_str}_{has_or_no_dropout_str}_{max_k_str}.cpp" ) HSTU_INSTANCE_REF_FNAME = "hstu_attention_{mode}_{function}_{dtype}_instances_ref.hpp" @@ -54,6 +55,11 @@ BOOL_MAP_SOFTMAX = { False: "softmax_false", } +BOOL_MAP_LSE = { + True: "lse_true", + False: "lse_false", +} + BOOL_MAP_BIAS = { True: "has_bias", False: "no_bias", @@ -87,7 +93,11 @@ def create_forward_instances(instance_dir: Path, headdims: List) -> None: for mode in ["batched", "jagged", "group"]: for dtype in ["fp16", "bf16"]: for has_causal in [True, False]: - for use_softmax in [True, False]: + for use_softmax, store_lse in [ + (True, False), + (True, True), + (False, False), + ]: for has_bias in [True, False]: for has_dropout in [False]: for max_k in headdims: @@ -98,6 +108,7 @@ def create_forward_instances(instance_dir: Path, headdims: List) -> None: use_softmax_or_not_str=BOOL_MAP_SOFTMAX[ use_softmax ], + store_lse_or_not_str=BOOL_MAP_LSE[store_lse], has_or_no_bias_str=BOOL_MAP_BIAS[has_bias], has_or_no_dropout_str=BOOL_MAP_DROPOUT[has_dropout], max_k_str=INT_MAP_MAX_K[max_k], @@ -115,6 +126,7 @@ def create_forward_instances(instance_dir: Path, headdims: List) -> None: dtype=TYPE_CTYPE_MAP[dtype], has_causal=BOOL_MAP[has_causal], use_softmax=BOOL_MAP[use_softmax], + store_lse=BOOL_MAP[store_lse], has_bias=BOOL_MAP[has_bias], has_dropout=BOOL_MAP[has_dropout], max_k=max_k, @@ -148,7 +160,11 @@ def create_forward_instances_ref(instance_dir: Path, headdims: List) -> None: for has_bias in [True, False]: for has_dropout in [False]: for has_causal in [True, False]: - for use_softmax in [True, False]: + for use_softmax, store_lse in [ + (True, False), + (True, True), + (False, False), + ]: forward_instance = ( HSTU_FORWARD_INSTANCE_TEMPLATE.format( extern="extern ", @@ -156,6 +172,7 @@ def create_forward_instances_ref(instance_dir: Path, headdims: List) -> None: dtype=TYPE_CTYPE_MAP[dtype], has_causal=BOOL_MAP[has_causal], use_softmax=BOOL_MAP[use_softmax], + store_lse=BOOL_MAP[store_lse], has_bias=BOOL_MAP[has_bias], has_dropout=BOOL_MAP[has_dropout], max_k=max_k, diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_dispatch.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_dispatch.hpp index 12181f9434..23042afc35 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_dispatch.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_dispatch.hpp @@ -27,6 +27,7 @@ template ; static void Run(HstuAttentionNoGroupFwdParams& param, hipStream_t stream) @@ -139,7 +140,7 @@ struct batched_forward_causal_softmax_bias_dropout_dispatch param.v_ptr, param.bias_ptr, param.o_ptr, - nullptr, // lse_ptr + param.lse_ptr, param.seqlen_q, param.is_cross_attention ? param.seqlen_kv : param.seqlen_q, @@ -153,19 +154,19 @@ struct batched_forward_causal_softmax_bias_dropout_dispatch param.seq_stride_v, param.seq_stride_bias, param.seq_stride_o, - 0, // seq_stride_lse + param.seq_stride_lse, param.nhead_stride_q, param.nhead_stride_k, param.nhead_stride_v, param.nhead_stride_bias, param.nhead_stride_o, - 0, // nhead_stride_lse + param.nhead_stride_lse, param.batch_stride_q, param.batch_stride_k, param.batch_stride_v, param.batch_stride_bias, param.batch_stride_o, - 0, // batch_stride_lse + param.batch_stride_lse, param.num_targets_ptr, param.contextual_seqlen, param.window_size, @@ -190,6 +191,7 @@ struct batched_forward_causal_softmax_bias_dropout_dispatch template @@ -200,6 +202,7 @@ void run_batched_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionNoGro batched_forward_causal_softmax_bias_dropout_dispatch; using OaccDataType = HstuAttentionFwdTypeConfig::OaccDataType; @@ -334,13 +335,13 @@ struct batched_forward_splitkv_causal_softmax_bias_dropout_dispatch return HstuKernel::MakeKargs(ws.o_acc_ptr, ws.lse_acc_ptr, param.o_ptr, - nullptr, // lse_ptr + param.lse_ptr, param.batch_stride_o, - 0, // batch_stride_lse + param.batch_stride_lse, param.seq_stride_o, - 0, // seq_stride_o + param.seq_stride_lse, param.nhead_stride_o, - 0, // nhead_stride_o + param.nhead_stride_lse, param.seqlen_q, param.num_head, ws.num_splits, diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_bf16.cpp b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_bf16.cpp index 9748de693d..7b6222158b 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_bf16.cpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_bf16.cpp @@ -20,6 +20,7 @@ void hstu_attention_group_forward_bf16(HstuAttentionGroupFwdParams& param, hipSt run_group_forward_causal_softmax_bias_dropout_dispatch(param, stream); diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_dispatch.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_dispatch.hpp index d5858c36b9..a6e89dc926 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_dispatch.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_dispatch.hpp @@ -27,6 +27,7 @@ template ; static void Run(HstuAttentionGroupFwdParams& param, hipStream_t stream) @@ -129,7 +130,7 @@ struct group_forward_causal_softmax_bias_dropout_dispatch param.v_ptr, param.bias_ptr, param.o_ptr, - nullptr, // lse_ptr + param.lse_ptr, param.num_batch / param.num_group, param.seq_q_offsets_ptr, param.is_cross_attention ? param.seq_kv_offsets_ptr @@ -148,13 +149,13 @@ struct group_forward_causal_softmax_bias_dropout_dispatch param.seq_stride_v, param.seq_stride_bias, param.seq_stride_o, - 0, // seq_stride_lse + param.seq_stride_lse, param.nhead_stride_q, param.nhead_stride_k, param.nhead_stride_v, param.nhead_stride_bias, param.nhead_stride_o, - 0, // nhead_stride_lse + param.nhead_stride_lse, param.num_targets_ptr, param.p_drop, param.philox_seed, @@ -175,6 +176,7 @@ struct group_forward_causal_softmax_bias_dropout_dispatch template @@ -185,6 +187,7 @@ void run_group_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionGroupFw group_forward_causal_softmax_bias_dropout_dispatch(param, stream); diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_splitkv_dispatch.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_splitkv_dispatch.hpp index 8aac606be2..8b59d016c4 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_splitkv_dispatch.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_splitkv_dispatch.hpp @@ -31,6 +31,7 @@ template ; using OaccDataType = HstuAttentionFwdTypeConfig::OaccDataType; @@ -79,7 +80,7 @@ struct group_forward_splitkv_causal_softmax_bias_dropout_dispatch ODataType, true /* kIsJagged */, kUseSoftmax, - false, // kStoreLSE + kStoreLSE, HstuAttentionCombineTileSetting, kMaxSplits>; @@ -320,11 +321,11 @@ struct group_forward_splitkv_causal_softmax_bias_dropout_dispatch return HstuKernel::MakeKargs(ws.o_acc_ptr, ws.lse_acc_ptr, param.o_ptr, - nullptr, // lse_ptr + param.lse_ptr, param.seq_stride_o, - 0, // seq_stride_lse + param.seq_stride_lse, param.nhead_stride_o, - 0, // nhead_stride_lse + param.nhead_stride_lse, param.seq_q_offsets_ptr, param.num_head, ws.num_splits, diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_dispatch.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_dispatch.hpp index 91ddee786c..270a385f11 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_dispatch.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_dispatch.hpp @@ -27,6 +27,7 @@ template ; static void Run(HstuAttentionNoGroupFwdParams& param, hipStream_t stream) @@ -129,7 +130,7 @@ struct jagged_forward_causal_softmax_bias_dropout_dispatch param.v_ptr, param.bias_ptr, param.o_ptr, - nullptr, // lse_ptr + param.lse_ptr, param.seq_q_offsets_ptr, param.is_cross_attention ? param.seq_kv_offsets_ptr : param.seq_q_offsets_ptr, @@ -144,13 +145,13 @@ struct jagged_forward_causal_softmax_bias_dropout_dispatch param.seq_stride_v, param.seq_stride_bias, param.seq_stride_o, - 0, // seq_stride_o + param.seq_stride_lse, param.nhead_stride_q, param.nhead_stride_k, param.nhead_stride_v, param.nhead_stride_bias, param.nhead_stride_o, - 0, // nhead_stride_o + param.nhead_stride_lse, param.num_targets_ptr, param.contextual_seqlen, param.window_size, @@ -178,6 +179,7 @@ struct jagged_forward_causal_softmax_bias_dropout_dispatch template @@ -188,6 +190,7 @@ void run_jagged_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionNoGrou jagged_forward_causal_softmax_bias_dropout_dispatch; using OaccDataType = HstuAttentionFwdTypeConfig::OaccDataType; @@ -78,7 +79,7 @@ struct jagged_forward_splitkv_causal_softmax_bias_dropout_dispatch ODataType, true /* kIsJagged */, kUseSoftmax, - false, // kStoreLSE + kStoreLSE, HstuAttentionCombineTileSetting, kMaxSplits>; @@ -323,11 +324,11 @@ struct jagged_forward_splitkv_causal_softmax_bias_dropout_dispatch return HstuKernel::MakeKargs(ws.o_acc_ptr, ws.lse_acc_ptr, param.o_ptr, - nullptr, // lse_ptr + param.lse_ptr, param.seq_stride_o, - 0, // seq_stride_lse + param.seq_stride_lse, param.nhead_stride_o, - 0, // nhead_stride_lse + param.nhead_stride_lse, param.seq_q_offsets_ptr, param.num_head, ws.num_splits, diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_no_group_forward_bf16.cpp b/example/ck_tile/18_hstu_attention/hstu_attention_no_group_forward_bf16.cpp index b2e4dfd20b..1f9d9d76f2 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_no_group_forward_bf16.cpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_no_group_forward_bf16.cpp @@ -23,6 +23,7 @@ void hstu_attention_no_group_forward_bf16(HstuAttentionNoGroupFwdParams& param, run_jagged_forward_causal_softmax_bias_dropout_dispatch(param, stream); @@ -30,6 +31,7 @@ void hstu_attention_no_group_forward_bf16(HstuAttentionNoGroupFwdParams& param, run_batched_forward_causal_softmax_bias_dropout_dispatch(param, stream); diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_no_group_forward_fp16.cpp b/example/ck_tile/18_hstu_attention/hstu_attention_no_group_forward_fp16.cpp index 1d4793cf7b..17102d4c1d 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_no_group_forward_fp16.cpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_no_group_forward_fp16.cpp @@ -23,6 +23,7 @@ void hstu_attention_no_group_forward_fp16(HstuAttentionNoGroupFwdParams& param, run_jagged_forward_causal_softmax_bias_dropout_dispatch(param, stream); @@ -30,6 +31,7 @@ void hstu_attention_no_group_forward_fp16(HstuAttentionNoGroupFwdParams& param, run_batched_forward_causal_softmax_bias_dropout_dispatch(param, stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp index 11d1e0066c..6a679c5dfb 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, true, false, false, + true, + false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp index 47c6506bd8..15fa0b3eae 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -11,6 +11,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, false, false, true, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp index 7399eab157..39ad3f8b20 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp index 18f2bea56a..4a6f1be623 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, true, false, false, + true, + false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp index 10c9375fa6..250d2f3869 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -11,6 +11,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp index e6cc834e34..1de57681ac 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -11,6 +11,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp index 9303ca4771..1422bf590a 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -11,6 +11,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp index 3937caa4b7..49282fd5bf 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -11,6 +11,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp index 29adc7da6e..758dac8a9c 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -12,6 +12,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp index 919a2ca2a2..5f364eb16b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp index 0ddd63a123..35cef4c6ce 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, true, true, false, + true, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp index c33c9d1e3c..49603b5a82 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, true, true, false, + true, + false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp index b407ee76fd..87c150163e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, - false, + true, true, false, + false, + false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp index 9c512137fb..9a55e27089 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, + true, true, false, false, + false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp index b1294f9f66..b46c90b87d 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, + true, true, false, false, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp index 1599420233..2d0dafcc76 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -12,6 +12,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp index 85e4b966a4..1e89bab3b4 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp index 8022e051de..b0cf76d77f 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp index 0867328c31..cedb7731aa 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp index cc1e53443e..21fcfa9707 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp index 524f364c94..e6adaa7f3c 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp @@ -13,6 +13,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + true, false, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp index fd75e79d70..21935b6c67 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp @@ -12,7 +12,8 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, - false, + true, true, false, + false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp index c39b871cfb..7b3c1a931d 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp @@ -12,7 +12,8 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, - false, + true, true, false, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp index 2c417230db..2325a8a069 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp @@ -13,6 +13,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + true, false, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_instances_ref.hpp index 44f22df8d2..c19886325f 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_instances_ref.hpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_instances_ref.hpp @@ -13,6 +13,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + false, true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -20,6 +21,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + true, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, false, true, false, @@ -30,6 +50,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -37,6 +58,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, false, + false, true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -47,11 +69,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, false, false, false, @@ -63,11 +96,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, + true, + true, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, false, false, false, @@ -77,6 +121,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + false, true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -84,6 +129,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + true, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, false, true, false, @@ -94,6 +158,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -101,6 +166,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, false, + false, true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -111,11 +177,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, false, false, false, @@ -127,11 +204,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, + true, + true, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, false, false, false, @@ -141,6 +229,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + false, true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -148,6 +237,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + true, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, false, true, false, @@ -158,6 +266,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -165,6 +274,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, false, + false, true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -175,11 +285,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, false, false, false, @@ -191,11 +312,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, + true, + true, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, false, false, false, @@ -205,6 +337,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + false, true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -212,6 +345,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + true, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, false, true, false, @@ -222,6 +374,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -229,6 +382,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, false, + false, true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -239,11 +393,31 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, false, false, false, @@ -253,6 +427,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, true, + true, false, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -263,4 +438,5 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, + false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..586fd5e6b9 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + true, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..175e6f42c2 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + true, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..3dcab096fe --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + true, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..d36464ebd4 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + true, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..d66dc204d7 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..890eb2ac7a --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..df29d5b145 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..ac7aca087c --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp index 5584564fb7..dc9d90030d 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + false, true, false, - false, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp index 3e0b1da384..ae656409ee 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + false, true, false, - false, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp index 1dd81afabe..f8737693bc 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -12,6 +12,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp index 3286ad32b8..5fa13b0c48 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -12,6 +12,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..2b81c190a2 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + false, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..4b4bcadb1a --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + false, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..44d81d96a5 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + false, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..9fa4e350c9 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + false, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp index 917f970c82..f47abadec6 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp index 44abd136a2..bb9f3c2b86 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp index cdc47ab71a..1f20d2015a 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - true, - true, false, + true, + true, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp index 5c3a139e12..a8da51210b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - true, false, true, + true, + true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..182af31a97 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + true, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..aa7b22fd1e --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + true, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..dfdade8082 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + true, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..b776e5a233 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + true, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp index b81abf4736..a3d1da717d 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -11,6 +11,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, true, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp index 7b3c65da1e..36caa1b32a 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - false, true, false, false, + true, + false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp index fd0bf72a60..dc3bcc12af 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -11,6 +11,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, true, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp index 3f9786f79a..45ffdf47e8 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - false, true, false, false, + true, + false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp index 6a25bb1abb..fb8c78c80c 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -11,6 +11,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp index 64f44a8f4e..a04311e881 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -11,6 +11,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp index 9c233cbec0..8161492560 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -11,6 +11,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp index b78536a402..444c4cc737 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -11,6 +11,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp index 93c86bcbf0..df5ae6d224 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp index d59a3e98df..f0c8e13f35 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -12,6 +12,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp index 4c4a7cce43..8aceed304a 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp index 0875c95641..b29f7982ce 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -12,6 +12,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp index 23e5b5bfb1..5ed29a2b97 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -12,6 +12,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp index 190727213f..42480417b8 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - false, - false, + true, true, false, + false, + false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp index e9fadee6a6..3a6daedf4a 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -12,6 +12,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp index 0bbc7edd25..ad6fa39844 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -12,6 +12,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp index cc020ba8da..3eb0bbb2aa 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp index acf1a19e90..1adad9d0eb 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp index 49b4903e48..5fcf47cbe6 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp index 150bb760a0..12c367df6f 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp index 0ac06ba3c3..9370a304a2 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, + true, + true, false, - true, - true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp index 942abca0aa..84c648bd32 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, + true, + true, false, - true, - true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp index 9a94415818..32c2833d94 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp @@ -12,7 +12,8 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, - false, + true, true, false, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp index abae11d1ae..1e61e06731 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp @@ -13,6 +13,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + true, false, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_instances_ref.hpp index 7037b7aad1..7d98415ced 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_instances_ref.hpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_instances_ref.hpp @@ -13,6 +13,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + false, true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -20,6 +21,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + true, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, false, true, false, @@ -30,6 +50,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -37,6 +58,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, false, + false, true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -47,11 +69,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, false, false, false, @@ -63,11 +96,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, + true, + true, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, false, false, false, @@ -77,6 +121,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + false, true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -84,6 +129,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + true, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, false, true, false, @@ -94,6 +158,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -101,6 +166,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, false, + false, true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -111,11 +177,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, false, false, false, @@ -127,11 +204,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, + true, + true, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, false, false, false, @@ -141,6 +229,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + false, true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -148,6 +237,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + true, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, false, true, false, @@ -158,6 +266,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -165,6 +274,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, false, + false, true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -175,11 +285,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, false, false, false, @@ -191,11 +312,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, + true, + true, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, false, false, false, @@ -205,6 +337,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + false, true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -212,6 +345,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + true, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, false, true, false, @@ -222,6 +374,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -229,6 +382,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, false, + false, true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -239,11 +393,31 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, false, false, false, @@ -253,6 +427,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, true, + true, false, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -263,4 +438,5 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, + false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..015c386061 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..ce377275c9 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..e89518717c --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..eb1707bb0d --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..68be4cf1c4 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..01a7647517 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..caff8c818c --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..4622ac83f8 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp index 7fc6890581..ae55e13cd5 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp index 818e311721..45a7dfb11c 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + false, true, false, - false, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp index d0bd58f901..ff26d08b2a 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp index d867cfbccb..f1e8761b56 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -12,6 +12,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..05acae7969 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..405f87316d --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..37a99b8fc5 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..194b80f260 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp index 92970beba0..8e68c6108d 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - true, false, true, + true, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp index 9197c03a62..3694e22b33 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - true, - true, false, + true, + true, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp index b4427b3ffc..6f9069bcbd 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp index fdbee284e1..40dc8550a1 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp @@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..b4f2174901 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..9210f14021 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..16db8e29e9 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..34d4e9d4b0 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_batched_forward_dispatch.hpp" + +template void run_batched_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp index 7e7482ffc3..255218c40f 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, true, false, false, + true, + false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp index 66945cdb32..e0070ce881 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -11,6 +11,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, false, false, true, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp index ca2ab49f4a..a59e5f2a95 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -14,5 +14,6 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + true, false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp index 978901536f..05f1bf2064 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, true, false, false, + true, + false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp index d90f091b57..ce264c34e8 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -11,6 +11,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp index 4b2824c9ef..db6d593b2c 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -11,6 +11,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp index 3ae7c5c171..5760fa5b67 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -11,6 +11,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp index df67b64b7d..1928c056d4 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -11,6 +11,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp index 47e82e2d9e..7dc7b80e01 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -12,6 +12,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp index f7c07a3692..78efe07257 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -14,5 +14,6 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, true, false, + true, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp index 01e4cfb0aa..c68a40f2ef 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, true, true, false, + true, + false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp index d6a4cadbe5..5ecf8e8fe5 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, true, true, false, + true, + false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp index fde8503f24..e8cff53347 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, - false, + true, true, false, + false, + false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp index 3573aae758..d922b41957 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, + true, true, false, false, + false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp index a718704492..da53a16768 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, + true, true, false, false, + false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp index 5e930d548c..c94afed914 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -12,6 +12,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp index 142672c620..9afb8e248e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp index 131bd27fc2..ae77d8b9c5 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp @@ -14,5 +14,6 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp index 789c787cc2..81d6e735e7 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp @@ -14,5 +14,6 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp index 1aa5ff9c0a..8df42aaa8d 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp @@ -14,5 +14,6 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp index a1d3f73518..24b58e6ca0 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp @@ -13,6 +13,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + true, false, false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp index b5715f4032..6634bef4fc 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp @@ -12,7 +12,8 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, - false, + true, true, false, + false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp index 74199b013a..764c6b406a 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp @@ -12,7 +12,8 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, - false, + true, true, false, + false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp index 22902b14c6..bfb306e8fb 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp @@ -13,6 +13,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + true, false, false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_instances_ref.hpp index c77f14242a..ade5a0cb84 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_instances_ref.hpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_instances_ref.hpp @@ -13,6 +13,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + false, true, false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -20,6 +21,25 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + true, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, false, true, false, @@ -30,6 +50,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -37,6 +58,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, false, + false, true, false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -47,11 +69,22 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, false, false, false, @@ -63,11 +96,22 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, + true, + true, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, false, false, false, @@ -77,6 +121,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + false, true, false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -84,6 +129,25 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + true, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, false, true, false, @@ -94,6 +158,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -101,6 +166,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, false, + false, true, false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -111,11 +177,22 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, false, false, false, @@ -127,11 +204,22 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, + true, + true, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, false, false, false, @@ -141,6 +229,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + false, true, false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -148,6 +237,25 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + true, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, false, true, false, @@ -158,6 +266,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -165,6 +274,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, false, + false, true, false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -175,11 +285,22 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, false, false, false, @@ -191,11 +312,22 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, + true, + true, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, false, false, false, @@ -205,6 +337,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + false, true, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -212,6 +345,25 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + true, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, false, true, false, @@ -222,6 +374,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -229,6 +382,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, false, + false, true, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -239,11 +393,31 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, false, false, false, @@ -253,6 +427,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, true, + true, false, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -263,4 +438,5 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< false, false, false, + false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..c1981fbf9b --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + true, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..dd7e941601 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + true, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..25d853ab82 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + true, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..9bdfe2832d --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + true, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..8f2b60e72b --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..06c4b29650 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..f70004362e --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..0495c977bb --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp index a643f4f4a0..dfee633054 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + false, true, false, - false, + true, false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp index 5b7b50f4e1..8fe7aba7bb 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + false, true, false, - false, + true, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp index 99b0a8bdee..9e9c638aa8 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -12,6 +12,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp index d33d4d4282..31ffc94f16 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -12,6 +12,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..ffb6ff5ebe --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + false, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..465b93e285 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + false, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..e29472a123 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + false, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..721f8f44b6 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + false, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp index f3dac51603..5b766e5ace 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp index 0fc4f78bae..4d17e14686 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp @@ -14,5 +14,6 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp index d2119125f2..8c22b6b147 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - true, - true, false, + true, + true, + true, false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp index 293501fa3f..e6306808d9 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - true, false, true, + true, + true, false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..97675241c9 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + true, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..7090e00b97 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + true, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..2b6a0ae96d --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + true, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..1faecde65f --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + true, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp index 78506f00e3..d4870065c6 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -11,6 +11,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, true, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp index f770e713d8..e316905b0e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - false, true, false, false, + true, + false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp index 29d1886c7f..7e5556d495 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - false, true, false, false, + true, + false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp index 34b2919708..fe22c43b96 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - false, true, false, false, + true, + false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp index a505763364..100861bc59 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -11,6 +11,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp index b9c4ab500e..91144229a4 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -11,6 +11,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp index b2cc61aa54..57e3f7400b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -11,6 +11,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp index 24aada0d72..44b52cc862 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -11,6 +11,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp index 999076d077..66f4bed809 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, true, false, + true, false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp index e4f801955e..c11b43e8ab 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -12,6 +12,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp index d7b4fcc49f..9882a3f973 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - false, true, true, false, + true, + false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp index f0091400c9..c5ed762fd1 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - false, true, true, false, + true, + false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp index 1ad3453a51..c2daf2017f 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -12,6 +12,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp index c0459e603a..ac3bbe9597 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - false, - false, + true, true, false, + false, + false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp index 508d3595d5..98ee62e77b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -12,6 +12,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp index 32d26a2b0f..add02192d1 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -12,6 +12,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp index 7e5edcd5b9..0bbe8b46c8 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp index 934e41fa65..edf0ae0b26 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp @@ -14,5 +14,6 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp index 0097bad485..fadab1eb96 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp @@ -14,5 +14,6 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp index e7d0b67853..c2b95839f3 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp @@ -14,5 +14,6 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp index 3d7bc5393a..7d4234137d 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, + true, + true, false, - true, - true, false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp index 5bfa2a4040..d89e977848 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, + true, + true, false, - true, - true, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp index cf93936cc3..4b6322c596 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp @@ -12,7 +12,8 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, - false, + true, true, false, + false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp index e3070f20e4..4e9c262590 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp @@ -13,6 +13,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + true, false, false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_instances_ref.hpp index 5bb513f7af..3b4f2be94e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_instances_ref.hpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_instances_ref.hpp @@ -13,6 +13,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + false, true, false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -20,6 +21,25 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + true, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, false, true, false, @@ -30,6 +50,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -37,6 +58,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, false, + false, true, false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -47,11 +69,22 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, false, false, false, @@ -63,11 +96,22 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, + true, + true, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, false, false, false, @@ -77,6 +121,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + false, true, false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -84,6 +129,25 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + true, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, false, true, false, @@ -94,6 +158,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -101,6 +166,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, false, + false, true, false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -111,11 +177,22 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, false, false, false, @@ -127,11 +204,22 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, + true, + true, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, false, false, false, @@ -141,6 +229,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + false, true, false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -148,6 +237,25 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + true, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, false, true, false, @@ -158,6 +266,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -165,6 +274,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, false, + false, true, false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -175,11 +285,22 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, false, false, false, @@ -191,11 +312,22 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, + true, + true, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, false, false, false, @@ -205,6 +337,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + false, true, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -212,6 +345,25 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + true, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, false, true, false, @@ -222,6 +374,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -229,6 +382,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, false, + false, true, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -239,11 +393,31 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, false, false, false, @@ -253,6 +427,7 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, true, + true, false, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); @@ -263,4 +438,5 @@ extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< false, false, false, + false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..49fccda41e --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..a2022a6d74 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..c590524117 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..c8d80869d1 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..bd1df4acc9 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..a604fc0966 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..ce1f3db44f --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..ffc1d927ef --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp index a8d24ac68b..bd240d543b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< false, true, false, + true, false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp index f5d9be6044..4cdfc89604 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + false, true, false, - false, + true, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp index 634439ac6c..21423bd55c 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -12,6 +12,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp index 568214ec7c..6736a29828 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -12,6 +12,7 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..6ed5411bd7 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..5acc1badba --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..57a62f5ff8 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..a35974082b --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp index 1823d95c82..cc3f78d2b8 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - true, false, true, + true, + true, false, 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp index 2dc4d47a2e..9802b52e45 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - true, - true, false, + true, + true, + true, false, 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp index 5fc3124469..5b89ad3024 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - true, - true, false, + true, + true, + true, false, 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp index df55a484c4..399eba0a33 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_group_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - true, false, true, + true, + true, false, 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..cd205ef804 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..1fa7986068 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..3dfef6bbe9 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..e0795e9424 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp index 4ca9b31d8b..c62bf704f3 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -11,6 +11,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, false, false, true, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp index b55f19c252..6a5b9563c8 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, true, false, false, + true, + false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp index 9eab205462..3f65521a53 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp index 86fd8019e0..d5132f6888 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, true, false, false, + true, + false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp index 6b28c249ea..c3d5298178 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -11,6 +11,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp index 1b3a092e28..912bdae0db 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -11,6 +11,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp index 0d00d415f6..64842bf591 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -11,6 +11,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp index 435a624cab..e007745a22 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -11,6 +11,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp index b7b03ea309..871ffc7481 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp index 944f053838..0e44c042f4 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -12,6 +12,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp index 005f8aea53..907de945c0 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, true, true, false, + true, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp index 09695faf5b..10817502dd 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, true, true, false, + true, + false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp index cfa3cf71a6..7baf6357a4 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -12,6 +12,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp index 9775c6c7b9..b0b7d78625 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, - false, + true, true, false, + false, + false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp index 88b345eb08..4229b80782 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, + true, true, false, false, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp index 0c90986c65..ee5aa6592e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - false, - false, + true, true, false, + false, + false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp index 4d788664e3..43c8ac87cd 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp index b8ad957a9e..73c344d4ed 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp index 412c2da204..e8dd1442f5 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp index 9b2caeb43f..c5eac1e74b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp index baf56e86af..cf9c3efa4b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, + true, + true, false, - true, - true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp index 352a31dd7b..5502ba3703 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + true, + true, + true, false, - true, - true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp index b21a909096..416306b254 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp @@ -12,7 +12,8 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, - false, + true, true, false, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp index 8b99fa00ee..6083c38d47 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp @@ -13,6 +13,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + true, false, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_instances_ref.hpp index c8329edfa6..0f3eb73dd4 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_instances_ref.hpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_instances_ref.hpp @@ -13,6 +13,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + false, true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -20,6 +21,25 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + true, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, false, true, false, @@ -30,6 +50,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -37,6 +58,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, false, + false, true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -47,11 +69,22 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, false, false, false, @@ -63,11 +96,22 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, + true, + true, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, false, false, false, @@ -77,6 +121,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + false, true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -84,6 +129,25 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + true, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, false, true, false, @@ -94,6 +158,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -101,6 +166,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, false, + false, true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -111,11 +177,22 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, false, false, false, @@ -127,11 +204,22 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, + true, + true, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, false, false, false, @@ -141,6 +229,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + false, true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -148,6 +237,25 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + true, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, false, true, false, @@ -158,6 +266,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -165,6 +274,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, false, + false, true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -175,11 +285,22 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, false, false, false, @@ -191,11 +312,22 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, + true, + true, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, false, false, false, @@ -205,6 +337,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, true, + false, true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -212,6 +345,25 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + true, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, false, true, false, @@ -222,6 +374,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -229,6 +382,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, false, + false, true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -239,11 +393,31 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, true, + true, + true, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, false, false, false, @@ -253,6 +427,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, true, + true, false, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -263,4 +438,5 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, + false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..7d64710584 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + true, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..165ef251dd --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + true, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..13804c54f6 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + true, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..0d314866ba --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + true, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..59933b046e --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..9d141022fd --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..352f922906 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..73e772e126 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + false, + false, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp index dfe6ab15ac..7f5f9cad27 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp index d317710dc7..a3bfbf2f4d 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + false, true, false, - false, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp index c5d0f88b95..8c37b31611 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -12,6 +12,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, false, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp index 35a1507267..333b013f21 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, + false, true, false, - false, + true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..eee667f41d --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + false, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..26c3124e9e --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + false, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..4078b49787 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + false, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..978ea2f053 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + false, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp index a481623cca..1771efeaba 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - true, false, true, + true, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp index 0355705d79..21f95afa49 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - true, - true, false, + true, + true, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp index 9162e89430..90a562b8c0 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - true, - true, false, + true, + true, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp index f7b1033f82..6df7347540 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, - true, false, true, + true, + true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..163dd8636d --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + true, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..3d529edb9b --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + true, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..49934c6078 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + true, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..9894a6f45d --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + false, + true, + true, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp index 891befa082..252e4581d6 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -11,6 +11,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, true, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp index a910b873b8..ae3206d22e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - false, true, false, false, + true, + false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp index 4fd605a52b..bbeec56938 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -11,6 +11,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, true, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp index ef23ceaed0..6bc5a4d508 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -11,8 +11,9 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, - false, true, false, false, + true, + false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp index 655979e251..906377b39b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -11,6 +11,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp index 0333a9fade..90bccce0d5 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -11,6 +11,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp index 335804c66c..828a01e3ce 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -11,6 +11,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp index 2cf1cc4a1e..5994bc39a8 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -11,6 +11,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp index fe2f2b761b..4a4ad0788f 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp index e040daa8c9..00feead14b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -12,6 +12,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp index fff2d480bf..ffbcfd81c9 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp index aaf7f1c2c4..2b4f1462ff 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -12,6 +12,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp index 6798d03c1c..dda6a046b9 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -12,6 +12,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp index 2e6a018581..33c191637b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -12,6 +12,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp index 25a731182c..72e6d673a4 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -12,6 +12,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp index 8034875050..14b4f740ff 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -12,6 +12,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, false, false, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp index 3bcfa9588a..74cda1695b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp index 41232f2077..2441e1f858 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp index 10d6120a8a..f2ab8ef6c3 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp index 19654313ca..ee03279633 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, + true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp index c7eec6244c..058148d23d 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp @@ -12,7 +12,8 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, - false, + true, true, false, + false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp index 75ff431b7c..a67d090831 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp @@ -13,6 +13,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + true, false, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp index 7d701e2b2e..ecd5a4d634 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp @@ -12,7 +12,8 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, - false, + true, true, false, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp index 7174621098..89f715d30d 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp @@ -13,6 +13,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + true, false, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_instances_ref.hpp index 9d7e4bc951..4a0c161c66 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_instances_ref.hpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_instances_ref.hpp @@ -13,6 +13,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + false, true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -20,6 +21,25 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + true, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, false, true, false, @@ -30,6 +50,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -37,6 +58,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, false, + false, true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -47,11 +69,22 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, false, false, false, @@ -63,11 +96,22 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, + true, + true, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, false, false, false, @@ -77,6 +121,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + false, true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -84,6 +129,25 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + true, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, false, true, false, @@ -94,6 +158,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -101,6 +166,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, false, + false, true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -111,11 +177,22 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, false, false, false, @@ -127,11 +204,22 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, + true, + true, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, false, false, false, @@ -141,6 +229,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + false, true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -148,6 +237,25 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + true, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, false, true, false, @@ -158,6 +266,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -165,6 +274,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, false, + false, true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -175,11 +285,22 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, false, false, false, @@ -191,11 +312,22 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, + true, + true, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, false, false, false, @@ -205,6 +337,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, true, + false, true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -212,6 +345,25 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + true, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, false, true, false, @@ -222,6 +374,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -229,6 +382,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, false, + false, true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -239,11 +393,31 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, + false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, true, + true, + true, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); + +extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, false, false, false, @@ -253,6 +427,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, true, + true, false, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); @@ -263,4 +438,5 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, + false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..ce59d44d15 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..b72cd78d9a --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..9269d86370 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..7f9a20894a --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..23983d26c6 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..856e2194f0 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..3dd0c52ba1 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..ec519b69f3 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp index 1f450a1a40..fc89bf2843 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp index e55ff6e865..61415b4559 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_256.cpp @@ -12,6 +12,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp index 16467d2f73..8326293fb8 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_64.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp index 3b6b55a8e2..6fccb8e445 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_has_bias_no_dropout_maxk_96.cpp @@ -12,6 +12,7 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, false, + true, false, true, false, diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..829c5b5ecb --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..dde3872f28 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..1397719690 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..7d611a2b0e --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp index 172f976577..586cab7d46 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_128.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp index eb8d04a1eb..ff25ce4b66 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_256.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp index b03bd1681b..5e9eeb05cb 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_64.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp similarity index 98% rename from example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp rename to example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp index cd514c2a4e..30c28bfc0b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_has_bias_no_dropout_maxk_96.cpp @@ -14,5 +14,6 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, + true, false, 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..30fcdecbfe --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + false, + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..a948268b7e --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + false, + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..b95188ee7f --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + false, + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..004838b0bc --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_lse_true_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,19 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_jagged_forward_dispatch.hpp" + +template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + false, + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);