Add instances and kStoreLSE template in dispatcher class to support outputting lse for fwd training

This commit is contained in:
Qianfeng Zhang
2026-06-05 01:06:11 +00:00
parent 8f83a2841f
commit 4414019296
305 changed files with 3232 additions and 119 deletions

View File

@@ -30,6 +30,7 @@ HSTU_FORWARD_INSTANCE_TEMPLATE = """
{dtype},
{has_causal},
{use_softmax},
{store_lse},
{has_bias},
{has_dropout},
{max_k}>(HstuAttention{group_or_not}FwdParams& param, hipStream_t stream);
@@ -37,7 +38,7 @@ HSTU_FORWARD_INSTANCE_TEMPLATE = """
HSTU_FORWARD_INSTANCE_FNAME = (
"hstu_attention_{mode}_forward_{dtype_str}_{has_or_no_causal_str}_{use_softmax_or_not_str}_"
"{has_or_no_bias_str}_{has_or_no_dropout_str}_{max_k_str}.cpp"
"{store_lse_or_not_str}_{has_or_no_bias_str}_{has_or_no_dropout_str}_{max_k_str}.cpp"
)
HSTU_INSTANCE_REF_FNAME = "hstu_attention_{mode}_{function}_{dtype}_instances_ref.hpp"
@@ -54,6 +55,11 @@ BOOL_MAP_SOFTMAX = {
False: "softmax_false",
}
BOOL_MAP_LSE = {
True: "lse_true",
False: "lse_false",
}
BOOL_MAP_BIAS = {
True: "has_bias",
False: "no_bias",
@@ -87,7 +93,11 @@ def create_forward_instances(instance_dir: Path, headdims: List) -> None:
for mode in ["batched", "jagged", "group"]:
for dtype in ["fp16", "bf16"]:
for has_causal in [True, False]:
for use_softmax in [True, False]:
for use_softmax, store_lse in [
(True, False),
(True, True),
(False, False),
]:
for has_bias in [True, False]:
for has_dropout in [False]:
for max_k in headdims:
@@ -98,6 +108,7 @@ def create_forward_instances(instance_dir: Path, headdims: List) -> None:
use_softmax_or_not_str=BOOL_MAP_SOFTMAX[
use_softmax
],
store_lse_or_not_str=BOOL_MAP_LSE[store_lse],
has_or_no_bias_str=BOOL_MAP_BIAS[has_bias],
has_or_no_dropout_str=BOOL_MAP_DROPOUT[has_dropout],
max_k_str=INT_MAP_MAX_K[max_k],
@@ -115,6 +126,7 @@ def create_forward_instances(instance_dir: Path, headdims: List) -> None:
dtype=TYPE_CTYPE_MAP[dtype],
has_causal=BOOL_MAP[has_causal],
use_softmax=BOOL_MAP[use_softmax],
store_lse=BOOL_MAP[store_lse],
has_bias=BOOL_MAP[has_bias],
has_dropout=BOOL_MAP[has_dropout],
max_k=max_k,
@@ -148,7 +160,11 @@ def create_forward_instances_ref(instance_dir: Path, headdims: List) -> None:
for has_bias in [True, False]:
for has_dropout in [False]:
for has_causal in [True, False]:
for use_softmax in [True, False]:
for use_softmax, store_lse in [
(True, False),
(True, True),
(False, False),
]:
forward_instance = (
HSTU_FORWARD_INSTANCE_TEMPLATE.format(
extern="extern ",
@@ -156,6 +172,7 @@ def create_forward_instances_ref(instance_dir: Path, headdims: List) -> None:
dtype=TYPE_CTYPE_MAP[dtype],
has_causal=BOOL_MAP[has_causal],
use_softmax=BOOL_MAP[use_softmax],
store_lse=BOOL_MAP[store_lse],
has_bias=BOOL_MAP[has_bias],
has_dropout=BOOL_MAP[has_dropout],
max_k=max_k,

View File

@@ -27,6 +27,7 @@
template <typename InOutDataType,
bool kUseCausal,
bool kUseSoftmax,
bool kStoreLSE,
bool kHasBias,
bool kHasDropout,
ck_tile::index_t MaxK,
@@ -57,7 +58,7 @@ struct batched_forward_causal_softmax_bias_dropout_dispatch
kHasDropout,
kUseCausal,
kUseSoftmax,
false, // kStoreLSE
kStoreLSE,
HstuAttentionTileSetting>;
static void Run(HstuAttentionNoGroupFwdParams& param, hipStream_t stream)
@@ -139,7 +140,7 @@ struct batched_forward_causal_softmax_bias_dropout_dispatch
param.v_ptr,
param.bias_ptr,
param.o_ptr,
nullptr, // lse_ptr
param.lse_ptr,
param.seqlen_q,
param.is_cross_attention ? param.seqlen_kv
: param.seqlen_q,
@@ -153,19 +154,19 @@ struct batched_forward_causal_softmax_bias_dropout_dispatch
param.seq_stride_v,
param.seq_stride_bias,
param.seq_stride_o,
0, // seq_stride_lse
param.seq_stride_lse,
param.nhead_stride_q,
param.nhead_stride_k,
param.nhead_stride_v,
param.nhead_stride_bias,
param.nhead_stride_o,
0, // nhead_stride_lse
param.nhead_stride_lse,
param.batch_stride_q,
param.batch_stride_k,
param.batch_stride_v,
param.batch_stride_bias,
param.batch_stride_o,
0, // batch_stride_lse
param.batch_stride_lse,
param.num_targets_ptr,
param.contextual_seqlen,
param.window_size,
@@ -190,6 +191,7 @@ struct batched_forward_causal_softmax_bias_dropout_dispatch
template <typename InOutDataType,
bool kUseCausal,
bool kUseSoftmax,
bool kStoreLSE,
bool kHasBias,
bool kHasDropout,
ck_tile::index_t MaxK>
@@ -200,6 +202,7 @@ void run_batched_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionNoGro
batched_forward_causal_softmax_bias_dropout_dispatch<InOutDataType,
kUseCausal,
kUseSoftmax,
kStoreLSE,
kHasBias,
kHasDropout,
MaxK,
@@ -219,6 +222,7 @@ void run_batched_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionNoGro
batched_forward_splitkv_causal_softmax_bias_dropout_dispatch<InOutDataType,
kUseCausal,
kUseSoftmax,
kStoreLSE,
kHasBias,
kHasDropout,
MaxK,
@@ -228,6 +232,7 @@ void run_batched_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionNoGro
batched_forward_causal_softmax_bias_dropout_dispatch<InOutDataType,
kUseCausal,
kUseSoftmax,
kStoreLSE,
kHasBias,
kHasDropout,
MaxK,

View File

@@ -31,6 +31,7 @@
template <typename InOutDataType,
bool kUseCausal,
bool kUseSoftmax,
bool kStoreLSE,
bool kHasBias,
bool kHasDropout,
ck_tile::index_t MaxK,
@@ -64,7 +65,7 @@ struct batched_forward_splitkv_causal_softmax_bias_dropout_dispatch
kHasDropout,
kUseCausal,
kUseSoftmax,
false, // kStoreLSE
kStoreLSE,
HstuAttentionFwdTileSetting>;
using OaccDataType = HstuAttentionFwdTypeConfig<InOutDataType>::OaccDataType;
@@ -334,13 +335,13 @@ struct batched_forward_splitkv_causal_softmax_bias_dropout_dispatch
return HstuKernel::MakeKargs(ws.o_acc_ptr,
ws.lse_acc_ptr,
param.o_ptr,
nullptr, // lse_ptr
param.lse_ptr,
param.batch_stride_o,
0, // batch_stride_lse
param.batch_stride_lse,
param.seq_stride_o,
0, // seq_stride_o
param.seq_stride_lse,
param.nhead_stride_o,
0, // nhead_stride_o
param.nhead_stride_lse,
param.seqlen_q,
param.num_head,
ws.num_splits,

View File

@@ -20,6 +20,7 @@ void hstu_attention_group_forward_bf16(HstuAttentionGroupFwdParams& param, hipSt
run_group_forward_causal_softmax_bias_dropout_dispatch<ck_tile::bf16_t,
kUseCausal,
kUseSoftmax,
false,
kHasBias,
false, // kHasDropout
MaxK>(param, stream);

View File

@@ -27,6 +27,7 @@
template <typename InOutDataType,
bool kUseCausal,
bool kUseSoftmax,
bool kStoreLSE,
bool kHasBias,
bool kHasDropout,
ck_tile::index_t MaxK,
@@ -57,7 +58,7 @@ struct group_forward_causal_softmax_bias_dropout_dispatch
kHasDropout,
kUseCausal,
kUseSoftmax,
false, // kStoreLSE
kStoreLSE,
HstuAttentionTileSetting>;
static void Run(HstuAttentionGroupFwdParams& param, hipStream_t stream)
@@ -129,7 +130,7 @@ struct group_forward_causal_softmax_bias_dropout_dispatch
param.v_ptr,
param.bias_ptr,
param.o_ptr,
nullptr, // lse_ptr
param.lse_ptr,
param.num_batch / param.num_group,
param.seq_q_offsets_ptr,
param.is_cross_attention ? param.seq_kv_offsets_ptr
@@ -148,13 +149,13 @@ struct group_forward_causal_softmax_bias_dropout_dispatch
param.seq_stride_v,
param.seq_stride_bias,
param.seq_stride_o,
0, // seq_stride_lse
param.seq_stride_lse,
param.nhead_stride_q,
param.nhead_stride_k,
param.nhead_stride_v,
param.nhead_stride_bias,
param.nhead_stride_o,
0, // nhead_stride_lse
param.nhead_stride_lse,
param.num_targets_ptr,
param.p_drop,
param.philox_seed,
@@ -175,6 +176,7 @@ struct group_forward_causal_softmax_bias_dropout_dispatch
template <typename InOutDataType,
bool kUseCausal,
bool kUseSoftmax,
bool kStoreLSE,
bool kHasBias,
bool kHasDropout,
ck_tile::index_t MaxK>
@@ -185,6 +187,7 @@ void run_group_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionGroupFw
group_forward_causal_softmax_bias_dropout_dispatch<InOutDataType,
kUseCausal,
kUseSoftmax,
kStoreLSE,
kHasBias,
kHasDropout,
MaxK,
@@ -204,6 +207,7 @@ void run_group_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionGroupFw
group_forward_splitkv_causal_softmax_bias_dropout_dispatch<InOutDataType,
kUseCausal,
kUseSoftmax,
kStoreLSE,
kHasBias,
kHasDropout,
MaxK,
@@ -213,6 +217,7 @@ void run_group_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionGroupFw
group_forward_causal_softmax_bias_dropout_dispatch<InOutDataType,
kUseCausal,
kUseSoftmax,
kStoreLSE,
kHasBias,
kHasDropout,
MaxK,

View File

@@ -21,6 +21,7 @@ void hstu_attention_group_forward_fp16(HstuAttentionGroupFwdParams& param, hipSt
run_group_forward_causal_softmax_bias_dropout_dispatch<ck_tile::fp16_t,
kUseCausal,
kUseSoftmax,
false,
kHasBias,
false, // kHasDropout
MaxK>(param, stream);

View File

@@ -31,6 +31,7 @@
template <typename InOutDataType,
bool kUseCausal,
bool kUseSoftmax,
bool kStoreLSE,
bool kHasBias,
bool kHasDropout,
ck_tile::index_t MaxK,
@@ -65,7 +66,7 @@ struct group_forward_splitkv_causal_softmax_bias_dropout_dispatch
kHasDropout,
kUseCausal,
kUseSoftmax,
false, // kStoreLSE
kStoreLSE,
HstuAttentionFwdTileSetting>;
using OaccDataType = HstuAttentionFwdTypeConfig<InOutDataType>::OaccDataType;
@@ -79,7 +80,7 @@ struct group_forward_splitkv_causal_softmax_bias_dropout_dispatch
ODataType,
true /* kIsJagged */,
kUseSoftmax,
false, // kStoreLSE
kStoreLSE,
HstuAttentionCombineTileSetting,
kMaxSplits>;
@@ -320,11 +321,11 @@ struct group_forward_splitkv_causal_softmax_bias_dropout_dispatch
return HstuKernel::MakeKargs(ws.o_acc_ptr,
ws.lse_acc_ptr,
param.o_ptr,
nullptr, // lse_ptr
param.lse_ptr,
param.seq_stride_o,
0, // seq_stride_lse
param.seq_stride_lse,
param.nhead_stride_o,
0, // nhead_stride_lse
param.nhead_stride_lse,
param.seq_q_offsets_ptr,
param.num_head,
ws.num_splits,

View File

@@ -27,6 +27,7 @@
template <typename InOutDataType,
bool kUseCausal,
bool kUseSoftmax,
bool kStoreLSE,
bool kHasBias,
bool kHasDropout,
ck_tile::index_t MaxK,
@@ -57,7 +58,7 @@ struct jagged_forward_causal_softmax_bias_dropout_dispatch
kHasDropout,
kUseCausal,
kUseSoftmax,
false, // kStoreLSE
kStoreLSE,
HstuAttentionTileSetting>;
static void Run(HstuAttentionNoGroupFwdParams& param, hipStream_t stream)
@@ -129,7 +130,7 @@ struct jagged_forward_causal_softmax_bias_dropout_dispatch
param.v_ptr,
param.bias_ptr,
param.o_ptr,
nullptr, // lse_ptr
param.lse_ptr,
param.seq_q_offsets_ptr,
param.is_cross_attention ? param.seq_kv_offsets_ptr
: param.seq_q_offsets_ptr,
@@ -144,13 +145,13 @@ struct jagged_forward_causal_softmax_bias_dropout_dispatch
param.seq_stride_v,
param.seq_stride_bias,
param.seq_stride_o,
0, // seq_stride_o
param.seq_stride_lse,
param.nhead_stride_q,
param.nhead_stride_k,
param.nhead_stride_v,
param.nhead_stride_bias,
param.nhead_stride_o,
0, // nhead_stride_o
param.nhead_stride_lse,
param.num_targets_ptr,
param.contextual_seqlen,
param.window_size,
@@ -178,6 +179,7 @@ struct jagged_forward_causal_softmax_bias_dropout_dispatch
template <typename InOutDataType,
bool kUseCausal,
bool kUseSoftmax,
bool kStoreLSE,
bool kHasBias,
bool kHasDropout,
ck_tile::index_t MaxK>
@@ -188,6 +190,7 @@ void run_jagged_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionNoGrou
jagged_forward_causal_softmax_bias_dropout_dispatch<InOutDataType,
kUseCausal,
kUseSoftmax,
kStoreLSE,
kHasBias,
kHasDropout,
MaxK,
@@ -207,6 +210,7 @@ void run_jagged_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionNoGrou
jagged_forward_splitkv_causal_softmax_bias_dropout_dispatch<InOutDataType,
kUseCausal,
kUseSoftmax,
kStoreLSE,
kHasBias,
kHasDropout,
MaxK,
@@ -216,6 +220,7 @@ void run_jagged_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionNoGrou
jagged_forward_causal_softmax_bias_dropout_dispatch<InOutDataType,
kUseCausal,
kUseSoftmax,
kStoreLSE,
kHasBias,
kHasDropout,
MaxK,

View File

@@ -31,6 +31,7 @@
template <typename InOutDataType,
bool kUseCausal,
bool kUseSoftmax,
bool kStoreLSE,
bool kHasBias,
bool kHasDropout,
ck_tile::index_t MaxK,
@@ -64,7 +65,7 @@ struct jagged_forward_splitkv_causal_softmax_bias_dropout_dispatch
kHasDropout,
kUseCausal,
kUseSoftmax,
false, // kStoreLSE
kStoreLSE,
HstuAttentionFwdTileSetting>;
using OaccDataType = HstuAttentionFwdTypeConfig<InOutDataType>::OaccDataType;
@@ -78,7 +79,7 @@ struct jagged_forward_splitkv_causal_softmax_bias_dropout_dispatch
ODataType,
true /* kIsJagged */,
kUseSoftmax,
false, // kStoreLSE
kStoreLSE,
HstuAttentionCombineTileSetting,
kMaxSplits>;
@@ -323,11 +324,11 @@ struct jagged_forward_splitkv_causal_softmax_bias_dropout_dispatch
return HstuKernel::MakeKargs(ws.o_acc_ptr,
ws.lse_acc_ptr,
param.o_ptr,
nullptr, // lse_ptr
param.lse_ptr,
param.seq_stride_o,
0, // seq_stride_lse
param.seq_stride_lse,
param.nhead_stride_o,
0, // nhead_stride_lse
param.nhead_stride_lse,
param.seq_q_offsets_ptr,
param.num_head,
ws.num_splits,

View File

@@ -23,6 +23,7 @@ void hstu_attention_no_group_forward_bf16(HstuAttentionNoGroupFwdParams& param,
run_jagged_forward_causal_softmax_bias_dropout_dispatch<ck_tile::bf16_t,
kUseCausal,
kUseSoftmax,
false,
kHasBias,
false, // kHasDropout
MaxK>(param, stream);
@@ -30,6 +31,7 @@ void hstu_attention_no_group_forward_bf16(HstuAttentionNoGroupFwdParams& param,
run_batched_forward_causal_softmax_bias_dropout_dispatch<ck_tile::bf16_t,
kUseCausal,
kUseSoftmax,
false,
kHasBias,
false, // kHasDropout
MaxK>(param, stream);

View File

@@ -23,6 +23,7 @@ void hstu_attention_no_group_forward_fp16(HstuAttentionNoGroupFwdParams& param,
run_jagged_forward_causal_softmax_bias_dropout_dispatch<ck_tile::fp16_t,
kUseCausal,
kUseSoftmax,
false,
kHasBias,
false, // kHasDropout
MaxK>(param, stream);
@@ -30,6 +31,7 @@ void hstu_attention_no_group_forward_fp16(HstuAttentionNoGroupFwdParams& param,
run_batched_forward_causal_softmax_bias_dropout_dispatch<ck_tile::fp16_t,
kUseCausal,
kUseSoftmax,
false,
kHasBias,
false, // kHasDropout
MaxK>(param, stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
false,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
false,
false,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
false,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
true,
false,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
true,
true,
false,
false,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
true,
true,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
true,
true,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
true,
true,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
true,
true,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -13,6 +13,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
false,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -12,7 +12,8 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
true,
true,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -12,7 +12,8 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
true,
true,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -13,6 +13,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
false,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -13,6 +13,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
false,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -20,6 +21,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
true,
false,
@@ -30,6 +50,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
true,
true,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -37,6 +58,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -47,11 +69,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
false,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
false,
false,
@@ -63,11 +96,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
false,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
false,
false,
@@ -77,6 +121,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
false,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -84,6 +129,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
true,
false,
@@ -94,6 +158,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
true,
true,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -101,6 +166,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -111,11 +177,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
false,
false,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
false,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
false,
false,
@@ -127,11 +204,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
false,
false,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
false,
false,
@@ -141,6 +229,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
false,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -148,6 +237,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
true,
false,
@@ -158,6 +266,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
true,
true,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -165,6 +274,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -175,11 +285,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
false,
false,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
false,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
false,
false,
@@ -191,11 +312,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
false,
false,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
false,
false,
@@ -205,6 +337,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
false,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -212,6 +345,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
true,
false,
@@ -222,6 +374,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
true,
true,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -229,6 +382,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -239,11 +393,31 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
false,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
false,
false,
@@ -253,6 +427,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -263,4 +438,5 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
false,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
false,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
false,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
false,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
false,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
false,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
false,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
true,
true,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
true,
true,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
false,
true,
true,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
true,
true,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
false,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
false,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
true,
false,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
true,
false,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
true,
true,
false,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
true,
true,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
true,
true,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
true,
true,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
true,
true,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
false,
true,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
false,
true,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -12,7 +12,8 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
true,
true,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -13,6 +13,7 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
false,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -13,6 +13,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
false,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -20,6 +21,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
true,
false,
@@ -30,6 +50,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
true,
true,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -37,6 +58,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -47,11 +69,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
false,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
false,
false,
@@ -63,11 +96,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
false,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
true,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
false,
false,
@@ -77,6 +121,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
false,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -84,6 +129,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
true,
false,
@@ -94,6 +158,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
true,
true,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -101,6 +166,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -111,11 +177,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
false,
false,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
false,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
false,
false,
@@ -127,11 +204,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
false,
false,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
true,
false,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
false,
false,
@@ -141,6 +229,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
false,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -148,6 +237,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
true,
false,
@@ -158,6 +266,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
true,
true,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -165,6 +274,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -175,11 +285,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
false,
false,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
false,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
false,
false,
@@ -191,11 +312,22 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
false,
false,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
true,
false,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
false,
false,
@@ -205,6 +337,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
false,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -212,6 +345,25 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
true,
false,
@@ -222,6 +374,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
true,
true,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -229,6 +382,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -239,11 +393,31 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
true,
false,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
false,
false,
@@ -253,6 +427,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
true,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
@@ -263,4 +438,5 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
false,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
true,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
false,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
false,
false,
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
true,
false,
true,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -11,8 +11,9 @@
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
false,
true,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -14,5 +14,6 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
true,
false,
true,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
false,
false,
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
false,
false,
256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
false,
false,
64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

Some files were not shown because too many files have changed in this diff Show More