diff --git a/example/ck_tile/18_hstu_attention/CMakeLists.txt b/example/ck_tile/18_hstu_attention/CMakeLists.txt
index a2210e9a1a..2a4c98bb8f 100644
--- a/example/ck_tile/18_hstu_attention/CMakeLists.txt
+++ b/example/ck_tile/18_hstu_attention/CMakeLists.txt
@@ -3,7 +3,7 @@ set(EXAMPLE_HSTU_ATTENTION "tile_example_hstu_attention")
 # to be included in "make all/install/check"
 message("adding example ${EXAMPLE_HSTU_ATTENTION}")
 file(GLOB INSTANCE_SRCS instances/*.cpp)
-set(INTERFACES_SRCS hstu_attention_jagged_forward_bf16.cpp hstu_attention_jagged_forward_fp16.cpp hstu_attention_batched_forward_bf16.cpp hstu_attention_batched_forward_fp16.cpp)
+set(INTERFACES_SRCS hstu_attention_no_group_forward_bf16.cpp hstu_attention_no_group_forward_fp16.cpp hstu_attention_group_forward_bf16.cpp hstu_attention_group_forward_fp16.cpp)
 add_executable(${EXAMPLE_HSTU_ATTENTION} EXCLUDE_FROM_ALL example_hstu_attention.cpp)
 target_include_directories(${EXAMPLE_HSTU_ATTENTION} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
 target_sources(${EXAMPLE_HSTU_ATTENTION} PRIVATE ${INTERFACES_SRCS} ${INSTANCE_SRCS})
diff --git a/example/ck_tile/18_hstu_attention/README.md b/example/ck_tile/18_hstu_attention/README.md
index 10e692df00..98830e9e30 100644
--- a/example/ck_tile/18_hstu_attention/README.md
+++ b/example/ck_tile/18_hstu_attention/README.md
@@ -29,27 +29,33 @@
 ``` C++
 arg_parser.insert("v", "1", "weather do CPU validation or not")
+          .insert("g", "1", "number of attention groups, bigger than 1 indicates group hstu")
           .insert("prec", "fp16", "data type. fp16/bf16")
           .insert("jagged", "0", "q/k/v batched sequence is jagged or not")
-          .insert("b", "12", "batch size")
+          .insert("b", "12", "number of batches")
           .insert("nhead", "4", "number of heads")
           .insert("hdim_qk", "64", "headdim size of Q/K")
           .insert("hdim_v", "64", "headdim size of V/O")
-          .insert("seqlens", "400", "seqlen of single or all batches for query and key/value tensor, actually allocated seqlen will include the target of each batch and context_len")
+          .insert("seqlens", "400", "uih seqlen of single or all batches for query and key/value tensor, actually allocated seqlen will include the target of each batch and context_len")
           .insert("max_seqlen", "0", "max uih_seqlen, can be ignored, or else must be equal or bigger than the maximum of all uih seqlens")
-          .insert("targets", "16", "sequence length at the end of query/key token sequence that should be excluded from attention")
+          .insert("g_max_seqlens", "0", "list of all groups' max uih seqlens")
+          .insert("targets", "", "sequence length at the end of query/key token sequence that should be excluded from attention")
           .insert("max_target", "0", "max target, can be ignored, or else must be equal of bigger than the maximum of all targets")
           .insert("causal", "1", "enable causal mask or not")
           .insert("local_len", "5", "length of the diagonal window for enabling masking, value 0 to disable")
+          .insert("g_local_lens", "5,", "list of all groups' lengths of the diagonal window for enabling masking, value 0 to disable")
           .insert("context_len", "6", "sequence length at the begin of the query sequence the should be included for attention")
+          .insert("g_context_lens", "6,", "list of all groups' sequence lengths at the beginning of the query sequence that should be included for attention")
           .insert("minfull_len", "6", "sequence length at the end of the query sequence that should be included for attention")
-          .insert("init_qkv", "0", "initialize q, k, v tensor from local files q.dat, k.dat and v.data")
+          .insert("g_minfull_lens", "6", "list of all groups' sequence lengths at the end of the query sequence that should be included for attention")
           .insert("seed", "13579", "seed by the uniform or normal distribution generator")
           .insert("norm_dist", "0", "if true, initialize the data in normal distribution, or else in uniform distribution")
           .insert("alpha", "0", "scale factor of S=Q@K. 0 means equal to 1/sqrt(hdim)")
-          .insert("attn_scale", "0", "scale factor of SiLu(Q@K), 0 means using 1/max_seqlen for scaling")
-          .insert("save_mask", "1", "save the mask tensor to disk by the CPU validation codes")
-          .insert("perf", "0", "weather measure execution time or not");
+          .insert("attn_scale", "0", "scale factor of SiLU(Q@K). 0 means using 1/max_seqlen for scaling")
+          .insert("g_attn_scales", "1.0,", "list of all groups' scale factors of SiLU(Q@K). 0 means using 1/max_seqlen of the group for scaling")
+          .insert("init_qkv", "0", "initialize q, k, v tensor from local files q.dat, k.dat and v.data")
+          .insert("save_mask", "0", "save the mask tensor to disk by the CPU validation codes")
+          .insert("perf", "0", "whether to measure execution time or not")
+          .insert("dump_output", "0", "dump both device and reference hstu attention outputs to files, only used when validation is true");
 ```
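All of the new `g_*` options take a comma-separated list with one value per group; when fewer values are given than `g`, the last value is repeated. A minimal sketch of that parse-and-supplement behavior (it mirrors `get_integers_from_string` and `supplement_array_by_last_element` from `example_hstu_attention.cpp`; the standalone `main` is illustrative only and assumes a non-empty list):

``` C++
#include <string>
#include <vector>

int main()
{
    // e.g. --g=3 --g_local_lens=5,7  ->  {5, 7, 7}
    std::vector<int> lens;
    std::string s = "5,7";
    std::size_t pos = 0, comma;

    // split on ',' and convert each non-empty slice (a trailing comma is ignored)
    while((comma = s.find(',', pos)) != std::string::npos)
    {
        lens.push_back(std::stoi(s.substr(pos, comma - pos)));
        pos = comma + 1;
    }
    if(pos < s.size())
        lens.push_back(std::stoi(s.substr(pos)));

    // supplement with the last element until there is one entry per group
    int num_group = 3;
    while(static_cast<int>(lens.size()) < num_group)
        lens.push_back(lens.back());

    return 0;
}
```

So `--g=3 --g_local_lens=5,7` yields the window sizes `{5, 7, 7}` for the three groups.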
diff --git a/example/ck_tile/18_hstu_attention/example_hstu_attention.cpp b/example/ck_tile/18_hstu_attention/example_hstu_attention.cpp
index 9f77f83ceb..2eebcc866b 100644
--- a/example/ck_tile/18_hstu_attention/example_hstu_attention.cpp
+++ b/example/ck_tile/18_hstu_attention/example_hstu_attention.cpp
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -90,9 +91,10 @@ auto create_args(int argc, char* argv[])
     // clang-format off
     arg_parser.insert("v", "1", "weather do CPU validation or not")
+              .insert("g", "1", "number of attention groups, bigger than 1 indicates group hstu")
               .insert("prec", "fp16", "data type. fp16/bf16")
               .insert("jagged", "0", "q/k/v batched sequence is jagged or not")
-              .insert("b", "12", "batch size")
+              .insert("b", "12", "number of batches")
               .insert("nhead", "4", "number of heads")
               .insert("hdim_qk", "64", "headdim size of Q/K")
               .insert("hdim_v", "64", "headdim size of V/O")
@@ -100,17 +102,22 @@ auto create_args(int argc, char* argv[])
               .insert("seqlens_kv", "", "uih seqlen of single or all batches for key/value tensor, actually allocated seqlen will include the target of each batch and context_len")
               .insert("max_seqlen", "0", "max uih_seqlen, can be ignored, or else must be equal or bigger than the maximum of all uih seqlens")
               .insert("max_seqlen_kv", "0", "max uih_seqlen_kv, can be ignored, or else must be equal or bigger than the maximum of all uih seqlens")
+              .insert("g_max_seqlens", "0", "list of all groups' max uih seqlens")
               .insert("targets", "", "sequence length at the end of query/key token sequence that should be excluded from attention")
               .insert("max_target", "0", "max target, can be ignored, or else must be equal of bigger than the maximum of all targets")
               .insert("softmax", "0", "use softmax or not")
               .insert("causal", "1", "enable causal mask or not")
               .insert("local_len", "5", "length of the diagonal window for enabling masking, value 0 to disable")
+              .insert("g_local_lens", "5,", "list of all groups' lengths of the diagonal window for enabling masking, value 0 to disable")
               .insert("context_len", "6", "sequence length at the begin of the query sequence the should be included for attention")
+              .insert("g_context_lens", "6,", "list of all groups' sequence lengths at the beginning of the query sequence that should be included for attention")
               .insert("minfull_len", "6", "sequence length at the end of the query sequence that should be included for attention")
+              .insert("g_minfull_lens", "6", "list of all groups' sequence lengths at the end of the query sequence that should be included for attention")
               .insert("seed", "13579", "seed by the uniform or normal distribution generator")
               .insert("norm_dist", "0", "if true, initialize the data in normal distribution, or else in uniform distribution")
               .insert("alpha", "0", "scale factor of S=Q@K. 0 means equal to 1/sqrt(hdim)")
               .insert("attn_scale", "0", "scale factor of SiLU(Q@K). 0 means using 1/max_seqlen for scaling")
+              .insert("g_attn_scales", "1.0,", "list of all groups' scale factors of SiLU(Q@K).
0 means using 1/max_seqlen of the group for scaling") .insert("init_qkv", "0", "initialize q, k, v tensor from local files q.dat, k.dat and v.data") .insert("save_mask", "0", "save the mask tensor to disk by the CPU validation codes") .insert("perf", "0", "weather measure execution time or not") @@ -121,35 +128,66 @@ auto create_args(int argc, char* argv[]) return std::make_tuple(result, arg_parser); } -static std::vector get_integers_from_string(std::string lengthsStr) +static std::vector get_integers_from_string(std::string srcStr) { - std::vector lengths; + std::vector integers; std::size_t pos = 0; std::size_t new_pos; - new_pos = lengthsStr.find(',', pos); + new_pos = srcStr.find(',', pos); while(new_pos != std::string::npos) { - std::string sliceStr = lengthsStr.substr(pos, new_pos - pos); + std::string sliceStr = srcStr.substr(pos, new_pos - pos); int len = std::stoi(sliceStr); - lengths.push_back(len); + integers.push_back(len); pos = new_pos + 1; - new_pos = lengthsStr.find(',', pos); + new_pos = srcStr.find(',', pos); }; - std::string sliceStr = lengthsStr.substr(pos); + std::string sliceStr = srcStr.substr(pos); if(!sliceStr.empty()) { int len = std::stoi(sliceStr); - lengths.push_back(len); + integers.push_back(len); }; - return (lengths); + return (integers); +}; + +static std::vector get_floats_from_string(std::string srcStr) +{ + std::vector values; + std::size_t pos = 0; + std::size_t new_pos; + + new_pos = srcStr.find(',', pos); + while(new_pos != std::string::npos) + { + std::string sliceStr = srcStr.substr(pos, new_pos - pos); + + float val = std::stof(sliceStr); + + values.push_back(val); + + pos = new_pos + 1; + new_pos = srcStr.find(',', pos); + }; + + std::string sliceStr = srcStr.substr(pos); + + if(!sliceStr.empty()) + { + float val = std::stof(sliceStr); + + values.push_back(val); + }; + + return (values); }; template @@ -164,42 +202,6 @@ void supplement_array_by_last_element(std::vector& arr, int target_num_elemen }; }; -static void show_hstu_attention_fwd_param(std::ostream& os, HstuAttentionFwdParams& param) -{ - if(param.is_jagged) - { - os << "Jagged inputs used! " << std::endl; - os << "use causal: " << param.use_causal << std::endl; - os << "Num of batches: " << param.num_batch << std::endl; - os << "Num of heads: " << param.num_head << std::endl; - os << "QK hdim: " << param.hdim_qk << " V hdim: " << param.hdim_v << std::endl; - os << "Q/K/V/O seq stride: " << param.seq_stride_q << " " << param.seq_stride_k << " " - << param.seq_stride_v << " " << param.seq_stride_o << std::endl; - os << "Q/K/V/O nhead stride: " << param.nhead_stride_q << " " << param.nhead_stride_k << " " - << param.nhead_stride_v << " " << param.nhead_stride_o << std::endl; - os << "contextual_seqlen: " << param.contextual_seqlen << std::endl; - os << "window_size: " << param.window_size << std::endl; - os << "min_full_attn_seqlen: " << param.min_full_attn_seqlen << std::endl; - } - else - { - os << "Batched inputs used! 
" << std::endl; - os << "use causal: " << param.use_causal << std::endl; - os << "Num of batches: " << param.num_batch << std::endl; - os << "Num of heads: " << param.num_head << std::endl; - os << "QK hdim: " << param.hdim_qk << " V hdim: " << param.hdim_v << std::endl; - os << "Q/K/V/O seq stride: " << param.seq_stride_q << " " << param.seq_stride_k << " " - << param.seq_stride_v << " " << param.seq_stride_o << std::endl; - os << "Q/K/V/O nhead stride: " << param.nhead_stride_q << " " << param.nhead_stride_k << " " - << param.nhead_stride_v << " " << param.nhead_stride_o << std::endl; - os << "Q/K/V/O batch stride: " << param.batch_stride_q << " " << param.batch_stride_k << " " - << param.batch_stride_v << " " << param.batch_stride_o << std::endl; - os << "contextual_seqlen: " << param.contextual_seqlen << std::endl; - os << "window_size: " << param.window_size << std::endl; - os << "min_full_attn_seqlen: " << param.min_full_attn_seqlen << std::endl; - }; -}; - // threshold for different dtypes template auto get_elimit() @@ -219,10 +221,9 @@ auto get_elimit() } template -bool run(const ck_tile::ArgParser& arg_parser) +bool run_no_group_hstu(const ck_tile::ArgParser& arg_parser, bool is_jagged) { bool do_validation = static_cast(arg_parser.get_int("v")); - bool is_jagged = static_cast(arg_parser.get_int("jagged")); int num_batch = arg_parser.get_int("b"); int num_head = arg_parser.get_int("nhead"); int hdim_qk = arg_parser.get_int("hdim_qk"); @@ -230,11 +231,6 @@ bool run(const ck_tile::ArgParser& arg_parser) bool use_softmax = static_cast(arg_parser.get_int("softmax")); bool use_causal = static_cast(arg_parser.get_int("causal")); - int window_size = arg_parser.get_int("local_len"); - - int contextual_seqlen = arg_parser.get_int("context_len"); - int min_full_attn_seqlen = arg_parser.get_int("minfull_len"); - float alpha = arg_parser.get_float("alpha"); float attn_scale = arg_parser.get_float("attn_scale"); int seed = arg_parser.get_int("seed"); @@ -245,8 +241,15 @@ bool run(const ck_tile::ArgParser& arg_parser) bool save_mask = static_cast(arg_parser.get_int("save_mask")); bool initialize_qkv = static_cast(arg_parser.get_int("init_qkv")); - std::string str_of_targets = arg_parser.get_str("targets"); - std::vector num_targets = get_integers_from_string(str_of_targets); + std::string str_of_integers; + + str_of_integers = arg_parser.get_str("targets"); + std::vector num_targets = get_integers_from_string(str_of_integers); + + int window_size = arg_parser.get_int("local_len"); + + int contextual_seqlen = arg_parser.get_int("context_len"); + int min_full_attn_seqlen = arg_parser.get_int("minfull_len"); std::string str_of_lengths_q = arg_parser.get_str("seqlens"); std::vector seq_lengths_q = get_integers_from_string(str_of_lengths_q); @@ -265,16 +268,6 @@ bool run(const ck_tile::ArgParser& arg_parser) bool is_cross_attention = false; - if(!num_targets.empty()) - { - // supplement num_targets using the last input value if user-provided lengths not enough - supplement_array_by_last_element(num_targets, num_batch); - - // only consider num_batch values even if more values are provided by the user - for(int i = 0; i < num_batch; i++) - max_target = max(max_target, num_targets[i]); - }; - HSTU_CHECK(!seq_lengths_q.empty(), "sequence lengths of q shoud be defined!"); // assume seq_lengths_kv is same as seq_lengths_q if not defined, or else when @@ -293,15 +286,12 @@ bool run(const ck_tile::ArgParser& arg_parser) if(is_jagged) { - // supplement seq_lengths_q using the last input value if 
user-provided lengths not - // enough + // supplement seq_lengths_q using the last input value if user-provided lengths not enough supplement_array_by_last_element(seq_lengths_q, num_batch); - // supplement seq_lengths_kv using the last input value if user-provided lengths not - // enough + // supplement seq_lengths_kv using the last input value if user-provided lengths not enough supplement_array_by_last_element(seq_lengths_kv, num_batch); - // only consider num_batch values even if more values are provided by the user for(int i = 0; i < num_batch; i++) { max_uih_seqlen_q = max(max_uih_seqlen_q, seq_lengths_q[i]); @@ -316,6 +306,16 @@ bool run(const ck_tile::ArgParser& arg_parser) max_uih_seqlen_kv = seq_lengths_kv[0]; }; + if(!num_targets.empty()) + { + // supplement num_targets using the last input value if user-provided lengths not enough + supplement_array_by_last_element(num_targets, num_batch); + + // only consider num_batch values even if more values are provided by the user + for(int i = 0; i < num_batch; i++) + max_target = max(max_target, num_targets[i]); + }; + // the user input of max_uih_seqlen can either be ignored or be bigger than all uih_seqlens // the user input of max_target can either be ignored or be bigger than all targets HSTU_CHECK(input_max_uih_seqlen_q <= 0 || input_max_uih_seqlen_q >= max_uih_seqlen_q, @@ -386,7 +386,7 @@ bool run(const ck_tile::ArgParser& arg_parser) long total_flops = 0; - // estimate the total flops occurred, ignoring the scaling and SILu + // estimate the total flops occurred, ignoring the scaling and SiLu if(is_jagged) { for(int i = 0; i < num_batch; i++) @@ -467,7 +467,7 @@ bool run(const ck_tile::ArgParser& arg_parser) if(!num_targets.empty()) num_targets_dev.ToDevice(num_targets.data()); - HstuAttentionFwdParams params; + HstuAttentionNoGroupFwdParams params; float scale_s = (alpha != 0.f) ? 
alpha : 1.0f / std::sqrt(hdim_qk);
@@ -552,26 +552,17 @@ bool run(const ck_tile::ArgParser& arg_parser)
         params.philox_offset = 0UL;
     };

-    // show_hstu_attention_fwd_param(std::cout, params);
-    std::ignore = show_hstu_attention_fwd_param;
-
     hipStream_t stream;

     HIP_CHECK_ERROR(hipStreamCreate(&stream));

     if constexpr(std::is_same::value)
     {
-        if(is_jagged)
-            hstu_attention_jagged_forward_fp16(params, stream);
-        else
-            hstu_attention_batched_forward_fp16(params, stream);
+        hstu_attention_no_group_forward_fp16(params, stream);
     }
     else if constexpr(std::is_same::value)
     {
-        if(is_jagged)
-            hstu_attention_jagged_forward_bf16(params, stream);
-        else
-            hstu_attention_batched_forward_bf16(params, stream);
+        hstu_attention_no_group_forward_bf16(params, stream);
     }
     else
         throw std::runtime_error("Other data type is not supported at present!");
@@ -584,28 +575,28 @@ bool run(const ck_tile::ArgParser& arg_parser)
         using CompDataType = typename HstuAttentionFwdTypeConfig::CompDataType;

         BOOL_SWITCH_3(is_jagged, kIsJagged, use_softmax, kUseSoftmax, use_causal, kUseCausal, [&] {
-            ck_tile::reference_hstu_attention::Run(is_cross_attention,
-                                                   q_host,
-                                                   k_host,
-                                                   v_host,
-                                                   o_host_ref,
-                                                   mask_host,
-                                                   num_batch,
-                                                   scale_s,
-                                                   attn_scale,
-                                                   max_seqlen_q,
-                                                   max_seqlen_kv,
-                                                   seq_offsets_q,
-                                                   seq_offsets_kv,
-                                                   num_targets,
-                                                   contextual_seqlen,
-                                                   window_size,
-                                                   min_full_attn_seqlen);
+            ck_tile::reference_no_group_hstu_attention::Run(is_cross_attention,
+                                                            q_host,
+                                                            k_host,
+                                                            v_host,
+                                                            o_host_ref,
+                                                            mask_host,
+                                                            num_batch,
+                                                            scale_s,
+                                                            attn_scale,
+                                                            max_seqlen_q,
+                                                            max_seqlen_kv,
+                                                            seq_offsets_q,
+                                                            seq_offsets_kv,
+                                                            num_targets,
+                                                            contextual_seqlen,
+                                                            window_size,
+                                                            min_full_attn_seqlen);
         });

         ck_tile::HostTensor o_host(
@@ -638,17 +629,385 @@ bool run(const ck_tile::ArgParser& arg_parser)
         {
             if constexpr(std::is_same::value)
             {
-                if(is_jagged)
-                    hstu_attention_jagged_forward_fp16(params, stream);
-                else
-                    hstu_attention_batched_forward_fp16(params, stream);
+                hstu_attention_no_group_forward_fp16(params, stream);
             }
             else if constexpr(std::is_same::value)
             {
-                if(is_jagged)
-                    hstu_attention_jagged_forward_bf16(params, stream);
-                else
-                    hstu_attention_batched_forward_bf16(params, stream);
+                hstu_attention_no_group_forward_bf16(params, stream);
+            }
+        }
+        timer.stop(stream);
+
+        auto ms = timer.duration() / 10.f;
+
+        std::cout << "Average execution time of the hstu_attention operation is " << ms
+                  << " milli-seconds, estimated TFLOPS is "
+                  << (static_cast<float>(total_flops) / ms) / 1.0e9 << std::endl;
+    }
+
+    return res;
+}
+
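+// run_group_hstu drives the num_group > 1 path: each group supplies its own
+// max_seqlen, contextual_seqlen, window_size, min_full_attn_seqlen and
+// attn_scale, parsed from the comma-separated "g_*" options above.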
+template
+bool run_group_hstu(const ck_tile::ArgParser& arg_parser, int num_group)
+{
+    bool do_validation = static_cast<bool>(arg_parser.get_int("v"));
+
+    int num_batch = arg_parser.get_int("b");
+
+    HSTU_CHECK(num_group > 1, "run_group_hstu should only be called when num_group > 1 !");
+    HSTU_CHECK(num_batch > 0 && num_batch % num_group == 0,
+               "number of batches should be a multiple of num_group!");
+
+    int num_batch_per_group = num_batch / num_group;
+
+    int num_head         = arg_parser.get_int("nhead");
+    int hdim_qk          = arg_parser.get_int("hdim_qk");
+    int hdim_v           = arg_parser.get_int("hdim_v");
+    bool use_softmax     = static_cast<bool>(arg_parser.get_int("softmax"));
+    bool use_causal      = static_cast<bool>(arg_parser.get_int("causal"));
+    float alpha          = arg_parser.get_float("alpha");
+    int seed             = arg_parser.get_int("seed");
+    bool use_normal_dist = arg_parser.get_int("norm_dist");
+    bool measure_perf    = static_cast<bool>(arg_parser.get_int("perf"));
+    bool dump_output     = static_cast<bool>(arg_parser.get_int("dump_output"));
+
+    bool save_mask       = static_cast<bool>(arg_parser.get_int("save_mask"));
+    bool initialize_qkv  = static_cast<bool>(arg_parser.get_int("init_qkv"));
+
+    std::string str_of_integers;
+
+    str_of_integers              = arg_parser.get_str("targets");
+    std::vector<int> num_targets = get_integers_from_string(str_of_integers);
+
+    std::string str_of_lengths_q   = arg_parser.get_str("seqlens");
+    std::vector<int> seq_lengths_q = get_integers_from_string(str_of_lengths_q);
+
+    std::string str_of_lengths_kv   = arg_parser.get_str("seqlens_kv");
+    std::vector<int> seq_lengths_kv = get_integers_from_string(str_of_lengths_kv);
+
+    bool is_cross_attention = false;
+
+    HSTU_CHECK(!seq_lengths_q.empty(), "sequence lengths should be defined!");
+
+    // assume seq_lengths_kv is same as seq_lengths_q if not defined, or else when
+    // seq_lengths_kv is explicitly defined, we think the input case is a cross_attention case
+    if(seq_lengths_kv.empty())
+        seq_lengths_kv = seq_lengths_q;
+    else
+        is_cross_attention = true;
+
+    str_of_integers = arg_parser.get_str("g_max_seqlens");
+    std::vector<int> group_max_seqlens = get_integers_from_string(str_of_integers);
+
+    HSTU_CHECK(!group_max_seqlens.empty(), "group max seqlens should be defined!");
+
+    str_of_integers = arg_parser.get_str("g_context_lens");
+    std::vector<int> group_contextual_seqlens = get_integers_from_string(str_of_integers);
+
+    HSTU_CHECK(!group_contextual_seqlens.empty(), "group contextual seqlens should be defined!");
+
+    str_of_integers = arg_parser.get_str("g_local_lens");
+    std::vector<int> group_window_sizes = get_integers_from_string(str_of_integers);
+
+    HSTU_CHECK(!group_window_sizes.empty(), "group window sizes should be defined!");
+
+    str_of_integers = arg_parser.get_str("g_minfull_lens");
+    std::vector<int> group_min_full_attn_seqlens = get_integers_from_string(str_of_integers);
+    HSTU_CHECK(!group_min_full_attn_seqlens.empty(),
+               "group min_full_attn seqlens should be defined!");
+
+    std::string str_of_floats = arg_parser.get_str("g_attn_scales");
+    std::vector<float> group_attn_scales = get_floats_from_string(str_of_floats);
+    HSTU_CHECK(!group_attn_scales.empty(), "group attn_scales should be defined!");
+
+    // supplement seq_lengths_q using the last input value if user-provided lengths not enough
+    supplement_array_by_last_element(seq_lengths_q, num_batch);
+
+    // supplement seq_lengths_kv using the last input value if user-provided lengths not enough
+    supplement_array_by_last_element(seq_lengths_kv, num_batch);
+
+    if(!num_targets.empty())
+    {
+        // supplement num_targets using the last input value if user-provided lengths not enough
+        supplement_array_by_last_element(num_targets, num_batch);
+    };
+
+    // supplement group_max_seqlens using the last input value if user-provided lengths not enough
+    supplement_array_by_last_element(group_max_seqlens, num_group);
+
+    // supplement group_contextual_seqlens using the last input value if user-provided lengths not
+    // enough
+    supplement_array_by_last_element(group_contextual_seqlens, num_group);
+
+    // supplement group_window_sizes using the last input value if user-provided lengths not enough
+    supplement_array_by_last_element(group_window_sizes, num_group);
+
+    // supplement group_min_full_attn_seqlens using the last input value if user-provided lengths
+    // not enough
+    supplement_array_by_last_element(group_min_full_attn_seqlens, num_group);
+
+    // supplement group_attn_scales using the last input value if user-provided values not enough
+    supplement_array_by_last_element(group_attn_scales, num_group);
+
+    int phy_seqlen_q   = 0;
+    int phy_seqlen_kv  = 0;
+    int max_max_seqlen = 0;
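+    // Note: batches are mapped to groups contiguously in the loops below,
+    // i.e. batch i belongs to group i / num_batch_per_group, so each batch
+    // inherits its group's contextual_seqlen when the offsets are built.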
+
+    // only consider num_group values even if more values were provided by the user
+    for(int i = 0; i < num_group; i++)
+    {
+        max_max_seqlen = max(max_max_seqlen, group_max_seqlens[i]);
+    };
+
+    std::vector<int> seq_offsets_q;
+    std::vector<int> seq_offsets_kv;
+
+    seq_offsets_q.push_back(0);
+
+    for(int i = 0; i < num_batch; i++)
+    {
+        int i_group = i / num_batch_per_group;
+        int batch_seqlen =
+            num_targets.empty()
+                ? seq_lengths_q[i] + group_contextual_seqlens[i_group]
+                : seq_lengths_q[i] + num_targets[i] + group_contextual_seqlens[i_group];
+
+        phy_seqlen_q += batch_seqlen;
+        seq_offsets_q.push_back(phy_seqlen_q);
+    };
+
+    seq_offsets_kv.push_back(0);
+
+    for(int i = 0; i < num_batch; i++)
+    {
+        if(!is_cross_attention)
+        {
+            int i_group = i / num_batch_per_group;
+            int batch_seqlen =
+                num_targets.empty()
+                    ? seq_lengths_kv[i] + group_contextual_seqlens[i_group]
+                    : seq_lengths_kv[i] + num_targets[i] + group_contextual_seqlens[i_group];
+
+            phy_seqlen_kv += batch_seqlen;
+            seq_offsets_kv.push_back(phy_seqlen_kv);
+        }
+        else // for cross_attention, assume target_in_kv == false
+        {
+            int i_group      = i / num_batch_per_group;
+            int batch_seqlen = seq_lengths_kv[i] + group_contextual_seqlens[i_group];
+
+            phy_seqlen_kv += batch_seqlen;
+            seq_offsets_kv.push_back(phy_seqlen_kv);
+        }
+    };
+
+    long total_flops = 0;
+
+    // estimate the total flops occurred, ignoring the scaling and SiLu
+    for(int i = 0; i < num_batch; i++)
+    {
+        int len_q  = seq_offsets_q[i + 1] - seq_offsets_q[i];
+        int len_kv = seq_offsets_kv[i + 1] - seq_offsets_kv[i];
+        total_flops += (static_cast<long>(len_q) * len_kv * hdim_qk +
+                        static_cast<long>(len_q) * hdim_v * len_kv) *
+                       2;
+    };
+
+    total_flops *= num_head;
+
+    int batches_for_alloc = 1;
+
+    ck_tile::HostTensor q_host(
+        std::array{batches_for_alloc, phy_seqlen_q, num_head, hdim_qk});
+    ck_tile::HostTensor k_host(
+        std::array{batches_for_alloc, phy_seqlen_kv, num_head, hdim_qk});
+    ck_tile::HostTensor v_host(
+        std::array{batches_for_alloc, phy_seqlen_kv, num_head, hdim_v});
+    ck_tile::HostTensor o_host_ref(
+        std::array{batches_for_alloc, phy_seqlen_q, num_head, hdim_v});
+
+    ck_tile::HostTensor mask_host(
+        save_mask
+            ?
std::array{num_batch, num_head, max_max_seqlen, max_max_seqlen} + : std::array{1, 1, 1, 1}); + + if(!initialize_qkv) + { + if(use_normal_dist) + { + ck_tile::FillNormalDistribution{0.f, 1.f, seed}(q_host); + ck_tile::FillNormalDistribution{0.f, 1.f, seed}(k_host); + ck_tile::FillNormalDistribution{0.f, 1.f, seed}(v_host); + } + else + { + ck_tile::FillUniformDistribution{-1.f, 1.f, seed}(q_host); + ck_tile::FillUniformDistribution{-1.f, 1.f, seed}(k_host); + ck_tile::FillUniformDistribution{-1.f, 1.f, seed}(v_host); + }; + } + else + { + readDataToBufferFromFile(q_host.data(), q_host.get_element_space_size(), "q.dat"); + readDataToBufferFromFile(k_host.data(), k_host.get_element_space_size(), "k.dat"); + readDataToBufferFromFile(v_host.data(), v_host.get_element_space_size(), "v.dat"); + }; + + ck_tile::DeviceMem q_dev(q_host.get_element_space_size_in_bytes()); + ck_tile::DeviceMem k_dev(k_host.get_element_space_size_in_bytes()); + ck_tile::DeviceMem v_dev(v_host.get_element_space_size_in_bytes()); + ck_tile::DeviceMem o_dev(o_host_ref.get_element_space_size_in_bytes()); + + ck_tile::DeviceMem seq_offsets_q_dev(seq_offsets_q.size() * sizeof(int)); + ck_tile::DeviceMem seq_offsets_kv_dev(seq_offsets_kv.size() * sizeof(int)); + ck_tile::DeviceMem num_targets_dev(num_targets.size() * sizeof(int)); + + q_dev.ToDevice(q_host.data()); + k_dev.ToDevice(k_host.data()); + v_dev.ToDevice(v_host.data()); + + seq_offsets_q_dev.ToDevice(seq_offsets_q.data()); + seq_offsets_kv_dev.ToDevice(seq_offsets_kv.data()); + if(!num_targets.empty()) + num_targets_dev.ToDevice(num_targets.data()); + + ck_tile::DeviceMem group_max_seqlens_dev(group_max_seqlens.size() * sizeof(int)); + ck_tile::DeviceMem group_contextual_seqlens_dev(group_contextual_seqlens.size() * sizeof(int)); + ck_tile::DeviceMem group_window_sizes_dev(group_window_sizes.size() * sizeof(int)); + ck_tile::DeviceMem group_min_full_attn_seqlens_dev(group_min_full_attn_seqlens.size() * + sizeof(int)); + ck_tile::DeviceMem group_attn_scales_dev(group_attn_scales.size() * sizeof(float)); + + group_max_seqlens_dev.ToDevice(group_max_seqlens.data()); + group_contextual_seqlens_dev.ToDevice(group_contextual_seqlens.data()); + group_window_sizes_dev.ToDevice(group_window_sizes.data()); + group_min_full_attn_seqlens_dev.ToDevice(group_min_full_attn_seqlens.data()); + group_attn_scales_dev.ToDevice(group_attn_scales.data()); + + HstuAttentionGroupFwdParams params; + + float scale_s = (alpha != 0.f) ? 
alpha : 1.0f / std::sqrt(hdim_qk); + + params.is_cross_attention = is_cross_attention; + params.num_batch = num_batch; + params.num_group = num_group; + params.seq_q_offsets_ptr = seq_offsets_q_dev.GetDeviceBuffer(); + params.seq_kv_offsets_ptr = seq_offsets_kv_dev.GetDeviceBuffer(); + params.max_seqlen = max_max_seqlen; + params.q_ptr = q_dev.GetDeviceBuffer(); + params.k_ptr = k_dev.GetDeviceBuffer(); + params.v_ptr = v_dev.GetDeviceBuffer(); + params.bias_ptr = nullptr; // bias is not supported at present + params.o_ptr = o_dev.GetDeviceBuffer(); + params.hdim_qk = hdim_qk; + params.hdim_v = hdim_v; + params.num_head = num_head; + params.scale_s = scale_s; + params.seq_stride_q = q_host.get_strides()[1]; + params.seq_stride_k = k_host.get_strides()[1]; + params.seq_stride_v = v_host.get_strides()[1]; + params.seq_stride_bias = 0; + params.seq_stride_o = o_host_ref.get_strides()[1]; + params.nhead_stride_q = q_host.get_strides()[2]; + params.nhead_stride_k = k_host.get_strides()[2]; + params.nhead_stride_v = v_host.get_strides()[2]; + params.nhead_stride_bias = 0; + params.nhead_stride_o = o_host_ref.get_strides()[2]; + params.num_targets_ptr = num_targets.empty() ? nullptr : num_targets_dev.GetDeviceBuffer(); + params.use_softmax = use_softmax; + params.use_causal = use_causal; + params.p_drop = 0.0f; // dropout is not supported at present + params.philox_seed = 0UL; + params.philox_offset = 0UL; + params.group_max_seqlen_ptr = group_max_seqlens_dev.GetDeviceBuffer(); + params.group_contextual_seqlen_ptr = group_contextual_seqlens_dev.GetDeviceBuffer(); + params.group_window_size_ptr = group_window_sizes_dev.GetDeviceBuffer(); + params.group_min_full_attn_seqlen_ptr = group_min_full_attn_seqlens_dev.GetDeviceBuffer(); + params.group_attn_scale_ptr = group_attn_scales_dev.GetDeviceBuffer(); + + hipStream_t stream; + + HIP_CHECK_ERROR(hipStreamCreate(&stream)); + + if constexpr(std::is_same::value) + { + hstu_attention_group_forward_fp16(params, stream); + } + else if constexpr(std::is_same::value) + { + hstu_attention_group_forward_bf16(params, stream); + } + else + throw std::runtime_error("Other data type is not supported at present!"); + + bool res = true; + + if(do_validation) + { + using GemmAccDataType = typename HstuAttentionFwdTypeConfig::GemmAccDataType; + using CompDataType = typename HstuAttentionFwdTypeConfig::CompDataType; + + BOOL_SWITCH_2(use_softmax, kUseSoftmax, use_causal, kUseCausal, [&] { + ck_tile::reference_group_hstu_attention::Run(is_cross_attention, + q_host, + k_host, + v_host, + o_host_ref, + mask_host, + num_batch, + num_batch / num_group, + scale_s, + max_max_seqlen, + seq_offsets_q, + seq_offsets_kv, + num_targets, + group_max_seqlens, + group_contextual_seqlens, + group_window_sizes, + group_min_full_attn_seqlens, + group_attn_scales); + }); + + ck_tile::HostTensor o_host( + std::array{batches_for_alloc, phy_seqlen_q, num_head, hdim_v}); + + o_dev.FromDevice(o_host.data()); + + if(dump_output) + { + dumpBufferToFile("output_dev.dat", o_host.data(), o_host.get_element_space_size()); + dumpBufferToFile("output_host.dat", o_host_ref.data(), o_host.get_element_space_size()); + } + + if(save_mask) + dumpBufferToFile( + "ck_hstu_mask.dat", mask_host.data(), mask_host.get_element_space_size()); + + auto [rtol, atol] = get_elimit(); + + res = ck_tile::check_err( + o_host, o_host_ref, std::string("hstu_attention output error"), rtol, atol); + }; + + if(measure_perf) + { + ck_tile::gpu_timer timer{}; + + timer.start(stream); + for(int i = 0; i < 10; i++) + { + if 
constexpr(std::is_same::value) + { + hstu_attention_group_forward_fp16(params, stream); + } + else if constexpr(std::is_same::value) + { + hstu_attention_group_forward_bf16(params, stream); } } timer.stop(stream); @@ -672,15 +1031,33 @@ int main(int argc, char* argv[]) return -1; } + int num_group = static_cast(arg_parser.get_int("g")); const std::string data_type = arg_parser.get_str("prec"); - if(data_type == "fp16") + + if(num_group > 1) { - return run(arg_parser) ? 0 : -2; + if(data_type == "fp16") + { + return run_group_hstu(arg_parser, num_group) ? 0 : -2; + } + else if(data_type == "bf16") + { + return run_group_hstu(arg_parser, num_group) ? 0 : -2; + } } - else if(data_type == "bf16") + else { - return run(arg_parser) ? 0 : -2; - } + bool is_jagged = static_cast(arg_parser.get_int("jagged")); + + if(data_type == "fp16") + { + return run_no_group_hstu(arg_parser, is_jagged) ? 0 : -2; + } + else if(data_type == "bf16") + { + return run_no_group_hstu(arg_parser, is_jagged) ? 0 : -2; + } + }; return -3; } diff --git a/example/ck_tile/18_hstu_attention/generate_instances.py b/example/ck_tile/18_hstu_attention/generate_instances.py index fb26f5e6db..e27432e879 100644 --- a/example/ck_tile/18_hstu_attention/generate_instances.py +++ b/example/ck_tile/18_hstu_attention/generate_instances.py @@ -32,7 +32,7 @@ HSTU_FORWARD_INSTANCE_TEMPLATE = """ {use_softmax}, {has_bias}, {has_dropout}, - {max_k}>(HstuAttentionFwdParams& param, hipStream_t stream); + {max_k}>(HstuAttention{group_or_not}FwdParams& param, hipStream_t stream); """ HSTU_FORWARD_INSTANCE_FNAME = ( @@ -76,13 +76,14 @@ TYPE_FNAME_MAP = { "bf16": "half", } -MODE_NAME_MAP = { - "batched": "Batched", - "jagged": "Jagged", +MODE_GROUP_OR_NOT_MAP = { + "batched": "NoGroup", + "jagged": "NoGroup", + "group": "Group", } def create_forward_instances(instance_dir: Path, headdims: List) -> None: - for mode in ["batched", "jagged"]: + for mode in ["batched", "jagged", "group"]: for dtype in ["fp16", "bf16"]: for has_causal in [True, False]: for use_softmax in [True, False]: @@ -113,7 +114,7 @@ def create_forward_instances(instance_dir: Path, headdims: List) -> None: has_bias=BOOL_MAP[has_bias], has_dropout=BOOL_MAP[has_dropout], max_k=max_k, - cap_mode=MODE_NAME_MAP[mode], + group_or_not=MODE_GROUP_OR_NOT_MAP[mode], ) (instance_dir / fname).write_text( HSTU_COPYRIGHT_HEADER @@ -123,7 +124,7 @@ def create_forward_instances(instance_dir: Path, headdims: List) -> None: def create_forward_instances_ref(instance_dir: Path, headdims: List) -> None: - for mode in ["batched", "jagged"]: + for mode in ["batched", "jagged", "group"]: for dtype in ["fp16", "bf16"]: ref_fname = HSTU_INSTANCE_REF_FNAME.format( mode=mode, @@ -153,7 +154,7 @@ def create_forward_instances_ref(instance_dir: Path, headdims: List) -> None: has_bias=BOOL_MAP[has_bias], has_dropout=BOOL_MAP[has_dropout], max_k=max_k, - cap_mode=MODE_NAME_MAP[mode], + group_or_not=MODE_GROUP_OR_NOT_MAP[mode], ) ) file.write(forward_instance) diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_api.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_api.hpp index b50815b8fb..cddf56a793 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_api.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_api.hpp @@ -7,7 +7,11 @@ #include "hstu_attention_params.hpp" -extern void hstu_attention_batched_forward_fp16(HstuAttentionFwdParams& param, hipStream_t stream); -extern void hstu_attention_batched_forward_bf16(HstuAttentionFwdParams& param, hipStream_t stream); -extern void 
hstu_attention_jagged_forward_fp16(HstuAttentionFwdParams& param, hipStream_t stream); -extern void hstu_attention_jagged_forward_bf16(HstuAttentionFwdParams& param, hipStream_t stream); +extern void hstu_attention_no_group_forward_fp16(HstuAttentionNoGroupFwdParams& param, + hipStream_t stream); +extern void hstu_attention_no_group_forward_bf16(HstuAttentionNoGroupFwdParams& param, + hipStream_t stream); +extern void hstu_attention_group_forward_fp16(HstuAttentionGroupFwdParams& param, + hipStream_t stream); +extern void hstu_attention_group_forward_bf16(HstuAttentionGroupFwdParams& param, + hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_dispatch.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_dispatch.hpp index 19dbc77fa5..513bb53f75 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_dispatch.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_dispatch.hpp @@ -48,6 +48,7 @@ struct batched_forward_causal_softmax_bias_dropout_dispatch typename HstuAttentionFwdTypeConfig::CompDataType, typename HstuAttentionFwdTypeConfig::BiasDataType, kIsCrossAttention, + false, // kUseGroup false, // kIsJagged kHasBias, kHasDropout, @@ -56,7 +57,7 @@ struct batched_forward_causal_softmax_bias_dropout_dispatch kUseTrLoad, HstuAttentionTileSetting>; - static void Run(HstuAttentionFwdParams& param, hipStream_t stream) + static void Run(HstuAttentionNoGroupFwdParams& param, hipStream_t stream) { constexpr ck_tile::index_t occupancy = -1; @@ -127,7 +128,7 @@ struct batched_forward_causal_softmax_bias_dropout_dispatch }; template - static void RunWithKernel(HstuAttentionFwdParams& param, hipStream_t stream) + static void RunWithKernel(HstuAttentionNoGroupFwdParams& param, hipStream_t stream) { const auto kargs = [&] { return HstuKernel::MakeKargs(param.q_ptr, @@ -185,7 +186,7 @@ template -void run_batched_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionFwdParams& param, +void run_batched_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionNoGroupFwdParams& param, hipStream_t stream) { batched_forward_causal_softmax_bias_dropout_dispatch; static constexpr bool kIsCrossAttention = HstuAttentionPipeline::Problem::kIsCrossAttention; + static constexpr bool kUseGroup = HstuAttentionPipeline::Problem::kUseGroup; static constexpr bool kIsJagged = HstuAttentionPipeline::Problem::kIsJagged; static constexpr auto kHasBias = HstuAttentionPipeline::Problem::kHasBias; static constexpr bool kHasDropout = HstuAttentionPipeline::Problem::kHasDropout; @@ -60,7 +61,7 @@ struct HstuAttentionFwdKernel // kargs use aggregate initializer, so no constructor will provided // use inheritance to minimize karg size // user need to use MakeKargs() function to create kargs. 
- struct HstuAttentionFwdBatchModeBaseKargs + struct HstuAttentionNoGroupBatchedFwdBaseKargs { ck_tile::index_t batch_stride_q; ck_tile::index_t batch_stride_k; @@ -98,7 +99,7 @@ struct HstuAttentionFwdKernel ck_tile::index_t min_full_attn_seqlen; }; - struct HstuAttentionFwdJaggModeBaseKargs + struct HstuAttentionNoGroupJaggedFwdBaseKargs { const int32_t* seq_q_offsets_ptr; const int32_t* seq_kv_offsets_ptr; @@ -135,6 +136,51 @@ struct HstuAttentionFwdKernel ck_tile::index_t min_full_attn_seqlen; }; + struct HstuAttentionGroupFwdBaseKargs + { + ck_tile::index_t num_batch_per_group; + + const int32_t* seq_q_offsets_ptr; + const int32_t* seq_kv_offsets_ptr; + + ck_tile::index_t seq_stride_q; + ck_tile::index_t seq_stride_k; + ck_tile::index_t seq_stride_v; + ck_tile::index_t seq_stride_o; + + const int32_t* num_targets_ptr; + + const void* q_ptr; + const void* k_ptr; + const void* v_ptr; + void* o_ptr; + + ck_tile::index_t nhead_stride_q; + ck_tile::index_t nhead_stride_k; + ck_tile::index_t nhead_stride_v; + ck_tile::index_t nhead_stride_o; + + ck_tile::index_t hdim_qk; + ck_tile::index_t hdim_v; + + ck_tile::index_t seqlen_q; + ck_tile::index_t seqlen_kv; + + ck_tile::index_t num_head; + float scale_s; // scaling value exerted on the immediate Q@K result + float scale_p; // scaling value exerted on the SiLU result + + int32_t contextual_seqlen; // to be set by the per-group contextual_seqlen + int32_t window_size; // to be set by the per-group window_size + int32_t min_full_attn_seqlen; // to be set by the per-group min_full_attn_seqlen + + const int32_t* group_max_seqlen_ptr; + const int32_t* group_contextual_seqlen_ptr; + const int32_t* group_window_size_ptr; + const int32_t* group_min_full_attn_seqlen_ptr; + const float* group_attn_scale_ptr; + }; + struct HstuAttentionFwdCommonBiasKargs { const void* bias_ptr = nullptr; @@ -170,30 +216,48 @@ struct HstuAttentionFwdKernel uint8_t p_undrop_in_uint8_t = std::numeric_limits::max(); }; - struct HstuAttentionFwdBatchModeKargs : HstuAttentionFwdBatchModeBaseKargs, - std::conditional_t>, - std::conditional_t> + struct HstuAttentionNoGroupBatchedFwdKargs + : HstuAttentionNoGroupBatchedFwdBaseKargs, + std::conditional_t>, + std::conditional_t> { }; - struct HstuAttentionFwdJaggModeKargs : HstuAttentionFwdJaggModeBaseKargs, - std::conditional_t>, - std::conditional_t> + struct HstuAttentionNoGroupJaggedFwdKargs + : HstuAttentionNoGroupJaggedFwdBaseKargs, + std::conditional_t>, + std::conditional_t> { }; - using Kargs = std:: - conditional_t; + struct HstuAttentionGroupFwdKargs : HstuAttentionGroupFwdBaseKargs, + std::conditional_t>, + std::conditional_t> + { + }; - template + using Kargs = std::conditional_t>; + + static constexpr bool kUseNoGroupBatched = (!kUseGroup && !kIsJagged); + static constexpr bool kUseNoGroupJagged = (!kUseGroup && kIsJagged); + + template CK_TILE_HOST static constexpr std::enable_if_t MakeKargs(const void* q_ptr, const void* k_ptr, @@ -278,7 +342,7 @@ struct HstuAttentionFwdKernel return kargs; } - template + template CK_TILE_HOST static constexpr std::enable_if_t MakeKargs(const void* q_ptr, const void* k_ptr, @@ -355,11 +419,95 @@ struct HstuAttentionFwdKernel return kargs; } + template + CK_TILE_HOST static constexpr std::enable_if_t + MakeKargs(const void* q_ptr, + const void* k_ptr, + const void* v_ptr, + const void* bias_ptr, + void* o_ptr, + ck_tile::index_t num_batch_per_group, + const void* seq_q_offsets_ptr, + const void* seq_kv_offsets_ptr, + const void* group_max_seqlen_ptr, + const void* 
group_contextual_seqlen_ptr, + const void* group_window_size_ptr, + const void* group_min_full_attn_seqlen_ptr, + const void* group_attn_scale_ptr, + ck_tile::index_t hdim_qk, + ck_tile::index_t hdim_v, + ck_tile::index_t num_head, + float scale_s, + ck_tile::index_t seq_stride_q, + ck_tile::index_t seq_stride_k, + ck_tile::index_t seq_stride_v, + ck_tile::index_t seq_stride_bias, + ck_tile::index_t seq_stride_o, + ck_tile::index_t nhead_stride_q, + ck_tile::index_t nhead_stride_k, + ck_tile::index_t nhead_stride_v, + ck_tile::index_t nhead_stride_bias, + ck_tile::index_t nhead_stride_o, + const void* num_targets_ptr, + float p_drop, + uint64_t philox_seed, + uint64_t philox_offset) + { + Kargs kargs{ + {num_batch_per_group, + reinterpret_cast(seq_q_offsets_ptr), + reinterpret_cast(seq_kv_offsets_ptr), + seq_stride_q, + seq_stride_k, + seq_stride_v, + seq_stride_o, + reinterpret_cast(num_targets_ptr), + q_ptr, + k_ptr, + v_ptr, + o_ptr, + nhead_stride_q, + nhead_stride_k, + nhead_stride_v, + nhead_stride_o, + hdim_qk, + hdim_v, + -1, // seqlen_q will be updated by another pointer + -1, // seqlen_kv will be updated by another pointer + num_head, + scale_s, + 1.0f, // to be set according to the per-group attn_scale and max_seqlen + 0, // to be set by the per-group contextual_seqlen + 0, // to be set by the per-group window_size + 0, // to be set by the per-group min_full_attn_seqlen + reinterpret_cast(group_max_seqlen_ptr), + reinterpret_cast(group_contextual_seqlen_ptr), + reinterpret_cast(group_window_size_ptr), + reinterpret_cast(group_min_full_attn_seqlen_ptr), + reinterpret_cast(group_attn_scale_ptr)}, // args for common karg + {}, // placeholder for bias + {}, // placeholder for dropout + }; + + if constexpr(kHasBias) + { + kargs.bias_ptr = bias_ptr; + kargs.seq_stride_bias = seq_stride_bias; + kargs.nhead_stride_bias = nhead_stride_bias; + } + if constexpr(kHasDropout) + { + kargs.init_dropout(p_drop, philox_seed, philox_offset); + } + + return kargs; + } + CK_TILE_HOST static constexpr auto GridSize(ck_tile::index_t batch_size_, ck_tile::index_t nhead_, ck_tile::index_t seqlen_, ck_tile::index_t hdim_v_, - bool has_minfull_attn_seqlen) + bool has_minfull_attn_seqlen = false) { // The Q sequence [0, seqlen) will be split to two parts for allocating workgroups: // 1) [0, seqlen - target - min_full_attn_seqlen) @@ -367,8 +515,15 @@ struct HstuAttentionFwdKernel ck_tile::index_t num_tile_in_seqlen = ck_tile::integer_divide_ceil(seqlen_, HstuAttentionPipeline::kM0); - if(has_minfull_attn_seqlen) + if constexpr(kUseGroup) + { num_tile_in_seqlen += 1; + } + else + { + if(has_minfull_attn_seqlen) + num_tile_in_seqlen += 1; + }; if constexpr(HstuAttentionPipeline::kN1 < HstuAttentionPipeline::kSubQKHeaddim) { @@ -492,6 +647,20 @@ struct HstuAttentionFwdKernel kargs.seq_q_offsets_ptr[i_batch + 1] - kargs.seq_q_offsets_ptr[i_batch]; kargs.seqlen_kv = kargs.seq_kv_offsets_ptr[i_batch + 1] - kargs.seq_kv_offsets_ptr[i_batch]; + + // read from device memory for the group specific mask and scaling parameters + if constexpr(kUseGroup) + { + index_t i_group = + __builtin_amdgcn_readfirstlane(i_batch / kargs.num_batch_per_group); + + float attn_scale = kargs.group_attn_scale_ptr[i_group]; + index_t max_seqlen = kargs.group_max_seqlen_ptr[i_group]; + kargs.scale_p = (attn_scale ? 
attn_scale : 1.0f / static_cast(max_seqlen)); + kargs.contextual_seqlen = kargs.group_contextual_seqlen_ptr[i_group]; + kargs.window_size = kargs.group_window_size_ptr[i_group]; + kargs.min_full_attn_seqlen = kargs.group_min_full_attn_seqlen_ptr[i_group]; + }; } else { diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_bf16.cpp b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_bf16.cpp similarity index 70% rename from example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_bf16.cpp rename to example/ck_tile/18_hstu_attention/hstu_attention_group_forward_bf16.cpp index d5d86fe833..c9e55d8b9f 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_bf16.cpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_bf16.cpp @@ -6,11 +6,11 @@ #include "hstu_attention_bool_switch.hpp" #include "hstu_attention_hdim_switch.hpp" -#include "hstu_attention_jagged_forward_dispatch.hpp" +#include "hstu_attention_group_forward_dispatch.hpp" -#include "instances/hstu_attention_jagged_forward_bf16_instances_ref.hpp" +#include "instances/hstu_attention_group_forward_bf16_instances_ref.hpp" -void hstu_attention_jagged_forward_bf16(HstuAttentionFwdParams& param, hipStream_t stream) +void hstu_attention_group_forward_bf16(HstuAttentionGroupFwdParams& param, hipStream_t stream) { const bool has_dropout = (param.p_drop > 0.0f); const bool has_bias = (param.bias_ptr != nullptr); @@ -18,12 +18,12 @@ void hstu_attention_jagged_forward_bf16(HstuAttentionFwdParams& param, hipStream BOOL_SWITCH_3(has_bias, kHasBias, has_dropout, kHasDropout, use_causal, kUseCausal, [&] { HDIM_SWITCH(param.hdim_qk, param.hdim_v, MaxK, [&] { BOOL_SWITCH(param.use_softmax, kUseSoftmax, [&] { - run_jagged_forward_causal_softmax_bias_dropout_dispatch(param, stream); + run_group_forward_causal_softmax_bias_dropout_dispatch(param, stream); }); }); }); diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_dispatch.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_dispatch.hpp new file mode 100644 index 0000000000..c3ea10c74b --- /dev/null +++ b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_dispatch.hpp @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. 
+ +#pragma once + +#include +#include +#include +#include + +#include "hstu_attention_bool_switch.hpp" +#include "hstu_attention_fwd_type_config.hpp" +#include "hstu_attention_fwd_setting.hpp" +#include "hstu_attention_params.hpp" +#include "hstu_attention_hdim_switch.hpp" +#include "hstu_attention_pipeline_problem.hpp" +#include "hstu_attention_traits.hpp" +#include "hstu_attention_with_softmax_fwd_pipeline.hpp" +#include "hstu_attention_no_softmax_fwd_pipeline.hpp" +#include "hstu_attention_with_softmax_fwd_trload_pipeline.hpp" +#include "hstu_attention_no_softmax_fwd_trload_pipeline.hpp" +#include "hstu_attention_fwd_kernel.hpp" +#include "hstu_attention_epilogue.hpp" + +template +struct group_forward_causal_softmax_bias_dropout_dispatch +{ + using HstuAttentionTileSetting = + typename std::conditional_t, + HstuAttentionNoSoftmaxFwdTileSetting>::Type; + +#ifdef BUILD_HSTU_FOR_GFX95_ONLY + static constexpr bool kUseTrLoad = true; +#else + static constexpr bool kUseTrLoad = false; +#endif + + template + using HstuPipelineProblemTemp = ck_tile::HstuAttentionFwdPipelineProblem< + InOutDataType, + typename HstuAttentionFwdTypeConfig::GemmAccDataType, + typename HstuAttentionFwdTypeConfig::CompDataType, + typename HstuAttentionFwdTypeConfig::BiasDataType, + kIsCrossAttention, + true, // kUseGroup + true, // kIsJagged + kHasBias, + kHasDropout, + kUseCausal, + kUseSoftmax, + kUseTrLoad, + HstuAttentionTileSetting>; + + static void Run(HstuAttentionGroupFwdParams& param, hipStream_t stream) + { + constexpr ck_tile::index_t occupancy = -1; + + const bool pad_headdim_qk = !(param.hdim_qk % HstuAttentionTileSetting::kQKHeaddim == 0); + const bool pad_headdim_v = !(param.hdim_v % HstuAttentionTileSetting::kN1 == 0); + + // no need to check seqlen_q since it is not used as fastest dim, + // buffer_load_dwordxx/buffer_store_dwordxx can handle oob access + constexpr bool kPadSeqLenQ = false; + + constexpr bool kPadSeqLenK = true; + + BOOL_SWITCH_2(pad_headdim_qk, kPadHeadDimQK, pad_headdim_v, kPadHeadDimV, [&] { + using HstuTraits = ck_tile::HstuAttentionFwdTraits; + + using HstuEpilogue = ck_tile::NRepetitions2DEpilogue::OaccDataType, + typename HstuAttentionFwdTypeConfig::ODataType, + kPadSeqLenQ, + kPadHeadDimV>>; + + BOOL_SWITCH(param.is_cross_attention, kIsCrossAttention, [&] { + using HstuPipelineProblem = HstuPipelineProblemTemp; + + if constexpr(!kUseTrLoad) + { + using HstuPipeline = std::conditional_t< + kUseSoftmax, + ck_tile::HstuAttentionWithSoftmaxFwdPipelineQRKSVS, + ck_tile::HstuAttentionNoSoftmaxFwdPipelineQRKSVS>; + + using HstuKernel = ck_tile::HstuAttentionFwdKernel; + + RunWithKernel(param, stream); + } + else + { + using HstuPipeline = std::conditional_t< + kUseSoftmax, + ck_tile::HstuAttentionWithSoftmaxFwdPipelineQRKSVSTrLoad< + HstuPipelineProblem, + HstuTraits>, + ck_tile::HstuAttentionNoSoftmaxFwdPipelineQRKSVSTrLoad>; + + using HstuKernel = ck_tile::HstuAttentionFwdKernel; + + RunWithKernel(param, stream); + }; + }); + }); + }; + + template + static void RunWithKernel(HstuAttentionGroupFwdParams& param, hipStream_t stream) + { + const auto kargs = [&] { + return HstuKernel::MakeKargs(param.q_ptr, + param.k_ptr, + param.v_ptr, + param.bias_ptr, + param.o_ptr, + param.num_batch / param.num_group, + param.seq_q_offsets_ptr, + param.is_cross_attention ? 
param.seq_kv_offsets_ptr + : param.seq_q_offsets_ptr, + param.group_max_seqlen_ptr, + param.group_contextual_seqlen_ptr, + param.group_window_size_ptr, + param.group_min_full_attn_seqlen_ptr, + param.group_attn_scale_ptr, + param.hdim_qk, + param.hdim_v, + param.num_head, + param.scale_s, + param.seq_stride_q, + param.seq_stride_k, + param.seq_stride_v, + param.seq_stride_bias, + param.seq_stride_o, + param.nhead_stride_q, + param.nhead_stride_k, + param.nhead_stride_v, + param.nhead_stride_bias, + param.nhead_stride_o, + param.num_targets_ptr, + param.p_drop, + param.philox_seed, + param.philox_offset); + }(); + + dim3 kGridSize = + HstuKernel::GridSize(param.num_batch, param.num_head, param.max_seqlen, param.hdim_v); + constexpr dim3 kBlockSize = HstuKernel::BlockSize(); + constexpr ck_tile::index_t kBlockPerCu = HstuKernel::kBlockPerCu; + + (void)ck_tile::launch_kernel( + ck_tile::stream_config{stream, false}, + ck_tile::make_kernel(HstuKernel{}, kGridSize, kBlockSize, 0, kargs)); + }; +}; + +template +void run_group_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionGroupFwdParams& param, + hipStream_t stream) +{ + group_forward_causal_softmax_bias_dropout_dispatch::Run(param, stream); +}; diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_fp16.cpp b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_fp16.cpp similarity index 70% rename from example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_fp16.cpp rename to example/ck_tile/18_hstu_attention/hstu_attention_group_forward_fp16.cpp index 9980f7078a..858ef4c268 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_fp16.cpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_group_forward_fp16.cpp @@ -6,24 +6,25 @@ #include "hstu_attention_bool_switch.hpp" #include "hstu_attention_hdim_switch.hpp" -#include "hstu_attention_jagged_forward_dispatch.hpp" +#include "hstu_attention_group_forward_dispatch.hpp" -#include "instances/hstu_attention_jagged_forward_fp16_instances_ref.hpp" +#include "instances/hstu_attention_group_forward_fp16_instances_ref.hpp" -void hstu_attention_jagged_forward_fp16(HstuAttentionFwdParams& param, hipStream_t stream) +void hstu_attention_group_forward_fp16(HstuAttentionGroupFwdParams& param, hipStream_t stream) { const bool has_dropout = (param.p_drop > 0.0f); const bool has_bias = (param.bias_ptr != nullptr); const bool use_causal = param.use_causal; + BOOL_SWITCH_3(has_bias, kHasBias, has_dropout, kHasDropout, use_causal, kUseCausal, [&] { HDIM_SWITCH(param.hdim_qk, param.hdim_v, MaxK, [&] { BOOL_SWITCH(param.use_softmax, kUseSoftmax, [&] { - run_jagged_forward_causal_softmax_bias_dropout_dispatch(param, stream); + run_group_forward_causal_softmax_bias_dropout_dispatch(param, stream); }); }); }); diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_dispatch.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_dispatch.hpp index 44e9ff08c8..e37a839606 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_dispatch.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_jagged_forward_dispatch.hpp @@ -48,7 +48,8 @@ struct jagged_forward_causal_softmax_bias_dropout_dispatch typename HstuAttentionFwdTypeConfig::CompDataType, typename HstuAttentionFwdTypeConfig::BiasDataType, kIsCrossAttention, - true, // kIsJagged + false, // kUseGroup + true, // kIsJagged kHasBias, kHasDropout, kUseCausal, @@ -56,7 +57,7 @@ struct jagged_forward_causal_softmax_bias_dropout_dispatch 
kUseTrLoad, HstuAttentionTileSetting>; - static void Run(HstuAttentionFwdParams& param, hipStream_t stream) + static void Run(HstuAttentionNoGroupFwdParams& param, hipStream_t stream) { constexpr ck_tile::index_t occupancy = -1; @@ -117,7 +118,7 @@ struct jagged_forward_causal_softmax_bias_dropout_dispatch }; template - static void RunWithKernel(HstuAttentionFwdParams& param, hipStream_t stream) + static void RunWithKernel(HstuAttentionNoGroupFwdParams& param, hipStream_t stream) { const auto kargs = [&] { return HstuKernel::MakeKargs(param.q_ptr, @@ -174,7 +175,7 @@ template -void run_jagged_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionFwdParams& param, +void run_jagged_forward_causal_softmax_bias_dropout_dispatch(HstuAttentionNoGroupFwdParams& param, hipStream_t stream) { jagged_forward_causal_softmax_bias_dropout_dispatch 0.0f); const bool has_bias = (param.bias_ptr != nullptr); @@ -18,12 +20,20 @@ void hstu_attention_batched_forward_bf16(HstuAttentionFwdParams& param, hipStrea BOOL_SWITCH_3(has_bias, kHasBias, has_dropout, kHasDropout, use_causal, kUseCausal, [&] { HDIM_SWITCH(param.hdim_qk, param.hdim_v, MaxK, [&] { BOOL_SWITCH(param.use_softmax, kUseSoftmax, [&] { - run_batched_forward_causal_softmax_bias_dropout_dispatch(param, stream); + if(param.is_jagged) + run_jagged_forward_causal_softmax_bias_dropout_dispatch(param, stream); + else + run_batched_forward_causal_softmax_bias_dropout_dispatch(param, stream); }); }); }); diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_fp16.cpp b/example/ck_tile/18_hstu_attention/hstu_attention_no_group_forward_fp16.cpp similarity index 51% rename from example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_fp16.cpp rename to example/ck_tile/18_hstu_attention/hstu_attention_no_group_forward_fp16.cpp index 58db583131..5d75873e95 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_batched_forward_fp16.cpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_no_group_forward_fp16.cpp @@ -7,10 +7,12 @@ #include "hstu_attention_bool_switch.hpp" #include "hstu_attention_hdim_switch.hpp" #include "hstu_attention_batched_forward_dispatch.hpp" +#include "hstu_attention_jagged_forward_dispatch.hpp" #include "instances/hstu_attention_batched_forward_fp16_instances_ref.hpp" +#include "instances/hstu_attention_jagged_forward_fp16_instances_ref.hpp" -void hstu_attention_batched_forward_fp16(HstuAttentionFwdParams& param, hipStream_t stream) +void hstu_attention_no_group_forward_fp16(HstuAttentionNoGroupFwdParams& param, hipStream_t stream) { const bool has_dropout = (param.p_drop > 0.0f); const bool has_bias = (param.bias_ptr != nullptr); @@ -18,12 +20,20 @@ void hstu_attention_batched_forward_fp16(HstuAttentionFwdParams& param, hipStrea BOOL_SWITCH_3(has_bias, kHasBias, has_dropout, kHasDropout, use_causal, kUseCausal, [&] { HDIM_SWITCH(param.hdim_qk, param.hdim_v, MaxK, [&] { BOOL_SWITCH(param.use_softmax, kUseSoftmax, [&] { - run_batched_forward_causal_softmax_bias_dropout_dispatch(param, stream); + if(param.is_jagged) + run_jagged_forward_causal_softmax_bias_dropout_dispatch(param, stream); + else + run_batched_forward_causal_softmax_bias_dropout_dispatch(param, stream); }); }); }); diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_params.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_params.hpp index 9ce98839fd..955d8c688c 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_params.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_params.hpp @@ -5,7 +5,7 @@ 
#include <...> -struct HstuAttentionFwdParams +struct HstuAttentionNoGroupFwdParams { // for self-attention (is_cross_attention = false), we require // 1) either seqlen_kv == 0 or seqlen_kv == seqlen_q @@ -55,6 +55,7 @@ struct HstuAttentionFwdParams const void* num_targets_ptr; bool use_causal; + // parameters used by Non-Group HSTU ck_tile::index_t window_size; ck_tile::index_t contextual_seqlen; ck_tile::index_t min_full_attn_seqlen; @@ -65,3 +66,63 @@ struct HstuAttentionFwdParams uint64_t philox_seed; uint64_t philox_offset; }; + +struct HstuAttentionGroupFwdParams +{ + // for self-attention (is_cross_attention = false), we require + // 1) either seq_kv_offsets_ptr == nullptr, or seq_kv_offsets_ptr == seq_q_offsets_ptr + bool is_cross_attention; + + ck_tile::index_t num_group; + ck_tile::index_t num_batch; + const void* seq_q_offsets_ptr; + const void* seq_kv_offsets_ptr; + ck_tile::index_t max_seqlen; // the maximum of all the groups' max_seqlen + + const void* q_ptr; + const void* k_ptr; + const void* v_ptr; + const void* bias_ptr; + void* o_ptr; + + ck_tile::index_t hdim_qk; + ck_tile::index_t hdim_v; + ck_tile::index_t num_head; + float scale_s; // scaling factor applied to the intermediate Q@K result + + ck_tile::index_t seq_stride_q; + ck_tile::index_t seq_stride_k; + ck_tile::index_t seq_stride_v; + ck_tile::index_t seq_stride_bias; + ck_tile::index_t seq_stride_o; + + ck_tile::index_t nhead_stride_q; + ck_tile::index_t nhead_stride_k; + ck_tile::index_t nhead_stride_v; + ck_tile::index_t nhead_stride_bias; + ck_tile::index_t nhead_stride_o; + + // batched mode only parameters + ck_tile::index_t batch_stride_q; + ck_tile::index_t batch_stride_k; + ck_tile::index_t batch_stride_v; + ck_tile::index_t batch_stride_bias; + ck_tile::index_t batch_stride_o; + + const void* num_targets_ptr; + + bool use_causal; + + // parameters used by Group HSTU + const void* group_attn_scale_ptr; + const void* group_max_seqlen_ptr; + const void* group_window_size_ptr; + const void* group_contextual_seqlen_ptr; + const void* group_min_full_attn_seqlen_ptr; + + bool use_softmax; + + float p_drop; + uint64_t philox_seed; + uint64_t philox_offset; +}; diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_pipeline_problem.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_pipeline_problem.hpp index 4dc0b72a62..40fe064639 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_pipeline_problem.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_pipeline_problem.hpp @@ -64,6 +64,7 @@ template <...> […]; static constexpr index_t kNumGemm0Warps = AttentionTileSetting_::NumGemm0Warps; diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp index 36cb9a65f2..422dda5b4a 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
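
The substance of the patch is the parameter split above: the no-group struct keeps scalar mask settings (window_size, contextual_seqlen, min_full_attn_seqlen), while the group struct replaces each scalar with a per-group array behind a pointer, so one launch can serve groups with different mask settings. Below is a hypothetical, abridged sketch (not part of the patch) of how a caller might fill the group-mode struct; the struct is trimmed to the group-specific fields, and every `d_*` argument is a placeholder for an array the real example would allocate (host or device, as the kernel expects).

```cpp
// Standalone sketch: populate an abridged stand-in for HstuAttentionGroupFwdParams.
// "GroupFwdParamsSketch" and all d_* names are illustrative, not from the patch.
#include <cstdint>

using index_t = int32_t; // stand-in for ck_tile::index_t

struct GroupFwdParamsSketch // abridged from the struct added above
{
    bool is_cross_attention;
    index_t num_group;
    index_t num_batch;
    const void* seq_q_offsets_ptr;
    const void* seq_kv_offsets_ptr;
    index_t max_seqlen;
    const void* group_attn_scale_ptr;
    const void* group_max_seqlen_ptr;
    const void* group_window_size_ptr;
    const void* group_contextual_seqlen_ptr;
    const void* group_min_full_attn_seqlen_ptr;
};

GroupFwdParamsSketch make_group_params(const index_t* d_seq_q_offsets, // [num_batch + 1] prefix sums
                                       const float*   d_scales,        // [num_group]
                                       const index_t* d_max_seqlens,   // [num_group]
                                       const index_t* d_window_sizes,  // [num_group]
                                       const index_t* d_context_lens,  // [num_group]
                                       const index_t* d_minfull_lens,  // [num_group]
                                       index_t num_group,
                                       index_t num_batch,
                                       index_t max_seqlen_over_groups)
{
    GroupFwdParamsSketch p{};
    p.is_cross_attention = false;            // self-attention
    p.num_group          = num_group;
    p.num_batch          = num_batch;
    p.seq_q_offsets_ptr  = d_seq_q_offsets;
    p.seq_kv_offsets_ptr = d_seq_q_offsets;  // nullptr or == q offsets when not cross-attention
    p.max_seqlen         = max_seqlen_over_groups;
    // Per-group knobs: one entry per group instead of one scalar per launch.
    p.group_attn_scale_ptr           = d_scales;
    p.group_max_seqlen_ptr           = d_max_seqlens;
    p.group_window_size_ptr          = d_window_sizes;
    p.group_contextual_seqlen_ptr    = d_context_lens;
    p.group_min_full_attn_seqlen_ptr = d_minfull_lens;
    return p;
}
```

diff --git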
a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp index 71febdb83e..d1903ae6e7 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp index bb398341f6..7a05a2be22 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp index eb0044bee3..ac00d7468f 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp index 842c81d46f..29adc7da6e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp 
b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp index de1bab9bba..fd75e79d70 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp index a391cdefeb..c39b871cfb 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp index 6f1fd2d017..5c3a139e12 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp index 7e38f57564..2441e1907e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp index 
fc1312e0b9..42fa146645 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp index 8f21011a2f..9428dcd4a0 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp index de373620a3..9fbb8c5e18 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp index b57fcf1ae6..5584564fb7 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp index e0eda1fd86..3e0b1da384 100644 --- 
a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp index 20a2c94eb6..7399eab157 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp index 6fbca8beb1..1599420233 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp index 847dade837..0f9dcd42af 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp index cd4db455a0..a69c39fad0 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp +++ 
b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp index b0fe7e9a0c..0a3b51fcc8 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp index d04fcc50f6..ac4aefe9c3 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp index c2d4368586..85e4b966a4 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp index ae9a9820cd..8022e051de 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void 
run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp index 2a99b9be99..0867328c31 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp index 6f6292aaa2..cc1e53443e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp index 5f6437e2f2..5fe4f880d2 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp index 2084aa6e87..e34c634e05 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, 
hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp index 26d25bd909..0460c5b80b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp index cd528f39ff..2e869aa9f7 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp index e006dee199..524f364c94 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp index a097c97db5..919a2ca2a2 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp 
b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp index 3209277570..cdc47ab71a 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp index 31bfc04989..2c417230db 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_instances_ref.hpp index 1f5955fae1..f790245241 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_instances_ref.hpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_instances_ref.hpp @@ -15,7 +15,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -23,7 +23,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -31,7 +31,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -39,7 +39,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -47,7 +47,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t 
stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -55,7 +55,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -63,7 +63,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -71,7 +71,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -79,7 +79,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -87,7 +87,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -95,7 +95,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -103,7 +103,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -111,7 +111,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -119,7 +119,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -127,7 +127,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -135,7 +135,7 @@ extern template void 
run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -143,7 +143,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -151,7 +151,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -159,7 +159,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -167,7 +167,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -175,7 +175,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -183,7 +183,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -191,7 +191,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -199,7 +199,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -207,7 +207,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -215,7 +215,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, 
hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -223,7 +223,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -231,7 +231,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -239,7 +239,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -247,7 +247,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -255,7 +255,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -263,7 +263,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -271,7 +271,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -279,7 +279,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -287,7 +287,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -295,7 +295,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -303,7 +303,7 @@ extern template void 
run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -311,7 +311,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -319,7 +319,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -327,7 +327,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -335,7 +335,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -343,7 +343,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -351,7 +351,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -359,7 +359,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -367,7 +367,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -375,7 +375,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -383,7 +383,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -391,7 +391,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -399,7 +399,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -407,7 +407,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -415,7 +415,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -423,7 +423,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -431,7 +431,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -439,7 +439,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -447,7 +447,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -455,7 +455,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -463,7 +463,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ 
-471,7 +471,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -479,7 +479,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -487,7 +487,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -495,7 +495,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -503,7 +503,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -511,7 +511,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -519,4 +519,4 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp index 44e9e9115f..735e739c83 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp index cc10734807..bb61c28263 100644 --- 
a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp
index 2303ca8181..c31ffe1242 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
index 98911790e5..0a725cff45 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp
index 745ace2b60..b407ee76fd 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp
index f1957e30be..47c6506bd8 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp
index 2c3f9b0ee3..1dd81afabe 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
index cb962fad90..3286ad32b8 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp
index 2001840777..a4431b7ccb 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp
index 3e799fc731..963ca17140 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp
index f4b4000478..7af16744a3 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
index 25daf7390d..796585922e 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp
index 2016f8ebda..10c9375fa6 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp
index ec2a5e7c94..e6cc834e34 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp
index e99e8cfedc..9303ca4771 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
index 7e9204d0fd..3937caa4b7 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp
index c21fe134be..a511c9734c 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp
index 7c285b0079..56f72f67aa 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp
index 7fe019360b..ac66d7f6ea 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
index 4586597463..8d1e52a03c 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp
index 7418ceafbe..917f970c82 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp
index 71ad9b6cf3..44abd136a2 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp
index 1cd866c432..0ddd63a123 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
index 4cccc9875f..c33c9d1e3c 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp
index a641156a39..5f394a3ddb 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp
index 2085f81314..df038cf33a 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp
index 36aa6cd03b..8cb42923be 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
index baa6f85270..8ea33f3364 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp
index 6ca7ad1e75..11d1e0066c 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp
index b218e0965c..9c512137fb 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp
index a75ad6a2b4..b1294f9f66 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
index cfb7414899..18f2bea56a 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp
index aee00936ab..d9d303b485 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp
index de9d2248ab..189d7bc817 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp
index 6f2ddd517a..289cee1245 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
index 3ab920e274..3cab083876 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp
index 9a247781f1..92970beba0 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp
index 0a133a4528..d59a3e98df 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp
index 1f73b0903a..9a94415818 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
index ac9e2d3157..0875c95641 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp
index 0a41097b22..e683fbd36c 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp
index 54cfbb4c51..f046c9197f 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp
index 3f179b6895..a50599a974 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
index 3f50442686..7c9f5bd187 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp
index 82cd2f4b84..23e5b5bfb1 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp
index f8403d811f..818e311721 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp
index 9244f2d58c..e9fadee6a6 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
index 9bbae70dc5..0bbc7edd25 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp
index d0e54b40b9..0016431f73 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp
index dba42fb653..6493425133 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp
index 70d653d799..f2d2458664 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
index e9aed1a9ee..38455a0738 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp
index 077ad80e73..cc020ba8da 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp
index a9cbc16d54..acf1a19e90 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp
index 49daa7c9a5..49b4903e48 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
index 3061cd44dc..150bb760a0 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp
index 72476a7d86..3c7f2b845c 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp
index a817cc54df..18334fd1db 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp
index 4c014c2019..c70c74c7b4 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
index 881be3dc25..5b6ab76f4a 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp
index 50a6491efb..93c86bcbf0 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp
index aa2545ce90..9197c03a62 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp
index b68fbc7297..4c4a7cce43 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
index 78f31d7a45..abae11d1ae 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_instances_ref.hpp
index 6d801492ee..19111a0713 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_instances_ref.hpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_instances_ref.hpp
@@ -15,7 +15,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -23,7 +23,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -31,7 +31,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -39,7 +39,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -47,7 +47,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -55,7 +55,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -63,7 +63,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -71,7 +71,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -79,7 +79,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -87,7 +87,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -95,7 +95,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -103,7 +103,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -111,7 +111,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -119,7 +119,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -127,7 +127,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -135,7 +135,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -143,7 +143,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -151,7 +151,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -159,7 +159,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -167,7 +167,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -175,7 +175,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -183,7 +183,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -191,7 +191,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -199,7 +199,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -207,7 +207,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -215,7 +215,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -223,7 +223,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -231,7 +231,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -239,7 +239,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -247,7 +247,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -255,7 +255,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -263,7 +263,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -271,7 +271,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -279,7 +279,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -287,7 +287,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -295,7 +295,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -303,7 +303,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -311,7 +311,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -319,7 +319,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -327,7 +327,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -335,7 +335,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -343,7 +343,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -351,7 +351,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -359,7 +359,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -367,7 +367,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -375,7 +375,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -383,7 +383,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -391,7 +391,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -399,7 +399,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -407,7 +407,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -415,7 +415,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -423,7 +423,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -431,7 +431,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -439,7 +439,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -447,7 +447,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -455,7 +455,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -463,7 +463,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -471,7 +471,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -479,7 +479,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -487,7 +487,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -495,7 +495,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::fp16_t,
@@ -503,7 +503,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);

 extern template void
run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -511,7 +511,7 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -519,4 +519,4 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp index cec5df8883..2121cf92f6 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp index 46a4760dca..dc6b011aef 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp index 98e60c8c65..373bb27e59 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp index 303d3d4058..12b4e71783 100644 --- 
a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp index 49e5c27d4f..b81abf4736 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp index beeeef97df..190727213f 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp index abeb6f6fa4..fd0bf72a60 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp index 1e171e6353..d867cfbccb 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ 
b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp index 5d8f42117f..98ab869237 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp index 0f5ff1a61b..494f3a774d 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp index 3cdc33115f..dc15fa2451 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp index 997f91754d..e1cb58bc5c 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void 
run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp index 7d46856d36..6a25bb1abb 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp index 32042ed616..64f44a8f4e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp index 64e37de126..9c233cbec0 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp index 88a147f3c5..b78536a402 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t 
stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp index bc8984b4e5..f55e6d785d 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp index 4a6929b18f..43117f2c55 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp index 87b7a71bca..8a36f0f0d9 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp index fc31e15e31..dffb1086bd 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp 
b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp index 3d882587a0..0ac06ba3c3 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp index 66e0d89883..942abca0aa 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp index b25b21e9bb..b4427b3ffc 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp index c4c84abfed..fdbee284e1 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp index 7014050cff..d2b12a6c21 100644 --- 
a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp index 99eeccd6e5..73d922c858 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp index 5a829be981..e739095d09 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp index c3da869dd0..1182c110bf 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp index 7211600ef9..7fc6890581 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ 
b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp index 86427b0848..7b3c65da1e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp index 9daefa8cf1..d0bd58f901 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp index 88a9e1549b..3f9786f79a 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_batched_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_batched_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp new file mode 100644 index 0000000000..72bfdcab90 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. 
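The rename hunks above only retype the parameter struct; the mechanism they touch is the classic extern-template split: the dispatch header declares every instantiation with `extern template` so that including it never triggers an implicit instantiation, and each generated instance `.cpp` provides exactly one matching explicit instantiation. A minimal sketch of that pattern, with illustrative names rather than the repository's real ones (and with declaration and definition collapsed into one file so the sketch compiles standalone):

``` C++
#include <cstdio>

// Illustrative stand-in for the dispatch function template.
template <typename T, bool kCausal, int kMaxK>
void run_fwd_dispatch(int n)
{
    std::printf("causal=%d maxk=%d n=%d\n", int(kCausal), kMaxK, n);
}

// In the real header this line reads `extern template void ...;`, telling
// every includer "this instantiation exists elsewhere, do not emit it here".
// A single instance .cpp then carries the matching explicit instantiation:
template void run_fwd_dispatch<float, true, 128>(int);

int main() { run_fwd_dispatch<float, true, 128>(4); return 0; }
```

Splitting one explicit instantiation per configuration into its own `.cpp` keeps each object file small and lets the build compile the instance files in parallel.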
+ +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + true, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp new file mode 100644 index 0000000000..6257e17b69 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + true, + true, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp new file mode 100644 index 0000000000..e8d3298767 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + true, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..0947fde8ab --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + true, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..47e82e2d9e --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + true, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..b5715f4032 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + true, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..74199b013a --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
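Every combination of the boolean template parameters (causal, softmax, bias, dropout) gets its own pre-built instance in these generated files. That exhaustive enumeration is exactly what a runtime dispatcher needs: flags known only at runtime are lifted to template parameters once, at the dispatch boundary. A hedged sketch of the idea with two flags (the repository's actual dispatch code is not part of this diff, and the real dispatcher covers four flags plus data type and maxk):

``` C++
#include <cstdio>

// One pre-compiled instance per flag combination, as in the generated files.
template <bool kHasBias, bool kHasDropout>
void run_instance()
{
    std::printf("bias=%d dropout=%d\n", int(kHasBias), int(kHasDropout));
}

// Each branch selects one explicit instantiation; every combination must
// already be compiled, which is why the generator enumerates all of them.
void dispatch(bool has_bias, bool has_dropout)
{
    if (has_bias)
        has_dropout ? run_instance<true, true>() : run_instance<true, false>();
    else
        has_dropout ? run_instance<false, true>() : run_instance<false, false>();
}

int main() { dispatch(true, false); return 0; }
```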
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + true, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..293501fa3f --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + true, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp new file mode 100644 index 0000000000..3339c87dcf --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp new file mode 100644 index 0000000000..9119b5782e --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp new file mode 100644 index 0000000000..29fcb28f78 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..c076d7a775 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..a643f4f4a0 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..5b7b50f4e1 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..ca2ab49f4a --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..5e930d548c --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
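Alongside the four boolean flags, each instance is specialized for a max head-dimension bucket of 64, 96, 128, or 256 (the `maxk` suffix in the file names). A caller would presumably round the actual `hdim_qk` up to the nearest bucket; a sketch of that selection rule, which is assumed here and not shown in the diff:

``` C++
#include <cstdio>

// The maxk buckets visible in the generated instance names.
constexpr int kMaxKBuckets[] = {64, 96, 128, 256};

// Assumed rule: pick the smallest bucket that fits the head dimension.
int pick_maxk(int hdim_qk)
{
    for (int bucket : kMaxKBuckets)
        if (hdim_qk <= bucket)
            return bucket;
    return -1; // head dimension not covered by any instance
}

int main() { std::printf("hdim 80 -> maxk %d\n", pick_maxk(80)); return 0; } // prints 96
```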
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + false, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp new file mode 100644 index 0000000000..f538c1571a --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + true, + true, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp new file mode 100644 index 0000000000..8aec6a29a6 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + true, + true, + true, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp new file mode 100644 index 0000000000..b4a87b816c --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + true, + true, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..1bec282dd1 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + true, + true, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..142672c620 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::bf16_t, + true, + true, + true, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..131bd27fc2 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
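Putting the pieces together, a caller that has already resolved the flags and the maxk bucket invokes one of these instantiations directly. The function name, parameter type, header name, and template-argument order below are taken verbatim from the instantiations in this diff; the surrounding call-site scaffolding is illustrative:

``` C++
#include <hip/hip_runtime.h>
#include "hstu_attention_group_forward_dispatch.hpp"

void launch_group_fwd(HstuAttentionGroupFwdParams& param, hipStream_t stream)
{
    // bf16, has_causal, softmax=false, has_bias, has_dropout, maxk 128:
    // the exact combination instantiated by one of the generated files above.
    run_group_forward_causal_softmax_bias_dropout_dispatch<
        ck_tile::bf16_t,
        true,
        false,
        true,
        true,
        128>(param, stream);
}
```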
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..789c787cc2
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..1aa5ff9c0a
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..23157f2a66
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..3f6d7a9bce
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..f0d34dd7a0
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..215aeafb40
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..a1d3f73518
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..f7c07a3692
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..d2119125f2
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..22902b14c6
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
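A note on the pattern repeated across these generated instances: each `.cpp` file compiles exactly one explicit instantiation of `run_group_forward_causal_softmax_bias_dropout_dispatch`, and the companion `*_instances_ref.hpp` added next lists matching `extern template` declarations so translation units that merely call the dispatch never re-instantiate it. The sketch below is a minimal, self-contained illustration of that split; the names and the trivial body are hypothetical, not the library's API.

```c++
#include <cstdio>

// dispatch.hpp -- heavyweight function template, visible to every TU.
template <bool kHasCausal, int kMaxK>
void run_fwd_dispatch(int seqlen)
{
    std::printf("causal=%d max_k=%d seqlen=%d\n", int(kHasCausal), kMaxK, seqlen);
}

// instances_ref.hpp -- `extern template` suppresses implicit instantiation
// in including TUs; callers only reference the already-compiled symbol.
extern template void run_fwd_dispatch<true, 64>(int);

// instance_has_causal_maxk_64.cpp -- the single explicit instantiation that
// actually emits code for this parameter combination.
template void run_fwd_dispatch<true, 64>(int);

int main() { run_fwd_dispatch<true, 64>(400); }
```

Keeping one instantiation per file holds each compiler invocation small and lets the build compile the 16 boolean combinations times four `maxk` buckets per data type in parallel.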
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_instances_ref.hpp
new file mode 100644
index 0000000000..04c2d9ebc1
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_instances_ref.hpp
@@ -0,0 +1,522 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    true,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    true,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    true,
+    false,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    true,
+    false,
+    false,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
+
+extern template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
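With all 64 bf16 combinations declared, a runtime entry point still has to map the runtime values (the causal/softmax/bias/dropout flags plus the head-dim bucket) onto one of these compile-time instantiations. A common way is a cascade that lifts each boolean into a template parameter and rounds the head dim up to the nearest generated `maxk`. The sketch below shows the idea with hypothetical names and only two of the axes; how this example's dispatcher is actually written is not visible in this diff.

```c++
#include <stdexcept>
#include <type_traits>

// Assumed to be provided by generated instance files, as in the pattern above.
template <bool kHasCausal, int kMaxK>
void run_instance(int seqlen);

inline void dispatch_fwd(bool has_causal, int head_dim, int seqlen)
{
    auto with_causal = [&](auto causal_c) {
        constexpr bool kCausal = decltype(causal_c)::value;
        // Round head_dim up to the nearest instantiated maxk bucket.
        if(head_dim <= 64)
            run_instance<kCausal, 64>(seqlen);
        else if(head_dim <= 96)
            run_instance<kCausal, 96>(seqlen);
        else if(head_dim <= 128)
            run_instance<kCausal, 128>(seqlen);
        else if(head_dim <= 256)
            run_instance<kCausal, 256>(seqlen);
        else
            throw std::runtime_error("unsupported head_dim");
    };
    has_causal ? with_causal(std::true_type{}) : with_causal(std::false_type{});
}
```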
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..c6ea8ae0b5
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..c073c076f8
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..31000dc322
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..4bd0a083db
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..fde8503f24
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..66945cdb32
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..99b0a8bdee
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..d33d4d4282
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    true,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..9fee7a4ab4
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..f644989c06
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..1152616b60
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..14361276ea
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..d90f091b57
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..4b2824c9ef
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..3ae7c5c171
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..df67b64b7d
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    false,
+    false,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..c063ed8a29
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..f4f41d2482
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..7871e54495
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..61c0c1fca7
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..f3dac51603
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..0fc4f78bae
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..01e4cfb0aa
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..d6a4cadbe5
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    true,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..5cc2377791
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..3bf82440a7
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..10000e7545
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..e783dad3a9
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..7e7482ffc3
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..3573aae758
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..a718704492
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..978901536f
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::bf16_t,
+    false,
+    true,
+    false,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..34d79f2f09
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    true,
+    false,
+    true,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..db59a69073
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + true, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp new file mode 100644 index 0000000000..ca3d36730a --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..3138e12d78 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..1823d95c82 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..e4f801955e --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..cf93936cc3 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..df55a484c4 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp new file mode 100644 index 0000000000..b2626571c0 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp new file mode 100644 index 0000000000..0d1a4d6de8 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp new file mode 100644 index 0000000000..de2eac519f --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
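(The instantiations above come in four MaxK tiers: 64, 96, 128, 256. A natural reading is that the runtime picks the smallest generated tier that covers the actual Q/K head dimension; the helper below sketches that assumption. `select_max_k` is hypothetical and not code from this PR.)

``` C++
// Hypothetical tier selection: smallest generated MaxK that covers hdim.
// The real dispatch logic in this example may choose differently.
#include <initializer_list>
#include <stdexcept>

inline int select_max_k(int hdim_qk)
{
    for (int tier : {64, 96, 128, 256})
        if (hdim_qk <= tier)
            return tier;  // e.g. hdim_qk = 80 -> MaxK 96
    throw std::invalid_argument("hdim_qk exceeds the largest generated tier (256)");
}
```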
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..c070c7eeac --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..1ad3453a51 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..f5d9be6044 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..508d3595d5 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..32d26a2b0f --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp new file mode 100644 index 0000000000..4b8d3be00c --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp new file mode 100644 index 0000000000..7c58121e0f --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + true, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp new file mode 100644 index 0000000000..3ce4e17c5e --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..f93ed023da --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..7e5edcd5b9 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..934e41fa65 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..0097bad485 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..e7d0b67853 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp new file mode 100644 index 0000000000..8b3b9e206e --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp new file mode 100644 index 0000000000..e526c132c3 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + true, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp new file mode 100644 index 0000000000..48615d260d --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..4578bad8bb --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..999076d077 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..2dc4d47a2e --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..5fc3124469 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..e3070f20e4 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_instances_ref.hpp new file mode 100644 index 0000000000..baf6e357f0 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_instances_ref.hpp @@ -0,0 +1,522 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + 
true, + true, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + 
false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + true, + 
256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + true, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + true, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + true, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + true, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + true, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + true, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + true, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + true, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + true, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + true, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + true, + false, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + true, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); + +extern template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp new file mode 100644 index 0000000000..33774731ce --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// 
SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp new file mode 100644 index 0000000000..ffe40dc6b0 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + true, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp new file mode 100644 index 0000000000..1d7299e0a7 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + true, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp new file mode 100644 index 0000000000..7f00f18216 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
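(The `hstu_attention_group_forward_fp16_instances_ref.hpp` header shown a little earlier pairs an `extern template` declaration with each of the definitions in these .cpp files. The declaration tells every translation unit that includes the header not to instantiate the template itself; exactly one generated .cpp provides the definition, so each kernel is compiled once. A simplified, self-contained sketch of that pairing, with placeholder names:)

``` C++
// Simplified sketch of the extern-template pairing used by the
// *_instances_ref.hpp header above (names are placeholders).

template <typename T, bool kCausal, int kMaxK>
void run_dispatch(int& out);                                // primary declaration

// Header side: suppress implicit instantiation in including TUs.
extern template void run_dispatch<float, true, 64>(int&);

template <typename T, bool kCausal, int kMaxK>
void run_dispatch(int& out) { out = kMaxK; }                // template body

// Generated .cpp side: the one place the body is actually emitted.
template void run_dispatch<float, true, 64>(int&);
```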
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + true, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp new file mode 100644 index 0000000000..78506f00e3 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + false, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp new file mode 100644 index 0000000000..c0459e603a --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + false, + 256>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp new file mode 100644 index 0000000000..634439ac6c --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
+// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + false, + 64>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp new file mode 100644 index 0000000000..568214ec7c --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + true, + false, + 96>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp new file mode 100644 index 0000000000..5a7e5aa18c --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! +// See the generator script +// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py` + +#include +#include "hstu_attention_group_forward_dispatch.hpp" + +template void run_group_forward_causal_softmax_bias_dropout_dispatch< + ck_tile::fp16_t, + false, + false, + false, + true, + 128>(HstuAttentionGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp new file mode 100644 index 0000000000..2969a2512b --- /dev/null +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp @@ -0,0 +1,18 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + +// The file is automatically generated, don't modify! 
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..5a7e5aa18c
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    false,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..2969a2512b
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    false,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..fe4cb16e0b
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    false,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..72752ea8ba
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    false,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..a505763364
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    false,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..b9c4ab500e
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    false,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..b2cc61aa54
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    false,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..24aada0d72
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    false,
+    false,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..af37d59bdc
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    true,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..8ce887f9ef
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    true,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..4a6d98ce4c
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    true,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..58caba4598
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    true,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..3d7bc5393a
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    true,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..5bfa2a4040
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    true,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..d7b4fcc49f
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    true,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..f0091400c9
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    true,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..a4459eac56
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    false,
+    true,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..1860db60b3
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    false,
+    true,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..eba187323f
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    false,
+    true,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..9d0928c5ab
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    false,
+    true,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp
new file mode 100644
index 0000000000..a8d24ac68b
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    false,
+    false,
+    128>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp
new file mode 100644
index 0000000000..f770e713d8
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    false,
+    false,
+    256>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
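Every instance above is generated for exactly four `maxk` tiers: 64, 96, 128 and 256. A hypothetical selection helper (not the example's actual dispatch code) would round a runtime Q/K head dimension up to the smallest generated tier:

``` C++
#include <optional>

// Returns the smallest generated maxk tier covering hdim_qk, or nullopt if
// the head dimension exceeds every generated instance.
inline std::optional<int> select_maxk(int hdim_qk)
{
    for(int tier : {64, 96, 128, 256})
    {
        if(hdim_qk <= tier)
            return tier;
    }
    return std::nullopt;
}
```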
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp
new file mode 100644
index 0000000000..29d1886c7f
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    false,
+    false,
+    64>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
new file mode 100644
index 0000000000..34b2919708
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_group_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
@@ -0,0 +1,18 @@
+
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
+
+// The file is automatically generated, don't modify!
+// See the generator script
+// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
+
+#include
+#include "hstu_attention_group_forward_dispatch.hpp"
+
+template void run_group_forward_causal_softmax_bias_dropout_dispatch<
+    ck_tile::fp16_t,
+    false,
+    true,
+    false,
+    false,
+    96>(HstuAttentionGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp
index 47fa39006b..11679e9abc 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp
index 8dd7fed807..4558f71aec 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp
index 3b221a0639..9b1482bc14 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
index 9fd80ca643..aeda9deb96 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp
index c9ea8c9f85..a481623cca 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp
index 66495aa174..944f053838 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp
index 65d0869668..b21a909096 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
index f614465337..f7b1033f82 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp
index 4e85352335..3c23ff63ef 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp
index 4a08ec78c0..5646cfbf71 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp
index 2ecf0f834a..25d802246d 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
index 3e49b95c64..37f5587895 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp
index 801d843934..cfa3cf71a6 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp
index be24f955ab..d317710dc7 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp
index 4a8c665074..9eab205462 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
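The hunks above and below are a mechanical rename: every jagged-forward instance now takes `HstuAttentionNoGroupFwdParams` instead of the old `HstuAttentionFwdParams`, since with `HstuAttentionGroupFwdParams` added the unqualified name no longer says which forward path a signature belongs to. A hypothetical sketch of the split that the rename makes explicit (the real structs and dispatch entry points live in the example's headers):

``` C++
// Stand-in parameter bundles; the real ones carry pointers, strides and
// per-batch or per-group sequence metadata.
struct NoGroupFwdParamsSketch
{
    int seqlen;
};
struct GroupFwdParamsSketch
{
    int num_groups;
};

template <typename Params>
void launch_forward(Params& /*param*/)
{
    // would resolve against the matching set of pre-built instances
}

inline void forward(bool grouped)
{
    if(grouped)
    {
        GroupFwdParamsSketch p{2};
        launch_forward(p); // the new group instances above
    }
    else
    {
        NoGroupFwdParamsSketch p{400};
        launch_forward(p); // the renamed no-group instances in these hunks
    }
}
```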
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
index b8c93970ae..35a1507267 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp
index 2f35f8b934..4ada591f5d 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp
index b5bffe92c5..20b0b15289 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp
index 895c495536..8b6b85d621 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
index 6aacf56b48..fef2cb6120 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp
index 76e162a819..4d788664e3 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp
index 9afb1ff339..b8ad957a9e 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp
index 952559d871..412c2da204 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
index dc258953b7..9b2caeb43f 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp
index 10301fff99..fd64b80b6f 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp
index b0de249e88..8a205d4c72 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp
index f73442f7c2..90d560f3a0 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
index ae0ca3789a..0e2fb8a008 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp
index 2f93f9a67e..b7b03ea309 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp
index 763bb9d6de..0355705d79 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    256>(HstuAttentionFwdParams& param, hipStream_t stream);
+    256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp
index 1891d19185..9162e89430 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
index ff0b7703be..8b99fa00ee 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp
@@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_instances_ref.hpp
index b4ac93a2e2..cec6bba188 100644
--- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_instances_ref.hpp
+++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_instances_ref.hpp
@@ -15,7 +15,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -23,7 +23,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -31,7 +31,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -39,7 +39,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -47,7 +47,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -55,7 +55,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -63,7 +63,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -71,7 +71,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -79,7 +79,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -87,7 +87,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -95,7 +95,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -103,7 +103,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -111,7 +111,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -119,7 +119,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -127,7 +127,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -135,7 +135,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    64>(HstuAttentionFwdParams& param, hipStream_t stream);
+    64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -143,7 +143,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -151,7 +151,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -159,7 +159,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -167,7 +167,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -175,7 +175,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -183,7 +183,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -191,7 +191,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -199,7 +199,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -207,7 +207,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -215,7 +215,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -223,7 +223,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -231,7 +231,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     true,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -239,7 +239,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -247,7 +247,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -255,7 +255,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -263,7 +263,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     false,
     false,
-    96>(HstuAttentionFwdParams& param, hipStream_t stream);
+    96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -271,7 +271,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -279,7 +279,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -287,7 +287,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -295,7 +295,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     true,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -303,7 +303,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -311,7 +311,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -319,7 +319,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     true,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -327,7 +327,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     false,
     true,
     false,
-    128>(HstuAttentionFwdParams& param, hipStream_t stream);
+    128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
 
 extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     ck_tile::bf16_t,
@@ -335,7 +335,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
     true,
     false,
true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -343,7 +343,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -351,7 +351,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -359,7 +359,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -367,7 +367,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -375,7 +375,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -383,7 +383,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -391,7 +391,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -399,7 +399,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -407,7 +407,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -415,7 +415,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void 
run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -423,7 +423,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -431,7 +431,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -439,7 +439,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -447,7 +447,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -455,7 +455,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -463,7 +463,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -471,7 +471,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -479,7 +479,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -487,7 +487,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -495,7 +495,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -503,7 +503,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, 
false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -511,7 +511,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::bf16_t, @@ -519,4 +519,4 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp index 9bf339ee25..017b2b9ac2 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp index 2dc97f8687..0e0917c32d 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp index b6c7fc3072..6e5c7f5c1e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp 
b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp index f01be6c3f6..1257af294e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp index 417c579a49..4ca9b31d8b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp index c7c41e843f..9775c6c7b9 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp index 01ee526d18..c5d0f88b95 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp index 0adab11fa1..0c90986c65 100644 --- 
a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp index fe604c177e..a21b46bb60 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp index 4ff7c6449b..d19e1532be 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp index 4b8e548247..30177b2410 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp index 0e7308bc48..70865faa2e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp +++ 
b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp index 69450e0993..6b28c249ea 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp index 5581c4c525..1b3a092e28 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp index 56c4b2fabf..0d00d415f6 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp index cacbe4e513..435a624cab 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void 
run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp index 60b9f399ce..0cb475ad9e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp index 65294641ca..ba492409a7 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp index 50d0570d54..664fff3f79 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp index 09226cde93..bc292bf93e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff 
--git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp index 4ef06ef3ff..baf56e86af 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp index 01715cb711..352a31dd7b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp index 60d34a2dba..005f8aea53 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp index 95664b6760..09695faf5b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp 
b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp index 9ab39a0d70..003ffabc99 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp index 14f91e6df7..ff33c263d2 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp index ab06a435bd..68ea7eea8c 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp index 1512ed1595..04780471a3 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp index c531054ec5..dfe6ab15ac 100644 --- 
a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp index 63e7a21dbf..b55f19c252 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp index a9a9b9450f..88b345eb08 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp index 69b328cea6..86fd8019e0 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_bf16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp index 487e2f3979..da56f0e9bc 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp +++ 
b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp index 2cd3628a64..b05e192905 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp index c5782adb4e..baf3f6c201 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp index 54a50c9196..e719a5c212 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp index 3a64e883a9..c7eec6244c 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void 
run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp index 6fedc16471..e040daa8c9 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp index 5d10a6bb43..7d701e2b2e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp index 966fda2c99..aaf7f1c2c4 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp index aa8387967d..1856b0fa2b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, 
hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp index e47ba94d3d..bb38021898 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp index 834773eba9..e6d1a8a6d4 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp index 92ca10b816..74bf15b0b1 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp index 0d9075f495..6798d03c1c 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp 
b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp index 8788e61c95..2e6a018581 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp index c9f5926653..25a731182c 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp index 068480af6a..8034875050 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp index 87bf14777e..8d10da5a8e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp index e12bfe1f5b..0daf202a0d 100644 --- 
a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp index 93bb83bddb..ff0b6a1591 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp index ead9df6b70..cf5872611e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp index 87a9d42429..3bcfa9588a 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp index 14f29bbad9..41232f2077 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ 
b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp index 1ce8e85516..10d6120a8a 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp index fc2d38f690..19654313ca 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp index a8fc8e0b09..b71cd4f958 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp index e29fcc1d5e..6de1ef6209 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void 
run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp index 9bc937b0cd..3b89713e20 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp index aa3f82f899..9e9afa6ca7 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp index 2463c4bc7b..fe2f2b761b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp index 9f531bee1a..75ff431b7c 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git 
a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp index 504cc5c3d0..fff2d480bf 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp index bb645bb508..7174621098 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_has_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_instances_ref.hpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_instances_ref.hpp index 027aad62c3..7d47966fc2 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_instances_ref.hpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_instances_ref.hpp @@ -15,7 +15,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -23,7 +23,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -31,7 +31,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -39,7 +39,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -47,7 +47,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 
64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -55,7 +55,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -63,7 +63,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -71,7 +71,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -79,7 +79,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -87,7 +87,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -95,7 +95,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -103,7 +103,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -111,7 +111,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -119,7 +119,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -127,7 +127,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< 
ck_tile::fp16_t, @@ -135,7 +135,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -143,7 +143,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -151,7 +151,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -159,7 +159,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -167,7 +167,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -175,7 +175,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -183,7 +183,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -191,7 +191,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -199,7 +199,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -207,7 +207,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -215,7 +215,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 
96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -223,7 +223,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -231,7 +231,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -239,7 +239,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -247,7 +247,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -255,7 +255,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -263,7 +263,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -271,7 +271,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -279,7 +279,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -287,7 +287,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -295,7 +295,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -303,7 +303,7 @@ extern 
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -311,7 +311,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -319,7 +319,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -327,7 +327,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -335,7 +335,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -343,7 +343,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -351,7 +351,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -359,7 +359,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -367,7 +367,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -375,7 +375,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -383,7 +383,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 
128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -391,7 +391,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -399,7 +399,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -407,7 +407,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -415,7 +415,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -423,7 +423,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -431,7 +431,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -439,7 +439,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -447,7 +447,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -455,7 +455,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -463,7 +463,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -471,7 +471,7 @@ 
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -479,7 +479,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -487,7 +487,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -495,7 +495,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -503,7 +503,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -511,7 +511,7 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< ck_tile::fp16_t, @@ -519,4 +519,4 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp index 76f797baec..f85078a607 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp index d7f3519fee..459499541e 100644 --- 
a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp index 2f2899c9fd..dbf21f949a 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp index 17b2e02311..dbec38b765 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp index 3e6a2c1664..891befa082 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp index 1a228db8cf..e55ff6e865 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp +++ 
b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp index a1b326c469..4fd605a52b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp index 715adc5b69..3b6b55a8e2 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_has_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp index 9e44077967..f2e1afc218 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp index b58b3a7ada..0b59f1e562 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void 
run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp index 57f1337ae2..4e8fd6401c 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp index ea6b65eb6f..85f52dd708 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp index 36a80da483..655979e251 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp index b7cce60aaf..0333a9fade 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff 
--git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp index e03e1e272a..335804c66c 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp index e5100c1e83..2cf1cc4a1e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_false_no_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< false, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp index 1e7b7d9633..5b99069797 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp index 1f8a1d7d4e..dc988443f2 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp 
b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp index 1e4f4b4b65..7c94cf09f9 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp index 9c73bd06bb..9ecedf3ec8 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp index 043d693856..172f976577 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp index 487786dd6a..eb8d04a1eb 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp index f433cbba4d..b03bd1681b 100644 --- 
a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp index 22c6a225d5..cd514c2a4e 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_has_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, true, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp index 6138bce84f..7d73b7953c 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp index ff3759616f..430208865b 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp index 60fd4c1e06..67458e2fc5 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp +++ 
b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp index 2a299dc1f3..8bab4dc559 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_has_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, true, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp index 5b0fb56a3f..1f450a1a40 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_128.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 128>(HstuAttentionFwdParams& param, hipStream_t stream); + 128>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp index e46694aaab..a910b873b8 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_256.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 256>(HstuAttentionFwdParams& param, hipStream_t stream); + 256>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp index 0d7a4320b3..16467d2f73 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_64.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, 
false, false, - 64>(HstuAttentionFwdParams& param, hipStream_t stream); + 64>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream); diff --git a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp index 9471013544..ef23ceaed0 100644 --- a/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp +++ b/example/ck_tile/18_hstu_attention/instances/hstu_attention_jagged_forward_fp16_no_causal_softmax_true_no_bias_no_dropout_maxk_96.cpp @@ -15,4 +15,4 @@ template void run_jagged_forward_causal_softmax_bias_dropout_dispatch< true, false, false, - 96>(HstuAttentionFwdParams& param, hipStream_t stream); + 96>(HstuAttentionNoGroupFwdParams& param, hipStream_t stream);
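For orientation before the reference-model changes below: the new group-aware reference resolves every per-group parameter (window size, contextual length, minimum full-attention length, attention scale, max seqlen) by integer division of the batch index, which assumes the batches of one group are laid out contiguously. A minimal standalone sketch of that indexing convention — illustrative only, not part of the patch, with hypothetical example values:

```c++
// Sketch: how a batch index selects its group's parameters in the group-wise
// HSTU reference, assuming num_batch == num_groups * num_batch_per_group and
// batches of the same group stored back to back.
#include <cassert>
#include <cstdio>
#include <vector>

int main()
{
    const int num_batch           = 6;
    const int num_batch_per_group = 2; // e.g. -b=6 -g=3 gives 2 batches per group
    const std::vector<int> group_window_sizes = {0, 3, 5}; // one entry per group (-g_local_lens)

    for(int i_batch = 0; i_batch < num_batch; ++i_batch)
    {
        const int i_group = i_batch / num_batch_per_group; // batches 0,1 -> group 0; 2,3 -> group 1; ...
        assert(i_group < static_cast<int>(group_window_sizes.size()));
        std::printf("batch %d -> group %d, window_size %d\n",
                    i_batch, i_group, group_window_sizes[i_group]);
    }
    return 0;
}
```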
diff --git a/example/ck_tile/18_hstu_attention/reference_hstu_attention.hpp b/example/ck_tile/18_hstu_attention/reference_hstu_attention.hpp index 429cf38aff..dacb0e3059 100644 --- a/example/ck_tile/18_hstu_attention/reference_hstu_attention.hpp +++ b/example/ck_tile/18_hstu_attention/reference_hstu_attention.hpp @@ -31,7 +31,7 @@ template <typename InOutDataType, typename GemmAccDataType, typename CompDataType, bool kUseSoftmax> -struct reference_hstu_attention +struct reference_no_group_hstu_attention { static void Run(bool is_cross_attention, const HostTensor<InOutDataType>& q_batch_seq_nhead_hdim, @@ -318,4 +318,257 @@ struct reference_hstu_attention } }; +template <typename InOutDataType, typename GemmAccDataType, typename CompDataType, bool kUseSoftmax> +struct reference_group_hstu_attention +{ + static void + Run(bool is_cross_attention, + const HostTensor<InOutDataType>& q_batch_seq_nhead_hdim, + const HostTensor<InOutDataType>& k_batch_seq_nhead_hdim, + const HostTensor<InOutDataType>& v_batch_seq_nhead_hdim, + HostTensor<InOutDataType>& o_batch_seq_nhead_hdim, + HostTensor<InOutDataType>& mask_batch_nhead_seq_seq, + int num_batch, + int num_batch_per_group, + float alpha, + int max_max_seqlen, // the maximum of all groups' max_seqlen + const std::vector<int>& seq_q_offsets, + const std::vector<int>& seq_kv_offsets, + const std::vector<int>& num_targets, // masking length at the end of the token + // sequence to be excluded from attention + const std::vector<int>& group_max_seqlens, // max seqlen list by groups + const std::vector<int>& group_contextual_seqlens, // contextual seqlen list by groups + const std::vector<int>& group_window_sizes, // window_size list by groups + const std::vector<int>& group_min_full_attn_seqlens, // min_full_attn_seqlen list by groups + const std::vector<float>& group_attn_scales) // attn scale list by groups + { + // check the number of batches + assert(!seq_q_offsets.empty() && seq_q_offsets.size() == num_batch + 1 && seq_kv_offsets.size() == num_batch + 1); + assert(q_batch_seq_nhead_hdim.get_lengths()[0] == 1); + assert(k_batch_seq_nhead_hdim.get_lengths()[0] == 1); + assert(v_batch_seq_nhead_hdim.get_lengths()[0] == 1); + assert(o_batch_seq_nhead_hdim.get_lengths()[0] == 1); + + // check the sequence length + assert(q_batch_seq_nhead_hdim.get_lengths()[1] == k_batch_seq_nhead_hdim.get_lengths()[1]); + assert(q_batch_seq_nhead_hdim.get_lengths()[1] == v_batch_seq_nhead_hdim.get_lengths()[1]); + assert(q_batch_seq_nhead_hdim.get_lengths()[1] == o_batch_seq_nhead_hdim.get_lengths()[1]); + + // check the number of heads + int num_head = q_batch_seq_nhead_hdim.get_lengths()[2]; + assert(num_head == k_batch_seq_nhead_hdim.get_lengths()[2]); + assert(num_head == v_batch_seq_nhead_hdim.get_lengths()[2]); + assert(num_head == o_batch_seq_nhead_hdim.get_lengths()[2]); + + // check the hdim + int hdim_qk = q_batch_seq_nhead_hdim.get_lengths()[3]; + int hdim_v = v_batch_seq_nhead_hdim.get_lengths()[3]; + assert(hdim_qk == k_batch_seq_nhead_hdim.get_lengths()[3]); + assert(hdim_v == o_batch_seq_nhead_hdim.get_lengths()[3]); + + bool save_mask = false; + + if(static_cast<int>(mask_batch_nhead_seq_seq.get_lengths()[0]) == num_batch && + static_cast<int>(mask_batch_nhead_seq_seq.get_lengths()[1]) == num_head && + static_cast<int>(mask_batch_nhead_seq_seq.get_lengths()[2]) == max_max_seqlen && + static_cast<int>(mask_batch_nhead_seq_seq.get_lengths()[3]) == max_max_seqlen) + save_mask = true; + + // check num_targets + assert(num_targets.empty() || num_targets.size() == num_batch); + + auto silu = [&](CompDataType x) { + const auto one = ck_tile::type_convert<CompDataType>(1.0f); + + return x / (one + std::exp(-x)); + }; + + auto f = [&](auto i_batch, auto i_head) { + int i_group = i_batch / num_batch_per_group; + int seqlen_q = seq_q_offsets[i_batch + 1] - seq_q_offsets[i_batch]; + int seqlen_kv = seq_kv_offsets[i_batch + 1] - seq_kv_offsets[i_batch]; + + int num_target = num_targets.empty() ? 0 : num_targets[i_batch]; + + int max_seqlen = group_max_seqlens[i_group]; + float attn_scale = group_attn_scales[i_group]; + + float scale_p = (attn_scale ? attn_scale : 1.0f / static_cast<float>(max_seqlen)); + + int contextual_seqlen = group_contextual_seqlens[i_group]; + int window_size = group_window_sizes[i_group]; + int min_full_attn_seqlen = group_min_full_attn_seqlens[i_group]; + + BOOL_SWITCH_2(window_size > 0, kHasLocal, is_cross_attention, kIsCrossAttention, [&] { + using HstuMaskType = + typename HstuBlockMasking<kHasLocal, kIsCrossAttention>::Type; + + HstuMaskType mask = [&]() { + if constexpr(kHasLocal) + { + if constexpr(kIsCrossAttention) + { + // adjust the min_full_attn_seqlen passed to the mask builder if the + // user-passed min_full_attn_seqlen is bigger than max_uih_len + if(seqlen_q - num_target > min_full_attn_seqlen) + return ck_tile::make_hstu_cross_attention_block_mask_with_local< + HstuMaskType>(true, + seqlen_q, + seqlen_kv, + contextual_seqlen, + num_target, + window_size, + min_full_attn_seqlen); + else + return ck_tile::make_hstu_cross_attention_block_mask_with_local< + HstuMaskType>(true, + seqlen_q, + seqlen_kv, + contextual_seqlen, + num_target, + window_size, + seqlen_q - num_target); + } + else + { + // adjust the min_full_attn_seqlen passed to the mask builder if the + // user-passed min_full_attn_seqlen is bigger than max_uih_len + if(seqlen_q - num_target > min_full_attn_seqlen) + return ck_tile::make_hstu_self_attention_block_mask_with_local< + HstuMaskType>(true, + seqlen_q, + contextual_seqlen, + num_target, + window_size, + min_full_attn_seqlen); + else + return ck_tile::make_hstu_self_attention_block_mask_with_local< + HstuMaskType>(true, + seqlen_q, + contextual_seqlen, + num_target, + window_size, + seqlen_q - num_target); + } + } + else + { + if constexpr(kIsCrossAttention) + return ck_tile::make_hstu_cross_attention_block_mask_without_local< + HstuMaskType>(seqlen_q, seqlen_kv, contextual_seqlen, num_target); + else + return ck_tile::make_hstu_self_attention_block_mask_without_local< + HstuMaskType>(seqlen_q, contextual_seqlen, num_target); + } + }(); + + if(save_mask) + { + for(int sq = 0; sq < max_seqlen; sq++) + for(int sk = 0; sk < max_seqlen; sk++) + mask_batch_nhead_seq_seq(i_batch, i_head, sq, sk) = 0; + + for(int sq = 0; sq < seqlen_q; sq++) + for(int sk = 0; sk < seqlen_kv; sk++) + mask_batch_nhead_seq_seq(i_batch, i_head, sq, sk) = + static_cast<InOutDataType>(mask.IsTokenPairInsideMask(sq, sk)); + } + + // for all rows in the batch + for(int sq = 0; sq < seqlen_q; sq++) + 
+                {
+                    CompDataType m =
+                        -ck_tile::numeric<CompDataType>::infinity(); // max value of the row
+                    CompDataType l =
+                        ck_tile::type_convert<CompDataType>(0.0f); // sum of exp(x-m) of the row
+
+                    std::vector<CompDataType> locals;
+
+                    // for all cols in the batch
+                    for(int sk = 0; sk < seqlen_kv; sk++)
+                    {
+                        if(mask.IsTokenPairInsideMask(sq, sk))
+                        {
+                            GemmAccDataType dot_prod = 0.f;
+                            for(int k = 0; k < hdim_qk; k++)
+                            {
+                                InOutDataType qreg = q_batch_seq_nhead_hdim(
+                                    0, seq_q_offsets[i_batch] + sq, i_head, k);
+                                InOutDataType kreg = k_batch_seq_nhead_hdim(
+                                    0, seq_kv_offsets[i_batch] + sk, i_head, k);
+
+                                dot_prod += ck_tile::type_convert<GemmAccDataType>(qreg) *
+                                            ck_tile::type_convert<GemmAccDataType>(kreg);
+                            }
+
+                            locals.push_back(ck_tile::type_convert<CompDataType>(dot_prod) *
+                                             ck_tile::type_convert<CompDataType>(alpha));
+                        }
+                        else
+                        {
+                            // masked-out score: 0 for the SiLU path, -inf for the softmax path
+                            if constexpr(!kUseSoftmax)
+                                locals.push_back(ck_tile::type_convert<CompDataType>(0.0f));
+                            else
+                                locals.push_back(-ck_tile::numeric<CompDataType>::infinity());
+                        }
+                    }
+
+                    if constexpr(!kUseSoftmax)
+                    {
+                        // SiLU element-wise
+                        for(CompDataType& elem : locals)
+                            elem = silu(elem) * ck_tile::type_convert<CompDataType>(scale_p);
+                    }
+                    else
+                    {
+                        for(CompDataType elem : locals)
+                            m = ck_tile::max(m, elem);
+
+                        if(m == -ck_tile::numeric<CompDataType>::infinity())
+                        {
+                            // the whole row is masked out
+                            for(CompDataType& elem : locals)
+                                elem = ck_tile::type_convert<CompDataType>(0.0f);
+                        }
+                        else
+                        {
+                            // stabilized sum of exp()
+                            for(CompDataType elem : locals)
+                                l += std::exp(elem - m);
+
+                            // normalization
+                            for(CompDataType& elem : locals)
+                                elem = std::exp(elem - m) / l;
+                        }
+                    }
+
+                    // second GEMM (P@V)
+                    for(int k = 0; k < hdim_v; k++)
+                    {
+                        GemmAccDataType dot_prod = 0.f;
+
+                        for(int sk = 0; sk < seqlen_kv; sk++)
+                        {
+                            // convert the attention weight to InOutDataType before the second GEMM
+                            InOutDataType preg = ck_tile::type_convert<InOutDataType>(locals[sk]);
+                            InOutDataType vreg =
+                                v_batch_seq_nhead_hdim(0, seq_kv_offsets[i_batch] + sk, i_head, k);
+
+                            dot_prod += ck_tile::type_convert<GemmAccDataType>(preg) *
+                                        ck_tile::type_convert<GemmAccDataType>(vreg);
+                        }
+
+                        o_batch_seq_nhead_hdim(0, seq_q_offsets[i_batch] + sq, i_head, k) =
+                            ck_tile::type_convert<InOutDataType>(dot_prod);
+                    }
+                }
+            });
+        };
+
+        make_ParallelTensorFunctor(f, num_batch, num_head)(std::thread::hardware_concurrency());
+    }
+};
+
+} // namespace ck_tile
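Aside: the two `if constexpr(kUseSoftmax)` branches in the reference above boil down to two row-wise transforms. Below is a minimal standalone sketch of just that step, using plain `float` in place of `CompDataType`; the names `silu_row` and `softmax_row` are illustrative and not part of the example's code:

``` C++
#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>

// SiLU path (kUseSoftmax == false): masked scores were set to 0, then every
// score is passed through SiLU and scaled by scale_p (attn_scale, or
// 1/max_seqlen when attn_scale is 0).
void silu_row(std::vector<float>& row, float scale_p)
{
    for(float& x : row)
        x = (x / (1.0f + std::exp(-x))) * scale_p;
}

// Softmax path (kUseSoftmax == true): masked scores were set to -inf;
// subtracting the row max keeps exp() from overflowing, and a fully masked
// row (max == -inf) is defined to be all zeros.
void softmax_row(std::vector<float>& row)
{
    const float minus_inf = -std::numeric_limits<float>::infinity();
    float m = minus_inf;
    for(float x : row)
        m = std::max(m, x);

    if(m == minus_inf)
    {
        std::fill(row.begin(), row.end(), 0.0f);
        return;
    }

    float l = 0.0f;
    for(float x : row)
        l += std::exp(x - m);
    for(float& x : row)
        x = std::exp(x - m) / l;
}
```

Note that the SiLU path has no normalization across the row, which is why the reference only tracks the running max `m` and sum `l` in the softmax branch.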
diff --git a/example/ck_tile/18_hstu_attention/scripts/test_group_hstu_attention.sh b/example/ck_tile/18_hstu_attention/scripts/test_group_hstu_attention.sh
new file mode 100644
index 0000000000..baad9851e2
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/scripts/test_group_hstu_attention.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+BUILD=build
+EXE=$BUILD/bin/tile_example_hstu_attention
+
+ndist=0
+
+if [ $# -ge 1 ]; then
+    ndist=$1
+fi
+
+for dtype in "fp16" "bf16"; do
+    set -x
+
+    ## no masking
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=0 -targets=0 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=0,3,0 -g_context_lens=0,0,0 -g_minfull_lens=0,0,0 -g_attn_scales=0,0.1,0
+
+    ## causal
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=1 -targets=0 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=0,3,0 -g_context_lens=0,0,0 -g_minfull_lens=0,0,0 -g_attn_scales=0,0.1,0
+
+    ## causal+local
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=1 -targets=0 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=5,5,5 -g_context_lens=0,0,0 -g_minfull_lens=0,0,0 -g_attn_scales=0,0.1,0
+
+    ## causal+local+context
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=1 -targets=0 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=5,5,5 -g_context_lens=8,8,8 -g_minfull_lens=7,7,7 -g_attn_scales=0,0.1,0
+
+    ## causal+local+context+target
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=1 -targets=8 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=5,5,5 -g_context_lens=8,8,8 -g_minfull_lens=7,7,7 -g_attn_scales=0,0.1,0
+
+    ## no-causal+local+context+target
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=0 -targets=8 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=5,5,5 -g_context_lens=8,8,8 -g_minfull_lens=7,7,7 -g_attn_scales=0,0.1,0
+
+    ## causal+local+target (minfull_len > max_uih_len)
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=1 -targets=8 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=5,5,5 -g_context_lens=0,0,0 -g_minfull_lens=290,290,290 -g_attn_scales=0,0.1,0
+
+    ## causal+local+context+target (minfull_len > max_uih_len)
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=1 -targets=8 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=5,5,5 -g_context_lens=8,8,8 -g_minfull_lens=290,290,290 -g_attn_scales=0,0.1,0
+
+    ## no-causal+local+context+target (minfull_len > max_uih_len)
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=0 -targets=8 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=5,5,5 -g_context_lens=3,3,3 -g_minfull_lens=290,290,290 -g_attn_scales=0,0.1,0
+
+    set +x
+done
+
+set -x
+## special cases
+$EXE -v=1 -prec="bf16" -b=32 -g=4 -nhead=4 -hdim_qk=16 -hdim_v=64 -causal=1 \
+    -seqlens=159,176,195,224,237,188,176,167,153,187,181,162,211,236,177,180,251,183,175,176,172,163,242,176,202,255,200,217,201,252,162,188 \
+    -targets=401,72,259,50,104,475,147,205,192,331,231,199,273,344,434,356,369,238,362,467,140,96,49,113,115,38,96,66,225,343,293,220 \
+    -norm_dist=$ndist -alpha=0.25 -g_max_seqlens=768,768,768,768 -g_local_lens=25,27,17,32 -g_context_lens=0,0,0,0 -g_minfull_lens=49,3,33,26 -g_attn_scales=0.0013,0.0013,0.0013,0.0013
+
+$EXE -v=1 -prec="bf16" -b=16 -g=2 -nhead=109 -hdim_qk=16 -hdim_v=16 -causal=1 \
+    -seqlens=89,84,80,60,69,78,67,61,65,98,94,85,88,60,89,84 \
+    -targets=20,4,7,5,3,16,7,5,15,11,6,16,14,11,15,11 \
+    -norm_dist=$ndist -alpha=0.25 -g_max_seqlens=120,120 -g_local_lens=13,2 -g_context_lens=0,0 -g_minfull_lens=14,9 -g_attn_scales=0.0083,0.0083
+
+$EXE -v=1 -prec="bf16" -b=8 -g=2 -nhead=4 -hdim_qk=16 -hdim_v=16 -causal=1 \
+    -seqlens=81,77,91,72,95,87,73,88 -targets=5,11,4,15,1,18,4,8 \
+    -norm_dist=$ndist -alpha=0.25 -g_max_seqlens=120,120 -g_local_lens=13,2 -g_context_lens=0,0 -g_minfull_lens=14,9 -g_attn_scales=0.0083,0.0083
+
+set +x
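The grouped flags in the script above (`-g_max_seqlens`, `-g_local_lens`, `-g_context_lens`, `-g_minfull_lens`, `-g_attn_scales`) each carry one value per group, and a batch picks its group by integer division, as the reference's `i_batch / num_batch_per_group` shows. Here is a sketch of that mapping, assuming the batch count divides evenly by the group count; `GroupParams` and `params_for_batch` are hypothetical names for illustration, not part of the example:

``` C++
#include <cassert>
#include <vector>

// Hypothetical per-group parameter bundle mirroring the -g_* flags.
struct GroupParams
{
    int max_seqlen;   // -g_max_seqlens
    int local_len;    // -g_local_lens
    int context_len;  // -g_context_lens
    int minfull_len;  // -g_minfull_lens
    float attn_scale; // -g_attn_scales
};

// With -b=18 -g=3 as in the scripts, batches 0..5 use groups[0],
// batches 6..11 use groups[1], and batches 12..17 use groups[2].
const GroupParams& params_for_batch(const std::vector<GroupParams>& groups,
                                    int num_batch,
                                    int i_batch)
{
    const int num_group = static_cast<int>(groups.size());
    assert(num_group > 0 && num_batch % num_group == 0);
    const int num_batch_per_group = num_batch / num_group;
    return groups[i_batch / num_batch_per_group];
}
```

The softmax variant of the test script below exercises the same cases; it differs only in appending `-softmax=1` to the executable invocation.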
diff --git a/example/ck_tile/18_hstu_attention/scripts/test_group_hstu_softmax_attention.sh b/example/ck_tile/18_hstu_attention/scripts/test_group_hstu_softmax_attention.sh
new file mode 100644
index 0000000000..84546d178e
--- /dev/null
+++ b/example/ck_tile/18_hstu_attention/scripts/test_group_hstu_softmax_attention.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+BUILD=build
+EXE="$BUILD/bin/tile_example_hstu_attention -softmax=1"
+
+ndist=0
+
+if [ $# -ge 1 ]; then
+    ndist=$1
+fi
+
+for dtype in "fp16" "bf16"; do
+    set -x
+
+    ## no masking
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=0 -targets=0 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=0,3,0 -g_context_lens=0,0,0 -g_minfull_lens=0,0,0 -g_attn_scales=0,0.1,0
+
+    ## causal
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=1 -targets=0 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=0,3,0 -g_context_lens=0,0,0 -g_minfull_lens=0,0,0 -g_attn_scales=0,0.1,0
+
+    ## causal+local
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=1 -targets=0 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=5,5,5 -g_context_lens=0,0,0 -g_minfull_lens=0,0,0 -g_attn_scales=0,0.1,0
+
+    ## causal+local+context
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=1 -targets=0 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=5,5,5 -g_context_lens=8,8,8 -g_minfull_lens=7,7,7 -g_attn_scales=0,0.1,0
+
+    ## causal+local+context+target
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=1 -targets=8 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=5,5,5 -g_context_lens=8,8,8 -g_minfull_lens=7,7,7 -g_attn_scales=0,0.1,0
+
+    ## no-causal+local+context+target
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=0 -targets=8 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=5,5,5 -g_context_lens=8,8,8 -g_minfull_lens=7,7,7 -g_attn_scales=0,0.1,0
+
+    ## causal+local+target (minfull_len > max_uih_len)
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=1 -targets=8 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=5,5,5 -g_context_lens=0,0,0 -g_minfull_lens=290,290,290 -g_attn_scales=0,0.1,0
+
+    ## causal+local+context+target (minfull_len > max_uih_len)
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=1 -targets=8 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=5,5,5 -g_context_lens=8,8,8 -g_minfull_lens=290,290,290 -g_attn_scales=0,0.1,0
+
+    ## no-causal+local+context+target (minfull_len > max_uih_len)
+    $EXE -v=1 -prec=$dtype -b=18 -g=3 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310,308,312 -causal=0 -targets=8 -norm_dist=$ndist \
+        -g_max_seqlens=310,312,312 -g_local_lens=5,5,5 -g_context_lens=3,3,3 -g_minfull_lens=290,290,290 -g_attn_scales=0,0.1,0
+
+    set +x
+done
+
+set -x
+## special cases
+$EXE -v=1 -prec="bf16" -b=32 -g=4 -nhead=4 -hdim_qk=16 -hdim_v=64 -causal=1 \
+    -seqlens=159,176,195,224,237,188,176,167,153,187,181,162,211,236,177,180,251,183,175,176,172,163,242,176,202,255,200,217,201,252,162,188 \
+    -targets=401,72,259,50,104,475,147,205,192,331,231,199,273,344,434,356,369,238,362,467,140,96,49,113,115,38,96,66,225,343,293,220 \
+    -norm_dist=$ndist -alpha=0.25 -g_max_seqlens=768,768,768,768 -g_local_lens=25,27,17,32 -g_context_lens=0,0,0,0 -g_minfull_lens=49,3,33,26 -g_attn_scales=0.0013,0.0013,0.0013,0.0013
+
+$EXE -v=1 -prec="bf16" -b=16 -g=2 -nhead=109 -hdim_qk=16 -hdim_v=16 -causal=1 \
+    -seqlens=89,84,80,60,69,78,67,61,65,98,94,85,88,60,89,84 \
+    -targets=20,4,7,5,3,16,7,5,15,11,6,16,14,11,15,11 \
+    -norm_dist=$ndist -alpha=0.25 -g_max_seqlens=120,120 -g_local_lens=13,2 -g_context_lens=0,0 -g_minfull_lens=14,9 -g_attn_scales=0.0083,0.0083
+
+$EXE -v=1 -prec="bf16" -b=8 -g=2 -nhead=4 -hdim_qk=16 -hdim_v=16 -causal=1 \
+    -seqlens=81,77,91,72,95,87,73,88 -targets=5,11,4,15,1,18,4,8 \
+    -norm_dist=$ndist -alpha=0.25 -g_max_seqlens=120,120 -g_local_lens=13,2 -g_context_lens=0,0 -g_minfull_lens=14,9 -g_attn_scales=0.0083,0.0083
+
+set +x
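One detail worth calling out: the `(minfull_len > max_uih_len)` cases in both scripts exercise the clamping that the reference implements with its if/else around `make_hstu_*_attention_block_mask_with_local`, since a requested min-full-attention length of 290 exceeds the uih length of several batches once targets are subtracted. As a sketch, the adjustment amounts to a single `std::min`; the helper name `effective_minfull_len` is illustrative only:

``` C++
#include <algorithm>

// Equivalent of the reference's branch: never ask the block mask for a
// min-full-attention region longer than the uih part of the query
// (seqlen_q minus the trailing target tokens).
int effective_minfull_len(int min_full_attn_seqlen, int seqlen_q, int num_target)
{
    return std::min(min_full_attn_seqlen, seqlen_q - num_target);
}
```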