Enable hdim96 instances

This commit is contained in:
Qianfeng Zhang
2025-12-12 14:54:11 +00:00
parent 18108d0d54
commit b3d54477f1
73 changed files with 1751 additions and 42 deletions

View File

@@ -64,7 +64,7 @@ BOOL_MAP_DROPOUT = {
False: "no_dropout",
}
INT_MAP_MAX_K = {hd: f"maxk_{hd}" for hd in [64, 128, 256]}
INT_MAP_MAX_K = {hd: f"maxk_{hd}" for hd in [64, 96, 128, 256]}
TYPE_CTYPE_MAP = {
"fp16": "ck_tile::fp16_t",
@@ -159,7 +159,7 @@ def create_forward_instances_ref(instance_dir: Path, headdims: List) -> None:
file.write(forward_instance)
if __name__ == "__main__":
headdims_fwd = [64, 128, 256]
headdims_fwd = [64, 96, 128, 256]
this_dir = os.path.dirname(__file__)
output_dir = Path(this_dir) / "instances"

View File

@@ -18,14 +18,6 @@ struct HstuAttentionNoSoftmaxFwdBlockTile;
// Tile-sizes: M N0 N0Sub N1 K1 MaxK (MaxK % N1 == 0, N0 % K1 == 0)
//
template <>
struct HstuAttentionNoSoftmaxFwdBlockTile<32>
{
using type = ck_tile::sequence<64, 64, 32, 32, 32, 32>;
using gemm0_warps = ck_tile::sequence<2, 1, 1>;
using gemm1_warps = ck_tile::sequence<2, 1, 1>;
};
template <>
struct HstuAttentionNoSoftmaxFwdBlockTile<64>
{
@@ -34,6 +26,14 @@ struct HstuAttentionNoSoftmaxFwdBlockTile<64>
using gemm1_warps = ck_tile::sequence<4, 1, 1>;
};
template <>
struct HstuAttentionNoSoftmaxFwdBlockTile<96>
{
using type = ck_tile::sequence<128, 64, 32, 128, 32, 96>;
using gemm0_warps = ck_tile::sequence<4, 1, 1>;
using gemm1_warps = ck_tile::sequence<4, 1, 1>;
};
template <>
struct HstuAttentionNoSoftmaxFwdBlockTile<128>
{
@@ -55,14 +55,6 @@ struct HstuAttentionWithSoftmaxFwdBlockTile;
// Tile-sizes: M N0 N0Sub N1 K1 MaxK (MaxK % N1 == 0, N0 % K1 == 0)
//
template <>
struct HstuAttentionWithSoftmaxFwdBlockTile<32>
{
using type = ck_tile::sequence<64, 64, 32, 32, 32, 32>;
using gemm0_warps = ck_tile::sequence<2, 1, 1>;
using gemm1_warps = ck_tile::sequence<2, 1, 1>;
};
template <>
struct HstuAttentionWithSoftmaxFwdBlockTile<64>
{
@@ -71,6 +63,14 @@ struct HstuAttentionWithSoftmaxFwdBlockTile<64>
using gemm1_warps = ck_tile::sequence<4, 1, 1>;
};
template <>
struct HstuAttentionWithSoftmaxFwdBlockTile<96>
{
using type = ck_tile::sequence<128, 64, 32, 128, 32, 96>;
using gemm0_warps = ck_tile::sequence<4, 1, 1>;
using gemm1_warps = ck_tile::sequence<4, 1, 1>;
};
template <>
struct HstuAttentionWithSoftmaxFwdBlockTile<128>
{
@@ -90,17 +90,6 @@ struct HstuAttentionWithSoftmaxFwdBlockTile<256>
template <ck_tile::index_t MaxK>
struct HstuAttentionNoSoftmaxFwdTileSetting;
template <>
struct HstuAttentionNoSoftmaxFwdTileSetting<32>
{
using Type = ck_tile::HstuAttentionFwdTileSettingClass<
typename HstuAttentionNoSoftmaxFwdBlockTile<32>::type,
typename HstuAttentionNoSoftmaxFwdBlockTile<32>::gemm0_warps,
HstuAttentionFwdWarpTile1,
typename HstuAttentionNoSoftmaxFwdBlockTile<32>::gemm1_warps,
HstuAttentionFwdWarpTile1>;
};
template <>
struct HstuAttentionNoSoftmaxFwdTileSetting<64>
{
@@ -112,6 +101,17 @@ struct HstuAttentionNoSoftmaxFwdTileSetting<64>
HstuAttentionFwdWarpTile1>;
};
template <>
struct HstuAttentionNoSoftmaxFwdTileSetting<96>
{
using Type = ck_tile::HstuAttentionFwdTileSettingClass<
typename HstuAttentionNoSoftmaxFwdBlockTile<96>::type,
typename HstuAttentionNoSoftmaxFwdBlockTile<96>::gemm0_warps,
HstuAttentionFwdWarpTile1,
typename HstuAttentionNoSoftmaxFwdBlockTile<96>::gemm1_warps,
HstuAttentionFwdWarpTile1>;
};
template <>
struct HstuAttentionNoSoftmaxFwdTileSetting<128>
{
@@ -137,17 +137,6 @@ struct HstuAttentionNoSoftmaxFwdTileSetting<256>
template <ck_tile::index_t MaxK>
struct HstuAttentionWithSoftmaxFwdTileSetting;
template <>
struct HstuAttentionWithSoftmaxFwdTileSetting<32>
{
using Type = ck_tile::HstuAttentionFwdTileSettingClass<
typename HstuAttentionWithSoftmaxFwdBlockTile<32>::type,
typename HstuAttentionWithSoftmaxFwdBlockTile<32>::gemm0_warps,
HstuAttentionFwdWarpTile1,
typename HstuAttentionWithSoftmaxFwdBlockTile<32>::gemm1_warps,
HstuAttentionFwdWarpTile1>;
};
template <>
struct HstuAttentionWithSoftmaxFwdTileSetting<64>
{
@@ -159,6 +148,17 @@ struct HstuAttentionWithSoftmaxFwdTileSetting<64>
HstuAttentionFwdWarpTile3>;
};
template <>
struct HstuAttentionWithSoftmaxFwdTileSetting<96>
{
using Type = ck_tile::HstuAttentionFwdTileSettingClass<
typename HstuAttentionWithSoftmaxFwdBlockTile<96>::type,
typename HstuAttentionWithSoftmaxFwdBlockTile<96>::gemm0_warps,
HstuAttentionFwdWarpTile2,
typename HstuAttentionWithSoftmaxFwdBlockTile<96>::gemm1_warps,
HstuAttentionFwdWarpTile2>;
};
template <>
struct HstuAttentionWithSoftmaxFwdTileSetting<128>
{
@@ -204,6 +204,14 @@ struct HstuAttentionNoSoftmaxFwdBlockTile<64>
using gemm1_warps = ck_tile::sequence<4, 1, 1>;
};
template <>
struct HstuAttentionNoSoftmaxFwdBlockTile<96>
{
using type = ck_tile::sequence<128, 64, 32, 128, 32, 96>;
using gemm0_warps = ck_tile::sequence<4, 1, 1>;
using gemm1_warps = ck_tile::sequence<4, 1, 1>;
};
template <>
struct HstuAttentionNoSoftmaxFwdBlockTile<128>
{
@@ -241,6 +249,14 @@ struct HstuAttentionWithSoftmaxFwdBlockTile<64>
using gemm1_warps = ck_tile::sequence<4, 1, 1>;
};
template <>
struct HstuAttentionWithSoftmaxFwdBlockTile<96>
{
using type = ck_tile::sequence<128, 64, 32, 128, 32, 96>;
using gemm0_warps = ck_tile::sequence<4, 1, 1>;
using gemm1_warps = ck_tile::sequence<4, 1, 1>;
};
template <>
struct HstuAttentionWithSoftmaxFwdBlockTile<128>
{
@@ -282,6 +298,17 @@ struct HstuAttentionNoSoftmaxFwdTileSetting<64>
HstuAttentionFwdWarpTile2>;
};
template <>
struct HstuAttentionNoSoftmaxFwdTileSetting<96>
{
using Type = ck_tile::HstuAttentionFwdTileSettingClass<
typename HstuAttentionNoSoftmaxFwdBlockTile<96>::type,
typename HstuAttentionNoSoftmaxFwdBlockTile<96>::gemm0_warps,
HstuAttentionFwdWarpTile2,
typename HstuAttentionNoSoftmaxFwdBlockTile<96>::gemm1_warps,
HstuAttentionFwdWarpTile2>;
};
template <>
struct HstuAttentionNoSoftmaxFwdTileSetting<128>
{
@@ -329,6 +356,17 @@ struct HstuAttentionWithSoftmaxFwdTileSetting<64>
HstuAttentionFwdWarpTile3>;
};
template <>
struct HstuAttentionWithSoftmaxFwdTileSetting<96>
{
using Type = ck_tile::HstuAttentionFwdTileSettingClass<
typename HstuAttentionWithSoftmaxFwdBlockTile<96>::type,
typename HstuAttentionWithSoftmaxFwdBlockTile<96>::gemm0_warps,
HstuAttentionFwdWarpTile3,
typename HstuAttentionWithSoftmaxFwdBlockTile<96>::gemm1_warps,
HstuAttentionFwdWarpTile3>;
};
template <>
struct HstuAttentionWithSoftmaxFwdTileSetting<128>
{

View File

@@ -13,6 +13,11 @@
constexpr ck_tile::index_t CONST_NAME = 64; \
__VA_ARGS__(); \
} \
else if(HDIM_1 <= 96 && HDIM_2 <= 96) \
{ \
constexpr ck_tile::index_t CONST_NAME = 96; \
__VA_ARGS__(); \
} \
else if(HDIM_1 <= 128 && HDIM_2 <= 128) \
{ \
constexpr ck_tile::index_t CONST_NAME = 128; \

View File

@@ -150,7 +150,8 @@ struct HstuAttentionNoSoftmaxFwdPipelineQRKSVSTrLoad
static_assert(kM0 == QDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
kN0 == KDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
kQKHeaddim == KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
kSubQKHeaddim ==
KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
kN1 == VDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
kK1 == VDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
kM0 == BiasDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&

View File

@@ -152,7 +152,8 @@ struct HstuAttentionWithSoftmaxFwdPipelineQRKSVSTrLoad
static_assert(kM0 == QDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
kN0 == KDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
kQKHeaddim == KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
kSubQKHeaddim ==
KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
kN1 == VDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
kK1 == VDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
kM0 == BiasDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -137,6 +137,134 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
64>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -137,6 +137,134 @@ extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
false,
64>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_batched_forward_dispatch.hpp"
// hdim-96 fp16 batched-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_batched_forward_dispatch.hpp.
template void run_batched_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -137,6 +137,134 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
false,
64>(HstuAttentionFwdParams& param, hipStream_t stream);
// hdim-96 bf16 jagged-forward instance declarations: all 16 combinations of the
// four boolean template toggles (presumably causal/softmax/bias/dropout, per
// the dispatcher name — confirm against the dispatch header). Definitions live
// in the generated per-instance .cpp files.
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
true,

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::bf16_t is declared in bfloat16.hpp, not half.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/bfloat16.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 bf16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::bf16_t,
false,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -137,6 +137,134 @@ extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
false,
64>(HstuAttentionFwdParams& param, hipStream_t stream);
// hdim-96 fp16 jagged-forward instance declarations: all 16 combinations of the
// four boolean template toggles (presumably causal/softmax/bias/dropout, per
// the dispatcher name — confirm against the dispatch header). Definitions live
// in the generated per-instance .cpp files.
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);
extern template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
true,

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
false,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
true,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
true,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
true,
96>(HstuAttentionFwdParams& param, hipStream_t stream);

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// The file is automatically generated, don't modify!
// See the generator script
// `composable_kernel/example/ck_tile/18_hstu_attention/generate_instances.py`
// NOTE(review): ck_tile::fp16_t is declared in half.hpp, not bfloat16.hpp —
// the generator's type->header map appears swapped; fix it there too (TODO).
#include <ck_tile/core/numeric/half.hpp>
#include "hstu_attention_jagged_forward_dispatch.hpp"
// hdim-96 fp16 jagged-forward instance; the four bools presumably select the
// causal/softmax/bias/dropout toggles in dispatcher-name order — confirm in
// hstu_attention_jagged_forward_dispatch.hpp.
template void run_jagged_forward_causal_softmax_bias_dropout_dispatch<
ck_tile::fp16_t,
false,
true,
false,
false,
96>(HstuAttentionFwdParams& param, hipStream_t stream);