mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
use persistent
This commit is contained in:
@@ -81,7 +81,7 @@ struct MXfp4_GemmConfig16 : MxGemmConfig
 // GEMM config with 16x16 warp tile
 struct MXfp8_GemmConfig16 : MxGemmConfig
 {
-static constexpr ck_tile::index_t M_Tile = 32;
+static constexpr ck_tile::index_t M_Tile = 64;
 static constexpr ck_tile::index_t N_Tile = 64;
 static constexpr ck_tile::index_t K_Tile = 256;
 };
@@ -50,8 +50,8 @@ int run_mx_gemm_with_layouts(int argc,
 ck_tile::host_tensor_descriptor(M, N, stride_C, is_row_major(CLayout{})));
 
 // Scale tensors - follow parent matrix layouts for optimal memory access
-// A scales: [M, K/32] with A's layout → coalescing follows A's pattern
-// B scales: [K/32, N] with B's layout → coalescing follows B's pattern
+// A scales: [M, K/32] with A's layout
+// B scales: [K/32, N] with B's layout
 using ScaleType = ck_tile::e8m0_t;
 ck_tile::index_t scale_k_size = K / 32;
 
@@ -189,7 +189,7 @@ int run_mx_gemm_example(int argc, char* argv[])
 ck_tile::pk_fp4_t,
 float,
 MXfp4_GemmConfig16,
-false>(argc, argv, Row{}, Col{}, Row{});
+true>(argc, argv, Row{}, Col{}, Row{});
 }
 else if(mx_prec == "fp8" || mx_prec == "fp8xfp8")
 {
@@ -197,7 +197,7 @@ int run_mx_gemm_example(int argc, char* argv[])
 ck_tile::fp8_t,
 float,
 MXfp8_GemmConfig16,
-false>(argc, argv, Row{}, Col{}, Row{});
+true>(argc, argv, Row{}, Col{}, Row{});
 }
 else
 {
Reference in New Issue
Block a user