mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 01:10:17 +00:00
Add benchmark example
This commit is contained in:
@@ -22,9 +22,6 @@ namespace ck_tile {
|
||||
|
||||
template <typename Problem_,
|
||||
typename Policy_ = MHCDefaultPolicy,
|
||||
index_t kMTile_ = 64, // Batch tile size
|
||||
index_t kNTile_ = 32, // Output dimension tile (can cover all 24 outputs)
|
||||
index_t kKTile_ = 8, // K-tile for C dimension (must match BlockGemmShape::kK)
|
||||
typename Activation_ = element_wise::Sigmoid>
|
||||
struct MHCKernelV3
|
||||
{
|
||||
@@ -37,9 +34,10 @@ struct MHCKernelV3
|
||||
using YDataType = ck_tile::remove_cvref_t<typename Problem::YDataType>;
|
||||
using PhiDataType = ck_tile::remove_cvref_t<typename Problem::PhiDataType>;
|
||||
|
||||
static constexpr index_t kMTile = kMTile_; // Batch tile
|
||||
static constexpr index_t kNTile = kNTile_; // Output tile
|
||||
static constexpr index_t kKTile = kKTile_; // K tile for C dimension
|
||||
// Automatically derive tile sizes from BlockGemmShape (single source of truth!)
|
||||
static constexpr index_t kMTile = Problem::BlockGemmShape::kM; // Batch tile
|
||||
static constexpr index_t kNTile = Problem::BlockGemmShape::kN; // Output tile
|
||||
static constexpr index_t kKTile = Problem::BlockGemmShape::kK; // K tile for C dimension
|
||||
|
||||
static constexpr index_t kBlockSize = Problem::kBlockSize;
|
||||
|
||||
|
||||
@@ -26,12 +26,12 @@ struct MHCProblem
|
||||
using CDataType = ComputeDataType; // Output/accumulator matrix C
|
||||
|
||||
// BlockGemmShape with kM, kN, kK members for BlockGemm
|
||||
// Use supported warp gemm configuration for float32: 32x32x8
|
||||
// We'll use 2 warps in M and 1 warp in N to get 64x32 block
|
||||
// Using 32x32x8 warp tiles (supported by MFMA) with 2x1 warp layout for 64x32 block
|
||||
// This gives better parallelism than a single 64x32 warp tile while using supported warp sizes
|
||||
using BlockGemmShape =
|
||||
TileGemmShape<sequence<64, 32, 8>, // BlockTile (M, N, K)
|
||||
TileGemmShape<sequence<64, 32, 8>, // BlockTile (M, N, K) - keep original for now
|
||||
sequence<2, 1, 1>, // BlockWarps (2 warps in M, 1 in N, 1 in K)
|
||||
sequence<32, 32, 8>>; // WarpTile (matches available float32 MFMA)
|
||||
sequence<32, 32, 8>>; // WarpTile (32x32x8 is supported by MFMA)
|
||||
|
||||
// Layout types for BlockGemm
|
||||
using ALayout = ck_tile::tensor_layout::gemm::RowMajor; // x is row-major [B, nC]
|
||||
|
||||
Reference in New Issue
Block a user