mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
[rocm-libraries] ROCm/rocm-libraries#4804 (commit 832dd0e)
Add Tile Distribution Encoding Register Mapping debug utility for MFMA / WMMA unification work. (#4804) ## Motivation This PR adds a small utility that allows you to use Tile Distribution Encodings to directly map matrix elements to register locations and vice versa. It can also print forward and backward layout mappings similar to the Matrix Calculator utility. The utility is not meant for index calculations in actual kernels, but rather as a debugging tool and probably for automated verification of the policy structs in the new WMMA / MFMA unification design. ## Technical Details Tile Distribution Encodings are a core part of CK Tile which can define the relationship between register and intrinsic matrix fragment elements. They allow for any mapping based on unmerge and merge transformations. They also allow for a special "Repeat" dimension, which acts like an additional matrix dimension and allows for replication of certain matrix elements. The new mapping utility can deal with all of these aspects. ## Test Plan Since this is a debug utility there is nothing to directly test, but there is an example file that defines four different Tile Distribution Encodings and prints their forward and backward mappings, along with some extra parameters. ## Test Result ## Submission Checklist - [x] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
This commit is contained in:
committed by
assistant-librarian[bot]
parent
a8e2ec22cf
commit
b042e1805a
4
example/ck_tile/51_tile_distr_enc_reg_map/CMakeLists.txt
Normal file
4
example/ck_tile/51_tile_distr_enc_reg_map/CMakeLists.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
add_executable(tile_example_tile_distr_enc_reg_map example_tile_distr_enc_reg_map.cpp)
|
||||
@@ -0,0 +1,76 @@
|
||||
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <stdio.h>
|
||||
#include "ck_tile/core/arch/mma/utility/tile_distribution_encoding_register_mapper.hpp"
|
||||
|
||||
using namespace ck_tile;
|
||||
using namespace ck_tile::core::arch::mma;
|
||||
|
||||
int main()
|
||||
{
|
||||
// Define some tile distribution encodings and print register mappings.
|
||||
|
||||
printf("Example RDNA3 V_WMMA_F32_16X16X16_F16 A Matrix (M, K)\nL{RM} V{K}\n");
|
||||
TileDistrEncRegMap<
|
||||
tile_distribution_encoding<sequence<2>, // R (= Repeat) Lanes 0-15 are duplicated at 16-31
|
||||
tuple<sequence<16>, sequence<16>>, // H (= Hidden dims = unmerged
|
||||
// dims) for M, K dimension
|
||||
tuple<sequence<0, 1>>, // P major (= Parallelism = lanes)
|
||||
tuple<sequence<0, 0>>, // P minor
|
||||
sequence<2>, // Y major (= Yield = Vector items)
|
||||
sequence<0> // Y minor
|
||||
>>::print();
|
||||
|
||||
printf("\nExample RDNA3 V_WMMA_F32_16X16X16_F16 C Matrix (M, N)\nM{2, 1} L{M1N} V{M2M0} (dummy "
|
||||
"unmerge to be more similar to other layouts)\n");
|
||||
TileDistrEncRegMap<
|
||||
tile_distribution_encoding<sequence<>, // R (= Repeat)
|
||||
tuple<sequence<8, 2, 1>, sequence<16>>, // H (= Hidden dims =
|
||||
// unmerged dims) for M,
|
||||
// N dimension
|
||||
tuple<sequence<1, 2>>, // P major (= Parallelism = lanes)
|
||||
tuple<sequence<1, 0>>, // P minor
|
||||
sequence<1, 1>, // Y major (= Yield = Vector items)
|
||||
sequence<0, 2> // Y minor
|
||||
>>::print();
|
||||
|
||||
printf("\nExample CDNA __builtin_amdgcn_mfma_f32_4x4x4f16 A Matrix (M, K) with 16x "
|
||||
"block-hiding in the M dimension\nL{BM} V{K}\n");
|
||||
TileDistrEncRegMap<
|
||||
tile_distribution_encoding<sequence<>, // R (= Repeat)
|
||||
tuple<sequence<16, 4>, sequence<4>>, // H (= Hidden dims =
|
||||
// unmerged dims) for M,
|
||||
// K dimension
|
||||
tuple<sequence<1, 1>>, // P major (= Parallelism = lanes)
|
||||
tuple<sequence<0, 1>>, // P minor
|
||||
sequence<2>, // Y major (= Yield = Vector items)
|
||||
sequence<0> // Y minor
|
||||
>>::print();
|
||||
|
||||
printf("\nExample CDNA __builtin_amdgcn_mfma_f32_4x4x4f16 B Matrix (N, K) with 16x "
|
||||
"block-hiding in the M dimension\nL{BN} V{K}\n");
|
||||
TileDistrEncRegMap<
|
||||
tile_distribution_encoding<sequence<16>, // R (= Repeat)
|
||||
tuple<sequence<4>, sequence<4>>, // H (= Hidden dims =
|
||||
// unmerged dims) for N,
|
||||
// K dimension
|
||||
tuple<sequence<0, 1>>, // P major (= Parallelism = lanes)
|
||||
tuple<sequence<0, 0>>, // P minor
|
||||
sequence<2>, // Y major (= Yield = Vector items)
|
||||
sequence<0> // Y minor
|
||||
>>::print();
|
||||
|
||||
printf("\nCustom example\n");
|
||||
TileDistrEncRegMap<
|
||||
tile_distribution_encoding<sequence<1>, // R (= Repeat)
|
||||
tuple<sequence<16>, sequence<1, 2, 8>>, // H (= Hidden dims =
|
||||
// unmerged dims)
|
||||
tuple<sequence<2, 1>>, // P major (= Parallelism = lanes)
|
||||
tuple<sequence<1, 0>>, // P minor
|
||||
sequence<2, 2>, // Y major (= Yield = Vector items)
|
||||
sequence<0, 2> // Y minor
|
||||
>>::print();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -31,4 +31,5 @@ add_subdirectory(38_block_scale_gemm)
|
||||
add_subdirectory(40_streamk_gemm)
|
||||
add_subdirectory(41_batched_contraction)
|
||||
add_subdirectory(50_sparse_attn)
|
||||
add_subdirectory(51_tile_distr_enc_reg_map)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user