From c5202aada0ab783dca740e091750ac037a628608 Mon Sep 17 00:00:00 2001 From: joyeamd Date: Thu, 26 Mar 2026 09:40:44 +0800 Subject: [PATCH] [CK][CK_TILE] Revert additional oob check in gemm IsSupported function (#5789) ## Motivation Fix ck_tile's oob check. ## Technical Details ## Test Plan ## Test Result ## Submission Checklist - [ ] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests. --- .../ops/gemm/kernel/universal_gemm_kernel.hpp | 100 +++++------------- .../gemm/test_gemm_pipeline_ut_cases.inc | 31 +++--- 2 files changed, 39 insertions(+), 92 deletions(-) diff --git a/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp b/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp index 3c8bc27f59..37ed8ce49a 100644 --- a/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp +++ b/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp @@ -448,23 +448,11 @@ struct UniversalGemmKernel } if(kargs.K % vectorSizeA != 0) { - const auto remainder = kargs.K % vectorSizeA; - constexpr ck_tile::index_t APackedSize = - ck_tile::numeric_traits::PackedSize; - const auto remainder_in_bytes = remainder * sizeof(ADataType) / APackedSize; - // oob can support to dword level - if(remainder_in_bytes % 4 == 0) + if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) { - AsTensorIsValid = true; - } - else - { - if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) - { - CK_TILE_ERROR("K is not a multiple of vector load size for A tensor!"); - } - AsTensorIsValid = false; + CK_TILE_ERROR("K is not a multiple of vector load size for A tensor!"); } + AsTensorIsValid = false; } } else @@ -480,24 +468,11 @@ struct UniversalGemmKernel } if(kargs.M % vectorSizeA != 0) { - const auto remainder = kargs.M % vectorSizeA; - constexpr ck_tile::index_t APackedSize = - ck_tile::numeric_traits::PackedSize; - const auto remainder_in_bytes = remainder * sizeof(ADataType) / APackedSize; - // oob can support to dword level - 
if(remainder_in_bytes % 4 == 0) + if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) { - - AsTensorIsValid = true; - } - else - { - if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) - { - CK_TILE_ERROR("M is not a multiple of vector load size for A tensor!"); - } - AsTensorIsValid = false; + CK_TILE_ERROR("M is not a multiple of vector load size for A tensor!"); } + AsTensorIsValid = false; } } }); @@ -520,58 +495,33 @@ struct UniversalGemmKernel } if(kargs.N % vectorSizeB != 0) { - const auto remainder = kargs.N % vectorSizeB; - constexpr ck_tile::index_t BPackedSize = - ck_tile::numeric_traits::PackedSize; - const auto remainder_in_bytes = remainder * sizeof(BDataType) / BPackedSize; - // oob can support to dword level - if(remainder_in_bytes % 4 == 0) + if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) { - BsTensorIsValid = true; - } - else - { - if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) - { - CK_TILE_ERROR("N is not a multiple of vector load size for B tensor!"); - } - BsTensorIsValid = false; + CK_TILE_ERROR("N is not a multiple of vector load size for B tensor!"); } + BsTensorIsValid = false; } - else + } + else + { + if(kargs.K % (TilePartitioner::KPerBlock * kargs.k_batch) != 0 && + GemmPipeline::kPadK == false) { - if(kargs.K % (TilePartitioner::KPerBlock * kargs.k_batch) != 0 && - GemmPipeline::kPadK == false) + if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) { - if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) - { - CK_TILE_ERROR( - "Can't support K that is not a multiple of k_batch * KPerBlock " - "without padding!"); - } - BsTensorIsValid = false; + CK_TILE_ERROR( + "Can't support K that is not a multiple of k_batch * KPerBlock " + "without padding!"); } - if(kargs.K % vectorSizeB != 0) + BsTensorIsValid = false; + } + if(kargs.K % vectorSizeB != 0) + { + if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) { - const auto remainder = kargs.K % vectorSizeB; - constexpr ck_tile::index_t BPackedSize = - 
ck_tile::numeric_traits::PackedSize; - const auto remainder_in_bytes = remainder * sizeof(BDataType) / BPackedSize; - // oob can support to dword level - if(remainder_in_bytes % 4 == 0) - { - BsTensorIsValid = true; - } - else - { - if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) - { - CK_TILE_ERROR( - "K is not a multiple of vector load size for B tensor!"); - } - BsTensorIsValid = false; - } + CK_TILE_ERROR("K is not a multiple of vector load size for B tensor!"); } + BsTensorIsValid = false; } } }); diff --git a/test/ck_tile/gemm/test_gemm_pipeline_ut_cases.inc b/test/ck_tile/gemm/test_gemm_pipeline_ut_cases.inc index bcb3fc5733..c34374c66f 100644 --- a/test/ck_tile/gemm/test_gemm_pipeline_ut_cases.inc +++ b/test/ck_tile/gemm/test_gemm_pipeline_ut_cases.inc @@ -31,14 +31,7 @@ TYPED_TEST(TEST_SUITE_NAME, SmallM) if constexpr(std::is_same_v) { - if(M * sizeof(typename TestFixture::ADataType) % 4 == 0) // oob fit dword - { - this->Run(M, N, K); - } - else - { - EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); - } + EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); } else { @@ -91,14 +84,7 @@ TYPED_TEST(TEST_SUITE_NAME, MidLargeM) } else { - if(M * sizeof(typename TestFixture::ADataType) % 4 == 0) // oob fit dword - { - this->Run(M, N, K); - } - else - { - EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); - } + EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); } } else @@ -120,7 +106,18 @@ TYPED_TEST(TEST_SUITE_NAME, PaddK) for(int M : Ms) { - this->Run(M, N, K); + if constexpr(std::is_same_v) + { +#if defined(ARCH_GFX12) || defined(ARCH_GFX11) + this->Run(M, N, K); +#else + EXPECT_THROW(this->Run(M, N, K), std::runtime_error); +#endif + } + else + { + this->Run(M, N, K); + } } }