[rocm-libraries] ROCm/rocm-libraries#5789 (commit 6654ca6)

[CK][CK_TILE] Revert additional oob check in gemm IsSupported function (#5789) ## Motivation Fix ck_tile's out-of-bounds (OOB) check. ## Technical Details  ## Test Plan  ## Test Result  ## Submission Checklist - [ ] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
2026-05-04 13:41:24 +00:00 · 2026-03-26 01:41:35 +00:00
parent 0004a37de5
commit 046d3ac274
2 changed files with 39 additions and 92 deletions
--- a/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp
+++ b/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp
@@ -448,23 +448,11 @@ struct UniversalGemmKernel
                }
                if(kargs.K % vectorSizeA != 0)
                {
-                    const auto remainder = kargs.K % vectorSizeA;
-                    constexpr ck_tile::index_t APackedSize =
-                        ck_tile::numeric_traits<ADataType>::PackedSize;
-                    const auto remainder_in_bytes = remainder * sizeof(ADataType) / APackedSize;
-                    // oob can support to dword level
-                    if(remainder_in_bytes % 4 == 0)
+                    if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
                    {
-                        AsTensorIsValid = true;
-                    }
-                    else
-                    {
-                        if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
-                        {
-                            CK_TILE_ERROR("K is not a multiple of vector load size for A tensor!");
-                        }
-                        AsTensorIsValid = false;
+                        CK_TILE_ERROR("K is not a multiple of vector load size for A tensor!");
                    }
+                    AsTensorIsValid = false;
                }
            }
            else
@@ -480,24 +468,11 @@ struct UniversalGemmKernel
                }
                if(kargs.M % vectorSizeA != 0)
                {
-                    const auto remainder = kargs.M % vectorSizeA;
-                    constexpr ck_tile::index_t APackedSize =
-                        ck_tile::numeric_traits<ADataType>::PackedSize;
-                    const auto remainder_in_bytes = remainder * sizeof(ADataType) / APackedSize;
-                    // oob can support to dword level
-                    if(remainder_in_bytes % 4 == 0)
+                    if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
                    {
-
-                        AsTensorIsValid = true;
-                    }
-                    else
-                    {
-                        if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
-                        {
-                            CK_TILE_ERROR("M is not a multiple of vector load size for A tensor!");
-                        }
-                        AsTensorIsValid = false;
+                        CK_TILE_ERROR("M is not a multiple of vector load size for A tensor!");
                    }
+                    AsTensorIsValid = false;
                }
            }
        });
@@ -520,58 +495,33 @@ struct UniversalGemmKernel
                }
                if(kargs.N % vectorSizeB != 0)
                {
-                    const auto remainder = kargs.N % vectorSizeB;
-                    constexpr ck_tile::index_t BPackedSize =
-                        ck_tile::numeric_traits<BDataType>::PackedSize;
-                    const auto remainder_in_bytes = remainder * sizeof(BDataType) / BPackedSize;
-                    // oob can support to dword level
-                    if(remainder_in_bytes % 4 == 0)
+                    if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
                    {
-                        BsTensorIsValid = true;
-                    }
-                    else
-                    {
-                        if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
-                        {
-                            CK_TILE_ERROR("N is not a multiple of vector load size for B tensor!");
-                        }
-                        BsTensorIsValid = false;
+                        CK_TILE_ERROR("N is not a multiple of vector load size for B tensor!");
                    }
+                    BsTensorIsValid = false;
                }
-                else
+            }
+            else
+            {
+                if(kargs.K % (TilePartitioner::KPerBlock * kargs.k_batch) != 0 &&
+                   GemmPipeline::kPadK == false)
                {
-                    if(kargs.K % (TilePartitioner::KPerBlock * kargs.k_batch) != 0 &&
-                       GemmPipeline::kPadK == false)
+                    if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
                    {
-                        if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
-                        {
-                            CK_TILE_ERROR(
-                                "Can't support K that is not a multiple of k_batch * KPerBlock "
-                                "without padding!");
-                        }
-                        BsTensorIsValid = false;
+                        CK_TILE_ERROR(
+                            "Can't support K that is not a multiple of k_batch * KPerBlock "
+                            "without padding!");
                    }
-                    if(kargs.K % vectorSizeB != 0)
+                    BsTensorIsValid = false;
+                }
+                if(kargs.K % vectorSizeB != 0)
+                {
+                    if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
                    {
-                        const auto remainder = kargs.K % vectorSizeB;
-                        constexpr ck_tile::index_t BPackedSize =
-                            ck_tile::numeric_traits<BDataType>::PackedSize;
-                        const auto remainder_in_bytes = remainder * sizeof(BDataType) / BPackedSize;
-                        // oob can support to dword level
-                        if(remainder_in_bytes % 4 == 0)
-                        {
-                            BsTensorIsValid = true;
-                        }
-                        else
-                        {
-                            if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
-                            {
-                                CK_TILE_ERROR(
-                                    "K is not a multiple of vector load size for B tensor!");
-                            }
-                            BsTensorIsValid = false;
-                        }
+                        CK_TILE_ERROR("K is not a multiple of vector load size for B tensor!");
                    }
+                    BsTensorIsValid = false;
                }
            }
        });