diff --git a/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp b/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp index 37ed8ce49a..3c8bc27f59 100644 --- a/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp +++ b/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp @@ -448,11 +448,23 @@ struct UniversalGemmKernel } if(kargs.K % vectorSizeA != 0) { - if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) + const auto remainder = kargs.K % vectorSizeA; + constexpr ck_tile::index_t APackedSize = + ck_tile::numeric_traits::PackedSize; + const auto remainder_in_bytes = remainder * sizeof(ADataType) / APackedSize; + // oob can support to dword level + if(remainder_in_bytes % 4 == 0) { - CK_TILE_ERROR("K is not a multiple of vector load size for A tensor!"); + AsTensorIsValid = true; + } + else + { + if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) + { + CK_TILE_ERROR("K is not a multiple of vector load size for A tensor!"); + } + AsTensorIsValid = false; } - AsTensorIsValid = false; } } else @@ -468,11 +480,24 @@ struct UniversalGemmKernel } if(kargs.M % vectorSizeA != 0) { - if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) + const auto remainder = kargs.M % vectorSizeA; + constexpr ck_tile::index_t APackedSize = + ck_tile::numeric_traits::PackedSize; + const auto remainder_in_bytes = remainder * sizeof(ADataType) / APackedSize; + // oob can support to dword level + if(remainder_in_bytes % 4 == 0) { - CK_TILE_ERROR("M is not a multiple of vector load size for A tensor!"); + + AsTensorIsValid = true; + } + else + { + if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) + { + CK_TILE_ERROR("M is not a multiple of vector load size for A tensor!"); + } + AsTensorIsValid = false; } - AsTensorIsValid = false; } } }); @@ -495,33 +520,58 @@ struct UniversalGemmKernel } if(kargs.N % vectorSizeB != 0) { - if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) + const auto remainder = kargs.N % vectorSizeB; + constexpr ck_tile::index_t BPackedSize = + ck_tile::numeric_traits::PackedSize; + const auto remainder_in_bytes = remainder * sizeof(BDataType) / BPackedSize; + // oob can support to dword level + if(remainder_in_bytes % 4 == 0) { - CK_TILE_ERROR("N is not a multiple of vector load size for B tensor!"); + BsTensorIsValid = true; + } + else + { + if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) + { + CK_TILE_ERROR("N is not a multiple of vector load size for B tensor!"); + } + BsTensorIsValid = false; } - BsTensorIsValid = false; } - } - else - { - if(kargs.K % (TilePartitioner::KPerBlock * kargs.k_batch) != 0 && - GemmPipeline::kPadK == false) + else { - if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) + if(kargs.K % (TilePartitioner::KPerBlock * kargs.k_batch) != 0 && + GemmPipeline::kPadK == false) { - CK_TILE_ERROR( - "Can't support K that is not a multiple of k_batch * KPerBlock " - "without padding!"); + if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) + { + CK_TILE_ERROR( + "Can't support K that is not a multiple of k_batch * KPerBlock " + "without padding!"); + } + BsTensorIsValid = false; } - BsTensorIsValid = false; - } - if(kargs.K % vectorSizeB != 0) - { - if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) + if(kargs.K % vectorSizeB != 0) { - CK_TILE_ERROR("K is not a multiple of vector load size for B tensor!"); + const auto remainder = kargs.K % vectorSizeB; + constexpr ck_tile::index_t BPackedSize = + ck_tile::numeric_traits::PackedSize; + const auto remainder_in_bytes = remainder * sizeof(BDataType) / BPackedSize; + // oob can support to dword level + if(remainder_in_bytes % 4 == 0) + { + BsTensorIsValid = true; + } + else + { + if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) + { + CK_TILE_ERROR( + "K is not a multiple of vector load size for B tensor!"); + } + BsTensorIsValid = false; + } } - BsTensorIsValid = false; } } }); diff --git a/test/ck_tile/gemm/test_gemm_pipeline_ut_cases.inc b/test/ck_tile/gemm/test_gemm_pipeline_ut_cases.inc index bbeb6e186a..bcb3fc5733 100644 --- a/test/ck_tile/gemm/test_gemm_pipeline_ut_cases.inc +++ b/test/ck_tile/gemm/test_gemm_pipeline_ut_cases.inc @@ -31,7 +31,14 @@ TYPED_TEST(TEST_SUITE_NAME, SmallM) if constexpr(std::is_same_v) { - EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); + if(M * sizeof(typename TestFixture::ADataType) % 4 == 0) // oob fit dword + { + this->Run(M, N, K); + } + else + { + EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); + } } else { @@ -84,7 +91,14 @@ TYPED_TEST(TEST_SUITE_NAME, MidLargeM) } else { - EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); + if(M * sizeof(typename TestFixture::ADataType) % 4 == 0) // oob fit dword + { + this->Run(M, N, K); + } + else + { + EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); + } } } else