mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
This reverts commit 1cc5380ee9.
## Motivation
<!-- Explain the purpose of this PR and the goals it aims to achieve.
-->
## Technical Details
<!-- Explain the changes along with any relevant GitHub links. -->
## Test Plan
<!-- Explain any relevant testing done to verify this PR. -->
## Test Result
<!-- Briefly summarize test outcomes. -->
## Submission Checklist
- [ ] Look over the contributing guidelines at
https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
This commit is contained in:
@@ -448,11 +448,23 @@ struct UniversalGemmKernel
|
||||
}
|
||||
if(kargs.K % vectorSizeA != 0)
|
||||
{
|
||||
if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
|
||||
const auto remainder = kargs.K % vectorSizeA;
|
||||
constexpr ck_tile::index_t APackedSize =
|
||||
ck_tile::numeric_traits<ADataType>::PackedSize;
|
||||
const auto remainder_in_bytes = remainder * sizeof(ADataType) / APackedSize;
|
||||
// oob can support to dword level
|
||||
if(remainder_in_bytes % 4 == 0)
|
||||
{
|
||||
CK_TILE_ERROR("K is not a multiple of vector load size for A tensor!");
|
||||
AsTensorIsValid = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
|
||||
{
|
||||
CK_TILE_ERROR("K is not a multiple of vector load size for A tensor!");
|
||||
}
|
||||
AsTensorIsValid = false;
|
||||
}
|
||||
AsTensorIsValid = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -468,11 +480,24 @@ struct UniversalGemmKernel
|
||||
}
|
||||
if(kargs.M % vectorSizeA != 0)
|
||||
{
|
||||
if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
|
||||
const auto remainder = kargs.M % vectorSizeA;
|
||||
constexpr ck_tile::index_t APackedSize =
|
||||
ck_tile::numeric_traits<ADataType>::PackedSize;
|
||||
const auto remainder_in_bytes = remainder * sizeof(ADataType) / APackedSize;
|
||||
// oob can support to dword level
|
||||
if(remainder_in_bytes % 4 == 0)
|
||||
{
|
||||
CK_TILE_ERROR("M is not a multiple of vector load size for A tensor!");
|
||||
|
||||
AsTensorIsValid = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
|
||||
{
|
||||
CK_TILE_ERROR("M is not a multiple of vector load size for A tensor!");
|
||||
}
|
||||
AsTensorIsValid = false;
|
||||
}
|
||||
AsTensorIsValid = false;
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -495,33 +520,58 @@ struct UniversalGemmKernel
|
||||
}
|
||||
if(kargs.N % vectorSizeB != 0)
|
||||
{
|
||||
if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
|
||||
const auto remainder = kargs.N % vectorSizeB;
|
||||
constexpr ck_tile::index_t BPackedSize =
|
||||
ck_tile::numeric_traits<BDataType>::PackedSize;
|
||||
const auto remainder_in_bytes = remainder * sizeof(BDataType) / BPackedSize;
|
||||
// oob can support to dword level
|
||||
if(remainder_in_bytes % 4 == 0)
|
||||
{
|
||||
CK_TILE_ERROR("N is not a multiple of vector load size for B tensor!");
|
||||
BsTensorIsValid = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
|
||||
{
|
||||
CK_TILE_ERROR("N is not a multiple of vector load size for B tensor!");
|
||||
}
|
||||
BsTensorIsValid = false;
|
||||
}
|
||||
BsTensorIsValid = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(kargs.K % (TilePartitioner::KPerBlock * kargs.k_batch) != 0 &&
|
||||
GemmPipeline::kPadK == false)
|
||||
else
|
||||
{
|
||||
if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
|
||||
if(kargs.K % (TilePartitioner::KPerBlock * kargs.k_batch) != 0 &&
|
||||
GemmPipeline::kPadK == false)
|
||||
{
|
||||
CK_TILE_ERROR(
|
||||
"Can't support K that is not a multiple of k_batch * KPerBlock "
|
||||
"without padding!");
|
||||
if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
|
||||
{
|
||||
CK_TILE_ERROR(
|
||||
"Can't support K that is not a multiple of k_batch * KPerBlock "
|
||||
"without padding!");
|
||||
}
|
||||
BsTensorIsValid = false;
|
||||
}
|
||||
BsTensorIsValid = false;
|
||||
}
|
||||
if(kargs.K % vectorSizeB != 0)
|
||||
{
|
||||
if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
|
||||
if(kargs.K % vectorSizeB != 0)
|
||||
{
|
||||
CK_TILE_ERROR("K is not a multiple of vector load size for B tensor!");
|
||||
const auto remainder = kargs.K % vectorSizeB;
|
||||
constexpr ck_tile::index_t BPackedSize =
|
||||
ck_tile::numeric_traits<BDataType>::PackedSize;
|
||||
const auto remainder_in_bytes = remainder * sizeof(BDataType) / BPackedSize;
|
||||
// oob can support to dword level
|
||||
if(remainder_in_bytes % 4 == 0)
|
||||
{
|
||||
BsTensorIsValid = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
|
||||
{
|
||||
CK_TILE_ERROR(
|
||||
"K is not a multiple of vector load size for B tensor!");
|
||||
}
|
||||
BsTensorIsValid = false;
|
||||
}
|
||||
}
|
||||
BsTensorIsValid = false;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@@ -31,7 +31,14 @@ TYPED_TEST(TEST_SUITE_NAME, SmallM)
|
||||
if constexpr(std::is_same_v<typename TestFixture::ALayout,
|
||||
ck_tile::tensor_layout::gemm::ColumnMajor>)
|
||||
{
|
||||
EXPECT_THROW((this->Run(M, N, K)), std::runtime_error);
|
||||
if(M * sizeof(typename TestFixture::ADataType) % 4 == 0) // oob fit dword
|
||||
{
|
||||
this->Run(M, N, K);
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_THROW((this->Run(M, N, K)), std::runtime_error);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -84,7 +91,14 @@ TYPED_TEST(TEST_SUITE_NAME, MidLargeM)
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_THROW((this->Run(M, N, K)), std::runtime_error);
|
||||
if(M * sizeof(typename TestFixture::ADataType) % 4 == 0) // oob fit dword
|
||||
{
|
||||
this->Run(M, N, K);
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_THROW((this->Run(M, N, K)), std::runtime_error);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user