[CK_TILE] Fix transpose_vectors for 2x2 8-bit tiles (#3042)

Fix the transpose_vectors logic for 2x2 8-bit tiles.

    Add a test that exercises this code path.

    Factor out the constexpr'd cases into smaller functions.

    Add inline docs about the data movement.

    Impact: GEMMs with 8-bit non-RCR inputs on gfx942.
This commit is contained in:
Max Podkorytov
2025-10-20 13:40:44 -07:00
committed by GitHub
parent 9f77061094
commit 2570462ecf
2 changed files with 176 additions and 111 deletions

View File

@@ -306,6 +306,12 @@ class CaseHalfPadRectTile2LoadTranspose
{
};
// Batched-transpose test fixture instantiated with ck_tile::fp8_t and the
// Universal pipeline tag. The fixture body is intentionally empty: all behavior
// comes from the TestCkTileBatchedTranspose base.
// NOTE(review): presumably this is the case meant to cover the 2x2 8-bit tile
// path of transpose_vectors; the meaning of the remaining PipelineConfig
// arguments (256, 32, 2, 2, false, false) is not visible here — confirm
// against the PipelineConfig declaration.
class CaseBytePadRectTile
: public TestCkTileBatchedTranspose<
PipelineConfig<ck_tile::fp8_t, PipelineTag::Universal, 256, 32, 2, 2, false, false>>
{
};
// Each value-parameterized test below forwards the current parameter
// (GetParam()) to the fixture's Run() member.
TEST_P(CaseHalf, TestCorrectness) { this->Run(GetParam()); }
TEST_P(CaseByte, TestCorrectness) { this->Run(GetParam()); }
TEST_P(CaseWord, TestCorrectness) { this->Run(GetParam()); }
@@ -321,6 +327,7 @@ TEST_P(CaseHalfPadRectTile1, TestCorrectness) { this->Run(GetParam()); }
TEST_P(CaseHalfPadRectTile1LoadTranspose, TestCorrectness) { this->Run(GetParam()); }
TEST_P(CaseHalfPadRectTile2, TestCorrectness) { this->Run(GetParam()); }
TEST_P(CaseHalfPadRectTile2LoadTranspose, TestCorrectness) { this->Run(GetParam()); }
// fp8_t fixture (CaseBytePadRectTile); same Run(GetParam()) check as the cases above.
TEST_P(CaseBytePadRectTile, TestCorrectness) { this->Run(GetParam()); }
// clang-format off
// Arguments: instantiation name prefix, fixture to instantiate, parameter generator.
INSTANTIATE_TEST_SUITE_P(TestCkTileBatchedTransposeSuite, CaseHalf, kTestingValues);
@@ -338,5 +345,6 @@ INSTANTIATE_TEST_SUITE_P(TestCkTileBatchedTransposeSuite, CaseHalfPadRectTile1,
INSTANTIATE_TEST_SUITE_P(TestCkTileBatchedTransposeSuite, CaseHalfPadRectTile1LoadTranspose, kTestingValues);
INSTANTIATE_TEST_SUITE_P(TestCkTileBatchedTransposeSuite, CaseHalfPadRectTile2, kTestingValues);
INSTANTIATE_TEST_SUITE_P(TestCkTileBatchedTransposeSuite, CaseHalfPadRectTile2LoadTranspose, kTestingValues);
// fp8_t fixture (CaseBytePadRectTile), instantiated over the same kTestingValues generator.
INSTANTIATE_TEST_SUITE_P(TestCkTileBatchedTransposeSuite, CaseBytePadRectTile, kTestingValues);
// clang-format on