[CK-Tile] functional support for transposed inputs in the compute-bound double-LDS-buffer pipeline with async loads from global memory to LDS (#2984)

* reuse local prefetch logic from compute v4 pipeline

add single-tile test

explicit lambda capture

reuse lds block descriptors from base policy for the transposed case

match the test case kernel configuration with compute v4

* add comments
This commit is contained in:
Max Podkorytov
2025-10-10 12:57:50 -07:00
committed by GitHub
parent fada1a3cae
commit 9d060d3e3c
4 changed files with 128 additions and 76 deletions

View File

@@ -131,7 +131,10 @@ using KernelTypesCompV4 = ::testing::Types<
>;
// Kernel configurations instantiated for the CompAsync typed-test suite.
// The first two tuple entries sweep every Row/Col pairing while the third stays Row;
// presumably these are the A, B and C layouts, so the extra pairings cover the
// transposed inputs named in the commit message -- TODO confirm against the fixture.
// NOTE(review): the first tuple below has no trailing comma and repeats the
// Row, Col entry further down; it looks like the pre-change line of this diff
// hunk rather than part of the final list -- confirm before compiling.
using KernelTypesCompAsync = ::testing::Types<
std::tuple< Row, Col, Row, F16, F16, F32, F16, I256, I256, I32, I32, I32, I16, Intrawave, CompAsync>
std::tuple< Row, Row, Row, F16, F16, F32, F16, I256, I256, I32, I32, I32, I16, Intrawave, CompAsync>,
std::tuple< Row, Col, Row, F16, F16, F32, F16, I256, I256, I32, I32, I32, I16, Intrawave, CompAsync>,
std::tuple< Col, Row, Row, F16, F16, F32, F16, I256, I256, I32, I32, I32, I16, Intrawave, CompAsync>,
std::tuple< Col, Col, Row, F16, F16, F32, F16, I256, I256, I32, I32, I32, I16, Intrawave, CompAsync>
>;
using KernelTypesCompV4Wmma = ::testing::Types<

View File

@@ -33,6 +33,11 @@ TYPED_TEST(TEST_SUITE_NAME, SmallM)
}
}
// Invokes the fixture's Run() once with its own M_Tile, N_Tile and K_Tile
// constants -- presumably a problem that spans exactly one tile per dimension
// (confirm against the fixture's Run() signature).
TYPED_TEST(TEST_SUITE_NAME, SingleTile)
{
    const auto tile_m = TestFixture::M_Tile;
    const auto tile_n = TestFixture::N_Tile;
    const auto tile_k = TestFixture::K_Tile;

    this->Run(tile_m, tile_n, tile_k);
}
TYPED_TEST(TEST_SUITE_NAME, MidLargeM)
{
std::vector<int> Ms{127, 255, 312, 799, 1573};