diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle_v3.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle_v3.hpp index 7e9020d796..02639dbf3e 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle_v3.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle_v3.hpp @@ -682,6 +682,10 @@ struct DeviceGemmMultiD_Xdl_CShuffle_V3 : public DeviceGemmMultipleDSplitK{}] <= 1 && (arg.KBatch > 1)) + { + return false; + } else { if constexpr(NXdlPerWave32 > 0) diff --git a/test/ck_tile/gemm/test_gemm_pipeline_ut_cases.inc b/test/ck_tile/gemm/test_gemm_pipeline_ut_cases.inc index f793f81cc9..66ef05b0ba 100644 --- a/test/ck_tile/gemm/test_gemm_pipeline_ut_cases.inc +++ b/test/ck_tile/gemm/test_gemm_pipeline_ut_cases.inc @@ -11,7 +11,7 @@ TYPED_TEST(TEST_SUITE_NAME, SmallM) std::vector Ms{1, 2, 3, 4, 5, 6}; constexpr int N = 1024; std::vector Ks; - for (auto K_count: {2, 3, 4, 10, 11}) + for(auto K_count : {2, 3, 4, 10, 11}) { Ks.push_back(K_count * TestFixture::K_Tile); } @@ -36,10 +36,10 @@ TYPED_TEST(TEST_SUITE_NAME, SmallM) TYPED_TEST(TEST_SUITE_NAME, MidLargeM) { std::vector Ms{127, 255, 312, 799, 1573}; - constexpr int N = 1024; + constexpr int N = 1024; std::vector Ks; - for (auto K_count: {2, 3, 4, 10, 11}) + for(auto K_count : {2, 3, 4, 10, 11}) { Ks.push_back(K_count * TestFixture::K_Tile); } @@ -51,7 +51,7 @@ TYPED_TEST(TEST_SUITE_NAME, MidLargeM) for(int M : Ms) { - for (int K: Ks) + for(int K : Ks) { if constexpr(std::is_same_v)