From d348c3fa4ca98958f42795ada1698eaf8caa3f2a Mon Sep 17 00:00:00 2001 From: qin letao Date: Wed, 5 Mar 2025 06:12:06 +0000 Subject: [PATCH] rewrite N padding value for crash --- ...y_multiply_xdl_fp8_bpreshuffle_padding.cpp | 40 +++++++++---------- ...m_xdl_cshuffle_v3_multi_d_b_preshuffle.hpp | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_bpreshuffle_padding.cpp b/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_bpreshuffle_padding.cpp index ffab345c34..d38947e7a3 100644 --- a/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_bpreshuffle_padding.cpp +++ b/example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_bpreshuffle_padding.cpp @@ -151,27 +151,27 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultiD_Xdl_CShu // 1, 2, S<1, 32, 1, 8>, S<8, 8, 1>, // ck::BlockGemmPipelineScheduler::Intrawave, ck::BlockGemmPipelineVersion::v3, B0DataType>; - < Row, Col, DsLayout, ELayout, A0DataType, B0DataType, DsDataType, EDataType, AccDataType, CShuffleDataType, - AElementOp, BElementOp, CDEElementOp, GemmSpec, 256, - 128, 128, 128, - 16, 16, - 32, 32, - 4, 1, - S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 0, - S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 0, - 1, 1, S<1, 32, 1, 8>, S<8, 8, 1>, - ck::BlockGemmPipelineScheduler::Intrawave, ck::BlockGemmPipelineVersion::v2, B0DataType>; + // < Row, Col, DsLayout, ELayout, A0DataType, B0DataType, DsDataType, EDataType, AccDataType, CShuffleDataType, + // AElementOp, BElementOp, CDEElementOp, GemmSpec, 256, + // 128, 128, 128, + // 16, 16, + // 32, 32, + // 4, 1, + // S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 0, + // S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 0, + // 1, 1, S<1, 32, 1, 8>, S<8, 8, 1>, + // ck::BlockGemmPipelineScheduler::Intrawave, ck::BlockGemmPipelineVersion::v2, B0DataType>; -// < Row, Col, DsLayout, ELayout, A0DataType, B0DataType, DsDataType, EDataType, AccDataType, CShuffleDataType, -// AElementOp, BElementOp, CDEElementOp, GemmSpec, 256, -// 128, 256, 128, -// 16, 16, -// 32, 32, -// 4, 2, -// S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 0, -// S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 0, -// 1, 1, S<1, 32, 1, 8>, S<8, 8, 1>, -// ck::BlockGemmPipelineScheduler::Intrawave, ck::BlockGemmPipelineVersion::v3, B0DataType>; + < Row, Col, DsLayout, ELayout, A0DataType, B0DataType, DsDataType, EDataType, AccDataType, CShuffleDataType, + AElementOp, BElementOp, CDEElementOp, GemmSpec, 256, + 64, 512, 128, + 16, 16, + 32, 32, + 2, 4, + S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 0, + S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 16, 16, 0, + 1, 1, S<1, 32, 1, 8>, S<8, 8, 1>, + ck::BlockGemmPipelineScheduler::Intrawave, ck::BlockGemmPipelineVersion::v2, B0DataType>; // clang-format on diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_b_preshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_b_preshuffle.hpp index 49a2f47539..31b07d9e4b 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_b_preshuffle.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_b_preshuffle.hpp @@ -604,7 +604,7 @@ struct GridwiseGemmMultiD_xdl_cshuffle_v3_b_preshuffle BK0{CalculateBK0Padded(K_, KBatch_)}, MBlock{CalculateMBlock(M_)}, NBlock{CalculateNBlock(N_)}, - BN0Shuffled{CalculateBN0Shuffled(NPadded)}, + BN0Shuffled{CalculateBN0Shuffled((N + 128 - 1) / 128 * 128)}, BK0Shuffled{CalculateBK0Shuffled(CalculateBKShufflePadded(K_))} { }