diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_xdl_producer_consumer_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_xdl_producer_consumer_cshuffle.hpp index fcd368beb2..7db1b20823 100644 --- a/include/ck/tensor_operation/gpu/device/device_gemm_xdl_producer_consumer_cshuffle.hpp +++ b/include/ck/tensor_operation/gpu/device/device_gemm_xdl_producer_consumer_cshuffle.hpp @@ -488,22 +488,22 @@ struct DeviceGemm_Xdl_ProducerConsumer_CShuffle typename GridwiseGemm::DefaultBlock2CTileMap, true>; - ave_time = - launch_and_time_kernel(stream_config, - kernel, - dim3(grid_size), - dim3(ABBlockTransferThreadGroupSize + BlockGemmThreadGroupSize), - 0, - arg.p_a_grid_, - arg.p_b_grid_, - arg.p_c_grid_, - arg.a_element_op_, - arg.b_element_op_, - arg.c_element_op_, - arg.a_grid_desc_ak0_m_ak1_, - arg.b_grid_desc_bk0_n_bk1_, - arg.c_grid_desc_mblock_mperblock_nblock_nperblock_, - arg.block_2_ctile_map_); + ave_time = launch_and_time_kernel( + stream_config, + kernel, + dim3(grid_size), + dim3(ABBlockTransferThreadGroupSize + BlockGemmThreadGroupSize), + 0, + arg.p_a_grid_, + arg.p_b_grid_, + arg.p_c_grid_, + arg.a_element_op_, + arg.b_element_op_, + arg.c_element_op_, + arg.a_grid_desc_ak0_m_ak1_, + arg.b_grid_desc_bk0_n_bk1_, + arg.c_grid_desc_mblock_mperblock_nblock_nperblock_, + arg.block_2_ctile_map_); } else { @@ -520,22 +520,22 @@ struct DeviceGemm_Xdl_ProducerConsumer_CShuffle typename GridwiseGemm::DefaultBlock2CTileMap, false>; - ave_time = - launch_and_time_kernel(stream_config, - kernel, - dim3(grid_size), - dim3(ABBlockTransferThreadGroupSize + BlockGemmThreadGroupSize), - 0, - arg.p_a_grid_, - arg.p_b_grid_, - arg.p_c_grid_, - arg.a_element_op_, - arg.b_element_op_, - arg.c_element_op_, - arg.a_grid_desc_ak0_m_ak1_, - arg.b_grid_desc_bk0_n_bk1_, - arg.c_grid_desc_mblock_mperblock_nblock_nperblock_, - arg.block_2_ctile_map_); + ave_time = launch_and_time_kernel( + stream_config, + kernel, + dim3(grid_size), + dim3(ABBlockTransferThreadGroupSize + BlockGemmThreadGroupSize), + 0, + arg.p_a_grid_, + arg.p_b_grid_, + arg.p_c_grid_, + arg.a_element_op_, + arg.b_element_op_, + arg.c_element_op_, + arg.a_grid_desc_ak0_m_ak1_, + arg.b_grid_desc_bk0_n_bk1_, + arg.c_grid_desc_mblock_mperblock_nblock_nperblock_, + arg.block_2_ctile_map_); } return ave_time; diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp index 74389acd0f..cd0a7743c5 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp @@ -60,16 +60,16 @@ __global__ void c_grid_desc_mblock_mperblock_nblock_nperblock, block_2_ctile_map); #else - ignore = p_a_grid; - ignore = p_b_grid; - ignore = p_c_grid; - ignore = a_element_op; - ignore = b_element_op; - ignore = c_element_op; - ignore = a_grid_desc_ak0_m_ak1; - ignore = b_grid_desc_bk0_n_bk1; - ignore = c_grid_desc_mblock_mperblock_nblock_nperblock; - ignore = block_2_ctile_map; + ignore = p_a_grid; + ignore = p_b_grid; + ignore = p_c_grid; + ignore = a_element_op; + ignore = b_element_op; + ignore = c_element_op; + ignore = a_grid_desc_ak0_m_ak1; + ignore = b_grid_desc_bk0_n_bk1; + ignore = c_grid_desc_mblock_mperblock_nblock_nperblock; + ignore = block_2_ctile_map; #endif // end of if (defined(__gfx908__) || defined(__gfx90a__)) } diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp index 76fd9d1152..a11e955387 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp @@ -59,16 +59,16 @@ __global__ void c_element_op, block_2_ctile_map); #else - ignore = p_a_grid; - ignore = p_b_grid; - ignore = p_c_grid; - ignore = a_grid_desc_k0_m_k1; - ignore = b_grid_desc_k0_n_k1; - ignore = c_grid_desc_m0_n0_m1_n1_m2_m3_m4_n2; - ignore = a_element_op; - ignore = b_element_op; - ignore = c_element_op; - ignore = block_2_ctile_map; + ignore = p_a_grid; + ignore = p_b_grid; + ignore = p_c_grid; + ignore = a_grid_desc_k0_m_k1; + ignore = b_grid_desc_k0_n_k1; + ignore = c_grid_desc_m0_n0_m1_n1_m2_m3_m4_n2; + ignore = a_element_op; + ignore = b_element_op; + ignore = c_element_op; + ignore = block_2_ctile_map; #endif // end of if (defined(__gfx908__) || defined(__gfx90a__)) }