From 765fcfc06799c0ab26e6f8b010d4459afdc09284 Mon Sep 17 00:00:00 2001 From: "Graner, Johannes" Date: Mon, 1 Dec 2025 13:18:19 +0000 Subject: [PATCH] update V3 2GB check for split-K offset hack --- ...ice_grouped_conv_bwd_weight_xdl_cshuffle_v3.hpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle_v3.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle_v3.hpp index 2ba9a7fe44..d938ec66c4 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle_v3.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle_v3.hpp @@ -1503,9 +1503,17 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffleV3 } constexpr long_index_t TwoGB = (long_index_t{1} << 31); - if(!(arg.a_grid_desc_k0_m_k1_.GetElementSpaceSize() * sizeof(ADataType) <= TwoGB && - arg.b_grid_desc_k0_n_k1_.GetElementSpaceSize() * sizeof(BDataType) <= TwoGB && - arg.c_grid_desc_m_n_.GetElementSpaceSize() * sizeof(CDataType) <= TwoGB)) + const bool a_small_enough = arg.a_grid_desc_k0_m_k1_.GetElementSpaceSize() / + (arg.split_k_offset_a_hack_ ? arg.k_batch_ : 1) * + sizeof(ADataType) <= + TwoGB; + const bool b_small_enough = arg.b_grid_desc_k0_n_k1_.GetElementSpaceSize() / + (arg.split_k_offset_b_hack_ ? arg.k_batch_ : 1) * + sizeof(BDataType) <= + TwoGB; + const bool c_small_enough = + arg.c_grid_desc_m_n_.GetElementSpaceSize() * sizeof(CDataType) <= TwoGB; + if(!(a_small_enough && b_small_enough && c_small_enough)) { return false; }