diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_wmma_cshuffle_v3.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_wmma_cshuffle_v3.hpp index 1cde917ce9..d9557ef10b 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_wmma_cshuffle_v3.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_wmma_cshuffle_v3.hpp @@ -552,6 +552,10 @@ struct DeviceBatchedGemmMultiD_Wmma_CShuffleV3 { if(!ck::is_gfx11_supported() && !ck::is_gfx12_supported()) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported: Architecture must be gfx11/gfx12." << std::endl; + } return false; } @@ -560,6 +564,10 @@ struct DeviceBatchedGemmMultiD_Wmma_CShuffleV3 { if(arg.KBatch > 1 && ck::is_gfx11_supported()) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported splitK on gfx11." << std::endl; + } // gfx11 does not support *_atomic_pk_add_f16/bf16 instructions return false; } @@ -570,6 +578,10 @@ struct DeviceBatchedGemmMultiD_Wmma_CShuffleV3 { if(ck::is_gfx11_supported()) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported f8 / bf8 on gfx11." << std::endl; + } return false; } } @@ -579,6 +591,10 @@ struct DeviceBatchedGemmMultiD_Wmma_CShuffleV3 GemmSpec == GemmSpecialization::MNKPadding || GemmSpec == GemmSpecialization::KPadding)) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported K dimension without padding." << std::endl; + } return false; } diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_explicit.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_explicit.hpp index 620613991f..2bcd8ce349 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_explicit.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_explicit.hpp @@ -321,6 +321,10 @@ struct DeviceGroupedConvBwdWeight_Explicit { if constexpr(!is_NHWGC_GKYXC_NHWGK()) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported layout." << std::endl; + } return false; } } @@ -328,11 +332,19 @@ struct DeviceGroupedConvBwdWeight_Explicit { if constexpr(!is_NDHWGC_GKZYXC_NDHWGK()) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported layout." << std::endl; + } return false; } } else { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported layout." << std::endl; + } return false; } @@ -342,6 +354,10 @@ struct DeviceGroupedConvBwdWeight_Explicit if(!(arg.filter_spatial_lengths_[i] == 1 && arg.conv_filter_strides_[i] == 1 && arg.input_left_pads_[i] == 0 && arg.input_right_pads_[i] == 0)) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported stride / pad." << std::endl; + } return false; } } @@ -349,6 +365,10 @@ struct DeviceGroupedConvBwdWeight_Explicit { if(!arg.is_filter_data_packed) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported: Filter data must be packed." << std::endl; + } return false; } // Check this here, it allows to use other instances from factory even diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_wmma_cshuffle_v3.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_wmma_cshuffle_v3.hpp index 4e5668e957..cd2b708923 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_wmma_cshuffle_v3.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_wmma_cshuffle_v3.hpp @@ -1149,12 +1149,20 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3 { if(num_k_loop <= GridwiseGemm::BlockwiseGemmPipe::PrefetchStages) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported num K loop." << std::endl; + } return false; } } if(!ck::is_gfx11_supported() && !ck::is_gfx12_supported()) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported: Architecture must be gfx11/gfx12." << std::endl; + } return false; } @@ -1177,6 +1185,10 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3 { if(ck::is_gfx11_supported()) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported f8 / bf8 on gfx11." << std::endl; + } return false; } } @@ -1186,6 +1198,10 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3 if constexpr(!(is_NHWGC_GKYXC_NHWGK() || is_NGCHW_NGKHW())) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported layout." << std::endl; + } return false; } } @@ -1194,11 +1210,19 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3 if constexpr(!(is_NDHWGC_GKZYXC_NDHWGK() || is_NGCDHW_NGKDHW())) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported layout." << std::endl; + } return false; } } else { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported layout." << std::endl; + } return false; } @@ -1211,6 +1235,10 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3 if(!(arg.filter_spatial_lengths_[i] == 1 && arg.conv_filter_strides_[i] == 1 && arg.input_left_pads_[i] == 0 && arg.input_right_pads_[i] == 0)) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported stride / pad." << std::endl; + } return false; } } @@ -1221,14 +1249,26 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3 // support only if whole M and N can be proccessed on one block if(!(GemmM <= MPerBlock && GemmN <= NPerBlock)) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported GemmMN for merge groups." << std::endl; + } return false; } if(!(arg.Conv_C_ == 1 && arg.Conv_K_ == 1)) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported conv CK for merge groups." << std::endl; + } return false; } if(arg.Conv_G_ % NumGroupsToMerge != 0) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported conv G for merge groups." << std::endl; + } return false; } } @@ -1246,11 +1286,19 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3 if(!(arg.Conv_K_ == 1 && arg.compute_ptr_offset_of_batch_.BatchStrideA_ == 1 && NumGroupsToMerge > 1)) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported Conv_K_ % ABlockTransferSrcScalarPerVector" << std::endl; + } return false; } if(!(arg.Conv_C_ == 1 && arg.compute_ptr_offset_of_batch_.BatchStrideB_ == 1 && NumGroupsToMerge > 1)) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported Conv_C_ % BBlockTransferSrcScalarPerVector" << std::endl; + } return false; } } @@ -1258,12 +1306,21 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3 // vector load A/B matrix from global memory if(!(ABlockTransferSrcVectorDim == 1 && BBlockTransferSrcVectorDim == 1)) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported BlockTransferSrcVectorDim." << std::endl; + } return false; } // vector store C matrix into global memory if(!(arg.Conv_C_ % CShuffleBlockTransferScalarPerVector_NPerBlock == 0)) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported CShuffleBlockTransferScalarPerVector_NPerBlock." + << std::endl; + } return false; } @@ -1272,11 +1329,21 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3 { if((arg.Conv_G_ * arg.Conv_C_) % TransposeTransferDstScalarPerVector != 0) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported TransposeTransferDstScalarPerVector with GC." + << std::endl; + } return false; } if((arg.Conv_G_ * arg.Conv_K_) % TransposeTransferDstScalarPerVector != 0) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported TransposeTransferDstScalarPerVector with GK." + << std::endl; + } return false; } @@ -1287,11 +1354,23 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3 if(input_spatial_acum % TransposeTransferSrcScalarPerVector != 0) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout + << "Unsupported input_spatial_acum % TransposeTransferSrcScalarPerVector." + << std::endl; + } return false; } if(output_spatial_acum % TransposeTransferSrcScalarPerVector != 0) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout + << "Unsupported input_spatial_acum % TransposeTransferSrcScalarPerVector." + << std::endl; + } return false; } @@ -1299,6 +1378,10 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3 if(!(arg.a_out_transpose_desc_.GetElementSpaceSize() * sizeof(ADataType) <= TwoGB && arg.b_out_transpose_desc_.GetElementSpaceSize() * sizeof(BDataType) <= TwoGB)) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported: Problem exceeds 2GB limit." << std::endl; + } return false; } } diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle_v3.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle_v3.hpp index 9f892a2fc4..62acac2f7f 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle_v3.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle_v3.hpp @@ -1054,12 +1054,20 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3 { if(num_k_loop <= GridwiseGemm::BlockwiseGemmPipe::PrefetchStages) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported num K loop." << std::endl; + } return false; } } if(!ck::is_gfx11_supported() && !ck::is_gfx12_supported()) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported: Architecture must be gfx11/gfx12." << std::endl; + } return false; } @@ -1068,6 +1076,10 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3 { if(gemm_arg.KBatch > 1 && ck::is_gfx11_supported()) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported splitK on gfx11." << std::endl; + } // gfx11 does not support *_atomic_pk_add_f16/bf16 instructions return false; } @@ -1078,6 +1090,10 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3 { if(ck::is_gfx11_supported()) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported f8 / bf8 on gfx11." << std::endl; + } return false; } } @@ -1086,6 +1102,10 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3 { if constexpr(!is_GNWC_GKXC_GNWK()) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported layout." << std::endl; + } return false; } } @@ -1095,6 +1115,10 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3 is_GNHWC_GKYXC_GNHWK() || is_NGCHW_NGKHW())) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported layout." << std::endl; + } return false; } } @@ -1104,11 +1128,19 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3 is_GNDHWC_GKZYXC_GNDHWK() || is_NGCDHW_NGKDHW())) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported layout." << std::endl; + } return false; } } else { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported layout." << std::endl; + } return false; } @@ -1121,6 +1153,10 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3 if(!(arg.filter_spatial_lengths_[i] == 1 && arg.conv_filter_strides_[i] == 1 && arg.input_left_pads_[i] == 0 && arg.input_right_pads_[i] == 0)) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported stride / pad." << std::endl; + } return false; } } @@ -1129,12 +1165,21 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3 arg.Conv_K_ % ABlockTransferSrcScalarPerVector == 0 && arg.Conv_C_ % BBlockTransferSrcScalarPerVector == 0)) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported BlockTransferSrcScalarPerVector." << std::endl; + } return false; } // vector store C matrix into global memory if(!(arg.Conv_C_ % CShuffleBlockTransferScalarPerVector_NPerBlock == 0)) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported CShuffleBlockTransferScalarPerVector_NPerBlock." + << std::endl; + } return false; } @@ -1143,11 +1188,21 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3 { if((arg.Conv_G_ * arg.Conv_C_) % TransposeTransferDstScalarPerVector != 0) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported TransposeTransferDstScalarPerVector with GC." + << std::endl; + } return false; } if((arg.Conv_G_ * arg.Conv_K_) % TransposeTransferDstScalarPerVector != 0) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported TransposeTransferDstScalarPerVector with GK." + << std::endl; + } return false; } @@ -1158,11 +1213,23 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3 if(input_spatial_acum % TransposeTransferSrcScalarPerVector != 0) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout + << "Unsupported input_spatial_acum % TransposeTransferSrcScalarPerVector." + << std::endl; + } return false; } if(output_spatial_acum % TransposeTransferSrcScalarPerVector != 0) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout + << "Unsupported output_spatial_acum % TransposeTransferSrcScalarPerVector." + << std::endl; + } return false; } @@ -1182,6 +1249,10 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3 if(!(arg.a_out_transpose_desc_.GetElementSpaceSize() * sizeof(ADataType) <= TwoGB && arg.b_out_transpose_desc_.GetElementSpaceSize() * sizeof(BDataType) <= TwoGB)) { + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + std::cout << "Unsupported: Problem exceeds 2GB limit." << std::endl; + } return false; } }