Actually print the reason when a device implementation is not supported.

This commit is contained in:
kiefer
2025-12-04 09:57:55 +00:00
parent 1a822947eb
commit d201572ae4
4 changed files with 190 additions and 0 deletions

View File

@@ -552,6 +552,10 @@ struct DeviceBatchedGemmMultiD_Wmma_CShuffleV3
{
if(!ck::is_gfx11_supported() && !ck::is_gfx12_supported())
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported: Architecture must be gfx11/gfx12." << std::endl;
}
return false;
}
@@ -560,6 +564,10 @@ struct DeviceBatchedGemmMultiD_Wmma_CShuffleV3
{
if(arg.KBatch > 1 && ck::is_gfx11_supported())
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported splitK on gfx11." << std::endl;
}
// gfx11 does not support *_atomic_pk_add_f16/bf16 instructions
return false;
}
@@ -570,6 +578,10 @@ struct DeviceBatchedGemmMultiD_Wmma_CShuffleV3
{
if(ck::is_gfx11_supported())
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported f8 / bf8 on gfx11." << std::endl;
}
return false;
}
}
@@ -579,6 +591,10 @@ struct DeviceBatchedGemmMultiD_Wmma_CShuffleV3
GemmSpec == GemmSpecialization::MNKPadding ||
GemmSpec == GemmSpecialization::KPadding))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported K dimension without padding." << std::endl;
}
return false;
}

View File

@@ -321,6 +321,10 @@ struct DeviceGroupedConvBwdWeight_Explicit
{
if constexpr(!is_NHWGC_GKYXC_NHWGK<InLayout, WeiLayout, OutLayout>())
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported layout." << std::endl;
}
return false;
}
}
@@ -328,11 +332,19 @@ struct DeviceGroupedConvBwdWeight_Explicit
{
if constexpr(!is_NDHWGC_GKZYXC_NDHWGK<InLayout, WeiLayout, OutLayout>())
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported layout." << std::endl;
}
return false;
}
}
else
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported layout." << std::endl;
}
return false;
}
@@ -342,6 +354,10 @@ struct DeviceGroupedConvBwdWeight_Explicit
if(!(arg.filter_spatial_lengths_[i] == 1 && arg.conv_filter_strides_[i] == 1 &&
arg.input_left_pads_[i] == 0 && arg.input_right_pads_[i] == 0))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported stride / pad." << std::endl;
}
return false;
}
}
@@ -349,6 +365,10 @@ struct DeviceGroupedConvBwdWeight_Explicit
{
if(!arg.is_filter_data_packed)
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported: Filter data must be packed." << std::endl;
}
return false;
}
// Check this here, it allows to use other instances from factory even

View File

@@ -1149,12 +1149,20 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3
{
if(num_k_loop <= GridwiseGemm::BlockwiseGemmPipe::PrefetchStages)
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported num K loop." << std::endl;
}
return false;
}
}
if(!ck::is_gfx11_supported() && !ck::is_gfx12_supported())
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported: Architecture must be gfx11/gfx12." << std::endl;
}
return false;
}
@@ -1177,6 +1185,10 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3
{
if(ck::is_gfx11_supported())
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported f8 / bf8 on gfx11." << std::endl;
}
return false;
}
}
@@ -1186,6 +1198,10 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3
if constexpr(!(is_NHWGC_GKYXC_NHWGK<InLayout, WeiLayout, OutLayout>() ||
is_NGCHW_NGKHW<InLayout, WeiLayout, OutLayout>()))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported layout." << std::endl;
}
return false;
}
}
@@ -1194,11 +1210,19 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3
if constexpr(!(is_NDHWGC_GKZYXC_NDHWGK<InLayout, WeiLayout, OutLayout>() ||
is_NGCDHW_NGKDHW<InLayout, WeiLayout, OutLayout>()))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported layout." << std::endl;
}
return false;
}
}
else
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported layout." << std::endl;
}
return false;
}
@@ -1211,6 +1235,10 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3
if(!(arg.filter_spatial_lengths_[i] == 1 && arg.conv_filter_strides_[i] == 1 &&
arg.input_left_pads_[i] == 0 && arg.input_right_pads_[i] == 0))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported stride / pad." << std::endl;
}
return false;
}
}
@@ -1221,14 +1249,26 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3
// support only if whole M and N can be proccessed on one block
if(!(GemmM <= MPerBlock && GemmN <= NPerBlock))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported GemmMN for merge groups." << std::endl;
}
return false;
}
if(!(arg.Conv_C_ == 1 && arg.Conv_K_ == 1))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported conv CK for merge groups." << std::endl;
}
return false;
}
if(arg.Conv_G_ % NumGroupsToMerge != 0)
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported conv G for merge groups." << std::endl;
}
return false;
}
}
@@ -1246,11 +1286,19 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3
if(!(arg.Conv_K_ == 1 && arg.compute_ptr_offset_of_batch_.BatchStrideA_ == 1 &&
NumGroupsToMerge > 1))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported Conv_K_ % ABlockTransferSrcScalarPerVector" << std::endl;
}
return false;
}
if(!(arg.Conv_C_ == 1 && arg.compute_ptr_offset_of_batch_.BatchStrideB_ == 1 &&
NumGroupsToMerge > 1))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported Conv_C_ % BBlockTransferSrcScalarPerVector" << std::endl;
}
return false;
}
}
@@ -1258,12 +1306,21 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3
// vector load A/B matrix from global memory
if(!(ABlockTransferSrcVectorDim == 1 && BBlockTransferSrcVectorDim == 1))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported BlockTransferSrcVectorDim." << std::endl;
}
return false;
}
// vector store C matrix into global memory
if(!(arg.Conv_C_ % CShuffleBlockTransferScalarPerVector_NPerBlock == 0))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported CShuffleBlockTransferScalarPerVector_NPerBlock."
<< std::endl;
}
return false;
}
@@ -1272,11 +1329,21 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3
{
if((arg.Conv_G_ * arg.Conv_C_) % TransposeTransferDstScalarPerVector != 0)
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported TransposeTransferDstScalarPerVector with GC."
<< std::endl;
}
return false;
}
if((arg.Conv_G_ * arg.Conv_K_) % TransposeTransferDstScalarPerVector != 0)
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported TransposeTransferDstScalarPerVector with GK."
<< std::endl;
}
return false;
}
@@ -1287,11 +1354,23 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3
if(input_spatial_acum % TransposeTransferSrcScalarPerVector != 0)
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout
<< "Unsupported input_spatial_acum % TransposeTransferSrcScalarPerVector."
<< std::endl;
}
return false;
}
if(output_spatial_acum % TransposeTransferSrcScalarPerVector != 0)
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout
<< "Unsupported input_spatial_acum % TransposeTransferSrcScalarPerVector."
<< std::endl;
}
return false;
}
@@ -1299,6 +1378,10 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3
if(!(arg.a_out_transpose_desc_.GetElementSpaceSize() * sizeof(ADataType) <= TwoGB &&
arg.b_out_transpose_desc_.GetElementSpaceSize() * sizeof(BDataType) <= TwoGB))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported: Problem exceeds 2GB limit." << std::endl;
}
return false;
}
}

View File

@@ -1054,12 +1054,20 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3
{
if(num_k_loop <= GridwiseGemm::BlockwiseGemmPipe::PrefetchStages)
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported num K loop." << std::endl;
}
return false;
}
}
if(!ck::is_gfx11_supported() && !ck::is_gfx12_supported())
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported: Architecture must be gfx11/gfx12." << std::endl;
}
return false;
}
@@ -1068,6 +1076,10 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3
{
if(gemm_arg.KBatch > 1 && ck::is_gfx11_supported())
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported splitK on gfx11." << std::endl;
}
// gfx11 does not support *_atomic_pk_add_f16/bf16 instructions
return false;
}
@@ -1078,6 +1090,10 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3
{
if(ck::is_gfx11_supported())
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported f8 / bf8 on gfx11." << std::endl;
}
return false;
}
}
@@ -1086,6 +1102,10 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3
{
if constexpr(!is_GNWC_GKXC_GNWK<InLayout, WeiLayout, OutLayout>())
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported layout." << std::endl;
}
return false;
}
}
@@ -1095,6 +1115,10 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3
is_GNHWC_GKYXC_GNHWK<InLayout, WeiLayout, OutLayout>() ||
is_NGCHW_NGKHW<InLayout, WeiLayout, OutLayout>()))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported layout." << std::endl;
}
return false;
}
}
@@ -1104,11 +1128,19 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3
is_GNDHWC_GKZYXC_GNDHWK<InLayout, WeiLayout, OutLayout>() ||
is_NGCDHW_NGKDHW<InLayout, WeiLayout, OutLayout>()))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported layout." << std::endl;
}
return false;
}
}
else
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported layout." << std::endl;
}
return false;
}
@@ -1121,6 +1153,10 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3
if(!(arg.filter_spatial_lengths_[i] == 1 && arg.conv_filter_strides_[i] == 1 &&
arg.input_left_pads_[i] == 0 && arg.input_right_pads_[i] == 0))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported stride / pad." << std::endl;
}
return false;
}
}
@@ -1129,12 +1165,21 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3
arg.Conv_K_ % ABlockTransferSrcScalarPerVector == 0 &&
arg.Conv_C_ % BBlockTransferSrcScalarPerVector == 0))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported BlockTransferSrcScalarPerVector." << std::endl;
}
return false;
}
// vector store C matrix into global memory
if(!(arg.Conv_C_ % CShuffleBlockTransferScalarPerVector_NPerBlock == 0))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported CShuffleBlockTransferScalarPerVector_NPerBlock."
<< std::endl;
}
return false;
}
@@ -1143,11 +1188,21 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3
{
if((arg.Conv_G_ * arg.Conv_C_) % TransposeTransferDstScalarPerVector != 0)
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported TransposeTransferDstScalarPerVector with GC."
<< std::endl;
}
return false;
}
if((arg.Conv_G_ * arg.Conv_K_) % TransposeTransferDstScalarPerVector != 0)
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported TransposeTransferDstScalarPerVector with GK."
<< std::endl;
}
return false;
}
@@ -1158,11 +1213,23 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3
if(input_spatial_acum % TransposeTransferSrcScalarPerVector != 0)
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout
<< "Unsupported input_spatial_acum % TransposeTransferSrcScalarPerVector."
<< std::endl;
}
return false;
}
if(output_spatial_acum % TransposeTransferSrcScalarPerVector != 0)
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout
<< "Unsupported output_spatial_acum % TransposeTransferSrcScalarPerVector."
<< std::endl;
}
return false;
}
@@ -1182,6 +1249,10 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3
if(!(arg.a_out_transpose_desc_.GetElementSpaceSize() * sizeof(ADataType) <= TwoGB &&
arg.b_out_transpose_desc_.GetElementSpaceSize() * sizeof(BDataType) <= TwoGB))
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Unsupported: Problem exceeds 2GB limit." << std::endl;
}
return false;
}
}