diff --git a/CMakeLists.txt b/CMakeLists.txt index b75846301d..6e757ef048 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -169,28 +169,30 @@ enable_cppcheck( unusedPrivateFunction unusedStructMember # Ignore initializer lists in the tests - useInitializationList:*test/*.cpp - *:*src/sqlite/*.cpp - *:*.cl - *:*src/kernels/*.h - knownConditionTrueFalse:*src/kernels/composable_kernel/*/* - redundantAssignment:*src/kernels/composable_kernel/*/* - unreadVariable:*src/kernels/composable_kernel/*/* - unusedScopedObject:*src/kernels/composable_kernel/*/* - wrongPrintfScanfArgNum:*src/kernels/composable_kernel/*/* + #useInitializationList:*test/*.cpp + #*:*src/sqlite/*.cpp + #*:*.cl + #*:*src/kernels/*.h + #knownConditionTrueFalse:*src/kernels/composable_kernel/*/* + #redundantAssignment:*src/kernels/composable_kernel/*/* + #unreadVariable:*src/kernels/composable_kernel/*/* + #unusedScopedObject:*src/kernels/composable_kernel/*/* + #wrongPrintfScanfArgNum:*src/kernels/composable_kernel/*/* unmatchedSuppression FORCE SOURCES host/host_tensor/src - host/driver_offline - composable_kernel/src + host/driver_offline/src + composable_kernel/src/kernel_wrapper INCLUDE + host/host_tensor/include + host/solver/include + host/driver_offline/include + composable_kernel/include/* ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_BINARY_DIR}/include - ${CMAKE_CURRENT_SOURCE_DIR}/src/include DEFINE CPPCHECK=1 - MIOPEN_USE_MIOPENGEMM=1 __linux__=1 ) diff --git a/host/driver_offline/CMakeLists.txt b/host/driver_offline/CMakeLists.txt index 927975d449..9743abbb0b 100644 --- a/host/driver_offline/CMakeLists.txt +++ b/host/driver_offline/CMakeLists.txt @@ -12,8 +12,8 @@ include_directories(BEFORE ${PROJECT_SOURCE_DIR}/external/half/include ) -set(CONV_FWD_DRIVER_OFFLINE_SOURCE conv_fwd_driver_offline.cpp) -set(CONV_BWD_DRIVER_OFFLINE_SOURCE conv_bwd_driver_offline.cpp) +set(CONV_FWD_DRIVER_OFFLINE_SOURCE src/conv_fwd_driver_offline.cpp) +set(CONV_BWD_DRIVER_OFFLINE_SOURCE src/conv_bwd_driver_offline.cpp) add_executable(conv_fwd_driver_offline ${CONV_FWD_DRIVER_OFFLINE_SOURCE}) add_executable(conv_bwd_driver_offline ${CONV_BWD_DRIVER_OFFLINE_SOURCE}) diff --git a/host/driver_offline/conv_bwd_driver_offline.cpp b/host/driver_offline/src/conv_bwd_driver_offline.cpp similarity index 100% rename from host/driver_offline/conv_bwd_driver_offline.cpp rename to host/driver_offline/src/conv_bwd_driver_offline.cpp diff --git a/host/driver_offline/conv_fwd_driver_offline.cpp b/host/driver_offline/src/conv_fwd_driver_offline.cpp similarity index 100% rename from host/driver_offline/conv_fwd_driver_offline.cpp rename to host/driver_offline/src/conv_fwd_driver_offline.cpp diff --git a/host/solver/include/conv_igemm_fwd_v6r1_dlops_nchw_kcyx_nkhw.hpp b/host/solver/include/conv_igemm_fwd_v6r1_dlops_nchw_kcyx_nkhw.hpp index a30c2720ee..983e0f0b74 100644 --- a/host/solver/include/conv_igemm_fwd_v6r1_dlops_nchw_kcyx_nkhw.hpp +++ b/host/solver/include/conv_igemm_fwd_v6r1_dlops_nchw_kcyx_nkhw.hpp @@ -8,41 +8,6 @@ namespace driver { struct CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw { - ck::DataTypeEnum_t ABDataTypeEnum; - ck::DataTypeEnum_t AccDataTypeEnum; - ck::DataTypeEnum_t CDataTypeEnum; - - int BlockSize; - - int GN0; - int GK1; - - int GM1PerBlockGM11; - int GN1PerBlockGN11; - int GK0PerBlock; - - int BM1PerThreadBM11; - int BN1PerThreadBN11; - int BK0PerThread; - - std::array BM10BN10ThreadClusterBM10Xs; - std::array BM10BN10ThreadClusterBN10Xs; - - std::array ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1; - std::array ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1; - std::array ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1; - std::array ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1; - - std::array BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1; - std::array BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1; - std::array BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1; - std::array BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1; - - int CThreadTransferDstScalarPerVector; - - bool HasMainKBlockLoop; - bool HasDoubleTailKBlockLoop; - auto GetCompileParameterString() const { // clang-format off @@ -128,11 +93,46 @@ struct CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw " -DCK_PARAM_CThreadTransferDstScalarPerVector=" + std::to_string(CThreadTransferDstScalarPerVector) + " -DCK_PARAM_HasMainKBlockLoop=" + - std::to_string(HasMainKBlockLoop) + + std::to_string(static_cast(HasMainKBlockLoop)) + " -DCK_PARAM_HasDoubleTailKBlockLoop=" + - std::to_string(HasDoubleTailKBlockLoop); + std::to_string(static_cast(HasDoubleTailKBlockLoop)); // clang-format on } + + ck::DataTypeEnum_t ABDataTypeEnum; + ck::DataTypeEnum_t AccDataTypeEnum; + ck::DataTypeEnum_t CDataTypeEnum; + + int BlockSize; + + int GN0; + int GK1; + + int GM1PerBlockGM11; + int GN1PerBlockGN11; + int GK0PerBlock; + + int BM1PerThreadBM11; + int BN1PerThreadBN11; + int BK0PerThread; + + std::array BM10BN10ThreadClusterBM10Xs; + std::array BM10BN10ThreadClusterBN10Xs; + + std::array ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1; + std::array ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1; + std::array ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1; + std::array ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1; + + std::array BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1; + std::array BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1; + std::array BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1; + std::array BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1; + + int CThreadTransferDstScalarPerVector; + + bool HasMainKBlockLoop; + bool HasDoubleTailKBlockLoop; }; struct TunableConvIgemmFwdV6r1DlopsNchwKcyxNkhw @@ -230,8 +230,6 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw CalculateCompileParameterBasedOnTunable(const ConvolutionProblemDescriptor& conv_problem_desc, const TunableConvIgemmFwdV6r1DlopsNchwKcyxNkhw& tunable) { - using namespace ck; - const int C = conv_problem_desc.C; const int Y = conv_problem_desc.Y; const int X = conv_problem_desc.X; @@ -248,12 +246,17 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw DataTypeEnum_t AccDataTypeEnum; - switch(ABDataTypeEnum) + if(ABDataTypeEnum == DataTypeEnum_t::Float || ABDataTypeEnum == DataTypeEnum_t::Half) { - case DataTypeEnum_t::Float: - case DataTypeEnum_t::Half: AccDataTypeEnum = DataTypeEnum_t::Float; break; - case DataTypeEnum_t::Int8: AccDataTypeEnum = DataTypeEnum_t::Int32; break; - default: return std::make_tuple(CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw{}, false); + AccDataTypeEnum = DataTypeEnum_t::Float; + } + else if(ABDataTypeEnum == DataTypeEnum_t::Int8) + { + AccDataTypeEnum = DataTypeEnum_t::Int32; + } + else + { + return std::make_tuple(CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw{}, false); } const int BlockSize = tunable.BlockSize; @@ -343,7 +346,7 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw { for(const auto& tunable : generate_tunable_list_conv_igemm_fwd_v6r1_dlops_nchw_kcyx_nkhw()) { - CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw compile_param; + CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw compile_param{}; bool found = false; std::tie(compile_param, found) = @@ -369,8 +372,6 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw IsValidCompileParameter(const ConvolutionProblemDescriptor& conv_problem_desc, const CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw& compile_param) { - using namespace ck; - const int N = conv_problem_desc.N; const int K = conv_problem_desc.K; const int C = conv_problem_desc.C;