diff --git a/example/12_reduce/reduce_blockwise.cpp b/example/12_reduce/reduce_blockwise.cpp index f8299028da..031b6ae979 100644 --- a/example/12_reduce/reduce_blockwise.cpp +++ b/example/12_reduce/reduce_blockwise.cpp @@ -129,13 +129,11 @@ bool reduce_blockwise_test(bool do_verification, bool matched = false; int result = 0; - const auto tuple_object = reduce_shape_instances{}; - static_for<0, std::tuple_size::value, 1>{}([&](auto i) { if(matched) return; - using ShapeType = remove_cvref_t(tuple_object))>; + using ShapeType = std::tuple_element_t; if(ShapeType::Rank_ != inLengths.size() || ShapeType::NumReduceDim_ != reduceDims.size()) return; diff --git a/example/12_reduce/reduce_multiblock_atomic_add.cpp b/example/12_reduce/reduce_multiblock_atomic_add.cpp index 66fc2bb582..0f527ad68f 100644 --- a/example/12_reduce/reduce_multiblock_atomic_add.cpp +++ b/example/12_reduce/reduce_multiblock_atomic_add.cpp @@ -127,13 +127,11 @@ bool reduce_multiblock_atomic_add_test(bool do_verification, bool matched = false; int result = 0; - const auto tuple_object = reduce_shape_instances{}; - static_for<0, std::tuple_size::value, 1>{}([&](auto i) { if(matched) return; - using ShapeType = remove_cvref_t(tuple_object))>; + using ShapeType = std::tuple_element_t; if(ShapeType::Rank_ != inLengths.size() || ShapeType::NumReduceDim_ != reduceDims.size()) return; diff --git a/example/12_reduce/reduce_threadwise_multi_d.cpp b/example/12_reduce/reduce_threadwise_multi_d.cpp index ee06395771..df50242103 100644 --- a/example/12_reduce/reduce_threadwise_multi_d.cpp +++ b/example/12_reduce/reduce_threadwise_multi_d.cpp @@ -129,13 +129,11 @@ bool reduce_threadwise_multi_d_test(bool do_verification, bool matched = false; int result = 0; - const auto tuple_object = reduce_shape_instances{}; - static_for<0, std::tuple_size::value, 1>{}([&](auto i) { if(matched) return; - using ShapeType = remove_cvref_t(tuple_object))>; + using ShapeType = std::tuple_element_t; if(ShapeType::Rank_ != inLengths.size() || ShapeType::NumReduceDim_ != reduceDims.size()) return; diff --git a/library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp b/library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp index 3a88358341..d0520a9342 100644 --- a/library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp +++ b/library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp @@ -14,14 +14,18 @@ namespace tensor_operation { namespace device { namespace instance { +/** + * @brief Register device operation instances from a type container. + * @tparam BaseOp The base class that all operation instances must derive from. + * @tparam NewOpInstances A std::tuple (or ck::type_list) of device operation types. + * Only the type is used; the parameter value is unused (retained for type deduction). + */ template void add_device_operation_instances(std::vector>& op_instances, - const NewOpInstances& new_op_instances) + const NewOpInstances& /*new_op_instances*/) { ck::static_for<0, std::tuple_size_v, 1>{}([&](auto i) { - const auto new_op_instance = std::get(new_op_instances); - - using NewOpInstance = remove_cvref_t; + using NewOpInstance = std::tuple_element_t; if constexpr(std::is_same_v) { return; // We can use nullptr_t to enable trailing comma @@ -29,8 +33,13 @@ void add_device_operation_instances(std::vector>& op_ins else { static_assert(std::is_base_of_v, - "wrong! NewOpInstance should be derived from BaseOp"); - op_instances.push_back(std::make_unique(new_op_instance)); + "add_device_operation_instances: NewOpInstance must derive from BaseOp"); + static_assert( + std::is_default_constructible_v, + "add_device_operation_instances: NewOpInstance must be default-constructible; " + "registration default-constructs instances and ignores tuple values, so store " + "configuration in template parameters instead of constructor arguments."); + op_instances.push_back(std::make_unique()); } }); } diff --git a/library/include/ck/library/tensor_operation_instance/add_grouped_conv_bwd_wei_exp_device_operation_instance.hpp b/library/include/ck/library/tensor_operation_instance/add_grouped_conv_bwd_wei_exp_device_operation_instance.hpp index 594c9ca5a7..3bdf25778d 100644 --- a/library/include/ck/library/tensor_operation_instance/add_grouped_conv_bwd_wei_exp_device_operation_instance.hpp +++ b/library/include/ck/library/tensor_operation_instance/add_grouped_conv_bwd_wei_exp_device_operation_instance.hpp @@ -45,9 +45,11 @@ void add_explicit_gemm_device_operation_instances( DeviceGemmOp>; static_assert(std::is_base_of_v, - "wrong! NewOpInstance should be derived from BaseOp"); + "NewOpInstance must derive from BaseOp"); + static_assert(std::is_default_constructible_v, + "NewOpInstance must be default-constructible"); - op_instances.push_back(std::make_unique(NewOpInstance{})); + op_instances.push_back(std::make_unique()); }); } diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp index 1502d905b9..9917f81f1c 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp +++ b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp @@ -89,13 +89,12 @@ void add_device_reduce_instance_blockwise( { static_for<0, std::tuple_size::value, 1>{}( [&](auto i) { - using cfg1 = remove_cvref_t( - reduce_configuration_1_instances_blockwise{}))>; + using cfg1 = std::tuple_element_t; static_for<0, std::tuple_size::value, 1>{}( [&](auto j) { - using cfg2 = remove_cvref_t( - reduce_configuration_2_instances_blockwise{}))>; + using cfg2 = + std::tuple_element_t; using ReduceOpInstance = DeviceReduceMultiBlock; - device_op_instances.push_back( - std::make_unique(ReduceOpInstance{})); + device_op_instances.push_back(std::make_unique()); }); }); }; diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp index 76f58782c5..5fa81b3c73 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp +++ b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp @@ -90,14 +90,15 @@ void add_device_reduce_instance_multiblock_atomic_add( static_for<0, std::tuple_size::value, 1>{}([&](auto i) { - using cfg1 = remove_cvref_t( - reduce_configuration_1_instances_multiblock_atomic_add{}))>; + using cfg1 = + std::tuple_element_t; static_for<0, std::tuple_size::value, 1>{}([&](auto j) { - using cfg2 = remove_cvref_t( - reduce_configuration_2_instances_multiblock_atomic_add{}))>; + using cfg2 = + std::tuple_element_t; using ReduceOpInstance = DeviceReduceMultiBlock; - device_op_instances.push_back(std::make_unique(ReduceOpInstance{})); + device_op_instances.push_back(std::make_unique()); }); }); }; diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp index 250f266507..753b86e98a 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp +++ b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp @@ -77,8 +77,7 @@ void add_device_reduce_instance_threadwise( static_for<0, std::tuple_size::value, 1>{}( [&](auto j) { - using cfg2 = remove_cvref_t( - reduce_configuration_2_instances_threadwise{}))>; + using cfg2 = std::tuple_element_t; using ReduceOpInstance = DeviceReduceThreadWise; - device_op_instances.push_back(std::make_unique(ReduceOpInstance{})); + device_op_instances.push_back(std::make_unique()); }); }; diff --git a/profiler/include/profiler/profile_reduce_impl.hpp b/profiler/include/profiler/profile_reduce_impl.hpp index 191c57780c..4a32505be7 100644 --- a/profiler/include/profiler/profile_reduce_impl.hpp +++ b/profiler/include/profiler/profile_reduce_impl.hpp @@ -488,13 +488,11 @@ bool profile_reduce_impl(bool do_verification, using tuple_of_description_instances = tensor_operation::device::instance::reduce_description_instances; - const auto tuple_object = tuple_of_description_instances{}; - static_for<0, std::tuple_size::value, 1>{}([&](auto i) { if(matched) return; - using descType = remove_cvref_t(tuple_object))>; + using descType = std::tuple_element_t; if(!description_match( descType{}, inLengths.size(), reduceDims, ReduceOpId, PropagateNan, UseIndex))