diff --git a/composable_kernel/include/tensor_description/cluster_descriptor.hpp b/composable_kernel/include/tensor_description/cluster_descriptor.hpp index c3523623d9..d69bfb70c1 100644 --- a/composable_kernel/include/tensor_description/cluster_descriptor.hpp +++ b/composable_kernel/include/tensor_description/cluster_descriptor.hpp @@ -8,7 +8,7 @@ namespace ck { template ::type> -__host__ __device__ constexpr auto make_cluster_descriptor_v2( +__host__ __device__ constexpr auto make_cluster_descriptor( const Lengths& lengths, ArrangeOrder order = typename arithmetic_sequence_gen<0, Lengths::Size(), 1>::type{}) { diff --git a/composable_kernel/include/tensor_description/multi_index_transform.hpp b/composable_kernel/include/tensor_description/multi_index_transform.hpp index a33b9aee8d..42a5a875b7 100644 --- a/composable_kernel/include/tensor_description/multi_index_transform.hpp +++ b/composable_kernel/include/tensor_description/multi_index_transform.hpp @@ -481,11 +481,11 @@ struct Merge_v1_carry_check using LowerIndex = MultiIndex; using UpperIndex = MultiIndex<1>; - using LowLengthsScan = decltype( - container_reverse_exclusive_scan(LowLengths{}, math::multiplies_v2{}, Number<1>{})); + using LowLengthsScan = + decltype(container_reverse_exclusive_scan(LowLengths{}, math::multiplies{}, Number<1>{})); using UpLengths = - decltype(make_tuple(container_reduce(LowLengths{}, math::multiplies_v2{}, Number<1>{}))); + decltype(make_tuple(container_reduce(LowLengths{}, math::multiplies{}, Number<1>{}))); LowLengths low_lengths_; LowLengthsScan low_lengths_scan_; @@ -496,8 +496,8 @@ struct Merge_v1_carry_check __host__ __device__ constexpr Merge_v1_carry_check(const LowLengths& low_lengths) : low_lengths_{low_lengths}, low_lengths_scan_{ - container_reverse_exclusive_scan(low_lengths, math::multiplies_v2{}, Number<1>{})}, - up_lengths_{make_tuple(container_reduce(low_lengths, math::multiplies_v2{}, Number<1>{}))} + container_reverse_exclusive_scan(low_lengths, math::multiplies{}, Number<1>{})}, + up_lengths_{make_tuple(container_reduce(low_lengths, math::multiplies{}, Number<1>{}))} { static_assert(LowerIndex::Size() == NDimLow, "wrong!"); } @@ -1037,7 +1037,7 @@ struct Merge_v2_magic_division using UpperIndex = MultiIndex<1>; using UpLengths = - decltype(make_tuple(container_reduce(LowLengths{}, math::multiplies_v2{}, Number<1>{}))); + decltype(make_tuple(container_reduce(LowLengths{}, math::multiplies{}, Number<1>{}))); using LowLengthsMagicDivisorMultipiler = decltype( generate_tuple(lambda_merge_generate_MagicDivision_calculate_magic_multiplier{}, @@ -1062,7 +1062,7 @@ struct Merge_v2_magic_division low_lengths_magic_divisor_shift_{generate_tuple( [&](auto i) { return MagicDivision::CalculateMagicShift(low_lengths[i]); }, Number{})}, - up_lengths_{make_tuple(container_reduce(low_lengths, math::multiplies_v2{}, Number<1>{}))} + up_lengths_{make_tuple(container_reduce(low_lengths, math::multiplies{}, Number<1>{}))} { static_assert(LowerIndex::Size() == NDimLow, "wrong!"); } @@ -1188,11 +1188,11 @@ struct Merge_v2r2_magic_division using LowerIndex = MultiIndex; using UpperIndex = MultiIndex<1>; - using LowLengthsScan = decltype( - container_reverse_exclusive_scan(LowLengths{}, math::multiplies_v2{}, Number<1>{})); + using LowLengthsScan = + decltype(container_reverse_exclusive_scan(LowLengths{}, math::multiplies{}, Number<1>{})); using UpLengths = - decltype(make_tuple(container_reduce(LowLengths{}, math::multiplies_v2{}, Number<1>{}))); + decltype(make_tuple(container_reduce(LowLengths{}, math::multiplies{}, Number<1>{}))); using LowLengthsScanMagicDivisorMultipiler = decltype(generate_tuple( lambda_merge_generate_MagicDivision_calculate_magic_multiplier{}, @@ -1213,14 +1213,14 @@ struct Merge_v2r2_magic_division __host__ __device__ constexpr Merge_v2r2_magic_division(const LowLengths& low_lengths) : low_lengths_{low_lengths}, low_lengths_scan_{ - container_reverse_exclusive_scan(low_lengths, math::multiplies_v2{}, Number<1>{})}, + container_reverse_exclusive_scan(low_lengths, math::multiplies{}, Number<1>{})}, low_lengths_scan_magic_divisor_multiplier_{generate_tuple( [&](auto i) { return MagicDivision::CalculateMagicMultiplier(low_lengths_scan_[i]); }, Number{})}, low_lengths_scan_magic_divisor_shift_{generate_tuple( [&](auto i) { return MagicDivision::CalculateMagicShift(low_lengths_scan_[i]); }, Number{})}, - up_lengths_{make_tuple(container_reduce(low_lengths, math::multiplies_v2{}, Number<1>{}))} + up_lengths_{make_tuple(container_reduce(low_lengths, math::multiplies{}, Number<1>{}))} { static_assert(LowerIndex::Size() == NDimLow, "wrong!"); } @@ -1336,7 +1336,7 @@ struct UnMerge using UpperIndex = MultiIndex; using UpLengthsScan = - decltype(container_reverse_exclusive_scan(UpLengths{}, math::multiplies_v2{}, Number<1>{})); + decltype(container_reverse_exclusive_scan(UpLengths{}, math::multiplies{}, Number<1>{})); UpLengths up_lengths_; UpLengthsScan up_lengths_scan_; @@ -1346,7 +1346,7 @@ struct UnMerge __host__ __device__ constexpr UnMerge(const UpLengths& up_lengths) : up_lengths_{up_lengths}, up_lengths_scan_{ - container_reverse_exclusive_scan(up_lengths, math::multiplies_v2{}, Number<1>{})} + container_reverse_exclusive_scan(up_lengths, math::multiplies{}, Number<1>{})} { } diff --git a/composable_kernel/include/tensor_description/tensor_adaptor.hpp b/composable_kernel/include/tensor_description/tensor_adaptor.hpp index f684ce5e0f..3b647e433a 100644 --- a/composable_kernel/include/tensor_description/tensor_adaptor.hpp +++ b/composable_kernel/include/tensor_description/tensor_adaptor.hpp @@ -64,7 +64,7 @@ struct TensorAdaptor Number{}); // TODO: make container_reduce support tuple of Number and index_t - return container_reduce(lengths, math::multiplies_v2{}, Number<1>{}); + return container_reduce(lengths, math::multiplies{}, Number<1>{}); } template diff --git a/composable_kernel/include/tensor_description/tensor_descriptor.hpp b/composable_kernel/include/tensor_description/tensor_descriptor.hpp index 4038ef63da..a6a57ba63b 100644 --- a/composable_kernel/include/tensor_description/tensor_descriptor.hpp +++ b/composable_kernel/include/tensor_description/tensor_descriptor.hpp @@ -69,7 +69,7 @@ struct TensorDescriptor Number{}); // TODO: make container_reduce support tuple of Number and index_t - return container_reduce(lengths, math::multiplies_v2{}, Number<1>{}); + return container_reduce(lengths, math::multiplies{}, Number<1>{}); } template diff --git a/composable_kernel/include/tensor_description/tensor_descriptor_helper.hpp b/composable_kernel/include/tensor_description/tensor_descriptor_helper.hpp index cf329f06a5..ad75f9245e 100644 --- a/composable_kernel/include/tensor_description/tensor_descriptor_helper.hpp +++ b/composable_kernel/include/tensor_description/tensor_descriptor_helper.hpp @@ -38,8 +38,8 @@ __host__ __device__ constexpr auto calculate_element_space_size_impl(const Lengt template ::type = false> -__host__ __device__ constexpr auto make_naive_tensor_descriptor_v2(const Tuple& lengths, - const Tuple& strides) +__host__ __device__ constexpr auto make_naive_tensor_descriptor(const Tuple& lengths, + const Tuple& strides) { constexpr index_t N = sizeof...(Lengths); @@ -100,7 +100,7 @@ make_naive_tensor_descriptor_packed(const Tuple& lengths) constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<1, N + 1, 1>::type{}; - const auto element_space_size = container_reduce(lengths, math::multiplies_v2{}, Number<1>{}); + const auto element_space_size = container_reduce(lengths, math::multiplies{}, Number<1>{}); return TensorDescriptor, remove_cv_t, @@ -112,7 +112,7 @@ make_naive_tensor_descriptor_packed(const Tuple& lengths) template __host__ __device__ constexpr auto -make_naive_tensor_descriptor_aligned_v2(const Tuple& lengths, Align align) +make_naive_tensor_descriptor_aligned(const Tuple& lengths, Align align) { constexpr auto I1 = Number<1>{}; @@ -133,7 +133,7 @@ make_naive_tensor_descriptor_aligned_v2(const Tuple& lengths, Align else { return container_reduce(lengths, - math::multiplies_v2{}, + math::multiplies{}, Number{}, i + I1, Number{}, @@ -142,7 +142,7 @@ make_naive_tensor_descriptor_aligned_v2(const Tuple& lengths, Align }, Number{}); - return make_naive_tensor_descriptor_v2(lengths, strides); + return make_naive_tensor_descriptor(lengths, strides); } } // namespace ck diff --git a/composable_kernel/include/tensor_operation/blockwise_tensor_slice_transfer.hpp b/composable_kernel/include/tensor_operation/blockwise_tensor_slice_transfer.hpp index cf21123de6..0214b71352 100644 --- a/composable_kernel/include/tensor_operation/blockwise_tensor_slice_transfer.hpp +++ b/composable_kernel/include/tensor_operation/blockwise_tensor_slice_transfer.hpp @@ -143,7 +143,7 @@ struct BlockwiseTensorSliceTransfer_v4 private: static constexpr auto thread_cluster_desc_ = - make_cluster_descriptor_v2(ThreadClusterLengths{}, ThreadClusterArrangeOrder{}); + make_cluster_descriptor(ThreadClusterLengths{}, ThreadClusterArrangeOrder{}); using ThreadwiseTransfer = ThreadwiseTensorSliceTransfer_v3{}, GM0, I1, Number{}, GK1), max_lds_align); // B matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto b_block_desc_gk0_gn0_gn10_gn11_gk1 = make_naive_tensor_descriptor_aligned_v2( + constexpr auto b_block_desc_gk0_gn0_gn10_gn11_gk1 = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, GN0, I1, Number{}, GK1), max_lds_align); @@ -248,10 +248,10 @@ struct GridwiseContractionDlops_A_GK0_GM0_GM1_GK1_B_GK0_GN0_GN1_GK1_C_GM0_GM1_GN constexpr auto BN = GN0 * GN11; constexpr auto BM1 = - Number{}; constexpr auto BN1 = - Number{}; constexpr auto BM0 = BM / BM1; @@ -354,24 +354,24 @@ struct GridwiseContractionDlops_A_GK0_GM0_GM1_GK1_B_GK0_GN0_GN1_GK1_C_GM0_GM1_GN // A matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto a_block_desc_gk0_gm0_gm10_gm11_gk1 = make_naive_tensor_descriptor_aligned_v2( + constexpr auto a_block_desc_gk0_gm0_gm10_gm11_gk1 = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, GM0, I1, Number{}, GK1), max_lds_align); // B matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto b_block_desc_gk0_gn0_gn10_gn11_gk1 = make_naive_tensor_descriptor_aligned_v2( + constexpr auto b_block_desc_gk0_gn0_gn10_gn11_gk1 = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, GN0, I1, Number{}, GK1), max_lds_align); // A matrix in LDS memory for blockwise GEMM // be careful of LDS alignment - constexpr auto a_block_desc_gk0_bm_gk1 = make_naive_tensor_descriptor_aligned_v2( + constexpr auto a_block_desc_gk0_bm_gk1 = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, GM0 * Number{}, GK1), max_lds_align); // B matrix in LDS memory for blockwise GEMM // be careful of LDS alignment - constexpr auto b_block_desc_gk0_bn_gk1 = make_naive_tensor_descriptor_aligned_v2( + constexpr auto b_block_desc_gk0_bn_gk1 = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, GN0 * Number{}, GK1), max_lds_align); static_assert(a_block_desc_gk0_gm0_gm10_gm11_gk1.GetElementSpaceSize() == diff --git a/composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v1r2.hpp b/composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v1r2.hpp index 31a0fa342a..d91159b884 100644 --- a/composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v1r2.hpp +++ b/composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v1r2.hpp @@ -166,12 +166,12 @@ struct GridwiseGemmDlops_km_kn_mn_v1r2 // A matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto a_k_m_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto a_k_m_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, Number{}), max_lds_align); // B matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto b_k_n_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto b_k_n_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, Number{}), max_lds_align); // LDS allocation for A and B: be careful of alignment @@ -351,22 +351,22 @@ struct GridwiseGemmDlops_km_kn_mn_v1r2 // A matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto a_k_m_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto a_k_m_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, Number{}), max_lds_align); // B matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto b_k_n_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto b_k_n_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, Number{}), max_lds_align); // A matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto a_k_m0_m1_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto a_k_m0_m1_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, I1, Number{}), max_lds_align); // B matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto b_k_n0_n1_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto b_k_n0_n1_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, I1, Number{}), max_lds_align); // A matrix blockwise copy diff --git a/composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v1r3.hpp b/composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v1r3.hpp index 1017dcc2a1..2653dd4340 100644 --- a/composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v1r3.hpp +++ b/composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v1r3.hpp @@ -163,12 +163,12 @@ struct GridwiseGemmDlops_km_kn_mn_v1r3 // TODO: check alignment // A matrix in LDS memory, dst of blockwise copy - constexpr auto a_k_m_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto a_k_m_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, Number{}, K1), max_lds_align); // TODO: check alignment // B matrix in LDS memory, dst of blockwise copy - constexpr auto b_k_n_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto b_k_n_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, Number{}, K1), max_lds_align); // TODO: check alignment @@ -274,10 +274,10 @@ struct GridwiseGemmDlops_km_kn_mn_v1r3 const auto N0 = N / N1; constexpr auto M11 = - Number{}; constexpr auto N11 = - Number{}; constexpr auto M10 = M1 / M11; @@ -354,23 +354,23 @@ struct GridwiseGemmDlops_km_kn_mn_v1r3 // TODO: check alignment // A matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto a_k0_m0_m1_k1_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto a_k0_m0_m1_k1_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, I1, Number{}, K1), max_lds_align); // TODO: check alignment // B matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto b_k0_n0_n1_k1_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto b_k0_n0_n1_k1_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, I1, Number{}, K1), max_lds_align); // TODO: check alignment // A matrix in LDS memory, for blockwise GEMM - constexpr auto a_k0_m_k1_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto a_k0_m_k1_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, Number{}, K1), max_lds_align); // TODO: check alignment // B matrix in LDS memory, for blockwise GEMM - constexpr auto b_k0_n_k1_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto b_k0_n_k1_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, Number{}, K1), max_lds_align); static_assert(a_k0_m0_m1_k1_block_desc.GetElementSpaceSize() == diff --git a/composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v2.hpp b/composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v2.hpp index b141307b77..84ee6f40ec 100644 --- a/composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v2.hpp +++ b/composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v2.hpp @@ -58,7 +58,7 @@ struct GridwiseGemmDlops_km_kn_mn_v3 // A matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto a_e_k_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto a_e_k_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, Number{}), max_lds_align); // LDS allocation for A and B: be careful of alignment @@ -132,10 +132,10 @@ struct GridwiseGemmDlops_km_kn_mn_v3 // A matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto a_e_k_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto a_e_k_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, Number{}), max_lds_align); - constexpr auto a_e_k_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto a_e_k_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, Number{}), max_lds_align); // B matrix in LDS memory, dst of blockwise copy diff --git a/composable_kernel/include/tensor_operation/gridwise_gemm_xdlops_v2r3.hpp b/composable_kernel/include/tensor_operation/gridwise_gemm_xdlops_v2r3.hpp index dcb16e5dcd..207f73072f 100644 --- a/composable_kernel/include/tensor_operation/gridwise_gemm_xdlops_v2r3.hpp +++ b/composable_kernel/include/tensor_operation/gridwise_gemm_xdlops_v2r3.hpp @@ -148,12 +148,12 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3 // A matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto a_k0_m_k1_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto a_k0_m_k1_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, Number{}, K1), max_lds_align); // B matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto b_k0_n_k1_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto b_k0_n_k1_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, Number{}, K1), max_lds_align); // LDS allocation for A and B: be careful of alignment @@ -290,12 +290,12 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3 // A matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto a_k0_m_k1_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto a_k0_m_k1_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, Number{}, K1), max_lds_align); // B matrix in LDS memory, dst of blockwise copy // be careful of LDS alignment - constexpr auto b_k0_n_k1_block_desc = make_naive_tensor_descriptor_aligned_v2( + constexpr auto b_k0_n_k1_block_desc = make_naive_tensor_descriptor_aligned( make_tuple(Number{}, Number{}, K1), max_lds_align); // A matrix blockwise copy diff --git a/composable_kernel/include/tensor_operation/threadwise_tensor_slice_transfer_v2.hpp b/composable_kernel/include/tensor_operation/threadwise_tensor_slice_transfer_v2.hpp index f069540343..ccac4b7b44 100644 --- a/composable_kernel/include/tensor_operation/threadwise_tensor_slice_transfer_v2.hpp +++ b/composable_kernel/include/tensor_operation/threadwise_tensor_slice_transfer_v2.hpp @@ -91,13 +91,13 @@ struct ThreadwiseTensorSliceTransfer_v3r1 container_reverse_exclusive_scan( container_reorder_given_new2old(src_vector_tensor_lengths, SrcVectorTensorContiguousDimOrder{}), - math::multiplies_v2{}, + math::multiplies{}, I1), SrcVectorTensorContiguousDimOrder{}); constexpr auto src_vector_desc = - make_naive_tensor_descriptor_v2(sequence_to_tuple_of_number(src_vector_tensor_lengths), - sequence_to_tuple_of_number(src_vector_tensor_strides)); + make_naive_tensor_descriptor(sequence_to_tuple_of_number(src_vector_tensor_lengths), + sequence_to_tuple_of_number(src_vector_tensor_strides)); // access order and lengths constexpr auto src_access_lengths = SliceLengths{} / src_vector_tensor_lengths; @@ -259,13 +259,13 @@ struct ThreadwiseTensorSliceTransfer_v3r1 container_reverse_exclusive_scan( container_reorder_given_new2old(dst_vector_tensor_lengths, DstVectorTensorContiguousDimOrder{}), - math::multiplies_v2{}, + math::multiplies{}, I1), DstVectorTensorContiguousDimOrder{}); constexpr auto dst_vector_desc = - make_naive_tensor_descriptor_v2(sequence_to_tuple_of_number(dst_vector_tensor_lengths), - sequence_to_tuple_of_number(dst_vector_tensor_strides)); + make_naive_tensor_descriptor(sequence_to_tuple_of_number(dst_vector_tensor_lengths), + sequence_to_tuple_of_number(dst_vector_tensor_strides)); // dst access order and lengths constexpr auto dst_access_lengths = SliceLengths{} / dst_vector_tensor_lengths; @@ -699,13 +699,13 @@ struct ThreadwiseTensorSliceTransfer_v4r1 container_reverse_exclusive_scan( container_reorder_given_new2old(src_vector_tensor_lengths, SrcVectorTensorContiguousDimOrder{}), - math::multiplies_v2{}, + math::multiplies{}, I1), SrcVectorTensorContiguousDimOrder{}); constexpr auto src_vector_desc = - make_naive_tensor_descriptor_v2(sequence_to_tuple_of_number(src_vector_tensor_lengths), - sequence_to_tuple_of_number(src_vector_tensor_strides)); + make_naive_tensor_descriptor(sequence_to_tuple_of_number(src_vector_tensor_lengths), + sequence_to_tuple_of_number(src_vector_tensor_strides)); // access order and lengths constexpr auto access_lengths = SliceLengths{} / src_vector_tensor_lengths; diff --git a/composable_kernel/include/utility/math.hpp b/composable_kernel/include/utility/math.hpp index bcb25a2941..48438e6179 100644 --- a/composable_kernel/include/utility/math.hpp +++ b/composable_kernel/include/utility/math.hpp @@ -28,13 +28,7 @@ struct minus __host__ __device__ constexpr T operator()(T a, T b) const { return a - b; } }; -template struct multiplies -{ - __host__ __device__ constexpr T operator()(T a, T b) const { return a * b; } -}; - -struct multiplies_v2 { template __host__ __device__ constexpr auto operator()(const A& a, const B& b) const