diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp
index cc343f6f69..fb9550ca2a 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp
@@ -699,9 +699,9 @@ struct DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle
             if constexpr(isMultiA || isMultiB)
             {
                 const auto as_grid_desc_ak0_m_ak1 =
-                    generate_tuple([&](auto) { return a_grid_desc_m_k_; }, Number<NumATensor>{});
+                    make_uniform_tuple(a_grid_desc_m_k_, Number<NumATensor>{});
                 const auto bs_grid_desc_bk0_n_bk1 =
-                    generate_tuple([&](auto) { return b_grid_desc_n_k_; }, Number<NumBTensor>{});
+                    make_uniform_tuple(b_grid_desc_n_k_, Number<NumBTensor>{});
 
                 if(GridwiseGemm::CheckValidity(as_grid_desc_ak0_m_ak1,
                                                bs_grid_desc_bk0_n_bk1,
diff --git a/include/ck/utility/tuple_helper.hpp b/include/ck/utility/tuple_helper.hpp
index 22f1ded15f..9ae01792cd 100644
--- a/include/ck/utility/tuple_helper.hpp
+++ b/include/ck/utility/tuple_helper.hpp
@@ -59,6 +59,28 @@ __host__ __device__ constexpr auto generate_identity_sequences(Number<N>)
     return generate_identity_sequences<N>();
 }
 
+// Optimized helper for common pattern: generate_tuple([&](auto) { return value; }, Number<N>{})
+// Creates Tuple<T, T, ..., T> (N copies) without lambda instantiation
+namespace detail {
+template <typename T, index_t... Is>
+__host__ __device__ constexpr auto make_uniform_tuple_impl(T&& value, Sequence<Is...>)
+{
+    return make_tuple(((void)Is, value)...);
+}
+} // namespace detail
+
+template <index_t N, typename T>
+__host__ __device__ constexpr auto make_uniform_tuple(T&& value)
+{
+    return detail::make_uniform_tuple_impl(static_cast<T&&>(value), make_index_sequence<N>{});
+}
+
+template <index_t N, typename T>
+__host__ __device__ constexpr auto make_uniform_tuple(T&& value, Number<N>)
+{
+    return make_uniform_tuple<N>(static_cast<T&&>(value));
+}
+
 // tx and ty are tuple of references, return type of will tuple of referennce (not rvalue)
 template <typename... X, typename... Y>
 __host__ __device__ constexpr auto concat_tuple_of_reference(const Tuple<X&...>& tx,