Fixes to "General 2D Reduction Kernel" (#2535) (#2656)

* fix reduce2d

- revert the combine_partial_results() changes
- remove auto from function def

* clang-format
This commit is contained in:
Yashvardhan Agarwal
2025-08-11 16:01:33 +03:00
committed by GitHub
parent 1e1ee758fa
commit 191c62967b
4 changed files with 7 additions and 63 deletions

View File

@@ -189,7 +189,9 @@ struct Reduce
/// @note Requirements:
/// - y_continous_dim % ThreadTile_N == 0 (for proper thread distribution)
/// - input_strides[-1] == 1 (for contiguous memory access)
CK_TILE_HOST static bool IsSupportedArgument(index_t y_continous_dim, auto input_strides)
template <typename InputStrides>
CK_TILE_HOST static bool IsSupportedArgument(index_t y_continous_dim,
InputStrides input_strides)
{
using S = typename Problem::BlockShape;