diff --git a/example/ck_tile/05_reduce/reduce.hpp b/example/ck_tile/05_reduce/reduce.hpp index 856b065318..55e479591c 100644 --- a/example/ck_tile/05_reduce/reduce.hpp +++ b/example/ck_tile/05_reduce/reduce.hpp @@ -6,7 +6,7 @@ #include "ck_tile/core.hpp" #include "ck_tile/ops/common.hpp" #include "ck_tile/ops/reduce/block/block_reduce.hpp" -#include "ck_tile/ops/reduce2d/block/block_reduce2d_default_policy.hpp" +#include "ck_tile/ops/reduce/block/block_reduce2d_default_policy.hpp" namespace ck_tile { diff --git a/include/ck_tile/ops/add_rmsnorm2d_rdquant/pipeline/add_rmsnorm2d_rdquant_fwd_pipeline_default_policy.hpp b/include/ck_tile/ops/add_rmsnorm2d_rdquant/pipeline/add_rmsnorm2d_rdquant_fwd_pipeline_default_policy.hpp index ba20eb43ef..73ba633b15 100644 --- a/include/ck_tile/ops/add_rmsnorm2d_rdquant/pipeline/add_rmsnorm2d_rdquant_fwd_pipeline_default_policy.hpp +++ b/include/ck_tile/ops/add_rmsnorm2d_rdquant/pipeline/add_rmsnorm2d_rdquant_fwd_pipeline_default_policy.hpp @@ -4,8 +4,8 @@ #pragma once #include "ck_tile/core.hpp" -#include "ck_tile/ops/reduce2d/block/block_reduce2d_problem.hpp" -#include "ck_tile/ops/reduce2d/block/block_reduce2d.hpp" +#include "ck_tile/ops/reduce/block/block_reduce2d_problem.hpp" +#include "ck_tile/ops/reduce/block/block_reduce2d.hpp" namespace ck_tile { diff --git a/include/ck_tile/ops/reduce/block/block_reduce.hpp b/include/ck_tile/ops/reduce/block/block_reduce.hpp index 51d55235e8..d332f8afbe 100644 --- a/include/ck_tile/ops/reduce/block/block_reduce.hpp +++ b/include/ck_tile/ops/reduce/block/block_reduce.hpp @@ -6,6 +6,7 @@ #include "ck_tile/core.hpp" #include +// This file is deprecated, please use block_reduce2d.hpp namespace ck_tile { /* @@ -14,9 +15,9 @@ namespace ck_tile { */ // synchronize reduce result (cross lane reduction and broadcast on replicated dimension) template -CK_TILE_DEVICE void block_tile_reduce_sync(AccDistributedTensor_& acc_tensor, - const ReduceFunc& reduce_func, - bool_constant = {}) +[[deprecated]] CK_TILE_DEVICE void block_tile_reduce_sync(AccDistributedTensor_& acc_tensor, + const ReduceFunc& reduce_func, + bool_constant = {}) { using Dstr = typename AccDistributedTensor_::StaticTileDistribution; using DstrEncode = typename Dstr::DstrEncode; @@ -114,8 +115,8 @@ CK_TILE_DEVICE void block_tile_reduce_sync(AccDistributedTensor_& acc_tensor, * TODO: the limitation is to-be-reduced P dim can only mapping to one R dim? */ template -CK_TILE_DEVICE void block_tile_reduce_xor_sync(AccDistributedTensor_& acc_tensor, - const ReduceFunc& reduce_func) +[[deprecated]] CK_TILE_DEVICE void block_tile_reduce_xor_sync(AccDistributedTensor_& acc_tensor, + const ReduceFunc& reduce_func) { using Dstr = typename AccDistributedTensor_::StaticTileDistribution; using DstrEncode = typename Dstr::DstrEncode; @@ -173,10 +174,10 @@ template -CK_TILE_DEVICE void block_tile_reduce(AccDistributedTensor_& acc_tensor, - const InDistributedTensor_& in_tensor, - sequence, - const ReduceFunc& reduce_func) +[[deprecated]] CK_TILE_DEVICE void block_tile_reduce(AccDistributedTensor_& acc_tensor, + const InDistributedTensor_& in_tensor, + sequence, + const ReduceFunc& reduce_func) { constexpr auto I0 = number<0>{}; constexpr auto I1 = number<1>{}; @@ -248,10 +249,10 @@ template -CK_TILE_DEVICE auto block_tile_reduce(const InDistributedTensor_& in_tensor, - sequence in_reduce_dims, - const ReduceFunc& reduce_func, - const InDataType_& reduce_init) +[[deprecated]] CK_TILE_DEVICE auto block_tile_reduce(const InDistributedTensor_& in_tensor, + sequence in_reduce_dims, + const ReduceFunc& reduce_func, + const InDataType_& reduce_init) { using InDataType = typename InDistributedTensor_::DataType; using AccDataType = remove_cvref_t; @@ -281,7 +282,7 @@ CK_TILE_DEVICE auto block_tile_reduce(const InDistributedTensor_& in_tensor, // this version will call thread/warp+sync in one function call // template -struct BlockReduce2D +struct [[deprecated]] BlockReduce2D { using InDistributedTensor = remove_cvref_t; using InDataType = typename InDistributedTensor::DataType; @@ -376,6 +377,6 @@ struct BlockReduce2D // deduction guide template -CK_TILE_HOST_DEVICE_EXTERN BlockReduce2D(const T&, const typename T::DataType&)->BlockReduce2D; +CK_TILE_HOST_DEVICE_EXTERN BlockReduce2D(const T&, const typename T::DataType&) -> BlockReduce2D; } // namespace ck_tile diff --git a/include/ck_tile/ops/reduce2d/block/block_reduce2d.hpp b/include/ck_tile/ops/reduce/block/block_reduce2d.hpp similarity index 100% rename from include/ck_tile/ops/reduce2d/block/block_reduce2d.hpp rename to include/ck_tile/ops/reduce/block/block_reduce2d.hpp diff --git a/include/ck_tile/ops/reduce2d/block/block_reduce2d_default_policy.hpp b/include/ck_tile/ops/reduce/block/block_reduce2d_default_policy.hpp similarity index 96% rename from include/ck_tile/ops/reduce2d/block/block_reduce2d_default_policy.hpp rename to include/ck_tile/ops/reduce/block/block_reduce2d_default_policy.hpp index 403703870d..3c547242d5 100644 --- a/include/ck_tile/ops/reduce2d/block/block_reduce2d_default_policy.hpp +++ b/include/ck_tile/ops/reduce/block/block_reduce2d_default_policy.hpp @@ -4,8 +4,8 @@ #pragma once #include "ck_tile/core.hpp" -#include "ck_tile/ops/reduce2d/block/block_reduce2d_problem.hpp" -#include "ck_tile/ops/reduce2d/block/block_reduce2d.hpp" +#include "ck_tile/ops/reduce/block/block_reduce2d_problem.hpp" +#include "ck_tile/ops/reduce/block/block_reduce2d.hpp" namespace ck_tile { diff --git a/include/ck_tile/ops/reduce2d/block/block_reduce2d_problem.hpp b/include/ck_tile/ops/reduce/block/block_reduce2d_problem.hpp similarity index 100% rename from include/ck_tile/ops/reduce2d/block/block_reduce2d_problem.hpp rename to include/ck_tile/ops/reduce/block/block_reduce2d_problem.hpp diff --git a/include/ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_default_policy.hpp b/include/ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_default_policy.hpp index 421e0d0c7e..e4814cf455 100644 --- a/include/ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_default_policy.hpp +++ b/include/ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_default_policy.hpp @@ -4,8 +4,8 @@ #pragma once #include "ck_tile/core.hpp" -#include "ck_tile/ops/reduce2d/block/block_reduce2d_problem.hpp" -#include "ck_tile/ops/reduce2d/block/block_reduce2d.hpp" +#include "ck_tile/ops/reduce/block/block_reduce2d_problem.hpp" +#include "ck_tile/ops/reduce/block/block_reduce2d.hpp" namespace ck_tile {