mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 14:59:17 +00:00
Do not use warpSize as compile time constant as it is removed (#2320)
* Do not use warpSize as compile time constant as it is removed
* Update tile_image_to_column_shape.hpp update warpSize usage.
* clean-up all use of warpSize, make sure code builds
* fix

---------

Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
Co-authored-by: illsilin <Illia.Silin@amd.com>
Co-authored-by: Bartlomiej Kocot <barkocot@amd.com>
This commit is contained in:
committed by
GitHub
parent
3af66e99ab
commit
4c57157d50
@@ -210,7 +210,7 @@ struct BlockReduce2dCrossWarpSync
|
||||
// | w0 | w1 | w2 | w3 | -----> | w0123 |
|
||||
//
|
||||
// -> also store data from every wave into LDS
|
||||
constexpr index_t num_warps = BlockShape::BlockSize / warpSize;
|
||||
constexpr index_t num_warps = BlockShape::BlockSize / get_warp_size();
|
||||
return num_warps * thread_buf_size * sizeof(DataType);
|
||||
}
|
||||
|
||||
@@ -226,7 +226,7 @@ struct BlockReduce2dCrossWarpSync
|
||||
const index_t lane_id = get_lane_id();
|
||||
const index_t warp_id = get_warp_id();
|
||||
constexpr auto num_reduce_warps = GetReduceWarps<YDistributedTensor_>();
|
||||
constexpr index_t num_warps = BlockShape::BlockSize / warpSize;
|
||||
constexpr index_t num_warps = BlockShape::BlockSize / get_warp_size();
|
||||
const index_t smem_offset = warp_id;
|
||||
|
||||
// skip if nothing to do
|
||||
|
||||
Reference in New Issue
Block a user