[CK TILE][AICK-439] Fix cshuffle epilogue wave per shuffle (#3364)

* [CK TILE] Fix cshufle epligoue wave per shuffle

* Align shuffle per tile with smem

* fixes

* Fixes for double smem

* fix
This commit is contained in:
Bartłomiej Kocot
2025-12-15 12:59:48 +01:00
committed by GitHub
parent 3143a5a480
commit 3b773109e5
11 changed files with 73 additions and 34 deletions

View File

@@ -1048,7 +1048,7 @@ struct GroupedConvolutionBackwardDataKernel
if constexpr(GemmPipeline::DoubleSmemBuffer == true)
{
__shared__ char smem_ptr_1[GetSmemSize()];
__shared__ char smem_ptr_1[GemmPipeline::GetSmemSize()];
if constexpr(!(EpiloguePipeline::MemoryOperation == memory_operation_enum::atomic_add &&
GroupedConvTraitsType_::VectorSizeC % 2 != 0 &&
is_any_of<OutDataType, fp16_t, bf16_t>::value))

View File

@@ -1005,7 +1005,7 @@ struct GroupedConvolutionBackwardWeightKernel
if constexpr(GemmPipeline::DoubleSmemBuffer == true)
{
__shared__ char smem_ptr_1[GetSmemSize()];
__shared__ char smem_ptr_1[GemmPipeline::GetSmemSize()];
if constexpr(!(EpiloguePipeline::MemoryOperation ==
memory_operation_enum::atomic_add &&
GroupedConvTraitsType_::VectorSizeC % 2 != 0 &&

View File

@@ -1184,7 +1184,7 @@ struct GroupedConvolutionForwardKernel
if constexpr(GemmPipeline::DoubleSmemBuffer == true)
{
__shared__ char smem_ptr_1[GetSmemSize()];
__shared__ char smem_ptr_1[GemmPipeline::GetSmemSize()];
if constexpr(!(EpiloguePipeline::MemoryOperation ==
memory_operation_enum::atomic_add &&
GroupedConvTraitsType_::VectorSizeC % 2 != 0 &&