[CK TILE][AICK-439] Fix cshuffle epilogue wave per shuffle (#3364)

* [CK TILE] Fix cshufle epligoue wave per shuffle

* Align shuffle per tile with smem

* fixes

* Fixes for double smem

* fix
This commit is contained in:
Bartłomiej Kocot
2025-12-15 12:59:48 +01:00
committed by GitHub
parent 3143a5a480
commit 3b773109e5
11 changed files with 73 additions and 34 deletions

View File

@@ -232,7 +232,7 @@ struct BatchedGemmKernel
if constexpr(GemmPipeline::DoubleSmemBuffer == true)
{
__shared__ char smem_ptr1[GetSmemSize()];
__shared__ char smem_ptr1[GemmPipeline::GetSmemSize()];
UniversalGemmKernel::RunGemm2LDS({a_ptr},
{b_ptr},
{/*ds_ptr*/},

View File

@@ -310,7 +310,7 @@ struct GroupedGemmKernel
if constexpr(GemmPipeline::DoubleSmemBuffer == true)
{
__shared__ char smem_ptr_1[GetSmemSize()];
__shared__ char smem_ptr_1[GemmPipeline::GetSmemSize()];
RunGemmWithPipelineSelection2LDS(a_ptr,
b_ptr,
c_ptr,

View File

@@ -1084,7 +1084,7 @@ struct UniversalGemmKernel
if constexpr(GemmPipeline::DoubleSmemBuffer == true)
{
__shared__ char smem_ptr_1[GetSmemSize()];
__shared__ char smem_ptr_1[GemmPipeline::GetSmemSize()];
if constexpr(!(EpiloguePipeline::MemoryOperation == memory_operation_enum::atomic_add &&
EpiloguePipeline::GetVectorSizeC() % 2 != 0 &&
is_any_of<EDataType, fp16_t, bf16_t>::value))
@@ -1169,7 +1169,7 @@ struct UniversalGemmKernel
// Run the GEMM
if constexpr(GemmPipeline::DoubleSmemBuffer == true)
{
__shared__ char smem_ptr_1[GetSmemSize()];
__shared__ char smem_ptr_1[GemmPipeline::GetSmemSize()];
if constexpr(!(EpiloguePipeline::MemoryOperation ==
memory_operation_enum::atomic_add &&
EpiloguePipeline::GetVectorSizeC() % 2 != 0 &&