Merge commit '60d3e8f504edd25569811b25b4b876d0a504b3b8' into develop

This commit is contained in:
assistant-librarian[bot]
2025-09-11 15:11:42 +00:00
parent 269824c6bb
commit 9541fc3ef3
22 changed files with 439 additions and 192 deletions

View File

@@ -25,6 +25,7 @@ struct ElementWiseKernel
{
return is_wave32() ? kBlockSize / 2 : kBlockSize;
}
template <typename... XDataType, typename Dims>
CK_TILE_DEVICE void operator()(const Dims lens,
const Dims input_strides,

View File

@@ -529,7 +529,10 @@ struct GroupedConvolutionBackwardDataKernel
return dim3(kargs.grid_size_, kargs.GemmBatch, kargs.k_batch);
}
// Host-side query for the launch block dimensions.
// Always yields dim3(kBlockSize); no device-dependent adjustment is made here.
CK_TILE_HOST static constexpr auto BlockSize()
{
    return dim3(kBlockSize);
}
// Host-side query for the launch block dimensions.
// Yields dim3(kBlockSize / 2) when is_wave32() is true, otherwise dim3(kBlockSize).
// NOTE(review): presumably the halving accounts for the narrower wavefront on
// wave32 devices -- confirm against the is_wave32() definition.
CK_TILE_HOST static constexpr auto BlockSize()
{
    if(is_wave32())
    {
        return dim3(kBlockSize / 2);
    }
    return dim3(kBlockSize);
}
CK_TILE_HOST static constexpr GroupedConvBwdDataKernelArgsSpecialized
MakeKernelArgs(const GroupedConvBwdDataHostArgs& hostArgs)

View File

@@ -392,7 +392,10 @@ struct GroupedConvolutionBackwardWeightKernel
TilePartitioner::GridSize(kargs.GemmM, kargs.GemmN), kargs.GemmBatch, kargs.k_batch);
}
// Reports the block dimensions used when launching this kernel from the host.
// The result is dim3(kBlockSize) unconditionally.
CK_TILE_HOST static constexpr auto BlockSize()
{
    return dim3(kBlockSize);
}
// Reports the block dimensions used when launching this kernel from the host.
// The size depends on is_wave32(): half of kBlockSize when it is true,
// the full kBlockSize when it is false.
CK_TILE_HOST static constexpr auto BlockSize()
{
    if(is_wave32())
    {
        // wave32 path: launch with half the nominal block size.
        return dim3(kBlockSize / 2);
    }
    else
    {
        return dim3(kBlockSize);
    }
}
CK_TILE_HOST static constexpr GroupedConvBwdWeightKernelArgsSpecialized
MakeKernelArgs(const GroupedConvBwdWeightHostArgs& hostArgs)

View File

@@ -398,7 +398,10 @@ struct GroupedConvolutionForwardKernel
TilePartitioner::GridSize(kargs.GemmM, kargs.GemmN), kargs.GemmBatch, kargs.k_batch);
}
// Block dimensions for a host-side launch of this kernel.
// Returns dim3(kBlockSize) without consulting the device.
CK_TILE_HOST static constexpr auto BlockSize()
{
    return dim3(kBlockSize);
}
// Block dimensions for a host-side launch of this kernel.
// Returns dim3(kBlockSize / 2) if is_wave32() is true, else dim3(kBlockSize).
// NOTE(review): this overload is declared without constexpr; confirm whether
// that is intentional (e.g. is_wave32() being a run-time query).
CK_TILE_HOST static auto BlockSize()
{
    if(!is_wave32())
    {
        return dim3(kBlockSize);
    }
    return dim3(kBlockSize / 2);
}
CK_TILE_HOST static constexpr GroupedConvFwdKernelArgsSpecialized
MakeKernelArgs(const GroupedConvFwdHostArgs& hostArgs)