mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-24 23:05:54 +00:00
Fix splitk preshuffle (#3137)
* Fix splitK multiply_multiply_wp
* Add tests for gemm_multiply_multiply_wp
* Add tests for gemm_universal_preshuffle (KBatch = 1)
* Add tests for gemm_blockscale_wp
* Fix splitk gemm universal preshuffle
* Run new tests on arch supporting fp8
* Restore example
* Fix strides profiler
* Fix tests
* Fix clang format
* Finalize profiler preshuffle with tolerances
* Minor improvements to splitk related changes
* Address review comments: clang format and ckProfiler typo
* Remove b_k_split_offset from SplitKBatchOffset struct
[ROCm/composable_kernel commit: 507d81c3af]
This commit is contained in:
@@ -425,6 +425,11 @@ struct DeviceGemm_Xdl_CShuffleV3_BPreshuffle : public DeviceGemmV2BPreshuffle<AL
|
||||
return false;
|
||||
}
|
||||
|
||||
if(arg.N % NPerBlock != 0 || arg.K % KPerBlock != 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if(get_warp_size() == 64)
|
||||
{
|
||||
if constexpr(NXdlPerWave64 > 0)
|
||||
|
||||
Reference in New Issue
Block a user