mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-21 21:39:15 +00:00
moe fp8 blockscale use nt (#3524)
* nt on fp8 blockscale
* some improve and tests needs to be fixed
* update
* fix format
* revert useless change
* revert any change in amd_buffer_coherence
[ROCm/composable_kernel commit: 32408c8bc0]
This commit is contained in:
@@ -80,7 +80,8 @@ template <typename ALayout,
|
||||
typename ComputeTypeA = CDataType,
|
||||
typename ComputeTypeB = ComputeTypeA,
|
||||
typename LDSTypeA = ComputeTypeA,
|
||||
typename LDSTypeB = ComputeTypeB>
|
||||
typename LDSTypeB = ComputeTypeB,
|
||||
bool NonTemporalLoadB = false>
|
||||
struct DeviceMoeGemmBlockScale
|
||||
: public DeviceGemmMultipleD_BlockScale_BPreshuffle<ALayout,
|
||||
BLayout,
|
||||
@@ -163,7 +164,8 @@ struct DeviceMoeGemmBlockScale
|
||||
ComputeTypeA,
|
||||
ComputeTypeB,
|
||||
LDSTypeA,
|
||||
LDSTypeB>;
|
||||
LDSTypeB,
|
||||
NonTemporalLoadB>;
|
||||
using GridwiseGemm64 = GridwiseGemmBase<math::max(NXdlPerWave64, 1)>;
|
||||
using GridwiseGemm32 = GridwiseGemmBase<NXdlPerWave32>;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user