mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-29 19:28:33 +00:00
LLVM selects `v_mfma` instead of `v_mfma_scale` for
`__builtin_amdgcn_mfma_scale_f32_..._f8f6f4` only if the scales are literal 0 values.
See llvm/lib/Target/AMDGPU/SIInstrInfo.td:317
// Optimize v_mfma_scale* instructions to avoid the scale if the
// scales are known 0.
//
// PatFrag wrapper around the scaled-MFMA intrinsic passed as `intrin`. The
// fragment exposes only five operands ($srca, $srcb, $srcc, $cbsz, $blgp) and
// matches a call to `intrin` whose four trailing operands are, in order,
// srcvalue, 0, srcvalue, 0.
// NOTE(review): presumably the two literal 0 operands are the A/B scale
// values and the two srcvalue operands are wildcards for the accompanying
// scale-select operands -- confirm against the intrinsic definition.
class UnscaledMFMAOptimizationPat<SDPatternOperator intrin> : PatFrag<
// Operands visible to users of the fragment.
(ops node:$srca, node:$srcb, node:$srcc,
node:$cbsz, node:$blgp),
// Matched form: the five operands forwarded in the same order, followed by
// the fixed four-operand tail.
(intrin $srca, $srcb, $srcc, $cbsz, $blgp,
srcvalue, 0, srcvalue, 0)
>;
// Instantiate UnscaledMFMAOptimizationPat for the two f8f6f4 scaled-MFMA
// intrinsics (16x16x128 and 32x32x64). Each resulting fragment takes the name
// of the intrinsic without the `int_amdgcn_` prefix and the `scale_` infix.
def mfma_f32_16x16x128_f8f6f4 : UnscaledMFMAOptimizationPat<int_amdgcn_mfma_scale_f32_16x16x128_f8f6f4>;
def mfma_f32_32x32x64_f8f6f4 : UnscaledMFMAOptimizationPat<int_amdgcn_mfma_scale_f32_32x32x64_f8f6f4>;