// SPDX-License-Identifier: MIT // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. #pragma once #include "ck_tile/core.hpp" #include "ck_tile/ops/gemm/warp/warp_gemm_impl.hpp" #include "ck_tile/ops/gemm/warp/warp_gemm_attribute_mfma.hpp" namespace ck_tile { // fp16 using WarpGemmMfmaF16F16F32M32N32K8 = WarpGemmImpl< WarpGemmAtrributeMfma>>; using WarpGemmMfmaF16F16F32M16N16K16 = WarpGemmImpl< WarpGemmAtrributeMfma>>; using WarpGemmMfmaF16F16F32M32N32K16 = WarpGemmImpl, 2>>; using WarpGemmMfmaF16F16F32M16N16K32 = WarpGemmImpl, 2>>; using WarpGemmMfmaF16F16F32M32N32K8SwizzleA = WarpGemmImpl, 1>>; using WarpGemmMfmaF16F16F32M32N32K16SwizzleA = WarpGemmImpl, 2>>; using WarpGemmMfmaF16F16F32M32N32K8TransposedCDistribution = WarpGemmImpl>>; using WarpGemmMfmaF16F16F32M16N16K16TransposedCDistribution = WarpGemmImpl>>; using WarpGemmMfmaF16F16F32M32N32K16TransposedCDistribution = WarpGemmImpl, 2>>; using WarpGemmMfmaF16F16F32M16N16K32TransposedCDistribution = WarpGemmImpl, 2>>; using WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution = WarpGemmImpl, 2>>; using WarpGemmMfmaF16F16F32M4N64K16 = WarpGemmImpl, 4>>; using WarpGemmMfmaF16F16F32M64N4K16 = WarpGemmImpl, 4>>; // bf16 using WarpGemmMfmaBf16Bf16F32M32N32K8 = WarpGemmImpl< WarpGemmAtrributeMfma>>; using WarpGemmMfmaBf16Bf16F32M16N16K16 = WarpGemmImpl< WarpGemmAtrributeMfma>>; using WarpGemmMfmaBf16Bf16F32M32N32K16 = WarpGemmImpl, 2>>; using WarpGemmMfmaBf16Bf16F32M16N16K32 = WarpGemmImpl, 2>>; using WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleA = WarpGemmImpl, 1>>; using WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleA = WarpGemmImpl, 2>>; using WarpGemmMfmaBf16Bf16F32M32N32K8TransposedCDistribution = WarpGemmImpl>>; using WarpGemmMfmaBf16Bf16F32M16N16K16TransposedCDistribution = WarpGemmImpl>>; using WarpGemmMfmaBf16Bf16F32M32N32K16TransposedCDistribution = WarpGemmImpl, 2>>; using WarpGemmMfmaBf16Bf16F32M16N16K32TransposedCDistribution = WarpGemmImpl, 2>>; using WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleBTransposedCDistribution = WarpGemmImpl, 2>>; using WarpGemmMfmaBf16Bf16F32M4N64K16 = WarpGemmImpl, 4>>; using WarpGemmMfmaBf16Bf16F32M64N4K16 = WarpGemmImpl, 4>>; // fp8 using WarpGemmMfma_f32_32x32x16_fp8_fp8 = WarpGemmImpl< WarpGemmAtrributeMfma>>; using WarpGemmMfma_f32_32x32x16_fp8_bf8 = WarpGemmImpl< WarpGemmAtrributeMfma>>; using WarpGemmMfma_f32_32x32x16_bf8_fp8 = WarpGemmImpl< WarpGemmAtrributeMfma>>; using WarpGemmMfma_f32_32x32x16_bf8_bf8 = WarpGemmImpl< WarpGemmAtrributeMfma>>; using WarpGemmMfma_f32_32x32x16_fp8_fp8_CTransposed = WarpGemmImpl>>; using WarpGemmMfma_f32_32x32x16_fp8_bf8_CTransposed = WarpGemmImpl>>; using WarpGemmMfma_f32_32x32x16_bf8_fp8_CTransposed = WarpGemmImpl>>; using WarpGemmMfma_f32_32x32x16_bf8_bf8_CTransposed = WarpGemmImpl>>; template using WarpGemmMfmaFp8Fp8F32M32N32K16SwizzleBTransposedCDistribution = WarpGemmImpl, 2, swizzle_factor>>; } // namespace ck_tile