Files
composable_kernel/example/68_gemm_add/gemm_add_wmma_fp16.cpp
2025-09-05 15:12:51 +00:00

77 lines
1.4 KiB
C++

// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
using ADataType = F16;
using BDataType = F16;
using AccDataType = F32;
using CShuffleDataType = F32;
using DDataType = F16;
using DsDataType = F16_Tuple;
using EDataType = F16;
using ALayout = Row;
using BLayout = Row;
using DLayout = Row;
using DsLayout = Row_Tuple;
using ELayout = Row;
using AElementOp = PassThrough;
using BElementOp = PassThrough;
using CDEElementOp = Add;
static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecialization::MNKPadding;
using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultipleD_Wmma_CShuffleV3<
Row,
Row,
Row_Tuple,
Row,
F16,
F16,
F16_Tuple,
F16,
F32,
F32,
PassThrough,
PassThrough,
Add,
GemmSpec,
128,
128,
64,
64,
8,
8,
16,
16,
4,
2,
S<4, 32, 1>,
S<1, 0, 2>,
S<1, 0, 2>,
2,
8,
8,
0,
S<4, 32, 1>,
S<0, 2, 1>,
S<0, 2, 1>,
1,
1,
8,
0,
1,
1,
S<1, 32, 1, 4>,
S<8, 8, 8>,
ck::BlockGemmPipelineScheduler::Intrawave,
ck::BlockGemmPipelineVersion::v1>;
// clang-format on
#include "run_gemm_add_example_wmma.inc"
int main(int argc, char* argv[]) { return !run_gemm_add_example(argc, argv); }