Files
composable_kernel/example/01_gemm/gemm_xdl_fp64.cpp
Po Yen Chen fa2d894be1 Add examples of Gemm (data type: int4) (#367)
* Add GEMM examples for int4

Currently the source files are just copied from int8 examples

* Re-use pre-defined alias in int4 exmples

* Distinguish user-side type from kernel-side type

* Add int4_t support for check_err()

* Allow conversion between Tensor<> specializations

* Re-format source files

* Use different type for host tensors

* Re-use CopyAsType<>() to implement copy ctor

* Re-use element-wise operation type alias

* Fix typo in alias names

* Complete the int4 examples

* Add constraint to Tensor<> templated methods

* Add type traits 'is_signed_integral<>'

* Add type constraints for integer version check_err<>()

* Allow comparing different-sized integral types in check_err()

* Check converted Tensor<int4_t> with golden Tensor<int8_t>

* Remove constraint of Tensor<>::CopyAsType()

* Avoid compilation error while disabling ck::int4_t support

* Remove debug messages

* Add #error directive to prevent compile sources with wrong setting

* Simplify tensor usages in examples

* Add constraint to check_err() input reference type

* Align design with other PR

* Use ""_uz to simplify example code

* Avoid too much generalizing check_err()

* Re-format GEMM instance template arguments

* Extract int4 example common codes

* Sort include directives

* Move #include directives into new header

* Move common codes together

* Re-format template argument in example code

* Reuse same implementation code for most of GEMM examples

* Re-format common.hpp

* Unify structured comment in examples

* Use reinterpret_cast<>() for cross-type pointer conversion

* Revert "Add type traits 'is_signed_integral<>'"

This reverts commit f2c148efae.

* Allow unsigned integer arguments for check_err()

* Fix compilation error in check_err()

* Remove unnecessary copy ctor for Tensor<>

* Mark Tensor<> special member functions as 'default'

* Use more strict condition to add code in examples

* Fix wrong program return value of GEMM examples

* Handle the case while user specify all the strides

* Fix never-ran examples

* Exit successfully if GEMM instance does not support given problem

* Add missing 'else' keyword

* Re-format CMakeLists.txt

* Add wrapper function to hide value conversion while copying memory

* Add new DeviceMem API to copy memory

* Use new DeviceMem API to implement examples

* Revert "Add new DeviceMem API to copy memory"

This reverts commit 3f190b0779.

* Add conversion ctor for Tensor<>

* Write Tensor<> conversion logics explicitly in example code

* Convert Tensor<> values after transfer data to host
2022-08-23 18:25:05 -05:00

47 lines
4.0 KiB
C++

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl.hpp"
using ADataType = double;
using BDataType = double;
using CDataType = double;
using AccDataType = double;
using ALayout = Row;
using BLayout = Col;
using CLayout = Row;
using AElementOp = PassThrough;
using BElementOp = PassThrough;
using CElementOp = PassThrough;
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
// clang-format off
using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmXdl
// ######| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer|
// ######| Type| Type| Type| Type| | | | Elementwise| Elementwise| Elementwise|Spacialization| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| SrcDstVectorDim| DstScalar|
// ######| | | | | | | | Operation| Operation| Operation| | | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | | PerVector|
// ######| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
#if 0
< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementOp, BElementOp, CElementOp, GemmDefault, 64, 32, 32, 4, 1, 16, 16, 2, 2, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 1, true, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 1, true, 7, 1>;
#else
< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementOp, BElementOp, CElementOp, GemmDefault, 256, 128, 128, 4, 2, 16, 16, 4, 4, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 2, 2, true, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 2, 2, true, 7, 1>;
#endif
// clang-format on
using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<ADataType,
BDataType,
CDataType,
AccDataType,
AElementOp,
BElementOp,
CElementOp>;
#include "run_gemm_example.inc"
int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); }