Gemm+layernorm instance, ckProfiler, client example (#568)

* Add gemm + layernorm instance

* Add ckProfiler

* Add test

* Add client example

* Detect if user forger to set the workrspace

* Use literal in the example

* [What] use builtin function for sqrt
[Why] compiler will not use v_sqrt_f64_e64 if we use ::sqrt()

* check gemm vaildity in IsSupportedArgument

* Add more testcases

* Merge duplicated folder in client example

* Print more infomation

* Use better kernel parameter for MS problem size

* clang format

* Add constexpr for if condition and remove redundant include

* Remove cstdlib and add constexpr

[ROCm/composable_kernel commit: f7d28f3e4b]
This commit is contained in:
rocking5566
2023-02-10 05:02:55 +08:00
committed by GitHub
parent 6caec3d429
commit 329678b636
29 changed files with 1635 additions and 36 deletions

View File

@@ -4,7 +4,6 @@
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
@@ -116,7 +115,7 @@ auto f_host_tensor_descriptor2d =
[](std::size_t row, std::size_t col, std::size_t stride, auto layout) {
using namespace ck::literals;
if(std::is_same<decltype(layout), ck::tensor_layout::gemm::RowMajor>::value)
if constexpr(std::is_same<decltype(layout), ck::tensor_layout::gemm::RowMajor>::value)
{
return HostTensorDescriptor({row, col}, {stride, 1_uz});
}

View File

@@ -4,7 +4,6 @@
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
@@ -15,6 +14,7 @@
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/literals.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp"
#include "ck/library/utility/check_err.hpp"
@@ -69,21 +69,20 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultipleDLayern
// clang-format on
auto f_host_tensor_descriptor1d = [](std::size_t len, std::size_t stride) {
return HostTensorDescriptor(std::vector<std::size_t>({len}),
std::vector<std::size_t>({stride}));
return HostTensorDescriptor({len}, {stride});
};
auto f_host_tensor_descriptor2d =
[](std::size_t row, std::size_t col, std::size_t stride, auto layout) {
if(std::is_same<decltype(layout), ck::tensor_layout::gemm::RowMajor>::value)
using namespace ck::literals;
if constexpr(std::is_same<decltype(layout), ck::tensor_layout::gemm::RowMajor>::value)
{
return HostTensorDescriptor(std::vector<std::size_t>({row, col}),
std::vector<std::size_t>({stride, 1}));
return HostTensorDescriptor({row, col}, {stride, 1_uz});
}
else
{
return HostTensorDescriptor(std::vector<std::size_t>({row, col}),
std::vector<std::size_t>({1, stride}));
return HostTensorDescriptor({row, col}, {1_uz, stride});
}
};
@@ -97,6 +96,7 @@ void host_gemm_layernorm(Tensor<HDataType>& h_m_n,
AElementOp a_element_op,
BElementOp b_element_op,
CDEElementOp cde_element_op,
HElementOp h_element_op,
int M,
int N,
AccDataType epsilon = 1e-5)
@@ -145,7 +145,7 @@ void host_gemm_layernorm(Tensor<HDataType>& h_m_n,
auto ref_layernorm_invoker = ref_layernorm.MakeInvoker();
auto ref_layernorm_argument = ref_layernorm.MakeArgument(
e_m_n, gamma_n, beta_n, h_m_n, HElementOp{}, {M, N}, {1}, epsilon);
e_m_n, gamma_n, beta_n, h_m_n, h_element_op, {M, N}, {1}, epsilon);
ref_layernorm_invoker.Run(ref_layernorm_argument);
}
@@ -249,6 +249,7 @@ int main()
a_element_op,
b_element_op,
cde_element_op,
h_element_op,
M,
N,
epsilon);

View File

@@ -4,7 +4,6 @@
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
@@ -115,7 +114,7 @@ auto f_host_tensor_descriptor2d =
[](std::size_t row, std::size_t col, std::size_t stride, auto layout) {
using namespace ck::literals;
if(std::is_same<decltype(layout), ck::tensor_layout::gemm::RowMajor>::value)
if constexpr(std::is_same<decltype(layout), ck::tensor_layout::gemm::RowMajor>::value)
{
return HostTensorDescriptor({row, col}, {stride, 1_uz});
}

View File

@@ -135,7 +135,7 @@ int main(int argc, char* argv[])
[](std::size_t row, std::size_t col, std::size_t stride, auto layout) {
using namespace ck::literals;
if(std::is_same<decltype(layout), ck::tensor_layout::gemm::RowMajor>::value)
if constexpr(std::is_same<decltype(layout), ck::tensor_layout::gemm::RowMajor>::value)
{
return HostTensorDescriptor({row, col}, {stride, 1_uz});
}