mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 14:59:17 +00:00
Gemm+layernorm instance, ckProfiler, client example (#568)
* Add gemm + layernorm instance * Add ckProfiler * Add test * Add client example * Detect if user forger to set the workrspace * Use literal in the example * [What] use builtin function for sqrt [Why] compiler will not use v_sqrt_f64_e64 if we use ::sqrt() * check gemm vaildity in IsSupportedArgument * Add more testcases * Merge duplicated folder in client example * Print more infomation * Use better kernel parameter for MS problem size * clang format * Add constexpr for if condition and remove redundant include * Remove cstdlib and add constexpr
This commit is contained in:
7
test/gemm_layernorm/CMakeLists.txt
Normal file
7
test/gemm_layernorm/CMakeLists.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
add_custom_target(test_gemm_layernorm)
|
||||
|
||||
add_gtest_executable(test_gemm_add_relu_add_layernorm_fp16 test_gemm_add_relu_add_layernorm_fp16.cpp)
|
||||
|
||||
target_link_libraries(test_gemm_add_relu_add_layernorm_fp16 PRIVATE utility device_gemm_add_relu_add_layernorm_instance)
|
||||
|
||||
add_dependencies(test_gemm_layernorm test_gemm_add_relu_add_layernorm_fp16)
|
||||
@@ -0,0 +1,77 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "profiler/profile_gemm_add_relu_add_layernorm_impl.hpp"
|
||||
|
||||
using Row = ck::tensor_layout::gemm::RowMajor;
|
||||
using Col = ck::tensor_layout::gemm::ColumnMajor;
|
||||
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
using ck::index_t;
|
||||
|
||||
template <typename Tuple>
|
||||
class TestGemmAddReluAddLayernorm : public ::testing::Test
|
||||
{
|
||||
protected:
|
||||
using ADataType = std::tuple_element_t<0, Tuple>;
|
||||
using BDataType = std::tuple_element_t<1, Tuple>;
|
||||
using AccDataType = std::tuple_element_t<2, Tuple>;
|
||||
using D0DataType = std::tuple_element_t<3, Tuple>;
|
||||
using D1DataType = std::tuple_element_t<4, Tuple>;
|
||||
using EMeanVarDataType = std::tuple_element_t<5, Tuple>;
|
||||
using GammaDataType = std::tuple_element_t<6, Tuple>;
|
||||
using BetaDataType = std::tuple_element_t<7, Tuple>;
|
||||
using HDataType = std::tuple_element_t<8, Tuple>;
|
||||
using ALayout = std::tuple_element_t<9, Tuple>;
|
||||
using BLayout = std::tuple_element_t<10, Tuple>;
|
||||
using D0Layout = std::tuple_element_t<11, Tuple>;
|
||||
using D1Layout = std::tuple_element_t<12, Tuple>;
|
||||
using HLayout = std::tuple_element_t<13, Tuple>;
|
||||
|
||||
void Run()
|
||||
{
|
||||
std::vector<std::vector<ck::index_t>> lengths = {
|
||||
{1024, 1024, 1024}, {2048, 640, 640}, {1, 1, 1}};
|
||||
|
||||
for(auto length : lengths)
|
||||
{
|
||||
int M = length[0];
|
||||
int N = length[1];
|
||||
int K = length[2];
|
||||
int StrideA = ck::is_same_v<ALayout, Row> ? K : M;
|
||||
int StrideB = ck::is_same_v<BLayout, Row> ? N : K;
|
||||
int StrideD0 = 0;
|
||||
int StrideD1 = ck::is_same_v<D1Layout, Row> ? N : M;
|
||||
int StrideH = ck::is_same_v<HLayout, Row> ? N : M;
|
||||
|
||||
bool success = ck::profiler::profile_gemm_add_relu_add_layernorm_impl<ADataType,
|
||||
BDataType,
|
||||
AccDataType,
|
||||
D0DataType,
|
||||
D1DataType,
|
||||
EMeanVarDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
HDataType,
|
||||
ALayout,
|
||||
BLayout,
|
||||
D0Layout,
|
||||
D1Layout,
|
||||
HLayout>(
|
||||
true, 1, false, false, M, N, K, StrideA, StrideB, StrideD0, StrideD1, StrideH);
|
||||
|
||||
EXPECT_TRUE(success);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
using KernelTypes = ::testing::Types<
|
||||
std::tuple<F16, F16, F32, F16, F16, F16, F16, F16, F16, Row, Row, Row, Row, Row>,
|
||||
std::tuple<F16, F16, F32, F16, F16, F16, F16, F16, F16, Row, Col, Row, Row, Row>,
|
||||
std::tuple<F16, F16, F32, F16, F16, F16, F16, F16, F16, Col, Row, Row, Row, Row>,
|
||||
std::tuple<F16, F16, F32, F16, F16, F16, F16, F16, F16, Col, Col, Row, Row, Row>>;
|
||||
|
||||
TYPED_TEST_SUITE(TestGemmAddReluAddLayernorm, KernelTypes);
|
||||
TYPED_TEST(TestGemmAddReluAddLayernorm, Test_FP16) { this->Run(); }
|
||||
Reference in New Issue
Block a user