Modularize ckProfiler operations (#514)

* Re-structure ckProfiler source files

* Rename profiler.cpp to main.cpp

* Modularize ckProfiler operations

* Add description for profiler operations

* Use longer name to avoid name collision

* Use macro to delay expansion

* Use std::move() to avoid object copying

* Prohibit users from calling the destructor (dtor)

* Use macro to eliminate redundant code

* Make friend function hidden

* Add missing include directive <iostream>

* Fix wrong include directives

* Remove int8 from batchnorm-forward instances, since it is not needed for forward training and could cause the test to fail

Co-authored-by: Qianfeng Zhang <Qianfeng.Zhang@amd.com>

[ROCm/composable_kernel commit: 8784a72e23]
This commit is contained in:
Po Yen Chen
2022-12-02 05:15:02 +08:00
committed by GitHub
parent 8e868bf880
commit 02db748e74
82 changed files with 346 additions and 273 deletions

View File

@@ -1,5 +1,6 @@
include_directories(BEFORE
${PROJECT_SOURCE_DIR}/
${PROJECT_SOURCE_DIR}/profiler/include
)
include(googletest)

View File

@@ -3,7 +3,7 @@
#include <iostream>
#include "profiler/include/profile_batched_gemm_impl.hpp"
#include "profiler/profile_batched_gemm_impl.hpp"
namespace {
using ADataType = ck::bhalf_t;

View File

@@ -3,7 +3,7 @@
#include <iostream>
#include "profiler/include/profile_batched_gemm_impl.hpp"
#include "profiler/profile_batched_gemm_impl.hpp"
namespace {
using ADataType = ck::half_t;

View File

@@ -3,7 +3,7 @@
#include <iostream>
#include "profiler/include/profile_batched_gemm_impl.hpp"
#include "profiler/profile_batched_gemm_impl.hpp"
namespace {
using ADataType = float;

View File

@@ -3,7 +3,7 @@
#include <iostream>
#include "profiler/include/profile_batched_gemm_impl.hpp"
#include "profiler/profile_batched_gemm_impl.hpp"
namespace {
using ADataType = int8_t;

View File

@@ -6,7 +6,7 @@
#include <vector>
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "profiler/include/profile_batched_gemm_gemm_impl.hpp"
#include "profiler/profile_batched_gemm_gemm_impl.hpp"
using ck::tensor_operation::device::GemmSpecialization;

View File

@@ -3,7 +3,7 @@
#include <iostream>
#include "profiler/include/profile_batched_gemm_reduce_impl.hpp"
#include "profiler/profile_batched_gemm_reduce_impl.hpp"
int main()
{

View File

@@ -6,7 +6,7 @@
#include <vector>
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp"
#include "profiler/include/profile_batched_gemm_softmax_gemm_impl.hpp"
#include "profiler/profile_batched_gemm_softmax_gemm_impl.hpp"
using ck::tensor_operation::device::GemmSpecialization;
template <ck::index_t N>

View File

@@ -7,7 +7,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp"
#include "profiler/include/profile_batched_gemm_softmax_gemm_permute_impl.hpp"
#include "profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp"
using ck::tensor_operation::device::GemmSpecialization;
using ck::tensor_operation::device::MaskingSpecialization;

View File

@@ -8,7 +8,7 @@
#include <tuple>
#include <gtest/gtest.h>
#include "profiler/include/profile_batchnorm_backward_impl.hpp"
#include "profiler/profile_batchnorm_backward_impl.hpp"
using F16 = ck::half_t;
using F32 = float;

View File

@@ -8,7 +8,7 @@
#include <tuple>
#include <gtest/gtest.h>
#include "profiler/include/profile_batchnorm_forward_impl.hpp"
#include "profiler/profile_batchnorm_forward_impl.hpp"
using F16 = ck::half_t;
using F32 = float;

View File

@@ -8,7 +8,7 @@
#include <tuple>
#include <gtest/gtest.h>
#include "profiler/include/profile_conv_bwd_data_impl.hpp"
#include "profiler/profile_conv_bwd_data_impl.hpp"
template <typename Tuple>
class TestConvndBwdData : public ::testing::Test

View File

@@ -8,7 +8,7 @@
#include <tuple>
#include <gtest/gtest.h>
#include "profiler/include/profile_conv_fwd_impl.hpp"
#include "profiler/profile_conv_fwd_impl.hpp"
template <typename Tuple>
class TestConvndFwd : public ::testing::Test

View File

@@ -2,7 +2,7 @@
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/include/profile_elementwise_layernorm_impl.hpp"
#include "profiler/profile_elementwise_layernorm_impl.hpp"
using F16 = ck::half_t;
using F32 = float;

View File

@@ -3,7 +3,7 @@
#include <iostream>
#include "profiler/include/profile_gemm_reduce_impl.hpp"
#include "profiler/profile_gemm_reduce_impl.hpp"
int main()
{

View File

@@ -9,7 +9,7 @@
#include <gtest/gtest.h>
#include "profiler/include/profile_grouped_conv_bwd_weight_impl.hpp"
#include "profiler/profile_grouped_conv_bwd_weight_impl.hpp"
template <typename Tuple>
class TestGroupedConvndBwdWeight : public ::testing::Test

View File

@@ -7,7 +7,7 @@
#include <vector>
#include <gtest/gtest.h>
#include "profiler/include/profile_grouped_conv_fwd_impl.hpp"
#include "profiler/profile_grouped_conv_fwd_impl.hpp"
class TestGroupedConvNdFwd : public ::testing::Test
{

View File

@@ -3,7 +3,7 @@
#include <iostream>
#include "profiler/include/profile_grouped_gemm_impl.hpp"
#include "profiler/profile_grouped_gemm_impl.hpp"
namespace {

View File

@@ -2,7 +2,7 @@
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/include/profile_groupnorm_impl.hpp"
#include "profiler/profile_groupnorm_impl.hpp"
using F16 = ck::half_t;
using F32 = float;

View File

@@ -2,7 +2,7 @@
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/include/profile_groupnorm_impl.hpp"
#include "profiler/profile_groupnorm_impl.hpp"
using F16 = ck::half_t;
using F32 = float;

View File

@@ -2,7 +2,7 @@
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/include/profile_layernorm_impl.hpp"
#include "profiler/profile_layernorm_impl.hpp"
using F16 = ck::half_t;
using F32 = float;

View File

@@ -2,7 +2,7 @@
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/include/profile_layernorm_impl.hpp"
#include "profiler/profile_layernorm_impl.hpp"
using F16 = ck::half_t;
using F32 = float;

View File

@@ -4,7 +4,7 @@
#include <getopt.h>
#include "ck/library/utility/host_common_util.hpp"
#include "profiler/include/profile_reduce_impl.hpp"
#include "profiler/profile_reduce_impl.hpp"
using namespace ck;

View File

@@ -4,7 +4,7 @@
#include <getopt.h>
#include "ck/library/utility/host_common_util.hpp"
#include "profiler/include/profile_reduce_impl.hpp"
#include "profiler/profile_reduce_impl.hpp"
using namespace ck;

View File

@@ -13,7 +13,7 @@
#include "ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "include/ck/utility/data_type.hpp"
#include "profiler/include/profile_softmax_impl.hpp"
#include "profiler/profile_softmax_impl.hpp"
namespace ck {