mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-17 11:30:02 +00:00
Modularize ckProfiler operations (#514)
* Re-structure ckProfiler source files
* Rename profiler.cpp to main.cpp
* Modularize ckProfiler operations
* Add description for profiler operations
* Use longer name to avoid name collision
* Use macro to delay expansion
* Use std::move() to avoid object copying
* Prohibit users from calling the destructor
* Use macro to eliminate redundant code
* Make friend function hidden
* Add missing include directive <iostream>
* Fix wrong include directives
* Remove int8 from batchnorm-forward instances since it is not needed for forward training and could cause the test to fail
Co-authored-by: Qianfeng Zhang <Qianfeng.Zhang@amd.com>
[ROCm/composable_kernel commit: 8784a72e23]
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
include_directories(BEFORE
|
||||
${PROJECT_SOURCE_DIR}/
|
||||
${PROJECT_SOURCE_DIR}/profiler/include
|
||||
)
|
||||
|
||||
include(googletest)
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "profiler/include/profile_batched_gemm_impl.hpp"
|
||||
#include "profiler/profile_batched_gemm_impl.hpp"
|
||||
|
||||
namespace {
|
||||
using ADataType = ck::bhalf_t;
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "profiler/include/profile_batched_gemm_impl.hpp"
|
||||
#include "profiler/profile_batched_gemm_impl.hpp"
|
||||
|
||||
namespace {
|
||||
using ADataType = ck::half_t;
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "profiler/include/profile_batched_gemm_impl.hpp"
|
||||
#include "profiler/profile_batched_gemm_impl.hpp"
|
||||
|
||||
namespace {
|
||||
using ADataType = float;
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "profiler/include/profile_batched_gemm_impl.hpp"
|
||||
#include "profiler/profile_batched_gemm_impl.hpp"
|
||||
|
||||
namespace {
|
||||
using ADataType = int8_t;
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
#include <vector>
|
||||
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp"
|
||||
#include "profiler/include/profile_batched_gemm_gemm_impl.hpp"
|
||||
#include "profiler/profile_batched_gemm_gemm_impl.hpp"
|
||||
|
||||
using ck::tensor_operation::device::GemmSpecialization;
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "profiler/include/profile_batched_gemm_reduce_impl.hpp"
|
||||
#include "profiler/profile_batched_gemm_reduce_impl.hpp"
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
#include <vector>
|
||||
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp"
|
||||
#include "profiler/include/profile_batched_gemm_softmax_gemm_impl.hpp"
|
||||
#include "profiler/profile_batched_gemm_softmax_gemm_impl.hpp"
|
||||
using ck::tensor_operation::device::GemmSpecialization;
|
||||
|
||||
template <ck::index_t N>
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp"
|
||||
#include "profiler/include/profile_batched_gemm_softmax_gemm_permute_impl.hpp"
|
||||
#include "profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp"
|
||||
|
||||
using ck::tensor_operation::device::GemmSpecialization;
|
||||
using ck::tensor_operation::device::MaskingSpecialization;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
#include <tuple>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "profiler/include/profile_batchnorm_backward_impl.hpp"
|
||||
#include "profiler/profile_batchnorm_backward_impl.hpp"
|
||||
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
#include <tuple>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "profiler/include/profile_batchnorm_forward_impl.hpp"
|
||||
#include "profiler/profile_batchnorm_forward_impl.hpp"
|
||||
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
#include <tuple>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "profiler/include/profile_conv_bwd_data_impl.hpp"
|
||||
#include "profiler/profile_conv_bwd_data_impl.hpp"
|
||||
|
||||
template <typename Tuple>
|
||||
class TestConvndBwdData : public ::testing::Test
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
#include <tuple>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "profiler/include/profile_conv_fwd_impl.hpp"
|
||||
#include "profiler/profile_conv_fwd_impl.hpp"
|
||||
|
||||
template <typename Tuple>
|
||||
class TestConvndFwd : public ::testing::Test
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "profiler/include/profile_elementwise_layernorm_impl.hpp"
|
||||
#include "profiler/profile_elementwise_layernorm_impl.hpp"
|
||||
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "profiler/include/profile_gemm_reduce_impl.hpp"
|
||||
#include "profiler/profile_gemm_reduce_impl.hpp"
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "profiler/include/profile_grouped_conv_bwd_weight_impl.hpp"
|
||||
#include "profiler/profile_grouped_conv_bwd_weight_impl.hpp"
|
||||
|
||||
template <typename Tuple>
|
||||
class TestGroupedConvndBwdWeight : public ::testing::Test
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
#include <vector>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "profiler/include/profile_grouped_conv_fwd_impl.hpp"
|
||||
#include "profiler/profile_grouped_conv_fwd_impl.hpp"
|
||||
|
||||
class TestGroupedConvNdFwd : public ::testing::Test
|
||||
{
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "profiler/include/profile_grouped_gemm_impl.hpp"
|
||||
#include "profiler/profile_grouped_gemm_impl.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "profiler/include/profile_groupnorm_impl.hpp"
|
||||
#include "profiler/profile_groupnorm_impl.hpp"
|
||||
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "profiler/include/profile_groupnorm_impl.hpp"
|
||||
#include "profiler/profile_groupnorm_impl.hpp"
|
||||
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "profiler/include/profile_layernorm_impl.hpp"
|
||||
#include "profiler/profile_layernorm_impl.hpp"
|
||||
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "profiler/include/profile_layernorm_impl.hpp"
|
||||
#include "profiler/profile_layernorm_impl.hpp"
|
||||
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
#include <getopt.h>
|
||||
|
||||
#include "ck/library/utility/host_common_util.hpp"
|
||||
#include "profiler/include/profile_reduce_impl.hpp"
|
||||
#include "profiler/profile_reduce_impl.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
#include <getopt.h>
|
||||
|
||||
#include "ck/library/utility/host_common_util.hpp"
|
||||
#include "profiler/include/profile_reduce_impl.hpp"
|
||||
#include "profiler/profile_reduce_impl.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
|
||||
#include "include/ck/utility/data_type.hpp"
|
||||
#include "profiler/include/profile_softmax_impl.hpp"
|
||||
#include "profiler/profile_softmax_impl.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
|
||||
Reference in New Issue
Block a user