mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-17 11:30:02 +00:00
Fix bug of layernorm ckProfiler and refine code (#448)
* Fix bug of profiler for layernorm
* 1. Rename layernorm to normalization
  2. Decouple softmax from normalization
* clang-format
[ROCm/composable_kernel commit: 1b62bfaa2a]
This commit is contained in:
@@ -6,11 +6,10 @@ include(googletest)
|
||||
|
||||
add_custom_target(tests)
|
||||
|
||||
|
||||
function(add_test_executable TEST_NAME)
|
||||
message("adding test ${TEST_NAME}")
|
||||
add_executable(${TEST_NAME} ${ARGN})
|
||||
add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}> )
|
||||
add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}>)
|
||||
add_dependencies(tests ${TEST_NAME})
|
||||
add_dependencies(check ${TEST_NAME})
|
||||
rocm_install(TARGETS ${TEST_NAME} COMPONENT tests)
|
||||
@@ -23,6 +22,7 @@ function(add_gtest_executable TEST_NAME)
|
||||
add_executable(${TEST_NAME} ${ARGN})
|
||||
add_dependencies(tests ${TEST_NAME})
|
||||
add_dependencies(check ${TEST_NAME})
|
||||
|
||||
# suppress gtest warnings
|
||||
target_compile_options(${TEST_NAME} PRIVATE -Wno-global-constructors -Wno-undef)
|
||||
target_link_libraries(${TEST_NAME} PRIVATE gtest_main)
|
||||
@@ -30,7 +30,6 @@ function(add_gtest_executable TEST_NAME)
|
||||
rocm_install(TARGETS ${TEST_NAME} COMPONENT tests)
|
||||
endfunction(add_gtest_executable TEST_NAME)
|
||||
|
||||
|
||||
add_subdirectory(magic_number_division)
|
||||
add_subdirectory(space_filling_curve)
|
||||
add_subdirectory(conv_util)
|
||||
@@ -51,5 +50,5 @@ add_subdirectory(convnd_bwd_data)
|
||||
add_subdirectory(grouped_convnd_fwd)
|
||||
add_subdirectory(block_to_ctile_map)
|
||||
add_subdirectory(softmax)
|
||||
add_subdirectory(layernorm)
|
||||
add_subdirectory(normalization)
|
||||
add_subdirectory(data_type)
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/utility/number.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/device_layernorm_impl.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/device_normalization_impl.hpp"
|
||||
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
@@ -65,26 +65,26 @@ class TestLayernorm2d : public ::testing::Test
|
||||
Rank,
|
||||
NumReduceDim>;
|
||||
|
||||
using DeviceInstance = tensor_operation::device::DeviceLayernormImpl<XDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
AccDataType,
|
||||
YDataType,
|
||||
PassThrough,
|
||||
Rank,
|
||||
NumReduceDim,
|
||||
BlockSize,
|
||||
MThreadClusterSize,
|
||||
KThreadClusterSize,
|
||||
MThreadSliceSize,
|
||||
KThreadSliceSize,
|
||||
XYSrcVectorDim,
|
||||
XSrcVectorSize,
|
||||
GammaSrcVectorDim,
|
||||
GammaSrcVectorSize,
|
||||
BetaSrcVectorDim,
|
||||
BetaSrcVectorSize,
|
||||
YDstVectorSize>;
|
||||
using DeviceInstance = tensor_operation::device::DeviceNormalizationImpl<XDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
AccDataType,
|
||||
YDataType,
|
||||
PassThrough,
|
||||
Rank,
|
||||
NumReduceDim,
|
||||
BlockSize,
|
||||
MThreadClusterSize,
|
||||
KThreadClusterSize,
|
||||
MThreadSliceSize,
|
||||
KThreadSliceSize,
|
||||
XYSrcVectorDim,
|
||||
XSrcVectorSize,
|
||||
GammaSrcVectorDim,
|
||||
GammaSrcVectorSize,
|
||||
BetaSrcVectorDim,
|
||||
BetaSrcVectorSize,
|
||||
YDstVectorSize>;
|
||||
|
||||
TestLayernorm2d() : ref_instance_invoker_(ReferenceInstance{}.MakeInvoker()) {}
|
||||
|
||||
Reference in New Issue
Block a user