mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-16 10:59:55 +00:00
Fused elementwise layernorm (#468)
* add fused addition layernorm
* add fused addition layernorm
* changed CMakelist
* removed annotates
* modified descriptor of C
* fixed bug in gridwise add layernorm
* format the files
* modified name from add&layernorm into elementwise&layernorm
* created fused elementwise layernorm branch
* change input into tuple type
* add sweep once to reduce load & read of C from global memory
* modified Argument api
* modified way to malloc c in global memory
* changed gamma and beta to m_k_desc
* fixed bug when sweep once and move CDataType when define device level struct
* add src dim for gamma and beta
* implement optimization for coalesced
* delete an annotation line
* fixed some bugs to meet the requirements of ck
* add bandwidth computing in example, and fixed the time unit
* move device_elementwise_layernorm_impl.hpp into device/impl
* fixed bug in device_elementwise_layernorm_impl.hpp
* changed name from layernorm into normalization
* clang-format the changed files
* changed the names
* moved intermediate results into lds, it becomes faster in non-sweeponce cases
* changed naming of C into X to make the definition more clear
* changed naming in example
* add tests for elementwise normalization
* move example_elementwise_layernorm_blockwise into folder 44_elementwise_normalization
* move test_elementwise_layernorm_fp16 into new folder
* move elementwise_normalization_instances into a new folder
* add more tests in test_elementwise_layernorm_fp16.cpp
* added some corner cases in test
* fixed method to compute lds size for matrix X
* changed name of 44_elementwise_normalization into 45_elementwise_normalization
* modified some comments
* modified some other confusing comments
* reduce redundant tests in test_elementwise_layernorm_fp16.cpp
[ROCm/composable_kernel commit: efbcc6eddc]
This commit is contained in:
@@ -3,9 +3,9 @@ add_custom_target(test_layernorm)
|
||||
add_gtest_executable(test_layernorm2d_fp32 test_layernorm2d_fp32.cpp)
|
||||
add_gtest_executable(test_layernorm2d_fp16 test_layernorm2d_fp16.cpp)
|
||||
add_gtest_executable(test_groupnorm_fp16 test_groupnorm_fp16.cpp)
|
||||
add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
|
||||
add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
|
||||
|
||||
target_link_libraries(test_layernorm2d_fp32 PRIVATE utility)
|
||||
target_link_libraries(test_layernorm2d_fp32 PRIVATE utility)
|
||||
target_link_libraries(test_layernorm2d_fp16 PRIVATE utility)
|
||||
target_link_libraries(test_groupnorm_fp16 PRIVATE utility device_normalization_instance)
|
||||
target_link_libraries(test_groupnorm_fp32 PRIVATE utility device_normalization_instance)
|
||||
@@ -14,4 +14,3 @@ add_dependencies(test_layernorm test_layernorm2d_fp32)
|
||||
add_dependencies(test_layernorm test_layernorm2d_fp16)
|
||||
add_dependencies(test_layernorm test_groupnorm_fp16)
|
||||
add_dependencies(test_layernorm test_groupnorm_fp32)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user