mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
WMMA gemm_add_relu_add_layernorm (#2989)
* Summary:
- Refactor epilogue (with CShuffle) to support fused operations:
- EpilogueCShuffleBase holds common parts
- EpilogueCShuffle: runs CShuffle and writes out
- EpilogueWelfordCShuffle: holds Welford-specific arguments, runs CShuffle, writes out, then runs the first part of Welford and the Welford write-out
- Extend thread transfer v7r3:
- Support for intermediate data type different from src and dst type
- New functionality to write to dst buffer and keep data (to be able to use them for additional operations)
* Address review comments
[ROCm/composable_kernel commit: 4ebc48a3cd]
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
add_gtest_executable(test_gemm_add_relu_add_layernorm_fp16 test_gemm_add_relu_add_layernorm_fp16_xdl.cpp)
|
||||
if(result EQUAL 0)
|
||||
add_custom_target(test_gemm_layernorm)
|
||||
target_link_libraries(test_gemm_add_relu_add_layernorm_fp16 PRIVATE utility device_gemm_add_relu_add_layernorm_instance)
|
||||
add_dependencies(test_gemm_layernorm test_gemm_add_relu_add_layernorm_fp16)
|
||||
if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12")
|
||||
add_gtest_executable(test_gemm_add_relu_add_layernorm_fp16 test_gemm_add_relu_add_layernorm_fp16.cpp)
|
||||
if(result EQUAL 0)
|
||||
add_custom_target(test_gemm_layernorm)
|
||||
target_link_libraries(test_gemm_add_relu_add_layernorm_fp16 PRIVATE utility device_gemm_add_relu_add_layernorm_instance)
|
||||
add_dependencies(test_gemm_layernorm test_gemm_add_relu_add_layernorm_fp16)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -79,11 +79,6 @@ TYPED_TEST_SUITE(TestGemmAddReluAddLayernorm, KernelTypes);
|
||||
TYPED_TEST(TestGemmAddReluAddLayernorm, Test_FP16) { this->Run(); }
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if(ck::is_gfx11_supported() || ck::is_gfx12_supported())
|
||||
{
|
||||
std::cout << "No available instance for gfx11 & gfx12." << std::endl;
|
||||
return 0;
|
||||
}
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
Reference in New Issue
Block a user