mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
WMMA support for gemm_reduce (#3145)
* Initial implementation of GEMM+Reduce:
- device struct
- epilogue struct
* Fix tests, improve profiler and add initial instances
* Add instances
* Fix compilation error
* Address review comments
* Fix logging
---------
Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
[ROCm/composable_kernel commit: 7414a0f4d4]
This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16_xdl.cpp)
|
||||
if(result EQUAL 0)
|
||||
target_link_libraries(test_gemm_reduce_fp16 PRIVATE utility device_gemm_reduce_instance)
|
||||
endif()
|
||||
if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12")
|
||||
add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16.cpp)
|
||||
if(result EQUAL 0)
|
||||
target_link_libraries(test_gemm_reduce_fp16 PRIVATE utility device_gemm_reduce_instance)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <iostream>
|
||||
|
||||
Reference in New Issue
Block a user