mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
WMMA support for GEMM reduce (#2823)
Added gemm + reduce instance library for RDNA4. This includes:
- New device implementation running GEMM and reduction kernel
- instances for wmma (xdl parity)
- examples for wmma (xdl parity)
- tests for existing xdl and wmma
[ROCm/composable_kernel commit: b25d4d684a]
This commit is contained in:
committed by
GitHub
parent
8c0cdebe63
commit
f2edb06bb0
@@ -248,6 +248,7 @@ add_subdirectory(gemm_universal)
|
||||
add_subdirectory(gemm_b_scale)
|
||||
add_subdirectory(gemm_universal_streamk)
|
||||
add_subdirectory(gemm_reduce)
|
||||
add_subdirectory(gemm_universal_reduce)
|
||||
add_subdirectory(batched_gemm)
|
||||
add_subdirectory(batched_gemm_reduce)
|
||||
add_subdirectory(batched_gemm_gemm)
|
||||
|
||||
14
test/gemm_universal_reduce/CMakeLists.txt
Normal file
14
test/gemm_universal_reduce/CMakeLists.txt
Normal file
@@ -0,0 +1,14 @@
|
||||
# Register one gtest executable per WMMA gemm-universal-reduce variant.
# Each target is built from the source file that shares its name, and is linked
# against the instance library only when `result` equals 0 after
# add_gtest_executable().
# NOTE(review): `result` is presumably set by add_gtest_executable() to signal
# that the target was actually created - confirm against its definition.
foreach(variant bf16 fp16 bf16A_i8)
    set(test_name test_gemm_universal_reduce_${variant}_wmma)

    add_gtest_executable(${test_name} ${test_name}.cpp)
    if(result EQUAL 0)
        target_link_libraries(${test_name} PRIVATE utility device_gemm_universal_reduce_instance)
    endif()
endforeach()
|
||||
@@ -0,0 +1,31 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "profiler/profile_gemm_universal_reduce_impl.hpp"
|
||||
|
||||
// Calls ck::profiler::profile_gemm_universal_reduce_impl once for the
// ck::bhalf_t / int8_t type combination, passing RowMajor for every layout tag,
// with M=512, N=256, K=128 and KBatch=1, and expects the call to return true.
TEST(GemmUniversalReduce, BF16A_I8)
{
    using RowMajor   = ck::tensor_layout::gemm::RowMajor;
    using EmptyTuple = ck::Tuple<>;

    int M{512};
    int N{256};
    int K{128};
    int KBatch{1};

    // NOTE(review): the K, N, N arguments that follow M, N, K look like the leading
    // dimensions of row-major operands, and the second literal (3) presumably selects
    // an initialisation method - confirm both against the profiler's parameter list.
    const bool succeeded = ck::profiler::profile_gemm_universal_reduce_impl<ck::bhalf_t,
                                                                            int8_t,
                                                                            EmptyTuple,
                                                                            float,
                                                                            ck::bhalf_t,
                                                                            RowMajor,
                                                                            RowMajor,
                                                                            EmptyTuple,
                                                                            RowMajor>(
        true, 3, false, true, M, N, K, K, N, N, KBatch, 1, 10);

    EXPECT_TRUE(succeeded);
}
|
||||
@@ -0,0 +1,31 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "profiler/profile_gemm_universal_reduce_impl.hpp"
|
||||
|
||||
// Calls ck::profiler::profile_gemm_universal_reduce_impl once with ck::bhalf_t
// for both leading data types and for the last data type, float in between,
// passing RowMajor for every layout tag, with M=512, N=256, K=128 and KBatch=1,
// and expects the call to return true.
TEST(GemmUniversalReduce, BF16)
{
    using RowMajor   = ck::tensor_layout::gemm::RowMajor;
    using EmptyTuple = ck::Tuple<>;

    int M{512};
    int N{256};
    int K{128};
    int KBatch{1};

    // NOTE(review): the K, N, N arguments that follow M, N, K look like the leading
    // dimensions of row-major operands, and the second literal (1) presumably selects
    // an initialisation method - confirm both against the profiler's parameter list.
    const bool succeeded = ck::profiler::profile_gemm_universal_reduce_impl<ck::bhalf_t,
                                                                            ck::bhalf_t,
                                                                            EmptyTuple,
                                                                            float,
                                                                            ck::bhalf_t,
                                                                            RowMajor,
                                                                            RowMajor,
                                                                            EmptyTuple,
                                                                            RowMajor>(
        true, 1, false, true, M, N, K, K, N, N, KBatch, 1, 10);

    EXPECT_TRUE(succeeded);
}
|
||||
@@ -0,0 +1,31 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "profiler/profile_gemm_universal_reduce_impl.hpp"
|
||||
|
||||
// Calls ck::profiler::profile_gemm_universal_reduce_impl once with ck::half_t
// for both leading data types and for the last data type, float in between,
// passing RowMajor for every layout tag, with M=512, N=256, K=128 and KBatch=1,
// and expects the call to return true.
TEST(GemmUniversalReduce, FP16)
{
    using RowMajor   = ck::tensor_layout::gemm::RowMajor;
    using EmptyTuple = ck::Tuple<>;

    int M{512};
    int N{256};
    int K{128};
    int KBatch{1};

    // NOTE(review): the K, N, N arguments that follow M, N, K look like the leading
    // dimensions of row-major operands, and the second literal (1) presumably selects
    // an initialisation method - confirm both against the profiler's parameter list.
    const bool succeeded = ck::profiler::profile_gemm_universal_reduce_impl<ck::half_t,
                                                                            ck::half_t,
                                                                            EmptyTuple,
                                                                            float,
                                                                            ck::half_t,
                                                                            RowMajor,
                                                                            RowMajor,
                                                                            EmptyTuple,
                                                                            RowMajor>(
        true, 1, false, true, M, N, K, K, N, N, KBatch, 1, 10);

    EXPECT_TRUE(succeeded);
}
|
||||
Reference in New Issue
Block a user