mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-18 20:09:25 +00:00
Batched gemm and reduction (#156)
* adding batched_gemm_and_reduction
* batched_gemm_reduce works with batch_count=1
* fix a bug in grid_size; batched_gemm_reduce works for batch_count > 1
* adding profiler for batched_gemm_fp16
* fixed a bug in declaration of d1 and d0; both example and profiler work
* clang-format
* cleanup
* batched_gemm_reduce: add test
* minor change
* fixed some typos in function names
[ROCm/composable_kernel commit: 34c661e71c]
This commit is contained in:
@@ -39,6 +39,7 @@ add_subdirectory(gemm)
|
||||
add_subdirectory(gemm_split_k)
|
||||
add_subdirectory(gemm_reduce)
|
||||
add_subdirectory(batched_gemm)
|
||||
add_subdirectory(batched_gemm_reduce)
|
||||
add_subdirectory(grouped_gemm)
|
||||
add_subdirectory(convnd_fwd)
|
||||
add_subdirectory(reduce)
|
||||
|
||||
9
test/batched_gemm_reduce/CMakeLists.txt
Normal file
9
test/batched_gemm_reduce/CMakeLists.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
# Header search paths used by this test. BEFORE prepends them to the
# directory's current include list instead of appending.
include_directories(BEFORE
    ${PROJECT_SOURCE_DIR}/profiler/include
    ${PROJECT_SOURCE_DIR}/test/include
    ${PROJECT_SOURCE_DIR}/external/include/half
)

# Build the fp16 batched GEMM + reduce test from its single source file.
# NOTE(review): add_test_executable is defined elsewhere in the project;
# presumably it also registers the binary as a test - confirm.
add_test_executable(test_batched_gemm_reduce_fp16 batched_gemm_reduce_fp16.cpp)

# Link order is kept as-is: host_tensor first, then the device instance library.
target_link_libraries(test_batched_gemm_reduce_fp16
    PRIVATE
        host_tensor
        device_batched_gemm_reduce_instance
)
|
||||
64
test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp
Normal file
64
test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp
Normal file
@@ -0,0 +1,64 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "profile_batched_gemm_reduce_impl.hpp"
|
||||
|
||||
int main()
|
||||
{
|
||||
using Row = ck::tensor_layout::gemm::RowMajor;
|
||||
using Col = ck::tensor_layout::gemm::ColumnMajor;
|
||||
|
||||
int M = 512;
|
||||
int N = 256;
|
||||
int K = 128;
|
||||
|
||||
int BatchCount = 3;
|
||||
|
||||
bool pass = true;
|
||||
|
||||
pass = pass && ck::profiler::profile_batched_gemm_reduce_impl<ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
float,
|
||||
Row,
|
||||
Row,
|
||||
Row>(
|
||||
true, 1, false, 1, M, N, K, K, N, N, BatchCount);
|
||||
|
||||
pass = pass && ck::profiler::profile_batched_gemm_reduce_impl<ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
float,
|
||||
Row,
|
||||
Col,
|
||||
Row>(
|
||||
true, 1, false, 1, M, N, K, K, K, N, BatchCount);
|
||||
|
||||
pass = pass && ck::profiler::profile_batched_gemm_reduce_impl<ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
float,
|
||||
Col,
|
||||
Row,
|
||||
Row>(
|
||||
true, 1, false, 1, M, N, K, M, N, N, BatchCount);
|
||||
|
||||
pass = pass && ck::profiler::profile_batched_gemm_reduce_impl<ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
float,
|
||||
Col,
|
||||
Col,
|
||||
Row>(
|
||||
true, 1, false, 1, M, N, K, M, K, N, BatchCount);
|
||||
|
||||
if(pass)
|
||||
{
|
||||
std::cout << "test BatchedGEMM+Reduce fp16: Pass" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "test BatchedGEMM+Reduce fp16: Fail" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,4 @@
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <half.hpp>
|
||||
#include <iostream>
|
||||
#include <numeric>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "profile_gemm_reduce_impl.hpp"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user