Batched gemm and reduction (#156)

* adding batched_gemm_and_reduction

* batched_gemm_reduce works with bactch_count=1

* fix a bug in grid_size; batched_gemm_reduce works for batch_count > 1

* adding profiler for batched_gemm_fp16

* fixed a bug in declaration of d1 and d0; both example and profiler work

* clang-format

* cleanup

* batched_gemm_reduce: add test

* minor change

* fixed some typo in function names

[ROCm/composable_kernel commit: 34c661e71c]
This commit is contained in:
Jianfeng Yan
2022-03-30 11:21:18 -05:00
committed by GitHub
parent 6d537a8c3e
commit cb97ce68d8
27 changed files with 2145 additions and 62 deletions

View File

@@ -39,6 +39,7 @@ add_subdirectory(gemm)
add_subdirectory(gemm_split_k)
add_subdirectory(gemm_reduce)
add_subdirectory(batched_gemm)
add_subdirectory(batched_gemm_reduce)
add_subdirectory(grouped_gemm)
add_subdirectory(convnd_fwd)
add_subdirectory(reduce)

View File

@@ -0,0 +1,9 @@
include_directories(BEFORE
${PROJECT_SOURCE_DIR}/profiler/include
${PROJECT_SOURCE_DIR}/test/include
${PROJECT_SOURCE_DIR}/external/include/half
)
add_test_executable(test_batched_gemm_reduce_fp16 batched_gemm_reduce_fp16.cpp)
target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE host_tensor)
target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE device_batched_gemm_reduce_instance)

View File

@@ -0,0 +1,64 @@
#include <iostream>
#include "profile_batched_gemm_reduce_impl.hpp"
int main()
{
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
int M = 512;
int N = 256;
int K = 128;
int BatchCount = 3;
bool pass = true;
pass = pass && ck::profiler::profile_batched_gemm_reduce_impl<ck::half_t,
ck::half_t,
ck::half_t,
float,
Row,
Row,
Row>(
true, 1, false, 1, M, N, K, K, N, N, BatchCount);
pass = pass && ck::profiler::profile_batched_gemm_reduce_impl<ck::half_t,
ck::half_t,
ck::half_t,
float,
Row,
Col,
Row>(
true, 1, false, 1, M, N, K, K, K, N, BatchCount);
pass = pass && ck::profiler::profile_batched_gemm_reduce_impl<ck::half_t,
ck::half_t,
ck::half_t,
float,
Col,
Row,
Row>(
true, 1, false, 1, M, N, K, M, N, N, BatchCount);
pass = pass && ck::profiler::profile_batched_gemm_reduce_impl<ck::half_t,
ck::half_t,
ck::half_t,
float,
Col,
Col,
Row>(
true, 1, false, 1, M, N, K, M, K, N, BatchCount);
if(pass)
{
std::cout << "test BatchedGEMM+Reduce fp16: Pass" << std::endl;
return 0;
}
else
{
std::cout << "test BatchedGEMM+Reduce fp16: Fail" << std::endl;
return -1;
}
}

View File

@@ -1,10 +1,4 @@
#include <algorithm>
#include <cstdlib>
#include <half.hpp>
#include <iostream>
#include <numeric>
#include <tuple>
#include <vector>
#include "profile_gemm_reduce_impl.hpp"