mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-11 08:50:17 +00:00
adding implicit GEMM v4r2
This commit is contained in:
@@ -55,13 +55,13 @@ void device_convolution_implicit_gemm_v4r2_nchw_kcyx_nkhw(InDesc,
 
 #if 1
 // 1x1 filter, 8x8 image
-constexpr index_t N1 = 2;
+constexpr index_t N0 = 1;
 constexpr index_t N2 = 1;
 
-constexpr index_t Ho1 = 8;
+constexpr index_t Ho0 = 1;
 constexpr index_t Ho2 = 1;
 
-constexpr index_t Wo1 = 1;
+constexpr index_t Wo0 = 2;
 constexpr index_t Wo2 = 4;
 
 constexpr index_t BlockSize = 256;
@@ -105,6 +105,10 @@ void device_convolution_implicit_gemm_v4r2_nchw_kcyx_nkhw(InDesc,
 constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1;
 #endif
 
+constexpr index_t N1 = N / (N0 * N2);
+constexpr index_t Ho1 = Ho / (Ho0 * Ho2);
+constexpr index_t Wo1 = Wo / (Wo0 * Wo2);
+
 constexpr index_t B = N1 * Ho1 * Wo1;
 
 constexpr index_t GridSize =
Reference in New Issue
Block a user