Conv3d new (#94)

* conv3d compiles but has memory error

* conv3d works

* fix performance issue by using __builtin_amdgc_readfirstlane

* change MakeBlock2CTileMap to MakeDefaultBlock2CTileMap; change c_blockid_to* to cblockid_to*

* clang-format

* remove CK_EXPERIMENTAL_PASS_TENSOR_DECRIPTOR_BY_*; moved wrapper into DeviceConv3d

* format

* remove useless marc

* add comment

Co-authored-by: Chao Liu <chao.liu2@amd.com>
This commit is contained in:
Jianfeng Yan
2022-02-22 22:45:28 -06:00
committed by GitHub
parent 19c5d6e651
commit 6dfb92bbef
61 changed files with 2255 additions and 1725 deletions

View File

@@ -248,7 +248,7 @@ struct DeviceBatchedGemmXdl
c_grid_desc_g_m_n_);
block_2_ctile_map_ =
GridwiseBatchedGemm::MakeBlock2CTileMap(c_grid_desc_g_m_n_, M01, N01);
GridwiseBatchedGemm::MakeDefaultBlock2CTileMap(c_grid_desc_g_m_n_, M01, N01);
}
}
@@ -261,7 +261,7 @@ struct DeviceBatchedGemmXdl
CGridDesc_G_M_N c_grid_desc_g_m_n_;
typename GridwiseBatchedGemm::CGridDesc_G_M0_N0_M1_N1_M2_M3_M4_N2
c_grid_desc_g_m0_n0_m1_n1_m2_m3_m4_n2_;
typename GridwiseBatchedGemm::Block2CTileMap block_2_ctile_map_;
typename GridwiseBatchedGemm::DefaultBlock2CTileMap block_2_ctile_map_;
index_t M01_;
index_t N01_;
AElementwiseOperation a_element_op_;
@@ -327,7 +327,7 @@ struct DeviceBatchedGemmXdl
AElementwiseOperation,
BElementwiseOperation,
CElementwiseOperation,
remove_reference_t<typename GridwiseBatchedGemm::Block2CTileMap>,
remove_reference_t<typename GridwiseBatchedGemm::DefaultBlock2CTileMap>,
true>;
ave_time = launch_and_time_kernel(kernel,
@@ -359,7 +359,7 @@ struct DeviceBatchedGemmXdl
AElementwiseOperation,
BElementwiseOperation,
CElementwiseOperation,
remove_reference_t<typename GridwiseBatchedGemm::Block2CTileMap>,
remove_reference_t<typename GridwiseBatchedGemm::DefaultBlock2CTileMap>,
false>;
ave_time = launch_and_time_kernel(kernel,