mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 21:21:22 +00:00
gemm/Conv xdlops + dlops quantization (#625)
* Add conv perlayer quantization
* Add gemm_dlops quantization
* Support int8 for innerproduct
* Refine gemm dlops int8 kernel parameter
* Support gfx908(MI100) and gfx90a(MI200)
* clang-format
* Rename example number
* Support different layout for d tensor
* Add conv dlops perchannel quantization example
* Move to example 40
* Extract the common code for different platform (dlops and xdlops)
* Move ot subfolder. Prepare to add other op of quantization
* Refine the quantization instance library
* Add conv dl instances and client example
* Remove unnecessary type
* Add gemm quantization instance
* Add external api and client example
* Refine num_bytes
* Separete different layout to different cpp
* Add more xdl instances
* Revert "Remove unnecessary type"
This reverts commit 820869182f.
* Remove CShuffleDataType in dlops
Let acc and CShuffleDataType be the same in xdlops
---------
Co-authored-by: zjing14 <zhangjing14@gmail.com>
This commit is contained in:
@@ -185,8 +185,10 @@ struct GridwiseGemmDlMultipleD_km_kn_mn
|
||||
return b_grid_desc_k0_n0_n1_k1;
|
||||
}
|
||||
|
||||
// E desc for destination in blockwise copy
|
||||
template <typename CGridDesc_M_N_>
|
||||
__host__ __device__ static constexpr auto
|
||||
MakeCGridDescriptor_M0_M10_M11_N0_N10_N11(const CGridDesc_M_N& c_grid_desc_m_n)
|
||||
MakeCGridDescriptor_M0_M10_M11_N0_N10_N11(const CGridDesc_M_N_& c_grid_desc_m_n)
|
||||
{
|
||||
const auto M = c_grid_desc_m_n.GetLength(I0);
|
||||
const auto N = c_grid_desc_m_n.GetLength(I1);
|
||||
|
||||
Reference in New Issue
Block a user