mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
Clean up conv example, Instances, profiler and test (#324)
* convnd_fwd fp16 example
* update example
* update example
* update instance
* updating refernce conv
* update reference conv
* update conv fwd profiler
* update conv 1d and 3d instance
* update include path
* clean
* update profiler for conv bwd data and weight
* update conv bwd weight
* clean
* update conv example
* update profiler for conv bwd weight
* update ckprofiler for conv bwd data
* fix reference conv bwd data bug; update conv bwd data test
* update examples
* fix initialization issue
* update test for conv fwd
* clean
* clean
* remove test case too sensitive to error threshhold
* fix test
* clean
* fix build
* adding conv multiple d
* adding conv multiple D
* add matrix padder
* add gemm padding to convnd
* adding group conv
* update gemm multi-d
* refactor
* refactor
* refactor
* clean
* clean
* refactor
* refactor
* reorg
* add ds
* add bias
* clean
* add G
* adding group
* adding group
* adding group
* update Tensor
* clean
* update example
* update DeviceGemmMultipleD_Xdl_CShuffle
* update conv bwd-data and bwd-weight
* upate contraction example
* update gemm and batch gemm with e permute
* fix example build
* instance for grouped conv1d
* update example
* adding group conv instance
* update gemm bilinear instance
* update gemm+add+add+fastgelu instance
* update profiler
* update profiler
* update test
* update test and client example
* clean
* add grouped conv into profiler
* update profiler
* clean
* add test grouped conv, update all conv test to gtest
* update test
[ROCm/composable_kernel commit: 500fa99512]
This commit is contained in:
@@ -31,11 +31,11 @@ using D0DataType = F16;
|
||||
using D1DataType = F16;
|
||||
using EDataType = F16;
|
||||
|
||||
using ALayout = Row;
|
||||
using BLayout = Col;
|
||||
using DDELayout = Row;
|
||||
using DDELayout = Row;
|
||||
using DELayout = Row;
|
||||
using ALayout = Row;
|
||||
using BLayout = Col;
|
||||
using D0Layout = Row;
|
||||
using D1Layout = Row;
|
||||
using ELayout = Row;
|
||||
|
||||
struct SimpleDeviceMem
|
||||
{
|
||||
@@ -105,16 +105,16 @@ int main(int argc, char* argv[])
|
||||
SimpleDeviceMem a_device_buf(sizeof(ADataType) * f_matrix_space_size(M, K, StrideA, ALayout{}));
|
||||
SimpleDeviceMem b_device_buf(sizeof(BDataType) * f_matrix_space_size(K, N, StrideB, BLayout{}));
|
||||
SimpleDeviceMem d0_m_n_device_buf(sizeof(D0DataType) *
|
||||
f_matrix_space_size(M, N, StrideD0, DDELayout{}));
|
||||
f_matrix_space_size(M, N, StrideD0, D0Layout{}));
|
||||
SimpleDeviceMem d1_m_n_device_buf(sizeof(D1DataType) *
|
||||
f_matrix_space_size(M, N, StrideD1, DDELayout{}));
|
||||
SimpleDeviceMem e_device_buf(sizeof(EDataType) *
|
||||
f_matrix_space_size(M, N, StrideE, DELayout{}));
|
||||
f_matrix_space_size(M, N, StrideD1, D1Layout{}));
|
||||
SimpleDeviceMem e_device_buf(sizeof(EDataType) * f_matrix_space_size(M, N, StrideE, ELayout{}));
|
||||
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceGemmMultipleD<
|
||||
ALayout,
|
||||
BLayout,
|
||||
DDELayout,
|
||||
ck::Tuple<D0Layout, D1Layout>,
|
||||
ELayout,
|
||||
ADataType,
|
||||
BDataType,
|
||||
ck::Tuple<D0DataType, D1DataType>,
|
||||
|
||||
Reference in New Issue
Block a user