Restructure gridwise and blockwise GEMM, add tensor contraction and FWD-v4r5 (#36)

* experimenting magic number division

* overhauling fwd-v4r4 to clearly reflect transformation graph

* added fwd-v4r5

* bug fix for make_dynamic_naive_tensor_descriptor_aligned_v2

* bug fix and added sanity-check in transform_dynamic_tensor_descriptor

* added conv_driver_v2

[ROCm/composable_kernel commit: 30072aec37]
This commit is contained in:
Chao Liu
2021-06-09 23:53:08 -05:00
committed by GitHub
parent 040023fdcd
commit f4acec502e
38 changed files with 4791 additions and 2050 deletions

View File

@@ -271,19 +271,20 @@ struct Tensor
std::vector<T> mData;
};
void ostream_HostTensorDescriptor(const HostTensorDescriptor& desc, std::ostream& os = std::cout)
template <typename X>
HostTensorDescriptor::HostTensorDescriptor(std::vector<X> lens) : mLens(lens)
{
os << "dim " << desc.GetNumOfDimension() << ", ";
os << "lengths {";
LogRange(os, desc.GetLengths(), ", ");
os << "}, ";
os << "strides {";
LogRange(os, desc.GetStrides(), ", ");
os << "}" << std::endl;
this->CalculateStrides();
}
template <typename X, typename Y>
HostTensorDescriptor::HostTensorDescriptor(std::vector<X> lens, std::vector<Y> strides)
: mLens(lens), mStrides(strides)
{
}
void ostream_HostTensorDescriptor(const HostTensorDescriptor& desc, std::ostream& os = std::cout);
template <class T>
void check_error(const Tensor<T>& ref, const Tensor<T>& result)
{