mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-15 02:27:57 +00:00
* convnd_fwd fp16 example
* update example
* update example
* update instance
* updating reference conv
* update reference conv
* update conv fwd profiler
* update conv 1d and 3d instance
* update include path
* clean
* update profiler for conv bwd data and weight
* update conv bwd weight
* clean
* update conv example
* update profiler for conv bwd weight
* update ckprofiler for conv bwd data
* fix reference conv bwd data bug; update conv bwd data test
* update examples
* fix initialization issue
* update test for conv fwd
* clean
* clean
* remove test case too sensitive to error threshold
* fix test
* clean
* fix build
* adding conv multiple d
* adding conv multiple D
* add matrix padder
* add gemm padding to convnd
* adding group conv
* update gemm multi-d
* refactor
* refactor
* refactor
* clean
* clean
* refactor
* refactor
* reorg
* add ds
* add bias
* clean
* add G
* adding group
* adding group
* adding group
* update Tensor
* clean
* update example
* update DeviceGemmMultipleD_Xdl_CShuffle
* update conv bwd-data and bwd-weight
* update contraction example
* update gemm and batch gemm with e permute
* fix example build
* instance for grouped conv1d
* update example
* adding group conv instance
* update gemm bilinear instance
* update gemm+add+add+fastgelu instance
* update profiler
* update profiler
* update test
* update test and client example
* clean
* add grouped conv into profiler
* update profiler
* clean
* add test grouped conv, update all conv test to gtest
* update test
[ROCm/composable_kernel commit: 500fa99512]
57 lines
1.5 KiB
C++
57 lines
1.5 KiB
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#include <cassert>
|
|
|
|
#include "ck/library/utility/host_tensor.hpp"
|
|
|
|
void HostTensorDescriptor::CalculateStrides()
|
|
{
|
|
mStrides.clear();
|
|
mStrides.resize(mLens.size(), 0);
|
|
if(mStrides.empty())
|
|
return;
|
|
|
|
mStrides.back() = 1;
|
|
std::partial_sum(
|
|
mLens.rbegin(), mLens.rend() - 1, mStrides.rbegin() + 1, std::multiplies<std::size_t>());
|
|
}
|
|
|
|
// Returns the number of dimensions, i.e. how many entries the length list has.
std::size_t HostTensorDescriptor::GetNumOfDimension() const
{
    const std::size_t rank = mLens.size();
    return rank;
}
|
|
|
|
std::size_t HostTensorDescriptor::GetElementSize() const
|
|
{
|
|
assert(mLens.size() == mStrides.size());
|
|
return std::accumulate(
|
|
mLens.begin(), mLens.end(), std::size_t{1}, std::multiplies<std::size_t>());
|
|
}
|
|
|
|
std::size_t HostTensorDescriptor::GetElementSpaceSize() const
|
|
{
|
|
std::size_t space = 1;
|
|
for(std::size_t i = 0; i < mLens.size(); ++i)
|
|
{
|
|
space += (mLens[i] - 1) * mStrides[i];
|
|
}
|
|
return space;
|
|
}
|
|
|
|
// Read-only access to the per-dimension lengths.
const std::vector<std::size_t>& HostTensorDescriptor::GetLengths() const
{
    return mLens;
}
|
|
|
|
// Read-only access to the per-dimension strides.
const std::vector<std::size_t>& HostTensorDescriptor::GetStrides() const
{
    return mStrides;
}
|
|
|
|
std::ostream& operator<<(std::ostream& os, const HostTensorDescriptor& desc)
|
|
{
|
|
os << "dim " << desc.GetNumOfDimension() << ", ";
|
|
|
|
os << "lengths {";
|
|
LogRange(os, desc.GetLengths(), ", ");
|
|
os << "}, ";
|
|
|
|
os << "strides {";
|
|
LogRange(os, desc.GetStrides(), ", ");
|
|
os << "}";
|
|
|
|
return os;
|
|
}
|