mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
* tuning parameters
* testing on v100
* add fp16
* remove deprecated tensor descriptor
* sync with miopen
* update build script
Co-authored-by: Jing Zhang <jizhan@amd.com>
[ROCm/composable_kernel commit: 5c7cec1115]
27 lines
877 B
C++
27 lines
877 B
C++
#pragma once
|
|
#include "host_tensor.hpp"
|
|
#include "common_header.hpp"
|
|
#include "tensor_descriptor.hpp"
|
|
|
|
template <typename TensorDesc, std::size_t... Is>
|
|
auto make_HostTensorDescriptor_impl(TensorDesc, std::integer_sequence<std::size_t, Is...>)
|
|
{
|
|
std::initializer_list<std::size_t> lengths = {TensorDesc::GetLengths()[Is]...};
|
|
std::initializer_list<std::size_t> strides = {TensorDesc::GetStrides()[Is]...};
|
|
|
|
return HostTensorDescriptor(lengths, strides);
|
|
}
|
|
|
|
// Creates a HostTensorDescriptor from the descriptor type `TensorDesc`.
//
// The argument is used only for type deduction: a default-constructed
// TensorDesc and the index sequence 0 .. GetNumOfDimension() - 1 are passed
// to make_HostTensorDescriptor_impl, which does the actual construction.
template <typename TensorDesc>
auto make_HostTensorDescriptor(TensorDesc)
{
    // One index per tensor dimension.
    using DimIndices =
        std::make_integer_sequence<std::size_t, TensorDesc::GetNumOfDimension()>;

    return make_HostTensorDescriptor_impl(TensorDesc{}, DimIndices{});
}
|
|
|
|
// Prints the descriptor type `TensorDesc` to `os` (std::cout by default).
//
// The descriptor is first converted with make_HostTensorDescriptor and then
// written by ostream_HostTensorDescriptor, which determines the output format.
// The first argument is used only for type deduction.
template <typename TensorDesc>
void ostream_tensor_descriptor(TensorDesc, std::ostream& os = std::cout)
{
    const auto host_desc = make_HostTensorDescriptor(TensorDesc{});
    ostream_HostTensorDescriptor(host_desc, os);
}
|