mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-16 10:59:55 +00:00
hipTensor permute unit test (#1068)
* adding files for F32 example
* adding functioning implementation with scalar multiplication and unary operator support
* added fp16 type check in unary square
* updating scalar multiplication as an operator
* functioning version with scalar operator
* changing strides for col major
* updated column major implementation
* working column major implementation
* cleaned up comments, rearranged/renamed files
* small edits to 3d transpose profiler
* adding test/profiler/instance files for hipTensor permute unit test
* added more test instances
* cleaned up errors, randomized input tensor, added more instances
* turned off time printouts
* removed conflicting transpose profiler
* rearranged some files
[ROCm/composable_kernel commit: 12a8883c48]
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#include <random>
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
|
||||
@@ -48,10 +49,8 @@ void host_elementwise4D(HostTensorB& B_nhwc,
|
||||
for(std::size_t n = 0; n < N; ++n)
|
||||
{
|
||||
ADataType tmp_val;
|
||||
// auto a_val = A_nchw(n, c, h, w);
|
||||
auto a_val = A_nchw.mData[(n) + (c * N) + (h * C * N) + (w * H * C * N)];
|
||||
functor_b(tmp_val, a_val);
|
||||
// functor_a(B_nhwc(n, h, w, c), scale * tmp_val);
|
||||
functor_a(B_nhwc.mData[(n) + (c * W * H * N) + (h * N) + (w * H * N)],
|
||||
scale * tmp_val);
|
||||
}
|
||||
@@ -62,12 +61,14 @@ int main()
|
||||
bool do_verification = true;
|
||||
bool time_kernel = true;
|
||||
|
||||
std::vector<std::size_t> nchw = {4, 2, 1, 8};
|
||||
std::vector<std::size_t> nhwc = {4, 1, 8, 2};
|
||||
std::vector<std::size_t> nchw = {16, 8, 32, 64};
|
||||
std::vector<std::size_t> nhwc = {16, 32, 64, 8};
|
||||
Tensor<ADataType> a(nchw);
|
||||
Tensor<BDataType> b(nhwc);
|
||||
float scale = 1.f;
|
||||
auto i = 0;
|
||||
std::mt19937 gen(11939);
|
||||
std::uniform_int_distribution<int> dis(0, 1);
|
||||
for(std::size_t w = 0; w < a.mDesc.GetLengths()[3]; ++w)
|
||||
for(std::size_t h = 0; h < a.mDesc.GetLengths()[2]; ++h)
|
||||
for(std::size_t c = 0; c < a.mDesc.GetLengths()[1]; ++c)
|
||||
@@ -75,7 +76,7 @@ int main()
|
||||
{
|
||||
a.mData[(n * nchw[1] * nchw[2] * nchw[3]) + (c * nchw[2] * nchw[3]) +
|
||||
(h * nchw[3]) + w] = i;
|
||||
i++;
|
||||
i = dis(gen);
|
||||
}
|
||||
|
||||
DeviceMem a_device_buf(sizeof(ADataType) * a.mDesc.GetElementSpaceSize());
|
||||
|
||||
@@ -67,6 +67,8 @@ int main()
|
||||
|
||||
float scale = 1.f;
|
||||
auto i = 0;
|
||||
std::mt19937 gen(11939);
|
||||
std::uniform_int_distribution<int> dis(0, 1);
|
||||
for(std::size_t w = 0; w < a.mDesc.GetLengths()[3]; ++w)
|
||||
for(std::size_t h = 0; h < a.mDesc.GetLengths()[2]; ++h)
|
||||
for(std::size_t c = 0; c < a.mDesc.GetLengths()[1]; ++c)
|
||||
@@ -74,7 +76,7 @@ int main()
|
||||
{
|
||||
a.mData[(n * nchw[1] * nchw[2] * nchw[3]) + (c * nchw[2] * nchw[3]) +
|
||||
(h * nchw[3]) + w] = i;
|
||||
i++;
|
||||
i = dis(gen);
|
||||
}
|
||||
|
||||
DeviceMem a_device_buf(sizeof(ADataType) * a.mDesc.GetElementSpaceSize());
|
||||
|
||||
Reference in New Issue
Block a user