diff --git a/example/19_binary_elementwise/broadcast_add_2d.cpp b/example/19_binary_elementwise/broadcast_add_2d.cpp index 181d0e6a2d..2a3ef421ff 100644 --- a/example/19_binary_elementwise/broadcast_add_2d.cpp +++ b/example/19_binary_elementwise/broadcast_add_2d.cpp @@ -74,9 +74,7 @@ int main() }; Tensor a_m_n(f_host_tensor_descriptor2d(M, N, Stride)); - Tensor b_n(f_host_tensor_descriptor1d(N, 1)); - Tensor c_m_n(f_host_tensor_descriptor2d(M, N, Stride)); a_m_n.GenerateTensorValue(GeneratorTensor_3{0.0, 1.0}); diff --git a/example/19_binary_elementwise/elementwise_add_1d.cpp b/example/19_binary_elementwise/elementwise_add_1d.cpp index f94c19f1d1..455ff24c31 100644 --- a/example/19_binary_elementwise/elementwise_add_1d.cpp +++ b/example/19_binary_elementwise/elementwise_add_1d.cpp @@ -56,7 +56,7 @@ int main() Tensor a_m(f_host_tensor_descriptor1d(M, 1)); Tensor b_m(f_host_tensor_descriptor1d(M, 1)); - Tensor c_m(f_host_tensor_descriptor1d(M, 1)); + Tensor c_m(f_host_tensor_descriptor1d(M, 1)); a_m.GenerateTensorValue(GeneratorTensor_3{0.0, 1.0}); b_m.GenerateTensorValue(GeneratorTensor_3{0.0, 1.0}); diff --git a/example/19_binary_elementwise/elementwise_add_4d.cpp b/example/19_binary_elementwise/elementwise_add_4d.cpp index e358e993b0..937a6c8c1d 100644 --- a/example/19_binary_elementwise/elementwise_add_4d.cpp +++ b/example/19_binary_elementwise/elementwise_add_4d.cpp @@ -5,7 +5,6 @@ #include "device.hpp" #include "host_tensor.hpp" #include "host_tensor_generator.hpp" -#include "host_utility.hpp" #include "device_tensor.hpp" #include "binary_element_wise_operation.hpp" @@ -56,29 +55,29 @@ int main() std::vector nchw = {4, 16, 32, 32}; - Tensor a_m(nchw); - Tensor b_m(nchw); - Tensor c_m(nchw); + Tensor a(nchw); + Tensor b(nchw); + Tensor c(nchw); - a_m.GenerateTensorValue(GeneratorTensor_3{0.0, 1.0}); - b_m.GenerateTensorValue(GeneratorTensor_3{0.0, 1.0}); + a.GenerateTensorValue(GeneratorTensor_3{0.0, 1.0}); + b.GenerateTensorValue(GeneratorTensor_3{0.0, 1.0}); - DeviceMem a_m_device_buf(sizeof(ABDataType) * a_m.mDesc.GetElementSpace()); - DeviceMem b_m_device_buf(sizeof(ABDataType) * b_m.mDesc.GetElementSpace()); - DeviceMem c_m_device_buf(sizeof(CDataType) * c_m.mDesc.GetElementSpace()); + DeviceMem a_device_buf(sizeof(ABDataType) * a.mDesc.GetElementSpace()); + DeviceMem b_device_buf(sizeof(ABDataType) * b.mDesc.GetElementSpace()); + DeviceMem c_device_buf(sizeof(CDataType) * c.mDesc.GetElementSpace()); - a_m_device_buf.ToDevice(a_m.mData.data()); - b_m_device_buf.ToDevice(b_m.mData.data()); + a_device_buf.ToDevice(a.mData.data()); + b_device_buf.ToDevice(b.mData.data()); auto broadcastAdd = DeviceElementwiseAddInstance{}; auto argument = broadcastAdd.MakeArgumentPointer( - a_m_device_buf.GetDeviceBuffer(), - b_m_device_buf.GetDeviceBuffer(), - c_m_device_buf.GetDeviceBuffer(), - ck::convert_vector_element_type(nchw), - ck::convert_vector_element_type(a_m.mDesc.GetStrides()), - ck::convert_vector_element_type(b_m.mDesc.GetStrides()), - ck::convert_vector_element_type(c_m.mDesc.GetStrides()), + a_device_buf.GetDeviceBuffer(), + b_device_buf.GetDeviceBuffer(), + c_device_buf.GetDeviceBuffer(), + std::vector{nchw.begin(), nchw.end()}, + std::vector{a.mDesc.GetStrides().begin(), a.mDesc.GetStrides().end()}, + std::vector{b.mDesc.GetStrides().begin(), b.mDesc.GetStrides().end()}, + std::vector{c.mDesc.GetStrides().begin(), c.mDesc.GetStrides().end()}, Add{}); if(!broadcastAdd.IsSupportedArgument(argument.get())) @@ -96,17 +95,17 @@ int main() bool pass = true; if(do_verification) { - c_m_device_buf.FromDevice(c_m.mData.data()); - Tensor host_c_m(nchw); + c_device_buf.FromDevice(c.mData.data()); + Tensor host_c(nchw); host_elementwise4D, Tensor, Tensor, EltwiseComputeDataType, - Add>(host_c_m, a_m, b_m, nchw, Add{}); + Add>(host_c, a, b, nchw, Add{}); - pass &= ck::utils::check_err( - c_m.mData, host_c_m.mData, "Error: Incorrect results d1", 1e-3, 1e-3); + pass &= + ck::utils::check_err(c.mData, host_c.mData, "Error: Incorrect results d1", 1e-3, 1e-3); } return pass ? 0 : 1; diff --git a/include/ck/tensor_operation/gpu/device/device_binary_elementwise.hpp b/include/ck/tensor_operation/gpu/device/device_binary_elementwise.hpp index 8bf6604f18..8955aadc11 100644 --- a/include/ck/tensor_operation/gpu/device/device_binary_elementwise.hpp +++ b/include/ck/tensor_operation/gpu/device/device_binary_elementwise.hpp @@ -19,8 +19,6 @@ template struct DeviceBinaryElementwise : public BaseOperator { - DeviceBinaryElementwise(index_t blockSize = 256) : BaseOperator(), blockSize_(blockSize) {} - static constexpr auto I0 = Number<0>{}; template @@ -81,18 +79,18 @@ struct DeviceBinaryElementwise : public BaseOperator const std::vector& stride_a, const std::vector& stride_b, const std::vector& stride_c, - ElementwiseFunctor functor, - index_t blockSize) + ElementwiseFunctor functor) : p_a_(p_a), p_b_(p_b), p_c_(p_c), shape_(shape), functor_(functor), + blockSize_(256), gridSize_(120) // FIXME - Calculate the grid size by number of CU in the future { - a_grid_desc_m0_ = MakeDescriptor_M0(shape, stride_a, gridSize_, blockSize); - b_grid_desc_m0_ = MakeDescriptor_M0(shape, stride_b, gridSize_, blockSize); - c_grid_desc_m0_ = MakeDescriptor_M0(shape, stride_c, gridSize_, blockSize); + a_grid_desc_m0_ = MakeDescriptor_M0(shape, stride_a, gridSize_, blockSize_); + b_grid_desc_m0_ = MakeDescriptor_M0(shape, stride_b, gridSize_, blockSize_); + c_grid_desc_m0_ = MakeDescriptor_M0(shape, stride_c, gridSize_, blockSize_); } const ADataType* p_a_; @@ -103,13 +101,12 @@ struct DeviceBinaryElementwise : public BaseOperator GridDesc_M0 b_grid_desc_m0_; GridDesc_M0 c_grid_desc_m0_; ElementwiseFunctor functor_; + index_t blockSize_; index_t gridSize_; }; struct Invoker : public BaseInvoker { - Invoker(index_t blockSize) : BaseInvoker(), blockSize_(blockSize) {} - float Run(const Argument& arg, const StreamConfig& stream_config = StreamConfig{}) { const auto kernel = kernel_binary_elementwise_1d(p_arg), stream_config); } - - index_t blockSize_; }; bool IsSupportedArgument(const BaseArgument* p_arg) override @@ -173,14 +168,10 @@ struct DeviceBinaryElementwise : public BaseOperator stride_a, stride_b, stride_c, - functor, - blockSize_); + functor); } - std::unique_ptr MakeInvokerPointer() - { - return std::make_unique(Invoker{blockSize_}); - } + std::unique_ptr MakeInvokerPointer() { return std::make_unique(); } std::string GetTypeString() const override { @@ -195,8 +186,6 @@ struct DeviceBinaryElementwise : public BaseOperator return str.str(); } - - index_t blockSize_; }; } // namespace device diff --git a/library/include/ck/library/host_tensor/host_utility.hpp b/library/include/ck/library/host_tensor/host_utility.hpp deleted file mode 100644 index 2ff76e58c3..0000000000 --- a/library/include/ck/library/host_tensor/host_utility.hpp +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once -#include - -namespace ck { - -template -inline std::vector convert_vector_element_type(const std::vector& inData) -{ - std::vector outData; - - for(auto elem : inData) - outData.push_back(static_cast(elem)); - - return (outData); -}; - -}; // namespace ck