From 625838def0763a7a76899333fb674d5d22264718 Mon Sep 17 00:00:00 2001 From: Chao Liu Date: Fri, 6 Sep 2019 18:07:56 -0500 Subject: [PATCH] added tuple --- ...plicit_gemm_v1r3_chwn_cyxk_khwn_padded.hpp | 64 ++++++++++ .../include/tensor_description/dimension.hpp | 10 +- .../multi_index_transform.hpp | 67 +++++----- .../tensor_description/tensor_descriptor.hpp | 79 ++++++++++-- .../tensor_descriptor_helper.hpp | 117 ++++++++++++++++++ .../tensor_description/tensor_visit.hpp | 1 + composable_kernel/include/utility/Array.hpp | 14 ++- composable_kernel/include/utility/tuple.hpp | 115 +++++++++++------ driver/include/host_conv.hpp | 6 - driver/src/driver.cpp | 2 +- 10 files changed, 369 insertions(+), 106 deletions(-) create mode 100644 composable_kernel/include/tensor_description/tensor_descriptor_helper.hpp diff --git a/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_padded.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_padded.hpp index c0c41d53fe..8adc526afd 100644 --- a/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_padded.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_padded.hpp @@ -4,6 +4,8 @@ #include "common_header.hpp" #include "ConstantTensorDescriptor.hpp" #include "ConstantMatrixDescriptor.hpp" +#include "tensor_descriptor.hpp" +#include "tensor_descriptor_helper.hpp" #include "blockwise_generic_tensor_slice_copy.hpp" #include "threadwise_generic_tensor_slice_copy.hpp" #include "blockwise_batched_gemm.hpp" @@ -45,6 +47,7 @@ template struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded { +#if 0 __device__ void Run(const Float* const __restrict__ p_in_global, const Float* const __restrict__ p_wei_global, Float* const __restrict__ p_out_global) const @@ -478,6 +481,67 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded #endif }); } +#else + __device__ void Run(const Float* const __restrict__ p_in_global, + const Float* const __restrict__ p_wei_global, + Float* const __restrict__ p_out_global) const + { +#if 0 + constexpr auto tmp = std::tuple{}; + constexpr auto flag = std::get<0>(tmp); +#else + constexpr auto a = Tuple, index_t>(true, Sequence<1>{}, 99); + + if(get_thread_local_1d_id() == 0 && get_block_1d_id() == 0) + { + printf("adsas %d\n", a.At(Number<0>{})); + print_Sequence("seq", a.At(Number<1>{})); + printf("adsas %lu\n", a.At(Number<2>{})); + } + + auto b = Tuple, index_t>(true, Sequence<1>{}, 99); + + b.At(Number<0>{}) = false; + + if(get_thread_local_1d_id() == 0 && get_block_1d_id() == 0) + { + printf("adsas %d\n", b.At(Number<0>{})); + print_Sequence("seq", b.At(Number<1>{})); + printf("adsas %lu\n", b.At(Number<2>{})); + } + + if(get_thread_local_1d_id() == 0 && get_block_1d_id() == 0) + { + printf("adsas %d\n", + Tuple, index_t>(true, Sequence<1>(), 99).At(Number<0>{})); + print_Sequence( + "seq", Tuple, index_t>(true, Sequence<1>(), 99).At(Number<1>{})); + printf("adsas %d\n", + Tuple, index_t>(true, Sequence<1>(), 99).At(Number<2>{})); + } +#endif + +#if 0 + constexpr auto I0 = Number<0>{}; + constexpr auto I1 = Number<1>{}; + constexpr auto I2 = Number<2>{}; + constexpr auto I3 = Number<3>{}; + + // create a native tensor descriptor + constexpr auto in_n_c_h_w_global_desc = + make_NativeTensorDescriptor(InGlobalDesc::GetLengths(), InGlobalDesc::GetStrides()); + + if(get_thread_local_1d_id() == 0 && get_block_1d_id() == 0) + { + print_tensor_descriptor("in_n_c_h_w_global_desc", in_n_c_h_w_global_desc); + } + + // transform the tensor descriptor once + // + // calculate the offset of some entry +#endif + } +#endif }; } // namespace ck diff --git a/composable_kernel/include/tensor_description/dimension.hpp b/composable_kernel/include/tensor_description/dimension.hpp index cd897323a3..1cf8dc4aba 100644 --- a/composable_kernel/include/tensor_description/dimension.hpp +++ b/composable_kernel/include/tensor_description/dimension.hpp @@ -12,15 +12,17 @@ struct Dimension }; template -struct NativeDimension : Dimension +struct NativeDimension { + __host__ __device__ static constexpr auto GetLength() { return Number{}; } + __host__ __device__ static constexpr auto GetStride() { return Number{}; } - __host__ __device__ static constexpr index_t GetOffset(index_t id) { return id * Stride; } + __host__ __device__ static constexpr index_t GetOffset(index_t i) { return i * Stride; } - __host__ __device__ static constexpr index_t GetOffsetDiff(index_t id_diff) + __host__ __device__ static constexpr index_t GetOffsetDiff(index_t i_diff) { - return id_diff * Stride; + return i_diff * Stride; } }; diff --git a/composable_kernel/include/tensor_description/multi_index_transform.hpp b/composable_kernel/include/tensor_description/multi_index_transform.hpp index 831d4c5833..8812779319 100644 --- a/composable_kernel/include/tensor_description/multi_index_transform.hpp +++ b/composable_kernel/include/tensor_description/multi_index_transform.hpp @@ -8,25 +8,19 @@ namespace ck { template using MultiIndex = Array; -// LowLengths: Sequence<...> -template +template struct PassThrough { - static constexpr index_t nDim = LowLengths::GetSize(); + using LowerIndex = MultiIndex<1>; + using UpperIndex = MultiIndex<1>; - using LowerIndex = MultiIndex; - using UpperIndex = LowerIndex; + __host__ __device__ static constexpr auto GetNumOfLowerDimension() { return Number<1>{}; } - __host__ __device__ static constexpr auto GetNumOfLowerDimension() { return Number{}; } + __host__ __device__ static constexpr auto GetNumOfUpperDimension() { return Number<1>{}; } - __host__ __device__ static constexpr auto GetNumOfUpperDimension() - { - return GetNumOfLowerDimension(); - } + __host__ __device__ static constexpr auto GetLowerLengths() { return Sequence{}; } - __host__ __device__ static constexpr auto GetLowerLengths() { return LowLengths{}; } - - __host__ __device__ static constexpr auto GetUpperLengths() { return GetLowerLengths(); } + __host__ __device__ static constexpr auto GetUpperLengths() { return Sequence{}; } __host__ __device__ static constexpr auto GetLowerIndex(UpperIndex idx_up) { return idx_up; } @@ -35,7 +29,7 @@ struct PassThrough return idx_up_diff; } - __host__ __device__ static constexpr bool IsIndexTransformLinear() { return true; } + __host__ __device__ static constexpr bool IsLinearTransform() { return true; } }; // LowLengths: Sequence<...> @@ -45,25 +39,22 @@ struct Pad static constexpr index_t nDim = LowLengths::GetSize(); using LowerIndex = MultiIndex; - using UpperIndex = LowerIndex; + using UpperIndex = MultiIndex; __host__ __device__ static constexpr auto GetNumOfLowerDimension() { return Number{}; } - __host__ __device__ static constexpr auto GetNumOfUpperDimension() - { - return GetNumOfLowerDimension(); - } + __host__ __device__ static constexpr auto GetNumOfUpperDimension() { return Number{}; } __host__ __device__ static constexpr auto GetLowerLengths() { return LowLengths{}; } __host__ __device__ static constexpr auto GetUpperLengths() { - return GetLowerLengths() + LeftPads + RightPads; + return GetLowerLengths() + LeftPads{} + RightPads{}; } __host__ __device__ static constexpr auto GetLowerIndex(UpperIndex idx_up) { - return idx_up - LeftPads; + return idx_up - LeftPads{}; } __host__ __device__ static constexpr auto GetLowerIndexDiff(UpperIndex idx_up_diff) @@ -71,9 +62,10 @@ struct Pad return idx_up_diff; } - __host__ __device__ static constexpr bool IsIndexTransformLinear() { return true; } + __host__ __device__ static constexpr bool IsLinearTransform() { return true; } }; +#if 0 // LowLengths: Sequence<...> template struct Merge @@ -116,8 +108,9 @@ struct Merge return idx_low_diff; } - __host__ __device__ static constexpr bool IsIndexTransformLinear() { return false; } + __host__ __device__ static constexpr bool IsLinearTransform() { return false; } }; +#endif // UpLengths: Sequence<...> template @@ -126,6 +119,9 @@ struct Unmerge static constexpr index_t nDimLow = 1; static constexpr index_t nDimUp = UpLengths::GetSize(); + using UpperIndex = MultiIndex; + using LowerIndex = MultiIndex; + __host__ __device__ constexpr Unmerge() { static_assert(LowLength == accumulate_on_sequence( @@ -133,7 +129,7 @@ struct Unmerge "wrong! UpLengths need to be "); } - __host__ __device__ static constexpr auto GetNumOfUpperDimension(){return Number{}}; + __host__ __device__ static constexpr auto GetNumOfUpperDimension() { return Number{}; } __host__ __device__ static constexpr auto GetNumOfLowerDimension() { return Number{}; } @@ -149,7 +145,7 @@ struct Unmerge LowerIndex idx_low{0}; - static_for<0, nDim, 1>{}([&](auto idim) { idx_low[0] += idx_up[idim] * scans[idim]; }); + static_for<0, nDimUp, 1>{}([&](auto idim) { idx_low(0) += idx_up[idim] * scans[idim]; }); return idx_low; } @@ -159,7 +155,7 @@ struct Unmerge return GetLowerIndex(idx_up_diff); } - __host__ __device__ static constexpr bool IsIndexTransformLinear() { return true; } + __host__ __device__ static constexpr bool IsLinearTransform() { return true; } }; // UpLengths: Sequence<...> @@ -171,7 +167,8 @@ struct Embed static constexpr index_t nDimLow = 1; static constexpr index_t nDimUp = UpLengths::GetSize(); - static constexpr auto mCoefficients = Coefficients{}; + using LowerIndex = MultiIndex; + using UpperIndex = MultiIndex; __host__ __device__ constexpr Embed() { @@ -179,14 +176,14 @@ struct Embed "wrong! # of dimensions not consistent"); constexpr index_t low_id_max = - Coefficents.Back() + accumulate_on_sequence(UpLengths{} * Coefficients::PopBack(), - math::plus{}, - Number<0>{}); + Coefficients::Back() + accumulate_on_sequence(UpLengths{} * Coefficients::PopBack(), + math::plus{}, + Number<0>{}); static_assert(low_id_max < LowLength, "wrong! lower-id will go out of range"); } - __host__ __device__ static constexpr auto GetNumOfUpperDimension(){return Number{}}; + __host__ __device__ static constexpr auto GetNumOfUpperDimension() { return Number{}; } __host__ __device__ static constexpr auto GetNumOfLowerDimension() { return Number{}; } @@ -196,10 +193,10 @@ struct Embed __host__ __device__ static constexpr auto GetLowerIndex(UpperIndex idx_up) { - LowerIndex idx_low{mCoefficients[nDimUp]}; + LowerIndex idx_low(Coefficients{}[nDimUp]); static_for<0, nDimUp, 1>{}( - [&](auto idim) { idx_low[0] += idx_up[idim] * mCoefficients[idim]; }); + [&](auto idim) { idx_low[0] += idx_up[idim] * Coefficients{}[idim]; }); return idx_low; } @@ -209,12 +206,12 @@ struct Embed LowerIndex idx_low_diff{0}; static_for<0, nDimUp, 1>{}( - [&](auto idim) { idx_low_diff[0] += idx_up_diff[idim] * mCoefficients[idim]; }); + [&](auto idim) { idx_low_diff[0] += idx_up_diff[idim] * Coefficients{}[idim]; }); return idx_low_diff; } - __host__ __device__ static constexpr bool IsIndexTransformLinear() { return true; } + __host__ __device__ static constexpr bool IsLinearTransform() { return true; } }; } // namespace ck diff --git a/composable_kernel/include/tensor_description/tensor_descriptor.hpp b/composable_kernel/include/tensor_description/tensor_descriptor.hpp index b2467e7e0c..4610fc5f74 100644 --- a/composable_kernel/include/tensor_description/tensor_descriptor.hpp +++ b/composable_kernel/include/tensor_description/tensor_descriptor.hpp @@ -11,21 +11,39 @@ template struct NativeTensorDescriptor { using type = NativeTensorDescriptor; - static constexpr auto mDimensions = Tuple; - static constexpr index_t nDim = mDimensions::GetSize(); + static constexpr auto mDimensions = Tuple{}; + static constexpr index_t nDim = mDimensions.GetSize(); using Index = MultiIndex; __host__ __device__ static constexpr auto GetNumOfDimension() { return Number{}; } + struct lambda_GetLength + { + template + __host__ __device__ constexpr auto operator()(IDim) const + { + return GetLength(IDim{}); + } + }; + __host__ __device__ static constexpr auto GetLengths() { - // not implemented + return typename sequence_gen::type{}; } + struct lambda_GetStride + { + template + __host__ __device__ constexpr auto operator()(IDim) const + { + return GetStride(IDim{}); + } + }; + __host__ __device__ static constexpr auto GetStrides() { - // not implemented + return typename sequence_gen::type{}; } template @@ -59,20 +77,26 @@ struct NativeTensorDescriptor return offset_diff; } - __host__ __device__ static constexpr auto AreUpperIndex2OffsetTransformLinear(); + template + __host__ __device__ static constexpr bool IsLinearDimension(Number) { - // TODO: re-implement "Sequence", so that it can take other data-type (including bool) as - // element - return uniform_sequence_gen{}; + return true; } - __host__ __device__ static constexpr auto GetIndependentDimensionGroups() + __host__ __device__ static constexpr auto GetLinearDimensions() { - // not implemented, should return Tuple, Sequence<1>, ...> - return xxx; + return typename arithmetic_sequence_gen<0, nDim, 1>::type{}; + } + + __host__ __device__ static constexpr auto GetNonLinearDimensions() { return Sequence<>{}; } + + __host__ __device__ static constexpr auto GetNonLinearIndependentDimensionGroups() + { + return Tuple<>{}; } }; +#if 0 // LowerTensorDescriptor // Transforms: std::tuple // LowerDimensionIds: std::tuple> @@ -213,16 +237,45 @@ struct TransformedTensorDescriptor return GetLowerTensorDescriptor().GetOffset(GetLowerIndex(idx_up)); } - __host__ __device__ static constexpr auto AreUpperIndex2OffsetTransformLinear(); + template + __host__ __device__ static constexpr bool IsLinearDimension(Number); { // not implemented } - __host__ __device__ static constexpr auto GetIndependentDimensionGroups() + __host__ __device__ static constexpr auto GetLinearDimensions() + { + // not implemented + } + + __host__ __device__ static constexpr auto GetNonLinearDimensions() + { + // not implemented + } + + __host__ __device__ static constexpr auto GetNonLinearIndependentDimensionGroups() { // not implemented } }; +#endif + +template +__host__ __device__ constexpr auto make_NativeTensorDescriptor(Sequence, + Sequence) +{ + return NativeTensorDescriptor...>{}; +} + +template +__host__ __device__ constexpr auto make_NativeTensorDescriptor_packed(Lengths) +{ + constexpr index_t strides = reverse_inclusive_scan_sequence( + Lengths::PopFront(), math::multiplies{}, Number<1>{}) + .PushBack(Number<1>{}); + + return make_NativeTensorDescriptor(Lengths{}, strides); +} } // namespace ck #endif diff --git a/composable_kernel/include/tensor_description/tensor_descriptor_helper.hpp b/composable_kernel/include/tensor_description/tensor_descriptor_helper.hpp new file mode 100644 index 0000000000..f1f662f772 --- /dev/null +++ b/composable_kernel/include/tensor_description/tensor_descriptor_helper.hpp @@ -0,0 +1,117 @@ +#ifndef CK_TENSOR_DESCRIPTOR_HELPER_HPP +#define CK_TENSOR_DESCRIPTOR_HELPER_HPP + +#include "common_header.hpp" +#include "tensor_descriptor.hpp" + +namespace ck { + +template +__host__ __device__ void print_tensor_descriptor(const char* s, + NativeTensorDescriptor desc) +{ + print_tensor_descriptor_impl(s, desc.GetLengths(), desc.GetStrides()); +} + +template +__host__ __device__ void +print_tensor_descriptor_impl(const char* s, Sequence, Sequence) +{ + constexpr index_t nDim = sizeof...(Lengths); + + static_assert(nDim > 0 && nDim <= 12, "wrong!"); + + static_if{}([&](auto) { + printf("%s dim %u, lengths {%u}, strides {%u}\n", s, nDim, Lengths..., Strides...); + }); + + static_if{}([&](auto) { + printf("%s dim %u, lengths {%u %u}, strides {%u %u}\n", s, nDim, Lengths..., Strides...); + }); + + static_if{}([&](auto) { + printf( + "%s dim %u, lengths {%u %u %u}, strides {%u %u %u}\n", s, nDim, Lengths..., Strides...); + }); + + static_if{}([&](auto) { + printf("%s dim %u, lengths {%u %u %u %u}, strides {%u %u %u %u}\n", + s, + nDim, + Lengths..., + Strides...); + }); + + static_if{}([&](auto) { + printf("%s dim %u, lengths {%u %u %u %u %u}, strides {%u %u %u %u %u}\n", + s, + nDim, + Lengths..., + Strides...); + }); + + static_if{}([&](auto) { + printf("%s dim %u, lengths {%u %u %u %u %u %u}, strides {%u %u %u %u %u %u}\n", + s, + nDim, + Lengths..., + Strides...); + }); + + static_if{}([&](auto) { + printf("%s dim %u, lengths {%u %u %u %u %u %u %u}, strides {%u %u %u %u %u %u %u}\n", + s, + nDim, + Lengths..., + Strides...); + }); + + static_if{}([&](auto) { + printf("%s dim %u, lengths {%u %u %u %u %u %u %u %u}, strides {%u %u %u %u %u %u %u %u}\n", + s, + nDim, + Lengths..., + Strides...); + }); + + static_if{}([&](auto) { + printf("%s dim %u, lengths {%u %u %u %u %u %u %u %u %u}, strides {%u %u %u %u %u %u %u %u " + "%u}\n", + s, + nDim, + Lengths..., + Strides...); + }); + + static_if{}([&](auto) { + printf("%s dim %u, lengths {%u %u %u %u %u %u %u %u %u %u}, strides {%u %u %u %u %u %u %u " + "%u %u %u}\n", + s, + nDim, + Lengths..., + Strides...); + }); + + static_if{}([&](auto) { + printf("%s dim %u, lengths {%u %u %u %u %u %u %u %u %u %u %u}, strides {%u %u %u %u %u %u " + "%u %u " + "%u %u %u}\n", + s, + nDim, + Lengths..., + Strides...); + }); + + static_if{}([&](auto) { + printf("%s dim %u, lengths {%u %u %u %u %u %u %u %u %u %u %u %u}, strides {%u %u %u %u %u " + "%u %u %u %u " + "%u %u %u}\n", + s, + nDim, + Lengths..., + Strides...); + }); +} + +} // namespace ck +#endif diff --git a/composable_kernel/include/tensor_description/tensor_visit.hpp b/composable_kernel/include/tensor_description/tensor_visit.hpp index 9aa9597e76..1ff538d8a8 100644 --- a/composable_kernel/include/tensor_description/tensor_visit.hpp +++ b/composable_kernel/include/tensor_description/tensor_visit.hpp @@ -85,6 +85,7 @@ struct TensorVisit { constexpr auto nonlinear_independent_dimensions_igroup = nonlinear_independent_dimension_groups.Get(igroup); + constexpr auto nonlinear_independent_lengths_igroup = lambda_HackLengths{}(lengths, nonlinear_independent_dimensions_igroup); diff --git a/composable_kernel/include/utility/Array.hpp b/composable_kernel/include/utility/Array.hpp index 036fbc7d9c..8b94d46b83 100644 --- a/composable_kernel/include/utility/Array.hpp +++ b/composable_kernel/include/utility/Array.hpp @@ -82,9 +82,11 @@ struct Array // A: Array // Picks: Sequence<...> template -ArrayElementPicker +struct ArrayElementPicker { - __host__ __device__ constexpr ArrayElementPicker(Arr & array) : mData{array} + using data_type = typename Arr::data_type; + + __host__ __device__ constexpr ArrayElementPicker(Arr& array) : mData{array} { constexpr index_t imax = accumulate_on_sequence(Picks{}, math::maxer{}, Number<0>{}); @@ -95,26 +97,26 @@ ArrayElementPicker __host__ __device__ static constexpr index_t GetSize() { return Picks::GetSize(); } template - __host__ __device__ constexpr TData operator[](Number) const + __host__ __device__ constexpr data_type operator[](Number) const { constexpr auto IP = Picks::Get(Number{}); return mData[IP]; } - __host__ __device__ constexpr TData operator[](index_t i) const + __host__ __device__ constexpr data_type operator[](index_t i) const { constexpr index_t ip = Picks{}[i]; return mData[ip]; } template - __host__ __device__ TData& operator()(Number) + __host__ __device__ data_type& operator()(Number) { constexpr auto IP = Picks::Get(Number{}); return mData[IP]; } - __host__ __device__ TData& operator()(index_t i) + __host__ __device__ data_type& operator()(index_t i) { constexpr index_t ip = Picks{}[i]; return mData[ip]; diff --git a/composable_kernel/include/utility/tuple.hpp b/composable_kernel/include/utility/tuple.hpp index 1d91f59ba6..019430cd92 100644 --- a/composable_kernel/include/utility/tuple.hpp +++ b/composable_kernel/include/utility/tuple.hpp @@ -2,66 +2,99 @@ #define CK_TUPLE_HPP #include "integral_constant.hpp" +#include "Sequence.hpp" namespace ck { -template -struct tuple : public std::tuple -{ - using type = tuple; +namespace detail { - __host__ __device__ static constexpr index_t GetSize() { return std::tuple_size(tuple{}); } +template +struct TupleElementKey +{ +}; + +template +struct TupleElement +{ + template + __host__ __device__ explicit constexpr TupleElement(T&& v) : mData(static_cast(v)) + { + } + + Data mData; +}; + +template +__host__ __device__ constexpr const Data& get_tuple_element(const TupleElement& x) +{ + return x.mData; +} + +template +__host__ __device__ constexpr Data& get_tuple_element(TupleElement& x) +{ + return x.mData; +} + +template +__host__ __device__ constexpr Data&& get_tuple_element(TupleElement&& x) +{ + return static_cast(x.mData); +} + +template +struct TupleImpl; + +template +struct TupleImpl, Xs...> : TupleElement, Xs>... +{ + template + __host__ __device__ explicit constexpr TupleImpl(Ys&&... ys) + : TupleElement, Xs>(static_cast(ys))... + { + } + + __host__ __device__ static constexpr index_t Size() { return sizeof...(Xs); } template - __host__ __device__ constexpr auto Get(Number) const + __host__ __device__ constexpr const auto& GetElementByKey(TupleElementKey) const { - return std::get(*this); + return get_tuple_element>(*this); } template - __host__ __device__ constexpr auto operator[](Number) const + __host__ __device__ constexpr auto& GetElementByKey(TupleElementKey) { - return Get(Number{}) : + return get_tuple_element>(*this); } }; -// merge tuple -template -__host__ __device__ constexpr auto merge_tuple(Tuples&&... xs) -{ - return std::tuple_cat(xs...); -}; +} // namespace detail -// generate sequence -template -struct tuple_gen_impl +template +struct Tuple : detail::TupleImpl::type, Xs...> { - static constexpr index_t NRemainLeft = NRemain / 2; - static constexpr index_t NRemainRight = NRemain - NRemainLeft; - static constexpr index_t IMiddle = IBegin + NRemainLeft; + using base = + detail::TupleImpl::type, Xs...>; - using type = - typename tuple_merge::type, - typename tuple_gen_impl::type>::type; -}; + template + __host__ __device__ explicit constexpr Tuple(Ys&&... ys) : base(static_cast(ys)...) + { + } -template -struct tuple_gen_impl -{ - static constexpr auto x = F{}(Number{}); - using type = tuple; -}; + template + __host__ __device__ constexpr const auto& At(Number) const + { + static_assert(I < base::Size(), "wrong! out of range"); + return GetElementByKey(detail::TupleElementKey{}); + } -template -struct sequence_gen_impl -{ - using type = Sequence<>; -}; - -template -struct sequence_gen -{ - using type = typename sequence_gen_impl<0, NSize, F>::type; + template + __host__ __device__ constexpr auto& At(Number) + { + static_assert(I < base::Size(), "wrong! out of range"); + return GetElementByKey(detail::TupleElementKey{}); + } }; } // namespace ck diff --git a/driver/include/host_conv.hpp b/driver/include/host_conv.hpp index d89d8e17ca..9ca7fc10d2 100644 --- a/driver/include/host_conv.hpp +++ b/driver/include/host_conv.hpp @@ -65,9 +65,6 @@ void host_direct_convolution(const Tensor& in_nchw, index_t h_pad_low = LowerPads{}.Get(Number<0>{}); index_t w_pad_low = LowerPads{}.Get(Number<1>{}); - index_t h_pad_up = UpperPads{}.Get(Number<0>{}); - index_t w_pad_up = UpperPads{}.Get(Number<1>{}); - auto f = [&](auto n, auto k, auto ho, auto wo) { double v = 0; for(int c = 0; c < wei_kcyx.mDesc.GetLengths()[1]; ++c) @@ -125,9 +122,6 @@ void host_winograd_3x3_convolution(const Tensor& in_nchw, index_t h_pad_low = LowerPads{}.Get(Number<0>{}); index_t w_pad_low = LowerPads{}.Get(Number<1>{}); - index_t h_pad_up = UpperPads{}.Get(Number<0>{}); - index_t w_pad_up = UpperPads{}.Get(Number<1>{}); - std::size_t HiPerTile = HoPerTile + Y - 1; std::size_t WiPerTile = WoPerTile + X - 1; diff --git a/driver/src/driver.cpp b/driver/src/driver.cpp index 670acaee61..7304647362 100644 --- a/driver/src/driver.cpp +++ b/driver/src/driver.cpp @@ -368,7 +368,7 @@ int main(int argc, char* argv[]) #if 0 device_convolution_direct_v2_nchw_kcyx_nkhw (in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat); -#elif 1 +#elif 0 device_convolution_implicit_gemm_v1_chwn_cyxk_khwn( in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat); #elif 1