From 05e046654c9a226444091806a418a77fe0e4a4c2 Mon Sep 17 00:00:00 2001 From: Chao Liu Date: Tue, 11 Jun 2019 09:12:41 -0500 Subject: [PATCH] remove .hip extension --- ...ce_convolution_direct_v2_nchw_kcyx_nkhw.hpp | 4 ++-- ...olution_implicit_gemm_v1_chwn_cyxk_khwn.hpp | 10 +++++----- ...olution_implicit_gemm_v1_nchw_cyxk_khwn.hpp | 8 ++++---- ...olution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp | 6 +++--- ...olution_implicit_gemm_v2_chwn_cyxk_khwn.hpp | 6 +++--- ...olution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp | 6 +++--- ...olution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp | 6 +++--- ...convolution_2_vectorized_nchw_kcyx_nkhw.hpp | 2 +- ...emm_convolution_1_chwn_cyxk_khwn_padded.hpp | 2 +- driver/{driver.hip.cpp => driver.cpp} | 4 ++-- driver/driver.cu | 2 +- src/include/{Array.hip.hpp => Array.hpp} | 4 ++-- ...or.hip.hpp => ConstantMatrixDescriptor.hpp} | 2 +- ....hpp => ConstantMergedTensorDescriptor.hpp} | 4 ++-- ...or.hip.hpp => ConstantTensorDescriptor.hpp} | 2 +- src/include/{Sequence.hip.hpp => Sequence.hpp} | 4 ++-- ...d_inline_asm.hip.hpp => amd_inline_asm.hpp} | 2 +- src/include/{base.hip.hpp => base.hpp} | 0 ...r_op.hip.hpp => blockwise_2d_tensor_op.hpp} | 4 ++-- ...r_op.hip.hpp => blockwise_3d_tensor_op.hpp} | 4 ++-- ...r_op.hip.hpp => blockwise_4d_tensor_op.hpp} | 4 ++-- ...gemm.hip.hpp => blockwise_batched_gemm.hpp} | 2 +- ...ockwise_gemm.hip.hpp => blockwise_gemm.hpp} | 4 ++-- ...p => blockwise_generic_tensor_slice_op.hpp} | 2 +- ...p.hip.hpp => blockwise_tensor_slice_op.hpp} | 2 +- src/include/common.hip.hpp | 13 ------------- src/include/common.hpp | 13 +++++++++++++ .../{conv_common.hip.hpp => conv_common.hpp} | 2 +- .../{functional.hip.hpp => functional.hpp} | 4 ++-- .../{functional2.hip.hpp => functional2.hpp} | 4 ++-- .../{functional3.hip.hpp => functional3.hpp} | 8 ++++---- ...e_convolution_direct_v2_nchw_kcyx_nkhw.hpp} | 12 ++++++------ ...tion_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp} | 16 ++++++++-------- ...tion_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp} | 18 +++++++++--------- ...tion_implicit_gemm_v1r2_nchw_cyxk_khwn.hpp} | 18 +++++++++--------- ...tion_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp} | 16 ++++++++-------- ..._v1r3_lds_double_buffer_chwn_cyxk_khwn.hpp} | 16 ++++++++-------- ..._v1r3_lds_double_buffer_nchw_cyxk_khwn.hpp} | 16 ++++++++-------- ..._v1r3_lds_double_buffer_nchw_cyxk_nkhw.hpp} | 16 ++++++++-------- ...tion_implicit_gemm_v1r3_nchw_cyxk_khwn.hpp} | 16 ++++++++-------- ...tion_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp} | 16 ++++++++-------- ...lution_implicit_gemm_v2_chwn_cyxk_khwn.hpp} | 12 ++++++------ ...mm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp} | 14 +++++++------- ...mm_v3_lds_double_buffer_nchw_cyxk_nkhw.hpp} | 12 ++++++------ ...lution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp} | 12 ++++++------ ...mm_v4_lds_double_buffer_nchw_kcyx_nkhw.hpp} | 14 +++++++------- ...lution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp} | 14 +++++++------- ...ip.hpp => gridwise_convolution_wrapper.hpp} | 0 ...onvolution_2_vectorized_nchw_kcyx_nkhw.hpp} | 14 +++++++------- ...mm_convolution_1_chwn_cyxk_khwn_padded.hpp} | 14 +++++++------- ..._constant.hip.hpp => integral_constant.hpp} | 0 ..._op.hip.hpp => threadwise_4d_tensor_op.hpp} | 2 +- ...p.hpp => threadwise_direct_convolution.hpp} | 4 ++-- ...adwise_gemm.hip.hpp => threadwise_gemm.hpp} | 4 ++-- ... => threadwise_generic_tensor_slice_op.hpp} | 4 ++-- ....hip.hpp => threadwise_tensor_slice_op.hpp} | 2 +- .../{vector_type.hip.hpp => vector_type.hpp} | 2 +- 57 files changed, 212 insertions(+), 212 deletions(-) rename driver/{driver.hip.cpp => driver.cpp} (99%) rename src/include/{Array.hip.hpp => Array.hpp} (99%) rename src/include/{ConstantMatrixDescriptor.hip.hpp => ConstantMatrixDescriptor.hpp} (98%) rename src/include/{ConstantMergedTensorDescriptor.hip.hpp => ConstantMergedTensorDescriptor.hpp} (98%) rename src/include/{ConstantTensorDescriptor.hip.hpp => ConstantTensorDescriptor.hpp} (99%) rename src/include/{Sequence.hip.hpp => Sequence.hpp} (99%) rename src/include/{amd_inline_asm.hip.hpp => amd_inline_asm.hpp} (99%) rename src/include/{base.hip.hpp => base.hpp} (100%) rename src/include/{blockwise_2d_tensor_op.hip.hpp => blockwise_2d_tensor_op.hpp} (99%) rename src/include/{blockwise_3d_tensor_op.hip.hpp => blockwise_3d_tensor_op.hpp} (99%) rename src/include/{blockwise_4d_tensor_op.hip.hpp => blockwise_4d_tensor_op.hpp} (99%) rename src/include/{blockwise_batched_gemm.hip.hpp => blockwise_batched_gemm.hpp} (99%) rename src/include/{blockwise_gemm.hip.hpp => blockwise_gemm.hpp} (99%) rename src/include/{blockwise_generic_tensor_slice_op.hip.hpp => blockwise_generic_tensor_slice_op.hpp} (99%) rename src/include/{blockwise_tensor_slice_op.hip.hpp => blockwise_tensor_slice_op.hpp} (99%) delete mode 100644 src/include/common.hip.hpp create mode 100644 src/include/common.hpp rename src/include/{conv_common.hip.hpp => conv_common.hpp} (99%) rename src/include/{functional.hip.hpp => functional.hpp} (95%) rename src/include/{functional2.hip.hpp => functional2.hpp} (96%) rename src/include/{functional3.hip.hpp => functional3.hpp} (96%) rename src/include/{gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hip.hpp => gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp} (97%) rename src/include/{gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hip.hpp => gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp} (98%) rename src/include/{gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hip.hpp => gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp} (98%) rename src/include/{gridwise_convolution_implicit_gemm_v1r2_nchw_cyxk_khwn.hip.hpp => gridwise_convolution_implicit_gemm_v1r2_nchw_cyxk_khwn.hpp} (97%) rename src/include/{gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hip.hpp => gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp} (98%) rename src/include/{gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_chwn_cyxk_khwn.hip.hpp => gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_chwn_cyxk_khwn.hpp} (98%) rename src/include/{gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_nchw_cyxk_khwn.hip.hpp => gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_nchw_cyxk_khwn.hpp} (98%) rename src/include/{gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_nchw_cyxk_nkhw.hip.hpp => gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_nchw_cyxk_nkhw.hpp} (98%) rename src/include/{gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_khwn.hip.hpp => gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_khwn.hpp} (98%) rename src/include/{gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hip.hpp => gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp} (98%) rename src/include/{gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hip.hpp => gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp} (98%) rename src/include/{gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hip.hpp => gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp} (98%) rename src/include/{gridwise_convolution_implicit_gemm_v3_lds_double_buffer_nchw_cyxk_nkhw.hip.hpp => gridwise_convolution_implicit_gemm_v3_lds_double_buffer_nchw_cyxk_nkhw.hpp} (98%) rename src/include/{gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hip.hpp => gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp} (98%) rename src/include/{gridwise_convolution_implicit_gemm_v4_lds_double_buffer_nchw_kcyx_nkhw.hip.hpp => gridwise_convolution_implicit_gemm_v4_lds_double_buffer_nchw_kcyx_nkhw.hpp} (98%) rename src/include/{gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hip.hpp => gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp} (98%) rename src/include/{gridwise_convolution_wrapper.hip.hpp => gridwise_convolution_wrapper.hpp} (100%) rename src/include/{gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hip.hpp => gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp} (97%) rename src/include/{gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hip.hpp => gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp} (97%) rename src/include/{integral_constant.hip.hpp => integral_constant.hpp} (100%) rename src/include/{threadwise_4d_tensor_op.hip.hpp => threadwise_4d_tensor_op.hpp} (97%) rename src/include/{threadwise_direct_convolution.hip.hpp => threadwise_direct_convolution.hpp} (99%) rename src/include/{threadwise_gemm.hip.hpp => threadwise_gemm.hpp} (98%) rename src/include/{threadwise_generic_tensor_slice_op.hip.hpp => threadwise_generic_tensor_slice_op.hpp} (97%) rename src/include/{threadwise_tensor_slice_op.hip.hpp => threadwise_tensor_slice_op.hpp} (99%) rename src/include/{vector_type.hip.hpp => vector_type.hpp} (98%) diff --git a/driver/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp b/driver/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp index 7e20cccac5..790bcfbb9a 100644 --- a/driver/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp +++ b/driver/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp @@ -1,8 +1,8 @@ #pragma once #include #include "device.hpp" -#include "gridwise_convolution_wrapper.hip.hpp" -#include "gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hip.hpp" +#include "gridwise_convolution_wrapper.hpp" +#include "gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp" template void device_convolution_direct_v2_nchw_kcyx_nkhw(InDesc, diff --git a/driver/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp b/driver/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp index 217eb853d9..05e85f5bfb 100644 --- a/driver/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp +++ b/driver/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp @@ -1,11 +1,11 @@ #pragma once #include #include "device.hpp" -#include "gridwise_convolution_wrapper.hip.hpp" -#include "gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hip.hpp" -#include "gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hip.hpp" -#include "gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hip.hpp" -#include "gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_chwn_cyxk_khwn.hip.hpp" +#include "gridwise_convolution_wrapper.hpp" +#include "gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp" +#include "gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp" +#include "gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp" +#include "gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_chwn_cyxk_khwn.hpp" template void device_convolution_implicit_gemm_v1_chwn_cyxk_khwn(InDesc, diff --git a/driver/device_convolution_implicit_gemm_v1_nchw_cyxk_khwn.hpp b/driver/device_convolution_implicit_gemm_v1_nchw_cyxk_khwn.hpp index 3237a7310b..cebc92f907 100644 --- a/driver/device_convolution_implicit_gemm_v1_nchw_cyxk_khwn.hpp +++ b/driver/device_convolution_implicit_gemm_v1_nchw_cyxk_khwn.hpp @@ -1,10 +1,10 @@ #pragma once #include #include "device.hpp" -#include "gridwise_convolution_wrapper.hip.hpp" -#include "gridwise_convolution_implicit_gemm_v1r2_nchw_cyxk_khwn.hip.hpp" -#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_khwn.hip.hpp" -#include "gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_nchw_cyxk_khwn.hip.hpp" +#include "gridwise_convolution_wrapper.hpp" +#include "gridwise_convolution_implicit_gemm_v1r2_nchw_cyxk_khwn.hpp" +#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_khwn.hpp" +#include "gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_nchw_cyxk_khwn.hpp" template void device_convolution_implicit_gemm_v1_nchw_cyxk_khwn(InDesc, diff --git a/driver/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp b/driver/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp index acd8176023..43c8512b87 100644 --- a/driver/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp +++ b/driver/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp @@ -1,9 +1,9 @@ #pragma once #include #include "device.hpp" -#include "gridwise_convolution_wrapper.hip.hpp" -#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hip.hpp" -#include "gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_nchw_cyxk_nkhw.hip.hpp" +#include "gridwise_convolution_wrapper.hpp" +#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp" +#include "gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_nchw_cyxk_nkhw.hpp" template void device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw(InDesc, diff --git a/driver/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp b/driver/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp index 54e777dbe4..8033d32b53 100644 --- a/driver/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp +++ b/driver/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp @@ -1,9 +1,9 @@ #pragma once #include #include "device.hpp" -#include "gridwise_convolution_wrapper.hip.hpp" -#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hip.hpp" -#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hip.hpp" +#include "gridwise_convolution_wrapper.hpp" +#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp" +#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp" template void device_convolution_implicit_gemm_v2_chwn_cyxk_khwn(InDesc, diff --git a/driver/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp b/driver/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp index b3b6d785bf..17feafef95 100644 --- a/driver/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp +++ b/driver/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp @@ -1,9 +1,9 @@ #pragma once #include #include "device.hpp" -#include "gridwise_convolution_wrapper.hip.hpp" -#include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hip.hpp" -#include "gridwise_convolution_implicit_gemm_v3_lds_double_buffer_nchw_cyxk_nkhw.hip.hpp" +#include "gridwise_convolution_wrapper.hpp" +#include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp" +#include "gridwise_convolution_implicit_gemm_v3_lds_double_buffer_nchw_cyxk_nkhw.hpp" template void device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw(InDesc, diff --git a/driver/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp b/driver/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp index 618d330534..0704ddc867 100644 --- a/driver/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp +++ b/driver/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp @@ -1,9 +1,9 @@ #pragma once #include #include "device.hpp" -#include "gridwise_convolution_wrapper.hip.hpp" -#include "gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hip.hpp" -#include "gridwise_convolution_implicit_gemm_v4_lds_double_buffer_nchw_kcyx_nkhw.hip.hpp" +#include "gridwise_convolution_wrapper.hpp" +#include "gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp" +#include "gridwise_convolution_implicit_gemm_v4_lds_double_buffer_nchw_kcyx_nkhw.hpp" template void device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw(InDesc, diff --git a/driver/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp b/driver/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp index 7790900f83..cddbb4df3c 100644 --- a/driver/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp +++ b/driver/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp @@ -1,7 +1,7 @@ #pragma once #include #include "device.hpp" -#include "gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hip.hpp" +#include "gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp" template void device_direct_convolution_2_vectorized_nchw_kcyx_nkhw(InDesc, diff --git a/driver/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp b/driver/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp index dc0bb2f7e1..f8ed51516d 100644 --- a/driver/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp +++ b/driver/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp @@ -1,7 +1,7 @@ #pragma once #include #include "device.hpp" -#include "gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hip.hpp" +#include "gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp" template void device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded(InDesc, diff --git a/driver/driver.hip.cpp b/driver/driver.cpp similarity index 99% rename from driver/driver.hip.cpp rename to driver/driver.cpp index 40cd4fdd3f..ededb0d87f 100644 --- a/driver/driver.hip.cpp +++ b/driver/driver.cpp @@ -5,8 +5,8 @@ #include #include "config.h" #include "tensor.hpp" -#include "ConstantTensorDescriptor.hip.hpp" -#include "conv_common.hip.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "conv_common.hpp" #include "device_convolution_direct_v2_nchw_kcyx_nkhw.hpp" //#include "device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp" #include "device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp" diff --git a/driver/driver.cu b/driver/driver.cu index 974d4de85e..1ca4fea9d7 120000 --- a/driver/driver.cu +++ b/driver/driver.cu @@ -1 +1 @@ -driver.hip.cpp \ No newline at end of file +driver.cpp \ No newline at end of file diff --git a/src/include/Array.hip.hpp b/src/include/Array.hpp similarity index 99% rename from src/include/Array.hip.hpp rename to src/include/Array.hpp index 5e00d32a09..9f9192bad9 100644 --- a/src/include/Array.hip.hpp +++ b/src/include/Array.hpp @@ -1,6 +1,6 @@ #pragma once -#include "Sequence.hip.hpp" -#include "functional2.hip.hpp" +#include "Sequence.hpp" +#include "functional2.hpp" template struct Array diff --git a/src/include/ConstantMatrixDescriptor.hip.hpp b/src/include/ConstantMatrixDescriptor.hpp similarity index 98% rename from src/include/ConstantMatrixDescriptor.hip.hpp rename to src/include/ConstantMatrixDescriptor.hpp index 4b15f75fa9..8339580d01 100644 --- a/src/include/ConstantMatrixDescriptor.hip.hpp +++ b/src/include/ConstantMatrixDescriptor.hpp @@ -1,5 +1,5 @@ #pragma once -#include "common.hip.hpp" +#include "common.hpp" template struct ConstantMatrixDescriptor diff --git a/src/include/ConstantMergedTensorDescriptor.hip.hpp b/src/include/ConstantMergedTensorDescriptor.hpp similarity index 98% rename from src/include/ConstantMergedTensorDescriptor.hip.hpp rename to src/include/ConstantMergedTensorDescriptor.hpp index 2333035190..21a08a3b67 100644 --- a/src/include/ConstantMergedTensorDescriptor.hip.hpp +++ b/src/include/ConstantMergedTensorDescriptor.hpp @@ -1,6 +1,6 @@ #pragma once -#include "common.hip.hpp" -#include "ConstantTensorDescriptor.hip.hpp" +#include "common.hpp" +#include "ConstantTensorDescriptor.hpp" // OriginalTensorDesc : ConstantTensorDescriptor<...> // it's the tensor whose dimensions are to be merged diff --git a/src/include/ConstantTensorDescriptor.hip.hpp b/src/include/ConstantTensorDescriptor.hpp similarity index 99% rename from src/include/ConstantTensorDescriptor.hip.hpp rename to src/include/ConstantTensorDescriptor.hpp index f28cb32733..f2decc3f54 100644 --- a/src/include/ConstantTensorDescriptor.hip.hpp +++ b/src/include/ConstantTensorDescriptor.hpp @@ -1,5 +1,5 @@ #pragma once -#include "common.hip.hpp" +#include "common.hpp" template __host__ __device__ constexpr auto calculate_tensor_strides_packed(Lengths) diff --git a/src/include/Sequence.hip.hpp b/src/include/Sequence.hpp similarity index 99% rename from src/include/Sequence.hip.hpp rename to src/include/Sequence.hpp index a29506f215..5d021631d6 100644 --- a/src/include/Sequence.hip.hpp +++ b/src/include/Sequence.hpp @@ -1,6 +1,6 @@ #pragma once -#include "integral_constant.hip.hpp" -#include "functional.hip.hpp" +#include "integral_constant.hpp" +#include "functional.hpp" template struct is_valid_sequence_map; diff --git a/src/include/amd_inline_asm.hip.hpp b/src/include/amd_inline_asm.hpp similarity index 99% rename from src/include/amd_inline_asm.hip.hpp rename to src/include/amd_inline_asm.hpp index 1e453d3cf5..05e34a26be 100644 --- a/src/include/amd_inline_asm.hip.hpp +++ b/src/include/amd_inline_asm.hpp @@ -1,5 +1,5 @@ #pragma once -#include "common.hip.hpp" +#include "common.hpp" #define NO_VM_WAIT 0 #define NO_LGKM_WAIT 0 diff --git a/src/include/base.hip.hpp b/src/include/base.hpp similarity index 100% rename from src/include/base.hip.hpp rename to src/include/base.hpp diff --git a/src/include/blockwise_2d_tensor_op.hip.hpp b/src/include/blockwise_2d_tensor_op.hpp similarity index 99% rename from src/include/blockwise_2d_tensor_op.hip.hpp rename to src/include/blockwise_2d_tensor_op.hpp index d6ddf8db5f..d39a74a1a3 100644 --- a/src/include/blockwise_2d_tensor_op.hip.hpp +++ b/src/include/blockwise_2d_tensor_op.hpp @@ -1,6 +1,6 @@ #pragma once -#include "common.hip.hpp" -#include "ConstantTensorDescriptor.hip.hpp" +#include "common.hpp" +#include "ConstantTensorDescriptor.hpp" template __device__ void diff --git a/src/include/blockwise_3d_tensor_op.hip.hpp b/src/include/blockwise_3d_tensor_op.hpp similarity index 99% rename from src/include/blockwise_3d_tensor_op.hip.hpp rename to src/include/blockwise_3d_tensor_op.hpp index 058a5b4401..ad647fc9da 100644 --- a/src/include/blockwise_3d_tensor_op.hip.hpp +++ b/src/include/blockwise_3d_tensor_op.hpp @@ -1,6 +1,6 @@ #pragma once -#include "common.hip.hpp" -#include "ConstantTensorDescriptor.hip.hpp" +#include "common.hpp" +#include "ConstantTensorDescriptor.hpp" template __device__ void diff --git a/src/include/blockwise_batched_gemm.hip.hpp b/src/include/blockwise_batched_gemm.hpp similarity index 99% rename from src/include/blockwise_batched_gemm.hip.hpp rename to src/include/blockwise_batched_gemm.hpp index 6e397d1efa..937bf5ee8a 100644 --- a/src/include/blockwise_batched_gemm.hip.hpp +++ b/src/include/blockwise_batched_gemm.hpp @@ -1,5 +1,5 @@ #pragma once -#include "threadwise_gemm.hip.hpp" +#include "threadwise_gemm.hpp" template diff --git a/src/include/functional.hip.hpp b/src/include/functional.hpp similarity index 95% rename from src/include/functional.hip.hpp rename to src/include/functional.hpp index 776abe0b2a..84e5cffe83 100644 --- a/src/include/functional.hip.hpp +++ b/src/include/functional.hpp @@ -1,6 +1,6 @@ #pragma once -#include "integral_constant.hip.hpp" -#include "Sequence.hip.hpp" +#include "integral_constant.hpp" +#include "Sequence.hpp" struct forwarder { diff --git a/src/include/functional2.hip.hpp b/src/include/functional2.hpp similarity index 96% rename from src/include/functional2.hip.hpp rename to src/include/functional2.hpp index 6633abd316..c1dec36575 100644 --- a/src/include/functional2.hip.hpp +++ b/src/include/functional2.hpp @@ -1,6 +1,6 @@ #pragma once -#include "functional.hip.hpp" -#include "Sequence.hip.hpp" +#include "functional.hpp" +#include "Sequence.hpp" template struct static_for_impl; diff --git a/src/include/functional3.hip.hpp b/src/include/functional3.hpp similarity index 96% rename from src/include/functional3.hip.hpp rename to src/include/functional3.hpp index 4019725c4c..ee3ab656f4 100644 --- a/src/include/functional3.hip.hpp +++ b/src/include/functional3.hpp @@ -1,8 +1,8 @@ #pragma once -#include "functional.hip.hpp" -#include "functional2.hip.hpp" -#include "Sequence.hip.hpp" -#include "Array.hip.hpp" +#include "functional.hpp" +#include "functional2.hpp" +#include "Sequence.hpp" +#include "Array.hpp" // RemainLengths: Sequence<...> template diff --git a/src/include/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hip.hpp b/src/include/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp similarity index 97% rename from src/include/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hip.hpp rename to src/include/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp index 04ec8f4c62..5bc5aa39a3 100644 --- a/src/include/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hip.hpp +++ b/src/include/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp @@ -1,10 +1,10 @@ #pragma once -#include "common.hip.hpp" -#include "ConstantTensorDescriptor.hip.hpp" -#include "blockwise_2d_tensor_op.hip.hpp" -#include "blockwise_4d_tensor_op.hip.hpp" -#include "threadwise_tensor_slice_op.hip.hpp" -#include "threadwise_direct_convolution.hip.hpp" +#include "common.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "threadwise_tensor_slice_op.hpp" +#include "threadwise_direct_convolution.hpp" template __device__ void threadwise_4d_tensor_shift_down(Desc, Float* __restrict__ p, IDim, NShift) diff --git a/src/include/threadwise_direct_convolution.hip.hpp b/src/include/threadwise_direct_convolution.hpp similarity index 99% rename from src/include/threadwise_direct_convolution.hip.hpp rename to src/include/threadwise_direct_convolution.hpp index 3ba4a8dd4e..a518c2d182 100644 --- a/src/include/threadwise_direct_convolution.hip.hpp +++ b/src/include/threadwise_direct_convolution.hpp @@ -1,6 +1,6 @@ #pragma once -#include "ConstantTensorDescriptor.hip.hpp" -#include "threadwise_tensor_slice_op.hip.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "threadwise_tensor_slice_op.hpp" // optimized for scenario if p_in, p_wei, p_out are in register template diff --git a/src/include/threadwise_gemm.hip.hpp b/src/include/threadwise_gemm.hpp similarity index 98% rename from src/include/threadwise_gemm.hip.hpp rename to src/include/threadwise_gemm.hpp index 61a4e45151..d28e9e73ea 100644 --- a/src/include/threadwise_gemm.hip.hpp +++ b/src/include/threadwise_gemm.hpp @@ -1,6 +1,6 @@ #pragma once -#include "common.hip.hpp" -#include "ConstantMatrixDescriptor.hip.hpp" +#include "common.hpp" +#include "ConstantMatrixDescriptor.hpp" template __device__ void threadwise_matrix_set_zero(Matrix, Float* __restrict__ p_thread) diff --git a/src/include/threadwise_generic_tensor_slice_op.hip.hpp b/src/include/threadwise_generic_tensor_slice_op.hpp similarity index 97% rename from src/include/threadwise_generic_tensor_slice_op.hip.hpp rename to src/include/threadwise_generic_tensor_slice_op.hpp index 9a7e5ae062..7ffed89f8e 100644 --- a/src/include/threadwise_generic_tensor_slice_op.hip.hpp +++ b/src/include/threadwise_generic_tensor_slice_op.hpp @@ -1,6 +1,6 @@ #pragma once -#include "ConstantTensorDescriptor.hip.hpp" -#include "ConstantMergedTensorDescriptor.hip.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMergedTensorDescriptor.hpp" template diff --git a/src/include/vector_type.hip.hpp b/src/include/vector_type.hpp similarity index 98% rename from src/include/vector_type.hip.hpp rename to src/include/vector_type.hpp index ef8feeadda..3f5a3a10a8 100644 --- a/src/include/vector_type.hip.hpp +++ b/src/include/vector_type.hpp @@ -1,6 +1,6 @@ #pragma once #include "config.h" -#include "integral_constant.hip.hpp" +#include "integral_constant.hpp" template struct vector_type