diff --git a/composable_kernel/include/utility/amd_address_space.hpp b/composable_kernel/include/utility/amd_address_space.hpp index c5bb1b2cd0..a8010f951c 100644 --- a/composable_kernel/include/utility/amd_address_space.hpp +++ b/composable_kernel/include/utility/amd_address_space.hpp @@ -3,6 +3,9 @@ #include "config.hpp" +// Address Space for AMDGCN +// https://llvm.org/docs/AMDGPUUsage.html#address-space + namespace ck { enum AddressSpaceEnum_t @@ -17,15 +20,24 @@ enum AddressSpaceEnum_t template __device__ T* cast_pointer_to_generic_address_space(T CONSTANT* p) { - return (T*)p; + // cast a pointer in "Constant" address space (4) to "Generic" address space (0) + // only old style cast seems be able to be compiled +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic push + return (T*)p; // NOLINT(old-style-cast) +#pragma clang diagnostic pop } template __host__ __device__ T CONSTANT* cast_pointer_to_constant_address_space(T* p) { - return (T CONSTANT*)p; + // cast a pointer in "Generic" address space (0) to "Constant" address space (4) + // only old style cast seems be able to be compiled +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic push + return (T CONSTANT*)p; // NOLINT(old-style-cast) +#pragma clang diagnostic pop } } // namespace ck - #endif diff --git a/composable_kernel/include/utility/c_style_pointer_cast.hpp b/composable_kernel/include/utility/c_style_pointer_cast.hpp new file mode 100644 index 0000000000..f4b4b09a76 --- /dev/null +++ b/composable_kernel/include/utility/c_style_pointer_cast.hpp @@ -0,0 +1,20 @@ +#ifndef CK_C_STYLE_POINTER_CAST_HPP +#define CK_C_STYLE_POINTER_CAST_HPP + +#include "type.hpp" + +namespace ck { + +template && is_pointer_v, bool>::type = false> +__host__ __device__ PY c_style_pointer_cast(PX p_x) +{ +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic push + return (PY)p_x; // NOLINT(old-style-cast) +#pragma clang diagnostic pop +} + +} // namespace ck +#endif diff --git a/composable_kernel/include/utility/common_header.hpp b/composable_kernel/include/utility/common_header.hpp index 39b400b3cd..c4346e45d0 100644 --- a/composable_kernel/include/utility/common_header.hpp +++ b/composable_kernel/include/utility/common_header.hpp @@ -25,6 +25,7 @@ #include "type.hpp" #include "magic_division.hpp" #include "utility.hpp" +#include "c_style_pointer_cast.hpp" #include "amd_address_space.hpp" #include "amd_buffer_addressing.hpp" #include "static_buffer.hpp" diff --git a/composable_kernel/include/utility/dynamic_buffer.hpp b/composable_kernel/include/utility/dynamic_buffer.hpp index 34c28d7fef..b41639051f 100644 --- a/composable_kernel/include/utility/dynamic_buffer.hpp +++ b/composable_kernel/include/utility/dynamic_buffer.hpp @@ -1,9 +1,10 @@ #ifndef CK_DYNAMIC_BUFFER_HPP #define CK_DYNAMIC_BUFFER_HPP -namespace ck { - #include "amd_buffer_addressing.hpp" +#include "c_style_pointer_cast.hpp" + +namespace ck { template struct DynamicBuffer @@ -44,20 +45,20 @@ struct DynamicBuffer static_assert(scalar_per_x_vector % scalar_per_t_vector == 0, "wrong! X need to be multiple T"); - constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector; - if constexpr(GetAddressSpace() == AddressSpaceEnum_t::Global) { #if CK_USE_AMD_BUFFER_ADDRESSING + constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector; + return amd_buffer_load_v2>, t_per_x>( p_data_, i, is_valid_offset, element_space_size_); #else - return is_valid_offset ? *reinterpret_cast(&p_data_[i]) : X{0}; + return is_valid_offset ? *c_style_pointer_cast(&p_data_[i]) : X{0}; #endif } else { - return is_valid_offset ? *reinterpret_cast(&p_data_[i]) : X{0}; + return is_valid_offset ? *c_style_pointer_cast(&p_data_[i]) : X{0}; } } @@ -78,17 +79,17 @@ struct DynamicBuffer static_assert(scalar_per_x_vector % scalar_per_t_vector == 0, "wrong! X need to be multiple T"); - constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector; - if constexpr(GetAddressSpace() == AddressSpaceEnum_t::Global) { #if CK_USE_AMD_BUFFER_ADDRESSING + constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector; + amd_buffer_store_v2>, t_per_x>( x, p_data_, i, is_valid_offset, element_space_size_); #else if(is_valid_offset) { - *reinterpret_cast(&p_data_[i]) = x; + *c_style_pointer_cast(&p_data_[i]) = x; } #endif } @@ -97,7 +98,7 @@ struct DynamicBuffer if(is_valid_offset) { #if !CK_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE - *reinterpret_cast(&p_data_[i]) = x; + *c_style_pointer_cast(&p_data_[i]) = x; #else // HACK: compiler would lower IR "store address_space(3)" into // inefficient @@ -128,24 +129,24 @@ struct DynamicBuffer { // HACK: cast pointer of x is bad // TODO: remove this after compiler fix - *reinterpret_cast(&p_data_[i]) = - *reinterpret_cast(&x); + *c_style_pointer_cast(&p_data_[i]) = + *c_style_pointer_cast(&x); } else if constexpr(is_same>, int8_t>::value && is_same>, int8x2_t>::value) { // HACK: cast pointer of x is bad // TODO: remove this after compiler fix - *reinterpret_cast(&p_data_[i]) = - *reinterpret_cast(&x); + *c_style_pointer_cast(&p_data_[i]) = + *c_style_pointer_cast(&x); } else if constexpr(is_same>, int8_t>::value && is_same>, int8x4_t>::value) { // HACK: cast pointer of x is bad // TODO: remove this after compiler fix - *reinterpret_cast(&p_data_[i]) = - *reinterpret_cast(&x); + *c_style_pointer_cast(&p_data_[i]) = + *c_style_pointer_cast(&x); } else if constexpr(is_same>, int8x4_t>::value && @@ -153,8 +154,8 @@ struct DynamicBuffer { // HACK: cast pointer of x is bad // TODO: remove this after compiler fix - *reinterpret_cast(&p_data_[i]) = - *reinterpret_cast(&x); + *c_style_pointer_cast(&p_data_[i]) = + *c_style_pointer_cast(&x); } else if constexpr(is_same>, int8x8_t>::value && @@ -162,8 +163,8 @@ struct DynamicBuffer { // HACK: cast pointer of x is bad // TODO: remove this after compiler fix - *reinterpret_cast(&p_data_[i]) = - *reinterpret_cast(&x); + *c_style_pointer_cast(&p_data_[i]) = + *c_style_pointer_cast(&x); } else if constexpr(is_same>, int8x16_t>::value && @@ -171,13 +172,13 @@ struct DynamicBuffer { // HACK: cast pointer of x is bad // TODO: remove this after compiler fix - *reinterpret_cast(&p_data_[i]) = - *reinterpret_cast(&x); + *c_style_pointer_cast(&p_data_[i]) = + *c_style_pointer_cast(&x); } } else { - *reinterpret_cast(&p_data_[i]) = x; + *c_style_pointer_cast(&p_data_[i]) = x; } #endif } @@ -186,7 +187,7 @@ struct DynamicBuffer { if(is_valid_offset) { - *reinterpret_cast(&p_data_[i]) = x; + *c_style_pointer_cast(&p_data_[i]) = x; } } } diff --git a/composable_kernel/include/utility/type.hpp b/composable_kernel/include/utility/type.hpp index 32f7dfb569..12ed435658 100644 --- a/composable_kernel/include/utility/type.hpp +++ b/composable_kernel/include/utility/type.hpp @@ -22,10 +22,7 @@ template using remove_cv_t = typename std::remove_cv::type; template -constexpr std::remove_reference_t&& move(T&& t) noexcept -{ - return static_cast::type&&>(t); -} +inline constexpr bool is_pointer_v = std::is_pointer::value; template struct is_known_at_compile_time; diff --git a/host/driver_offline/include/device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_kyxc_nhwk.hpp b/host/driver_offline/include/device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_kyxc_nhwk.hpp index e1c6db8045..85c418c52f 100644 --- a/host/driver_offline/include/device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_kyxc_nhwk.hpp +++ b/host/driver_offline/include/device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_kyxc_nhwk.hpp @@ -290,9 +290,6 @@ void device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_k const auto K = out_n_ho_wo_k_lengths[I3]; const auto C = wei_k_y_x_c_lengths[I3]; - const auto Hi = in_n_hi_wi_c_lengths[I1]; - const auto Wi = in_n_hi_wi_c_lengths[I2]; - const auto Ho = out_n_ho_wo_k_lengths[I1]; const auto Wo = out_n_ho_wo_k_lengths[I2];