mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 01:10:17 +00:00
fix kernel filename
This commit is contained in:
@@ -42,6 +42,12 @@ message(STATUS "Build with HIP ${hip_VERSION}")
|
|||||||
#find_path(HALF_INCLUDE_DIR half.hpp)
|
#find_path(HALF_INCLUDE_DIR half.hpp)
|
||||||
message("HALF_INCLUDE_DIR: ${HALF_INCLUDE_DIR}")
|
message("HALF_INCLUDE_DIR: ${HALF_INCLUDE_DIR}")
|
||||||
|
|
||||||
|
# CMAKE_CXX_FLAGS
|
||||||
|
if(BUILD_DEV)
|
||||||
|
string(APPEND CMAKE_CXX_FLAGS " -Werror -Weverything")
|
||||||
|
endif()
|
||||||
|
message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
|
||||||
|
|
||||||
## tidy
|
## tidy
|
||||||
include(EnableCompilerWarnings)
|
include(EnableCompilerWarnings)
|
||||||
set(MIOPEN_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name)
|
set(MIOPEN_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name)
|
||||||
@@ -50,7 +56,6 @@ if(CMAKE_CXX_COMPILER MATCHES ".*hcc" OR CMAKE_CXX_COMPILER MATCHES ".*clang\\+\
|
|||||||
# Enable tidy on hip
|
# Enable tidy on hip
|
||||||
elseif(MIOPEN_BACKEND STREQUAL "HIP" OR MIOPEN_BACKEND STREQUAL "HIPNOGPU")
|
elseif(MIOPEN_BACKEND STREQUAL "HIP" OR MIOPEN_BACKEND STREQUAL "HIPNOGPU")
|
||||||
set(MIOPEN_TIDY_ERRORS ALL)
|
set(MIOPEN_TIDY_ERRORS ALL)
|
||||||
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
include(ClangTidy)
|
include(ClangTidy)
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw(
|
|||||||
const auto out_desc_n_k_ho_wo =
|
const auto out_desc_n_k_ho_wo =
|
||||||
make_dynamic_naive_tensor_descriptor_packed_v2(out_n_k_ho_wo_lengths);
|
make_dynamic_naive_tensor_descriptor_packed_v2(out_n_k_ho_wo_lengths);
|
||||||
|
|
||||||
#if 1
|
#if 0
|
||||||
// [8, 1, 128, 1] * [8, 4, 32, 1] = [1, 128, 4, 32] for fp32
|
// [8, 1, 128, 1] * [8, 4, 32, 1] = [1, 128, 4, 32] for fp32
|
||||||
// cdata = 64, BlockSize = 256
|
// cdata = 64, BlockSize = 256
|
||||||
constexpr index_t BlockSize = 256;
|
constexpr index_t BlockSize = 256;
|
||||||
|
|||||||
@@ -115,15 +115,13 @@ int main(int argc, char* argv[])
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
constexpr index_t in_vector_size = 1;
|
|
||||||
using in_data_t = float;
|
using in_data_t = float;
|
||||||
using acc_data_t = float;
|
using acc_data_t = float;
|
||||||
using out_data_t = float;
|
using out_data_t = float;
|
||||||
#elif 1
|
#elif 1
|
||||||
constexpr index_t in_vector_size = 1;
|
using in_data_t = half_t;
|
||||||
using in_data_t = half_t;
|
using acc_data_t = float;
|
||||||
using acc_data_t = float;
|
using out_data_t = half_t;
|
||||||
using out_data_t = half_t;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
std::vector<std::size_t> in_lengths_host(4), wei_lengths_host(4), out_lengths_host(4);
|
std::vector<std::size_t> in_lengths_host(4), wei_lengths_host(4), out_lengths_host(4);
|
||||||
@@ -213,38 +211,6 @@ int main(int argc, char* argv[])
|
|||||||
wei.GenerateTensorValue(gen_wei, num_thread);
|
wei.GenerateTensorValue(gen_wei, num_thread);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto f_make_for_device_nchw = [&]() {
|
|
||||||
#if USE_DYNAMIC_MODE
|
|
||||||
const auto in_lengths_dev = make_tuple(N, C, Hi, Wi);
|
|
||||||
const auto wei_lengths_dev = make_tuple(K, C, Y, X);
|
|
||||||
const auto out_lengths_dev = make_tuple(N, K, Ho, Wo);
|
|
||||||
const auto conv_strides_dev = make_tuple(conv_stride_h, conv_stride_w);
|
|
||||||
const auto conv_dilations_dev = make_tuple(conv_dilation_h, conv_dilation_w);
|
|
||||||
const auto in_left_pads_dev = make_tuple(in_left_pad_h, in_left_pad_w);
|
|
||||||
const auto in_right_pads_dev = make_tuple(in_right_pad_h, in_right_pad_w);
|
|
||||||
#else
|
|
||||||
const auto in_lengths_dev =
|
|
||||||
make_tuple(Number<N>{}, Number<C>{}, Number<Hi>{}, Number<Wi>{});
|
|
||||||
const auto wei_lengths_dev = make_tuple(Number<K>{}, Number<C>{}, Number<Y>{}, Number<X>{});
|
|
||||||
const auto out_lengths_dev =
|
|
||||||
make_tuple(Number<N>{}, Number<K>{}, Number<Ho>{}, Number<Wo>{});
|
|
||||||
const auto conv_strides_dev = make_tuple(Number<conv_stride_h>{}, Number<conv_stride_w>{});
|
|
||||||
const auto conv_dilations_dev =
|
|
||||||
make_tuple(Number<conv_dilation_h>{}, Number<conv_dilation_w>{});
|
|
||||||
const auto in_left_pads_dev = make_tuple(Number<in_left_pad_h>{}, Number<in_left_pad_w>{});
|
|
||||||
const auto in_right_pads_dev =
|
|
||||||
make_tuple(Number<in_right_pad_h>{}, Number<in_right_pad_w>{});
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return make_tuple(in_lengths_dev,
|
|
||||||
wei_lengths_dev,
|
|
||||||
out_lengths_dev,
|
|
||||||
conv_strides_dev,
|
|
||||||
conv_dilations_dev,
|
|
||||||
in_left_pads_dev,
|
|
||||||
in_right_pads_dev);
|
|
||||||
};
|
|
||||||
|
|
||||||
auto f_make_for_device_nhwc = [&]() {
|
auto f_make_for_device_nhwc = [&]() {
|
||||||
#if USE_DYNAMIC_MODE
|
#if USE_DYNAMIC_MODE
|
||||||
const auto in_lengths_dev = make_tuple(N, Hi, Wi, C);
|
const auto in_lengths_dev = make_tuple(N, Hi, Wi, C);
|
||||||
|
|||||||
@@ -22,7 +22,7 @@
|
|||||||
#define USE_DYNAMIC_MODE 1
|
#define USE_DYNAMIC_MODE 1
|
||||||
#define USE_CONV_FWD_V4R4_NCHW 0
|
#define USE_CONV_FWD_V4R4_NCHW 0
|
||||||
#define USE_CONV_FWD_V4R4R2_NHWC 1
|
#define USE_CONV_FWD_V4R4R2_NHWC 1
|
||||||
#define USE_CONV_FWD_V6R1_NCHW 0
|
#define USE_CONV_FWD_V6R1_NCHW 1
|
||||||
#define USE_CONV_FWD_V5R1_NCHW 0
|
#define USE_CONV_FWD_V5R1_NCHW 0
|
||||||
#define USE_CONV_FWD_V4R4R2_XDL_NCHW 0
|
#define USE_CONV_FWD_V4R4R2_XDL_NCHW 0
|
||||||
#define USE_CONV_FWD_V4R4R4_XDL_NHWC 0
|
#define USE_CONV_FWD_V4R4R4_XDL_NHWC 0
|
||||||
|
|||||||
@@ -24,32 +24,32 @@ struct KernelTimerImpl
|
|||||||
{
|
{
|
||||||
KernelTimerImpl()
|
KernelTimerImpl()
|
||||||
{
|
{
|
||||||
hipEventCreate(&mStart);
|
hipGetErrorString(hipEventCreate(&mStart));
|
||||||
hipEventCreate(&mEnd);
|
hipGetErrorString(hipEventCreate(&mEnd));
|
||||||
}
|
}
|
||||||
|
|
||||||
~KernelTimerImpl()
|
~KernelTimerImpl()
|
||||||
{
|
{
|
||||||
hipEventDestroy(mStart);
|
hipGetErrorString(hipEventDestroy(mStart));
|
||||||
hipEventDestroy(mEnd);
|
hipGetErrorString(hipEventDestroy(mEnd));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Start()
|
void Start()
|
||||||
{
|
{
|
||||||
hipDeviceSynchronize();
|
hipGetErrorString(hipDeviceSynchronize());
|
||||||
hipEventRecord(mStart, 0);
|
hipGetErrorString(hipEventRecord(mStart, nullptr));
|
||||||
}
|
}
|
||||||
|
|
||||||
void End()
|
void End()
|
||||||
{
|
{
|
||||||
hipEventRecord(mEnd, 0);
|
hipGetErrorString(hipEventRecord(mEnd, nullptr));
|
||||||
hipEventSynchronize(mEnd);
|
hipGetErrorString(hipEventSynchronize(mEnd));
|
||||||
}
|
}
|
||||||
|
|
||||||
float GetElapsedTime() const
|
float GetElapsedTime() const
|
||||||
{
|
{
|
||||||
float time;
|
float time;
|
||||||
hipEventElapsedTime(&time, mStart, mEnd);
|
hipGetErrorString(hipEventElapsedTime(&time, mStart, mEnd));
|
||||||
return time;
|
return time;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user