mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 09:16:52 +00:00
refactor
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
include_directories(BEFORE
|
include_directories(BEFORE
|
||||||
include
|
include
|
||||||
${PROJECT_SOURCE_DIR}/host/host_tensor/include
|
${PROJECT_SOURCE_DIR}/host/host_tensor/include
|
||||||
|
${PROJECT_SOURCE_DIR}/host/solver/include
|
||||||
${PROJECT_SOURCE_DIR}/composable_kernel/include
|
${PROJECT_SOURCE_DIR}/composable_kernel/include
|
||||||
${PROJECT_SOURCE_DIR}/composable_kernel/include/utility
|
${PROJECT_SOURCE_DIR}/composable_kernel/include/utility
|
||||||
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description
|
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ include_directories(BEFORE
|
|||||||
${PROJECT_BINARY_DIR}/host/online_compile/include
|
${PROJECT_BINARY_DIR}/host/online_compile/include
|
||||||
${PROJECT_SOURCE_DIR}/host/online_compile/include
|
${PROJECT_SOURCE_DIR}/host/online_compile/include
|
||||||
${PROJECT_SOURCE_DIR}/host/host_tensor/include
|
${PROJECT_SOURCE_DIR}/host/host_tensor/include
|
||||||
|
${PROJECT_SOURCE_DIR}/host/solver/include
|
||||||
${PROJECT_SOURCE_DIR}/composable_kernel/include
|
${PROJECT_SOURCE_DIR}/composable_kernel/include
|
||||||
${PROJECT_SOURCE_DIR}/composable_kernel/include/utility
|
${PROJECT_SOURCE_DIR}/composable_kernel/include/utility
|
||||||
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description
|
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description
|
||||||
|
|||||||
@@ -106,7 +106,7 @@ void online_device_dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcy
|
|||||||
std::vector<float> kernel1_times;
|
std::vector<float> kernel1_times;
|
||||||
std::vector<float> kernel2_times;
|
std::vector<float> kernel2_times;
|
||||||
|
|
||||||
for(index_t i = 0; i < nrepeat; ++i)
|
for(index_t i = 0; i < nrepeat + 1; ++i)
|
||||||
{
|
{
|
||||||
KernelTimer timer1, timer2;
|
KernelTimer timer1, timer2;
|
||||||
std::string kernel_name;
|
std::string kernel_name;
|
||||||
@@ -164,11 +164,11 @@ void online_device_dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcy
|
|||||||
auto ave_time1 =
|
auto ave_time1 =
|
||||||
std::accumulate(
|
std::accumulate(
|
||||||
std::next(kernel1_times.begin()), kernel1_times.end(), 0., std::plus<float>{}) /
|
std::next(kernel1_times.begin()), kernel1_times.end(), 0., std::plus<float>{}) /
|
||||||
(nrepeat - 1);
|
nrepeat;
|
||||||
auto ave_time2 =
|
auto ave_time2 =
|
||||||
std::accumulate(
|
std::accumulate(
|
||||||
std::next(kernel2_times.begin()), kernel2_times.end(), 0., std::plus<float>{}) /
|
std::next(kernel2_times.begin()), kernel2_times.end(), 0., std::plus<float>{}) /
|
||||||
(nrepeat - 1);
|
nrepeat;
|
||||||
|
|
||||||
float perf = (float)(conv_problem_desc.CalculateFlop()) /
|
float perf = (float)(conv_problem_desc.CalculateFlop()) /
|
||||||
(std::size_t(1000) * 1000 * 1000) / (ave_time1 + ave_time2);
|
(std::size_t(1000) * 1000 * 1000) / (ave_time1 + ave_time2);
|
||||||
|
|||||||
Reference in New Issue
Block a user