mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-11 17:00:18 +00:00
remove online compilation from CK
This commit is contained in:
@@ -1,11 +1,8 @@
|
||||
cmake_minimum_required(VERSION 2.8.3)
|
||||
project(modular_convolution)
|
||||
project(composable_kernel)
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
|
||||
|
||||
include(TargetFlags)
|
||||
include(AddKernels)
|
||||
|
||||
## C++
|
||||
enable_language(CXX)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
|
||||
function(add_kernels SRC_DIR KERNEL_FILES)
|
||||
set(INIT_KERNELS_LIST)
|
||||
set(KERNELS_DECLS)
|
||||
foreach(KERNEL_FILE ${KERNEL_FILES})
|
||||
if("${CMAKE_VERSION}" VERSION_LESS 3.0)
|
||||
configure_file(${KERNEL_FILE} ${KERNEL_FILE}.delete)
|
||||
else()
|
||||
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${KERNEL_FILE})
|
||||
endif()
|
||||
get_filename_component(BASE_NAME ${KERNEL_FILE} NAME_WE)
|
||||
string(TOUPPER "${BASE_NAME}" KEY_NAME)
|
||||
string(MAKE_C_IDENTIFIER "${KEY_NAME}" VAR_NAME)
|
||||
string(APPEND KERNELS_DECLS "extern const size_t APP_KERNEL_${VAR_NAME}_SIZE;\n")
|
||||
string(APPEND KERNELS_DECLS "extern const unsigned char APP_KERNEL_${VAR_NAME}[];\n")
|
||||
list(APPEND INIT_KERNELS_LIST " { \"${KEY_NAME}\", std::string(reinterpret_cast<const char*>(APP_KERNEL_${VAR_NAME}), APP_KERNEL_${VAR_NAME}_SIZE) }")
|
||||
endforeach()
|
||||
string(REPLACE ";" ",\n" INIT_KERNELS "${INIT_KERNELS_LIST}")
|
||||
configure_file(${SRC_DIR}/kernel.cpp.in ${PROJECT_BINARY_DIR}/kernel.cpp)
|
||||
endfunction()
|
||||
|
||||
function(add_kernel_includes SRC_DIR KERNEL_FILES)
|
||||
set(INIT_KERNELS_LIST)
|
||||
foreach(KERNEL_FILE ${KERNEL_FILES})
|
||||
if("${CMAKE_VERSION}" VERSION_LESS 3.0)
|
||||
configure_file(${KERNEL_FILE} ${KERNEL_FILE}.delete)
|
||||
else()
|
||||
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${KERNEL_FILE})
|
||||
endif()
|
||||
get_filename_component(BASE_NAME ${KERNEL_FILE} NAME_WE)
|
||||
get_filename_component(FILE_NAME ${KERNEL_FILE} NAME)
|
||||
string(TOUPPER "${BASE_NAME}" KEY_NAME)
|
||||
string(MAKE_C_IDENTIFIER "${KEY_NAME}" VAR_NAME)
|
||||
list(APPEND INIT_KERNELS_LIST " { \"${FILE_NAME}\", std::string(reinterpret_cast<const char*>(${VAR_NAME}), ${VAR_NAME}_SIZE) }")
|
||||
endforeach()
|
||||
string(REPLACE ";" ",\n" INIT_KERNELS "${INIT_KERNELS_LIST}")
|
||||
configure_file(${SRC_DIR}/kernel_includes.cpp.in ${PROJECT_BINARY_DIR}/kernel_includes.cpp)
|
||||
endfunction()
|
||||
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
|
||||
function(get_target_property2 VAR TARGET PROPERTY)
|
||||
get_target_property(_pflags ${TARGET} ${PROPERTY})
|
||||
if(_pflags)
|
||||
set(${VAR} ${_pflags} PARENT_SCOPE)
|
||||
else()
|
||||
set(${VAR} "" PARENT_SCOPE)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
|
||||
macro(append_flags FLAGS TARGET PROPERTY PREFIX)
|
||||
get_target_property2(_pflags ${TARGET} ${PROPERTY})
|
||||
foreach(FLAG ${_pflags})
|
||||
if(TARGET ${FLAG})
|
||||
target_flags(_pflags2 ${FLAG})
|
||||
string(APPEND ${FLAGS} " ${_pflags2}")
|
||||
else()
|
||||
string(APPEND ${FLAGS} " ${PREFIX}${FLAG}")
|
||||
endif()
|
||||
endforeach()
|
||||
endmacro()
|
||||
|
||||
macro(append_link_flags FLAGS TARGET PROPERTY)
|
||||
get_target_property2(_pflags ${TARGET} ${PROPERTY})
|
||||
foreach(FLAG ${_pflags})
|
||||
if(TARGET ${FLAG})
|
||||
target_flags(_pflags2 ${FLAG})
|
||||
string(APPEND ${FLAGS} " ${_pflags2}")
|
||||
elseif(FLAG MATCHES "^-.*")
|
||||
string(APPEND ${FLAGS} " ${FLAG}")
|
||||
elseif(EXISTS ${FLAG})
|
||||
string(APPEND ${FLAGS} " ${FLAG}")
|
||||
else()
|
||||
string(APPEND ${FLAGS} " -l${FLAG}")
|
||||
endif()
|
||||
endforeach()
|
||||
endmacro()
|
||||
|
||||
function(target_flags FLAGS TARGET)
|
||||
set(_flags)
|
||||
append_flags(_flags ${TARGET} "INTERFACE_COMPILE_OPTIONS" "")
|
||||
append_flags(_flags ${TARGET} "INTERFACE_COMPILE_DEFINITIONS" "-D")
|
||||
append_flags(_flags ${TARGET} "INTERFACE_INCLUDE_DIRECTORIES" "-isystem ")
|
||||
append_flags(_flags ${TARGET} "INTERFACE_LINK_DIRECTORIES" "-L ")
|
||||
append_flags(_flags ${TARGET} "INTERFACE_LINK_OPTIONS" "")
|
||||
append_link_flags(_flags ${TARGET} "INTERFACE_LINK_LIBRARIES" "")
|
||||
# message("_flags: ${_flags}")
|
||||
set(${FLAGS} ${_flags} PARENT_SCOPE)
|
||||
endfunction()
|
||||
@@ -1,4 +1,2 @@
|
||||
add_subdirectory(host_tensor)
|
||||
add_subdirectory(online_compile)
|
||||
add_subdirectory(driver_offline)
|
||||
add_subdirectory(driver_online)
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
include_directories(BEFORE
|
||||
include
|
||||
${PROJECT_BINARY_DIR}/host/online_compile/include
|
||||
${PROJECT_SOURCE_DIR}/host/online_compile/include
|
||||
${PROJECT_SOURCE_DIR}/host/host_tensor/include
|
||||
${PROJECT_SOURCE_DIR}/host/solver/include
|
||||
${PROJECT_SOURCE_DIR}/composable_kernel/include
|
||||
${PROJECT_SOURCE_DIR}/composable_kernel/include/utility
|
||||
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description
|
||||
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_operation
|
||||
${PROJECT_SOURCE_DIR}/composable_kernel/include/problem_transform
|
||||
${PROJECT_SOURCE_DIR}/composable_kernel/include/driver
|
||||
${PROJECT_SOURCE_DIR}/external/rocm/include
|
||||
${PROJECT_SOURCE_DIR}/external/half/include
|
||||
)
|
||||
|
||||
set(CONV_FWD_DRIVER_ONLINE_SOURCE conv_fwd_driver_online.cpp)
|
||||
|
||||
add_executable(conv_fwd_driver_online ${CONV_FWD_DRIVER_ONLINE_SOURCE})
|
||||
|
||||
target_link_libraries(conv_fwd_driver_online PRIVATE host_tensor)
|
||||
target_link_libraries(conv_fwd_driver_online PRIVATE online_compile)
|
||||
@@ -1,453 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <numeric>
|
||||
#include <initializer_list>
|
||||
#include <cstdlib>
|
||||
#include <stdlib.h>
|
||||
#include <half.hpp>
|
||||
#include "config.hpp"
|
||||
#include "print.hpp"
|
||||
#include "device.hpp"
|
||||
#include "host_tensor.hpp"
|
||||
#include "host_tensor_generator.hpp"
|
||||
#include "conv_common.hpp"
|
||||
#include "host_conv.hpp"
|
||||
#include "device_tensor.hpp"
|
||||
#include "handle.hpp"
|
||||
#include "hipCheck.hpp"
|
||||
#include "online_device_dynamic_convolution_forward_implicit_gemm_v4r4_dlops_nchw_kcyx_nkhw.hpp"
|
||||
#include "online_device_dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw.hpp"
|
||||
#include "online_device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw.hpp"
|
||||
#include "online_device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nhwc_kyxc_nhwk.hpp"
|
||||
|
||||
#define USE_CONV_FWD_V4R4_NCHW 1
|
||||
#define USE_CONV_FWD_V6R1_NCHW 1
|
||||
#define USE_CONV_FWD_V4R4_XDLOPS_NCHW 1
|
||||
#define USE_CONV_FWD_V4R4_XDLOPS_NHWC 1
|
||||
|
||||
enum ConvForwardAlgo
|
||||
{
|
||||
V4R4NCHW, // 0
|
||||
V6R1NCHW, // 1
|
||||
V4R4XDLNCHW, // 2
|
||||
V4R4XDLNHWC // 3
|
||||
};
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
using namespace ck;
|
||||
using namespace ck::driver;
|
||||
using size_t = std::size_t;
|
||||
|
||||
hipStream_t stream;
|
||||
online_compile::Handle* handle;
|
||||
|
||||
MY_HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
handle = new online_compile::Handle(stream);
|
||||
|
||||
constexpr auto I0 = Number<0>{};
|
||||
constexpr auto I1 = Number<1>{};
|
||||
constexpr auto I2 = Number<2>{};
|
||||
constexpr auto I3 = Number<3>{};
|
||||
constexpr auto I4 = Number<4>{};
|
||||
constexpr auto I5 = Number<5>{};
|
||||
constexpr auto I6 = Number<6>{};
|
||||
|
||||
if(argc != 22)
|
||||
{
|
||||
printf("arg1 to 5: layout, algo, do_verification, init_method, do_log, nrepeat\n");
|
||||
printf("rest: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, RightPx\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
const ConvTensorLayout layout = static_cast<ConvTensorLayout>(atoi(argv[1]));
|
||||
const ConvForwardAlgo algo = static_cast<ConvForwardAlgo>(atoi(argv[2]));
|
||||
const bool do_verification = atoi(argv[3]);
|
||||
const int init_method = atoi(argv[4]);
|
||||
const bool do_log = atoi(argv[5]);
|
||||
const int nrepeat = atoi(argv[6]);
|
||||
|
||||
const index_t N = atoi(argv[7]);
|
||||
const index_t K = atoi(argv[8]);
|
||||
const index_t C = atoi(argv[9]);
|
||||
const index_t Y = atoi(argv[10]);
|
||||
const index_t X = atoi(argv[11]);
|
||||
const index_t Hi = atoi(argv[12]);
|
||||
const index_t Wi = atoi(argv[13]);
|
||||
|
||||
const index_t conv_stride_h = atoi(argv[14]);
|
||||
const index_t conv_stride_w = atoi(argv[15]);
|
||||
const index_t conv_dilation_h = atoi(argv[16]);
|
||||
const index_t conv_dilation_w = atoi(argv[17]);
|
||||
const index_t in_left_pad_h = atoi(argv[18]);
|
||||
const index_t in_left_pad_w = atoi(argv[19]);
|
||||
const index_t in_right_pad_h = atoi(argv[20]);
|
||||
const index_t in_right_pad_w = atoi(argv[21]);
|
||||
|
||||
const index_t YEff = (Y - 1) * conv_dilation_h + 1;
|
||||
const index_t XEff = (X - 1) * conv_dilation_w + 1;
|
||||
|
||||
const index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - YEff) / conv_stride_h + 1;
|
||||
const index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;
|
||||
|
||||
#if 1
|
||||
using in_data_t = float;
|
||||
using acc_data_t = float;
|
||||
using out_data_t = float;
|
||||
#elif 0
|
||||
using in_data_t = half_t;
|
||||
using acc_data_t = float;
|
||||
using out_data_t = half_t;
|
||||
#elif 1
|
||||
using in_data_t = int8_t;
|
||||
using acc_data_t = int32_t;
|
||||
using out_data_t = int8_t;
|
||||
#endif
|
||||
|
||||
std::vector<std::size_t> in_lengths_host(4), wei_lengths_host(4), out_lengths_host(4);
|
||||
|
||||
switch(layout)
|
||||
{
|
||||
case ConvTensorLayout::NCHW:
|
||||
// NCHW
|
||||
in_lengths_host[0] = static_cast<std::size_t>(N);
|
||||
in_lengths_host[1] = static_cast<std::size_t>(C);
|
||||
in_lengths_host[2] = static_cast<std::size_t>(Hi);
|
||||
in_lengths_host[3] = static_cast<std::size_t>(Wi);
|
||||
|
||||
wei_lengths_host[0] = static_cast<std::size_t>(K);
|
||||
wei_lengths_host[1] = static_cast<std::size_t>(C);
|
||||
wei_lengths_host[2] = static_cast<std::size_t>(Y);
|
||||
wei_lengths_host[3] = static_cast<std::size_t>(X);
|
||||
|
||||
out_lengths_host[0] = static_cast<std::size_t>(N);
|
||||
out_lengths_host[1] = static_cast<std::size_t>(K);
|
||||
out_lengths_host[2] = static_cast<std::size_t>(Ho);
|
||||
out_lengths_host[3] = static_cast<std::size_t>(Wo);
|
||||
break;
|
||||
case ConvTensorLayout::NHWC:
|
||||
// NHWC
|
||||
in_lengths_host[0] = static_cast<std::size_t>(N);
|
||||
in_lengths_host[1] = static_cast<std::size_t>(Hi);
|
||||
in_lengths_host[2] = static_cast<std::size_t>(Wi);
|
||||
in_lengths_host[3] = static_cast<std::size_t>(C);
|
||||
|
||||
wei_lengths_host[0] = static_cast<std::size_t>(K);
|
||||
wei_lengths_host[1] = static_cast<std::size_t>(Y);
|
||||
wei_lengths_host[2] = static_cast<std::size_t>(X);
|
||||
wei_lengths_host[3] = static_cast<std::size_t>(C);
|
||||
|
||||
out_lengths_host[0] = static_cast<std::size_t>(N);
|
||||
out_lengths_host[1] = static_cast<std::size_t>(Ho);
|
||||
out_lengths_host[2] = static_cast<std::size_t>(Wo);
|
||||
out_lengths_host[3] = static_cast<std::size_t>(K);
|
||||
break;
|
||||
default: throw std::runtime_error("wrong! not implemented");
|
||||
}
|
||||
|
||||
Tensor<in_data_t> in(in_lengths_host);
|
||||
Tensor<in_data_t> wei(wei_lengths_host);
|
||||
Tensor<out_data_t> out_host(out_lengths_host);
|
||||
Tensor<out_data_t> out_device(out_lengths_host);
|
||||
|
||||
std::cout << "layout: " << layout << std::endl;
|
||||
ostream_HostTensorDescriptor(in.mDesc, std::cout << "in: ");
|
||||
ostream_HostTensorDescriptor(wei.mDesc, std::cout << "wei: ");
|
||||
ostream_HostTensorDescriptor(out_host.mDesc, std::cout << "out: ");
|
||||
print_array("InLeftPads", make_tuple(in_left_pad_h, in_left_pad_w));
|
||||
print_array("InRightPads", make_tuple(in_right_pad_h, in_right_pad_w));
|
||||
print_array("ConvStrides", make_tuple(conv_stride_h, conv_stride_w));
|
||||
print_array("ConvDilations", make_tuple(conv_dilation_h, conv_dilation_w));
|
||||
|
||||
std::size_t num_thread = std::thread::hardware_concurrency();
|
||||
|
||||
switch(init_method)
|
||||
{
|
||||
case 0:
|
||||
// no initialization
|
||||
break;
|
||||
case 1:
|
||||
in.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
|
||||
wei.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
|
||||
break;
|
||||
case 2:
|
||||
in.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
|
||||
wei.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
|
||||
break;
|
||||
case 3:
|
||||
in.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
|
||||
wei.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
|
||||
break;
|
||||
case 4:
|
||||
in.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
|
||||
wei.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
|
||||
break;
|
||||
case 5:
|
||||
in.GenerateTensorValue(GeneratorTensor_3<float>{0.0, 1.0}, num_thread);
|
||||
wei.GenerateTensorValue(GeneratorTensor_3<float>{-0.5, 0.5}, num_thread);
|
||||
break;
|
||||
default:
|
||||
in.GenerateTensorValue(GeneratorTensor_2{1, 5}, num_thread);
|
||||
|
||||
auto gen_wei = [](auto... is) {
|
||||
return GeneratorTensor_2{1, 5}(is...) * GeneratorTensor_Checkboard{}(is...);
|
||||
};
|
||||
wei.GenerateTensorValue(gen_wei, num_thread);
|
||||
}
|
||||
|
||||
auto f_make_for_device_nchw = [&]() {
|
||||
const auto in_lengths_dev = make_tuple(N, C, Hi, Wi);
|
||||
const auto wei_lengths_dev = make_tuple(K, C, Y, X);
|
||||
const auto out_lengths_dev = make_tuple(N, K, Ho, Wo);
|
||||
|
||||
return make_tuple(in_lengths_dev, wei_lengths_dev, out_lengths_dev);
|
||||
};
|
||||
|
||||
auto f_make_for_device_nhwc = [&]() {
|
||||
const auto in_lengths_dev = make_tuple(N, Hi, Wi, C);
|
||||
const auto wei_lengths_dev = make_tuple(K, Y, X, C);
|
||||
const auto out_lengths_dev = make_tuple(N, Ho, Wo, K);
|
||||
|
||||
return make_tuple(in_lengths_dev, wei_lengths_dev, out_lengths_dev);
|
||||
};
|
||||
|
||||
const auto conv_strides = make_tuple(conv_stride_h, conv_stride_w);
|
||||
const auto conv_dilations = make_tuple(conv_dilation_h, conv_dilation_w);
|
||||
const auto in_left_pads = make_tuple(in_left_pad_h, in_left_pad_w);
|
||||
const auto in_right_pads = make_tuple(in_right_pad_h, in_right_pad_w);
|
||||
|
||||
#if USE_CONV_FWD_V4R4_NCHW
|
||||
if(algo == ConvForwardAlgo::V4R4NCHW)
|
||||
{
|
||||
if(layout != ConvTensorLayout::NCHW)
|
||||
{
|
||||
throw std::runtime_error("wrong! layout");
|
||||
}
|
||||
|
||||
const auto tmp = f_make_for_device_nchw();
|
||||
|
||||
tunable_dyn_conv_fwd_v4r4_dlops_nchw_kcyx_nkhw* tunable =
|
||||
&default_tunable_dyn_conv_fwd_v4r4_dlops_nchw_kcyx_nkhw;
|
||||
|
||||
online_device_dynamic_convolution_forward_implicit_gemm_v4r4_dlops_nchw_kcyx_nkhw<
|
||||
in_data_t,
|
||||
acc_data_t,
|
||||
out_data_t>(handle,
|
||||
tmp[I0],
|
||||
tmp[I1],
|
||||
tmp[I2],
|
||||
conv_strides,
|
||||
conv_dilations,
|
||||
in_left_pads,
|
||||
in_right_pads,
|
||||
in,
|
||||
wei,
|
||||
out_device,
|
||||
tunable,
|
||||
nrepeat);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if USE_CONV_FWD_V6R1_NCHW
|
||||
if(algo == ConvForwardAlgo::V6R1NCHW)
|
||||
{
|
||||
if(layout != ConvTensorLayout::NCHW)
|
||||
{
|
||||
throw std::runtime_error("wrong! layout");
|
||||
}
|
||||
|
||||
const auto tmp = f_make_for_device_nchw();
|
||||
|
||||
#if 1
|
||||
const CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw compile_param = {
|
||||
get_datatype_enum_from_type<in_data_t>::value,
|
||||
get_datatype_enum_from_type<acc_data_t>::value,
|
||||
get_datatype_enum_from_type<out_data_t>::value,
|
||||
256,
|
||||
4,
|
||||
1,
|
||||
128,
|
||||
32,
|
||||
8,
|
||||
4,
|
||||
4,
|
||||
1,
|
||||
{8, 2},
|
||||
{8, 2},
|
||||
{4, 1, 1, 1, 1},
|
||||
{2, 1, 1, 128, 1},
|
||||
{4, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1},
|
||||
{1, 4, 1, 1, 1},
|
||||
{8, 1, 1, 32, 1},
|
||||
{1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1},
|
||||
4,
|
||||
true,
|
||||
true};
|
||||
#elif 0
|
||||
const CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw compile_param = {
|
||||
get_datatype_enum_from_type<in_data_t>::value,
|
||||
get_datatype_enum_from_type<acc_data_t>::value,
|
||||
get_datatype_enum_from_type<out_data_t>::value,
|
||||
256,
|
||||
4,
|
||||
2,
|
||||
128,
|
||||
32,
|
||||
8,
|
||||
4,
|
||||
4,
|
||||
1,
|
||||
{8, 2},
|
||||
{8, 2},
|
||||
{4, 1, 1, 1, 2},
|
||||
{2, 1, 1, 128, 1},
|
||||
{4, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1},
|
||||
{1, 4, 1, 1, 2},
|
||||
{8, 1, 1, 32, 1},
|
||||
{1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1},
|
||||
4,
|
||||
true,
|
||||
true};
|
||||
#elif 1
|
||||
const CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw compile_param = {
|
||||
get_datatype_enum_from_type<in_data_t>::value,
|
||||
get_datatype_enum_from_type<acc_data_t>::value,
|
||||
get_datatype_enum_from_type<out_data_t>::value,
|
||||
256,
|
||||
4,
|
||||
4,
|
||||
128,
|
||||
32,
|
||||
8,
|
||||
4,
|
||||
4,
|
||||
1,
|
||||
{8, 2},
|
||||
{8, 2},
|
||||
{4, 1, 1, 1, 4},
|
||||
{2, 1, 1, 128, 1},
|
||||
{4, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1},
|
||||
{1, 4, 1, 1, 4},
|
||||
{8, 1, 1, 32, 1},
|
||||
{1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1},
|
||||
4,
|
||||
true,
|
||||
true};
|
||||
#endif
|
||||
|
||||
online_device_dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw<
|
||||
in_data_t,
|
||||
acc_data_t,
|
||||
out_data_t>(handle,
|
||||
tmp[I0],
|
||||
tmp[I1],
|
||||
tmp[I2],
|
||||
conv_strides,
|
||||
conv_dilations,
|
||||
in_left_pads,
|
||||
in_right_pads,
|
||||
in,
|
||||
wei,
|
||||
out_device,
|
||||
compile_param,
|
||||
nrepeat);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if USE_CONV_FWD_V4R4_XDLOPS_NCHW
|
||||
if(algo == ConvForwardAlgo::V4R4XDLNCHW)
|
||||
{
|
||||
if(layout != ConvTensorLayout::NCHW)
|
||||
{
|
||||
throw std::runtime_error("wrong! layout");
|
||||
}
|
||||
|
||||
const auto tmp = f_make_for_device_nchw();
|
||||
|
||||
tunable_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw* tunable =
|
||||
&default_tunable_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw;
|
||||
|
||||
online_device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw<
|
||||
in_data_t,
|
||||
acc_data_t,
|
||||
out_data_t>(handle,
|
||||
tmp[I0],
|
||||
tmp[I1],
|
||||
tmp[I2],
|
||||
conv_strides,
|
||||
conv_dilations,
|
||||
in_left_pads,
|
||||
in_right_pads,
|
||||
in,
|
||||
wei,
|
||||
out_device,
|
||||
tunable,
|
||||
nrepeat);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if USE_CONV_FWD_V4R4_XDLOPS_NHWC
|
||||
if(algo == ConvForwardAlgo::V4R4XDLNHWC)
|
||||
{
|
||||
if(layout != ConvTensorLayout::NHWC)
|
||||
{
|
||||
throw std::runtime_error("wrong! layout");
|
||||
}
|
||||
|
||||
const auto tmp = f_make_for_device_nhwc();
|
||||
|
||||
tunable_dyn_conv_fwd_v4r4_xdlops_nhwc_kyxc_nhwk* tunable =
|
||||
&default_tunable_dyn_conv_fwd_v4r4_xdlops_nhwc_kyxc_nhwk;
|
||||
|
||||
online_device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nhwc_kyxc_nhwk<
|
||||
in_data_t,
|
||||
acc_data_t,
|
||||
out_data_t>(handle,
|
||||
tmp[I0],
|
||||
tmp[I1],
|
||||
tmp[I2],
|
||||
conv_strides,
|
||||
conv_dilations,
|
||||
in_left_pads,
|
||||
in_right_pads,
|
||||
in,
|
||||
wei,
|
||||
out_device,
|
||||
tunable,
|
||||
nrepeat);
|
||||
}
|
||||
#endif
|
||||
|
||||
if(do_verification)
|
||||
{
|
||||
host_direct_convolution(in,
|
||||
wei,
|
||||
out_host,
|
||||
make_tuple(conv_stride_h, conv_stride_w),
|
||||
make_tuple(conv_dilation_h, conv_dilation_w),
|
||||
make_tuple(in_left_pad_h, in_left_pad_w),
|
||||
make_tuple(in_right_pad_h, in_right_pad_w),
|
||||
layout);
|
||||
|
||||
check_error(out_host, out_device);
|
||||
|
||||
#if 0
|
||||
if(do_log)
|
||||
{
|
||||
LogRangeAsType<float>(std::cout << "in : ", in.mData, ",") << std::endl;
|
||||
LogRangeAsType<float>(std::cout << "wei: ", wei.mData, ",") << std::endl;
|
||||
LogRangeAsType<float>(std::cout << "out_host : ", out_host.mData, ",") << std::endl;
|
||||
LogRangeAsType<float>(std::cout << "out_device: ", out_device.mData, ",") << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
delete handle;
|
||||
MY_HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
@@ -1,395 +0,0 @@
|
||||
#pragma once
|
||||
#include "device.hpp"
|
||||
#include "host_tensor.hpp"
|
||||
#include "handle.hpp"
|
||||
#include "online_driver_common.hpp"
|
||||
#include "dynamic_tensor_descriptor.hpp"
|
||||
#include "dynamic_tensor_descriptor_helper.hpp"
|
||||
#include "transform_forward_convolution_into_gemm_v4r4_nchw_kcyx_nkhw.hpp"
|
||||
#include "conv_tunable_fwd_v4r4_dlops_nchw_kcyx_nkhw.hpp"
|
||||
|
||||
namespace detail_dyn_conv_fwd_v4r4_nchw_kcyx_nkhw {
|
||||
|
||||
template <typename TInWei, typename TAcc, typename TOut>
|
||||
static std::string get_network_config_string_from_types()
|
||||
{
|
||||
using namespace ck;
|
||||
|
||||
std::string out;
|
||||
|
||||
out += std::to_string(get_datatype_enum_from_type<TInWei>::value) + "_" +
|
||||
std::to_string(get_datatype_enum_from_type<TAcc>::value) + "_" +
|
||||
std::to_string(get_datatype_enum_from_type<TOut>::value);
|
||||
|
||||
return (out);
|
||||
};
|
||||
|
||||
static std::string
|
||||
get_network_config_string_from_tunable(const tunable_dyn_conv_fwd_v4r4_dlops_nchw_kcyx_nkhw* pt)
|
||||
{
|
||||
std::string out("TUN_");
|
||||
|
||||
out += std::to_string(pt->BlockSize) + "_";
|
||||
|
||||
out += std::to_string(pt->MPerBlock) + "x" + std::to_string(pt->NPerBlock) + "x" +
|
||||
std::to_string(pt->KPerBlock) + "_";
|
||||
out += std::to_string(pt->M1PerThread) + "x" + std::to_string(pt->N1PerThread) + "x" +
|
||||
std::to_string(pt->KPerThread) + "_";
|
||||
out += std::to_string(pt->M1N1ThreadClusterM10) + "x" +
|
||||
std::to_string(pt->M1N1ThreadClusterN10) + "x" +
|
||||
std::to_string(pt->M1N1ThreadClusterM11) + "x" +
|
||||
std::to_string(pt->M1N1ThreadClusterN11) + "_";
|
||||
|
||||
out += std::to_string(pt->ABlockTransferThreadSliceLengths_K_M0_M1[0]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadSliceLengths_K_M0_M1[1]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadSliceLengths_K_M0_M1[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->ABlockTransferThreadClusterLengths_K_M0_M1[0]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterLengths_K_M0_M1[1]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterLengths_K_M0_M1[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[0]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[1]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->ABlockTransferSrcAccessOrder[0]) + "x" +
|
||||
std::to_string(pt->ABlockTransferSrcAccessOrder[1]) + "x" +
|
||||
std::to_string(pt->ABlockTransferSrcAccessOrder[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->ABlockTransferSrcVectorDim) + "_";
|
||||
out += std::to_string(pt->ABlockTransferSrcScalarPerVector) + "_";
|
||||
out += std::to_string(pt->ABlockTransferDstScalarPerVector_M1) + "_";
|
||||
out += std::to_string(pt->AThreadTransferSrcResetCoordinateAfterRun) + "_";
|
||||
|
||||
out += std::to_string(pt->BBlockTransferThreadSliceLengths_K_N0_N1[0]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadSliceLengths_K_N0_N1[1]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadSliceLengths_K_N0_N1[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->BBlockTransferThreadClusterLengths_K_N0_N1[0]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterLengths_K_N0_N1[1]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterLengths_K_N0_N1[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[0]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[1]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->BBlockTransferSrcAccessOrder[0]) + "x" +
|
||||
std::to_string(pt->BBlockTransferSrcAccessOrder[1]) + "x" +
|
||||
std::to_string(pt->BBlockTransferSrcAccessOrder[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->BBlockTransferSrcVectorDim) + "_";
|
||||
out += std::to_string(pt->BBlockTransferSrcScalarPerVector) + "_";
|
||||
out += std::to_string(pt->BBlockTransferDstScalarPerVector_N1) + "_";
|
||||
out += std::to_string(pt->BThreadTransferSrcResetCoordinateAfterRun) + "_";
|
||||
|
||||
out += std::to_string(pt->CThreadTransferSrcDstAccessOrder[0]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[1]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[2]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[3]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[4]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[5]) + "_";
|
||||
|
||||
out += std::to_string(pt->CThreadTransferSrcDstVectorDim) + "_";
|
||||
out += std::to_string(pt->CThreadTransferDstScalarPerVector);
|
||||
|
||||
return (out);
|
||||
};
|
||||
|
||||
template <typename TInWei, typename TAcc, typename TOut>
|
||||
static std::string get_definition_string_from_types()
|
||||
{
|
||||
using namespace ck;
|
||||
|
||||
std::string out;
|
||||
|
||||
out +=
|
||||
" -DCK_PARAM_ABDataTypeEnum=" + std::to_string(get_datatype_enum_from_type<TInWei>::value) +
|
||||
" -DCK_PARAM_AccDataTypeEnum=" + std::to_string(get_datatype_enum_from_type<TAcc>::value) +
|
||||
" -DCK_PARAM_CDataTypeEnum=" + std::to_string(get_datatype_enum_from_type<TOut>::value);
|
||||
|
||||
return (out);
|
||||
};
|
||||
|
||||
static std::string
|
||||
get_definition_string_from_tunable(const tunable_dyn_conv_fwd_v4r4_dlops_nchw_kcyx_nkhw* pt)
|
||||
{
|
||||
std::string out;
|
||||
|
||||
out += " -DCK_PARAM_BlockSize=" + std::to_string(pt->BlockSize);
|
||||
|
||||
out += " -DCK_PARAM_MPerBlock=" + std::to_string(pt->MPerBlock) +
|
||||
" -DCK_PARAM_NPerBlock=" + std::to_string(pt->NPerBlock) +
|
||||
" -DCK_PARAM_KPerBlock=" + std::to_string(pt->KPerBlock);
|
||||
out += " -DCK_PARAM_M1PerThread=" + std::to_string(pt->M1PerThread) +
|
||||
" -DCK_PARAM_N1PerThread=" + std::to_string(pt->N1PerThread) +
|
||||
" -DCK_PARAM_KPerThread=" + std::to_string(pt->KPerThread);
|
||||
|
||||
out += " -DCK_PARAM_M1N1ThreadClusterM10=" + std::to_string(pt->M1N1ThreadClusterM10) +
|
||||
" -DCK_PARAM_M1N1ThreadClusterN10=" + std::to_string(pt->M1N1ThreadClusterN10) +
|
||||
" -DCK_PARAM_M1N1ThreadClusterM11=" + std::to_string(pt->M1N1ThreadClusterM11) +
|
||||
" -DCK_PARAM_M1N1ThreadClusterN11=" + std::to_string(pt->M1N1ThreadClusterN11);
|
||||
|
||||
out += " -DCK_PARAM_ABlockTransferThreadSliceLengths_K_M0_M1=" +
|
||||
std::to_string(pt->ABlockTransferThreadSliceLengths_K_M0_M1[0]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadSliceLengths_K_M0_M1[1]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadSliceLengths_K_M0_M1[2]);
|
||||
|
||||
out += " -DCK_PARAM_ABlockTransferThreadClusterLengths_K_M0_M1=" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterLengths_K_M0_M1[0]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadClusterLengths_K_M0_M1[1]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadClusterLengths_K_M0_M1[2]);
|
||||
|
||||
out += " -DCK_PARAM_ABlockTransferThreadClusterArrangeOrder=" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[0]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[1]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[2]);
|
||||
|
||||
out += " -DCK_PARAM_ABlockTransferSrcAccessOrder=" +
|
||||
std::to_string(pt->ABlockTransferSrcAccessOrder[0]) + "," +
|
||||
std::to_string(pt->ABlockTransferSrcAccessOrder[1]) + "," +
|
||||
std::to_string(pt->ABlockTransferSrcAccessOrder[2]);
|
||||
|
||||
out +=
|
||||
" -DCK_PARAM_ABlockTransferSrcVectorDim=" + std::to_string(pt->ABlockTransferSrcVectorDim);
|
||||
out += " -DCK_PARAM_ABlockTransferSrcScalarPerVector=" +
|
||||
std::to_string(pt->ABlockTransferSrcScalarPerVector);
|
||||
out += " -DCK_PARAM_ABlockTransferDstScalarPerVector_M1=" +
|
||||
std::to_string(pt->ABlockTransferDstScalarPerVector_M1);
|
||||
out += " -DCK_PARAM_AThreadTransferSrcResetCoordinateAfterRun=" +
|
||||
std::to_string(pt->AThreadTransferSrcResetCoordinateAfterRun);
|
||||
|
||||
out += " -DCK_PARAM_BBlockTransferThreadSliceLengths_K_N0_N1=" +
|
||||
std::to_string(pt->BBlockTransferThreadSliceLengths_K_N0_N1[0]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadSliceLengths_K_N0_N1[1]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadSliceLengths_K_N0_N1[2]);
|
||||
|
||||
out += " -DCK_PARAM_BBlockTransferThreadClusterLengths_K_N0_N1=" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterLengths_K_N0_N1[0]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadClusterLengths_K_N0_N1[1]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadClusterLengths_K_N0_N1[2]);
|
||||
|
||||
out += " -DCK_PARAM_BBlockTransferThreadClusterArrangeOrder=" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[0]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[1]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[2]);
|
||||
|
||||
out += " -DCK_PARAM_BBlockTransferSrcAccessOrder=" +
|
||||
std::to_string(pt->BBlockTransferSrcAccessOrder[0]) + "," +
|
||||
std::to_string(pt->BBlockTransferSrcAccessOrder[1]) + "," +
|
||||
std::to_string(pt->BBlockTransferSrcAccessOrder[2]);
|
||||
|
||||
out +=
|
||||
" -DCK_PARAM_BBlockTransferSrcVectorDim=" + std::to_string(pt->BBlockTransferSrcVectorDim);
|
||||
out += " -DCK_PARAM_BBlockTransferSrcScalarPerVector=" +
|
||||
std::to_string(pt->BBlockTransferSrcScalarPerVector);
|
||||
out += " -DCK_PARAM_BBlockTransferDstScalarPerVector_N1=" +
|
||||
std::to_string(pt->BBlockTransferDstScalarPerVector_N1);
|
||||
out += " -DCK_PARAM_BThreadTransferSrcResetCoordinateAfterRun=" +
|
||||
std::to_string(pt->BThreadTransferSrcResetCoordinateAfterRun);
|
||||
|
||||
out += " -DCK_PARAM_CThreadTransferSrcDstAccessOrder=" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[0]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[1]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[2]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[3]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[4]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[5]);
|
||||
|
||||
out += " -DCK_PARAM_CThreadTransferSrcDstVectorDim=" +
|
||||
std::to_string(pt->CThreadTransferSrcDstVectorDim);
|
||||
out += " -DCK_PARAM_CThreadTransferDstScalarPerVector=" +
|
||||
std::to_string(pt->CThreadTransferDstScalarPerVector);
|
||||
|
||||
return (out);
|
||||
};
|
||||
|
||||
} // namespace detail_dyn_conv_fwd_v4r4_nchw_kcyx_nkhw
|
||||
|
||||
template <typename TInWei,
|
||||
typename TAcc,
|
||||
typename TOut,
|
||||
typename InLengths,
|
||||
typename WeiLengths,
|
||||
typename OutLengths,
|
||||
typename ConvStrides,
|
||||
typename ConvDilations,
|
||||
typename InLeftPads,
|
||||
typename InRightPads>
|
||||
void online_device_dynamic_convolution_forward_implicit_gemm_v4r4_dlops_nchw_kcyx_nkhw(
|
||||
online_compile::Handle* handle,
|
||||
const InLengths& in_n_c_hi_wi_lengths,
|
||||
const WeiLengths& wei_k_c_y_x_lengths,
|
||||
const OutLengths& out_n_k_ho_wo_lengths,
|
||||
const ConvStrides& conv_strides,
|
||||
const ConvDilations& conv_dilations,
|
||||
const InLeftPads& in_left_pads,
|
||||
const InRightPads& in_right_pads,
|
||||
const Tensor<TInWei>& in_n_c_hi_wi,
|
||||
const Tensor<TInWei>& wei_k_c_y_x,
|
||||
Tensor<TOut>& out_n_k_ho_wo,
|
||||
const tunable_dyn_conv_fwd_v4r4_dlops_nchw_kcyx_nkhw* tunable,
|
||||
ck::index_t nrepeat)
|
||||
{
|
||||
using namespace ck;
|
||||
using namespace ck::driver;
|
||||
using namespace detail_dyn_conv_fwd_v4r4_nchw_kcyx_nkhw;
|
||||
using size_t = std::size_t;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// The follow codes are only used for computing the grid_size, hasMainKBlockLoop,
|
||||
// hasDoubleTailKBlockLoop
|
||||
|
||||
constexpr auto I0 = Number<0>{};
|
||||
constexpr auto I1 = Number<1>{};
|
||||
constexpr auto I2 = Number<2>{};
|
||||
constexpr auto I3 = Number<3>{};
|
||||
|
||||
const auto in_n_c_hi_wi_desc =
|
||||
make_dynamic_naive_tensor_descriptor_packed_v2(in_n_c_hi_wi_lengths);
|
||||
const auto wei_k_c_y_x_desc =
|
||||
make_dynamic_naive_tensor_descriptor_packed_v2(wei_k_c_y_x_lengths);
|
||||
const auto out_n_k_ho_wo_desc =
|
||||
make_dynamic_naive_tensor_descriptor_packed_v2(out_n_k_ho_wo_lengths);
|
||||
|
||||
const auto descs =
|
||||
transform_forward_convolution_into_gemm_v4r4_nchw_kcyx_nkhw_pad(wei_k_c_y_x_desc,
|
||||
in_n_c_hi_wi_desc,
|
||||
out_n_k_ho_wo_desc,
|
||||
conv_strides,
|
||||
conv_dilations,
|
||||
in_left_pads,
|
||||
in_right_pads);
|
||||
const auto a_k_m_grid_desc = descs[I0];
|
||||
const auto c_m_n_grid_desc = descs[I2];
|
||||
const auto M = c_m_n_grid_desc.GetLength(I0);
|
||||
const auto N = c_m_n_grid_desc.GetLength(I1);
|
||||
const auto K = a_k_m_grid_desc.GetLength(I0);
|
||||
|
||||
const index_t grid_size = (M / tunable->MPerBlock) * (N / tunable->NPerBlock);
|
||||
const bool hasMainKBlockLoop = ((K + tunable->KPerBlock) / (2 * tunable->KPerBlock) > 1);
|
||||
const bool hasDoubleTailKBlockLoop = ((K / tunable->KPerBlock) % 2 == 0);
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// these buffers are usually provided by the user application
|
||||
DeviceMem in_n_c_hi_wi_dev_buf(sizeof(TInWei) * in_n_c_hi_wi.mDesc.GetElementSpace());
|
||||
DeviceMem wei_k_c_y_x_dev_buf(sizeof(TInWei) * wei_k_c_y_x.mDesc.GetElementSpace());
|
||||
DeviceMem out_n_k_ho_wo_dev_buf(sizeof(TOut) * out_n_k_ho_wo.mDesc.GetElementSpace());
|
||||
|
||||
in_n_c_hi_wi_dev_buf.ToDevice(in_n_c_hi_wi.mData.data());
|
||||
wei_k_c_y_x_dev_buf.ToDevice(wei_k_c_y_x.mData.data());
|
||||
out_n_k_ho_wo_dev_buf.ToDevice(out_n_k_ho_wo.mData.data());
|
||||
|
||||
// these are workspace buffers that should be expressed to the user by the corresponding
|
||||
// workspace API
|
||||
DeviceMem workspace_buf(4096);
|
||||
|
||||
void* a_k_m0_m1_grid_desc_dev_buf = workspace_buf.GetDeviceBuffer();
|
||||
void* b_k_n0_n1_grid_desc_dev_buf =
|
||||
static_cast<void*>(static_cast<unsigned char*>(workspace_buf.GetDeviceBuffer()) + 1024);
|
||||
void* c_m0_m10_m11_n0_n10_n11_grid_desc_dev_buf =
|
||||
static_cast<void*>(static_cast<unsigned char*>(workspace_buf.GetDeviceBuffer()) + 2048);
|
||||
void* c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf =
|
||||
static_cast<void*>(static_cast<unsigned char*>(workspace_buf.GetDeviceBuffer()) + 3072);
|
||||
|
||||
const std::vector<size_t> vld = {static_cast<size_t>(tunable->BlockSize), 1, 1};
|
||||
const std::vector<size_t> vgd1 = {static_cast<size_t>(tunable->BlockSize), 1, 1};
|
||||
const std::vector<size_t> vgd2 = {static_cast<size_t>(grid_size * tunable->BlockSize), 1, 1};
|
||||
|
||||
std::string program_name =
|
||||
"dynamic_convolution_forward_implicit_gemm_v4r4_dlops_nchw_kcyx_nkhw.cpp";
|
||||
std::string algo_name = "implicit_gemm_conv_fwd_v4r4_dlops_nchw";
|
||||
|
||||
std::string param = " -std=c++17 ";
|
||||
std::string network_config;
|
||||
|
||||
param += get_definition_string_from_types<TInWei, TAcc, TOut>() + " " +
|
||||
get_definition_string_from_tunable(tunable) +
|
||||
" -DCK_PARAM_HAS_MAIN_KBLOCK_LOOP=" + std::to_string(hasMainKBlockLoop) +
|
||||
" -DCK_PARAM_HAS_DOUBLE_TAIL_KBLOCK_LOOP=" + std::to_string(hasDoubleTailKBlockLoop);
|
||||
network_config = get_network_config_string_from_types<TInWei, TAcc, TOut>() + "_" +
|
||||
get_network_config_string_from_tunable(tunable) + "_" +
|
||||
std::to_string(hasMainKBlockLoop) + "_" +
|
||||
std::to_string(hasDoubleTailKBlockLoop);
|
||||
|
||||
std::vector<float> kernel1_times;
|
||||
std::vector<float> kernel2_times;
|
||||
|
||||
for(index_t i = 0; i < nrepeat; ++i)
|
||||
{
|
||||
KernelTimer timer1, timer2;
|
||||
std::string kernel_name;
|
||||
|
||||
kernel_name = "dynamic_convolution_forward_implicit_gemm_v4r4_dlops_nchw_kcyx_nkhw_prepare";
|
||||
auto network_config_1 = network_config + "_1";
|
||||
|
||||
timer1.Start();
|
||||
handle->AddKernel(algo_name, network_config_1, program_name, kernel_name, vld, vgd1, param)(
|
||||
static_cast<index_t>(in_n_c_hi_wi_lengths[I0]),
|
||||
static_cast<index_t>(in_n_c_hi_wi_lengths[I1]),
|
||||
static_cast<index_t>(in_n_c_hi_wi_lengths[I2]),
|
||||
static_cast<index_t>(in_n_c_hi_wi_lengths[I3]),
|
||||
static_cast<index_t>(wei_k_c_y_x_lengths[I0]),
|
||||
static_cast<index_t>(wei_k_c_y_x_lengths[I2]),
|
||||
static_cast<index_t>(wei_k_c_y_x_lengths[I3]),
|
||||
conv_strides[I0],
|
||||
conv_strides[I1],
|
||||
conv_dilations[I0],
|
||||
conv_dilations[I1],
|
||||
in_left_pads[I0],
|
||||
in_left_pads[I1],
|
||||
in_right_pads[I0],
|
||||
in_right_pads[I1],
|
||||
a_k_m0_m1_grid_desc_dev_buf,
|
||||
b_k_n0_n1_grid_desc_dev_buf,
|
||||
c_m0_m10_m11_n0_n10_n11_grid_desc_dev_buf,
|
||||
c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf);
|
||||
timer1.End();
|
||||
|
||||
kernel_name = "dynamic_convolution_forward_implicit_gemm_v4r4_dlops_nchw_kcyx_nkhw";
|
||||
auto network_config_2 = network_config + "_2";
|
||||
|
||||
timer2.Start();
|
||||
handle->AddKernel(algo_name, network_config_2, program_name, kernel_name, vld, vgd2, param)(
|
||||
reinterpret_cast<const TInWei*>(wei_k_c_y_x_dev_buf.GetDeviceBuffer()),
|
||||
reinterpret_cast<const TInWei*>(in_n_c_hi_wi_dev_buf.GetDeviceBuffer()),
|
||||
reinterpret_cast<TOut*>(out_n_k_ho_wo_dev_buf.GetDeviceBuffer()),
|
||||
(const void*)(a_k_m0_m1_grid_desc_dev_buf),
|
||||
(const void*)(b_k_n0_n1_grid_desc_dev_buf),
|
||||
(const void*)(c_m0_m10_m11_n0_n10_n11_grid_desc_dev_buf),
|
||||
(const void*)(c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf));
|
||||
timer2.End();
|
||||
|
||||
kernel1_times.push_back(timer1.GetElapsedTime());
|
||||
kernel2_times.push_back(timer2.GetElapsedTime());
|
||||
}
|
||||
|
||||
{
|
||||
auto ave_time1 =
|
||||
std::accumulate(
|
||||
std::next(kernel1_times.begin()), kernel1_times.end(), 0., std::plus<float>{}) /
|
||||
(nrepeat - 1);
|
||||
auto ave_time2 =
|
||||
std::accumulate(
|
||||
std::next(kernel2_times.begin()), kernel2_times.end(), 0., std::plus<float>{}) /
|
||||
(nrepeat - 1);
|
||||
|
||||
const auto N = in_n_c_hi_wi_lengths[I0];
|
||||
const auto C = in_n_c_hi_wi_lengths[I1];
|
||||
|
||||
const auto K = out_n_k_ho_wo_lengths[I1];
|
||||
const auto Ho = out_n_k_ho_wo_lengths[I2];
|
||||
const auto Wo = out_n_k_ho_wo_lengths[I3];
|
||||
|
||||
const auto Y = wei_k_c_y_x_lengths[I2];
|
||||
const auto X = wei_k_c_y_x_lengths[I3];
|
||||
|
||||
float perf = (float)(std::size_t(2) * N * K * Ho * Wo * C * Y * X) /
|
||||
(std::size_t(1000) * 1000 * 1000) / (ave_time1 + ave_time2);
|
||||
|
||||
std::cout << "Average time : " << ave_time1 + ave_time2 << " ms(" << ave_time1 << ", "
|
||||
<< ave_time2 << "), " << perf << " TFlop/s" << std::endl;
|
||||
};
|
||||
|
||||
// copy result back to host
|
||||
out_n_k_ho_wo_dev_buf.FromDevice(out_n_k_ho_wo.mData.data());
|
||||
}
|
||||
@@ -1,386 +0,0 @@
|
||||
#include "device.hpp"
|
||||
#include "host_tensor.hpp"
|
||||
#include "handle.hpp"
|
||||
#include "online_driver_common.hpp"
|
||||
#include "dynamic_tensor_descriptor.hpp"
|
||||
#include "dynamic_tensor_descriptor_helper.hpp"
|
||||
#include "conv_tunable_fwd_v4r4_xdlops_nchw_kcyx_nkhw.hpp"
|
||||
|
||||
namespace detail_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw {
|
||||
|
||||
template <typename TInWei, typename TAcc, typename TOut>
|
||||
static std::string get_network_config_string_from_types()
|
||||
{
|
||||
using namespace ck;
|
||||
|
||||
std::string out;
|
||||
|
||||
out += std::to_string(get_datatype_enum_from_type<TInWei>::value) + "_" +
|
||||
std::to_string(get_datatype_enum_from_type<TAcc>::value) + "_" +
|
||||
std::to_string(get_datatype_enum_from_type<TOut>::value);
|
||||
|
||||
return (out);
|
||||
};
|
||||
|
||||
static std::string
|
||||
get_network_config_string_from_tunable(const tunable_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw* pt)
|
||||
{
|
||||
std::string out("TUN_");
|
||||
|
||||
out += std::to_string(pt->BlockSize) + "_";
|
||||
|
||||
out += std::to_string(pt->MPerBlock) + "x" + std::to_string(pt->NPerBlock) + "x" +
|
||||
std::to_string(pt->KPerBlock) + "_";
|
||||
out += std::to_string(pt->MPerWave) + "x" + std::to_string(pt->NPerWave) + "x" +
|
||||
std::to_string(pt->MRepeat) + "x" + std::to_string(pt->NRepeat) + "x" +
|
||||
std::to_string(pt->K1) + "_";
|
||||
|
||||
out += std::to_string(pt->ABlockTransferThreadSliceLengths_K0_M_K1[0]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadSliceLengths_K0_M_K1[1]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadSliceLengths_K0_M_K1[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->ABlockTransferThreadClusterLengths_K0_M_K1[0]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterLengths_K0_M_K1[1]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterLengths_K0_M_K1[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[0]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[1]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->ABlockTransferSrcAccessOrder[0]) + "x" +
|
||||
std::to_string(pt->ABlockTransferSrcAccessOrder[1]) + "x" +
|
||||
std::to_string(pt->ABlockTransferSrcAccessOrder[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->ABlockTransferSrcVectorDim) + "_";
|
||||
out += std::to_string(pt->ABlockTransferSrcScalarPerVector) + "_";
|
||||
out += std::to_string(pt->ABlockTransferDstScalarPerVector_K1) + "_";
|
||||
out += std::to_string(pt->AThreadTransferSrcResetCoordinateAfterRun) + "_";
|
||||
|
||||
out += std::to_string(pt->BBlockTransferThreadSliceLengths_K0_N_K1[0]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadSliceLengths_K0_N_K1[1]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadSliceLengths_K0_N_K1[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->BBlockTransferThreadClusterLengths_K0_N_K1[0]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterLengths_K0_N_K1[1]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterLengths_K0_N_K1[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[0]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[1]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->BBlockTransferSrcAccessOrder[0]) + "x" +
|
||||
std::to_string(pt->BBlockTransferSrcAccessOrder[1]) + "x" +
|
||||
std::to_string(pt->BBlockTransferSrcAccessOrder[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->BBlockTransferSrcVectorDim) + "_";
|
||||
out += std::to_string(pt->BBlockTransferSrcScalarPerVector) + "_";
|
||||
out += std::to_string(pt->BBlockTransferDstScalarPerVector_K1) + "_";
|
||||
out += std::to_string(pt->BThreadTransferSrcResetCoordinateAfterRun) + "_";
|
||||
|
||||
out += std::to_string(pt->CThreadTransferSrcDstAccessOrder[0]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[1]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[2]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[3]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[4]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[5]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[6]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[7]) + "_";
|
||||
|
||||
out += std::to_string(pt->CThreadTransferSrcDstVectorDim) + "_";
|
||||
out += std::to_string(pt->CThreadTransferDstScalarPerVector);
|
||||
|
||||
return (out);
|
||||
};
|
||||
|
||||
template <typename TInWei, typename TAcc, typename TOut>
|
||||
static std::string get_definition_string_from_types()
|
||||
{
|
||||
using namespace ck;
|
||||
|
||||
std::string out;
|
||||
|
||||
out +=
|
||||
" -DCK_PARAM_ABDataTypeEnum=" + std::to_string(get_datatype_enum_from_type<TInWei>::value) +
|
||||
" -DCK_PARAM_AccDataTypeEnum=" + std::to_string(get_datatype_enum_from_type<TAcc>::value) +
|
||||
" -DCK_PARAM_CDataTypeEnum=" + std::to_string(get_datatype_enum_from_type<TOut>::value);
|
||||
|
||||
return (out);
|
||||
};
|
||||
|
||||
static std::string
|
||||
get_definition_string_from_tunable(const tunable_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw* pt)
|
||||
{
|
||||
std::string out;
|
||||
|
||||
out += " -DCK_PARAM_BlockSize=" + std::to_string(pt->BlockSize);
|
||||
|
||||
out += " -DCK_PARAM_MPerBlock=" + std::to_string(pt->MPerBlock) +
|
||||
" -DCK_PARAM_NPerBlock=" + std::to_string(pt->NPerBlock) +
|
||||
" -DCK_PARAM_KPerBlock=" + std::to_string(pt->KPerBlock);
|
||||
out += " -DCK_PARAM_MPerWave=" + std::to_string(pt->MPerWave) +
|
||||
" -DCK_PARAM_NPerWave=" + std::to_string(pt->NPerWave) +
|
||||
" -DCK_PARAM_K1=" + std::to_string(pt->K1) +
|
||||
" -DCK_PARAM_MRepeat=" + std::to_string(pt->MRepeat) +
|
||||
" -DCK_PARAM_NRepeat=" + std::to_string(pt->NRepeat);
|
||||
|
||||
out += " -DCK_PARAM_ABlockTransferThreadSliceLengths_K0_M_K1=" +
|
||||
std::to_string(pt->ABlockTransferThreadSliceLengths_K0_M_K1[0]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadSliceLengths_K0_M_K1[1]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadSliceLengths_K0_M_K1[2]);
|
||||
|
||||
out += " -DCK_PARAM_ABlockTransferThreadClusterLengths_K0_M_K1=" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterLengths_K0_M_K1[0]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadClusterLengths_K0_M_K1[1]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadClusterLengths_K0_M_K1[2]);
|
||||
|
||||
out += " -DCK_PARAM_ABlockTransferThreadClusterArrangeOrder=" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[0]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[1]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[2]);
|
||||
|
||||
out += " -DCK_PARAM_ABlockTransferSrcAccessOrder=" +
|
||||
std::to_string(pt->ABlockTransferSrcAccessOrder[0]) + "," +
|
||||
std::to_string(pt->ABlockTransferSrcAccessOrder[1]) + "," +
|
||||
std::to_string(pt->ABlockTransferSrcAccessOrder[2]);
|
||||
|
||||
out +=
|
||||
" -DCK_PARAM_ABlockTransferSrcVectorDim=" + std::to_string(pt->ABlockTransferSrcVectorDim);
|
||||
out += " -DCK_PARAM_ABlockTransferSrcScalarPerVector=" +
|
||||
std::to_string(pt->ABlockTransferSrcScalarPerVector);
|
||||
out += " -DCK_PARAM_ABlockTransferDstScalarPerVector_K1=" +
|
||||
std::to_string(pt->ABlockTransferDstScalarPerVector_K1);
|
||||
out += " -DCK_PARAM_AThreadTransferSrcResetCoordinateAfterRun=" +
|
||||
std::to_string(pt->AThreadTransferSrcResetCoordinateAfterRun);
|
||||
|
||||
out += " -DCK_PARAM_BBlockTransferThreadSliceLengths_K0_N_K1=" +
|
||||
std::to_string(pt->BBlockTransferThreadSliceLengths_K0_N_K1[0]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadSliceLengths_K0_N_K1[1]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadSliceLengths_K0_N_K1[2]);
|
||||
|
||||
out += " -DCK_PARAM_BBlockTransferThreadClusterLengths_K0_N_K1=" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterLengths_K0_N_K1[0]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadClusterLengths_K0_N_K1[1]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadClusterLengths_K0_N_K1[2]);
|
||||
|
||||
out += " -DCK_PARAM_BBlockTransferThreadClusterArrangeOrder=" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[0]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[1]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[2]);
|
||||
|
||||
out += " -DCK_PARAM_BBlockTransferSrcAccessOrder=" +
|
||||
std::to_string(pt->BBlockTransferSrcAccessOrder[0]) + "," +
|
||||
std::to_string(pt->BBlockTransferSrcAccessOrder[1]) + "," +
|
||||
std::to_string(pt->BBlockTransferSrcAccessOrder[2]);
|
||||
|
||||
out +=
|
||||
" -DCK_PARAM_BBlockTransferSrcVectorDim=" + std::to_string(pt->BBlockTransferSrcVectorDim);
|
||||
out += " -DCK_PARAM_BBlockTransferSrcScalarPerVector=" +
|
||||
std::to_string(pt->BBlockTransferSrcScalarPerVector);
|
||||
out += " -DCK_PARAM_BBlockTransferDstScalarPerVector_K1=" +
|
||||
std::to_string(pt->BBlockTransferDstScalarPerVector_K1);
|
||||
out += " -DCK_PARAM_BThreadTransferSrcResetCoordinateAfterRun=" +
|
||||
std::to_string(pt->BThreadTransferSrcResetCoordinateAfterRun);
|
||||
|
||||
out += " -DCK_PARAM_CThreadTransferSrcDstAccessOrder=" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[0]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[1]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[2]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[3]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[4]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[5]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[6]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[7]);
|
||||
|
||||
out += " -DCK_PARAM_CThreadTransferSrcDstVectorDim=" +
|
||||
std::to_string(pt->CThreadTransferSrcDstVectorDim);
|
||||
out += " -DCK_PARAM_CThreadTransferDstScalarPerVector=" +
|
||||
std::to_string(pt->CThreadTransferDstScalarPerVector);
|
||||
|
||||
return (out);
|
||||
};
|
||||
|
||||
} // namespace detail_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw
|
||||
|
||||
template <typename TInWei,
|
||||
typename TAcc,
|
||||
typename TOut,
|
||||
typename InLengths,
|
||||
typename WeiLengths,
|
||||
typename OutLengths,
|
||||
typename ConvStrides,
|
||||
typename ConvDilations,
|
||||
typename InLeftPads,
|
||||
typename InRightPads>
|
||||
void online_device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw(
|
||||
online_compile::Handle* handle,
|
||||
const InLengths& in_n_c_hi_wi_lengths,
|
||||
const WeiLengths& wei_k_c_y_x_lengths,
|
||||
const OutLengths& out_n_k_ho_wo_lengths,
|
||||
const ConvStrides& conv_strides,
|
||||
const ConvDilations& conv_dilations,
|
||||
const InLeftPads& in_left_pads,
|
||||
const InRightPads& in_right_pads,
|
||||
const Tensor<TInWei>& in_n_c_hi_wi,
|
||||
const Tensor<TInWei>& wei_k_c_y_x,
|
||||
Tensor<TOut>& out_n_k_ho_wo,
|
||||
const tunable_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw* tunable,
|
||||
ck::index_t nrepeat)
|
||||
{
|
||||
using namespace ck;
|
||||
using namespace ck::driver;
|
||||
using namespace detail_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw;
|
||||
using size_t = std::size_t;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
constexpr auto I0 = Number<0>{};
|
||||
constexpr auto I1 = Number<1>{};
|
||||
constexpr auto I2 = Number<2>{};
|
||||
constexpr auto I3 = Number<3>{};
|
||||
|
||||
const auto in_n_c_hi_wi_desc =
|
||||
make_dynamic_naive_tensor_descriptor_packed_v2(in_n_c_hi_wi_lengths);
|
||||
const auto wei_k_c_y_x_desc =
|
||||
make_dynamic_naive_tensor_descriptor_packed_v2(wei_k_c_y_x_lengths);
|
||||
const auto out_n_k_ho_wo_desc =
|
||||
make_dynamic_naive_tensor_descriptor_packed_v2(out_n_k_ho_wo_lengths);
|
||||
|
||||
const auto n = in_n_c_hi_wi_desc.GetLength(I0);
|
||||
const auto c = in_n_c_hi_wi_desc.GetLength(I1);
|
||||
const auto hi = in_n_c_hi_wi_desc.GetLength(I2);
|
||||
const auto wi = in_n_c_hi_wi_desc.GetLength(I3);
|
||||
const auto k = wei_k_c_y_x_desc.GetLength(I0);
|
||||
const auto y = wei_k_c_y_x_desc.GetLength(I2);
|
||||
const auto x = wei_k_c_y_x_desc.GetLength(I3);
|
||||
const auto ho = out_n_k_ho_wo_desc.GetLength(I2);
|
||||
const auto wo = out_n_k_ho_wo_desc.GetLength(I3);
|
||||
|
||||
const auto M = k;
|
||||
const auto N = n * ho * wo;
|
||||
const auto K = c * y * x;
|
||||
const auto K0 = K / tunable->K1;
|
||||
|
||||
const index_t grid_size = (M / tunable->MPerBlock) * (N / tunable->NPerBlock);
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// these buffers are usually provided by the user application
|
||||
DeviceMem in_n_c_hi_wi_dev_buf(sizeof(TInWei) * in_n_c_hi_wi.mDesc.GetElementSpace());
|
||||
DeviceMem wei_k_c_y_x_dev_buf(sizeof(TInWei) * wei_k_c_y_x.mDesc.GetElementSpace());
|
||||
DeviceMem out_n_k_ho_wo_dev_buf(sizeof(TOut) * out_n_k_ho_wo.mDesc.GetElementSpace());
|
||||
|
||||
in_n_c_hi_wi_dev_buf.ToDevice(in_n_c_hi_wi.mData.data());
|
||||
wei_k_c_y_x_dev_buf.ToDevice(wei_k_c_y_x.mData.data());
|
||||
out_n_k_ho_wo_dev_buf.ToDevice(out_n_k_ho_wo.mData.data());
|
||||
|
||||
// these are workspace buffers that should be expressed to the user by the corresponding
|
||||
// workspace API
|
||||
DeviceMem workspace_buf(4096);
|
||||
|
||||
void* a_k_m0_m1_grid_desc_dev_buf = workspace_buf.GetDeviceBuffer();
|
||||
void* b_k_n0_n1_grid_desc_dev_buf =
|
||||
static_cast<void*>(static_cast<unsigned char*>(workspace_buf.GetDeviceBuffer()) + 1024);
|
||||
void* c_m0_m10_m11_n0_n10_n11_grid_desc_dev_buf =
|
||||
static_cast<void*>(static_cast<unsigned char*>(workspace_buf.GetDeviceBuffer()) + 2048);
|
||||
void* c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf =
|
||||
static_cast<void*>(static_cast<unsigned char*>(workspace_buf.GetDeviceBuffer()) + 3072);
|
||||
|
||||
const std::vector<size_t> vld = {static_cast<size_t>(tunable->BlockSize), 1, 1};
|
||||
const std::vector<size_t> vgd1 = {static_cast<size_t>(tunable->BlockSize), 1, 1};
|
||||
const std::vector<size_t> vgd2 = {static_cast<size_t>(grid_size * tunable->BlockSize), 1, 1};
|
||||
|
||||
std::string program_name =
|
||||
"dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw.cpp";
|
||||
std::string algo_name = "implicit_gemm_conv_fwd_v4r4_xdlops_nchw";
|
||||
|
||||
std::string param = " -std=c++17 ";
|
||||
std::string network_config;
|
||||
|
||||
param += get_definition_string_from_types<TInWei, TAcc, TOut>() + " " + " -DCK_USE_AMD_XDLOPS" +
|
||||
get_definition_string_from_tunable(tunable);
|
||||
|
||||
network_config = get_network_config_string_from_types<TInWei, TAcc, TOut>() + "_" +
|
||||
get_network_config_string_from_tunable(tunable);
|
||||
|
||||
std::vector<float> kernel1_times;
|
||||
std::vector<float> kernel2_times;
|
||||
|
||||
for(index_t i = 0; i < nrepeat; ++i)
|
||||
{
|
||||
KernelTimer timer1, timer2;
|
||||
std::string kernel_name;
|
||||
|
||||
kernel_name =
|
||||
"dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw_prepare";
|
||||
auto network_config_1 = network_config + "_1";
|
||||
|
||||
timer1.Start();
|
||||
handle->AddKernel(algo_name, network_config_1, program_name, kernel_name, vld, vgd1, param)(
|
||||
static_cast<index_t>(in_n_c_hi_wi_lengths[I0]),
|
||||
static_cast<index_t>(in_n_c_hi_wi_lengths[I1]),
|
||||
static_cast<index_t>(in_n_c_hi_wi_lengths[I2]),
|
||||
static_cast<index_t>(in_n_c_hi_wi_lengths[I3]),
|
||||
static_cast<index_t>(wei_k_c_y_x_lengths[I0]),
|
||||
static_cast<index_t>(wei_k_c_y_x_lengths[I2]),
|
||||
static_cast<index_t>(wei_k_c_y_x_lengths[I3]),
|
||||
conv_strides[I0],
|
||||
conv_strides[I1],
|
||||
conv_dilations[I0],
|
||||
conv_dilations[I1],
|
||||
in_left_pads[I0],
|
||||
in_left_pads[I1],
|
||||
in_right_pads[I0],
|
||||
in_right_pads[I1],
|
||||
a_k_m0_m1_grid_desc_dev_buf,
|
||||
b_k_n0_n1_grid_desc_dev_buf,
|
||||
c_m0_m10_m11_n0_n10_n11_grid_desc_dev_buf,
|
||||
c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf);
|
||||
timer1.End();
|
||||
|
||||
kernel_name = "dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw";
|
||||
auto network_config_2 = network_config + "_2";
|
||||
|
||||
timer2.Start();
|
||||
handle->AddKernel(algo_name, network_config_2, program_name, kernel_name, vld, vgd2, param)(
|
||||
reinterpret_cast<const TInWei*>(wei_k_c_y_x_dev_buf.GetDeviceBuffer()),
|
||||
reinterpret_cast<const TInWei*>(in_n_c_hi_wi_dev_buf.GetDeviceBuffer()),
|
||||
reinterpret_cast<TOut*>(out_n_k_ho_wo_dev_buf.GetDeviceBuffer()),
|
||||
(const void*)(a_k_m0_m1_grid_desc_dev_buf),
|
||||
(const void*)(b_k_n0_n1_grid_desc_dev_buf),
|
||||
(const void*)(c_m0_m10_m11_n0_n10_n11_grid_desc_dev_buf),
|
||||
(const void*)(c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf));
|
||||
timer2.End();
|
||||
|
||||
kernel1_times.push_back(timer1.GetElapsedTime());
|
||||
kernel2_times.push_back(timer2.GetElapsedTime());
|
||||
}
|
||||
|
||||
{
|
||||
auto ave_time1 =
|
||||
std::accumulate(
|
||||
std::next(kernel1_times.begin()), kernel1_times.end(), 0., std::plus<float>{}) /
|
||||
(nrepeat - 1);
|
||||
auto ave_time2 =
|
||||
std::accumulate(
|
||||
std::next(kernel2_times.begin()), kernel2_times.end(), 0., std::plus<float>{}) /
|
||||
(nrepeat - 1);
|
||||
|
||||
const auto N = in_n_c_hi_wi_lengths[I0];
|
||||
const auto C = in_n_c_hi_wi_lengths[I1];
|
||||
|
||||
const auto K = out_n_k_ho_wo_lengths[I1];
|
||||
const auto Ho = out_n_k_ho_wo_lengths[I2];
|
||||
const auto Wo = out_n_k_ho_wo_lengths[I3];
|
||||
|
||||
const auto Y = wei_k_c_y_x_lengths[I2];
|
||||
const auto X = wei_k_c_y_x_lengths[I3];
|
||||
|
||||
float perf = (float)(std::size_t(2) * N * K * Ho * Wo * C * Y * X) /
|
||||
(std::size_t(1000) * 1000 * 1000) / (ave_time1 + ave_time2);
|
||||
|
||||
std::cout << "Average time : " << ave_time1 + ave_time2 << " ms(" << ave_time1 << ", "
|
||||
<< ave_time2 << "), " << perf << " TFlop/s" << std::endl;
|
||||
};
|
||||
|
||||
// copy result back to host
|
||||
out_n_k_ho_wo_dev_buf.FromDevice(out_n_k_ho_wo.mData.data());
|
||||
}
|
||||
@@ -1,389 +0,0 @@
|
||||
#include "device.hpp"
|
||||
#include "host_tensor.hpp"
|
||||
#include "handle.hpp"
|
||||
#include "online_driver_common.hpp"
|
||||
#include "dynamic_tensor_descriptor.hpp"
|
||||
#include "dynamic_tensor_descriptor_helper.hpp"
|
||||
#include "transform_forward_convolution_into_gemm_v4r4r4_nhwc_kyxc_nhwk.hpp"
|
||||
#include "conv_tunable_fwd_v4r4_xdlops_nhwc_kyxc_nhwk.hpp"
|
||||
|
||||
namespace detail_dyn_conv_fwd_v4r4_xdlops_nhwc_kyxc_nhwk {
|
||||
|
||||
template <typename TInWei, typename TAcc, typename TOut>
|
||||
static std::string get_network_config_string_from_types()
|
||||
{
|
||||
using namespace ck;
|
||||
|
||||
std::string out;
|
||||
|
||||
out += std::to_string(get_datatype_enum_from_type<TInWei>::value) + "_" +
|
||||
std::to_string(get_datatype_enum_from_type<TAcc>::value) + "_" +
|
||||
std::to_string(get_datatype_enum_from_type<TOut>::value);
|
||||
|
||||
return (out);
|
||||
};
|
||||
|
||||
static std::string
|
||||
get_network_config_string_from_tunable(const tunable_dyn_conv_fwd_v4r4_xdlops_nhwc_kyxc_nhwk* pt)
|
||||
{
|
||||
std::string out("TUN_");
|
||||
|
||||
out += std::to_string(pt->BlockSize) + "_";
|
||||
|
||||
out += std::to_string(pt->MPerBlock) + "x" + std::to_string(pt->NPerBlock) + "x" +
|
||||
std::to_string(pt->KPerBlock) + "_";
|
||||
out += std::to_string(pt->MPerWave) + "x" + std::to_string(pt->NPerWave) + "x" +
|
||||
std::to_string(pt->MRepeat) + "x" + std::to_string(pt->NRepeat) + "x" +
|
||||
std::to_string(pt->K1) + "_";
|
||||
|
||||
out += std::to_string(pt->ABlockTransferThreadSliceLengths_K0_M_K1[0]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadSliceLengths_K0_M_K1[1]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadSliceLengths_K0_M_K1[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->ABlockTransferThreadClusterLengths_K0_M_K1[0]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterLengths_K0_M_K1[1]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterLengths_K0_M_K1[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[0]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[1]) + "x" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->ABlockTransferSrcAccessOrder[0]) + "x" +
|
||||
std::to_string(pt->ABlockTransferSrcAccessOrder[1]) + "x" +
|
||||
std::to_string(pt->ABlockTransferSrcAccessOrder[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->ABlockTransferSrcVectorDim) + "_";
|
||||
out += std::to_string(pt->ABlockTransferSrcScalarPerVector) + "_";
|
||||
out += std::to_string(pt->ABlockTransferDstScalarPerVector_K1) + "_";
|
||||
out += std::to_string(pt->AThreadTransferSrcResetCoordinateAfterRun) + "_";
|
||||
|
||||
out += std::to_string(pt->BBlockTransferThreadSliceLengths_K0_N_K1[0]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadSliceLengths_K0_N_K1[1]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadSliceLengths_K0_N_K1[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->BBlockTransferThreadClusterLengths_K0_N_K1[0]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterLengths_K0_N_K1[1]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterLengths_K0_N_K1[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[0]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[1]) + "x" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->BBlockTransferSrcAccessOrder[0]) + "x" +
|
||||
std::to_string(pt->BBlockTransferSrcAccessOrder[1]) + "x" +
|
||||
std::to_string(pt->BBlockTransferSrcAccessOrder[2]) + "_";
|
||||
|
||||
out += std::to_string(pt->BBlockTransferSrcVectorDim) + "_";
|
||||
out += std::to_string(pt->BBlockTransferSrcScalarPerVector) + "_";
|
||||
out += std::to_string(pt->BBlockTransferDstScalarPerVector_K1) + "_";
|
||||
out += std::to_string(pt->BThreadTransferSrcResetCoordinateAfterRun) + "_";
|
||||
|
||||
out += std::to_string(pt->CThreadTransferSrcDstAccessOrder[0]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[1]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[2]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[3]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[4]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[5]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[6]) + "x" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[7]) + "_";
|
||||
|
||||
out += std::to_string(pt->CThreadTransferSrcDstVectorDim) + "_";
|
||||
out += std::to_string(pt->CThreadTransferDstScalarPerVector);
|
||||
|
||||
return (out);
|
||||
};
|
||||
|
||||
template <typename TInWei, typename TAcc, typename TOut>
|
||||
static std::string get_definition_string_from_types()
|
||||
{
|
||||
using namespace ck;
|
||||
|
||||
std::string out;
|
||||
|
||||
out +=
|
||||
" -DCK_PARAM_ABDataTypeEnum=" + std::to_string(get_datatype_enum_from_type<TInWei>::value) +
|
||||
" -DCK_PARAM_AccDataTypeEnum=" + std::to_string(get_datatype_enum_from_type<TAcc>::value) +
|
||||
" -DCK_PARAM_CDataTypeEnum=" + std::to_string(get_datatype_enum_from_type<TOut>::value);
|
||||
|
||||
return (out);
|
||||
};
|
||||
|
||||
static std::string
|
||||
get_definition_string_from_tunable(const tunable_dyn_conv_fwd_v4r4_xdlops_nhwc_kyxc_nhwk* pt)
|
||||
{
|
||||
std::string out;
|
||||
|
||||
out += " -DCK_PARAM_BlockSize=" + std::to_string(pt->BlockSize);
|
||||
|
||||
out += " -DCK_PARAM_MPerBlock=" + std::to_string(pt->MPerBlock) +
|
||||
" -DCK_PARAM_NPerBlock=" + std::to_string(pt->NPerBlock) +
|
||||
" -DCK_PARAM_KPerBlock=" + std::to_string(pt->KPerBlock);
|
||||
out += " -DCK_PARAM_MPerWave=" + std::to_string(pt->MPerWave) +
|
||||
" -DCK_PARAM_NPerWave=" + std::to_string(pt->NPerWave) +
|
||||
" -DCK_PARAM_K1=" + std::to_string(pt->K1) +
|
||||
" -DCK_PARAM_MRepeat=" + std::to_string(pt->MRepeat) +
|
||||
" -DCK_PARAM_NRepeat=" + std::to_string(pt->NRepeat);
|
||||
|
||||
out += " -DCK_PARAM_ABlockTransferThreadSliceLengths_K0_M_K1=" +
|
||||
std::to_string(pt->ABlockTransferThreadSliceLengths_K0_M_K1[0]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadSliceLengths_K0_M_K1[1]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadSliceLengths_K0_M_K1[2]);
|
||||
|
||||
out += " -DCK_PARAM_ABlockTransferThreadClusterLengths_K0_M_K1=" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterLengths_K0_M_K1[0]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadClusterLengths_K0_M_K1[1]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadClusterLengths_K0_M_K1[2]);
|
||||
|
||||
out += " -DCK_PARAM_ABlockTransferThreadClusterArrangeOrder=" +
|
||||
std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[0]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[1]) + "," +
|
||||
std::to_string(pt->ABlockTransferThreadClusterArrangeOrder[2]);
|
||||
|
||||
out += " -DCK_PARAM_ABlockTransferSrcAccessOrder=" +
|
||||
std::to_string(pt->ABlockTransferSrcAccessOrder[0]) + "," +
|
||||
std::to_string(pt->ABlockTransferSrcAccessOrder[1]) + "," +
|
||||
std::to_string(pt->ABlockTransferSrcAccessOrder[2]);
|
||||
|
||||
out +=
|
||||
" -DCK_PARAM_ABlockTransferSrcVectorDim=" + std::to_string(pt->ABlockTransferSrcVectorDim);
|
||||
out += " -DCK_PARAM_ABlockTransferSrcScalarPerVector=" +
|
||||
std::to_string(pt->ABlockTransferSrcScalarPerVector);
|
||||
out += " -DCK_PARAM_ABlockTransferDstScalarPerVector_K1=" +
|
||||
std::to_string(pt->ABlockTransferDstScalarPerVector_K1);
|
||||
out += " -DCK_PARAM_AThreadTransferSrcResetCoordinateAfterRun=" +
|
||||
std::to_string(pt->AThreadTransferSrcResetCoordinateAfterRun);
|
||||
|
||||
out += " -DCK_PARAM_BBlockTransferThreadSliceLengths_K0_N_K1=" +
|
||||
std::to_string(pt->BBlockTransferThreadSliceLengths_K0_N_K1[0]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadSliceLengths_K0_N_K1[1]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadSliceLengths_K0_N_K1[2]);
|
||||
|
||||
out += " -DCK_PARAM_BBlockTransferThreadClusterLengths_K0_N_K1=" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterLengths_K0_N_K1[0]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadClusterLengths_K0_N_K1[1]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadClusterLengths_K0_N_K1[2]);
|
||||
|
||||
out += " -DCK_PARAM_BBlockTransferThreadClusterArrangeOrder=" +
|
||||
std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[0]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[1]) + "," +
|
||||
std::to_string(pt->BBlockTransferThreadClusterArrangeOrder[2]);
|
||||
|
||||
out += " -DCK_PARAM_BBlockTransferSrcAccessOrder=" +
|
||||
std::to_string(pt->BBlockTransferSrcAccessOrder[0]) + "," +
|
||||
std::to_string(pt->BBlockTransferSrcAccessOrder[1]) + "," +
|
||||
std::to_string(pt->BBlockTransferSrcAccessOrder[2]);
|
||||
|
||||
out +=
|
||||
" -DCK_PARAM_BBlockTransferSrcVectorDim=" + std::to_string(pt->BBlockTransferSrcVectorDim);
|
||||
out += " -DCK_PARAM_BBlockTransferSrcScalarPerVector=" +
|
||||
std::to_string(pt->BBlockTransferSrcScalarPerVector);
|
||||
out += " -DCK_PARAM_BBlockTransferDstScalarPerVector_K1=" +
|
||||
std::to_string(pt->BBlockTransferDstScalarPerVector_K1);
|
||||
out += " -DCK_PARAM_BThreadTransferSrcResetCoordinateAfterRun=" +
|
||||
std::to_string(pt->BThreadTransferSrcResetCoordinateAfterRun);
|
||||
|
||||
out += " -DCK_PARAM_CThreadTransferSrcDstAccessOrder=" +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[0]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[1]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[2]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[3]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[4]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[5]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[6]) + "," +
|
||||
std::to_string(pt->CThreadTransferSrcDstAccessOrder[7]);
|
||||
|
||||
out += " -DCK_PARAM_CThreadTransferSrcDstVectorDim=" +
|
||||
std::to_string(pt->CThreadTransferSrcDstVectorDim);
|
||||
out += " -DCK_PARAM_CThreadTransferDstScalarPerVector=" +
|
||||
std::to_string(pt->CThreadTransferDstScalarPerVector);
|
||||
|
||||
return (out);
|
||||
};
|
||||
|
||||
} // namespace detail_dyn_conv_fwd_v4r4_xdlops_nhwc_kyxc_nhwk
|
||||
|
||||
template <typename TInWei,
|
||||
typename TAcc,
|
||||
typename TOut,
|
||||
typename InLengths,
|
||||
typename WeiLengths,
|
||||
typename OutLengths,
|
||||
typename ConvStrides,
|
||||
typename ConvDilations,
|
||||
typename InLeftPads,
|
||||
typename InRightPads>
|
||||
void online_device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nhwc_kyxc_nhwk(
|
||||
online_compile::Handle* handle,
|
||||
const InLengths& in_n_hi_wi_c_lengths,
|
||||
const WeiLengths& wei_k_y_x_c_lengths,
|
||||
const OutLengths& out_n_ho_wo_k_lengths,
|
||||
const ConvStrides& conv_strides,
|
||||
const ConvDilations& conv_dilations,
|
||||
const InLeftPads& in_left_pads,
|
||||
const InRightPads& in_right_pads,
|
||||
const Tensor<TInWei>& in_n_hi_wi_c,
|
||||
const Tensor<TInWei>& wei_k_y_x_c,
|
||||
Tensor<TOut>& out_n_ho_wo_k,
|
||||
const tunable_dyn_conv_fwd_v4r4_xdlops_nhwc_kyxc_nhwk* tunable,
|
||||
ck::index_t nrepeat)
|
||||
{
|
||||
using namespace ck;
|
||||
using namespace detail_dyn_conv_fwd_v4r4_xdlops_nhwc_kyxc_nhwk;
|
||||
using size_t = std::size_t;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// The follow codes are only used for computing the grid_size, hasMainKBlockLoop,
|
||||
// hasDoubleTailKBlockLoop
|
||||
|
||||
constexpr auto I0 = Number<0>{};
|
||||
constexpr auto I1 = Number<1>{};
|
||||
constexpr auto I2 = Number<2>{};
|
||||
constexpr auto I3 = Number<3>{};
|
||||
|
||||
const auto in_n_hi_wi_c_desc =
|
||||
make_dynamic_naive_tensor_descriptor_packed_v2(in_n_hi_wi_c_lengths);
|
||||
const auto wei_k_y_x_c_desc =
|
||||
make_dynamic_naive_tensor_descriptor_packed_v2(wei_k_y_x_c_lengths);
|
||||
const auto out_n_ho_wo_k_desc =
|
||||
make_dynamic_naive_tensor_descriptor_packed_v2(out_n_ho_wo_k_lengths);
|
||||
|
||||
const auto n = in_n_hi_wi_c_desc.GetLength(I0);
|
||||
const auto hi = in_n_hi_wi_c_desc.GetLength(I1);
|
||||
const auto wi = in_n_hi_wi_c_desc.GetLength(I2);
|
||||
const auto c = in_n_hi_wi_c_desc.GetLength(I3);
|
||||
|
||||
const auto k = wei_k_y_x_c_desc.GetLength(I0);
|
||||
const auto y = wei_k_y_x_c_desc.GetLength(I1);
|
||||
const auto x = wei_k_y_x_c_desc.GetLength(I2);
|
||||
|
||||
const auto ho = out_n_ho_wo_k_desc.GetLength(I1);
|
||||
const auto wo = out_n_ho_wo_k_desc.GetLength(I2);
|
||||
|
||||
const auto M = k;
|
||||
const auto N = n * ho * wo;
|
||||
const auto K = c * y * x;
|
||||
const auto K0 = K / tunable->K1;
|
||||
|
||||
const index_t grid_size = (M / tunable->MPerBlock) * (N / tunable->NPerBlock);
|
||||
|
||||
// these buffers are usually provided by the user application
|
||||
DeviceMem in_n_hi_wi_c_dev_buf(sizeof(TInWei) * in_n_hi_wi_c.mDesc.GetElementSpace());
|
||||
DeviceMem wei_k_y_x_c_dev_buf(sizeof(TInWei) * wei_k_y_x_c.mDesc.GetElementSpace());
|
||||
DeviceMem out_n_ho_wo_k_dev_buf(sizeof(TOut) * out_n_ho_wo_k.mDesc.GetElementSpace());
|
||||
|
||||
in_n_hi_wi_c_dev_buf.ToDevice(in_n_hi_wi_c.mData.data());
|
||||
wei_k_y_x_c_dev_buf.ToDevice(wei_k_y_x_c.mData.data());
|
||||
out_n_ho_wo_k_dev_buf.ToDevice(out_n_ho_wo_k.mData.data());
|
||||
|
||||
// these are workspace buffers that should be expressed to the user by the corresponding
|
||||
// workspace API
|
||||
DeviceMem workspace_buf(4096);
|
||||
|
||||
void* a_k0_m_k1_grid_desc_dev_buf = workspace_buf.GetDeviceBuffer();
|
||||
void* b_k0_n_k1_grid_desc_dev_buf =
|
||||
static_cast<void*>(static_cast<unsigned char*>(workspace_buf.GetDeviceBuffer()) + 1024);
|
||||
void* c_m0_m1_m2_n_grid_desc_dev_buf =
|
||||
static_cast<void*>(static_cast<unsigned char*>(workspace_buf.GetDeviceBuffer()) + 2048);
|
||||
void* c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf =
|
||||
static_cast<void*>(static_cast<unsigned char*>(workspace_buf.GetDeviceBuffer()) + 3072);
|
||||
|
||||
const std::vector<size_t> vld = {static_cast<size_t>(tunable->BlockSize), 1, 1};
|
||||
const std::vector<size_t> vgd1 = {static_cast<size_t>(tunable->BlockSize), 1, 1};
|
||||
const std::vector<size_t> vgd2 = {static_cast<size_t>(grid_size * tunable->BlockSize), 1, 1};
|
||||
|
||||
std::string program_name =
|
||||
"dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nhwc_kyxc_nhwk.cpp";
|
||||
std::string algo_name = "implicit_gemm_conv_fwd_v4r4_xdlops_nhwc";
|
||||
|
||||
std::string param = " -std=c++17 ";
|
||||
std::string network_config;
|
||||
|
||||
param += get_definition_string_from_types<TInWei, TAcc, TOut>() + " -DCK_USE_AMD_XDLOPS ";
|
||||
param += get_definition_string_from_tunable(tunable);
|
||||
|
||||
network_config = get_network_config_string_from_types<TInWei, TAcc, TOut>() + "_" +
|
||||
get_network_config_string_from_tunable(tunable);
|
||||
|
||||
std::vector<float> kernel1_times;
|
||||
std::vector<float> kernel2_times;
|
||||
|
||||
for(index_t i = 0; i < nrepeat; ++i)
|
||||
{
|
||||
KernelTimer timer1, timer2;
|
||||
std::string kernel_name;
|
||||
|
||||
kernel_name =
|
||||
"dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nhwc_kyxc_nhwk_prepare";
|
||||
auto network_config_1 = network_config + "_1";
|
||||
|
||||
timer1.Start();
|
||||
handle->AddKernel(algo_name, network_config_1, program_name, kernel_name, vld, vgd1, param)(
|
||||
static_cast<index_t>(in_n_hi_wi_c_lengths[I0]),
|
||||
static_cast<index_t>(in_n_hi_wi_c_lengths[I1]),
|
||||
static_cast<index_t>(in_n_hi_wi_c_lengths[I2]),
|
||||
static_cast<index_t>(in_n_hi_wi_c_lengths[I3]),
|
||||
static_cast<index_t>(wei_k_y_x_c_lengths[I0]),
|
||||
static_cast<index_t>(wei_k_y_x_c_lengths[I1]),
|
||||
static_cast<index_t>(wei_k_y_x_c_lengths[I2]),
|
||||
conv_strides[I0],
|
||||
conv_strides[I1],
|
||||
conv_dilations[I0],
|
||||
conv_dilations[I1],
|
||||
in_left_pads[I0],
|
||||
in_left_pads[I1],
|
||||
in_right_pads[I0],
|
||||
in_right_pads[I1],
|
||||
a_k0_m_k1_grid_desc_dev_buf,
|
||||
b_k0_n_k1_grid_desc_dev_buf,
|
||||
c_m0_m1_m2_n_grid_desc_dev_buf,
|
||||
c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf);
|
||||
timer1.End();
|
||||
|
||||
kernel_name = "dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nhwc_kyxc_nhwk";
|
||||
auto network_config_2 = network_config + "_2";
|
||||
|
||||
timer2.Start();
|
||||
handle->AddKernel(algo_name, network_config_2, program_name, kernel_name, vld, vgd2, param)(
|
||||
reinterpret_cast<const TInWei*>(in_n_hi_wi_c_dev_buf.GetDeviceBuffer()),
|
||||
reinterpret_cast<const TInWei*>(wei_k_y_x_c_dev_buf.GetDeviceBuffer()),
|
||||
reinterpret_cast<TOut*>(out_n_ho_wo_k_dev_buf.GetDeviceBuffer()),
|
||||
(const void*)(a_k0_m_k1_grid_desc_dev_buf),
|
||||
(const void*)(b_k0_n_k1_grid_desc_dev_buf),
|
||||
(const void*)(c_m0_m1_m2_n_grid_desc_dev_buf),
|
||||
(const void*)(c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf));
|
||||
timer2.End();
|
||||
|
||||
kernel1_times.push_back(timer1.GetElapsedTime());
|
||||
kernel2_times.push_back(timer2.GetElapsedTime());
|
||||
}
|
||||
|
||||
{
|
||||
auto ave_time1 =
|
||||
std::accumulate(
|
||||
std::next(kernel1_times.begin()), kernel1_times.end(), 0., std::plus<float>{}) /
|
||||
(nrepeat - 1);
|
||||
auto ave_time2 =
|
||||
std::accumulate(
|
||||
std::next(kernel2_times.begin()), kernel2_times.end(), 0., std::plus<float>{}) /
|
||||
(nrepeat - 1);
|
||||
|
||||
const auto N = in_n_hi_wi_c_lengths[I0];
|
||||
const auto C = in_n_hi_wi_c_lengths[I3];
|
||||
|
||||
const auto Ho = out_n_ho_wo_k_lengths[I1];
|
||||
const auto Wo = out_n_ho_wo_k_lengths[I2];
|
||||
const auto K = out_n_ho_wo_k_lengths[I3];
|
||||
|
||||
const auto Y = wei_k_y_x_c_lengths[I1];
|
||||
const auto X = wei_k_y_x_c_lengths[I2];
|
||||
|
||||
float perf = (float)(std::size_t(2) * N * K * Ho * Wo * C * Y * X) /
|
||||
(std::size_t(1000) * 1000 * 1000) / ave_time2;
|
||||
|
||||
std::cout << "Average time : " << ave_time1 + ave_time2 << " ms(" << ave_time1 << ", "
|
||||
<< ave_time2 << "), " << perf << " TFlop/s" << std::endl;
|
||||
};
|
||||
|
||||
// copy result back to host
|
||||
out_n_ho_wo_k_dev_buf.FromDevice(out_n_ho_wo_k.mData.data());
|
||||
}
|
||||
@@ -1,183 +0,0 @@
|
||||
#pragma once
|
||||
#include "device.hpp"
|
||||
#include "host_tensor.hpp"
|
||||
#include "handle.hpp"
|
||||
#include "online_driver_common.hpp"
|
||||
#include "convolution_problem_descriptor.hpp"
|
||||
#include "dynamic_tensor_descriptor.hpp"
|
||||
#include "dynamic_tensor_descriptor_helper.hpp"
|
||||
#include "transform_forward_convolution_into_gemm_v6r1_nchw_kcyx_nkhw.hpp"
|
||||
#include "conv_igemm_fwd_v6r1_dlops_nchw_kcyx_nkhw.hpp"
|
||||
|
||||
template <typename TInWei,
|
||||
typename TAcc,
|
||||
typename TOut,
|
||||
typename InLengths,
|
||||
typename WeiLengths,
|
||||
typename OutLengths,
|
||||
typename ConvStrides,
|
||||
typename ConvDilations,
|
||||
typename InLeftPads,
|
||||
typename InRightPads>
|
||||
void online_device_dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw(
|
||||
online_compile::Handle* handle,
|
||||
const InLengths& in_n_c_hi_wi_lengths,
|
||||
const WeiLengths& wei_k_c_y_x_lengths,
|
||||
const OutLengths& out_n_k_ho_wo_lengths,
|
||||
const ConvStrides& conv_strides,
|
||||
const ConvDilations& conv_dilations,
|
||||
const InLeftPads& in_left_pads,
|
||||
const InRightPads& in_right_pads,
|
||||
const Tensor<TInWei>& in_n_c_hi_wi,
|
||||
const Tensor<TInWei>& wei_k_c_y_x,
|
||||
Tensor<TOut>& out_n_k_ho_wo,
|
||||
const ck::driver::CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw& compile_param,
|
||||
ck::index_t nrepeat)
|
||||
{
|
||||
using namespace ck;
|
||||
using namespace ck::driver;
|
||||
using size_t = std::size_t;
|
||||
|
||||
std::cout << __func__ << std::endl;
|
||||
|
||||
constexpr auto I0 = Number<0>{};
|
||||
constexpr auto I1 = Number<1>{};
|
||||
constexpr auto I2 = Number<2>{};
|
||||
constexpr auto I3 = Number<3>{};
|
||||
|
||||
ConvolutionProblemDescriptor conv_problem_desc{in_n_c_hi_wi_lengths[I0],
|
||||
out_n_k_ho_wo_lengths[I1],
|
||||
in_n_c_hi_wi_lengths[I1],
|
||||
wei_k_c_y_x_lengths[I2],
|
||||
wei_k_c_y_x_lengths[I3],
|
||||
in_n_c_hi_wi_lengths[I2],
|
||||
in_n_c_hi_wi_lengths[I3],
|
||||
out_n_k_ho_wo_lengths[I2],
|
||||
out_n_k_ho_wo_lengths[I3],
|
||||
conv_strides[I0],
|
||||
conv_strides[I1],
|
||||
conv_dilations[I0],
|
||||
conv_dilations[I1],
|
||||
in_left_pads[I0],
|
||||
in_left_pads[I1],
|
||||
in_right_pads[I0],
|
||||
in_right_pads[I1],
|
||||
get_datatype_enum_from_type<TInWei>::value,
|
||||
get_datatype_enum_from_type<TInWei>::value,
|
||||
get_datatype_enum_from_type<TOut>::value};
|
||||
|
||||
if(!ConvIgemmFwdV6r1DlopsNchwKcyxNkhw::IsValidCompileParameter(conv_problem_desc,
|
||||
compile_param))
|
||||
{
|
||||
throw std::runtime_error("wrong! IsValidCompileParameter fail");
|
||||
}
|
||||
|
||||
DeviceMem in_n_c_hi_wi_dev_buf(sizeof(TInWei) * in_n_c_hi_wi.mDesc.GetElementSpace());
|
||||
DeviceMem wei_k_c_y_x_dev_buf(sizeof(TInWei) * wei_k_c_y_x.mDesc.GetElementSpace());
|
||||
DeviceMem out_n_k_ho_wo_dev_buf(sizeof(TOut) * out_n_k_ho_wo.mDesc.GetElementSpace());
|
||||
|
||||
in_n_c_hi_wi_dev_buf.ToDevice(in_n_c_hi_wi.mData.data());
|
||||
wei_k_c_y_x_dev_buf.ToDevice(wei_k_c_y_x.mData.data());
|
||||
out_n_k_ho_wo_dev_buf.ToDevice(out_n_k_ho_wo.mData.data());
|
||||
|
||||
// workspace is used for save transformed tensor descritpors created by prepare kernel
|
||||
DeviceMem workspace_dev_buf(
|
||||
ConvIgemmFwdV6r1DlopsNchwKcyxNkhw::GetWorkSpaceSize(conv_problem_desc, compile_param));
|
||||
|
||||
const auto block_size = std::size_t(
|
||||
ConvIgemmFwdV6r1DlopsNchwKcyxNkhw::GetBlockSize(conv_problem_desc, compile_param));
|
||||
|
||||
const auto grid_size = std::size_t(
|
||||
ConvIgemmFwdV6r1DlopsNchwKcyxNkhw::GetGridSize(conv_problem_desc, compile_param));
|
||||
|
||||
const std::vector<size_t> vld1 = {1, 1, 1};
|
||||
const std::vector<size_t> vgd1 = {1, 1, 1};
|
||||
|
||||
const std::vector<size_t> vld2 = {static_cast<size_t>(block_size), 1, 1};
|
||||
const std::vector<size_t> vgd2 = {static_cast<size_t>(grid_size * block_size), 1, 1};
|
||||
|
||||
std::string program_name =
|
||||
"dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw.cpp";
|
||||
std::string algo_name = "implicit_gemm_conv_fwd_v6r1_dlops_nchw";
|
||||
|
||||
std::string compile_param_string =
|
||||
get_ck_hip_online_compile_common_flag() + compile_param.GetCompileParameterString();
|
||||
std::string network_config = compile_param_string;
|
||||
|
||||
std::vector<float> kernel1_times;
|
||||
std::vector<float> kernel2_times;
|
||||
|
||||
for(index_t i = 0; i < nrepeat + 1; ++i)
|
||||
{
|
||||
KernelTimer timer1, timer2;
|
||||
std::string kernel_name;
|
||||
|
||||
kernel_name = "dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw_prepare";
|
||||
auto network_config_1 = network_config + "_1";
|
||||
|
||||
timer1.Start();
|
||||
handle->AddKernel(algo_name,
|
||||
network_config_1,
|
||||
program_name,
|
||||
kernel_name,
|
||||
vld1,
|
||||
vgd1,
|
||||
compile_param_string)(static_cast<index_t>(in_n_c_hi_wi_lengths[I0]),
|
||||
static_cast<index_t>(in_n_c_hi_wi_lengths[I1]),
|
||||
static_cast<index_t>(in_n_c_hi_wi_lengths[I2]),
|
||||
static_cast<index_t>(in_n_c_hi_wi_lengths[I3]),
|
||||
static_cast<index_t>(wei_k_c_y_x_lengths[I0]),
|
||||
static_cast<index_t>(wei_k_c_y_x_lengths[I2]),
|
||||
static_cast<index_t>(wei_k_c_y_x_lengths[I3]),
|
||||
conv_strides[I0],
|
||||
conv_strides[I1],
|
||||
conv_dilations[I0],
|
||||
conv_dilations[I1],
|
||||
in_left_pads[I0],
|
||||
in_left_pads[I1],
|
||||
in_right_pads[I0],
|
||||
in_right_pads[I1],
|
||||
(void*)(workspace_dev_buf.GetDeviceBuffer()));
|
||||
timer1.End();
|
||||
|
||||
kernel_name = "dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw";
|
||||
auto network_config_2 = network_config + "_2";
|
||||
|
||||
timer2.Start();
|
||||
handle->AddKernel(algo_name,
|
||||
network_config_2,
|
||||
program_name,
|
||||
kernel_name,
|
||||
vld2,
|
||||
vgd2,
|
||||
compile_param_string)(
|
||||
reinterpret_cast<const TInWei*>(wei_k_c_y_x_dev_buf.GetDeviceBuffer()),
|
||||
reinterpret_cast<const TInWei*>(in_n_c_hi_wi_dev_buf.GetDeviceBuffer()),
|
||||
reinterpret_cast<TOut*>(out_n_k_ho_wo_dev_buf.GetDeviceBuffer()),
|
||||
(const void*)(workspace_dev_buf.GetDeviceBuffer()));
|
||||
timer2.End();
|
||||
|
||||
kernel1_times.push_back(timer1.GetElapsedTime());
|
||||
kernel2_times.push_back(timer2.GetElapsedTime());
|
||||
}
|
||||
|
||||
{
|
||||
auto ave_time1 =
|
||||
std::accumulate(
|
||||
std::next(kernel1_times.begin()), kernel1_times.end(), 0., std::plus<float>{}) /
|
||||
nrepeat;
|
||||
auto ave_time2 =
|
||||
std::accumulate(
|
||||
std::next(kernel2_times.begin()), kernel2_times.end(), 0., std::plus<float>{}) /
|
||||
nrepeat;
|
||||
|
||||
float perf = (float)(conv_problem_desc.CalculateFlop()) /
|
||||
(std::size_t(1000) * 1000 * 1000) / (ave_time1 + ave_time2);
|
||||
|
||||
std::cout << "Average time : " << ave_time1 + ave_time2 << " ms(" << ave_time1 << ", "
|
||||
<< ave_time2 << "), " << perf << " TFlop/s" << std::endl;
|
||||
};
|
||||
|
||||
// copy result back to host
|
||||
out_n_k_ho_wo_dev_buf.FromDevice(out_n_k_ho_wo.mData.data());
|
||||
}
|
||||
@@ -1,168 +0,0 @@
|
||||
set(CMAKE_CXX_COMPILER /opt/rocm/llvm/bin/clang++)
|
||||
|
||||
## for online-compiling of HIP kernels
|
||||
set(OLC_HIP_COMPILER ${CMAKE_CXX_COMPILER} CACHE PATH "")
|
||||
|
||||
## reset to avoid the C++ options from the parent project
|
||||
set(CMAKE_CXX_FLAGS "")
|
||||
message("Compiling options for library and kernels: ${CMAKE_CXX_FLAGS}")
|
||||
|
||||
# look for and register clang-offload-bundler
|
||||
if(OLC_HIP_COMPILER MATCHES ".*clang\\+\\+$")
|
||||
find_program(OLC_OFFLOADBUNDLER_BIN clang-offload-bundler
|
||||
PATH_SUFFIXES bin
|
||||
PATHS
|
||||
/opt/rocm/llvm
|
||||
${CMAKE_INSTALL_PREFIX}/llvm
|
||||
)
|
||||
endif()
|
||||
|
||||
if(OLC_OFFLOADBUNDLER_BIN)
|
||||
message(STATUS "clang-offload-bundler found: ${OLC_OFFLOADBUNDLER_BIN}")
|
||||
set(OLC_OFFLOADBUNDLER_BIN "${OLC_OFFLOADBUNDLER_BIN}")
|
||||
else()
|
||||
# look for and register extractkernel
|
||||
message(STATUS "clang-offload-bundler not found")
|
||||
|
||||
find_program(EXTRACTKERNEL_BIN extractkernel
|
||||
PATH_SUFFIXES bin
|
||||
PATHS
|
||||
/opt/rocm/hip
|
||||
/opt/rocm/hcc
|
||||
/opt/rocm
|
||||
${CMAKE_INSTALL_PREFIX}/hip
|
||||
${CMAKE_INSTALL_PREFIX}/hcc
|
||||
${CMAKE_INSTALL_PREFIX}
|
||||
|
||||
)
|
||||
if(EXTRACTKERNEL_BIN)
|
||||
message(STATUS "extractkernel found: ${EXTRACTKERNEL_BIN}")
|
||||
set(EXTRACTKERNEL_BIN "${EXTRACTKERNEL_BIN}")
|
||||
else()
|
||||
message(FATAL_ERROR "extractkernel not found")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
option(Boost_USE_STATIC_LIBS "Use boost static libraries" OFF)
|
||||
set(BOOST_COMPONENTS filesystem)
|
||||
add_definitions(-DBOOST_ALL_NO_LIB=1)
|
||||
find_package(Boost REQUIRED COMPONENTS ${BOOST_COMPONENTS})
|
||||
|
||||
# HIP is always required
|
||||
find_package(hip REQUIRED PATHS /opt/rocm)
|
||||
message(STATUS "Build with HIP ${hip_VERSION}")
|
||||
target_flags(HIP_COMPILER_FLAGS hip::device)
|
||||
# Remove cuda arch flags
|
||||
string(REGEX REPLACE --cuda-gpu-arch=[a-z0-9]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
|
||||
string(REGEX REPLACE --offload-arch=[a-z0-9]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
|
||||
|
||||
set(OLC_hip_VERSION_MAJOR "${hip_VERSION_MAJOR}")
|
||||
set(OLC_hip_VERSION_MINOR "${hip_VERSION_MINOR}")
|
||||
set(OLC_hip_VERSION_PATCH "${hip_VERSION_PATCH}")
|
||||
|
||||
option(ENABLE_DEBUG "Build to enable debugging" ON)
|
||||
if(ENABLE_DEBUG)
|
||||
set(OLC_DEBUG 1)
|
||||
else()
|
||||
set(OLC_DEBUG 0)
|
||||
endif()
|
||||
|
||||
configure_file("${PROJECT_SOURCE_DIR}/host/online_compile/include/config.h.in" "${PROJECT_BINARY_DIR}/host/online_compile/include/config.h")
|
||||
|
||||
include_directories(BEFORE
|
||||
${PROJECT_BINARY_DIR}/host/online_compile/include
|
||||
)
|
||||
|
||||
message(STATUS "Hip compiler flags: ${HIP_COMPILER_FLAGS}")
|
||||
|
||||
## HIP_COMPILER_FLAGS will be used for on-line compiling of the HIP kernels
|
||||
set(HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS} ${HIP_ONLINE_COMPILER_FLAGS}")
|
||||
add_definitions("-DHIP_COMPILER_FLAGS=${HIP_COMPILER_FLAGS}")
|
||||
|
||||
file(GLOB_RECURSE COMPOSABLE_KERNEL_INCLUDE_1 "${PROJECT_SOURCE_DIR}/composable_kernel/include/*/*.hpp")
|
||||
file(GLOB COMPOSABLE_KERNEL_INCLUDE_2 "${PROJECT_SOURCE_DIR}/external/rocm/include/bfloat16_dev.hpp")
|
||||
set(MCONV_KERNEL_INCLUDES
|
||||
${COMPOSABLE_KERNEL_INCLUDE_1}
|
||||
${COMPOSABLE_KERNEL_INCLUDE_2}
|
||||
)
|
||||
|
||||
file(GLOB_RECURSE MCONV_KERNELS "${PROJECT_SOURCE_DIR}/composable_kernel/src/kernel_wrapper/*.cpp")
|
||||
|
||||
add_kernels(${CMAKE_CURRENT_SOURCE_DIR} "${MCONV_KERNELS}")
|
||||
add_kernel_includes(${CMAKE_CURRENT_SOURCE_DIR} "${MCONV_KERNEL_INCLUDES}")
|
||||
|
||||
set(ONLINE_COMPILATION_SOURCE
|
||||
${PROJECT_BINARY_DIR}/kernel.cpp
|
||||
${PROJECT_BINARY_DIR}/kernel_includes.cpp
|
||||
)
|
||||
|
||||
include_directories(BEFORE
|
||||
${PROJECT_BINARY_DIR}/host/online_compile/include
|
||||
include
|
||||
)
|
||||
|
||||
set(OLC_HIP_UTILITY_CPPS
|
||||
hip_utility/logger.cpp
|
||||
hip_utility/tmp_dir.cpp
|
||||
hip_utility/md5.cpp
|
||||
hip_utility/exec_utils.cpp
|
||||
hip_utility/target_properties.cpp
|
||||
hip_utility/handlehip.cpp
|
||||
hip_utility/kernel_build_params.cpp
|
||||
hip_utility/hip_build_utils.cpp
|
||||
hip_utility/hipoc_program.cpp
|
||||
hip_utility/hipoc_kernel.cpp
|
||||
hip_utility/kernel_cache.cpp
|
||||
hip_utility/binary_cache.cpp
|
||||
)
|
||||
|
||||
list(APPEND OLC_SOURCES ${OLC_HIP_UTILITY_CPPS} ${OLC_HIP_UTILITY_HEADERS})
|
||||
|
||||
## addkernels provide the tool to create inlined kernels in one header
|
||||
add_subdirectory(addkernels)
|
||||
|
||||
function(inline_kernels_src KERNELS KERNEL_INCLUDES)
|
||||
set(KERNEL_SRC_HPP_FILENAME batch_all.cpp.hpp)
|
||||
set(KERNEL_SRC_HPP_PATH ${PROJECT_BINARY_DIR}/inlined_kernels/${KERNEL_SRC_HPP_FILENAME})
|
||||
set(KERNEL_SRC_CPP_PATH ${PROJECT_BINARY_DIR}/inlined_kernels/batch_all.cpp)
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${KERNEL_SRC_HPP_PATH}
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
DEPENDS addkernels ${KERNELS} ${KERNEL_INCLUDES}
|
||||
COMMAND $<TARGET_FILE:addkernels> -target ${KERNEL_SRC_HPP_PATH} -extern -source ${KERNELS}
|
||||
COMMENT "Inlining All kernels"
|
||||
)
|
||||
configure_file(kernels_batch.cpp.in ${KERNEL_SRC_CPP_PATH})
|
||||
list(APPEND OLC_SOURCES ${KERNEL_SRC_CPP_PATH} ${KERNEL_SRC_HPP_PATH})
|
||||
|
||||
set(OLC_SOURCES ${OLC_SOURCES} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
inline_kernels_src("${MCONV_KERNELS}" "${MCONV_KERNEL_INCLUDES}")
|
||||
|
||||
list(APPEND ONLINE_COMPILATION_SOURCE ${OLC_SOURCES} ${PROJECT_BINARY_DIR}/olc_kernel_includes.h)
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${PROJECT_BINARY_DIR}/olc_kernel_includes.h
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
DEPENDS addkernels ${MCONV_KERNEL_INCLUDES}
|
||||
COMMAND $<TARGET_FILE:addkernels> -no-recurse -guard GUARD_OLC_KERNEL_INCLUDES_HPP_ -target ${PROJECT_BINARY_DIR}/olc_kernel_includes.h -source ${MCONV_KERNEL_INCLUDES}
|
||||
COMMENT "Inlining HIP kernel includes"
|
||||
)
|
||||
|
||||
## the library target
|
||||
add_library(online_compile SHARED ${ONLINE_COMPILATION_SOURCE})
|
||||
|
||||
target_include_directories(online_compile PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/online_compile/include/)
|
||||
target_include_directories(online_compile PRIVATE ${PROJECT_BINARY_DIR})
|
||||
target_include_directories(online_compile PRIVATE ${PROJECT_SOURCE_DIR}/external/half/include/)
|
||||
|
||||
target_link_libraries(online_compile PRIVATE hip::device)
|
||||
target_link_libraries(online_compile INTERFACE hip::host)
|
||||
target_link_libraries(online_compile PRIVATE Boost::filesystem)
|
||||
|
||||
target_compile_features(online_compile PUBLIC)
|
||||
set_target_properties(online_compile PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
install(TARGETS online_compile LIBRARY DESTINATION lib)
|
||||
@@ -1,30 +0,0 @@
|
||||
################################################################################
|
||||
#
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2017 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
################################################################################
|
||||
|
||||
set(ADD_KERNELS_SOURCE include_inliner.cpp addkernels.cpp)
|
||||
|
||||
add_executable(addkernels EXCLUDE_FROM_ALL ${ADD_KERNELS_SOURCE})
|
||||
|
||||
@@ -1,264 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2021 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#include "include_inliner.hpp"
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
void Bin2Hex(std::istream& source,
|
||||
std::ostream& target,
|
||||
const std::string& variable,
|
||||
bool nullTerminate,
|
||||
size_t bufferSize,
|
||||
size_t lineSize)
|
||||
{
|
||||
source.seekg(0, std::ios::end);
|
||||
std::unique_ptr<unsigned char[]> buffer(new unsigned char[bufferSize]);
|
||||
std::streamoff sourceSize = source.tellg();
|
||||
std::streamoff blockStart = 0;
|
||||
|
||||
if(variable.length() != 0)
|
||||
{
|
||||
target << "extern const size_t " << variable << "_SIZE;" << std::endl;
|
||||
target << "extern const unsigned char " << variable << "[];" << std::endl;
|
||||
target << "const size_t " << variable << "_SIZE = " << std::setbase(10) << sourceSize << ";"
|
||||
<< std::endl;
|
||||
target << "const unsigned char " << variable << "[] = {" << std::endl;
|
||||
}
|
||||
|
||||
target << std::setbase(16) << std::setfill('0');
|
||||
source.seekg(0, std::ios::beg);
|
||||
|
||||
while(blockStart < sourceSize)
|
||||
{
|
||||
source.read(reinterpret_cast<char*>(buffer.get()), bufferSize);
|
||||
|
||||
std::streamoff pos = source.tellg();
|
||||
std::streamoff blockSize = (pos < 0 ? sourceSize : pos) - blockStart;
|
||||
std::streamoff i = 0;
|
||||
|
||||
while(i < blockSize)
|
||||
{
|
||||
size_t j = i;
|
||||
size_t end = std::min<size_t>(i + lineSize, blockSize);
|
||||
|
||||
for(; j < end; j++)
|
||||
target << "0x" << std::setw(2) << static_cast<unsigned>(buffer[j]) << ",";
|
||||
|
||||
target << std::endl;
|
||||
i = end;
|
||||
}
|
||||
|
||||
blockStart += blockSize;
|
||||
}
|
||||
|
||||
if(nullTerminate)
|
||||
target << "0x00," << std::endl;
|
||||
|
||||
if(variable.length() != 0)
|
||||
{
|
||||
target << "};" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void PrintHelp()
|
||||
{
|
||||
std::cout << "Usage: bin2hex {<option>}" << std::endl;
|
||||
std::cout << "Option format: -<option name>[ <option value>]" << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "Options:" << std::endl;
|
||||
std::cout
|
||||
<< "[REQUIRED] -s[ource] {<path to file>}: files to be processed. Must be last argument."
|
||||
<< std::endl;
|
||||
std::cout << " -t[arget] <path>: target file. Default: std out." << std::endl;
|
||||
std::cout << " -l[ine-size] <number>: bytes in one line. Default: 16." << std::endl;
|
||||
std::cout << " -b[uffer] <number>: read buffer size. Default: 512." << std::endl;
|
||||
std::cout << " -g[uard] <string>: guard name. Default: no guard" << std::endl;
|
||||
std::cout << " -n[o-recurse] : dont expand include files recursively. Default: off"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
[[gnu::noreturn]] void WrongUsage(const std::string& error)
|
||||
{
|
||||
std::cout << "Wrong usage: " << error << std::endl;
|
||||
std::cout << std::endl;
|
||||
PrintHelp();
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
[[gnu::noreturn]] void UnknownArgument(const std::string& arg)
|
||||
{
|
||||
std::ostringstream ss;
|
||||
ss << "unknown argument - " << arg;
|
||||
WrongUsage(ss.str());
|
||||
}
|
||||
|
||||
void Process(const std::string& sourcePath,
|
||||
std::ostream& target,
|
||||
size_t bufferSize,
|
||||
size_t lineSize,
|
||||
bool recurse,
|
||||
bool as_extern)
|
||||
{
|
||||
std::string fileName(sourcePath);
|
||||
std::string extension, root;
|
||||
std::stringstream inlinerTemp;
|
||||
auto extPos = fileName.rfind('.');
|
||||
auto slashPos = fileName.rfind('/');
|
||||
|
||||
if(extPos != std::string::npos)
|
||||
{
|
||||
extension = fileName.substr(extPos + 1);
|
||||
fileName = fileName.substr(0, extPos);
|
||||
}
|
||||
|
||||
if(slashPos != std::string::npos)
|
||||
{
|
||||
root = fileName.substr(0, slashPos + 1);
|
||||
fileName = fileName.substr(slashPos + 1);
|
||||
}
|
||||
|
||||
std::string variable(fileName);
|
||||
std::ifstream sourceFile(sourcePath, std::ios::in | std::ios::binary);
|
||||
std::istream* source = &sourceFile;
|
||||
|
||||
if(!sourceFile.good())
|
||||
{
|
||||
std::cerr << "File not found: " << sourcePath << std::endl;
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
const auto is_asm = extension == "s";
|
||||
const auto is_cl = extension == "cl";
|
||||
const auto is_hip = extension == "cpp";
|
||||
const auto is_header = extension == "hpp";
|
||||
|
||||
if(is_asm || is_cl || is_hip || is_header)
|
||||
{
|
||||
IncludeInliner inliner;
|
||||
|
||||
try
|
||||
{
|
||||
if(is_asm)
|
||||
inliner.Process(
|
||||
sourceFile, inlinerTemp, root, sourcePath, ".include", false, recurse);
|
||||
else if(is_cl || is_header)
|
||||
inliner.Process(
|
||||
sourceFile, inlinerTemp, root, sourcePath, "#include", true, recurse);
|
||||
else if(is_hip)
|
||||
inliner.Process(
|
||||
sourceFile, inlinerTemp, root, sourcePath, "<#not_include>", true, false);
|
||||
}
|
||||
catch(const InlineException& ex)
|
||||
{
|
||||
std::cerr << ex.What() << std::endl;
|
||||
std::cerr << ex.GetTrace() << std::endl;
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
source = &inlinerTemp;
|
||||
}
|
||||
|
||||
std::transform(variable.begin(), variable.end(), variable.begin(), ::toupper);
|
||||
|
||||
if(as_extern && variable.length() != 0)
|
||||
{
|
||||
variable = "APP_KERNEL_" + variable;
|
||||
}
|
||||
|
||||
Bin2Hex(*source, target, variable, true, bufferSize, lineSize);
|
||||
}
|
||||
|
||||
int main(int argsn, char** args)
|
||||
{
|
||||
if(argsn == 1)
|
||||
{
|
||||
PrintHelp();
|
||||
return 2;
|
||||
}
|
||||
|
||||
std::string guard;
|
||||
size_t bufferSize = 512;
|
||||
size_t lineSize = 16;
|
||||
|
||||
std::ofstream targetFile;
|
||||
std::ostream* target = &std::cout;
|
||||
bool recurse = true;
|
||||
bool as_extern = false;
|
||||
|
||||
int i = 0;
|
||||
while(++i < argsn && **args != '-')
|
||||
{
|
||||
std::string arg(args[i] + 1);
|
||||
std::transform(arg.begin(), arg.end(), arg.begin(), ::tolower);
|
||||
|
||||
if(arg == "s" || arg == "source")
|
||||
{
|
||||
if(guard.length() > 0)
|
||||
{
|
||||
*target << "#ifndef " << guard << std::endl;
|
||||
*target << "#define " << guard << std::endl;
|
||||
}
|
||||
|
||||
*target << "#include <cstddef>" << std::endl;
|
||||
|
||||
while(++i < argsn)
|
||||
{
|
||||
Process(args[i], *target, bufferSize, lineSize, recurse, as_extern);
|
||||
}
|
||||
|
||||
if(guard.length() > 0)
|
||||
{
|
||||
*target << "#endif" << std::endl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
else if(arg == "t" || arg == "target")
|
||||
{
|
||||
targetFile.open(args[++i], std::ios::out);
|
||||
target = &targetFile;
|
||||
}
|
||||
else if(arg == "l" || arg == "line-size")
|
||||
lineSize = std::stol(args[++i]);
|
||||
else if(arg == "b" || arg == "buffer")
|
||||
bufferSize = std::stol(args[++i]);
|
||||
else if(arg == "g" || arg == "guard")
|
||||
guard = args[++i];
|
||||
else if(arg == "n" || arg == "no-recurse")
|
||||
recurse = false;
|
||||
else if(arg == "e" || arg == "extern")
|
||||
as_extern = true;
|
||||
else
|
||||
UnknownArgument(arg);
|
||||
}
|
||||
|
||||
WrongUsage("source key is required");
|
||||
}
|
||||
@@ -1,213 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2021 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#include <algorithm>
|
||||
#include <exception>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
#ifdef __linux__
|
||||
#include <linux/limits.h>
|
||||
#include <cstdlib>
|
||||
#endif // !WIN32
|
||||
|
||||
#include "include_inliner.hpp"
|
||||
|
||||
namespace PathHelpers {
|
||||
static int GetMaxPath()
|
||||
{
|
||||
#ifdef _WIN32
|
||||
return MAX_PATH;
|
||||
#else
|
||||
return PATH_MAX;
|
||||
#endif
|
||||
}
|
||||
|
||||
static std::string GetAbsolutePath(const std::string& path)
|
||||
{
|
||||
std::string result(GetMaxPath(), ' ');
|
||||
#ifdef _WIN32
|
||||
const auto retval = GetFullPathName(path.c_str(), result.size(), &result[0], nullptr);
|
||||
|
||||
if(retval == 0)
|
||||
return "";
|
||||
#else
|
||||
auto* const retval = realpath(path.c_str(), &result[0]);
|
||||
|
||||
if(retval == nullptr)
|
||||
return "";
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
} // namespace PathHelpers
|
||||
|
||||
std::string IncludeFileExceptionBase::What() const
|
||||
{
|
||||
std::ostringstream ss;
|
||||
ss << GetMessage() << ": <" << _file << ">";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
void IncludeInliner::Process(std::istream& input,
|
||||
std::ostream& output,
|
||||
const std::string& root,
|
||||
const std::string& file_name,
|
||||
const std::string& directive,
|
||||
bool allow_angle_brackets,
|
||||
bool recurse)
|
||||
{
|
||||
ProcessCore(input, output, root, file_name, 0, directive, allow_angle_brackets, recurse);
|
||||
}
|
||||
|
||||
void IncludeInliner::ProcessCore(std::istream& input,
|
||||
std::ostream& output,
|
||||
const std::string& root,
|
||||
const std::string& file_name,
|
||||
int line_number,
|
||||
const std::string& directive,
|
||||
bool allow_angle_brackets,
|
||||
bool recurse)
|
||||
{
|
||||
if(_include_depth >= include_depth_limit)
|
||||
throw InlineStackOverflowException(GetIncludeStackTrace(0));
|
||||
|
||||
_include_depth++;
|
||||
_included_stack_head =
|
||||
std::make_shared<SourceFileDesc>(file_name, _included_stack_head, line_number);
|
||||
auto current_line = 0;
|
||||
auto next_include_optional = false;
|
||||
|
||||
while(!input.eof())
|
||||
{
|
||||
std::string line;
|
||||
std::string word;
|
||||
std::getline(input, line);
|
||||
std::istringstream line_parser(line);
|
||||
line_parser >> word;
|
||||
current_line++;
|
||||
std::transform(word.begin(), word.end(), word.begin(), ::tolower);
|
||||
|
||||
const auto include_optional = next_include_optional;
|
||||
next_include_optional = false;
|
||||
|
||||
if(!word.empty() && word == "//inliner-include-optional")
|
||||
{
|
||||
if(include_optional)
|
||||
throw IncludeExpectedException(GetIncludeStackTrace(current_line));
|
||||
next_include_optional = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(!word.empty() && word == directive && recurse)
|
||||
{
|
||||
auto first_quote_pos = line.find('"', static_cast<int>(line_parser.tellg()) + 1);
|
||||
std::string::size_type second_quote_pos;
|
||||
|
||||
if(first_quote_pos != std::string::npos)
|
||||
{
|
||||
second_quote_pos = line.find('"', first_quote_pos + 1);
|
||||
if(second_quote_pos == std::string::npos)
|
||||
throw WrongInlineDirectiveException(GetIncludeStackTrace(current_line));
|
||||
}
|
||||
else
|
||||
{
|
||||
if(!allow_angle_brackets)
|
||||
throw WrongInlineDirectiveException(GetIncludeStackTrace(current_line));
|
||||
|
||||
first_quote_pos = line.find('<', static_cast<int>(line_parser.tellg()) + 1);
|
||||
if(first_quote_pos == std::string::npos)
|
||||
throw WrongInlineDirectiveException(GetIncludeStackTrace(current_line));
|
||||
|
||||
second_quote_pos = line.find('>', first_quote_pos + 1);
|
||||
if(second_quote_pos == std::string::npos)
|
||||
throw WrongInlineDirectiveException(GetIncludeStackTrace(current_line));
|
||||
}
|
||||
|
||||
const std::string include_file_path =
|
||||
line.substr(first_quote_pos + 1, second_quote_pos - first_quote_pos - 1);
|
||||
const std::string abs_include_file_path(
|
||||
PathHelpers::GetAbsolutePath(root + "/" + include_file_path)); // NOLINT
|
||||
|
||||
if(abs_include_file_path.empty())
|
||||
{
|
||||
if(include_optional)
|
||||
continue;
|
||||
throw IncludeNotFoundException(include_file_path,
|
||||
GetIncludeStackTrace(current_line));
|
||||
}
|
||||
std::ifstream include_file(abs_include_file_path, std::ios::in);
|
||||
|
||||
if(!include_file.good())
|
||||
throw IncludeCantBeOpenedException(include_file_path,
|
||||
GetIncludeStackTrace(current_line));
|
||||
|
||||
ProcessCore(include_file,
|
||||
output,
|
||||
root,
|
||||
include_file_path,
|
||||
current_line,
|
||||
directive,
|
||||
allow_angle_brackets,
|
||||
recurse);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(include_optional)
|
||||
throw IncludeExpectedException(GetIncludeStackTrace(current_line));
|
||||
|
||||
if(output.tellp() > 0)
|
||||
output << std::endl;
|
||||
|
||||
output << line;
|
||||
}
|
||||
}
|
||||
|
||||
auto prev_file = _included_stack_head->included_from;
|
||||
_included_stack_head = prev_file;
|
||||
_include_depth--;
|
||||
}
|
||||
|
||||
std::string IncludeInliner::GetIncludeStackTrace(int line)
|
||||
{
|
||||
std::ostringstream ss;
|
||||
|
||||
if(_included_stack_head == nullptr)
|
||||
return "";
|
||||
|
||||
auto item = _included_stack_head;
|
||||
ss << " " << item->path << ":" << line;
|
||||
|
||||
while(item->included_from != nullptr)
|
||||
{
|
||||
ss << std::endl << " from " << item->included_from->path << ":" << item->included_line;
|
||||
item = item->included_from;
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
@@ -1,142 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2021 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef SOURCE_INLINER_HPP
|
||||
|
||||
#define SOURCE_INLINER_HPP
|
||||
#include "source_file_desc.hpp"
|
||||
#include <ostream>
|
||||
#include <memory>
|
||||
#include <stack>
|
||||
|
||||
class InlineException : public std::exception
|
||||
{
|
||||
public:
|
||||
InlineException(const std::string& trace) : _trace(trace) {}
|
||||
|
||||
virtual std::string What() const = 0;
|
||||
const std::string& GetTrace() const { return _trace; }
|
||||
|
||||
private:
|
||||
std::string _trace;
|
||||
};
|
||||
|
||||
class InlineStackOverflowException : public InlineException
|
||||
{
|
||||
public:
|
||||
InlineStackOverflowException(const std::string& trace) : InlineException(trace) {}
|
||||
|
||||
std::string What() const override
|
||||
{
|
||||
return "Include stack depth limit has been reached, possible circle includes";
|
||||
}
|
||||
};
|
||||
|
||||
class IncludeExpectedException : public InlineException
|
||||
{
|
||||
public:
|
||||
IncludeExpectedException(const std::string& trace) : InlineException(trace) {}
|
||||
|
||||
std::string What() const override { return "Include directive expected"; }
|
||||
};
|
||||
|
||||
class WrongInlineDirectiveException : public InlineException
|
||||
{
|
||||
public:
|
||||
WrongInlineDirectiveException(const std::string& trace) : InlineException(trace) {}
|
||||
|
||||
std::string What() const override { return "Include directive has wrong format"; }
|
||||
};
|
||||
|
||||
class IncludeFileExceptionBase : public InlineException
|
||||
{
|
||||
public:
|
||||
IncludeFileExceptionBase(const std::string& file, const std::string& trace)
|
||||
: InlineException(trace), _file(file)
|
||||
{
|
||||
}
|
||||
|
||||
std::string What() const override;
|
||||
virtual std::string GetMessage() const = 0;
|
||||
|
||||
private:
|
||||
std::string _file;
|
||||
};
|
||||
|
||||
class IncludeNotFoundException : public IncludeFileExceptionBase
|
||||
{
|
||||
public:
|
||||
IncludeNotFoundException(const std::string& file, const std::string& trace)
|
||||
: IncludeFileExceptionBase(file, trace)
|
||||
{
|
||||
}
|
||||
|
||||
std::string GetMessage() const override
|
||||
{
|
||||
return "Include file not found (if it is optional put //inliner-include-optional on line "
|
||||
"before it)";
|
||||
}
|
||||
};
|
||||
|
||||
class IncludeCantBeOpenedException : public IncludeFileExceptionBase
|
||||
{
|
||||
public:
|
||||
IncludeCantBeOpenedException(const std::string& file, const std::string& trace)
|
||||
: IncludeFileExceptionBase(file, trace)
|
||||
{
|
||||
}
|
||||
|
||||
std::string GetMessage() const override { return "Can not open include file"; }
|
||||
};
|
||||
|
||||
class IncludeInliner
|
||||
{
|
||||
public:
|
||||
int include_depth_limit = 256;
|
||||
|
||||
void Process(std::istream& input,
|
||||
std::ostream& output,
|
||||
const std::string& root,
|
||||
const std::string& file_name,
|
||||
const std::string& directive,
|
||||
bool allow_angle_brackets,
|
||||
bool recurse);
|
||||
std::string GetIncludeStackTrace(int line);
|
||||
|
||||
private:
|
||||
int _include_depth = 0;
|
||||
std::shared_ptr<SourceFileDesc> _included_stack_head = nullptr;
|
||||
|
||||
void ProcessCore(std::istream& input,
|
||||
std::ostream& output,
|
||||
const std::string& root,
|
||||
const std::string& file_name,
|
||||
int line_number,
|
||||
const std::string& directive,
|
||||
bool allow_angle_brackets,
|
||||
bool recurse);
|
||||
};
|
||||
|
||||
#endif // !SOURCE_INLINER_HPP
|
||||
@@ -1,45 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2021 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef SOURCE_FILE_DESC_HPP
|
||||
|
||||
#define SOURCE_FILE_DESC_HPP
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
class SourceFileDesc
|
||||
{
|
||||
public:
|
||||
const std::string path;
|
||||
int included_line;
|
||||
std::shared_ptr<SourceFileDesc> included_from;
|
||||
|
||||
SourceFileDesc(const std::string& path_, std::shared_ptr<SourceFileDesc> from, int line)
|
||||
: path(path_), included_line(line), included_from(from)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
#endif // SOURCE_FILE_DESC_HPP
|
||||
@@ -1,112 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
|
||||
#include <binary_cache.hpp>
|
||||
#include <handle.hpp>
|
||||
#include <md5.hpp>
|
||||
#include <env.hpp>
|
||||
#include <stringutils.hpp>
|
||||
#include <logger.hpp>
|
||||
#include <target_properties.hpp>
|
||||
#include <boost/filesystem.hpp>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
OLC_DECLARE_ENV_VAR(OLC_DISABLE_CACHE)
|
||||
OLC_DECLARE_ENV_VAR(HOME)
|
||||
|
||||
static boost::filesystem::path ComputeCachePath()
|
||||
{
|
||||
const char* home_dir = GetStringEnv(HOME{});
|
||||
if(home_dir == nullptr || home_dir == std::string("/") || home_dir == std::string(""))
|
||||
{
|
||||
home_dir = "/tmp";
|
||||
}
|
||||
|
||||
auto p = boost::filesystem::path{home_dir} / "_hip_binary_kernels_";
|
||||
|
||||
if(!boost::filesystem::exists(p))
|
||||
boost::filesystem::create_directories(p);
|
||||
return p;
|
||||
}
|
||||
|
||||
boost::filesystem::path GetCachePath()
|
||||
{
|
||||
static const boost::filesystem::path user_path = ComputeCachePath();
|
||||
|
||||
return user_path;
|
||||
}
|
||||
|
||||
static bool IsCacheDisabled() { return online_compile::IsEnabled(OLC_DISABLE_CACHE{}); }
|
||||
|
||||
boost::filesystem::path
|
||||
GetCacheFile(const std::string& device, const std::string& name, const std::string& args)
|
||||
{
|
||||
// std::string filename = (is_kernel_str ? online_compile::md5(name) : name) + ".o";
|
||||
std::string filename = name + ".o";
|
||||
return GetCachePath() / online_compile::md5(device + ":" + args) / filename;
|
||||
}
|
||||
|
||||
boost::filesystem::path LoadBinary(const TargetProperties& target,
|
||||
const size_t num_cu,
|
||||
const std::string& name,
|
||||
const std::string& args)
|
||||
{
|
||||
if(online_compile::IsCacheDisabled())
|
||||
return {};
|
||||
|
||||
(void)num_cu;
|
||||
auto f = GetCacheFile(target.DbId(), name, args);
|
||||
if(boost::filesystem::exists(f))
|
||||
{
|
||||
return f.string();
|
||||
}
|
||||
else
|
||||
{
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
void SaveBinary(const boost::filesystem::path& binary_path,
|
||||
const TargetProperties& target,
|
||||
const std::string& name,
|
||||
const std::string& args)
|
||||
{
|
||||
if(online_compile::IsCacheDisabled())
|
||||
{
|
||||
boost::filesystem::remove(binary_path);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto p = GetCacheFile(target.DbId(), name, args);
|
||||
boost::filesystem::create_directories(p.parent_path());
|
||||
boost::filesystem::rename(binary_path, p);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace online_compile
|
||||
@@ -1,93 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#include <exec_utils.hpp>
|
||||
#include <manage_ptr.hpp>
|
||||
#include <istream>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
#include <cstdio>
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <cstdio>
|
||||
#include <sys/wait.h>
|
||||
#endif // __linux__
|
||||
|
||||
namespace online_compile {
|
||||
namespace exec {
|
||||
|
||||
int Run(const std::string& p, std::istream* in, std::ostream* out)
|
||||
{
|
||||
#ifdef __linux__
|
||||
const auto redirect_stdin = (in != nullptr);
|
||||
const auto redirect_stdout = (out != nullptr);
|
||||
|
||||
assert(!(redirect_stdin && redirect_stdout));
|
||||
|
||||
const auto file_mode = redirect_stdout ? "r" : "w";
|
||||
OLC_MANAGE_PTR(FILE*, pclose) pipe{popen(p.c_str(), file_mode)};
|
||||
|
||||
if(!pipe)
|
||||
throw std::runtime_error("online_compile::exec::Run(): popen(" + p + ", " + file_mode +
|
||||
") failed");
|
||||
|
||||
if(redirect_stdin || redirect_stdout)
|
||||
{
|
||||
std::array<char, 1024> buffer{};
|
||||
|
||||
if(redirect_stdout)
|
||||
{
|
||||
while(feof(pipe.get()) == 0)
|
||||
if(fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr)
|
||||
*out << buffer.data();
|
||||
}
|
||||
else
|
||||
{
|
||||
while(!in->eof())
|
||||
{
|
||||
in->read(buffer.data(), buffer.size() - 1);
|
||||
buffer[in->gcount()] = 0;
|
||||
|
||||
if(fputs(buffer.data(), pipe.get()) == EOF)
|
||||
throw std::runtime_error("online_compile::exec::Run(): fputs() failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto status = pclose(pipe.release());
|
||||
return WEXITSTATUS(status);
|
||||
#else
|
||||
(void)p;
|
||||
(void)in;
|
||||
(void)out;
|
||||
return -1;
|
||||
#endif // __linux__
|
||||
}
|
||||
|
||||
} // namespace exec
|
||||
} // namespace online_compile
|
||||
@@ -1,285 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
|
||||
#include <handle.hpp>
|
||||
|
||||
#include <binary_cache.hpp>
|
||||
#include <env.hpp>
|
||||
#include <kernel_cache.hpp>
|
||||
#include <stringutils.hpp>
|
||||
#include <target_properties.hpp>
|
||||
|
||||
#include <hipCheck.hpp>
|
||||
|
||||
#include <write_file.hpp>
|
||||
|
||||
#include <boost/filesystem.hpp>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
OLC_DECLARE_ENV_VAR(OLC_DEVICE_CU)
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
std::size_t GetAvailableMemory()
|
||||
{
|
||||
size_t free, total;
|
||||
MY_HIP_CHECK(hipMemGetInfo(&free, &total));
|
||||
return free;
|
||||
}
|
||||
|
||||
int get_device_id() // Get random device
|
||||
{
|
||||
int device;
|
||||
|
||||
MY_HIP_CHECK(hipGetDevice(&device));
|
||||
|
||||
return device;
|
||||
}
|
||||
|
||||
void set_device(int id) { MY_HIP_CHECK(hipSetDevice(id)); }
|
||||
|
||||
int set_default_device()
|
||||
{
|
||||
int n;
|
||||
|
||||
MY_HIP_CHECK(hipGetDeviceCount(&n));
|
||||
|
||||
// Pick device based on process id
|
||||
auto pid = ::getpid();
|
||||
assert(pid > 0);
|
||||
set_device(pid % n);
|
||||
return (pid % n);
|
||||
}
|
||||
|
||||
struct HandleImpl
|
||||
{
|
||||
using StreamPtr = std::shared_ptr<typename std::remove_pointer<hipStream_t>::type>;
|
||||
|
||||
HandleImpl() {}
|
||||
|
||||
StreamPtr create_stream()
|
||||
{
|
||||
hipStream_t result;
|
||||
|
||||
MY_HIP_CHECK(hipStreamCreate(&result));
|
||||
|
||||
return StreamPtr{result, &hipStreamDestroy};
|
||||
}
|
||||
|
||||
static StreamPtr reference_stream(hipStream_t s) { return StreamPtr{s, null_deleter{}}; }
|
||||
|
||||
std::string get_device_name() const
|
||||
{
|
||||
hipDeviceProp_t props;
|
||||
|
||||
MY_HIP_CHECK(hipGetDeviceProperties(&props, device));
|
||||
|
||||
const std::string name(props.gcnArchName);
|
||||
return name;
|
||||
}
|
||||
|
||||
StreamPtr stream = nullptr;
|
||||
int device = -1;
|
||||
KernelCache cache;
|
||||
TargetProperties target_properties;
|
||||
};
|
||||
|
||||
Handle::Handle(hipStream_t stream) : impl(new HandleImpl())
|
||||
{
|
||||
this->impl->device = get_device_id();
|
||||
|
||||
if(stream == nullptr)
|
||||
this->impl->stream = HandleImpl::reference_stream(nullptr);
|
||||
else
|
||||
this->impl->stream = HandleImpl::reference_stream(stream);
|
||||
|
||||
this->impl->target_properties.Init(this);
|
||||
}
|
||||
|
||||
Handle::Handle() : impl(new HandleImpl())
|
||||
{
|
||||
this->impl->device = get_device_id();
|
||||
this->impl->stream = HandleImpl::reference_stream(nullptr);
|
||||
|
||||
this->impl->target_properties.Init(this);
|
||||
}
|
||||
|
||||
Handle::~Handle() {}
|
||||
|
||||
void Handle::SetStream(hipStream_t streamID) const
|
||||
{
|
||||
this->impl->stream = HandleImpl::reference_stream(streamID);
|
||||
|
||||
this->impl->target_properties.Init(this);
|
||||
}
|
||||
|
||||
hipStream_t Handle::GetStream() const { return impl->stream.get(); }
|
||||
|
||||
KernelInvoke Handle::AddKernel(const std::string& algorithm,
|
||||
const std::string& network_config,
|
||||
const std::string& program_name,
|
||||
const std::string& kernel_name,
|
||||
const std::vector<size_t>& vld,
|
||||
const std::vector<size_t>& vgd,
|
||||
const std::string& params,
|
||||
std::size_t cache_index) const
|
||||
{
|
||||
|
||||
auto obj = this->impl->cache.AddKernel(
|
||||
*this, algorithm, network_config, program_name, kernel_name, vld, vgd, params, cache_index);
|
||||
return this->Run(obj);
|
||||
}
|
||||
|
||||
void Handle::ClearKernels(const std::string& algorithm, const std::string& network_config) const
|
||||
{
|
||||
this->impl->cache.ClearKernels(algorithm, network_config);
|
||||
}
|
||||
|
||||
const std::vector<Kernel>& Handle::GetKernelsImpl(const std::string& algorithm,
|
||||
const std::string& network_config) const
|
||||
{
|
||||
return this->impl->cache.GetKernels(algorithm, network_config);
|
||||
}
|
||||
|
||||
bool Handle::HasKernel(const std::string& algorithm, const std::string& network_config) const
|
||||
{
|
||||
return this->impl->cache.HasKernels(algorithm, network_config);
|
||||
}
|
||||
|
||||
KernelInvoke Handle::Run(Kernel k) const { return k.Invoke(this->GetStream()); }
|
||||
|
||||
Program Handle::LoadProgram(const std::string& program_name, std::string params) const
|
||||
{
|
||||
if((!online_compile::EndsWith(program_name, ".mlir-cpp")) &&
|
||||
(!online_compile::EndsWith(program_name, ".mlir")))
|
||||
{
|
||||
params += " -mcpu=" + this->GetTargetProperties().Name();
|
||||
}
|
||||
|
||||
auto hsaco = online_compile::LoadBinary(
|
||||
this->GetTargetProperties(), this->GetMaxComputeUnits(), program_name, params);
|
||||
if(hsaco.empty())
|
||||
{
|
||||
auto p = HIPOCProgram{program_name, params, this->GetTargetProperties()};
|
||||
|
||||
auto path = online_compile::GetCachePath() / boost::filesystem::unique_path();
|
||||
if(p.IsCodeObjectInMemory())
|
||||
online_compile::WriteFile(p.GetCodeObjectBlob(), path);
|
||||
else
|
||||
boost::filesystem::copy_file(p.GetCodeObjectPathname(), path);
|
||||
online_compile::SaveBinary(path, this->GetTargetProperties(), program_name, params);
|
||||
|
||||
return p;
|
||||
}
|
||||
else
|
||||
{
|
||||
return HIPOCProgram{program_name, hsaco};
|
||||
}
|
||||
}
|
||||
|
||||
bool Handle::HasProgram(const std::string& program_name, const std::string& params) const
|
||||
{
|
||||
return this->impl->cache.HasProgram(program_name, params);
|
||||
}
|
||||
|
||||
void Handle::AddProgram(Program prog,
|
||||
const std::string& program_name,
|
||||
const std::string& params) const
|
||||
{
|
||||
this->impl->cache.AddProgram(prog, program_name, params);
|
||||
}
|
||||
|
||||
void Handle::Finish() const { MY_HIP_CHECK(hipStreamSynchronize(this->GetStream())); }
|
||||
|
||||
std::size_t Handle::GetLocalMemorySize() const
|
||||
{
|
||||
int result;
|
||||
|
||||
MY_HIP_CHECK(hipDeviceGetAttribute(
|
||||
&result, hipDeviceAttributeMaxSharedMemoryPerBlock, this->impl->device));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::size_t Handle::GetGlobalMemorySize() const
|
||||
{
|
||||
size_t result;
|
||||
|
||||
MY_HIP_CHECK(hipDeviceTotalMem(&result, this->impl->device));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::size_t Handle::GetMaxComputeUnits() const
|
||||
{
|
||||
int result;
|
||||
const char* const num_cu = online_compile::GetStringEnv(OLC_DEVICE_CU{});
|
||||
if(num_cu != nullptr && strlen(num_cu) > 0)
|
||||
{
|
||||
return boost::lexical_cast<std::size_t>(num_cu);
|
||||
}
|
||||
|
||||
MY_HIP_CHECK(
|
||||
hipDeviceGetAttribute(&result, hipDeviceAttributeMultiprocessorCount, this->impl->device));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::size_t Handle::GetWavefrontWidth() const
|
||||
{
|
||||
hipDeviceProp_t props{};
|
||||
|
||||
MY_HIP_CHECK(hipGetDeviceProperties(&props, this->impl->device));
|
||||
|
||||
auto result = static_cast<size_t>(props.warpSize);
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string Handle::GetDeviceNameImpl() const { return this->impl->get_device_name(); }
|
||||
|
||||
std::string Handle::GetDeviceName() const { return this->impl->target_properties.Name(); }
|
||||
|
||||
const TargetProperties& Handle::GetTargetProperties() const
|
||||
{
|
||||
return this->impl->target_properties;
|
||||
}
|
||||
|
||||
std::ostream& Handle::Print(std::ostream& os) const
|
||||
{
|
||||
os << "stream: " << this->impl->stream << ", device_id: " << this->impl->device;
|
||||
return os;
|
||||
}
|
||||
|
||||
} // namespace online_compile
|
||||
@@ -1,346 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
|
||||
#include <hip_build_utils.hpp>
|
||||
#include <stringutils.hpp>
|
||||
#include <tmp_dir.hpp>
|
||||
#include <env.hpp>
|
||||
#include <target_properties.hpp>
|
||||
#include <write_file.hpp>
|
||||
#include <exec_utils.hpp>
|
||||
#include <logger.hpp>
|
||||
#include <config.h>
|
||||
#include <boost/optional.hpp>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
OLC_DECLARE_ENV_VAR(OLC_DEBUG_HIP_VERBOSE)
|
||||
OLC_DECLARE_ENV_VAR(OLC_DEBUG_HIP_DUMP)
|
||||
|
||||
#define OLC_HIP_COMPILER "/opt/rocm/llvm/bin/clang++"
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
bool IsHccCompiler()
|
||||
{
|
||||
static const auto isHcc = EndsWith(OLC_HIP_COMPILER, "hcc");
|
||||
return isHcc;
|
||||
}
|
||||
|
||||
bool IsHipClangCompiler()
|
||||
{
|
||||
static const auto isClangXX = EndsWith(OLC_HIP_COMPILER, "clang++");
|
||||
return isClangXX;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
inline bool ProduceCoV3()
|
||||
{
|
||||
// Otherwise, let's enable CO v3 for HIP kernels since ROCm 3.0.
|
||||
return (HipCompilerVersion() >= external_tool_version_t{3, 0, -1});
|
||||
}
|
||||
|
||||
/// Returns option for enabling/disabling CO v3 generation for the compiler
|
||||
/// that builds HIP kernels, depending on compiler version etc.
|
||||
inline const std::string& GetCoV3Option(const bool enable)
|
||||
{
|
||||
/// \note PR #2166 uses the "--hcc-cov3" option when isHCC is true.
|
||||
/// It's unclear why... HCC included in ROCm 2.8 does not support it,
|
||||
/// perhaps it suits for some older HCC?
|
||||
///
|
||||
/// These options are Ok for ROCm 3.0:
|
||||
static const std::string option_enable{"-mcode-object-v3"};
|
||||
static const std::string no_option{};
|
||||
if(enable)
|
||||
return option_enable;
|
||||
else
|
||||
return no_option;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
static boost::filesystem::path HipBuildImpl(boost::optional<TmpDir>& tmp_dir,
|
||||
const std::string& filename,
|
||||
std::string src,
|
||||
std::string params,
|
||||
const TargetProperties& target,
|
||||
const bool testing_mode,
|
||||
const bool sources_already_reside_on_filesystem)
|
||||
{
|
||||
#ifdef __linux__
|
||||
// Write out the include files
|
||||
// Let's assume includes are overkill for feature tests & optimize'em out.
|
||||
if(!testing_mode)
|
||||
{
|
||||
auto inc_list = GetHipKernelIncList();
|
||||
auto inc_path = tmp_dir->path;
|
||||
boost::filesystem::create_directories(inc_path);
|
||||
for(auto inc_file : inc_list)
|
||||
{
|
||||
auto inc_src = GetKernelInc(inc_file);
|
||||
WriteFile(inc_src, inc_path / inc_file);
|
||||
}
|
||||
}
|
||||
|
||||
// Sources produced by MLIR-cpp already reside in tmp dir.
|
||||
if(!sources_already_reside_on_filesystem)
|
||||
{
|
||||
src += "\nint main() {}\n";
|
||||
WriteFile(src, tmp_dir->path / filename);
|
||||
}
|
||||
|
||||
// cppcheck-suppress unreadVariable
|
||||
const LcOptionTargetStrings lots(target);
|
||||
|
||||
auto env = std::string("");
|
||||
if(IsHccCompiler())
|
||||
{
|
||||
params += " -amdgpu-target=" + target.Name();
|
||||
params += " " + GetCoV3Option(ProduceCoV3());
|
||||
}
|
||||
else if(IsHipClangCompiler())
|
||||
{
|
||||
if(params.find("-std=") == std::string::npos)
|
||||
params += " --std=c++11";
|
||||
|
||||
if(HipCompilerVersion() < external_tool_version_t{4, 1, 0})
|
||||
params += " --cuda-gpu-arch=" + lots.device;
|
||||
else
|
||||
params += " --cuda-gpu-arch=" + lots.device + lots.xnack;
|
||||
|
||||
params += " --cuda-device-only";
|
||||
params += " -c";
|
||||
params += " -O3 ";
|
||||
}
|
||||
|
||||
params += " -Wno-unused-command-line-argument -I. ";
|
||||
params += OLC_STRINGIZE(HIP_COMPILER_FLAGS);
|
||||
if(IsHccCompiler())
|
||||
{
|
||||
env += std::string("KMOPTLLC=\"-mattr=+enable-ds128 ");
|
||||
if(HipCompilerVersion() >= external_tool_version_t{2, 8, 0})
|
||||
env += " --amdgpu-spill-vgpr-to-agpr=0";
|
||||
env += '\"';
|
||||
}
|
||||
else if(IsHipClangCompiler())
|
||||
{
|
||||
params += " -mllvm --amdgpu-spill-vgpr-to-agpr=0";
|
||||
params += " -mllvm -amdgpu-early-inline-all=true";
|
||||
params += " -mllvm -amdgpu-function-calls=false";
|
||||
}
|
||||
|
||||
if(online_compile::IsEnabled(OLC_DEBUG_HIP_VERBOSE{}))
|
||||
{
|
||||
params += " -v";
|
||||
}
|
||||
|
||||
if(online_compile::IsEnabled(OLC_DEBUG_HIP_DUMP{}))
|
||||
{
|
||||
if(IsHccCompiler())
|
||||
{
|
||||
params += " -gline-tables-only";
|
||||
env += " KMDUMPISA=1";
|
||||
env += " KMDUMPLLVM=1";
|
||||
}
|
||||
else if(IsHipClangCompiler())
|
||||
{
|
||||
params += " -gline-tables-only";
|
||||
params += " -save-temps";
|
||||
}
|
||||
}
|
||||
|
||||
// hip version
|
||||
params +=
|
||||
std::string(" -DHIP_PACKAGE_VERSION_FLAT=") + std::to_string(HIP_PACKAGE_VERSION_FLAT);
|
||||
|
||||
params += " ";
|
||||
auto bin_file = tmp_dir->path / (filename + ".o");
|
||||
|
||||
// compile
|
||||
const std::string redirector = testing_mode ? " 1>/dev/null 2>&1" : "";
|
||||
tmp_dir->Execute(env + std::string(" ") + OLC_HIP_COMPILER,
|
||||
params + filename + " -o " + bin_file.string() + redirector);
|
||||
if(!boost::filesystem::exists(bin_file))
|
||||
throw std::runtime_error(filename + " failed to compile");
|
||||
#ifdef EXTRACTKERNEL_BIN
|
||||
if(IsHccCompiler())
|
||||
{
|
||||
// call extract kernel
|
||||
tmp_dir->Execute(EXTRACTKERNEL_BIN, " -i " + bin_file.string());
|
||||
auto hsaco =
|
||||
std::find_if(boost::filesystem::directory_iterator{tmp_dir->path}, {}, [](auto entry) {
|
||||
return (entry.path().extension() == ".hsaco");
|
||||
});
|
||||
|
||||
if(hsaco == boost::filesystem::directory_iterator{})
|
||||
{
|
||||
fdt_log(LogLevel::Info, "HipBuild", "failed to find *.hsaco in ")
|
||||
<< hsaco->path().string() << std::endl;
|
||||
}
|
||||
|
||||
return hsaco->path();
|
||||
}
|
||||
#endif
|
||||
return bin_file;
|
||||
#else
|
||||
(void)filename;
|
||||
(void)params;
|
||||
throw std::runtimer_error("HIP kernels are only supported in Linux");
|
||||
#endif
|
||||
}
|
||||
|
||||
boost::filesystem::path HipBuild(boost::optional<TmpDir>& tmp_dir,
|
||||
const std::string& filename,
|
||||
std::string src,
|
||||
std::string params,
|
||||
const TargetProperties& target,
|
||||
const bool sources_already_reside_on_filesystem)
|
||||
{
|
||||
return HipBuildImpl(
|
||||
tmp_dir, filename, src, params, target, false, sources_already_reside_on_filesystem);
|
||||
}
|
||||
|
||||
void bin_file_to_str(const boost::filesystem::path& file, std::string& buf)
|
||||
{
|
||||
std::ifstream bin_file_ptr(file.string().c_str(), std::ios::binary);
|
||||
std::ostringstream bin_file_strm;
|
||||
bin_file_strm << bin_file_ptr.rdbuf();
|
||||
buf = bin_file_strm.str();
|
||||
}
|
||||
|
||||
static external_tool_version_t HipCompilerVersionImpl()
|
||||
{
|
||||
external_tool_version_t version;
|
||||
if(IsHccCompiler())
|
||||
{
|
||||
const std::string path(OLC_HIP_COMPILER);
|
||||
const std::string mandatory_prefix("(based on HCC ");
|
||||
do
|
||||
{
|
||||
if(path.empty() || !std::ifstream(path).good())
|
||||
break;
|
||||
|
||||
std::stringstream out;
|
||||
if(online_compile::exec::Run(path + " --version", nullptr, &out) != 0)
|
||||
break;
|
||||
|
||||
std::string line;
|
||||
while(!out.eof())
|
||||
{
|
||||
std::getline(out, line);
|
||||
fdt_log() << line;
|
||||
auto begin = line.find(mandatory_prefix);
|
||||
if(begin == std::string::npos)
|
||||
continue;
|
||||
|
||||
begin += mandatory_prefix.size();
|
||||
int v3, v2, v1 = v2 = v3 = -1;
|
||||
char c2, c1 = c2 = 'X';
|
||||
std::istringstream iss(line.substr(begin));
|
||||
iss >> v1 >> c1 >> v2 >> c2 >> v3;
|
||||
if(!iss.fail() && v1 >= 0)
|
||||
{
|
||||
version.major = v1;
|
||||
if(c1 == '.' && v2 >= 0)
|
||||
{
|
||||
version.minor = v2;
|
||||
if(c2 == '.' && v3 >= 0)
|
||||
version.patch = v3;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
} while(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef HIP_PACKAGE_VERSION_MAJOR
|
||||
fdt_log(
|
||||
LogLevel::Info, "HipCompilerVersion", "Read version information from HIP package...");
|
||||
version.major = HIP_PACKAGE_VERSION_MAJOR;
|
||||
#ifdef HIP_PACKAGE_VERSION_MINOR
|
||||
version.minor = HIP_PACKAGE_VERSION_MINOR;
|
||||
#else
|
||||
version.minor = 0;
|
||||
#endif
|
||||
#ifdef HIP_PACKAGE_VERSION_PATCH
|
||||
version.patch = HIP_PACKAGE_VERSION_PATCH;
|
||||
#else
|
||||
version.patch = 0;
|
||||
#endif
|
||||
#else // HIP_PACKAGE_VERSION_MAJOR is not defined. CMake failed to find HIP package.
|
||||
fdt_log(LogLevel::Info, "HipCompilerVersion", "...assuming 3.2.0 (hip-clang RC)");
|
||||
version.major = 3;
|
||||
version.minor = 2;
|
||||
version.patch = 0;
|
||||
#endif
|
||||
}
|
||||
fdt_log() << version.major << '.' << version.minor << '.' << version.patch << std::endl;
|
||||
return version;
|
||||
}
|
||||
|
||||
external_tool_version_t HipCompilerVersion()
|
||||
{
|
||||
// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables)
|
||||
static auto once = HipCompilerVersionImpl();
|
||||
return once;
|
||||
}
|
||||
|
||||
bool operator>(const external_tool_version_t& lhs, const external_tool_version_t& rhs)
|
||||
{
|
||||
if(lhs.major > rhs.major)
|
||||
return true;
|
||||
else if(lhs.major == rhs.major)
|
||||
{
|
||||
if(lhs.minor > rhs.minor)
|
||||
return true;
|
||||
else if(lhs.minor == rhs.minor)
|
||||
return (lhs.patch > rhs.patch);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool operator<(const external_tool_version_t& lhs, const external_tool_version_t& rhs)
|
||||
{
|
||||
return rhs > lhs;
|
||||
}
|
||||
bool operator>=(const external_tool_version_t& lhs, const external_tool_version_t& rhs)
|
||||
{
|
||||
return !(lhs < rhs);
|
||||
}
|
||||
|
||||
bool operator<=(const external_tool_version_t& lhs, const external_tool_version_t& rhs)
|
||||
{
|
||||
return !(lhs > rhs);
|
||||
}
|
||||
|
||||
} // namespace online_compile
|
||||
@@ -1,84 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
|
||||
#include <env.hpp>
|
||||
#include <hipoc_kernel.hpp>
|
||||
#include <hipCheck.hpp>
|
||||
|
||||
#include <hip/hip_ext.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
void HIPOCKernelInvoke::run(void* args, std::size_t size) const
|
||||
{
|
||||
HipEventPtr start = nullptr;
|
||||
HipEventPtr stop = nullptr;
|
||||
void* config[] = {// HIP_LAUNCH_PARAM_* are macros that do horrible things
|
||||
// NOLINTNEXTLINE cppcoreguidelines-pro-type-cstyle-cast
|
||||
HIP_LAUNCH_PARAM_BUFFER_POINTER,
|
||||
args,
|
||||
// NOLINTNEXTLINE cppcoreguidelines-pro-type-cstyle-cast
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE,
|
||||
&size,
|
||||
// NOLINTNEXTLINE cppcoreguidelines-pro-type-cstyle-cast
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
if(callback)
|
||||
{
|
||||
start = make_hip_event();
|
||||
stop = make_hip_event();
|
||||
}
|
||||
|
||||
MY_HIP_CHECK(hipExtModuleLaunchKernel(fun,
|
||||
gdims[0],
|
||||
gdims[1],
|
||||
gdims[2],
|
||||
ldims[0],
|
||||
ldims[1],
|
||||
ldims[2],
|
||||
0,
|
||||
stream,
|
||||
nullptr,
|
||||
reinterpret_cast<void**>(&config),
|
||||
start.get(),
|
||||
stop.get()));
|
||||
|
||||
if(callback)
|
||||
{
|
||||
MY_HIP_CHECK(hipEventSynchronize(stop.get()));
|
||||
callback(start.get(), stop.get());
|
||||
}
|
||||
}
|
||||
|
||||
HIPOCKernelInvoke HIPOCKernel::Invoke(hipStream_t stream,
|
||||
std::function<void(hipEvent_t, hipEvent_t)> callback) const
|
||||
{
|
||||
return HIPOCKernelInvoke{stream, fun, ldims, gdims, name, callback};
|
||||
}
|
||||
} // namespace online_compile
|
||||
@@ -1,139 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#include <hip_build_utils.hpp>
|
||||
#include <hipoc_program.hpp>
|
||||
#include <kernel.hpp>
|
||||
#include <stringutils.hpp>
|
||||
#include <target_properties.hpp>
|
||||
#include <env.hpp>
|
||||
#include <write_file.hpp>
|
||||
#include <boost/optional.hpp>
|
||||
#include <boost/filesystem/operations.hpp>
|
||||
|
||||
#include <cstring>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
static hipModulePtr CreateModule(const boost::filesystem::path& hsaco_file)
|
||||
{
|
||||
hipModule_t raw_m;
|
||||
MY_HIP_CHECK(hipModuleLoad(&raw_m, hsaco_file.string().c_str()));
|
||||
hipModulePtr m{raw_m};
|
||||
return m;
|
||||
}
|
||||
|
||||
template <typename T> /// intended for std::string and std::vector<char>
|
||||
hipModulePtr CreateModuleInMem(const T& blob)
|
||||
{
|
||||
hipModule_t raw_m;
|
||||
MY_HIP_CHECK(hipModuleLoadData(&raw_m, reinterpret_cast<const void*>(blob.data())));
|
||||
hipModulePtr m{raw_m};
|
||||
return m;
|
||||
}
|
||||
|
||||
HIPOCProgramImpl::HIPOCProgramImpl(const std::string& program_name,
|
||||
const boost::filesystem::path& filespec)
|
||||
: program(program_name), hsaco_file(filespec)
|
||||
{
|
||||
this->module = CreateModule(hsaco_file);
|
||||
}
|
||||
|
||||
HIPOCProgramImpl::HIPOCProgramImpl(const std::string& program_name,
|
||||
std::string params,
|
||||
const TargetProperties& target_)
|
||||
: program(program_name), target(target_)
|
||||
{
|
||||
BuildCodeObject(params);
|
||||
if(!binary.empty())
|
||||
{
|
||||
module = CreateModuleInMem(this->binary);
|
||||
}
|
||||
else
|
||||
{
|
||||
module = CreateModule(this->hsaco_file);
|
||||
}
|
||||
}
|
||||
|
||||
void HIPOCProgramImpl::BuildCodeObjectInFile(std::string& params,
|
||||
const std::string& src,
|
||||
const std::string& filename)
|
||||
{
|
||||
|
||||
this->dir.emplace(filename);
|
||||
hsaco_file = dir->path / (filename + ".o");
|
||||
|
||||
if(online_compile::EndsWith(filename, ".cpp"))
|
||||
{
|
||||
hsaco_file = HipBuild(dir, filename, src, params, target);
|
||||
}
|
||||
else
|
||||
throw std::runtime_error("Only HIP kernel source of .cpp file is supported");
|
||||
|
||||
if(!boost::filesystem::exists(hsaco_file))
|
||||
throw std::runtime_error("Cant find file: " + hsaco_file.string());
|
||||
}
|
||||
|
||||
void HIPOCProgramImpl::BuildCodeObject(std::string params)
|
||||
{
|
||||
std::string filename = program;
|
||||
|
||||
if(online_compile::EndsWith(filename, ".cpp"))
|
||||
{
|
||||
params += " -Wno-everything";
|
||||
}
|
||||
|
||||
BuildCodeObjectInFile(params, GetKernelSrc(this->program), filename);
|
||||
}
|
||||
|
||||
HIPOCProgram::HIPOCProgram() {}
|
||||
HIPOCProgram::HIPOCProgram(const std::string& program_name,
|
||||
std::string params,
|
||||
const TargetProperties& target)
|
||||
: impl(std::make_shared<HIPOCProgramImpl>(program_name, params, target))
|
||||
{
|
||||
}
|
||||
|
||||
HIPOCProgram::HIPOCProgram(const std::string& program_name, const boost::filesystem::path& hsaco)
|
||||
: impl(std::make_shared<HIPOCProgramImpl>(program_name, hsaco))
|
||||
{
|
||||
}
|
||||
|
||||
hipModule_t HIPOCProgram::GetModule() const { return impl->module.get(); }
|
||||
|
||||
boost::filesystem::path HIPOCProgram::GetCodeObjectPathname() const { return impl->hsaco_file; }
|
||||
|
||||
std::string HIPOCProgram::GetCodeObjectBlob() const
|
||||
{
|
||||
return {impl->binary.data(), impl->binary.size()};
|
||||
}
|
||||
|
||||
bool HIPOCProgram::IsCodeObjectInMemory() const { return !impl->binary.empty(); };
|
||||
|
||||
} // namespace online_compile
|
||||
@@ -1,66 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
|
||||
#include <kernel_build_params.hpp>
|
||||
#include <stringutils.hpp>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
static std::string GenerateDefines(const std::vector<KernelBuildParameter>& options,
|
||||
const std::string& prefix)
|
||||
{
|
||||
const auto strs =
|
||||
options | boost::adaptors::transformed([&prefix](const KernelBuildParameter& define) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << '-';
|
||||
if(define.type == ParameterTypes::Define)
|
||||
ss << prefix;
|
||||
|
||||
ss << define.name;
|
||||
|
||||
if(!define.value.empty())
|
||||
{
|
||||
switch(define.type)
|
||||
{
|
||||
case ParameterTypes::Define: ss << '='; break;
|
||||
case ParameterTypes::Option: ss << ' '; break;
|
||||
}
|
||||
|
||||
ss << define.value;
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
});
|
||||
|
||||
return JoinStrings(strs, " ");
|
||||
}
|
||||
|
||||
} // namespace online_compile
|
||||
@@ -1,154 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
/* ************************************************************************
|
||||
* Copyright 2015 Vratis, Ltd.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* ************************************************************************ */
|
||||
|
||||
#include <env.hpp>
|
||||
#include <kernel_cache.hpp>
|
||||
#include <stringutils.hpp>
|
||||
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
const std::vector<Kernel>& KernelCache::GetKernels(const std::string& algorithm,
|
||||
const std::string& network_config)
|
||||
{
|
||||
|
||||
std::pair<std::string, std::string> key = std::make_pair(algorithm, network_config);
|
||||
|
||||
const auto it = kernel_map.find(key);
|
||||
if(it != kernel_map.end())
|
||||
{
|
||||
return it->second;
|
||||
}
|
||||
|
||||
static const std::vector<Kernel> empty{};
|
||||
return empty;
|
||||
}
|
||||
|
||||
bool KernelCache::HasKernels(const std::string& algorithm, const std::string& network_config) const
|
||||
{
|
||||
const auto key = std::make_pair(algorithm, network_config);
|
||||
const auto it = kernel_map.find(key);
|
||||
if(it == kernel_map.end())
|
||||
return false;
|
||||
|
||||
if(it->second.empty())
|
||||
{
|
||||
throw std::runtime_error(
|
||||
"There should be at least one kernel in kernel cache if an entry exists");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool KernelCache::HasProgram(const std::string& name, const std::string& params) const
|
||||
{
|
||||
const auto key = std::make_pair(name, params);
|
||||
return program_map.count(key) > 0;
|
||||
}
|
||||
|
||||
void KernelCache::AddProgram(Program prog, const std::string& program_name, std::string params)
|
||||
{
|
||||
program_map[std::make_pair(program_name, params)] = prog;
|
||||
}
|
||||
|
||||
Kernel KernelCache::AddKernel(const Handle& h,
|
||||
const std::string& algorithm,
|
||||
const std::string& network_config,
|
||||
const std::string& program_name,
|
||||
const std::string& kernel_name,
|
||||
const std::vector<size_t>& vld,
|
||||
const std::vector<size_t>& vgd,
|
||||
std::string params,
|
||||
std::size_t cache_index)
|
||||
{
|
||||
const std::pair<std::string, std::string> key = std::make_pair(algorithm, network_config);
|
||||
|
||||
Program program;
|
||||
|
||||
auto program_it = program_map.find(std::make_pair(program_name, params));
|
||||
if(program_it != program_map.end())
|
||||
{
|
||||
program = program_it->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
program = h.LoadProgram(program_name, params);
|
||||
program_map[std::make_pair(program_name, params)] = program;
|
||||
}
|
||||
|
||||
Kernel kernel{};
|
||||
|
||||
kernel = Kernel{program, kernel_name, vld, vgd};
|
||||
|
||||
if(!network_config.empty() && !algorithm.empty())
|
||||
{
|
||||
this->AddKernel(key, kernel, cache_index);
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
void KernelCache::AddKernel(Key key, Kernel k, std::size_t cache_index)
|
||||
{
|
||||
auto&& v = kernel_map[key];
|
||||
if(cache_index >= v.size())
|
||||
{
|
||||
v.resize(cache_index + 1);
|
||||
}
|
||||
v[cache_index] = k;
|
||||
}
|
||||
|
||||
void KernelCache::ClearKernels(const std::string& algorithm, const std::string& network_config)
|
||||
{
|
||||
if(network_config.empty() || algorithm.empty())
|
||||
{
|
||||
throw std::runtime_error("Network config or algorithm empty.");
|
||||
}
|
||||
const std::pair<std::string, std::string> key = std::make_pair(algorithm, network_config);
|
||||
auto&& v = this->kernel_map[key];
|
||||
if(!v.empty()) {}
|
||||
v.clear();
|
||||
}
|
||||
|
||||
KernelCache::KernelCache() {}
|
||||
|
||||
} // namespace online_compile
|
||||
@@ -1,43 +0,0 @@
|
||||
#include <config.h>
|
||||
#include <logger.hpp>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
#if OLC_DEBUG
|
||||
static LogLevel defLevel = LogLevel::Info2;
|
||||
#else
|
||||
static LogLevel defLevel = LogLevel::Error;
|
||||
#endif
|
||||
|
||||
string LogLevelString(LogLevel level)
|
||||
{
|
||||
switch(level)
|
||||
{
|
||||
case LogLevel::Error: return ("Error");
|
||||
case LogLevel::Warning: return ("Warning");
|
||||
case LogLevel::Info: return ("Info");
|
||||
case LogLevel::Info2: return ("Info2");
|
||||
default: return ("Unknown");
|
||||
};
|
||||
};
|
||||
|
||||
ostream& fdt_log(LogLevel level, const char* header, const char* content)
|
||||
{
|
||||
if(level > online_compile::defLevel)
|
||||
{
|
||||
return (cerr);
|
||||
};
|
||||
|
||||
cerr << endl << LogLevelString(level) << ":" << header << ", " << content;
|
||||
|
||||
return (cerr);
|
||||
}
|
||||
|
||||
ostream& fdt_log() { return (cerr); };
|
||||
|
||||
void fdt_log_flush() { cerr << endl; }
|
||||
}; // namespace online_compile
|
||||
@@ -1,319 +0,0 @@
|
||||
/*
|
||||
* Derived from a public-domain MD5 implementation. Original license
|
||||
* below.
|
||||
*
|
||||
* This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
|
||||
* MD5 Message-Digest Algorithm (RFC 1321).
|
||||
*
|
||||
* Homepage:
|
||||
* http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
|
||||
*
|
||||
* Author:
|
||||
* Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
|
||||
*
|
||||
* This software was written by Alexander Peslyak in 2001. No copyright is
|
||||
* claimed, and the software is hereby placed in the public domain.
|
||||
* In case this attempt to disclaim copyright and place the software in the
|
||||
* public domain is deemed null and void, then the software is
|
||||
* Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
|
||||
* general public under the following terms:
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted.
|
||||
*
|
||||
* There's ABSOLUTELY NO WARRANTY, express or implied.
|
||||
*
|
||||
* (This is a heavily cut-down "BSD license".)
|
||||
*
|
||||
* This differs from Colin Plumb's older public domain implementation in that
|
||||
* no exactly 32-bit integer data type is required (any 32-bit or wider
|
||||
* unsigned integer data type will do), there's no compile-time endianness
|
||||
* configuration, and the function prototypes match OpenSSL's. No code from
|
||||
* Colin Plumb's implementation has been reused; this comment merely compares
|
||||
* the properties of the two independent implementations.
|
||||
*
|
||||
* The primary goals of this implementation are portability and ease of use.
|
||||
* It is meant to be fast, but not as fast as possible. Some known
|
||||
* optimizations are not included to reduce source code size and avoid
|
||||
* compile-time configuration.
|
||||
*/
|
||||
#include <md5.hpp>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <cstdint>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
|
||||
#define MD5_DIGEST_LENGTH 16
|
||||
|
||||
struct MD5_CTX
|
||||
{
|
||||
uint32_t lo, hi;
|
||||
uint32_t a, b, c, d;
|
||||
unsigned char buffer[64];
|
||||
uint32_t block[MD5_DIGEST_LENGTH];
|
||||
};
|
||||
|
||||
/*
|
||||
* The basic MD5 functions.
|
||||
*
|
||||
* F and G are optimized compared to their RFC 1321 definitions for
|
||||
* architectures that lack an AND-NOT instruction, just like in Colin Plumb's
|
||||
* implementation.
|
||||
*/
|
||||
#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
|
||||
#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
|
||||
#define H(x, y, z) (((x) ^ (y)) ^ (z))
|
||||
#define H2(x, y, z) ((x) ^ ((y) ^ (z)))
|
||||
#define I(x, y, z) ((y) ^ ((x) | ~(z)))
|
||||
|
||||
/*
|
||||
* The MD5 transformation for all four rounds.
|
||||
*/
|
||||
#define STEP(f, a, b, c, d, x, t, s) \
|
||||
(a) += f((b), (c), (d)) + (x) + (t); \
|
||||
(a) = (((a) << (s)) | (((a)&0xffffffff) >> (32 - (s)))); \
|
||||
(a) += (b);
|
||||
|
||||
/*
|
||||
* SET reads 4 input bytes in little-endian byte order and stores them in a
|
||||
* properly aligned word in host byte order.
|
||||
*
|
||||
* The check for little-endian architectures that tolerate unaligned memory
|
||||
* accesses is just an optimization. Nothing will break if it fails to detect
|
||||
* a suitable architecture.
|
||||
*
|
||||
* Unfortunately, this optimization may be a C strict aliasing rules violation
|
||||
* if the caller's data buffer has effective type that cannot be aliased by
|
||||
* uint32_t. In practice, this problem may occur if these MD5 routines are
|
||||
* inlined into a calling function, or with future and dangerously advanced
|
||||
* link-time optimizations. For the time being, keeping these MD5 routines in
|
||||
* their own translation unit avoids the problem.
|
||||
*/
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
|
||||
#define SET(n) (*reinterpret_cast<const uint32_t*>(&ptr[(n)*4]))
|
||||
#define GET(n) SET(n)
|
||||
#else
|
||||
#define SET(n) \
|
||||
(ctx->block[(n)] = static_cast<uint32_t>(ptr[(n)*4]) | \
|
||||
(static_cast<uint32_t>(ptr[(n)*4 + 1]) << 8) | \
|
||||
(static_cast<uint32_t>(ptr[(n)*4 + 2]) << 16) | \
|
||||
(static_cast<uint32_t>(ptr[(n)*4 + 3]) << 24))
|
||||
#define GET(n) (ctx->block[(n)])
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This processes one or more 64-byte data blocks, but does NOT update the bit
|
||||
* counters. There are no alignment requirements.
|
||||
*/
|
||||
static const void* body(MD5_CTX* ctx, const void* data, size_t size)
|
||||
{
|
||||
const unsigned char* ptr;
|
||||
uint32_t a, b, c, d;
|
||||
|
||||
ptr = static_cast<const unsigned char*>(data);
|
||||
|
||||
a = ctx->a;
|
||||
b = ctx->b;
|
||||
c = ctx->c;
|
||||
d = ctx->d;
|
||||
|
||||
do
|
||||
{
|
||||
uint32_t saved_a = a, saved_b = b, saved_c = c, saved_d = d;
|
||||
|
||||
/* Round 1 */
|
||||
STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
|
||||
STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
|
||||
STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
|
||||
STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
|
||||
STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
|
||||
STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
|
||||
STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
|
||||
STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
|
||||
STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
|
||||
STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
|
||||
STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
|
||||
STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
|
||||
STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
|
||||
STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
|
||||
STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
|
||||
STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
|
||||
|
||||
/* Round 2 */
|
||||
STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
|
||||
STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
|
||||
STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
|
||||
STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
|
||||
STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
|
||||
STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
|
||||
STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
|
||||
STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
|
||||
STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
|
||||
STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
|
||||
STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
|
||||
STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
|
||||
STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
|
||||
STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
|
||||
STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
|
||||
STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
|
||||
|
||||
/* Round 3 */
|
||||
STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
|
||||
STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
|
||||
STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
|
||||
STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
|
||||
STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
|
||||
STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
|
||||
STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
|
||||
STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
|
||||
STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
|
||||
STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
|
||||
STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
|
||||
STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
|
||||
STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
|
||||
STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
|
||||
STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
|
||||
STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)
|
||||
|
||||
/* Round 4 */
|
||||
STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
|
||||
STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
|
||||
STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
|
||||
STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
|
||||
STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
|
||||
STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
|
||||
STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
|
||||
STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
|
||||
STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
|
||||
STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
|
||||
STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
|
||||
STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
|
||||
STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
|
||||
STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
|
||||
STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
|
||||
STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
|
||||
|
||||
a += saved_a;
|
||||
b += saved_b;
|
||||
c += saved_c;
|
||||
d += saved_d;
|
||||
|
||||
ptr += 64;
|
||||
} while((size -= 64) != 0u);
|
||||
|
||||
ctx->a = a;
|
||||
ctx->b = b;
|
||||
ctx->c = c;
|
||||
ctx->d = d;
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static void MD5_Init(MD5_CTX* ctx)
|
||||
{
|
||||
ctx->a = 0x67452301;
|
||||
ctx->b = 0xefcdab89;
|
||||
ctx->c = 0x98badcfe;
|
||||
ctx->d = 0x10325476;
|
||||
|
||||
ctx->lo = 0;
|
||||
ctx->hi = 0;
|
||||
}
|
||||
|
||||
static void MD5_Update(MD5_CTX* ctx, const void* data, size_t size)
|
||||
{
|
||||
uint32_t saved_lo;
|
||||
size_t used;
|
||||
|
||||
saved_lo = ctx->lo;
|
||||
if((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
|
||||
ctx->hi++;
|
||||
ctx->hi += size >> 29;
|
||||
|
||||
used = saved_lo & 0x3f;
|
||||
|
||||
if(used != 0u)
|
||||
{
|
||||
size_t available = 64 - used;
|
||||
|
||||
if(size < available)
|
||||
{
|
||||
memcpy(&ctx->buffer[used], data, size);
|
||||
return;
|
||||
}
|
||||
|
||||
memcpy(&ctx->buffer[used], data, available);
|
||||
data = static_cast<const unsigned char*>(data) + available;
|
||||
size -= available;
|
||||
body(ctx, ctx->buffer, 64);
|
||||
}
|
||||
|
||||
if(size >= 64)
|
||||
{
|
||||
data = body(ctx, data, size & ~size_t{0x3f});
|
||||
size &= 0x3f;
|
||||
}
|
||||
|
||||
memcpy(ctx->buffer, data, size);
|
||||
}
|
||||
|
||||
#define OUT(dst, src) \
|
||||
(dst)[0] = static_cast<unsigned char>(src); \
|
||||
(dst)[1] = static_cast<unsigned char>((src) >> 8); \
|
||||
(dst)[2] = static_cast<unsigned char>((src) >> 16); \
|
||||
(dst)[3] = static_cast<unsigned char>((src) >> 24);
|
||||
|
||||
static void MD5_Final(unsigned char* result, MD5_CTX* ctx)
|
||||
{
|
||||
size_t used, available;
|
||||
|
||||
used = ctx->lo & 0x3f;
|
||||
|
||||
ctx->buffer[used++] = 0x80;
|
||||
|
||||
available = 64 - used;
|
||||
|
||||
if(available < 8)
|
||||
{
|
||||
memset(&ctx->buffer[used], 0, available);
|
||||
body(ctx, ctx->buffer, 64);
|
||||
used = 0;
|
||||
available = 64;
|
||||
}
|
||||
|
||||
memset(&ctx->buffer[used], 0, available - 8);
|
||||
|
||||
ctx->lo <<= 3;
|
||||
OUT(&ctx->buffer[56], ctx->lo)
|
||||
OUT(&ctx->buffer[60], ctx->hi)
|
||||
|
||||
body(ctx, ctx->buffer, 64);
|
||||
|
||||
OUT(&result[0], ctx->a)
|
||||
OUT(&result[4], ctx->b)
|
||||
OUT(&result[8], ctx->c)
|
||||
OUT(&result[12], ctx->d)
|
||||
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
}
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
std::string md5(std::string s)
|
||||
{
|
||||
std::array<unsigned char, MD5_DIGEST_LENGTH> result{};
|
||||
|
||||
MD5_CTX ctx{};
|
||||
MD5_Init(&ctx);
|
||||
MD5_Update(&ctx, s.data(), s.length());
|
||||
MD5_Final(result.data(), &ctx);
|
||||
|
||||
std::ostringstream sout;
|
||||
sout << std::hex << std::setfill('0');
|
||||
for(auto c : result)
|
||||
sout << std::setw(2) << int{c};
|
||||
|
||||
return sout.str();
|
||||
}
|
||||
} // namespace online_compile
|
||||
@@ -1,119 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2020 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#include <env.hpp>
|
||||
#include <handle.hpp>
|
||||
#include <stringutils.hpp>
|
||||
#include <target_properties.hpp>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
OLC_DECLARE_ENV_VAR(OLC_DEBUG_ENFORCE_DEVICE)
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
static std::string GetDeviceNameFromMap(const std::string& in)
|
||||
{
|
||||
// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables)
|
||||
static std::map<std::string, std::string> device_name_map = {
|
||||
{"Ellesmere", "gfx803"},
|
||||
{"Baffin", "gfx803"},
|
||||
{"RacerX", "gfx803"},
|
||||
{"Polaris10", "gfx803"},
|
||||
{"Polaris11", "gfx803"},
|
||||
{"Tonga", "gfx803"},
|
||||
{"Fiji", "gfx803"},
|
||||
{"gfx800", "gfx803"},
|
||||
{"gfx802", "gfx803"},
|
||||
{"gfx804", "gfx803"},
|
||||
{"Vega10", "gfx900"},
|
||||
{"gfx901", "gfx900"},
|
||||
{"10.3.0 Sienna_Cichlid 18", "gfx1030"},
|
||||
};
|
||||
|
||||
const char* const p_asciz = online_compile::GetStringEnv(OLC_DEBUG_ENFORCE_DEVICE{});
|
||||
if(p_asciz != nullptr && strlen(p_asciz) > 0)
|
||||
return {p_asciz};
|
||||
|
||||
const auto name = in.substr(0, in.find(':')); // str.substr(0, npos) returns str.
|
||||
|
||||
auto match = device_name_map.find(name);
|
||||
if(match != device_name_map.end())
|
||||
return match->second;
|
||||
return name; // NOLINT (performance-no-automatic-move)
|
||||
}
|
||||
|
||||
void TargetProperties::Init(const Handle* const handle)
|
||||
{
|
||||
const auto rawName = [&]() -> std::string { return handle->GetDeviceNameImpl(); }();
|
||||
name = GetDeviceNameFromMap(rawName);
|
||||
// DKMS driver older than 5.9 may report incorrect state of SRAMECC feature.
|
||||
// Therefore we compute default SRAMECC and rely on it for now.
|
||||
sramecc = [&]() -> boost::optional<bool> {
|
||||
if(name == "gfx906" || name == "gfx908")
|
||||
return {true};
|
||||
return {};
|
||||
}();
|
||||
// However we need to store the reported state, even if it is incorrect,
|
||||
// to use together with COMGR.
|
||||
sramecc_reported = [&]() -> boost::optional<bool> {
|
||||
if(rawName.find(":sramecc+") != std::string::npos)
|
||||
return true;
|
||||
if(rawName.find(":sramecc-") != std::string::npos)
|
||||
return false;
|
||||
return sramecc; // default
|
||||
}();
|
||||
xnack = [&]() -> boost::optional<bool> {
|
||||
if(rawName.find(":xnack+") != std::string::npos)
|
||||
return true;
|
||||
if(rawName.find(":xnack-") != std::string::npos)
|
||||
return false;
|
||||
return {}; // default
|
||||
}();
|
||||
InitDbId();
|
||||
}
|
||||
|
||||
void TargetProperties::InitDbId()
|
||||
{
|
||||
dbId = name;
|
||||
if(name == "gfx906" || name == "gfx908")
|
||||
{
|
||||
// Let's stay compatible with existing gfx906/908 databases.
|
||||
// When feature equal to the default (SRAMECC ON), do not
|
||||
// append feature suffix. This is for backward compatibility
|
||||
// with legacy databases ONLY!
|
||||
if(!sramecc || !(*sramecc))
|
||||
dbId += "_nosramecc";
|
||||
}
|
||||
else
|
||||
{
|
||||
if(sramecc && *sramecc)
|
||||
dbId += "_sramecc";
|
||||
}
|
||||
if(xnack && *xnack)
|
||||
dbId += "_xnack";
|
||||
}
|
||||
|
||||
} // namespace online_compile
|
||||
@@ -1,66 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
|
||||
#include <tmp_dir.hpp>
|
||||
#include <env.hpp>
|
||||
#include <boost/filesystem.hpp>
|
||||
#include <logger.hpp>
|
||||
|
||||
OLC_DECLARE_ENV_VAR(OLC_DEBUG_SAVE_TEMP_DIR)
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
void SystemCmd(std::string cmd)
|
||||
{
|
||||
fdt_log(LogLevel::Info, "SystemCmd", cmd.c_str());
|
||||
fdt_log_flush();
|
||||
if(std::system(cmd.c_str()) != 0)
|
||||
throw std::runtime_error("Can't execute " + cmd);
|
||||
}
|
||||
|
||||
TmpDir::TmpDir(std::string prefix)
|
||||
: path(boost::filesystem::temp_directory_path() /
|
||||
boost::filesystem::unique_path("online_compile-" + prefix + "-%%%%-%%%%-%%%%-%%%%"))
|
||||
{
|
||||
boost::filesystem::create_directories(this->path);
|
||||
}
|
||||
|
||||
void TmpDir::Execute(std::string exe, std::string args) const
|
||||
{
|
||||
std::string cd = "cd " + this->path.string() + "; ";
|
||||
std::string cmd = cd + exe + " " + args; // + " > /dev/null";
|
||||
SystemCmd(cmd);
|
||||
}
|
||||
|
||||
TmpDir::~TmpDir()
|
||||
{
|
||||
if(!online_compile::IsEnabled(OLC_DEBUG_SAVE_TEMP_DIR{}))
|
||||
{
|
||||
boost::filesystem::remove_all(this->path);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace online_compile
|
||||
@@ -1,52 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef GUARD_OLC_BINARY_CACHE_HPP
|
||||
#define GUARD_OLC_BINARY_CACHE_HPP
|
||||
|
||||
#include <target_properties.hpp>
|
||||
#include <boost/filesystem/path.hpp>
|
||||
#include <string>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
boost::filesystem::path
|
||||
GetCacheFile(const std::string& device, const std::string& name, const std::string& args);
|
||||
|
||||
boost::filesystem::path GetCachePath();
|
||||
|
||||
boost::filesystem::path LoadBinary(const TargetProperties& target,
|
||||
std::size_t num_cu,
|
||||
const std::string& name,
|
||||
const std::string& args);
|
||||
void SaveBinary(const boost::filesystem::path& binary_path,
|
||||
const TargetProperties& target,
|
||||
const std::string& name,
|
||||
const std::string& args);
|
||||
|
||||
} // namespace online_compile
|
||||
|
||||
#endif
|
||||
@@ -1,47 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef GUARD_CONFIG_H_IN
|
||||
#define GUARD_CONFIG_H_IN
|
||||
|
||||
// "_PACKAGE_" to avoid name contentions: the macros like
|
||||
// HIP_VERSION_MAJOR are defined in hip_version.h.
|
||||
// clang-format off
|
||||
#define HIP_PACKAGE_VERSION_MAJOR @OLC_hip_VERSION_MAJOR@
|
||||
#define HIP_PACKAGE_VERSION_MINOR @OLC_hip_VERSION_MINOR@
|
||||
#define HIP_PACKAGE_VERSION_PATCH @OLC_hip_VERSION_PATCH@
|
||||
// clang-format on
|
||||
|
||||
#define HIP_PACKAGE_VERSION_FLAT \
|
||||
((HIP_PACKAGE_VERSION_MAJOR * 1000ULL + HIP_PACKAGE_VERSION_MINOR) * 1000000 + \
|
||||
HIP_PACKAGE_VERSION_PATCH)
|
||||
|
||||
#cmakedefine01 OLC_DEBUG
|
||||
|
||||
#cmakedefine OLC_HIP_COMPILER "@OLC_HIP_COMPILER@"
|
||||
#cmakedefine EXTRACTKERNEL_BIN "@EXTRACTKERNEL_BIN@"
|
||||
#cmakedefine OLC_OFFLOADBUNDLER_BIN "@OLC_OFFLOADBUNDLER_BIN@"
|
||||
|
||||
#endif
|
||||
@@ -1,123 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef GUARD_OLC_ENV_HPP
|
||||
#define GUARD_OLC_ENV_HPP
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
/// \todo Rework: Case-insensitive string compare, ODR, (?) move to .cpp
|
||||
|
||||
// Declare a cached environment variable
|
||||
#define OLC_DECLARE_ENV_VAR(x) \
|
||||
struct x \
|
||||
{ \
|
||||
static const char* value() { return #x; } \
|
||||
};
|
||||
|
||||
/*
|
||||
* Returns false if a feature-controlling environment variable is defined
|
||||
* and set to something which disables a feature.
|
||||
*/
|
||||
inline bool IsEnvvarValueDisabled(const char* name)
|
||||
{
|
||||
const auto value_env_p = std::getenv(name);
|
||||
return value_env_p != nullptr &&
|
||||
(std::strcmp(value_env_p, "disable") == 0 || std::strcmp(value_env_p, "disabled") == 0 ||
|
||||
std::strcmp(value_env_p, "0") == 0 || std::strcmp(value_env_p, "no") == 0 ||
|
||||
std::strcmp(value_env_p, "false") == 0);
|
||||
}
|
||||
|
||||
inline bool IsEnvvarValueEnabled(const char* name)
|
||||
{
|
||||
const auto value_env_p = std::getenv(name);
|
||||
return value_env_p != nullptr &&
|
||||
(std::strcmp(value_env_p, "enable") == 0 || std::strcmp(value_env_p, "enabled") == 0 ||
|
||||
std::strcmp(value_env_p, "1") == 0 || std::strcmp(value_env_p, "yes") == 0 ||
|
||||
std::strcmp(value_env_p, "true") == 0);
|
||||
}
|
||||
|
||||
// Return 0 if env is enabled else convert environment var to an int.
|
||||
// Supports hexadecimal with leading 0x or decimal
|
||||
inline unsigned long int EnvvarValue(const char* name, unsigned long int fallback = 0)
|
||||
{
|
||||
const auto value_env_p = std::getenv(name);
|
||||
if(value_env_p == nullptr)
|
||||
{
|
||||
return fallback;
|
||||
}
|
||||
else
|
||||
{
|
||||
return strtoul(value_env_p, nullptr, 0);
|
||||
}
|
||||
}
|
||||
|
||||
inline std::vector<std::string> GetEnv(const char* name)
|
||||
{
|
||||
const auto p = std::getenv(name);
|
||||
if(p == nullptr)
|
||||
return {};
|
||||
else
|
||||
return {{p}};
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline const char* GetStringEnv(T)
|
||||
{
|
||||
static const std::vector<std::string> result = GetEnv(T::value());
|
||||
if(result.empty())
|
||||
return nullptr;
|
||||
else
|
||||
return result.front().c_str();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline bool IsEnabled(T)
|
||||
{
|
||||
static const bool result = online_compile::IsEnvvarValueEnabled(T::value());
|
||||
return result;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline bool IsDisabled(T)
|
||||
{
|
||||
static const bool result = online_compile::IsEnvvarValueDisabled(T::value());
|
||||
return result;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline unsigned long int Value(T, unsigned long int fallback = 0)
|
||||
{
|
||||
static const auto result = online_compile::EnvvarValue(T::value(), fallback);
|
||||
return result;
|
||||
}
|
||||
} // namespace online_compile
|
||||
|
||||
#endif
|
||||
@@ -1,42 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef EXEC_OLC_UTILS_HPP
|
||||
#define EXEC_OLC_UTILS_HPP
|
||||
|
||||
#include <istream>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
|
||||
namespace online_compile {
|
||||
namespace exec {
|
||||
|
||||
/// Redirecting both input and output is not supported.
|
||||
int Run(const std::string& p, std::istream* in, std::ostream* out);
|
||||
|
||||
} // namespace exec
|
||||
} // namespace online_compile
|
||||
|
||||
#endif // EXEC_UTILS_HPP
|
||||
@@ -1,145 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef GUARD_OLC_HANDLE_HPP_
|
||||
#define GUARD_OLC_HANDLE_HPP_
|
||||
|
||||
#include <kernel.hpp>
|
||||
#include <stringutils.hpp>
|
||||
#include <target_properties.hpp>
|
||||
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <ios>
|
||||
#include <sstream>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
struct HandleImpl;
|
||||
|
||||
struct Handle
|
||||
{
|
||||
friend struct TargetProperties;
|
||||
|
||||
Handle();
|
||||
Handle(hipStream_t stream);
|
||||
Handle(Handle&&) noexcept;
|
||||
~Handle();
|
||||
|
||||
hipStream_t GetStream() const;
|
||||
void SetStream(hipStream_t streamID) const;
|
||||
|
||||
KernelInvoke AddKernel(const std::string& algorithm,
|
||||
const std::string& network_config,
|
||||
const std::string& program_name,
|
||||
const std::string& kernel_name,
|
||||
const std::vector<size_t>& vld,
|
||||
const std::vector<size_t>& vgd,
|
||||
const std::string& params,
|
||||
std::size_t cache_index = 0) const;
|
||||
|
||||
bool HasKernel(const std::string& algorithm, const std::string& network_config) const;
|
||||
|
||||
void ClearKernels(const std::string& algorithm, const std::string& network_config) const;
|
||||
|
||||
auto GetKernels(const std::string& algorithm, const std::string& network_config) const
|
||||
{
|
||||
return this->GetKernelsImpl(algorithm, network_config) |
|
||||
boost::adaptors::transformed([this](Kernel k) { return this->Run(k); });
|
||||
}
|
||||
KernelInvoke GetKernel(const std::string& algorithm, const std::string& network_config) const
|
||||
{
|
||||
auto ks = this->GetKernelsImpl(algorithm, network_config);
|
||||
if(ks.empty())
|
||||
{
|
||||
throw std::runtime_error("looking for default kernel (does not exist): " + algorithm +
|
||||
", " + network_config);
|
||||
}
|
||||
return this->Run(ks.front());
|
||||
}
|
||||
|
||||
KernelInvoke Run(Kernel k) const;
|
||||
|
||||
Program LoadProgram(const std::string& program_name, std::string params) const;
|
||||
|
||||
bool HasProgram(const std::string& program_name, const std::string& params) const;
|
||||
|
||||
void AddProgram(Program prog, const std::string& program_name, const std::string& params) const;
|
||||
|
||||
void Finish() const;
|
||||
|
||||
std::size_t GetLocalMemorySize() const;
|
||||
std::size_t GetGlobalMemorySize() const;
|
||||
std::size_t GetWavefrontWidth() const;
|
||||
std::size_t GetMaxComputeUnits() const;
|
||||
std::size_t GetMaxHardwareComputeUnits() const
|
||||
{
|
||||
std::size_t num_cu = this->GetMaxComputeUnits();
|
||||
std::string name = this->GetDeviceName();
|
||||
return StartsWith(name, "gfx1") ? num_cu * 2 /* CUs per WGP */ : num_cu;
|
||||
}
|
||||
|
||||
std::string GetDeviceName() const;
|
||||
const TargetProperties& GetTargetProperties() const;
|
||||
|
||||
private:
|
||||
std::string GetDeviceNameImpl() const;
|
||||
const std::vector<Kernel>& GetKernelsImpl(const std::string& algorithm,
|
||||
const std::string& network_config) const;
|
||||
|
||||
public:
|
||||
std::ostream& Print(std::ostream& os) const;
|
||||
|
||||
static std::string GetDbBasename(const TargetProperties& target, size_t num_cu)
|
||||
{
|
||||
auto ret = target.DbId() + [&]() {
|
||||
std::ostringstream ss;
|
||||
if(num_cu <= 64)
|
||||
ss << '_' << num_cu;
|
||||
else
|
||||
ss << std::hex << num_cu;
|
||||
return std::string(ss.str());
|
||||
}();
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::string GetDbBasename() const
|
||||
{
|
||||
return GetDbBasename(GetTargetProperties(), GetMaxComputeUnits());
|
||||
}
|
||||
|
||||
std::unique_ptr<HandleImpl> impl;
|
||||
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& os, const Handle& handle) { return handle.Print(os); }
|
||||
|
||||
} // namespace online_compile
|
||||
|
||||
#endif // GUARD_OLC_HANDLE_HPP_
|
||||
@@ -1,22 +0,0 @@
|
||||
#ifndef _HIP_OLC_CHECK_HPP_
|
||||
#define _HIP_OLC_CHECK_HPP_
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
// Here flag can be a constant, variable or function call
|
||||
#define MY_HIP_CHECK(flag) \
|
||||
do \
|
||||
{ \
|
||||
hipError_t _tmpVal; \
|
||||
if((_tmpVal = flag) != hipSuccess) \
|
||||
{ \
|
||||
std::ostringstream ostr; \
|
||||
ostr << "HIP Function Failed (" << __FILE__ << "," << __LINE__ << ") " \
|
||||
<< hipGetErrorString(_tmpVal); \
|
||||
throw std::runtime_error(ostr.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#endif
|
||||
@@ -1,97 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef OLC_GUARD_OLC_HIP_BUILD_UTILS_HPP
|
||||
#define OLC_GUARD_OLC_HIP_BUILD_UTILS_HPP
|
||||
|
||||
#include <target_properties.hpp>
|
||||
#include <kernel.hpp>
|
||||
#include <boost/optional.hpp>
|
||||
#include <string>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
boost::filesystem::path HipBuild(boost::optional<online_compile::TmpDir>& tmp_dir,
|
||||
const std::string& filename,
|
||||
std::string src,
|
||||
std::string params,
|
||||
const TargetProperties& target,
|
||||
bool sources_already_reside_on_filesystem = false);
|
||||
|
||||
void bin_file_to_str(const boost::filesystem::path& file, std::string& buf);
|
||||
|
||||
struct external_tool_version_t
|
||||
{
|
||||
int major = -1;
|
||||
int minor = -1;
|
||||
int patch = -1;
|
||||
friend bool operator>(const external_tool_version_t& lhs, const external_tool_version_t& rhs);
|
||||
friend bool operator<(const external_tool_version_t& lhs, const external_tool_version_t& rhs);
|
||||
friend bool operator>=(const external_tool_version_t& lhs, const external_tool_version_t& rhs);
|
||||
friend bool operator<=(const external_tool_version_t& lhs, const external_tool_version_t& rhs);
|
||||
};
|
||||
|
||||
external_tool_version_t HipCompilerVersion();
|
||||
|
||||
bool IsHccCompiler();
|
||||
bool IsHipClangCompiler();
|
||||
|
||||
class LcOptionTargetStrings
|
||||
{
|
||||
public:
|
||||
const std::string& device;
|
||||
const std::string xnack;
|
||||
|
||||
private:
|
||||
const std::string sramecc;
|
||||
const std::string sramecc_reported;
|
||||
|
||||
public:
|
||||
const std::string targetId;
|
||||
LcOptionTargetStrings(const TargetProperties& target)
|
||||
: device(target.Name()),
|
||||
xnack([&]() -> std::string {
|
||||
if(target.Xnack())
|
||||
return std::string{":xnack"} + (*target.Xnack() ? "+" : "-");
|
||||
return {};
|
||||
}()),
|
||||
sramecc([&]() -> std::string {
|
||||
if(target.Sramecc())
|
||||
return std::string{":sramecc"} + (*target.Sramecc() ? "+" : "-");
|
||||
return {};
|
||||
}()),
|
||||
sramecc_reported([&]() -> std::string {
|
||||
if(target.SrameccReported())
|
||||
return std::string{":sramecc"} + (*target.SrameccReported() ? "+" : "-");
|
||||
return {};
|
||||
}()),
|
||||
targetId(device + sramecc + xnack)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace online_compile
|
||||
|
||||
#endif
|
||||
@@ -1,174 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef GUARD_OLC_HIPOC_KERNEL_HPP
|
||||
#define GUARD_OLC_HIPOC_KERNEL_HPP
|
||||
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <hipoc_program.hpp>
|
||||
#include <stringutils.hpp>
|
||||
#include <manage_ptr.hpp>
|
||||
#include <op_kernel_args.hpp>
|
||||
#include <hipCheck.hpp>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
using HipEventPtr = OLC_MANAGE_PTR(hipEvent_t, hipEventDestroy);
|
||||
inline HipEventPtr make_hip_event()
|
||||
{
|
||||
hipEvent_t result = nullptr;
|
||||
MY_HIP_CHECK(hipEventCreate(&result));
|
||||
return HipEventPtr{result};
|
||||
}
|
||||
|
||||
template <class T, class U>
|
||||
struct KernelArgsPair
|
||||
{
|
||||
static const int alignment = sizeof(U);
|
||||
static const int padding = (alignment - (sizeof(T) % alignment)) % alignment;
|
||||
static const int second_index = sizeof(T) + padding;
|
||||
KernelArgsPair(T x, U y)
|
||||
{
|
||||
|
||||
new(buffer) T(x); // NOLINT (clang-analyzer-cplusplus.PlacementNew)
|
||||
new(buffer + second_index) U(y);
|
||||
}
|
||||
char buffer[second_index + sizeof(U)] = {};
|
||||
};
|
||||
|
||||
template <class... Ts>
|
||||
struct KernelArgsPack;
|
||||
|
||||
template <class T, class U, class... Ts>
|
||||
struct KernelArgsPack<T, U, Ts...>
|
||||
{
|
||||
using data_t = KernelArgsPack<KernelArgsPair<T, U>, Ts...>;
|
||||
KernelArgsPack(T x, U y, Ts... xs) : data(KernelArgsPair<T, U>(x, y), xs...) {}
|
||||
data_t data;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct KernelArgsPack<T>
|
||||
{
|
||||
KernelArgsPack(T x) : head(x) {}
|
||||
T head;
|
||||
};
|
||||
|
||||
template <class... Ts>
|
||||
struct KernelArgs
|
||||
{
|
||||
KernelArgs(Ts... xs) : pack(xs...) { std::fill(std::begin(hidden), std::end(hidden), 0); }
|
||||
KernelArgsPack<Ts...> pack;
|
||||
uint64_t hidden[6] = {};
|
||||
};
|
||||
|
||||
struct HIPOCKernelInvoke
|
||||
{
|
||||
hipStream_t stream = nullptr;
|
||||
hipFunction_t fun = nullptr;
|
||||
std::array<size_t, 3> ldims = {};
|
||||
std::array<size_t, 3> gdims = {};
|
||||
std::string name;
|
||||
std::function<void(hipEvent_t, hipEvent_t)> callback;
|
||||
|
||||
// Workaround for aggregate types in c++11
|
||||
HIPOCKernelInvoke() {}
|
||||
HIPOCKernelInvoke(hipStream_t pstream,
|
||||
hipFunction_t pfun,
|
||||
std::array<size_t, 3> pldims,
|
||||
std::array<size_t, 3> pgdims,
|
||||
std::string pname,
|
||||
std::function<void(hipEvent_t, hipEvent_t)> pcallback)
|
||||
: stream(pstream), fun(pfun), ldims(pldims), gdims(pgdims), name(pname), callback(pcallback)
|
||||
{
|
||||
}
|
||||
void operator()(std::vector<OpKernelArg>& any_args) const
|
||||
{
|
||||
char hip_args[256] = {0};
|
||||
auto sz_left = any_args[0].size();
|
||||
|
||||
memcpy(hip_args, &(any_args[0].buffer[0]), any_args[0].size());
|
||||
|
||||
for(unsigned long idx = 1; idx < any_args.size(); idx++)
|
||||
{
|
||||
auto& any_arg = any_args[idx];
|
||||
unsigned long alignment = any_arg.size();
|
||||
unsigned long padding = (alignment - (sz_left % alignment)) % alignment;
|
||||
unsigned long second_index = sz_left + padding;
|
||||
memcpy(hip_args + second_index, &(any_arg.buffer[0]), any_arg.size());
|
||||
sz_left = second_index + alignment;
|
||||
}
|
||||
run(hip_args, sz_left);
|
||||
}
|
||||
|
||||
template <class... Ts>
|
||||
void operator()(Ts... xs) const
|
||||
{
|
||||
KernelArgs<Ts...> args{xs...};
|
||||
run(&args, sizeof(args));
|
||||
}
|
||||
|
||||
void run(void* args, std::size_t size) const;
|
||||
|
||||
const std::string& GetName() const { return name; }
|
||||
};
|
||||
|
||||
struct HIPOCKernel
|
||||
{
|
||||
HIPOCProgram program;
|
||||
std::string name;
|
||||
std::array<size_t, 3> ldims = {};
|
||||
std::array<size_t, 3> gdims = {};
|
||||
std::string kernel_module;
|
||||
hipFunction_t fun = nullptr;
|
||||
|
||||
HIPOCKernel() {}
|
||||
HIPOCKernel(HIPOCProgram p,
|
||||
const std::string kernel_name,
|
||||
std::vector<size_t> local_dims,
|
||||
std::vector<size_t> global_dims)
|
||||
: program(p), name(kernel_name)
|
||||
{
|
||||
assert(!local_dims.empty() && local_dims.size() <= 3);
|
||||
assert(!global_dims.empty() && global_dims.size() <= 3);
|
||||
ldims.fill(1);
|
||||
gdims.fill(1);
|
||||
std::copy(local_dims.begin(), local_dims.end(), ldims.begin());
|
||||
std::copy(global_dims.begin(), global_dims.end(), gdims.begin());
|
||||
|
||||
kernel_module = name;
|
||||
MY_HIP_CHECK(hipModuleGetFunction(&fun, program.GetModule(), kernel_module.c_str()));
|
||||
}
|
||||
|
||||
HIPOCKernelInvoke Invoke(hipStream_t stream,
|
||||
std::function<void(hipEvent_t, hipEvent_t)> callback = nullptr) const;
|
||||
};
|
||||
|
||||
} // namespace online_compile
|
||||
|
||||
#endif
|
||||
@@ -1,64 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef GUARD_OLC_HIPOC_PROGRAM_HPP
|
||||
#define GUARD_OLC_HIPOC_PROGRAM_HPP
|
||||
|
||||
#include <target_properties.hpp>
|
||||
#include <manage_ptr.hpp>
|
||||
#include <hipoc_program_impl.hpp>
|
||||
#include <boost/filesystem/path.hpp>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <string>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
struct HIPOCProgramImpl;
|
||||
struct HIPOCProgram
|
||||
{
|
||||
HIPOCProgram();
|
||||
/// This ctor builds the program from source, initializes module.
|
||||
/// Also either CO pathname (typically if offline tools were used)
|
||||
/// or binary blob (if comgr was used to build the program)
|
||||
/// is initialized. GetModule(), GetCodeObjectPathname(),
|
||||
/// GetCodeObjectBlob() return appropriate data after this ctor.
|
||||
/// Other ctors only guarantee to initialize module.
|
||||
HIPOCProgram(const std::string& program_name,
|
||||
std::string params,
|
||||
const TargetProperties& target);
|
||||
HIPOCProgram(const std::string& program_name, const boost::filesystem::path& hsaco);
|
||||
std::shared_ptr<const HIPOCProgramImpl> impl;
|
||||
hipModule_t GetModule() const;
|
||||
/// \return Pathname of CO file, if it resides on the filesystem.
|
||||
boost::filesystem::path GetCodeObjectPathname() const;
|
||||
/// \return Copy of in-memory CO blob.
|
||||
std::string GetCodeObjectBlob() const;
|
||||
/// \return True if CO blob resides in-memory.
|
||||
/// False if CO resides on filesystem.
|
||||
bool IsCodeObjectInMemory() const;
|
||||
};
|
||||
} // namespace online_compile
|
||||
|
||||
#endif
|
||||
@@ -1,61 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2021 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef GUARD_OLC_HIPOC_PROGRAM_IMPL_HPP
|
||||
#define GUARD_OLC_HIPOC_PROGRAM_IMPL_HPP
|
||||
|
||||
#include <target_properties.hpp>
|
||||
#include <manage_ptr.hpp>
|
||||
#include <tmp_dir.hpp>
|
||||
#include <boost/filesystem/path.hpp>
|
||||
#include <boost/optional.hpp>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
using hipModulePtr = OLC_MANAGE_PTR(hipModule_t, hipModuleUnload);
|
||||
|
||||
struct HIPOCProgramImpl
|
||||
{
|
||||
HIPOCProgramImpl(){};
|
||||
HIPOCProgramImpl(const std::string& program_name, const boost::filesystem::path& filespec);
|
||||
|
||||
HIPOCProgramImpl(const std::string& program_name,
|
||||
std::string params,
|
||||
const TargetProperties& target_);
|
||||
|
||||
std::string program;
|
||||
TargetProperties target;
|
||||
boost::filesystem::path hsaco_file;
|
||||
hipModulePtr module;
|
||||
boost::optional<TmpDir> dir;
|
||||
std::vector<char> binary;
|
||||
|
||||
void
|
||||
BuildCodeObjectInFile(std::string& params, const std::string& src, const std::string& filename);
|
||||
void BuildCodeObject(std::string params);
|
||||
};
|
||||
} // namespace online_compile
|
||||
#endif // GUARD_OLC_HIPOC_PROGRAM_IMPL_HPP
|
||||
@@ -1,45 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef GUARD_OLC_KERNEL_HPP
|
||||
#define GUARD_OLC_KERNEL_HPP
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <hipoc_kernel.hpp>
|
||||
|
||||
namespace online_compile {
|
||||
std::string GetKernelSrc(std::string name);
|
||||
std::string GetKernelInc(std::string key);
|
||||
std::vector<std::string> GetKernelIncList();
|
||||
std::vector<std::string> GetHipKernelIncList();
|
||||
|
||||
using Kernel = HIPOCKernel;
|
||||
using KernelInvoke = HIPOCKernelInvoke;
|
||||
using Program = HIPOCProgram;
|
||||
|
||||
} // namespace online_compile
|
||||
|
||||
#endif
|
||||
@@ -1,137 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef GUARD_OLC_KERNEL_BUILD_PARAMETERS_HPP_
|
||||
#define GUARD_OLC_KERNEL_BUILD_PARAMETERS_HPP_
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <initializer_list>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
namespace kbp {
|
||||
struct Option
|
||||
{
|
||||
};
|
||||
} // namespace kbp
|
||||
|
||||
enum class ParameterTypes
|
||||
{
|
||||
Define,
|
||||
Option,
|
||||
};
|
||||
|
||||
struct KernelBuildParameter
|
||||
{
|
||||
ParameterTypes type;
|
||||
std::string name;
|
||||
std::string value;
|
||||
};
|
||||
|
||||
class KernelBuildParameters
|
||||
{
|
||||
public:
|
||||
struct KBPInit
|
||||
{
|
||||
friend class KernelBuildParameters;
|
||||
|
||||
KBPInit(const std::string& name, const std::string& value = "")
|
||||
: data{ParameterTypes::Define, name, value}
|
||||
{
|
||||
}
|
||||
|
||||
template <class TValue, class = decltype(std::to_string(std::declval<TValue>()))>
|
||||
KBPInit(const std::string& name, const TValue& value) : KBPInit(name, std::to_string(value))
|
||||
{
|
||||
}
|
||||
|
||||
KBPInit(kbp::Option, const std::string& name, const std::string& value = "")
|
||||
: data{ParameterTypes::Option, name, value}
|
||||
{
|
||||
}
|
||||
|
||||
template <class TValue, class = decltype(std::to_string(std::declval<TValue>()))>
|
||||
KBPInit(kbp::Option, const std::string& name, const TValue& value)
|
||||
: KBPInit(kbp::Option{}, name, std::to_string(value))
|
||||
{
|
||||
}
|
||||
|
||||
private:
|
||||
KernelBuildParameter data{};
|
||||
};
|
||||
|
||||
KernelBuildParameters() = default;
|
||||
KernelBuildParameters(const std::initializer_list<KBPInit>& defines_)
|
||||
{
|
||||
options.reserve(defines_.size());
|
||||
for(const auto& define : defines_)
|
||||
{
|
||||
assert(ValidateUniqueness(define.data.name));
|
||||
options.push_back(define.data);
|
||||
}
|
||||
}
|
||||
|
||||
bool Empty() const { return options.empty(); }
|
||||
|
||||
void Define(const std::string& name, const std::string& value = "")
|
||||
{
|
||||
assert(ValidateUniqueness(name));
|
||||
options.push_back({ParameterTypes::Define, name, value});
|
||||
}
|
||||
|
||||
template <class TValue, class = decltype(std::to_string(std::declval<TValue>()))>
|
||||
void Define(const std::string& name, const TValue& value)
|
||||
{
|
||||
Define(name, std::to_string(value));
|
||||
}
|
||||
|
||||
KernelBuildParameters& operator<<(const KernelBuildParameters& other)
|
||||
{
|
||||
std::copy(other.options.begin(), other.options.end(), std::back_inserter(options));
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <class TFor>
|
||||
std::string GenerateFor(TFor&&) const
|
||||
{
|
||||
return TFor::Generate(options);
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<KernelBuildParameter> options = {};
|
||||
|
||||
bool ValidateUniqueness(const std::string& name) const
|
||||
{
|
||||
const auto eq = [=](const auto& item) { return item.name == name; };
|
||||
return std::find_if(options.begin(), options.end(), eq) == options.end();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace online_compile
|
||||
|
||||
#endif
|
||||
@@ -1,97 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
/* ************************************************************************
|
||||
* Copyright 2015 Vratis, Ltd.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* ************************************************************************ */
|
||||
|
||||
#ifndef GUARD_OLC_KERNEL_CACHE_HPP_
|
||||
#define GUARD_OLC_KERNEL_CACHE_HPP_
|
||||
|
||||
#include <handle.hpp>
|
||||
#include <kernel.hpp>
|
||||
#include <simple_hash.hpp>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
/**
|
||||
* @brief The KernelCache class Build and cache kernels
|
||||
*
|
||||
*/
|
||||
class KernelCache
|
||||
{
|
||||
public:
|
||||
using Key = std::pair<std::string, std::string>;
|
||||
using KernelMap = std::unordered_map<Key, std::vector<Kernel>, SimpleHash>;
|
||||
using ProgramMap = std::unordered_map<Key, Program, SimpleHash>;
|
||||
|
||||
Kernel AddKernel(const Handle& h,
|
||||
const std::string& algorithm,
|
||||
const std::string& network_config,
|
||||
const std::string& program_name,
|
||||
const std::string& kernel_name,
|
||||
const std::vector<size_t>& vld,
|
||||
const std::vector<size_t>& vgd,
|
||||
std::string params = "",
|
||||
std::size_t cache_index = 0);
|
||||
|
||||
void AddKernel(Key key, Kernel k, std::size_t cache_index);
|
||||
|
||||
void ClearKernels(const std::string& algorithm, const std::string& network_config);
|
||||
|
||||
const std::vector<Kernel>& GetKernels(const std::string& algorithm,
|
||||
const std::string& network_config);
|
||||
|
||||
bool HasKernels(const std::string& algorithm, const std::string& network_config) const;
|
||||
|
||||
bool HasProgram(const std::string& name, const std::string& params) const;
|
||||
|
||||
void AddProgram(Program prog, const std::string& program_name, std::string params);
|
||||
|
||||
KernelCache();
|
||||
|
||||
private:
|
||||
KernelMap kernel_map;
|
||||
ProgramMap program_map;
|
||||
};
|
||||
|
||||
} // namespace online_compile
|
||||
|
||||
#endif // GUARD_OLC_KERNEL_CACHE_HPP_
|
||||
@@ -1,23 +0,0 @@
|
||||
#ifndef _OLC_LOGGER_HPP_
|
||||
#define _OLC_LOGGER_HPP_
|
||||
|
||||
#include <fstream>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
enum class LogLevel
|
||||
{
|
||||
Quiet = 1,
|
||||
Error = 2,
|
||||
Warning = 3,
|
||||
Info = 4,
|
||||
Info2 = 5
|
||||
};
|
||||
|
||||
std::ostream& fdt_log(LogLevel level, const char* header, const char* content);
|
||||
std::ostream& fdt_log();
|
||||
void fdt_log_flush();
|
||||
|
||||
}; // namespace online_compile
|
||||
|
||||
#endif
|
||||
@@ -1,76 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef GUARD_OLC_MANAGE_PTR_HPP
|
||||
#define GUARD_OLC_MANAGE_PTR_HPP
|
||||
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
template <class F, F f>
|
||||
struct manage_deleter
|
||||
{
|
||||
template <class T>
|
||||
void operator()(T* x) const
|
||||
{
|
||||
if(x != nullptr)
|
||||
{
|
||||
(void)f(x); // NOLINT (cppcoreguidelines-owning-memory)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct null_deleter
|
||||
{
|
||||
template <class T>
|
||||
void operator()(T* /*x*/) const
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <class T, class F, F f>
|
||||
using manage_ptr = std::unique_ptr<T, manage_deleter<F, f>>;
|
||||
|
||||
template <class T>
|
||||
struct element_type
|
||||
{
|
||||
using type = typename T::element_type;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
using remove_ptr = typename std::
|
||||
conditional<std::is_pointer<T>::value, std::remove_pointer<T>, element_type<T>>::type::type;
|
||||
|
||||
template <class T>
|
||||
using shared = std::shared_ptr<remove_ptr<T>>;
|
||||
|
||||
} // namespace online_compile
|
||||
|
||||
#define OLC_MANAGE_PTR(T, F) \
|
||||
online_compile::manage_ptr<typename std::remove_pointer<T>::type, decltype(&F), &F> // NOLINT
|
||||
|
||||
#endif
|
||||
@@ -1,12 +0,0 @@
|
||||
#ifndef GUARD_OLC_MD5_HPP
|
||||
#define GUARD_OLC_MD5_HPP
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
std::string md5(std::string s);
|
||||
|
||||
} // namespace online_compile
|
||||
|
||||
#endif
|
||||
@@ -1,40 +0,0 @@
|
||||
#ifndef OLC_GUARD_MLOPEN_OP_KERNEL_ARGS_HPP
|
||||
#define OLC_GUARD_MLOPEN_OP_KERNEL_ARGS_HPP
|
||||
|
||||
#include <type_traits>
|
||||
#include <cstdint>
|
||||
#include <half.hpp>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
struct OpKernelArg
|
||||
{
|
||||
|
||||
OpKernelArg(char val, size_t sz) : buffer(sz) { std::fill(buffer.begin(), buffer.end(), val); }
|
||||
|
||||
template <typename T>
|
||||
OpKernelArg(T arg) : buffer(sizeof(T))
|
||||
{
|
||||
static_assert(std::is_trivial<T>{} || std::is_same<T, half_float::half>{},
|
||||
"Only for trivial types");
|
||||
*(reinterpret_cast<T*>(buffer.data())) = arg;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
OpKernelArg(T* arg) // NOLINT
|
||||
: buffer(sizeof(T*))
|
||||
{
|
||||
*(reinterpret_cast<T**>(buffer.data())) = arg;
|
||||
is_ptr = true;
|
||||
}
|
||||
|
||||
std::size_t size() const { return buffer.size(); };
|
||||
boost::container::small_vector<char, 8> buffer;
|
||||
bool is_ptr = false;
|
||||
};
|
||||
|
||||
} // namespace online_compile
|
||||
|
||||
#endif
|
||||
@@ -1,44 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2018 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef GUARD_OLC_SIMPLE_HASH_HPP
|
||||
#define GUARD_OLC_SIMPLE_HASH_HPP
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace online_compile {
|
||||
struct SimpleHash
|
||||
{
|
||||
size_t operator()(const std::pair<std::string, std::string>& p) const
|
||||
{
|
||||
using std::hash;
|
||||
return (hash<std::string>()(p.first) ^ hash<std::string>()(p.second));
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace online_compile
|
||||
|
||||
#endif
|
||||
@@ -1,133 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef GUARD_OLC_STRINGUTILS_HPP
|
||||
#define GUARD_OLC_STRINGUTILS_HPP
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <numeric>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
|
||||
#define OLC_STRINGIZE_1(...) #__VA_ARGS__
|
||||
#define OLC_STRINGIZE(...) OLC_STRINGIZE_1(__VA_ARGS__)
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
inline std::string
|
||||
ReplaceString(std::string subject, const std::string& search, const std::string& replace)
|
||||
{
|
||||
size_t pos = 0;
|
||||
while((pos = subject.find(search, pos)) != std::string::npos)
|
||||
{
|
||||
subject.replace(pos, search.length(), replace);
|
||||
pos += replace.length();
|
||||
}
|
||||
return subject;
|
||||
}
|
||||
|
||||
inline bool EndsWith(const std::string& value, const std::string& suffix)
|
||||
{
|
||||
if(suffix.size() > value.size())
|
||||
return false;
|
||||
else
|
||||
return std::equal(suffix.rbegin(), suffix.rend(), value.rbegin());
|
||||
}
|
||||
|
||||
template <class Strings>
|
||||
inline std::string JoinStrings(Strings strings, std::string delim)
|
||||
{
|
||||
auto it = strings.begin();
|
||||
if(it == strings.end())
|
||||
return "";
|
||||
|
||||
auto nit = std::next(it);
|
||||
return std::accumulate(
|
||||
nit, strings.end(), *it, [&](std::string x, std::string y) { return x + delim + y; });
|
||||
}
|
||||
|
||||
template <class F>
|
||||
static inline std::string TransformString(std::string s, F f)
|
||||
{
|
||||
std::transform(s.begin(), s.end(), s.begin(), f);
|
||||
return s;
|
||||
}
|
||||
|
||||
inline std::string ToUpper(std::string s) { return TransformString(std::move(s), ::toupper); }
|
||||
|
||||
inline bool StartsWith(const std::string& value, const std::string& prefix)
|
||||
{
|
||||
if(prefix.size() > value.size())
|
||||
return false;
|
||||
else
|
||||
return std::equal(prefix.begin(), prefix.end(), value.begin());
|
||||
}
|
||||
|
||||
inline std::string RemovePrefix(std::string s, std::string prefix)
|
||||
{
|
||||
if(StartsWith(s, prefix))
|
||||
return s.substr(prefix.length());
|
||||
else
|
||||
return s;
|
||||
}
|
||||
|
||||
inline std::vector<std::string> SplitSpaceSeparated(const std::string& in)
|
||||
{
|
||||
std::istringstream ss(in);
|
||||
std::istream_iterator<std::string> begin(ss), end;
|
||||
return {begin, end};
|
||||
}
|
||||
|
||||
inline std::vector<std::string> SplitSpaceSeparated(const std::string& in,
|
||||
const std::vector<std::string>& dontSplitAfter)
|
||||
{
|
||||
std::vector<std::string> rv;
|
||||
std::istringstream ss(in);
|
||||
std::string s;
|
||||
while(ss >> s)
|
||||
{
|
||||
if(std::any_of(dontSplitAfter.begin(), dontSplitAfter.end(), [&](const auto& dont) {
|
||||
return dont == s;
|
||||
}))
|
||||
{
|
||||
std::string s2;
|
||||
if(ss >> s2)
|
||||
{
|
||||
s += std::string(" ").append(s2); // Exactly one space is important.
|
||||
rv.push_back(s);
|
||||
continue;
|
||||
}
|
||||
throw std::runtime_error("Error parsing string: '" + in + '\'');
|
||||
}
|
||||
rv.push_back(s);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
} // namespace online_compile
|
||||
|
||||
#endif // GUARD_OLC_STRINGUTILS_HPP
|
||||
@@ -1,56 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2020 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef GUARD_OLC_TARGET_PROPERTIES_HPP
|
||||
#define GUARD_OLC_TARGET_PROPERTIES_HPP
|
||||
|
||||
#include <boost/optional.hpp>
|
||||
#include <string>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
struct Handle;
|
||||
|
||||
struct TargetProperties
|
||||
{
|
||||
const std::string& Name() const { return name; }
|
||||
const std::string& DbId() const { return dbId; }
|
||||
boost::optional<bool> Xnack() const { return xnack; }
|
||||
boost::optional<bool> Sramecc() const { return sramecc; }
|
||||
boost::optional<bool> SrameccReported() const { return sramecc_reported; }
|
||||
void Init(const Handle*);
|
||||
|
||||
private:
|
||||
void InitDbId();
|
||||
std::string name;
|
||||
std::string dbId;
|
||||
boost::optional<bool> xnack = boost::none;
|
||||
boost::optional<bool> sramecc = boost::none;
|
||||
boost::optional<bool> sramecc_reported = boost::none;
|
||||
};
|
||||
|
||||
} // namespace online_compile
|
||||
|
||||
#endif // GUARD_OLC_TARGET_PROPERTIES_HPP
|
||||
@@ -1,26 +0,0 @@
|
||||
#ifndef GUARD_OLC_TMP_DIR_HPP
|
||||
#define GUARD_OLC_TMP_DIR_HPP
|
||||
|
||||
#include <string>
|
||||
#include <boost/filesystem/path.hpp>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
void SystemCmd(std::string cmd);
|
||||
|
||||
struct TmpDir
|
||||
{
|
||||
boost::filesystem::path path;
|
||||
TmpDir(std::string prefix);
|
||||
|
||||
TmpDir(TmpDir const&) = delete;
|
||||
TmpDir& operator=(TmpDir const&) = delete;
|
||||
|
||||
void Execute(std::string exe, std::string args) const;
|
||||
|
||||
~TmpDir();
|
||||
};
|
||||
|
||||
} // namespace online_compile
|
||||
|
||||
#endif
|
||||
@@ -1,30 +0,0 @@
|
||||
#ifndef GUARD_OLC_WRITE_FILE_HPP
|
||||
#define GUARD_OLC_WRITE_FILE_HPP
|
||||
|
||||
#include <boost/filesystem.hpp>
|
||||
#include <manage_ptr.hpp>
|
||||
#include <fstream>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
using FilePtr = OLC_MANAGE_PTR(FILE*, std::fclose);
|
||||
|
||||
inline void WriteFile(const std::string& content, const boost::filesystem::path& name)
|
||||
{
|
||||
// std::cerr << "Write file: " << name << std::endl;
|
||||
FilePtr f{std::fopen(name.string().c_str(), "w")};
|
||||
if(std::fwrite(content.c_str(), 1, content.size(), f.get()) != content.size())
|
||||
throw std::runtime_error("Failed to write to file");
|
||||
}
|
||||
|
||||
inline void WriteFile(const std::vector<char>& content, const boost::filesystem::path& name)
|
||||
{
|
||||
// std::cerr << "Write file: " << name << std::endl;
|
||||
FilePtr f{std::fopen(name.string().c_str(), "w")};
|
||||
if(std::fwrite(&content[0], 1, content.size(), f.get()) != content.size())
|
||||
throw std::runtime_error("Failed to write to file");
|
||||
}
|
||||
|
||||
} // namespace online_compile
|
||||
|
||||
#endif
|
||||
@@ -1,70 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2021 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
|
||||
// clang-format off
|
||||
${KERNELS_DECLS}
|
||||
// clang-format on
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
const std::map<std::string, std::string>& kernels()
|
||||
{
|
||||
static const std::map<std::string, std::string> data{${INIT_KERNELS}};
|
||||
return data;
|
||||
}
|
||||
|
||||
std::string GetKernelSrc(std::string name)
|
||||
{
|
||||
// Use the base name of the string
|
||||
int start = 0;
|
||||
auto slash = static_cast<int>(name.find_last_of("/\\"));
|
||||
if(slash != std::string::npos)
|
||||
{
|
||||
start = slash + 1;
|
||||
}
|
||||
|
||||
int len = name.size();
|
||||
auto ex = static_cast<int>(name.rfind('.'));
|
||||
if(ex != std::string::npos)
|
||||
{
|
||||
len = ex - start;
|
||||
}
|
||||
|
||||
auto key = name.substr(start, len);
|
||||
// Convert to uppercase
|
||||
std::transform(key.begin(), key.end(), key.begin(), ::toupper);
|
||||
|
||||
auto it = kernels().find(key);
|
||||
if(it == kernels().end())
|
||||
throw std::runtime_error("Failed to load kernel source: " + key);
|
||||
|
||||
return it->second;
|
||||
}
|
||||
|
||||
} // namespace online_compile
|
||||
@@ -1,80 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#include "olc_kernel_includes.h"
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
|
||||
namespace online_compile {
|
||||
|
||||
static inline bool EndsWith(const std::string& value, const std::string& suffix)
|
||||
{
|
||||
if(suffix.size() > value.size())
|
||||
return false;
|
||||
else
|
||||
return std::equal(suffix.rbegin(), suffix.rend(), value.rbegin());
|
||||
}
|
||||
|
||||
const std::map<std::string, std::string>& kernel_includes()
|
||||
{
|
||||
static const std::map<std::string, std::string> data{${INIT_KERNELS}};
|
||||
return data;
|
||||
}
|
||||
|
||||
std::string GetKernelInc(std::string key)
|
||||
{
|
||||
auto it = kernel_includes().find(key);
|
||||
if(it == kernel_includes().end())
|
||||
throw std::runtime_error("Failed to load kernel source: " + key);
|
||||
|
||||
return it->second;
|
||||
}
|
||||
|
||||
std::vector<std::string> GetKernelIncList()
|
||||
{
|
||||
std::vector<std::string> keys;
|
||||
auto m = kernel_includes();
|
||||
std::transform(m.begin(),
|
||||
m.end(),
|
||||
std::back_inserter(keys),
|
||||
[](decltype(m)::value_type const& pair) { return pair.first; });
|
||||
return keys;
|
||||
}
|
||||
|
||||
std::vector<std::string> GetHipKernelIncList()
|
||||
{
|
||||
auto keys = GetKernelIncList();
|
||||
keys.erase(std::remove_if(keys.begin(),
|
||||
keys.end(),
|
||||
[&](const auto& key) {
|
||||
return !(EndsWith(key, ".hpp") || EndsWith(key, ".h"));
|
||||
}),
|
||||
keys.end());
|
||||
return keys;
|
||||
}
|
||||
|
||||
} // namespace online_compile
|
||||
@@ -1 +0,0 @@
|
||||
#include "${KERNEL_SRC_HPP_FILENAME}"
|
||||
@@ -1,16 +1,9 @@
|
||||
#ifndef ONLINE_DRIVER_COMMON_HPP
|
||||
#define ONLINE_DRIVER_COMMON_HPP
|
||||
#ifndef CK_SOLVER_COMMON_HPP
|
||||
#define CK_SOLVER_COMMON_HPP
|
||||
|
||||
namespace ck {
|
||||
namespace driver {
|
||||
|
||||
inline auto get_ck_hip_online_compile_common_flag()
|
||||
{
|
||||
std::string param = " -std=c++17";
|
||||
|
||||
return param;
|
||||
}
|
||||
|
||||
// greatest common divisor, aka highest common factor
|
||||
inline int gcd(int x, int y)
|
||||
{
|
||||
Reference in New Issue
Block a user