mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 09:16:52 +00:00
tidy
This commit is contained in:
@@ -40,7 +40,7 @@ message(STATUS "Build with HIP ${hip_VERSION}")
|
|||||||
|
|
||||||
## half
|
## half
|
||||||
#find_path(HALF_INCLUDE_DIR half.hpp)
|
#find_path(HALF_INCLUDE_DIR half.hpp)
|
||||||
#message("HALF_INCLUDE_DIR: ${HALF_INCLUDE_DIR}")
|
message("HALF_INCLUDE_DIR: ${HALF_INCLUDE_DIR}")
|
||||||
|
|
||||||
## tidy
|
## tidy
|
||||||
include(EnableCompilerWarnings)
|
include(EnableCompilerWarnings)
|
||||||
|
|||||||
@@ -411,9 +411,6 @@ struct GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3
|
|||||||
constexpr auto a_block_space_size =
|
constexpr auto a_block_space_size =
|
||||||
math::integer_least_multiple(a_k0_m_k1_block_desc.GetElementSpaceSize(), max_lds_align);
|
math::integer_least_multiple(a_k0_m_k1_block_desc.GetElementSpaceSize(), max_lds_align);
|
||||||
|
|
||||||
constexpr auto b_block_space_size =
|
|
||||||
math::integer_least_multiple(b_k0_n_k1_block_desc.GetElementSpaceSize(), max_lds_align);
|
|
||||||
|
|
||||||
FloatAB* p_a_block = p_shared_block;
|
FloatAB* p_a_block = p_shared_block;
|
||||||
FloatAB* p_b_block = p_shared_block + a_block_space_size;
|
FloatAB* p_b_block = p_shared_block + a_block_space_size;
|
||||||
|
|
||||||
@@ -574,8 +571,6 @@ struct GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3
|
|||||||
make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(
|
make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(
|
||||||
I1, I1, I1, I1, Number<M0>{}, Number<1>{}, Number<M2>{}, Number<1>{}));
|
I1, I1, I1, I1, Number<M0>{}, Number<1>{}, Number<M2>{}, Number<1>{}));
|
||||||
|
|
||||||
StaticBuffer<AddressSpaceEnum_t::Vgpr, FloatC, BlkSize> c_blk_buf_;
|
|
||||||
|
|
||||||
// calculate origin of thread output tensor on global memory
|
// calculate origin of thread output tensor on global memory
|
||||||
// blockwise GEMM c matrix starting index
|
// blockwise GEMM c matrix starting index
|
||||||
const auto c_thread_mtx_on_block =
|
const auto c_thread_mtx_on_block =
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ include_directories(BEFORE
|
|||||||
${PROJECT_SOURCE_DIR}/composable_kernel/include/problem_transform
|
${PROJECT_SOURCE_DIR}/composable_kernel/include/problem_transform
|
||||||
${PROJECT_SOURCE_DIR}/composable_kernel/include/driver
|
${PROJECT_SOURCE_DIR}/composable_kernel/include/driver
|
||||||
${PROJECT_SOURCE_DIR}/external/rocm/include
|
${PROJECT_SOURCE_DIR}/external/rocm/include
|
||||||
${PROJECT_SOURCE_DIR}/external/half/include
|
|
||||||
)
|
)
|
||||||
|
|
||||||
set(CONV_FWD_DRIVER_OFFLINE_SOURCE src/conv_fwd_driver_offline.cpp)
|
set(CONV_FWD_DRIVER_OFFLINE_SOURCE src/conv_fwd_driver_offline.cpp)
|
||||||
|
|||||||
@@ -338,9 +338,6 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nh
|
|||||||
const auto K = out_n_ho_wo_k_lengths[I3];
|
const auto K = out_n_ho_wo_k_lengths[I3];
|
||||||
const auto C = wei_k_y_x_c_lengths[I3];
|
const auto C = wei_k_y_x_c_lengths[I3];
|
||||||
|
|
||||||
const auto Hi = in_n_hi_wi_c_lengths[I1];
|
|
||||||
const auto Wi = in_n_hi_wi_c_lengths[I2];
|
|
||||||
|
|
||||||
const auto Ho = out_n_ho_wo_k_lengths[I1];
|
const auto Ho = out_n_ho_wo_k_lengths[I1];
|
||||||
const auto Wo = out_n_ho_wo_k_lengths[I2];
|
const auto Wo = out_n_ho_wo_k_lengths[I2];
|
||||||
|
|
||||||
|
|||||||
@@ -20,11 +20,11 @@
|
|||||||
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk.hpp"
|
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk.hpp"
|
||||||
|
|
||||||
#define USE_DYNAMIC_MODE 1
|
#define USE_DYNAMIC_MODE 1
|
||||||
#define USE_CONV_FWD_V4R4_NCHW 0
|
#define USE_CONV_FWD_V4R4_NCHW 1
|
||||||
#define USE_CONV_FWD_V4R4R2_NHWC 0
|
#define USE_CONV_FWD_V4R4R2_NHWC 1
|
||||||
#define USE_CONV_FWD_V6R1_NCHW 0
|
#define USE_CONV_FWD_V6R1_NCHW 1
|
||||||
#define USE_CONV_FWD_V5R1_NCHW 0
|
#define USE_CONV_FWD_V5R1_NCHW 0
|
||||||
#define USE_CONV_FWD_V4R4R2_XDL_NCHW 0
|
#define USE_CONV_FWD_V4R4R2_XDL_NCHW 1
|
||||||
#define USE_CONV_FWD_V4R4R4_XDL_NHWC 1
|
#define USE_CONV_FWD_V4R4R4_XDL_NHWC 1
|
||||||
|
|
||||||
enum ConvForwardAlgo
|
enum ConvForwardAlgo
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ set(HOST_TENSOR_SOURCE
|
|||||||
## the library target
|
## the library target
|
||||||
add_library(host_tensor SHARED ${HOST_TENSOR_SOURCE})
|
add_library(host_tensor SHARED ${HOST_TENSOR_SOURCE})
|
||||||
|
|
||||||
#target_include_directories(host_tensor SYSTEM PUBLIC $<BUILD_INTERFACE:${HALF_INCLUDE_DIR}>)
|
target_include_directories(host_tensor SYSTEM PUBLIC $<BUILD_INTERFACE:${HALF_INCLUDE_DIR}>)
|
||||||
|
|
||||||
target_link_libraries(host_tensor PRIVATE hip::device)
|
target_link_libraries(host_tensor PRIVATE hip::device)
|
||||||
target_link_libraries(host_tensor INTERFACE hip::host)
|
target_link_libraries(host_tensor INTERFACE hip::host)
|
||||||
|
|||||||
Reference in New Issue
Block a user