diff --git a/CMakeLists.txt b/CMakeLists.txt index c3c0eab529..e5a5258328 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,7 +40,7 @@ message(STATUS "Build with HIP ${hip_VERSION}") ## half #find_path(HALF_INCLUDE_DIR half.hpp) -#message("HALF_INCLUDE_DIR: ${HALF_INCLUDE_DIR}") +message("HALF_INCLUDE_DIR: ${HALF_INCLUDE_DIR}") ## tidy include(EnableCompilerWarnings) diff --git a/composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops_v2r3.hpp b/composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops_v2r3.hpp index 09b39ab2ca..4f02da1409 100644 --- a/composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops_v2r3.hpp +++ b/composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops_v2r3.hpp @@ -411,9 +411,6 @@ struct GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3 constexpr auto a_block_space_size = math::integer_least_multiple(a_k0_m_k1_block_desc.GetElementSpaceSize(), max_lds_align); - constexpr auto b_block_space_size = - math::integer_least_multiple(b_k0_n_k1_block_desc.GetElementSpaceSize(), max_lds_align); - FloatAB* p_a_block = p_shared_block; FloatAB* p_b_block = p_shared_block + a_block_space_size; @@ -574,8 +571,6 @@ struct GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3 make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple( I1, I1, I1, I1, Number{}, Number<1>{}, Number{}, Number<1>{})); - StaticBuffer c_blk_buf_; - // calculate origin of thread output tensor on global memory // blockwise GEMM c matrix starting index const auto c_thread_mtx_on_block = diff --git a/host/driver_offline/CMakeLists.txt b/host/driver_offline/CMakeLists.txt index 9743abbb0b..fec11e99af 100644 --- a/host/driver_offline/CMakeLists.txt +++ b/host/driver_offline/CMakeLists.txt @@ -9,7 +9,6 @@ include_directories(BEFORE ${PROJECT_SOURCE_DIR}/composable_kernel/include/problem_transform ${PROJECT_SOURCE_DIR}/composable_kernel/include/driver ${PROJECT_SOURCE_DIR}/external/rocm/include - ${PROJECT_SOURCE_DIR}/external/half/include ) set(CONV_FWD_DRIVER_OFFLINE_SOURCE src/conv_fwd_driver_offline.cpp) diff --git a/host/driver_offline/include/device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk.hpp b/host/driver_offline/include/device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk.hpp index 8596630ad3..0d49c417de 100644 --- a/host/driver_offline/include/device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk.hpp +++ b/host/driver_offline/include/device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk.hpp @@ -338,9 +338,6 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nh const auto K = out_n_ho_wo_k_lengths[I3]; const auto C = wei_k_y_x_c_lengths[I3]; - const auto Hi = in_n_hi_wi_c_lengths[I1]; - const auto Wi = in_n_hi_wi_c_lengths[I2]; - const auto Ho = out_n_ho_wo_k_lengths[I1]; const auto Wo = out_n_ho_wo_k_lengths[I2]; diff --git a/host/driver_offline/src/conv_fwd_driver_offline.cpp b/host/driver_offline/src/conv_fwd_driver_offline.cpp index 42a2d2f681..3358d5b98c 100644 --- a/host/driver_offline/src/conv_fwd_driver_offline.cpp +++ b/host/driver_offline/src/conv_fwd_driver_offline.cpp @@ -20,11 +20,11 @@ #include "device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk.hpp" #define USE_DYNAMIC_MODE 1 -#define USE_CONV_FWD_V4R4_NCHW 0 -#define USE_CONV_FWD_V4R4R2_NHWC 0 -#define USE_CONV_FWD_V6R1_NCHW 0 +#define USE_CONV_FWD_V4R4_NCHW 1 +#define USE_CONV_FWD_V4R4R2_NHWC 1 +#define USE_CONV_FWD_V6R1_NCHW 1 #define USE_CONV_FWD_V5R1_NCHW 0 -#define USE_CONV_FWD_V4R4R2_XDL_NCHW 0 +#define USE_CONV_FWD_V4R4R2_XDL_NCHW 1 #define USE_CONV_FWD_V4R4R4_XDL_NHWC 1 enum ConvForwardAlgo diff --git a/host/host_tensor/CMakeLists.txt b/host/host_tensor/CMakeLists.txt index 90249fc664..3dcecf64e1 100644 --- a/host/host_tensor/CMakeLists.txt +++ b/host/host_tensor/CMakeLists.txt @@ -10,7 +10,7 @@ set(HOST_TENSOR_SOURCE ## the library target add_library(host_tensor SHARED ${HOST_TENSOR_SOURCE}) -#target_include_directories(host_tensor SYSTEM PUBLIC $) +target_include_directories(host_tensor SYSTEM PUBLIC $) target_link_libraries(host_tensor PRIVATE hip::device) target_link_libraries(host_tensor INTERFACE hip::host)