From d2525548c6974bc7a0efc27e4356cdffcac71f2d Mon Sep 17 00:00:00 2001 From: Jinchao Xu Date: Tue, 5 Aug 2025 02:26:08 +0800 Subject: [PATCH] Add -gsplit-dwarf flag to reduce debug section size and fix ckProfiler link errors (#2611) Resolves R_X86_64_32 relocation out of range errors in grouped conv2d instances by splitting debug information into separate .dwo files. Add explicit cast to avoid signed/unsigned comparison warning. [ROCm/composable_kernel commit: 15eb493152b4cddff947159ea4b829e1f55c56f3] --- include/ck_tile/host/reference/reference_softmax.hpp | 4 ++-- include/ck_tile/host/reference/reference_topk.hpp | 9 +++++---- library/src/tensor_operation_instance/gpu/CMakeLists.txt | 4 ++++ 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/include/ck_tile/host/reference/reference_softmax.hpp b/include/ck_tile/host/reference/reference_softmax.hpp index d86e879944..4e729c437d 100644 --- a/include/ck_tile/host/reference/reference_softmax.hpp +++ b/include/ck_tile/host/reference/reference_softmax.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once @@ -14,7 +14,7 @@ CK_TILE_HOST void reference_softmax(const HostTensor& x, HostTensor& y, index_t dim = -1) { index_t rank = x.get_num_of_dimension(); - assert(rank == y.get_num_of_dimension()); + assert(static_cast<std::size_t>(rank) == y.get_num_of_dimension()); assert(dim == -1 || dim < rank); index_t target_dim = dim == -1 ? (rank - 1) : dim; diff --git a/include/ck_tile/host/reference/reference_topk.hpp b/include/ck_tile/host/reference/reference_topk.hpp index 3d0404a2e5..0fc99a983a 100644 --- a/include/ck_tile/host/reference/reference_topk.hpp +++ b/include/ck_tile/host/reference/reference_topk.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once @@ -38,8 +38,8 @@ CK_TILE_HOST void reference_topk(const HostTensor& x, { // rank must be the same index_t rank = x.get_num_of_dimension(); - assert(rank == y_values.get_num_of_dimension()); - assert(rank == y_indices.get_num_of_dimension()); + assert(static_cast<std::size_t>(rank) == y_values.get_num_of_dimension()); + assert(static_cast<std::size_t>(rank) == y_indices.get_num_of_dimension()); assert(dim == -1 || dim < rank); index_t topk_dim = dim == -1 ? (rank - 1) : dim; @@ -47,7 +47,8 @@ CK_TILE_HOST void reference_topk(const HostTensor& x, auto x_len = x.get_lengths(); assert(k <= topk_src_len); - assert(k == y_values.get_length(topk_dim) && k == y_indices.get_length(topk_dim)); + assert(static_cast<std::size_t>(k) == y_values.get_length(topk_dim) && + static_cast<std::size_t>(k) == y_indices.get_length(topk_dim)); index_t n_parallel = x.get_element_size() / topk_src_len; diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt index 5204b51edf..1eaaa7e6ba 100644 --- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt @@ -175,6 +175,10 @@ function(add_instance_library INSTANCE_NAME) target_compile_features(${INSTANCE_NAME} PUBLIC) + # splits debug information into separate .dwo files to reduce debug section size + if(CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") + target_compile_options(${INSTANCE_NAME} PRIVATE -gsplit-dwarf) + endif() # flags to compress the library if(NOT DISABLE_OFFLOAD_COMPRESS AND NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 600241132) message(DEBUG "Adding --offload-compress flag for ${INSTANCE_NAME}")