diff --git a/include/ck_tile/host/reference/reference_softmax.hpp b/include/ck_tile/host/reference/reference_softmax.hpp index d86e879944..4e729c437d 100644 --- a/include/ck_tile/host/reference/reference_softmax.hpp +++ b/include/ck_tile/host/reference/reference_softmax.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once @@ -14,7 +14,7 @@ CK_TILE_HOST void reference_softmax(const HostTensor& x, HostTensor& y, index_t dim = -1) { index_t rank = x.get_num_of_dimension(); - assert(rank == y.get_num_of_dimension()); + assert(static_cast(rank) == y.get_num_of_dimension()); assert(dim == -1 || dim < rank); index_t target_dim = dim == -1 ? (rank - 1) : dim; diff --git a/include/ck_tile/host/reference/reference_topk.hpp b/include/ck_tile/host/reference/reference_topk.hpp index 3d0404a2e5..0fc99a983a 100644 --- a/include/ck_tile/host/reference/reference_topk.hpp +++ b/include/ck_tile/host/reference/reference_topk.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once @@ -38,8 +38,8 @@ CK_TILE_HOST void reference_topk(const HostTensor& x, { // rank must be the same index_t rank = x.get_num_of_dimension(); - assert(rank == y_values.get_num_of_dimension()); - assert(rank == y_indices.get_num_of_dimension()); + assert(static_cast(rank) == y_values.get_num_of_dimension()); + assert(static_cast(rank) == y_indices.get_num_of_dimension()); assert(dim == -1 || dim < rank); index_t topk_dim = dim == -1 ? (rank - 1) : dim; @@ -47,7 +47,8 @@ CK_TILE_HOST void reference_topk(const HostTensor& x, auto x_len = x.get_lengths(); assert(k <= topk_src_len); - assert(k == y_values.get_length(topk_dim) && k == y_indices.get_length(topk_dim)); + assert(static_cast(k) == y_values.get_length(topk_dim) && + static_cast(k) == y_indices.get_length(topk_dim)); index_t n_parallel = x.get_element_size() / topk_src_len; diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt index 5204b51edf..1eaaa7e6ba 100644 --- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt @@ -175,6 +175,10 @@ function(add_instance_library INSTANCE_NAME) target_compile_features(${INSTANCE_NAME} PUBLIC) + # splits debug information into separate .dwo files to reduce debug section size + if(CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") + target_compile_options(${INSTANCE_NAME} PRIVATE -gsplit-dwarf) + endif() # flags to compress the library if(NOT DISABLE_OFFLOAD_COMPRESS AND NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 600241132) message(DEBUG "Adding --offload-compress flag for ${INSTANCE_NAME}")