From 3873bf3b91b5d512ecde69d739d413522931f062 Mon Sep 17 00:00:00 2001 From: Illia Silin <98187287+illsilin@users.noreply.github.com> Date: Mon, 30 Mar 2026 07:19:32 -0700 Subject: [PATCH] [CK] fix clang lifetimebound errors with staging compiler (#5921) ## Motivation The ROCm staging compiler (newer Clang) enforces `[[clang::lifetimebound]]` annotations on methods that return references or pointers to internal object data. Without these annotations, the staging compiler emits compilation errors for container accessor methods across the CK and CK Tile namespaces. ## Technical Details Adds `[[clang::lifetimebound]]` to all reference/pointer-returning accessors in core container types: **`ck::` namespace:** - `Array` -- `At()`, `operator[]`, `operator()`, `begin()`, `end()` - `index_array` -- `operator[]` - `StaticallyIndexedArray_v2` -- `At()`, `operator[]`, `operator()` - `IndexLookupTable` -- `operator[]` **`ck_tile::` namespace:** - `array` -- `get(i)`, `at()`, `operator[]`, `operator()` - `static_array` -- `operator[]` - `thread_buffer` -- `get(i)`, `at()`, `operator[]`, `operator()` - `make_kernel()` -- parameter pack Also removes the unused `instance_index` variable from `batched_gemm_reduce_fp16.cpp` and simplifies its argument parsing accordingly. ## Test Plan - Compile with the staging compiler to verify all lifetimebound errors are resolved - Existing tests pass unchanged -- the attribute is a compile-time annotation with no runtime effect ## Test Result ## Submission Checklist - [x] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests. --- Jenkinsfile | 4 +-- .../tensor_space_filling_curve.hpp | 1 + include/ck/utility/array.hpp | 31 ++++++++++++++----- include/ck/utility/sequence.hpp | 11 +++++-- .../ck/utility/statically_indexed_array.hpp | 8 ++--- include/ck_tile/core/container/array.hpp | 22 ++++++------- .../ck_tile/core/container/static_array.hpp | 10 ++++-- .../ck_tile/core/container/thread_buffer.hpp | 22 ++++++------- include/ck_tile/host/kernel_launch.hpp | 7 +++-- .../batched_gemm_reduce_fp16.cpp | 10 +++--- .../test_grouped_gemm_fixed_nk_bias.cpp | 10 +++--- .../test_grouped_gemm_multi_abd_fixed_nk.cpp | 17 ---------- 12 files changed, 82 insertions(+), 71 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 4d482f9e7a..8df0980cb3 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1317,8 +1317,8 @@ pipeline { description: "Build CK and run tests on gfx12 (default: ON)") booleanParam( name: "NINJA_BUILD_TRACE", - defaultValue: false, - description: "Generate a ninja build trace (default: OFF)") + defaultValue: true, + description: "Generate a ninja build trace (default: ON)") booleanParam( name: "NINJA_FTIME_TRACE", defaultValue: false, diff --git a/include/ck/tensor_description/tensor_space_filling_curve.hpp b/include/ck/tensor_description/tensor_space_filling_curve.hpp index 6ca81b14f5..0897768e2e 100644 --- a/include/ck/tensor_description/tensor_space_filling_curve.hpp +++ b/include/ck/tensor_description/tensor_space_filling_curve.hpp @@ -24,6 +24,7 @@ struct IndexLookupTable MultiIndex data[NumAccesses > 0 ? NumAccesses : 1]; __host__ __device__ constexpr const MultiIndex& operator[](index_t i) const + [[clang::lifetimebound]] { return data[i]; } diff --git a/include/ck/utility/array.hpp b/include/ck/utility/array.hpp index 9dfc266d8b..e194267efa 100644 --- a/include/ck/utility/array.hpp +++ b/include/ck/utility/array.hpp @@ -24,13 +24,22 @@ struct Array __host__ __device__ static constexpr index_t Size() { return NSize; } - __host__ __device__ constexpr const TData& At(index_t i) const { return mData[i]; } + __host__ __device__ constexpr const TData& At(index_t i) const [[clang::lifetimebound]] + { + return mData[i]; + } - __host__ __device__ constexpr TData& At(index_t i) { return mData[i]; } + __host__ __device__ constexpr TData& At(index_t i) [[clang::lifetimebound]] { return mData[i]; } - __host__ __device__ constexpr const TData& operator[](index_t i) const { return At(i); } + __host__ __device__ constexpr const TData& operator[](index_t i) const [[clang::lifetimebound]] + { + return At(i); + } - __host__ __device__ constexpr TData& operator()(index_t i) { return At(i); } + __host__ __device__ constexpr TData& operator()(index_t i) [[clang::lifetimebound]] + { + return At(i); + } template __host__ constexpr auto Emplace(index_t i, Args&&... args) @@ -50,10 +59,16 @@ struct Array return *this; } - __host__ __device__ constexpr const TData* begin() const { return &mData[0]; } - __host__ __device__ constexpr const TData* end() const { return &mData[NSize]; } - __host__ __device__ constexpr TData* begin() { return &mData[0]; } - __host__ __device__ constexpr TData* end() { return &mData[NSize]; } + __host__ __device__ constexpr const TData* begin() const [[clang::lifetimebound]] + { + return &mData[0]; + } + __host__ __device__ constexpr const TData* end() const [[clang::lifetimebound]] + { + return &mData[NSize]; + } + __host__ __device__ constexpr TData* begin() [[clang::lifetimebound]] { return &mData[0]; } + __host__ __device__ constexpr TData* end() [[clang::lifetimebound]] { return &mData[NSize]; } }; // empty Array diff --git a/include/ck/utility/sequence.hpp b/include/ck/utility/sequence.hpp index 503426949a..d6d4b83f81 100644 --- a/include/ck/utility/sequence.hpp +++ b/include/ck/utility/sequence.hpp @@ -403,8 +403,15 @@ struct index_array { index_t data[N > 0 ? N : 1]; - __host__ __device__ constexpr index_t& operator[](index_t i) { return data[i]; } - __host__ __device__ constexpr const index_t& operator[](index_t i) const { return data[i]; } + __host__ __device__ constexpr index_t& operator[](index_t i) [[clang::lifetimebound]] + { + return data[i]; + } + __host__ __device__ constexpr const index_t& operator[](index_t i) const + [[clang::lifetimebound]] + { + return data[i]; + } }; /** diff --git a/include/ck/utility/statically_indexed_array.hpp b/include/ck/utility/statically_indexed_array.hpp index 8dc5b8dd09..1139091233 100644 --- a/include/ck/utility/statically_indexed_array.hpp +++ b/include/ck/utility/statically_indexed_array.hpp @@ -69,7 +69,7 @@ struct StaticallyIndexedArray_v2 // read access template - __host__ __device__ constexpr const auto& At(Number) const + __host__ __device__ constexpr const auto& At(Number) const [[clang::lifetimebound]] { static_assert(I < N, "wrong! out of range"); @@ -78,7 +78,7 @@ struct StaticallyIndexedArray_v2 // write access template - __host__ __device__ constexpr auto& At(Number) + __host__ __device__ constexpr auto& At(Number) [[clang::lifetimebound]] { static_assert(I < N, "wrong! out of range"); @@ -87,14 +87,14 @@ struct StaticallyIndexedArray_v2 // read access template - __host__ __device__ constexpr const auto& operator[](Number i) const + __host__ __device__ constexpr const auto& operator[](Number i) const [[clang::lifetimebound]] { return At(i); } // write access template - __host__ __device__ constexpr auto& operator()(Number i) + __host__ __device__ constexpr auto& operator()(Number i) [[clang::lifetimebound]] { return At(i); } diff --git a/include/ck_tile/core/container/array.hpp b/include/ck_tile/core/container/array.hpp index b487467a71..45adbded2c 100644 --- a/include/ck_tile/core/container/array.hpp +++ b/include/ck_tile/core/container/array.hpp @@ -103,23 +103,23 @@ struct array // clang-format off CK_TILE_HOST_DEVICE constexpr auto& get() { return data; } CK_TILE_HOST_DEVICE constexpr const auto& get() const { return data; } - CK_TILE_HOST_DEVICE constexpr auto& get(index_t i) { return data[i]; } - CK_TILE_HOST_DEVICE constexpr const auto& get(index_t i) const { return data[i]; } + CK_TILE_HOST_DEVICE constexpr auto& get(index_t i) [[clang::lifetimebound]] { return data[i]; } + CK_TILE_HOST_DEVICE constexpr const auto& get(index_t i) const [[clang::lifetimebound]] { return data[i]; } template CK_TILE_HOST_DEVICE constexpr auto& get() { return data[I]; } template CK_TILE_HOST_DEVICE constexpr const auto& get() const { return data[I]; } - template CK_TILE_HOST_DEVICE constexpr auto& get(number) { return data[I]; } + template CK_TILE_HOST_DEVICE constexpr auto& get(number)[[clang::lifetimebound]] { return data[I]; } template CK_TILE_HOST_DEVICE constexpr const auto& get(number) const { return data[I]; } CK_TILE_HOST_DEVICE constexpr auto& at(index_t i) { return get(i); } - CK_TILE_HOST_DEVICE constexpr const auto& at(index_t i) const { return get(i); } - template CK_TILE_HOST_DEVICE constexpr auto& at() { return get(I); } - template CK_TILE_HOST_DEVICE constexpr const auto& at() const { return get(I); } - template CK_TILE_HOST_DEVICE constexpr auto& at(number) { return get(I); } - template CK_TILE_HOST_DEVICE constexpr const auto& at(number) const { return get(I); } + CK_TILE_HOST_DEVICE constexpr const auto& at(index_t i) const [[clang::lifetimebound]] { return get(i); } + template CK_TILE_HOST_DEVICE constexpr auto& at() [[clang::lifetimebound]] { return get(I); } + template CK_TILE_HOST_DEVICE constexpr const auto& at() const [[clang::lifetimebound]] { return get(I); } + template CK_TILE_HOST_DEVICE constexpr auto& at(number) [[clang::lifetimebound]] { return get(I); } + template CK_TILE_HOST_DEVICE constexpr const auto& at(number) const [[clang::lifetimebound]] { return get(I); } - CK_TILE_HOST_DEVICE constexpr const value_type& operator[](index_t i) const { return get(i); } - CK_TILE_HOST_DEVICE constexpr value_type& operator[](index_t i) { return get(i); } - CK_TILE_HOST_DEVICE constexpr value_type& operator()(index_t i) { return get(i); } // TODO: compatible + CK_TILE_HOST_DEVICE constexpr const value_type& operator[](index_t i) const [[clang::lifetimebound]] { return get(i); } + CK_TILE_HOST_DEVICE constexpr value_type& operator[](index_t i) [[clang::lifetimebound]] { return get(i); } + CK_TILE_HOST_DEVICE constexpr value_type& operator()(index_t i) [[clang::lifetimebound]] { return get(i); } // TODO: compatible #if 0 template CK_TILE_HOST_DEVICE constexpr auto operator=(const ArrayLike& arr) diff --git a/include/ck_tile/core/container/static_array.hpp b/include/ck_tile/core/container/static_array.hpp index baefcdc408..6794e8e169 100644 --- a/include/ck_tile/core/container/static_array.hpp +++ b/include/ck_tile/core/container/static_array.hpp @@ -27,8 +27,14 @@ struct static_array T elems[N > 0 ? N : 1]; // Basic constexpr accessors - CK_TILE_HOST_DEVICE constexpr const T& operator[](index_t i) const { return elems[i]; } - CK_TILE_HOST_DEVICE constexpr T& operator[](index_t i) { return elems[i]; } + CK_TILE_HOST_DEVICE constexpr const T& operator[](index_t i) const [[clang::lifetimebound]] + { + return elems[i]; + } + CK_TILE_HOST_DEVICE constexpr T& operator[](index_t i) [[clang::lifetimebound]] + { + return elems[i]; + } CK_TILE_HOST_DEVICE static constexpr index_t size() { return N; } }; diff --git a/include/ck_tile/core/container/thread_buffer.hpp b/include/ck_tile/core/container/thread_buffer.hpp index 2224056015..a955b7f84f 100644 --- a/include/ck_tile/core/container/thread_buffer.hpp +++ b/include/ck_tile/core/container/thread_buffer.hpp @@ -54,17 +54,17 @@ struct thread_buffer { CK_TILE_HOST_DEVICE static constexpr auto size() { return N; } CK_TILE_HOST_DEVICE auto & get() {return data; } CK_TILE_HOST_DEVICE const auto & get() const {return data; } - CK_TILE_HOST_DEVICE auto & get(index_t i) {return data[i]; } - CK_TILE_HOST_DEVICE const auto & get(index_t i) const {return data[i]; } - CK_TILE_HOST_DEVICE constexpr const auto& operator[](index_t i) const { return get(i); } - CK_TILE_HOST_DEVICE constexpr auto& operator[](index_t i) { return get(i); } - CK_TILE_HOST_DEVICE constexpr auto& operator()(index_t i) { return get(i); } // TODO: compatible - CK_TILE_HOST_DEVICE constexpr auto& at(index_t i) { return get(i); } - CK_TILE_HOST_DEVICE constexpr const auto& at(index_t i) const { return get(i); } - template CK_TILE_HOST_DEVICE constexpr auto& at() { return get(I); } - template CK_TILE_HOST_DEVICE constexpr const auto& at() const { return get(I); } - template CK_TILE_HOST_DEVICE constexpr auto& at(number) { return get(I); } - template CK_TILE_HOST_DEVICE constexpr const auto& at(number) const { return get(I); } + CK_TILE_HOST_DEVICE auto & get(index_t i) [[clang::lifetimebound]] {return data[i]; } + CK_TILE_HOST_DEVICE const auto & get(index_t i) const [[clang::lifetimebound]] {return data[i]; } + CK_TILE_HOST_DEVICE constexpr const auto& operator[](index_t i) const [[clang::lifetimebound]] {return get(i); } + CK_TILE_HOST_DEVICE constexpr auto& operator[](index_t i) [[clang::lifetimebound]] { return get(i); } + CK_TILE_HOST_DEVICE constexpr auto& operator()(index_t i) [[clang::lifetimebound]] { return get(i); } // TODO: compatible + CK_TILE_HOST_DEVICE constexpr auto& at(index_t i) [[clang::lifetimebound]] { return get(i); } + CK_TILE_HOST_DEVICE constexpr const auto& at(index_t i) const [[clang::lifetimebound]] { return get(i); } + template CK_TILE_HOST_DEVICE constexpr auto& at() [[clang::lifetimebound]] { return get(I); } + template CK_TILE_HOST_DEVICE constexpr const auto& at() const [[clang::lifetimebound]] { return get(I); } + template CK_TILE_HOST_DEVICE constexpr auto& at(number) [[clang::lifetimebound]] { return get(I); } + template CK_TILE_HOST_DEVICE constexpr const auto& at(number) const [[clang::lifetimebound]] { return get(I); } template ::value, bool>::type = false> diff --git a/include/ck_tile/host/kernel_launch.hpp b/include/ck_tile/host/kernel_launch.hpp index 4565c0e147..ca7a5c765c 100644 --- a/include/ck_tile/host/kernel_launch.hpp +++ b/include/ck_tile/host/kernel_launch.hpp @@ -87,8 +87,11 @@ template -CK_TILE_HOST auto -make_kernel(KernelImpl /*f*/, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args) +CK_TILE_HOST auto make_kernel(KernelImpl /*f*/, + dim3 grid_dim, + dim3 block_dim, + std::size_t lds_byte, + [[clang::lifetimebound]] Args... args) { const auto kernel = []() { if constexpr(std::is_void_v) diff --git a/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp b/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp index 71cd12e534..e1b7272d61 100644 --- a/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp +++ b/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp @@ -11,8 +11,7 @@ #include "profiler/profile_batched_gemm_reduce_impl.hpp" -static ck::index_t param_mask = 0xffff; -static ck::index_t instance_index = -1; +static ck::index_t param_mask = 0xffff; struct GemmParams { ck::index_t M; @@ -105,15 +104,14 @@ int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); if(argc == 1) {} - else if(argc == 3) + else if(argc == 2) { - param_mask = strtol(argv[1], nullptr, 0); - instance_index = atoi(argv[2]); + param_mask = strtol(argv[1], nullptr, 0); } else { std::cout << "Usage of " << argv[0] << std::endl; - std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl; + std::cout << "Arg1: param_mask " << std::endl; } return RUN_ALL_TESTS(); } diff --git a/test/grouped_gemm/test_grouped_gemm_fixed_nk_bias.cpp b/test/grouped_gemm/test_grouped_gemm_fixed_nk_bias.cpp index 56d4051b79..63acfbb2e3 100644 --- a/test/grouped_gemm/test_grouped_gemm_fixed_nk_bias.cpp +++ b/test/grouped_gemm/test_grouped_gemm_fixed_nk_bias.cpp @@ -18,8 +18,7 @@ #include #include -static ck::index_t param_mask = 0xffffff; -static ck::index_t instance_index = -1; +static ck::index_t param_mask = 0xffffff; using FP32 = float; using FP16 = ck::half_t; @@ -292,15 +291,14 @@ int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); if(argc == 1) {} - else if(argc == 3) + else if(argc == 2) { - param_mask = strtol(argv[1], nullptr, 0); - instance_index = atoi(argv[2]); + param_mask = strtol(argv[1], nullptr, 0); } else { std::cout << "Usage of " << argv[0] << std::endl; - std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl; + std::cout << "Arg1: param_mask " << std::endl; } return RUN_ALL_TESTS(); } diff --git a/test/grouped_gemm/test_grouped_gemm_multi_abd_fixed_nk.cpp b/test/grouped_gemm/test_grouped_gemm_multi_abd_fixed_nk.cpp index 610e7f2b77..05caa6ed6d 100644 --- a/test/grouped_gemm/test_grouped_gemm_multi_abd_fixed_nk.cpp +++ b/test/grouped_gemm/test_grouped_gemm_multi_abd_fixed_nk.cpp @@ -15,9 +15,6 @@ #include "gtest/gtest.h" -static ck::index_t param_mask = 0xffffff; -static ck::index_t instance_index = -1; - using FP32 = float; using FP16 = ck::half_t; using BF16 = ck::bhalf_t; @@ -238,19 +235,5 @@ TYPED_TEST(TestGroupedGemmMultiABDFixedNK, Regular) int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); - if(argc == 1) - { - // Run with default arguments. - } - else if(argc == 3) - { - param_mask = strtol(argv[1], nullptr, 0); - instance_index = atoi(argv[2]); - } - else - { - std::cout << "Usage of " << argv[0] << std::endl; - std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl; - } return RUN_ALL_TESTS(); }