diff --git a/Jenkinsfile b/Jenkinsfile index 4d482f9e7a..8df0980cb3 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1317,8 +1317,8 @@ pipeline { description: "Build CK and run tests on gfx12 (default: ON)") booleanParam( name: "NINJA_BUILD_TRACE", - defaultValue: false, - description: "Generate a ninja build trace (default: OFF)") + defaultValue: true, + description: "Generate a ninja build trace (default: ON)") booleanParam( name: "NINJA_FTIME_TRACE", defaultValue: false, diff --git a/include/ck/tensor_description/tensor_space_filling_curve.hpp b/include/ck/tensor_description/tensor_space_filling_curve.hpp index 6ca81b14f5..0897768e2e 100644 --- a/include/ck/tensor_description/tensor_space_filling_curve.hpp +++ b/include/ck/tensor_description/tensor_space_filling_curve.hpp @@ -24,6 +24,7 @@ struct IndexLookupTable MultiIndex data[NumAccesses > 0 ? NumAccesses : 1]; __host__ __device__ constexpr const MultiIndex& operator[](index_t i) const + [[clang::lifetimebound]] { return data[i]; } diff --git a/include/ck/utility/array.hpp b/include/ck/utility/array.hpp index 9dfc266d8b..e194267efa 100644 --- a/include/ck/utility/array.hpp +++ b/include/ck/utility/array.hpp @@ -24,13 +24,22 @@ struct Array __host__ __device__ static constexpr index_t Size() { return NSize; } - __host__ __device__ constexpr const TData& At(index_t i) const { return mData[i]; } + __host__ __device__ constexpr const TData& At(index_t i) const [[clang::lifetimebound]] + { + return mData[i]; + } - __host__ __device__ constexpr TData& At(index_t i) { return mData[i]; } + __host__ __device__ constexpr TData& At(index_t i) [[clang::lifetimebound]] { return mData[i]; } - __host__ __device__ constexpr const TData& operator[](index_t i) const { return At(i); } + __host__ __device__ constexpr const TData& operator[](index_t i) const [[clang::lifetimebound]] + { + return At(i); + } - __host__ __device__ constexpr TData& operator()(index_t i) { return At(i); } + __host__ __device__ constexpr TData& operator()(index_t i) [[clang::lifetimebound]] + { + return At(i); + } template __host__ constexpr auto Emplace(index_t i, Args&&... args) @@ -50,10 +59,16 @@ struct Array return *this; } - __host__ __device__ constexpr const TData* begin() const { return &mData[0]; } - __host__ __device__ constexpr const TData* end() const { return &mData[NSize]; } - __host__ __device__ constexpr TData* begin() { return &mData[0]; } - __host__ __device__ constexpr TData* end() { return &mData[NSize]; } + __host__ __device__ constexpr const TData* begin() const [[clang::lifetimebound]] + { + return &mData[0]; + } + __host__ __device__ constexpr const TData* end() const [[clang::lifetimebound]] + { + return &mData[NSize]; + } + __host__ __device__ constexpr TData* begin() [[clang::lifetimebound]] { return &mData[0]; } + __host__ __device__ constexpr TData* end() [[clang::lifetimebound]] { return &mData[NSize]; } }; // empty Array diff --git a/include/ck/utility/sequence.hpp b/include/ck/utility/sequence.hpp index 503426949a..d6d4b83f81 100644 --- a/include/ck/utility/sequence.hpp +++ b/include/ck/utility/sequence.hpp @@ -403,8 +403,15 @@ struct index_array { index_t data[N > 0 ? N : 1]; - __host__ __device__ constexpr index_t& operator[](index_t i) { return data[i]; } - __host__ __device__ constexpr const index_t& operator[](index_t i) const { return data[i]; } + __host__ __device__ constexpr index_t& operator[](index_t i) [[clang::lifetimebound]] + { + return data[i]; + } + __host__ __device__ constexpr const index_t& operator[](index_t i) const + [[clang::lifetimebound]] + { + return data[i]; + } }; /** diff --git a/include/ck/utility/statically_indexed_array.hpp b/include/ck/utility/statically_indexed_array.hpp index 8dc5b8dd09..1139091233 100644 --- a/include/ck/utility/statically_indexed_array.hpp +++ b/include/ck/utility/statically_indexed_array.hpp @@ -69,7 +69,7 @@ struct StaticallyIndexedArray_v2 // read access template - __host__ __device__ constexpr const auto& At(Number) const + __host__ __device__ constexpr const auto& At(Number) const [[clang::lifetimebound]] { static_assert(I < N, "wrong! out of range"); @@ -78,7 +78,7 @@ struct StaticallyIndexedArray_v2 // write access template - __host__ __device__ constexpr auto& At(Number) + __host__ __device__ constexpr auto& At(Number) [[clang::lifetimebound]] { static_assert(I < N, "wrong! out of range"); @@ -87,14 +87,14 @@ struct StaticallyIndexedArray_v2 // read access template - __host__ __device__ constexpr const auto& operator[](Number i) const + __host__ __device__ constexpr const auto& operator[](Number i) const [[clang::lifetimebound]] { return At(i); } // write access template - __host__ __device__ constexpr auto& operator()(Number i) + __host__ __device__ constexpr auto& operator()(Number i) [[clang::lifetimebound]] { return At(i); } diff --git a/include/ck_tile/core/container/array.hpp b/include/ck_tile/core/container/array.hpp index b487467a71..45adbded2c 100644 --- a/include/ck_tile/core/container/array.hpp +++ b/include/ck_tile/core/container/array.hpp @@ -103,23 +103,23 @@ struct array // clang-format off CK_TILE_HOST_DEVICE constexpr auto& get() { return data; } CK_TILE_HOST_DEVICE constexpr const auto& get() const { return data; } - CK_TILE_HOST_DEVICE constexpr auto& get(index_t i) { return data[i]; } - CK_TILE_HOST_DEVICE constexpr const auto& get(index_t i) const { return data[i]; } + CK_TILE_HOST_DEVICE constexpr auto& get(index_t i) [[clang::lifetimebound]] { return data[i]; } + CK_TILE_HOST_DEVICE constexpr const auto& get(index_t i) const [[clang::lifetimebound]] { return data[i]; } template CK_TILE_HOST_DEVICE constexpr auto& get() { return data[I]; } template CK_TILE_HOST_DEVICE constexpr const auto& get() const { return data[I]; } - template CK_TILE_HOST_DEVICE constexpr auto& get(number) { return data[I]; } + template CK_TILE_HOST_DEVICE constexpr auto& get(number)[[clang::lifetimebound]] { return data[I]; } template CK_TILE_HOST_DEVICE constexpr const auto& get(number) const { return data[I]; } CK_TILE_HOST_DEVICE constexpr auto& at(index_t i) { return get(i); } - CK_TILE_HOST_DEVICE constexpr const auto& at(index_t i) const { return get(i); } - template CK_TILE_HOST_DEVICE constexpr auto& at() { return get(I); } - template CK_TILE_HOST_DEVICE constexpr const auto& at() const { return get(I); } - template CK_TILE_HOST_DEVICE constexpr auto& at(number) { return get(I); } - template CK_TILE_HOST_DEVICE constexpr const auto& at(number) const { return get(I); } + CK_TILE_HOST_DEVICE constexpr const auto& at(index_t i) const [[clang::lifetimebound]] { return get(i); } + template CK_TILE_HOST_DEVICE constexpr auto& at() [[clang::lifetimebound]] { return get(I); } + template CK_TILE_HOST_DEVICE constexpr const auto& at() const [[clang::lifetimebound]] { return get(I); } + template CK_TILE_HOST_DEVICE constexpr auto& at(number) [[clang::lifetimebound]] { return get(I); } + template CK_TILE_HOST_DEVICE constexpr const auto& at(number) const [[clang::lifetimebound]] { return get(I); } - CK_TILE_HOST_DEVICE constexpr const value_type& operator[](index_t i) const { return get(i); } - CK_TILE_HOST_DEVICE constexpr value_type& operator[](index_t i) { return get(i); } - CK_TILE_HOST_DEVICE constexpr value_type& operator()(index_t i) { return get(i); } // TODO: compatible + CK_TILE_HOST_DEVICE constexpr const value_type& operator[](index_t i) const [[clang::lifetimebound]] { return get(i); } + CK_TILE_HOST_DEVICE constexpr value_type& operator[](index_t i) [[clang::lifetimebound]] { return get(i); } + CK_TILE_HOST_DEVICE constexpr value_type& operator()(index_t i) [[clang::lifetimebound]] { return get(i); } // TODO: compatible #if 0 template CK_TILE_HOST_DEVICE constexpr auto operator=(const ArrayLike& arr) diff --git a/include/ck_tile/core/container/static_array.hpp b/include/ck_tile/core/container/static_array.hpp index baefcdc408..6794e8e169 100644 --- a/include/ck_tile/core/container/static_array.hpp +++ b/include/ck_tile/core/container/static_array.hpp @@ -27,8 +27,14 @@ struct static_array T elems[N > 0 ? N : 1]; // Basic constexpr accessors - CK_TILE_HOST_DEVICE constexpr const T& operator[](index_t i) const { return elems[i]; } - CK_TILE_HOST_DEVICE constexpr T& operator[](index_t i) { return elems[i]; } + CK_TILE_HOST_DEVICE constexpr const T& operator[](index_t i) const [[clang::lifetimebound]] + { + return elems[i]; + } + CK_TILE_HOST_DEVICE constexpr T& operator[](index_t i) [[clang::lifetimebound]] + { + return elems[i]; + } CK_TILE_HOST_DEVICE static constexpr index_t size() { return N; } }; diff --git a/include/ck_tile/core/container/thread_buffer.hpp b/include/ck_tile/core/container/thread_buffer.hpp index 2224056015..a955b7f84f 100644 --- a/include/ck_tile/core/container/thread_buffer.hpp +++ b/include/ck_tile/core/container/thread_buffer.hpp @@ -54,17 +54,17 @@ struct thread_buffer { CK_TILE_HOST_DEVICE static constexpr auto size() { return N; } CK_TILE_HOST_DEVICE auto & get() {return data; } CK_TILE_HOST_DEVICE const auto & get() const {return data; } - CK_TILE_HOST_DEVICE auto & get(index_t i) {return data[i]; } - CK_TILE_HOST_DEVICE const auto & get(index_t i) const {return data[i]; } - CK_TILE_HOST_DEVICE constexpr const auto& operator[](index_t i) const { return get(i); } - CK_TILE_HOST_DEVICE constexpr auto& operator[](index_t i) { return get(i); } - CK_TILE_HOST_DEVICE constexpr auto& operator()(index_t i) { return get(i); } // TODO: compatible - CK_TILE_HOST_DEVICE constexpr auto& at(index_t i) { return get(i); } - CK_TILE_HOST_DEVICE constexpr const auto& at(index_t i) const { return get(i); } - template CK_TILE_HOST_DEVICE constexpr auto& at() { return get(I); } - template CK_TILE_HOST_DEVICE constexpr const auto& at() const { return get(I); } - template CK_TILE_HOST_DEVICE constexpr auto& at(number) { return get(I); } - template CK_TILE_HOST_DEVICE constexpr const auto& at(number) const { return get(I); } + CK_TILE_HOST_DEVICE auto & get(index_t i) [[clang::lifetimebound]] {return data[i]; } + CK_TILE_HOST_DEVICE const auto & get(index_t i) const [[clang::lifetimebound]] {return data[i]; } + CK_TILE_HOST_DEVICE constexpr const auto& operator[](index_t i) const [[clang::lifetimebound]] {return get(i); } + CK_TILE_HOST_DEVICE constexpr auto& operator[](index_t i) [[clang::lifetimebound]] { return get(i); } + CK_TILE_HOST_DEVICE constexpr auto& operator()(index_t i) [[clang::lifetimebound]] { return get(i); } // TODO: compatible + CK_TILE_HOST_DEVICE constexpr auto& at(index_t i) [[clang::lifetimebound]] { return get(i); } + CK_TILE_HOST_DEVICE constexpr const auto& at(index_t i) const [[clang::lifetimebound]] { return get(i); } + template CK_TILE_HOST_DEVICE constexpr auto& at() [[clang::lifetimebound]] { return get(I); } + template CK_TILE_HOST_DEVICE constexpr const auto& at() const [[clang::lifetimebound]] { return get(I); } + template CK_TILE_HOST_DEVICE constexpr auto& at(number) [[clang::lifetimebound]] { return get(I); } + template CK_TILE_HOST_DEVICE constexpr const auto& at(number) const [[clang::lifetimebound]] { return get(I); } template ::value, bool>::type = false> diff --git a/include/ck_tile/host/kernel_launch.hpp b/include/ck_tile/host/kernel_launch.hpp index 4565c0e147..ca7a5c765c 100644 --- a/include/ck_tile/host/kernel_launch.hpp +++ b/include/ck_tile/host/kernel_launch.hpp @@ -87,8 +87,11 @@ template -CK_TILE_HOST auto -make_kernel(KernelImpl /*f*/, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args) +CK_TILE_HOST auto make_kernel(KernelImpl /*f*/, + dim3 grid_dim, + dim3 block_dim, + std::size_t lds_byte, + [[clang::lifetimebound]] Args... args) { const auto kernel = []() { if constexpr(std::is_void_v) diff --git a/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp b/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp index 71cd12e534..e1b7272d61 100644 --- a/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp +++ b/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp @@ -11,8 +11,7 @@ #include "profiler/profile_batched_gemm_reduce_impl.hpp" -static ck::index_t param_mask = 0xffff; -static ck::index_t instance_index = -1; +static ck::index_t param_mask = 0xffff; struct GemmParams { ck::index_t M; @@ -105,15 +104,14 @@ int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); if(argc == 1) {} - else if(argc == 3) + else if(argc == 2) { - param_mask = strtol(argv[1], nullptr, 0); - instance_index = atoi(argv[2]); + param_mask = strtol(argv[1], nullptr, 0); } else { std::cout << "Usage of " << argv[0] << std::endl; - std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl; + std::cout << "Arg1: param_mask " << std::endl; } return RUN_ALL_TESTS(); } diff --git a/test/grouped_gemm/test_grouped_gemm_fixed_nk_bias.cpp b/test/grouped_gemm/test_grouped_gemm_fixed_nk_bias.cpp index 56d4051b79..63acfbb2e3 100644 --- a/test/grouped_gemm/test_grouped_gemm_fixed_nk_bias.cpp +++ b/test/grouped_gemm/test_grouped_gemm_fixed_nk_bias.cpp @@ -18,8 +18,7 @@ #include #include -static ck::index_t param_mask = 0xffffff; -static ck::index_t instance_index = -1; +static ck::index_t param_mask = 0xffffff; using FP32 = float; using FP16 = ck::half_t; @@ -292,15 +291,14 @@ int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); if(argc == 1) {} - else if(argc == 3) + else if(argc == 2) { - param_mask = strtol(argv[1], nullptr, 0); - instance_index = atoi(argv[2]); + param_mask = strtol(argv[1], nullptr, 0); } else { std::cout << "Usage of " << argv[0] << std::endl; - std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl; + std::cout << "Arg1: param_mask " << std::endl; } return RUN_ALL_TESTS(); } diff --git a/test/grouped_gemm/test_grouped_gemm_multi_abd_fixed_nk.cpp b/test/grouped_gemm/test_grouped_gemm_multi_abd_fixed_nk.cpp index 610e7f2b77..05caa6ed6d 100644 --- a/test/grouped_gemm/test_grouped_gemm_multi_abd_fixed_nk.cpp +++ b/test/grouped_gemm/test_grouped_gemm_multi_abd_fixed_nk.cpp @@ -15,9 +15,6 @@ #include "gtest/gtest.h" -static ck::index_t param_mask = 0xffffff; -static ck::index_t instance_index = -1; - using FP32 = float; using FP16 = ck::half_t; using BF16 = ck::bhalf_t; @@ -238,19 +235,5 @@ TYPED_TEST(TestGroupedGemmMultiABDFixedNK, Regular) int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); - if(argc == 1) - { - // Run with default arguments. - } - else if(argc == 3) - { - param_mask = strtol(argv[1], nullptr, 0); - instance_index = atoi(argv[2]); - } - else - { - std::cout << "Usage of " << argv[0] << std::endl; - std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl; - } return RUN_ALL_TESTS(); }