diff --git a/Jenkinsfile b/Jenkinsfile index f3bb013790..170e0bf432 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -915,13 +915,7 @@ def Build_CK(Map conf=[:]){ cmake_build(conf) if ( params.RUN_INDUCTOR_TESTS && arch == "gfx90a" ){ echo "Run inductor codegen tests" - sh """ - python3 -m venv ${env.WORKSPACE}/projects/composablekernel - . ${env.WORKSPACE}/projects/composablekernel/bin/activate - python3 -m pip install pytest build setuptools setuptools_scm - python3 -m pip install . - python3 -m pytest python/test/test_gen_instances.py - """ + sh "projects/composablekernel/script/run_inductor_tests.sh" } // run performance tests, stash the logs, results will be processed on the master node dir("projects/composablekernel/script"){ @@ -1338,8 +1332,8 @@ pipeline { description: "Generate a detailed time trace (default: OFF)") booleanParam( name: "RUN_INDUCTOR_TESTS", - defaultValue: false, - description: "Run inductor codegen tests (default: OFF)") + defaultValue: true, + description: "Run inductor codegen tests (default: ON)") booleanParam( name: "RUN_CODEGEN_TESTS", defaultValue: true, diff --git a/example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp index 9fdcf4aaad..3dde25776e 100644 --- a/example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp +++ b/example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp @@ -23,6 +23,9 @@ #include "ck/library/utility/literals.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm_multiple_d.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + using ::ck::DeviceMem; using ::ck::hip_check_error; using ::ck::HostTensorDescriptor; @@ -74,3 +77,5 @@ using DeviceGemmInstance = #include "run_grouped_gemm_multiple_d_example.inc" int main(int argc, char* argv[]) { return !run_grouped_gemm_example(argc, argv); } + +#pragma clang diagnostic pop diff --git a/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp b/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp index 82d02fc399..40d61d7174 100644 --- a/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp +++ b/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp @@ -18,6 +18,9 @@ #include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/utility/numeric.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + using ::ck::DeviceMem; using ::ck::HostTensorDescriptor; using ::ck::make_ParallelTensorFunctor; @@ -478,3 +481,4 @@ int main(int argc, char* argv[]) return pass ? 0 : 1; } +#pragma clang diagnostic pop diff --git a/example/29_batched_gemm_bias_e_permute/run_batched_gemm_bias_e_permute_example.inc b/example/29_batched_gemm_bias_e_permute/run_batched_gemm_bias_e_permute_example.inc index 803c1eb0bf..0f49ea0979 100644 --- a/example/29_batched_gemm_bias_e_permute/run_batched_gemm_bias_e_permute_example.inc +++ b/example/29_batched_gemm_bias_e_permute/run_batched_gemm_bias_e_permute_example.inc @@ -1,3 +1,5 @@ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" // hardcoded for NumDimM == NumDimN == NumDimK == 2 template & src, const Axes& axes, Functor functor, Ten return true; } +#pragma clang diagnostic pop diff --git a/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp index ad40f84201..ca9ac02648 100644 --- a/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp +++ b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp @@ -16,6 +16,9 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + using ::ck::DeviceMem; using ::ck::HostTensorDescriptor; using ::ck::make_ParallelTensorFunctor; @@ -419,3 +422,4 @@ int main(int argc, char* argv[]) return 0; } +#pragma clang diagnostic pop diff --git a/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp index a0905d6505..29cfc418b8 100644 --- a/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp +++ b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp @@ -16,6 +16,9 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + using ::ck::DeviceMem; using ::ck::HostTensorDescriptor; using ::ck::make_ParallelTensorFunctor; @@ -419,3 +422,4 @@ int main(int argc, char* argv[]) return 0; } +#pragma clang diagnostic pop diff --git a/example/ck_tile/03_gemm/gemm_splitk_two_stage_reduce.cpp b/example/ck_tile/03_gemm/gemm_splitk_two_stage_reduce.cpp index 64305b85cf..5f96155c04 100644 --- a/example/ck_tile/03_gemm/gemm_splitk_two_stage_reduce.cpp +++ b/example/ck_tile/03_gemm/gemm_splitk_two_stage_reduce.cpp @@ -16,6 +16,9 @@ #include "gemm_utils.hpp" #include "run_gemm_example.inc" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + /** * @brief Tile partitioner with output offset support. * @@ -961,3 +964,5 @@ int main(int argc, char* argv[]) } return EXIT_SUCCESS; } + +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp index 8085e0aeea..b2d1120d25 100644 --- a/include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp +++ b/include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp @@ -12,6 +12,9 @@ #include "device_base.hpp" #include "ck/utility/ignore.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace device { @@ -178,3 +181,5 @@ struct DeviceGroupedGemm : public BaseOperator } // namespace device } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_wmma_cshuffle_v3_common.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_wmma_cshuffle_v3_common.hpp index a739af898f..d3574ad417 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_wmma_cshuffle_v3_common.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_wmma_cshuffle_v3_common.hpp @@ -18,6 +18,9 @@ #include "ck/utility/scheduler_enum.hpp" #include "ck/utility/integral_constant.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace device { @@ -900,3 +903,4 @@ struct DeviceGemmGemm_Wmma_CShuffleV3_Common_Invoker_Arg } // namespace device } // namespace tensor_operation } // namespace ck +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp index f63b20b0f0..b9a04b9686 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp @@ -18,6 +18,9 @@ #include "ck/host_utility/kernel_launch.hpp" #include "ck/host_utility/io.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace device { @@ -779,3 +782,5 @@ struct DeviceBatchedGemmGemm_Xdl_CShuffle : public DeviceBatchedGemmGemm @@ -692,3 +695,5 @@ struct GridwiseGemm_ak0mak1_bk0nbk1_mn_dpp }; } // namespace ck + +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma_cshuffle_v3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma_cshuffle_v3.hpp index 7818074b7f..3d13ae6585 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma_cshuffle_v3.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma_cshuffle_v3.hpp @@ -19,6 +19,9 @@ #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_wmma_cshuffle_v3_common.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { /// @brief \"Universal\" GEMM kernel with SplitK support. @@ -1096,3 +1099,4 @@ struct GridwiseGemm_wmma_cshuffle_v3 }; } // namespace ck +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma_cshuffle_v3_ab_scale.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma_cshuffle_v3_ab_scale.hpp index 92561d00d4..d4a8bcb537 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma_cshuffle_v3_ab_scale.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma_cshuffle_v3_ab_scale.hpp @@ -16,6 +16,9 @@ #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_wmma_cshuffle_v3_common.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { template @@ -938,3 +941,5 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdl_cshuffle_v1 }; } // namespace ck + +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v2.hpp index f765662904..f44af8eaec 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v2.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v2.hpp @@ -15,6 +15,9 @@ #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_common.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { template @@ -1019,3 +1022,5 @@ struct GridwiseGemm_xdl_cshuffle_v2 }; } // namespace ck + +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3.hpp index d926efab84..23ee2e0ac7 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3.hpp @@ -16,6 +16,9 @@ #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_common.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { // Currently we do not have a elegant way to put single lds buffer & double lds buffer pipe in same @@ -1577,3 +1580,6 @@ struct GridwiseGemm_xdl_cshuffle_v3 }; } // namespace ck + +#pragma clang diagnostic pop + diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_b_preshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_b_preshuffle.hpp index a81679ea78..91a8e4b22d 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_b_preshuffle.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_b_preshuffle.hpp @@ -15,6 +15,9 @@ #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_common.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { // Currently we do not have a elegant way to put single lds buffer & double lds buffer pipe in same @@ -1412,3 +1415,4 @@ struct GridwiseGemm_xdl_cshuffle_v3_b_preshuffle }; } // namespace ck +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_b_scale.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_b_scale.hpp index f9be9e494b..d7c8042a73 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_b_scale.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_b_scale.hpp @@ -15,6 +15,9 @@ #include "ck/utility/env.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_common.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { // Currently we do not have a elegant way to put single lds buffer & double lds buffer pipe in same @@ -1585,3 +1588,5 @@ struct GridwiseGemm_xdl_cshuffle_v3 }; } // namespace ck + +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d.hpp index 671cfe4967..21172ffcbf 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d.hpp @@ -18,6 +18,9 @@ #define DEBUG_LOG 0 +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { // Currently we do not have a elegant way to put single lds buffer & double lds buffer pipe in same @@ -1822,3 +1825,5 @@ struct GridwiseGemmMultiD_xdl_cshuffle_v3 }; } // namespace ck + +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_ab_scale.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_ab_scale.hpp index 54260d4386..a0ffab38ed 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_ab_scale.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_ab_scale.hpp @@ -17,6 +17,9 @@ #define DEBUG_LOG 0 +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { // Currently we do not have a elegant way to put single lds buffer & double lds buffer pipe in same @@ -1284,3 +1287,4 @@ struct GridwiseGemmMultiD_ABScale_xdl_cshuffle_v3 }; } // namespace ck +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_b_preshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_b_preshuffle.hpp index 28bcf14cd0..f7131f235b 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_b_preshuffle.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_b_preshuffle.hpp @@ -17,6 +17,9 @@ #define DEBUG_LOG 0 +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { // Currently we do not have a elegant way to put single lds buffer & double lds buffer pipe in same @@ -1474,3 +1477,5 @@ struct GridwiseGemmMultiD_xdl_cshuffle_v3_b_preshuffle }; } // namespace ck + +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_blockscale_b_preshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_blockscale_b_preshuffle.hpp index fa0f401743..8bd4a35a25 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_blockscale_b_preshuffle.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_blockscale_b_preshuffle.hpp @@ -17,6 +17,9 @@ #define DEBUG_LOG 0 +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { // Currently we do not have a elegant way to put single lds buffer & double lds buffer pipe in same @@ -1550,3 +1553,4 @@ struct GridwiseGemmMultiD_blockscale_xdl_cshuffle_v3_b_preshuffle }; } // namespace ck +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_mx.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_mx.hpp index fa231c9b02..96a2f3c2e7 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_mx.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_mx.hpp @@ -17,6 +17,9 @@ #include "ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_direct_load.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_common.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { // Currently we do not have a elegant way to put single lds buffer & double lds buffer pipe in same @@ -1749,3 +1752,4 @@ struct GridwiseGemmMX_xdl_cshuffle_v3 }; } // namespace ck +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_mx_bpreshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_mx_bpreshuffle.hpp index 43a46d6ff4..2161b58641 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_mx_bpreshuffle.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_mx_bpreshuffle.hpp @@ -16,6 +16,9 @@ #include "ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_direct_load.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_common.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { // Currently we do not have a elegant way to put single lds buffer & double lds buffer pipe in same @@ -1795,3 +1798,4 @@ struct GridwiseGemmMX_xdl_cshuffle_v3_bpreshuffle }; } // namespace ck +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp index 3134096899..b81e9254aa 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp @@ -20,6 +20,9 @@ #include "ck/tensor_operation/gpu/device/matrix_padder.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_common.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { template @@ -1172,3 +1175,5 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk }; } // namespace ck + +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp index adb653e7d4..df560a4c0e 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp @@ -16,6 +16,9 @@ #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_common.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { template && is_pointer_v, bool>::type = false> -__host__ __device__ PY c_style_pointer_cast(PX p_x) +__host__ __device__ PY c_style_pointer_cast([[clang::lifetimebound]] PX p_x) { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wold-style-cast" diff --git a/include/ck/utility/dynamic_buffer.hpp b/include/ck/utility/dynamic_buffer.hpp index ce4c92425e..13a946a247 100644 --- a/include/ck/utility/dynamic_buffer.hpp +++ b/include/ck/utility/dynamic_buffer.hpp @@ -15,6 +15,9 @@ #include "amd_transpose_load.hpp" #include "generic_memory_space_atomic.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { // T may be scalar or vector @@ -46,7 +49,8 @@ struct DynamicBuffer return 1; }(); - __host__ __device__ constexpr DynamicBuffer(T* p_data, ElementSpaceSize element_space_size) + __host__ __device__ constexpr DynamicBuffer([[clang::lifetimebound]] T* p_data, + ElementSpaceSize element_space_size) : p_data_{p_data}, element_space_size_{element_space_size} { } @@ -498,3 +502,5 @@ make_dynamic_buffer(T* p, ElementSpaceSize element_space_size, X invalid_element } } // namespace ck + +#pragma clang diagnostic pop diff --git a/include/ck/utility/span.hpp b/include/ck/utility/span.hpp index c0e68c95f4..eef376878c 100644 --- a/include/ck/utility/span.hpp +++ b/include/ck/utility/span.hpp @@ -7,6 +7,9 @@ #include #include +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { template @@ -65,3 +68,5 @@ class span }; } // namespace ck + +#pragma clang diagnostic pop diff --git a/include/ck/utility/tuple.hpp b/include/ck/utility/tuple.hpp index 16cd35e1d6..038a81b992 100644 --- a/include/ck/utility/tuple.hpp +++ b/include/ck/utility/tuple.hpp @@ -9,6 +9,9 @@ #include "ck/utility/enable_if.hpp" #include +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace detail { @@ -43,7 +46,7 @@ struct TupleElementKeyData // for read access of tuple element template __host__ __device__ constexpr const Data& -get_tuple_element_data_reference(const TupleElementKeyData& x) +get_tuple_element_data_reference([[clang::lifetimebound]] const TupleElementKeyData& x) { return static_cast(x.mData); } @@ -100,6 +103,7 @@ struct TupleImpl, Xs...> : TupleElementKeyData __host__ __device__ constexpr const auto& GetElementDataByKey(TupleElementKey) const + [[clang::lifetimebound]] { return get_tuple_element_data_reference>(*this); } @@ -268,3 +272,5 @@ template using tuple_element_or_t = typename detail::tuple_element_or_impl::type; } // namespace ck + +#pragma clang diagnostic pop diff --git a/include/ck/utility/workgroup_barrier.hpp b/include/ck/utility/workgroup_barrier.hpp index 0be341da88..b77c98d709 100644 --- a/include/ck/utility/workgroup_barrier.hpp +++ b/include/ck/utility/workgroup_barrier.hpp @@ -5,6 +5,9 @@ #include #include +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { struct workgroup_barrier { @@ -60,3 +63,5 @@ struct workgroup_barrier uint32_t* base_ptr; }; } // namespace ck + +#pragma clang diagnostic pop diff --git a/include/ck_tile/core/arch/mma/mma_pipeline.hpp b/include/ck_tile/core/arch/mma/mma_pipeline.hpp index f2bde7a686..0f497a6ce6 100644 --- a/include/ck_tile/core/arch/mma/mma_pipeline.hpp +++ b/include/ck_tile/core/arch/mma/mma_pipeline.hpp @@ -9,6 +9,9 @@ #include "mma_traits.hpp" #include "mma_transforms.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile::core::arch::mma { /*! @enum MmaPipelineOptionFlag @@ -341,3 +344,5 @@ concept MmaPipelineInterface = std::derived_from - CK_TILE_DEVICE static decltype(auto) exec(VecType&& v) + CK_TILE_DEVICE static decltype(auto) exec([[clang::lifetimebound]] VecType&& v) { return std::forward(v); } diff --git a/include/ck_tile/core/arch/workgroup_barrier.hpp b/include/ck_tile/core/arch/workgroup_barrier.hpp index 2560fe501c..64a6e63a40 100644 --- a/include/ck_tile/core/arch/workgroup_barrier.hpp +++ b/include/ck_tile/core/arch/workgroup_barrier.hpp @@ -10,7 +10,7 @@ namespace ck_tile { struct workgroup_barrier { - CK_TILE_DEVICE workgroup_barrier(uint32_t* ptr) : base_ptr(ptr) {} + CK_TILE_DEVICE workgroup_barrier([[clang::lifetimebound]] uint32_t* ptr) : base_ptr(ptr) {} CK_TILE_DEVICE uint32_t ld(uint32_t offset = 0) { diff --git a/include/ck_tile/core/container/span.hpp b/include/ck_tile/core/container/span.hpp index 4cce87eb6f..e0a8768590 100644 --- a/include/ck_tile/core/container/span.hpp +++ b/include/ck_tile/core/container/span.hpp @@ -8,6 +8,9 @@ #include #include +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { // implement the c++20 std::span, lightweight, non-owning reference to a sequence @@ -76,3 +79,4 @@ class span }; } // namespace ck_tile +#pragma clang diagnostic pop diff --git a/include/ck_tile/core/tensor/buffer_view.hpp b/include/ck_tile/core/tensor/buffer_view.hpp index 1705f5b0f2..0a6a1fcba9 100644 --- a/include/ck_tile/core/tensor/buffer_view.hpp +++ b/include/ck_tile/core/tensor/buffer_view.hpp @@ -17,6 +17,9 @@ #include "ck_tile/core/utility/type_traits.hpp" #include "ck_tile/core/utility/ignore.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { // T may be scalar or vector @@ -240,7 +243,8 @@ struct buffer_view -CK_TILE_HOST_DEVICE constexpr auto make_buffer_view(T* __restrict__ p, BufferSizeType buffer_size) +CK_TILE_HOST_DEVICE constexpr auto make_buffer_view([[clang::lifetimebound]] T* __restrict__ p, + BufferSizeType buffer_size) { return buffer_view{p, buffer_size}; } @@ -1325,3 +1331,5 @@ CK_TILE_HOST_DEVICE void print(const buffer_view && std::is_pointer_v, bool>::type = false> -CK_TILE_HOST_DEVICE PY c_style_pointer_cast(PX p_x) +CK_TILE_HOST_DEVICE PY c_style_pointer_cast([[clang::lifetimebound]] PX p_x) { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wold-style-cast" diff --git a/include/ck_tile/ops/batched_contraction/kernel/batched_contraction_kernel.hpp b/include/ck_tile/ops/batched_contraction/kernel/batched_contraction_kernel.hpp index 968d5d6ac2..8e7ea3ce09 100644 --- a/include/ck_tile/ops/batched_contraction/kernel/batched_contraction_kernel.hpp +++ b/include/ck_tile/ops/batched_contraction/kernel/batched_contraction_kernel.hpp @@ -8,6 +8,9 @@ #include "ck_tile/ops/batched_contraction/utils/tensor_descriptor_utils.hpp" #include "ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + /** * @file batched_contraction_kernel.hpp * @brief Batched Tensor Contraction Operations @@ -687,3 +690,5 @@ struct BatchedContractionKernel }; } // namespace ck_tile + +#pragma clang diagnostic pop diff --git a/include/ck_tile/ops/flatmm/kernel/flatmm_kernel.hpp b/include/ck_tile/ops/flatmm/kernel/flatmm_kernel.hpp index bd98918b90..f051188442 100644 --- a/include/ck_tile/ops/flatmm/kernel/flatmm_kernel.hpp +++ b/include/ck_tile/ops/flatmm/kernel/flatmm_kernel.hpp @@ -10,6 +10,9 @@ #include "ck_tile/ops/common.hpp" #include "ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_scheduler.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { struct FlatmmProblem { @@ -970,3 +973,5 @@ struct FlatmmKernel }; } // namespace ck_tile + +#pragma clang diagnostic pop diff --git a/include/ck_tile/ops/flatmm/kernel/grouped_flatmm_kernel.hpp b/include/ck_tile/ops/flatmm/kernel/grouped_flatmm_kernel.hpp index ff96139f18..b4bc5ac38e 100644 --- a/include/ck_tile/ops/flatmm/kernel/grouped_flatmm_kernel.hpp +++ b/include/ck_tile/ops/flatmm/kernel/grouped_flatmm_kernel.hpp @@ -10,6 +10,9 @@ #include "ck_tile/ops/common.hpp" #include "ck_tile/ops/flatmm/kernel/flatmm_kernel.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { template , @@ -468,3 +471,5 @@ struct GroupedFlatmmKernel : FlatmmKernel* } } // namespace ck_tile + +#pragma clang diagnostic pop diff --git a/include/ck_tile/ops/fmha/block/variants.hpp b/include/ck_tile/ops/fmha/block/variants.hpp index b6f79873b4..a33da8c9f6 100644 --- a/include/ck_tile/ops/fmha/block/variants.hpp +++ b/include/ck_tile/ops/fmha/block/variants.hpp @@ -19,6 +19,9 @@ #define CK_TILE_ATTENTION_USE_SOFTSIGN_ASM 0 #endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { namespace internal { __device__ inline float @@ -333,3 +336,5 @@ struct ComposedAttention }; } // namespace ck_tile + +#pragma clang diagnostic pop diff --git a/include/ck_tile/ops/fused_moe/kernel/moe_sorting_kernel.hpp b/include/ck_tile/ops/fused_moe/kernel/moe_sorting_kernel.hpp index 06ab134f85..07eda483d2 100644 --- a/include/ck_tile/ops/fused_moe/kernel/moe_sorting_kernel.hpp +++ b/include/ck_tile/ops/fused_moe/kernel/moe_sorting_kernel.hpp @@ -10,6 +10,9 @@ #include #include +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + #if !defined(CK_TILE_HAS_ROW_NEWBCAST) // row_newbcast (DPP modifier 0x157) support by architecture: // - Not supported: gfx908 (MI100) and older @@ -3125,3 +3128,5 @@ struct MoeSortingMultiPhaseKernel_P23 #undef MOE_SORTING_MOCK_ID } // namespace ck_tile + +#pragma clang diagnostic pop diff --git a/include/ck_tile/ops/gemm/kernel/gemm_kernel.hpp b/include/ck_tile/ops/gemm/kernel/gemm_kernel.hpp index d113336a3e..4ee97fb902 100644 --- a/include/ck_tile/ops/gemm/kernel/gemm_kernel.hpp +++ b/include/ck_tile/ops/gemm/kernel/gemm_kernel.hpp @@ -15,6 +15,9 @@ #include "ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp" #include "ck_tile/core/utility/type_traits.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { /// @brief The GEMM kernel host arguments. @@ -168,3 +171,5 @@ struct GemmKernel } }; } // namespace ck_tile + +#pragma clang diagnostic pop diff --git a/include/ck_tile/ops/gemm/kernel/gemm_multi_abd_kernel.hpp b/include/ck_tile/ops/gemm/kernel/gemm_multi_abd_kernel.hpp index 9fc8ef83c3..84eb3da752 100644 --- a/include/ck_tile/ops/gemm/kernel/gemm_multi_abd_kernel.hpp +++ b/include/ck_tile/ops/gemm/kernel/gemm_multi_abd_kernel.hpp @@ -15,6 +15,9 @@ #include "ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp" #include "ck_tile/core/utility/type_traits.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { /// @brief The MultiABD GEMM kernel host arguments. @@ -195,3 +198,5 @@ struct GemmKernelMultiABD } }; } // namespace ck_tile + +#pragma clang diagnostic pop diff --git a/include/ck_tile/ops/gemm/kernel/gemm_multi_d_kernel.hpp b/include/ck_tile/ops/gemm/kernel/gemm_multi_d_kernel.hpp index 6360e868e5..8db4b3284c 100644 --- a/include/ck_tile/ops/gemm/kernel/gemm_multi_d_kernel.hpp +++ b/include/ck_tile/ops/gemm/kernel/gemm_multi_d_kernel.hpp @@ -15,6 +15,9 @@ #include "ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp" #include "ck_tile/core/utility/type_traits.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { /// @brief The MultiD GEMM kernel host arguments. @@ -190,3 +193,5 @@ struct GemmKernelMultiD } }; } // namespace ck_tile + +#pragma clang diagnostic pop diff --git a/include/ck_tile/ops/gemm/kernel/grouped_gemm_kernel.hpp b/include/ck_tile/ops/gemm/kernel/grouped_gemm_kernel.hpp index bb054eeaa3..eeeff930a8 100644 --- a/include/ck_tile/ops/gemm/kernel/grouped_gemm_kernel.hpp +++ b/include/ck_tile/ops/gemm/kernel/grouped_gemm_kernel.hpp @@ -14,6 +14,9 @@ #include +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { /// @brief The Grouped GEMM kernel host arguments. @@ -575,3 +578,5 @@ struct GroupedGemmKernel }; } // namespace ck_tile + +#pragma clang diagnostic pop diff --git a/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp b/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp index d5ba324326..44745d15ee 100644 --- a/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp +++ b/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp @@ -36,7 +36,7 @@ struct UniversalGemmHostArgs const std::array& as_ptr_, const std::array& bs_ptr_, const std::array& ds_ptr_, - void* e_ptr_, + [[clang::lifetimebound]] void* e_ptr_, index_t k_batch_, index_t M_, index_t N_, diff --git a/include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_comp_async_eight_waves_policy.hpp b/include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_comp_async_eight_waves_policy.hpp index 29991197cd..1e1f525c3b 100644 --- a/include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_comp_async_eight_waves_policy.hpp +++ b/include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_comp_async_eight_waves_policy.hpp @@ -176,10 +176,15 @@ struct GemmPipelineAgBgCrCompAsyncEightWavesPolicy const index_t M0 = integer_divide_ceil(rows, M1); const auto row_lens = make_tuple(M0, number{}); - const auto d0 = make_naive_tensor_descriptor_packed(container_concat(row_lens, col_lens)); - const auto desc_0 = decltype(d0)( // set correct size (without padding) - d0.get_transforms(), - tensor_view_tmp.get_tensor_descriptor().get_element_space_size()); + // Build the 6D view by composing unmerge transforms on top of the + // input view's existing descriptor. This preserves the input's actual + // strides (so a non-packed leading-dim stride is honored) and inherits + // its element_space_size for bounds checking. + const auto desc_0 = transform_tensor_descriptor( + tensor_view_tmp.get_tensor_descriptor(), + make_tuple(make_unmerge_transform(row_lens), make_unmerge_transform(col_lens)), + make_tuple(sequence<0>{}, sequence<1>{}), + make_tuple(sequence<0, 1>{}, sequence<2, 3, 4, 5>{})); const auto desc_1 = transform_tensor_descriptor( desc_0, make_tuple(make_pass_through_transform(M0), diff --git a/include/ck_tile/ops/gemm_mx/kernel/scale_pointer.hpp b/include/ck_tile/ops/gemm_mx/kernel/scale_pointer.hpp index 204372c036..b9597c7517 100644 --- a/include/ck_tile/ops/gemm_mx/kernel/scale_pointer.hpp +++ b/include/ck_tile/ops/gemm_mx/kernel/scale_pointer.hpp @@ -5,6 +5,9 @@ #include "ck_tile/core.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { template @@ -111,3 +114,4 @@ struct MXScalePointer }; } // namespace ck_tile +#pragma clang diagnostic pop diff --git a/include/ck_tile/ops/gemm_quant/block/block_gemm_quant_common.hpp b/include/ck_tile/ops/gemm_quant/block/block_gemm_quant_common.hpp index fcf1261754..89d2cd59ea 100644 --- a/include/ck_tile/ops/gemm_quant/block/block_gemm_quant_common.hpp +++ b/include/ck_tile/ops/gemm_quant/block/block_gemm_quant_common.hpp @@ -5,6 +5,9 @@ #include "ck_tile/core.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { // Common utilities for quantized GEMM block operations @@ -224,3 +227,4 @@ struct AQPickerCommon : public BlockGemmQuantBase float scale_reg_f = 0.0f; }; } // namespace ck_tile +#pragma clang diagnostic pop diff --git a/include/ck_tile/ops/gemm_quant/kernel/gemm_quant_kernel.hpp b/include/ck_tile/ops/gemm_quant/kernel/gemm_quant_kernel.hpp index 5e7fb0e4da..8396a3e0eb 100644 --- a/include/ck_tile/ops/gemm_quant/kernel/gemm_quant_kernel.hpp +++ b/include/ck_tile/ops/gemm_quant/kernel/gemm_quant_kernel.hpp @@ -14,6 +14,9 @@ #include "ck_tile/host/concat.hpp" #include "ck_tile/ops/gemm_quant/pipeline/tile_gemm_quant_traits.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { namespace detail { @@ -1574,3 +1577,4 @@ struct QuantGemmKernel }; } // namespace ck_tile +#pragma clang diagnostic pop diff --git a/include/ck_tile/ops/gemm_quant/kernel/grouped_gemm_quant_kernel.hpp b/include/ck_tile/ops/gemm_quant/kernel/grouped_gemm_quant_kernel.hpp index 8b77b01e2f..8cb589c67c 100644 --- a/include/ck_tile/ops/gemm_quant/kernel/grouped_gemm_quant_kernel.hpp +++ b/include/ck_tile/ops/gemm_quant/kernel/grouped_gemm_quant_kernel.hpp @@ -15,6 +15,9 @@ #include +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { /// @brief The Grouped GEMM kernel host arguments. @@ -646,3 +649,4 @@ struct QuantGroupedGemmKernel }; } // namespace ck_tile +#pragma clang diagnostic pop diff --git a/include/ck_tile/ops/grouped_convolution/utils/grouped_convolution_utils.hpp b/include/ck_tile/ops/grouped_convolution/utils/grouped_convolution_utils.hpp index 2efb435d5b..ab24665a47 100644 --- a/include/ck_tile/ops/grouped_convolution/utils/grouped_convolution_utils.hpp +++ b/include/ck_tile/ops/grouped_convolution/utils/grouped_convolution_utils.hpp @@ -7,6 +7,9 @@ #include "ck_tile/host/convolution_parameter.hpp" #include "ck_tile/ops/elementwise/unary_element_wise_operation.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { enum class GroupedConvDirection @@ -261,3 +264,5 @@ CK_TILE_HOST SplitImagePieceInfo calculate_spatial_piece(ck_tile::index_t piece_ } } // namespace ck_tile + +#pragma clang diagnostic pop diff --git a/include/ck_tile/ops/pooling/kernel/pool_kernel.hpp b/include/ck_tile/ops/pooling/kernel/pool_kernel.hpp index 91be63b803..d9a9c9cdb8 100644 --- a/include/ck_tile/ops/pooling/kernel/pool_kernel.hpp +++ b/include/ck_tile/ops/pooling/kernel/pool_kernel.hpp @@ -8,6 +8,9 @@ #include "ck_tile/ops/common.hpp" #include +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { /// @brief Host arguments for pooling operations @@ -575,3 +578,5 @@ struct PoolKernel }; } // namespace ck_tile + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp index 16937fc7e8..0501430265 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp @@ -10,6 +10,9 @@ #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -352,3 +355,5 @@ struct ReferenceAvgPoolBwd : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp index 8c96f04930..8948954ed8 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp @@ -10,6 +10,9 @@ #include "ck/library/utility/host_tensor.hpp" #include +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -422,3 +425,4 @@ struct ReferenceBatchedGemm_GQA : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp index 01079bc5ba..cde4bf2585 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp @@ -10,6 +10,9 @@ #include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -182,3 +185,5 @@ struct ReferenceCGemm : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp index 459e2b52fc..9104b55892 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp @@ -11,6 +11,9 @@ #include "ck/tensor_operation/gpu/device/tensor_layout.hpp" #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -366,3 +369,5 @@ struct ReferenceColumnToImage : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp index d73ceb1de5..b93e02c2f0 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp @@ -11,6 +11,9 @@ #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -507,3 +510,5 @@ struct ReferenceBatchedContraction_G1_M3_N2_K1 : public ck::tensor_operation::de } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp index 24af36770c..62b44c7b77 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp @@ -10,6 +10,9 @@ #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -506,3 +509,5 @@ struct ReferenceConvBwdData : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp index 2e2dfeae46..de5fd48eff 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp @@ -10,6 +10,9 @@ #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -451,3 +454,5 @@ struct ReferenceConvBwdWeight : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp index 92115e6be4..1493379bf4 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp @@ -23,6 +23,9 @@ #include "ck/library/utility/convolution_parameter.hpp" #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -567,3 +570,5 @@ struct ReferenceConvFwd : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp index bd35fca181..a98a7fe29e 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp @@ -9,6 +9,9 @@ #include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -190,3 +193,5 @@ struct ReferenceConvFwd_Bias_Activation : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp index aa6500185b..17bf92e490 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp @@ -9,6 +9,9 @@ #include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -198,3 +201,5 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_elementwise.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_elementwise.hpp index 25fcebe64b..a0f7784e4b 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_elementwise.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_elementwise.hpp @@ -10,6 +10,9 @@ #include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -108,3 +111,5 @@ struct ReferenceElementwise : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_fpAintB_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_fpAintB_gemm.hpp index 1e5246313f..931728d5da 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_fpAintB_gemm.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_fpAintB_gemm.hpp @@ -10,6 +10,9 @@ #include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -145,3 +148,4 @@ struct ReferencefpAintBGemm : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp index 052cf8eb9d..7aecc8107d 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp @@ -11,6 +11,9 @@ #include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -246,3 +249,5 @@ struct ReferenceGemm : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp index b701733af0..deb665a0cb 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp @@ -7,6 +7,9 @@ #include #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -234,3 +237,5 @@ struct ReferenceGemmLayernorm : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_multi_abd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_multi_abd.hpp index 2d766e621b..225da36c61 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_multi_abd.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_multi_abd.hpp @@ -14,6 +14,9 @@ #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -192,3 +195,5 @@ struct ReferenceGemmMultiABD : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_multiple_d.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_multiple_d.hpp index 24ddef739a..85e954dab1 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_multiple_d.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_multiple_d.hpp @@ -10,6 +10,9 @@ #include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -155,3 +158,5 @@ struct ReferenceGemmMultipleD : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp index ee2f664df1..1565663408 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp @@ -12,6 +12,9 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -210,3 +213,5 @@ struct ReferenceGroupnorm : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm_bwd.hpp index 4d78da35c1..945955aae2 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm_bwd.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm_bwd.hpp @@ -12,6 +12,9 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -230,3 +233,5 @@ struct ReferenceGroupnormBwd : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp index c982ccb575..430af54805 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp @@ -12,6 +12,9 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/numeric.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -330,3 +333,5 @@ struct ReferenceImageToColumn : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp index e72947e387..919d6020e4 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp @@ -12,6 +12,9 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -258,3 +261,5 @@ struct ReferenceLayernorm : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm_bwd.hpp index d0e2730484..2eacce3276 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm_bwd.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm_bwd.hpp @@ -12,6 +12,9 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -199,3 +202,5 @@ struct ReferenceLayernormBwd : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp index 7372e1132c..ab8ce8eb90 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp @@ -11,6 +11,9 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -110,3 +113,5 @@ struct ReferenceMaxPoolBwd : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm.hpp index 26a0607508..5b50643ffc 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm.hpp @@ -11,6 +11,9 @@ #include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -293,3 +296,4 @@ struct ReferenceMoeGemm : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm1_blockscale.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm1_blockscale.hpp index 4032260609..8fdea7d3c7 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm1_blockscale.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm1_blockscale.hpp @@ -11,6 +11,9 @@ #include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -278,3 +281,4 @@ struct ReferenceMoeGemm1BlockScale : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm1_blockscale_splitk.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm1_blockscale_splitk.hpp index 9d9b8a62f5..9ad504e064 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm1_blockscale_splitk.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm1_blockscale_splitk.hpp @@ -11,6 +11,9 @@ #include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -230,3 +233,4 @@ struct ReferenceMoeGemm1BlockScaleSplitK : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm2.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm2.hpp index 937ab82e80..0f4fffc7a1 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm2.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm2.hpp @@ -12,6 +12,9 @@ #include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -266,3 +269,4 @@ struct ReferenceMoeGemm2 : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm2_blockscale.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm2_blockscale.hpp index 8295fc8ca2..ff10d7fa16 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm2_blockscale.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_gemm2_blockscale.hpp @@ -11,6 +11,9 @@ #include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -246,3 +249,4 @@ struct ReferenceMoeGemm2BlockScale : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_mx_gemm1.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_mx_gemm1.hpp index 10bcc5aa8e..ecd48c6dc9 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_mx_gemm1.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_mx_gemm1.hpp @@ -11,6 +11,9 @@ #include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -262,3 +265,4 @@ struct ReferenceMoeMXGemm1 : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_mx_gemm2.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_mx_gemm2.hpp index a08c03d14b..8be787f6d9 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_mx_gemm2.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_moe_mx_gemm2.hpp @@ -11,6 +11,9 @@ #include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/library/utility/host_tensor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -236,3 +239,4 @@ struct ReferenceMoeMXGemm2 : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_mx_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_mx_gemm.hpp index 3930fcd7cd..a27ceb43f8 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_mx_gemm.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_mx_gemm.hpp @@ -11,6 +11,9 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -234,3 +237,4 @@ struct ReferenceMXGemm : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp index 7fab05ad99..f6f3275126 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp @@ -14,6 +14,9 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -358,3 +361,5 @@ struct ReferencePoolingFwd : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp index 1ee471c95c..2b64e320cd 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp @@ -12,6 +12,9 @@ #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace tensor_operation { namespace host { @@ -203,3 +206,5 @@ struct ReferenceSparseEmbedding3ForwardLayernorm : public device::BaseOperator } // namespace host } // namespace tensor_operation } // namespace ck + +#pragma clang diagnostic pop diff --git a/python/ck4inductor/grouped_conv_fwd/op.py b/python/ck4inductor/grouped_conv_fwd/op.py index 8301f0d07f..576c36f66d 100644 --- a/python/ck4inductor/grouped_conv_fwd/op.py +++ b/python/ck4inductor/grouped_conv_fwd/op.py @@ -67,6 +67,7 @@ class CKGroupedConvFwdOp: b_compute_dtype: Optional[str] = None direct_load: Optional[bool] = None + num_groups_to_merge: Optional[int] = None def name(self): # cpp alias for template instance diff --git a/script/run_inductor_tests.sh b/script/run_inductor_tests.sh new file mode 100755 index 0000000000..6fed9d2dce --- /dev/null +++ b/script/run_inductor_tests.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Run inductor codegen tests +# This script is called from Jenkinsfile to reduce pipeline bytecode size + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +CK_DIR="$(dirname "$SCRIPT_DIR")" +VENV_DIR="${WORKSPACE:-/tmp}/ck-inductor-venv" +export UV_CACHE_DIR="${WORKSPACE:-/tmp}/.uv-cache" + +cd "$CK_DIR" + +echo "Setting up Python virtual environment at $VENV_DIR" +python3 -m venv "$VENV_DIR" +. "$VENV_DIR/bin/activate" + +echo "Installing uv for faster package installation" +pip install uv + +echo "Installing test dependencies" +uv pip install pytest build setuptools setuptools_scm + +echo "Installing ck4inductor package" +uv pip install . + +echo "Running inductor codegen tests" +python3 -m pytest python/test/test_gen_instances.py -v diff --git a/test/ck_tile/gemm_block_scale/CMakeLists.txt b/test/ck_tile/gemm_block_scale/CMakeLists.txt index 9f77cf01d7..21d34f7b34 100644 --- a/test/ck_tile/gemm_block_scale/CMakeLists.txt +++ b/test/ck_tile/gemm_block_scale/CMakeLists.txt @@ -86,6 +86,11 @@ if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx12") ) target_compile_options(test_tile_gemm_quant_abquant_eightwaves PRIVATE ${TEST_GEMM_COMPILE_OPTIONS}) + add_gtest_executable(test_tile_gemm_quant_abquant_eightwaves_padded_stride + test_gemm_quant_abquant_eightwaves_padded_stride.cpp + ) + target_compile_options(test_tile_gemm_quant_abquant_eightwaves_padded_stride PRIVATE ${TEST_GEMM_COMPILE_OPTIONS}) + # ABQuant split-K tests add_gtest_executable(test_tile_gemm_quant_abquant_splitk_decode test_gemm_quant_abquant_splitk_decode.cpp @@ -281,6 +286,7 @@ if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx12") test_tile_gemm_quant_abquant_a4w4_padding test_tile_gemm_quant_abquant_a4w4_preshuffle test_tile_gemm_quant_abquant_eightwaves + test_tile_gemm_quant_abquant_eightwaves_padded_stride # ABQuant split-K tests test_tile_gemm_quant_abquant_splitk_decode test_tile_gemm_quant_abquant_splitk_prefill diff --git a/test/ck_tile/gemm_block_scale/test_gemm_quant_abquant_eightwaves_padded_stride.cpp b/test/ck_tile/gemm_block_scale/test_gemm_quant_abquant_eightwaves_padded_stride.cpp new file mode 100644 index 0000000000..28b7811af3 --- /dev/null +++ b/test/ck_tile/gemm_block_scale/test_gemm_quant_abquant_eightwaves_padded_stride.cpp @@ -0,0 +1,31 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +// Regression test for the EightWaves ABQuant pipeline on a B tensor whose +// leading-dim stride is larger than the packed value. The async B-load +// descriptor in the EightWaves policy must be built from the input view's +// real strides so that the kernel addresses B correctly when stride_B is +// larger than the inner length (e.g. row-aligned weight padding). + +#include "test_gemm_quant_common.hpp" + +using GroupSize2D128N = ck_tile::QuantGroupShape>; +#ifdef CK_GFX950_SUPPORT +// Tuple format: +// clang-format off +using ABQuantEightWavesPaddedStrideTypes = ::testing::Types< + std::tuple +>; +// clang-format on + +TYPED_TEST_SUITE(TestCkTileGemmABQuant, ABQuantEightWavesPaddedStrideTypes); + +TYPED_TEST(TestCkTileGemmABQuant, ABQuantGroupedPaddedBStrideTest) +{ + // 256-byte row alignment for FP8 -> 256 elements of leading-dim padding. + constexpr ck_tile::index_t k_batch = 1; + constexpr ck_tile::index_t stride_B_pad = 256; + this->run_test_with_validation(1024, 1024, 1024, k_batch, stride_B_pad); +} +#endif diff --git a/test/ck_tile/gemm_block_scale/test_gemm_quant_fixtures.hpp b/test/ck_tile/gemm_block_scale/test_gemm_quant_fixtures.hpp index 8fbda4a3ce..e5731c5caa 100644 --- a/test/ck_tile/gemm_block_scale/test_gemm_quant_fixtures.hpp +++ b/test/ck_tile/gemm_block_scale/test_gemm_quant_fixtures.hpp @@ -1038,12 +1038,17 @@ class TestCkTileGemmABQuant : public TestCkTileGemmQuantBaseis_row_major(ALayout{})); + // stride_B_pad lets a test exercise a B tensor whose leading-dim stride is + // larger than the packed value (e.g. row-aligned padding). The host tensor, + // device buffer, and kernel args are all built with this padded stride so + // the kernel must honor the runtime stride to address B correctly. const ck_tile::index_t stride_B = - ck_tile::get_default_stride(K, N, 0, this->is_row_major(BLayout{})); + ck_tile::get_default_stride(K, N, 0, this->is_row_major(BLayout{})) + stride_B_pad; const ck_tile::index_t stride_C = ck_tile::get_default_stride(M, N, 0, this->is_row_major(CLayout{})); diff --git a/tile_engine/ops/common/utils.hpp b/tile_engine/ops/common/utils.hpp index 4a7c2d586b..df4037f6b5 100644 --- a/tile_engine/ops/common/utils.hpp +++ b/tile_engine/ops/common/utils.hpp @@ -72,7 +72,8 @@ struct KernelInstance }; template -std::ostream& operator<<(std::ostream& os, const KernelInstance& obj) +std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, + const KernelInstance& obj) { os << "{\n" << " \"name\": \"" << obj.name_ << "\",\n" @@ -82,7 +83,7 @@ std::ostream& operator<<(std::ostream& os, const KernelInstance& obj) return os; } -std::ostream& operator<<(std::ostream& os, const PerformanceResult& result) +std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, const PerformanceResult& result) { os << "{\n" << " \"latency(ms)\": " << std::fixed << std::setprecision(2) << result.latency_ << ",\n" diff --git a/tile_engine/ops/gemm/gemm_benchmark.hpp b/tile_engine/ops/gemm/gemm_benchmark.hpp index 7439264a39..afe375e1d9 100644 --- a/tile_engine/ops/gemm/gemm_benchmark.hpp +++ b/tile_engine/ops/gemm/gemm_benchmark.hpp @@ -26,7 +26,8 @@ struct GemmProblem bool structured_sparsity_; - friend std::ostream& operator<<(std::ostream& os, const GemmProblem& problem) + friend std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, + const GemmProblem& problem) { os << "{\n" << " \"split_k\":" << problem.split_k_ << ",\n"