From 576fd484c2576efe21ce2015810655fa0c45b222 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=C5=82omiej=20Kocot?= Date: Tue, 16 Sep 2025 17:47:28 +0200 Subject: [PATCH] Disable GridwiseOp prints if env var is off (#2843) * Disable GridwiseOp prints if env var is off * Fixes [ROCm/composable_kernel commit: 671adb59c54875cdb7c485bb0be387045b83dfb7] --- ...d_contraction_multiple_d_wmma_cshuffle.hpp | 8 ++- ...ise_batched_gemm_gemm_wmma_cshuffle_v3.hpp | 51 ++++++++++------ ...atched_gemm_softmax_gemm_wmma_cshuffle.hpp | 59 ++++++++++++------- .../gpu/grid/gridwise_fpAintB_gemm_wmma.hpp | 32 ++++++---- ...gridwise_gemm_multiple_d_wmma_cshuffle.hpp | 28 +++++++-- .../gpu/grid/gridwise_gemm_wmma.hpp | 32 ++++++---- 6 files changed, 140 insertions(+), 70 deletions(-) diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp index ab3f3856aa..537e6dab28 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp @@ -1,11 +1,12 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once #include #include +#include "ck/utility/env.hpp" #include "ck/utility/common_header.hpp" #include "ck/tensor_description/tensor_descriptor.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp" @@ -853,7 +854,10 @@ struct DeviceBatchedContractionMultipleD_Wmma_CShuffle arg.e_grid_desc_m_n_, arg.block_2_ctile_map_)) { - printf("GridwiseOp: Validity check failure\n"); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("GridwiseOp: Validity check failure\n"); + } return false; } diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_wmma_cshuffle_v3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_wmma_cshuffle_v3.hpp index b61c7a09eb..fa7eb4faaa 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_wmma_cshuffle_v3.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_wmma_cshuffle_v3.hpp @@ -398,41 +398,54 @@ struct GridwiseBatchedGemmGemm_wmma_cshuffle_v3 if(!(M == c_grid_desc_m_n.GetLength(I0) && N == c_grid_desc_m_n.GetLength(I1))) { - print("GridwiseOp: M/N Length err, A_M/N = %d, %d | C_M/N = %d, %d\n", - M, - N, - c_grid_desc_m_n.GetLength(I0), - c_grid_desc_m_n.GetLength(I1)); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + print("GridwiseOp: M/N Length err, A_M/N = %d, %d | C_M/N = %d, %d\n", + M, + N, + c_grid_desc_m_n.GetLength(I0), + c_grid_desc_m_n.GetLength(I1)); + } return false; } if(!(M % MPerBlock == 0 && L % LPerBlock == 0 && K % KPerBlock == 0 && N % NPerBlock == 0)) { - print("GridwiseOp: M/L/K/N Division err, M/L/K/N = %d, %d, %d, %d | M/L/K/NPerBlock = " - "%d, %d, %d, %d\n", - M, - L, - K, - N, - MPerBlock, - LPerBlock, - KPerBlock, - NPerBlock); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + print("GridwiseOp: M/L/K/N Division err, M/L/K/N = %d, %d, %d, %d | " + "M/L/K/NPerBlock = " + "%d, %d, %d, %d\n", + M, + L, + K, + N, + MPerBlock, + LPerBlock, + KPerBlock, + NPerBlock); + } return false; } // check gemm1 gridwise gemm pipeline if(!(LPerBlock % LTilePerBlock == 0)) { - print("GridwiseOp: inner loop division, L/LTilePerblock: %d, %d\n", - LPerBlock, - LTilePerBlock); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + print("GridwiseOp: inner loop division, L/LTilePerblock: %d, %d\n", + LPerBlock, + LTilePerBlock); + } return false; } if(!block_2_ctile_map.CheckValidity(c_grid_desc_m_n)) { - print("GridwiseOp: invalid block_2_ctile_map\n"); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + print("GridwiseOp: invalid block_2_ctile_map\n"); + } return false; } diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_wmma_cshuffle.hpp index 1754e07e6a..502c449ef1 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_wmma_cshuffle.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_wmma_cshuffle.hpp @@ -1,8 +1,9 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once +#include "ck/utility/env.hpp" #include "ck/utility/common_header.hpp" #include "ck/tensor_description/multi_index_transform_helper.hpp" #include "ck/tensor_description/tensor_descriptor.hpp" @@ -569,26 +570,33 @@ struct GridwiseBatchedGemmSoftmaxGemm_Wmma if(!(M == c_grid_desc_m_n.GetLength(I0) && N == c_grid_desc_m_n.GetLength(I1))) { - printf("GridwiseOp: M/N Length err, A_M/N = %d, %d | C_M/N = %d, %d\n", - M, - N, - c_grid_desc_m_n.GetLength(I0), - c_grid_desc_m_n.GetLength(I1)); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("GridwiseOp: M/N Length err, A_M/N = %d, %d | C_M/N = %d, %d\n", + M, + N, + c_grid_desc_m_n.GetLength(I0), + c_grid_desc_m_n.GetLength(I1)); + } return false; } if(!(M % MPerBlock == 0 && L % LPerBlock == 0 && K % KPerBlock == 0 && N % NPerBlock == 0)) { - printf("GridwiseOp: M/L/K/N Division err, M/L/K/N = %d, %d, %d, %d | M/L/K/NPerBlock = " - "%d, %d, %d, %d\n", - M, - L, - K, - N, - MPerBlock, - LPerBlock, - KPerBlock, - NPerBlock); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("GridwiseOp: M/L/K/N Division err, M/L/K/N = %d, %d, %d, %d | " + "M/L/K/NPerBlock = " + "%d, %d, %d, %d\n", + M, + L, + K, + N, + MPerBlock, + LPerBlock, + KPerBlock, + NPerBlock); + } return false; } @@ -596,23 +604,32 @@ struct GridwiseBatchedGemmSoftmaxGemm_Wmma const auto num_gemm0_k_loop = K / KPerBlock; if(!GridwiseGemmPipe::IsSupported(num_gemm0_k_loop)) { - printf("GridwiseOp: outer loop unsupport\n"); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("GridwiseOp: outer loop unsupport\n"); + } return false; } // check gemm1 gridwise gemm pipeline if(!(LPerBlock % LTilePerBlock == 0)) { - printf("GridwiseOp: inner loop division, L/LTilePerblock: %d, %d\n", - LPerBlock, - LTilePerBlock); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("GridwiseOp: inner loop division, L/LTilePerblock: %d, %d\n", + LPerBlock, + LTilePerBlock); + } return false; } const auto num_gemm1_k_inner_loop = LPerBlock / LTilePerBlock; if(!GridwiseGemmPipe::IsSupported(num_gemm1_k_inner_loop)) { - printf("GridwiseOp: inner loop unsupport\n"); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("GridwiseOp: inner loop unsupport\n"); + } return false; } diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_fpAintB_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_fpAintB_gemm_wmma.hpp index 8011fa56d3..c8b154228f 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_fpAintB_gemm_wmma.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_fpAintB_gemm_wmma.hpp @@ -1,8 +1,9 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once +#include "ck/utility/env.hpp" #include "ck/utility/common_header.hpp" #include "ck/tensor_description/multi_index_transform_helper.hpp" #include "ck/tensor_description/tensor_descriptor.hpp" @@ -466,20 +467,26 @@ struct GridwiseFpAintBGemm_Wmma if(!(M == c_grid_desc_m_n.GetLength(I0) && N == c_grid_desc_m_n.GetLength(I1) && K == GetBProblemsizeNK()[I1])) { - printf("A: MxK = %d x %d, B: NxK = %d x %d, C: MxN = %d x %d\n", - GetAProblemsizeMK()[I0], - GetAProblemsizeMK()[I1], - GetBProblemsizeNK()[I0], - GetBProblemsizeNK()[I1], - c_grid_desc_m_n.GetLength(I0), - c_grid_desc_m_n.GetLength(I1)); - printf("GridwiseOp err: ProblemSize check"); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("A: MxK = %d x %d, B: NxK = %d x %d, C: MxN = %d x %d\n", + GetAProblemsizeMK()[I0], + GetAProblemsizeMK()[I1], + GetBProblemsizeNK()[I0], + GetBProblemsizeNK()[I1], + c_grid_desc_m_n.GetLength(I0), + c_grid_desc_m_n.GetLength(I1)); + printf("GridwiseOp err: ProblemSize check"); + } return false; } if(!(M % MPerBlock == 0 && N % NPerBlock == 0 && K % KPerBlock == 0)) { - printf("GridwiseOp err: ProblemSize division"); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("GridwiseOp err: ProblemSize division"); + } return false; } @@ -488,7 +495,10 @@ struct GridwiseFpAintBGemm_Wmma if(!GridwiseGemmPipe::IsSupported(num_k_loop)) { - printf("GridwiseOp err: Pipeline not support this k_loop"); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("GridwiseOp err: Pipeline not support this k_loop"); + } return false; } diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp index 46979a5620..7d68d64ed8 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp @@ -1,8 +1,9 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once +#include "ck/utility/env.hpp" #include "ck/utility/common_header.hpp" #include "ck/tensor_description/multi_index_transform_helper.hpp" #include "ck/tensor_description/tensor_descriptor.hpp" @@ -653,13 +654,19 @@ struct GridwiseGemmMultipleD_Wmma if(!(M == e_grid_desc_m_n.GetLength(I0) && N == e_grid_desc_m_n.GetLength(I1) && K == GetBProblemsizeNK()[I1])) { - printf("GridwiseOp: ABE descriptor dimension cross check failure\n"); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("GridwiseOp: ABE descriptor dimension cross check failure\n"); + } return false; } if(!(M % MPerBlock == 0 && N % NPerBlock == 0 && K % KPerBlock == 0)) { - printf("GridwiseOp: Problemsize descriptor dimension check failure\n"); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("GridwiseOp: Problemsize descriptor dimension check failure\n"); + } return false; } @@ -747,20 +754,29 @@ struct GridwiseGemmMultipleD_Wmma if(!valid) { - printf("GridwiseOp: D descriptor dimension check failure\n"); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("GridwiseOp: D descriptor dimension check failure\n"); + } return false; } if(!(M == e_grid_desc_m_n.GetLength(I0) && N == e_grid_desc_m_n.GetLength(I1) && K == GetBProblemsizeNK()[I1])) { - printf("GridwiseOp: ABE descriptor dimension cross check failure\n"); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("GridwiseOp: ABE descriptor dimension cross check failure\n"); + } return false; } if(!(M % MPerBlock == 0 && N % NPerBlock == 0 && K % KPerBlock == 0)) { - printf("GridwiseOp: Problemsize descriptor dimension check failure\n"); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("GridwiseOp: Problemsize descriptor dimension check failure\n"); + } return false; } diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp index 4a15958adb..65f74de3cf 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp @@ -1,8 +1,9 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once +#include "ck/utility/env.hpp" #include "ck/utility/common_header.hpp" #include "ck/tensor_description/multi_index_transform_helper.hpp" #include "ck/tensor_description/tensor_descriptor.hpp" @@ -458,20 +459,26 @@ struct GridwiseGemm_Wmma if(!(M == c_grid_desc_m_n.GetLength(I0) && N == c_grid_desc_m_n.GetLength(I1) && K == GetBProblemsizeNK()[I1])) { - printf("A: MxK = %d x %d, B: NxK = %d x %d, C: MxN = %d x %d\n", - GetAProblemsizeMK()[I0], - GetAProblemsizeMK()[I1], - GetBProblemsizeNK()[I0], - GetBProblemsizeNK()[I1], - c_grid_desc_m_n.GetLength(I0), - c_grid_desc_m_n.GetLength(I1)); - printf("GridwiseOp err: ProblemSize check"); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("A: MxK = %d x %d, B: NxK = %d x %d, C: MxN = %d x %d\n", + GetAProblemsizeMK()[I0], + GetAProblemsizeMK()[I1], + GetBProblemsizeNK()[I0], + GetBProblemsizeNK()[I1], + c_grid_desc_m_n.GetLength(I0), + c_grid_desc_m_n.GetLength(I1)); + printf("GridwiseOp err: ProblemSize check"); + } return false; } if(!(M % MPerBlock == 0 && N % NPerBlock == 0 && K % KPerBlock == 0)) { - printf("GridwiseOp err: ProblemSize division"); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("GridwiseOp err: ProblemSize division"); + } return false; } @@ -480,7 +487,10 @@ struct GridwiseGemm_Wmma if(!GridwiseGemmPipe::IsSupported(num_k_loop)) { - printf("GridwiseOp err: Pipeline not support this k_loop"); + if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING))) + { + printf("GridwiseOp err: Pipeline not support this k_loop"); + } return false; }