From 2026ce49e769a2114dca19469492aed448d1dd85 Mon Sep 17 00:00:00 2001
From: Illia Silin <98187287+illsilin@users.noreply.github.com>
Date: Fri, 17 May 2024 10:42:51 -0700
Subject: [PATCH]  replace the ENV macro with CK_ENV (#1296)

[ROCm/composable_kernel commit: 1274861a9da0d3051a9f5177a3640464b4c79d6a]
---
 include/ck/host_utility/flush_cache.hpp       |  6 +++---
 include/ck/host_utility/kernel_launch.hpp     |  8 ++++----
 ...ultiple_d_gemm_multiple_d_xdl_cshuffle.hpp |  2 +-
 ...evice_batched_gemm_reduce_xdl_cshuffle.hpp |  2 +-
 ...gemm_softmax_gemm_permute_xdl_cshuffle.hpp |  2 +-
 ...ice_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp |  2 +-
 ...fle_bias_activation_add_nhwc_kyxc_nhwk.hpp |  2 +-
 ...shuffle_bias_activation_nhwc_kyxc_nhwk.hpp |  2 +-
 ...onv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp |  2 +-
 .../device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp  |  2 +-
 ...evice_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp |  2 +-
 .../device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp |  2 +-
 ...device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp |  2 +-
 .../gpu/device/impl/device_gemm_dl.hpp        |  2 +-
 .../impl/device_gemm_reduce_xdl_cshuffle.hpp  |  2 +-
 .../device_gemm_xdl_layernorm_cshuffle.hpp    |  2 +-
 .../impl/device_gemm_xdl_skip_b_lds.hpp       |  2 +-
 .../device_grouped_gemm_multiple_d_dl.hpp     |  2 +-
 ...ltiple_d_splitk_xdl_cshuffle_two_stage.hpp |  8 ++++----
 ...gemm_multiple_d_xdl_cshuffle_tile_loop.hpp |  2 +-
 .../device/impl/device_grouped_gemm_xdl.hpp   |  2 +-
 ...evice_grouped_gemm_xdl_splitk_cshuffle.hpp |  4 ++--
 .../grid/gridwise_gemm_xdl_cshuffle_v3.hpp    | 20 +++++++++----------
 ...ridwise_gemm_xdl_cshuffle_v3_multi_abd.hpp | 18 ++++++++---------
 .../gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp  | 20 +++++++++----------
 include/ck/utility/env.hpp                    |  2 +-
 .../profile_grouped_gemm_fixed_nk_impl.hpp    |  2 +-
 .../profiler/profile_grouped_gemm_impl.hpp    |  2 +-
 .../profile_grouped_gemm_tile_loop_impl.hpp   |  2 +-
 .../profile_grouped_gemm_two_stage_impl.hpp   |  2 +-
 30 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/include/ck/host_utility/flush_cache.hpp b/include/ck/host_utility/flush_cache.hpp
index 36993d0ae2..041428e6a9 100644
--- a/include/ck/host_utility/flush_cache.hpp
+++ b/include/ck/host_utility/flush_cache.hpp
@@ -117,7 +117,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
 #define MEDIAN 1
     if(stream_config.time_kernel_)
     {
-        if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+        if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
         {
             printf("%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u} \n",
                    __func__,
@@ -142,7 +142,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
         {
             return 0.0;
         }
-        if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+        if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
         {
             printf("Start running %d times...\n", nrepeat);
         }
@@ -186,7 +186,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
             total_time += cur_time;
 #endif
 
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 std::cout << "i: " << i << " cur_time: " << cur_time << std::endl;
 
diff --git a/include/ck/host_utility/kernel_launch.hpp b/include/ck/host_utility/kernel_launch.hpp
index 1cdb7f9c5a..a616433ac9 100644
--- a/include/ck/host_utility/kernel_launch.hpp
+++ b/include/ck/host_utility/kernel_launch.hpp
@@ -20,7 +20,7 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
 #if CK_TIME_KERNEL
     if(stream_config.time_kernel_)
     {
-        if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+        if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
         {
             printf("%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u} \n",
                    __func__,
@@ -41,7 +41,7 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
         }
 
         const int nrepeat = stream_config.nrepeat_;
-        if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+        if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
         {
             printf("Start running %d times...\n", nrepeat);
         }
@@ -95,7 +95,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
 #if CK_TIME_KERNEL
     if(stream_config.time_kernel_)
     {
-        if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+        if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
         {
             printf("%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u} \n",
                    __func__,
@@ -117,7 +117,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
         }
 
         const int nrepeat = stream_config.nrepeat_;
-        if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+        if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
         {
             printf("Start running %d times...\n", nrepeat);
         }
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp
index 4521b2161f..6ab1669e30 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp
@@ -587,7 +587,7 @@ struct DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle
                                          BatchStrideD1s,
                                          BatchStrideE1}
         {
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 std::cout << "a0_grid_desc_m_k_{" << a0_grid_desc_m_k_.GetLength(I0) << ", "
                           << a0_grid_desc_m_k_.GetLength(I1) << "}" << std::endl;
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp
index 37ebe2f85c..34b1d503af 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp
@@ -658,7 +658,7 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<0, ReduceO
 
         float Run(const Argument& arg, const StreamConfig& stream_config = StreamConfig{})
         {
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 {
                     std::cout << "arg.Batch_ = " << arg.Batch_ << std::endl;
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp
index 445467be55..e178b8f525 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp
@@ -719,7 +719,7 @@ struct DeviceBatchedGemmSoftmaxGemmPermute_Xdl_CShuffle
 
     static bool IsSupportedArgument(const Argument& arg)
     {
-        if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+        if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
         {
             arg.Print();
         }
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp
index 6fd8c03232..0b73317c5e 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp
@@ -516,7 +516,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
             float ave_time = 0;
             for(size_t i = 0; i < arg.a_grid_desc_k0_m_k1_container_.size(); i++)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     {
                         std::cout << "arg.a_grid_desc_k0_m_k1_container_{"
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
index f5c1460f56..13eb23574f 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
@@ -644,7 +644,7 @@ struct
 
         float Run(const Argument& arg, const StreamConfig& stream_config = StreamConfig{})
         {
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 std::cout << DeviceOp{}.GetTypeString() << std::endl;
                 std::cout << "N " << arg.Conv_N_ << ", "
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp
index 9015f640ad..28778d825b 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp
@@ -614,7 +614,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
 
         float Run(const Argument& arg, const StreamConfig& stream_config = StreamConfig{})
         {
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 std::cout << DeviceOp{}.GetTypeString() << std::endl;
                 std::cout << "N " << arg.Conv_N_ << ", "
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
index e815c0784d..7fa231d4f4 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
@@ -579,7 +579,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
 
         float Run(const Argument& arg, const StreamConfig& stream_config = StreamConfig{})
         {
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 std::cout << DeviceOp{}.GetTypeString() << std::endl;
                 std::cout << "N " << arg.Conv_N_ << ", "
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
index 760e2840d4..3be7313d2b 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
@@ -431,7 +431,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
 
         float Run(const Argument& arg, const StreamConfig& stream_config = StreamConfig{})
         {
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
                           << ", " << arg.a_grid_desc_k0_m_k1_.GetLength(I1) << ", "
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp
index de48719398..6e69213513 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp
@@ -401,7 +401,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
 
         float Run(const Argument& arg, const StreamConfig& stream_config = StreamConfig{})
         {
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 std::cout << "num_batches_of_GEMM = " << arg.num_subbatches_ << std::endl;
                 std::cout << "a_grid_desc_k0_m_k1{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp
index 149aca7e3e..b84e181306 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp
@@ -1272,7 +1272,7 @@ struct DeviceConvNdBwdDataNwcKxcNwk_Dl
             float ave_time = 0;
             for(size_t i = 0; i < arg.a_grid_desc_k0_m_k1_container_.size(); i++)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "arg.a_grid_desc_k0_m_k1_container_{"
                               << arg.a_grid_desc_k0_m_k1_container_[i].GetLength(I0) << ", "
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp
index 4398724553..de8f35a640 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp
@@ -1220,7 +1220,7 @@ struct DeviceConvNdBwdDataNwcKxcNwk_Xdl
             float ave_time = 0;
             for(size_t i = 0; i < arg.a_grid_desc_k0_m_k1_container_.size(); i++)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "arg.a_grid_desc_k0_m_k1{"
                               << arg.a_grid_desc_k0_m_k1_container_[i].GetLength(I0) << ", "
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp
index d3af5e63d3..b1784b3858 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp
@@ -334,7 +334,7 @@ struct DeviceGemmDl : public DeviceGemm<ALayout,
 
         float Run(const Argument& arg, const StreamConfig& stream_config = StreamConfig{})
         {
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 std::cout << "arg.a_grid_desc_k0_m0_m1_k1_{"
                           << arg.a_grid_desc_k0_m_k1_.GetLength(I0) << ", "
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp
index 45636a9de5..c88294edfa 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp
@@ -510,7 +510,7 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<0, ReduceOperatio
 
         float Run(const Argument& arg, const StreamConfig& stream_config = StreamConfig{})
         {
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 std::cout << "arg.a_grid_desc_ak0_m_ak1_{"
                           << arg.a_grid_desc_ak0_m_ak1_.GetLength(I0) << ", "
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp
index 009466dbd1..fd6f3b65f2 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp
@@ -514,7 +514,7 @@ struct DeviceGemmLayerNorm_Xdl_CShuffle : public BaseOperator
 
         float Run(const Argument& arg, const StreamConfig& stream_config = StreamConfig{})
         {
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 std::cout << "arg.a_grid_desc_ak0_m_ak1_{"
                           << arg.a_grid_desc_ak0_m_ak1_.GetLength(I0) << ", "
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp
index 610b274152..c2a27ebbdb 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp
@@ -299,7 +299,7 @@ struct DeviceGemmXdlSkipBLds : public DeviceGemm<ALayout,
 
         float Run(const Argument& arg, const StreamConfig& stream_config = StreamConfig{})
         {
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
                           << ", " << arg.a_grid_desc_k0_m_k1_.GetLength(I1) << ", "
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp
index a88c7b4fb7..ac392cddc4 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp
@@ -553,7 +553,7 @@ struct DeviceGroupedGemmMultipleD_Dl : public DeviceGroupedGemm<ALayout,
 
             for(std::size_t i = 0; i < arg.gemm_desc_kernel_arg_.size(); i++)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "group: " << i << " arg.a_grid_desc_k0_m_k1_{"
                               << arg.gemm_desc_kernel_arg_[i].a_grid_desc_k0_m_k1_.GetLength(I0)
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_splitk_xdl_cshuffle_two_stage.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_splitk_xdl_cshuffle_two_stage.hpp
index a70ee6f053..8354335577 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_splitk_xdl_cshuffle_two_stage.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_splitk_xdl_cshuffle_two_stage.hpp
@@ -468,7 +468,7 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
                 gemm_kernel_args_[i].block_start_       = block_start;
                 gemm_kernel_args_[i].block_end_         = block_end;
 
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     index_t tiles = (block_end - block_start) / K_BATCH;
                     std::cout << "block_start: " << block_start << "\n"
@@ -495,7 +495,7 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
                 arg.karg_.p_c_grid = p_workspace + offset;
                 index_t tiles      = (arg.block_end_ - arg.block_start_) / arg.karg_.k_batch;
                 offset += tiles * MPerBlock * NPerBlock;
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "block_start: " << arg.block_start_ << "\n"
                               << "block_end: " << arg.block_end_ << "\n"
@@ -819,7 +819,7 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
         if((ck::type_convert<ck::index_t>(arg.gemm_kernel_args_.size()) +
             arg.skipped_group_count_) != arg.group_count_)
         {
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 std::cout << "The group count is not equal to sum of skipped groups "
                              "and kernel args size!"
@@ -836,7 +836,7 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
             bool group_arg_valid = GridwiseGemm::CheckValidity(gemm_arg);
             if(not group_arg_valid)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "[" << __func__ << "] group id: " << i
                               << " has invalid GridwiseGemm settings!" << std::endl;
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_xdl_cshuffle_tile_loop.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_xdl_cshuffle_tile_loop.hpp
index 403bc7fad6..36cbd1cd26 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_xdl_cshuffle_tile_loop.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_xdl_cshuffle_tile_loop.hpp
@@ -620,7 +620,7 @@ struct DeviceGroupedGemmMultipleDXdlCShuffleTileLoop
                  GridwiseGemm::template CheckTensorTransfersValidity<ALayout, BLayout, ELayout>(
                      M, N, K)))
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "The provided GEMM problem size (M,N,K) [" << M << "," << N << ","
                               << K << "] are not supported by current template parameters!"
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp
index 90c0593b28..658f323516 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp
@@ -514,7 +514,7 @@ struct DeviceGroupedGemm_Xdl : public DeviceGroupedGemm<ALayout,
 
             for(std::size_t i = 0; i < arg.gemm_desc_kernel_arg_.size(); i++)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "group: " << i << " arg.a_grid_desc_ak0_m_ak1_{"
                               << arg.gemm_desc_kernel_arg_[i].a_grid_desc_ak0_m_ak1_.GetLength(I0)
diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp
index b7306580cf..6d9d1459c8 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp
@@ -529,7 +529,7 @@ struct DeviceGroupedGemmXdlSplitKCShuffle : public DeviceGroupedGemmSplitK<ALayo
         if((ck::type_convert<ck::index_t>(arg.gemm_kernel_args_.size()) +
             arg.skipped_group_count_) != arg.group_count_)
         {
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 std::cout << "The group count is not equal to sum of skipped groups "
                              "and kernel args size!"
@@ -545,7 +545,7 @@ struct DeviceGroupedGemmXdlSplitKCShuffle : public DeviceGroupedGemmSplitK<ALayo
             bool group_arg_valid = GridwiseGemm::CheckValidity(a);
             if(not group_arg_valid)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "[" << __func__ << "] group id: " << i
                               << " has invalid GridwiseGemm settings!" << std::endl;
diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3.hpp
index dea92bc576..82c05937c7 100644
--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3.hpp
@@ -935,7 +935,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(!(karg.M % MPerBlock == 0))
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg M value is not a multiple of MPerBlock! M: " << karg.M << " "
                               << __FILE__ << ":" << __LINE__ << ", in function: " << __func__
@@ -952,7 +952,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(!(karg.N % NPerBlock == 0))
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg N value is not a multiple of NPerBlock! N: " << karg.N << " "
                               << __FILE__ << ":" << __LINE__ << ", in function: " << __func__
@@ -971,7 +971,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
             auto K_t = karg.KBatch * KPerBlock;
             if(!(karg.K % K_t == 0))
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg K value is not a multiple of K_Batch * K0PerBlock * K1! K: "
                               << karg.K << " " << __FILE__ << ":" << __LINE__
@@ -995,7 +995,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(karg.K % ABlockTransferSrcScalarPerVector != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg K (" << karg.K
                               << ") value is not a multiple of ABlockTransferSrcScalarPerVector ("
@@ -1009,7 +1009,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(karg.M % ABlockTransferSrcScalarPerVector != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg M (" << karg.M
                               << ") value is not a multiple of ABlockTransferSrcScalarPerVector ("
@@ -1024,7 +1024,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(karg.N % BBlockTransferSrcScalarPerVector != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg N (" << karg.N
                               << ") value is not a multiple of BBlockTransferSrcScalarPerVector ("
@@ -1038,7 +1038,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(karg.K % BBlockTransferSrcScalarPerVector != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg K (" << karg.K
                               << ") value is not a multiple of BBlockTransferSrcScalarPerVector ("
@@ -1053,7 +1053,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(karg.N % CShuffleBlockTransferScalarPerVector_NPerBlock != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg N (" << karg.N
                               << ") value is not a multiple of "
@@ -1069,7 +1069,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(karg.M % CShuffleBlockTransferScalarPerVector_NPerBlock != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg M (" << karg.M
                               << ") value is not a multiple of "
@@ -1084,7 +1084,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
 
         if constexpr(is_same<remove_cvref_t<CDataType>, bhalf_t>::value)
         {
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 std::cout << " KBatch: " << karg.KBatch << " > 1 is not support yet" << __FILE__
                           << ":" << __LINE__ << ", in function: " << __func__ << std::endl;
diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_abd.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_abd.hpp
index fdafa9ca5c..aea1f5d387 100644
--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_abd.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_abd.hpp
@@ -1113,7 +1113,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(!(karg.M % MPerBlock == 0))
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg M value is not a multiple of MPerBlock! M: " << karg.M << " "
                               << __FILE__ << ":" << __LINE__ << ", in function: " << __func__
@@ -1130,7 +1130,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(!(karg.N % NPerBlock == 0))
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg N value is not a multiple of NPerBlock! N: " << karg.N << " "
                               << __FILE__ << ":" << __LINE__ << ", in function: " << __func__
@@ -1149,7 +1149,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
             auto K_t = karg.KBatch * KPerBlock;
             if(!(karg.K % K_t == 0))
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg K value is not a multiple of K_Batch * K0PerBlock * K1! K: "
                               << karg.K << " " << __FILE__ << ":" << __LINE__
@@ -1173,7 +1173,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(karg.K % ABlockTransferSrcScalarPerVector != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg K (" << karg.K
                               << ") value is not a multiple of ABlockTransferSrcScalarPerVector ("
@@ -1187,7 +1187,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(karg.M % ABlockTransferSrcScalarPerVector != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg M (" << karg.M
                               << ") value is not a multiple of ABlockTransferSrcScalarPerVector ("
@@ -1202,7 +1202,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(karg.N % BBlockTransferSrcScalarPerVector != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg N (" << karg.N
                               << ") value is not a multiple of BBlockTransferSrcScalarPerVector ("
@@ -1216,7 +1216,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(karg.K % BBlockTransferSrcScalarPerVector != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg K (" << karg.K
                               << ") value is not a multiple of BBlockTransferSrcScalarPerVector ("
@@ -1231,7 +1231,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(karg.N % CShuffleBlockTransferScalarPerVector_NPerBlock != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg N (" << karg.N
                               << ") value is not a multiple of "
@@ -1247,7 +1247,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
         {
             if(karg.M % CShuffleBlockTransferScalarPerVector_NPerBlock != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg M (" << karg.M
                               << ") value is not a multiple of "
diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp
index f2eeaf7e3d..6ee279a3f1 100644
--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp
@@ -446,7 +446,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2
         {
             if(!(karg.M % MPerBlock == 0))
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg M value is not a multiple of MPerBlock! M: " << karg.M << " "
                               << __FILE__ << ":" << __LINE__ << ", in function: " << __func__
@@ -463,7 +463,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2
         {
             if(!(karg.N % NPerBlock == 0))
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg N value is not a multiple of NPerBlock! N: " << karg.N << " "
                               << __FILE__ << ":" << __LINE__ << ", in function: " << __func__
@@ -482,7 +482,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2
             auto K_t = karg.k_batch * K0PerBlock * K1;
             if(!(karg.K % K_t == 0))
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg K value is not a multiple of K_Batch * K0PerBlock * K1! K: "
                               << karg.K << " " << __FILE__ << ":" << __LINE__
@@ -496,7 +496,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2
         {
             if(karg.K % ABlockTransferSrcScalarPerVector != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg K (" << karg.K
                               << ") value is not a multiple of ABlockTransferSrcScalarPerVector ("
@@ -510,7 +510,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2
         {
             if(karg.M % ABlockTransferSrcScalarPerVector != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg M (" << karg.M
                               << ") value is not a multiple of ABlockTransferSrcScalarPerVector ("
@@ -525,7 +525,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2
         {
             if(karg.N % BBlockTransferSrcScalarPerVector != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg N (" << karg.N
                               << ") value is not a multiple of BBlockTransferSrcScalarPerVector ("
@@ -539,7 +539,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2
         {
             if(karg.K % BBlockTransferSrcScalarPerVector != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg K (" << karg.K
                               << ") value is not a multiple of BBlockTransferSrcScalarPerVector ("
@@ -554,7 +554,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2
         {
             if(karg.N % CBlockTransferScalarPerVector_NWaveNPerXDL != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg N (" << karg.N
                               << ") value is not a multiple of "
@@ -569,7 +569,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2
         {
             if(karg.M % CBlockTransferScalarPerVector_NWaveNPerXDL != 0)
             {
-                if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+                if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
                 {
                     std::cout << "Arg M (" << karg.M
                               << ") value is not a multiple of "
@@ -584,7 +584,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2
         const auto num_k_loop = karg.K0Padded / K0PerBlock;
         if(!GridwiseGemmPipe::IsSupported(num_k_loop))
         {
-            if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+            if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
             {
                 std::cout << "The number of k loops (" << num_k_loop
                           << ") value is not supported by GridwiseGemm Pipeline."
diff --git a/include/ck/utility/env.hpp b/include/ck/utility/env.hpp
index 0b6504e528..6455402dcb 100644
--- a/include/ck/utility/env.hpp
+++ b/include/ck/utility/env.hpp
@@ -124,7 +124,7 @@ struct EnvVar
 
 #define CK_DECLARE_ENV_VAR_STR(name) CK_DECLARE_ENV_VAR(name, std::string, "")
 
-#define ENV(name) \
+#define CK_ENV(name) \
     ck::env::name {}
 
 template <class EnvVar>
diff --git a/profiler/include/profiler/profile_grouped_gemm_fixed_nk_impl.hpp b/profiler/include/profiler/profile_grouped_gemm_fixed_nk_impl.hpp
index 80c1c42b83..09e03de99c 100644
--- a/profiler/include/profiler/profile_grouped_gemm_fixed_nk_impl.hpp
+++ b/profiler/include/profiler/profile_grouped_gemm_fixed_nk_impl.hpp
@@ -88,7 +88,7 @@ bool profile_grouped_gemm_fixed_nk_impl(int do_verification,
 
         c_m_n_host_results.push_back(
             Tensor<CDataType>(f_host_tensor_descriptor(Ms[i], Ns[i], StrideCs[i], CLayout{})));
-        if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+        if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
         {
             std::cout << "group: " << i << " a_m_k[" << i << "]:" << a_m_k[i].mDesc << ", b_k_n["
                       << i << "]:" << b_k_n[i].mDesc << ", c_m_n_device_results[" << i
diff --git a/profiler/include/profiler/profile_grouped_gemm_impl.hpp b/profiler/include/profiler/profile_grouped_gemm_impl.hpp
index 476ec37eb2..0b73e4fcd1 100644
--- a/profiler/include/profiler/profile_grouped_gemm_impl.hpp
+++ b/profiler/include/profiler/profile_grouped_gemm_impl.hpp
@@ -87,7 +87,7 @@ bool profile_grouped_gemm_impl(int do_verification,
 
         c_m_n_host_results.push_back(
             Tensor<CDataType>(f_host_tensor_descriptor(Ms[i], Ns[i], StrideCs[i], CLayout{})));
-        if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+        if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
         {
             std::cout << "group: " << i << " a_m_k[" << i << "]:" << a_m_k[i].mDesc << ", b_k_n["
                       << i << "]:" << b_k_n[i].mDesc << ", c_m_n_device_results[" << i
diff --git a/profiler/include/profiler/profile_grouped_gemm_tile_loop_impl.hpp b/profiler/include/profiler/profile_grouped_gemm_tile_loop_impl.hpp
index 33e758f406..74faf15be3 100644
--- a/profiler/include/profiler/profile_grouped_gemm_tile_loop_impl.hpp
+++ b/profiler/include/profiler/profile_grouped_gemm_tile_loop_impl.hpp
@@ -82,7 +82,7 @@ bool profile_grouped_gemm_tile_loop_impl(int do_verification,
             Tensor<CDataType>(f_host_tensor_descriptor(Ms[i], Ns[i], StrideCs[i], CLayout{})));
         c_m_n_host_results.push_back(
             Tensor<CDataType>(f_host_tensor_descriptor(Ms[i], Ns[i], StrideCs[i], CLayout{})));
-        if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+        if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
         {
             std::cout << "group: " << i << " a_m_k[" << i << "]:" << a_m_k[i].mDesc << ", b_k_n["
                       << i << "]:" << b_k_n[i].mDesc << ", c_m_n_device_results[" << i
diff --git a/profiler/include/profiler/profile_grouped_gemm_two_stage_impl.hpp b/profiler/include/profiler/profile_grouped_gemm_two_stage_impl.hpp
index feb0be87e7..14df96d505 100644
--- a/profiler/include/profiler/profile_grouped_gemm_two_stage_impl.hpp
+++ b/profiler/include/profiler/profile_grouped_gemm_two_stage_impl.hpp
@@ -88,7 +88,7 @@ bool profile_grouped_gemm_two_stage_impl(int do_verification,
 
         c_m_n_host_results.push_back(
             Tensor<CDataType>(f_host_tensor_descriptor(Ms[i], Ns[i], StrideCs[i], CLayout{})));
-        if(ck::EnvIsEnabled(ENV(CK_LOGGING)))
+        if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
         {
             std::cout << "group: " << i << " a_m_k[" << i << "]:" << a_m_k[i].mDesc << ", b_k_n["
                       << i << "]:" << b_k_n[i].mDesc << ", c_m_n_device_results[" << i