From 2273f06ad658bef026f5330d7d62e7e13083d4a1 Mon Sep 17 00:00:00 2001
From: Johannes Graner <johannes.graner@amd.com>
Date: Wed, 7 Jan 2026 16:30:57 +0100
Subject: [PATCH] [CI, CK examples] Disable time_kernel for CI tests and
 examples (#3464)

* Disable kernel timing in tests

* default time_kernel = false in old CK examples

[ROCm/composable_kernel commit: 0a474aa62f6dd3f4b95bb405f0a8f1d457a4c0eb]
---
 example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp            | 2 +-
 example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp            | 2 +-
 example/12_reduce/reduce_blockwise.cpp                          | 2 +-
 example/12_reduce/reduce_multiblock_atomic_add.cpp              | 2 +-
 example/12_reduce/reduce_threadwise_multi_d.cpp                 | 2 +-
 example/13_pool2d_fwd/pool2d_fwd_fp16.cpp                       | 2 +-
 .../15_grouped_gemm/grouped_gemm_multiple_d_splitk_xdl_fp16.cpp | 2 +-
 example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp    | 2 +-
 .../gemm_add_add_mean_meansquare_xdl_fp16.cpp                   | 2 +-
 .../gemm_add_addsquare_xdl_int8.cpp                             | 2 +-
 example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp     | 2 +-
 example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp     | 2 +-
 example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp     | 2 +-
 example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp     | 2 +-
 example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp     | 2 +-
 .../gemm_mean_meansquare_xdl_bf16.cpp                           | 2 +-
 .../gemm_mean_meansquare_xdl_fp16.cpp                           | 2 +-
 .../gemm_mean_meansquare_xdl_fp32.cpp                           | 2 +-
 example/22_cgemm/cgemm_xdl_int4.cpp                             | 2 +-
 example/23_softmax/softmax_blockwise.cpp                        | 2 +-
 .../run_batched_gemm_example_fp16int4_b_scale.inc               | 2 +-
 .../batched_gemm_bias_e_permute_wmma_fp16.cpp                   | 2 +-
 example/30_grouped_conv_fwd_multiple_d/common.hpp               | 2 +-
 example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp          | 2 +-
 example/33_multiple_reduce/dual_reduce_common.hpp               | 2 +-
 example/35_splitK_gemm/common.hpp                               | 2 +-
 .../36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp | 2 +-
 .../batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp             | 2 +-
 .../conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp    | 2 +-
 .../conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp      | 2 +-
 .../conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp    | 2 +-
 .../conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp      | 2 +-
 .../conv2d_fwd_dl_perchannel_quantization_int8.cpp              | 2 +-
 .../conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp   | 2 +-
 .../conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp     | 2 +-
 .../conv2d_fwd_xdl_perchannel_quantization_int8.cpp             | 2 +-
 example/42_groupnorm_fwd/run_groupnorm_fwd_example.inc          | 2 +-
 example/44_elementwise_permute/elementwise_binary_4D_fp16.cpp   | 2 +-
 example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp  | 2 +-
 .../44_elementwise_permute/elementwise_permute_4D_fp16_col.cpp  | 2 +-
 .../44_elementwise_permute/elementwise_permute_4D_fp16_row.cpp  | 2 +-
 .../44_elementwise_permute/elementwise_permute_4D_fp32_col.cpp  | 2 +-
 .../44_elementwise_permute/elementwise_permute_4D_fp32_row.cpp  | 2 +-
 .../elementwise_scale_permute_amax_2D_fp16_fp8.cpp              | 2 +-
 example/44_elementwise_permute/elementwise_trinary_4D_fp16.cpp  | 2 +-
 .../elementwise_layernorm_blockwise.cpp                         | 2 +-
 example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8.cpp         | 2 +-
 .../65_gemm_multiply_multiply/moe_gemm1_xdl_fp8_blockscale.cpp  | 2 +-
 example/65_gemm_multiply_multiply/moe_gemm1_xdl_pk_i4.cpp       | 2 +-
 example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8.cpp         | 2 +-
 .../65_gemm_multiply_multiply/moe_gemm2_xdl_fp8_blockscale.cpp  | 2 +-
 example/65_gemm_multiply_multiply/moe_gemm2_xdl_pk_i4.cpp       | 2 +-
 example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4.cpp           | 2 +-
 example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bns.cpp       | 2 +-
 .../67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bpreshuffle.cpp   | 2 +-
 example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4.cpp           | 2 +-
 example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bns.cpp       | 2 +-
 .../67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bpreshuffle.cpp   | 2 +-
 test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp      | 2 +-
 test/gemm/gemm_standalone_xdl_fp16.cpp                          | 2 +-
 test/wrapper/test_wrapper_gemm_xdl.cpp                          | 2 +-
 61 files changed, 61 insertions(+), 61 deletions(-)

diff --git a/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp b/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp
index 0bded7d2ac..9b48d5765d 100644
--- a/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp
+++ b/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp
@@ -119,7 +119,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // GEMM shape
     ck::index_t M = 3840;
diff --git a/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp b/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp
index 4acf4fe9ff..a770bf5c77 100644
--- a/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp
+++ b/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp
@@ -119,7 +119,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // GEMM shape
     ck::index_t M = 3840;
diff --git a/example/12_reduce/reduce_blockwise.cpp b/example/12_reduce/reduce_blockwise.cpp
index 55f3d99823..f8299028da 100644
--- a/example/12_reduce/reduce_blockwise.cpp
+++ b/example/12_reduce/reduce_blockwise.cpp
@@ -31,7 +31,7 @@ class SimpleAppArgs
     bool do_verification = true;
     int data_type        = 1;
     int init_method      = 2;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     public:
     void show_usage(const char* cmd)
diff --git a/example/12_reduce/reduce_multiblock_atomic_add.cpp b/example/12_reduce/reduce_multiblock_atomic_add.cpp
index af5903f83c..66fc2bb582 100644
--- a/example/12_reduce/reduce_multiblock_atomic_add.cpp
+++ b/example/12_reduce/reduce_multiblock_atomic_add.cpp
@@ -31,7 +31,7 @@ class SimpleAppArgs
     bool do_verification = true;
     int data_type        = 1;
     int init_method      = 2;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     public:
     void show_usage(const char* cmd)
diff --git a/example/12_reduce/reduce_threadwise_multi_d.cpp b/example/12_reduce/reduce_threadwise_multi_d.cpp
index e77daea212..ee06395771 100644
--- a/example/12_reduce/reduce_threadwise_multi_d.cpp
+++ b/example/12_reduce/reduce_threadwise_multi_d.cpp
@@ -31,7 +31,7 @@ class SimpleAppArgs
     bool do_verification = true;
     int data_type        = 1;
     int init_method      = 2;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     public:
     void show_usage(const char* cmd)
diff --git a/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp b/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp
index f0a9ce9270..fc083ba3e2 100644
--- a/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp
+++ b/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp
@@ -53,7 +53,7 @@ int main(int argc, char* argv[])
     {
         do_verification = true;
         init_method     = 1;
-        time_kernel     = true;
+        time_kernel     = false;
     }
     else if(argc == 4)
     {
diff --git a/example/15_grouped_gemm/grouped_gemm_multiple_d_splitk_xdl_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_multiple_d_splitk_xdl_fp16.cpp
index 62d2022084..6fe285f165 100644
--- a/example/15_grouped_gemm/grouped_gemm_multiple_d_splitk_xdl_fp16.cpp
+++ b/example/15_grouped_gemm/grouped_gemm_multiple_d_splitk_xdl_fp16.cpp
@@ -90,7 +90,7 @@ struct ExecutionConfig final
     bool do_verification = true;
     int init_method      = 1;
     int k_batch          = 128;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 };
 
 bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& config)
diff --git a/example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp
index 1db8a9defb..0e1a38b19a 100644
--- a/example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp
+++ b/example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp
@@ -89,7 +89,7 @@ struct ExecutionConfig final
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 };
 
 bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& config)
diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp
index 08915fdd26..a30bedf282 100644
--- a/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp
@@ -268,7 +268,7 @@ int main()
         pass &= ck::utils::check_err(r1_m, r1_m_host, "Error: Incorrect results d1", 1e-2, 1e-2);
     }
 
-    bool time_kernel = true;
+    bool time_kernel = false;
     if(time_kernel)
     {
         float ave_time = invoker.Run(argument, StreamConfig{nullptr, time_kernel});
diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp
index 7a81d82c25..3401494625 100644
--- a/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp
@@ -302,7 +302,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // GEMM shape
     ck::index_t M = 1024;
diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp
index 5a127d1cd4..e4960668eb 100644
--- a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp
@@ -106,7 +106,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // GEMM shape
     ck::index_t M = 1024;
diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp
index 29be3dde0a..c97fa7ebc5 100644
--- a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp
@@ -106,7 +106,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // GEMM shape
     ck::index_t M = 1024;
diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp
index 0574488e04..f32d5e9f6d 100644
--- a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp
@@ -106,7 +106,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // GEMM shape
     ck::index_t M = 1024;
diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp
index 7da40adc90..6c9fb8da75 100644
--- a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp
@@ -108,7 +108,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // GEMM shape
     ck::index_t M = 1024;
diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp
index 47f1d50ef5..4a63bee894 100644
--- a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp
@@ -105,7 +105,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // GEMM shape
     ck::index_t M = 1024;
diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp
index cac3db3078..ebd71f1799 100644
--- a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp
@@ -112,7 +112,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // GEMM shape
     ck::index_t M = 1024;
diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp
index 5ea09cfab2..1153a66615 100644
--- a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp
@@ -112,7 +112,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // GEMM shape
     ck::index_t M = 1024;
diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp
index 8e120851ec..6b5dde3cc7 100644
--- a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp
@@ -112,7 +112,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // GEMM shape
     ck::index_t M = 1024;
diff --git a/example/22_cgemm/cgemm_xdl_int4.cpp b/example/22_cgemm/cgemm_xdl_int4.cpp
index 47b0e1d5a5..4f21c70562 100644
--- a/example/22_cgemm/cgemm_xdl_int4.cpp
+++ b/example/22_cgemm/cgemm_xdl_int4.cpp
@@ -81,7 +81,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // CGEMM shape
     ck::index_t M = 1024;
diff --git a/example/23_softmax/softmax_blockwise.cpp b/example/23_softmax/softmax_blockwise.cpp
index a741cb8133..0455819cdc 100644
--- a/example/23_softmax/softmax_blockwise.cpp
+++ b/example/23_softmax/softmax_blockwise.cpp
@@ -65,7 +65,7 @@ class SimpleAppArgs
 
     bool do_verification = true;
     int init_method      = 2;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     public:
     void show_usage(const char* cmd)
diff --git a/example/24_batched_gemm/run_batched_gemm_example_fp16int4_b_scale.inc b/example/24_batched_gemm/run_batched_gemm_example_fp16int4_b_scale.inc
index 12d7cf0aa6..86a36d53e2 100644
--- a/example/24_batched_gemm/run_batched_gemm_example_fp16int4_b_scale.inc
+++ b/example/24_batched_gemm/run_batched_gemm_example_fp16int4_b_scale.inc
@@ -27,7 +27,7 @@ struct ExecutionConfig final
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 };
 
 template <typename DataType>
diff --git a/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp
index 6efed7eb29..06bf971ac4 100644
--- a/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp
+++ b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp
@@ -248,7 +248,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     ck::index_t G0 = 1;
     ck::index_t G1 = 2;
diff --git a/example/30_grouped_conv_fwd_multiple_d/common.hpp b/example/30_grouped_conv_fwd_multiple_d/common.hpp
index e1939d4300..dce9f62293 100644
--- a/example/30_grouped_conv_fwd_multiple_d/common.hpp
+++ b/example/30_grouped_conv_fwd_multiple_d/common.hpp
@@ -92,7 +92,7 @@ struct ExecutionConfig final
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 };
 
 #define DefaultConvParam                                                       \
diff --git a/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp b/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp
index ca8cba039f..2b27405ecd 100644
--- a/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp
+++ b/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp
@@ -92,7 +92,7 @@ struct ExecutionConfig final
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 };
 
 #define DefaultConvParam                                                       \
diff --git a/example/33_multiple_reduce/dual_reduce_common.hpp b/example/33_multiple_reduce/dual_reduce_common.hpp
index 3f04af5e89..923b5b6f15 100644
--- a/example/33_multiple_reduce/dual_reduce_common.hpp
+++ b/example/33_multiple_reduce/dual_reduce_common.hpp
@@ -40,7 +40,7 @@ class SimpleAppArgs
 
     bool do_verification = true;
     int init_method      = 2;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     public:
     SimpleAppArgs()
diff --git a/example/35_splitK_gemm/common.hpp b/example/35_splitK_gemm/common.hpp
index d0f03f3611..8bf09ee786 100644
--- a/example/35_splitK_gemm/common.hpp
+++ b/example/35_splitK_gemm/common.hpp
@@ -44,7 +44,7 @@ struct ExecutionConfig final
 {
     bool do_verification = true;
     int init_method      = 2;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 };
 
 template <ck::index_t... Is>
diff --git a/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp b/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp
index 2f290497c9..ea8858b958 100644
--- a/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp
+++ b/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp
@@ -56,7 +56,7 @@ template<> struct emb_kernel<ck::half_t, 8192> { using kernel_type = DeviceInsta
 
 int main(int argc, char* argv[])
 {
-    bool time_kernel = true;
+    bool time_kernel = false;
 
     ck::index_t num_rows          = 65536;
     constexpr auto dims           = ck::Sequence<256, 512, 768, 1024, 1536, 2048, 4096, 8192>{};
diff --git a/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp b/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp
index dc0b95863e..ab87124c6b 100644
--- a/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp
+++ b/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp
@@ -195,7 +195,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // GEMM shape
     ck::index_t M              = 1024;
diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp
index c6cc9c6a15..9e7039461c 100644
--- a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp
+++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp
@@ -86,7 +86,7 @@ using DeviceGroupedConvNDFwdInstance =
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     if(argc == 1)
     {
diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp
index 0f49cb5a38..fa6a36c212 100644
--- a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp
+++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp
@@ -84,7 +84,7 @@ using DeviceGroupedConvNDFwdInstance =
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     if(argc == 1)
     {
diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp
index 5652cc38ab..45651da757 100644
--- a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp
+++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp
@@ -87,7 +87,7 @@ using DeviceGroupedConvNDFwdInstance =
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     if(argc == 1)
     {
diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp
index 138a214127..cda4c1419c 100644
--- a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp
+++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp
@@ -84,7 +84,7 @@ using DeviceGroupedConvNDFwdInstance =
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     if(argc == 1)
     {
diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp
index 1652cea214..0e52ac280a 100644
--- a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp
+++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp
@@ -84,7 +84,7 @@ using DeviceGroupedConvNDFwdInstance =
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     if(argc == 1)
     {
diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp
index f127940377..9bff452a67 100644
--- a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp
+++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp
@@ -90,7 +90,7 @@ using DeviceGroupedConvNDFwdInstance =
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     if(argc == 1)
     {
diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp
index 7a03a3efe0..17a7b632af 100644
--- a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp
+++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp
@@ -88,7 +88,7 @@ using DeviceGroupedConvNDFwdInstance =
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     if(argc == 1)
     {
diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp
index 155024dc62..345277e092 100644
--- a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp
+++ b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp
@@ -88,7 +88,7 @@ using DeviceGroupedConvNDFwdInstance =
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     if(argc == 1)
     {
diff --git a/example/42_groupnorm_fwd/run_groupnorm_fwd_example.inc b/example/42_groupnorm_fwd/run_groupnorm_fwd_example.inc
index b1596b5a53..d5f9b831f0 100644
--- a/example/42_groupnorm_fwd/run_groupnorm_fwd_example.inc
+++ b/example/42_groupnorm_fwd/run_groupnorm_fwd_example.inc
@@ -12,7 +12,7 @@ int run_groupnorm_fwd_example(int argc, char* argv[])
     ck::index_t C = 128;
 
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
     bool log_kernel      = true;
 
     if(argc == 1)
diff --git a/example/44_elementwise_permute/elementwise_binary_4D_fp16.cpp b/example/44_elementwise_permute/elementwise_binary_4D_fp16.cpp
index 14b338c9c5..e90880dabd 100644
--- a/example/44_elementwise_permute/elementwise_binary_4D_fp16.cpp
+++ b/example/44_elementwise_permute/elementwise_binary_4D_fp16.cpp
@@ -53,7 +53,7 @@ using DeviceElementwisePermuteInstance = ck::tensor_operation::device::DeviceEle
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     std::vector<std::size_t> nchw = {16, 128, 32, 64};
 
diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp
index a7d139fc95..2b99d9261f 100644
--- a/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp
+++ b/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp
@@ -46,7 +46,7 @@ using DeviceElementwisePermuteInstance = ck::tensor_operation::device::DeviceEle
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     if(argc == 1)
     {
diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp16_col.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16_col.cpp
index cd1db4cdaf..276aa7f3c7 100644
--- a/example/44_elementwise_permute/elementwise_permute_4D_fp16_col.cpp
+++ b/example/44_elementwise_permute/elementwise_permute_4D_fp16_col.cpp
@@ -50,7 +50,7 @@ using DeviceElementwisePermuteInstance = ck::tensor_operation::device::DeviceEle
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     if(argc == 1)
     {
diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp16_row.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16_row.cpp
index 683c5cb072..0842325bad 100644
--- a/example/44_elementwise_permute/elementwise_permute_4D_fp16_row.cpp
+++ b/example/44_elementwise_permute/elementwise_permute_4D_fp16_row.cpp
@@ -50,7 +50,7 @@ using DeviceElementwisePermuteInstance = ck::tensor_operation::device::DeviceEle
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     if(argc == 1)
     {
diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp32_col.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp32_col.cpp
index abfd3ccf7c..a48f2349c9 100644
--- a/example/44_elementwise_permute/elementwise_permute_4D_fp32_col.cpp
+++ b/example/44_elementwise_permute/elementwise_permute_4D_fp32_col.cpp
@@ -49,7 +49,7 @@ using DeviceElementwisePermuteInstance = ck::tensor_operation::device::DeviceEle
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     if(argc == 1)
     {
diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp32_row.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp32_row.cpp
index ff4e8f3a3d..39d88c47a1 100644
--- a/example/44_elementwise_permute/elementwise_permute_4D_fp32_row.cpp
+++ b/example/44_elementwise_permute/elementwise_permute_4D_fp32_row.cpp
@@ -50,7 +50,7 @@ using DeviceElementwisePermuteInstance = ck::tensor_operation::device::DeviceEle
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     if(argc == 1)
     {
diff --git a/example/44_elementwise_permute/elementwise_scale_permute_amax_2D_fp16_fp8.cpp b/example/44_elementwise_permute/elementwise_scale_permute_amax_2D_fp16_fp8.cpp
index 939860bf69..3aef0fdaac 100644
--- a/example/44_elementwise_permute/elementwise_scale_permute_amax_2D_fp16_fp8.cpp
+++ b/example/44_elementwise_permute/elementwise_scale_permute_amax_2D_fp16_fp8.cpp
@@ -121,7 +121,7 @@ void reference_scale_permute_amax(Tensor<InputDataType>& input,
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     const float scale = 2.f;
 
diff --git a/example/44_elementwise_permute/elementwise_trinary_4D_fp16.cpp b/example/44_elementwise_permute/elementwise_trinary_4D_fp16.cpp
index 497f1c67c8..86af00e4fb 100644
--- a/example/44_elementwise_permute/elementwise_trinary_4D_fp16.cpp
+++ b/example/44_elementwise_permute/elementwise_trinary_4D_fp16.cpp
@@ -58,7 +58,7 @@ using DeviceElementwisePermuteInstance = ck::tensor_operation::device::DeviceEle
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     if(argc == 1)
     {
diff --git a/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp b/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp
index eb95128f38..71cee9c420 100644
--- a/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp
+++ b/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp
@@ -84,7 +84,7 @@ void host_elementwise2D(HostTensorC& C,
 int main(int argc, char* argv[])
 {
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     ck::index_t M = 48 * 256;
     ck::index_t N = 1024;
diff --git a/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8.cpp b/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8.cpp
index c0452b6067..10f7a38863 100644
--- a/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8.cpp
+++ b/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8.cpp
@@ -205,7 +205,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // GEMM shape
     ck::index_t N               = 4096;
diff --git a/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8_blockscale.cpp b/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8_blockscale.cpp
index ecc3034bba..d6082e5882 100644
--- a/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8_blockscale.cpp
+++ b/example/65_gemm_multiply_multiply/moe_gemm1_xdl_fp8_blockscale.cpp
@@ -193,7 +193,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 #if 1
     // GEMM shape
     ck::index_t N       = 4096;
diff --git a/example/65_gemm_multiply_multiply/moe_gemm1_xdl_pk_i4.cpp b/example/65_gemm_multiply_multiply/moe_gemm1_xdl_pk_i4.cpp
index 0067c1d1fb..a2002270dc 100644
--- a/example/65_gemm_multiply_multiply/moe_gemm1_xdl_pk_i4.cpp
+++ b/example/65_gemm_multiply_multiply/moe_gemm1_xdl_pk_i4.cpp
@@ -194,7 +194,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // per expert:
     // GEMM shape
diff --git a/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8.cpp b/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8.cpp
index a602838c30..9f4cd13573 100644
--- a/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8.cpp
+++ b/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8.cpp
@@ -185,7 +185,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // per expert:
     // GEMM shape
diff --git a/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8_blockscale.cpp b/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8_blockscale.cpp
index fb5e3b6456..552d3cd7b5 100644
--- a/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8_blockscale.cpp
+++ b/example/65_gemm_multiply_multiply/moe_gemm2_xdl_fp8_blockscale.cpp
@@ -188,7 +188,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // tokens = 1
     // topk = 1
diff --git a/example/65_gemm_multiply_multiply/moe_gemm2_xdl_pk_i4.cpp b/example/65_gemm_multiply_multiply/moe_gemm2_xdl_pk_i4.cpp
index f56410d37a..377b53b519 100644
--- a/example/65_gemm_multiply_multiply/moe_gemm2_xdl_pk_i4.cpp
+++ b/example/65_gemm_multiply_multiply/moe_gemm2_xdl_pk_i4.cpp
@@ -164,7 +164,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // per expert:
     // GEMM shape
diff --git a/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4.cpp b/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4.cpp
index 3ce059ba20..586ecd81bf 100644
--- a/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4.cpp
+++ b/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4.cpp
@@ -178,7 +178,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // per expert:
     // GEMM shape
diff --git a/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bns.cpp b/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bns.cpp
index d1d601977d..b3b2ebcbc0 100644
--- a/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bns.cpp
+++ b/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bns.cpp
@@ -178,7 +178,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // per expert:
     // GEMM shape
diff --git a/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bpreshuffle.cpp b/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bpreshuffle.cpp
index 0078cc5625..5c7668ab73 100644
--- a/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bpreshuffle.cpp
+++ b/example/67_gemm_microscaling/moe_gemm1_xdl_mx_fp4_bpreshuffle.cpp
@@ -208,7 +208,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // per expert:
     // GEMM shape
diff --git a/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4.cpp b/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4.cpp
index 202241d14f..04c3afc62b 100644
--- a/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4.cpp
+++ b/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4.cpp
@@ -171,7 +171,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // per expert:
     // GEMM shape
diff --git a/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bns.cpp b/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bns.cpp
index 660ccabc94..12bb76eccd 100644
--- a/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bns.cpp
+++ b/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bns.cpp
@@ -171,7 +171,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // per expert:
     // GEMM shape
diff --git a/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bpreshuffle.cpp b/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bpreshuffle.cpp
index f398959114..6a5f5a6b9f 100644
--- a/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bpreshuffle.cpp
+++ b/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bpreshuffle.cpp
@@ -204,7 +204,7 @@ int main(int argc, char* argv[])
 {
     bool do_verification = true;
     int init_method      = 1;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
 
     // per expert:
     // GEMM shape
diff --git a/test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp b/test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp
index e26ac53abe..2403c564b7 100644
--- a/test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp
+++ b/test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp
@@ -61,7 +61,7 @@ class TestBatchedGemmMultiD : public ::testing::Test
                 true,  // do_verification
                 1,     // init_method
                 false, // do_log
-                1,     // time_kernel,
+                false, // time_kernel,
                 M,
                 N,
                 K,
diff --git a/test/gemm/gemm_standalone_xdl_fp16.cpp b/test/gemm/gemm_standalone_xdl_fp16.cpp
index 90a5a325b8..2df67a083a 100644
--- a/test/gemm/gemm_standalone_xdl_fp16.cpp
+++ b/test/gemm/gemm_standalone_xdl_fp16.cpp
@@ -104,7 +104,7 @@ int main(int argc, char* argv[])
     };
 
     bool do_verification = true;
-    bool time_kernel     = true;
+    bool time_kernel     = false;
     int problem_index    = -1;
 
     if(argc == 1)
diff --git a/test/wrapper/test_wrapper_gemm_xdl.cpp b/test/wrapper/test_wrapper_gemm_xdl.cpp
index b9d4bc3e57..b8965a217b 100644
--- a/test/wrapper/test_wrapper_gemm_xdl.cpp
+++ b/test/wrapper/test_wrapper_gemm_xdl.cpp
@@ -306,7 +306,7 @@ void PerformGemm(const ck::index_t M,
 
     const auto kernel =
         DeviceGemm<DataType, GemmTraits, scalar_per_vector, BlockShape, ThreadLayout, DoPadding>;
-    const float avg_time = launch_and_time_kernel(StreamConfig{nullptr, true},
+    const float avg_time = launch_and_time_kernel(StreamConfig{nullptr, false},
                                                   kernel,
                                                   dim3(grid_size_x, grid_size_y, 1),
                                                   dim3(ck::wrapper::size(thread_layout)),