From c7ddaf768b03efa0cbd6704b22d34576a4ad8fa0 Mon Sep 17 00:00:00 2001
From: Illia Silin <98187287+illsilin@users.noreply.github.com>
Date: Wed, 24 Jan 2024 13:47:48 -0800
Subject: [PATCH] Fixing most of the cppcheck errors. (#1142)

* fix cppcheck errors, first pass

* fix format

* fix returned value in examples

* add macro definitions for cppcheck

* fix the profile_gemm logic

* update the gemm profiler logic

* add more difinitions to cppcheck, fix couple more errors

* replace runtime error with message in device function

* fix a couple of int4 issues

* no return for fill function

* fix errors in data_types.hpp

* fix format

* fix few remaining errors

* fix errors in data_types.hpp

* fix last couple of errors in datat_types.hpp

[ROCm/composable_kernel commit: 180e5720760d201b4bfc15f99f59a311b1bc5562]
---
 Jenkinsfile                                   |  8 ++-
 example/01_gemm/gemm_dl_int4.cpp              |  5 +-
 example/01_gemm/gemm_xdl_int4.cpp             |  5 +-
 .../gemm_add_add_fastgelu_xdl_int4.cpp        |  5 +-
 .../convnd_fwd_max_xdl_int4.cpp               |  5 +-
 .../batched_gemm_reduce_xdl_fp16.cpp          |  9 ++-
 ...rouped_conv_fwd_bias_relu_add_xdl_int4.cpp |  5 +-
 .../batched_gemm_gemm_xdl_int4.cpp            |  5 +-
 .../grouped_conv_conv_fwd_xdl_int4.cpp        |  5 +-
 example/48_pool3d_fwd/pool3d_fwd_common.hpp   |  4 ++
 .../51_avgpool3d_bwd/avgpool3d_bwd_common.hpp |  4 ++
 include/ck/utility/data_type.hpp              | 65 +++++++++++++++++++
 .../cpu/reference_column_to_image.hpp         |  2 +
 .../cpu/reference_conv_bwd_data.hpp           |  3 +
 .../cpu/reference_conv_bwd_weight.hpp         |  2 +
 .../cpu/reference_conv_fwd.hpp                |  2 +
 .../cpu/reference_gemm.hpp                    |  7 +-
 .../cpu/reference_image_to_column.hpp         |  2 +
 .../gpu/batched_gemm_gemm.hpp                 |  3 +-
 .../gpu/gemm_streamk.hpp                      |  3 +-
 profiler/src/profile_gemm.cpp                 | 12 +++-
 script/clang-format-overwrite.sh              |  2 +-
 .../test_conv_tensor_rearrange_interface.cpp  |  2 +
 23 files changed, 125 insertions(+), 40 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index e333a35ecd..9359aa1f69 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -304,7 +304,7 @@ def buildHipClangJob(Map conf=[:]){
 
         gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') {
             withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
-                timeout(time: 20, unit: 'HOURS')
+                timeout(time: 48, unit: 'HOURS')
                 {
                     cmake_build(conf)
                 }
@@ -755,7 +755,11 @@ pipeline {
                                 -o -iname \'*.cl\' \
                                 | grep -v 'build/' \
                                 | xargs -n 1 -P 1 -I{} -t sh -c \'clang-format-12 -style=file {} | diff - {}\' && \
-                                /cppcheck/build/bin/cppcheck ../* -v -j \$(nproc) -I ../include -I ../profiler/include -I ../library/include --file-filter=*.cpp --enable=all --output-file=ck_cppcheck.log"
+                                /cppcheck/build/bin/cppcheck ../* -v -j \$(nproc) -I ../include -I ../profiler/include -I ../library/include \
+                                -D CK_ENABLE_FP64 -D CK_ENABLE_FP32 -D CK_ENABLE_FP16 -D CK_ENABLE_FP8 -D CK_ENABLE_BF16 -D CK_ENABLE_BF8 -D CK_ENABLE_INT8 -D DL_KERNELS \
+                                -D __gfx908__ -D __gfx90a__ -D __gfx940__ -D __gfx941__ -D __gfx942__ -D __gfx1030__ -D __gfx1100__ -D __gfx1101__ -D __gfx1102__ \
+                                -U __gfx803__ -U __gfx900__ -U __gfx906__ -U CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4 \
+                                --file-filter=*.cpp --force --enable=all --output-file=ck_cppcheck.log"
                     }
                     steps{
                         buildHipClangJobAndReboot(setup_cmd: "", build_cmd: "", execute_cmd: execute_cmd, no_reboot:true)
diff --git a/example/01_gemm/gemm_dl_int4.cpp b/example/01_gemm/gemm_dl_int4.cpp
index e55ae14013..43c0cfe2e0 100644
--- a/example/01_gemm/gemm_dl_int4.cpp
+++ b/example/01_gemm/gemm_dl_int4.cpp
@@ -1,9 +1,7 @@
 // SPDX-License-Identifier: MIT
 // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
 
-#ifndef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
-#error Should compile this file with ck::int4_t support
-#endif
+#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
 
 #include "common.hpp"
 
@@ -43,3 +41,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
 #include "run_gemm_example.inc"
 
 int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); }
+#endif
\ No newline at end of file
diff --git a/example/01_gemm/gemm_xdl_int4.cpp b/example/01_gemm/gemm_xdl_int4.cpp
index f6238c7aa5..fb4f383fae 100644
--- a/example/01_gemm/gemm_xdl_int4.cpp
+++ b/example/01_gemm/gemm_xdl_int4.cpp
@@ -1,9 +1,7 @@
 // SPDX-License-Identifier: MIT
 // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
 
-#ifndef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
-#error Should compile this file with ck::int4_t support
-#endif
+#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
 
 #include "common.hpp"
 
@@ -44,3 +42,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
 #include "run_gemm_example.inc"
 
 int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); }
+#endif
\ No newline at end of file
diff --git a/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp
index f206bbeb41..1d0b0f7861 100644
--- a/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp
+++ b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp
@@ -1,9 +1,7 @@
 // SPDX-License-Identifier: MIT
 // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
 
-#ifndef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
-#error Should compile this file with ck::int4_t support
-#endif
+#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
 
 #include "common.hpp"
 
@@ -58,3 +56,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<ADataTyp
 #include "run_gemm_add_add_fastgelu_example.inc"
 
 int main(int argc, char* argv[]) { return !run_gemm_add_add_fastgelu_example(argc, argv); }
+#endif
diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp
index bf7127502f..ce8ff0b49a 100644
--- a/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp
+++ b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp
@@ -1,9 +1,7 @@
 // SPDX-License-Identifier: MIT
 // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
 
-#ifndef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
-#error Should compile this file with ck::int4_t support
-#endif
+#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
 
 #define BUILD_INT4_EXAMPLE
 
@@ -24,3 +22,4 @@ using RsDataType        = ck::Tuple<R0DataType>;
 #include "run_convnd_fwd_max_example.inc"
 
 int main(int argc, char* argv[]) { return !run_convnd_fwd_max_example(argc, argv); }
+#endif
diff --git a/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp b/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp
index e363dc5c12..62295c57eb 100644
--- a/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp
+++ b/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp
@@ -272,15 +272,14 @@ int main(int argc, char* argv[])
         {
             for(int m = 0; m < M; ++m)
             {
-                auto reduce0_acc = reduce0_op.GetIdentityValue<ReduceAccDataType>();
-                auto reduce1_acc = reduce1_op.GetIdentityValue<ReduceAccDataType>();
-
+                auto reduce0_acc         = reduce0_op.GetIdentityValue<ReduceAccDataType>();
+                auto reduce1_acc         = reduce1_op.GetIdentityValue<ReduceAccDataType>();
+                ReduceAccDataType d0_val = 0;
+                ReduceAccDataType d1_val = 0;
                 for(int n = 0; n < N; ++n)
                 {
                     auto c_val =
                         ck::type_convert<ReduceAccDataType>(c_g_m_n_host_result(batch, m, n));
-                    ReduceAccDataType d0_val;
-                    ReduceAccDataType d1_val;
 
                     UnaryIdenticElementOp{}(d0_val, c_val);
                     UnarySquareElementOp{}(d1_val, c_val);
diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp
index 5494563fdd..6f91d51a5f 100644
--- a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp
+++ b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp
@@ -1,9 +1,7 @@
 // SPDX-License-Identifier: MIT
 // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
 
-#ifndef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
-#error Should compile this file with ck::int4_t support
-#endif
+#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
 
 #include "common.hpp"
 
@@ -29,3 +27,4 @@ using OutElementOp = ck::tensor_operation::element_wise::AddReluAdd;
 #include "run_grouped_conv_fwd_bias_relu_add_example.inc"
 
 int main(int argc, char* argv[]) { return !run_grouped_conv_fwd_bias_relu_add_example(argc, argv); }
+#endif
diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp
index d166214c33..2caee6b8dc 100644
--- a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp
+++ b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp
@@ -9,9 +9,7 @@ Gemm + Gemm fused operation. Computes C_m_o = A_m_k * B0_k_n * B1_n_o
                                                        Gemm1
 */
 
-#ifndef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
-#error Should compile this file with ck::int4_t support
-#endif
+#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
 
 #include <iostream>
 #include <numeric>
@@ -144,3 +142,4 @@ static_assert(sizeof(ck::int4_t) == sizeof(int8_t));
 #endif
 
 int main(int argc, char* argv[]) { return run_batched_gemm_gemm_example(argc, argv) ? 0 : 1; }
+#endif
diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp
index 80f6e9ae05..cf7b1ce3a8 100644
--- a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp
+++ b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp
@@ -1,9 +1,7 @@
 // SPDX-License-Identifier: MIT
 // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
 
-#ifndef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
-#error Should compile this file with ck::int4_t support
-#endif
+#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
 
 #include <cstdlib>
 #include <iostream>
@@ -120,3 +118,4 @@ static_assert(sizeof(ck::int4_t) == sizeof(int8_t));
 #endif
 
 int main(int argc, char* argv[]) { return run_grouped_conv_conv_fwd_example(argc, argv) ? 0 : 1; }
+#endif
diff --git a/example/48_pool3d_fwd/pool3d_fwd_common.hpp b/example/48_pool3d_fwd/pool3d_fwd_common.hpp
index 39032fa123..788f38ec52 100644
--- a/example/48_pool3d_fwd/pool3d_fwd_common.hpp
+++ b/example/48_pool3d_fwd/pool3d_fwd_common.hpp
@@ -32,6 +32,8 @@ std::vector<ck::index_t> f_tensor_strides_ncdhw(ck::index_t N_,
         return {C_ * D * H * W, D * H * W, H * W, W, 1_uz};
     else if constexpr(ck::is_same<decltype(layout), ck::tensor_layout::convolution::NDHWC>::value)
         return {D * C_ * H * W, 1_uz, C_ * H * W, W * C_, C_};
+    throw std::runtime_error("Pool3d_fwd: problem with layout. ");
+    return {0, 0, 0, 0, 0};
 };
 
 template <typename TensorLayout>
@@ -53,6 +55,8 @@ HostTensorDescriptor f_host_tensor_descriptor(std::size_t N_,
         return HostTensorDescriptor({N_, C_, D, H, W},
                                     {D * C_ * H * W, 1_uz, C_ * H * W, W * C_, C_});
     }
+    throw std::runtime_error("Pool3d_fwd: problem with layout. ");
+    return HostTensorDescriptor({0, 0, 0, 0, 0}, {0, 0, 0, 0, 0});
 };
 
 template <typename DevicePoolFwdInstance,
diff --git a/example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp
index 394f046b1e..032828f7bc 100644
--- a/example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp
+++ b/example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp
@@ -26,6 +26,8 @@ std::vector<ck::index_t> f_tensor_strides_ncdhw(ck::index_t N_,
         return {C_ * D * H * W, D * H * W, H * W, W, 1_uz};
     else if constexpr(ck::is_same<decltype(layout), ck::tensor_layout::convolution::NDHWC>::value)
         return {D * C_ * H * W, 1_uz, C_ * H * W, W * C_, C_};
+    throw std::runtime_error("Avgpool3d_bwd: problem with layout. ");
+    return {0, 0, 0, 0, 0};
 };
 
 template <typename TensorLayout>
@@ -47,6 +49,8 @@ HostTensorDescriptor f_host_tensor_descriptor(std::size_t N_,
         return HostTensorDescriptor({N_, C_, D, H, W},
                                     {D * C_ * H * W, 1_uz, C_ * H * W, W * C_, C_});
     }
+    throw std::runtime_error("Avgpool3d_bwd: problem with layout. ");
+    return HostTensorDescriptor({0, 0, 0, 0, 0}, {0, 0, 0, 0, 0});
 };
 
 template <typename DevicePoolBwdInstance,
diff --git a/include/ck/utility/data_type.hpp b/include/ck/utility/data_type.hpp
index d367ad8df5..31ae71880a 100644
--- a/include/ck/utility/data_type.hpp
+++ b/include/ck/utility/data_type.hpp
@@ -189,6 +189,7 @@ struct vector_type<T, 1>
     }
 };
 
+int static err = 0;
 template <typename T>
 struct vector_type<T, 2>
 {
@@ -221,6 +222,10 @@ struct vector_type<T, 2>
         {
             return data_.d2x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 
     template <typename X>
@@ -236,6 +241,10 @@ struct vector_type<T, 2>
         {
             return data_.d2x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 };
 
@@ -278,6 +287,10 @@ struct vector_type<T, 4>
         {
             return data_.d4x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 
     template <typename X>
@@ -298,6 +311,10 @@ struct vector_type<T, 4>
         {
             return data_.d4x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 };
 
@@ -347,6 +364,10 @@ struct vector_type<T, 8>
         {
             return data_.d8x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 
     template <typename X>
@@ -372,6 +393,10 @@ struct vector_type<T, 8>
         {
             return data_.d8x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 };
 
@@ -428,6 +453,10 @@ struct vector_type<T, 16>
         {
             return data_.d16x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 
     template <typename X>
@@ -458,6 +487,10 @@ struct vector_type<T, 16>
         {
             return data_.d16x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 };
 
@@ -520,6 +553,10 @@ struct vector_type<T, 32>
         {
             return data_.d32x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 
     template <typename X>
@@ -554,6 +591,10 @@ struct vector_type<T, 32>
         {
             return data_.d32x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 };
 
@@ -623,6 +664,10 @@ struct vector_type<T, 64>
         {
             return data_.d64x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 
     template <typename X>
@@ -662,6 +707,10 @@ struct vector_type<T, 64>
         {
             return data_.d64x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 };
 
@@ -737,6 +786,10 @@ struct vector_type<T, 128>
         {
             return data_.d128x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 
     template <typename X>
@@ -780,6 +833,10 @@ struct vector_type<T, 128>
         {
             return data_.d128x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 };
 
@@ -861,6 +918,10 @@ struct vector_type<T, 256>
         {
             return data_.d256x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 
     template <typename X>
@@ -908,6 +969,10 @@ struct vector_type<T, 256>
         {
             return data_.d256x1_;
         }
+        else
+        {
+            return err;
+        }
     }
 };
 
diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp
index 45e35ec56d..5f2ab12164 100644
--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp
@@ -265,6 +265,8 @@ struct ReferenceColumnToImage : public device::BaseOperator
 
                 return 0;
             }
+            throw std::runtime_error("Col2Img: number of dimensions should be between 1 and 3.");
+            return 1;
         }
 
         float Run(const device::BaseArgument* p_arg,
diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp
index 50040a2441..bfb8b48187 100644
--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp
@@ -313,6 +313,9 @@ struct ReferenceConvBwdData : public device::BaseOperator
 
                 return 0;
             }
+            throw std::runtime_error(
+                "Conv_bwd_data: number of dimensions must be between 1 and 3.");
+            return 1;
         }
 
         float Run(const device::BaseArgument* p_arg,
diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp
index 02ad7a033a..d0b98efd1f 100644
--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp
@@ -265,6 +265,8 @@ struct ReferenceConvBwdWeight : public device::BaseOperator
 
                 return 0;
             }
+            throw std::runtime_error("Conv_bwd: number of dimensions must be between 1 and 3.");
+            return 1;
         }
 
         float Run(const device::BaseArgument* p_arg,
diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
index ffc9470df2..d63b5256f9 100644
--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
@@ -360,6 +360,8 @@ struct ReferenceConvFwd : public device::BaseOperator
 
                 return 0;
             }
+            throw std::runtime_error("Conv_fwd: number of dimensions must be between 1 and 3.");
+            return 1;
         }
 
         float Run(const device::BaseArgument* p_arg,
diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp
index 6e39dee71c..4d52563f42 100644
--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp
@@ -63,12 +63,11 @@ struct ReferenceGemm : public device::BaseOperator
                 const int K = arg.a_m_k_.mDesc.GetLengths()[1];
 
                 AccDataType v_acc = 0;
+                ComputeTypeA v_a  = 0;
+                ComputeTypeB v_b  = 0;
 
                 for(int k = 0; k < K; ++k)
                 {
-                    ComputeTypeA v_a;
-                    ComputeTypeB v_b;
-
                     // use PassThrough instead of ConvertBF16RTN for reference calculation
                     if constexpr(is_same_v<AElementwiseOperation,
                                            ck::tensor_operation::element_wise::ConvertBF16RTN>)
@@ -94,7 +93,7 @@ struct ReferenceGemm : public device::BaseOperator
                         ck::type_convert<AccDataType>(v_a) * ck::type_convert<AccDataType>(v_b);
                 }
 
-                CDataType v_c;
+                CDataType v_c = 0;
 
                 arg.c_element_op_(v_c, v_acc);
 
diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp
index 750d4d14f8..4682c5c223 100644
--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp
@@ -230,6 +230,8 @@ struct ReferenceImageToColumn : public device::BaseOperator
 
                 return 0;
             }
+            throw std::runtime_error("Img2Col: number of dimensions should be between 1 and 3.");
+            return 1;
         }
 
         float Run(const device::BaseArgument* p_arg,
diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp
index 77ad36b97b..42ca8e755d 100644
--- a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp
+++ b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp
@@ -106,9 +106,8 @@ struct DeviceOperationInstanceFactory<
         return op_ptrs;
     }
 };
-
+#endif
 } // namespace instance
 } // namespace device
 } // namespace tensor_operation
 } // namespace ck
-#endif
diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp
index 2df378b0c6..730785f702 100644
--- a/library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp
+++ b/library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp
@@ -114,9 +114,8 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGemmSt
         return op_ptrs;
     }
 };
-
+#endif
 } // namespace instance
 } // namespace device
 } // namespace tensor_operation
 } // namespace ck
-#endif
diff --git a/profiler/src/profile_gemm.cpp b/profiler/src/profile_gemm.cpp
index 0d6c5021f3..c322c7054b 100644
--- a/profiler/src/profile_gemm.cpp
+++ b/profiler/src/profile_gemm.cpp
@@ -137,8 +137,14 @@ int profile_gemm(int argc, char* argv[])
         return pass ? 0 : 1;
     };
 
-    if(false)
-        ;
+    if(data_type != GemmDataType::F32_F32_F32 && data_type != GemmDataType::F16_F16_F16 &&
+       data_type != GemmDataType::BF16_BF16_BF16 && data_type != GemmDataType::INT8_INT8_INT8 &&
+       data_type != GemmDataType::F8_F8_F8)
+    {
+        // dummy clause before the else clauses for different data types
+        std::cout << "Gemm: this data_type is not implemented" << std::endl;
+        return 1;
+    }
 #ifdef CK_ENABLE_FP32
     else if(data_type == GemmDataType::F32_F32_F32 && layout == GemmMatrixLayout::MK_KN_MN)
     {
@@ -231,7 +237,7 @@ int profile_gemm(int argc, char* argv[])
 #endif
     else
     {
-        std::cout << "this data_type & layout is not implemented" << std::endl;
+        std::cout << "Gemm: this data_type & layout is not implemented" << std::endl;
 
         return 1;
     }
diff --git a/script/clang-format-overwrite.sh b/script/clang-format-overwrite.sh
index da83254f00..728b8c1092 100755
--- a/script/clang-format-overwrite.sh
+++ b/script/clang-format-overwrite.sh
@@ -1,2 +1,2 @@
-#find . -name deps -prune -o -name build -prune -o -iname '*.h' -o -iname '*.hpp' -o -iname '*.cpp' -o -iname '*.h.in' -o -iname '*.hpp.in' -o -iname '*.cpp.in' -o -iname '*.cl' -o -iname '*.cuh' -o -iname '*.cu' -o -iname '*.inc' | xargs -n 1 -P 16 -I{} -t sh -c 'clang-format-12 -i -style=file {}'
+find . -name deps -prune -o -name build -prune -o -iname '*.h' -o -iname '*.hpp' -o -iname '*.cpp' -o -iname '*.h.in' -o -iname '*.hpp.in' -o -iname '*.cpp.in' -o -iname '*.cl' -o -iname '*.cuh' -o -iname '*.cu' -o -iname '*.inc' | xargs -n 1 -P 16 -I{} -t sh -c 'clang-format-12 -i -style=file {}'
 git status --porcelain | awk '$1 != "D" && (match($2, "\\.cpp|hpp|inc")) {print $2}' | xargs -n 1 -P 16 -I{} -t sh -c 'clang-format-12 -i -style=file {}'
diff --git a/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp b/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp
index 67c0f2698c..df8b77aba1 100644
--- a/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp
+++ b/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp
@@ -135,6 +135,8 @@ class TestConvTensorRearrangeInterface : public ::testing::Test
 
             return col2img.IsSupportedArgument(argument);
         }
+        throw std::runtime_error("Conv_tensor_rearrange: problem with tensor rearrange operator. ");
+        return 1;
     }
 };