diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_moe_gemm.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_moe_gemm.hpp
index c1b52e00a2..7582669e08 100644
--- a/include/ck/tensor_operation/gpu/grid/gridwise_moe_gemm.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_moe_gemm.hpp
@@ -194,7 +194,6 @@ struct GridwiseMoeGemm
     // static constexpr index_t NumTokens = 1;
     static constexpr index_t SortedTileSize = MPerBlock;
 
-
     static constexpr auto MakeDsGridPointer()
     {
         return generate_tuple(
@@ -1471,11 +1470,11 @@ struct GridwiseMoeGemm
                                     else if(ActivationOperation == Activation::gelu)
                                     {
                                         const float scale_up =
-                                        p_scale_b[(n0 * NWave * NPerXdl + problem.N) *
-                                                  PerTokenQuant];
+                                            p_scale_b[(n0 * NWave * NPerXdl + problem.N) *
+                                                      PerTokenQuant];
                                         auto gate = scale_a * scale_b * c_thread_buf[cidx];
                                         auto up   = scale_a * scale_up * c_thread_buf_up[cidx];
-                                        if constexpr (is_same_v<remove_cvref_t<BDataType>, pk_i4_t>)
+                                        if constexpr(is_same_v<remove_cvref_t<BDataType>, pk_i4_t>)
                                         {
                                             gate *= 16;
                                             up *= 16;
@@ -1490,7 +1489,7 @@ struct GridwiseMoeGemm
                                                       PerTokenQuant];
                                         auto gate = scale_a * scale_b * c_thread_buf[cidx];
                                         auto up   = scale_a * scale_up * c_thread_buf_up[cidx];
-                                        if constexpr (is_same_v<remove_cvref_t<BDataType>, pk_i4_t>)
+                                        if constexpr(is_same_v<remove_cvref_t<BDataType>, pk_i4_t>)
                                         {
                                             gate *= 16;
                                             up *= 16;
diff --git a/include/ck/utility/dynamic_buffer.hpp b/include/ck/utility/dynamic_buffer.hpp
index 04d3a9790a..1d80f196b5 100644
--- a/include/ck/utility/dynamic_buffer.hpp
+++ b/include/ck/utility/dynamic_buffer.hpp
@@ -25,7 +25,7 @@ template <AddressSpaceEnum BufferAddressSpace,
           typename ElementSpaceSize,
           bool InvalidElementUseNumericalZeroValue,
           AmdBufferCoherenceEnum coherence = AmdBufferCoherenceEnum::DefaultCoherence,
-          typename IndexType = index_t>
+          typename IndexType               = index_t>
 struct DynamicBuffer
 {
     using type = T;
@@ -380,13 +380,14 @@ struct DynamicBuffer
             (is_same_v<remove_cvref_t<scalar_t>, half_t> && scalar_per_x_vector % 2 == 0) ||
             (is_same_v<remove_cvref_t<scalar_t>, bhalf_t> && scalar_per_x_vector % 2 == 0);
 #elif CK_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER && (!CK_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT)
-        bool constexpr use_amd_buffer_addressing = sizeof(IndexType) <= sizeof(int32_t) && is_same_v<remove_cvref_t<scalar_t>, int32_t>;
+        bool constexpr use_amd_buffer_addressing =
+            sizeof(IndexType) <= sizeof(int32_t) && is_same_v<remove_cvref_t<scalar_t>, int32_t>;
 #elif(!CK_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER) && CK_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT
         bool constexpr use_amd_buffer_addressing =
-        sizeof(IndexType) <= sizeof(int32_t) && (
-            is_same_v<remove_cvref_t<scalar_t>, float> ||
-            (is_same_v<remove_cvref_t<scalar_t>, half_t> && scalar_per_x_vector % 2 == 0) ||
-            (is_same_v<remove_cvref_t<scalar_t>, bhalf_t> && scalar_per_x_vector % 2 == 0));
+            sizeof(IndexType) <= sizeof(int32_t) &&
+            (is_same_v<remove_cvref_t<scalar_t>, float> ||
+             (is_same_v<remove_cvref_t<scalar_t>, half_t> && scalar_per_x_vector % 2 == 0) ||
+             (is_same_v<remove_cvref_t<scalar_t>, bhalf_t> && scalar_per_x_vector % 2 == 0));
 #else
         bool constexpr use_amd_buffer_addressing = false;
 #endif
@@ -424,8 +425,9 @@ struct DynamicBuffer
         static_assert(GetAddressSpace() == AddressSpaceEnum::Global, "only support global mem");
 
 #if CK_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64
-        using scalar_t                           = typename scalar_type<remove_cvref_t<T>>::type;
-        bool constexpr use_amd_buffer_addressing = sizeof(IndexType) <= sizeof(int32_t) && is_same_v<remove_cvref_t<scalar_t>, double>;
+        using scalar_t = typename scalar_type<remove_cvref_t<T>>::type;
+        bool constexpr use_amd_buffer_addressing =
+            sizeof(IndexType) <= sizeof(int32_t) && is_same_v<remove_cvref_t<scalar_t>, double>;
 #else
         bool constexpr use_amd_buffer_addressing = false;
 #endif
@@ -462,7 +464,8 @@ template <AddressSpaceEnum BufferAddressSpace,
           AmdBufferCoherenceEnum coherence = AmdBufferCoherenceEnum::DefaultCoherence,
           typename T,
           typename ElementSpaceSize>
-__host__ __device__ constexpr auto make_long_dynamic_buffer(T* p, ElementSpaceSize element_space_size)
+__host__ __device__ constexpr auto make_long_dynamic_buffer(T* p,
+                                                            ElementSpaceSize element_space_size)
 {
     return DynamicBuffer<BufferAddressSpace, T, ElementSpaceSize, true, coherence, long_index_t>{
         p, element_space_size};