fix clang format

2026-06-08 23:38:11 +00:00 · 2024-10-14 17:27:49 -07:00
parent dda18da0bf
commit c019a85081
6 changed files with 155 additions and 159 deletions
--- a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp
@@ -42,27 +42,27 @@ template <typename GridwiseGemm,
          bool HasMainKBlockLoop>
 __global__ void
 #if CK_USE_LAUNCH_BOUNDS
-__launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU)
+    __launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU)
 #endif
-    kernel_batched_gemm_softmax_gemm_xdl_cshuffle_v1(
-        const FloatAB* __restrict__ p_a_grid,
-        const FloatAB* __restrict__ p_b_grid,
-        const FloatAB* __restrict__ p_b1_grid,
-        FloatC* __restrict__ p_c_grid,
-        const AElementwiseOperation a_element_op,
-        const BElementwiseOperation b_element_op,
-        const AccElementwiseOperation acc_element_op,
-        const B1ElementwiseOperation b1_element_op,
-        const CElementwiseOperation c_element_op,
-        const AGridDesc_AK0_M_AK1 a_grid_desc_ak0_m_ak1,
-        const BGridDesc_BK0_N_BK1 b_grid_desc_bk0_n_bk1,
-        const B1GridDesc_BK0_N_BK1 b1_grid_desc_bk0_n_bk1,
-        const CGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
-            c_grid_desc_mblock_mperblock_nblock_nperblock,
-        const Block2CTileMap block_2_ctile_map,
-        const index_t batch_count,
-        const ComputeBasePtrOfStridedBatch compute_base_ptr_of_batch,
-        const C0MatrixMask c0_matrix_mask)
+        kernel_batched_gemm_softmax_gemm_xdl_cshuffle_v1(
+            const FloatAB* __restrict__ p_a_grid,
+            const FloatAB* __restrict__ p_b_grid,
+            const FloatAB* __restrict__ p_b1_grid,
+            FloatC* __restrict__ p_c_grid,
+            const AElementwiseOperation a_element_op,
+            const BElementwiseOperation b_element_op,
+            const AccElementwiseOperation acc_element_op,
+            const B1ElementwiseOperation b1_element_op,
+            const CElementwiseOperation c_element_op,
+            const AGridDesc_AK0_M_AK1 a_grid_desc_ak0_m_ak1,
+            const BGridDesc_BK0_N_BK1 b_grid_desc_bk0_n_bk1,
+            const B1GridDesc_BK0_N_BK1 b1_grid_desc_bk0_n_bk1,
+            const CGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
+                c_grid_desc_mblock_mperblock_nblock_nperblock,
+            const Block2CTileMap block_2_ctile_map,
+            const index_t batch_count,
+            const ComputeBasePtrOfStridedBatch compute_base_ptr_of_batch,
+            const C0MatrixMask c0_matrix_mask)
 {
 #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
    defined(__gfx94__))
--- a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp
@@ -37,22 +37,22 @@ template <typename GridwiseGemm,
          bool HasMainKBlockLoop>
 __global__ void
 #if CK_USE_LAUNCH_BOUNDS
-__launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU)
+    __launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU)
 #endif
-    kernel_gemm_multiple_d_xdl_cshuffle(const ADataType* __restrict__ p_a_grid,
-                                        const BDataType* __restrict__ p_b_grid,
-                                        DsPointer p_ds_grid,
-                                        EDataType* __restrict__ p_e_grid,
-                                        const AElementwiseOperation a_element_op,
-                                        const BElementwiseOperation b_element_op,
-                                        const CDEElementwiseOperation cde_element_op,
-                                        const AGridDesc_AK0_M_AK1 a_grid_desc_ak0_m_ak1,
-                                        const BGridDesc_BK0_N_BK1 b_grid_desc_bk0_n_bk1,
-                                        const DsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
-                                            ds_grid_desc_mblock_mperblock_nblock_nperblock,
-                                        const EGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
-                                            e_grid_desc_mblock_mperblock_nblock_nperblock,
-                                        const Block2ETileMap block_2_etile_map)
+        kernel_gemm_multiple_d_xdl_cshuffle(const ADataType* __restrict__ p_a_grid,
+                                            const BDataType* __restrict__ p_b_grid,
+                                            DsPointer p_ds_grid,
+                                            EDataType* __restrict__ p_e_grid,
+                                            const AElementwiseOperation a_element_op,
+                                            const BElementwiseOperation b_element_op,
+                                            const CDEElementwiseOperation cde_element_op,
+                                            const AGridDesc_AK0_M_AK1 a_grid_desc_ak0_m_ak1,
+                                            const BGridDesc_BK0_N_BK1 b_grid_desc_bk0_n_bk1,
+                                            const DsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
+                                                ds_grid_desc_mblock_mperblock_nblock_nperblock,
+                                            const EGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
+                                                e_grid_desc_mblock_mperblock_nblock_nperblock,
+                                            const Block2ETileMap block_2_etile_map)
 {
 #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
    defined(__gfx94__))
--- a/include/ck/utility/amd_wave_read_first_lane.hpp
+++ b/include/ck/utility/amd_wave_read_first_lane.hpp
@@ -124,9 +124,8 @@ __device__ inline int64_t amd_wave_read_first_lane(int64_t value)
    return *reinterpret_cast<int64_t*>(to_obj);
 }

-template <
-    typename Object,
-    typename = ck::enable_if_t<ck::is_class_v<Object> && ck::is_trivially_copyable_v<Object>>>
+template <typename Object,
+          typename = ck::enable_if_t<ck::is_class_v<Object> && ck::is_trivially_copyable_v<Object>>>
 __device__ auto amd_wave_read_first_lane(const Object& obj)
 {
    using Size                = unsigned;
--- a/include/ck/utility/random_gen.hpp
+++ b/include/ck/utility/random_gen.hpp
@@ -43,15 +43,15 @@ template <typename T,
          ck::enable_if_t<!(ck::is_same<float, T>{} || ck::is_same<half_t, T>{}), bool> = false>
 __host__ __device__ uint32_t prand_generator(int id, T val, uint32_t seed = seed_t)
 {
-    #ifdef __HIPCC_RTC__
+#ifdef __HIPCC_RTC__
    static_cast<void>(id);
    static_cast<void>(val);
    static_cast<void>(seed);
-    #else
+#else
    std::ignore = id;
    std::ignore = val;
    std::ignore = seed;
-    #endif
+#endif

    return 0;
 }