From 511f5eb24e9b12d92b28d05eb6200938bb58ba2e Mon Sep 17 00:00:00 2001 From: AviralGoelAMD Date: Thu, 24 Apr 2025 14:33:44 +0000 Subject: [PATCH] clang format --- .../99_toy_example/00_add_vector/add_vector.cpp | 13 +++++++------ ...k_gemm_pipeline_agmem_bgmem_creg_v2_askiplds.hpp | 6 +++--- ...k_gemm_pipeline_agmem_bgmem_creg_v2_askiplds.hpp | 6 +++--- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/example/ck_tile/99_toy_example/00_add_vector/add_vector.cpp b/example/ck_tile/99_toy_example/00_add_vector/add_vector.cpp index 7aea661b17..ef5c2afae4 100644 --- a/example/ck_tile/99_toy_example/00_add_vector/add_vector.cpp +++ b/example/ck_tile/99_toy_example/00_add_vector/add_vector.cpp @@ -64,12 +64,13 @@ bool run(const ck_tile::ArgParser& arg_parser) x_buf_b.ToDevice(x_host_b.data()); // Dividing the problem into blocktile, warptile, and vector - // The blocktile is the size of the tile that will be processed by a single thread block (also called work group) - // The warptile is the size of the tile that will be processed by a single warp (also called wavefront) - // The vector is the size of the tile that will be processed by a single thread (also called work item) - // The problem is divided into blocks of size BlockTile, each block is further divided into - // warps of size WarpTile and each warp is composed of 64 or 32 threads of size Vector - // each of the thread in a warp will process one vector worth elements of the data + // The blocktile is the size of the tile that will be processed by a single thread block (also + // called work group) The warptile is the size of the tile that will be processed by a single + // warp (also called wavefront) The vector is the size of the tile that will be processed by a + // single thread (also called work item) The problem is divided into blocks of size BlockTile, + // each block is further divided into warps of size WarpTile and each warp is composed of 64 or + // 32 threads of size Vector each of the thread in a warp will process one vector worth elements + // of the data using BlockTile = ck_tile::sequence<8192>; // Size of the block tile (Entire problem is divided // into blocks of this size) using BlockWarps = ck_tile::sequence<8>; // How many concurrent warps are in a block (Each warp diff --git a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds.hpp b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds.hpp index cfbd7d6376..32acb0f1b6 100644 --- a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds.hpp +++ b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds.hpp @@ -374,9 +374,9 @@ struct BlockGemmPipelineAGmemBGmemCReg { return operator()( a_dram_block_window_tmp, - [](const ADataType & a) { return a; }, + [](const ADataType& a) { return a; }, b_dram_block_window_tmp, - [](const BDataType & b) { return b; }, + [](const BDataType& b) { return b; }, a_reg_block_tensor_tmp, p_smem); } @@ -388,7 +388,7 @@ struct BlockGemmPipelineAGmemBGmemCReg { return operator()( b_dram_block_window_tmp, - [](const BDataType & b) { return b; }, + [](const BDataType& b) { return b; }, a_reg_block_tensor_tmp, p_smem); } diff --git a/example/ck_tile/99_toy_example/04_codegen_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds.hpp b/example/ck_tile/99_toy_example/04_codegen_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds.hpp index cfbd7d6376..32acb0f1b6 100644 --- a/example/ck_tile/99_toy_example/04_codegen_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds.hpp +++ b/example/ck_tile/99_toy_example/04_codegen_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds.hpp @@ -374,9 +374,9 @@ struct BlockGemmPipelineAGmemBGmemCReg { return operator()( a_dram_block_window_tmp, - [](const ADataType & a) { return a; }, + [](const ADataType& a) { return a; }, b_dram_block_window_tmp, - [](const BDataType & b) { return b; }, + [](const BDataType& b) { return b; }, a_reg_block_tensor_tmp, p_smem); } @@ -388,7 +388,7 @@ struct BlockGemmPipelineAGmemBGmemCReg { return operator()( b_dram_block_window_tmp, - [](const BDataType & b) { return b; }, + [](const BDataType& b) { return b; }, a_reg_block_tensor_tmp, p_smem); }