From aaa757e36352b1f5d16d21b32fe23c2c820bfbd2 Mon Sep 17 00:00:00 2001 From: AviralGoelAMD Date: Thu, 24 Apr 2025 14:24:01 +0000 Subject: [PATCH] fixed function and struct names --- .../CMakeLists.txt | 4 +-- .../add_vector.cpp} | 25 ++++++++++--------- .../add_vector.hpp} | 9 +++---- .../reference_add_vector.hpp} | 2 +- example/ck_tile/99_toy_example/CMakeLists.txt | 2 +- 5 files changed, 20 insertions(+), 22 deletions(-) rename example/ck_tile/99_toy_example/{00_vector_add => 00_add_vector}/CMakeLists.txt (91%) rename example/ck_tile/99_toy_example/{00_vector_add/vector_add.cpp => 00_add_vector/add_vector.cpp} (89%) rename example/ck_tile/99_toy_example/{00_vector_add/vector_add.hpp => 00_add_vector/add_vector.hpp} (95%) rename example/ck_tile/99_toy_example/{00_vector_add/reference_vector_add.hpp => 00_add_vector/reference_add_vector.hpp} (93%) diff --git a/example/ck_tile/99_toy_example/00_vector_add/CMakeLists.txt b/example/ck_tile/99_toy_example/00_add_vector/CMakeLists.txt similarity index 91% rename from example/ck_tile/99_toy_example/00_vector_add/CMakeLists.txt rename to example/ck_tile/99_toy_example/00_add_vector/CMakeLists.txt index b6a7886d12..8c1665a1c4 100644 --- a/example/ck_tile/99_toy_example/00_vector_add/CMakeLists.txt +++ b/example/ck_tile/99_toy_example/00_add_vector/CMakeLists.txt @@ -1,9 +1,9 @@ -set(EXAMPLE_REDUCE "vector_add") +set(EXAMPLE_REDUCE "add_vector") # not using add_example_executable() to add this target, since we don't want this to have # to be included in "make all/install/check" message("adding example ${EXAMPLE_REDUCE}") -add_executable(${EXAMPLE_REDUCE} EXCLUDE_FROM_ALL vector_add.cpp) +add_executable(${EXAMPLE_REDUCE} EXCLUDE_FROM_ALL add_vector.cpp) target_include_directories(${EXAMPLE_REDUCE} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) set(EXAMPLE_REDUCE_COMPILE_OPTIONS) diff --git a/example/ck_tile/99_toy_example/00_vector_add/vector_add.cpp b/example/ck_tile/99_toy_example/00_add_vector/add_vector.cpp similarity index 89% 
rename from example/ck_tile/99_toy_example/00_vector_add/vector_add.cpp rename to example/ck_tile/99_toy_example/00_add_vector/add_vector.cpp index b7d9effa76..7aea661b17 100644 --- a/example/ck_tile/99_toy_example/00_vector_add/vector_add.cpp +++ b/example/ck_tile/99_toy_example/00_add_vector/add_vector.cpp @@ -1,6 +1,6 @@ #include "ck_tile/host.hpp" -#include "reference_vector_add.hpp" -#include "vector_add.hpp" +#include "reference_add_vector.hpp" +#include "add_vector.hpp" #include // This example demonstrates how to use the ck_tile library to perform an elementwise vector @@ -39,10 +39,10 @@ bool run(const ck_tile::ArgParser& arg_parser) int repeat = arg_parser.get_int("repeat"); ck_tile::HostTensor x_host_a( - {m}); // length input vector A, if given two arguments m, n the HostTensor will be created + {m}); // length input vector A, if given two arguments (m, n) the HostTensor will be created // with shape (m, n) ck_tile::HostTensor x_host_b( - {m}); // length input vector B, if given two arguments m, n the HostTensor will be created + {m}); // length input vector B, if given two arguments (m, n) the HostTensor will be created // with shape (m, n) ck_tile::HostTensor y_host_ref({m}); @@ -64,11 +64,12 @@ bool run(const ck_tile::ArgParser& arg_parser) x_buf_b.ToDevice(x_host_b.data()); // Dividing the problem into blocktile, warptile, and vector - // The blocktile is the size of the tile that will be processed by a single block - // The warptile is the size of the tile that will be processed by a single warp - // The vector is the size of the tile that will be processed by a single thread + // The blocktile is the size of the tile that will be processed by a single thread block (also called work group) + // The warptile is the size of the tile that will be processed by a single warp (also called wavefront) + // The vector is the size of the tile that will be processed by a single thread (also called work item) // The problem is divided into blocks of size 
BlockTile, each block is further divided into - // warps of size WarpTile and each warp is further divided into threads of size Vector + // warps of size WarpTile and each warp is composed of 64 or 32 threads of size Vector + // each thread in a warp will process one vector's worth of elements of the data using BlockTile = ck_tile::sequence<8192>; // Size of the block tile (Entire problem is divided // into blocks of this size) using BlockWarps = ck_tile::sequence<8>; // How many concurrent warps are in a block (Each warp @@ -91,7 +92,7 @@ bool run(const ck_tile::ArgParser& arg_parser) std::cout << "block x-size = " << BlockTile::at(ck_tile::number<0>{}) << std::endl; std::cout << "grid size " << kGridSize << std::endl; - using Shape = ck_tile::MultiplyVector; + using Shape = ck_tile::AddVectorShape; std::cout << "Problem Shape:: M = " << m << std::endl; std::cout << "BlockTile: " << BlockTile::at(ck_tile::number<0>{}) << std::endl; std::cout << "Number of Blocks in Grid: " << m / BlockTile::at(ck_tile::number<0>{}) @@ -107,14 +108,14 @@ bool run(const ck_tile::ArgParser& arg_parser) // What is a Problem in CKTile? // A Problem defines the shape of the data, the precision of the data - using Problem = ck_tile::MultiplyVectorProblem; + using Problem = ck_tile::AddVectorProblem; // What is a Policy in CKTile? 
// A Policy defines how to map the data between threads and data in memory // The kernel is the function that will be executed on the device // It requires a Problem and Policy to be defined - using Kernel = ck_tile::MultiplyVectorKernel; + using Kernel = ck_tile::AddVectorKernel; // The kernel is launched with the following parameters: float ave_time = launch_kernel( @@ -139,7 +140,7 @@ bool run(const ck_tile::ArgParser& arg_parser) if(do_validation) { - ck_tile::reference_vector_add(x_host_a, x_host_b, y_host_ref); + ck_tile::reference_add_vector(x_host_a, x_host_b, y_host_ref); y_buf.FromDevice(y_host_dev.mData.data()); pass = ck_tile::check_err(y_host_dev, y_host_ref); diff --git a/example/ck_tile/99_toy_example/00_vector_add/vector_add.hpp b/example/ck_tile/99_toy_example/00_add_vector/add_vector.hpp similarity index 95% rename from example/ck_tile/99_toy_example/00_vector_add/vector_add.hpp rename to example/ck_tile/99_toy_example/00_add_vector/add_vector.hpp index b028a5a634..26e569c21e 100644 --- a/example/ck_tile/99_toy_example/00_vector_add/vector_add.hpp +++ b/example/ck_tile/99_toy_example/00_add_vector/add_vector.hpp @@ -14,7 +14,7 @@ namespace ck_tile { // and the number of times the warp tile is repeated in the block tile // and the block size template -struct MultiplyVector +struct AddVectorShape { static constexpr index_t Block_M = BlockTile::at(number<0>{}); @@ -35,7 +35,7 @@ struct MultiplyVector }; template -struct MultiplyVectorProblem +struct AddVectorProblem { using XDataType = remove_cvref_t; using ComputeDataType = remove_cvref_t; @@ -65,7 +65,7 @@ struct AddDefaultPolicy }; template -struct MultiplyVectorKernel +struct AddVectorKernel { using Problem = ck_tile::remove_cvref_t; using Policy = ck_tile::remove_cvref_t; @@ -89,9 +89,6 @@ struct MultiplyVectorKernel number{}); // raw pointer, shape of the tensor, stride of the tensor, and // lastGarunteedVectorLength - // lastGarunteedVectorLength --> intuitively, this is the number of elements 
in the last - // dimension of the tensor that are guaranteed to be fetched by same thread - const auto x_m_n_b = make_naive_tensor_view( p_x_b, make_tuple(M), make_tuple(1), number{}); diff --git a/example/ck_tile/99_toy_example/00_vector_add/reference_vector_add.hpp b/example/ck_tile/99_toy_example/00_add_vector/reference_add_vector.hpp similarity index 93% rename from example/ck_tile/99_toy_example/00_vector_add/reference_vector_add.hpp rename to example/ck_tile/99_toy_example/00_add_vector/reference_add_vector.hpp index 80acd663d2..4c034797dc 100644 --- a/example/ck_tile/99_toy_example/00_vector_add/reference_vector_add.hpp +++ b/example/ck_tile/99_toy_example/00_add_vector/reference_add_vector.hpp @@ -10,7 +10,7 @@ namespace ck_tile { template -CK_TILE_HOST void reference_vector_add(const HostTensor& xa_m_n, +CK_TILE_HOST void reference_add_vector(const HostTensor& xa_m_n, const HostTensor& xb_m_n, HostTensor& y_m_n) { diff --git a/example/ck_tile/99_toy_example/CMakeLists.txt b/example/ck_tile/99_toy_example/CMakeLists.txt index 49c3677fea..ba225e086c 100644 --- a/example/ck_tile/99_toy_example/CMakeLists.txt +++ b/example/ck_tile/99_toy_example/CMakeLists.txt @@ -2,7 +2,7 @@ include_directories(AFTER ${CMAKE_CURRENT_LIST_DIR} ) -add_subdirectory(00_vector_add) +add_subdirectory(00_add_vector) add_subdirectory(01_add) add_subdirectory(02_gemm) add_subdirectory(03_flash_attention_fwd)