fixed function and struct names

2026-06-29 11:16:59 +00:00 · 2025-04-24 14:24:01 +00:00
parent b26f00cab2
commit aaa757e363
5 changed files with 20 additions and 22 deletions
--- a/example/ck_tile/99_toy_example/00_add_vector/CMakeLists.txt
+++ b/example/ck_tile/99_toy_example/00_add_vector/CMakeLists.txt
@@ -1,9 +1,9 @@
-set(EXAMPLE_REDUCE "vector_add")
+set(EXAMPLE_REDUCE "add_vector")
 # not using add_example_executable() to add this target, since we don't want this to have
 # to be included in "make all/install/check"
 message("adding example ${EXAMPLE_REDUCE}")

-add_executable(${EXAMPLE_REDUCE} EXCLUDE_FROM_ALL vector_add.cpp)
+add_executable(${EXAMPLE_REDUCE} EXCLUDE_FROM_ALL add_vector.cpp)
 target_include_directories(${EXAMPLE_REDUCE} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
 set(EXAMPLE_REDUCE_COMPILE_OPTIONS)

--- a/example/ck_tile/99_toy_example/00_add_vector/add_vector.cpp
+++ b/example/ck_tile/99_toy_example/00_add_vector/add_vector.cpp
@@ -1,6 +1,6 @@
 #include "ck_tile/host.hpp"
-#include "reference_vector_add.hpp"
-#include "vector_add.hpp"
+#include "reference_add_vector.hpp"
+#include "add_vector.hpp"
 #include <cstring>

 // This example demonstrates how to use the ck_tile library to perform an elementwise vector
@@ -39,10 +39,10 @@ bool run(const ck_tile::ArgParser& arg_parser)
    int repeat         = arg_parser.get_int("repeat");

    ck_tile::HostTensor<XDataType> x_host_a(
-        {m}); // length input vector A, if given two arguments m, n the HostTensor will be created
+        {m}); // length input vector A, if given two arguments (m, n) the HostTensor will be created
              // with shape (m, n)
    ck_tile::HostTensor<XDataType> x_host_b(
-        {m}); // length input vector B, if given two arguments m, n the HostTensor will be created
+        {m}); // length input vector B, if given two arguments (m, n) the HostTensor will be created
              // with shape (m, n)

    ck_tile::HostTensor<YDataType> y_host_ref({m});
@@ -64,11 +64,12 @@ bool run(const ck_tile::ArgParser& arg_parser)
    x_buf_b.ToDevice(x_host_b.data());

    // Dividing the problem into blocktile, warptile, and vector
-    // The blocktile is the size of the tile that will be processed by a single block
-    // The warptile is the size of the tile that will be processed by a single warp
-    // The vector is the size of the tile that will be processed by a single thread
+    // The blocktile is the size of the tile that will be processed by a single thread block (also called work group)
+    // The warptile is the size of the tile that will be processed by a single warp (also called wavefront)
+    // The vector is the size of the tile that will be processed by a single thread (also called work item)
    // The problem is divided into blocks of size BlockTile, each block is further divided into
-    // warps of size WarpTile and each warp is further divided into threads of size Vector
+    // warps of size WarpTile and each warp is composed of 64 or 32 threads of size Vector
+    // each of the thread in a warp will process one vector worth elements of the data
    using BlockTile = ck_tile::sequence<8192>; // Size of the block tile (Entire problem is divided
                                               // into blocks of this size)
    using BlockWarps = ck_tile::sequence<8>; // How many concurrent warps are in a block (Each warp
@@ -91,7 +92,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
    std::cout << "block x-size = " << BlockTile::at(ck_tile::number<0>{}) << std::endl;
    std::cout << "grid size " << kGridSize << std::endl;

-    using Shape = ck_tile::MultiplyVector<BlockWarps, BlockTile, WarpTile, Vector>;
+    using Shape = ck_tile::AddVectorShape<BlockWarps, BlockTile, WarpTile, Vector>;
    std::cout << "Problem Shape:: M = " << m << std::endl;
    std::cout << "BlockTile: " << BlockTile::at(ck_tile::number<0>{}) << std::endl;
    std::cout << "Number of Blocks in Grid: " << m / BlockTile::at(ck_tile::number<0>{})
@@ -107,14 +108,14 @@ bool run(const ck_tile::ArgParser& arg_parser)

    // What is a Problem in CKTile?
    // A Problem defines the shape of the data, the precision of the data
-    using Problem = ck_tile::MultiplyVectorProblem<XDataType, ComputeDataType, YDataType, Shape>;
+    using Problem = ck_tile::AddVectorProblem<XDataType, ComputeDataType, YDataType, Shape>;

    // What is a Policy in CKTile?
    // A Policy defines how to map the data between threads and data in memory

    // The kernel is the function that will be executed on the device
    // It requires a Problem and Policy to be defined
-    using Kernel = ck_tile::MultiplyVectorKernel<Problem>;
+    using Kernel = ck_tile::AddVectorKernel<Problem>;

    // The kernel is launched with the following parameters:
    float ave_time = launch_kernel(
@@ -139,7 +140,7 @@ bool run(const ck_tile::ArgParser& arg_parser)

    if(do_validation)
    {
-        ck_tile::reference_vector_add<XDataType, YDataType>(x_host_a, x_host_b, y_host_ref);
+        ck_tile::reference_add_vector<XDataType, YDataType>(x_host_a, x_host_b, y_host_ref);
        y_buf.FromDevice(y_host_dev.mData.data());
        pass = ck_tile::check_err(y_host_dev, y_host_ref);

--- a/example/ck_tile/99_toy_example/00_add_vector/add_vector.hpp
+++ b/example/ck_tile/99_toy_example/00_add_vector/add_vector.hpp
@@ -14,7 +14,7 @@ namespace ck_tile {
 // and the number of times the warp tile is repeated in the block tile
 // and the block size
 template <typename BlockWarps, typename BlockTile, typename WarpTile, typename Vector>
-struct MultiplyVector
+struct AddVectorShape
 {
    static constexpr index_t Block_M = BlockTile::at(number<0>{});

@@ -35,7 +35,7 @@ struct MultiplyVector
 };

 template <typename XDataType_, typename ComputeDataType_, typename YDataType_, typename BlockShape_>
-struct MultiplyVectorProblem
+struct AddVectorProblem
 {
    using XDataType       = remove_cvref_t<XDataType_>;
    using ComputeDataType = remove_cvref_t<ComputeDataType_>;
@@ -65,7 +65,7 @@ struct AddDefaultPolicy
 };

 template <typename Problem_, typename Policy_ = AddDefaultPolicy>
-struct MultiplyVectorKernel
+struct AddVectorKernel
 {
    using Problem = ck_tile::remove_cvref_t<Problem_>;
    using Policy  = ck_tile::remove_cvref_t<Policy_>;
@@ -89,9 +89,6 @@ struct MultiplyVectorKernel
            number<S::Vector_M>{}); // raw pointer, shape of the tensor, stride of the tensor, and
                                    // lastGarunteedVectorLength

-        // lastGarunteedVectorLength --> intuitively, this is the number of elements in the last
-        // dimension of the tensor that are guaranteed to be fetched by same thread
-
        const auto x_m_n_b = make_naive_tensor_view<address_space_enum::global>(
            p_x_b, make_tuple(M), make_tuple(1), number<S::Vector_M>{});

--- a/example/ck_tile/99_toy_example/00_add_vector/reference_add_vector.hpp
+++ b/example/ck_tile/99_toy_example/00_add_vector/reference_add_vector.hpp
@@ -10,7 +10,7 @@
 namespace ck_tile {

 template <typename XDataType, typename YDataType>
-CK_TILE_HOST void reference_vector_add(const HostTensor<XDataType>& xa_m_n,
+CK_TILE_HOST void reference_add_vector(const HostTensor<XDataType>& xa_m_n,
                                       const HostTensor<XDataType>& xb_m_n,
                                       HostTensor<YDataType>& y_m_n)
 {
--- a/example/ck_tile/99_toy_example/CMakeLists.txt
+++ b/example/ck_tile/99_toy_example/CMakeLists.txt
@@ -2,7 +2,7 @@ include_directories(AFTER
  ${CMAKE_CURRENT_LIST_DIR}
 )

-add_subdirectory(00_vector_add)
+add_subdirectory(00_add_vector)
 add_subdirectory(01_add)
 add_subdirectory(02_gemm)
 add_subdirectory(03_flash_attention_fwd)