fixed function and struct names

This commit is contained in:
AviralGoelAMD
2025-04-24 14:24:01 +00:00
parent b26f00cab2
commit aaa757e363
5 changed files with 20 additions and 22 deletions

View File

@@ -1,9 +1,9 @@
set(EXAMPLE_REDUCE "vector_add")
set(EXAMPLE_REDUCE "add_vector")
# not using add_example_executable() to add this target, since we don't want this to have
# to be included in "make all/install/check"
message("adding example ${EXAMPLE_REDUCE}")
add_executable(${EXAMPLE_REDUCE} EXCLUDE_FROM_ALL vector_add.cpp)
add_executable(${EXAMPLE_REDUCE} EXCLUDE_FROM_ALL add_vector.cpp)
target_include_directories(${EXAMPLE_REDUCE} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
set(EXAMPLE_REDUCE_COMPILE_OPTIONS)

View File

@@ -1,6 +1,6 @@
#include "ck_tile/host.hpp"
#include "reference_vector_add.hpp"
#include "vector_add.hpp"
#include "reference_add_vector.hpp"
#include "add_vector.hpp"
#include <cstring>
// This example demonstrates how to use the ck_tile library to perform an elementwise vector
@@ -39,10 +39,10 @@ bool run(const ck_tile::ArgParser& arg_parser)
int repeat = arg_parser.get_int("repeat");
ck_tile::HostTensor<XDataType> x_host_a(
{m}); // length input vector A, if given two arguments m, n the HostTensor will be created
{m}); // length input vector A, if given two arguments (m, n) the HostTensor will be created
// with shape (m, n)
ck_tile::HostTensor<XDataType> x_host_b(
{m}); // length input vector B, if given two arguments m, n the HostTensor will be created
{m}); // length input vector B, if given two arguments (m, n) the HostTensor will be created
// with shape (m, n)
ck_tile::HostTensor<YDataType> y_host_ref({m});
@@ -64,11 +64,12 @@ bool run(const ck_tile::ArgParser& arg_parser)
x_buf_b.ToDevice(x_host_b.data());
// Dividing the problem into blocktile, warptile, and vector
// The blocktile is the size of the tile that will be processed by a single block
// The warptile is the size of the tile that will be processed by a single warp
// The vector is the size of the tile that will be processed by a single thread
// The blocktile is the size of the tile that will be processed by a single thread block (also called work group)
// The warptile is the size of the tile that will be processed by a single warp (also called wavefront)
// The vector is the size of the tile that will be processed by a single thread (also called work item)
// The problem is divided into blocks of size BlockTile, each block is further divided into
// warps of size WarpTile and each warp is further divided into threads of size Vector
// warps of size WarpTile and each warp is composed of 64 or 32 threads of size Vector
// each of the thread in a warp will process one vector worth elements of the data
using BlockTile = ck_tile::sequence<8192>; // Size of the block tile (Entire problem is divided
// into blocks of this size)
using BlockWarps = ck_tile::sequence<8>; // How many concurrent warps are in a block (Each warp
@@ -91,7 +92,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
std::cout << "block x-size = " << BlockTile::at(ck_tile::number<0>{}) << std::endl;
std::cout << "grid size " << kGridSize << std::endl;
using Shape = ck_tile::MultiplyVector<BlockWarps, BlockTile, WarpTile, Vector>;
using Shape = ck_tile::AddVectorShape<BlockWarps, BlockTile, WarpTile, Vector>;
std::cout << "Problem Shape:: M = " << m << std::endl;
std::cout << "BlockTile: " << BlockTile::at(ck_tile::number<0>{}) << std::endl;
std::cout << "Number of Blocks in Grid: " << m / BlockTile::at(ck_tile::number<0>{})
@@ -107,14 +108,14 @@ bool run(const ck_tile::ArgParser& arg_parser)
// What is a Problem in CKTile?
// A Problem defines the shape of the data, the precision of the data
using Problem = ck_tile::MultiplyVectorProblem<XDataType, ComputeDataType, YDataType, Shape>;
using Problem = ck_tile::AddVectorProblem<XDataType, ComputeDataType, YDataType, Shape>;
// What is a Policy in CKTile?
// A Policy defines how to map the data between threads and data in memory
// The kernel is the function that will be executed on the device
// It requires a Problem and Policy to be defined
using Kernel = ck_tile::MultiplyVectorKernel<Problem>;
using Kernel = ck_tile::AddVectorKernel<Problem>;
// The kernel is launched with the following parameters:
float ave_time = launch_kernel(
@@ -139,7 +140,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
if(do_validation)
{
ck_tile::reference_vector_add<XDataType, YDataType>(x_host_a, x_host_b, y_host_ref);
ck_tile::reference_add_vector<XDataType, YDataType>(x_host_a, x_host_b, y_host_ref);
y_buf.FromDevice(y_host_dev.mData.data());
pass = ck_tile::check_err(y_host_dev, y_host_ref);

View File

@@ -14,7 +14,7 @@ namespace ck_tile {
// and the number of times the warp tile is repeated in the block tile
// and the block size
template <typename BlockWarps, typename BlockTile, typename WarpTile, typename Vector>
struct MultiplyVector
struct AddVectorShape
{
static constexpr index_t Block_M = BlockTile::at(number<0>{});
@@ -35,7 +35,7 @@ struct MultiplyVector
};
template <typename XDataType_, typename ComputeDataType_, typename YDataType_, typename BlockShape_>
struct MultiplyVectorProblem
struct AddVectorProblem
{
using XDataType = remove_cvref_t<XDataType_>;
using ComputeDataType = remove_cvref_t<ComputeDataType_>;
@@ -65,7 +65,7 @@ struct AddDefaultPolicy
};
template <typename Problem_, typename Policy_ = AddDefaultPolicy>
struct MultiplyVectorKernel
struct AddVectorKernel
{
using Problem = ck_tile::remove_cvref_t<Problem_>;
using Policy = ck_tile::remove_cvref_t<Policy_>;
@@ -89,9 +89,6 @@ struct MultiplyVectorKernel
number<S::Vector_M>{}); // raw pointer, shape of the tensor, stride of the tensor, and
// lastGarunteedVectorLength
// lastGarunteedVectorLength --> intuitively, this is the number of elements in the last
// dimension of the tensor that are guaranteed to be fetched by same thread
const auto x_m_n_b = make_naive_tensor_view<address_space_enum::global>(
p_x_b, make_tuple(M), make_tuple(1), number<S::Vector_M>{});

View File

@@ -10,7 +10,7 @@
namespace ck_tile {
template <typename XDataType, typename YDataType>
CK_TILE_HOST void reference_vector_add(const HostTensor<XDataType>& xa_m_n,
CK_TILE_HOST void reference_add_vector(const HostTensor<XDataType>& xa_m_n,
const HostTensor<XDataType>& xb_m_n,
HostTensor<YDataType>& y_m_n)
{

View File

@@ -2,7 +2,7 @@ include_directories(AFTER
${CMAKE_CURRENT_LIST_DIR}
)
add_subdirectory(00_vector_add)
add_subdirectory(00_add_vector)
add_subdirectory(01_add)
add_subdirectory(02_gemm)
add_subdirectory(03_flash_attention_fwd)