mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-29 11:16:59 +00:00
fixed function and struct names
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
set(EXAMPLE_REDUCE "vector_add")
|
||||
set(EXAMPLE_REDUCE "add_vector")
|
||||
# not using add_example_executable() to add this target, since we don't want this to have
|
||||
# to be included in "make all/install/check"
|
||||
message("adding example ${EXAMPLE_REDUCE}")
|
||||
|
||||
add_executable(${EXAMPLE_REDUCE} EXCLUDE_FROM_ALL vector_add.cpp)
|
||||
add_executable(${EXAMPLE_REDUCE} EXCLUDE_FROM_ALL add_vector.cpp)
|
||||
target_include_directories(${EXAMPLE_REDUCE} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
set(EXAMPLE_REDUCE_COMPILE_OPTIONS)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#include "ck_tile/host.hpp"
|
||||
#include "reference_vector_add.hpp"
|
||||
#include "vector_add.hpp"
|
||||
#include "reference_add_vector.hpp"
|
||||
#include "add_vector.hpp"
|
||||
#include <cstring>
|
||||
|
||||
// This example demonstrates how to use the ck_tile library to perform an elementwise vector
|
||||
@@ -39,10 +39,10 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
int repeat = arg_parser.get_int("repeat");
|
||||
|
||||
ck_tile::HostTensor<XDataType> x_host_a(
|
||||
{m}); // length input vector A, if given two arguments m, n the HostTensor will be created
|
||||
{m}); // length input vector A, if given two arguments (m, n) the HostTensor will be created
|
||||
// with shape (m, n)
|
||||
ck_tile::HostTensor<XDataType> x_host_b(
|
||||
{m}); // length input vector B, if given two arguments m, n the HostTensor will be created
|
||||
{m}); // length input vector B, if given two arguments (m, n) the HostTensor will be created
|
||||
// with shape (m, n)
|
||||
|
||||
ck_tile::HostTensor<YDataType> y_host_ref({m});
|
||||
@@ -64,11 +64,12 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
x_buf_b.ToDevice(x_host_b.data());
|
||||
|
||||
// Dividing the problem into blocktile, warptile, and vector
|
||||
// The blocktile is the size of the tile that will be processed by a single block
|
||||
// The warptile is the size of the tile that will be processed by a single warp
|
||||
// The vector is the size of the tile that will be processed by a single thread
|
||||
// The blocktile is the size of the tile that will be processed by a single thread block (also called work group)
|
||||
// The warptile is the size of the tile that will be processed by a single warp (also called wavefront)
|
||||
// The vector is the size of the tile that will be processed by a single thread (also called work item)
|
||||
// The problem is divided into blocks of size BlockTile, each block is further divided into
|
||||
// warps of size WarpTile and each warp is further divided into threads of size Vector
|
||||
// warps of size WarpTile and each warp is composed of 64 or 32 threads of size Vector
|
||||
// each of the thread in a warp will process one vector worth elements of the data
|
||||
using BlockTile = ck_tile::sequence<8192>; // Size of the block tile (Entire problem is divided
|
||||
// into blocks of this size)
|
||||
using BlockWarps = ck_tile::sequence<8>; // How many concurrent warps are in a block (Each warp
|
||||
@@ -91,7 +92,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
std::cout << "block x-size = " << BlockTile::at(ck_tile::number<0>{}) << std::endl;
|
||||
std::cout << "grid size " << kGridSize << std::endl;
|
||||
|
||||
using Shape = ck_tile::MultiplyVector<BlockWarps, BlockTile, WarpTile, Vector>;
|
||||
using Shape = ck_tile::AddVectorShape<BlockWarps, BlockTile, WarpTile, Vector>;
|
||||
std::cout << "Problem Shape:: M = " << m << std::endl;
|
||||
std::cout << "BlockTile: " << BlockTile::at(ck_tile::number<0>{}) << std::endl;
|
||||
std::cout << "Number of Blocks in Grid: " << m / BlockTile::at(ck_tile::number<0>{})
|
||||
@@ -107,14 +108,14 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
|
||||
// What is a Problem in CKTile?
|
||||
// A Problem defines the shape of the data, the precision of the data
|
||||
using Problem = ck_tile::MultiplyVectorProblem<XDataType, ComputeDataType, YDataType, Shape>;
|
||||
using Problem = ck_tile::AddVectorProblem<XDataType, ComputeDataType, YDataType, Shape>;
|
||||
|
||||
// What is a Policy in CKTile?
|
||||
// A Policy defines how to map the data between threads and data in memory
|
||||
|
||||
// The kernel is the function that will be executed on the device
|
||||
// It requires a Problem and Policy to be defined
|
||||
using Kernel = ck_tile::MultiplyVectorKernel<Problem>;
|
||||
using Kernel = ck_tile::AddVectorKernel<Problem>;
|
||||
|
||||
// The kernel is launched with the following parameters:
|
||||
float ave_time = launch_kernel(
|
||||
@@ -139,7 +140,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
|
||||
if(do_validation)
|
||||
{
|
||||
ck_tile::reference_vector_add<XDataType, YDataType>(x_host_a, x_host_b, y_host_ref);
|
||||
ck_tile::reference_add_vector<XDataType, YDataType>(x_host_a, x_host_b, y_host_ref);
|
||||
y_buf.FromDevice(y_host_dev.mData.data());
|
||||
pass = ck_tile::check_err(y_host_dev, y_host_ref);
|
||||
|
||||
@@ -14,7 +14,7 @@ namespace ck_tile {
|
||||
// and the number of times the warp tile is repeated in the block tile
|
||||
// and the block size
|
||||
template <typename BlockWarps, typename BlockTile, typename WarpTile, typename Vector>
|
||||
struct MultiplyVector
|
||||
struct AddVectorShape
|
||||
{
|
||||
static constexpr index_t Block_M = BlockTile::at(number<0>{});
|
||||
|
||||
@@ -35,7 +35,7 @@ struct MultiplyVector
|
||||
};
|
||||
|
||||
template <typename XDataType_, typename ComputeDataType_, typename YDataType_, typename BlockShape_>
|
||||
struct MultiplyVectorProblem
|
||||
struct AddVectorProblem
|
||||
{
|
||||
using XDataType = remove_cvref_t<XDataType_>;
|
||||
using ComputeDataType = remove_cvref_t<ComputeDataType_>;
|
||||
@@ -65,7 +65,7 @@ struct AddDefaultPolicy
|
||||
};
|
||||
|
||||
template <typename Problem_, typename Policy_ = AddDefaultPolicy>
|
||||
struct MultiplyVectorKernel
|
||||
struct AddVectorKernel
|
||||
{
|
||||
using Problem = ck_tile::remove_cvref_t<Problem_>;
|
||||
using Policy = ck_tile::remove_cvref_t<Policy_>;
|
||||
@@ -89,9 +89,6 @@ struct MultiplyVectorKernel
|
||||
number<S::Vector_M>{}); // raw pointer, shape of the tensor, stride of the tensor, and
|
||||
// lastGarunteedVectorLength
|
||||
|
||||
// lastGarunteedVectorLength --> intuitively, this is the number of elements in the last
|
||||
// dimension of the tensor that are guaranteed to be fetched by same thread
|
||||
|
||||
const auto x_m_n_b = make_naive_tensor_view<address_space_enum::global>(
|
||||
p_x_b, make_tuple(M), make_tuple(1), number<S::Vector_M>{});
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
namespace ck_tile {
|
||||
|
||||
template <typename XDataType, typename YDataType>
|
||||
CK_TILE_HOST void reference_vector_add(const HostTensor<XDataType>& xa_m_n,
|
||||
CK_TILE_HOST void reference_add_vector(const HostTensor<XDataType>& xa_m_n,
|
||||
const HostTensor<XDataType>& xb_m_n,
|
||||
HostTensor<YDataType>& y_m_n)
|
||||
{
|
||||
@@ -2,7 +2,7 @@ include_directories(AFTER
|
||||
${CMAKE_CURRENT_LIST_DIR}
|
||||
)
|
||||
|
||||
add_subdirectory(00_vector_add)
|
||||
add_subdirectory(00_add_vector)
|
||||
add_subdirectory(01_add)
|
||||
add_subdirectory(02_gemm)
|
||||
add_subdirectory(03_flash_attention_fwd)
|
||||
|
||||
Reference in New Issue
Block a user