mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-05 06:01:23 +00:00
@@ -58,8 +58,7 @@ set(EXAMPLE_FMHA_FWD "tile_example_fmha_fwd")
|
||||
# not using add_example_executable() to add this target, since we don't want this to have
|
||||
# to be included in "make all/install/check"
|
||||
message("adding example ${EXAMPLE_FMHA_FWD}")
|
||||
add_executable(${EXAMPLE_FMHA_FWD} fmha_fwd.cpp)
|
||||
rocm_install(TARGETS ${EXAMPLE_FMHA_FWD} COMPONENT examples)
|
||||
add_executable(${EXAMPLE_FMHA_FWD} EXCLUDE_FROM_ALL fmha_fwd.cpp)
|
||||
target_include_directories(${EXAMPLE_FMHA_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
target_sources(${EXAMPLE_FMHA_FWD} PRIVATE ${FMHA_FWD_GEN_BLOBS})
|
||||
|
||||
@@ -67,8 +66,7 @@ set(EXAMPLE_FMHA_BWD "tile_example_fmha_bwd")
|
||||
# not using add_example_executable() to add this target, since we don't want this to have
|
||||
# to be included in "make all/install/check"
|
||||
message("adding example ${EXAMPLE_FMHA_BWD}")
|
||||
add_executable(${EXAMPLE_FMHA_BWD} fmha_bwd.cpp)
|
||||
rocm_install(TARGETS ${EXAMPLE_FMHA_BWD} COMPONENT examples)
|
||||
add_executable(${EXAMPLE_FMHA_BWD} EXCLUDE_FROM_ALL fmha_bwd.cpp)
|
||||
target_include_directories(${EXAMPLE_FMHA_BWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
target_sources(${EXAMPLE_FMHA_BWD} PRIVATE ${FMHA_BWD_GEN_BLOBS})
|
||||
|
||||
|
||||
@@ -26,8 +26,7 @@ add_custom_command(
|
||||
set(EXAMPLE_LAYERNORM2D_FWD "tile_example_layernorm2d_fwd")
|
||||
|
||||
message("adding example ${EXAMPLE_LAYERNORM2D_FWD}")
|
||||
add_executable(${EXAMPLE_LAYERNORM2D_FWD} layernorm2d_fwd.cpp)
|
||||
rocm_install(TARGETS ${EXAMPLE_LAYERNORM2D_FWD} COMPONENT examples)
|
||||
add_executable(${EXAMPLE_LAYERNORM2D_FWD} EXCLUDE_FROM_ALL layernorm2d_fwd.cpp)
|
||||
target_include_directories(${EXAMPLE_LAYERNORM2D_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
target_sources(${EXAMPLE_LAYERNORM2D_FWD} PRIVATE ${LAYERNORM2D_FWD_GEN_BLOBS})
|
||||
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
add_executable(tile_example_gemm_basic gemm_basic.cpp)
|
||||
rocm_install(TARGETS tile_example_gemm_basic COMPONENT examples)
|
||||
add_executable(tile_example_gemm_universal universal_gemm.cpp)
|
||||
rocm_install(TARGETS tile_example_gemm_universal COMPONENT examples)
|
||||
|
||||
add_executable(tile_example_gemm_basic EXCLUDE_FROM_ALL gemm_basic.cpp)
|
||||
add_executable(tile_example_gemm_universal EXCLUDE_FROM_ALL universal_gemm.cpp)
|
||||
set(EXAMPLE_GEMM_COMPILE_OPTIONS)
|
||||
if(CK_USE_OCP_FP8)
|
||||
list(APPEND EXAMPLE_GEMM_COMPILE_OPTIONS -DCK_TILE_USE_OCP_FP8)
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
for file in gemm_universal_*; do mv "$file" "${file/f16_f16_f16/fp16_fp16_fp16}"; done
|
||||
@@ -1,4 +1,3 @@
|
||||
# not using add_example_executable() to add this target, since we don't want this to have
|
||||
# to be included in "make all/install/check"
|
||||
add_executable(tile_example_img2col image_to_column.cpp)
|
||||
rocm_install(TARGETS tile_example_img2col COMPONENT examples)
|
||||
add_executable(tile_example_img2col EXCLUDE_FROM_ALL image_to_column.cpp)
|
||||
|
||||
@@ -3,9 +3,7 @@ set(EXAMPLE_REDUCE "tile_example_reduce")
|
||||
# to be included in "make all/install/check"
|
||||
message("adding example ${EXAMPLE_REDUCE}")
|
||||
|
||||
add_executable(${EXAMPLE_REDUCE} reduce.cpp)
|
||||
rocm_install(TARGETS ${EXAMPLE_REDUCE} COMPONENT examples)
|
||||
|
||||
add_executable(${EXAMPLE_REDUCE} EXCLUDE_FROM_ALL reduce.cpp)
|
||||
target_include_directories(${EXAMPLE_REDUCE} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
set(EXAMPLE_REDUCE_COMPILE_OPTIONS)
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# not using add_example_executable() to add this target, since we don't want this to have
|
||||
# to be included in "make all/install/check"
|
||||
add_executable(tile_example_permute permute.cpp)
|
||||
rocm_install(TARGETS tile_example_permute COMPONENT examples)
|
||||
add_executable(tile_example_permute EXCLUDE_FROM_ALL permute.cpp)
|
||||
|
||||
if(NOT DEFINED PERMUTE_USE_ALTERNATIVE_IMPL)
|
||||
# set(PERMUTE_USE_ALTERNATIVE_IMPL false)
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
add_executable(tile_example_topk_softmax topk_softmax.cpp topk_softmax_api.cpp)
|
||||
rocm_install(TARGETS tile_example_topk_softmax COMPONENT examples)
|
||||
|
||||
add_executable(tile_example_topk_softmax EXCLUDE_FROM_ALL topk_softmax.cpp topk_softmax_api.cpp)
|
||||
target_include_directories(tile_example_topk_softmax PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/)
|
||||
|
||||
set(EXAMPLE_TOPK_SOFTMAX_COMPILE_OPTIONS)
|
||||
# NOTE: we turn off undefined-func-template to let source compile without explicit declare function specializations
|
||||
list(APPEND EXAMPLE_TOPK_SOFTMAX_COMPILE_OPTIONS -Wno-undefined-func-template -Wno-float-equal)
|
||||
|
||||
@@ -26,8 +26,7 @@ add_custom_command(
|
||||
set(TILE_RMSNORM2D_FWD "tile_rmsnorm2d_fwd")
|
||||
|
||||
message("adding ${TILE_RMSNORM2D_FWD}")
|
||||
add_executable(${TILE_RMSNORM2D_FWD} rmsnorm2d_fwd.cpp)
|
||||
rocm_install(TARGETS ${TILE_RMSNORM2D_FWD} COMPONENT examples)
|
||||
add_executable(${TILE_RMSNORM2D_FWD} EXCLUDE_FROM_ALL rmsnorm2d_fwd.cpp)
|
||||
target_include_directories(${TILE_RMSNORM2D_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
target_sources(${TILE_RMSNORM2D_FWD} PRIVATE ${RMSNORM2D_FWD_GEN_BLOBS})
|
||||
|
||||
@@ -39,8 +38,7 @@ list(APPEND TILE_RMSNORM2D_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -Wno
|
||||
target_compile_options(${TILE_RMSNORM2D_FWD} PRIVATE ${TILE_RMSNORM2D_FWD_COMPILE_OPTIONS})
|
||||
|
||||
set(EXAMPLE_RMSNORM2D_FWD "tile_example_rmsnorm2d_fwd")
|
||||
add_executable(${EXAMPLE_RMSNORM2D_FWD} example_rmsnorm2d_fwd.cpp)
|
||||
rocm_install(TARGETS ${EXAMPLE_RMSNORM2D_FWD} COMPONENT examples)
|
||||
add_executable(${EXAMPLE_RMSNORM2D_FWD} EXCLUDE_FROM_ALL example_rmsnorm2d_fwd.cpp)
|
||||
target_compile_options(${EXAMPLE_RMSNORM2D_FWD} PRIVATE ${TILE_RMSNORM2D_FWD_COMPILE_OPTIONS})
|
||||
|
||||
# TODO: we have to turn off this global prop, otherwise the progress bar generated
|
||||
|
||||
@@ -3,8 +3,7 @@ set(TILE_ADD_RMSNORM2D_RDQUANT_FWD "tile_add_rmsnorm2d_rdquant_fwd")
|
||||
# to be included in "make all/install/check"
|
||||
message("adding ${TILE_ADD_RMSNORM2D_RDQUANT_FWD}")
|
||||
file(GLOB INSTANCE_SRCS instances/*.cpp)
|
||||
add_executable(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} add_rmsnorm2d_rdquant_fwd.cpp)
|
||||
rocm_install(TARGETS ${TILE_ADD_RMSNORM2D_RDQUANT_FWD} COMPONENT examples)
|
||||
add_executable(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} EXCLUDE_FROM_ALL add_rmsnorm2d_rdquant_fwd.cpp)
|
||||
target_include_directories(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
target_sources(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} PRIVATE ${INSTANCE_SRCS})
|
||||
|
||||
@@ -16,8 +15,7 @@ list(APPEND TILE_ADD_RMSNORM2D_RDQUANT_FWD_COMPILE_OPTIONS -Wno-undefined-func-t
|
||||
target_compile_options(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} PRIVATE ${TILE_ADD_RMSNORM2D_RDQUANT_FWD_COMPILE_OPTIONS})
|
||||
|
||||
set(EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD "tile_example_add_rmsnorm2d_rdquant_fwd")
|
||||
add_executable(${EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD} example_add_rmsnorm2d_rdquant_fwd.cpp)
|
||||
rocm_install(TARGETS ${EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD} COMPONENT examples)
|
||||
add_executable(${EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD} EXCLUDE_FROM_ALL example_add_rmsnorm2d_rdquant_fwd.cpp)
|
||||
target_compile_options(${EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD} PRIVATE ${TILE_ADD_RMSNORM2D_RDQUANT_FWD_COMPILE_OPTIONS})
|
||||
|
||||
# TODO: we have to turn off this global prop, otherwise the progress bar generated
|
||||
|
||||
@@ -67,14 +67,13 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
|
||||
using TypeConfig = AddRmsnormRdquantTypeConfig<InputDataType, QuantizedDataType>;
|
||||
|
||||
using ADataType = typename TypeConfig::ADataType;
|
||||
using BDataType = typename TypeConfig::BDataType;
|
||||
using GammaDataType = typename TypeConfig::GammaDataType;
|
||||
using XDataType = typename TypeConfig::XDataType;
|
||||
using UnquantYDataType = ck_tile::null_type;
|
||||
using YScaleDataType = typename TypeConfig::YScaleDataType;
|
||||
using QYDataType = typename TypeConfig::QYDataType;
|
||||
using ComputeDataType = float;
|
||||
using ADataType = typename TypeConfig::ADataType;
|
||||
using BDataType = typename TypeConfig::BDataType;
|
||||
using GammaDataType = typename TypeConfig::GammaDataType;
|
||||
using XDataType = typename TypeConfig::XDataType;
|
||||
using YScaleDataType = typename TypeConfig::YScaleDataType;
|
||||
using QYDataType = typename TypeConfig::QYDataType;
|
||||
using ComputeDataType = float;
|
||||
|
||||
// host verify
|
||||
ck_tile::HostTensor<ADataType> a_host({m, n}, {stride, 1});
|
||||
@@ -89,7 +88,6 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
|
||||
ck_tile::HostTensor<QYDataType> qy_host_ref({m, n}, {stride, 1});
|
||||
ck_tile::HostTensor<QYDataType> qy_host_dev({m, n}, {stride, 1});
|
||||
ck_tile::HostTensor<UnquantYDataType> unquant_y_host_ref({m, n}, {stride, 1});
|
||||
|
||||
ck_tile::FillUniformDistribution<ADataType>{-.5f, .5f}(a_host);
|
||||
ck_tile::FillUniformDistribution<BDataType>{-.5f, .5f}(b_host);
|
||||
@@ -193,9 +191,8 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
GammaDataType,
|
||||
ComputeDataType,
|
||||
YDataType,
|
||||
InvRmsDataType,
|
||||
UnquantYDataType>(
|
||||
x_host_ref, gamma_host, y_host, invRms_host_ref, unquant_y_host_ref, epsilon);
|
||||
InvRmsDataType>(
|
||||
x_host_ref, gamma_host, y_host, invRms_host_ref, epsilon);
|
||||
}
|
||||
|
||||
// yscale
|
||||
|
||||
21
example/ck_tile/11_add_rmsnorm2d_rdquant/example_add_rmsnorm2d_rdquant_fwd.cpp
Executable file → Normal file
21
example/ck_tile/11_add_rmsnorm2d_rdquant/example_add_rmsnorm2d_rdquant_fwd.cpp
Executable file → Normal file
@@ -62,14 +62,13 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
|
||||
assert(stride >= n);
|
||||
|
||||
using ADataType = DataType;
|
||||
using BDataType = DataType;
|
||||
using GammaDataType = DataType;
|
||||
using XDataType = DataType;
|
||||
using UnquantYDataType = ck_tile::null_type;
|
||||
using YScaleDataType = float;
|
||||
using QYDataType = ck_tile::int8_t;
|
||||
using ComputeDataType = float;
|
||||
using ADataType = DataType;
|
||||
using BDataType = DataType;
|
||||
using GammaDataType = DataType;
|
||||
using XDataType = DataType;
|
||||
using YScaleDataType = float;
|
||||
using QYDataType = ck_tile::int8_t;
|
||||
using ComputeDataType = float;
|
||||
|
||||
// host verify
|
||||
ck_tile::HostTensor<ADataType> a_host({m, n}, {stride, 1});
|
||||
@@ -82,7 +81,6 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
ck_tile::HostTensor<YScaleDataType> yscale_host_dev({m}, {1});
|
||||
ck_tile::HostTensor<QYDataType> qy_host_ref({m, n}, {stride, 1});
|
||||
ck_tile::HostTensor<QYDataType> qy_host_dev({m, n}, {stride, 1});
|
||||
ck_tile::HostTensor<UnquantYDataType> unquant_y_host_ref({m, n}, {stride, 1});
|
||||
|
||||
ck_tile::FillUniformDistribution<ADataType>{-.5f, .5f}(a_host);
|
||||
ck_tile::FillUniformDistribution<BDataType>{-.5f, .5f}(b_host);
|
||||
@@ -195,9 +193,8 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
GammaDataType,
|
||||
ComputeDataType,
|
||||
YDataType,
|
||||
InvRmsDataType,
|
||||
UnquantYDataType>(
|
||||
x_host_ref, gamma_host, y_host, invRms_host_ref, unquant_y_host_ref, epsilon);
|
||||
InvRmsDataType>(
|
||||
x_host_ref, gamma_host, y_host, invRms_host_ref, epsilon);
|
||||
}
|
||||
|
||||
// yscale
|
||||
|
||||
@@ -2,8 +2,7 @@ function (add_smoothquant_example TARGET_NAME MAIN_SRC)
|
||||
message("adding ${TARGET_NAME}")
|
||||
# not using add_example_executable() to add target, since we don't want this to have
|
||||
# to be included in "make all/install/check"
|
||||
add_executable(${TARGET_NAME} ${MAIN_SRC})
|
||||
rocm_install(TARGETS ${TARGET_NAME} COMPONENT examples)
|
||||
add_executable(${TARGET_NAME} EXCLUDE_FROM_ALL ${MAIN_SRC})
|
||||
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
|
||||
foreach(source IN LISTS ARGN)
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
add_executable(tile_example_moe_sorting moe_sorting.cpp moe_sorting_api.cpp)
|
||||
rocm_install(TARGETS tile_example_moe_sorting COMPONENT examples)
|
||||
add_executable(tile_example_moe_sorting EXCLUDE_FROM_ALL moe_sorting.cpp moe_sorting_api.cpp)
|
||||
target_include_directories(tile_example_moe_sorting PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/)
|
||||
|
||||
set(EXAMPLE_MOE_SORTING_COMPILE_OPTIONS)
|
||||
|
||||
@@ -2,8 +2,7 @@ function (add_moe_smoothquant_example TARGET_NAME MAIN_SRC)
|
||||
message("adding ${TARGET_NAME}")
|
||||
# not using add_example_executable() to add target, since we don't want this to have
|
||||
# to be included in "make all/install/check"
|
||||
add_executable(${TARGET_NAME} ${MAIN_SRC})
|
||||
rocm_install(TARGETS ${TARGET_NAME} COMPONENT examples)
|
||||
add_executable(${TARGET_NAME} EXCLUDE_FROM_ALL ${MAIN_SRC})
|
||||
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
|
||||
foreach(source IN LISTS ARGN)
|
||||
|
||||
@@ -3,8 +3,7 @@ set(TILE_EXAPMLE_FUSED_MOE "tile_example_fused_moe")
|
||||
# to be included in "make all/install/check"
|
||||
message("adding ${TILE_EXAPMLE_FUSED_MOE}")
|
||||
file(GLOB INSTANCE_SRCS instances/*.cpp)
|
||||
add_executable(${TILE_EXAPMLE_FUSED_MOE} main.cpp)
|
||||
rocm_install(TARGETS ${TILE_EXAPMLE_FUSED_MOE} COMPONENT examples)
|
||||
add_executable(${TILE_EXAPMLE_FUSED_MOE} EXCLUDE_FROM_ALL main.cpp)
|
||||
target_include_directories(${TILE_EXAPMLE_FUSED_MOE} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
target_sources(${TILE_EXAPMLE_FUSED_MOE} PRIVATE ${INSTANCE_SRCS})
|
||||
|
||||
|
||||
@@ -1,2 +1 @@
|
||||
add_executable(tile_example_batched_gemm batched_gemm.cpp)
|
||||
rocm_install(TARGETS tile_example_batched_gemm COMPONENT examples)
|
||||
add_executable(tile_example_batched_gemm EXCLUDE_FROM_ALL batched_gemm.cpp)
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
add_executable(tile_example_grouped_gemm grouped_gemm.cpp)
|
||||
rocm_install(TARGETS tile_example_grouped_gemm COMPONENT examples)
|
||||
add_executable(tile_example_grouped_gemm EXCLUDE_FROM_ALL grouped_gemm.cpp)
|
||||
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
add_executable(tile_example_flatmm_basic flatmm_basic.cpp)
|
||||
rocm_install(TARGETS tile_example_flatmm_basic COMPONENT examples)
|
||||
|
||||
add_executable(tile_example_flatmm_basic EXCLUDE_FROM_ALL flatmm_basic.cpp)
|
||||
|
||||
set(EXAMPLE_FLATMM_COMPILE_OPTIONS)
|
||||
# list(APPEND EXAMPLE_FLATMM_COMPILE_OPTIONS -Wno-undefined-func-template -Wno-float-equal)
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
set(TARGET_NAME tile_example_batched_transpose)
|
||||
add_executable(${TARGET_NAME} batched_transpose_example.cpp batched_transpose_api.cpp)
|
||||
rocm_install(TARGETS ${TARGET_NAME} COMPONENT examples)
|
||||
add_executable(${TARGET_NAME} EXCLUDE_FROM_ALL batched_transpose_example.cpp batched_transpose_api.cpp)
|
||||
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/)
|
||||
|
||||
# NOTE: we turn off undefined-func-template to let source compile without explicit declare function specializations
|
||||
list(APPEND EXAMPLE_BATCHED_TRANSPOSE_COMPILE_OPTIONS -Wno-undefined-func-template -Wno-float-equal)
|
||||
# list(APPEND EXAMPLE_BATCHED_TRANSPOSE_COMPILE_OPTIONS -v --save-temps -Wno-gnu-line-marker)
|
||||
target_compile_options(tile_example_batched_transpose PRIVATE ${EXAMPLE_BATCHED_TRANSPOSE_COMPILE_OPTIONS})
|
||||
|
||||
|
||||
@@ -14,11 +14,8 @@ add_subdirectory(11_add_rmsnorm2d_rdquant)
|
||||
add_subdirectory(12_smoothquant)
|
||||
add_subdirectory(13_moe_sorting)
|
||||
add_subdirectory(14_moe_smoothquant)
|
||||
add_subdirectory(15_fused_moe)
|
||||
add_subdirectory(16_batched_gemm)
|
||||
add_subdirectory(17_grouped_gemm)
|
||||
add_subdirectory(18_flatmm)
|
||||
add_subdirectory(35_batched_transpose)
|
||||
|
||||
if (SUPPORTED_GPU_TARGETS MATCHES "gfx94")
|
||||
add_subdirectory(15_fused_moe)
|
||||
endif()
|
||||
|
||||
Reference in New Issue
Block a user