mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 18:17:44 +00:00
Merge commit 'cfae8634313f804593d123b3ec51a43319f5fab1' into develop
This commit is contained in:
@@ -42,19 +42,19 @@ DOXYFILE_ENCODING = UTF-8
|
||||
# title of most generated pages and in a few other places.
|
||||
# The default value is: My Project.
|
||||
|
||||
PROJECT_NAME = "ck"
|
||||
PROJECT_NAME = "Composable Kernel"
|
||||
|
||||
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
|
||||
# could be handy for archiving the generated documentation or if some version
|
||||
# control system is used.
|
||||
|
||||
PROJECT_NUMBER = v3.0.1.0
|
||||
PROJECT_NUMBER =
|
||||
|
||||
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
||||
# for a project that appears at the top of each page and should give viewer a
|
||||
# quick idea about the purpose of the project. Keep the description short.
|
||||
|
||||
PROJECT_BRIEF = "prototype interfaces compatible with ROCm platform and HIP"
|
||||
PROJECT_BRIEF = "Prototype interfaces compatible with ROCm platform and HIP"
|
||||
|
||||
# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
|
||||
# in the documentation. The maximum height of the logo should not exceed 55
|
||||
@@ -949,8 +949,8 @@ INPUT = ../../include/ck/tensor_operation/gpu/grid \
|
||||
../../include/ck/tensor_operation/gpu/block \
|
||||
../../include/ck/tensor_operation/gpu/thread \
|
||||
../../library/include/ck/library/utility \
|
||||
../../include/ck/wrapper
|
||||
|
||||
../../include/ck/wrapper \
|
||||
../../include/ck_tile
|
||||
|
||||
# This tag can be used to specify the character encoding of the source files
|
||||
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
|
||||
@@ -1161,7 +1161,8 @@ FILTER_SOURCE_PATTERNS =
|
||||
# (index.html). This can be useful if you have a project on for instance GitHub
|
||||
# and want to reuse the introduction page also for the doxygen output.
|
||||
|
||||
USE_MDFILE_AS_MAINPAGE = ../../README.md
|
||||
|
||||
USE_MDFILE_AS_MAINPAGE =
|
||||
|
||||
# The Fortran standard specifies that for fixed formatted Fortran code all
|
||||
# characters from position 72 are to be considered as comment. A common
|
||||
@@ -1370,7 +1371,7 @@ HTML_EXTRA_STYLESHEET = ../_doxygen/extra_stylesheet.css
|
||||
# files will be copied as-is; there are no commands or markers available.
|
||||
# This tag requires that the tag GENERATE_HTML is set to YES.
|
||||
|
||||
HTML_EXTRA_FILES =
|
||||
HTML_EXTRA_FILES = ../_doxygen/extra_stylesheet.css
|
||||
|
||||
# The HTML_COLORSTYLE tag can be used to specify if the generated HTML output
|
||||
# should be rendered with a dark or light theme.
|
||||
|
||||
@@ -35,9 +35,9 @@ The Composable Kernel repository is located at `https://github.com/ROCm/composab
|
||||
* :doc:`Composable Kernel supported scalar types <./reference/Composable_Kernel_supported_scalar_types>`
|
||||
* :doc:`Composable Kernel custom types <./reference/Composable_Kernel_custom_types>`
|
||||
* :doc:`Composable Kernel vector utilities <./reference/Composable_Kernel_vector_utilities>`
|
||||
* :ref:`api-reference`
|
||||
* :ref:`wrapper`
|
||||
|
||||
* :ref:`wrapper`
|
||||
* :doc:`Composable Kernel complete class list <./doxygen/html/annotated>`
|
||||
|
||||
To contribute to the documentation refer to `Contributing to ROCm <https://rocm.docs.amd.com/en/latest/contribute/contributing.html>`_.
|
||||
|
||||
You can find licensing information on the `Licensing <https://rocm.docs.amd.com/en/latest/about/license.html>`_ page.
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
.. meta::
|
||||
:description: Composable Kernel documentation and API reference library
|
||||
:keywords: composable kernel, CK, ROCm, API, documentation
|
||||
|
||||
.. _api-reference:
|
||||
|
||||
********************************************************************
|
||||
Composable Kernel API reference guide
|
||||
********************************************************************
|
||||
|
||||
This document contains details of the APIs for the Composable Kernel library and introduces some of the key design principles that are used to write new classes that extend the functionality of the Composable Kernel library.
|
||||
|
||||
=================
|
||||
DeviceMem
|
||||
=================
|
||||
|
||||
.. doxygenstruct:: DeviceMem
|
||||
|
||||
=============================
|
||||
Kernels For Flashattention
|
||||
=============================
|
||||
|
||||
The Flashattention algorithm is defined in :cite:t:`dao2022flashattention`. This section lists
|
||||
the classes that are used in the CK GPU implementation of Flashattention.
|
||||
|
||||
**Gridwise classes**
|
||||
|
||||
.. doxygenstruct:: ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle
|
||||
|
||||
**Blockwise classes**
|
||||
|
||||
.. doxygenstruct:: ck::ThreadGroupTensorSliceTransfer_v4r1
|
||||
|
||||
.. doxygenstruct:: ck::BlockwiseGemmXdlops_v2
|
||||
|
||||
.. doxygenstruct:: ck::BlockwiseSoftmax
|
||||
|
||||
**Threadwise classes**
|
||||
|
||||
.. doxygenstruct:: ck::ThreadwiseTensorSliceTransfer_StaticToStatic
|
||||
|
||||
.. bibliography::
|
||||
@@ -32,10 +32,10 @@ subtrees:
|
||||
title: Composable Kernel custom types
|
||||
- file: reference/Composable_Kernel_vector_utilities.rst
|
||||
title: Composable Kernel vector utilities
|
||||
- file: reference/Composable-Kernel-API-reference.rst
|
||||
title: Composable Kernel API reference
|
||||
- file: reference/Composable-Kernel-wrapper.rst
|
||||
title: Composable Kernel Wrapper
|
||||
title: Composable Kernel wrapper
|
||||
- file: doxygen/html/annotated.rst
|
||||
title: Composable Kernel class list
|
||||
|
||||
- caption: About
|
||||
entries:
|
||||
|
||||
@@ -5,6 +5,7 @@ include_directories(BEFORE
|
||||
|
||||
add_custom_target(examples)
|
||||
|
||||
|
||||
# list of examples that are labelled as REGRESSION_EXAMPLE for make regression (runtime more than 30 seconds)
|
||||
# all other tests are labelled as SMOKE_EXAMPLE
|
||||
set(REGRESSION_EXAMPLES
|
||||
@@ -231,9 +232,6 @@ endfunction(add_example_executable_no_testing EXAMPLE_NAME)
|
||||
|
||||
# add all example subdir
|
||||
file(GLOB dir_list LIST_DIRECTORIES true *)
|
||||
if (NOT SUPPORTED_GPU_TARGETS MATCHES "gfx9")
|
||||
list(FILTER dir_list EXCLUDE REGEX ".*/ck_tile")
|
||||
endif()
|
||||
FOREACH(subdir ${dir_list})
|
||||
if(IS_DIRECTORY "${subdir}" AND EXISTS "${subdir}/CMakeLists.txt")
|
||||
add_subdirectory(${subdir})
|
||||
|
||||
@@ -58,8 +58,7 @@ set(EXAMPLE_FMHA_FWD "tile_example_fmha_fwd")
|
||||
# not using add_example_executable() to add this target, since we don't want this to have
|
||||
# to be included in "make all/install/check"
|
||||
message("adding example ${EXAMPLE_FMHA_FWD}")
|
||||
add_executable(${EXAMPLE_FMHA_FWD} fmha_fwd.cpp)
|
||||
rocm_install(TARGETS ${EXAMPLE_FMHA_FWD} COMPONENT examples)
|
||||
add_executable(${EXAMPLE_FMHA_FWD} EXCLUDE_FROM_ALL fmha_fwd.cpp)
|
||||
target_include_directories(${EXAMPLE_FMHA_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
target_sources(${EXAMPLE_FMHA_FWD} PRIVATE ${FMHA_FWD_GEN_BLOBS})
|
||||
|
||||
@@ -67,8 +66,7 @@ set(EXAMPLE_FMHA_BWD "tile_example_fmha_bwd")
|
||||
# not using add_example_executable() to add this target, since we don't want this to have
|
||||
# to be included in "make all/install/check"
|
||||
message("adding example ${EXAMPLE_FMHA_BWD}")
|
||||
add_executable(${EXAMPLE_FMHA_BWD} fmha_bwd.cpp)
|
||||
rocm_install(TARGETS ${EXAMPLE_FMHA_BWD} COMPONENT examples)
|
||||
add_executable(${EXAMPLE_FMHA_BWD} EXCLUDE_FROM_ALL fmha_bwd.cpp)
|
||||
target_include_directories(${EXAMPLE_FMHA_BWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
target_sources(${EXAMPLE_FMHA_BWD} PRIVATE ${FMHA_BWD_GEN_BLOBS})
|
||||
|
||||
|
||||
@@ -26,8 +26,7 @@ add_custom_command(
|
||||
set(EXAMPLE_LAYERNORM2D_FWD "tile_example_layernorm2d_fwd")
|
||||
|
||||
message("adding example ${EXAMPLE_LAYERNORM2D_FWD}")
|
||||
add_executable(${EXAMPLE_LAYERNORM2D_FWD} layernorm2d_fwd.cpp)
|
||||
rocm_install(TARGETS ${EXAMPLE_LAYERNORM2D_FWD} COMPONENT examples)
|
||||
add_executable(${EXAMPLE_LAYERNORM2D_FWD} EXCLUDE_FROM_ALL layernorm2d_fwd.cpp)
|
||||
target_include_directories(${EXAMPLE_LAYERNORM2D_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
target_sources(${EXAMPLE_LAYERNORM2D_FWD} PRIVATE ${LAYERNORM2D_FWD_GEN_BLOBS})
|
||||
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
add_executable(tile_example_gemm_basic gemm_basic.cpp)
|
||||
rocm_install(TARGETS tile_example_gemm_basic COMPONENT examples)
|
||||
add_executable(tile_example_gemm_universal universal_gemm.cpp)
|
||||
rocm_install(TARGETS tile_example_gemm_universal COMPONENT examples)
|
||||
|
||||
add_executable(tile_example_gemm_basic EXCLUDE_FROM_ALL gemm_basic.cpp)
|
||||
add_executable(tile_example_gemm_universal EXCLUDE_FROM_ALL universal_gemm.cpp)
|
||||
set(EXAMPLE_GEMM_COMPILE_OPTIONS)
|
||||
if(CK_USE_OCP_FP8)
|
||||
list(APPEND EXAMPLE_GEMM_COMPILE_OPTIONS -DCK_TILE_USE_OCP_FP8)
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
for file in gemm_universal_*; do mv "$file" "${file/f16_f16_f16/fp16_fp16_fp16}"; done
|
||||
@@ -1,4 +1,3 @@
|
||||
# not using add_example_executable() to add this target, since we don't want this to have
|
||||
# to be included in "make all/install/check"
|
||||
add_executable(tile_example_img2col image_to_column.cpp)
|
||||
rocm_install(TARGETS tile_example_img2col COMPONENT examples)
|
||||
add_executable(tile_example_img2col EXCLUDE_FROM_ALL image_to_column.cpp)
|
||||
|
||||
@@ -3,9 +3,7 @@ set(EXAMPLE_REDUCE "tile_example_reduce")
|
||||
# to be included in "make all/install/check"
|
||||
message("adding example ${EXAMPLE_REDUCE}")
|
||||
|
||||
add_executable(${EXAMPLE_REDUCE} reduce.cpp)
|
||||
rocm_install(TARGETS ${EXAMPLE_REDUCE} COMPONENT examples)
|
||||
|
||||
add_executable(${EXAMPLE_REDUCE} EXCLUDE_FROM_ALL reduce.cpp)
|
||||
target_include_directories(${EXAMPLE_REDUCE} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
set(EXAMPLE_REDUCE_COMPILE_OPTIONS)
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# not using add_example_executable() to add this target, since we don't want this to have
|
||||
# to be included in "make all/install/check"
|
||||
add_executable(tile_example_permute permute.cpp)
|
||||
rocm_install(TARGETS tile_example_permute COMPONENT examples)
|
||||
add_executable(tile_example_permute EXCLUDE_FROM_ALL permute.cpp)
|
||||
|
||||
if(NOT DEFINED PERMUTE_USE_ALTERNATIVE_IMPL)
|
||||
# set(PERMUTE_USE_ALTERNATIVE_IMPL false)
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
add_executable(tile_example_topk_softmax topk_softmax.cpp topk_softmax_api.cpp)
|
||||
rocm_install(TARGETS tile_example_topk_softmax COMPONENT examples)
|
||||
|
||||
add_executable(tile_example_topk_softmax EXCLUDE_FROM_ALL topk_softmax.cpp topk_softmax_api.cpp)
|
||||
target_include_directories(tile_example_topk_softmax PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/)
|
||||
|
||||
set(EXAMPLE_TOPK_SOFTMAX_COMPILE_OPTIONS)
|
||||
# NOTE: we turn off undefined-func-template so the sources compile without explicitly declaring function specializations
|
||||
list(APPEND EXAMPLE_TOPK_SOFTMAX_COMPILE_OPTIONS -Wno-undefined-func-template -Wno-float-equal)
|
||||
|
||||
@@ -26,8 +26,7 @@ add_custom_command(
|
||||
set(TILE_RMSNORM2D_FWD "tile_rmsnorm2d_fwd")
|
||||
|
||||
message("adding ${TILE_RMSNORM2D_FWD}")
|
||||
add_executable(${TILE_RMSNORM2D_FWD} rmsnorm2d_fwd.cpp)
|
||||
rocm_install(TARGETS ${TILE_RMSNORM2D_FWD} COMPONENT examples)
|
||||
add_executable(${TILE_RMSNORM2D_FWD} EXCLUDE_FROM_ALL rmsnorm2d_fwd.cpp)
|
||||
target_include_directories(${TILE_RMSNORM2D_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
target_sources(${TILE_RMSNORM2D_FWD} PRIVATE ${RMSNORM2D_FWD_GEN_BLOBS})
|
||||
|
||||
@@ -39,8 +38,7 @@ list(APPEND TILE_RMSNORM2D_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -Wno
|
||||
target_compile_options(${TILE_RMSNORM2D_FWD} PRIVATE ${TILE_RMSNORM2D_FWD_COMPILE_OPTIONS})
|
||||
|
||||
set(EXAMPLE_RMSNORM2D_FWD "tile_example_rmsnorm2d_fwd")
|
||||
add_executable(${EXAMPLE_RMSNORM2D_FWD} example_rmsnorm2d_fwd.cpp)
|
||||
rocm_install(TARGETS ${EXAMPLE_RMSNORM2D_FWD} COMPONENT examples)
|
||||
add_executable(${EXAMPLE_RMSNORM2D_FWD} EXCLUDE_FROM_ALL example_rmsnorm2d_fwd.cpp)
|
||||
target_compile_options(${EXAMPLE_RMSNORM2D_FWD} PRIVATE ${TILE_RMSNORM2D_FWD_COMPILE_OPTIONS})
|
||||
|
||||
# TODO: we have to turn off this global prop, otherwise the progress bar generated
|
||||
|
||||
@@ -3,8 +3,7 @@ set(TILE_ADD_RMSNORM2D_RDQUANT_FWD "tile_add_rmsnorm2d_rdquant_fwd")
|
||||
# to be included in "make all/install/check"
|
||||
message("adding ${TILE_ADD_RMSNORM2D_RDQUANT_FWD}")
|
||||
file(GLOB INSTANCE_SRCS instances/*.cpp)
|
||||
add_executable(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} add_rmsnorm2d_rdquant_fwd.cpp)
|
||||
rocm_install(TARGETS ${TILE_ADD_RMSNORM2D_RDQUANT_FWD} COMPONENT examples)
|
||||
add_executable(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} EXCLUDE_FROM_ALL add_rmsnorm2d_rdquant_fwd.cpp)
|
||||
target_include_directories(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
target_sources(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} PRIVATE ${INSTANCE_SRCS})
|
||||
|
||||
@@ -16,8 +15,7 @@ list(APPEND TILE_ADD_RMSNORM2D_RDQUANT_FWD_COMPILE_OPTIONS -Wno-undefined-func-t
|
||||
target_compile_options(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} PRIVATE ${TILE_ADD_RMSNORM2D_RDQUANT_FWD_COMPILE_OPTIONS})
|
||||
|
||||
set(EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD "tile_example_add_rmsnorm2d_rdquant_fwd")
|
||||
add_executable(${EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD} example_add_rmsnorm2d_rdquant_fwd.cpp)
|
||||
rocm_install(TARGETS ${EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD} COMPONENT examples)
|
||||
add_executable(${EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD} EXCLUDE_FROM_ALL example_add_rmsnorm2d_rdquant_fwd.cpp)
|
||||
target_compile_options(${EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD} PRIVATE ${TILE_ADD_RMSNORM2D_RDQUANT_FWD_COMPILE_OPTIONS})
|
||||
|
||||
# TODO: we have to turn off this global prop, otherwise the progress bar generated
|
||||
|
||||
@@ -67,14 +67,13 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
|
||||
using TypeConfig = AddRmsnormRdquantTypeConfig<InputDataType, QuantizedDataType>;
|
||||
|
||||
using ADataType = typename TypeConfig::ADataType;
|
||||
using BDataType = typename TypeConfig::BDataType;
|
||||
using GammaDataType = typename TypeConfig::GammaDataType;
|
||||
using XDataType = typename TypeConfig::XDataType;
|
||||
using UnquantYDataType = ck_tile::null_type;
|
||||
using YScaleDataType = typename TypeConfig::YScaleDataType;
|
||||
using QYDataType = typename TypeConfig::QYDataType;
|
||||
using ComputeDataType = float;
|
||||
using ADataType = typename TypeConfig::ADataType;
|
||||
using BDataType = typename TypeConfig::BDataType;
|
||||
using GammaDataType = typename TypeConfig::GammaDataType;
|
||||
using XDataType = typename TypeConfig::XDataType;
|
||||
using YScaleDataType = typename TypeConfig::YScaleDataType;
|
||||
using QYDataType = typename TypeConfig::QYDataType;
|
||||
using ComputeDataType = float;
|
||||
|
||||
// host verify
|
||||
ck_tile::HostTensor<ADataType> a_host({m, n}, {stride, 1});
|
||||
@@ -89,7 +88,6 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
|
||||
ck_tile::HostTensor<QYDataType> qy_host_ref({m, n}, {stride, 1});
|
||||
ck_tile::HostTensor<QYDataType> qy_host_dev({m, n}, {stride, 1});
|
||||
ck_tile::HostTensor<UnquantYDataType> unquant_y_host_ref({m, n}, {stride, 1});
|
||||
|
||||
ck_tile::FillUniformDistribution<ADataType>{-.5f, .5f}(a_host);
|
||||
ck_tile::FillUniformDistribution<BDataType>{-.5f, .5f}(b_host);
|
||||
@@ -193,9 +191,8 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
GammaDataType,
|
||||
ComputeDataType,
|
||||
YDataType,
|
||||
InvRmsDataType,
|
||||
UnquantYDataType>(
|
||||
x_host_ref, gamma_host, y_host, invRms_host_ref, unquant_y_host_ref, epsilon);
|
||||
InvRmsDataType>(
|
||||
x_host_ref, gamma_host, y_host, invRms_host_ref, epsilon);
|
||||
}
|
||||
|
||||
// yscale
|
||||
|
||||
21
example/ck_tile/11_add_rmsnorm2d_rdquant/example_add_rmsnorm2d_rdquant_fwd.cpp
Executable file → Normal file
21
example/ck_tile/11_add_rmsnorm2d_rdquant/example_add_rmsnorm2d_rdquant_fwd.cpp
Executable file → Normal file
@@ -62,14 +62,13 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
|
||||
assert(stride >= n);
|
||||
|
||||
using ADataType = DataType;
|
||||
using BDataType = DataType;
|
||||
using GammaDataType = DataType;
|
||||
using XDataType = DataType;
|
||||
using UnquantYDataType = ck_tile::null_type;
|
||||
using YScaleDataType = float;
|
||||
using QYDataType = ck_tile::int8_t;
|
||||
using ComputeDataType = float;
|
||||
using ADataType = DataType;
|
||||
using BDataType = DataType;
|
||||
using GammaDataType = DataType;
|
||||
using XDataType = DataType;
|
||||
using YScaleDataType = float;
|
||||
using QYDataType = ck_tile::int8_t;
|
||||
using ComputeDataType = float;
|
||||
|
||||
// host verify
|
||||
ck_tile::HostTensor<ADataType> a_host({m, n}, {stride, 1});
|
||||
@@ -82,7 +81,6 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
ck_tile::HostTensor<YScaleDataType> yscale_host_dev({m}, {1});
|
||||
ck_tile::HostTensor<QYDataType> qy_host_ref({m, n}, {stride, 1});
|
||||
ck_tile::HostTensor<QYDataType> qy_host_dev({m, n}, {stride, 1});
|
||||
ck_tile::HostTensor<UnquantYDataType> unquant_y_host_ref({m, n}, {stride, 1});
|
||||
|
||||
ck_tile::FillUniformDistribution<ADataType>{-.5f, .5f}(a_host);
|
||||
ck_tile::FillUniformDistribution<BDataType>{-.5f, .5f}(b_host);
|
||||
@@ -195,9 +193,8 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
GammaDataType,
|
||||
ComputeDataType,
|
||||
YDataType,
|
||||
InvRmsDataType,
|
||||
UnquantYDataType>(
|
||||
x_host_ref, gamma_host, y_host, invRms_host_ref, unquant_y_host_ref, epsilon);
|
||||
InvRmsDataType>(
|
||||
x_host_ref, gamma_host, y_host, invRms_host_ref, epsilon);
|
||||
}
|
||||
|
||||
// yscale
|
||||
|
||||
@@ -2,8 +2,7 @@ function (add_smoothquant_example TARGET_NAME MAIN_SRC)
|
||||
message("adding ${TARGET_NAME}")
|
||||
# not using add_example_executable() to add target, since we don't want this to have
|
||||
# to be included in "make all/install/check"
|
||||
add_executable(${TARGET_NAME} ${MAIN_SRC})
|
||||
rocm_install(TARGETS ${TARGET_NAME} COMPONENT examples)
|
||||
add_executable(${TARGET_NAME} EXCLUDE_FROM_ALL ${MAIN_SRC})
|
||||
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
|
||||
foreach(source IN LISTS ARGN)
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
add_executable(tile_example_moe_sorting moe_sorting.cpp moe_sorting_api.cpp)
|
||||
rocm_install(TARGETS tile_example_moe_sorting COMPONENT examples)
|
||||
add_executable(tile_example_moe_sorting EXCLUDE_FROM_ALL moe_sorting.cpp moe_sorting_api.cpp)
|
||||
target_include_directories(tile_example_moe_sorting PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/)
|
||||
|
||||
set(EXAMPLE_MOE_SORTING_COMPILE_OPTIONS)
|
||||
|
||||
@@ -2,8 +2,7 @@ function (add_moe_smoothquant_example TARGET_NAME MAIN_SRC)
|
||||
message("adding ${TARGET_NAME}")
|
||||
# not using add_example_executable() to add target, since we don't want this to have
|
||||
# to be included in "make all/install/check"
|
||||
add_executable(${TARGET_NAME} ${MAIN_SRC})
|
||||
rocm_install(TARGETS ${TARGET_NAME} COMPONENT examples)
|
||||
add_executable(${TARGET_NAME} EXCLUDE_FROM_ALL ${MAIN_SRC})
|
||||
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
|
||||
foreach(source IN LISTS ARGN)
|
||||
|
||||
@@ -3,8 +3,7 @@ set(TILE_EXAPMLE_FUSED_MOE "tile_example_fused_moe")
|
||||
# to be included in "make all/install/check"
|
||||
message("adding ${TILE_EXAPMLE_FUSED_MOE}")
|
||||
file(GLOB INSTANCE_SRCS instances/*.cpp)
|
||||
add_executable(${TILE_EXAPMLE_FUSED_MOE} main.cpp)
|
||||
rocm_install(TARGETS ${TILE_EXAPMLE_FUSED_MOE} COMPONENT examples)
|
||||
add_executable(${TILE_EXAPMLE_FUSED_MOE} EXCLUDE_FROM_ALL main.cpp)
|
||||
target_include_directories(${TILE_EXAPMLE_FUSED_MOE} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||||
target_sources(${TILE_EXAPMLE_FUSED_MOE} PRIVATE ${INSTANCE_SRCS})
|
||||
|
||||
|
||||
@@ -1,2 +1 @@
|
||||
add_executable(tile_example_batched_gemm batched_gemm.cpp)
|
||||
rocm_install(TARGETS tile_example_batched_gemm COMPONENT examples)
|
||||
add_executable(tile_example_batched_gemm EXCLUDE_FROM_ALL batched_gemm.cpp)
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
add_executable(tile_example_grouped_gemm grouped_gemm.cpp)
|
||||
rocm_install(TARGETS tile_example_grouped_gemm COMPONENT examples)
|
||||
add_executable(tile_example_grouped_gemm EXCLUDE_FROM_ALL grouped_gemm.cpp)
|
||||
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
add_executable(tile_example_flatmm_basic flatmm_basic.cpp)
|
||||
rocm_install(TARGETS tile_example_flatmm_basic COMPONENT examples)
|
||||
|
||||
add_executable(tile_example_flatmm_basic EXCLUDE_FROM_ALL flatmm_basic.cpp)
|
||||
|
||||
set(EXAMPLE_FLATMM_COMPILE_OPTIONS)
|
||||
# list(APPEND EXAMPLE_FLATMM_COMPILE_OPTIONS -Wno-undefined-func-template -Wno-float-equal)
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
set(TARGET_NAME tile_example_batched_transpose)
|
||||
add_executable(${TARGET_NAME} batched_transpose_example.cpp batched_transpose_api.cpp)
|
||||
rocm_install(TARGETS ${TARGET_NAME} COMPONENT examples)
|
||||
add_executable(${TARGET_NAME} EXCLUDE_FROM_ALL batched_transpose_example.cpp batched_transpose_api.cpp)
|
||||
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/)
|
||||
|
||||
# NOTE: we turn off undefined-func-template so the sources compile without explicitly declaring function specializations
|
||||
list(APPEND EXAMPLE_BATCHED_TRANSPOSE_COMPILE_OPTIONS -Wno-undefined-func-template -Wno-float-equal)
|
||||
# list(APPEND EXAMPLE_BATCHED_TRANSPOSE_COMPILE_OPTIONS -v --save-temps -Wno-gnu-line-marker)
|
||||
target_compile_options(tile_example_batched_transpose PRIVATE ${EXAMPLE_BATCHED_TRANSPOSE_COMPILE_OPTIONS})
|
||||
|
||||
|
||||
@@ -14,11 +14,8 @@ add_subdirectory(11_add_rmsnorm2d_rdquant)
|
||||
add_subdirectory(12_smoothquant)
|
||||
add_subdirectory(13_moe_sorting)
|
||||
add_subdirectory(14_moe_smoothquant)
|
||||
add_subdirectory(15_fused_moe)
|
||||
add_subdirectory(16_batched_gemm)
|
||||
add_subdirectory(17_grouped_gemm)
|
||||
add_subdirectory(18_flatmm)
|
||||
add_subdirectory(35_batched_transpose)
|
||||
|
||||
if (SUPPORTED_GPU_TARGETS MATCHES "gfx94")
|
||||
add_subdirectory(15_fused_moe)
|
||||
endif()
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
#include "ck_tile/core.hpp"
|
||||
#include "ck_tile/host/concat.hpp"
|
||||
#include "ck_tile/ops/flatmm/pipeline/flatmm_pipeline_agmem_bgmem_creg_v1_policy.hpp"
|
||||
#include "ck_tile/host/concat.hpp"
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
|
||||
@@ -30,7 +30,8 @@ struct GemmPipelineProblemBase
|
||||
using BLayout = remove_cvref_t<typename Traits::BLayout>;
|
||||
using CLayout = remove_cvref_t<typename Traits::CLayout>;
|
||||
|
||||
static constexpr bool TransposeC = Traits::TransposeC;
|
||||
static constexpr bool TransposeC = Traits::TransposeC;
|
||||
|
||||
static constexpr bool UseStructuredSparsity = Traits::UseStructuredSparsity;
|
||||
|
||||
static constexpr index_t kBlockSize = BlockGemmShape::NumWarps * get_warp_size();
|
||||
|
||||
@@ -12,8 +12,7 @@ template <bool kPadM_,
|
||||
bool kPadK_,
|
||||
typename ALayout_,
|
||||
typename BLayout_,
|
||||
typename CLayout_,
|
||||
bool UseStructuredSparsity_ = false>
|
||||
typename CLayout_>
|
||||
struct TileGemmTraits
|
||||
{
|
||||
static constexpr bool kPadM = kPadM_;
|
||||
@@ -28,7 +27,7 @@ struct TileGemmTraits
|
||||
using CLayout = CLayout_;
|
||||
|
||||
static constexpr bool TransposeC = false;
|
||||
static constexpr bool UseStructuredSparsity = UseStructuredSparsity_;
|
||||
static constexpr bool UseStructuredSparsity = false;
|
||||
};
|
||||
|
||||
template <bool kPadM_,
|
||||
|
||||
Reference in New Issue
Block a user