diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 0e61fd33ef..996a543ecc 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -5,6 +5,7 @@ include_directories(BEFORE add_custom_target(examples) + # list of examples that are labelled as REGRESSION_EXAMPLE for make regression (runtime more than 30 seconds) # all other tests are labelled as SMOKE_EXAMPLE set(REGRESSION_EXAMPLES @@ -231,9 +232,6 @@ endfunction(add_example_executable_no_testing EXAMPLE_NAME) # add all example subdir file(GLOB dir_list LIST_DIRECTORIES true *) -if (NOT SUPPORTED_GPU_TARGETS MATCHES "gfx9") - list(FILTER dir_list EXCLUDE REGEX ".*/ck_tile") -endif() FOREACH(subdir ${dir_list}) if(IS_DIRECTORY "${subdir}" AND EXISTS "${subdir}/CMakeLists.txt") add_subdirectory(${subdir}) diff --git a/example/ck_tile/01_fmha/CMakeLists.txt b/example/ck_tile/01_fmha/CMakeLists.txt index ce3c8b3978..9ba3a453fc 100644 --- a/example/ck_tile/01_fmha/CMakeLists.txt +++ b/example/ck_tile/01_fmha/CMakeLists.txt @@ -58,8 +58,7 @@ set(EXAMPLE_FMHA_FWD "tile_example_fmha_fwd") # not using add_example_executable() to add this target, since we don't want this to have # to be included in "make all/install/check" message("adding example ${EXAMPLE_FMHA_FWD}") -add_executable(${EXAMPLE_FMHA_FWD} fmha_fwd.cpp) -rocm_install(TARGETS ${EXAMPLE_FMHA_FWD} COMPONENT examples) +add_executable(${EXAMPLE_FMHA_FWD} EXCLUDE_FROM_ALL fmha_fwd.cpp) target_include_directories(${EXAMPLE_FMHA_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) target_sources(${EXAMPLE_FMHA_FWD} PRIVATE ${FMHA_FWD_GEN_BLOBS}) @@ -67,8 +66,7 @@ set(EXAMPLE_FMHA_BWD "tile_example_fmha_bwd") # not using add_example_executable() to add this target, since we don't want this to have # to be included in "make all/install/check" message("adding example ${EXAMPLE_FMHA_BWD}") -add_executable(${EXAMPLE_FMHA_BWD} fmha_bwd.cpp) -rocm_install(TARGETS ${EXAMPLE_FMHA_BWD} COMPONENT examples) +add_executable(${EXAMPLE_FMHA_BWD} EXCLUDE_FROM_ALL fmha_bwd.cpp) target_include_directories(${EXAMPLE_FMHA_BWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) target_sources(${EXAMPLE_FMHA_BWD} PRIVATE ${FMHA_BWD_GEN_BLOBS}) diff --git a/example/ck_tile/02_layernorm2d/CMakeLists.txt b/example/ck_tile/02_layernorm2d/CMakeLists.txt index 74f195a9db..fa69ac0f7a 100644 --- a/example/ck_tile/02_layernorm2d/CMakeLists.txt +++ b/example/ck_tile/02_layernorm2d/CMakeLists.txt @@ -26,8 +26,7 @@ add_custom_command( set(EXAMPLE_LAYERNORM2D_FWD "tile_example_layernorm2d_fwd") message("adding example ${EXAMPLE_LAYERNORM2D_FWD}") -add_executable(${EXAMPLE_LAYERNORM2D_FWD} layernorm2d_fwd.cpp) -rocm_install(TARGETS ${EXAMPLE_LAYERNORM2D_FWD} COMPONENT examples) +add_executable(${EXAMPLE_LAYERNORM2D_FWD} EXCLUDE_FROM_ALL layernorm2d_fwd.cpp) target_include_directories(${EXAMPLE_LAYERNORM2D_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) target_sources(${EXAMPLE_LAYERNORM2D_FWD} PRIVATE ${LAYERNORM2D_FWD_GEN_BLOBS}) diff --git a/example/ck_tile/03_gemm/CMakeLists.txt b/example/ck_tile/03_gemm/CMakeLists.txt index deccb71d23..411db2e317 100644 --- a/example/ck_tile/03_gemm/CMakeLists.txt +++ b/example/ck_tile/03_gemm/CMakeLists.txt @@ -1,8 +1,5 @@ -add_executable(tile_example_gemm_basic gemm_basic.cpp) -rocm_install(TARGETS tile_example_gemm_basic COMPONENT examples) -add_executable(tile_example_gemm_universal universal_gemm.cpp) -rocm_install(TARGETS tile_example_gemm_universal COMPONENT examples) - +add_executable(tile_example_gemm_basic EXCLUDE_FROM_ALL gemm_basic.cpp) +add_executable(tile_example_gemm_universal EXCLUDE_FROM_ALL universal_gemm.cpp) set(EXAMPLE_GEMM_COMPILE_OPTIONS) if(CK_USE_OCP_FP8) list(APPEND EXAMPLE_GEMM_COMPILE_OPTIONS -DCK_TILE_USE_OCP_FP8) diff --git a/example/ck_tile/03_gemm/stript.sh b/example/ck_tile/03_gemm/stript.sh deleted file mode 100644 index 4b91cb36ce..0000000000 --- a/example/ck_tile/03_gemm/stript.sh +++ /dev/null @@ -1 +0,0 @@ -for file in gemm_universal_*; do mv "$file" "${file/f16_f16_f16/fp16_fp16_fp16}"; done diff --git a/example/ck_tile/04_img2col/CMakeLists.txt b/example/ck_tile/04_img2col/CMakeLists.txt index d3737467d8..3864c9ed9d 100644 --- a/example/ck_tile/04_img2col/CMakeLists.txt +++ b/example/ck_tile/04_img2col/CMakeLists.txt @@ -1,4 +1,3 @@ # not using add_example_executable() to add this target, since we don't want this to have # to be included in "make all/install/check" -add_executable(tile_example_img2col image_to_column.cpp) -rocm_install(TARGETS tile_example_img2col COMPONENT examples) +add_executable(tile_example_img2col EXCLUDE_FROM_ALL image_to_column.cpp) diff --git a/example/ck_tile/05_reduce/CMakeLists.txt b/example/ck_tile/05_reduce/CMakeLists.txt index 855e59c48e..6caa38d50d 100644 --- a/example/ck_tile/05_reduce/CMakeLists.txt +++ b/example/ck_tile/05_reduce/CMakeLists.txt @@ -3,9 +3,7 @@ set(EXAMPLE_REDUCE "tile_example_reduce") # to be included in "make all/install/check" message("adding example ${EXAMPLE_REDUCE}") -add_executable(${EXAMPLE_REDUCE} reduce.cpp) -rocm_install(TARGETS ${EXAMPLE_REDUCE} COMPONENT examples) - +add_executable(${EXAMPLE_REDUCE} EXCLUDE_FROM_ALL reduce.cpp) target_include_directories(${EXAMPLE_REDUCE} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) set(EXAMPLE_REDUCE_COMPILE_OPTIONS) diff --git a/example/ck_tile/06_permute/CMakeLists.txt b/example/ck_tile/06_permute/CMakeLists.txt index 22483a4295..327fceb685 100644 --- a/example/ck_tile/06_permute/CMakeLists.txt +++ b/example/ck_tile/06_permute/CMakeLists.txt @@ -1,7 +1,6 @@ # not using add_example_executable() to add this target, since we don't want this to have # to be included in "make all/install/check" -add_executable(tile_example_permute permute.cpp) -rocm_install(TARGETS tile_example_permute COMPONENT examples) +add_executable(tile_example_permute EXCLUDE_FROM_ALL permute.cpp) if(NOT DEFINED PERMUTE_USE_ALTERNATIVE_IMPL) # set(PERMUTE_USE_ALTERNATIVE_IMPL false) diff --git a/example/ck_tile/09_topk_softmax/CMakeLists.txt b/example/ck_tile/09_topk_softmax/CMakeLists.txt index fc2a4d3fe0..b43b989792 100644 --- a/example/ck_tile/09_topk_softmax/CMakeLists.txt +++ b/example/ck_tile/09_topk_softmax/CMakeLists.txt @@ -1,7 +1,6 @@ -add_executable(tile_example_topk_softmax topk_softmax.cpp topk_softmax_api.cpp) -rocm_install(TARGETS tile_example_topk_softmax COMPONENT examples) - +add_executable(tile_example_topk_softmax EXCLUDE_FROM_ALL topk_softmax.cpp topk_softmax_api.cpp) target_include_directories(tile_example_topk_softmax PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/) + set(EXAMPLE_TOPK_SOFTMAX_COMPILE_OPTIONS) # NOTE: we turn off undefined-func-template to let source compile without explicit declare function specializations list(APPEND EXAMPLE_TOPK_SOFTMAX_COMPILE_OPTIONS -Wno-undefined-func-template -Wno-float-equal) diff --git a/example/ck_tile/10_rmsnorm2d/CMakeLists.txt b/example/ck_tile/10_rmsnorm2d/CMakeLists.txt index 731ff639a4..5684c9b2e0 100644 --- a/example/ck_tile/10_rmsnorm2d/CMakeLists.txt +++ b/example/ck_tile/10_rmsnorm2d/CMakeLists.txt @@ -26,8 +26,7 @@ add_custom_command( set(TILE_RMSNORM2D_FWD "tile_rmsnorm2d_fwd") message("adding ${TILE_RMSNORM2D_FWD}") -add_executable(${TILE_RMSNORM2D_FWD} rmsnorm2d_fwd.cpp) -rocm_install(TARGETS ${TILE_RMSNORM2D_FWD} COMPONENT examples) +add_executable(${TILE_RMSNORM2D_FWD} EXCLUDE_FROM_ALL rmsnorm2d_fwd.cpp) target_include_directories(${TILE_RMSNORM2D_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) target_sources(${TILE_RMSNORM2D_FWD} PRIVATE ${RMSNORM2D_FWD_GEN_BLOBS}) @@ -39,8 +38,7 @@ list(APPEND TILE_RMSNORM2D_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -Wno target_compile_options(${TILE_RMSNORM2D_FWD} PRIVATE ${TILE_RMSNORM2D_FWD_COMPILE_OPTIONS}) set(EXAMPLE_RMSNORM2D_FWD "tile_example_rmsnorm2d_fwd") -add_executable(${EXAMPLE_RMSNORM2D_FWD} example_rmsnorm2d_fwd.cpp) -rocm_install(TARGETS ${EXAMPLE_RMSNORM2D_FWD} COMPONENT examples) +add_executable(${EXAMPLE_RMSNORM2D_FWD} EXCLUDE_FROM_ALL example_rmsnorm2d_fwd.cpp) target_compile_options(${EXAMPLE_RMSNORM2D_FWD} PRIVATE ${TILE_RMSNORM2D_FWD_COMPILE_OPTIONS}) # TODO: we have to turn off this global prop, otherwise the progress bar generated diff --git a/example/ck_tile/11_add_rmsnorm2d_rdquant/CMakeLists.txt b/example/ck_tile/11_add_rmsnorm2d_rdquant/CMakeLists.txt index 7071127e01..6b0c3cef7a 100644 --- a/example/ck_tile/11_add_rmsnorm2d_rdquant/CMakeLists.txt +++ b/example/ck_tile/11_add_rmsnorm2d_rdquant/CMakeLists.txt @@ -3,8 +3,7 @@ set(TILE_ADD_RMSNORM2D_RDQUANT_FWD "tile_add_rmsnorm2d_rdquant_fwd") # to be included in "make all/install/check" message("adding ${TILE_ADD_RMSNORM2D_RDQUANT_FWD}") file(GLOB INSTANCE_SRCS instances/*.cpp) -add_executable(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} add_rmsnorm2d_rdquant_fwd.cpp) -rocm_install(TARGETS ${TILE_ADD_RMSNORM2D_RDQUANT_FWD} COMPONENT examples) +add_executable(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} EXCLUDE_FROM_ALL add_rmsnorm2d_rdquant_fwd.cpp) target_include_directories(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) target_sources(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} PRIVATE ${INSTANCE_SRCS}) @@ -16,8 +15,7 @@ list(APPEND TILE_ADD_RMSNORM2D_RDQUANT_FWD_COMPILE_OPTIONS -Wno-undefined-func-t target_compile_options(${TILE_ADD_RMSNORM2D_RDQUANT_FWD} PRIVATE ${TILE_ADD_RMSNORM2D_RDQUANT_FWD_COMPILE_OPTIONS}) set(EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD "tile_example_add_rmsnorm2d_rdquant_fwd") -add_executable(${EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD} example_add_rmsnorm2d_rdquant_fwd.cpp) -rocm_install(TARGETS ${EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD} COMPONENT examples) +add_executable(${EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD} EXCLUDE_FROM_ALL example_add_rmsnorm2d_rdquant_fwd.cpp) target_compile_options(${EXAMPLE_ADD_RMSNORM2D_RDQUANT_FWD} PRIVATE ${TILE_ADD_RMSNORM2D_RDQUANT_FWD_COMPILE_OPTIONS}) # TODO: we have to turn off this global prop, otherwise the progress bar generated diff --git a/example/ck_tile/11_add_rmsnorm2d_rdquant/add_rmsnorm2d_rdquant_fwd.cpp b/example/ck_tile/11_add_rmsnorm2d_rdquant/add_rmsnorm2d_rdquant_fwd.cpp index 7d82a16aa9..574edf64d3 100644 --- a/example/ck_tile/11_add_rmsnorm2d_rdquant/add_rmsnorm2d_rdquant_fwd.cpp +++ b/example/ck_tile/11_add_rmsnorm2d_rdquant/add_rmsnorm2d_rdquant_fwd.cpp @@ -67,14 +67,13 @@ bool run(const ck_tile::ArgParser& arg_parser) using TypeConfig = AddRmsnormRdquantTypeConfig; - using ADataType = typename TypeConfig::ADataType; - using BDataType = typename TypeConfig::BDataType; - using GammaDataType = typename TypeConfig::GammaDataType; - using XDataType = typename TypeConfig::XDataType; - using UnquantYDataType = ck_tile::null_type; - using YScaleDataType = typename TypeConfig::YScaleDataType; - using QYDataType = typename TypeConfig::QYDataType; - using ComputeDataType = float; + using ADataType = typename TypeConfig::ADataType; + using BDataType = typename TypeConfig::BDataType; + using GammaDataType = typename TypeConfig::GammaDataType; + using XDataType = typename TypeConfig::XDataType; + using YScaleDataType = typename TypeConfig::YScaleDataType; + using QYDataType = typename TypeConfig::QYDataType; + using ComputeDataType = float; // host verify ck_tile::HostTensor a_host({m, n}, {stride, 1}); @@ -89,7 +88,6 @@ bool run(const ck_tile::ArgParser& arg_parser) ck_tile::HostTensor qy_host_ref({m, n}, {stride, 1}); ck_tile::HostTensor qy_host_dev({m, n}, {stride, 1}); - ck_tile::HostTensor unquant_y_host_ref({m, n}, {stride, 1}); ck_tile::FillUniformDistribution{-.5f, .5f}(a_host); ck_tile::FillUniformDistribution{-.5f, .5f}(b_host); @@ -193,9 +191,8 @@ bool run(const ck_tile::ArgParser& arg_parser) GammaDataType, ComputeDataType, YDataType, - InvRmsDataType, - UnquantYDataType>( - x_host_ref, gamma_host, y_host, invRms_host_ref, unquant_y_host_ref, epsilon); + InvRmsDataType>( + x_host_ref, gamma_host, y_host, invRms_host_ref, epsilon); } // yscale diff --git a/example/ck_tile/11_add_rmsnorm2d_rdquant/example_add_rmsnorm2d_rdquant_fwd.cpp b/example/ck_tile/11_add_rmsnorm2d_rdquant/example_add_rmsnorm2d_rdquant_fwd.cpp old mode 100755 new mode 100644 index 3aab357909..ada4c6f2da --- a/example/ck_tile/11_add_rmsnorm2d_rdquant/example_add_rmsnorm2d_rdquant_fwd.cpp +++ b/example/ck_tile/11_add_rmsnorm2d_rdquant/example_add_rmsnorm2d_rdquant_fwd.cpp @@ -62,14 +62,13 @@ bool run(const ck_tile::ArgParser& arg_parser) assert(stride >= n); - using ADataType = DataType; - using BDataType = DataType; - using GammaDataType = DataType; - using XDataType = DataType; - using UnquantYDataType = ck_tile::null_type; - using YScaleDataType = float; - using QYDataType = ck_tile::int8_t; - using ComputeDataType = float; + using ADataType = DataType; + using BDataType = DataType; + using GammaDataType = DataType; + using XDataType = DataType; + using YScaleDataType = float; + using QYDataType = ck_tile::int8_t; + using ComputeDataType = float; // host verify ck_tile::HostTensor a_host({m, n}, {stride, 1}); @@ -82,7 +81,6 @@ bool run(const ck_tile::ArgParser& arg_parser) ck_tile::HostTensor yscale_host_dev({m}, {1}); ck_tile::HostTensor qy_host_ref({m, n}, {stride, 1}); ck_tile::HostTensor qy_host_dev({m, n}, {stride, 1}); - ck_tile::HostTensor unquant_y_host_ref({m, n}, {stride, 1}); ck_tile::FillUniformDistribution{-.5f, .5f}(a_host); ck_tile::FillUniformDistribution{-.5f, .5f}(b_host); @@ -195,9 +193,8 @@ bool run(const ck_tile::ArgParser& arg_parser) GammaDataType, ComputeDataType, YDataType, - InvRmsDataType, - UnquantYDataType>( - x_host_ref, gamma_host, y_host, invRms_host_ref, unquant_y_host_ref, epsilon); + InvRmsDataType>( + x_host_ref, gamma_host, y_host, invRms_host_ref, epsilon); } // yscale diff --git a/example/ck_tile/12_smoothquant/CMakeLists.txt b/example/ck_tile/12_smoothquant/CMakeLists.txt index daeeb827bd..3849833aca 100644 --- a/example/ck_tile/12_smoothquant/CMakeLists.txt +++ b/example/ck_tile/12_smoothquant/CMakeLists.txt @@ -2,8 +2,7 @@ function (add_smoothquant_example TARGET_NAME MAIN_SRC) message("adding ${TARGET_NAME}") # not using add_example_executable() to add target, since we don't want this to have # to be included in "make all/install/check" - add_executable(${TARGET_NAME} ${MAIN_SRC}) - rocm_install(TARGETS ${TARGET_NAME} COMPONENT examples) + add_executable(${TARGET_NAME} EXCLUDE_FROM_ALL ${MAIN_SRC}) target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) foreach(source IN LISTS ARGN) diff --git a/example/ck_tile/13_moe_sorting/CMakeLists.txt b/example/ck_tile/13_moe_sorting/CMakeLists.txt index 662e16f0d3..09f3e4ac4e 100644 --- a/example/ck_tile/13_moe_sorting/CMakeLists.txt +++ b/example/ck_tile/13_moe_sorting/CMakeLists.txt @@ -1,5 +1,4 @@ -add_executable(tile_example_moe_sorting moe_sorting.cpp moe_sorting_api.cpp) -rocm_install(TARGETS tile_example_moe_sorting COMPONENT examples) +add_executable(tile_example_moe_sorting EXCLUDE_FROM_ALL moe_sorting.cpp moe_sorting_api.cpp) target_include_directories(tile_example_moe_sorting PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/) set(EXAMPLE_MOE_SORTING_COMPILE_OPTIONS) diff --git a/example/ck_tile/14_moe_smoothquant/CMakeLists.txt b/example/ck_tile/14_moe_smoothquant/CMakeLists.txt index 9acb27552a..12224a39a2 100644 --- a/example/ck_tile/14_moe_smoothquant/CMakeLists.txt +++ b/example/ck_tile/14_moe_smoothquant/CMakeLists.txt @@ -2,8 +2,7 @@ function (add_moe_smoothquant_example TARGET_NAME MAIN_SRC) message("adding ${TARGET_NAME}") # not using add_example_executable() to add target, since we don't want this to have # to be included in "make all/install/check" - add_executable(${TARGET_NAME} ${MAIN_SRC}) - rocm_install(TARGETS ${TARGET_NAME} COMPONENT examples) + add_executable(${TARGET_NAME} EXCLUDE_FROM_ALL ${MAIN_SRC}) target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) foreach(source IN LISTS ARGN) diff --git a/example/ck_tile/15_fused_moe/CMakeLists.txt b/example/ck_tile/15_fused_moe/CMakeLists.txt index bb25a55c7d..a716eef19e 100644 --- a/example/ck_tile/15_fused_moe/CMakeLists.txt +++ b/example/ck_tile/15_fused_moe/CMakeLists.txt @@ -3,8 +3,7 @@ set(TILE_EXAPMLE_FUSED_MOE "tile_example_fused_moe") # to be included in "make all/install/check" message("adding ${TILE_EXAPMLE_FUSED_MOE}") file(GLOB INSTANCE_SRCS instances/*.cpp) -add_executable(${TILE_EXAPMLE_FUSED_MOE} main.cpp) -rocm_install(TARGETS ${TILE_EXAPMLE_FUSED_MOE} COMPONENT examples) +add_executable(${TILE_EXAPMLE_FUSED_MOE} EXCLUDE_FROM_ALL main.cpp) target_include_directories(${TILE_EXAPMLE_FUSED_MOE} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) target_sources(${TILE_EXAPMLE_FUSED_MOE} PRIVATE ${INSTANCE_SRCS}) diff --git a/example/ck_tile/16_batched_gemm/CMakeLists.txt b/example/ck_tile/16_batched_gemm/CMakeLists.txt index 9eb7a45d80..78e78c6b04 100644 --- a/example/ck_tile/16_batched_gemm/CMakeLists.txt +++ b/example/ck_tile/16_batched_gemm/CMakeLists.txt @@ -1,2 +1 @@ -add_executable(tile_example_batched_gemm batched_gemm.cpp) -rocm_install(TARGETS tile_example_batched_gemm COMPONENT examples) +add_executable(tile_example_batched_gemm EXCLUDE_FROM_ALL batched_gemm.cpp) diff --git a/example/ck_tile/17_grouped_gemm/CMakeLists.txt b/example/ck_tile/17_grouped_gemm/CMakeLists.txt index 80d688125b..d34013dd6c 100644 --- a/example/ck_tile/17_grouped_gemm/CMakeLists.txt +++ b/example/ck_tile/17_grouped_gemm/CMakeLists.txt @@ -1,2 +1,2 @@ -add_executable(tile_example_grouped_gemm grouped_gemm.cpp) -rocm_install(TARGETS tile_example_grouped_gemm COMPONENT examples) +add_executable(tile_example_grouped_gemm EXCLUDE_FROM_ALL grouped_gemm.cpp) + diff --git a/example/ck_tile/18_flatmm/CMakeLists.txt b/example/ck_tile/18_flatmm/CMakeLists.txt index 3a70f0447d..9fbe65e3a7 100644 --- a/example/ck_tile/18_flatmm/CMakeLists.txt +++ b/example/ck_tile/18_flatmm/CMakeLists.txt @@ -1,6 +1,4 @@ -add_executable(tile_example_flatmm_basic flatmm_basic.cpp) -rocm_install(TARGETS tile_example_flatmm_basic COMPONENT examples) - +add_executable(tile_example_flatmm_basic EXCLUDE_FROM_ALL flatmm_basic.cpp) set(EXAMPLE_FLATMM_COMPILE_OPTIONS) # list(APPEND EXAMPLE_FLATMM_COMPILE_OPTIONS -Wno-undefined-func-template -Wno-float-equal) diff --git a/example/ck_tile/35_batched_transpose/CMakeLists.txt b/example/ck_tile/35_batched_transpose/CMakeLists.txt index 10101e4d2e..a08fcebb74 100644 --- a/example/ck_tile/35_batched_transpose/CMakeLists.txt +++ b/example/ck_tile/35_batched_transpose/CMakeLists.txt @@ -1,9 +1,9 @@ set(TARGET_NAME tile_example_batched_transpose) -add_executable(${TARGET_NAME} batched_transpose_example.cpp batched_transpose_api.cpp) -rocm_install(TARGETS ${TARGET_NAME} COMPONENT examples) +add_executable(${TARGET_NAME} EXCLUDE_FROM_ALL batched_transpose_example.cpp batched_transpose_api.cpp) target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/) # NOTE: we turn off undefined-func-template to let source compile without explicit declare function specializations list(APPEND EXAMPLE_BATCHED_TRANSPOSE_COMPILE_OPTIONS -Wno-undefined-func-template -Wno-float-equal) # list(APPEND EXAMPLE_BATCHED_TRANSPOSE_COMPILE_OPTIONS -v --save-temps -Wno-gnu-line-marker) target_compile_options(tile_example_batched_transpose PRIVATE ${EXAMPLE_BATCHED_TRANSPOSE_COMPILE_OPTIONS}) + diff --git a/example/ck_tile/CMakeLists.txt b/example/ck_tile/CMakeLists.txt index 16f68c6255..88efe0d8d9 100644 --- a/example/ck_tile/CMakeLists.txt +++ b/example/ck_tile/CMakeLists.txt @@ -14,11 +14,8 @@ add_subdirectory(11_add_rmsnorm2d_rdquant) add_subdirectory(12_smoothquant) add_subdirectory(13_moe_sorting) add_subdirectory(14_moe_smoothquant) +add_subdirectory(15_fused_moe) add_subdirectory(16_batched_gemm) add_subdirectory(17_grouped_gemm) add_subdirectory(18_flatmm) add_subdirectory(35_batched_transpose) - -if (SUPPORTED_GPU_TARGETS MATCHES "gfx94") - add_subdirectory(15_fused_moe) -endif() diff --git a/include/ck_tile/ops/flatmm/pipeline/flatmm_pipeline_agmem_bgmem_creg_v1.hpp b/include/ck_tile/ops/flatmm/pipeline/flatmm_pipeline_agmem_bgmem_creg_v1.hpp index ad6641bc13..611aff318f 100644 --- a/include/ck_tile/ops/flatmm/pipeline/flatmm_pipeline_agmem_bgmem_creg_v1.hpp +++ b/include/ck_tile/ops/flatmm/pipeline/flatmm_pipeline_agmem_bgmem_creg_v1.hpp @@ -6,7 +6,6 @@ #include "ck_tile/core.hpp" #include "ck_tile/host/concat.hpp" #include "ck_tile/ops/flatmm/pipeline/flatmm_pipeline_agmem_bgmem_creg_v1_policy.hpp" -#include "ck_tile/host/concat.hpp" namespace ck_tile { diff --git a/include/ck_tile/ops/gemm/pipeline/gemm_pipeline_problem.hpp b/include/ck_tile/ops/gemm/pipeline/gemm_pipeline_problem.hpp index 893c9d1ad3..0b38e7789e 100644 --- a/include/ck_tile/ops/gemm/pipeline/gemm_pipeline_problem.hpp +++ b/include/ck_tile/ops/gemm/pipeline/gemm_pipeline_problem.hpp @@ -30,7 +30,8 @@ struct GemmPipelineProblemBase using BLayout = remove_cvref_t; using CLayout = remove_cvref_t; - static constexpr bool TransposeC = Traits::TransposeC; + static constexpr bool TransposeC = Traits::TransposeC; + static constexpr bool UseStructuredSparsity = Traits::UseStructuredSparsity; static constexpr index_t kBlockSize = BlockGemmShape::NumWarps * get_warp_size(); diff --git a/include/ck_tile/ops/gemm/pipeline/tile_gemm_traits.hpp b/include/ck_tile/ops/gemm/pipeline/tile_gemm_traits.hpp index ecf861e4e8..a31004b425 100644 --- a/include/ck_tile/ops/gemm/pipeline/tile_gemm_traits.hpp +++ b/include/ck_tile/ops/gemm/pipeline/tile_gemm_traits.hpp @@ -12,8 +12,7 @@ template + typename CLayout_> struct TileGemmTraits { static constexpr bool kPadM = kPadM_; @@ -28,7 +27,7 @@ struct TileGemmTraits using CLayout = CLayout_; static constexpr bool TransposeC = false; - static constexpr bool UseStructuredSparsity = UseStructuredSparsity_; + static constexpr bool UseStructuredSparsity = false; }; template