mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
RE-enable DL and DPP instances by default. (#1954)
* enable DL and DPP instances by default * fix cmake logic
This commit is contained in:
@@ -20,6 +20,7 @@ None
|
||||
|
||||
* Removed support for gfx940 and gfx941 targets (#1944)
|
||||
* Replaced the raw buffer load/store intrinsics with Clang20 built-ins (#1876)
|
||||
* DL and DPP kernels are now enabled by default.
|
||||
|
||||
### Known issues
|
||||
|
||||
|
||||
@@ -94,12 +94,14 @@ add_compile_options(-Wno-pass-failed)
|
||||
add_compile_options(-Wno-switch-default)
|
||||
add_compile_options(-Wno-unique-object-duplication)
|
||||
|
||||
if(DL_KERNELS)
|
||||
if(NOT DISABLE_DL_KERNELS)
|
||||
add_definitions(-DDL_KERNELS)
|
||||
set(DL_KERNELS "ON")
|
||||
set(CK_ENABLE_DL_KERNELS "ON")
|
||||
endif()
|
||||
if(DPP_KERNELS)
|
||||
if(NOT DISABLE_DPP_KERNELS)
|
||||
add_definitions(-DDPP_KERNELS)
|
||||
set(DPP_KERNELS "ON")
|
||||
set(CK_ENABLE_DPP_KERNELS "ON")
|
||||
endif()
|
||||
option(CK_USE_CODEGEN "Enable codegen library" OFF)
|
||||
|
||||
20
Jenkinsfile
vendored
20
Jenkinsfile
vendored
@@ -199,8 +199,8 @@ def cmake_build(Map conf=[:]){
|
||||
} else{
|
||||
setup_args = ' -DBUILD_DEV=On' + setup_args
|
||||
}
|
||||
if (params.DL_KERNELS){
|
||||
setup_args = setup_args + " -DDL_KERNELS=ON "
|
||||
if (params.DISABLE_DL_KERNELS){
|
||||
setup_args = setup_args + " -DDISABLE_DL_KERNELS=ON "
|
||||
}
|
||||
|
||||
if(build_type_debug){
|
||||
@@ -717,10 +717,10 @@ def process_results(Map conf=[:]){
|
||||
}
|
||||
|
||||
//launch develop branch daily at 23:00 UT in FULL_QA mode and at 19:00 UT with latest staging compiler version
|
||||
CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;ROCMVERSION=6.3;RUN_CK_TILE_FMHA_TESTS=true;RUN_CK_TILE_GEMM_TESTS=true
|
||||
CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;DISABLE_DL_KERNELS=true;ROCMVERSION=6.3;RUN_CK_TILE_FMHA_TESTS=true;RUN_CK_TILE_GEMM_TESTS=true
|
||||
0 21 * * * % ROCMVERSION=6.3;hipTensor_test=true;RUN_CODEGEN_TESTS=true
|
||||
0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;USE_SCCACHE=false;NINJA_BUILD_TRACE=true
|
||||
0 17 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-mainline;BUILD_COMPILER=/llvm-project/build/bin/clang++;USE_SCCACHE=false;NINJA_BUILD_TRACE=true
|
||||
0 19 * * * % BUILD_DOCKER=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;USE_SCCACHE=false;NINJA_BUILD_TRACE=true
|
||||
0 17 * * * % BUILD_DOCKER=true;COMPILER_VERSION=amd-mainline;BUILD_COMPILER=/llvm-project/build/bin/clang++;USE_SCCACHE=false;NINJA_BUILD_TRACE=true
|
||||
0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false
|
||||
0 13 * * * % BUILD_LEGACY_OS=true''' : ""
|
||||
|
||||
@@ -762,7 +762,7 @@ pipeline {
|
||||
defaultValue: false,
|
||||
description: "Select whether to run small set of performance tests (default) or full QA")
|
||||
booleanParam(
|
||||
name: "DL_KERNELS",
|
||||
name: "DISABLE_DL_KERNELS",
|
||||
defaultValue: false,
|
||||
description: "Select whether to build DL kernels (default: OFF)")
|
||||
booleanParam(
|
||||
@@ -861,7 +861,7 @@ pipeline {
|
||||
| grep -v 'build/' \
|
||||
| xargs -n 1 -P 1 -I{} -t sh -c \'clang-format-12 -style=file {} | diff - {}\' && \
|
||||
/cppcheck/build/bin/cppcheck ../* -v -j \$(nproc) -I ../include -I ../profiler/include -I ../library/include \
|
||||
-D CK_ENABLE_FP64 -D CK_ENABLE_FP32 -D CK_ENABLE_FP16 -D CK_ENABLE_FP8 -D CK_ENABLE_BF16 -D CK_ENABLE_BF8 -D CK_ENABLE_INT8 -D DL_KERNELS \
|
||||
-D CK_ENABLE_FP64 -D CK_ENABLE_FP32 -D CK_ENABLE_FP16 -D CK_ENABLE_FP8 -D CK_ENABLE_BF16 -D CK_ENABLE_BF8 -D CK_ENABLE_INT8 \
|
||||
-D __gfx908__ -D __gfx90a__ -D __gfx942__ -D __gfx1030__ -D __gfx1100__ -D __gfx1101__ -D __gfx1102__ \
|
||||
-U __gfx803__ -U __gfx900__ -U __gfx906__ -U CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4 \
|
||||
--file-filter=*.cpp --force --enable=all --output-file=ck_cppcheck.log"
|
||||
@@ -1164,7 +1164,7 @@ pipeline {
|
||||
}
|
||||
agent{ label rocmnode("gfx1030") }
|
||||
environment{
|
||||
setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1030" -DDL_KERNELS=ON -DCMAKE_CXX_FLAGS=" -O3 " """
|
||||
setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1030" -DCMAKE_CXX_FLAGS=" -O3 " """
|
||||
execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && \
|
||||
cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" \
|
||||
-DGPU_TARGETS="gfx1030" \
|
||||
@@ -1184,7 +1184,7 @@ pipeline {
|
||||
}
|
||||
agent{ label rocmnode("gfx1101") }
|
||||
environment{
|
||||
setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1101" -DDL_KERNELS=ON -DCMAKE_CXX_FLAGS=" -O3 " """
|
||||
setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1101" -DCMAKE_CXX_FLAGS=" -O3 " """
|
||||
execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && \
|
||||
cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" \
|
||||
-DGPU_TARGETS="gfx1101" \
|
||||
@@ -1204,7 +1204,7 @@ pipeline {
|
||||
}
|
||||
agent{ label rocmnode("gfx1201") }
|
||||
environment{
|
||||
setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1201" -DDL_KERNELS=ON -DCMAKE_CXX_FLAGS=" -O3 " """
|
||||
setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1201" -DCMAKE_CXX_FLAGS=" -O3 " """
|
||||
execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && \
|
||||
cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" \
|
||||
-DGPU_TARGETS="gfx1201" \
|
||||
|
||||
@@ -158,12 +158,12 @@ Additional cmake flags can be used to significantly speed-up the build:
|
||||
instances of select data types only. The main default data types are fp32 and fp16; you can safely skip
|
||||
other data types.
|
||||
|
||||
* `DL_KERNELS` (default is OFF) must be set to ON in order to build instances, such as `gemm_dl` or
|
||||
* `DISABLE_DL_KERNELS` (default is OFF) must be set to ON in order not to build instances, such as `gemm_dl` or
|
||||
`batched_gemm_multi_d_dl`. These instances are useful on architectures like the NAVI2x, as most
|
||||
other platforms have faster instances, such as `xdl` or `wmma`, available.
|
||||
|
||||
* `DPP_KERNELS` (default is OFF) must be set to ON in order to build instances, such as `gemm_dpp`.
|
||||
These instances are useful on architectures like the NAVI2x, as most other platforms have faster instances, such as `xdl` or `wmma`, available.
|
||||
* `DISABLE_DPP_KERNELS` (default is OFF) must be set to ON in order not to build instances, such as `gemm_dpp`.
|
||||
These instances offer a slightly better performance of fp16 gemms on NAVI2x. But on other architectures faster alternatives are available.
|
||||
|
||||
* `CK_USE_FP8_ON_UNSUPPORTED_ARCH` (default is OFF) must be set to ON in order to build instances,
|
||||
such as `gemm_universal`, `gemm_universal_streamk` and `gemm_multiply_multiply` for fp8 data type for GPU targets which do not have native support for fp8 data type, such as gfx908 or gfx90a. These instances are useful on
|
||||
|
||||
Reference in New Issue
Block a user