From e9577d7ef349fd811da6df7495b13b9fb15aa395 Mon Sep 17 00:00:00 2001 From: "assistant-librarian[bot]" Date: Tue, 19 Aug 2025 08:14:30 +0000 Subject: [PATCH] Merge commit 'f38751fc2aa0f84bca7eab7ff4a588ae9cf16a24' into develop --- example/ck_tile/01_fmha/README.md | 2 +- example/ck_tile/02_layernorm2d/README.md | 2 +- example/ck_tile/03_gemm/README.md | 2 +- example/ck_tile/04_img2col/README.md | 2 +- example/ck_tile/06_permute/README.md | 2 +- example/ck_tile/09_topk_softmax/README.md | 2 +- example/ck_tile/10_rmsnorm2d/README.md | 2 +- .../11_add_rmsnorm2d_rdquant/README.md | 2 +- example/ck_tile/12_smoothquant/README.md | 2 +- example/ck_tile/13_moe_sorting/README.md | 2 +- example/ck_tile/14_moe_smoothquant/README.md | 2 +- example/ck_tile/16_batched_gemm/README.md | 2 +- example/ck_tile/17_grouped_gemm/README.md | 2 +- example/ck_tile/18_flatmm/README.md | 2 +- example/ck_tile/19_gemm_multi_d/README.md | 2 +- .../ck_tile/35_batched_transpose/README.md | 2 +- example/ck_tile/38_block_scale_gemm/README.md | 2 +- example/ck_tile/39_copy/README.md | 2 +- script/cmake-ck-dev.sh | 47 ++++++++++--------- script/cmake-ck-release.sh | 34 -------------- test/ck_tile/memory_copy/README.md | 2 +- tile_engine/ops/gemm/README.md | 4 +- tile_engine/ops/gemm_multi_d/README.md | 4 +- 23 files changed, 48 insertions(+), 79 deletions(-) delete mode 100755 script/cmake-ck-release.sh diff --git a/example/ck_tile/01_fmha/README.md b/example/ck_tile/01_fmha/README.md index 72109a660b..f72d7afa02 100644 --- a/example/ck_tile/01_fmha/README.md +++ b/example/ck_tile/01_fmha/README.md @@ -7,7 +7,7 @@ This folder contains example for fmha(fused multi-head attention) using ck_tile # in the root of ck_tile mkdir build && cd build # you can replace with the appropriate architecture (for example gfx90a or gfx942) or leave it blank -sh ../script/cmake-ck-dev.sh ../ +../script/cmake-ck-dev.sh ../ make tile_example_fmha_fwd -j ``` This will result in an executable 
`build/bin/tile_example_fmha_fwd` diff --git a/example/ck_tile/02_layernorm2d/README.md b/example/ck_tile/02_layernorm2d/README.md index 817f62dae7..da74e2e3c1 100644 --- a/example/ck_tile/02_layernorm2d/README.md +++ b/example/ck_tile/02_layernorm2d/README.md @@ -42,7 +42,7 @@ return hidden_states, per_token_scale ``` # in the root of ck_tile mkdir build && cd build -sh ../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... +../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... make tile_example_layernorm2d_fwd -j ``` This will result in an executable `build/bin/tile_example_layernorm2d_fwd` diff --git a/example/ck_tile/03_gemm/README.md b/example/ck_tile/03_gemm/README.md index c9e392dbd5..6358b76fd9 100644 --- a/example/ck_tile/03_gemm/README.md +++ b/example/ck_tile/03_gemm/README.md @@ -7,7 +7,7 @@ This folder contains example for GEMM using ck_tile tile-programming implementat # in the root of ck_tile mkdir build && cd build # you can replace with the appropriate architecture (for example gfx90a or gfx942) or leave it blank -sh ../script/cmake-ck-dev.sh ../ +../script/cmake-ck-dev.sh ../ # The basic pipeline method on the gemm calculation make tile_example_gemm_basic -j # The memory bound pipeline on the gemm calculation diff --git a/example/ck_tile/04_img2col/README.md b/example/ck_tile/04_img2col/README.md index df5c51a9c0..3b1b6f999b 100644 --- a/example/ck_tile/04_img2col/README.md +++ b/example/ck_tile/04_img2col/README.md @@ -7,7 +7,7 @@ This folder contains example for Image to Column using ck_tile tile-programming # in the root of ck_tile mkdir build && cd build # you can replace with the appropriate architecture (for example gfx90a or gfx942) or leave it blank -sh ../script/cmake-ck-dev.sh ../ +../script/cmake-ck-dev.sh ../ make tile_example_img2col -j ``` This will result in an executable `build/bin/tile_example_img2col` diff --git a/example/ck_tile/06_permute/README.md b/example/ck_tile/06_permute/README.md 
index 03bd810ff4..5e88e71572 100644 --- a/example/ck_tile/06_permute/README.md +++ b/example/ck_tile/06_permute/README.md @@ -15,7 +15,7 @@ args: ``` # in the root of ck_tile mkdir build && cd build -sh ../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... +../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... make tile_example_permute -j ``` This will result in an executable `build/bin/tile_example_permute` diff --git a/example/ck_tile/09_topk_softmax/README.md b/example/ck_tile/09_topk_softmax/README.md index 1043012900..2e15aeaae5 100644 --- a/example/ck_tile/09_topk_softmax/README.md +++ b/example/ck_tile/09_topk_softmax/README.md @@ -6,7 +6,7 @@ This folder contains example for topk-softmax kernel using ck_tile tile-programm ``` # in the root of ck_tile mkdir build && cd build -sh ../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... +../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... make tile_example_topk_softmax -j ``` This will result in an executable `build/bin/tile_example_topk_softmax` diff --git a/example/ck_tile/10_rmsnorm2d/README.md b/example/ck_tile/10_rmsnorm2d/README.md index c067496477..1d27ad153e 100644 --- a/example/ck_tile/10_rmsnorm2d/README.md +++ b/example/ck_tile/10_rmsnorm2d/README.md @@ -6,7 +6,7 @@ This folder contains example for Rmsnorm2D forward using ck_tile tile-programmin ``` # in the root of ck_tile mkdir build && cd build -sh ../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... +../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... 
make tile_rmsnorm2d_fwd -j ``` This will result in an executable `build/bin/tile_rmsnorm2d_fwd` diff --git a/example/ck_tile/11_add_rmsnorm2d_rdquant/README.md b/example/ck_tile/11_add_rmsnorm2d_rdquant/README.md index 960369b78d..f9ba76c9e3 100644 --- a/example/ck_tile/11_add_rmsnorm2d_rdquant/README.md +++ b/example/ck_tile/11_add_rmsnorm2d_rdquant/README.md @@ -6,7 +6,7 @@ This folder contains example for add + Rmsnorm2D + rowwise dynamic quantization ``` # in the root of ck_tile mkdir build && cd build -sh ../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... +../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... make tile_add_rmsnorm2d_rdquant_fwd -j ``` This will result in an executable `build/bin/tile_add_rmsnorm2d_rdquant_fwd` diff --git a/example/ck_tile/12_smoothquant/README.md b/example/ck_tile/12_smoothquant/README.md index d6b815f8cf..6b3acd558b 100644 --- a/example/ck_tile/12_smoothquant/README.md +++ b/example/ck_tile/12_smoothquant/README.md @@ -6,7 +6,7 @@ This folder contains example for smoothquant using ck_tile tile-programming impl ``` # in the root of ck_tile mkdir build && cd build -sh ../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... +../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... make tile_smoothquant -j ``` This will result in an executable `build/bin/tile_smoothquant` diff --git a/example/ck_tile/13_moe_sorting/README.md b/example/ck_tile/13_moe_sorting/README.md index 1822ff3a37..c99f40aa57 100644 --- a/example/ck_tile/13_moe_sorting/README.md +++ b/example/ck_tile/13_moe_sorting/README.md @@ -6,7 +6,7 @@ This folder contains example for moe-sorting kernel using ck_tile tile-programmi ``` # in the root of ck_tile mkdir build && cd build -sh ../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... +../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... 
make tile_example_moe_sorting -j ``` This will result in an executable `build/bin/tile_example_moe_sorting` diff --git a/example/ck_tile/14_moe_smoothquant/README.md b/example/ck_tile/14_moe_smoothquant/README.md index 599b4c3489..c10a922607 100644 --- a/example/ck_tile/14_moe_smoothquant/README.md +++ b/example/ck_tile/14_moe_smoothquant/README.md @@ -9,7 +9,7 @@ Unlike standard smoothquant op, the input scale is from different expert `[exper ``` # in the root of ck_tile mkdir build && cd build -sh ../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... +../script/cmake-ck-dev.sh ../ # you can replace this to gfx90a, gfx942... make tile_example_moe_smoothquant -j ``` This will result in an executable `build/bin/tile_example_moe_smoothquant` diff --git a/example/ck_tile/16_batched_gemm/README.md b/example/ck_tile/16_batched_gemm/README.md index 34b56db526..8a64a3912c 100644 --- a/example/ck_tile/16_batched_gemm/README.md +++ b/example/ck_tile/16_batched_gemm/README.md @@ -7,7 +7,7 @@ This folder contains example for batched GEMM using ck_tile tile-programming imp # in the root of ck_tile mkdir build && cd build # you can replace with the appropriate architecture (for example gfx90a or gfx942) or leave it blank -sh ../script/cmake-ck-dev.sh ../ +../script/cmake-ck-dev.sh ../ make tile_example_batched_gemm -j ``` This will result in an executable `build/bin/tile_example_batched_gemm` diff --git a/example/ck_tile/17_grouped_gemm/README.md b/example/ck_tile/17_grouped_gemm/README.md index 29642e96c1..8715ee79e1 100644 --- a/example/ck_tile/17_grouped_gemm/README.md +++ b/example/ck_tile/17_grouped_gemm/README.md @@ -148,7 +148,7 @@ All the necessary parameters are set, the tiling is computed, the GEMM pipeline # in the root of ck_tile mkdir build && cd build # you can replace with the appropriate architecture (for example gfx90a or gfx942) or leave it blank -sh ../script/cmake-ck-dev.sh ../ +../script/cmake-ck-dev.sh ../ # The basic pipeline method on 
the gemm calculation make tile_example_grouped_gemm -j ``` diff --git a/example/ck_tile/18_flatmm/README.md b/example/ck_tile/18_flatmm/README.md index beaac785fc..eeaa7658bd 100644 --- a/example/ck_tile/18_flatmm/README.md +++ b/example/ck_tile/18_flatmm/README.md @@ -7,7 +7,7 @@ This folder contains example for FLATMM using ck_tile tile-programming implement # in the root of ck_tile mkdir build && cd build # you can replace with the appropriate architecture (for example gfx90a or gfx942) or leave it blank -sh ../script/cmake-ck-dev.sh ../ +../script/cmake-ck-dev.sh ../ # The basic pipeline method on the flatmm calculation make tile_example_flatmm_basic -j ``` diff --git a/example/ck_tile/19_gemm_multi_d/README.md b/example/ck_tile/19_gemm_multi_d/README.md index 7e8cd87546..2cf2b1ea03 100644 --- a/example/ck_tile/19_gemm_multi_d/README.md +++ b/example/ck_tile/19_gemm_multi_d/README.md @@ -8,7 +8,7 @@ This folder contains example for Multiple D GEMM using ck_tile tile-programming mkdir build && cd build #you can replace < arch> with the appropriate architecture(for example gfx90a or gfx942) or \ leave it blank -sh ../script/cmake-ck-dev.sh ../ +../script/cmake-ck-dev.sh ../ #The basic pipeline method on the gemm calculation make tile_example_gemm_multi_d_fp16 -j ``` diff --git a/example/ck_tile/35_batched_transpose/README.md b/example/ck_tile/35_batched_transpose/README.md index 38bb2b32e4..56e9610b35 100644 --- a/example/ck_tile/35_batched_transpose/README.md +++ b/example/ck_tile/35_batched_transpose/README.md @@ -6,7 +6,7 @@ This folder contains example for batched Transpose using ck_tile tile-programmin # in the root of ck_tile mkdir build && cd build # you can replace with the appropriate architecture (for example gfx90a or gfx942) or leave it blank -sh ../script/cmake-ck-dev.sh ../ +../script/cmake-ck-dev.sh ../ # Make the transpose executable make tile_example_batched_transpose -j ``` diff --git a/example/ck_tile/38_block_scale_gemm/README.md 
b/example/ck_tile/38_block_scale_gemm/README.md index 742a88dee7..fc905790f1 100644 --- a/example/ck_tile/38_block_scale_gemm/README.md +++ b/example/ck_tile/38_block_scale_gemm/README.md @@ -7,7 +7,7 @@ This folder contains example for Block Scale GEMM using ck_tile tile-programming # in the root of ck_tile mkdir build && cd build # you can replace with the appropriate architecture (for example gfx90a or gfx942) or leave it blank -sh ../script/cmake-ck-dev.sh ../ +../script/cmake-ck-dev.sh ../ # The aquant pipeline method on the gemm calculation make tile_example_gemm_aquant_basic -j ``` diff --git a/example/ck_tile/39_copy/README.md b/example/ck_tile/39_copy/README.md index fa98cc1de6..b5bc5d56be 100644 --- a/example/ck_tile/39_copy/README.md +++ b/example/ck_tile/39_copy/README.md @@ -12,7 +12,7 @@ This experimental kernel is intended for novice CK developers. It introduces the mkdir build && cd build # you can replace with the appropriate architecture # (for example gfx90a or gfx942) or leave it blank -sh ../script/cmake-ck-dev.sh ../ +../script/cmake-ck-dev.sh ../ # Make the copy kernel executable make tile_example_copy -j ``` diff --git a/script/cmake-ck-dev.sh b/script/cmake-ck-dev.sh index 25a1590808..b93555901e 100755 --- a/script/cmake-ck-dev.sh +++ b/script/cmake-ck-dev.sh @@ -1,44 +1,47 @@ #!/bin/bash -set -euo pipefail -IFS=$'\n\t' +# exit when a command exits with non-zero status; also when an unbound variable is referenced +set -eu +# pipefail is not available in every sh/dash; probe in a subshell so an unsupported option cannot abort the script, then enable it in this shell +if (set -o pipefail) 2>/dev/null; then set -o pipefail; fi +# when treating a string as a sequence, do not split on spaces +IFS=$(printf '\n\t') -rm -f CMakeCache.txt -rm -f *.cmake -rm -rf CMakeFiles +# clean the build system files +find . -name CMakeFiles -type d -exec rm -rfv {} + +find . -name CMakeCache.txt -type f -exec rm -rv {} + -MY_PROJECT_SOURCE=$1 +if [ $# -ge 1 ]; then + MY_PROJECT_SOURCE="$1" + shift 1 +else + MY_PROJECT_SOURCE=".." 
+fi +GPU_TARGETS="gfx908;gfx90a;gfx942" -if [ $# -ge 2 ]; then - case "$2" in - gfx*) - GPU_TARGETS=$2 - shift 2 +if [ $# -ge 1 ]; then + case "$1" in + gfx*) + GPU_TARGETS=$1 + shift 1 echo "GPU targets provided: $GPU_TARGETS" - REST_ARGS=$@ ;; *) - echo "No GPU targets provided, using default targets: gfx908;gfx90a;gfx942" - GPU_TARGETS="gfx908;gfx90a;gfx942" - shift 1 - REST_ARGS=$@ + echo "No GPU targets provided, using default targets: $GPU_TARGETS" ;; esac else - echo "No GPU targets provided, using default targets: gfx908;gfx90a;gfx942" - GPU_TARGETS="gfx908;gfx90a;gfx942" - shift 1 - REST_ARGS=$@ + echo "No GPU targets provided, using default targets: $GPU_TARGETS" fi cmake \ -D CMAKE_PREFIX_PATH=/opt/rocm/ \ -D CMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \ --D CMAKE_CXX_FLAGS="-std=c++20 -O3 -ftemplate-backtrace-limit=0 -fPIE -Wno-gnu-line-marker" \ +-D CMAKE_CXX_FLAGS="-ftemplate-backtrace-limit=0 -fPIE -Wno-gnu-line-marker" \ -D CMAKE_BUILD_TYPE=Release \ -D BUILD_DEV=ON \ -D GPU_TARGETS=$GPU_TARGETS \ -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \ -D USE_BITINT_EXTENSION_INT4=OFF \ -$REST_ARGS \ +"$@" \ ${MY_PROJECT_SOURCE} diff --git a/script/cmake-ck-release.sh b/script/cmake-ck-release.sh deleted file mode 100755 index 5263de92c8..0000000000 --- a/script/cmake-ck-release.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -set -euo pipefail -IFS=$'\n\t' - -rm -f CMakeCache.txt -rm -f *.cmake -rm -rf CMakeFiles - -MY_PROJECT_SOURCE=$1 - -if [ $# -ge 2 ] && [[ "$2" =~ ^gfx ]]; then - GPU_TARGETS=$2 - shift 2 - echo "GPU targets provided: $GPU_TARGETS" - REST_ARGS=$@ -else - echo "No GPU targets provided, using default targets: gfx908;gfx90a;gfx942" - GPU_TARGETS="gfx908;gfx90a;gfx942" - shift 1 - REST_ARGS=$@ -fi - -cmake \ --D CMAKE_PREFIX_PATH=/opt/rocm \ --D CMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \ --D CMAKE_CXX_FLAGS="-O3" \ --D CMAKE_BUILD_TYPE=Release \ --D BUILD_DEV=OFF \ --D GPU_TARGETS=$GPU_TARGETS \ --D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \ --D 
USE_BITINT_EXTENSION_INT4=OFF \ -$REST_ARGS \ -${MY_PROJECT_SOURCE} - diff --git a/test/ck_tile/memory_copy/README.md b/test/ck_tile/memory_copy/README.md index 7856f0b4bd..9c56052b64 100644 --- a/test/ck_tile/memory_copy/README.md +++ b/test/ck_tile/memory_copy/README.md @@ -12,7 +12,7 @@ is moved to output DRAM window for a simple copy operation. mkdir build && cd build # you can replace with the appropriate architecture # (for example gfx90a or gfx942) or leave it blank -sh ../script/cmake-ck-dev.sh ../ +../script/cmake-ck-dev.sh ../ # Make the copy kernel executable make test_copy -j ``` diff --git a/tile_engine/ops/gemm/README.md b/tile_engine/ops/gemm/README.md index a16b74d297..79152a1a0d 100644 --- a/tile_engine/ops/gemm/README.md +++ b/tile_engine/ops/gemm/README.md @@ -20,7 +20,7 @@ mkdir build && cd build # replace [Arch] with the appropriate architecture or leave blank and # replace [Datatype1;Datatype2;...] in comma separated datatypes string (possible datatypes are [fp8, bf8, int8, fp16, bf16]) # replace [Layout1;Layout2;...] 
in comma separated datatypes string (possible layouts are [rcr, rrr, crr, ccr]) -sh ../script/cmake-ck-dev.sh ../ [Arch] -DGEMM_DATATYPE="[Datatype1;Datatype2]" -DGEMM_LAYOUT="[Layout1;Layout2]" +../script/cmake-ck-dev.sh ../ [Arch] -DGEMM_DATATYPE="[Datatype1;Datatype2]" -DGEMM_LAYOUT="[Layout1;Layout2]" # generate different executable for each passed datatype make benchmark_gemm_[Datatype1]_[Layout1] -j make benchmark_gemm_[Datatype1]_[Layout2] -j @@ -38,7 +38,7 @@ rm -rf tile_engine/ && make benchmark_gemm_[Datatypes]_[Layout] -j # rebuild ## For eaxmple build for gfx942 for fp8 and fp16 datatypes with rcr layout ``` bash mkdir build && cd build -sh ../script/cmake-ck-dev.sh ../ gfx942 -DGEMM_DATATYPE="fp8;fp16" -DGEMM_LAYOUT="rcr" +../script/cmake-ck-dev.sh ../ gfx942 -DGEMM_DATATYPE="fp8;fp16" -DGEMM_LAYOUT="rcr" make benchmark_gemm_fp8_rcr -j make benchmark_gemm_fp16_rcr -j ``` diff --git a/tile_engine/ops/gemm_multi_d/README.md b/tile_engine/ops/gemm_multi_d/README.md index 369553b121..66f0ed80af 100644 --- a/tile_engine/ops/gemm_multi_d/README.md +++ b/tile_engine/ops/gemm_multi_d/README.md @@ -21,7 +21,7 @@ mkdir build && cd build # replace [Datatype] in comma separated datatypes string (possible datatypes are [fp16]) # replace [Layout1;Layout2;...] in comma separated datatypes string (possible layouts are [rcr, rrr, crr, ccr]) # replace "mul" with either of mul,add,passthrough for Elementwise function as Multiply, Add or Passthrough respectively. If this is not specified it is considered as mul by default. 
-sh ../script/cmake-ck-dev.sh ../ [Arch] -DGEMM_MULTI_D_DATATYPE="[Datatype]" -DGEMM_MULTI_D_LAYOUT="[Layout1;Layout2]" -DGEMM_MULTI_D_ELEMENTWISE_FUNCTION="mul" +../script/cmake-ck-dev.sh ../ [Arch] -DGEMM_MULTI_D_DATATYPE="[Datatype]" -DGEMM_MULTI_D_LAYOUT="[Layout1;Layout2]" -DGEMM_MULTI_D_ELEMENTWISE_FUNCTION="mul" # generate different executable for each passed datatype make benchmark_gemm_multi_d_[Datatype]_[Layout1] -j make benchmark_gemm_multi_d_[Datatype]_[Layout2] -j @@ -37,7 +37,7 @@ rm -rf tile_engine/ && make benchmark_gemm_multi_d_[Datatype]_[Layout] -j # reb ## For eaxmple build for gfx942 for datatype with rcr layout ``` bash mkdir build && cd build -sh ../script/cmake-ck-dev.sh ../ gfx942 -DGEMM_MULTI_D_DATATYPE="fp16" -DGEMM_MULTI_D_LAYOUT="rcrr" +../script/cmake-ck-dev.sh ../ gfx942 -DGEMM_MULTI_D_DATATYPE="fp16" -DGEMM_MULTI_D_LAYOUT="rcrr" make benchmark_gemm_multi_d_fp16_rcrr -j ## benchmark_gemm inputs