mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
Vectorized Transpose for Batched Transpose CK Tile Operator (#2131)
* Shared Memory for single data point * CKTile Transpose vectorize CP1 * CKTile Transpose vectorize CP2 * CKTile Transpose vectorize CP2.1 * fixed the compile error of the transpose tile 2d * Have the correct result for the current test sample * Changes to printing tensor * fp8 support added * Debugging for transpose * solving the corner issue * Changed padding flag * Intermideate Debugging * Intermidiate Debugging * Intermediate Debugging * Finished debugging of the transpose op * Code Cleanup * Adding edge case smoke tests * Adding Transpose test to CI/CD * Adding Transpose test to CI/CD * Adding Transpose test to CI/CD * Addressing Review Comment * Addressing Comments * Addressing Comments * Measuring Perf Tests * Code Cleanup * Changlog * Added the running iterations * clang format * Fix the changelog * Fix the compilation error * change the printing factor --------- Co-authored-by: ThruptiRajLakshmanaGowda <tlakshma@amd.com>
This commit is contained in:
11
example/ck_tile/35_batched_transpose/script/perf_test.sh
Executable file
11
example/ck_tile/35_batched_transpose/script/perf_test.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/bin/sh
|
||||
|
||||
EXE=./build/bin/tile_example_batched_transpose
|
||||
|
||||
for pr in "fp8" "fp16" "bf16"; do
|
||||
$EXE -pr=$pr -N=1 -C=64 -H=1 -W=64 -layout_in='NCHW' -layout_out='NHWC'
|
||||
$EXE -pr=$pr -N=1 -C=1024 -H=1 -W=1024 -layout_in='NCHW' -layout_out='NHWC'
|
||||
$EXE -pr=$pr -N=1 -C=1024 -H=1 -W=2048 -layout_in='NCHW' -layout_out='NHWC'
|
||||
$EXE -pr=$pr -N=1 -C=4096 -H=1 -W=2048 -layout_in='NCHW' -layout_out='NHWC'
|
||||
|
||||
done
|
||||
38
example/ck_tile/35_batched_transpose/script/run_full_test.sh
Executable file
38
example/ck_tile/35_batched_transpose/script/run_full_test.sh
Executable file
@@ -0,0 +1,38 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# in order to run this script you'd first need to build the tile_example_batched_transpose executables in ../build/bin/
|
||||
#
|
||||
# run the script as "./run_full_test.sh <tag for your test environment> <branch name> <host name> <gpu_arch>
|
||||
# input arguments:
|
||||
# environment tag : a string describing the specifics of your test environment
|
||||
# branch name : name of the branch in git repo (git status | grep -e 'On branch')
|
||||
# host name : $hostname
|
||||
# gpu architecture: e.g., gfx90a, or gfx942, etc.
|
||||
|
||||
#get the command line arguments:
|
||||
export env_type=$1
|
||||
echo 'Environment type: ' $env_type
|
||||
export branch=$2
|
||||
echo 'Branch name: ' $branch
|
||||
export host_name=$3
|
||||
echo 'Host name: ' $host_name
|
||||
export GPU_arch=$4
|
||||
echo 'GPU_arch: ' $GPU_arch
|
||||
|
||||
function print_log_header(){
|
||||
rm -f $1;
|
||||
echo 'On branch ' $3 &> $1;
|
||||
echo 'Node name: ' $4 >> $1;
|
||||
#get GPU_arch and number of compute units from rocminfo
|
||||
echo -n "GPU_arch: " >> $1; rocminfo | grep "Name:" | grep "gfx" >> $1;
|
||||
rocminfo | grep "Compute Unit:" >> $1;
|
||||
hipcc --version | grep -e 'HIP version' >> $1;
|
||||
echo 'Environment type: ' $2 >> $1;
|
||||
/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> $1;
|
||||
}
|
||||
|
||||
#run verification tests
|
||||
example/ck_tile/35_batched_transpose/script/smoke_test.sh
|
||||
|
||||
#run performance benchmarks
|
||||
|
||||
@@ -2,10 +2,26 @@
|
||||
|
||||
EXE=./build/bin/tile_example_batched_transpose
|
||||
|
||||
for pr in "fp32" "fp16" "int8" ; do
|
||||
for pr in "fp8" "fp16" "bf16"; do
|
||||
$EXE -pr=$pr -N=1 -C=32 -H=1 -W=32 -layout_in='NCHW' -layout_out='NHWC'
|
||||
$EXE -pr=$pr -N=1 -C=64 -H=1 -W=64 -layout_in='NCHW' -layout_out='NHWC'
|
||||
$EXE -pr=$pr -N=2 -C=12 -H=1 -W=32 -layout_in='NHWC' -layout_out='NCHW'
|
||||
$EXE -pr=$pr -N=3 -C=1334 -H=1 -W=37 -layout_in='NHWC' -layout_out='NCHW'
|
||||
$EXE -pr=$pr -N=4 -C=27 -H=1 -W=32 -layout_in='NCHW' -layout_out='NHWC'
|
||||
$EXE -pr=$pr -N=5 -C=1234 -H=1 -W=12 -layout_in='NCHW' -layout_out='NHWC'
|
||||
done
|
||||
$EXE -pr=$pr -N=1 -C=1 -H=1 -W=1 -layout_in='NCHW' -layout_out='NHWC'
|
||||
$EXE -pr=$pr -N=1 -C=1 -H=1 -W=1 -layout_in='NHWC' -layout_out='NCHW'
|
||||
$EXE -pr=$pr -N=128 -C=1024 -H=64 -W=64 -layout_in='NCHW' -layout_out='NHWC'
|
||||
$EXE -pr=$pr -N=128 -C=1024 -H=64 -W=64 -layout_in='NHWC' -layout_out='NCHW'
|
||||
$EXE -pr=$pr -N=16 -C=64 -H=32 -W=128 -layout_in='NCHW' -layout_out='NHWC'
|
||||
$EXE -pr=$pr -N=16 -C=64 -H=128 -W=32 -layout_in='NHWC' -layout_out='NCHW'
|
||||
$EXE -pr=$pr -N=1 -C=2048 -H=1 -W=1 -layout_in='NCHW' -layout_out='NHWC'
|
||||
$EXE -pr=$pr -N=1 -C=2048 -H=1 -W=1 -layout_in='NHWC' -layout_out='NCHW'
|
||||
$EXE -pr=$pr -N=1 -C=1 -H=1024 -W=1024 -layout_in='NCHW' -layout_out='NHWC'
|
||||
$EXE -pr=$pr -N=1 -C=1 -H=1024 -W=1024 -layout_in='NHWC' -layout_out='NCHW'
|
||||
$EXE -pr=$pr -N=8 -C=16 -H=8 -W=16 -layout_in='NCHW' -layout_out='NHWC'
|
||||
$EXE -pr=$pr -N=8 -C=16 -H=8 -W=16 -layout_in='NHWC' -layout_out='NCHW'
|
||||
$EXE -pr=$pr -N=1 -C=64 -H=1 -W=1024 -layout_in='NCHW' -layout_out='NHWC'
|
||||
$EXE -pr=$pr -N=1 -C=64 -H=1024 -W=1 -layout_in='NHWC' -layout_out='NCHW'
|
||||
|
||||
done
|
||||
Reference in New Issue
Block a user