mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 17:26:00 +00:00
* start fixing 16bit data packing * adding StaticTensor * adding StaticTensor * adding StaticTensor * add missing constexpr * adding static tensor * adding static tensor * adding transpose * add inline asm for transpose 2x2 of half_t * add general transpose_vectors(), but have unnecessary register initialization using v_mov * fix unnecessary register initialization in transpose_vector by using more pass-by-reference * add hardcoded logic for NHWC wrw * improve asm for v_pack * make ThreadwiseTensorSliceTransfer_v3r2 support any tensor * tweak * reorganize file
25 lines
873 B
Bash
Executable File
25 lines
873 B
Bash
Executable File
#!/bin/bash
|
|
|
|
## GPU visibility
|
|
export HIP_VISIBLE_DEVICES=0
|
|
|
|
make -j ckProfiler
|
|
|
|
DRIVER="./profiler/ckProfiler"
|
|
|
|
OP=$1
|
|
DATATYPE=$2
|
|
LAYOUT=$3
|
|
VERIFY=$4
|
|
INIT=$5
|
|
LOG=$6
|
|
REPEAT=$7
|
|
|
|
######## op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC
|
|
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 256 256 256 256 256 256
|
|
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 960 1024 1024 1024 1024 1024
|
|
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1024 1024 1024
|
|
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 2048 2048 2048
|
|
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 4096 4096 4096
|
|
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 8192 8192 8192
|