Add scripts for testing hstu attention fwd with drop-out

This commit is contained in:
Qianfeng Zhang
2026-06-27 15:48:54 +00:00
parent 35234a492d
commit 52eff34d21
4 changed files with 280 additions and 14 deletions

View File

@@ -0,0 +1,58 @@
#!/bin/bash
## This script can be used the verifying the using of WarpGemm 32x32x16 which is used by hdim64 + softmax
BUILD=build
EXE="$BUILD/bin/tile_example_hstu_attention -softmax=0 -p_drop=0.2"
attn_scale=1.0
ndist=1
dtype="fp16"
for hdim in 96 64; do
set -x
## no masking batched
$EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=256 -causal=0 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## no masking jagged
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=0 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## batched causal
$EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=256 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged causal
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## batched causal+local
$EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=256 -causal=1 -local_len=5 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged causal+local
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## batched causal+local+context
$EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=256 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged causal+local+context
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## batched causal+local+context+target
$EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=256 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged causal+local+context+target
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged no-causal+local+context+target
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=0 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged causal+local+target (minfull_len > max_uih_len)
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=0 -minfull_len=290 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged causal+local+context+target (minfull_len > max_uih_len)
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=290 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged no-causal+local+context+target (minfull_len > max_uih_len)
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=0 -local_len=5 -context_len=3 -minfull_len=290 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
set +x
done

View File

@@ -18,45 +18,45 @@ for dtype in "fp16" "bf16"; do
set -x
## no masking batched
$EXE -v=1 -prec=$dtype -b=50 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=0 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
$EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=0 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## no masking jagged
$EXE -v=1 -prec=$dtype -b=50 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=0 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=0 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## batched causal
$EXE -v=1 -prec=$dtype -b=50 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
$EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged causal
$EXE -v=1 -prec=$dtype -b=50 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## batched causal+local
$EXE -v=1 -prec=$dtype -b=50 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=1 -local_len=5 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
$EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=1 -local_len=5 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged causal+local
$EXE -v=1 -prec=$dtype -b=50 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## batched causal+local+context
$EXE -v=1 -prec=$dtype -b=50 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
$EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged causal+local+context
$EXE -v=1 -prec=$dtype -b=50 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist
## batched causal+local+context+target
$EXE -v=1 -prec=$dtype -b=50 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
$EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged causal+local+context+target
$EXE -v=1 -prec=$dtype -b=50 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged no-causal+local+context+target
$EXE -v=1 -prec=$dtype -b=50 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=0 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=0 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged causal+local+target (minfull_len > max_uih_len)
$EXE -v=1 -prec=$dtype -b=50 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=0 -minfull_len=290 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=0 -minfull_len=290 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged causal+local+context+target (minfull_len > max_uih_len)
$EXE -v=1 -prec=$dtype -b=50 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=290 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=290 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
## jagged no-causal+local+context+target (minfull_len > max_uih_len)
$EXE -v=1 -prec=$dtype -b=50 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=0 -local_len=5 -context_len=3 -minfull_len=290 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
$EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=0 -local_len=5 -context_len=3 -minfull_len=290 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist
set +x
done

View File

@@ -0,0 +1,104 @@
#!/bin/bash
set +x
ndist=0
BUILD=build
USE_SOFTMAX=0
if [ $# -ge 1 ]; then
USE_SOFTMAX=$1
fi
Training=${TEST_HSTU_FWD_TRAINING:-0}
if [ $USE_SOFTMAX -eq 1 ]; then
EXE="$BUILD/bin/tile_example_hstu_attention -softmax=1 -training=$Training -p_drop=0.2"
else
EXE="$BUILD/bin/tile_example_hstu_attention -training=$Training -p_drop=0.2"
fi
dtype="bf16"
set -x
target8="10,10,14,17,16,12,14,9"
## seqlen 1024
$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 2048
$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 3072
$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 4096
$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 8192
$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 16384
$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
target16="13,17,16,13,7,14,3,18,15,15,1,9,18,18,7,10"
## seqlen 1024
$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 2048
$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 3072
$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 4096
$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 8192
$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 16384
$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
target32="13,17,16,13,7,14,3,18,15,15,1,9,18,18,7,10,11,0,4,8,2,10,20,14,11,7,4,6,9,7,14,17"
## seqlen 1024
$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 2048
$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 3072
$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 4096
$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 8192
$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 16384
$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
set +x

View File

@@ -0,0 +1,104 @@
#!/bin/bash
set +x
ndist=0
BUILD=build
USE_SOFTMAX=0
if [ $# -ge 1 ]; then
USE_SOFTMAX=$1
fi
Training=${TEST_HSTU_FWD_TRAINING:-0}
if [ $USE_SOFTMAX -eq 1 ]; then
EXE="$BUILD/bin/tile_example_hstu_attention -softmax=1 -training=$Training -p_drop=0.2"
else
EXE="$BUILD/bin/tile_example_hstu_attention -training=$Training -p_drop=0.2"
fi
dtype="bf16"
set -x
target8="10,10,14,17,16,12,14,9"
## seqlen 1024
$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 2048
$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 3072
$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 4096
$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 8192
$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 16384
$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
target16="13,17,16,13,7,14,3,18,15,15,1,9,18,18,7,10"
## seqlen 1024
$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 2048
$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 3072orm
$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 4096
$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 8192
$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 16384
$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
target32="13,17,16,13,7,14,3,18,15,15,1,9,18,18,7,10,11,0,4,8,2,10,20,14,11,7,4,6,9,7,14,17"
## seqlen 1024
$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 2048
$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 3072
$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 4096
$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 8192
$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
## seqlen 16384
$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist
echo -e ""
set +x