From fb09061b0c29a6677755f31ea097abd27d8660c5 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Tue, 12 Aug 2025 03:04:27 +0000 Subject: [PATCH] Add norm_dist parameter for hstu example to select either normal or uniform distribution to initialize data --- example/ck_tile/18_hstu_attention/README.md | 1 + .../example_hstu_attention.cpp | 27 ++++++++---- .../scripts/test_hstu_attention.sh | 34 ++++++++------ .../test_jagged_causal_mattn0_full0.sh | 42 ++++++++++-------- .../test_jagged_causal_mattn256_full0.sh | 42 ++++++++++-------- .../test_jagged_causal_mattn256_full256.sh | 44 +++++++++++-------- 6 files changed, 113 insertions(+), 77 deletions(-) diff --git a/example/ck_tile/18_hstu_attention/README.md b/example/ck_tile/18_hstu_attention/README.md index 0d844822b6..3d52ba825d 100644 --- a/example/ck_tile/18_hstu_attention/README.md +++ b/example/ck_tile/18_hstu_attention/README.md @@ -54,6 +54,7 @@ .insert("minfull_len", "6", "sequence length at the end of the query sequence that should be included for attention") .insert("init_qkv", "0", "initialize q, k, v tensor from local files q.dat, k.dat and v.data") .insert("seed", "13579", "seed by the uniform or normal distribution generator") + .insert("norm_dist", "0", "if true, initialize the data in normal distribution, or else in uniform distribution") .insert("alpha", "0", "scale factor of S=Q@K. 
0 means equal to 1/sqrt(hdim)") .insert("attn_scale", "0", "scale factor of SiLu(Q@K), 0 means using 1/max_seqlen for scaling") .insert("save_mask", "1", "save the mask tensor to disk by the CPU validation codes") diff --git a/example/ck_tile/18_hstu_attention/example_hstu_attention.cpp b/example/ck_tile/18_hstu_attention/example_hstu_attention.cpp index c82651f552..eaff1800b1 100644 --- a/example/ck_tile/18_hstu_attention/example_hstu_attention.cpp +++ b/example/ck_tile/18_hstu_attention/example_hstu_attention.cpp @@ -108,6 +108,7 @@ auto create_args(int argc, char* argv[]) .insert("context_len", "6", "sequence length at the begin of the query sequence the should be included for attention") .insert("minfull_len", "6", "sequence length at the end of the query sequence that should be included for attention") .insert("seed", "13579", "seed by the uniform or normal distribution generator") + .insert("norm_dist", "0", "if true, initialize the data in normal distribution, or else in uniform distribution") .insert("alpha", "0", "scale factor of S=Q@K. 0 means equal to 1/sqrt(hdim)") .insert("attn_scale", "0", "scale factor of SiLU(Q@K). 
0 means using 1/max_seqlen for scaling") .insert("init_qkv", "0", "initialize q, k, v tensor from local files q.dat, k.dat and v.data") @@ -223,11 +224,12 @@ bool run(const ck_tile::ArgParser& arg_parser) int contextual_seqlen = arg_parser.get_int("context_len"); int min_full_attn_seqlen = arg_parser.get_int("minfull_len"); - float alpha = arg_parser.get_float("alpha"); - float attn_scale = arg_parser.get_float("attn_scale"); - int seed = arg_parser.get_int("seed"); - bool measure_perf = static_cast(arg_parser.get_int("perf")); - bool dump_output = static_cast(arg_parser.get_int("dump_output")); + float alpha = arg_parser.get_float("alpha"); + float attn_scale = arg_parser.get_float("attn_scale"); + int seed = arg_parser.get_int("seed"); + bool use_normal_dist = arg_parser.get_int("norm_dist"); + bool measure_perf = static_cast(arg_parser.get_int("perf")); + bool dump_output = static_cast(arg_parser.get_int("dump_output")); bool save_mask = static_cast(arg_parser.get_int("save_mask")); bool initialize_qkv = static_cast(arg_parser.get_int("init_qkv")); @@ -367,9 +369,18 @@ bool run(const ck_tile::ArgParser& arg_parser) if(!initialize_qkv) { - ck_tile::FillNormalDistribution{0.f, 1.f, seed}(q_host); - ck_tile::FillNormalDistribution{0.f, 1.f, seed}(k_host); - ck_tile::FillNormalDistribution{0.f, 1.f, seed}(v_host); + if(use_normal_dist) + { + ck_tile::FillNormalDistribution{0.f, 1.f, seed}(q_host); + ck_tile::FillNormalDistribution{0.f, 1.f, seed}(k_host); + ck_tile::FillNormalDistribution{0.f, 1.f, seed}(v_host); + } + else + { + ck_tile::FillUniformDistribution{-1.f, 1.f, seed}(q_host); + ck_tile::FillUniformDistribution{-1.f, 1.f, seed}(k_host); + ck_tile::FillUniformDistribution{-1.f, 1.f, seed}(v_host); + }; } else { diff --git a/example/ck_tile/18_hstu_attention/scripts/test_hstu_attention.sh b/example/ck_tile/18_hstu_attention/scripts/test_hstu_attention.sh index 311e9d29d3..18ddcaf759 100644 --- 
a/example/ck_tile/18_hstu_attention/scripts/test_hstu_attention.sh +++ b/example/ck_tile/18_hstu_attention/scripts/test_hstu_attention.sh @@ -8,49 +8,55 @@ if [ $# -ge 1 ]; then attn_scale=$1 fi +ndist=0 + +if [ $# -ge 2 ]; then + ndist=$2 +fi + for dtype in "fp16" "bf16"; do set -x ## no masking batched - $EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=0 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale + $EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=0 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist ## no masking jagged - $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=0 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=0 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist ## batched causal - $EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale + $EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist ## jagged causal - $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist ## batched causal+local - $EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=128 
-hdim_v=128 -seqlens=256 -causal=1 -local_len=5 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale + $EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=1 -local_len=5 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist ## jagged causal+local - $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist ## batched causal+local+context - $EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=0 -attn_scale=$attn_scale + $EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist ## jagged causal+local+context - $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=0 -attn_scale=$attn_scale + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist ## batched causal+local+context+target - $EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale + $EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=256 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist ## jagged 
causal+local+context+target - $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist ## jagged no-causal+local+context+target - $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=0 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=0 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist ## jagged causal+local+target (minfull_len > max_uih_len) - $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=0 -minfull_len=290 -targets=8 -attn_scale=$attn_scale + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=0 -minfull_len=290 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist ## jagged causal+local+context+target (minfull_len > max_uih_len) - $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=290 -targets=8 -attn_scale=$attn_scale + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=290 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist ## jagged no-causal+local+context+target (minfull_len > max_uih_len) - $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=0 
-local_len=5 -context_len=3 -minfull_len=290 -targets=8 -attn_scale=$attn_scale + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=300,300,290,280,310 -causal=0 -local_len=5 -context_len=3 -minfull_len=290 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist set +x done diff --git a/example/ck_tile/18_hstu_attention/scripts/test_jagged_causal_mattn0_full0.sh b/example/ck_tile/18_hstu_attention/scripts/test_jagged_causal_mattn0_full0.sh index 724096be3a..99de954415 100644 --- a/example/ck_tile/18_hstu_attention/scripts/test_jagged_causal_mattn0_full0.sh +++ b/example/ck_tile/18_hstu_attention/scripts/test_jagged_causal_mattn0_full0.sh @@ -1,5 +1,11 @@ #!/bin/bash +ndist=0 + +if [ $# -ge 1 ]; then + ndist=$1 +fi + set +x BUILD=build EXE=$BUILD/bin/tile_example_hstu_attention @@ -11,79 +17,79 @@ set -x target8="10,10,14,17,16,12,14,9" ## seqlen 1024 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 2048 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 3072 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 
-local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 4096 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 8192 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 16384 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" target16="13,17,16,13,7,14,3,18,15,15,1,9,18,18,7,10" ## seqlen 1024 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 2048 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 
-max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 3072 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 4096 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 8192 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 16384 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" 
target32="13,17,16,13,7,14,3,18,15,15,1,9,18,18,7,10,11,0,4,8,2,10,20,14,11,7,4,6,9,7,14,17" ## seqlen 1024 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 2048 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 3072 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 4096 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 8192 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 
-nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 16384 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" set +x diff --git a/example/ck_tile/18_hstu_attention/scripts/test_jagged_causal_mattn256_full0.sh b/example/ck_tile/18_hstu_attention/scripts/test_jagged_causal_mattn256_full0.sh index 5d3be552d9..339f7ca2cf 100644 --- a/example/ck_tile/18_hstu_attention/scripts/test_jagged_causal_mattn256_full0.sh +++ b/example/ck_tile/18_hstu_attention/scripts/test_jagged_causal_mattn256_full0.sh @@ -1,5 +1,11 @@ #!/bin/bash +ndist=0 + +if [ $# -ge 1 ]; then + ndist=$1 +fi + set +x BUILD=build EXE=$BUILD/bin/tile_example_hstu_attention @@ -11,79 +17,79 @@ set -x target8="10,10,14,17,16,12,14,9" ## seqlen 1024 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 2048 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 
-norm_dist=$ndist echo -e "" ## seqlen 3072 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 4096 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 8192 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 16384 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" target16="13,17,16,13,7,14,3,18,15,15,1,9,18,18,7,10" ## seqlen 1024 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 
-nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 2048 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 3072 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 4096 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 8192 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 16384 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=256 
-context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" target32="13,17,16,13,7,14,3,18,15,15,1,9,18,18,7,10,11,0,4,8,2,10,20,14,11,7,4,6,9,7,14,17" ## seqlen 1024 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 2048 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 3072 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 4096 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 
-targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 8192 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 16384 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=256 -context_len=0 -minfull_len=0 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" set +x diff --git a/example/ck_tile/18_hstu_attention/scripts/test_jagged_causal_mattn256_full256.sh b/example/ck_tile/18_hstu_attention/scripts/test_jagged_causal_mattn256_full256.sh index d9ea45982b..605891f815 100644 --- a/example/ck_tile/18_hstu_attention/scripts/test_jagged_causal_mattn256_full256.sh +++ b/example/ck_tile/18_hstu_attention/scripts/test_jagged_causal_mattn256_full256.sh @@ -1,5 +1,11 @@ #!/bin/bash +ndist=0 + +if [ $# -ge 1 ]; then + ndist=$1 +fi + set +x BUILD=build EXE=$BUILD/bin/tile_example_hstu_attention @@ -11,79 +17,79 @@ set -x target8="10,10,14,17,16,12,14,9" ## seqlen 1024 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 2048 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 
-hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 3072 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 4096 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 8192 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 16384 -$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=8 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target8 -max_target=20 
-alpha=2.0 -norm_dist=$ndist echo -e "" target16="13,17,16,13,7,14,3,18,15,15,1,9,18,18,7,10" ## seqlen 1024 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 2048 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" -## seqlen 3072 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 +## seqlen 3072 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 4096 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 8192 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16
-max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 16384 -$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=16 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target16 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" target32="13,17,16,13,7,14,3,18,15,15,1,9,18,18,7,10,11,0,4,8,2,10,20,14,11,7,4,6,9,7,14,17" ## seqlen 1024 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=1004 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 2048 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=2028 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 3072 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=3052 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 
-alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 4096 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=4076 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 8192 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=8172 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" ## seqlen 16384 -$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 +$EXE -v=1 -prec=$dtype -b=32 -jagged=1 -nhead=4 -hdim_qk=128 -hdim_v=128 -seqlens=16364 -causal=1 -local_len=256 -context_len=0 -minfull_len=256 -targets=$target32 -max_target=20 -alpha=2.0 -norm_dist=$ndist echo -e "" set +x