From f79a29ac80d24b2e60e6c628e8891b489a80815a Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Fri, 12 Dec 2025 15:23:01 +0000 Subject: [PATCH] Rename and add scripts for testing hdim96 --- .../test_hstu_attention_hdim96_hdim64.sh | 58 +++++++++++++++++++ ...t_hstu_softmax_attention_hdim96_hdim64.sh} | 4 +- 2 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 example/ck_tile/18_hstu_attention/scripts/test_hstu_attention_hdim96_hdim64.sh rename example/ck_tile/18_hstu_attention/scripts/{test_hstu_softmax_attention_hdim64.sh => test_hstu_softmax_attention_hdim96_hdim64.sh} (99%) diff --git a/example/ck_tile/18_hstu_attention/scripts/test_hstu_attention_hdim96_hdim64.sh b/example/ck_tile/18_hstu_attention/scripts/test_hstu_attention_hdim96_hdim64.sh new file mode 100644 index 0000000000..ce2f765641 --- /dev/null +++ b/example/ck_tile/18_hstu_attention/scripts/test_hstu_attention_hdim96_hdim64.sh @@ -0,0 +1,58 @@ +#!/bin/bash +## This script can be used for verifying the use of WarpGemm 32x32x16 which is used by hdim64 + softmax + +BUILD=build +EXE="$BUILD/bin/tile_example_hstu_attention -softmax=0" + +attn_scale=1.0 +ndist=1 + +dtype="fp16" + +for hdim in 96 64; do + set -x + + ## no masking batched + $EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=256 -causal=0 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist + + ## no masking jagged + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=0 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist + + ## batched causal + $EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=256 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist + + ## jagged causal + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim 
-seqlens=300,300,290,280,310 -causal=1 -local_len=0 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist + + ## batched causal+local + $EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=256 -causal=1 -local_len=5 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist + + ## jagged causal+local + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=0 -minfull_len=0 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist + + ## batched causal+local+context + $EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=256 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist + + ## jagged causal+local+context + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=0 -attn_scale=$attn_scale -norm_dist=$ndist + + ## batched causal+local+context+target + $EXE -v=1 -prec=$dtype -b=10 -jagged=0 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=256 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist + + ## jagged causal+local+context+target + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist + + ## jagged no-causal+local+context+target + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=0 -local_len=5 -context_len=8 -minfull_len=7 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist + + ## jagged causal+local+target (minfull_len > max_uih_len) + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim 
-seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=0 -minfull_len=290 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist + + ## jagged causal+local+context+target (minfull_len > max_uih_len) + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=1 -local_len=5 -context_len=8 -minfull_len=290 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist + + ## jagged no-causal+local+context+target (minfull_len > max_uih_len) + $EXE -v=1 -prec=$dtype -b=10 -jagged=1 -nhead=4 -hdim_qk=$hdim -hdim_v=$hdim -seqlens=300,300,290,280,310 -causal=0 -local_len=5 -context_len=3 -minfull_len=290 -targets=8 -attn_scale=$attn_scale -norm_dist=$ndist + + set +x +done diff --git a/example/ck_tile/18_hstu_attention/scripts/test_hstu_softmax_attention_hdim64.sh b/example/ck_tile/18_hstu_attention/scripts/test_hstu_softmax_attention_hdim96_hdim64.sh similarity index 99% rename from example/ck_tile/18_hstu_attention/scripts/test_hstu_softmax_attention_hdim64.sh rename to example/ck_tile/18_hstu_attention/scripts/test_hstu_softmax_attention_hdim96_hdim64.sh index ec0d78c2ef..7aacfb2e12 100644 --- a/example/ck_tile/18_hstu_attention/scripts/test_hstu_softmax_attention_hdim64.sh +++ b/example/ck_tile/18_hstu_attention/scripts/test_hstu_softmax_attention_hdim96_hdim64.sh @@ -7,9 +7,9 @@ EXE="$BUILD/bin/tile_example_hstu_attention -softmax=1" attn_scale=1.0 ndist=1 -dtype = "fp16" +dtype="fp16" -for hdim in 256 64; do +for hdim in 96 64; do set -x ## no masking batched