From 424dfec6e438f67c9789fb5f8c3bafce9bbd5140 Mon Sep 17 00:00:00 2001
From: Po Yen Chen
Date: Mon, 18 May 2026 16:10:30 +0000
Subject: [PATCH] [rocm-libraries] ROCm/rocm-libraries#7530 (commit 378e049)

[CK] Fix FMHA sink dispatch when init_sink_value is set (#7530)

## Summary

- Fix `traits.has_sink` in `fmha_fwd_runner.hpp` to also check `init_sink_value != 0`, so the GPU kernel dispatches with sink support when `-init_sink=1` is passed.
- Gate `run_sink_mask_tests` (StreamLLM) and `run_sink_init_tests` (GPT-OSS) behind opt-in flags `-m` and `-g` in `smoke_test_fwd.sh`. These tests require sink=true kernel instances which are excluded by the `BUILD_TESTING` CMake filter (`*_nsink*`), causing unconditional "not supported yet" failures (48 tests in CI). The opt-in flag approach was borrowed from PR #6057.

## Why gate tests instead of compiling sink=true kernels?

The `BUILD_TESTING` filter in `CMakeLists.txt` uses `*_nsink*` glob patterns for the `fwd` and `fwd_splitkv` APIs, excluding sink=true kernel instances from compilation. We chose opt-in flags over widening the filter because:

- **Compile time**: Enabling sink=true kernels doubles the kernel variants for `fwd` and `fwd_splitkv` APIs. The filter exists specifically to reduce CI build times.
- **Incremental enablement**: Sink support (StreamLLM / GPT-OSS) is still maturing. Gating lets teams opt in explicitly (`smoke_test_fwd.sh -g`) while keeping the default CI path fast.
- **Precedent**: splitkv (`-s`) and appendkv (`-a`) tests already follow this opt-in pattern.

## Test plan - [ ] Run `smoke_test_fwd.sh -g` with sink=true kernels compiled and verify sink-enabled kernels are dispatched - [ ] Verify `smoke_test_fwd.sh` still passes without `-m` / `-g` flags - [ ] Confirm CI no longer fails on sink tests (they are now opt-in) --- example/ck_tile/01_fmha/fmha_fwd_runner.hpp | 2 +- .../ck_tile/01_fmha/script/smoke_test_fwd.sh | 21 ++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/example/ck_tile/01_fmha/fmha_fwd_runner.hpp b/example/ck_tile/01_fmha/fmha_fwd_runner.hpp index 1c99dffcda..21c0ead009 100644 --- a/example/ck_tile/01_fmha/fmha_fwd_runner.hpp +++ b/example/ck_tile/01_fmha/fmha_fwd_runner.hpp @@ -1138,7 +1138,7 @@ fwd_result fmha_fwd_run(mode_enum mode, traits.has_logits_soft_cap = 0.f < logits_soft_cap; traits.mask_type = mask.type; traits.bias_type = bias.type; - traits.has_sink = mask.sink > 0 ? true : false; + traits.has_sink = (mask.sink > 0 || init_sink_value != 0) ? true : false; traits.has_lse = lse; if constexpr(std::is_same_v>) diff --git a/example/ck_tile/01_fmha/script/smoke_test_fwd.sh b/example/ck_tile/01_fmha/script/smoke_test_fwd.sh index 1e9942a6e1..b2a4afd1ac 100755 --- a/example/ck_tile/01_fmha/script/smoke_test_fwd.sh +++ b/example/ck_tile/01_fmha/script/smoke_test_fwd.sh @@ -28,10 +28,14 @@ COMMON_ARGS='-v=1 -warmup=0 -repeat=1' TEST_SPLITKV=0 TEST_APPENDKV=0 +TEST_STREAM_SINK=0 +TEST_GPTOSS_SINK=0 # options: # -s: run splitkv tests # -a: run appendkv tests -while getopts ":sa" opt; do +# -m: run StreamLLM sink mask tests (requires sink=true kernels) +# -g: run GPT-OSS sink init tests (requires sink=true kernels) +while getopts ":samg" opt; do case "${opt}" in s) TEST_SPLITKV=1 @@ -39,6 +43,12 @@ while getopts ":sa" opt; do a) TEST_APPENDKV=1 ;; + m) + TEST_STREAM_SINK=1 + ;; + g) + TEST_GPTOSS_SINK=1 + ;; *) ;; esac @@ -300,8 +310,13 @@ run_padding_smoke_tests run_padding_basic_boundary_tests run_fp8bf16_tests run_fp8fp32_tests -run_sink_mask_tests 
-run_sink_init_tests +if [ $TEST_STREAM_SINK -eq 1 ] ; then + run_sink_mask_tests +fi + +if [ $TEST_GPTOSS_SINK -eq 1 ] ; then + run_sink_init_tests +fi if [ $TEST_APPENDKV -eq 1 ] ; then run_fp16_appendkv_tests