From 20f331e7fe279faf7100ec4b3a5868361b41e1f7 Mon Sep 17 00:00:00 2001 From: carlushuang Date: Fri, 21 Mar 2025 18:28:43 +0800 Subject: [PATCH] add mask support in hdim=192/128 (#1999) [ROCm/composable_kernel commit: 6c08c5c46d7a13fd13b428ef6c6b59b05e4e9666] --- example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py b/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py index 4ff7ede765..e5d11c6dc9 100644 --- a/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py +++ b/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py @@ -492,7 +492,7 @@ def get_fwd_blobs(kernel_filter : Optional[str], receipt, mask_impl) -> Tuple[Fm continue if hdim == 192 and tile.F_bn1 == 128: # NOTE: this is used to speedup deepseek prefill case, we don't gen training - if pipeline.F_bias != 'no' or pipeline.F_lse == 't' or pipeline.F_dropout == 't' or (pipeline.F_mask not in ['no', 's_no']): + if pipeline.F_bias != 'no' or pipeline.F_lse == 't' or pipeline.F_dropout == 't': continue k = FmhaFwdKernel(F_idx=0, F_hdim=hdim,