From 691df60037f4bccc5d9dea3ce18fde8c1a7e8878 Mon Sep 17 00:00:00 2001 From: yurko Date: Sun, 8 Feb 2026 00:54:14 -0800 Subject: [PATCH] qwen3next: add absolute sanity guards to fused regression --- docs/development/qwen3next_perf_diff_report.md | 2 ++ scripts/qwen3next-eval.sh | 8 ++++++++ scripts/qwen3next-fused-regression.sh | 13 ++++++++++++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/docs/development/qwen3next_perf_diff_report.md b/docs/development/qwen3next_perf_diff_report.md index 2be9c980..cbed5a8f 100644 --- a/docs/development/qwen3next_perf_diff_report.md +++ b/docs/development/qwen3next_perf_diff_report.md @@ -104,3 +104,5 @@ Relative (`ik` vs mainline): - Also integrated into the broader eval harness: - `scripts/qwen3next-eval.sh --with-gpu --with-fused-regression ...` - Results are surfaced in `SUMMARY.md` under `IK Fused Delta Regression`. +- Fused regression now also enforces an absolute sanity check on the non-fused (mode0) baseline: + - mode0 decode/prefill PPL must not exceed configurable thresholds (`--max-mode0-decode-ppl` / `--max-mode0-prefill-ppl`; defaults: `10.0` / `10.0`). 
diff --git a/scripts/qwen3next-eval.sh b/scripts/qwen3next-eval.sh index 54aa233c..102699f6 100755 --- a/scripts/qwen3next-eval.sh +++ b/scripts/qwen3next-eval.sh @@ -447,11 +447,17 @@ main_has_complexity="$(has_token /out/cpu_gen_mainline.out 'complexity|O\(')" ik_has_complexity="$(has_token /out/cpu_gen_ik.out 'complexity|O\(')" fused_decode_safe="NA" fused_prefill_safe="NA" +fused_mode0_decode_sane="NA" +fused_mode0_prefill_sane="NA" if [[ -f /out/ik_fused_regression.md ]]; then fused_decode_safe="$(sed -nE 's/^- decode safety .*: `([^`]+)`.*/\1/p' /out/ik_fused_regression.md | tail -n1 || true)" fused_prefill_safe="$(sed -nE 's/^- prefill safety .*: `([^`]+)`.*/\1/p' /out/ik_fused_regression.md | tail -n1 || true)" + fused_mode0_decode_sane="$(sed -nE 's/^- mode0 decode sanity: `([^`]+)`.*/\1/p' /out/ik_fused_regression.md | tail -n1 || true)" + fused_mode0_prefill_sane="$(sed -nE 's/^- mode0 prefill sanity: `([^`]+)`.*/\1/p' /out/ik_fused_regression.md | tail -n1 || true)" if [[ -z "$fused_decode_safe" ]]; then fused_decode_safe="NA"; fi if [[ -z "$fused_prefill_safe" ]]; then fused_prefill_safe="NA"; fi + if [[ -z "$fused_mode0_decode_sane" ]]; then fused_mode0_decode_sane="NA"; fi + if [[ -z "$fused_mode0_prefill_sane" ]]; then fused_mode0_prefill_sane="NA"; fi fi { @@ -479,6 +485,8 @@ fi if [[ -f /out/ik_fused_regression.md ]]; then echo "- decode safety (mode1 ~= mode0): \`$fused_decode_safe\`" echo "- prefill safety (mode1 ~= mode0): \`$fused_prefill_safe\`" + echo "- mode0 decode sanity: \`$fused_mode0_decode_sane\`" + echo "- mode0 prefill sanity: \`$fused_mode0_prefill_sane\`" echo "- report: \`/out/ik_fused_regression.md\`" else echo "- status: \`requested but no report generated\`" diff --git a/scripts/qwen3next-fused-regression.sh b/scripts/qwen3next-fused-regression.sh index cb741b0f..b3a0042b 100755 --- a/scripts/qwen3next-fused-regression.sh +++ b/scripts/qwen3next-fused-regression.sh @@ -24,6 +24,9 @@ PREFILL_UB="${PREFILL_UB:-512}" # 2) mode=1 
prefill should stay aligned with mode=0 prefill. MAX_DECODE_DELTA_01="${MAX_DECODE_DELTA_01:-0.10}" MAX_PREFILL_DELTA_01="${MAX_PREFILL_DELTA_01:-0.10}" +# 3) mode=0 absolute perplexity should stay in a sane range. +MAX_MODE0_DECODE_PPL="${MAX_MODE0_DECODE_PPL:-10.0}" +MAX_MODE0_PREFILL_PPL="${MAX_MODE0_PREFILL_PPL:-10.0}" usage() { cat <<'USAGE' @@ -48,6 +51,8 @@ Options: --prefill-ub N prefill ubatch size (default: 512) --max-decode-delta-01 X fail threshold for |PPL(mode1)-PPL(mode0)| in decode (default: 0.10) --max-prefill-delta-01 X fail threshold for |PPL(mode1)-PPL(mode0)| in prefill (default: 0.10) + --max-mode0-decode-ppl X fail threshold for PPL(mode0) in decode (default: 10.0) + --max-mode0-prefill-ppl X fail threshold for PPL(mode0) in prefill (default: 10.0) -h, --help show this help USAGE } @@ -71,6 +76,8 @@ while [[ $# -gt 0 ]]; do --prefill-ub) PREFILL_UB="$2"; shift 2 ;; --max-decode-delta-01) MAX_DECODE_DELTA_01="$2"; shift 2 ;; --max-prefill-delta-01) MAX_PREFILL_DELTA_01="$2"; shift 2 ;; + --max-mode0-decode-ppl) MAX_MODE0_DECODE_PPL="$2"; shift 2 ;; + --max-mode0-prefill-ppl) MAX_MODE0_PREFILL_PPL="$2"; shift 2 ;; -h|--help) usage; exit 0 ;; *) echo "unknown option: $1" >&2 @@ -169,6 +176,8 @@ prefill_delta_02="$(abs_delta "$prefill_0" "$prefill_2")" decode_ok="$(awk -v d="$decode_delta_01" -v t="$MAX_DECODE_DELTA_01" 'BEGIN { print(d <= t ? "yes" : "no") }')" prefill_ok="$(awk -v d="$prefill_delta_01" -v t="$MAX_PREFILL_DELTA_01" 'BEGIN { print(d <= t ? "yes" : "no") }')" +mode0_decode_ok="$(awk -v p="$decode_0" -v t="$MAX_MODE0_DECODE_PPL" 'BEGIN { print(p <= t ? "yes" : "no") }')" +mode0_prefill_ok="$(awk -v p="$prefill_0" -v t="$MAX_MODE0_PREFILL_PPL" 'BEGIN { print(p <= t ? 
"yes" : "no") }')" { echo "# Qwen3Next Fused DeltaNet Regression Report" @@ -195,6 +204,8 @@ prefill_ok="$(awk -v d="$prefill_delta_01" -v t="$MAX_PREFILL_DELTA_01" 'BEGIN { echo echo "- decode safety (mode1 ~= mode0): \`${decode_ok}\` (threshold \`${MAX_DECODE_DELTA_01}\`)" echo "- prefill safety (mode1 ~= mode0): \`${prefill_ok}\` (threshold \`${MAX_PREFILL_DELTA_01}\`)" + echo "- mode0 decode sanity: \`${mode0_decode_ok}\` (PPL \`${decode_0}\`, max \`${MAX_MODE0_DECODE_PPL}\`)" + echo "- mode0 prefill sanity: \`${mode0_prefill_ok}\` (PPL \`${prefill_0}\`, max \`${MAX_MODE0_PREFILL_PPL}\`)" echo echo "## Logs" echo @@ -209,7 +220,7 @@ prefill_ok="$(awk -v d="$prefill_delta_01" -v t="$MAX_PREFILL_DELTA_01" 'BEGIN { echo "wrote report: $OUT_FILE" -if [[ "$decode_ok" != "yes" || "$prefill_ok" != "yes" ]]; then +if [[ "$decode_ok" != "yes" || "$prefill_ok" != "yes" || "$mode0_decode_ok" != "yes" || "$mode0_prefill_ok" != "yes" ]]; then echo "regression check failed; see report: $OUT_FILE" >&2 exit 1 fi