mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-28 00:54:09 +00:00
qwen3next: add absolute sanity guards to fused regression
This commit is contained in:
@@ -104,3 +104,5 @@ Relative (`ik` vs mainline):
|
||||
- Also integrated into the broader eval harness:
|
||||
- `scripts/qwen3next-eval.sh --with-gpu --with-fused-regression ...`
|
||||
- Results are surfaced in `SUMMARY.md` under `IK Fused Delta Regression`.
|
||||
- Fused regression now enforces absolute non-fused sanity too:
|
||||
- mode0 decode/prefill PPL must stay below configurable thresholds (defaults: `10.0` / `10.0`).
|
||||
|
||||
@@ -447,11 +447,17 @@ main_has_complexity="$(has_token /out/cpu_gen_mainline.out 'complexity|O\(')"
|
||||
ik_has_complexity="$(has_token /out/cpu_gen_ik.out 'complexity|O\(')"
|
||||
fused_decode_safe="NA"
|
||||
fused_prefill_safe="NA"
|
||||
fused_mode0_decode_sane="NA"
|
||||
fused_mode0_prefill_sane="NA"
|
||||
if [[ -f /out/ik_fused_regression.md ]]; then
|
||||
fused_decode_safe="$(sed -nE 's/^- decode safety .*: `([^`]+)`.*/\1/p' /out/ik_fused_regression.md | tail -n1 || true)"
|
||||
fused_prefill_safe="$(sed -nE 's/^- prefill safety .*: `([^`]+)`.*/\1/p' /out/ik_fused_regression.md | tail -n1 || true)"
|
||||
fused_mode0_decode_sane="$(sed -nE 's/^- mode0 decode sanity: `([^`]+)`.*/\1/p' /out/ik_fused_regression.md | tail -n1 || true)"
|
||||
fused_mode0_prefill_sane="$(sed -nE 's/^- mode0 prefill sanity: `([^`]+)`.*/\1/p' /out/ik_fused_regression.md | tail -n1 || true)"
|
||||
if [[ -z "$fused_decode_safe" ]]; then fused_decode_safe="NA"; fi
|
||||
if [[ -z "$fused_prefill_safe" ]]; then fused_prefill_safe="NA"; fi
|
||||
if [[ -z "$fused_mode0_decode_sane" ]]; then fused_mode0_decode_sane="NA"; fi
|
||||
if [[ -z "$fused_mode0_prefill_sane" ]]; then fused_mode0_prefill_sane="NA"; fi
|
||||
fi
|
||||
|
||||
{
|
||||
@@ -479,6 +485,8 @@ fi
|
||||
if [[ -f /out/ik_fused_regression.md ]]; then
|
||||
echo "- decode safety (mode1 ~= mode0): \`$fused_decode_safe\`"
|
||||
echo "- prefill safety (mode1 ~= mode0): \`$fused_prefill_safe\`"
|
||||
echo "- mode0 decode sanity: \`$fused_mode0_decode_sane\`"
|
||||
echo "- mode0 prefill sanity: \`$fused_mode0_prefill_sane\`"
|
||||
echo "- report: \`/out/ik_fused_regression.md\`"
|
||||
else
|
||||
echo "- status: \`requested but no report generated\`"
|
||||
|
||||
@@ -24,6 +24,9 @@ PREFILL_UB="${PREFILL_UB:-512}"
|
||||
# 2) mode=1 prefill should stay aligned with mode=0 prefill.
|
||||
MAX_DECODE_DELTA_01="${MAX_DECODE_DELTA_01:-0.10}"
|
||||
MAX_PREFILL_DELTA_01="${MAX_PREFILL_DELTA_01:-0.10}"
|
||||
# 3) mode=0 absolute perplexity should stay in a sane range.
|
||||
MAX_MODE0_DECODE_PPL="${MAX_MODE0_DECODE_PPL:-10.0}"
|
||||
MAX_MODE0_PREFILL_PPL="${MAX_MODE0_PREFILL_PPL:-10.0}"
|
||||
|
||||
usage() {
|
||||
cat <<'USAGE'
|
||||
@@ -48,6 +51,8 @@ Options:
|
||||
--prefill-ub N prefill ubatch size (default: 512)
|
||||
--max-decode-delta-01 X fail threshold for |PPL(mode1)-PPL(mode0)| in decode (default: 0.10)
|
||||
--max-prefill-delta-01 X fail threshold for |PPL(mode1)-PPL(mode0)| in prefill (default: 0.10)
|
||||
--max-mode0-decode-ppl X fail threshold for PPL(mode0) in decode (default: 10.0)
|
||||
--max-mode0-prefill-ppl X fail threshold for PPL(mode0) in prefill (default: 10.0)
|
||||
-h, --help show this help
|
||||
USAGE
|
||||
}
|
||||
@@ -71,6 +76,8 @@ while [[ $# -gt 0 ]]; do
|
||||
--prefill-ub) PREFILL_UB="$2"; shift 2 ;;
|
||||
--max-decode-delta-01) MAX_DECODE_DELTA_01="$2"; shift 2 ;;
|
||||
--max-prefill-delta-01) MAX_PREFILL_DELTA_01="$2"; shift 2 ;;
|
||||
--max-mode0-decode-ppl) MAX_MODE0_DECODE_PPL="$2"; shift 2 ;;
|
||||
--max-mode0-prefill-ppl) MAX_MODE0_PREFILL_PPL="$2"; shift 2 ;;
|
||||
-h|--help) usage; exit 0 ;;
|
||||
*)
|
||||
echo "unknown option: $1" >&2
|
||||
@@ -169,6 +176,8 @@ prefill_delta_02="$(abs_delta "$prefill_0" "$prefill_2")"
|
||||
|
||||
decode_ok="$(awk -v d="$decode_delta_01" -v t="$MAX_DECODE_DELTA_01" 'BEGIN { print(d <= t ? "yes" : "no") }')"
|
||||
prefill_ok="$(awk -v d="$prefill_delta_01" -v t="$MAX_PREFILL_DELTA_01" 'BEGIN { print(d <= t ? "yes" : "no") }')"
|
||||
mode0_decode_ok="$(awk -v p="$decode_0" -v t="$MAX_MODE0_DECODE_PPL" 'BEGIN { print(p <= t ? "yes" : "no") }')"
|
||||
mode0_prefill_ok="$(awk -v p="$prefill_0" -v t="$MAX_MODE0_PREFILL_PPL" 'BEGIN { print(p <= t ? "yes" : "no") }')"
|
||||
|
||||
{
|
||||
echo "# Qwen3Next Fused DeltaNet Regression Report"
|
||||
@@ -195,6 +204,8 @@ prefill_ok="$(awk -v d="$prefill_delta_01" -v t="$MAX_PREFILL_DELTA_01" 'BEGIN {
|
||||
echo
|
||||
echo "- decode safety (mode1 ~= mode0): \`${decode_ok}\` (threshold \`${MAX_DECODE_DELTA_01}\`)"
|
||||
echo "- prefill safety (mode1 ~= mode0): \`${prefill_ok}\` (threshold \`${MAX_PREFILL_DELTA_01}\`)"
|
||||
echo "- mode0 decode sanity: \`${mode0_decode_ok}\` (PPL \`${decode_0}\`, max \`${MAX_MODE0_DECODE_PPL}\`)"
|
||||
echo "- mode0 prefill sanity: \`${mode0_prefill_ok}\` (PPL \`${prefill_0}\`, max \`${MAX_MODE0_PREFILL_PPL}\`)"
|
||||
echo
|
||||
echo "## Logs"
|
||||
echo
|
||||
@@ -209,7 +220,7 @@ prefill_ok="$(awk -v d="$prefill_delta_01" -v t="$MAX_PREFILL_DELTA_01" 'BEGIN {
|
||||
|
||||
echo "wrote report: $OUT_FILE"
|
||||
|
||||
if [[ "$decode_ok" != "yes" || "$prefill_ok" != "yes" ]]; then
|
||||
if [[ "$decode_ok" != "yes" || "$prefill_ok" != "yes" || "$mode0_decode_ok" != "yes" || "$mode0_prefill_ok" != "yes" ]]; then
|
||||
echo "regression check failed; see report: $OUT_FILE" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user