mirror of
https://github.com/Comfy-Org/ComfyUI_frontend.git
synced 2026-04-20 06:20:11 +00:00
feat: structured JSON verdict from AI reviewer, light-first theme
- Video review prompt now requests a ## Verdict JSON block:
{"verdict": "REPRODUCED|NOT_REPRODUCIBLE|INCONCLUSIVE", "risk": "low|medium|high"}
- Deploy script reads JSON verdict first, falls back to grep
- Eliminates all regex-matching false positives permanently
- Theme: light mode is default, dark via prefers-color-scheme:dark
- Cards use solid backgrounds, grain overlay only in dark mode
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -222,14 +222,26 @@ if [ -d video-reviews ]; then
|
||||
for rpt in video-reviews/*-qa-video-report.md; do
|
||||
[ -f "$rpt" ] || continue
|
||||
TOTAL_REPORTS=$((TOTAL_REPORTS + 1))
|
||||
SUMM=$(sed -n '/^## Summary/,/^## /p' "$rpt" 2>/dev/null | head -15)
|
||||
# Check negatives FIRST — "fails to reproduce" contains "reproduce" but is negative
|
||||
if echo "$SUMM" | grep -iq 'INCONCLUSIVE'; then
|
||||
INCONC_COUNT=$((INCONC_COUNT + 1))
|
||||
elif echo "$SUMM" | grep -iq 'not reproduced\|could not reproduce\|could not be confirmed\|unable to reproduce\|fails\? to reproduce\|was NOT\|NOT visible\|not observed\|fail.* to demonstrate\|does not demonstrate\|steps were not performed\|never.*tested\|never.*accessed\|not.* confirmed'; then
|
||||
NOT_REPRO_COUNT=$((NOT_REPRO_COUNT + 1))
|
||||
elif echo "$SUMM" | grep -iq 'reproduc\|confirm'; then
|
||||
REPRO_COUNT=$((REPRO_COUNT + 1))
|
||||
# Try structured JSON verdict first (from ## Verdict section)
|
||||
VERDICT_JSON=$(grep -oP '\{"verdict":\s*"[^"]+' "$rpt" 2>/dev/null | tail -1 | grep -oP '"[A-Z_]+"$' | tr -d '"')
|
||||
RISK_JSON=$(grep -oP '"risk":\s*"[^"]+' "$rpt" 2>/dev/null | tail -1 | grep -oP '"[a-z]+"$' | tr -d '"')
|
||||
|
||||
if [ -n "$VERDICT_JSON" ]; then
|
||||
case "$VERDICT_JSON" in
|
||||
REPRODUCED) REPRO_COUNT=$((REPRO_COUNT + 1)) ;;
|
||||
NOT_REPRODUCIBLE) NOT_REPRO_COUNT=$((NOT_REPRO_COUNT + 1)) ;;
|
||||
INCONCLUSIVE) INCONC_COUNT=$((INCONC_COUNT + 1)) ;;
|
||||
esac
|
||||
else
|
||||
# Fallback: grep Summary section (for older reports without ## Verdict)
|
||||
SUMM=$(sed -n '/^## Summary/,/^## /p' "$rpt" 2>/dev/null | head -15)
|
||||
if echo "$SUMM" | grep -iq 'INCONCLUSIVE'; then
|
||||
INCONC_COUNT=$((INCONC_COUNT + 1))
|
||||
elif echo "$SUMM" | grep -iq 'not reproduced\|could not reproduce\|could not be confirmed\|unable to reproduce\|fails\? to reproduce\|fails\? to perform\|was NOT\|NOT visible\|not observed\|fail.* to demonstrate\|does not demonstrate\|steps were not performed\|never.*tested\|never.*accessed\|not.* confirmed'; then
|
||||
NOT_REPRO_COUNT=$((NOT_REPRO_COUNT + 1))
|
||||
elif echo "$SUMM" | grep -iq 'reproduc\|confirm'; then
|
||||
REPRO_COUNT=$((REPRO_COUNT + 1))
|
||||
fi
|
||||
fi
|
||||
done
|
||||
fi
|
||||
@@ -274,19 +286,33 @@ BADGE_LABEL="QA${QA_DATE}"
|
||||
# For PRs, also extract fix quality from Overall Risk section
|
||||
FIX_RESULT="" FIX_COLOR="#4c1"
|
||||
if [ "$TARGET_TYPE" != "issue" ]; then
|
||||
RISK_TEXT=""
|
||||
if [ -d video-reviews ]; then
|
||||
RISK_TEXT=$(sed -n '/^## Overall Risk/,/^## /p' video-reviews/*.md 2>/dev/null | sed 's/\*//g' | head -20)
|
||||
fi
|
||||
RISK_FIRST=$(echo "$RISK_TEXT" | grep -oiP '^\s*(high|medium|moderate|low|minimal|critical)' | head -1 | tr '[:upper:]' '[:lower:]')
|
||||
if [ -n "$RISK_FIRST" ]; then
|
||||
case "$RISK_FIRST" in
|
||||
*low*|*minimal*) FIX_RESULT="APPROVED" FIX_COLOR="#4c1" ;;
|
||||
*medium*|*moderate*) FIX_RESULT="MINOR ISSUES" FIX_COLOR="#dfb317" ;;
|
||||
*high*|*critical*) FIX_RESULT="MAJOR ISSUES" FIX_COLOR="#e05d44" ;;
|
||||
esac
|
||||
elif echo "$RISK_TEXT" | grep -iq 'no.*risk\|approved\|looks good'; then
|
||||
FIX_RESULT="APPROVED" FIX_COLOR="#4c1"
|
||||
# Try structured JSON risk first
|
||||
ALL_RISKS=$(grep -ohP '"risk":\s*"[a-z]+"' video-reviews/*.md 2>/dev/null | grep -oP '"[a-z]+"$' | tr -d '"')
|
||||
if [ -n "$ALL_RISKS" ]; then
|
||||
# Use worst risk across all reports
|
||||
if echo "$ALL_RISKS" | grep -q 'high'; then
|
||||
FIX_RESULT="MAJOR ISSUES" FIX_COLOR="#e05d44"
|
||||
elif echo "$ALL_RISKS" | grep -q 'medium'; then
|
||||
FIX_RESULT="MINOR ISSUES" FIX_COLOR="#dfb317"
|
||||
elif echo "$ALL_RISKS" | grep -q 'low'; then
|
||||
FIX_RESULT="APPROVED" FIX_COLOR="#4c1"
|
||||
fi
|
||||
else
|
||||
# Fallback: grep Overall Risk section
|
||||
RISK_TEXT=""
|
||||
if [ -d video-reviews ]; then
|
||||
RISK_TEXT=$(sed -n '/^## Overall Risk/,/^## /p' video-reviews/*.md 2>/dev/null | sed 's/\*//g' | head -20)
|
||||
fi
|
||||
RISK_FIRST=$(echo "$RISK_TEXT" | grep -oiP '^\s*(high|medium|moderate|low|minimal|critical)' | head -1 | tr '[:upper:]' '[:lower:]')
|
||||
if [ -n "$RISK_FIRST" ]; then
|
||||
case "$RISK_FIRST" in
|
||||
*low*|*minimal*) FIX_RESULT="APPROVED" FIX_COLOR="#4c1" ;;
|
||||
*medium*|*moderate*) FIX_RESULT="MINOR ISSUES" FIX_COLOR="#dfb317" ;;
|
||||
*high*|*critical*) FIX_RESULT="MAJOR ISSUES" FIX_COLOR="#e05d44" ;;
|
||||
esac
|
||||
elif echo "$RISK_TEXT" | grep -iq 'no.*risk\|approved\|looks good'; then
|
||||
FIX_RESULT="APPROVED" FIX_COLOR="#4c1"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
@@ -2,15 +2,11 @@
|
||||
<link rel=preconnect href=https://fonts.googleapis.com><link rel=preconnect href=https://fonts.gstatic.com crossorigin><link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel=stylesheet>
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||
<style>
|
||||
:root{--bg:oklch(8% 0.02 265);--surface:oklch(12% 0.02 265);--surface-up:oklch(16% 0.02 265);--fg:oklch(96% 0.01 95);--fg-muted:oklch(65% 0.01 265);--fg-dim:oklch(45% 0.01 265);--primary:oklch(62% 0.21 265);--primary-up:oklch(68% 0.21 265);--primary-glow:oklch(62% 0.15 265);--ok:oklch(62% 0.18 155);--err:oklch(62% 0.22 25);--border:oklch(22% 0.02 265);--border-faint:oklch(15% 0.01 265);--r:0.75rem;--r-lg:1rem;--ease-out:cubic-bezier(0.22,1,0.36,1);--dur-base:250ms;--dur-slow:500ms;--font:'Inter',system-ui,sans-serif;--font-mono:'JetBrains Mono',monospace}
|
||||
@media(prefers-color-scheme:light){:root{--bg:oklch(97% 0.01 265);--surface:oklch(100% 0 0);--surface-up:oklch(94% 0.01 265);--fg:oklch(15% 0.02 265);--fg-muted:oklch(40% 0.01 265);--fg-dim:oklch(55% 0.01 265);--primary:oklch(50% 0.21 265);--primary-up:oklch(45% 0.21 265);--primary-glow:oklch(55% 0.15 265);--ok:oklch(45% 0.18 155);--err:oklch(50% 0.22 25);--border:oklch(85% 0.01 265);--border-faint:oklch(90% 0.01 265);--glass:oklch(0% 0 0/.04);--glass-border:oklch(0% 0 0/.08);--glass-hover:oklch(0% 0 0/.1);--shadow:oklch(0% 0 0/.08)}
|
||||
.card{background:var(--surface);border-color:var(--border)}
|
||||
.card:hover{border-color:var(--border);box-shadow:0 4px 16px var(--shadow)}
|
||||
body::after{display:none}
|
||||
}
|
||||
:root{--bg:oklch(97% 0.01 265);--surface:oklch(100% 0 0);--surface-up:oklch(94% 0.01 265);--fg:oklch(15% 0.02 265);--fg-muted:oklch(40% 0.01 265);--fg-dim:oklch(55% 0.01 265);--primary:oklch(50% 0.21 265);--primary-up:oklch(45% 0.21 265);--primary-glow:oklch(55% 0.15 265);--ok:oklch(45% 0.18 155);--err:oklch(50% 0.22 25);--border:oklch(85% 0.01 265);--border-faint:oklch(90% 0.01 265);--r:0.75rem;--r-lg:1rem;--ease-out:cubic-bezier(0.22,1,0.36,1);--dur-base:250ms;--dur-slow:500ms;--font:'Inter',system-ui,sans-serif;--font-mono:'JetBrains Mono',monospace}
|
||||
@media(prefers-color-scheme:dark){:root{--bg:oklch(8% 0.02 265);--surface:oklch(12% 0.02 265);--surface-up:oklch(16% 0.02 265);--fg:oklch(96% 0.01 95);--fg-muted:oklch(65% 0.01 265);--fg-dim:oklch(45% 0.01 265);--primary:oklch(62% 0.21 265);--primary-up:oklch(68% 0.21 265);--primary-glow:oklch(62% 0.15 265);--ok:oklch(62% 0.18 155);--err:oklch(62% 0.22 25);--border:oklch(22% 0.02 265);--border-faint:oklch(15% 0.01 265)}}
|
||||
*{margin:0;padding:0;box-sizing:border-box}
|
||||
body{background:var(--bg);color:var(--fg);font-family:var(--font);min-height:100vh;padding:clamp(1.5rem,4vw,3rem) clamp(1rem,3vw,2rem);position:relative}
|
||||
body::after{content:'';position:fixed;inset:0;pointer-events:none;opacity:.03;background:url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='.85' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)'/%3E%3C/svg%3E")}
|
||||
@media(prefers-color-scheme:dark){body::after{content:'';position:fixed;inset:0;pointer-events:none;opacity:.03;background:url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='.85' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)'/%3E%3C/svg%3E")}}
|
||||
.container{max-width:1200px;margin:0 auto}
|
||||
header{display:flex;align-items:center;gap:1rem;margin-bottom:clamp(1.5rem,4vw,3rem);padding-bottom:1.25rem;border-bottom:1px solid var(--border)}
|
||||
.header-icon{width:36px;height:36px;display:grid;place-items:center;background:linear-gradient(135deg,oklch(100% 0 0/.06),oklch(100% 0 0/.02));backdrop-filter:blur(12px);border:1px solid oklch(100% 0 0/.1);border-radius:var(--r);flex-shrink:0}
|
||||
@@ -18,8 +14,8 @@ header{display:flex;align-items:center;gap:1rem;margin-bottom:clamp(1.5rem,4vw,3
|
||||
h1{font-size:clamp(1.25rem,2.5vw,1.625rem);font-weight:700;letter-spacing:-.03em;background:linear-gradient(135deg,var(--fg),var(--fg-muted));-webkit-background-clip:text;-webkit-text-fill-color:transparent;background-clip:text}
|
||||
.meta{color:var(--fg-dim);font-size:.8125rem;margin-top:.15rem;letter-spacing:.01em}
|
||||
.grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(min(480px,100%),1fr));gap:1.5rem}
|
||||
.card{background:linear-gradient(135deg,oklch(100% 0 0/.05),oklch(100% 0 0/.015));backdrop-filter:blur(16px) saturate(150%);border:1px solid oklch(100% 0 0/.08);border-radius:var(--r-lg);overflow:hidden;transition:border-color var(--dur-base) var(--ease-out),box-shadow var(--dur-base) var(--ease-out),transform var(--dur-base) var(--ease-out)}
|
||||
.card:hover{border-color:oklch(100% 0 0/.16);box-shadow:0 8px 32px oklch(0% 0 0/.3),inset 0 1px 0 oklch(100% 0 0/.1);transform:translateY(-2px)}
|
||||
.card{background:var(--surface);border:1px solid var(--border);border-radius:var(--r-lg);overflow:hidden;transition:border-color var(--dur-base) var(--ease-out),box-shadow var(--dur-base) var(--ease-out),transform var(--dur-base) var(--ease-out)}
|
||||
.card:hover{border-color:var(--primary);box-shadow:0 4px 16px oklch(0% 0 0/.1);transform:translateY(-2px)}
|
||||
.video-wrap{position:relative;background:var(--surface);border-bottom:1px solid var(--border-faint)}
|
||||
.video-wrap video{width:100%;display:block;aspect-ratio:16/9;object-fit:contain}
|
||||
.card-body{padding:.75rem 1rem;display:flex;align-items:center;justify-content:space-between}
|
||||
|
||||
@@ -401,7 +401,14 @@ function buildComparativePrompt(
|
||||
'',
|
||||
'## Possible Issues (Needs Human Verification)',
|
||||
'## Overall Risk',
|
||||
'(Assess whether the PR achieves its goal based on the before/after comparison)'
|
||||
'(Assess whether the PR achieves its goal based on the before/after comparison)',
|
||||
'',
|
||||
'## Verdict',
|
||||
'End your report with this EXACT JSON block (no markdown fence):',
|
||||
'{"verdict": "REPRODUCED" | "NOT_REPRODUCIBLE" | "INCONCLUSIVE", "risk": "low" | "medium" | "high", "confidence": "high" | "medium" | "low"}',
|
||||
'- REPRODUCED: the before video confirms the old behavior and the after video shows the fix working',
|
||||
'- NOT_REPRODUCIBLE: the before video does not show the reported bug',
|
||||
'- INCONCLUSIVE: the videos do not adequately demonstrate the behavior change'
|
||||
)
|
||||
|
||||
return lines.filter(Boolean).join('\n')
|
||||
@@ -496,7 +503,14 @@ function buildSingleVideoPrompt(
|
||||
'`SEVERITY` `TIMESTAMP` `Confidence: LEVEL`',
|
||||
'Do NOT use a table for issues — use the block format above.',
|
||||
'## Possible Issues (Needs Human Verification)',
|
||||
'## Overall Risk'
|
||||
'## Overall Risk',
|
||||
'',
|
||||
'## Verdict',
|
||||
'End your report with this EXACT JSON block (no markdown fence):',
|
||||
'{"verdict": "REPRODUCED" | "NOT_REPRODUCIBLE" | "INCONCLUSIVE", "risk": "low" | "medium" | "high" | null, "confidence": "high" | "medium" | "low"}',
|
||||
'- REPRODUCED: the bug/behavior is clearly visible in the video',
|
||||
'- NOT_REPRODUCIBLE: the steps were performed correctly but the bug was not observed',
|
||||
'- INCONCLUSIVE: the reproduction steps were not performed or the video is insufficient'
|
||||
)
|
||||
|
||||
return lines.filter(Boolean).join('\n')
|
||||
|
||||
Reference in New Issue
Block a user