fix: badge mismatch, multi-pass report overwrite, agent node creation

- Check INCONCLUSIVE before reproduced/confirmed in badge detection
- Exclude markdown headings from reproduced grep match
- Add --pass-label to qa-video-review.ts for unique multi-pass filenames
- Pass pass label from workflow YAML when reviewing numbered sessions
- Collect all pass-specific reports in deploy script HTML
- Add addNode/cloneNode convenience actions to qa-record agent
- Improve strategy hints for visual/rendering bug reproduction

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
snomiao
2026-03-25 10:45:50 +00:00
parent 628f64631b
commit f9a5baba1a
4 changed files with 88 additions and 16 deletions

View File

@@ -10,6 +10,9 @@
name: 'PR: QA'
on:
# TODO: remove push trigger before merge
push:
branches: [sno-skills, sno-qa-*]
pull_request:
types: [labeled]
branches: [main]
@@ -21,9 +24,10 @@ on:
options: [focused, full]
default: focused
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
# TODO: restore concurrency group before merge (disabled for parallel sno-qa-* testing)
# concurrency:
# group: ${{ github.workflow }}-${{ github.ref }}
# cancel-in-progress: true
jobs:
resolve-matrix:
@@ -781,12 +785,20 @@ jobs:
if [ -f "$DIR/qa-before-session.mp4" ]; then
BEFORE_FLAG="--before-video $DIR/qa-before-session.mp4"
fi
# Extract pass label from multi-pass filenames (qa-session-1.mp4 → pass1)
PASS_LABEL_FLAG=""
case "$(basename "$vid")" in
qa-session-[0-9].mp4)
PASS_NUM=$(basename "$vid" | sed 's/qa-session-\([0-9]\).mp4/\1/')
PASS_LABEL_FLAG="--pass-label pass${PASS_NUM}"
;;
esac
echo "::group::Reviewing $vid"
pnpm exec tsx scripts/qa-video-review.ts \
--artifacts-dir qa-artifacts \
--output-dir video-reviews \
--video-file "$vid" \
--model gemini-3-flash-preview $PR_CTX_FLAG $BEFORE_FLAG $TARGET_URL_FLAG || true
--model gemini-3-flash-preview $PR_CTX_FLAG $BEFORE_FLAG $TARGET_URL_FLAG $PASS_LABEL_FLAG || true
echo "::endgroup::"
done

View File

@@ -52,15 +52,27 @@ for os in Linux macOS Windows; do
HAS_AFTER=$( ([ -f "$DEPLOY_DIR/qa-${os}.mp4" ] || [ -f "$DEPLOY_DIR/qa-${os}-pass1.mp4" ]) && echo 1 || echo 0)
[ "$HAS_AFTER" = "0" ] && continue
REPORT_FILE="video-reviews/${OS_LOWER}-qa-video-report.md"
# Collect all reports for this platform (single + multi-pass)
REPORT_FILES=""
REPORT_LINK=""
REPORT_HTML=""
if [ -f "$REPORT_FILE" ]; then
cp "$REPORT_FILE" "$DEPLOY_DIR/report-${OS_LOWER}.md"
REPORT_LINK="<a class=dl href=report-${OS_LOWER}.md><svg width=14 height=14 viewBox='0 0 24 24' fill=none stroke=currentColor stroke-width=2><path d='M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z'/><polyline points='14 2 14 8 20 8'/><line x1=16 y1=13 x2=8 y2=13/><line x1=16 y1=17 x2=8 y2=17'/></svg>Report</a>"
for rpt in "video-reviews/${OS_LOWER}-qa-video-report.md" video-reviews/${OS_LOWER}-pass*-qa-video-report.md; do
[ -f "$rpt" ] && REPORT_FILES="${REPORT_FILES} ${rpt}"
done
REPORT_MD=$(cat "$REPORT_FILE" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g')
REPORT_HTML="<details class=report open><summary><svg width=14 height=14 viewBox='0 0 24 24' fill=none stroke=currentColor stroke-width=2><circle cx=12 cy=12 r=10/><line x1=12 y1=16 x2=12 y2=12/><line x1=12 y1=8 x2=12.01 y2=8'/></svg> AI Comparative Review</summary><div class=report-body data-md>${REPORT_MD}</div></details>"
if [ -n "$REPORT_FILES" ]; then
# Concatenate all reports into one combined report file
COMBINED_MD=""
for rpt in $REPORT_FILES; do
cp "$rpt" "$DEPLOY_DIR/$(basename "$rpt")"
RPT_MD=$(cat "$rpt" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g')
[ -n "$COMBINED_MD" ] && COMBINED_MD="${COMBINED_MD}&#10;&#10;---&#10;&#10;"
COMBINED_MD="${COMBINED_MD}${RPT_MD}"
done
FIRST_REPORT=$(echo $REPORT_FILES | awk '{print $1}')
FIRST_BASENAME=$(basename "$FIRST_REPORT")
REPORT_LINK="<a class=dl href=${FIRST_BASENAME}><svg width=14 height=14 viewBox='0 0 24 24' fill=none stroke=currentColor stroke-width=2><path d='M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z'/><polyline points='14 2 14 8 20 8'/><line x1=16 y1=13 x2=8 y2=13/><line x1=16 y1=17 x2=8 y2=17'/></svg>Report</a>"
REPORT_HTML="<details class=report open><summary><svg width=14 height=14 viewBox='0 0 24 24' fill=none stroke=currentColor stroke-width=2><circle cx=12 cy=12 r=10/><line x1=12 y1=16 x2=12 y2=12/><line x1=12 y1=8 x2=12.01 y2=8'/></svg> AI Comparative Review</summary><div class=report-body data-md>${COMBINED_MD}</div></details>"
fi
if [ "$HAS_BEFORE" = "1" ]; then
@@ -134,11 +146,15 @@ ERROREOF
# Generate badge SVGs into deploy dir
REPRO_RESULT="" REPRO_COLOR="#9f9f9f"
if grep -riq 'not reproduced\|could not reproduce\|unable to reproduce' video-reviews/ 2>/dev/null; then
# Check INCONCLUSIVE first — the AI prompt explicitly uses this word
if grep -riq 'INCONCLUSIVE' video-reviews/ 2>/dev/null; then
REPRO_RESULT="INCONCLUSIVE" REPRO_COLOR="#9f9f9f"
elif grep -riq 'not reproduced\|could not reproduce\|unable to reproduce' video-reviews/ 2>/dev/null; then
REPRO_RESULT="NOT REPRODUCIBLE" REPRO_COLOR="#9f9f9f"
elif grep -riq 'partially reproduced\|partial' video-reviews/ 2>/dev/null; then
REPRO_RESULT="PARTIAL" REPRO_COLOR="#dfb317"
elif grep -riq 'reproduced\|confirmed' video-reviews/ 2>/dev/null; then
# Exclude markdown headings (e.g. "## Confirmed Issues") — only match body text
elif grep -ri 'reproduced\|confirmed' video-reviews/ 2>/dev/null | grep -viq '^[^:]*:##'; then
REPRO_RESULT="REPRODUCED" REPRO_COLOR="#2196f3"
fi

View File

@@ -57,6 +57,8 @@ type TestAction =
y: number
durationMs?: number
}
| { action: 'addNode'; nodeName: string; x?: number; y?: number }
| { action: 'cloneNode'; x: number; y: number }
interface ActionResult {
action: TestAction
@@ -236,6 +238,10 @@ Each step is an object with an "action" field:
- { "action": "loadWorkflow", "url": "https://..." } — loads a workflow JSON from a URL
- { "action": "setSetting", "id": "Comfy.Setting.Id", "value": true } — changes a ComfyUI setting
### Compound actions (save multiple turns)
- { "action": "addNode", "nodeName": "KSampler", "x": 640, "y": 400 } — double-clicks canvas to open node search, types name, presses Enter
- { "action": "cloneNode", "x": 750, "y": 350 } — right-clicks node at coords and clicks Clone in context menu
### Utility actions
- { "action": "wait", "ms": 1000 } — waits (use sparingly, max 3000ms)
- { "action": "screenshot", "name": "step-name" } — takes a screenshot
@@ -553,7 +559,7 @@ async function executeAction(
outputDir: string
): Promise<ActionResult> {
console.warn(
`${step.action}${('label' in step && `: ${step.label}`) || ('text' in step && `: ${step.text}`) || ('name' in step && `: ${step.name}`) || ('x' in step && `: (${step.x},${step.y})`) || ''}`
`${step.action}${('label' in step && `: ${step.label}`) || ('nodeName' in step && `: ${step.nodeName}`) || ('text' in step && `: ${step.text}`) || ('name' in step && `: ${step.name}`) || ('x' in step && `: (${step.x},${step.y})`) || ''}`
)
// Reject invalid click targets
const INVALID_TARGETS = ['undefined', 'null', '[object Object]', '']
@@ -758,6 +764,28 @@ async function executeAction(
await sleep(duration + 200)
break
}
case 'addNode': {
const nx = step.x ?? 640
const ny = step.y ?? 400
await page.mouse.dblclick(nx, ny)
await sleep(500)
await page.keyboard.type(step.nodeName, { delay: 30 })
await sleep(300)
await page.keyboard.press('Enter')
await sleep(500)
console.warn(` Added node "${step.nodeName}" at (${nx}, ${ny})`)
break
}
case 'cloneNode': {
await page.mouse.move(step.x, step.y)
await sleep(300)
await page.mouse.click(step.x, step.y, { button: 'right' })
await sleep(500)
await clickSubmenuItem(page, 'Clone')
await sleep(500)
console.warn(` Cloned node at (${step.x}, ${step.y})`)
break
}
default:
console.warn(`Unknown action: ${JSON.stringify(step)}`)
}
@@ -931,6 +959,8 @@ Each action is a JSON object with an "action" field:
- { "action": "reload" } — reloads the page (for bugs that manifest on load)
- { "action": "wait", "ms": 1000 } — waits (max 3000ms)
- { "action": "screenshot", "name": "step-name" } — takes a named screenshot
- { "action": "addNode", "nodeName": "KSampler", "x": 640, "y": 400 } — double-clicks canvas to open search, types node name, presses Enter (compound action)
- { "action": "cloneNode", "x": 750, "y": 350 } — right-clicks node at coords and clicks Clone
- { "action": "done", "reason": "..." } — signals you are finished
## Response Format
@@ -949,7 +979,11 @@ Return { "reasoning": "...", "action": { "action": "done", "reason": "..." } } w
## Strategy Hints
- For accessibility/UI bugs: use openSettings, setSetting to change themes or display options.
- For workflow bugs: use loadDefaultWorkflow first to ensure a clean starting state.
- Prefer convenience actions (loadDefaultWorkflow, openSettings) over manual menu navigation — they save turns and are more reliable.
- Prefer convenience actions (loadDefaultWorkflow, openSettings, addNode, cloneNode) over manual menu navigation — they save turns and are more reliable.
- To add nodes to the canvas: use addNode (double-clicks canvas → types name → Enter), or loadDefaultWorkflow (loads 7 default nodes).
- For visual/rendering bugs (z-index, overlap, z-fighting): ALWAYS start with loadDefaultWorkflow to get nodes on canvas. You cannot reproduce visual bugs on an empty canvas.
- To clone a node: use cloneNode at the node's coordinates (right-clicks → Clone).
- To overlap nodes for z-index testing: use dragCanvas to move one node on top of another.
- Do NOT waste turns on generic exploration. Focus on reproducing the specific bug.
${focusSection}${qaSection}
## Issue to Reproduce

View File

@@ -16,6 +16,7 @@ interface CliOptions {
dryRun: boolean
prContext: string
targetUrl: string
passLabel: string
}
interface VideoCandidate {
@@ -33,7 +34,8 @@ const DEFAULT_OPTIONS: CliOptions = {
requestTimeoutMs: 300_000,
dryRun: false,
prContext: '',
targetUrl: ''
targetUrl: '',
passLabel: ''
}
const USAGE = `Usage:
@@ -56,6 +58,8 @@ Options:
--pr-context <file> File with PR context (title, body, diff)
for PR-aware review
--target-url <url> Issue or PR URL to include in the report
--pass-label <label> Label for multi-pass reports (e.g. pass1)
Output becomes {platform}-{label}-qa-video-report.md
--dry-run Discover videos and output targets only
--help Show this help text
@@ -133,6 +137,11 @@ function parseCliOptions(args: string[]): CliOptions {
continue
}
if (argument === '--pass-label') {
options.passLabel = requireValue(argument)
continue
}
if (argument === '--dry-run') {
options.dryRun = true
continue
@@ -653,9 +662,10 @@ async function reviewVideo(
})
const videoStat = await stat(video.videoPath)
const passSegment = options.passLabel ? `-${options.passLabel}` : ''
const outputPath = resolve(
options.outputDir,
`${video.platformName}-qa-video-report.md`
`${video.platformName}${passSegment}-qa-video-report.md`
)
const reportInput: Parameters<typeof buildReportMarkdown>[0] = {