fix: badge mismatch, multi-pass report overwrite, agent node creation

- Check INCONCLUSIVE before reproduced/confirmed in badge detection - Exclude markdown headings from reproduced grep match - Add --pass-label to qa-video-review.ts for unique multi-pass filenames - Pass pass label from workflow YAML when reviewing numbered sessions - Collect all pass-specific reports in deploy script HTML - Add addNode/cloneNode convenience actions to qa-record agent - Improve strategy hints for visual/rendering bug reproduction Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-20 06:20:11 +00:00 · 2026-03-25 10:45:50 +00:00
parent 628f64631b
commit f9a5baba1a
4 changed files with 88 additions and 16 deletions
--- a/.github/workflows/pr-qa.yaml
+++ b/.github/workflows/pr-qa.yaml
@@ -10,6 +10,9 @@
 name: 'PR: QA'

 on:
+  # TODO: remove push trigger before merge
+  push:
+    branches: [sno-skills, sno-qa-*]
  pull_request:
    types: [labeled]
    branches: [main]
@@ -21,9 +24,10 @@ on:
        options: [focused, full]
        default: focused

-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
+# TODO: restore concurrency group before merge (disabled for parallel sno-qa-* testing)
+# concurrency:
+#   group: ${{ github.workflow }}-${{ github.ref }}
+#   cancel-in-progress: true

 jobs:
  resolve-matrix:
@@ -781,12 +785,20 @@ jobs:
            if [ -f "$DIR/qa-before-session.mp4" ]; then
              BEFORE_FLAG="--before-video $DIR/qa-before-session.mp4"
            fi
+            # Extract pass label from multi-pass filenames (qa-session-1.mp4 → pass1)
+            PASS_LABEL_FLAG=""
+            case "$(basename "$vid")" in
+              qa-session-[0-9].mp4)
+                PASS_NUM=$(basename "$vid" | sed 's/qa-session-\([0-9]\).mp4/\1/')
+                PASS_LABEL_FLAG="--pass-label pass${PASS_NUM}"
+                ;;
+            esac
            echo "::group::Reviewing $vid"
            pnpm exec tsx scripts/qa-video-review.ts \
              --artifacts-dir qa-artifacts \
              --output-dir video-reviews \
              --video-file "$vid" \
-              --model gemini-3-flash-preview $PR_CTX_FLAG $BEFORE_FLAG $TARGET_URL_FLAG || true
+              --model gemini-3-flash-preview $PR_CTX_FLAG $BEFORE_FLAG $TARGET_URL_FLAG $PASS_LABEL_FLAG || true
            echo "::endgroup::"
          done

--- a/scripts/qa-deploy-pages.sh
+++ b/scripts/qa-deploy-pages.sh
@@ -52,15 +52,27 @@ for os in Linux macOS Windows; do
  HAS_AFTER=$( ([ -f "$DEPLOY_DIR/qa-${os}.mp4" ] || [ -f "$DEPLOY_DIR/qa-${os}-pass1.mp4" ]) && echo 1 || echo 0)
  [ "$HAS_AFTER" = "0" ] && continue

-  REPORT_FILE="video-reviews/${OS_LOWER}-qa-video-report.md"
+  # Collect all reports for this platform (single + multi-pass)
+  REPORT_FILES=""
  REPORT_LINK=""
  REPORT_HTML=""
-  if [ -f "$REPORT_FILE" ]; then
-    cp "$REPORT_FILE" "$DEPLOY_DIR/report-${OS_LOWER}.md"
-    REPORT_LINK="<a class=dl href=report-${OS_LOWER}.md><svg width=14 height=14 viewBox='0 0 24 24' fill=none stroke=currentColor stroke-width=2><path d='M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z'/><polyline points='14 2 14 8 20 8'/><line x1=16 y1=13 x2=8 y2=13/><line x1=16 y1=17 x2=8 y2=17'/></svg>Report</a>"
+  for rpt in "video-reviews/${OS_LOWER}-qa-video-report.md" video-reviews/${OS_LOWER}-pass*-qa-video-report.md; do
+    [ -f "$rpt" ] && REPORT_FILES="${REPORT_FILES} ${rpt}"
+  done

-    REPORT_MD=$(cat "$REPORT_FILE" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g')
-    REPORT_HTML="<details class=report open><summary><svg width=14 height=14 viewBox='0 0 24 24' fill=none stroke=currentColor stroke-width=2><circle cx=12 cy=12 r=10/><line x1=12 y1=16 x2=12 y2=12/><line x1=12 y1=8 x2=12.01 y2=8'/></svg> AI Comparative Review</summary><div class=report-body data-md>${REPORT_MD}</div></details>"
+  if [ -n "$REPORT_FILES" ]; then
+    # Concatenate all reports into one combined report file
+    COMBINED_MD=""
+    for rpt in $REPORT_FILES; do
+      cp "$rpt" "$DEPLOY_DIR/$(basename "$rpt")"
+      RPT_MD=$(cat "$rpt" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g')
+      [ -n "$COMBINED_MD" ] && COMBINED_MD="${COMBINED_MD}&#10;&#10;---&#10;&#10;"
+      COMBINED_MD="${COMBINED_MD}${RPT_MD}"
+    done
+    FIRST_REPORT=$(echo $REPORT_FILES | awk '{print $1}')
+    FIRST_BASENAME=$(basename "$FIRST_REPORT")
+    REPORT_LINK="<a class=dl href=${FIRST_BASENAME}><svg width=14 height=14 viewBox='0 0 24 24' fill=none stroke=currentColor stroke-width=2><path d='M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z'/><polyline points='14 2 14 8 20 8'/><line x1=16 y1=13 x2=8 y2=13/><line x1=16 y1=17 x2=8 y2=17'/></svg>Report</a>"
+    REPORT_HTML="<details class=report open><summary><svg width=14 height=14 viewBox='0 0 24 24' fill=none stroke=currentColor stroke-width=2><circle cx=12 cy=12 r=10/><line x1=12 y1=16 x2=12 y2=12/><line x1=12 y1=8 x2=12.01 y2=8'/></svg> AI Comparative Review</summary><div class=report-body data-md>${COMBINED_MD}</div></details>"
  fi

  if [ "$HAS_BEFORE" = "1" ]; then
@@ -134,11 +146,15 @@ ERROREOF

 # Generate badge SVGs into deploy dir
 REPRO_RESULT="" REPRO_COLOR="#9f9f9f"
-if grep -riq 'not reproduced\|could not reproduce\|unable to reproduce' video-reviews/ 2>/dev/null; then
+# Check INCONCLUSIVE first — the AI prompt explicitly uses this word
+if grep -riq 'INCONCLUSIVE' video-reviews/ 2>/dev/null; then
+  REPRO_RESULT="INCONCLUSIVE" REPRO_COLOR="#9f9f9f"
+elif grep -riq 'not reproduced\|could not reproduce\|unable to reproduce' video-reviews/ 2>/dev/null; then
  REPRO_RESULT="NOT REPRODUCIBLE" REPRO_COLOR="#9f9f9f"
 elif grep -riq 'partially reproduced\|partial' video-reviews/ 2>/dev/null; then
  REPRO_RESULT="PARTIAL" REPRO_COLOR="#dfb317"
-elif grep -riq 'reproduced\|confirmed' video-reviews/ 2>/dev/null; then
+# Exclude markdown headings (e.g. "## Confirmed Issues") — only match body text
+elif grep -ri 'reproduced\|confirmed' video-reviews/ 2>/dev/null | grep -viq '^[^:]*:##'; then
  REPRO_RESULT="REPRODUCED" REPRO_COLOR="#2196f3"
 fi

--- a/scripts/qa-record.ts
+++ b/scripts/qa-record.ts
@@ -57,6 +57,8 @@ type TestAction =
      y: number
      durationMs?: number
    }
+  | { action: 'addNode'; nodeName: string; x?: number; y?: number }
+  | { action: 'cloneNode'; x: number; y: number }

 interface ActionResult {
  action: TestAction
@@ -236,6 +238,10 @@ Each step is an object with an "action" field:
 - { "action": "loadWorkflow", "url": "https://..." } — loads a workflow JSON from a URL
 - { "action": "setSetting", "id": "Comfy.Setting.Id", "value": true } — changes a ComfyUI setting

+### Compound actions (save multiple turns)
+- { "action": "addNode", "nodeName": "KSampler", "x": 640, "y": 400 } — double-clicks canvas to open node search, types name, presses Enter
+- { "action": "cloneNode", "x": 750, "y": 350 } — right-clicks node at coords and clicks Clone in context menu
+
 ### Utility actions
 - { "action": "wait", "ms": 1000 } — waits (use sparingly, max 3000ms)
 - { "action": "screenshot", "name": "step-name" } — takes a screenshot
@@ -553,7 +559,7 @@ async function executeAction(
  outputDir: string
 ): Promise<ActionResult> {
  console.warn(
-    `  → ${step.action}${('label' in step && `: ${step.label}`) || ('text' in step && `: ${step.text}`) || ('name' in step && `: ${step.name}`) || ('x' in step && `: (${step.x},${step.y})`) || ''}`
+    `  → ${step.action}${('label' in step && `: ${step.label}`) || ('nodeName' in step && `: ${step.nodeName}`) || ('text' in step && `: ${step.text}`) || ('name' in step && `: ${step.name}`) || ('x' in step && `: (${step.x},${step.y})`) || ''}`
  )
  // Reject invalid click targets
  const INVALID_TARGETS = ['undefined', 'null', '[object Object]', '']
@@ -758,6 +764,28 @@ async function executeAction(
        await sleep(duration + 200)
        break
      }
+      case 'addNode': {
+        const nx = step.x ?? 640
+        const ny = step.y ?? 400
+        await page.mouse.dblclick(nx, ny)
+        await sleep(500)
+        await page.keyboard.type(step.nodeName, { delay: 30 })
+        await sleep(300)
+        await page.keyboard.press('Enter')
+        await sleep(500)
+        console.warn(`  Added node "${step.nodeName}" at (${nx}, ${ny})`)
+        break
+      }
+      case 'cloneNode': {
+        await page.mouse.move(step.x, step.y)
+        await sleep(300)
+        await page.mouse.click(step.x, step.y, { button: 'right' })
+        await sleep(500)
+        await clickSubmenuItem(page, 'Clone')
+        await sleep(500)
+        console.warn(`  Cloned node at (${step.x}, ${step.y})`)
+        break
+      }
      default:
        console.warn(`Unknown action: ${JSON.stringify(step)}`)
    }
@@ -931,6 +959,8 @@ Each action is a JSON object with an "action" field:
 - { "action": "reload" } — reloads the page (for bugs that manifest on load)
 - { "action": "wait", "ms": 1000 } — waits (max 3000ms)
 - { "action": "screenshot", "name": "step-name" } — takes a named screenshot
+- { "action": "addNode", "nodeName": "KSampler", "x": 640, "y": 400 } — double-clicks canvas to open search, types node name, presses Enter (compound action)
+- { "action": "cloneNode", "x": 750, "y": 350 } — right-clicks node at coords and clicks Clone
 - { "action": "done", "reason": "..." } — signals you are finished

 ## Response Format
@@ -949,7 +979,11 @@ Return { "reasoning": "...", "action": { "action": "done", "reason": "..." } } w
 ## Strategy Hints
 - For accessibility/UI bugs: use openSettings, setSetting to change themes or display options.
 - For workflow bugs: use loadDefaultWorkflow first to ensure a clean starting state.
- Prefer convenience actions (loadDefaultWorkflow, openSettings) over manual menu navigation — they save turns and are more reliable.
+- Prefer convenience actions (loadDefaultWorkflow, openSettings, addNode, cloneNode) over manual menu navigation — they save turns and are more reliable.
+- To add nodes to the canvas: use addNode (double-clicks canvas → types name → Enter), or loadDefaultWorkflow (loads 7 default nodes).
+- For visual/rendering bugs (z-index, overlap, z-fighting): ALWAYS start with loadDefaultWorkflow to get nodes on canvas. You cannot reproduce visual bugs on an empty canvas.
+- To clone a node: use cloneNode at the node's coordinates (right-clicks → Clone).
+- To overlap nodes for z-index testing: use dragCanvas to move one node on top of another.
 - Do NOT waste turns on generic exploration. Focus on reproducing the specific bug.
 ${focusSection}${qaSection}
 ## Issue to Reproduce
--- a/scripts/qa-video-review.ts
+++ b/scripts/qa-video-review.ts
@@ -16,6 +16,7 @@ interface CliOptions {
  dryRun: boolean
  prContext: string
  targetUrl: string
+  passLabel: string
 }

 interface VideoCandidate {
@@ -33,7 +34,8 @@ const DEFAULT_OPTIONS: CliOptions = {
  requestTimeoutMs: 300_000,
  dryRun: false,
  prContext: '',
-  targetUrl: ''
+  targetUrl: '',
+  passLabel: ''
 }

 const USAGE = `Usage:
@@ -56,6 +58,8 @@ Options:
  --pr-context <file>           File with PR context (title, body, diff)
                                 for PR-aware review
  --target-url <url>            Issue or PR URL to include in the report
+  --pass-label <label>          Label for multi-pass reports (e.g. pass1)
+                                 Output becomes {platform}-{label}-qa-video-report.md
  --dry-run                     Discover videos and output targets only
  --help                        Show this help text

@@ -133,6 +137,11 @@ function parseCliOptions(args: string[]): CliOptions {
      continue
    }

+    if (argument === '--pass-label') {
+      options.passLabel = requireValue(argument)
+      continue
+    }
+
    if (argument === '--dry-run') {
      options.dryRun = true
      continue
@@ -653,9 +662,10 @@ async function reviewVideo(
  })

  const videoStat = await stat(video.videoPath)
+  const passSegment = options.passLabel ? `-${options.passLabel}` : ''
  const outputPath = resolve(
    options.outputDir,
-    `${video.platformName}-qa-video-report.md`
+    `${video.platformName}${passSegment}-qa-video-report.md`
  )

  const reportInput: Parameters<typeof buildReportMarkdown>[0] = {