feat: auto-generate regression tests from QA reports

- Tighten BEFORE prompt to 15s snapshot (show old state only)
- Add qa-generate-test.ts: Gemini-powered Playwright test generator
- New workflow step: generate .spec.ts and push to {branch}-add-qa-test
- Tests assert UI/UX behavior (tab names, dirty state, visibility)
This commit is contained in:
snomiao
2026-03-20 20:15:25 +00:00
parent 774dcd823e
commit 5c243d16be
2 changed files with 274 additions and 8 deletions

View File

@@ -207,11 +207,11 @@ jobs:
## Time budget: keep the video recording under 30 seconds."
# BEFORE prompt (main branch — demonstrate the old behavior / bug)
# BEFORE prompt (main branch — brief snapshot of old behavior / missing feature)
cat > "${{ runner.temp }}/qa-before-prompt.txt" <<PROMPT
You are running the BEFORE pass of a focused QA comparison on PR #${PR_NUM}.
This is the MAIN branch (before the PR). Your goal is to demonstrate the
OLD behavior that this PR intends to change or fix.
You are recording a BEFORE snapshot on the main branch for PR #${PR_NUM}.
Keep this SHORT — under 15 seconds of video. Your ONLY goal is to briefly
show the OLD state so reviewers can see the contrast with the AFTER video.
Environment: CI=true, OS=${{ runner.os }}
Server URL: http://127.0.0.1:8188
@@ -219,17 +219,25 @@ jobs:
${DIFF_CONTEXT}
${TEST_DESIGN}
## What to record
Read the diff and identify what changed. Then do ONE of these:
- **New feature**: Show the UI WHERE the feature would appear. Open the
relevant menu/panel/dialog to prove it doesn't exist yet. That's it.
- **Bug fix**: Trigger the bug ONCE. Show the broken behavior. Stop.
- **Behavior change**: Perform the action ONCE with the OLD behavior. Stop.
Do NOT explore, test exhaustively, or try multiple variations.
One clear demonstration is all that's needed.
${COMMON_HEADER}
${COMMON_STEPS}
5. Execute ONLY your PR-targeted test steps (snapshot between each action)
5. Perform ONE action that shows the old/missing behavior (snapshot before and after)
6. playwright-cli video-stop ${QA_ARTIFACTS}/qa-before-session.webm
7. Write report to ${QA_ARTIFACTS}/$(date +%Y-%m-%d)-001-before-${OS_LOWER}-report.md
Include PASS/FAIL for each test step.
7. Write a 2-line report to ${QA_ARTIFACTS}/$(date +%Y-%m-%d)-001-before-${OS_LOWER}-report.md
${COMMON_RULES}
- KEEP IT SHORT — stop recording within 15 seconds of starting video
PROMPT
# AFTER prompt (PR branch — prove the fix works)
@@ -507,6 +515,56 @@ jobs:
echo "::endgroup::"
done
- name: Generate regression test from QA report
if: needs.resolve-matrix.outputs.mode == 'focused'
env:
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
PR_NUM="${{ steps.pr.outputs.number }}"
PR_BRANCH="${{ github.head_ref || github.ref_name }}"
if [ -z "$PR_NUM" ]; then
echo "No PR number, skipping test generation"
exit 0
fi
# Find the first QA report
REPORT=$(find video-reviews -name '*-qa-video-report.md' -type f | head -1)
if [ ! -f "$REPORT" ]; then
echo "No QA report found, skipping test generation"
exit 0
fi
# Ensure we have the PR diff
DIFF_FILE="${{ runner.temp }}/pr-diff.txt"
if [ ! -f "$DIFF_FILE" ]; then
gh pr diff "$PR_NUM" --repo "${{ github.repository }}" > "$DIFF_FILE" 2>/dev/null || true
fi
# Generate the test
TEST_NAME="qa-pr${PR_NUM}"
TEST_PATH="browser_tests/tests/${TEST_NAME}.spec.ts"
echo "::group::Generating regression test from QA report"
pnpm exec tsx scripts/qa-generate-test.ts \
--qa-report "$REPORT" \
--pr-diff "$DIFF_FILE" \
--output "$TEST_PATH" || {
echo "Test generation failed (non-fatal)"
exit 0
}
echo "::endgroup::"
# Push to {branch}-add-qa-test
TEST_BRANCH="${PR_BRANCH}-add-qa-test"
git checkout -b "$TEST_BRANCH" HEAD 2>/dev/null || git checkout "$TEST_BRANCH" 2>/dev/null || true
git add "$TEST_PATH"
git commit -m "test: add QA regression test for PR #${PR_NUM}" || {
echo "Nothing to commit"
exit 0
}
git push origin "$TEST_BRANCH" --force-with-lease || echo "Push failed (non-fatal)"
echo "Pushed regression test to branch: $TEST_BRANCH"
- name: Deploy to Cloudflare Pages
id: deploy-videos
env:

208
scripts/qa-generate-test.ts Normal file
View File

@@ -0,0 +1,208 @@
#!/usr/bin/env tsx
/**
* Generates a Playwright regression test (.spec.ts) from a QA report + PR diff.
 * Uses Gemini to produce a test that asserts UI/UX behavior verified during QA.
*
* Usage:
* pnpm exec tsx scripts/qa-generate-test.ts \
* --qa-report <path> QA video review report (markdown)
* --pr-diff <path> PR diff file
* --output <path> Output .spec.ts file path
* --model <name> Gemini model (default: gemini-2.5-flash)
*/
import { readFile, writeFile } from 'node:fs/promises'
import { basename, resolve } from 'node:path'
import { GoogleGenerativeAI } from '@google/generative-ai'
// Parsed command-line options for this script (populated by parseArgs).
interface CliOptions {
  // Path to the QA video review report (markdown). Required.
  qaReport: string
  // Path to the PR diff file. Required.
  prDiff: string
  // Destination path for the generated .spec.ts file. Required.
  output: string
  // Gemini model name passed to the SDK.
  model: string
}
// Baseline option values; the required paths start empty and are
// validated (non-empty) in parseArgs before use.
const DEFAULTS: CliOptions = {
  qaReport: '',
  prDiff: '',
  output: '',
  model: 'gemini-2.5-flash'
}
// ── Fixture API reference for the prompt ────────────────────────────
// Markdown cheat-sheet of the ComfyUI Playwright fixture surface, embedded
// verbatim into the Gemini prompt so generated tests use real helpers.
const FIXTURE_API = `
## ComfyUI Playwright Test Fixture API
Import pattern:
\`\`\`typescript
import { expect } from '@playwright/test'
import { comfyPageFixture as test } from '../fixtures/ComfyPage'
\`\`\`
### Available helpers on \`comfyPage\`:
- \`comfyPage.page\` — raw Playwright Page
- \`comfyPage.menu.topbar\` — Topbar helper:
  - \`.getTabNames(): Promise<string[]>\` — get all open tab names
  - \`.getActiveTabName(): Promise<string>\` — get active tab name
  - \`.saveWorkflow(name)\` — Save via File > Save dialog
  - \`.saveWorkflowAs(name)\` — Save via File > Save As dialog
  - \`.exportWorkflow(name)\` — Export via File > Export dialog
  - \`.triggerTopbarCommand(path: string[])\` — e.g. ['File', 'Save As']
  - \`.getWorkflowTab(name)\` — get a tab locator by name
  - \`.closeWorkflowTab(name)\` — close a tab
  - \`.openTopbarMenu()\` — open the hamburger menu
  - \`.openSubmenu(label)\` — hover to open a submenu
- \`comfyPage.menu.workflowsTab\` — Workflows sidebar:
  - \`.open()\` / \`.close()\` — toggle sidebar
  - \`.getTopLevelSavedWorkflowNames()\` — list saved workflows
  - \`.getPersistedItem(name)\` — get a workflow item locator
- \`comfyPage.workflow\` — WorkflowHelper:
  - \`.loadWorkflow(name)\` — load from browser_tests/assets/{name}.json
  - \`.setupWorkflowsDirectory(structure)\` — setup test directory
  - \`.deleteWorkflow(name)\` — delete a workflow
  - \`.isCurrentWorkflowModified(): Promise<boolean>\` — check dirty state
  - \`.getUndoQueueSize()\` / \`.getRedoQueueSize()\`
- \`comfyPage.settings.setSetting(key, value)\` — change settings
- \`comfyPage.keyboard\` — KeyboardHelper:
  - \`.undo()\` / \`.redo()\` / \`.bypass()\`
- \`comfyPage.nodeOps\` — NodeOperationsHelper
- \`comfyPage.canvas\` — CanvasHelper
- \`comfyPage.contextMenu\` — ContextMenu
- \`comfyPage.toast\` — ToastHelper
- \`comfyPage.confirmDialog\` — confirmation dialog
- \`comfyPage.nextFrame()\` — wait for Vue re-render
### Test patterns:
- Use \`test.describe('Name', { tag: '@ui' }, () => { ... })\` for UI tests
- Use \`test.beforeEach\` to set up common state (settings, workflow dir)
- Use \`expect(locator).toHaveScreenshot('name.png')\` for visual assertions
- Use \`expect(locator).toBeVisible()\` / \`.toHaveText()\` for behavioral assertions
- Use \`comfyPage.workflow.setupWorkflowsDirectory({})\` to ensure clean state
`
// ── Prompt builder ──────────────────────────────────────────────────
/**
 * Assembles the full Gemini prompt from four parts: task instructions,
 * the fixture API reference, the QA report + PR diff context, and the
 * output-format requirements.
 */
function buildPrompt(qaReport: string, prDiff: string): string {
  // Cap the diff at 8k chars so an oversized PR doesn't dominate the prompt.
  const diffExcerpt = prDiff.slice(0, 8000)

  const task = `You are a Playwright test generator for the ComfyUI frontend.
Your task: Generate a single .spec.ts regression test file that asserts the UIUX behavior
described in the QA report below. The test must:
1. Use the ComfyUI Playwright fixture API (documented below)
2. Test UIUX behavior ONLY — element visibility, tab names, dialog states, workflow states
3. NOT test code implementation details
4. Be concise — only test the behavior that the PR changed
5. Follow existing test conventions (see API reference)
${FIXTURE_API}`

  const context = `## QA Video Review Report
${qaReport}
## PR Diff (for context on what changed)
${diffExcerpt}`

  const requirements = `## Output Requirements
- Output ONLY the .spec.ts file content — no markdown fences, no explanations
- Start with imports, end with closing brace
- Use descriptive test names that explain the expected behavior
- Add screenshot assertions where visual verification matters
- Keep it focused: 2-5 test cases covering the core behavioral change
- Use \`test.beforeEach\` for common setup (settings, workflow directory)
- Tag the describe block with \`{ tag: '@ui' }\` or \`{ tag: '@workflow' }\` as appropriate
`

  return [task, context, requirements].join('\n')
}
// ── Gemini call ─────────────────────────────────────────────────────
/**
 * Removes a wrapping markdown code fence from model output, if present.
 *
 * Trims BEFORE stripping: without the `m` flag a JavaScript `$` anchor only
 * matches at the very end of the string, so output ending with a newline
 * after the closing fence (common in model responses) would otherwise defeat
 * the closing-fence pattern and leave the fence in the generated file.
 */
function stripCodeFence(text: string): string {
  return text
    .trim()
    .replace(/^```(?:typescript|ts)?\n?/, '')
    .replace(/\n?```$/, '')
    .trim()
}

/**
 * Sends the QA report + PR diff to Gemini and returns the generated
 * .spec.ts source, with any wrapping markdown fences stripped.
 *
 * @param qaReport QA video review report content (markdown)
 * @param prDiff   PR diff content (truncated inside buildPrompt)
 * @param model    Gemini model name
 * @throws when GEMINI_API_KEY is unset, or the API call fails
 */
async function generateTest(
  qaReport: string,
  prDiff: string,
  model: string
): Promise<string> {
  const apiKey = process.env.GEMINI_API_KEY
  if (!apiKey) throw new Error('GEMINI_API_KEY env var required')
  const genAI = new GoogleGenerativeAI(apiKey)
  const genModel = genAI.getGenerativeModel({ model })
  const prompt = buildPrompt(qaReport, prDiff)
  // Progress goes to stderr (console.warn) so stdout stays clean.
  console.warn(`Sending prompt to ${model} (${prompt.length} chars)...`)
  const result = await genModel.generateContent({
    contents: [{ role: 'user', parts: [{ text: prompt }] }],
    generationConfig: {
      // Low temperature: deterministic, convention-following test code.
      temperature: 0.2,
      maxOutputTokens: 8192
    }
  })
  return stripCodeFence(result.response.text())
}
// ── CLI ─────────────────────────────────────────────────────────────
/**
 * Parses process.argv into CliOptions.
 *
 * Exits with code 1 when a flag is missing its value or a required option
 * is absent; exits with code 0 after printing --help. Unknown arguments
 * are warned about but tolerated.
 */
function parseArgs(): CliOptions {
  const args = process.argv.slice(2)
  const opts = { ...DEFAULTS }
  // Returns args[index], or exits when a flag was passed without a value.
  // (Previously a trailing flag silently assigned `undefined` to a field
  // typed as string, e.g. `--model` as the last argument.)
  const takeValue = (index: number, flag: string): string => {
    const value = args[index]
    if (value === undefined) {
      console.error(`Missing value for ${flag}. Run with --help for usage.`)
      process.exit(1)
    }
    return value
  }
  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case '--qa-report':
        opts.qaReport = takeValue(++i, '--qa-report')
        break
      case '--pr-diff':
        opts.prDiff = takeValue(++i, '--pr-diff')
        break
      case '--output':
        opts.output = takeValue(++i, '--output')
        break
      case '--model':
        opts.model = takeValue(++i, '--model')
        break
      case '--help':
        console.warn(`Usage:
  pnpm exec tsx scripts/qa-generate-test.ts [options]
Options:
  --qa-report <path>  QA video review report (markdown) [required]
  --pr-diff <path>    PR diff file [required]
  --output <path>     Output .spec.ts path [required]
  --model <name>      Gemini model (default: gemini-2.5-flash)`)
        process.exit(0)
        break
      default:
        // Surface typos instead of silently ignoring unknown arguments,
        // but stay non-fatal so extra CI-injected args don't break the step.
        console.warn(`Ignoring unknown argument: ${args[i]}`)
    }
  }
  if (!opts.qaReport || !opts.prDiff || !opts.output) {
    console.error('Missing required args. Run with --help for usage.')
    process.exit(1)
  }
  return opts
}
/**
 * Entry point: parse CLI args, read the QA report and PR diff, ask Gemini
 * for a regression spec, and write the result to the output path.
 */
async function main(): Promise<void> {
  const options = parseArgs()

  const reportText = await readFile(resolve(options.qaReport), 'utf-8')
  const diffText = await readFile(resolve(options.prDiff), 'utf-8')
  console.warn(
    `QA report: ${basename(options.qaReport)} (${reportText.length} chars)`
  )
  console.warn(`PR diff: ${basename(options.prDiff)} (${diffText.length} chars)`)

  const specSource = await generateTest(reportText, diffText, options.model)
  const destination = resolve(options.output)
  // Ensure the file ends with a trailing newline, per repo convention.
  await writeFile(destination, specSource + '\n')
  console.warn(`Generated test: ${destination} (${specSource.length} chars)`)
}

// Fail the CI step with a non-zero exit code on any unhandled rejection.
main().catch((err) => {
  console.error(err)
  process.exit(1)
})