feat: auto-generate regression tests from QA reports

- Tighten BEFORE prompt to 15s snapshot (show old state only)
- Add qa-generate-test.ts: Gemini-powered Playwright test generator
- New workflow step: generate .spec.ts and push to {branch}-add-qa-test
- Tests assert UI/UX behavior (tab names, dirty state, visibility)
This commit is contained in:
snomiao
2026-03-20 20:15:25 +00:00
parent 774dcd823e
commit 5c243d16be
2 changed files with 274 additions and 8 deletions

View File

@@ -207,11 +207,11 @@ jobs:
## Time budget: keep the video recording under 30 seconds."
# BEFORE prompt (main branch — demonstrate the old behavior / bug)
# BEFORE prompt (main branch — brief snapshot of old behavior / missing feature)
cat > "${{ runner.temp }}/qa-before-prompt.txt" <<PROMPT
You are running the BEFORE pass of a focused QA comparison on PR #${PR_NUM}.
This is the MAIN branch (before the PR). Your goal is to demonstrate the
OLD behavior that this PR intends to change or fix.
You are recording a BEFORE snapshot on the main branch for PR #${PR_NUM}.
Keep this SHORT — under 15 seconds of video. Your ONLY goal is to briefly
show the OLD state so reviewers can see the contrast with the AFTER video.
Environment: CI=true, OS=${{ runner.os }}
Server URL: http://127.0.0.1:8188
@@ -219,17 +219,25 @@ jobs:
${DIFF_CONTEXT}
${TEST_DESIGN}
## What to record
Read the diff and identify what changed. Then do ONE of these:
- **New feature**: Show the UI WHERE the feature would appear. Open the
relevant menu/panel/dialog to prove it doesn't exist yet. That's it.
- **Bug fix**: Trigger the bug ONCE. Show the broken behavior. Stop.
- **Behavior change**: Perform the action ONCE with the OLD behavior. Stop.
Do NOT explore, test exhaustively, or try multiple variations.
One clear demonstration is all that's needed.
${COMMON_HEADER}
${COMMON_STEPS}
5. Execute ONLY your PR-targeted test steps (snapshot between each action)
5. Perform ONE action that shows the old/missing behavior (snapshot before and after)
6. playwright-cli video-stop ${QA_ARTIFACTS}/qa-before-session.webm
7. Write report to ${QA_ARTIFACTS}/$(date +%Y-%m-%d)-001-before-${OS_LOWER}-report.md
Include PASS/FAIL for each test step.
7. Write a 2-line report to ${QA_ARTIFACTS}/$(date +%Y-%m-%d)-001-before-${OS_LOWER}-report.md
${COMMON_RULES}
- KEEP IT SHORT — stop recording within 15 seconds of starting video
PROMPT
# AFTER prompt (PR branch — prove the fix works)
@@ -507,6 +515,56 @@ jobs:
echo "::endgroup::"
done
- name: Generate regression test from QA report
if: needs.resolve-matrix.outputs.mode == 'focused'
env:
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
PR_NUM="${{ steps.pr.outputs.number }}"
PR_BRANCH="${{ github.head_ref || github.ref_name }}"
if [ -z "$PR_NUM" ]; then
echo "No PR number, skipping test generation"
exit 0
fi
# Find the first QA report
REPORT=$(find video-reviews -name '*-qa-video-report.md' -type f | head -1)
if [ ! -f "$REPORT" ]; then
echo "No QA report found, skipping test generation"
exit 0
fi
# Ensure we have the PR diff
DIFF_FILE="${{ runner.temp }}/pr-diff.txt"
if [ ! -f "$DIFF_FILE" ]; then
gh pr diff "$PR_NUM" --repo "${{ github.repository }}" > "$DIFF_FILE" 2>/dev/null || true
fi
# Generate the test
TEST_NAME="qa-pr${PR_NUM}"
TEST_PATH="browser_tests/tests/${TEST_NAME}.spec.ts"
echo "::group::Generating regression test from QA report"
pnpm exec tsx scripts/qa-generate-test.ts \
--qa-report "$REPORT" \
--pr-diff "$DIFF_FILE" \
--output "$TEST_PATH" || {
echo "Test generation failed (non-fatal)"
exit 0
}
echo "::endgroup::"
# Push to {branch}-add-qa-test
TEST_BRANCH="${PR_BRANCH}-add-qa-test"
git checkout -b "$TEST_BRANCH" HEAD 2>/dev/null || git checkout "$TEST_BRANCH" 2>/dev/null || true
git add "$TEST_PATH"
git commit -m "test: add QA regression test for PR #${PR_NUM}" || {
echo "Nothing to commit"
exit 0
}
git push origin "$TEST_BRANCH" --force-with-lease || echo "Push failed (non-fatal)"
echo "Pushed regression test to branch: $TEST_BRANCH"
- name: Deploy to Cloudflare Pages
id: deploy-videos
env:

208
scripts/qa-generate-test.ts Normal file
View File

@@ -0,0 +1,208 @@
#!/usr/bin/env tsx
/**
* Generates a Playwright regression test (.spec.ts) from a QA report + PR diff.
 * Uses Gemini to produce a test that asserts UI/UX behavior verified during QA.
*
* Usage:
* pnpm exec tsx scripts/qa-generate-test.ts \
* --qa-report <path> QA video review report (markdown)
* --pr-diff <path> PR diff file
* --output <path> Output .spec.ts file path
* --model <name> Gemini model (default: gemini-2.5-flash)
*/
import { readFile, writeFile } from 'node:fs/promises'
import { basename, resolve } from 'node:path'
import { GoogleGenerativeAI } from '@google/generative-ai'
// Parsed command-line options for this script (populated by parseArgs).
interface CliOptions {
  // Path to the QA video review report (markdown). Required.
  qaReport: string
  // Path to the PR diff file. Required.
  prDiff: string
  // Destination path for the generated .spec.ts file. Required.
  output: string
  // Gemini model name passed to the SDK.
  model: string
}
// Baseline option values; the required paths start empty and are
// validated (non-empty) in parseArgs before use.
const DEFAULTS: CliOptions = {
  qaReport: '',
  prDiff: '',
  output: '',
  model: 'gemini-2.5-flash'
}
// ── Fixture API reference for the prompt ────────────────────────────
// Markdown cheat-sheet of the ComfyUI Playwright fixture surface, embedded
// verbatim into the Gemini prompt so generated tests use real helpers.
const FIXTURE_API = `
## ComfyUI Playwright Test Fixture API
Import pattern:
\`\`\`typescript
import { expect } from '@playwright/test'
import { comfyPageFixture as test } from '../fixtures/ComfyPage'
\`\`\`
### Available helpers on \`comfyPage\`:
- \`comfyPage.page\` — raw Playwright Page
- \`comfyPage.menu.topbar\` — Topbar helper:
  - \`.getTabNames(): Promise<string[]>\` — get all open tab names
  - \`.getActiveTabName(): Promise<string>\` — get active tab name
  - \`.saveWorkflow(name)\` — Save via File > Save dialog
  - \`.saveWorkflowAs(name)\` — Save via File > Save As dialog
  - \`.exportWorkflow(name)\` — Export via File > Export dialog
  - \`.triggerTopbarCommand(path: string[])\` — e.g. ['File', 'Save As']
  - \`.getWorkflowTab(name)\` — get a tab locator by name
  - \`.closeWorkflowTab(name)\` — close a tab
  - \`.openTopbarMenu()\` — open the hamburger menu
  - \`.openSubmenu(label)\` — hover to open a submenu
- \`comfyPage.menu.workflowsTab\` — Workflows sidebar:
  - \`.open()\` / \`.close()\` — toggle sidebar
  - \`.getTopLevelSavedWorkflowNames()\` — list saved workflows
  - \`.getPersistedItem(name)\` — get a workflow item locator
- \`comfyPage.workflow\` — WorkflowHelper:
  - \`.loadWorkflow(name)\` — load from browser_tests/assets/{name}.json
  - \`.setupWorkflowsDirectory(structure)\` — setup test directory
  - \`.deleteWorkflow(name)\` — delete a workflow
  - \`.isCurrentWorkflowModified(): Promise<boolean>\` — check dirty state
  - \`.getUndoQueueSize()\` / \`.getRedoQueueSize()\`
- \`comfyPage.settings.setSetting(key, value)\` — change settings
- \`comfyPage.keyboard\` — KeyboardHelper:
  - \`.undo()\` / \`.redo()\` / \`.bypass()\`
- \`comfyPage.nodeOps\` — NodeOperationsHelper
- \`comfyPage.canvas\` — CanvasHelper
- \`comfyPage.contextMenu\` — ContextMenu
- \`comfyPage.toast\` — ToastHelper
- \`comfyPage.confirmDialog\` — confirmation dialog
- \`comfyPage.nextFrame()\` — wait for Vue re-render
### Test patterns:
- Use \`test.describe('Name', { tag: '@ui' }, () => { ... })\` for UI tests
- Use \`test.beforeEach\` to set up common state (settings, workflow dir)
- Use \`expect(locator).toHaveScreenshot('name.png')\` for visual assertions
- Use \`expect(locator).toBeVisible()\` / \`.toHaveText()\` for behavioral assertions
- Use \`comfyPage.workflow.setupWorkflowsDirectory({})\` to ensure clean state
`
// ── Prompt builder ──────────────────────────────────────────────────
/**
 * Assembles the full Gemini prompt from four parts: task instructions,
 * the fixture API reference, the QA report + PR diff context, and the
 * output-format requirements.
 */
function buildPrompt(qaReport: string, prDiff: string): string {
  // Cap the diff at 8k chars so an oversized PR doesn't dominate the prompt.
  const diffExcerpt = prDiff.slice(0, 8000)

  const task = `You are a Playwright test generator for the ComfyUI frontend.
Your task: Generate a single .spec.ts regression test file that asserts the UIUX behavior
described in the QA report below. The test must:
1. Use the ComfyUI Playwright fixture API (documented below)
2. Test UIUX behavior ONLY — element visibility, tab names, dialog states, workflow states
3. NOT test code implementation details
4. Be concise — only test the behavior that the PR changed
5. Follow existing test conventions (see API reference)
${FIXTURE_API}`

  const context = `## QA Video Review Report
${qaReport}
## PR Diff (for context on what changed)
${diffExcerpt}`

  const requirements = `## Output Requirements
- Output ONLY the .spec.ts file content — no markdown fences, no explanations
- Start with imports, end with closing brace
- Use descriptive test names that explain the expected behavior
- Add screenshot assertions where visual verification matters
- Keep it focused: 2-5 test cases covering the core behavioral change
- Use \`test.beforeEach\` for common setup (settings, workflow directory)
- Tag the describe block with \`{ tag: '@ui' }\` or \`{ tag: '@workflow' }\` as appropriate
`

  return [task, context, requirements].join('\n')
}
// ── Gemini call ─────────────────────────────────────────────────────
/**
 * Removes a wrapping markdown code fence from model output, if present.
 *
 * Trims BEFORE stripping: without the `m` flag a JavaScript `$` anchor only
 * matches at the very end of the string, so output ending with a newline
 * after the closing fence (common in model responses) would otherwise defeat
 * the closing-fence pattern and leave the fence in the generated file.
 */
function stripCodeFence(text: string): string {
  return text
    .trim()
    .replace(/^```(?:typescript|ts)?\n?/, '')
    .replace(/\n?```$/, '')
    .trim()
}

/**
 * Sends the QA report + PR diff to Gemini and returns the generated
 * .spec.ts source, with any wrapping markdown fences stripped.
 *
 * @param qaReport QA video review report content (markdown)
 * @param prDiff   PR diff content (truncated inside buildPrompt)
 * @param model    Gemini model name
 * @throws when GEMINI_API_KEY is unset, or the API call fails
 */
async function generateTest(
  qaReport: string,
  prDiff: string,
  model: string
): Promise<string> {
  const apiKey = process.env.GEMINI_API_KEY
  if (!apiKey) throw new Error('GEMINI_API_KEY env var required')
  const genAI = new GoogleGenerativeAI(apiKey)
  const genModel = genAI.getGenerativeModel({ model })
  const prompt = buildPrompt(qaReport, prDiff)
  // Progress goes to stderr (console.warn) so stdout stays clean.
  console.warn(`Sending prompt to ${model} (${prompt.length} chars)...`)
  const result = await genModel.generateContent({
    contents: [{ role: 'user', parts: [{ text: prompt }] }],
    generationConfig: {
      // Low temperature: deterministic, convention-following test code.
      temperature: 0.2,
      maxOutputTokens: 8192
    }
  })
  return stripCodeFence(result.response.text())
}
// ── CLI ─────────────────────────────────────────────────────────────
/**
 * Parses process.argv into CliOptions.
 *
 * Exits with code 1 when a flag is missing its value or a required option
 * is absent; exits with code 0 after printing --help. Unknown arguments
 * are warned about but tolerated.
 */
function parseArgs(): CliOptions {
  const args = process.argv.slice(2)
  const opts = { ...DEFAULTS }
  // Returns args[index], or exits when a flag was passed without a value.
  // (Previously a trailing flag silently assigned `undefined` to a field
  // typed as string, e.g. `--model` as the last argument.)
  const takeValue = (index: number, flag: string): string => {
    const value = args[index]
    if (value === undefined) {
      console.error(`Missing value for ${flag}. Run with --help for usage.`)
      process.exit(1)
    }
    return value
  }
  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case '--qa-report':
        opts.qaReport = takeValue(++i, '--qa-report')
        break
      case '--pr-diff':
        opts.prDiff = takeValue(++i, '--pr-diff')
        break
      case '--output':
        opts.output = takeValue(++i, '--output')
        break
      case '--model':
        opts.model = takeValue(++i, '--model')
        break
      case '--help':
        console.warn(`Usage:
  pnpm exec tsx scripts/qa-generate-test.ts [options]
Options:
  --qa-report <path>  QA video review report (markdown) [required]
  --pr-diff <path>    PR diff file [required]
  --output <path>     Output .spec.ts path [required]
  --model <name>      Gemini model (default: gemini-2.5-flash)`)
        process.exit(0)
        break
      default:
        // Surface typos instead of silently ignoring unknown arguments,
        // but stay non-fatal so extra CI-injected args don't break the step.
        console.warn(`Ignoring unknown argument: ${args[i]}`)
    }
  }
  if (!opts.qaReport || !opts.prDiff || !opts.output) {
    console.error('Missing required args. Run with --help for usage.')
    process.exit(1)
  }
  return opts
}
/**
 * Entry point: parse CLI args, read the QA report and PR diff, ask Gemini
 * for a regression spec, and write the result to the output path.
 */
async function main(): Promise<void> {
  const options = parseArgs()

  const reportText = await readFile(resolve(options.qaReport), 'utf-8')
  const diffText = await readFile(resolve(options.prDiff), 'utf-8')
  console.warn(
    `QA report: ${basename(options.qaReport)} (${reportText.length} chars)`
  )
  console.warn(`PR diff: ${basename(options.prDiff)} (${diffText.length} chars)`)

  const specSource = await generateTest(reportText, diffText, options.model)
  const destination = resolve(options.output)
  // Ensure the file ends with a trailing newline, per repo convention.
  await writeFile(destination, specSource + '\n')
  console.warn(`Generated test: ${destination} (${specSource.length} chars)`)
}

// Fail the CI step with a non-zero exit code on any unhandled rejection.
main().catch((err) => {
  console.error(err)
  process.exit(1)
})