refactor: replace Codex with direct Playwright recording in QA pipeline

Replace the unreliable codex exec approach with a Playwright script
(qa-record.ts) that uses Gemini to generate targeted test steps from
the PR diff, then executes them deterministically via Playwright's API.

Key changes:
- New scripts/qa-record.ts: Gemini generates JSON test actions, Playwright
  executes them with reliable helper functions (menu nav, dialog fill, etc.)
- Remove codex CLI and playwright-cli dependencies
- Remove 150+ lines of prompt templates from pr-qa.yaml
- Firefox headless with video recording (same approach proven locally)
- Fallback steps if Gemini fails

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
snomiao
2026-03-20 21:43:03 +00:00
parent 6993a7ad5f
commit 4e5f683185
2 changed files with 412 additions and 149 deletions

View File

@@ -118,27 +118,13 @@ jobs:
with:
launch_server: 'false'
- name: Install playwright-cli and Codex CLI
- name: Install Playwright browser
shell: bash
run: |
npm install -g @playwright/cli@latest @openai/codex@latest
which playwright-cli
playwright-cli --version || true
npx playwright install chromium
npx playwright install firefox
mkdir -p "$QA_ARTIFACTS"
- name: Configure playwright-cli output
shell: bash
run: |
mkdir -p "$QA_ARTIFACTS" .playwright
cat > .playwright/cli.config.json <<CEOF
{
"outputDir": "$QA_ARTIFACTS",
"saveVideo": { "width": 1280, "height": 720 }
}
CEOF
- name: Get PR diff for focused QA
if: needs.resolve-matrix.outputs.mode == 'focused'
- name: Get PR diff
shell: bash
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -151,121 +137,6 @@ jobs:
grep '^diff --git' "${{ runner.temp }}/pr-diff.txt" | \
sed 's|diff --git a/||;s| b/.*||' | sort -u | tee "${{ runner.temp }}/changed-files.txt"
- name: Write QA prompts
shell: bash
env:
BRANCH: ${{ github.head_ref || github.ref_name }}
PR_NUM: ${{ github.event.pull_request.number || 'N/A' }}
SHA: ${{ github.sha }}
run: |
OS_LOWER=$(echo "$RUNNER_OS" | tr '[:upper:]' '[:lower:]')
COMMON_HEADER="CRITICAL: \"playwright-cli\" is already installed globally in PATH. Do NOT use pnpm dlx or npx.
Chromium is already installed. Just run the commands directly."
COMMON_STEPS="You MUST follow these exact steps in order:
1. playwright-cli open http://127.0.0.1:8188
2. QUICK LOGIN (before video): snapshot, fill the username input with \"qa-ci\", click Next button, wait for graph editor to load
3. playwright-cli snapshot — verify graph editor is loaded
4. playwright-cli video-start"
COMMON_RULES="RULES:
- Do NOT browse templates, explore sidebar panels, or test unrelated features
- Do NOT use pnpm/npx to run playwright-cli
- Do NOT create a PR, post PR comments, commit, or push anything"
if [ "$QA_MODE" = "full" ]; then
cat > "${{ runner.temp }}/qa-prompt.txt" <<PROMPT
You are running a FULL automated QA pass on the ComfyUI frontend.
Read the file .claude/skills/comfy-qa/SKILL.md and follow the FULL QA test plan.
Environment: CI=true, OS=${{ runner.os }}
Server URL: http://127.0.0.1:8188
Branch: ${BRANCH}, PR: #${PR_NUM}, Commit: ${SHA}
${COMMON_HEADER}
${COMMON_STEPS}
5. Test the UI (click, fill, navigate — use snapshot between actions to get refs)
6. playwright-cli video-stop ${QA_ARTIFACTS}/qa-session.webm
7. Write report to ${QA_ARTIFACTS}/$(date +%Y-%m-%d)-001-${OS_LOWER}-report.md
Do NOT skip any steps. Skip tests not available in CI (file dialogs, GPU execution).
PROMPT
else
# Focused QA — write separate before/after prompts with identical test steps
DIFF_CONTEXT="CHANGED FILES:
$(cat "${{ runner.temp }}/changed-files.txt" 2>/dev/null || echo "Unknown")
DIFF (truncated to 500 lines):
$(head -500 "${{ runner.temp }}/pr-diff.txt" 2>/dev/null || echo "No diff available")"
TEST_DESIGN="## Instructions
1. Read the diff above carefully. Identify what UI behavior changed.
2. Design 3-6 targeted test steps that exercise EXACTLY that behavior.
3. Execute ONLY those steps.
## Time budget: keep the video recording under 30 seconds."
# BEFORE prompt (main branch — brief snapshot of old behavior / missing feature)
cat > "${{ runner.temp }}/qa-before-prompt.txt" <<PROMPT
You are recording a BEFORE snapshot on the main branch for PR #${PR_NUM}.
Keep this SHORT — under 15 seconds of video. Your ONLY goal is to briefly
show the OLD state so reviewers can see the contrast with the AFTER video.
Environment: CI=true, OS=${{ runner.os }}
Server URL: http://127.0.0.1:8188
Branch: main (before PR)
${DIFF_CONTEXT}
## What to record
Read the diff and identify what changed. Then do ONE of these:
- **New feature**: Show the UI WHERE the feature would appear. Open the
relevant menu/panel/dialog to prove it doesn't exist yet. That's it.
- **Bug fix**: Trigger the bug ONCE. Show the broken behavior. Stop.
- **Behavior change**: Perform the action ONCE with the OLD behavior. Stop.
Do NOT explore, test exhaustively, or try multiple variations.
One clear demonstration is all that's needed.
${COMMON_HEADER}
${COMMON_STEPS}
5. Perform ONE action that shows the old/missing behavior (snapshot before and after)
6. playwright-cli video-stop ${QA_ARTIFACTS}/qa-before-session.webm
7. Write a 2-line report to ${QA_ARTIFACTS}/$(date +%Y-%m-%d)-001-before-${OS_LOWER}-report.md
${COMMON_RULES}
- KEEP IT SHORT — stop recording within 15 seconds of starting video
PROMPT
# AFTER prompt (PR branch — prove the fix works)
cat > "${{ runner.temp }}/qa-prompt.txt" <<PROMPT
You are running the AFTER pass of a focused QA comparison on PR #${PR_NUM}.
This is the PR branch (after the changes). Your goal is to prove the PR's
changes work correctly and the intended behavior is now in place.
Environment: CI=true, OS=${{ runner.os }}
Server URL: http://127.0.0.1:8188
Branch: ${BRANCH} (PR)
${DIFF_CONTEXT}
${TEST_DESIGN}
${COMMON_HEADER}
${COMMON_STEPS}
5. Execute ONLY your PR-targeted test steps (snapshot between each action)
6. playwright-cli video-stop ${QA_ARTIFACTS}/qa-session.webm
7. Write report to ${QA_ARTIFACTS}/$(date +%Y-%m-%d)-001-${OS_LOWER}-report.md
Include PASS/FAIL for each test step.
${COMMON_RULES}
PROMPT
fi
# ── BEFORE run (main branch) ──
- name: Start server with main branch frontend
if: needs.resolve-matrix.outputs.mode == 'focused'
@@ -284,15 +155,13 @@ jobs:
if: needs.resolve-matrix.outputs.mode == 'focused'
shell: bash
env:
CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }}
CI: 'true'
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
run: |
codex exec \
--model gpt-5.4-mini \
--sandbox danger-full-access \
- < "${{ runner.temp }}/qa-before-prompt.txt"
pnpm exec tsx scripts/qa-record.ts \
--mode before \
--diff "${{ runner.temp }}/pr-diff.txt" \
--output-dir "$QA_ARTIFACTS" \
--url http://127.0.0.1:8188
- name: Stop server after BEFORE run
if: needs.resolve-matrix.outputs.mode == 'focused'
@@ -323,15 +192,13 @@ jobs:
- name: Run AFTER QA (PR branch)
shell: bash
env:
CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }}
CI: 'true'
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
run: |
codex exec \
--model gpt-5.4-mini \
--sandbox danger-full-access \
- < "${{ runner.temp }}/qa-prompt.txt"
pnpm exec tsx scripts/qa-record.ts \
--mode after \
--diff "${{ runner.temp }}/pr-diff.txt" \
--output-dir "$QA_ARTIFACTS" \
--url http://127.0.0.1:8188
- name: Collect artifacts
if: always()

396
scripts/qa-record.ts Normal file
View File

@@ -0,0 +1,396 @@
#!/usr/bin/env tsx
/**
* QA Recording Script
*
* Records a ComfyUI frontend QA session using Playwright with video capture.
* Uses Gemini to generate targeted test steps based on the PR diff.
*
* Usage:
* pnpm exec tsx scripts/qa-record.ts \
* --mode before|after \
* --diff <path-to-diff> \
* --output-dir <path> \
* [--url <server-url>] \
* [--model <gemini-model>]
*
* Env: GEMINI_API_KEY (required)
*/
import { firefox } from '@playwright/test'
import type { Page } from '@playwright/test'
import { GoogleGenerativeAI } from '@google/generative-ai'
import { readFileSync, mkdirSync, readdirSync, renameSync } from 'fs'
// ── Types ──
/**
 * One scripted QA step, discriminated by its `action` field.
 *
 * These are the only actions the Gemini prompt advertises and the step
 * executor understands:
 * - `openMenu`       — open the Comfy main menu
 * - `hoverMenuItem`  — hover a menu entry whose label contains `label`
 * - `clickMenuItem`  — click an entry containing `label` in a visible submenu
 * - `fillDialog`     — type `text` into the dialog input and press Enter
 * - `pressKey`       — press the keyboard key named `key`
 * - `click`          — click the first element matching the visible `text`
 * - `wait`           — pause for `ms` milliseconds (the executor caps this)
 * - `screenshot`     — save `<outputDir>/<name>.png`
 */
type TestAction =
| { action: 'openMenu' }
| { action: 'hoverMenuItem'; label: string }
| { action: 'clickMenuItem'; label: string }
| { action: 'fillDialog'; text: string }
| { action: 'pressKey'; key: string }
| { action: 'click'; text: string }
| { action: 'wait'; ms: number }
| { action: 'screenshot'; name: string }
/** Resolved runtime configuration for one recording run. */
interface Options {
// Which side of the comparison is being recorded (`--mode`).
mode: 'before' | 'after'
// Path to the PR diff text file fed to the prompt (`--diff`).
diffFile: string
// Directory that receives the video and screenshots (`--output-dir`).
outputDir: string
// Base URL of the ComfyUI server to open (`--url`).
serverUrl: string
// Gemini model identifier used for step generation (`--model`).
model: string
// Gemini API key, read from the GEMINI_API_KEY environment variable.
apiKey: string
}
// ── CLI parsing ──
/**
 * Parses `process.argv` and the environment into a validated {@link Options}.
 *
 * Recognised flags: `--mode before|after`, `--diff <path>`,
 * `--output-dir <path>`, `--url <url>`, `--model <model>` and `--help`.
 * The API key is read from the `GEMINI_API_KEY` environment variable.
 *
 * The process exits with code 1 (after logging to stderr) when a required
 * flag is missing, a flag has no value, `--mode` is not `before`/`after`,
 * or the API key is absent. `--help` prints usage and exits with code 0.
 * Unknown arguments are reported and otherwise ignored.
 *
 * @returns The fully populated options for this run.
 */
function parseArgs(): Options {
  const args = process.argv.slice(2)

  let mode: Options['mode'] | undefined
  let diffFile: string | undefined
  let outputDir: string | undefined
  let serverUrl = 'http://127.0.0.1:8188'
  let model = 'gemini-2.5-flash'
  const apiKey = process.env.GEMINI_API_KEY ?? ''

  const fail = (message: string): never => {
    console.error(message)
    process.exit(1)
  }

  // Reads the value that follows a flag; a missing value or another flag in
  // its place is a usage error rather than something to store silently.
  const valueFor = (flag: string, index: number): string => {
    const value = args[index]
    if (value === undefined || value.startsWith('--')) {
      return fail(`Missing value for ${flag}`)
    }
    return value
  }

  for (let i = 0; i < args.length; i++) {
    const flag = args[i]
    switch (flag) {
      case '--mode': {
        const requested = valueFor(flag, ++i)
        if (requested === 'before' || requested === 'after') {
          mode = requested
        } else {
          fail(`Invalid --mode "${requested}": expected "before" or "after"`)
        }
        break
      }
      case '--diff':
        diffFile = valueFor(flag, ++i)
        break
      case '--output-dir':
        outputDir = valueFor(flag, ++i)
        break
      case '--url':
        serverUrl = valueFor(flag, ++i)
        break
      case '--model':
        model = valueFor(flag, ++i)
        break
      case '--help':
        console.warn(
          'Usage: qa-record.ts --mode before|after --diff <path> --output-dir <path> [--url <url>] [--model <model>]'
        )
        process.exit(0)
        break
      default:
        console.warn(`Ignoring unknown argument: ${flag}`)
    }
  }

  if (!mode || !diffFile || !outputDir) {
    return fail(
      'Required: --mode before|after --diff <path> --output-dir <path>'
    )
  }
  if (!apiKey) {
    return fail('GEMINI_API_KEY environment variable is required')
  }

  return { mode, diffFile, outputDir, serverUrl, model, apiKey }
}
// ── Gemini test step generation ──
/**
 * Assembles the Gemini prompt that asks for a JSON array of test actions.
 *
 * @param mode - `'before'` selects the short "show the old state" wording;
 *   any other value selects the "prove the change works" wording.
 * @param diff - Raw PR diff text; only its first 3000 characters are embedded.
 * @returns The complete prompt text.
 */
function buildPrompt(mode: string, diff: string): string {
  let modeDesc: string
  let focusRule: string
  if (mode === 'before') {
    modeDesc =
      'BEFORE (main branch). Show the OLD state briefly — under 15 seconds. One quick demonstration of missing feature / old behavior.'
    focusRule = 'Keep it minimal — just show the old/missing behavior'
  } else {
    modeDesc =
      'AFTER (PR branch). Prove the changes work — 3-6 targeted steps, under 30 seconds.'
    focusRule = 'Test the specific behavior that changed in the PR'
  }

  // Cap the embedded diff so the prompt stays small.
  const diffExcerpt = diff.slice(0, 3000)

  const promptLines = [
    'You are generating test steps for a ComfyUI frontend QA recording.',
    `MODE: ${modeDesc}`,
    '## Available actions (JSON array)',
    'Each step is an object with an "action" field:',
    '- { "action": "openMenu" } — clicks the Comfy hamburger menu (top-left C logo)',
    '- { "action": "hoverMenuItem", "label": "File" } — hovers a top-level menu item to open submenu',
    '- { "action": "clickMenuItem", "label": "Save As" } — clicks an item in the visible submenu',
    '- { "action": "fillDialog", "text": "test-name" } — fills the dialog input and presses Enter',
    '- { "action": "pressKey", "key": "Escape" } — presses a keyboard key',
    '- { "action": "click", "text": "Button Text" } — clicks an element by visible text',
    '- { "action": "wait", "ms": 1000 } — waits (use sparingly, max 3000ms)',
    '- { "action": "screenshot", "name": "step-name" } — takes a screenshot',
    '## PR Diff',
    '```',
    diffExcerpt,
    '```',
    '## Rules',
    '- Output ONLY a valid JSON array of actions, no markdown fences or explanation',
    `- ${focusRule}`,
    '- Always include at least one screenshot',
    '- Do NOT include login steps (handled automatically)',
    '- Menu navigation pattern: openMenu → hoverMenuItem → clickMenuItem (or screenshot)',
    '## Example output',
    '[',
    '{"action":"openMenu"},',
    '{"action":"hoverMenuItem","label":"File"},',
    '{"action":"screenshot","name":"file-menu"},',
    '{"action":"clickMenuItem","label":"Save As"},',
    '{"action":"wait","ms":800},',
    '{"action":"fillDialog","text":"test-save"},',
    '{"action":"wait","ms":2000},',
    '{"action":"screenshot","name":"after-save"}',
    ']'
  ]
  return promptLines.join('\n')
}
/** Type guard: `value` is a string with at least one character. */
function isNonEmptyString(value: unknown): value is string {
  return typeof value === 'string' && value.length > 0
}

/**
 * Type guard: `value` has the shape of one supported {@link TestAction},
 * i.e. a known `action` plus the payload field that action requires.
 */
function isTestAction(value: unknown): value is TestAction {
  if (typeof value !== 'object' || value === null) return false
  const step = value as Record<string, unknown>
  switch (step.action) {
    case 'openMenu':
      return true
    case 'hoverMenuItem':
    case 'clickMenuItem':
      return isNonEmptyString(step.label)
    case 'fillDialog':
      return typeof step.text === 'string'
    case 'click':
      return isNonEmptyString(step.text)
    case 'pressKey':
      return isNonEmptyString(step.key)
    case 'wait':
      return (
        typeof step.ms === 'number' && Number.isFinite(step.ms) && step.ms >= 0
      )
    case 'screenshot':
      return isNonEmptyString(step.name)
    default:
      return false
  }
}

/**
 * Asks Gemini for a list of test steps tailored to the PR diff.
 *
 * Reads the diff from `opts.diffFile`, sends the prompt to `opts.model`,
 * strips any markdown code fences from the reply and parses it as JSON.
 * The model's output is untrusted: entries that are not well-formed actions
 * are dropped with a warning.
 *
 * @param opts - Run options (diff path, mode, model and API key are used).
 * @returns The validated, non-empty list of steps.
 * @throws If the diff cannot be read, the request fails, the reply is not a
 *   JSON array, or no usable step remains — so the caller can fall back.
 */
async function generateTestSteps(opts: Options): Promise<TestAction[]> {
  const diff = readFileSync(opts.diffFile, 'utf-8')
  const prompt = buildPrompt(opts.mode, diff)
  const genAI = new GoogleGenerativeAI(opts.apiKey)
  const model = genAI.getGenerativeModel({ model: opts.model })
  console.warn(`Generating ${opts.mode} test steps with ${opts.model}...`)
  const result = await model.generateContent({
    contents: [{ role: 'user', parts: [{ text: prompt }] }],
    generationConfig: { temperature: 0.2, maxOutputTokens: 4096 }
  })

  // Strip markdown fences if present
  const text = result.response
    .text()
    .replace(/^```(?:json)?\n?/gm, '')
    .replace(/```$/gm, '')
    .trim()
  console.warn('Generated steps:', text)

  const parsed: unknown = JSON.parse(text)
  if (!Array.isArray(parsed)) throw new Error('Expected JSON array')

  const steps = parsed.filter(isTestAction)
  const dropped = parsed.length - steps.length
  if (dropped > 0) {
    console.warn(`Ignoring ${dropped} malformed or unsupported step(s)`)
  }
  // An empty plan would record nothing useful; signal failure instead.
  if (steps.length === 0) {
    throw new Error('Gemini returned no usable test steps')
  }
  return steps
}
// ── Fallback steps ──
/**
 * Builds the generic step list used when Gemini generation fails: open the
 * File menu, capture it, close it with Escape, then capture the editor.
 * Screenshot names carry `suffix` so the two runs do not collide.
 */
function buildFallbackSteps(suffix: 'before' | 'after'): TestAction[] {
  return [
    { action: 'openMenu' },
    { action: 'wait', ms: 300 },
    { action: 'hoverMenuItem', label: 'File' },
    { action: 'wait', ms: 500 },
    { action: 'screenshot', name: `file-menu-${suffix}` },
    { action: 'pressKey', key: 'Escape' },
    { action: 'wait', ms: 500 },
    { action: 'screenshot', name: `editor-${suffix}` }
  ]
}

/** Default steps for the BEFORE (main branch) recording. */
const FALLBACK_BEFORE: TestAction[] = buildFallbackSteps('before')

/** Default steps for the AFTER (PR branch) recording. */
const FALLBACK_AFTER: TestAction[] = buildFallbackSteps('after')
// ── Playwright helpers ──
/** Returns a promise that settles once a `setTimeout` of `ms` milliseconds fires. */
const sleep = (ms: number) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms)
  })
/**
 * Opens the Comfy main menu by clicking a fixed viewport coordinate, then
 * pauses so the menu has time to render.
 *
 * NOTE(review): the coordinate is hard-coded; presumably it matches the
 * menu button at the 1280x720 viewport used for recording — confirm.
 */
async function openComfyMenu(page: Page) {
  const menuButton = { x: 20, y: 67 }
  await page.mouse.click(menuButton.x, menuButton.y)
  await sleep(800)
}
/**
 * Hovers the first menubar / tiered-menu entry whose label contains `label`.
 * Best-effort: when no such label is visible a warning is logged and the
 * function returns without touching the page.
 */
async function hoverMenuItem(page: Page, label: string) {
  const labelSelector = '.p-menubar-item-label, .p-tieredmenu-item-label'
  const labelEl = page.locator(labelSelector).filter({ hasText: label }).first()

  const shown = await labelEl.isVisible().catch(() => false)
  if (!shown) {
    console.warn(`Menu item "${label}" not visible`)
    return
  }

  // Hover the label's grandparent element rather than the text node itself.
  const menuEntry = labelEl.locator('..').locator('..')
  await menuEntry.hover()
  await sleep(600)
}
/**
 * Clicks the first entry containing `label` inside a currently visible
 * tiered-menu submenu. Best-effort: logs a warning when nothing matches.
 *
 * `getByText` is used instead of interpolating `label` into a `text=...`
 * selector string: the label comes from model output, and the selector
 * syntax would reinterpret quotes, `/regex/` and `>>` inside it.
 *
 * @param page - Page hosting the open menu.
 * @param label - Visible text of the submenu entry (case-insensitive substring).
 */
async function clickSubmenuItem(page: Page, label: string) {
  const item = page
    .locator('.p-tieredmenu-submenu:visible')
    .getByText(label)
    .first()
  if (await item.isVisible().catch(() => false)) {
    await item.click()
    await sleep(800)
  } else {
    console.warn(`Submenu item "${label}" not found`)
  }
}
/**
 * Types `text` into the dialog's input and confirms with Enter.
 * Best-effort: logs a warning when no dialog input is visible.
 *
 * The locator is narrowed with `.first()`, like the other helpers here, so a
 * dialog containing several inputs does not trip Playwright's strict-mode
 * check and get misreported as "not found".
 *
 * @param page - Page hosting the dialog.
 * @param text - Value to enter.
 */
async function fillDialogAndConfirm(page: Page, text: string) {
  const input = page.locator('.p-dialog-content input').first()
  if (await input.isVisible().catch(() => false)) {
    await input.fill(text)
    await sleep(300)
    await page.keyboard.press('Enter')
    // Longer pause: gives the confirmed action time to finish on screen.
    await sleep(2000)
  } else {
    console.warn('Dialog input not found')
  }
}
/**
 * Clicks the first element whose visible text contains `text`.
 * Best-effort: logs a warning when no such element is visible.
 *
 * `getByText` is used instead of building a `text=...` selector string so
 * that quotes, `/regex/` or `>>` in model-supplied text are matched
 * literally rather than parsed as selector syntax.
 *
 * @param page - Page to search.
 * @param text - Visible text to look for (case-insensitive substring).
 */
async function clickByText(page: Page, text: string) {
  const el = page.getByText(text).first()
  if (await el.isVisible().catch(() => false)) {
    await el.click()
    await sleep(500)
  } else {
    console.warn(`Element with text "${text}" not found`)
  }
}
// ── Step executor ──
/** Formats a step for the progress log, e.g. `hoverMenuItem: File`. */
function describeStep(step: TestAction): string {
  let detail = ''
  if ('label' in step) detail = step.label
  else if ('text' in step) detail = step.text
  else if ('name' in step) detail = step.name
  return detail ? `${step.action}: ${detail}` : step.action
}

/**
 * Reduces a model-supplied screenshot name to a plain file name: every run
 * of characters outside `[A-Za-z0-9_-]` becomes `_`, so the result cannot
 * contain path separators or `..` segments.
 */
function safeScreenshotName(name: string): string {
  const cleaned = String(name).replace(/[^A-Za-z0-9_-]+/g, '_')
  return cleaned.length > 0 ? cleaned : 'screenshot'
}

/**
 * Runs the scripted steps in order against the page.
 *
 * Steps are best-effort: a step that throws (for example an unknown key
 * name passed to the keyboard) is logged and skipped so that the remaining
 * steps still execute and the recording is not cut short.
 *
 * @param page - Logged-in page to drive.
 * @param steps - Actions to perform, in order.
 * @param outputDir - Directory that receives `<name>.png` screenshots.
 */
async function executeSteps(
  page: Page,
  steps: TestAction[],
  outputDir: string
) {
  for (const step of steps) {
    console.warn(describeStep(step))
    try {
      switch (step.action) {
        case 'openMenu':
          await openComfyMenu(page)
          break
        case 'hoverMenuItem':
          await hoverMenuItem(page, step.label)
          break
        case 'clickMenuItem':
          await clickSubmenuItem(page, step.label)
          break
        case 'fillDialog':
          await fillDialogAndConfirm(page, step.text)
          break
        case 'pressKey':
          await page.keyboard.press(step.key)
          await sleep(300)
          break
        case 'click':
          await clickByText(page, step.text)
          break
        case 'wait': {
          // Clamp to [0, 5000] ms; a non-numeric value counts as 0.
          const requested = Number(step.ms) || 0
          await sleep(Math.max(0, Math.min(requested, 5000)))
          break
        }
        case 'screenshot':
          await page.screenshot({
            path: `${outputDir}/${safeScreenshotName(step.name)}.png`
          })
          break
        default:
          console.warn(`Unknown action: ${JSON.stringify(step)}`)
      }
    } catch (err) {
      console.warn(`Step "${describeStep(step)}" failed, continuing:`, err)
    }
  }
}
// ── Login flow ──
/**
 * Walks through the user-selection screen as the `qa-ci` user and leaves the
 * page on the editor.
 *
 * Sequence: open the user picker, choose `qa-ci` (by clicking the visible
 * option, else via `selectOption`; a failure of both is only logged), click
 * "Next", press Escape, and finally click "Dismiss" if such an element is
 * visible. Fixed pauses separate the steps.
 */
async function loginAsQaCi(page: Page) {
  console.warn('Logging in as qa-ci...')

  const userPicker = page
    .locator('select, [role="combobox"], .p-select, .p-dropdown')
    .first()
  await userPicker.click()
  await sleep(500)

  // First attempt: click the option in the opened list.
  let picked = false
  try {
    await page.locator('text=qa-ci').first().click({ timeout: 3000 })
    picked = true
  } catch {
    // Option was not clickable in time; try selectOption below.
  }
  // Second attempt: treat the picker as a <select>-style control.
  if (!picked) {
    try {
      await userPicker.selectOption({ label: 'qa-ci' })
    } catch {
      console.warn('Could not select qa-ci user')
    }
  }
  await sleep(500)

  await page.getByRole('button', { name: 'Next' }).click()
  await sleep(5000)

  // Close template gallery
  await page.keyboard.press('Escape')
  await sleep(2000)

  // Dismiss error popup if present
  const dismiss = page.locator('text=Dismiss').first()
  const popupShown = await dismiss.isVisible().catch(() => false)
  if (popupShown) {
    await dismiss.click()
    await sleep(500)
  }
}
// ── Main ──
/**
 * Entry point: resolves the test steps, records the session in headless
 * Firefox, and stores the video under its mode-specific name
 * (`qa-before-session.webm` or `qa-session.webm`) in the output directory.
 *
 * The video to rename is taken from the page's own recording handle. The
 * BEFORE and AFTER runs may share one output directory, so picking "the
 * last .webm in the directory" could grab the other run's file. The video
 * is finalized even when a step throws, and the browser is always closed.
 *
 * @throws Re-throws any error from navigation, login or step execution
 *   after cleanup has run.
 */
async function main() {
  const opts = parseArgs()
  mkdirSync(opts.outputDir, { recursive: true })

  // Generate or fall back to default test steps
  let steps: TestAction[]
  try {
    steps = await generateTestSteps(opts)
  } catch (err) {
    console.warn('Gemini generation failed, using fallback steps:', err)
    steps = opts.mode === 'before' ? FALLBACK_BEFORE : FALLBACK_AFTER
  }

  const finalNames = ['qa-before-session.webm', 'qa-session.webm']
  const videoName =
    opts.mode === 'before' ? 'qa-before-session.webm' : 'qa-session.webm'
  const videoTarget = `${opts.outputDir}/${videoName}`

  // Moves the finished recording to its expected filename (best-effort).
  const storeVideo = (recordedPath: string | undefined) => {
    let source = recordedPath
    if (!source) {
      // No handle available: look for a leftover recording, skipping files
      // that already carry a final name from this or the other run.
      const strays = readdirSync(opts.outputDir).filter(
        (f) => f.endsWith('.webm') && !finalNames.includes(f)
      )
      const stray = strays[strays.length - 1]
      source = stray ? `${opts.outputDir}/${stray}` : undefined
    }
    if (!source) {
      console.warn('WARNING: No .webm video found after recording')
      return
    }
    try {
      renameSync(source, videoTarget)
      console.warn(`Video saved: ${videoTarget}`)
    } catch (err) {
      console.warn(`WARNING: Could not move video ${source}:`, err)
    }
  }

  // Launch browser with video recording
  const browser = await firefox.launch({ headless: true })
  try {
    const context = await browser.newContext({
      viewport: { width: 1280, height: 720 },
      recordVideo: { dir: opts.outputDir, size: { width: 1280, height: 720 } }
    })
    const page = await context.newPage()
    const video = page.video()
    try {
      console.warn(`Opening ComfyUI at ${opts.serverUrl}`)
      await page.goto(opts.serverUrl, {
        waitUntil: 'domcontentloaded',
        timeout: 30000
      })
      await sleep(2000)
      await loginAsQaCi(page)
      console.warn('Editor ready — executing test steps')
      await executeSteps(page, steps, opts.outputDir)
      await sleep(2000)
    } finally {
      // Closing the context is what flushes the video file to disk.
      await context.close()
      const recordedPath = await video?.path().catch(() => undefined)
      storeVideo(recordedPath)
    }
  } finally {
    await browser.close()
  }

  console.warn('Recording complete!')
}
/** Logs an unrecoverable failure and terminates the process with exit code 1. */
function reportFatal(error: unknown): never {
  console.error('Recording failed:', error)
  process.exit(1)
}

// Run the script; any rejection from main() is fatal.
main().catch(reportFatal)