refactor: replace Codex with direct Playwright recording in QA pipeline

Replace the unreliable `codex exec` approach with a Playwright script (qa-record.ts) that uses Gemini to generate targeted test steps from the PR diff, then executes them deterministically via Playwright's API.

Key changes:
- New scripts/qa-record.ts: Gemini generates JSON test actions; Playwright executes them with reliable helper functions (menu nav, dialog fill, etc.)
- Remove codex CLI and playwright-cli dependencies
- Remove 150+ lines of prompt templates from pr-qa.yaml
- Firefox headless with video recording (same approach proven locally)
- Fallback steps if Gemini fails

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-20 14:30:41 +00:00 · 2026-03-20 21:43:03 +00:00
parent 6993a7ad5f
commit 4e5f683185
2 changed files with 412 additions and 149 deletions
--- a/.github/workflows/pr-qa.yaml
+++ b/.github/workflows/pr-qa.yaml
@@ -118,27 +118,13 @@ jobs:
        with:
          launch_server: 'false'

-      - name: Install playwright-cli and Codex CLI
+      - name: Install Playwright browser
        shell: bash
        run: |
-          npm install -g @playwright/cli@latest @openai/codex@latest
-          which playwright-cli
-          playwright-cli --version || true
-          npx playwright install chromium
+          npx playwright install firefox
+          mkdir -p "$QA_ARTIFACTS"

-      - name: Configure playwright-cli output
-        shell: bash
-        run: |
-          mkdir -p "$QA_ARTIFACTS" .playwright
-          cat > .playwright/cli.config.json <<CEOF
-          {
-            "outputDir": "$QA_ARTIFACTS",
-            "saveVideo": { "width": 1280, "height": 720 }
-          }
-          CEOF
-
-      - name: Get PR diff for focused QA
-        if: needs.resolve-matrix.outputs.mode == 'focused'
+      - name: Get PR diff
        shell: bash
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -151,121 +137,6 @@ jobs:
          grep '^diff --git' "${{ runner.temp }}/pr-diff.txt" | \
            sed 's|diff --git a/||;s| b/.*||' | sort -u | tee "${{ runner.temp }}/changed-files.txt"

-      - name: Write QA prompts
-        shell: bash
-        env:
-          BRANCH: ${{ github.head_ref || github.ref_name }}
-          PR_NUM: ${{ github.event.pull_request.number || 'N/A' }}
-          SHA: ${{ github.sha }}
-        run: |
-          OS_LOWER=$(echo "$RUNNER_OS" | tr '[:upper:]' '[:lower:]')
-
-          COMMON_HEADER="CRITICAL: \"playwright-cli\" is already installed globally in PATH. Do NOT use pnpm dlx or npx.
-          Chromium is already installed. Just run the commands directly."
-
-          COMMON_STEPS="You MUST follow these exact steps in order:
-          1. playwright-cli open http://127.0.0.1:8188
-          2. QUICK LOGIN (before video): snapshot, fill the username input with \"qa-ci\", click Next button, wait for graph editor to load
-          3. playwright-cli snapshot — verify graph editor is loaded
-          4. playwright-cli video-start"
-
-          COMMON_RULES="RULES:
-          - Do NOT browse templates, explore sidebar panels, or test unrelated features
-          - Do NOT use pnpm/npx to run playwright-cli
-          - Do NOT create a PR, post PR comments, commit, or push anything"
-
-          if [ "$QA_MODE" = "full" ]; then
-            cat > "${{ runner.temp }}/qa-prompt.txt" <<PROMPT
-          You are running a FULL automated QA pass on the ComfyUI frontend.
-          Read the file .claude/skills/comfy-qa/SKILL.md and follow the FULL QA test plan.
-
-          Environment: CI=true, OS=${{ runner.os }}
-          Server URL: http://127.0.0.1:8188
-          Branch: ${BRANCH}, PR: #${PR_NUM}, Commit: ${SHA}
-
-          ${COMMON_HEADER}
-
-          ${COMMON_STEPS}
-          5. Test the UI (click, fill, navigate — use snapshot between actions to get refs)
-          6. playwright-cli video-stop ${QA_ARTIFACTS}/qa-session.webm
-          7. Write report to ${QA_ARTIFACTS}/$(date +%Y-%m-%d)-001-${OS_LOWER}-report.md
-
-          Do NOT skip any steps. Skip tests not available in CI (file dialogs, GPU execution).
-          PROMPT
-          else
-            # Focused QA — write separate before/after prompts with identical test steps
-            DIFF_CONTEXT="CHANGED FILES:
-          $(cat "${{ runner.temp }}/changed-files.txt" 2>/dev/null || echo "Unknown")
-
-          DIFF (truncated to 500 lines):
-          $(head -500 "${{ runner.temp }}/pr-diff.txt" 2>/dev/null || echo "No diff available")"
-
-            TEST_DESIGN="## Instructions
-          1. Read the diff above carefully. Identify what UI behavior changed.
-          2. Design 3-6 targeted test steps that exercise EXACTLY that behavior.
-          3. Execute ONLY those steps.
-
-          ## Time budget: keep the video recording under 30 seconds."
-
-            # BEFORE prompt (main branch — brief snapshot of old behavior / missing feature)
-            cat > "${{ runner.temp }}/qa-before-prompt.txt" <<PROMPT
-          You are recording a BEFORE snapshot on the main branch for PR #${PR_NUM}.
-          Keep this SHORT — under 15 seconds of video. Your ONLY goal is to briefly
-          show the OLD state so reviewers can see the contrast with the AFTER video.
-
-          Environment: CI=true, OS=${{ runner.os }}
-          Server URL: http://127.0.0.1:8188
-          Branch: main (before PR)
-
-          ${DIFF_CONTEXT}
-
-          ## What to record
-          Read the diff and identify what changed. Then do ONE of these:
-          - **New feature**: Show the UI WHERE the feature would appear. Open the
-            relevant menu/panel/dialog to prove it doesn't exist yet. That's it.
-          - **Bug fix**: Trigger the bug ONCE. Show the broken behavior. Stop.
-          - **Behavior change**: Perform the action ONCE with the OLD behavior. Stop.
-
-          Do NOT explore, test exhaustively, or try multiple variations.
-          One clear demonstration is all that's needed.
-
-          ${COMMON_HEADER}
-
-          ${COMMON_STEPS}
-          5. Perform ONE action that shows the old/missing behavior (snapshot before and after)
-          6. playwright-cli video-stop ${QA_ARTIFACTS}/qa-before-session.webm
-          7. Write a 2-line report to ${QA_ARTIFACTS}/$(date +%Y-%m-%d)-001-before-${OS_LOWER}-report.md
-
-          ${COMMON_RULES}
-          - KEEP IT SHORT — stop recording within 15 seconds of starting video
-          PROMPT
-
-            # AFTER prompt (PR branch — prove the fix works)
-            cat > "${{ runner.temp }}/qa-prompt.txt" <<PROMPT
-          You are running the AFTER pass of a focused QA comparison on PR #${PR_NUM}.
-          This is the PR branch (after the changes). Your goal is to prove the PR's
-          changes work correctly and the intended behavior is now in place.
-
-          Environment: CI=true, OS=${{ runner.os }}
-          Server URL: http://127.0.0.1:8188
-          Branch: ${BRANCH} (PR)
-
-          ${DIFF_CONTEXT}
-
-          ${TEST_DESIGN}
-
-          ${COMMON_HEADER}
-
-          ${COMMON_STEPS}
-          5. Execute ONLY your PR-targeted test steps (snapshot between each action)
-          6. playwright-cli video-stop ${QA_ARTIFACTS}/qa-session.webm
-          7. Write report to ${QA_ARTIFACTS}/$(date +%Y-%m-%d)-001-${OS_LOWER}-report.md
-             Include PASS/FAIL for each test step.
-
-          ${COMMON_RULES}
-          PROMPT
-          fi
-
      # ── BEFORE run (main branch) ──
      - name: Start server with main branch frontend
        if: needs.resolve-matrix.outputs.mode == 'focused'
@@ -284,15 +155,13 @@ jobs:
        if: needs.resolve-matrix.outputs.mode == 'focused'
        shell: bash
        env:
-          CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          CI: 'true'
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
        run: |
-          codex exec \
-            --model gpt-5.4-mini \
-            --sandbox danger-full-access \
-            - < "${{ runner.temp }}/qa-before-prompt.txt"
+          pnpm exec tsx scripts/qa-record.ts \
+            --mode before \
+            --diff "${{ runner.temp }}/pr-diff.txt" \
+            --output-dir "$QA_ARTIFACTS" \
+            --url http://127.0.0.1:8188

      - name: Stop server after BEFORE run
        if: needs.resolve-matrix.outputs.mode == 'focused'
@@ -323,15 +192,13 @@ jobs:
      - name: Run AFTER QA (PR branch)
        shell: bash
        env:
-          CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          CI: 'true'
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
        run: |
-          codex exec \
-            --model gpt-5.4-mini \
-            --sandbox danger-full-access \
-            - < "${{ runner.temp }}/qa-prompt.txt"
+          pnpm exec tsx scripts/qa-record.ts \
+            --mode after \
+            --diff "${{ runner.temp }}/pr-diff.txt" \
+            --output-dir "$QA_ARTIFACTS" \
+            --url http://127.0.0.1:8188

      - name: Collect artifacts
        if: always()
--- a/scripts/qa-record.ts
+++ b/scripts/qa-record.ts
@@ -0,0 +1,396 @@
+#!/usr/bin/env tsx
+/**
+ * QA Recording Script
+ *
+ * Records a ComfyUI frontend QA session using Playwright with video capture.
+ * Uses Gemini to generate targeted test steps based on the PR diff.
+ *
+ * Usage:
+ *   pnpm exec tsx scripts/qa-record.ts \
+ *     --mode before|after \
+ *     --diff <path-to-diff> \
+ *     --output-dir <path> \
+ *     [--url <server-url>] \
+ *     [--model <gemini-model>]
+ *
+ * Env: GEMINI_API_KEY (required)
+ */
+
+import { firefox } from '@playwright/test'
+import type { Page } from '@playwright/test'
+import { GoogleGenerativeAI } from '@google/generative-ai'
+import { readFileSync, mkdirSync, readdirSync, renameSync } from 'fs'
+
+// ── Types ──
+
+/**
+ * A single scripted UI step. `action` is the discriminant; any other field
+ * carries that step's one argument. The variants match the action list that
+ * `buildPrompt` describes to Gemini and that `executeSteps` dispatches on.
+ */
+type TestAction =
+  | { action: 'openMenu' }
+  | { action: 'hoverMenuItem'; label: string }
+  | { action: 'clickMenuItem'; label: string }
+  | { action: 'fillDialog'; text: string }
+  | { action: 'pressKey'; key: string }
+  | { action: 'click'; text: string }
+  | { action: 'wait'; ms: number }
+  | { action: 'screenshot'; name: string }
+
+/** Resolved command-line and environment settings for one recording run. */
+interface Options {
+  /** Which pass is being recorded; selects the prompt wording, fallback steps and video filename. */
+  mode: 'before' | 'after'
+  /** Path of the PR diff file that is read and embedded in the Gemini prompt. */
+  diffFile: string
+  /** Directory that receives screenshots and the recorded video. */
+  outputDir: string
+  /** URL the browser navigates to (`--url`). */
+  serverUrl: string
+  /** Gemini model name passed to `getGenerativeModel` (`--model`). */
+  model: string
+  /** Value of the `GEMINI_API_KEY` environment variable. */
+  apiKey: string
+}
+
+// ── CLI parsing ──
+
+/**
+ * Parses command-line flags and `GEMINI_API_KEY` into an `Options` object.
+ *
+ * Exits the process with status 1 when a required flag is absent, a flag is
+ * given without a value, `--mode` is not `before` or `after`, or the API key
+ * is unset. Exits with status 0 after printing usage for `--help`.
+ *
+ * @returns Fully populated options; `serverUrl` and `model` fall back to
+ *   their defaults when the corresponding flag is not supplied.
+ */
+function parseArgs(): Options {
+  const args = process.argv.slice(2)
+
+  let mode: Options['mode'] | undefined
+  let diffFile: string | undefined
+  let outputDir: string | undefined
+  let serverUrl = 'http://127.0.0.1:8188'
+  let model = 'gemini-2.5-flash'
+  const apiKey = process.env.GEMINI_API_KEY ?? ''
+
+  function fail(message: string): never {
+    console.error(message)
+    process.exit(1)
+  }
+
+  // Returns the value following the flag at `index`. A missing value, or
+  // another flag in its place, is a usage error rather than silent garbage.
+  function takeValue(index: number): string {
+    const value = args[index + 1]
+    if (value === undefined || value.startsWith('--')) {
+      fail(`Missing value for ${args[index]}`)
+    }
+    return value
+  }
+
+  for (let i = 0; i < args.length; i++) {
+    switch (args[i]) {
+      case '--mode': {
+        const value = takeValue(i)
+        i++
+        if (value !== 'before' && value !== 'after') {
+          fail(`Invalid --mode "${value}": expected before|after`)
+        }
+        mode = value
+        break
+      }
+      case '--diff':
+        diffFile = takeValue(i)
+        i++
+        break
+      case '--output-dir':
+        outputDir = takeValue(i)
+        i++
+        break
+      case '--url':
+        serverUrl = takeValue(i)
+        i++
+        break
+      case '--model':
+        model = takeValue(i)
+        i++
+        break
+      case '--help':
+        console.warn(
+          'Usage: qa-record.ts --mode before|after --diff <path> --output-dir <path> [--url <url>] [--model <model>]'
+        )
+        process.exit(0)
+      // eslint-disable-next-line no-fallthrough -- process.exit never returns
+      default:
+        console.warn(`Ignoring unknown argument: ${args[i]}`)
+    }
+  }
+
+  if (!mode || !diffFile || !outputDir) {
+    fail('Required: --mode before|after --diff <path> --output-dir <path>')
+  }
+
+  if (!apiKey) {
+    fail('GEMINI_API_KEY environment variable is required')
+  }
+
+  return { mode, diffFile, outputDir, serverUrl, model, apiKey }
+}
+
+// ── Gemini test step generation ──
+
+/**
+ * Builds the Gemini prompt that asks for a JSON array of test actions.
+ *
+ * @param mode - `'before'` selects the short "old state" wording; any other
+ *   value selects the "after / prove the change" wording.
+ * @param diff - Raw PR diff text. Only its first 3000 characters are embedded
+ *   in the prompt.
+ * @returns The complete prompt string, including the action catalogue, the
+ *   rules and an example output.
+ */
+function buildPrompt(mode: string, diff: string): string {
+  // One-line description of the recording goal, interpolated after "MODE:".
+  const modeDesc =
+    mode === 'before'
+      ? 'BEFORE (main branch). Show the OLD state briefly — under 15 seconds. One quick demonstration of missing feature / old behavior.'
+      : 'AFTER (PR branch). Prove the changes work — 3-6 targeted steps, under 30 seconds.'
+
+  return `You are generating test steps for a ComfyUI frontend QA recording.
+
+MODE: ${modeDesc}
+
+## Available actions (JSON array)
+Each step is an object with an "action" field:
+- { "action": "openMenu" } — clicks the Comfy hamburger menu (top-left C logo)
+- { "action": "hoverMenuItem", "label": "File" } — hovers a top-level menu item to open submenu
+- { "action": "clickMenuItem", "label": "Save As" } — clicks an item in the visible submenu
+- { "action": "fillDialog", "text": "test-name" } — fills the dialog input and presses Enter
+- { "action": "pressKey", "key": "Escape" } — presses a keyboard key
+- { "action": "click", "text": "Button Text" } — clicks an element by visible text
+- { "action": "wait", "ms": 1000 } — waits (use sparingly, max 3000ms)
+- { "action": "screenshot", "name": "step-name" } — takes a screenshot
+
+## PR Diff
+\`\`\`
+${diff.slice(0, 3000)}
+\`\`\`
+
+## Rules
+- Output ONLY a valid JSON array of actions, no markdown fences or explanation
+- ${mode === 'before' ? 'Keep it minimal — just show the old/missing behavior' : 'Test the specific behavior that changed in the PR'}
+- Always include at least one screenshot
+- Do NOT include login steps (handled automatically)
+- Menu navigation pattern: openMenu → hoverMenuItem → clickMenuItem (or screenshot)
+
+## Example output
+[
+  {"action":"openMenu"},
+  {"action":"hoverMenuItem","label":"File"},
+  {"action":"screenshot","name":"file-menu"},
+  {"action":"clickMenuItem","label":"Save As"},
+  {"action":"wait","ms":800},
+  {"action":"fillDialog","text":"test-save"},
+  {"action":"wait","ms":2000},
+  {"action":"screenshot","name":"after-save"}
+]`
+}
+
+/** Returns true when `value` has the exact shape of one `TestAction` variant. */
+function isTestAction(value: unknown): value is TestAction {
+  if (typeof value !== 'object' || value === null) return false
+  const step = value as Record<string, unknown>
+  switch (step.action) {
+    case 'openMenu':
+      return true
+    case 'hoverMenuItem':
+    case 'clickMenuItem':
+      return typeof step.label === 'string'
+    case 'fillDialog':
+    case 'click':
+      return typeof step.text === 'string'
+    case 'pressKey':
+      return typeof step.key === 'string'
+    case 'wait':
+      return typeof step.ms === 'number' && Number.isFinite(step.ms)
+    case 'screenshot':
+      return typeof step.name === 'string'
+    default:
+      return false
+  }
+}
+
+/**
+ * Parses model output into validated steps, dropping malformed entries.
+ *
+ * @throws If the text is not JSON, is not an array, or has no valid step.
+ */
+function parseTestSteps(text: string): TestAction[] {
+  const parsed: unknown = JSON.parse(text)
+  if (!Array.isArray(parsed)) throw new Error('Expected JSON array')
+
+  const steps: TestAction[] = []
+  for (const candidate of parsed) {
+    if (isTestAction(candidate)) {
+      steps.push(candidate)
+    } else {
+      console.warn(`Skipping invalid step: ${JSON.stringify(candidate)}`)
+    }
+  }
+
+  // An empty plan would record a video of nothing; throwing lets the caller
+  // switch to its fallback steps instead.
+  if (steps.length === 0) throw new Error('No valid test steps generated')
+  return steps
+}
+
+/**
+ * Asks Gemini for test steps targeted at the PR diff.
+ *
+ * Reads the diff from `opts.diffFile`, sends the prompt from `buildPrompt`,
+ * strips any markdown fences from the reply and validates every step.
+ *
+ * @param opts - Run options; uses `diffFile`, `mode`, `model` and `apiKey`.
+ * @returns The validated steps, in the order the model produced them.
+ * @throws If the diff cannot be read, the API call fails, or the reply does
+ *   not contain at least one well-formed step.
+ */
+async function generateTestSteps(opts: Options): Promise<TestAction[]> {
+  const diff = readFileSync(opts.diffFile, 'utf-8')
+  const prompt = buildPrompt(opts.mode, diff)
+
+  const genAI = new GoogleGenerativeAI(opts.apiKey)
+  const model = genAI.getGenerativeModel({ model: opts.model })
+
+  console.warn(`Generating ${opts.mode} test steps with ${opts.model}...`)
+
+  const result = await model.generateContent({
+    contents: [{ role: 'user', parts: [{ text: prompt }] }],
+    generationConfig: { temperature: 0.2, maxOutputTokens: 4096 }
+  })
+
+  // Strip markdown fences if present
+  const text = result.response
+    .text()
+    .replace(/^```(?:json)?\n?/gm, '')
+    .replace(/```$/gm, '')
+    .trim()
+
+  console.warn('Generated steps:', text)
+
+  return parseTestSteps(text)
+}
+
+// ── Fallback steps ──
+
+/**
+ * Builds the default walkthrough used when step generation fails: open the
+ * menu, hover "File", capture the submenu, close it, capture the editor.
+ * `suffix` is appended to both screenshot names.
+ */
+function buildFallbackSteps(suffix: 'before' | 'after'): TestAction[] {
+  return [
+    { action: 'openMenu' },
+    { action: 'wait', ms: 300 },
+    { action: 'hoverMenuItem', label: 'File' },
+    { action: 'wait', ms: 500 },
+    { action: 'screenshot', name: `file-menu-${suffix}` },
+    { action: 'pressKey', key: 'Escape' },
+    { action: 'wait', ms: 500 },
+    { action: 'screenshot', name: `editor-${suffix}` }
+  ]
+}
+
+/** Fallback steps for the BEFORE (main branch) recording. */
+const FALLBACK_BEFORE: TestAction[] = buildFallbackSteps('before')
+
+/** Fallback steps for the AFTER (PR branch) recording. */
+const FALLBACK_AFTER: TestAction[] = buildFallbackSteps('after')
+
+// ── Playwright helpers ──
+
+/** Returns a promise that resolves after `ms` milliseconds (via `setTimeout`). */
+const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms))
+
+/**
+ * Opens the Comfy menu by clicking a fixed viewport coordinate, then pauses
+ * 800 ms before returning.
+ */
+async function openComfyMenu(page: Page) {
+  // NOTE(review): hard-coded pixel position — presumably the menu button's
+  // location at the 1280x720 viewport used by `main`; confirm if layout changes.
+  const menuButton = { x: 20, y: 67 }
+  await page.mouse.click(menuButton.x, menuButton.y)
+  await sleep(800)
+}
+
+/**
+ * Hovers the first menu entry whose label contains `label`.
+ * Logs a warning and does nothing when no such entry is visible.
+ */
+async function hoverMenuItem(page: Page, label: string) {
+  const labelEl = page
+    .locator('.p-menubar-item-label, .p-tieredmenu-item-label')
+    .filter({ hasText: label })
+    .first()
+
+  const shown = await labelEl.isVisible().catch(() => false)
+  if (!shown) {
+    console.warn(`Menu item "${label}" not visible`)
+    return
+  }
+
+  // Hover the element two levels above the label rather than the label itself.
+  const container = labelEl.locator('..').locator('..')
+  await container.hover()
+  await sleep(600)
+}
+
+/**
+ * Clicks the first element matching `label` inside a visible submenu.
+ * Logs a warning and does nothing when it is not visible.
+ */
+async function clickSubmenuItem(page: Page, label: string) {
+  const openSubmenu = page.locator('.p-tieredmenu-submenu:visible')
+  const entry = openSubmenu.locator(`text=${label}`).first()
+
+  const shown = await entry.isVisible().catch(() => false)
+  if (!shown) {
+    console.warn(`Submenu item "${label}" not found`)
+    return
+  }
+
+  await entry.click()
+  await sleep(800)
+}
+
+/**
+ * Types `text` into the dialog's input and confirms with Enter.
+ * Logs a warning and does nothing when no dialog input is visible.
+ */
+async function fillDialogAndConfirm(page: Page, text: string) {
+  // `.first()` matches the sibling helpers and keeps the locator unambiguous
+  // when a dialog contains more than one input; without it the strict-mode
+  // check rejects the `fill` call.
+  const input = page.locator('.p-dialog-content input').first()
+  if (await input.isVisible().catch(() => false)) {
+    await input.fill(text)
+    await sleep(300)
+    await page.keyboard.press('Enter')
+    await sleep(2000)
+  } else {
+    console.warn('Dialog input not found')
+  }
+}
+
+/**
+ * Clicks the first element matched by the `text=` selector for `text`.
+ * Logs a warning and does nothing when it is not visible.
+ */
+async function clickByText(page: Page, text: string) {
+  const target = page.locator(`text=${text}`).first()
+
+  const shown = await target.isVisible().catch(() => false)
+  if (!shown) {
+    console.warn(`Element with text "${text}" not found`)
+    return
+  }
+
+  await target.click()
+  await sleep(500)
+}
+
+// ── Step executor ──
+
+/** Suffix for the progress log: the step's label, text or name, if it has one. */
+function describeStepArg(step: TestAction): string {
+  if ('label' in step) return `: ${step.label}`
+  if ('text' in step) return `: ${step.text}`
+  if ('name' in step) return `: ${step.name}`
+  return ''
+}
+
+/**
+ * Reduces a model-supplied screenshot name to a plain filename stem so it
+ * cannot escape `outputDir` or contain characters invalid in a path.
+ */
+function safeScreenshotName(name: string): string {
+  const cleaned = String(name)
+    .replace(/[^\w.-]+/g, '_')
+    .replace(/^\.+/, '')
+  return cleaned || 'screenshot'
+}
+
+/** Performs one step; may throw if the underlying Playwright call fails. */
+async function executeStep(page: Page, step: TestAction, outputDir: string) {
+  switch (step.action) {
+    case 'openMenu':
+      await openComfyMenu(page)
+      break
+    case 'hoverMenuItem':
+      await hoverMenuItem(page, step.label)
+      break
+    case 'clickMenuItem':
+      await clickSubmenuItem(page, step.label)
+      break
+    case 'fillDialog':
+      await fillDialogAndConfirm(page, step.text)
+      break
+    case 'pressKey':
+      await page.keyboard.press(step.key)
+      await sleep(300)
+      break
+    case 'click':
+      await clickByText(page, step.text)
+      break
+    case 'wait': {
+      // Clamp to [0, 5000] ms; a non-finite value becomes no wait at all.
+      const ms = Number.isFinite(step.ms)
+        ? Math.min(Math.max(step.ms, 0), 5000)
+        : 0
+      await sleep(ms)
+      break
+    }
+    case 'screenshot':
+      await page.screenshot({
+        path: `${outputDir}/${safeScreenshotName(step.name)}.png`
+      })
+      break
+    default:
+      console.warn(`Unknown action: ${JSON.stringify(step)}`)
+  }
+}
+
+/**
+ * Runs the steps against the page, in order.
+ *
+ * Steps are best-effort: a step that throws (for example an unrecognised key
+ * name) is logged and skipped so the remaining steps still get recorded.
+ *
+ * @param page - Page to drive.
+ * @param steps - Steps to execute.
+ * @param outputDir - Directory that receives screenshots.
+ */
+async function executeSteps(
+  page: Page,
+  steps: TestAction[],
+  outputDir: string
+) {
+  for (const step of steps) {
+    console.warn(`  → ${step.action}${describeStepArg(step)}`)
+    try {
+      await executeStep(page, step, outputDir)
+    } catch (err) {
+      console.warn(`  ✗ ${step.action} failed, continuing:`, err)
+    }
+  }
+}
+
+// ── Login flow ──
+
+/**
+ * Walks through the user-selection screen as "qa-ci", then clears the
+ * overlays that follow (Escape for the template gallery, "Dismiss" for an
+ * error popup if one is showing). Uses fixed pauses between actions.
+ */
+async function loginAsQaCi(page: Page) {
+  console.warn('Logging in as qa-ci...')
+  const userPicker = page
+    .locator('select, [role="combobox"], .p-select, .p-dropdown')
+    .first()
+  await userPicker.click()
+  await sleep(500)
+
+  // Try clicking the visible option first; fall back to selectOption on the
+  // picker. Failing both is only a warning.
+  const chooseQaCiUser = async (): Promise<void> => {
+    try {
+      await page.locator('text=qa-ci').first().click({ timeout: 3000 })
+      return
+    } catch {
+      // fall through to the select-based attempt
+    }
+    try {
+      await userPicker.selectOption({ label: 'qa-ci' })
+    } catch {
+      console.warn('Could not select qa-ci user')
+    }
+  }
+  await chooseQaCiUser()
+
+  await sleep(500)
+  await page.getByRole('button', { name: 'Next' }).click()
+  await sleep(5000)
+
+  // Close template gallery
+  await page.keyboard.press('Escape')
+  await sleep(2000)
+
+  // Dismiss error popup if present
+  const dismissButton = page.locator('text=Dismiss').first()
+  const popupShown = await dismissButton.isVisible().catch(() => false)
+  if (popupShown) {
+    await dismissButton.click()
+    await sleep(500)
+  }
+}
+
+// ── Main ──
+
+/**
+ * Fallback lookup for the raw recording: the last `.webm` in `outputDir`
+ * that does not already carry one of this script's final names.
+ */
+function findUnnamedRecording(outputDir: string): string | undefined {
+  const finalNames = new Set(['qa-before-session.webm', 'qa-session.webm'])
+  const candidates = readdirSync(outputDir).filter(
+    (f) => f.endsWith('.webm') && !finalNames.has(f)
+  )
+  const last = candidates[candidates.length - 1]
+  return last ? `${outputDir}/${last}` : undefined
+}
+
+/**
+ * Records one QA session: obtains test steps (Gemini, else the fallback
+ * list), drives headless Firefox through login and the steps while recording
+ * video, then renames the video to `qa-before-session.webm` or
+ * `qa-session.webm` depending on the mode.
+ */
+async function main() {
+  const opts = parseArgs()
+  mkdirSync(opts.outputDir, { recursive: true })
+
+  // Generate or fall back to default test steps
+  let steps: TestAction[]
+  try {
+    steps = await generateTestSteps(opts)
+  } catch (err) {
+    console.warn('Gemini generation failed, using fallback steps:', err)
+    steps = opts.mode === 'before' ? FALLBACK_BEFORE : FALLBACK_AFTER
+  }
+
+  // Launch browser with video recording
+  const browser = await firefox.launch({ headless: true })
+  let recordedPath: string | undefined
+  try {
+    const context = await browser.newContext({
+      viewport: { width: 1280, height: 720 },
+      recordVideo: { dir: opts.outputDir, size: { width: 1280, height: 720 } }
+    })
+    const page = await context.newPage()
+    const video = page.video()
+
+    try {
+      console.warn(`Opening ComfyUI at ${opts.serverUrl}`)
+      await page.goto(opts.serverUrl, {
+        waitUntil: 'domcontentloaded',
+        timeout: 30000
+      })
+      await sleep(2000)
+
+      await loginAsQaCi(page)
+      console.warn('Editor ready — executing test steps')
+
+      await executeSteps(page, steps, opts.outputDir)
+
+      await sleep(2000)
+    } finally {
+      // Closing the context flushes the video to disk. Asking the page's own
+      // Video for its path identifies this run's file exactly, even when the
+      // directory already holds the BEFORE recording.
+      await context.close()
+      recordedPath = await video?.path().catch(() => undefined)
+    }
+  } finally {
+    await browser.close()
+  }
+
+  // Rename the recorded video to expected filename
+  const videoName =
+    opts.mode === 'before' ? 'qa-before-session.webm' : 'qa-session.webm'
+  const source = recordedPath ?? findUnnamedRecording(opts.outputDir)
+  if (source) {
+    renameSync(source, `${opts.outputDir}/${videoName}`)
+    console.warn(`Video saved: ${opts.outputDir}/${videoName}`)
+  } else {
+    console.warn('WARNING: No .webm video found after recording')
+  }
+
+  console.warn('Recording complete!')
+}
+
+/** Logs a fatal error from `main` and exits with status 1. */
+function reportFailure(err: unknown): never {
+  console.error('Recording failed:', err)
+  process.exit(1)
+}
+
+main().catch(reportFailure)