mirror of
https://github.com/Comfy-Org/ComfyUI_frontend.git
synced 2026-04-28 02:02:08 +00:00
Compare commits
40 Commits
sno-qa-103
...
sno-qa-105
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0b3bf3652a | ||
|
|
05c6e1c0ff | ||
|
|
15fb037a55 | ||
|
|
dbc12a9d6a | ||
|
|
909b75b9d1 | ||
|
|
868d774007 | ||
|
|
69cd6a7628 | ||
|
|
9c39635f16 | ||
|
|
3e957213c8 | ||
|
|
f5d99c9c22 | ||
|
|
09bbd89172 | ||
|
|
0b5613246e | ||
|
|
3a27263ca6 | ||
|
|
6044452b8f | ||
|
|
c4a243060b | ||
|
|
3690e98c79 | ||
|
|
1c40893cfa | ||
|
|
72a28a1e76 | ||
|
|
49c95248e5 | ||
|
|
6bb9d18ca6 | ||
|
|
ca49e9cb1b | ||
|
|
db538c9d76 | ||
|
|
c04b31a0f1 | ||
|
|
5938fdef8d | ||
|
|
27d55f093b | ||
|
|
c1ddb8669e | ||
|
|
57dbf0132d | ||
|
|
b4c49588dc | ||
|
|
00dc10e9e6 | ||
|
|
48414fa1b5 | ||
|
|
e744f101b0 | ||
|
|
9ad8267067 | ||
|
|
cf54ddb6d3 | ||
|
|
fce31cf0bf | ||
|
|
050091abc6 | ||
|
|
548e37b9a5 | ||
|
|
458b2e918c | ||
|
|
63dbe002d1 | ||
|
|
f3d9a8c2e4 | ||
|
|
831718bd50 |
38
.github/workflows/pr-qa.yaml
vendored
38
.github/workflows/pr-qa.yaml
vendored
@@ -289,6 +289,8 @@ jobs:
|
||||
shell: bash
|
||||
env:
|
||||
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
TARGET_TYPE: ${{ needs.resolve-matrix.outputs.target_type }}
|
||||
run: |
|
||||
MODE="before"
|
||||
@@ -612,12 +614,13 @@ jobs:
|
||||
DUALBADGE
|
||||
chmod +x /tmp/gen-badge-dual.sh
|
||||
|
||||
# Vertical box badge for multi-pass results
|
||||
# Universal vertical box badge — used for all badges (issues + PRs)
|
||||
cat > /tmp/gen-badge-box.sh <<'BOXBADGE'
|
||||
#!/bin/bash
|
||||
# Usage: gen-badge-box.sh <output-path> <label> <repro> <not_repro> <fail> <total>
|
||||
# Usage: gen-badge-box.sh <output-path> <label> <repro> <not_repro> <fail> <total> [fix_result] [fix_color]
|
||||
OUT="$1" LABEL="$2" REPRO="$3" NOREPRO="$4" FAIL="$5" TOTAL="$6"
|
||||
W=140
|
||||
FIX_RESULT="${7:-}" FIX_COLOR="${8:-#4c1}"
|
||||
W=160
|
||||
ROW=18
|
||||
HEADER=22
|
||||
ROWS=0
|
||||
@@ -625,26 +628,28 @@ jobs:
|
||||
[ "$NOREPRO" -gt 0 ] 2>/dev/null && ROWS=$((ROWS+1))
|
||||
[ "$FAIL" -gt 0 ] 2>/dev/null && ROWS=$((ROWS+1))
|
||||
[ "$ROWS" -eq 0 ] && ROWS=1
|
||||
# Add fix quality row for PRs
|
||||
[ -n "$FIX_RESULT" ] && ROWS=$((ROWS+1))
|
||||
H=$((HEADER + ROWS * ROW + 4))
|
||||
Y=$((HEADER + 2))
|
||||
cat > "$OUT" <<SVGEOF
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="${W}" height="${H}" role="img" aria-label="${LABEL}: ${REPRO}/${TOTAL}">
|
||||
<title>${LABEL}: ${REPRO} reproduced, ${NOREPRO} not-repro, ${FAIL} inconclusive / ${TOTAL}</title>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="${W}" height="${H}" role="img" aria-label="${LABEL}">
|
||||
<title>${LABEL}: ${REPRO} reproduced, ${NOREPRO} not-repro, ${FAIL} inconclusive / ${TOTAL}${FIX_RESULT:+ | Fix: ${FIX_RESULT}}</title>
|
||||
<rect width="${W}" height="${H}" rx="4" fill="#2a2d35"/>
|
||||
<rect width="${W}" height="${HEADER}" rx="4" fill="#555"/>
|
||||
<rect y="$((HEADER-4))" width="${W}" height="4" fill="#555"/>
|
||||
<text x="$((W/2))" y="15" fill="#fff" text-anchor="middle" font-family="Verdana,sans-serif" font-size="11" font-weight="bold">${LABEL}</text>
|
||||
SVGEOF
|
||||
add_row() {
|
||||
local count="$1" icon="$2" text="$3" color="$4"
|
||||
[ "$count" -gt 0 ] || return
|
||||
local icon="$1" text="$2" color="$3"
|
||||
echo " <rect x='4' y='${Y}' width='$((W-8))' height='$((ROW-2))' rx='3' fill='${color}' opacity='.15'/>" >> "$OUT"
|
||||
echo " <text x='10' y='$((Y+13))' fill='${color}' font-family='Verdana,sans-serif' font-size='11'>${icon} ${count} ${text}</text>" >> "$OUT"
|
||||
echo " <text x='10' y='$((Y+13))' fill='${color}' font-family='Verdana,sans-serif' font-size='11'>${icon} ${text}</text>" >> "$OUT"
|
||||
Y=$((Y+ROW))
|
||||
}
|
||||
add_row "$REPRO" "✓" "reproduced" "#58a6ff"
|
||||
add_row "$NOREPRO" "✗" "not reproducible" "#8b949e"
|
||||
add_row "$FAIL" "⚠" "inconclusive" "#d29922"
|
||||
[ "$REPRO" -gt 0 ] 2>/dev/null && add_row "✓" "${REPRO} reproduced" "#58a6ff"
|
||||
[ "$NOREPRO" -gt 0 ] 2>/dev/null && add_row "✗" "${NOREPRO} not reproducible" "#8b949e"
|
||||
[ "$FAIL" -gt 0 ] 2>/dev/null && add_row "⚠" "${FAIL} inconclusive" "#d29922"
|
||||
[ -n "$FIX_RESULT" ] && add_row "⚙" "Fix: ${FIX_RESULT}" "$FIX_COLOR"
|
||||
echo "</svg>" >> "$OUT"
|
||||
BOXBADGE
|
||||
chmod +x /tmp/gen-badge-box.sh
|
||||
@@ -696,6 +701,13 @@ jobs:
|
||||
- name: Setup frontend
|
||||
uses: ./.github/actions/setup-frontend
|
||||
|
||||
- name: Download QA guides
|
||||
continue-on-error: true
|
||||
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
|
||||
with:
|
||||
name: qa-guides-${{ github.run_id }}
|
||||
path: qa-guides
|
||||
|
||||
- name: Download BEFORE artifacts
|
||||
if: needs.qa-before.result == 'success'
|
||||
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
|
||||
@@ -944,6 +956,8 @@ jobs:
|
||||
TARGET_TYPE: ${{ steps.pr.outputs.target_type }}
|
||||
REPO: ${{ github.repository }}
|
||||
RUN_ID: ${{ github.run_id }}
|
||||
PIPELINE_SHA: ${{ github.sha }}
|
||||
RUN_START_TIME: ${{ github.event.head_commit.timestamp || github.event.pull_request.updated_at || '' }}
|
||||
run: bash scripts/qa-deploy-pages.sh
|
||||
|
||||
- name: Post unified QA comment
|
||||
@@ -978,6 +992,8 @@ jobs:
|
||||
for os in Linux macOS Windows; do
|
||||
GIF_URL="${VIDEO_BASE}/qa-${os}-thumb.gif"
|
||||
VID_URL="${VIDEO_BASE}/qa-${os}.mp4"
|
||||
# Fallback to pass1 for multi-pass recordings
|
||||
curl -sf --head "$VID_URL" >/dev/null 2>&1 || VID_URL="${VIDEO_BASE}/qa-${os}-pass1.mp4"
|
||||
if curl -sf --head "$VID_URL" >/dev/null 2>&1; then
|
||||
if curl -sf --head "$GIF_URL" >/dev/null 2>&1; then
|
||||
VIDEO_SECTION="${VIDEO_SECTION}[](${VID_URL})"$'\n'
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
import { expect } from '@playwright/test'
|
||||
import { comfyPageFixture as test } from '../fixtures/ComfyPage'
|
||||
|
||||
test.describe('Select Component Escape Key Propagation', { tag: '@ui' }, () => {
|
||||
test.beforeEach(async ({ comfyPage }) => {
|
||||
// Ensure clean state
|
||||
await comfyPage.workflow.setupWorkflowsDirectory({})
|
||||
})
|
||||
|
||||
test('pressing Escape in a Select dropdown closes only the menu and not the parent dialog', async ({
|
||||
comfyPage
|
||||
}) => {
|
||||
const { page } = comfyPage
|
||||
|
||||
// 1. Open the Settings dialog
|
||||
await comfyPage.menu.topbar.openTopbarMenu()
|
||||
await page.getByRole('menuitem', { name: 'Settings' }).click()
|
||||
|
||||
const settingsDialog = page.getByRole('dialog')
|
||||
await expect(settingsDialog).toBeVisible()
|
||||
|
||||
// 2. Open a Select dropdown (e.g., Language)
|
||||
// Select components in ComfyUI use aria-haspopup="listbox"
|
||||
const selectTrigger = settingsDialog
|
||||
.locator('button[aria-haspopup="listbox"]')
|
||||
.first()
|
||||
await selectTrigger.click()
|
||||
|
||||
// Verify the dropdown menu is open
|
||||
const dropdownContent = page.locator('[data-dismissable-layer]')
|
||||
await expect(dropdownContent).toBeVisible()
|
||||
|
||||
// 3. Press Escape
|
||||
// This should close the dropdown but NOT the Settings dialog
|
||||
await page.keyboard.press('Escape')
|
||||
|
||||
// 4. Assertions
|
||||
await expect(dropdownContent).not.toBeVisible()
|
||||
await expect(settingsDialog).toBeVisible()
|
||||
|
||||
// Visual verification that the dialog is still present and focused
|
||||
await expect(settingsDialog).toHaveScreenshot(
|
||||
'settings-dialog-remains-open.png'
|
||||
)
|
||||
|
||||
// 5. Press Escape again
|
||||
// Now that the dropdown is closed, Escape should close the dialog
|
||||
await page.keyboard.press('Escape')
|
||||
await expect(settingsDialog).not.toBeVisible()
|
||||
})
|
||||
})
|
||||
66
browser_tests/tests/qa-pr10512.spec.ts
Normal file
66
browser_tests/tests/qa-pr10512.spec.ts
Normal file
@@ -0,0 +1,66 @@
|
||||
import { expect } from '@playwright/test'
|
||||
import { comfyPageFixture as test } from '../fixtures/ComfyPage'
|
||||
|
||||
/**
 * E2E coverage for search-term highlighting in the Node Library sidebar.
 *
 * Both tests type a query into the sidebar search box and assert that the
 * matching part of a result is wrapped in an element with the `.highlight`
 * class.
 */
test.describe('Node Library Search Highlighting', { tag: '@ui' }, () => {
  // Single definition of the search box locator so the tests and the setup
  // hook cannot drift apart.
  const getSearchInput = (page: typeof test extends never ? never : any) =>
    page.locator('input[placeholder*="Search"]')

  test.beforeEach(async ({ comfyPage }) => {
    // Ensure a clean state
    await comfyPage.workflow.setupWorkflowsDirectory({})

    // Open the Node Library sidebar tab.
    // NOTE(review): selecting the 4th sidebar button by position is fragile;
    // prefer a dedicated selector for the node-library tab if one exists.
    const nodeLibraryTabButton = comfyPage.page
      .locator('.side-bar-button')
      .nth(3)
    await nodeLibraryTabButton.click()

    // Wait for the search input to be ready
    await expect(getSearchInput(comfyPage.page)).toBeVisible()
  })

  test('should highlight matching text in node search results', async ({
    comfyPage
  }) => {
    const searchInput = getSearchInput(comfyPage.page)

    // Search for a common node prefix
    await searchInput.fill('Load')

    // The PR implements highlighting using a span with the .highlight class
    const highlight = comfyPage.page.locator('.highlight').first()

    // Assert that highlighting is visible and contains the search term
    await expect(highlight).toBeVisible()
    await expect(highlight).toHaveText(/Load/i)

    // Verify the highlight is rendered bold. Computed font-weight is a
    // numeric string (e.g. "700"); parse it explicitly as base 10.
    const fontWeight = await highlight.evaluate(
      (el) => window.getComputedStyle(el).fontWeight
    )
    expect(Number.parseInt(fontWeight, 10)).toBeGreaterThanOrEqual(700)
  })

  test('should correctly highlight specific matches and maintain visual consistency', async ({
    comfyPage
  }) => {
    const searchInput = getSearchInput(comfyPage.page)

    // Search for "Checkpoint" to find "Load Checkpoint"
    await searchInput.fill('Checkpoint')

    // Find the specific tree item for Load Checkpoint
    const nodeItem = comfyPage.page
      .locator('[role="treeitem"]')
      .filter({ hasText: 'Load Checkpoint' })
      .first()
    await expect(nodeItem).toBeVisible()

    // Verify that the "Checkpoint" portion is wrapped in the highlight span.
    // `.first()` keeps the assertion on a single element so it does not trip
    // Playwright's strictness check if the row renders several highlights.
    const highlight = nodeItem.locator('.highlight').first()
    await expect(highlight).toHaveText('Checkpoint')

    // Visual assertion to ensure the highlight background and rounding look correct
    await expect(nodeItem).toHaveScreenshot('node-search-highlighting.png')
  })
})
|
||||
73
docs/qa/TROUBLESHOOTING.md
Normal file
73
docs/qa/TROUBLESHOOTING.md
Normal file
@@ -0,0 +1,73 @@
|
||||
# QA Pipeline Troubleshooting
|
||||
|
||||
## Common Failures
|
||||
|
||||
### `set -euo pipefail` + grep with no match
|
||||
**Symptom**: Deploy script crashes silently, badge shows FAILED.
|
||||
**Cause**: `grep -oP` returns exit code 1 when there is no match. Under `set -e` with `pipefail`, that non-zero pipeline status aborts the entire script.
|
||||
**Fix**: Always append `|| true` to grep pipelines in bash scripts.
|
||||
|
||||
### `__name is not defined` in page.evaluate
|
||||
**Symptom**: Recording crashes with `ReferenceError: __name is not defined`.
|
||||
**Cause**: tsx compiles arrow functions inside `page.evaluate()` with `__name` helpers. The browser context doesn't have these.
|
||||
**Fix**: Use `page.addScriptTag({ content: '...' })` with plain JS strings instead of `page.evaluate(() => { ... })` with arrow functions.
|
||||
|
||||
### `Set<string>()` in page.evaluate
|
||||
**Symptom**: Same `__name` error.
|
||||
**Cause**: TypeScript generics like `new Set<string>()` get compiled incorrectly for browser context.
|
||||
**Fix**: Use `new Set()` without type parameter.
|
||||
|
||||
### `zod/v4` import error
|
||||
**Symptom**: `ERR_PACKAGE_PATH_NOT_EXPORTED: Package subpath './v4' is not defined`.
|
||||
**Cause**: claude-agent-sdk depends on `zod/v4` internally, but the project's zod doesn't export it.
|
||||
**Fix**: Import from `zod` (not `zod/v4`) in project code.
|
||||
|
||||
### `ERR_PNPM_LOCKFILE_CONFIG_MISMATCH`
|
||||
**Symptom**: pnpm install fails with frozen lockfile mismatch.
|
||||
**Cause**: Adding a new dependency changes the workspace catalog but lockfile wasn't regenerated.
|
||||
**Fix**: Run `pnpm install` to regenerate lockfile, commit `pnpm-workspace.yaml` + `pnpm-lock.yaml`.
|
||||
|
||||
### `loadDefaultWorkflow` — "Load Default" not found
|
||||
**Symptom**: Menu item "Load Default" not found, canvas stays empty.
|
||||
**Cause**: The menu item name varies by version/locale. Menu navigation is fragile.
|
||||
**Fix**: Use `app.resetToDefaultWorkflow()` JS API via `page.evaluate` instead of menu navigation.
|
||||
|
||||
### Model ID not found (Claude Agent SDK)
|
||||
**Symptom**: `There's an issue with the selected model (claude-sonnet-4-6-20250514)`.
|
||||
**Cause**: Dated model IDs like `claude-sonnet-4-6-20250514` don't exist.
|
||||
**Fix**: Use `claude-sonnet-4-6` (no date suffix).
|
||||
|
||||
### Model not found (Gemini)
|
||||
**Symptom**: 404 from Gemini API.
|
||||
**Cause**: Preview model names like `gemini-2.5-flash-preview-05-20` expire.
|
||||
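**Fix**: Use a currently available model name such as `gemini-3-flash-preview`. Note that this is itself a preview name, so re-check it if the 404 returns.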
|
||||
|
||||
## Badge Mismatches
|
||||
|
||||
### False REPRODUCED
|
||||
**Symptom**: Badge says REPRODUCED but AI review says "could not reproduce".
|
||||
**Root cause**: The grep pattern `reproduc|confirm` also matches neutral or negative phrases such as "reproduction steps" or "could not be confirmed".
|
||||
**Fix**: Use structured JSON verdict from AI (`## Verdict` section with `{"verdict": "..."}`) instead of regex matching the prose.
|
||||
|
||||
### INCONCLUSIVE feedback loop
|
||||
**Symptom**: Once an issue gets INCONCLUSIVE, all future runs stay INCONCLUSIVE.
|
||||
**Cause**: QA bot's own previous comments contain "INCONCLUSIVE", which gets fed back into pr-context.txt.
|
||||
**Fix**: Filter out `github-actions[bot]` comments when building pr-context.
|
||||
|
||||
### pressKey with hold prevents event propagation
|
||||
**Symptom**: BEFORE video doesn't show the bug (e.g., Escape doesn't close dialog).
|
||||
**Cause**: `keyboard.down()` + 400ms sleep + `keyboard.up()` changes event timing. Some UI frameworks handle held keys differently than instant presses.
|
||||
**Fix**: Use instant `keyboard.press()` for testing. Show key name via subtitle overlay instead.
|
||||
|
||||
## Cursor Not Visible
|
||||
**Symptom**: No mouse cursor in recorded videos.
|
||||
**Cause**: Headless Chrome doesn't render system cursor. The CSS cursor overlay relies on DOM `mousemove` events which Playwright CDP doesn't reliably trigger.
|
||||
**Fix**: Monkey-patch `page.mouse.move/click/dblclick/down/up` to call `__moveCursor(x,y)` on the injected cursor div. This makes ALL mouse operations update the overlay.
|
||||
|
||||
## Agent Doesn't Perform Steps
|
||||
**Symptom**: Agent opens menus and settings but never interacts with the canvas.
|
||||
**Causes**:
|
||||
1. `loadDefaultWorkflow` failed (no nodes on canvas)
|
||||
2. Agent ran out of turn budget (30 turns / 120s)
|
||||
3. Gemini Flash (old agent) ignores prompt hints
|
||||
**Fix**: Use the hybrid agent (Claude Sonnet 4.6 + Gemini vision). Claude follows the prompt's instructions more reliably than the old Gemini Flash agent.
|
||||
59
docs/qa/backlog.md
Normal file
59
docs/qa/backlog.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# QA Pipeline Backlog
|
||||
|
||||
## Comparison Modes
|
||||
|
||||
### Type A: Same code, different settings (IMPLEMENTED)
|
||||
Agent demonstrates both working (control) and broken (test) states in one session by toggling settings. E.g., Nodes 2.0 OFF → drag works, Nodes 2.0 ON → drag broken.
|
||||
|
||||
### Type B: Different commits
|
||||
For regressions reported as "worked in vX.Y, broken in vX.Z":
|
||||
- `qa-analyze-pr.ts` detects regression markers ("since v1.38", "after PR #1234")
|
||||
- Pipeline checks out the old commit, records control video
|
||||
- Records test video on current main
|
||||
- Side-by-side comparison on report page (reuses PR before/after infra)
|
||||
|
||||
### Type C: Different browsers
|
||||
For browser-specific bugs ("works on Chrome, broken on Firefox"):
|
||||
- Run recording with different Playwright browser contexts
|
||||
- Compare behavior across browsers in one report
|
||||
|
||||
## Agent Improvements
|
||||
|
||||
### TTS Narration
|
||||
- OpenAI TTS (`tts-1`, nova voice) generates audio from agent reasoning
|
||||
- Merged into video via ffmpeg at correct timestamps
|
||||
- Currently in qa-record.ts but needs wiring into hybrid agent path
|
||||
|
||||
### Image/Screenshot Reading
|
||||
- `qa-analyze-pr.ts` already downloads and sends images from issue bodies to Gemini
|
||||
- Could also send them to the Claude agent as context ("the reporter showed this screenshot")
|
||||
|
||||
### Placeholder Page
|
||||
- Deploy a status page immediately when CI starts
|
||||
- Auto-refreshes every 30s until final report replaces it
|
||||
- Shows spinner, CI link, badge
|
||||
|
||||
### Pre-seed Assets
|
||||
- Upload test images via ComfyUI API before recording
|
||||
- Enables reproduction of bugs requiring assets (#10424 zoom button)
|
||||
|
||||
### Environment-Dependent Issues
|
||||
- #7942: needs custom TestNode — could install a test custom node pack in CI
|
||||
- #9101: needs completed generation — could run with a tiny model checkpoint
|
||||
|
||||
## Cost Optimization
|
||||
|
||||
### Lazy A11y Tree
|
||||
- `inspect(selector)` searches tree for specific element (~20 tokens)
|
||||
- `getUIChanges()` diffs against previous snapshot (~100 tokens)
|
||||
- vs dumping full tree every turn (~2000 tokens)
|
||||
|
||||
### Gemini Video vs Images
|
||||
- 30s video clip: ~7,700 tokens (258 tok/s)
|
||||
- 15 screenshots: ~19,500 tokens (1,300 tok/frame)
|
||||
- Video is 2.5x cheaper and shows temporal changes
|
||||
|
||||
### Model Selection
|
||||
- Claude Sonnet 4.6: $3/$15 per 1M in/out — best reasoning
|
||||
- Gemini 2.5 Flash: $0.10/$0.40 per 1M — best vision-per-dollar
|
||||
- Hybrid uses each where it's strongest
|
||||
@@ -121,6 +121,7 @@
|
||||
"zod-validation-error": "catalog:"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@anthropic-ai/claude-agent-sdk": "catalog:",
|
||||
"@eslint/js": "catalog:",
|
||||
"@google/generative-ai": "catalog:",
|
||||
"@intlify/eslint-plugin-vue-i18n": "catalog:",
|
||||
|
||||
26
pnpm-lock.yaml
generated
26
pnpm-lock.yaml
generated
@@ -9,6 +9,9 @@ catalogs:
|
||||
'@alloc/quick-lru':
|
||||
specifier: ^5.2.0
|
||||
version: 5.2.0
|
||||
'@anthropic-ai/claude-agent-sdk':
|
||||
specifier: ^0.2.85
|
||||
version: 0.2.85
|
||||
'@astrojs/vue':
|
||||
specifier: ^5.0.0
|
||||
version: 5.1.4
|
||||
@@ -597,6 +600,9 @@ importers:
|
||||
specifier: 'catalog:'
|
||||
version: 3.3.0(zod@3.24.1)
|
||||
devDependencies:
|
||||
'@anthropic-ai/claude-agent-sdk':
|
||||
specifier: 'catalog:'
|
||||
version: 0.2.85(zod@3.24.1)
|
||||
'@eslint/js':
|
||||
specifier: 'catalog:'
|
||||
version: 9.39.1
|
||||
@@ -1064,6 +1070,12 @@ packages:
|
||||
'@antfu/utils@0.7.10':
|
||||
resolution: {integrity: sha512-+562v9k4aI80m1+VuMHehNJWLOFjBnXn3tdOitzD0il5b7smkSBal4+a3oKiQTbrwMmN/TBUMDvbdoWDehgOww==}
|
||||
|
||||
'@anthropic-ai/claude-agent-sdk@0.2.85':
|
||||
resolution: {integrity: sha512-/ohKLtP1zy6aWXLW/9KTYBveJPEtAfdO96qiP1Cl5S7LgVq/qRDUl7AUw5YGrBaK6YWHEE/rfMQZGwP/i5zIvQ==}
|
||||
engines: {node: '>=18.0.0'}
|
||||
peerDependencies:
|
||||
zod: ^4.0.0
|
||||
|
||||
'@asamuzakjp/css-color@4.1.1':
|
||||
resolution: {integrity: sha512-B0Hv6G3gWGMn0xKJ0txEi/jM5iFpT3MfDxmhZFb4W047GvytCf1DHQ1D69W3zHI4yWe2aTZAA0JnbMZ7Xc8DuQ==}
|
||||
|
||||
@@ -10068,6 +10080,20 @@ snapshots:
|
||||
|
||||
'@antfu/utils@0.7.10': {}
|
||||
|
||||
'@anthropic-ai/claude-agent-sdk@0.2.85(zod@3.24.1)':
|
||||
dependencies:
|
||||
zod: 3.24.1
|
||||
optionalDependencies:
|
||||
'@img/sharp-darwin-arm64': 0.34.5
|
||||
'@img/sharp-darwin-x64': 0.34.5
|
||||
'@img/sharp-linux-arm': 0.34.5
|
||||
'@img/sharp-linux-arm64': 0.34.5
|
||||
'@img/sharp-linux-x64': 0.34.5
|
||||
'@img/sharp-linuxmusl-arm64': 0.34.5
|
||||
'@img/sharp-linuxmusl-x64': 0.34.5
|
||||
'@img/sharp-win32-arm64': 0.34.5
|
||||
'@img/sharp-win32-x64': 0.34.5
|
||||
|
||||
'@asamuzakjp/css-color@4.1.1':
|
||||
dependencies:
|
||||
'@csstools/css-calc': 2.1.4(@csstools/css-parser-algorithms@3.0.5(@csstools/css-tokenizer@3.0.4))(@csstools/css-tokenizer@3.0.4)
|
||||
|
||||
@@ -4,6 +4,7 @@ packages:
|
||||
|
||||
catalog:
|
||||
'@alloc/quick-lru': ^5.2.0
|
||||
'@anthropic-ai/claude-agent-sdk': ^0.2.85
|
||||
'@astrojs/vue': ^5.0.0
|
||||
'@comfyorg/comfyui-electron-types': 0.6.2
|
||||
'@eslint/js': ^9.39.1
|
||||
|
||||
347
scripts/qa-agent.ts
Normal file
347
scripts/qa-agent.ts
Normal file
@@ -0,0 +1,347 @@
|
||||
#!/usr/bin/env tsx
|
||||
/**
|
||||
* QA Research Phase — Claude writes & debugs E2E tests to reproduce bugs
|
||||
*
|
||||
* Instead of driving a browser interactively, Claude:
|
||||
* 1. Reads the issue + a11y snapshot of the UI
|
||||
* 2. Writes a Playwright E2E test (.spec.ts) that reproduces the bug
|
||||
* 3. Runs the test → reads errors → rewrites → repeats until it works
|
||||
* 4. Outputs the passing test + verdict
|
||||
*
|
||||
* Tools:
|
||||
* - inspect(selector) — read a11y tree to understand UI state
|
||||
* - writeTest(code) — write a Playwright test file
|
||||
* - runTest() — execute the test and get results
|
||||
* - done(verdict, summary, testCode) — finish with the working test
|
||||
*/
|
||||
|
||||
import type { Page } from '@playwright/test'
|
||||
import { query, tool, createSdkMcpServer } from '@anthropic-ai/claude-agent-sdk'
|
||||
import { z } from 'zod'
|
||||
import { mkdirSync, writeFileSync } from 'fs'
|
||||
import { execSync } from 'child_process'
|
||||
|
||||
// ── Types ──
|
||||
|
||||
/** Inputs for {@link runResearchPhase}. */
interface ResearchOptions {
  /** Live Playwright page; used only to capture accessibility snapshots. */
  page: Page
  /** Issue / PR text that is embedded verbatim in the system prompt. */
  issueContext: string
  /** Optional QA guide; added to the system prompt when non-empty. */
  qaGuide: string
  /** Base output directory; artifacts are written to `<outputDir>/research`. */
  outputDir: string
  /** Server URL: shown to the agent and exported as COMFYUI_BASE_URL for test runs. */
  serverUrl: string
  /** Forwarded to the agent SDK options as `apiKey` when provided. */
  anthropicApiKey?: string
  /** Maximum agent turns (default 50). */
  maxTurns?: number
  /** Wall-clock budget for the whole research phase in ms (default 600_000). */
  timeBudgetMs?: number
}

/** Outcome of the research phase, also persisted as `research-log.json`. */
export interface ResearchResult {
  verdict: 'REPRODUCED' | 'NOT_REPRODUCIBLE' | 'INCONCLUSIVE'
  summary: string
  evidence: string
  /** Final test source as reported by the agent via `done` ('' if it never finished). */
  testCode: string
  /** One entry per tool invocation, in call order. */
  log: Array<{
    turn: number
    timestampMs: number
    toolName: string
    toolInput: unknown
    toolResult: string
  }>
}

// ── Main research function ──

/**
 * Runs the "research" phase: a Claude agent inspects the UI, writes a
 * Playwright spec that should pass while the reported bug exists, runs it,
 * and iterates until it calls `done`, hits `maxTurns`, or exceeds
 * `timeBudgetMs`.
 *
 * Side effects:
 * - creates `<outputDir>/research/` with `reproduce.spec.ts` and
 *   `research-log.json`
 * - copies the spec to `browser_tests/tests/qa-reproduce.spec.ts` under the
 *   current working directory each time the test is run
 *
 * Errors thrown by the agent loop are logged and swallowed; in that case the
 * returned verdict stays at its INCONCLUSIVE default.
 *
 * @param opts See {@link ResearchOptions}.
 * @returns The verdict, summary, evidence, final test code and tool-call log.
 */
export async function runResearchPhase(
  opts: ResearchOptions
): Promise<ResearchResult> {
  const { page, issueContext, qaGuide, outputDir, serverUrl, anthropicApiKey } =
    opts
  const maxTurns = opts.maxTurns ?? 50
  const timeBudgetMs = opts.timeBudgetMs ?? 600_000 // 10 min for write→run→fix loops

  let agentDone = false
  let finalVerdict: ResearchResult['verdict'] = 'INCONCLUSIVE'
  let finalSummary = 'Agent did not complete'
  let finalEvidence = ''
  let finalTestCode = ''
  let turnCount = 0
  const startTime = Date.now()
  const researchLog: ResearchResult['log'] = []

  const testDir = `${outputDir}/research`
  mkdirSync(testDir, { recursive: true })
  const testPath = `${testDir}/reproduce.spec.ts`

  /** Appends one tool invocation to the research log. */
  const logToolCall = (
    toolName: string,
    toolInput: unknown,
    toolResult: string
  ) => {
    researchLog.push({
      turn: turnCount,
      timestampMs: Date.now() - startTime,
      toolName,
      toolInput,
      toolResult
    })
  }

  /** Wraps plain text in the MCP tool-result envelope. */
  const textResult = (text: string) => ({
    content: [{ type: 'text' as const, text }]
  })

  // Get initial a11y snapshot for context
  let initialA11y = ''
  try {
    initialA11y = await page.locator('body').ariaSnapshot({ timeout: 5000 })
    initialA11y = initialA11y.slice(0, 3000)
  } catch {
    initialA11y = '(could not capture initial a11y snapshot)'
  }

  // ── Tool: inspect ──
  const inspectTool = tool(
    'inspect',
    'Read the current accessibility tree to understand UI state. Use this to discover element names, roles, and selectors for your test.',
    {
      selector: z
        .string()
        .optional()
        .describe(
          'Optional filter — only show elements matching this name/role. Omit for full tree.'
        )
    },
    async (args) => {
      let resultText: string
      try {
        const ariaText = await page
          .locator('body')
          .ariaSnapshot({ timeout: 5000 })
        if (args.selector) {
          // Case-insensitive substring match against each snapshot line.
          const needle = args.selector.toLowerCase()
          const matches = ariaText
            .split('\n')
            .filter((l: string) => l.toLowerCase().includes(needle))
          resultText =
            matches.length > 0
              ? `Found "${args.selector}":\n${matches.slice(0, 15).join('\n')}`
              : `"${args.selector}" not found. Full tree:\n${ariaText.slice(0, 2000)}`
        } else {
          resultText = ariaText.slice(0, 3000)
        }
      } catch (e) {
        resultText = `inspect failed: ${e instanceof Error ? e.message : e}`
      }

      logToolCall('inspect', args, resultText.slice(0, 500))

      return textResult(resultText)
    }
  )

  // ── Tool: writeTest ──
  const writeTestTool = tool(
    'writeTest',
    'Write a Playwright E2E test file that reproduces the bug. The test should assert the broken behavior exists.',
    {
      code: z
        .string()
        .describe('Complete Playwright test file content (.spec.ts)')
    },
    async (args) => {
      writeFileSync(testPath, args.code)

      logToolCall(
        'writeTest',
        { path: testPath, codeLength: args.code.length },
        `Test written to ${testPath} (${args.code.length} chars)`
      )

      return textResult(
        `Test written to ${testPath}. Use runTest() to execute it.`
      )
    }
  )

  // ── Tool: runTest ──
  // Place test in browser_tests/ so Playwright config finds fixtures
  const projectRoot = process.cwd()
  const browserTestDir = `${projectRoot}/browser_tests/tests`
  const browserTestPath = `${browserTestDir}/qa-reproduce.spec.ts`

  const runTestTool = tool(
    'runTest',
    'Run the Playwright test and get results. Returns stdout/stderr including assertion errors.',
    {},
    async () => {
      turnCount++

      // Copy the test to browser_tests/tests/ where Playwright expects it.
      // A failure here (typically: writeTest was never called) is reported
      // back to the agent instead of throwing out of the tool handler.
      const { copyFileSync } = await import('fs')
      try {
        mkdirSync(browserTestDir, { recursive: true })
        copyFileSync(testPath, browserTestPath)
      } catch (e) {
        const reason = e instanceof Error ? e.message : String(e)
        const failureText = `TEST FAILED:\nCould not stage test file (${reason}). Call writeTest() before runTest().`
        logToolCall('runTest', { testPath }, failureText.slice(0, 1000))
        return textResult(failureText)
      }

      let resultText: string
      try {
        // `cwd` replaces a `cd "<path>" &&` prefix so the project path never
        // has to be quoted into the shell string.
        const output = execSync(
          'npx playwright test browser_tests/tests/qa-reproduce.spec.ts --reporter=list --timeout=30000 --retries=0 --workers=1 2>&1',
          {
            cwd: projectRoot,
            timeout: 90000,
            encoding: 'utf-8',
            env: {
              ...process.env,
              COMFYUI_BASE_URL: serverUrl
            }
          }
        )
        resultText = `TEST PASSED:\n${output.slice(-1500)}`
      } catch (e) {
        const err = e as { stdout?: string; stderr?: string; message?: string }
        let output = (err.stdout || '') + '\n' + (err.stderr || '')
        // On timeout / spawn failure there may be no captured output at all;
        // fall back to the error message so the agent sees why it failed.
        if (output.trim() === '') {
          output = err.message || String(e)
        }
        resultText = `TEST FAILED:\n${output.slice(-2000)}`
      }

      logToolCall('runTest', { testPath }, resultText.slice(0, 1000))

      return textResult(resultText)
    }
  )

  // ── Tool: done ──
  const doneTool = tool(
    'done',
    'Finish research with verdict and the final test code.',
    {
      verdict: z.enum(['REPRODUCED', 'NOT_REPRODUCIBLE', 'INCONCLUSIVE']),
      summary: z.string().describe('What you found and why'),
      evidence: z.string().describe('Test output that proves the verdict'),
      testCode: z
        .string()
        .describe(
          'Final Playwright test code. If REPRODUCED, this test asserts the bug exists and passes.'
        )
    },
    async (args) => {
      agentDone = true
      finalVerdict = args.verdict
      finalSummary = args.summary
      finalEvidence = args.evidence
      finalTestCode = args.testCode
      // Persist the agent's final version, which may differ from the last
      // writeTest() call.
      writeFileSync(testPath, args.testCode)

      const doneText = `Research complete: ${args.verdict}`
      logToolCall(
        'done',
        { verdict: args.verdict, codeLength: args.testCode.length },
        doneText
      )

      return textResult(doneText)
    }
  )

  // ── MCP Server ──
  const server = createSdkMcpServer({
    name: 'qa-research',
    version: '1.0.0',
    tools: [inspectTool, writeTestTool, runTestTool, doneTool]
  })

  // ── System prompt ──
  const systemPrompt = `You are a senior QA engineer who writes Playwright E2E tests to reproduce reported bugs.

## Your tools
- inspect(selector?) — Read the accessibility tree to understand the current UI. Use to discover selectors, element names, and UI state.
- writeTest(code) — Write a Playwright test file (.spec.ts)
- runTest() — Execute the test and get results (pass/fail + errors)
- done(verdict, summary, evidence, testCode) — Finish with the final test

## Workflow
1. Read the issue description carefully
2. Use inspect() to understand the current UI state and discover element selectors
3. Write a Playwright test that:
- Navigates to ${serverUrl}
- Performs the exact reproduction steps from the issue
- Asserts the BROKEN behavior (the bug) — so the test PASSES when the bug exists
4. Run the test with runTest()
5. If it fails: read the error, fix the test, run again (max 5 attempts)
6. Call done() with the final verdict and test code

## Test writing guidelines
- Import the project fixture: \`import { comfyPageFixture as test } from '../fixtures/ComfyPage'\`
- Import expect: \`import { expect } from '@playwright/test'\`
- The fixture provides \`comfyPage\` which has:
- \`comfyPage.page\` — the Playwright Page object
- \`comfyPage.menu.topbar\` — topbar actions (saveWorkflowAs, getTabNames, getWorkflowTab)
- \`comfyPage.menu.topbar.triggerTopbarCommand(label)\` — click a menu command
- \`comfyPage.workflow\` — workflow helpers (isCurrentWorkflowModified, setupWorkflowsDirectory)
- \`comfyPage.canvas\` — canvas element for mouse interactions
- \`comfyPage.settings.setSetting(id, value)\` — change settings
- \`comfyPage.nextFrame()\` — wait for next render frame
- \`comfyPage.loadWorkflow(name)\` — load a named workflow
- Use beforeEach to set up settings and workflow directory
- Use afterEach to clean up (setupWorkflowsDirectory({}))
- If the bug IS present, the test should PASS. If the bug is fixed, the test would FAIL.
- Keep tests focused and minimal — test ONLY the reported bug
- The test file will be placed in browser_tests/tests/qa-reproduce.spec.ts

## Current UI state (accessibility tree)
${initialA11y}

${qaGuide ? `## QA Analysis Guide\n${qaGuide}\n` : ''}
## Issue to Reproduce
${issueContext}`

  // ── Run the agent ──
  console.warn('Starting research phase (Claude writes E2E tests)...')

  try {
    for await (const message of query({
      prompt:
        'Write a Playwright E2E test that reproduces the reported bug. Use inspect() to discover selectors, writeTest() to write the test, runTest() to execute it. Iterate until it works or you determine the bug cannot be reproduced.',
      options: {
        model: 'claude-sonnet-4-6',
        systemPrompt,
        // NOTE(review): confirm the SDK honours an `apiKey` option; it may
        // only read ANTHROPIC_API_KEY from the environment.
        ...(anthropicApiKey ? { apiKey: anthropicApiKey } : {}),
        maxTurns,
        mcpServers: { 'qa-research': server },
        allowedTools: [
          'mcp__qa-research__inspect',
          'mcp__qa-research__writeTest',
          'mcp__qa-research__runTest',
          'mcp__qa-research__done'
        ]
      }
    })) {
      if (message.type === 'assistant' && message.message?.content) {
        for (const block of message.message.content) {
          if ('text' in block && block.text) {
            console.warn(`  Claude: ${block.text.slice(0, 200)}`)
          }
          if ('name' in block) {
            // JSON.stringify returns undefined for an undefined input; guard
            // so a logging call can never abort the agent loop.
            const inputPreview = (JSON.stringify(block.input) ?? '').slice(
              0,
              100
            )
            console.warn(`  Tool: ${block.name}(${inputPreview})`)
          }
        }
      }
      if (agentDone) break

      // Enforce the wall-clock budget between agent messages.
      const elapsedMs = Date.now() - startTime
      if (elapsedMs > timeBudgetMs) {
        console.warn(
          `Research time budget exhausted (${elapsedMs}ms > ${timeBudgetMs}ms)`
        )
        finalSummary = `Agent did not complete: time budget of ${timeBudgetMs}ms exhausted`
        break
      }
    }
  } catch (e) {
    console.warn(`Research error: ${e instanceof Error ? e.message : e}`)
  }

  const result: ResearchResult = {
    verdict: finalVerdict,
    summary: finalSummary,
    evidence: finalEvidence,
    testCode: finalTestCode,
    log: researchLog
  }

  writeFileSync(`${testDir}/research-log.json`, JSON.stringify(result, null, 2))
  console.warn(
    `Research complete: ${finalVerdict} (${researchLog.length} tool calls)`
  )

  return result
}
|
||||
@@ -112,6 +112,10 @@ if [ -n "${AFTER_SHA:-}" ]; then
|
||||
[ -n "${TARGET_NUM:-}" ] && AFTER_LABEL="#${TARGET_NUM}"
|
||||
COMMIT_HTML="${COMMIT_HTML:+${COMMIT_HTML} · }<a href=${REPO_URL}/commit/${AFTER_SHA} class=sha title='PR head commit'>${AFTER_LABEL} @ ${SHORT_AFTER}</a>"
|
||||
fi
|
||||
if [ -n "${PIPELINE_SHA:-}" ]; then
|
||||
SHORT_PIPE="${PIPELINE_SHA:0:7}"
|
||||
COMMIT_HTML="${COMMIT_HTML:+${COMMIT_HTML} · }<a href=${REPO_URL}/commit/${PIPELINE_SHA} class=sha title='QA pipeline version'>QA @ ${SHORT_PIPE}</a>"
|
||||
fi
|
||||
[ -n "$COMMIT_HTML" ] && COMMIT_HTML=" · ${COMMIT_HTML}"
|
||||
|
||||
RUN_LINK=""
|
||||
@@ -119,6 +123,13 @@ if [ -n "${RUN_URL:-}" ]; then
|
||||
RUN_LINK=" · <a href=\"${RUN_URL}\" class=sha title=\"GitHub Actions run\">CI Job</a>"
|
||||
fi
|
||||
|
||||
# Timing info
|
||||
DEPLOY_TIME=$(date -u '+%Y-%m-%d %H:%M UTC')
|
||||
TIMING_HTML=""
|
||||
if [ -n "${RUN_START_TIME:-}" ]; then
|
||||
TIMING_HTML=" · <span class=sha title='Pipeline timing'>${RUN_START_TIME} → ${DEPLOY_TIME}</span>"
|
||||
fi
|
||||
|
||||
# Generate index.html from template
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
TEMPLATE="$SCRIPT_DIR/qa-report-template.html"
|
||||
@@ -131,11 +142,57 @@ cat > "$DEPLOY_DIR/_headers" <<'HEADERSEOF'
|
||||
Cache-Control: public, max-age=86400
|
||||
HEADERSEOF
|
||||
|
||||
# Build purpose description from pr-context.txt
|
||||
PURPOSE_HTML=""
|
||||
if [ -f pr-context.txt ]; then
|
||||
# Extract title line and first paragraph of description
|
||||
PR_TITLE=$(grep -m1 '^Title:' pr-context.txt | sed 's/^Title: //')
|
||||
if [ "$TARGET_TYPE" = "issue" ]; then
|
||||
PURPOSE_LABEL="Issue #${TARGET_NUM}"
|
||||
PURPOSE_VERB="reports"
|
||||
else
|
||||
PURPOSE_LABEL="PR #${TARGET_NUM}"
|
||||
PURPOSE_VERB="aims to"
|
||||
fi
|
||||
# Get up to 400 bytes from the first 5 lines of the description body (after the "Description:" line)
|
||||
PR_DESC=$(sed -n '/^Description:/,/^###/p' pr-context.txt | grep -v '^Description:\|^###' | head -5 | sed 's/&/\&/g; s/</\</g; s/>/\>/g' | tr '\n' ' ' | head -c 400)
|
||||
[ -z "$PR_DESC" ] && PR_DESC=$(sed -n '3,8p' pr-context.txt | sed 's/&/\&/g; s/</\</g; s/>/\>/g' | tr '\n' ' ' | head -c 400)
|
||||
# Build requirements from QA guide JSON
|
||||
REQS_HTML=""
|
||||
QA_GUIDE=$(ls qa-guides/qa-guide-*.json 2>/dev/null | head -1)
|
||||
if [ -f "$QA_GUIDE" ]; then
|
||||
PREREQS=$(python3 -c "
|
||||
import json, sys, html
|
||||
try:
|
||||
g = json.load(open(sys.argv[1]))
|
||||
prereqs = g.get('prerequisites', [])
|
||||
steps = g.get('steps', [])
|
||||
focus = g.get('test_focus', '')
|
||||
parts = []
|
||||
if focus:
|
||||
parts.append('<strong>Test focus:</strong> ' + html.escape(focus))
|
||||
if prereqs:
|
||||
parts.append('<strong>Prerequisites:</strong> ' + ', '.join(html.escape(p) for p in prereqs))
|
||||
if steps:
|
||||
parts.append('<strong>Steps:</strong> ' + ' → '.join(html.escape(s.get('description', str(s))) for s in steps[:6]))
|
||||
if len(steps) > 6:
|
||||
parts[-1] += ' → ...'
|
||||
print('<br>'.join(parts))
|
||||
except: pass
|
||||
" "$QA_GUIDE" 2>/dev/null)
|
||||
[ -n "$PREREQS" ] && REQS_HTML="<div class=purpose-reqs>${PREREQS}</div>"
|
||||
fi
|
||||
|
||||
PURPOSE_HTML="<div class=purpose><div class=purpose-label>${PURPOSE_LABEL} ${PURPOSE_VERB}</div><strong>${PR_TITLE}</strong><br>${PR_DESC}${REQS_HTML}</div>"
|
||||
fi
|
||||
|
||||
echo -n "$COMMIT_HTML" > "$DEPLOY_DIR/.commit_html"
|
||||
echo -n "$CARDS" > "$DEPLOY_DIR/.cards_html"
|
||||
echo -n "$RUN_LINK" > "$DEPLOY_DIR/.run_link"
|
||||
# Badge HTML with copy button (placeholder URL filled after deploy)
|
||||
echo -n '<div class="badge-bar"><img src="badge.svg" alt="QA Badge" class="badge-img"/><button class="copy-badge" title="Copy badge markdown" onclick="copyBadge()"><svg width=14 height=14 viewBox="0 0 24 24" fill=none stroke=currentColor stroke-width=2><rect x=9 y=9 width=13 height=13 rx=2/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg></button></div>' > "$DEPLOY_DIR/.badge_html"
|
||||
echo -n "${TIMING_HTML:-}" > "$DEPLOY_DIR/.timing_html"
|
||||
echo -n "$PURPOSE_HTML" > "$DEPLOY_DIR/.purpose_html"
|
||||
python3 -c "
|
||||
import sys, pathlib
|
||||
d = pathlib.Path(sys.argv[1])
|
||||
@@ -144,9 +201,11 @@ t = t.replace('{{COMMIT_HTML}}', (d / '.commit_html').read_text())
|
||||
t = t.replace('{{CARDS}}', (d / '.cards_html').read_text())
|
||||
t = t.replace('{{RUN_LINK}}', (d / '.run_link').read_text())
|
||||
t = t.replace('{{BADGE_HTML}}', (d / '.badge_html').read_text())
|
||||
t = t.replace('{{TIMING_HTML}}', (d / '.timing_html').read_text())
|
||||
t = t.replace('{{PURPOSE_HTML}}', (d / '.purpose_html').read_text())
|
||||
sys.stdout.write(t)
|
||||
" "$DEPLOY_DIR" "$TEMPLATE" > "$DEPLOY_DIR/index.html"
|
||||
rm -f "$DEPLOY_DIR/.commit_html" "$DEPLOY_DIR/.cards_html" "$DEPLOY_DIR/.run_link" "$DEPLOY_DIR/.badge_html"
|
||||
rm -f "$DEPLOY_DIR/.commit_html" "$DEPLOY_DIR/.cards_html" "$DEPLOY_DIR/.run_link" "$DEPLOY_DIR/.badge_html" "$DEPLOY_DIR/.timing_html" "$DEPLOY_DIR/.purpose_html"
|
||||
|
||||
cat > "$DEPLOY_DIR/404.html" <<'ERROREOF'
|
||||
<!DOCTYPE html><html lang=en><head><meta charset=utf-8><title>404</title>
|
||||
@@ -155,22 +214,59 @@ cat > "$DEPLOY_DIR/404.html" <<'ERROREOF'
|
||||
</head><body><div><h1>404</h1><p>File not found. The QA recording may have failed or been cancelled.</p></div></body></html>
|
||||
ERROREOF
|
||||
|
||||
# Copy research log to deploy dir if it exists
|
||||
for rlog in qa-artifacts/*/research/research-log.json qa-artifacts/*/*/research/research-log.json qa-artifacts/before/*/research/research-log.json; do
|
||||
if [ -f "$rlog" ]; then
|
||||
cp "$rlog" "$DEPLOY_DIR/research-log.json"
|
||||
echo "Found research log: $rlog"
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
# Generate badge SVGs into deploy dir
|
||||
# Verdict detection: check each report file's ## Summary section individually,
|
||||
# then aggregate across passes for a "X/Y REPRODUCED" badge.
|
||||
# Priority: research-log.json verdict (a11y-verified) > video review verdict (AI interpretation)
|
||||
REPRO_COUNT=0 INCONC_COUNT=0 NOT_REPRO_COUNT=0 TOTAL_REPORTS=0
|
||||
if [ -d video-reviews ]; then
|
||||
|
||||
# Try research log first (ground truth from a11y assertions)
|
||||
RESEARCH_VERDICT=""
|
||||
if [ -f "$DEPLOY_DIR/research-log.json" ]; then
|
||||
RESEARCH_VERDICT=$(python3 -c "import json,sys; d=json.load(open(sys.argv[1])); print(d.get('verdict',''))" "$DEPLOY_DIR/research-log.json" 2>/dev/null || true)
|
||||
echo "Research verdict (a11y-verified): ${RESEARCH_VERDICT:-none}"
|
||||
if [ -n "$RESEARCH_VERDICT" ]; then
|
||||
TOTAL_REPORTS=1
|
||||
case "$RESEARCH_VERDICT" in
|
||||
REPRODUCED) REPRO_COUNT=1 ;;
|
||||
NOT_REPRODUCIBLE) NOT_REPRO_COUNT=1 ;;
|
||||
INCONCLUSIVE) INCONC_COUNT=1 ;;
|
||||
esac
|
||||
fi
|
||||
fi
|
||||
|
||||
# Fall back to video review verdicts if no research log
|
||||
if [ -z "$RESEARCH_VERDICT" ] && [ -d video-reviews ]; then
|
||||
for rpt in video-reviews/*-qa-video-report.md; do
|
||||
[ -f "$rpt" ] || continue
|
||||
TOTAL_REPORTS=$((TOTAL_REPORTS + 1))
|
||||
SUMM=$(sed -n '/^## Summary/,/^## /p' "$rpt" 2>/dev/null | head -15)
|
||||
# Check negatives FIRST — "fails to reproduce" contains "reproduce" but is negative
|
||||
if echo "$SUMM" | grep -iq 'INCONCLUSIVE'; then
|
||||
INCONC_COUNT=$((INCONC_COUNT + 1))
|
||||
elif echo "$SUMM" | grep -iq 'not reproduced\|could not reproduce\|unable to reproduce\|fails\? to reproduce\|was NOT\|NOT visible\|not observed\|fail.* to demonstrate\|does not demonstrate\|steps were not performed\|never.*tested\|never.*accessed'; then
|
||||
NOT_REPRO_COUNT=$((NOT_REPRO_COUNT + 1))
|
||||
elif echo "$SUMM" | grep -iq 'reproduc\|confirm'; then
|
||||
REPRO_COUNT=$((REPRO_COUNT + 1))
|
||||
# Try structured JSON verdict first (from ## Verdict section)
|
||||
VERDICT_JSON=$(grep -oP '"verdict":\s*"[A-Z_]+' "$rpt" 2>/dev/null | tail -1 | grep -oP '[A-Z_]+$' || true)
|
||||
RISK_JSON=$(grep -oP '"risk":\s*"[a-z]+' "$rpt" 2>/dev/null | tail -1 | grep -oP '[a-z]+$' || true)
|
||||
|
||||
if [ -n "$VERDICT_JSON" ]; then
|
||||
case "$VERDICT_JSON" in
|
||||
REPRODUCED) REPRO_COUNT=$((REPRO_COUNT + 1)) ;;
|
||||
NOT_REPRODUCIBLE) NOT_REPRO_COUNT=$((NOT_REPRO_COUNT + 1)) ;;
|
||||
INCONCLUSIVE) INCONC_COUNT=$((INCONC_COUNT + 1)) ;;
|
||||
esac
|
||||
else
|
||||
# Fallback: grep Summary section (for older reports without ## Verdict)
|
||||
SUMM=$(sed -n '/^## Summary/,/^## /p' "$rpt" 2>/dev/null | head -15)
|
||||
if echo "$SUMM" | grep -iq 'INCONCLUSIVE'; then
|
||||
INCONC_COUNT=$((INCONC_COUNT + 1))
|
||||
elif echo "$SUMM" | grep -iq 'not reproduced\|could not reproduce\|could not be confirmed\|unable to reproduce\|fails\? to reproduce\|fails\? to perform\|was NOT\|NOT visible\|not observed\|fail.* to demonstrate\|does not demonstrate\|steps were not performed\|never.*tested\|never.*accessed\|not.* confirmed'; then
|
||||
NOT_REPRO_COUNT=$((NOT_REPRO_COUNT + 1))
|
||||
elif echo "$SUMM" | grep -iq 'reproduc\|confirm'; then
|
||||
REPRO_COUNT=$((REPRO_COUNT + 1))
|
||||
fi
|
||||
fi
|
||||
done
|
||||
fi
|
||||
@@ -207,46 +303,49 @@ else
|
||||
fi
|
||||
fi
|
||||
|
||||
# Badge label includes the target number for identification
|
||||
BADGE_LABEL="QA"
|
||||
[ -n "${TARGET_NUM:-}" ] && BADGE_LABEL="#${TARGET_NUM} QA"
|
||||
# Badge label: #NUM QA0327 (with today's date)
|
||||
QA_DATE=$(date -u '+%m%d')
|
||||
BADGE_LABEL="QA${QA_DATE}"
|
||||
[ -n "${TARGET_NUM:-}" ] && BADGE_LABEL="#${TARGET_NUM} QA${QA_DATE}"
|
||||
|
||||
if [ "$TARGET_TYPE" = "issue" ]; then
|
||||
if [ "$TOTAL_REPORTS" -gt 1 ]; then
|
||||
# Multi-pass: vertical box badge with breakdown
|
||||
/tmp/gen-badge-box.sh "$DEPLOY_DIR/badge.svg" "$BADGE_LABEL" \
|
||||
"$REPRO_COUNT" "$NOT_REPRO_COUNT" "$FAIL_COUNT" "$TOTAL_REPORTS"
|
||||
# For PRs, also extract fix quality from Overall Risk section
|
||||
FIX_RESULT="" FIX_COLOR="#4c1"
|
||||
if [ "$TARGET_TYPE" != "issue" ]; then
|
||||
# Try structured JSON risk first
|
||||
ALL_RISKS=$(grep -ohP '"risk":\s*"[a-z]+' video-reviews/*.md 2>/dev/null | grep -oP '[a-z]+$' || true)
|
||||
if [ -n "$ALL_RISKS" ]; then
|
||||
# Use worst risk across all reports
|
||||
if echo "$ALL_RISKS" | grep -q 'high'; then
|
||||
FIX_RESULT="MAJOR ISSUES" FIX_COLOR="#e05d44"
|
||||
elif echo "$ALL_RISKS" | grep -q 'medium'; then
|
||||
FIX_RESULT="MINOR ISSUES" FIX_COLOR="#dfb317"
|
||||
elif echo "$ALL_RISKS" | grep -q 'low'; then
|
||||
FIX_RESULT="APPROVED" FIX_COLOR="#4c1"
|
||||
fi
|
||||
else
|
||||
BADGE_STATUS="${REPRO_RESULT:-FINISHED}"
|
||||
/tmp/gen-badge.sh "$BADGE_STATUS" "${REPRO_COLOR}" "$DEPLOY_DIR/badge.svg" "$BADGE_LABEL"
|
||||
# Fallback: grep Overall Risk section
|
||||
RISK_TEXT=""
|
||||
if [ -d video-reviews ]; then
|
||||
RISK_TEXT=$(sed -n '/^## Overall Risk/,/^## /p' video-reviews/*.md 2>/dev/null | sed 's/\*//g' | head -20)
|
||||
fi
|
||||
RISK_FIRST=$(echo "$RISK_TEXT" | grep -oiP '^\s*(high|medium|moderate|low|minimal|critical)' | head -1 | tr '[:upper:]' '[:lower:]')
|
||||
if [ -n "$RISK_FIRST" ]; then
|
||||
case "$RISK_FIRST" in
|
||||
*low*|*minimal*) FIX_RESULT="APPROVED" FIX_COLOR="#4c1" ;;
|
||||
*medium*|*moderate*) FIX_RESULT="MINOR ISSUES" FIX_COLOR="#dfb317" ;;
|
||||
*high*|*critical*) FIX_RESULT="MAJOR ISSUES" FIX_COLOR="#e05d44" ;;
|
||||
esac
|
||||
elif echo "$RISK_TEXT" | grep -iq 'no.*risk\|approved\|looks good'; then
|
||||
FIX_RESULT="APPROVED" FIX_COLOR="#4c1"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
# Extract the Overall Risk section for fix quality verdict.
|
||||
# Only look at the "## Overall Risk" section to avoid false matches from
|
||||
# severity labels (e.g. `MAJOR`) or negated phrases ("no regressions").
|
||||
RISK_TEXT=""
|
||||
if [ -d video-reviews ]; then
|
||||
# Strip markdown bold/italic so **High** matches as "High"
|
||||
RISK_TEXT=$(sed -n '/^## Overall Risk/,/^## /p' video-reviews/*.md 2>/dev/null | sed 's/\*//g' | head -20)
|
||||
fi
|
||||
# Check LOW first — "high confidence" contains "high" but means low risk
|
||||
SOLN_RESULT="" SOLN_COLOR="#4c1"
|
||||
RISK_FIRST=$(echo "$RISK_TEXT" | grep -oiP '^\s*(high|medium|moderate|low|minimal|critical)' | head -1 | tr '[:upper:]' '[:lower:]')
|
||||
if [ -n "$RISK_FIRST" ]; then
|
||||
case "$RISK_FIRST" in
|
||||
*low*|*minimal*) SOLN_RESULT="APPROVED" SOLN_COLOR="#4c1" ;;
|
||||
*medium*|*moderate*) SOLN_RESULT="MINOR ISSUES" SOLN_COLOR="#dfb317" ;;
|
||||
*high*|*critical*) SOLN_RESULT="MAJOR ISSUES" SOLN_COLOR="#e05d44" ;;
|
||||
esac
|
||||
elif echo "$RISK_TEXT" | grep -iq 'no.*risk\|approved\|looks good'; then
|
||||
SOLN_RESULT="APPROVED" SOLN_COLOR="#4c1"
|
||||
fi
|
||||
BADGE_STATUS="${REPRO_RESULT:-UNKNOWN} | Fix: ${SOLN_RESULT:-UNKNOWN}"
|
||||
/tmp/gen-badge-dual.sh \
|
||||
"${REPRO_RESULT:-UNKNOWN}" "${REPRO_COLOR}" \
|
||||
"${SOLN_RESULT:-UNKNOWN}" "${SOLN_COLOR}" \
|
||||
"$DEPLOY_DIR/badge.svg" "$BADGE_LABEL"
|
||||
fi
|
||||
|
||||
# Always use vertical box badge
|
||||
/tmp/gen-badge-box.sh "$DEPLOY_DIR/badge.svg" "$BADGE_LABEL" \
|
||||
"$REPRO_COUNT" "$NOT_REPRO_COUNT" "$FAIL_COUNT" "$TOTAL_REPORTS" \
|
||||
"$FIX_RESULT" "$FIX_COLOR"
|
||||
BADGE_STATUS="${REPRO_RESULT:-UNKNOWN}${FIX_RESULT:+ | Fix: ${FIX_RESULT}}"
|
||||
echo "badge_status=${BADGE_STATUS:-FINISHED}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
BRANCH=$(echo "$RAW_BRANCH" | sed 's/[^a-zA-Z0-9-]/-/g' | sed 's/--*/-/g' | sed 's/^-//;s/-$//' | cut -c1-28)
|
||||
|
||||
@@ -19,7 +19,15 @@
|
||||
import { chromium } from '@playwright/test'
|
||||
import type { Page } from '@playwright/test'
|
||||
import { GoogleGenerativeAI } from '@google/generative-ai'
|
||||
import { readFileSync, mkdirSync, readdirSync, renameSync, statSync } from 'fs'
|
||||
import {
|
||||
readFileSync,
|
||||
writeFileSync,
|
||||
mkdirSync,
|
||||
readdirSync,
|
||||
renameSync,
|
||||
statSync
|
||||
} from 'fs'
|
||||
import { execSync } from 'child_process'
|
||||
|
||||
// ── Types ──
|
||||
|
||||
@@ -392,6 +400,176 @@ const FALLBACK_STEPS: Record<RecordMode, TestAction[]> = {
|
||||
|
||||
const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms))
|
||||
|
||||
/**
 * Moves the in-page cursor overlay to the given coordinates.
 *
 * Runs inside the page: looks up `window.__moveCursor` and, when it is set,
 * calls it with `(x, y)`. Pages that never defined the hook are left untouched.
 */
async function moveCursorOverlay(page: Page, x: number, y: number) {
  const target = [x, y]
  await page.evaluate(([px, py]) => {
    const hooks = window as unknown as Record<string, unknown>
    const move = hooks.__moveCursor as
      | ((x: number, y: number) => void)
      | undefined
    if (!move) return
    move(px, py)
  }, target)
}
|
||||
|
||||
/**
 * Toggles the pressed state of the in-page cursor overlay.
 *
 * Runs inside the page: looks up `window.__clickCursor` and, when it is set,
 * calls it with `down` (`true` for press, `false` for release as used by the
 * callers in this file). Does nothing when the hook is missing.
 */
async function clickCursorOverlay(page: Page, down: boolean) {
  await page.evaluate((pressed) => {
    const hooks = window as unknown as Record<string, unknown>
    const setPressed = hooks.__clickCursor as
      | ((down: boolean) => void)
      | undefined
    if (!setPressed) return
    setPressed(pressed)
  }, down)
}
|
||||
|
||||
/** One subtitle shown during recording, kept so it can be voiced by TTS later. */
interface NarrationSegment {
  /** Turn number displayed in the subtitle prefix (`[turn]`). */
  turn: number
  /** Offset of the subtitle from the narration time anchor, in milliseconds. */
  timestampMs: number
  /** Subtitle text after truncation and newline flattening. */
  text: string
}

// Collected during recording, used for TTS post-processing
const narrationSegments: NarrationSegment[] = []
// Time anchor for segment offsets. It used to be a constant 0, which made every
// `timestampMs` an absolute epoch value (and the resulting audio delay decades
// long). It is now set when the first subtitle is shown.
// TODO(review): anchor this at the real recording start once that is exposed here.
let recordingStartMs = 0

/**
 * Shows `text` as a subtitle overlay in the page and records it for narration.
 *
 * @param page - Playwright page that receives the overlay script.
 * @param text - Subtitle text; truncated to 120 UTF-16 units, newlines become spaces.
 * @param turn - Turn number shown as a `[turn]` prefix and stored with the segment.
 */
async function showSubtitle(page: Page, text: string, turn: number) {
  const safeText = text.slice(0, 120).replace(/\n/g, ' ')

  const now = Date.now()
  if (recordingStartMs === 0) recordingStartMs = now

  // Track for TTS post-processing
  narrationSegments.push({
    turn,
    timestampMs: now - recordingStartMs,
    text: safeText
  })

  // JSON.stringify yields a valid JavaScript string literal for any input.
  // The previous backslash-escape + encodeURIComponent combination left
  // apostrophes unescaped inside a single-quoted literal, so any subtitle
  // containing "'" produced a script with a syntax error.
  const label = JSON.stringify(`[${turn}] ${safeText}`)

  await page.addScriptTag({
    content: `(function(){
  var id='qa-subtitle';
  var el=document.getElementById(id);
  if(!el){
    el=document.createElement('div');
    el.id=id;
    Object.assign(el.style,{position:'fixed',bottom:'32px',left:'50%',transform:'translateX(-50%)',zIndex:'2147483646',maxWidth:'90%',padding:'6px 14px',borderRadius:'6px',background:'rgba(0,0,0,0.8)',color:'rgba(255,255,255,0.95)',fontSize:'12px',fontFamily:'system-ui,sans-serif',fontWeight:'400',lineHeight:'1.4',pointerEvents:'none',textAlign:'center',transition:'opacity 0.3s',whiteSpace:'normal'});
    document.body.appendChild(el);
  }
  el.textContent=${label};
  el.style.opacity='1';
})()`
  })
}
|
||||
|
||||
/**
 * Voices each narration segment with the OpenAI speech API and mixes the
 * clips into a single audio track with ffmpeg.
 *
 * Side effects: creates `<outputDir>/narration`, writes `segments.json`, one
 * MP3 per successfully voiced segment, and `mixed.mp3`.
 *
 * @param segments - Subtitles to voice; `timestampMs` is used as the clip delay.
 * @param outputDir - Directory under which the `narration` folder is created.
 * @param apiKey - Not read by this function; the TTS key comes from the
 *   `OPENAI_API_KEY` environment variable. Kept so existing callers still work.
 * @returns Path to the mixed audio file, or `null` when there are no segments,
 *   no `OPENAI_API_KEY`, no clip could be generated, or the ffmpeg mix failed.
 */
async function generateNarrationAudio(
  segments: NarrationSegment[],
  outputDir: string,
  apiKey: string
): Promise<string | null> {
  if (segments.length === 0) return null

  const narrationDir = `${outputDir}/narration`
  mkdirSync(narrationDir, { recursive: true })

  // Save narration metadata
  writeFileSync(
    `${narrationDir}/segments.json`,
    JSON.stringify(segments, null, 2)
  )

  // Generate TTS using OpenAI API (high quality, fast)
  const ttsKey = process.env.OPENAI_API_KEY
  if (!ttsKey) {
    console.warn(' OPENAI_API_KEY not set, skipping TTS narration')
    return null
  }

  const audioFiles: Array<{ path: string; offsetMs: number }> = []
  // Several segments can share a turn number (key-press subtitles all use
  // turn 0), so repeated turns get a numeric suffix instead of overwriting
  // the earlier clip. The first clip of a turn keeps the plain name.
  const clipsPerTurn = new Map<number, number>()

  for (const seg of segments) {
    const earlier = clipsPerTurn.get(seg.turn) ?? 0
    clipsPerTurn.set(seg.turn, earlier + 1)
    const suffix = earlier === 0 ? '' : `-${earlier}`
    const audioPath = `${narrationDir}/turn-${seg.turn}${suffix}.mp3`
    try {
      const resp = await fetch('https://api.openai.com/v1/audio/speech', {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${ttsKey}`,
          'Content-Type': 'application/json'
        },
        body: JSON.stringify({
          model: 'tts-1',
          voice: 'nova',
          input: seg.text,
          speed: 1.15
        })
      })
      if (!resp.ok)
        throw new Error(`TTS API ${resp.status}: ${await resp.text()}`)
      const audioBuffer = Buffer.from(await resp.arrayBuffer())
      writeFileSync(audioPath, audioBuffer)
      audioFiles.push({ path: audioPath, offsetMs: seg.timestampMs })
      console.warn(
        ` TTS [${seg.turn}]: ${audioPath} (${audioBuffer.length} bytes)`
      )
    } catch (e) {
      // Best effort: a failed clip is skipped, the remaining ones are still mixed
      console.warn(
        ` TTS [${seg.turn}] failed: ${e instanceof Error ? e.message.slice(0, 80) : e}`
      )
    }
  }

  if (audioFiles.length === 0) return null

  // Build ffmpeg filter to mix all audio clips at correct timestamps.
  // Input paths are quoted like the output path so spaces do not split them.
  const inputArgs = audioFiles.map((clip) => `-i "${clip.path}"`)
  const filterParts = audioFiles.map(
    (clip, i) => `[${i}]adelay=${clip.offsetMs}|${clip.offsetMs}[a${i}]`
  )
  const mixInputs = audioFiles.map((_, i) => `[a${i}]`).join('')
  const filter = `${filterParts.join(';')};${mixInputs}amix=inputs=${audioFiles.length}:normalize=0[aout]`

  const mixedAudio = `${narrationDir}/mixed.mp3`
  try {
    execSync(
      `ffmpeg -y ${inputArgs.join(' ')} -filter_complex "${filter}" -map "[aout]" "${mixedAudio}" 2>/dev/null`,
      { timeout: 30000 }
    )
    console.warn(` TTS mixed: ${mixedAudio}`)
    return mixedAudio
  } catch (e) {
    console.warn(
      ` TTS mix failed: ${e instanceof Error ? e.message.slice(0, 80) : e}`
    )
    return null
  }
}
|
||||
|
||||
/**
 * Muxes `audioPath` into `videoPath` with ffmpeg and swaps the result in place.
 *
 * The video stream is copied, the audio is encoded as AAC, and output stops at
 * the shorter of the two inputs. ffmpeg writes to `outputPath`, which is then
 * renamed over `videoPath`.
 *
 * @returns `true` when the narrated file replaced the original, `false` when
 *   ffmpeg or the rename threw (the failure is logged, not rethrown).
 */
function mergeAudioIntoVideo(
  videoPath: string,
  audioPath: string,
  outputPath: string
): boolean {
  const command = `ffmpeg -y -i "${videoPath}" -i "${audioPath}" -c:v copy -c:a aac -map 0:v:0 -map 1:a:0 -shortest "${outputPath}" 2>/dev/null`
  let merged = false
  try {
    execSync(command, { timeout: 60000 })
    // Replace original with narrated version
    renameSync(outputPath, videoPath)
    console.warn(` Narrated video: ${videoPath}`)
    merged = true
  } catch (err) {
    const reason = err instanceof Error ? err.message.slice(0, 80) : err
    console.warn(` Audio merge failed: ${reason}`)
  }
  return merged
}
|
||||
|
||||
async function openComfyMenu(page: Page) {
|
||||
const menuTrigger = page.locator('.comfy-menu-button-wrapper')
|
||||
const menuPopup = page.locator('.comfy-command-menu')
|
||||
@@ -427,6 +605,13 @@ async function hoverMenuItem(page: Page, label: string) {
|
||||
const menuItem = page
|
||||
.locator('.comfy-command-menu .p-tieredmenu-item')
|
||||
.filter({ has: menuLabel })
|
||||
const box = await menuItem.boundingBox().catch(() => null)
|
||||
if (box)
|
||||
await moveCursorOverlay(
|
||||
page,
|
||||
box.x + box.width / 2,
|
||||
box.y + box.height / 2
|
||||
)
|
||||
await menuItem.hover()
|
||||
// Wait for submenu to appear
|
||||
try {
|
||||
@@ -451,9 +636,18 @@ async function clickSubmenuItem(page: Page, label: string) {
|
||||
.filter({ hasText: label })
|
||||
.first()
|
||||
if (await primeItem.isVisible().catch(() => false)) {
|
||||
const box = await primeItem.boundingBox().catch(() => null)
|
||||
if (box)
|
||||
await moveCursorOverlay(
|
||||
page,
|
||||
box.x + box.width / 2,
|
||||
box.y + box.height / 2
|
||||
)
|
||||
if (box) await clickCursorOverlay(page, true)
|
||||
await primeItem.click({ timeout: 5000 }).catch(() => {
|
||||
console.warn(`Click on PrimeVue menu item "${label}" failed`)
|
||||
})
|
||||
if (box) await clickCursorOverlay(page, false)
|
||||
await sleep(800)
|
||||
return
|
||||
}
|
||||
@@ -464,9 +658,18 @@ async function clickSubmenuItem(page: Page, label: string) {
|
||||
.filter({ hasText: label })
|
||||
.first()
|
||||
if (await liteItem.isVisible().catch(() => false)) {
|
||||
const box = await liteItem.boundingBox().catch(() => null)
|
||||
if (box)
|
||||
await moveCursorOverlay(
|
||||
page,
|
||||
box.x + box.width / 2,
|
||||
box.y + box.height / 2
|
||||
)
|
||||
if (box) await clickCursorOverlay(page, true)
|
||||
await liteItem.click({ timeout: 5000 }).catch(() => {
|
||||
console.warn(`Click on litegraph menu item "${label}" failed`)
|
||||
})
|
||||
if (box) await clickCursorOverlay(page, false)
|
||||
await sleep(800)
|
||||
return
|
||||
}
|
||||
@@ -543,13 +746,24 @@ async function fillDialogAndConfirm(page: Page, text: string) {
|
||||
async function clickByText(page: Page, text: string) {
|
||||
const el = page.locator(`text=${text}`).first()
|
||||
if (await el.isVisible().catch(() => false)) {
|
||||
// Get element position for cursor overlay
|
||||
const box = await el.boundingBox().catch(() => null)
|
||||
if (box) {
|
||||
await moveCursorOverlay(
|
||||
page,
|
||||
box.x + box.width / 2,
|
||||
box.y + box.height / 2
|
||||
)
|
||||
}
|
||||
await el.hover({ timeout: 3000 }).catch(() => {})
|
||||
await sleep(400)
|
||||
if (box) await clickCursorOverlay(page, true)
|
||||
await el.click({ timeout: 5000 }).catch((e) => {
|
||||
console.warn(
|
||||
`Click on "${text}" failed: ${e instanceof Error ? e.message.split('\n')[0] : e}`
|
||||
)
|
||||
})
|
||||
if (box) await clickCursorOverlay(page, false)
|
||||
await sleep(500)
|
||||
} else {
|
||||
console.warn(`Element with text "${text}" not found`)
|
||||
@@ -569,7 +783,7 @@ async function waitForEditorReady(page: Page) {
|
||||
await sleep(1000)
|
||||
}
|
||||
|
||||
async function executeAction(
|
||||
export async function executeAction(
|
||||
page: Page,
|
||||
step: TestAction,
|
||||
outputDir: string
|
||||
@@ -605,8 +819,17 @@ async function executeAction(
|
||||
break
|
||||
case 'pressKey':
|
||||
try {
|
||||
// Show key in subtitle (persists 2s) then instant press
|
||||
const keyLabel =
|
||||
step.key === ' '
|
||||
? 'Space'
|
||||
: step.key.length === 1
|
||||
? step.key.toUpperCase()
|
||||
: step.key
|
||||
await showSubtitle(page, `⌨ ${keyLabel}`, 0)
|
||||
await sleep(200) // Let subtitle render before pressing
|
||||
await page.keyboard.press(step.key)
|
||||
await sleep(300)
|
||||
await sleep(500)
|
||||
} catch (e) {
|
||||
console.warn(
|
||||
`Skipping invalid key "${step.key}": ${e instanceof Error ? e.message : e}`
|
||||
@@ -722,11 +945,29 @@ async function executeAction(
|
||||
break
|
||||
}
|
||||
case 'loadDefaultWorkflow':
|
||||
// Convenience: File → Load Default in one action
|
||||
await openComfyMenu(page)
|
||||
await hoverMenuItem(page, 'File')
|
||||
await clickSubmenuItem(page, 'Load Default')
|
||||
await sleep(1000)
|
||||
// Load default workflow via app API (most reliable, no menu navigation)
|
||||
try {
|
||||
await page.evaluate(() => {
|
||||
const app = (window as unknown as Record<string, unknown>).app as {
|
||||
loadGraphData?: (d: unknown) => Promise<void>
|
||||
resetToDefaultWorkflow?: () => Promise<void>
|
||||
}
|
||||
if (app?.resetToDefaultWorkflow) return app.resetToDefaultWorkflow()
|
||||
return Promise.resolve()
|
||||
})
|
||||
await sleep(1000)
|
||||
} catch {
|
||||
// Fallback: try menu navigation with multiple possible item names
|
||||
await openComfyMenu(page)
|
||||
await hoverMenuItem(page, 'File')
|
||||
const loaded = await clickSubmenuItem(page, 'Load Default')
|
||||
.then(() => true)
|
||||
.catch(() => false)
|
||||
if (!loaded) {
|
||||
await clickSubmenuItem(page, 'Default Workflow').catch(() => {})
|
||||
}
|
||||
await sleep(1000)
|
||||
}
|
||||
break
|
||||
case 'openSettings':
|
||||
// Convenience: open Settings dialog in one action
|
||||
@@ -1043,13 +1284,14 @@ function buildPreflightActions(context: string): TestAction[] {
|
||||
const ctx = context.toLowerCase()
|
||||
const actions: TestAction[] = []
|
||||
|
||||
// Enable Nodes 2.0 if issue mentions it
|
||||
// Enable Nodes 2.0 if issue mentions it — requires reload to take effect
|
||||
if (/nodes.*2\.0|vue.*node|new.*node|node.*beta/.test(ctx)) {
|
||||
actions.push({
|
||||
action: 'setSetting',
|
||||
id: 'Comfy.NodeBeta.Enabled',
|
||||
value: true
|
||||
})
|
||||
actions.push({ action: 'reload' })
|
||||
}
|
||||
|
||||
// Load default workflow for most reproduction scenarios
|
||||
@@ -1255,18 +1497,25 @@ async function runAgenticLoop(
|
||||
preflightNote
|
||||
)
|
||||
|
||||
const anthropicKey = process.env.ANTHROPIC_API_KEY
|
||||
const useHybrid = Boolean(anthropicKey)
|
||||
|
||||
const genAI = new GoogleGenerativeAI(opts.apiKey)
|
||||
// Use flash for agentic loop — rapid iteration matters more than reasoning
|
||||
const geminiVisionModel = genAI.getGenerativeModel({
|
||||
model: 'gemini-3-flash-preview'
|
||||
})
|
||||
|
||||
// Gemini-only fallback model (used when no ANTHROPIC_API_KEY)
|
||||
const agenticModel = opts.model.includes('flash')
|
||||
? opts.model
|
||||
: 'gemini-3-flash-preview'
|
||||
const model = genAI.getGenerativeModel({
|
||||
const geminiOnlyModel = genAI.getGenerativeModel({
|
||||
model: agenticModel,
|
||||
systemInstruction
|
||||
})
|
||||
|
||||
console.warn(
|
||||
`Starting agentic loop with ${agenticModel}` +
|
||||
`Starting ${useHybrid ? 'hybrid (Claude planner + Gemini vision)' : 'Gemini-only'} agentic loop` +
|
||||
(subIssue ? ` — focus: ${subIssue.title}` : '')
|
||||
)
|
||||
|
||||
@@ -1354,6 +1603,8 @@ async function runAgenticLoop(
|
||||
const parsed = JSON.parse(responseText)
|
||||
if (parsed.reasoning) {
|
||||
console.warn(` Reasoning: ${parsed.reasoning.slice(0, 150)}`)
|
||||
// Show reasoning as subtitle overlay in the video
|
||||
await showSubtitle(page, parsed.reasoning, turn)
|
||||
}
|
||||
actionObj = parsed.action || parsed.actions?.[0] || parsed
|
||||
if (!actionObj?.action) {
|
||||
@@ -1552,54 +1803,66 @@ async function launchSessionAndLogin(
|
||||
>
|
||||
page: Page
|
||||
}> {
|
||||
const browser = await chromium.launch({ headless: true })
|
||||
const browser = await chromium.launch({
|
||||
headless: true,
|
||||
args: [
|
||||
'--no-sandbox',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-gpu',
|
||||
'--disable-dev-shm-usage'
|
||||
]
|
||||
})
|
||||
const context = await browser.newContext({
|
||||
viewport: { width: 1280, height: 720 },
|
||||
recordVideo: { dir: videoDir, size: { width: 1280, height: 720 } }
|
||||
})
|
||||
const page = await context.newPage()
|
||||
|
||||
// Inject visible cursor overlay (headless Chrome doesn't render system cursor)
|
||||
await page.addInitScript(() => {
|
||||
const style = document.createElement('style')
|
||||
style.textContent = `
|
||||
#qa-cursor {
|
||||
position: fixed; z-index: 2147483647; pointer-events: none;
|
||||
width: 16px; height: 16px; margin: -8px 0 0 -8px;
|
||||
border-radius: 50%; background: rgba(255, 60, 60, 0.7);
|
||||
border: 2px solid rgba(255, 255, 255, 0.9);
|
||||
box-shadow: 0 0 8px rgba(255, 60, 60, 0.5);
|
||||
transition: transform 80ms ease-out, opacity 80ms;
|
||||
transform: scale(1); opacity: 0.85;
|
||||
}
|
||||
#qa-cursor.clicking {
|
||||
transform: scale(1.8); opacity: 1;
|
||||
background: rgba(255, 200, 60, 0.8);
|
||||
border-color: rgba(255, 255, 255, 1);
|
||||
box-shadow: 0 0 16px rgba(255, 200, 60, 0.6);
|
||||
}
|
||||
`
|
||||
const cursor = document.createElement('div')
|
||||
cursor.id = 'qa-cursor'
|
||||
// Cursor overlay placeholder — injected after login when DOM is stable
|
||||
|
||||
const init = () => {
|
||||
document.head.appendChild(style)
|
||||
document.body.appendChild(cursor)
|
||||
document.addEventListener('mousemove', (e) => {
|
||||
cursor.style.left = e.clientX + 'px'
|
||||
cursor.style.top = e.clientY + 'px'
|
||||
})
|
||||
document.addEventListener('mousedown', () =>
|
||||
cursor.classList.add('clicking')
|
||||
)
|
||||
document.addEventListener('mouseup', () =>
|
||||
cursor.classList.remove('clicking')
|
||||
)
|
||||
}
|
||||
// Monkey-patch page.mouse to auto-update cursor overlay on ALL mouse ops
|
||||
const origMove = page.mouse.move.bind(page.mouse)
|
||||
const origClick = page.mouse.click.bind(page.mouse)
|
||||
const origDown = page.mouse.down.bind(page.mouse)
|
||||
const origUp = page.mouse.up.bind(page.mouse)
|
||||
const origDblclick = page.mouse.dblclick.bind(page.mouse)
|
||||
|
||||
if (document.body) init()
|
||||
else document.addEventListener('DOMContentLoaded', init)
|
||||
})
|
||||
page.mouse.move = async (
|
||||
x: number,
|
||||
y: number,
|
||||
options?: Parameters<typeof origMove>[2]
|
||||
) => {
|
||||
await origMove(x, y, options)
|
||||
await moveCursorOverlay(page, x, y)
|
||||
}
|
||||
page.mouse.click = async (
|
||||
x: number,
|
||||
y: number,
|
||||
options?: Parameters<typeof origClick>[2]
|
||||
) => {
|
||||
await moveCursorOverlay(page, x, y)
|
||||
await clickCursorOverlay(page, true)
|
||||
await origClick(x, y, options)
|
||||
await clickCursorOverlay(page, false)
|
||||
}
|
||||
page.mouse.dblclick = async (
|
||||
x: number,
|
||||
y: number,
|
||||
options?: Parameters<typeof origDblclick>[2]
|
||||
) => {
|
||||
await moveCursorOverlay(page, x, y)
|
||||
await clickCursorOverlay(page, true)
|
||||
await origDblclick(x, y, options)
|
||||
await clickCursorOverlay(page, false)
|
||||
}
|
||||
page.mouse.down = async (options?: Parameters<typeof origDown>[0]) => {
|
||||
await clickCursorOverlay(page, true)
|
||||
await origDown(options)
|
||||
}
|
||||
page.mouse.up = async (options?: Parameters<typeof origUp>[0]) => {
|
||||
await origUp(options)
|
||||
await clickCursorOverlay(page, false)
|
||||
}
|
||||
|
||||
console.warn(`Opening ComfyUI at ${opts.serverUrl}`)
|
||||
await page.goto(opts.serverUrl, {
|
||||
@@ -1610,6 +1873,20 @@ async function launchSessionAndLogin(
|
||||
await loginAsQaCi(page, opts.serverUrl)
|
||||
await sleep(1000)
|
||||
|
||||
// Inject cursor overlay AFTER login (addInitScript gets destroyed by Vue mount)
|
||||
await page.addScriptTag({
|
||||
content: `(function(){
|
||||
var s=document.createElement('style');
|
||||
s.textContent='#qa-cursor{position:fixed;z-index:2147483647;pointer-events:none;width:20px;height:20px;margin:-2px 0 0 -2px;opacity:0.95;transition:transform 80ms ease-out;transform:scale(1)}#qa-cursor.clicking{transform:scale(1.4)}';
|
||||
document.head.appendChild(s);
|
||||
var c=document.createElement('div');c.id='qa-cursor';
|
||||
c.innerHTML='<svg width="20" height="20" viewBox="0 0 24 24" fill="white" stroke="black" stroke-width="1.5"><path d="M4 2l14 10-6.5 1.5L15 21l-3.5-1.5L8 21l-1.5-7.5L2 16z"/></svg>';
|
||||
document.body.appendChild(c);
|
||||
window.__moveCursor=function(x,y){c.style.left=x+'px';c.style.top=y+'px'};
|
||||
window.__clickCursor=function(d){if(d)c.classList.add('clicking');else c.classList.remove('clicking')};
|
||||
})()`
|
||||
})
|
||||
|
||||
// Inject keyboard HUD — shows pressed keys in bottom-right corner of video
|
||||
// Uses addScriptTag to avoid tsx __name compilation artifacts in page.evaluate
|
||||
await page.addScriptTag({
|
||||
@@ -1657,8 +1934,115 @@ async function main() {
|
||||
await page.screenshot({
|
||||
path: `${opts.outputDir}/debug-after-login-reproduce${sessionLabel}.png`
|
||||
})
|
||||
console.warn('Editor ready — starting agentic loop')
|
||||
await runAgenticLoop(page, opts, opts.outputDir, subIssue)
|
||||
// ═══ Phase 1: RESEARCH — Claude writes E2E test to reproduce ═══
|
||||
console.warn('Phase 1: Research — Claude writes E2E test')
|
||||
const anthropicKey = process.env.ANTHROPIC_API_KEY
|
||||
const { runResearchPhase } = await import('./qa-agent.js')
|
||||
const issueCtx = opts.diffFile
|
||||
? readFileSync(opts.diffFile, 'utf-8').slice(0, 6000)
|
||||
: 'No issue context provided'
|
||||
let qaGuideText = ''
|
||||
if (opts.qaGuideFile) {
|
||||
try {
|
||||
qaGuideText = readFileSync(opts.qaGuideFile, 'utf-8')
|
||||
} catch {
|
||||
// QA guide not available
|
||||
}
|
||||
}
|
||||
const research = await runResearchPhase({
|
||||
page,
|
||||
issueContext: issueCtx,
|
||||
qaGuide: qaGuideText,
|
||||
outputDir: opts.outputDir,
|
||||
serverUrl: opts.serverUrl,
|
||||
anthropicApiKey: anthropicKey
|
||||
})
|
||||
console.warn(
|
||||
`Research complete: ${research.verdict} — ${research.summary.slice(0, 100)}`
|
||||
)
|
||||
console.warn(`Evidence: ${research.evidence.slice(0, 200)}`)
|
||||
|
||||
// ═══ Phase 2: Run passing test with video recording ═══
|
||||
if (research.verdict === 'REPRODUCED' && research.testCode) {
|
||||
console.warn('Phase 2: Recording test execution with video')
|
||||
const projectRoot = process.cwd()
|
||||
const browserTestFile = `${projectRoot}/browser_tests/tests/qa-reproduce.spec.ts`
|
||||
const testResultsDir = `${opts.outputDir}/test-results`
|
||||
// Inject cursor overlay into the test — add page.addInitScript in beforeEach
|
||||
const cursorScript = `await comfyPage.page.addInitScript(() => {
|
||||
var c=document.createElement('div');c.id='qa-cursor';
|
||||
c.innerHTML='<svg width="20" height="20" viewBox="0 0 24 24" fill="white" stroke="black" stroke-width="1.5"><path d="M4 2l14 10-6.5 1.5L15 21l-3.5-1.5L8 21l-1.5-7.5L2 16z"/></svg>';
|
||||
Object.assign(c.style,{position:'fixed',zIndex:'2147483647',pointerEvents:'none',width:'20px',height:'20px',margin:'-2px 0 0 -2px',opacity:'0.95'});
|
||||
if(document.body)document.body.appendChild(c);
|
||||
else document.addEventListener('DOMContentLoaded',function(){document.body.appendChild(c)});
|
||||
document.addEventListener('mousemove',function(e){c.style.left=e.clientX+'px';c.style.top=e.clientY+'px'});
|
||||
});`
|
||||
// Insert cursor injection after the first line of the test body (after async ({ comfyPage }) => {)
|
||||
let testCode = research.testCode
|
||||
const testBodyMatch = testCode.match(
|
||||
/async\s*\(\{\s*comfyPage\s*\}\)\s*=>\s*\{/
|
||||
)
|
||||
if (testBodyMatch && testBodyMatch.index !== undefined) {
|
||||
const insertPos = testBodyMatch.index + testBodyMatch[0].length
|
||||
testCode =
|
||||
testCode.slice(0, insertPos) +
|
||||
'\n ' +
|
||||
cursorScript +
|
||||
'\n' +
|
||||
testCode.slice(insertPos)
|
||||
}
|
||||
// Inject 800ms pauses between actions for human-readable video
|
||||
// Uses comfyPage.page since test code uses comfyPageFixture
|
||||
testCode = testCode.replace(
|
||||
/(\n\s*)(await\s+(?:comfyPage|topbar|firstNode|page|canvas|expect))/g,
|
||||
'$1await comfyPage.page.waitForTimeout(800);\n$1$2'
|
||||
)
|
||||
writeFileSync(browserTestFile, testCode)
|
||||
try {
|
||||
const output = execSync(
|
||||
`cd "${projectRoot}" && npx playwright test browser_tests/tests/qa-reproduce.spec.ts --reporter=list --timeout=30000 --retries=0 --workers=1 --output="${testResultsDir}" 2>&1`,
|
||||
{
|
||||
timeout: 90000,
|
||||
encoding: 'utf-8',
|
||||
env: {
|
||||
...process.env,
|
||||
COMFYUI_BASE_URL: opts.serverUrl,
|
||||
PLAYWRIGHT_LOCAL: '1' // Enables video=on + trace=on in playwright.config.ts
|
||||
}
|
||||
}
|
||||
)
|
||||
console.warn(`Phase 2: Test passed\n${output.slice(-300)}`)
|
||||
} catch (e) {
|
||||
const err = e as { stdout?: string }
|
||||
console.warn(
|
||||
`Phase 2: Test failed\n${(err.stdout || '').slice(-300)}`
|
||||
)
|
||||
}
|
||||
// Copy recorded video to outputDir so deploy script finds it
|
||||
try {
|
||||
const videos = execSync(
|
||||
`find "${testResultsDir}" -name '*.webm' -type f 2>/dev/null`,
|
||||
{ encoding: 'utf-8' }
|
||||
)
|
||||
.trim()
|
||||
.split('\n')
|
||||
.filter(Boolean)
|
||||
if (videos.length > 0) {
|
||||
execSync(`cp "${videos[0]}" "${opts.outputDir}/qa-session.webm"`)
|
||||
console.warn(`Phase 2: Video → ${opts.outputDir}/qa-session.webm`)
|
||||
}
|
||||
} catch {
|
||||
console.warn('Phase 2: No test video found')
|
||||
}
|
||||
// Cleanup
|
||||
try {
|
||||
execSync(`rm -f "${browserTestFile}"`)
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
} else {
|
||||
console.warn(`Skipping Phase 2: verdict=${research.verdict}`)
|
||||
}
|
||||
await sleep(2000)
|
||||
} finally {
|
||||
await context.close()
|
||||
@@ -1666,7 +2050,38 @@ async function main() {
|
||||
}
|
||||
|
||||
knownNames.add(videoName)
|
||||
renameLatestWebm(opts.outputDir, videoName, knownNames)
|
||||
// If Phase 2 already copied a test video as qa-session.webm, don't overwrite it
|
||||
// with the idle research browser video
|
||||
const videoPath = `${opts.outputDir}/${videoName}`
|
||||
if (statSync(videoPath, { throwIfNoEntry: false })) {
|
||||
console.warn(
|
||||
'Phase 2 test video exists — skipping research video rename'
|
||||
)
|
||||
} else {
|
||||
renameLatestWebm(opts.outputDir, videoName, knownNames)
|
||||
}
|
||||
|
||||
// Post-process: add TTS narration audio to the video
|
||||
if (narrationSegments.length > 0) {
|
||||
const videoPath = `${opts.outputDir}/${videoName}`
|
||||
if (statSync(videoPath, { throwIfNoEntry: false })) {
|
||||
console.warn(
|
||||
`Generating TTS narration for ${narrationSegments.length} segments...`
|
||||
)
|
||||
const audioPath = await generateNarrationAudio(
|
||||
narrationSegments,
|
||||
opts.outputDir,
|
||||
opts.apiKey
|
||||
)
|
||||
if (audioPath) {
|
||||
mergeAudioIntoVideo(
|
||||
videoPath,
|
||||
audioPath,
|
||||
`${opts.outputDir}/${videoName.replace('.webm', '-narrated.webm')}`
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Before/after batch mode (unchanged)
|
||||
|
||||
@@ -2,10 +2,11 @@
|
||||
<link rel=preconnect href=https://fonts.googleapis.com><link rel=preconnect href=https://fonts.gstatic.com crossorigin><link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel=stylesheet>
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||
<style>
|
||||
:root{--bg:oklch(8% 0.02 265);--surface:oklch(12% 0.02 265);--surface-up:oklch(16% 0.02 265);--fg:oklch(96% 0.01 95);--fg-muted:oklch(65% 0.01 265);--fg-dim:oklch(45% 0.01 265);--primary:oklch(62% 0.21 265);--primary-up:oklch(68% 0.21 265);--primary-glow:oklch(62% 0.15 265);--ok:oklch(62% 0.18 155);--err:oklch(62% 0.22 25);--border:oklch(22% 0.02 265);--border-faint:oklch(15% 0.01 265);--r:0.75rem;--r-lg:1rem;--ease-out:cubic-bezier(0.22,1,0.36,1);--dur-base:250ms;--dur-slow:500ms;--font:'Inter',system-ui,sans-serif;--font-mono:'JetBrains Mono',monospace}
|
||||
:root{--bg:oklch(97% 0.01 265);--surface:oklch(100% 0 0);--surface-up:oklch(94% 0.01 265);--fg:oklch(15% 0.02 265);--fg-muted:oklch(40% 0.01 265);--fg-dim:oklch(55% 0.01 265);--primary:oklch(50% 0.21 265);--primary-up:oklch(45% 0.21 265);--primary-glow:oklch(55% 0.15 265);--ok:oklch(45% 0.18 155);--err:oklch(50% 0.22 25);--border:oklch(85% 0.01 265);--border-faint:oklch(90% 0.01 265);--r:0.75rem;--r-lg:1rem;--ease-out:cubic-bezier(0.22,1,0.36,1);--dur-base:250ms;--dur-slow:500ms;--font:'Inter',system-ui,sans-serif;--font-mono:'JetBrains Mono',monospace}
|
||||
@media(prefers-color-scheme:dark){:root{--bg:oklch(8% 0.02 265);--surface:oklch(12% 0.02 265);--surface-up:oklch(16% 0.02 265);--fg:oklch(96% 0.01 95);--fg-muted:oklch(65% 0.01 265);--fg-dim:oklch(45% 0.01 265);--primary:oklch(62% 0.21 265);--primary-up:oklch(68% 0.21 265);--primary-glow:oklch(62% 0.15 265);--ok:oklch(62% 0.18 155);--err:oklch(62% 0.22 25);--border:oklch(22% 0.02 265);--border-faint:oklch(15% 0.01 265)}}
|
||||
*{margin:0;padding:0;box-sizing:border-box}
|
||||
body{background:var(--bg);color:var(--fg);font-family:var(--font);min-height:100vh;padding:clamp(1.5rem,4vw,3rem) clamp(1rem,3vw,2rem);position:relative}
|
||||
body::after{content:'';position:fixed;inset:0;pointer-events:none;opacity:.03;background:url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='.85' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)'/%3E%3C/svg%3E")}
|
||||
@media(prefers-color-scheme:dark){body::after{content:'';position:fixed;inset:0;pointer-events:none;opacity:.03;background:url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='.85' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)'/%3E%3C/svg%3E")}}
|
||||
.container{max-width:1200px;margin:0 auto}
|
||||
header{display:flex;align-items:center;gap:1rem;margin-bottom:clamp(1.5rem,4vw,3rem);padding-bottom:1.25rem;border-bottom:1px solid var(--border)}
|
||||
.header-icon{width:36px;height:36px;display:grid;place-items:center;background:linear-gradient(135deg,oklch(100% 0 0/.06),oklch(100% 0 0/.02));backdrop-filter:blur(12px);border:1px solid oklch(100% 0 0/.1);border-radius:var(--r);flex-shrink:0}
|
||||
@@ -13,9 +14,9 @@ header{display:flex;align-items:center;gap:1rem;margin-bottom:clamp(1.5rem,4vw,3
|
||||
h1{font-size:clamp(1.25rem,2.5vw,1.625rem);font-weight:700;letter-spacing:-.03em;background:linear-gradient(135deg,var(--fg),var(--fg-muted));-webkit-background-clip:text;-webkit-text-fill-color:transparent;background-clip:text}
|
||||
.meta{color:var(--fg-dim);font-size:.8125rem;margin-top:.15rem;letter-spacing:.01em}
|
||||
.grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(min(480px,100%),1fr));gap:1.5rem}
|
||||
.card{background:linear-gradient(135deg,oklch(100% 0 0/.05),oklch(100% 0 0/.015));backdrop-filter:blur(16px) saturate(150%);border:1px solid oklch(100% 0 0/.08);border-radius:var(--r-lg);overflow:hidden;transition:border-color var(--dur-base) var(--ease-out),box-shadow var(--dur-base) var(--ease-out),transform var(--dur-base) var(--ease-out)}
|
||||
.card:hover{border-color:oklch(100% 0 0/.16);box-shadow:0 8px 32px oklch(0% 0 0/.3),inset 0 1px 0 oklch(100% 0 0/.1);transform:translateY(-2px)}
|
||||
.video-wrap{position:relative;background:oklch(4% 0.01 265);border-bottom:1px solid var(--border-faint)}
|
||||
.card{background:var(--surface);border:1px solid var(--border);border-radius:var(--r-lg);overflow:hidden;transition:border-color var(--dur-base) var(--ease-out),box-shadow var(--dur-base) var(--ease-out),transform var(--dur-base) var(--ease-out)}
|
||||
.card:hover{border-color:var(--primary);box-shadow:0 4px 16px oklch(0% 0 0/.1);transform:translateY(-2px)}
|
||||
.video-wrap{position:relative;background:var(--surface);border-bottom:1px solid var(--border-faint)}
|
||||
.video-wrap video{width:100%;display:block;aspect-ratio:16/9;object-fit:contain}
|
||||
.card-body{padding:.75rem 1rem;display:flex;align-items:center;justify-content:space-between}
|
||||
.platform{display:flex;align-items:center;gap:.5rem;font-weight:600;font-size:.9375rem;letter-spacing:-.01em}
|
||||
@@ -28,7 +29,7 @@ h1{font-size:clamp(1.25rem,2.5vw,1.625rem);font-weight:700;letter-spacing:-.03em
|
||||
.comparison{display:grid;grid-template-columns:1fr 1fr;gap:0}
|
||||
.comp-panel{border-right:1px solid var(--border-faint)}
|
||||
.comp-panel:last-child{border-right:none}
|
||||
.comp-label{padding:.4rem .75rem;font-size:.7rem;font-weight:600;text-transform:uppercase;letter-spacing:.05em;color:var(--fg-muted);background:oklch(10% 0.01 265);display:flex;align-items:center;gap:.4rem}
|
||||
.comp-label{padding:.4rem .75rem;font-size:.7rem;font-weight:600;text-transform:uppercase;letter-spacing:.05em;color:var(--fg-muted);background:var(--surface);display:flex;align-items:center;gap:.4rem}
|
||||
.comp-tag{font-size:.6rem;padding:.1rem .4rem;border-radius:9999px;font-weight:600}
|
||||
.comp-panel:first-child .comp-tag{background:oklch(65% 0.01 265/.15);color:var(--fg-muted);border:1px solid var(--border)}
|
||||
.comp-panel:last-child .comp-tag{background:oklch(62% 0.18 155/.15);color:var(--ok);border:1px solid oklch(62% 0.18 155/.25)}
|
||||
@@ -44,15 +45,15 @@ h1{font-size:clamp(1.25rem,2.5vw,1.625rem);font-weight:700;letter-spacing:-.03em
|
||||
.report-body p{margin:.4rem 0}
|
||||
.report-body ul,.report-body ol{margin:.4rem 0 .4rem 1.5rem}
|
||||
.report-body li{margin:.25rem 0}
|
||||
.report-body code{background:oklch(16% 0.02 265);padding:.125rem .375rem;border-radius:.25rem;font-size:.7rem;font-family:var(--font-mono);border:1px solid var(--border-faint)}
|
||||
.report-body code{background:var(--surface-up);padding:.125rem .375rem;border-radius:.25rem;font-size:.7rem;font-family:var(--font-mono);border:1px solid var(--border-faint)}
|
||||
.report-body h3+p>code:first-child{background:oklch(62% 0.22 25/.15);color:var(--err);border-color:oklch(62% 0.22 25/.25)}
|
||||
.report-body h3+p>code:nth-child(2){background:oklch(62% 0.21 265/.15);color:var(--primary-up);border-color:oklch(62% 0.21 265/.25)}
|
||||
.report-body h3+p>code:nth-child(3){background:oklch(65% 0.01 265/.15);color:var(--fg-muted);border-color:var(--border)}
|
||||
.report-body table{width:100%;border-collapse:collapse;margin:.75rem 0;font-size:.75rem;border:1px solid var(--border);border-radius:var(--r);overflow:hidden}
|
||||
.report-body th,.report-body td{border:1px solid var(--border-faint);padding:.5rem .75rem;text-align:left;vertical-align:top;word-wrap:break-word}
|
||||
.report-body th{background:oklch(14% 0.02 265);color:var(--fg);font-weight:600;font-size:.6875rem;text-transform:uppercase;letter-spacing:.05em;position:sticky;top:0;white-space:nowrap}
|
||||
.report-body tr:nth-child(even){background:oklch(10% 0.01 265/.5)}
|
||||
.report-body tr:hover{background:oklch(16% 0.02 265/.5)}
|
||||
.report-body th{background:var(--surface-up);color:var(--fg);font-weight:600;font-size:.6875rem;text-transform:uppercase;letter-spacing:.05em;position:sticky;top:0;white-space:nowrap}
|
||||
.report-body tr:nth-child(even){background:color-mix(in oklch,var(--surface) 50%,transparent)}
|
||||
.report-body tr:hover{background:color-mix(in oklch,var(--surface-up) 50%,transparent)}
|
||||
.report-body strong{color:var(--fg)}
|
||||
.report-body hr{border:none;border-top:1px solid var(--border-faint);margin:1rem 0}
|
||||
@keyframes fade-up{from{opacity:0;transform:translateY(16px)}to{opacity:1;transform:translateY(0)}}
|
||||
@@ -66,22 +67,26 @@ h1{font-size:clamp(1.25rem,2.5vw,1.625rem);font-weight:700;letter-spacing:-.03em
|
||||
.copy-badge{background:oklch(100% 0 0/.06);border:1px solid var(--border);color:var(--fg-muted);padding:.3rem .4rem;border-radius:var(--r);cursor:pointer;display:inline-flex;align-items:center;transition:all var(--dur-base) var(--ease-out)}
|
||||
.copy-badge:hover{color:var(--primary-up);border-color:var(--primary);background:oklch(62% 0.21 265/.1)}
|
||||
.copy-badge.copied{color:var(--ok);border-color:var(--ok)}
|
||||
.vseek{width:100%;padding:0 .75rem;background:oklch(6% 0.01 265);border-top:1px solid var(--border-faint);position:relative;height:24px;display:flex;align-items:center}
|
||||
.vseek{width:100%;padding:0 .75rem;background:var(--surface);border-top:1px solid var(--border-faint);position:relative;height:24px;display:flex;align-items:center}
|
||||
.vseek input[type=range]{-webkit-appearance:none;appearance:none;width:100%;height:4px;background:var(--border);border-radius:2px;outline:none;cursor:pointer;position:relative;z-index:2}
|
||||
.vseek input[type=range]::-webkit-slider-thumb{-webkit-appearance:none;width:12px;height:12px;border-radius:50%;background:var(--primary);cursor:pointer;border:2px solid var(--bg);box-shadow:0 0 4px oklch(0% 0 0/.3)}
|
||||
.vseek input[type=range]::-moz-range-thumb{width:12px;height:12px;border-radius:50%;background:var(--primary);cursor:pointer;border:2px solid var(--bg)}
|
||||
.vseek .vbuf{position:absolute;left:.75rem;right:.75rem;height:4px;border-radius:2px;pointer-events:none;top:50%;transform:translateY(-50%)}
|
||||
.vseek .vbuf-bar{height:100%;background:oklch(62% 0.21 265/.25);border-radius:2px;transition:width 200ms linear}
|
||||
.vctrl{display:flex;align-items:center;gap:.375rem;padding:.5rem .75rem;background:oklch(6% 0.01 265);border-top:1px solid var(--border-faint);flex-wrap:wrap}
|
||||
.vctrl{display:flex;align-items:center;gap:.375rem;padding:.5rem .75rem;background:var(--surface);border-top:1px solid var(--border-faint);flex-wrap:wrap}
|
||||
.vctrl button{background:oklch(100% 0 0/.06);border:1px solid var(--border);color:var(--fg-muted);font-size:.6875rem;font-weight:600;font-family:var(--font-mono);padding:.25rem .5rem;border-radius:.25rem;cursor:pointer;transition:all var(--dur-base) var(--ease-out);white-space:nowrap}
|
||||
.vctrl button:hover{color:var(--primary-up);border-color:var(--primary);background:oklch(62% 0.21 265/.1)}
|
||||
.vctrl button.active{color:var(--primary);border-color:var(--primary);background:oklch(62% 0.21 265/.15)}
|
||||
.vctrl .vtime{font-family:var(--font-mono);font-size:.6875rem;color:var(--fg-dim);min-width:10ch;text-align:center}
|
||||
.vctrl .vsep{width:1px;height:1rem;background:var(--border);flex-shrink:0}
|
||||
.vctrl .vhint{font-size:.6rem;color:var(--fg-dim);margin-left:auto}
|
||||
.purpose{background:linear-gradient(135deg,oklch(100% 0 0/.04),oklch(100% 0 0/.02));border:1px solid oklch(100% 0 0/.08);border-radius:var(--r-lg);padding:1rem 1.25rem;margin-bottom:1.5rem;font-size:.85rem;line-height:1.7;color:oklch(80% 0.01 265)}
|
||||
.purpose strong{color:var(--fg);font-weight:600}
|
||||
.purpose .purpose-label{font-size:.7rem;font-weight:600;text-transform:uppercase;letter-spacing:.05em;color:var(--fg-muted);margin-bottom:.4rem}
|
||||
.purpose .purpose-reqs{margin-top:.75rem;padding-top:.75rem;border-top:1px solid oklch(100% 0 0/.06);font-size:.8rem;color:oklch(70% 0.01 265);line-height:1.8}
|
||||
</style></head><body><div class=container>
|
||||
<header><div class=header-icon><svg width=20 height=20 viewBox="0 0 24 24" fill=none stroke=currentColor stroke-width=2 stroke-linecap=round stroke-linejoin=round><polygon points="23 7 16 12 23 17 23 7"/><rect x=1 y=5 width=15 height=14 rx=2 ry=2/></svg></div><div><h1>QA Session Recordings</h1><div class=meta>ComfyUI Frontend · Automated QA{{COMMIT_HTML}}{{RUN_LINK}}</div>{{BADGE_HTML}}</div></header>
|
||||
<div class=grid>{{CARDS}}</div>
|
||||
<header><div class=header-icon><svg width=20 height=20 viewBox="0 0 24 24" fill=none stroke=currentColor stroke-width=2 stroke-linecap=round stroke-linejoin=round><polygon points="23 7 16 12 23 17 23 7"/><rect x=1 y=5 width=15 height=14 rx=2 ry=2/></svg></div><div><h1>QA Session Recordings</h1><div class=meta>ComfyUI Frontend · Automated QA{{COMMIT_HTML}}{{RUN_LINK}}{{TIMING_HTML}}</div>{{BADGE_HTML}}</div></header>
|
||||
{{PURPOSE_HTML}}<div class=grid>{{CARDS}}</div>
|
||||
</div><script>
|
||||
function copyBadge(){const u=location.href.replace(/\/[^/]*$/,'/');const b=u+'badge.svg';const md='[]('+u+')';navigator.clipboard.writeText(md).then(()=>{const btn=document.querySelector('.copy-badge');btn.classList.add('copied');btn.innerHTML='<svg width=14 height=14 viewBox="0 0 24 24" fill=none stroke=currentColor stroke-width=2><polyline points="20 6 9 17 4 12"/></svg>';setTimeout(()=>{btn.classList.remove('copied');btn.innerHTML='<svg width=14 height=14 viewBox="0 0 24 24" fill=none stroke=currentColor stroke-width=2><rect x=9 y=9 width=13 height=13 rx=2/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg>'},2000)})}
|
||||
document.querySelectorAll('[data-md]').forEach(el=>{const t=el.textContent;el.removeAttribute('data-md');el.innerHTML=marked.parse(t)});
|
||||
|
||||
253
scripts/qa-reproduce.ts
Normal file
253
scripts/qa-reproduce.ts
Normal file
@@ -0,0 +1,253 @@
|
||||
#!/usr/bin/env tsx
|
||||
/**
|
||||
* QA Reproduce Phase — Deterministic replay of research plan with narration
|
||||
*
|
||||
* Takes a reproduction plan from the research phase and replays it:
|
||||
* 1. Execute each action deterministically (no AI decisions)
|
||||
* 2. Capture a11y snapshot before/after each action
|
||||
* 3. Gemini describes what visually changed (narration for humans)
|
||||
* 4. Output: narration-log.json with full evidence chain
|
||||
*/
|
||||
|
||||
import type { Page } from '@playwright/test'
|
||||
import { GoogleGenerativeAI } from '@google/generative-ai'
|
||||
import { mkdirSync, writeFileSync } from 'fs'
|
||||
|
||||
import type { ActionResult } from './qa-record.js'
|
||||
|
||||
// ── Types ──
|
||||
|
||||
/** One step of the reproduction plan replayed by runReproducePhase. */
interface ReproductionStep {
|
||||
// Payload handed to executeAction; its `action` key names the action and is used in logs and subtitles.
action: Record<string, unknown> & { action: string }
|
||||
// "<selector>: <state>" text checked after the action; an empty string skips the check.
expectedAssertion: string
|
||||
}
|
||||
|
||||
/** Evidence recorded for a single replayed step (one element of NarrationLog.entries). */
interface NarrationEntry {
|
||||
// 1-based position of the step in the plan.
step: number
|
||||
// Name of the executed action (the `action` key of the step payload).
action: string
|
||||
// The complete action payload, including the `action` key itself.
params: Record<string, unknown>
|
||||
// Value returned by executeAction for this step.
result: ActionResult
|
||||
// Accessibility capture of <body> taken before the action; null when the capture failed.
a11yBefore: unknown
|
||||
// Accessibility capture of <body> taken after the action; null when the capture failed.
a11yAfter: unknown
|
||||
// Copy of the step's expectedAssertion text.
assertionExpected: string
|
||||
// Outcome of the assertion check; stays false when the step has no expectedAssertion.
assertionPassed: boolean
|
||||
// Description of what was matched, 'NOT FOUND', or '' when no assertion was checked.
assertionActual: string
|
||||
// Text returned by geminiDescribe for the screen after the action.
geminiNarration: string
|
||||
// Milliseconds since the replay started, sampled just before the step ran.
timestampMs: number
|
||||
}
|
||||
|
||||
/** Result of runReproducePhase; also serialized to narration/narration-log.json. */
export interface NarrationLog {
|
||||
// One entry per replayed plan step, in execution order.
entries: NarrationEntry[]
|
||||
// Aggregate pass flag computed by runReproducePhase from the entries' assertion outcomes.
allAssertionsPassed: boolean
|
||||
}
|
||||
|
||||
/** Inputs for runReproducePhase. */
interface ReproduceOptions {
|
||||
// Playwright page the actions, snapshots and screenshots run against.
page: Page
|
||||
// Steps to replay, in order.
plan: ReproductionStep[]
|
||||
// API key passed to the GoogleGenerativeAI client for the per-step narration.
geminiApiKey: string
|
||||
// Directory that receives narration/narration-log.json and reproduce-final.png.
outputDir: string
|
||||
}
|
||||
|
||||
// ── A11y helpers ──
|
||||
|
||||
/** Accessibility-tree node shape consumed by searchA11y and summarizeA11y. */
interface A11yNode {
|
||||
// Role text; searchA11y matches selectors against it case-insensitively.
role: string
|
||||
// Accessible name; searchA11y matches selectors against it case-insensitively.
name: string
|
||||
// Optional state fields; summarizeA11y prints each one only when it is set.
value?: string
|
||||
checked?: boolean
|
||||
disabled?: boolean
|
||||
expanded?: boolean
|
||||
// Child nodes, visited in order by searchA11y.
children?: A11yNode[]
|
||||
}
|
||||
|
||||
/**
 * Find the first node, in depth-first pre-order, whose name or role contains
 * `selector` (case-insensitive substring match).
 *
 * @param node - Root of the tree to search; `null` yields `null`.
 * @param selector - Text to look for in each node's name or role.
 * @returns The first matching node, or `null` when nothing matches.
 */
function searchA11y(node: A11yNode | null, selector: string): A11yNode | null {
  if (!node) return null

  const needle = selector.toLowerCase()
  const contains = (text: string | undefined): boolean =>
    Boolean(text?.toLowerCase().includes(needle))

  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped (visited) in their original order.
  const pending: Array<A11yNode | null> = [node]
  while (pending.length > 0) {
    const current = pending.pop()
    if (!current) continue
    if (contains(current.name) || contains(current.role)) return current

    const kids = current.children
    if (kids) {
      for (let idx = kids.length - 1; idx >= 0; idx--) {
        pending.push(kids[idx])
      }
    }
  }
  return null
}
|
||||
|
||||
/**
 * Render a node as a compact one-line summary such as
 * `{role=button, name="Save", disabled}`.
 *
 * @param node - Node to describe; `null` is rendered as the string `'null'`.
 * @returns The summary; optional fields appear only when they are set
 *   (`disabled` only when truthy).
 */
function summarizeA11y(node: A11yNode | null): string {
  if (!node) return 'null'

  const optional = [
    node.value !== undefined ? `value="${node.value}"` : null,
    node.checked !== undefined ? `checked=${node.checked}` : null,
    node.disabled ? 'disabled' : null,
    node.expanded !== undefined ? `expanded=${node.expanded}` : null
  ].filter((part): part is string => part !== null)

  const fields = [`role=${node.role}`, `name="${node.name}"`, ...optional]
  return '{' + fields.join(', ') + '}'
}
|
||||
|
||||
// ── Subtitle overlay ──
|
||||
|
||||
/**
 * Show (or update) the `#qa-subtitle` caption bar at the bottom of the page
 * so the recorded video states which step is running.
 *
 * The caption is capped at 120 characters and newlines are collapsed to
 * spaces. It is displayed as `[<step>] <text>`.
 *
 * @param page - Page to inject the caption script into.
 * @param text - Caption text; may contain any characters, including quotes.
 * @param step - Step number shown in the `[n]` prefix.
 */
async function showSubtitle(page: Page, text: string, step: number) {
  const caption = text.slice(0, 120).replace(/\n/g, ' ')
  // Embed the caption as a JSON string literal. The previous
  // encodeURIComponent round-trip left apostrophes unescaped, which
  // terminated the injected single-quoted literal and broke the script.
  const captionLiteral = JSON.stringify(`[${step}] ${caption}`)

  await page.addScriptTag({
    content: `(function(){
var id='qa-subtitle';
var el=document.getElementById(id);
if(!el){
el=document.createElement('div');
el.id=id;
Object.assign(el.style,{position:'fixed',bottom:'32px',left:'50%',transform:'translateX(-50%)',zIndex:'2147483646',maxWidth:'90%',padding:'6px 14px',borderRadius:'6px',background:'rgba(0,0,0,0.8)',color:'rgba(255,255,255,0.95)',fontSize:'12px',fontFamily:'system-ui,sans-serif',fontWeight:'400',lineHeight:'1.4',pointerEvents:'none',textAlign:'center',whiteSpace:'normal'});
document.body.appendChild(el);
}
el.textContent=${captionLiteral};
})()`
  })
}
|
||||
|
||||
// ── Gemini visual narration ──
|
||||
|
||||
/**
 * Ask Gemini for a short factual description of the current screen.
 *
 * Takes a JPEG screenshot of `page` and sends it with a prompt built from
 * `focus`. Never rejects: any failure (screenshot, client, request) is
 * returned as a `(Gemini narration failed: ...)` placeholder string.
 *
 * @param page - Page to screenshot.
 * @param geminiApiKey - Key used to construct the GoogleGenerativeAI client.
 * @param focus - What the description should concentrate on.
 * @returns The trimmed model text, or the failure placeholder.
 */
async function geminiDescribe(
  page: Page,
  geminiApiKey: string,
  focus: string
): Promise<string> {
  try {
    const jpeg = await page.screenshot({ type: 'jpeg', quality: 70 })
    const client = new GoogleGenerativeAI(geminiApiKey)
    const vision = client.getGenerativeModel({
      model: 'gemini-3-flash-preview'
    })

    const instruction = {
      text: `Describe in 1-2 sentences what you see on this ComfyUI screen. Focus on: ${focus}. Be factual — only describe what is visible.`
    }
    const image = {
      inlineData: {
        mimeType: 'image/jpeg',
        data: jpeg.toString('base64')
      }
    }

    const { response } = await vision.generateContent([instruction, image])
    return response.text().trim()
  } catch (e) {
    // Keep the placeholder short: only the first 50 chars of an Error message
    const reason = e instanceof Error ? e.message.slice(0, 50) : e
    return `(Gemini narration failed: ${reason})`
  }
}
|
||||
|
||||
// ── Main reproduce function ──
|
||||
|
||||
/**
 * Locate `selector` in an accessibility capture and describe the match.
 *
 * Playwright's `locator.ariaSnapshot()` resolves to a YAML string, so a
 * string capture is searched line by line (case-insensitive) and the matching
 * line is returned trimmed. Any other value is treated as an A11yNode tree
 * and goes through searchA11y/summarizeA11y.
 *
 * @returns A description of the first match, or `null` when nothing matches.
 */
function describeA11yMatch(snapshot: unknown, selector: string): string | null {
  if (typeof snapshot === 'string') {
    const needle = selector.toLowerCase()
    const line = snapshot
      .split('\n')
      .find((candidate) => candidate.toLowerCase().includes(needle))
    return line === undefined ? null : line.trim()
  }
  const node = searchA11y(snapshot as A11yNode | null, selector)
  return node ? summarizeA11y(node) : null
}

/**
 * Replay a reproduction plan step by step and record the evidence.
 *
 * For every step: show a subtitle, capture the a11y snapshot, execute the
 * action, capture the snapshot again, evaluate the step's
 * `"<selector>: <state>"` assertion against the post-action snapshot, and ask
 * Gemini to describe the screen. Afterwards a final screenshot and
 * `narration/narration-log.json` are written under `outputDir`.
 *
 * Assertion states: `visible`/`exists` pass when the selector is found,
 * `hidden`/`gone` pass when it is not, and anything else passes when the
 * match description contains the expected text (case-insensitive).
 *
 * @param opts - Page, plan, Gemini API key and output directory.
 * @returns The narration log. `allAssertionsPassed` covers only the steps
 *   that declare an assertion (true when none do).
 */
export async function runReproducePhase(
  opts: ReproduceOptions
): Promise<NarrationLog> {
  const { page, plan, geminiApiKey, outputDir } = opts
  const { executeAction } = await import('./qa-record.js')

  const narrationDir = `${outputDir}/narration`
  mkdirSync(narrationDir, { recursive: true })

  const entries: NarrationEntry[] = []
  const startMs = Date.now()

  console.warn(`Reproduce phase: replaying ${plan.length} steps...`)

  for (let i = 0; i < plan.length; i++) {
    const step = plan[i]
    const actionObj = step.action
    const elapsed = Date.now() - startMs

    // Show subtitle
    await showSubtitle(page, `Step ${i + 1}: ${actionObj.action}`, i + 1)
    console.warn(`  [${i + 1}/${plan.length}] ${actionObj.action}`)

    // Capture a11y BEFORE (null when the snapshot cannot be taken in time)
    const a11yBefore = await page
      .locator('body')
      .ariaSnapshot({ timeout: 3000 })
      .catch(() => null)

    // Execute action
    const result = await executeAction(
      page,
      actionObj as Parameters<typeof executeAction>[1],
      outputDir
    )
    // Give the UI a moment to settle before looking at the result
    await new Promise((r) => setTimeout(r, 500))

    // Capture a11y AFTER
    const a11yAfter = await page
      .locator('body')
      .ariaSnapshot({ timeout: 3000 })
      .catch(() => null)

    // Check assertion
    let assertionPassed = false
    let assertionActual = ''
    if (step.expectedAssertion) {
      // Parse the expected assertion — e.g. "Settings dialog: visible" or "tab count: 2"
      const parts = step.expectedAssertion.split(':').map((s) => s.trim())
      const selectorName = parts[0]
      const expectedState = parts.slice(1).join(':').trim()

      // The snapshot is a YAML string, not a node tree; describeA11yMatch
      // handles both shapes so the lookup no longer always misses.
      const match = describeA11yMatch(a11yAfter, selectorName)
      assertionActual = match ?? 'NOT FOUND'

      if (expectedState === 'visible' || expectedState === 'exists') {
        assertionPassed = match !== null
      } else if (expectedState === 'hidden' || expectedState === 'gone') {
        assertionPassed = match === null
      } else {
        // Generic: check if the actual state contains the expected text
        assertionPassed = assertionActual
          .toLowerCase()
          .includes(expectedState.toLowerCase())
      }

      console.warn(
        `    Assertion: "${step.expectedAssertion}" → ${assertionPassed ? '✓ PASS' : '✗ FAIL'} (actual: ${assertionActual})`
      )
    }

    // Gemini narration (visual description for humans)
    const geminiNarration = await geminiDescribe(
      page,
      geminiApiKey,
      `What changed after ${actionObj.action}?`
    )

    entries.push({
      step: i + 1,
      action: actionObj.action,
      params: actionObj,
      result,
      a11yBefore,
      a11yAfter,
      assertionExpected: step.expectedAssertion,
      assertionPassed,
      assertionActual,
      geminiNarration,
      timestampMs: elapsed
    })
  }

  // Final screenshot
  await page.screenshot({ path: `${outputDir}/reproduce-final.png` })

  // Only steps that declared an assertion count towards the verdict; a step
  // without one was never checked and must not be reported as a failure.
  const checked = entries.filter((e) => Boolean(e.assertionExpected))
  const passedCount = checked.filter((e) => e.assertionPassed).length

  const log: NarrationLog = {
    entries,
    allAssertionsPassed: passedCount === checked.length
  }

  writeFileSync(
    `${narrationDir}/narration-log.json`,
    JSON.stringify(log, null, 2)
  )
  console.warn(
    `Reproduce phase complete: ${passedCount}/${checked.length} assertions passed`
  )

  return log
}
|
||||
@@ -401,7 +401,14 @@ function buildComparativePrompt(
|
||||
'',
|
||||
'## Possible Issues (Needs Human Verification)',
|
||||
'## Overall Risk',
|
||||
'(Assess whether the PR achieves its goal based on the before/after comparison)'
|
||||
'(Assess whether the PR achieves its goal based on the before/after comparison)',
|
||||
'',
|
||||
'## Verdict',
|
||||
'End your report with this EXACT JSON block (no markdown fence):',
|
||||
'{"verdict": "REPRODUCED" | "NOT_REPRODUCIBLE" | "INCONCLUSIVE", "risk": "low" | "medium" | "high", "confidence": "high" | "medium" | "low"}',
|
||||
'- REPRODUCED: the before video confirms the old behavior and the after video shows the fix working',
|
||||
'- NOT_REPRODUCIBLE: the before video does not show the reported bug',
|
||||
'- INCONCLUSIVE: the videos do not adequately demonstrate the behavior change'
|
||||
)
|
||||
|
||||
return lines.filter(Boolean).join('\n')
|
||||
@@ -414,6 +421,12 @@ function buildSingleVideoPrompt(
|
||||
): string {
|
||||
const lines = [
|
||||
'You are a senior QA engineer reviewing a UI test session recording.',
|
||||
'',
|
||||
'## ANTI-HALLUCINATION RULES (READ FIRST)',
|
||||
'- Describe ONLY what you can directly observe in the video frames',
|
||||
'- NEVER infer or assume what "must have happened" between frames',
|
||||
'- If a step is not visible in the video, say "NOT SHOWN" — do not guess',
|
||||
'- Your job is to be a CAMERA — report facts, not interpretations',
|
||||
''
|
||||
]
|
||||
|
||||
@@ -424,38 +437,40 @@ function buildSingleVideoPrompt(
|
||||
)
|
||||
|
||||
if (prContext) {
|
||||
lines.push(
|
||||
'## Phase 1: Blind Observation (describe what you SEE)',
|
||||
'First, describe every UI interaction chronologically WITHOUT knowing the expected outcome:',
|
||||
'- What elements does the user click/hover/type?',
|
||||
'- What dialogs/menus open and close?',
|
||||
'- What keyboard indicators appear? (look for subtitle overlays)',
|
||||
'- What is the BEFORE state and AFTER state of each action?',
|
||||
'',
|
||||
'## Phase 2: Compare against expected behavior',
|
||||
'Now compare your observations against the context below.',
|
||||
'Only claim a match if your Phase 1 observations EXPLICITLY support it.',
|
||||
''
|
||||
)
|
||||
|
||||
if (isIssueContext) {
|
||||
lines.push(
|
||||
'## Issue Context',
|
||||
'This video attempts to reproduce a reported bug on the main branch.',
|
||||
'Your review MUST evaluate whether the reported bug is visible and reproducible.',
|
||||
'',
|
||||
prContext,
|
||||
'',
|
||||
'## Review Instructions',
|
||||
'1. Does the video demonstrate the reported bug occurring?',
|
||||
'2. Is the bug clearly visible and reproducible from the steps shown?',
|
||||
'3. Are there any other issues visible during the reproduction attempt?',
|
||||
'',
|
||||
'## CRITICAL: Honesty Requirements',
|
||||
'- If the video only shows login, idle canvas, or trivial menu interactions WITHOUT actually performing the reproduction steps, say "INCONCLUSIVE — reproduction steps were not performed".',
|
||||
'- Do NOT claim a bug is "confirmed" unless you can clearly see the bug behavior described in the issue.',
|
||||
'- Do NOT hallucinate findings. If the video does not show meaningful interaction, say so clearly.',
|
||||
'- Rate confidence as "Low" if the video does not actually demonstrate the bug scenario.',
|
||||
'## Comparison Questions',
|
||||
'1. Did the video perform the reproduction steps described in the issue?',
|
||||
'2. Did your Phase 1 observations show the reported bug behavior?',
|
||||
'3. If the steps were not performed or the bug was not visible, say INCONCLUSIVE.',
|
||||
''
|
||||
)
|
||||
} else {
|
||||
lines.push(
|
||||
'## PR Context',
|
||||
'The video is a QA session testing a specific pull request.',
|
||||
'Your review MUST evaluate whether the PR achieves its stated purpose.',
|
||||
'',
|
||||
prContext,
|
||||
'',
|
||||
'## Review Instructions',
|
||||
"1. Does the video demonstrate the PR's intended behavior working correctly?",
|
||||
'2. Are there regressions or side effects caused by the PR changes?',
|
||||
'3. Does the observed behavior match what the PR claims to implement/fix?',
|
||||
'## Comparison Questions',
|
||||
'1. Did the video test the specific behavior the PR changes?',
|
||||
'2. Did your Phase 1 observations show the expected before/after difference?',
|
||||
'3. If the test was incomplete or inconclusive, say so honestly.',
|
||||
''
|
||||
)
|
||||
}
|
||||
@@ -496,7 +511,14 @@ function buildSingleVideoPrompt(
|
||||
'`SEVERITY` `TIMESTAMP` `Confidence: LEVEL`',
|
||||
'Do NOT use a table for issues — use the block format above.',
|
||||
'## Possible Issues (Needs Human Verification)',
|
||||
'## Overall Risk'
|
||||
'## Overall Risk',
|
||||
'',
|
||||
'## Verdict',
|
||||
'End your report with this EXACT JSON block (no markdown fence):',
|
||||
'{"verdict": "REPRODUCED" | "NOT_REPRODUCIBLE" | "INCONCLUSIVE", "risk": "low" | "medium" | "high" | null, "confidence": "high" | "medium" | "low"}',
|
||||
'- REPRODUCED: the bug/behavior is clearly visible in the video',
|
||||
'- NOT_REPRODUCIBLE: the steps were performed correctly but the bug was not observed',
|
||||
'- INCONCLUSIVE: the reproduction steps were not performed or the video is insufficient'
|
||||
)
|
||||
|
||||
return lines.filter(Boolean).join('\n')
|
||||
|
||||
Reference in New Issue
Block a user