mirror of
https://github.com/Comfy-Org/ComfyUI_frontend.git
synced 2026-04-20 14:30:41 +00:00
feat: use demowright for reproduce videos, combine E2E+video verdicts
- Replace hand-rolled cursor overlay and waitForTimeout injection with demowright's automatic HUD (cursor, keystroke display, 300ms delay) - Install demowright in both qa-before and qa-after CI jobs - Always check video review verdicts (not just as fallback) - Upgrade reproducedBy to 'both' when E2E and video agree - Prevent false REPRODUCED from discovery/debug tests - Ban discovery tests in agent system prompt Amp-Thread-ID: https://ampcode.com/threads/T-019d519b-004f-71ce-b970-96edd971fbe0 Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
13
.github/workflows/pr-qa.yaml
vendored
13
.github/workflows/pr-qa.yaml
vendored
@@ -10,6 +10,8 @@
|
||||
name: 'PR: QA'
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [sno-qa-*]
|
||||
pull_request:
|
||||
types: [labeled]
|
||||
branches: [main]
|
||||
@@ -90,6 +92,9 @@ jobs:
|
||||
NUM=$(gh pr list --repo "$REPO" \
|
||||
--head "$BRANCH" --state open \
|
||||
--json number --jq '.[0].number // empty')
|
||||
if [ -z "$NUM" ]; then
|
||||
NUM=$(echo "$BRANCH" | sed -n 's/^sno-qa-\([0-9]\+\)$/\1/p')
|
||||
fi
|
||||
fi
|
||||
echo "number=${NUM}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
@@ -198,7 +203,7 @@ jobs:
|
||||
include_build_step: false
|
||||
|
||||
- name: Install QA dependencies
|
||||
run: pnpm add -D @google/generative-ai@^0.24.1 @anthropic-ai/claude-agent-sdk@^0.2.85
|
||||
run: pnpm add -D @google/generative-ai@^0.24.1 @anthropic-ai/claude-agent-sdk@^0.2.85 demowright@^0.1.0
|
||||
|
||||
- name: Cache main branch dist
|
||||
id: cache-main-dist
|
||||
@@ -374,7 +379,7 @@ jobs:
|
||||
include_build_step: true
|
||||
|
||||
- name: Install QA dependencies
|
||||
run: pnpm add -D @google/generative-ai@^0.24.1
|
||||
run: pnpm add -D @google/generative-ai@^0.24.1 demowright@^0.1.0
|
||||
|
||||
- name: Setup ComfyUI server (no launch)
|
||||
uses: ./.github/actions/setup-comfyui-server
|
||||
@@ -473,7 +478,8 @@ jobs:
|
||||
needs: [resolve-matrix, analyze-pr, qa-before, qa-after]
|
||||
if: >-
|
||||
always() &&
|
||||
(needs.qa-after.result == 'success' || needs.qa-before.result == 'success')
|
||||
(needs.qa-after.result == 'success' || needs.qa-before.result == 'success') &&
|
||||
(github.event.pull_request.number || github.event.issue.number || github.event_name == 'push' || github.event_name == 'workflow_dispatch')
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
@@ -650,6 +656,7 @@ jobs:
|
||||
REPO: ${{ github.repository }}
|
||||
run: |
|
||||
NUM="${PR_NUM:-$(gh pr list --repo "$REPO" --head "$BRANCH" --state open --json number --jq '.[0].number // empty')}"
|
||||
[ -z "$NUM" ] && NUM=$(echo "$BRANCH" | sed -n 's/^sno-qa-\([0-9]\+\)$/\1/p')
|
||||
echo "number=${NUM}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
if [ -n "$NUM" ]; then
|
||||
|
||||
@@ -91,3 +91,9 @@
|
||||
2. Agent ran out of turn budget (30 turns / 120s)
|
||||
3. Gemini Flash (old agent) ignores prompt hints
|
||||
**Fix**: Use hybrid agent (Claude Sonnet 4.6 + Gemini vision). Claude's superior reasoning follows instructions precisely.
|
||||
|
||||
## False REPRODUCED from Discovery Tests
|
||||
|
||||
**Symptom**: Badge says REPRODUCED but the test was a debug/discovery test (e.g., "Find String Multiline node type") with trivial assertions like `expect(x).toBeDefined()`.
|
||||
**Cause**: The auto-complete logic saved any passing test as proof of reproduction, including discovery tests that pass trivially.
|
||||
**Fix**: Auto-complete now validates test code for bug-specific assertions — discovery tests (with names containing "Inspect", "Find", "Debug") and trivial assertions (`toBeDefined()`, `toBeGreaterThan(0)`) are excluded from auto-save.
|
||||
|
||||
@@ -51,7 +51,8 @@ const config: KnipConfig = {
|
||||
'@primevue/icons',
|
||||
// QA pipeline deps — installed inline in CI workflow
|
||||
'@anthropic-ai/claude-agent-sdk',
|
||||
'@google/generative-ai'
|
||||
'@google/generative-ai',
|
||||
'demowright'
|
||||
],
|
||||
ignore: [
|
||||
// Auto generated API types
|
||||
|
||||
@@ -291,11 +291,22 @@ export async function runResearchPhase(
|
||||
toolResult: resultText.slice(0, 1000)
|
||||
})
|
||||
|
||||
// Auto-save passing test code for fallback completion
|
||||
// Auto-save passing test code for fallback completion — but only if
|
||||
// the test contains a bug-specific assertion (not just a discovery/debug test)
|
||||
if (resultText.startsWith('TEST PASSED')) {
|
||||
try {
|
||||
finalTestCode = readFileSync(browserTestPath, 'utf-8')
|
||||
lastPassedTurn = turnCount
|
||||
const code = readFileSync(browserTestPath, 'utf-8')
|
||||
const hasBugAssertion =
|
||||
/expect\s*\(/.test(code) &&
|
||||
!/^\s*expect\([^)]+\)\.toBeDefined\(\)/m.test(code) &&
|
||||
!/^\s*expect\([^)]+\)\.toBeGreaterThan\(0\)/m.test(code) &&
|
||||
!/Inspect|Find|Debug|discover/i.test(
|
||||
code.match(/test\(['"`]([^'"`]+)/)?.[1] ?? ''
|
||||
)
|
||||
if (hasBugAssertion) {
|
||||
finalTestCode = code
|
||||
lastPassedTurn = turnCount
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
@@ -392,6 +403,7 @@ export async function runResearchPhase(
|
||||
- ALWAYS call done() when finished, even if the test passed — do not keep iterating after a passing test
|
||||
- Use \`expect.poll()\` for async assertions: \`await expect.poll(() => comfyPage.nodeOps.getGraphNodesCount()).toBe(8)\`
|
||||
- CRITICAL: Your assertions must be SPECIFIC TO THE BUG. A test that asserts \`expect(count).toBeGreaterThan(0)\` proves nothing — it would pass even without the bug. Instead assert the exact broken state, e.g. \`expect(clonedWidgets).toHaveLength(0)\` (missing widgets) or \`expect(zIndex).toBeLessThan(parentZIndex)\` (wrong z-order). If a test passes trivially, it's a false positive.
|
||||
- NEVER write "debug", "discovery", or "inspect node types" tests. These waste turns and produce false REPRODUCED verdicts. If you need to discover node type names, use inspect() or readFixture() — not a passing test.
|
||||
- If you cannot write a bug-specific assertion, call done() with verdict NOT_REPRODUCIBLE and explain why.
|
||||
|
||||
## ComfyPage Fixture API Reference
|
||||
|
||||
@@ -253,22 +253,27 @@ if [ -f "$DEPLOY_DIR/research-log.json" ]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# Fall back to video review verdicts if no research log
|
||||
if [ -z "$RESEARCH_VERDICT" ] && [ -d video-reviews ]; then
|
||||
# Check video review verdicts (always, not just as fallback)
|
||||
VIDEO_REPRODUCED=false
|
||||
if [ -d video-reviews ]; then
|
||||
for rpt in video-reviews/*-qa-video-report.md; do
|
||||
[ -f "$rpt" ] || continue
|
||||
TOTAL_REPORTS=$((TOTAL_REPORTS + 1))
|
||||
# Try structured JSON verdict first (from ## Verdict section)
|
||||
VERDICT_JSON=$(grep -oP '"verdict":\s*"[A-Z_]+' "$rpt" 2>/dev/null | tail -1 | grep -oP '[A-Z_]+$' || true)
|
||||
RISK_JSON=$(grep -oP '"risk":\s*"[a-z]+' "$rpt" 2>/dev/null | tail -1 | grep -oP '[a-z]+$' || true)
|
||||
|
||||
if [ -n "$VERDICT_JSON" ]; then
|
||||
case "$VERDICT_JSON" in
|
||||
REPRODUCED) REPRO_COUNT=$((REPRO_COUNT + 1)) ;;
|
||||
NOT_REPRODUCIBLE) NOT_REPRO_COUNT=$((NOT_REPRO_COUNT + 1)) ;;
|
||||
INCONCLUSIVE) INCONC_COUNT=$((INCONC_COUNT + 1)) ;;
|
||||
esac
|
||||
else
|
||||
echo "Video review verdict: $VERDICT_JSON ($(basename "$rpt"))"
|
||||
[ "$VERDICT_JSON" = "REPRODUCED" ] && VIDEO_REPRODUCED=true
|
||||
# Only count video as separate report if no research log
|
||||
if [ -z "$RESEARCH_VERDICT" ]; then
|
||||
TOTAL_REPORTS=$((TOTAL_REPORTS + 1))
|
||||
case "$VERDICT_JSON" in
|
||||
REPRODUCED) REPRO_COUNT=$((REPRO_COUNT + 1)) ;;
|
||||
NOT_REPRODUCIBLE) NOT_REPRO_COUNT=$((NOT_REPRO_COUNT + 1)) ;;
|
||||
INCONCLUSIVE) INCONC_COUNT=$((INCONC_COUNT + 1)) ;;
|
||||
esac
|
||||
fi
|
||||
elif [ -z "$RESEARCH_VERDICT" ]; then
|
||||
TOTAL_REPORTS=$((TOTAL_REPORTS + 1))
|
||||
# Fallback: grep Summary section (for older reports without ## Verdict)
|
||||
SUMM=$(sed -n '/^## Summary/,/^## /p' "$rpt" 2>/dev/null | head -15)
|
||||
if echo "$SUMM" | grep -iq 'INCONCLUSIVE'; then
|
||||
@@ -277,10 +282,19 @@ if [ -z "$RESEARCH_VERDICT" ] && [ -d video-reviews ]; then
|
||||
NOT_REPRO_COUNT=$((NOT_REPRO_COUNT + 1))
|
||||
elif echo "$SUMM" | grep -iq 'reproduc\|confirm'; then
|
||||
REPRO_COUNT=$((REPRO_COUNT + 1))
|
||||
VIDEO_REPRODUCED=true
|
||||
fi
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
# Upgrade reproduction method to "both" when E2E and video agree
|
||||
if [ "$REPRO_METHOD" = "e2e_test" ] && [ "$VIDEO_REPRODUCED" = "true" ]; then
|
||||
REPRO_METHOD="both"
|
||||
echo "Upgraded reproducedBy to 'both' (E2E + video review agree)"
|
||||
elif [ -z "$RESEARCH_VERDICT" ] && [ "$VIDEO_REPRODUCED" = "true" ]; then
|
||||
REPRO_METHOD="video"
|
||||
fi
|
||||
FAIL_COUNT=$((TOTAL_REPORTS - REPRO_COUNT - NOT_REPRO_COUNT))
|
||||
[ "$FAIL_COUNT" -lt 0 ] && FAIL_COUNT=0
|
||||
echo "DEBUG verdict: repro=${REPRO_COUNT} not_repro=${NOT_REPRO_COUNT} inconc=${INCONC_COUNT} fail=${FAIL_COUNT} total=${TOTAL_REPORTS}"
|
||||
|
||||
@@ -1965,52 +1965,28 @@ async function main() {
|
||||
)
|
||||
console.warn(`Evidence: ${research.evidence.slice(0, 200)}`)
|
||||
|
||||
// ═══ Phase 2: Run passing test with video recording ═══
|
||||
// ═══ Phase 2: Run passing test with demowright video recording ═══
|
||||
if (research.verdict === 'REPRODUCED' && research.testCode) {
|
||||
console.warn('Phase 2: Recording test execution with video')
|
||||
console.warn('Phase 2: Recording test execution with demowright')
|
||||
const projectRoot = process.cwd()
|
||||
const browserTestFile = `${projectRoot}/browser_tests/tests/qa-reproduce.spec.ts`
|
||||
const testResultsDir = `${opts.outputDir}/test-results`
|
||||
// Inject cursor overlay into the test — add page.addInitScript in beforeEach
|
||||
const cursorScript = `await comfyPage.page.addInitScript(() => {
|
||||
var c=document.createElement('div');c.id='qa-cursor';
|
||||
c.innerHTML='<svg width="20" height="20" viewBox="0 0 24 24" fill="white" stroke="black" stroke-width="1.5"><path d="M4 2l14 10-6.5 1.5L15 21l-3.5-1.5L8 21l-1.5-7.5L2 16z"/></svg>';
|
||||
Object.assign(c.style,{position:'fixed',zIndex:'2147483647',pointerEvents:'none',width:'20px',height:'20px',margin:'-2px 0 0 -2px',opacity:'0.95'});
|
||||
if(document.body)document.body.appendChild(c);
|
||||
else document.addEventListener('DOMContentLoaded',function(){document.body.appendChild(c)});
|
||||
document.addEventListener('mousemove',function(e){c.style.left=e.clientX+'px';c.style.top=e.clientY+'px'});
|
||||
});`
|
||||
// Insert cursor injection after the first line of the test body (after async ({ comfyPage }) => {)
|
||||
let testCode = research.testCode
|
||||
const testBodyMatch = testCode.match(
|
||||
/async\s*\(\{\s*comfyPage\s*\}\)\s*=>\s*\{/
|
||||
)
|
||||
if (testBodyMatch && testBodyMatch.index !== undefined) {
|
||||
const insertPos = testBodyMatch.index + testBodyMatch[0].length
|
||||
testCode =
|
||||
testCode.slice(0, insertPos) +
|
||||
'\n ' +
|
||||
cursorScript +
|
||||
'\n' +
|
||||
testCode.slice(insertPos)
|
||||
}
|
||||
// Inject 800ms pauses between actions for human-readable video
|
||||
// Uses comfyPage.page since test code uses comfyPageFixture
|
||||
testCode = testCode.replace(
|
||||
/(\n\s*)(await\s+(?:comfyPage|topbar|firstNode|page|canvas|expect))/g,
|
||||
'$1await comfyPage.page.waitForTimeout(800);\n$1$2'
|
||||
)
|
||||
writeFileSync(browserTestFile, testCode)
|
||||
// Write the test as-is — demowright handles cursor, keyboard, slowdown
|
||||
writeFileSync(browserTestFile, research.testCode)
|
||||
try {
|
||||
const output = execSync(
|
||||
`cd "${projectRoot}" && npx playwright test browser_tests/tests/qa-reproduce.spec.ts --reporter=list --timeout=30000 --retries=0 --workers=1 --output="${testResultsDir}" 2>&1`,
|
||||
`cd "${projectRoot}" && npx playwright test browser_tests/tests/qa-reproduce.spec.ts --reporter=list --timeout=60000 --retries=0 --workers=1 --output="${testResultsDir}" 2>&1`,
|
||||
{
|
||||
timeout: 90000,
|
||||
timeout: 120000,
|
||||
encoding: 'utf-8',
|
||||
env: {
|
||||
...process.env,
|
||||
COMFYUI_BASE_URL: opts.serverUrl,
|
||||
PLAYWRIGHT_LOCAL: '1' // Enables video=on + trace=on in playwright.config.ts
|
||||
PLAYWRIGHT_LOCAL: '1',
|
||||
NODE_OPTIONS: '--require demowright/register',
|
||||
QA_HUD_DELAY: '300',
|
||||
QA_HUD_CURSOR_STYLE: 'default',
|
||||
QA_HUD_KEY_FADE: '2000'
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user