mirror of
https://github.com/Comfy-Org/ComfyUI_frontend.git
synced 2026-04-20 14:30:41 +00:00
feat: enable demowright TTS audio capture in QA videos
- Set QA_HUD_AUDIO=1 to enable demowright audio pipeline
- Install ffmpeg in qa-before/after jobs for audio muxing
- After Phase 2, check .demowright/ for rendered MP4 with audio
- Add narration detection to Gemini video review prompt
- Increase Phase 2 timeout for TTS fetch latency

Amp-Thread-ID: https://ampcode.com/threads/T-019d7181-cd11-7255-9f13-fecc658d2751
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
21
.github/workflows/pr-qa.yaml
vendored
21
.github/workflows/pr-qa.yaml
vendored
@@ -237,11 +237,15 @@ jobs:
|
||||
with:
|
||||
launch_server: 'false'
|
||||
|
||||
- name: Install Playwright browser
|
||||
- name: Install Playwright browser and ffmpeg
|
||||
shell: bash
|
||||
run: |
|
||||
npx playwright install chromium
|
||||
mkdir -p "$QA_ARTIFACTS"
|
||||
if ! command -v ffmpeg &>/dev/null; then
|
||||
sudo apt-get update -qq && sudo apt-get install -y -qq ffmpeg
|
||||
fi
|
||||
ffmpeg -version | head -1
|
||||
|
||||
- name: Get PR diff
|
||||
if: needs.resolve-matrix.outputs.target_type == 'pr'
|
||||
@@ -397,11 +401,15 @@ jobs:
|
||||
with:
|
||||
launch_server: 'false'
|
||||
|
||||
- name: Install Playwright browser
|
||||
- name: Install Playwright browser and ffmpeg
|
||||
shell: bash
|
||||
run: |
|
||||
npx playwright install chromium
|
||||
mkdir -p "$QA_ARTIFACTS"
|
||||
if ! command -v ffmpeg &>/dev/null; then
|
||||
sudo apt-get update -qq && sudo apt-get install -y -qq ffmpeg
|
||||
fi
|
||||
ffmpeg -version | head -1
|
||||
|
||||
- name: Get PR diff
|
||||
shell: bash
|
||||
@@ -724,11 +732,11 @@ jobs:
|
||||
HAS_FILES=false
|
||||
|
||||
# Check for before files (flat or in subdirectory)
|
||||
if [ -d "qa-artifacts/before" ] && find qa-artifacts/before -name '*.webm' -o -name '*.png' 2>/dev/null | grep -q .; then
|
||||
if [ -d "qa-artifacts/before" ] && find qa-artifacts/before -name '*.webm' -o -name '*.mp4' -o -name '*.png' 2>/dev/null | grep -q .; then
|
||||
HAS_FILES=true
|
||||
fi
|
||||
# Check for after files
|
||||
if [ -d "qa-artifacts/after" ] && find qa-artifacts/after -name '*.webm' -o -name '*.png' 2>/dev/null | grep -q .; then
|
||||
if [ -d "qa-artifacts/after" ] && find qa-artifacts/after -name '*.webm' -o -name '*.mp4' -o -name '*.png' 2>/dev/null | grep -q .; then
|
||||
HAS_FILES=true
|
||||
fi
|
||||
|
||||
@@ -774,6 +782,11 @@ jobs:
|
||||
[ -d "$dir" ] || continue
|
||||
# Convert known video names (single + multi-pass + before)
|
||||
for name in qa-session qa-session-1 qa-session-2 qa-session-3 qa-before-session; do
|
||||
# Skip if demowright already produced an MP4 with audio
|
||||
if [ -f "$dir/${name}.mp4" ] && [ -s "$dir/${name}.mp4" ]; then
|
||||
echo "Using existing MP4 (with audio): $dir/${name}.mp4 ($(du -h "$dir/${name}.mp4" | cut -f1))"
|
||||
continue
|
||||
fi
|
||||
if [ -f "$dir/${name}.webm" ] && [ -s "$dir/${name}.webm" ]; then
|
||||
convert_video "$dir/${name}.webm" "$dir/${name}.mp4"
|
||||
fi
|
||||
|
||||
@@ -1972,9 +1972,30 @@ async function main() {
|
||||
const videoTestFile = `${projectRoot}/browser_tests/tests/qa-reproduce.spec.ts`
|
||||
const testResultsDir = `${opts.outputDir}/test-results`
|
||||
|
||||
// Write the E2E test as-is — demowright register patches Browser.newContext
|
||||
// to inject cursor overlay, keystroke badges, and action delays
|
||||
writeFileSync(videoTestFile, research.testCode)
|
||||
// Inject demowright narrate() call for TTS voice narration
|
||||
const issueTitle =
|
||||
issueCtx.match(/Title:\s*(.+)/)?.[1]?.trim() ??
|
||||
'Bug Reproduction'
|
||||
let testCode = research.testCode
|
||||
const bodyMatch = testCode.match(
|
||||
/async\s*\(\{\s*comfyPage\s*\}\)\s*=>\s*\{/
|
||||
)
|
||||
if (bodyMatch?.index !== undefined) {
|
||||
const pos = bodyMatch.index + bodyMatch[0].length
|
||||
const narrationInject = `
|
||||
// demowright: narrate issue title (TTS + subtitle)
|
||||
try {
|
||||
const { narrate: _narrate } = await import('demowright/helpers')
|
||||
await _narrate(comfyPage.page, ${JSON.stringify('Reproducing: ' + issueTitle)})
|
||||
console.log('[qa] narrate() completed successfully')
|
||||
} catch (e) {
|
||||
console.log('[qa] narrate() failed:', e instanceof Error ? e.message : e)
|
||||
}
|
||||
`
|
||||
testCode =
|
||||
testCode.slice(0, pos) + narrationInject + testCode.slice(pos)
|
||||
}
|
||||
writeFileSync(videoTestFile, testCode)
|
||||
|
||||
// Also save original test for the report
|
||||
writeFileSync(
|
||||
@@ -1982,11 +2003,12 @@ async function main() {
|
||||
research.testCode
|
||||
)
|
||||
|
||||
const demowrightDir = `${projectRoot}/.demowright`
|
||||
try {
|
||||
const output = execSync(
|
||||
`cd "${projectRoot}" && npx playwright test browser_tests/tests/qa-reproduce.spec.ts --reporter=list --timeout=60000 --retries=0 --workers=1 --output="${testResultsDir}" 2>&1`,
|
||||
`cd "${projectRoot}" && npx playwright test browser_tests/tests/qa-reproduce.spec.ts --reporter=list --timeout=120000 --retries=0 --workers=1 --output="${testResultsDir}" 2>&1`,
|
||||
{
|
||||
timeout: 120000,
|
||||
timeout: 180000,
|
||||
encoding: 'utf-8',
|
||||
env: {
|
||||
...process.env,
|
||||
@@ -1995,18 +2017,50 @@ async function main() {
|
||||
NODE_OPTIONS: '--require demowright/register',
|
||||
QA_HUD_DELAY: '300',
|
||||
QA_HUD_CURSOR_STYLE: 'default',
|
||||
QA_HUD_KEY_FADE: '2000'
|
||||
QA_HUD_KEY_FADE: '2000',
|
||||
QA_HUD_AUDIO: '1',
|
||||
QA_HUD_OUTPUT_DIR: '.demowright'
|
||||
}
|
||||
}
|
||||
)
|
||||
console.warn(`Phase 2: Demo video recorded\n${output.slice(-300)}`)
|
||||
console.warn(`Phase 2: Demo video recorded\n${output.slice(-500)}`)
|
||||
} catch (e) {
|
||||
const err = e as { stdout?: string }
|
||||
console.warn(
|
||||
`Phase 2: Demo recording failed\n${(err.stdout || '').slice(-300)}`
|
||||
`Phase 2: Demo recording failed\n${(err.stdout || '').slice(-500)}`
|
||||
)
|
||||
}
|
||||
// Copy recorded video to outputDir so deploy script finds it
|
||||
|
||||
// Check for demowright-rendered MP4 (has TTS audio muxed in)
|
||||
let demowrightMp4 = ''
|
||||
try {
|
||||
const mp4s = execSync(
|
||||
`find "${demowrightDir}" -name '*.mp4' -type f 2>/dev/null`,
|
||||
{ encoding: 'utf-8' }
|
||||
)
|
||||
.trim()
|
||||
.split('\n')
|
||||
.filter(Boolean)
|
||||
if (mp4s.length > 0) {
|
||||
demowrightMp4 = mp4s[0]
|
||||
console.warn(
|
||||
`Phase 2: demowright MP4 with audio → ${demowrightMp4}`
|
||||
)
|
||||
}
|
||||
} catch {
|
||||
/* no demowright output */
|
||||
}
|
||||
|
||||
if (demowrightMp4) {
|
||||
execSync(
|
||||
`cp "${demowrightMp4}" "${opts.outputDir}/qa-session.mp4"`
|
||||
)
|
||||
console.warn(
|
||||
`Phase 2: Narrated video → ${opts.outputDir}/qa-session.mp4`
|
||||
)
|
||||
}
|
||||
|
||||
// Also copy raw webm as fallback
|
||||
try {
|
||||
const videos = execSync(
|
||||
`find "${testResultsDir}" -name '*.webm' -type f 2>/dev/null`,
|
||||
@@ -2017,7 +2071,7 @@ async function main() {
|
||||
.filter(Boolean)
|
||||
if (videos.length > 0) {
|
||||
execSync(`cp "${videos[0]}" "${opts.outputDir}/qa-session.webm"`)
|
||||
console.warn(`Phase 2: Video → ${opts.outputDir}/qa-session.webm`)
|
||||
console.warn(`Phase 2: Raw video → ${opts.outputDir}/qa-session.webm`)
|
||||
}
|
||||
} catch {
|
||||
console.warn('Phase 2: No test video found')
|
||||
|
||||
@@ -438,12 +438,13 @@ function buildSingleVideoPrompt(
|
||||
|
||||
if (prContext) {
|
||||
lines.push(
|
||||
'## Phase 1: Blind Observation (describe what you SEE)',
|
||||
'## Phase 1: Blind Observation (describe what you SEE and HEAR)',
|
||||
'First, describe every UI interaction chronologically WITHOUT knowing the expected outcome:',
|
||||
'- What elements does the user click/hover/type?',
|
||||
'- What dialogs/menus open and close?',
|
||||
'- What keyboard indicators appear? (look for subtitle overlays)',
|
||||
'- What is the BEFORE state and AFTER state of each action?',
|
||||
'- **Audio**: Does the video have a TTS narration audio track? If yes, transcribe what the voice says. This narration describes the bug being reproduced.',
|
||||
'',
|
||||
'## Phase 2: Compare against expected behavior',
|
||||
'Now compare your observations against the context below.',
|
||||
@@ -513,12 +514,17 @@ function buildSingleVideoPrompt(
|
||||
'## Possible Issues (Needs Human Verification)',
|
||||
'## Overall Risk',
|
||||
'',
|
||||
'## Narration',
|
||||
'If the video contains a TTS audio narration track, transcribe it here.',
|
||||
'If there is no audio or the video is silent, write "No narration detected."',
|
||||
'',
|
||||
'## Verdict',
|
||||
'End your report with this EXACT JSON block (no markdown fence):',
|
||||
'{"verdict": "REPRODUCED" | "NOT_REPRODUCIBLE" | "INCONCLUSIVE", "risk": "low" | "medium" | "high" | null, "confidence": "high" | "medium" | "low"}',
|
||||
'{"verdict": "REPRODUCED" | "NOT_REPRODUCIBLE" | "INCONCLUSIVE", "risk": "low" | "medium" | "high" | null, "confidence": "high" | "medium" | "low", "narrationDetected": true | false}',
|
||||
'- REPRODUCED: the bug/behavior is clearly visible in the video',
|
||||
'- NOT_REPRODUCIBLE: the steps were performed correctly but the bug was not observed',
|
||||
'- INCONCLUSIVE: the reproduction steps were not performed or the video is insufficient'
|
||||
'- INCONCLUSIVE: the reproduction steps were not performed or the video is insufficient',
|
||||
'- narrationDetected: true if you heard TTS voice narration in the video, false if silent'
|
||||
)
|
||||
|
||||
return lines.filter(Boolean).join('\n')
|
||||
|
||||
Reference in New Issue
Block a user