feat: enable demowright TTS audio capture in QA videos

- Set QA_HUD_AUDIO=1 and QA_HUD_OUTPUT_DIR=.demowright to enable the demowright audio pipeline
- Install ffmpeg in qa-before/after jobs for audio muxing
- After Phase 2, check .demowright/ for a rendered MP4 with audio and copy it to qa-session.mp4 (the raw .webm is still copied as a fallback)
- Add narration detection to Gemini video review prompt
- Increase Phase 2 timeouts (Playwright --timeout 60s -> 120s, execSync 120s -> 180s) for TTS fetch latency

Amp-Thread-ID: https://ampcode.com/threads/T-019d7181-cd11-7255-9f13-fecc658d2751
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
sno
2026-04-09 09:46:57 +00:00
committed by snomiao
parent 10d6e93197
commit c1f7e03c1c
3 changed files with 90 additions and 17 deletions

View File

@@ -237,11 +237,15 @@ jobs:
with:
launch_server: 'false'
- name: Install Playwright browser
- name: Install Playwright browser and ffmpeg
shell: bash
run: |
npx playwright install chromium
mkdir -p "$QA_ARTIFACTS"
if ! command -v ffmpeg &>/dev/null; then
sudo apt-get update -qq && sudo apt-get install -y -qq ffmpeg
fi
ffmpeg -version | head -1
- name: Get PR diff
if: needs.resolve-matrix.outputs.target_type == 'pr'
@@ -397,11 +401,15 @@ jobs:
with:
launch_server: 'false'
- name: Install Playwright browser
- name: Install Playwright browser and ffmpeg
shell: bash
run: |
npx playwright install chromium
mkdir -p "$QA_ARTIFACTS"
if ! command -v ffmpeg &>/dev/null; then
sudo apt-get update -qq && sudo apt-get install -y -qq ffmpeg
fi
ffmpeg -version | head -1
- name: Get PR diff
shell: bash
@@ -724,11 +732,11 @@ jobs:
HAS_FILES=false
# Check for before files (flat or in subdirectory)
if [ -d "qa-artifacts/before" ] && find qa-artifacts/before -name '*.webm' -o -name '*.png' 2>/dev/null | grep -q .; then
if [ -d "qa-artifacts/before" ] && find qa-artifacts/before -name '*.webm' -o -name '*.mp4' -o -name '*.png' 2>/dev/null | grep -q .; then
HAS_FILES=true
fi
# Check for after files
if [ -d "qa-artifacts/after" ] && find qa-artifacts/after -name '*.webm' -o -name '*.png' 2>/dev/null | grep -q .; then
if [ -d "qa-artifacts/after" ] && find qa-artifacts/after -name '*.webm' -o -name '*.mp4' -o -name '*.png' 2>/dev/null | grep -q .; then
HAS_FILES=true
fi
@@ -774,6 +782,11 @@ jobs:
[ -d "$dir" ] || continue
# Convert known video names (single + multi-pass + before)
for name in qa-session qa-session-1 qa-session-2 qa-session-3 qa-before-session; do
# Skip if demowright already produced an MP4 with audio
if [ -f "$dir/${name}.mp4" ] && [ -s "$dir/${name}.mp4" ]; then
echo "Using existing MP4 (with audio): $dir/${name}.mp4 ($(du -h "$dir/${name}.mp4" | cut -f1))"
continue
fi
if [ -f "$dir/${name}.webm" ] && [ -s "$dir/${name}.webm" ]; then
convert_video "$dir/${name}.webm" "$dir/${name}.mp4"
fi

View File

@@ -1972,9 +1972,30 @@ async function main() {
const videoTestFile = `${projectRoot}/browser_tests/tests/qa-reproduce.spec.ts`
const testResultsDir = `${opts.outputDir}/test-results`
// Write the E2E test as-is — demowright register patches Browser.newContext
// to inject cursor overlay, keystroke badges, and action delays
writeFileSync(videoTestFile, research.testCode)
// Inject demowright narrate() call for TTS voice narration
const issueTitle =
issueCtx.match(/Title:\s*(.+)/)?.[1]?.trim() ??
'Bug Reproduction'
let testCode = research.testCode
const bodyMatch = testCode.match(
/async\s*\(\{\s*comfyPage\s*\}\)\s*=>\s*\{/
)
if (bodyMatch?.index !== undefined) {
const pos = bodyMatch.index + bodyMatch[0].length
const narrationInject = `
// demowright: narrate issue title (TTS + subtitle)
try {
const { narrate: _narrate } = await import('demowright/helpers')
await _narrate(comfyPage.page, ${JSON.stringify('Reproducing: ' + issueTitle)})
console.log('[qa] narrate() completed successfully')
} catch (e) {
console.log('[qa] narrate() failed:', e instanceof Error ? e.message : e)
}
`
testCode =
testCode.slice(0, pos) + narrationInject + testCode.slice(pos)
}
writeFileSync(videoTestFile, testCode)
// Also save original test for the report
writeFileSync(
@@ -1982,11 +2003,12 @@ async function main() {
research.testCode
)
const demowrightDir = `${projectRoot}/.demowright`
try {
const output = execSync(
`cd "${projectRoot}" && npx playwright test browser_tests/tests/qa-reproduce.spec.ts --reporter=list --timeout=60000 --retries=0 --workers=1 --output="${testResultsDir}" 2>&1`,
`cd "${projectRoot}" && npx playwright test browser_tests/tests/qa-reproduce.spec.ts --reporter=list --timeout=120000 --retries=0 --workers=1 --output="${testResultsDir}" 2>&1`,
{
timeout: 120000,
timeout: 180000,
encoding: 'utf-8',
env: {
...process.env,
@@ -1995,18 +2017,50 @@ async function main() {
NODE_OPTIONS: '--require demowright/register',
QA_HUD_DELAY: '300',
QA_HUD_CURSOR_STYLE: 'default',
QA_HUD_KEY_FADE: '2000'
QA_HUD_KEY_FADE: '2000',
QA_HUD_AUDIO: '1',
QA_HUD_OUTPUT_DIR: '.demowright'
}
}
)
console.warn(`Phase 2: Demo video recorded\n${output.slice(-300)}`)
console.warn(`Phase 2: Demo video recorded\n${output.slice(-500)}`)
} catch (e) {
const err = e as { stdout?: string }
console.warn(
`Phase 2: Demo recording failed\n${(err.stdout || '').slice(-300)}`
`Phase 2: Demo recording failed\n${(err.stdout || '').slice(-500)}`
)
}
// Copy recorded video to outputDir so deploy script finds it
// Check for demowright-rendered MP4 (has TTS audio muxed in)
let demowrightMp4 = ''
try {
const mp4s = execSync(
`find "${demowrightDir}" -name '*.mp4' -type f 2>/dev/null`,
{ encoding: 'utf-8' }
)
.trim()
.split('\n')
.filter(Boolean)
if (mp4s.length > 0) {
demowrightMp4 = mp4s[0]
console.warn(
`Phase 2: demowright MP4 with audio → ${demowrightMp4}`
)
}
} catch {
/* no demowright output */
}
if (demowrightMp4) {
execSync(
`cp "${demowrightMp4}" "${opts.outputDir}/qa-session.mp4"`
)
console.warn(
`Phase 2: Narrated video → ${opts.outputDir}/qa-session.mp4`
)
}
// Also copy raw webm as fallback
try {
const videos = execSync(
`find "${testResultsDir}" -name '*.webm' -type f 2>/dev/null`,
@@ -2017,7 +2071,7 @@ async function main() {
.filter(Boolean)
if (videos.length > 0) {
execSync(`cp "${videos[0]}" "${opts.outputDir}/qa-session.webm"`)
console.warn(`Phase 2: Video → ${opts.outputDir}/qa-session.webm`)
console.warn(`Phase 2: Raw video → ${opts.outputDir}/qa-session.webm`)
}
} catch {
console.warn('Phase 2: No test video found')

View File

@@ -438,12 +438,13 @@ function buildSingleVideoPrompt(
if (prContext) {
lines.push(
'## Phase 1: Blind Observation (describe what you SEE)',
'## Phase 1: Blind Observation (describe what you SEE and HEAR)',
'First, describe every UI interaction chronologically WITHOUT knowing the expected outcome:',
'- What elements does the user click/hover/type?',
'- What dialogs/menus open and close?',
'- What keyboard indicators appear? (look for subtitle overlays)',
'- What is the BEFORE state and AFTER state of each action?',
'- **Audio**: Does the video have a TTS narration audio track? If yes, transcribe what the voice says. This narration describes the bug being reproduced.',
'',
'## Phase 2: Compare against expected behavior',
'Now compare your observations against the context below.',
@@ -513,12 +514,17 @@ function buildSingleVideoPrompt(
'## Possible Issues (Needs Human Verification)',
'## Overall Risk',
'',
'## Narration',
'If the video contains a TTS audio narration track, transcribe it here.',
'If there is no audio or the video is silent, write "No narration detected."',
'',
'## Verdict',
'End your report with this EXACT JSON block (no markdown fence):',
'{"verdict": "REPRODUCED" | "NOT_REPRODUCIBLE" | "INCONCLUSIVE", "risk": "low" | "medium" | "high" | null, "confidence": "high" | "medium" | "low"}',
'{"verdict": "REPRODUCED" | "NOT_REPRODUCIBLE" | "INCONCLUSIVE", "risk": "low" | "medium" | "high" | null, "confidence": "high" | "medium" | "low", "narrationDetected": true | false}',
'- REPRODUCED: the bug/behavior is clearly visible in the video',
'- NOT_REPRODUCIBLE: the steps were performed correctly but the bug was not observed',
'- INCONCLUSIVE: the reproduction steps were not performed or the video is insufficient'
'- INCONCLUSIVE: the reproduction steps were not performed or the video is insufficient',
'- narrationDetected: true if you heard TTS voice narration in the video, false if silent'
)
return lines.filter(Boolean).join('\n')