feat: enable demowright TTS audio capture in QA videos

- Set QA_HUD_AUDIO=1 to enable demowright audio pipeline
- Install ffmpeg in qa-before/after jobs for audio muxing
- After Phase 2, check .demowright/ for rendered MP4 with audio
- Add narration detection to Gemini video review prompt
- Increase Phase 2 timeout for TTS fetch latency

Amp-Thread-ID: https://ampcode.com/threads/T-019d7181-cd11-7255-9f13-fecc658d2751
Co-authored-by: Amp <amp@ampcode.com>
2026-04-20 14:30:41 +00:00 · 2026-04-09 09:46:57 +00:00
parent 10d6e93197
commit c1f7e03c1c
3 changed files with 90 additions and 17 deletions
--- a/.github/workflows/pr-qa.yaml
+++ b/.github/workflows/pr-qa.yaml
@@ -237,11 +237,15 @@ jobs:
        with:
          launch_server: 'false'

-      - name: Install Playwright browser
+      - name: Install Playwright browser and ffmpeg
        shell: bash
        run: |
          npx playwright install chromium
          mkdir -p "$QA_ARTIFACTS"
+          if ! command -v ffmpeg &>/dev/null; then
+            sudo apt-get update -qq && sudo apt-get install -y -qq ffmpeg
+          fi
+          ffmpeg -version | head -1

      - name: Get PR diff
        if: needs.resolve-matrix.outputs.target_type == 'pr'
@@ -397,11 +401,15 @@ jobs:
        with:
          launch_server: 'false'

-      - name: Install Playwright browser
+      - name: Install Playwright browser and ffmpeg
        shell: bash
        run: |
          npx playwright install chromium
          mkdir -p "$QA_ARTIFACTS"
+          if ! command -v ffmpeg &>/dev/null; then
+            sudo apt-get update -qq && sudo apt-get install -y -qq ffmpeg
+          fi
+          ffmpeg -version | head -1

      - name: Get PR diff
        shell: bash
@@ -724,11 +732,11 @@ jobs:
            HAS_FILES=false

            # Check for before files (flat or in subdirectory)
-            if [ -d "qa-artifacts/before" ] && find qa-artifacts/before -name '*.webm' -o -name '*.png' 2>/dev/null | grep -q .; then
+            if [ -d "qa-artifacts/before" ] && find qa-artifacts/before -name '*.webm' -o -name '*.mp4' -o -name '*.png' 2>/dev/null | grep -q .; then
              HAS_FILES=true
            fi
            # Check for after files
-            if [ -d "qa-artifacts/after" ] && find qa-artifacts/after -name '*.webm' -o -name '*.png' 2>/dev/null | grep -q .; then
+            if [ -d "qa-artifacts/after" ] && find qa-artifacts/after -name '*.webm' -o -name '*.mp4' -o -name '*.png' 2>/dev/null | grep -q .; then
              HAS_FILES=true
            fi

@@ -774,6 +782,11 @@ jobs:
            [ -d "$dir" ] || continue
            # Convert known video names (single + multi-pass + before)
            for name in qa-session qa-session-1 qa-session-2 qa-session-3 qa-before-session; do
+              # Skip if demowright already produced an MP4 with audio
+              if [ -f "$dir/${name}.mp4" ] && [ -s "$dir/${name}.mp4" ]; then
+                echo "Using existing MP4 (with audio): $dir/${name}.mp4 ($(du -h "$dir/${name}.mp4" | cut -f1))"
+                continue
+              fi
              if [ -f "$dir/${name}.webm" ] && [ -s "$dir/${name}.webm" ]; then
                convert_video "$dir/${name}.webm" "$dir/${name}.mp4"
              fi
--- a/scripts/qa-record.ts
+++ b/scripts/qa-record.ts
@@ -1972,9 +1972,30 @@ async function main() {
          const videoTestFile = `${projectRoot}/browser_tests/tests/qa-reproduce.spec.ts`
          const testResultsDir = `${opts.outputDir}/test-results`

-          // Write the E2E test as-is — demowright register patches Browser.newContext
-          // to inject cursor overlay, keystroke badges, and action delays
-          writeFileSync(videoTestFile, research.testCode)
+          // Inject demowright narrate() call for TTS voice narration
+          const issueTitle =
+            issueCtx.match(/Title:\s*(.+)/)?.[1]?.trim() ??
+            'Bug Reproduction'
+          let testCode = research.testCode
+          const bodyMatch = testCode.match(
+            /async\s*\(\{\s*comfyPage\s*\}\)\s*=>\s*\{/
+          )
+          if (bodyMatch?.index !== undefined) {
+            const pos = bodyMatch.index + bodyMatch[0].length
+            const narrationInject = `
+    // demowright: narrate issue title (TTS + subtitle)
+    try {
+      const { narrate: _narrate } = await import('demowright/helpers')
+      await _narrate(comfyPage.page, ${JSON.stringify('Reproducing: ' + issueTitle)})
+      console.log('[qa] narrate() completed successfully')
+    } catch (e) {
+      console.log('[qa] narrate() failed:', e instanceof Error ? e.message : e)
+    }
+`
+            testCode =
+              testCode.slice(0, pos) + narrationInject + testCode.slice(pos)
+          }
+          writeFileSync(videoTestFile, testCode)

          // Also save original test for the report
          writeFileSync(
@@ -1982,11 +2003,12 @@ async function main() {
            research.testCode
          )

+          const demowrightDir = `${projectRoot}/.demowright`
          try {
            const output = execSync(
-              `cd "${projectRoot}" && npx playwright test browser_tests/tests/qa-reproduce.spec.ts --reporter=list --timeout=60000 --retries=0 --workers=1 --output="${testResultsDir}" 2>&1`,
+              `cd "${projectRoot}" && npx playwright test browser_tests/tests/qa-reproduce.spec.ts --reporter=list --timeout=120000 --retries=0 --workers=1 --output="${testResultsDir}" 2>&1`,
              {
-                timeout: 120000,
+                timeout: 180000,
                encoding: 'utf-8',
                env: {
                  ...process.env,
@@ -1995,18 +2017,50 @@ async function main() {
                  NODE_OPTIONS: '--require demowright/register',
                  QA_HUD_DELAY: '300',
                  QA_HUD_CURSOR_STYLE: 'default',
-                  QA_HUD_KEY_FADE: '2000'
+                  QA_HUD_KEY_FADE: '2000',
+                  QA_HUD_AUDIO: '1',
+                  QA_HUD_OUTPUT_DIR: '.demowright'
                }
              }
            )
-            console.warn(`Phase 2: Demo video recorded\n${output.slice(-300)}`)
+            console.warn(`Phase 2: Demo video recorded\n${output.slice(-500)}`)
          } catch (e) {
            const err = e as { stdout?: string }
            console.warn(
-              `Phase 2: Demo recording failed\n${(err.stdout || '').slice(-300)}`
+              `Phase 2: Demo recording failed\n${(err.stdout || '').slice(-500)}`
            )
          }
-          // Copy recorded video to outputDir so deploy script finds it
+
+          // Check for demowright-rendered MP4 (has TTS audio muxed in)
+          let demowrightMp4 = ''
+          try {
+            const mp4s = execSync(
+              `find "${demowrightDir}" -name '*.mp4' -type f 2>/dev/null`,
+              { encoding: 'utf-8' }
+            )
+              .trim()
+              .split('\n')
+              .filter(Boolean)
+            if (mp4s.length > 0) {
+              demowrightMp4 = mp4s[0]
+              console.warn(
+                `Phase 2: demowright MP4 with audio → ${demowrightMp4}`
+              )
+            }
+          } catch {
+            /* no demowright output */
+          }
+
+          if (demowrightMp4) {
+            execSync(
+              `cp "${demowrightMp4}" "${opts.outputDir}/qa-session.mp4"`
+            )
+            console.warn(
+              `Phase 2: Narrated video → ${opts.outputDir}/qa-session.mp4`
+            )
+          }
+
+          // Also copy raw webm as fallback
          try {
            const videos = execSync(
              `find "${testResultsDir}" -name '*.webm' -type f 2>/dev/null`,
@@ -2017,7 +2071,7 @@ async function main() {
              .filter(Boolean)
            if (videos.length > 0) {
              execSync(`cp "${videos[0]}" "${opts.outputDir}/qa-session.webm"`)
-              console.warn(`Phase 2: Video → ${opts.outputDir}/qa-session.webm`)
+              console.warn(`Phase 2: Raw video → ${opts.outputDir}/qa-session.webm`)
            }
          } catch {
            console.warn('Phase 2: No test video found')
--- a/scripts/qa-video-review.ts
+++ b/scripts/qa-video-review.ts
@@ -438,12 +438,13 @@ function buildSingleVideoPrompt(

  if (prContext) {
    lines.push(
-      '## Phase 1: Blind Observation (describe what you SEE)',
+      '## Phase 1: Blind Observation (describe what you SEE and HEAR)',
      'First, describe every UI interaction chronologically WITHOUT knowing the expected outcome:',
      '- What elements does the user click/hover/type?',
      '- What dialogs/menus open and close?',
      '- What keyboard indicators appear? (look for subtitle overlays)',
      '- What is the BEFORE state and AFTER state of each action?',
+      '- **Audio**: Does the video have a TTS narration audio track? If yes, transcribe what the voice says. This narration describes the bug being reproduced.',
      '',
      '## Phase 2: Compare against expected behavior',
      'Now compare your observations against the context below.',
@@ -513,12 +514,17 @@ function buildSingleVideoPrompt(
    '## Possible Issues (Needs Human Verification)',
    '## Overall Risk',
    '',
+    '## Narration',
+    'If the video contains a TTS audio narration track, transcribe it here.',
+    'If there is no audio or the video is silent, write "No narration detected."',
+    '',
    '## Verdict',
    'End your report with this EXACT JSON block (no markdown fence):',
-    '{"verdict": "REPRODUCED" | "NOT_REPRODUCIBLE" | "INCONCLUSIVE", "risk": "low" | "medium" | "high" | null, "confidence": "high" | "medium" | "low"}',
+    '{"verdict": "REPRODUCED" | "NOT_REPRODUCIBLE" | "INCONCLUSIVE", "risk": "low" | "medium" | "high" | null, "confidence": "high" | "medium" | "low", "narrationDetected": true | false}',
    '- REPRODUCED: the bug/behavior is clearly visible in the video',
    '- NOT_REPRODUCIBLE: the steps were performed correctly but the bug was not observed',
-    '- INCONCLUSIVE: the reproduction steps were not performed or the video is insufficient'
+    '- INCONCLUSIVE: the reproduction steps were not performed or the video is insufficient',
+    '- narrationDetected: true if you heard TTS voice narration in the video, false if silent'
  )

  return lines.filter(Boolean).join('\n')