feat: agentic screenshot feedback loop + multi-pass recording

Replace single-shot step generation in reproduce mode with an agentic loop where Gemini sees the screen after each action and decides what to do next. For multi-bug issues, decompose the issue into sub-issues and run a separate recording pass for each.

- Extract executeAction() from executeSteps() for reuse
- Add reload and done action types
- Add captureScreenshotForGemini() (JPEG q50, ~50KB)
- Add runAgenticLoop() with sliding window history (3 screenshots)
- Add decomposeIssue() for multi-pass recording (1-3 sub-issues)
- Update workflow to handle numbered session videos (qa-session-1, etc.)
2026-04-20 14:30:41 +00:00 · 2026-03-24 12:49:13 +00:00
parent 7170918fd7
commit aa0c020e89
2 changed files with 653 additions and 212 deletions
--- a/.github/workflows/pr-qa.yaml
+++ b/.github/workflows/pr-qa.yaml
@@ -585,14 +585,14 @@ jobs:

          for dir in qa-artifacts/qa-report-*; do
            [ -d "$dir" ] || continue
-            # Convert after video (qa-session.webm)
-            for name in qa-session qa-before-session; do
+            # Convert known video names (single + multi-pass + before)
+            for name in qa-session qa-session-1 qa-session-2 qa-session-3 qa-before-session; do
              if [ -f "$dir/${name}.webm" ] && [ -s "$dir/${name}.webm" ]; then
                convert_video "$dir/${name}.webm" "$dir/${name}.mp4"
              fi
            done
            # Fallback: find any non-empty webm not yet converted
-            if [ ! -f "$dir/qa-session.mp4" ]; then
+            if [ ! -f "$dir/qa-session.mp4" ] && [ ! -f "$dir/qa-session-1.mp4" ]; then
              WEBM=$(find "$dir" -name '*.webm' -type f -size +0c | head -1)
              [ -n "$WEBM" ] && convert_video "$WEBM" "$dir/qa-session.mp4"
            fi
@@ -660,7 +660,7 @@ jobs:
              TARGET_URL_FLAG="--target-url https://github.com/${REPO}/pull/${TARGET_NUM}"
            fi
          fi
-          for vid in qa-artifacts/qa-report-*/qa-session.mp4; do
+          for vid in qa-artifacts/qa-report-*/qa-session*.mp4; do
            [ -f "$vid" ] || continue
            DIR=$(dirname "$vid")
            BEFORE_FLAG=""
@@ -753,9 +753,19 @@ jobs:
                echo "Found ${prefix} ${os} video ($(du -h "$VID" | cut -f1))"
              fi
            done
-            # Generate GIF thumbnail from after video
-            if [ -f "$DEPLOY_DIR/qa-${os}.mp4" ]; then
-              ffmpeg -y -ss 10 -i "$DEPLOY_DIR/qa-${os}.mp4" -t 8 \
+            # Copy multi-pass session videos (qa-session-1, qa-session-2, etc.)
+            for numbered in "$DIR"/qa-session-[0-9].mp4; do
+              [ -f "$numbered" ] || continue
+              NUM=$(basename "$numbered" | sed 's/qa-session-\([0-9]\).mp4/\1/')
+              DEST="$DEPLOY_DIR/qa-${os}-pass${NUM}.mp4"
+              cp "$numbered" "$DEST"
+              echo "Found pass ${NUM} ${os} video ($(du -h "$numbered" | cut -f1))"
+            done
+            # Generate GIF thumbnail from after video (or first pass)
+            THUMB_SRC="$DEPLOY_DIR/qa-${os}.mp4"
+            [ ! -f "$THUMB_SRC" ] && THUMB_SRC="$DEPLOY_DIR/qa-${os}-pass1.mp4"
+            if [ -f "$THUMB_SRC" ]; then
+              ffmpeg -y -ss 10 -i "$THUMB_SRC" -t 8 \
                -vf "fps=8,scale=480:-1:flags=lanczos,split[s0][s1];[s0]palettegen=max_colors=64[p];[s1][p]paletteuse=dither=bayer" \
                -loop 0 "$DEPLOY_DIR/qa-${os}-thumb.gif" 2>/dev/null \
              || echo "GIF generation failed for ${os} (non-fatal)"
@@ -772,7 +782,7 @@ jobs:
            eval "ICON=\$ICONS_${os}"
            OS_LOWER=$(echo "$os" | tr '[:upper:]' '[:lower:]')
            HAS_BEFORE=$([ -f "$DEPLOY_DIR/qa-before-${os}.mp4" ] && echo 1 || echo 0)
-            HAS_AFTER=$([ -f "$DEPLOY_DIR/qa-${os}.mp4" ] && echo 1 || echo 0)
+            HAS_AFTER=$( ([ -f "$DEPLOY_DIR/qa-${os}.mp4" ] || [ -f "$DEPLOY_DIR/qa-${os}-pass1.mp4" ]) && echo 1 || echo 0)
            [ "$HAS_AFTER" = "0" ] && continue

            REPORT_FILE="video-reviews/${OS_LOWER}-qa-video-report.md"
@@ -789,9 +799,18 @@ jobs:
            if [ "$HAS_BEFORE" = "1" ]; then
              # Side-by-side before/after layout
              CARDS="${CARDS}<div class='card reveal' style='--i:${CARD_COUNT}'><div class=card-header><span class=platform><span class=icon>${ICON}</span>${os}</span><span class=links>${REPORT_LINK}</span></div><div class=comparison><div class=comp-panel><div class=comp-label>Before <span class=comp-tag>main</span></div><div class=video-wrap><video controls muted preload=metadata><source src=qa-before-${os}.mp4 type=video/mp4></video></div><div class=comp-dl><a class=dl href=qa-before-${os}.mp4 download>${DL_ICON}Before</a></div></div><div class=comp-panel><div class=comp-label>After <span class=comp-tag>PR</span></div><div class=video-wrap><video controls muted preload=metadata><source src=qa-${os}.mp4 type=video/mp4></video></div><div class=comp-dl><a class=dl href=qa-${os}.mp4 download>${DL_ICON}After</a></div></div></div>${REPORT_HTML}</div>"
-            else
+            elif [ -f "$DEPLOY_DIR/qa-${os}.mp4" ]; then
              # Single video (full QA mode or no before available)
              CARDS="${CARDS}<div class='card reveal' style='--i:${CARD_COUNT}'><div class=video-wrap><video controls muted preload=metadata><source src=qa-${os}.mp4 type=video/mp4></video></div><div class=card-body><span class=platform><span class=icon>${ICON}</span>${os}</span><span class=links><a class=dl href=qa-${os}.mp4 download>${DL_ICON}Download</a>${REPORT_LINK}</span></div>${REPORT_HTML}</div>"
+            else
+              # Multi-pass videos (reproduce mode with sub-issues)
+              PASS_VIDEOS=""
+              for pass_vid in "$DEPLOY_DIR"/qa-${os}-pass[0-9].mp4; do
+                [ -f "$pass_vid" ] || continue
+                PASS_NUM=$(basename "$pass_vid" | sed "s/qa-${os}-pass\([0-9]\).mp4/\1/")
+                PASS_VIDEOS="${PASS_VIDEOS}<div class=comp-panel><div class=comp-label>Pass ${PASS_NUM}</div><div class=video-wrap><video controls muted preload=metadata><source src=qa-${os}-pass${PASS_NUM}.mp4 type=video/mp4></video></div><div class=comp-dl><a class=dl href=qa-${os}-pass${PASS_NUM}.mp4 download>${DL_ICON}Pass ${PASS_NUM}</a></div></div>"
+              done
+              CARDS="${CARDS}<div class='card reveal' style='--i:${CARD_COUNT}'><div class=card-header><span class=platform><span class=icon>${ICON}</span>${os}</span><span class=links>${REPORT_LINK}</span></div><div class=comparison>${PASS_VIDEOS}</div>${REPORT_HTML}</div>"
            fi
            CARD_COUNT=$((CARD_COUNT + 1))
          done
--- a/scripts/qa-record.ts
+++ b/scripts/qa-record.ts
@@ -46,6 +46,19 @@ type TestAction =
  | { action: 'screenshot'; name: string }
  | { action: 'loadWorkflow'; url: string }
  | { action: 'setSetting'; id: string; value: string | number | boolean }
+  | { action: 'reload' }
+  | { action: 'done'; reason: string }
+
+/** Outcome of running a single TestAction through executeAction(). */
+interface ActionResult {
+  /** The action that was attempted. */
+  action: TestAction
+  /** False when executeAction() caught an error while running the action. */
+  success: boolean
+  /** Short description of the caught error; only set when success is false. */
+  error?: string
+}
+
+/** One independently reproducible slice of an issue, as returned by decomposeIssue(). */
+interface SubIssue {
+  /** Short label; shown in the log output for the recording pass. */
+  title: string
+  /** Instruction passed to the agent's system prompt as its current focus. */
+  focus: string
+}

 type RecordMode = 'before' | 'after' | 'reproduce'

@@ -508,7 +521,174 @@ async function clickByText(page: Page, text: string) {
  }
 }

-// ── Step executor ──
+// ── Action executor ──
+
+/**
+ * Waits up to 15 seconds for the ComfyUI menu button to become visible, then
+ * pauses one more second. If the wait fails, a warning is logged and the
+ * function still returns normally.
+ */
+async function waitForEditorReady(page: Page) {
+  const menuButton = page.locator('.comfy-menu-button-wrapper')
+  try {
+    await menuButton.waitFor({ state: 'visible', timeout: 15000 })
+  } catch {
+    console.warn('Editor not ready after reload (menu button not visible)')
+  }
+  await sleep(1000)
+}
+
+/**
+ * Executes one TestAction against the page and reports whether it worked.
+ *
+ * Never throws: any error raised while performing the action is caught,
+ * logged, and returned as `{ success: false, error }` so callers (the batch
+ * executor and the agentic loop) can decide how to proceed.
+ *
+ * Actions that cannot be carried out — a text target that is not visible, an
+ * invalid key name, a workflow URL that does not return a successful
+ * response, or an unrecognised action type — are reported as failures rather
+ * than silent no-ops, so the agent is never told that a skipped step
+ * succeeded.
+ *
+ * @param page - Playwright page to drive.
+ * @param step - The action to perform.
+ * @param outputDir - Directory that `screenshot` actions write PNG files into.
+ * @returns The action together with its success flag and, on failure, the
+ *   first line of the error message.
+ */
+async function executeAction(
+  page: Page,
+  step: TestAction,
+  outputDir: string
+): Promise<ActionResult> {
+  console.warn(
+    `  → ${step.action}${('label' in step && `: ${step.label}`) || ('text' in step && `: ${step.text}`) || ('name' in step && `: ${step.name}`) || ('x' in step && `: (${step.x},${step.y})`) || ''}`
+  )
+  try {
+    switch (step.action) {
+      case 'openMenu':
+        await openComfyMenu(page)
+        break
+      case 'hoverMenuItem':
+        await hoverMenuItem(page, step.label)
+        break
+      case 'clickMenuItem':
+        await clickSubmenuItem(page, step.label)
+        break
+      case 'fillDialog':
+        await fillDialogAndConfirm(page, step.text)
+        break
+      case 'pressKey':
+        // An invalid key name makes press() throw; the outer catch turns that
+        // into a failed result instead of hiding it.
+        await page.keyboard.press(step.key)
+        await sleep(300)
+        break
+      case 'click':
+        await clickByText(page, step.text)
+        break
+      case 'rightClick': {
+        const rcEl = page.locator(`text=${step.text}`).first()
+        if (!(await rcEl.isVisible().catch(() => false))) {
+          throw new Error(
+            `Element with text "${step.text}" not found for rightClick`
+          )
+        }
+        await rcEl.click({ button: 'right' })
+        await sleep(500)
+        break
+      }
+      case 'doubleClick': {
+        if (step.x !== undefined && step.y !== undefined) {
+          await page.mouse.dblclick(step.x, step.y)
+        } else if (step.text) {
+          const dcEl = page.locator(`text=${step.text}`).first()
+          if (!(await dcEl.isVisible().catch(() => false))) {
+            throw new Error(
+              `Element with text "${step.text}" not found for doubleClick`
+            )
+          }
+          await dcEl.dblclick()
+        } else {
+          // Neither coordinates nor text given: double-click the canvas center
+          await page.mouse.dblclick(640, 400)
+        }
+        await sleep(500)
+        break
+      }
+      case 'clickCanvas':
+        await page.mouse.click(step.x, step.y)
+        await sleep(300)
+        break
+      case 'rightClickCanvas':
+        await page.mouse.click(step.x, step.y, { button: 'right' })
+        await sleep(500)
+        break
+      case 'dragCanvas': {
+        await page.mouse.move(step.fromX, step.fromY)
+        await page.mouse.down()
+        await sleep(100)
+        // Move in several increments so the app receives intermediate
+        // pointer-move events during the drag
+        const dragSteps = 5
+        for (let i = 1; i <= dragSteps; i++) {
+          const x = step.fromX + ((step.toX - step.fromX) * i) / dragSteps
+          const y = step.fromY + ((step.toY - step.fromY) * i) / dragSteps
+          await page.mouse.move(x, y)
+          await sleep(50)
+        }
+        await page.mouse.up()
+        await sleep(300)
+        break
+      }
+      case 'scrollCanvas':
+        await page.mouse.move(step.x, step.y)
+        await page.mouse.wheel(0, step.deltaY)
+        await sleep(500)
+        break
+      case 'wait':
+        // Cap individual waits at 5 seconds regardless of what was requested
+        await sleep(Math.min(step.ms, 5000))
+        break
+      case 'screenshot': {
+        // Restrict the file name to a safe character set and length
+        const safeName = step.name.replace(/[^a-zA-Z0-9_-]/g, '_').slice(0, 100)
+        await page.screenshot({
+          path: `${outputDir}/${safeName}.png`
+        })
+        break
+      }
+      case 'loadWorkflow': {
+        console.warn(`  Loading workflow from: ${step.url}`)
+        await page.evaluate(async (workflowUrl) => {
+          const resp = await fetch(workflowUrl)
+          if (!resp.ok) {
+            throw new Error(`Failed to fetch workflow: HTTP ${resp.status}`)
+          }
+          const workflow = await resp.json()
+          const app = (window as unknown as Record<string, unknown>).app as {
+            loadGraphData: (data: unknown) => Promise<void>
+          }
+          if (!app?.loadGraphData) {
+            throw new Error('app.loadGraphData is not available')
+          }
+          await app.loadGraphData(workflow)
+        }, step.url)
+        await sleep(2000)
+        break
+      }
+      case 'setSetting': {
+        console.warn(`  Setting ${step.id} = ${step.value}`)
+        await page.evaluate(
+          ({ id, value }) => {
+            const app = (window as unknown as Record<string, unknown>).app as {
+              ui: {
+                settings: {
+                  setSettingValue: (id: string, value: unknown) => void
+                }
+              }
+            }
+            app?.ui?.settings?.setSettingValue(id, value)
+          },
+          { id: step.id, value: step.value }
+        )
+        await sleep(500)
+        break
+      }
+      case 'reload':
+        await page.reload({ waitUntil: 'domcontentloaded' })
+        await waitForEditorReady(page)
+        break
+      case 'done':
+        console.warn(`  Agent done: ${step.reason}`)
+        break
+      default:
+        // Reachable at runtime: actions may come from unvalidated model JSON
+        throw new Error(`Unknown action: ${JSON.stringify(step)}`)
+    }
+    return { action: step, success: true }
+  } catch (e) {
+    const error = e instanceof Error ? e.message.split('\n')[0] : String(e)
+    console.warn(`Step ${step.action} failed: ${error}`)
+    return { action: step, success: false, error }
+  }
+}
+
+// ── Step executor (batch mode for before/after) ──

 async function executeSteps(
  page: Page,
@@ -516,151 +696,7 @@ async function executeSteps(
  outputDir: string
 ) {
  for (const step of steps) {
-    console.warn(
-      `  → ${step.action}${('label' in step && `: ${step.label}`) || ('text' in step && `: ${step.text}`) || ('name' in step && `: ${step.name}`) || ('x' in step && `: (${step.x},${step.y})`) || ''}`
-    )
-    try {
-      switch (step.action) {
-        case 'openMenu':
-          await openComfyMenu(page)
-          break
-        case 'hoverMenuItem':
-          await hoverMenuItem(page, step.label)
-          break
-        case 'clickMenuItem':
-          await clickSubmenuItem(page, step.label)
-          break
-        case 'fillDialog':
-          await fillDialogAndConfirm(page, step.text)
-          break
-        case 'pressKey':
-          try {
-            await page.keyboard.press(step.key)
-            await sleep(300)
-          } catch (e) {
-            console.warn(
-              `Skipping invalid key "${step.key}": ${e instanceof Error ? e.message : e}`
-            )
-          }
-          break
-        case 'click':
-          await clickByText(page, step.text)
-          break
-        case 'rightClick': {
-          const rcEl = page.locator(`text=${step.text}`).first()
-          if (await rcEl.isVisible().catch(() => false)) {
-            await rcEl.click({ button: 'right' })
-            await sleep(500)
-          } else {
-            console.warn(
-              `Element with text "${step.text}" not found for rightClick`
-            )
-          }
-          break
-        }
-        case 'doubleClick': {
-          if (step.x !== undefined && step.y !== undefined) {
-            await page.mouse.dblclick(step.x, step.y)
-          } else if (step.text) {
-            const dcEl = page.locator(`text=${step.text}`).first()
-            if (await dcEl.isVisible().catch(() => false)) {
-              await dcEl.dblclick()
-            } else {
-              console.warn(
-                `Element with text "${step.text}" not found for doubleClick`
-              )
-            }
-          } else {
-            // Double-click center of canvas as fallback
-            await page.mouse.dblclick(640, 400)
-          }
-          await sleep(500)
-          break
-        }
-        case 'clickCanvas':
-          await page.mouse.click(step.x, step.y)
-          await sleep(300)
-          break
-        case 'rightClickCanvas':
-          await page.mouse.click(step.x, step.y, { button: 'right' })
-          await sleep(500)
-          break
-        case 'dragCanvas': {
-          await page.mouse.move(step.fromX, step.fromY)
-          await page.mouse.down()
-          await sleep(100)
-          const dragSteps = 5
-          for (let i = 1; i <= dragSteps; i++) {
-            const x = step.fromX + ((step.toX - step.fromX) * i) / dragSteps
-            const y = step.fromY + ((step.toY - step.fromY) * i) / dragSteps
-            await page.mouse.move(x, y)
-            await sleep(50)
-          }
-          await page.mouse.up()
-          await sleep(300)
-          break
-        }
-        case 'scrollCanvas':
-          await page.mouse.move(step.x, step.y)
-          await page.mouse.wheel(0, step.deltaY)
-          await sleep(500)
-          break
-        case 'wait':
-          await sleep(Math.min(step.ms, 5000))
-          break
-        case 'screenshot': {
-          const safeName = step.name
-            .replace(/[^a-zA-Z0-9_-]/g, '_')
-            .slice(0, 100)
-          await page.screenshot({
-            path: `${outputDir}/${safeName}.png`
-          })
-          break
-        }
-        case 'loadWorkflow': {
-          // Load a workflow JSON from a URL into ComfyUI via the API
-          console.warn(`  Loading workflow from: ${step.url}`)
-          await page.evaluate(async (workflowUrl) => {
-            const resp = await fetch(workflowUrl)
-            const workflow = await resp.json()
-            // Use ComfyUI's app.loadGraphData to load the workflow
-            const app = (window as unknown as Record<string, unknown>).app as {
-              loadGraphData: (data: unknown) => Promise<void>
-            }
-            if (app?.loadGraphData) {
-              await app.loadGraphData(workflow)
-            }
-          }, step.url)
-          await sleep(2000)
-          break
-        }
-        case 'setSetting': {
-          // Set a ComfyUI setting via the app API
-          console.warn(`  Setting ${step.id} = ${step.value}`)
-          await page.evaluate(
-            ({ id, value }) => {
-              const app = (window as unknown as Record<string, unknown>).app as {
-                ui: {
-                  settings: {
-                    setSettingValue: (id: string, value: unknown) => void
-                  }
-                }
-              }
-              app?.ui?.settings?.setSettingValue(id, value)
-            },
-            { id: step.id, value: step.value }
-          )
-          await sleep(500)
-          break
-        }
-        default:
-          console.warn(`Unknown action: ${JSON.stringify(step)}`)
-      }
-    } catch (e) {
-      console.warn(
-        `Step ${step.action} failed: ${e instanceof Error ? e.message.split('\n')[0] : e}`
-      )
-    }
+    await executeAction(page, step, outputDir)
  }
 }

@@ -754,75 +790,461 @@ async function loginAsQaCi(page: Page, _serverUrl: string) {
  await sleep(1000)
 }

+// ── Agentic loop (reproduce mode) ──
+
+/** Takes a JPEG screenshot of the page at quality 50 and returns it base64-encoded. */
+async function captureScreenshotForGemini(page: Page): Promise<string> {
+  const jpegBytes = await page.screenshot({ type: 'jpeg', quality: 50 })
+  const encoded = jpegBytes.toString('base64')
+  return encoded
+}
+
+/**
+ * Assembles the system instruction for the agentic reproduce loop: a fixed
+ * description of the UI layout, the action vocabulary and the response
+ * format, followed by the issue text.
+ *
+ * @param issueContext - Issue text appended under "Issue to Reproduce".
+ * @param subIssueFocus - When non-empty, adds a "Current Focus" section
+ *   ahead of the issue text.
+ * @returns The complete prompt string.
+ */
+function buildAgenticSystemPrompt(
+  issueContext: string,
+  subIssueFocus?: string
+): string {
+  let focusSection = ''
+  if (subIssueFocus) {
+    focusSection = `\n## Current Focus\nYou are reproducing this specific sub-issue: ${subIssueFocus}\nStay focused on this particular bug. When you have demonstrated it, return done.\n`
+  }
+
+  return `You are an AI QA agent controlling a ComfyUI browser session to reproduce reported bugs.
+You see the ACTUAL screen after each action and decide what to do next.
+
+## ComfyUI Layout (viewport 1280x720)
+- A large canvas showing a node graph, centered roughly at (640, 400)
+- Nodes are boxes with input/output slots connected by wires
+- Hamburger menu (top-left C logo) with File, Edit, Help submenus
+- Sidebar on the left (Workflows, Node Library, Models)
+- Topbar with workflow tabs and Queue button
+- Default workflow nodes (approximate center coordinates):
+  - Load Checkpoint (~150, 300)
+  - CLIP Text Encode (positive) (~450, 250)
+  - CLIP Text Encode (negative) (~450, 450)
+  - Empty Latent Image (~450, 600)
+  - KSampler (~750, 350)
+  - VAE Decode (~1000, 350)
+  - Save Image (~1200, 350)
+
+## Available Actions
+Each action is a JSON object with an "action" field:
+- { "action": "openMenu" } — clicks the hamburger menu
+- { "action": "hoverMenuItem", "label": "File" } — hovers a top-level menu item
+- { "action": "clickMenuItem", "label": "Save As" } — clicks submenu item
+- { "action": "click", "text": "Button Text" } — clicks element by text
+- { "action": "rightClick", "text": "NodeTitle" } — right-clicks element
+- { "action": "doubleClick", "text": "NodeTitle" } — double-clicks element
+- { "action": "doubleClick", "x": 640, "y": 400 } — double-click at coords (opens node search)
+- { "action": "fillDialog", "text": "search-text" } — fills input and presses Enter
+- { "action": "pressKey", "key": "Escape" } — presses a key
+- { "action": "clickCanvas", "x": 640, "y": 400 } — click at coordinates
+- { "action": "rightClickCanvas", "x": 500, "y": 350 } — right-click on canvas
+- { "action": "dragCanvas", "fromX": 400, "fromY": 300, "toX": 600, "toY": 300 }
+- { "action": "scrollCanvas", "x": 640, "y": 400, "deltaY": -300 } — scroll (negative=zoom in)
+- { "action": "loadWorkflow", "url": "https://..." } — loads workflow JSON from URL
+- { "action": "setSetting", "id": "Comfy.Setting.Id", "value": true } — changes a setting
+- { "action": "reload" } — reloads the page (for bugs that manifest on load)
+- { "action": "wait", "ms": 1000 } — waits (max 3000ms)
+- { "action": "screenshot", "name": "step-name" } — takes a named screenshot
+- { "action": "done", "reason": "..." } — signals you are finished
+
+## Response Format
+Return JSON: { "reasoning": "brief explanation of what you see and plan to do", "action": { "action": "...", ...params } }
+Return { "reasoning": "...", "action": { "action": "done", "reason": "..." } } when finished.
+
+## Rules
+- You see the ACTUAL screen state via the screenshot. Use it to decide your next action.
+- If an action failed, try an alternative approach (different coordinates, different selector).
+- Take screenshots before and after critical state changes to capture the bug.
+- Never repeat the same failing action more than twice. Adapt your approach.
+- Establish prerequisites first: if the bug requires a specific setting, use setSetting. If it needs a workflow, use loadWorkflow.
+- Use reload for bugs that manifest on page load/startup.
+- Common interactions: right-click node for context menu, Ctrl+C/V to copy/paste, Delete to remove, double-click canvas to add node via search.
+${focusSection}
+## Issue to Reproduce
+${issueContext}`
+}
+
+/** One conversation turn in the shape passed as `contents` to generateContent(). */
+interface AgenticTurnContent {
+  /** Who produced the turn. */
+  role: 'user' | 'model'
+  /** Text fragments and/or inline base64 images that make up the turn. */
+  parts: (
+    | { text: string }
+    | { inlineData: { mimeType: string; data: string } }
+  )[]
+}
+
+/**
+ * Reproduces an issue interactively: each turn sends Gemini a fresh
+ * screenshot plus the outcome of the previous action, parses the single
+ * action it replies with, and executes it via executeAction().
+ *
+ * The loop ends when the model returns `done`, after MAX_TURNS turns, once
+ * TIME_BUDGET_MS has elapsed, or when the same action keeps being proposed.
+ * After three consecutive failed model calls it runs the static
+ * FALLBACK_REPRODUCE steps instead and returns.
+ *
+ * @param page - Playwright page to drive.
+ * @param opts - Options; `apiKey`, `model` and `diffFile` are read here.
+ * @param outputDir - Directory for screenshots written during the loop.
+ * @param subIssue - Optional focus that narrows the system prompt to one bug.
+ */
+async function runAgenticLoop(
+  page: Page,
+  opts: Options,
+  outputDir: string,
+  subIssue?: SubIssue
+): Promise<void> {
+  const MAX_TURNS = 20
+  const TIME_BUDGET_MS = 55_000
+  const SCREENSHOT_HISTORY_WINDOW = 3
+
+  const issueContext = opts.diffFile
+    ? readFileSync(opts.diffFile, 'utf-8').slice(0, 6000)
+    : 'No issue context provided'
+
+  const systemInstruction = buildAgenticSystemPrompt(
+    issueContext,
+    subIssue?.focus
+  )
+
+  const genAI = new GoogleGenerativeAI(opts.apiKey)
+  // Use flash for agentic loop — rapid iteration matters more than reasoning
+  const agenticModel = opts.model.includes('flash')
+    ? opts.model
+    : opts.model.replace('pro', 'flash')
+  const model = genAI.getGenerativeModel({
+    model: agenticModel,
+    systemInstruction
+  })
+
+  console.warn(
+    `Starting agentic loop with ${agenticModel}` +
+      (subIssue ? ` — focus: ${subIssue.title}` : '')
+  )
+
+  const history: AgenticTurnContent[] = []
+  let lastResult: ActionResult | undefined
+  let consecutiveFailures = 0
+  let lastActionKey = ''
+  let repeatCount = 0
+  const startTime = Date.now()
+
+  for (let turn = 0; turn < MAX_TURNS; turn++) {
+    const elapsed = Date.now() - startTime
+    if (elapsed > TIME_BUDGET_MS) {
+      console.warn(`Time budget exhausted (${elapsed}ms), stopping loop`)
+      break
+    }
+
+    // 1. Capture screenshot
+    const screenshotB64 = await captureScreenshotForGemini(page)
+
+    // 2. Build user message for this turn
+    const userParts: AgenticTurnContent['parts'] = []
+    const progress = `(Turn ${turn + 1}/${MAX_TURNS}, ${Math.round((TIME_BUDGET_MS - elapsed) / 1000)}s remaining)`
+
+    if (turn === 0) {
+      userParts.push({
+        text: 'Here is the current screen state. What action should I take first to reproduce the reported issue?'
+      })
+    } else if (lastResult) {
+      const statusText = lastResult.success
+        ? `Action "${lastResult.action.action}" succeeded.`
+        : `Action "${lastResult.action.action}" FAILED: ${lastResult.error}`
+      userParts.push({
+        text: `${statusText}\nHere is the screen after that action. What should I do next? ${progress}`
+      })
+    } else {
+      // The previous turn ended in a failed model call, so nothing ran.
+      // Say so rather than re-reporting an older action's outcome.
+      userParts.push({
+        text: `No action was executed on the previous turn because the model call failed.\nHere is the current screen. What should I do next? ${progress}`
+      })
+    }
+
+    userParts.push({
+      inlineData: { mimeType: 'image/jpeg', data: screenshotB64 }
+    })
+
+    history.push({ role: 'user', parts: userParts })
+
+    // Only the newest SCREENSHOT_HISTORY_WINDOW user turns keep their image.
+    // The index set is computed once per turn, not once per history entry.
+    const recentUserTurns = new Set(
+      history
+        .map((e, i) => (e.role === 'user' ? i : -1))
+        .filter((i) => i >= 0)
+        .slice(-SCREENSHOT_HISTORY_WINDOW)
+    )
+    const contents: AgenticTurnContent[] = history.map((entry, idx) => {
+      if (entry.role !== 'user' || recentUserTurns.has(idx)) {
+        return entry
+      }
+      // Older user turn: replace the screenshot with placeholder text
+      return {
+        role: entry.role,
+        parts: entry.parts.map((p) =>
+          'inlineData' in p ? { text: '[screenshot omitted]' } : p
+        )
+      }
+    })
+
+    // 3. Call Gemini
+    let actionObj: TestAction
+    try {
+      const result = await model.generateContent({
+        contents,
+        generationConfig: {
+          temperature: 0.2,
+          maxOutputTokens: 1024,
+          responseMimeType: 'application/json'
+        }
+      })
+
+      const responseText = result.response.text().trim()
+      console.warn(`  Gemini [${turn}]: ${responseText.slice(0, 200)}`)
+
+      const parsed = JSON.parse(responseText)
+      // Reasoning is only logged; a non-string value must not fail the turn
+      if (typeof parsed?.reasoning === 'string' && parsed.reasoning) {
+        console.warn(`  Reasoning: ${parsed.reasoning.slice(0, 150)}`)
+      }
+      // Accept { action }, { actions: [...] } or a bare action object
+      actionObj = parsed.action || parsed.actions?.[0] || parsed
+      if (!actionObj?.action) {
+        throw new Error('No action field in response')
+      }
+
+      consecutiveFailures = 0
+    } catch (e) {
+      consecutiveFailures++
+      const errMsg = e instanceof Error ? e.message.split('\n')[0] : String(e)
+      console.warn(`  Gemini call failed (${consecutiveFailures}/3): ${errMsg}`)
+
+      // Nothing was executed this turn; drop the stale result so the next
+      // prompt does not attribute the new screenshot to an older action.
+      lastResult = undefined
+
+      // Add a placeholder model response to keep history balanced
+      history.push({
+        role: 'model',
+        parts: [
+          {
+            text: `{"reasoning": "API error", "action": {"action": "wait", "ms": 500}}`
+          }
+        ]
+      })
+
+      if (consecutiveFailures >= 3) {
+        console.warn('3 consecutive API failures, falling back to static steps')
+        await executeSteps(page, FALLBACK_REPRODUCE, outputDir)
+        return
+      }
+      continue
+    }
+
+    // Add model response to history
+    history.push({
+      role: 'model',
+      parts: [{ text: JSON.stringify({ action: actionObj }) }]
+    })
+
+    // 4. Check for done
+    if (actionObj.action === 'done') {
+      console.warn(
+        `Agent signaled done: ${'reason' in actionObj ? actionObj.reason : 'no reason'}`
+      )
+      // Take a final screenshot
+      await page.screenshot({
+        path: `${outputDir}/agentic-final.png`
+      })
+      break
+    }
+
+    // 5. Stuck detection: count consecutive identical proposals
+    const actionKey = JSON.stringify(actionObj)
+    if (actionKey === lastActionKey) {
+      repeatCount++
+      if (repeatCount >= 3) {
+        console.warn('Stuck: same action repeated 3x, breaking loop')
+        break
+      }
+    } else {
+      repeatCount = 0
+      lastActionKey = actionKey
+    }
+
+    // 6. Execute the action
+    lastResult = await executeAction(page, actionObj, outputDir)
+  }
+
+  console.warn('Agentic loop complete')
+}
+
+// ── Multi-pass issue decomposition ──
+
+/**
+ * Asks Gemini to split the issue text into at most three independent
+ * sub-issues, one per recording pass.
+ *
+ * Falls back to a single generic sub-issue when no issue text is available,
+ * when the model call fails, or when the response contains no usable
+ * entries. Entries lacking a string `title` or a non-empty string `focus`
+ * are discarded instead of being passed on to the agent prompt.
+ *
+ * @param opts - Options; `apiKey`, `model` and `diffFile` are read here.
+ * @returns Between one and three sub-issues.
+ */
+async function decomposeIssue(opts: Options): Promise<SubIssue[]> {
+  const issueContext = opts.diffFile
+    ? readFileSync(opts.diffFile, 'utf-8').slice(0, 8000)
+    : ''
+
+  if (!issueContext) {
+    return [{ title: 'General reproduction', focus: 'Reproduce the issue' }]
+  }
+
+  // Runtime shape check: the model's JSON is untrusted input
+  const isSubIssue = (value: unknown): value is SubIssue => {
+    if (typeof value !== 'object' || value === null) return false
+    const candidate = value as Record<string, unknown>
+    return (
+      typeof candidate.title === 'string' &&
+      typeof candidate.focus === 'string' &&
+      candidate.focus.trim() !== ''
+    )
+  }
+
+  const genAI = new GoogleGenerativeAI(opts.apiKey)
+  const model = genAI.getGenerativeModel({ model: opts.model })
+
+  console.warn('Decomposing issue into sub-issues...')
+
+  try {
+    const result = await model.generateContent({
+      contents: [
+        {
+          role: 'user',
+          parts: [
+            {
+              text: `Analyze this GitHub issue and decompose it into 1-3 independent, testable sub-issues that can each be reproduced in a separate browser session. Each sub-issue should focus on ONE specific bug or visual problem.
+
+If the issue describes only one bug, return just one sub-issue.
+
+Return JSON array: [{ "title": "short title", "focus": "specific instruction for what to test and how to trigger it" }]
+
+Issue context:
+${issueContext}`
+            }
+          ]
+        }
+      ],
+      generationConfig: {
+        temperature: 0.1,
+        maxOutputTokens: 1024,
+        responseMimeType: 'application/json'
+      }
+    })
+
+    // Strip Markdown code fences in case the model wrapped its JSON in them
+    let text = result.response.text().trim()
+    text = text
+      .replace(/^```(?:json)?\n?/gm, '')
+      .replace(/```$/gm, '')
+      .trim()
+
+    const parsed: unknown = JSON.parse(text)
+    if (!Array.isArray(parsed) || parsed.length === 0) {
+      throw new Error('Expected non-empty array')
+    }
+
+    const subIssues = parsed.filter(isSubIssue)
+    if (subIssues.length === 0) {
+      throw new Error('No entry has both a title and a focus')
+    }
+
+    // Cap at 3 sub-issues
+    const capped = subIssues.slice(0, 3)
+    console.warn(
+      `Decomposed into ${capped.length} sub-issue(s):`,
+      capped.map((s) => s.title)
+    )
+    return capped
+  } catch (e) {
+    console.warn(
+      'Issue decomposition failed, using single pass:',
+      e instanceof Error ? e.message : e
+    )
+    return [
+      { title: 'Issue reproduction', focus: 'Reproduce the reported issue' }
+    ]
+  }
+}
+
+// ── Video file management ──
+
+/**
+ * Renames the most recently modified `.webm` file in `outputDir` to
+ * `targetName`, ignoring any file whose name is listed in `knownNames`.
+ * Logs a warning and does nothing when no candidate file exists.
+ *
+ * @param outputDir - Directory to scan; also the destination directory.
+ * @param targetName - New file name (not a path) for the selected video.
+ * @param knownNames - File names to exclude from the search. This set is
+ *   only read here, never modified.
+ */
+function renameLatestWebm(
+  outputDir: string,
+  targetName: string,
+  knownNames: Set<string>
+) {
+  // Stat each candidate once up front rather than on every comparison
+  const candidates = readdirSync(outputDir)
+    .filter((f) => f.endsWith('.webm') && !knownNames.has(f))
+    .map((name) => ({
+      name,
+      mtimeMs: statSync(`${outputDir}/${name}`).mtimeMs
+    }))
+    .sort((a, b) => b.mtimeMs - a.mtimeMs)
+
+  const newest = candidates[0]
+  if (newest) {
+    renameSync(`${outputDir}/${newest.name}`, `${outputDir}/${targetName}`)
+    console.warn(`Video saved: ${outputDir}/${targetName}`)
+  } else {
+    console.warn(`WARNING: No .webm video found for ${targetName}`)
+  }
+}
+
+// ── Browser session helpers ──
+
+/**
+ * Launches headless Chromium with video recording into `videoDir`, opens the
+ * ComfyUI server URL and logs in through loginAsQaCi().
+ *
+ * On success the caller owns the returned handles and must close them. If
+ * any step after the launch throws, the browser is closed here before the
+ * error is rethrown, because the caller never receives a handle to clean up.
+ *
+ * @param opts - Options; `serverUrl` is read here.
+ * @param videoDir - Directory the recorded video is written to.
+ * @returns The browser, its recording context and the logged-in page.
+ */
+async function launchSessionAndLogin(
+  opts: Options,
+  videoDir: string
+): Promise<{
+  browser: Awaited<ReturnType<typeof chromium.launch>>
+  context: Awaited<
+    ReturnType<Awaited<ReturnType<typeof chromium.launch>>['newContext']>
+  >
+  page: Page
+}> {
+  const browser = await chromium.launch({ headless: true })
+  try {
+    const context = await browser.newContext({
+      viewport: { width: 1280, height: 720 },
+      recordVideo: { dir: videoDir, size: { width: 1280, height: 720 } }
+    })
+    const page = await context.newPage()
+
+    console.warn(`Opening ComfyUI at ${opts.serverUrl}`)
+    await page.goto(opts.serverUrl, {
+      waitUntil: 'domcontentloaded',
+      timeout: 30000
+    })
+    await sleep(2000)
+    await loginAsQaCi(page, opts.serverUrl)
+    await sleep(1000)
+
+    return { browser, context, page }
+  } catch (e) {
+    // Best-effort cleanup; the original error is what the caller should see
+    await browser.close().catch(() => undefined)
+    throw e
+  }
+}
+
 // ── Main ──

 async function main() {
  const opts = parseArgs()
  mkdirSync(opts.outputDir, { recursive: true })

-  // Generate or fall back to default test steps
-  let steps: TestAction[]
-  try {
-    steps = await generateTestSteps(opts)
-  } catch (err) {
-    console.warn('Gemini generation failed, using fallback steps:', err)
-    steps = FALLBACK_STEPS[opts.mode]
-  }
+  if (opts.mode === 'reproduce') {
+    // Agentic multi-pass mode
+    const subIssues = await decomposeIssue(opts)
+    const knownNames = new Set<string>()

-  // Launch browser with video recording (Chromium for WebGL support)
-  const browser = await chromium.launch({ headless: true })
-  const context = await browser.newContext({
-    viewport: { width: 1280, height: 720 },
-    recordVideo: { dir: opts.outputDir, size: { width: 1280, height: 720 } }
-  })
-  const page = await context.newPage()
+    for (let i = 0; i < subIssues.length; i++) {
+      const subIssue = subIssues[i]
+      const sessionLabel = subIssues.length === 1 ? '' : `-${i + 1}`
+      const videoName = `qa-session${sessionLabel}.webm`

-  try {
-    console.warn(`Opening ComfyUI at ${opts.serverUrl}`)
-    await page.goto(opts.serverUrl, {
-      waitUntil: 'domcontentloaded',
-      timeout: 30000
-    })
-    await sleep(2000)
+      console.warn(
+        `\n=== Pass ${i + 1}/${subIssues.length}: ${subIssue.title} ===`
+      )

-    await loginAsQaCi(page, opts.serverUrl)
+      const { browser, context, page } = await launchSessionAndLogin(
+        opts,
+        opts.outputDir
+      )

-    // Debug: capture page state after login
-    await page.screenshot({
-      path: `${opts.outputDir}/debug-after-login-${opts.mode}.png`
-    })
-    console.warn(`Debug screenshot saved: debug-after-login-${opts.mode}.png`)
-    console.warn('Editor ready — executing test steps')
+      try {
+        await page.screenshot({
+          path: `${opts.outputDir}/debug-after-login-reproduce${sessionLabel}.png`
+        })
+        console.warn('Editor ready — starting agentic loop')
+        await runAgenticLoop(page, opts, opts.outputDir, subIssue)
+        await sleep(2000)
+      } finally {
+        await context.close()
+        await browser.close()
+      }

-    await executeSteps(page, steps, opts.outputDir)
-
-    await sleep(2000)
-  } finally {
-    await context.close()
-    await browser.close()
-  }
-
-  // Rename the recorded video to expected filename
-  const videoName =
-    opts.mode === 'before' ? 'qa-before-session.webm' : 'qa-session.webm'
-  // reproduce mode uses 'qa-session.webm' (same as after — it's the primary video)
-  const knownNames = new Set(['qa-before-session.webm', 'qa-session.webm'])
-  const files = readdirSync(opts.outputDir)
-    .filter((f) => f.endsWith('.webm') && !knownNames.has(f))
-    .sort((a, b) => {
-      const mtimeA = statSync(`${opts.outputDir}/${a}`).mtimeMs
-      const mtimeB = statSync(`${opts.outputDir}/${b}`).mtimeMs
-      return mtimeB - mtimeA
-    })
-  if (files.length > 0) {
-    const recorded = files[0]
-    renameSync(
-      `${opts.outputDir}/${recorded}`,
-      `${opts.outputDir}/${videoName}`
-    )
-    console.warn(`Video saved: ${opts.outputDir}/${videoName}`)
+      knownNames.add(videoName)
+      renameLatestWebm(opts.outputDir, videoName, knownNames)
+    }
  } else {
-    console.warn('WARNING: No .webm video found after recording')
+    // Before/after batch mode (unchanged)
+    let steps: TestAction[]
+    try {
+      steps = await generateTestSteps(opts)
+    } catch (err) {
+      console.warn('Gemini generation failed, using fallback steps:', err)
+      steps = FALLBACK_STEPS[opts.mode]
+    }
+
+    const { browser, context, page } = await launchSessionAndLogin(
+      opts,
+      opts.outputDir
+    )
+
+    try {
+      await page.screenshot({
+        path: `${opts.outputDir}/debug-after-login-${opts.mode}.png`
+      })
+      console.warn(`Debug screenshot saved: debug-after-login-${opts.mode}.png`)
+      console.warn('Editor ready — executing test steps')
+      await executeSteps(page, steps, opts.outputDir)
+      await sleep(2000)
+    } finally {
+      await context.close()
+      await browser.close()
+    }
+
+    const videoName =
+      opts.mode === 'before' ? 'qa-before-session.webm' : 'qa-session.webm'
+    const knownNames = new Set(['qa-before-session.webm', 'qa-session.webm'])
+    renameLatestWebm(opts.outputDir, videoName, knownNames)
  }

  console.warn('Recording complete!')