feat: hybrid QA agent — Claude Sonnet 4.6 brain + Gemini vision

Architecture:
- Claude Sonnet 4.6 plans and reasons (via Claude Agent SDK)
- Gemini 2.5 Flash watches video buffer and describes what it sees
- 4 tools: observe(), inspect(), perform(), done()

observe(seconds, focus): builds video clip from screenshot buffer,
  sends to Gemini with Claude's focused question.
inspect(selector): searches a11y tree for specific element state.
perform(action, params): executes Playwright action.
done(verdict, summary): signals completion.

Falls back to Gemini-only loop if ANTHROPIC_API_KEY not set.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
snomiao
2026-03-27 06:27:30 +00:00
parent d78388c893
commit 83204b9a67
5 changed files with 611 additions and 6 deletions

View File

@@ -290,6 +290,7 @@ jobs:
env:
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
TARGET_TYPE: ${{ needs.resolve-matrix.outputs.target_type }}
run: |
MODE="before"

View File

@@ -123,6 +123,7 @@
"zod-validation-error": "catalog:"
},
"devDependencies": {
"@anthropic-ai/claude-agent-sdk": "catalog:",
"@eslint/js": "catalog:",
"@google/generative-ai": "catalog:",
"@intlify/eslint-plugin-vue-i18n": "catalog:",

26
pnpm-lock.yaml generated
View File

@@ -9,6 +9,9 @@ catalogs:
'@alloc/quick-lru':
specifier: ^5.2.0
version: 5.2.0
'@anthropic-ai/claude-agent-sdk':
specifier: ^0.2.85
version: 0.2.85
'@astrojs/vue':
specifier: ^5.0.0
version: 5.1.4
@@ -603,6 +606,9 @@ importers:
specifier: 'catalog:'
version: 3.3.0(zod@3.24.1)
devDependencies:
'@anthropic-ai/claude-agent-sdk':
specifier: 'catalog:'
version: 0.2.85(zod@3.24.1)
'@eslint/js':
specifier: 'catalog:'
version: 9.39.1
@@ -1073,6 +1079,12 @@ packages:
'@antfu/utils@0.7.10':
resolution: {integrity: sha512-+562v9k4aI80m1+VuMHehNJWLOFjBnXn3tdOitzD0il5b7smkSBal4+a3oKiQTbrwMmN/TBUMDvbdoWDehgOww==}
'@anthropic-ai/claude-agent-sdk@0.2.85':
resolution: {integrity: sha512-/ohKLtP1zy6aWXLW/9KTYBveJPEtAfdO96qiP1Cl5S7LgVq/qRDUl7AUw5YGrBaK6YWHEE/rfMQZGwP/i5zIvQ==}
engines: {node: '>=18.0.0'}
peerDependencies:
zod: ^4.0.0
'@asamuzakjp/css-color@4.1.1':
resolution: {integrity: sha512-B0Hv6G3gWGMn0xKJ0txEi/jM5iFpT3MfDxmhZFb4W047GvytCf1DHQ1D69W3zHI4yWe2aTZAA0JnbMZ7Xc8DuQ==}
@@ -10080,6 +10092,20 @@ snapshots:
'@antfu/utils@0.7.10': {}
'@anthropic-ai/claude-agent-sdk@0.2.85(zod@3.24.1)':
dependencies:
zod: 3.24.1
optionalDependencies:
'@img/sharp-darwin-arm64': 0.34.5
'@img/sharp-darwin-x64': 0.34.5
'@img/sharp-linux-arm': 0.34.5
'@img/sharp-linux-arm64': 0.34.5
'@img/sharp-linux-x64': 0.34.5
'@img/sharp-linuxmusl-arm64': 0.34.5
'@img/sharp-linuxmusl-x64': 0.34.5
'@img/sharp-win32-arm64': 0.34.5
'@img/sharp-win32-x64': 0.34.5
'@asamuzakjp/css-color@4.1.1':
dependencies:
'@csstools/css-calc': 2.1.4(@csstools/css-parser-algorithms@3.0.5(@csstools/css-tokenizer@3.0.4))(@csstools/css-tokenizer@3.0.4)

540
scripts/qa-agent.ts Normal file
View File

@@ -0,0 +1,540 @@
#!/usr/bin/env tsx
/**
* Hybrid QA Agent — Claude Sonnet 4.6 brain + Gemini 2.5 Flash eyes
*
* Claude plans and reasons. Gemini watches the video buffer and describes
* what it sees. The agent uses 4 tools:
* - observe(seconds, focus) — Gemini reviews last N seconds of video
* - inspect(selector) — search accessibility tree for element state
* - perform(action, params) — execute Playwright action
* - done(verdict, summary) — finish with result
*/
import type { Page } from '@playwright/test'
import { query, tool, createSdkMcpServer } from '@anthropic-ai/claude-agent-sdk'
import { GoogleGenerativeAI } from '@google/generative-ai'
// eslint-disable-next-line import-x/no-unresolved -- zod/v4 is re-exported by claude-agent-sdk
import { z } from 'zod/v4'
import { execSync } from 'child_process'
import { mkdirSync, writeFileSync, readFileSync } from 'fs'
// ── Types ──

/** Configuration for runHybridAgent(). */
interface AgentOptions {
  /** Live Playwright page the agent drives, screenshots, and injects subtitles into. */
  page: Page
  /** Issue/bug-report text injected verbatim into the system prompt. */
  issueContext: string
  /** QA-guide text appended to the system prompt; pass '' to omit the section. */
  qaGuide: string
  /** Directory where clip frames and the observe video clip are written. */
  outputDir: string
  /** API key used for Gemini vision (observe tool). */
  geminiApiKey: string
  /** API key passed to the Claude Agent SDK query. */
  anthropicApiKey: string
  /** Max perform() turns before the agent is told to call done() (default 30). */
  maxTurns?: number
  /** Wall-clock budget in milliseconds before wrap-up (default 120_000). */
  timeBudgetMs?: number
}
/** One captured screenshot with its offset from recording start. */
interface ScreenshotFrame {
  /** Milliseconds since VideoBuffer.start() was called. */
  timestampMs: number
  /** JPEG bytes, base64-encoded. */
  base64: string
}

// ── Video buffer ──
const FRAME_INTERVAL_MS = 2000
const MAX_BUFFER_FRAMES = 30 // 60 seconds at 2fps

/**
 * Rolling screenshot buffer: captures a JPEG of the page every
 * FRAME_INTERVAL_MS and keeps at most MAX_BUFFER_FRAMES frames (~60s).
 * Frames can be filtered by recency or composed into an mp4 clip via ffmpeg.
 */
class VideoBuffer {
  private frames: ScreenshotFrame[] = []
  private startMs = Date.now()
  private intervalId: ReturnType<typeof setInterval> | null = null
  private page: Page

  constructor(page: Page) {
    this.page = page
  }

  /**
   * Begin periodic capture. Safe to call more than once: any previously
   * running timer is cleared first so repeated start() calls cannot leak
   * intervals that keep screenshotting (and keep the process alive).
   */
  start() {
    this.stop()
    this.startMs = Date.now()
    this.intervalId = setInterval(async () => {
      try {
        const buf = await this.page.screenshot({
          type: 'jpeg',
          quality: 60
        })
        this.frames.push({
          timestampMs: Date.now() - this.startMs,
          base64: buf.toString('base64')
        })
        // Ring-buffer behaviour: drop the oldest frame once over capacity.
        if (this.frames.length > MAX_BUFFER_FRAMES) {
          this.frames.shift()
        }
      } catch {
        // page may be navigating
      }
    }, FRAME_INTERVAL_MS)
  }

  /** Stop periodic capture. Idempotent. */
  stop() {
    if (this.intervalId) {
      clearInterval(this.intervalId)
      this.intervalId = null
    }
  }

  /** Frames captured within the last `seconds` seconds (oldest first). */
  getLastFrames(seconds: number): ScreenshotFrame[] {
    const cutoffMs = Date.now() - this.startMs - seconds * 1000
    return this.frames.filter((f) => f.timestampMs >= cutoffMs)
  }

  /**
   * Compose the last `seconds` of frames into an mp4 clip.
   *
   * @returns the clip bytes, or null when fewer than 2 frames are buffered
   *   or ffmpeg fails / is not installed.
   */
  async buildVideoClip(
    seconds: number,
    outputDir: string
  ): Promise<Buffer | null> {
    const frames = this.getLastFrames(seconds)
    if (frames.length < 2) return null
    const clipDir = `${outputDir}/.clip-frames`
    mkdirSync(clipDir, { recursive: true })
    // Write frames as numbered JPEGs for ffmpeg's image2 sequence input.
    for (let i = 0; i < frames.length; i++) {
      writeFileSync(
        `${clipDir}/frame-${String(i).padStart(4, '0')}.jpg`,
        Buffer.from(frames[i].base64, 'base64')
      )
    }
    // Compose into video with ffmpeg
    const clipPath = `${outputDir}/.observe-clip.mp4`
    try {
      const fps = Math.max(1, Math.round(frames.length / seconds))
      // NOTE(review): outputDir is interpolated into a shell command — assumes
      // the path contains no quotes/shell metacharacters; verify callers.
      // stdio 'ignore' silences ffmpeg's stderr chatter portably (the previous
      // `2>/dev/null` redirect only worked under a POSIX shell).
      execSync(
        `ffmpeg -y -framerate ${fps} -i "${clipDir}/frame-%04d.jpg" ` +
          `-c:v libx264 -preset ultrafast -pix_fmt yuv420p "${clipPath}"`,
        { timeout: 10000, stdio: ['ignore', 'ignore', 'ignore'] }
      )
      return readFileSync(clipPath)
    } catch {
      return null
    }
  }
}
// ── Gemini Vision ──

/**
 * Ask Gemini to describe the recent on-screen activity.
 *
 * Builds an mp4 clip from the screenshot buffer when possible; otherwise
 * falls back to sending just the most recent frame. The `focus` question is
 * embedded in the prompt so Gemini knows what to look for.
 *
 * @returns Gemini's trimmed textual description.
 */
async function geminiObserve(
  videoBuffer: VideoBuffer,
  seconds: number,
  focus: string,
  outputDir: string,
  geminiApiKey: string
): Promise<string> {
  type Part = { text: string } | { inlineData: { mimeType: string; data: string } }

  const client = new GoogleGenerativeAI(geminiApiKey)
  const visionModel = client.getGenerativeModel({
    model: 'gemini-2.5-flash-preview-05-20'
  })

  const payload: Part[] = [
    {
      text: `You are observing a ComfyUI frontend session. Focus on: ${focus}\n\nDescribe what happened in the last ${seconds} seconds. Be specific about UI state, actions taken, and results.`
    }
  ]

  // Prefer a real video clip; degrade to a single still if ffmpeg/frames fail.
  const clip = await videoBuffer.buildVideoClip(seconds, outputDir)
  if (clip) {
    payload.push({
      inlineData: { mimeType: 'video/mp4', data: clip.toString('base64') }
    })
  } else {
    const recent = videoBuffer.getLastFrames(seconds)
    if (recent.length > 0) {
      const lastFrame = recent[recent.length - 1]
      payload.push({
        inlineData: { mimeType: 'image/jpeg', data: lastFrame.base64 }
      })
    }
  }

  const result = await visionModel.generateContent(payload)
  return result.response.text().trim()
}
// ── Accessibility tree helpers ──

/** Minimal shape of a Playwright accessibility-snapshot node. */
interface A11yNode {
  role: string
  name: string
  value?: string
  checked?: boolean
  disabled?: boolean
  children?: A11yNode[]
}

/**
 * Find the first node whose name or role contains `selector`
 * (case-insensitive substring match), walking the tree in preorder.
 *
 * @returns the matching node, or null when nothing matches.
 */
function searchA11y(node: A11yNode | null, selector: string): A11yNode | null {
  if (!node) return null
  const needle = selector.toLowerCase()
  // Iterative preorder walk: visit a node before its children, children
  // left-to-right — same order as the obvious recursive formulation.
  const pending: A11yNode[] = [node]
  let current: A11yNode | undefined
  while ((current = pending.pop()) !== undefined) {
    const hit =
      current.name?.toLowerCase().includes(needle) ||
      current.role?.toLowerCase().includes(needle)
    if (hit) return current
    if (current.children) {
      for (let i = current.children.length - 1; i >= 0; i--) {
        pending.push(current.children[i])
      }
    }
  }
  return null
}

/**
 * Render the tree as an indented text outline, two spaces per level,
 * truncated at depth 3. Unnamed nodes without attributes are skipped;
 * value/checked/disabled are shown in square brackets.
 */
function flattenA11y(node: A11yNode | null, depth = 0): string {
  if (!node || depth > 3) return ''
  const lines: string[] = []
  const details: string[] = []
  if (node.value !== undefined) details.push(`value="${node.value}"`)
  if (node.checked !== undefined) details.push(`checked=${node.checked}`)
  if (node.disabled) details.push('disabled')
  if (node.name || details.length > 0) {
    const suffix = details.length > 0 ? ` [${details.join(', ')}]` : ''
    lines.push(`${'  '.repeat(depth)}${node.role}: ${node.name || '(unnamed)'}${suffix}`)
  }
  for (const child of node.children ?? []) {
    const rendered = flattenA11y(child, depth + 1)
    if (rendered) lines.push(rendered)
  }
  return lines.join('\n')
}
// ── Subtitle overlay ──

/**
 * Render/update a single-line status subtitle at the bottom of the page so
 * the screen recording shows what the agent is doing on each turn.
 *
 * @param page  Playwright page to inject into
 * @param text  message (truncated to 120 chars; newlines become spaces)
 * @param turn  turn counter shown as a `[n]` prefix
 */
async function showSubtitle(page: Page, text: string, turn: number) {
  // The payload is embedded in a single-quoted JS string literal inside the
  // injected script. encodeURIComponent leaves apostrophes (') unescaped, so
  // a raw ' would terminate that literal early and break the script — escape
  // it as %27 so decodeURIComponent restores it safely. (The previous
  // `\'` pre-escaping did not help: the backslash got encoded to %5C while
  // the quote itself survived unencoded.)
  const encoded = encodeURIComponent(
    text.slice(0, 120).replace(/\n/g, ' ')
  ).replace(/'/g, '%27')
  await page.addScriptTag({
    content: `(function(){
var id='qa-subtitle';
var el=document.getElementById(id);
if(!el){
el=document.createElement('div');
el.id=id;
Object.assign(el.style,{position:'fixed',bottom:'32px',left:'50%',transform:'translateX(-50%)',zIndex:'2147483646',maxWidth:'90%',padding:'6px 14px',borderRadius:'6px',background:'rgba(0,0,0,0.8)',color:'rgba(255,255,255,0.95)',fontSize:'12px',fontFamily:'system-ui,sans-serif',fontWeight:'400',lineHeight:'1.4',pointerEvents:'none',textAlign:'center',transition:'opacity 0.3s',whiteSpace:'normal'});
document.body.appendChild(el);
}
var msg=decodeURIComponent('${encoded}');
el.textContent='['+${turn}+'] '+msg;
el.style.opacity='1';
})()`
  })
}
// ── Main agent ──

/**
 * Run the hybrid QA agent: Claude (driven via the Claude Agent SDK `query`
 * loop) plans and calls tools served over an in-process MCP server, while
 * Gemini provides vision over a rolling screenshot buffer.
 *
 * Tools exposed to Claude:
 * - observe(seconds, focus): Gemini describes the last N seconds of video
 * - inspect(selector): search the a11y snapshot for element state
 * - perform(action, params): run a Playwright action via executeAction()
 * - done(verdict, summary): record the verdict and end the loop
 *
 * @param opts agent configuration (see AgentOptions)
 * @returns final verdict + summary; defaults to INCONCLUSIVE /
 *   "Agent did not complete" if the loop ends without done() being called.
 */
export async function runHybridAgent(opts: AgentOptions): Promise<{
  verdict: string
  summary: string
}> {
  const {
    page,
    issueContext,
    qaGuide,
    outputDir,
    geminiApiKey,
    anthropicApiKey
  } = opts
  const maxTurns = opts.maxTurns ?? 30
  const timeBudgetMs = opts.timeBudgetMs ?? 120_000
  // Start video buffer
  const videoBuffer = new VideoBuffer(page)
  videoBuffer.start()
  // NOTE(review): lastA11ySnapshot is written by inspect() but never read
  // anywhere in this function — dead state; candidate for removal.
  let lastA11ySnapshot: A11yNode | null = null
  // Shared mutable state: the done() tool flips agentDone and records the
  // verdict; the query loop below polls agentDone to break out early.
  let agentDone = false
  let finalVerdict = 'INCONCLUSIVE'
  let finalSummary = 'Agent did not complete'
  let turnCount = 0
  const startTime = Date.now()
  // Import executeAction from qa-record.ts (shared Playwright helpers)
  // For now, inline the action execution
  const { executeAction } = await import('./qa-record.js')
  // Define tools
  const observeTool = tool(
    'observe',
    'Watch the last N seconds of screen recording through Gemini vision. Use this to verify visual state, check if actions had visible effect, or inspect visual bugs. Pass a focused question so Gemini knows what to look for.',
    {
      seconds: z
        .number()
        .min(3)
        .max(60)
        .default(10)
        .describe('How many seconds to look back'),
      focus: z
        .string()
        .describe(
          'What to look for — be specific, e.g. "Did the Nodes 2.0 toggle switch to ON?"'
        )
    },
    async (args) => {
      const description = await geminiObserve(
        videoBuffer,
        args.seconds,
        args.focus,
        outputDir,
        geminiApiKey
      )
      return { content: [{ type: 'text' as const, text: description }] }
    }
  )
  const inspectTool = tool(
    'inspect',
    'Search the accessibility tree for a specific UI element. Returns its role, name, value, checked state. Fast and precise — use this to verify element state without vision.',
    {
      selector: z
        .string()
        .describe(
          'Element name or role to search for, e.g. "Nodes 2.0", "KSampler seed", "Run button"'
        )
    },
    async (args) => {
      try {
        // Fresh a11y snapshot on every call — the tree changes as the UI does.
        const snapshot =
          (await page.accessibility.snapshot()) as A11yNode | null
        lastA11ySnapshot = snapshot
        const found = searchA11y(snapshot, args.selector)
        if (found) {
          return {
            content: [
              {
                type: 'text' as const,
                text: JSON.stringify({
                  role: found.role,
                  name: found.name,
                  value: found.value,
                  checked: found.checked,
                  disabled: found.disabled,
                  hasChildren: Boolean(found.children?.length)
                })
              }
            ]
          }
        }
        // Return nearby elements if exact match not found
        const tree = flattenA11y(snapshot, 0).slice(0, 2000)
        return {
          content: [
            {
              type: 'text' as const,
              text: `Element "${args.selector}" not found. Available elements:\n${tree}`
            }
          ]
        }
      } catch (e) {
        return {
          content: [
            {
              type: 'text' as const,
              text: `inspect failed: ${e instanceof Error ? e.message : e}`
            }
          ]
        }
      }
    }
  )
  const performTool = tool(
    'perform',
    `Execute a Playwright action on the ComfyUI page. Available actions:
- click(text): click element by visible text
- clickCanvas(x, y): click at coordinates
- rightClickCanvas(x, y): right-click at coordinates
- doubleClick(x, y): double-click at coordinates
- dragCanvas(fromX, fromY, toX, toY): drag between points
- scrollCanvas(x, y, deltaY): scroll wheel (negative=zoom in)
- pressKey(key): press keyboard key (Escape, Enter, Delete, Control+c, etc.)
- fillDialog(text): fill input and press Enter
- openMenu(): open hamburger menu
- hoverMenuItem(label): hover menu item
- clickMenuItem(label): click submenu item
- setSetting(id, value): change a ComfyUI setting
- loadDefaultWorkflow(): load the 7-node default workflow
- openSettings(): open Settings dialog
- reload(): reload the page
- addNode(nodeName, x, y): add a node via search
- copyPaste(x, y): Ctrl+C then Ctrl+V at coords
- holdKeyAndDrag(key, fromX, fromY, toX, toY): hold key while dragging
- screenshot(name): take a named screenshot`,
    {
      action: z.string().describe('Action name'),
      params: z
        .record(z.unknown())
        .optional()
        .describe('Action parameters as key-value pairs')
    },
    async (args) => {
      // Budget accounting: only perform() consumes turns; once exhausted the
      // action is refused and Claude is told to call done().
      turnCount++
      if (turnCount > maxTurns || Date.now() - startTime > timeBudgetMs) {
        return {
          content: [
            {
              type: 'text' as const,
              text: `Budget exceeded (${turnCount}/${maxTurns} turns, ${Math.round((Date.now() - startTime) / 1000)}s). Use done() now.`
            }
          ]
        }
      }
      // Build TestAction object from args
      const actionObj = { action: args.action, ...args.params } as Parameters<
        typeof executeAction
      >[1]
      try {
        const result = await executeAction(page, actionObj, outputDir)
        // Show subtitle
        await showSubtitle(
          page,
          `${args.action}: ${result.success ? 'OK' : result.error}`,
          turnCount
        )
        return {
          content: [
            {
              type: 'text' as const,
              text: result.success
                ? `Action "${args.action}" succeeded.`
                : `Action "${args.action}" FAILED: ${result.error}`
            }
          ]
        }
      } catch (e) {
        return {
          content: [
            {
              type: 'text' as const,
              text: `Action "${args.action}" threw: ${e instanceof Error ? e.message : e}`
            }
          ]
        }
      }
    }
  )
  const doneTool = tool(
    'done',
    'Signal that reproduction is complete. Call this when you have either confirmed the bug or determined it cannot be reproduced.',
    {
      verdict: z
        .enum(['REPRODUCED', 'NOT_REPRODUCIBLE', 'INCONCLUSIVE'])
        .describe('Final verdict'),
      summary: z
        .string()
        .describe(
          'One paragraph: what you did, what you observed, and why you reached this verdict'
        )
    },
    async (args) => {
      // Record the result in the closure state; the outer loop breaks on
      // agentDone after the current message is processed.
      agentDone = true
      finalVerdict = args.verdict
      finalSummary = args.summary
      await showSubtitle(page, `DONE: ${args.verdict}`, turnCount)
      return {
        content: [
          {
            type: 'text' as const,
            text: `Agent finished: ${args.verdict}`
          }
        ]
      }
    }
  )
  // Create MCP server with our tools
  const server = createSdkMcpServer({
    name: 'qa-agent',
    version: '1.0.0',
    tools: [observeTool, inspectTool, performTool, doneTool]
  })
  // Build system prompt
  const systemPrompt = `You are a senior QA engineer reproducing a reported bug in ComfyUI, a node-based AI image generation tool.
## Your tools
- observe(seconds, focus) — Gemini AI watches the last N seconds of screen recording and answers your focused question. Use for visual verification.
- inspect(selector) — Search the accessibility tree for a specific element's state. Use for precise state checks (toggle on/off, value, disabled).
- perform(action, params) — Execute a Playwright action on the browser.
- done(verdict, summary) — Finish with your conclusion.
## Strategy
1. Start by understanding the issue, then plan your reproduction steps.
2. Use perform() to take actions. After each action, use inspect() to verify state or observe() for visual confirmation.
3. If a setting change doesn't seem to take effect, try reload() then verify again.
4. Focus on the specific bug — don't explore randomly.
5. Take screenshots at key moments for the video evidence.
6. When you've confirmed or ruled out the bug, call done().
## ComfyUI Layout (1280×720 viewport)
- Canvas with node graph centered at ~(640, 400)
- Hamburger menu top-left (C logo)
- Sidebar: Workflows, Node Library, Models
- Default workflow nodes: Load Checkpoint (~150,300), CLIP Text Encode (~450,250/450), Empty Latent (~450,600), KSampler (~750,350), VAE Decode (~1000,350), Save Image (~1200,350)
${qaGuide ? `## QA Guide\n${qaGuide}\n` : ''}
## Issue to Reproduce
${issueContext}`
  // Run the agent
  console.warn('Starting hybrid agent (Claude Sonnet 4.6 + Gemini vision)...')
  try {
    for await (const message of query({
      prompt:
        'Reproduce the reported bug. Start by reading the issue context in your system prompt, then use your tools to interact with the ComfyUI browser session.',
      options: {
        // NOTE(review): pinned model snapshot — confirm this id is valid for
        // the installed SDK version.
        model: 'claude-sonnet-4-6-20250514',
        systemPrompt,
        // NOTE(review): confirm the SDK accepts apiKey via options rather
        // than only via the ANTHROPIC_API_KEY environment variable.
        apiKey: anthropicApiKey,
        maxTurns,
        mcpServers: { 'qa-agent': server },
        allowedTools: [
          'mcp__qa-agent__observe',
          'mcp__qa-agent__inspect',
          'mcp__qa-agent__perform',
          'mcp__qa-agent__done'
        ]
      }
    })) {
      // Log assistant text and tool calls (truncated) for the CI transcript.
      if (message.type === 'assistant' && message.message?.content) {
        for (const block of message.message.content) {
          if ('text' in block && block.text) {
            console.warn(` Claude: ${block.text.slice(0, 200)}`)
          }
          if ('name' in block) {
            console.warn(
              ` Tool: ${block.name}(${JSON.stringify(block.input).slice(0, 100)})`
            )
          }
        }
      }
      if (agentDone) break
    }
  } catch (e) {
    console.warn(`Agent error: ${e instanceof Error ? e.message : e}`)
  }
  // Always stop the screenshot timer, even after an SDK error.
  videoBuffer.stop()
  return { verdict: finalVerdict, summary: finalSummary }
}

View File

@@ -726,7 +726,7 @@ async function waitForEditorReady(page: Page) {
await sleep(1000)
}
async function executeAction(
export async function executeAction(
page: Page,
step: TestAction,
outputDir: string
@@ -1200,13 +1200,14 @@ function buildPreflightActions(context: string): TestAction[] {
const ctx = context.toLowerCase()
const actions: TestAction[] = []
// Enable Nodes 2.0 if issue mentions it
// Enable Nodes 2.0 if issue mentions it — requires reload to take effect
if (/nodes.*2\.0|vue.*node|new.*node|node.*beta/.test(ctx)) {
actions.push({
action: 'setSetting',
id: 'Comfy.NodeBeta.Enabled',
value: true
})
actions.push({ action: 'reload' })
}
// Load default workflow for most reproduction scenarios
@@ -1412,18 +1413,25 @@ async function runAgenticLoop(
preflightNote
)
const anthropicKey = process.env.ANTHROPIC_API_KEY
const useHybrid = Boolean(anthropicKey)
const genAI = new GoogleGenerativeAI(opts.apiKey)
// Use flash for agentic loop — rapid iteration matters more than reasoning
const geminiVisionModel = genAI.getGenerativeModel({
model: 'gemini-3-flash-preview'
})
// Gemini-only fallback model (used when no ANTHROPIC_API_KEY)
const agenticModel = opts.model.includes('flash')
? opts.model
: 'gemini-3-flash-preview'
const model = genAI.getGenerativeModel({
const geminiOnlyModel = genAI.getGenerativeModel({
model: agenticModel,
systemInstruction
})
console.warn(
`Starting agentic loop with ${agenticModel}` +
`Starting ${useHybrid ? 'hybrid (Claude planner + Gemini vision)' : 'Gemini-only'} agentic loop` +
(subIssue ? ` — focus: ${subIssue.title}` : '')
)
@@ -1827,7 +1835,36 @@ async function main() {
console.warn('Editor ready — starting agentic loop')
recordingStartMs = Date.now()
narrationSegments.length = 0
await runAgenticLoop(page, opts, opts.outputDir, subIssue)
// Use hybrid agent (Claude + Gemini) if ANTHROPIC_API_KEY is available
const anthropicKey = process.env.ANTHROPIC_API_KEY
if (anthropicKey) {
const { runHybridAgent } = await import('./qa-agent.js')
const issueCtx = opts.diffFile
? readFileSync(opts.diffFile, 'utf-8').slice(0, 6000)
: 'No issue context provided'
let qaGuideText = ''
if (opts.qaGuideFile) {
try {
qaGuideText = readFileSync(opts.qaGuideFile, 'utf-8')
} catch {
// QA guide not available
}
}
const result = await runHybridAgent({
page,
issueContext: issueCtx,
qaGuide: qaGuideText,
outputDir: opts.outputDir,
geminiApiKey: opts.apiKey,
anthropicApiKey: anthropicKey
})
console.warn(
`Hybrid agent finished: ${result.verdict}${result.summary.slice(0, 100)}`
)
} else {
await runAgenticLoop(page, opts, opts.outputDir, subIssue)
}
await sleep(2000)
} finally {
await context.close()