mirror of
https://github.com/Comfy-Org/ComfyUI_frontend.git
synced 2026-04-19 22:09:37 +00:00
feat: hybrid QA agent — Claude Sonnet 4.6 brain + Gemini vision
Architecture: - Claude Sonnet 4.6 plans and reasons (via Claude Agent SDK) - Gemini 2.5 Flash watches video buffer and describes what it sees - 4 tools: observe(), inspect(), perform(), done() observe(seconds, focus): builds video clip from screenshot buffer, sends to Gemini with Claude's focused question. inspect(selector): searches a11y tree for specific element state. perform(action, params): executes Playwright action. done(verdict, summary): signals completion. Falls back to Gemini-only loop if ANTHROPIC_API_KEY not set. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
1
.github/workflows/pr-qa.yaml
vendored
1
.github/workflows/pr-qa.yaml
vendored
@@ -290,6 +290,7 @@ jobs:
|
||||
env:
|
||||
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
TARGET_TYPE: ${{ needs.resolve-matrix.outputs.target_type }}
|
||||
run: |
|
||||
MODE="before"
|
||||
|
||||
@@ -123,6 +123,7 @@
|
||||
"zod-validation-error": "catalog:"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@anthropic-ai/claude-agent-sdk": "catalog:",
|
||||
"@eslint/js": "catalog:",
|
||||
"@google/generative-ai": "catalog:",
|
||||
"@intlify/eslint-plugin-vue-i18n": "catalog:",
|
||||
|
||||
26
pnpm-lock.yaml
generated
26
pnpm-lock.yaml
generated
@@ -9,6 +9,9 @@ catalogs:
|
||||
'@alloc/quick-lru':
|
||||
specifier: ^5.2.0
|
||||
version: 5.2.0
|
||||
'@anthropic-ai/claude-agent-sdk':
|
||||
specifier: ^0.2.85
|
||||
version: 0.2.85
|
||||
'@astrojs/vue':
|
||||
specifier: ^5.0.0
|
||||
version: 5.1.4
|
||||
@@ -603,6 +606,9 @@ importers:
|
||||
specifier: 'catalog:'
|
||||
version: 3.3.0(zod@3.24.1)
|
||||
devDependencies:
|
||||
'@anthropic-ai/claude-agent-sdk':
|
||||
specifier: 'catalog:'
|
||||
version: 0.2.85(zod@3.24.1)
|
||||
'@eslint/js':
|
||||
specifier: 'catalog:'
|
||||
version: 9.39.1
|
||||
@@ -1073,6 +1079,12 @@ packages:
|
||||
'@antfu/utils@0.7.10':
|
||||
resolution: {integrity: sha512-+562v9k4aI80m1+VuMHehNJWLOFjBnXn3tdOitzD0il5b7smkSBal4+a3oKiQTbrwMmN/TBUMDvbdoWDehgOww==}
|
||||
|
||||
'@anthropic-ai/claude-agent-sdk@0.2.85':
|
||||
resolution: {integrity: sha512-/ohKLtP1zy6aWXLW/9KTYBveJPEtAfdO96qiP1Cl5S7LgVq/qRDUl7AUw5YGrBaK6YWHEE/rfMQZGwP/i5zIvQ==}
|
||||
engines: {node: '>=18.0.0'}
|
||||
peerDependencies:
|
||||
zod: ^4.0.0
|
||||
|
||||
'@asamuzakjp/css-color@4.1.1':
|
||||
resolution: {integrity: sha512-B0Hv6G3gWGMn0xKJ0txEi/jM5iFpT3MfDxmhZFb4W047GvytCf1DHQ1D69W3zHI4yWe2aTZAA0JnbMZ7Xc8DuQ==}
|
||||
|
||||
@@ -10080,6 +10092,20 @@ snapshots:
|
||||
|
||||
'@antfu/utils@0.7.10': {}
|
||||
|
||||
'@anthropic-ai/claude-agent-sdk@0.2.85(zod@3.24.1)':
|
||||
dependencies:
|
||||
zod: 3.24.1
|
||||
optionalDependencies:
|
||||
'@img/sharp-darwin-arm64': 0.34.5
|
||||
'@img/sharp-darwin-x64': 0.34.5
|
||||
'@img/sharp-linux-arm': 0.34.5
|
||||
'@img/sharp-linux-arm64': 0.34.5
|
||||
'@img/sharp-linux-x64': 0.34.5
|
||||
'@img/sharp-linuxmusl-arm64': 0.34.5
|
||||
'@img/sharp-linuxmusl-x64': 0.34.5
|
||||
'@img/sharp-win32-arm64': 0.34.5
|
||||
'@img/sharp-win32-x64': 0.34.5
|
||||
|
||||
'@asamuzakjp/css-color@4.1.1':
|
||||
dependencies:
|
||||
'@csstools/css-calc': 2.1.4(@csstools/css-parser-algorithms@3.0.5(@csstools/css-tokenizer@3.0.4))(@csstools/css-tokenizer@3.0.4)
|
||||
|
||||
540
scripts/qa-agent.ts
Normal file
540
scripts/qa-agent.ts
Normal file
@@ -0,0 +1,540 @@
|
||||
#!/usr/bin/env tsx
|
||||
/**
|
||||
* Hybrid QA Agent — Claude Sonnet 4.6 brain + Gemini 2.5 Flash eyes
|
||||
*
|
||||
* Claude plans and reasons. Gemini watches the video buffer and describes
|
||||
* what it sees. The agent uses 4 tools:
|
||||
* - observe(seconds, focus) — Gemini reviews last N seconds of video
|
||||
* - inspect(selector) — search accessibility tree for element state
|
||||
* - perform(action, params) — execute Playwright action
|
||||
* - done(verdict, summary) — finish with result
|
||||
*/
|
||||
|
||||
import type { Page } from '@playwright/test'
|
||||
import { query, tool, createSdkMcpServer } from '@anthropic-ai/claude-agent-sdk'
|
||||
import { GoogleGenerativeAI } from '@google/generative-ai'
|
||||
// eslint-disable-next-line import-x/no-unresolved -- zod/v4 is re-exported by claude-agent-sdk
|
||||
import { z } from 'zod/v4'
|
||||
import { execSync } from 'child_process'
|
||||
import { mkdirSync, writeFileSync, readFileSync } from 'fs'
|
||||
|
||||
// ── Types ──
|
||||
|
||||
/** Inputs accepted by `runHybridAgent`. */
interface AgentOptions {
  /** Playwright page the agent screenshots, inspects and drives. */
  page: Page
  /** Issue text embedded in the system prompt under "Issue to Reproduce". */
  issueContext: string
  /** Extra QA guidance for the system prompt; an empty string omits the section. */
  qaGuide: string
  /** Directory handed to `executeAction` and used for temporary clip files. */
  outputDir: string
  /** API key used to construct the Gemini client for `observe`. */
  geminiApiKey: string
  /** API key passed to the Claude agent query options. */
  anthropicApiKey: string
  /** Maximum number of turns; defaults to 30 when omitted. */
  maxTurns?: number
  /**
   * Wall-clock budget in milliseconds; defaults to 120 000 when omitted.
   * It is checked when `perform` is called, not continuously.
   */
  timeBudgetMs?: number
}
|
||||
|
||||
/** One captured screenshot held in the rolling buffer. */
interface ScreenshotFrame {
  /** Milliseconds elapsed since the buffer was started. */
  timestampMs: number
  /** JPEG image data, base64-encoded. */
  base64: string
}

// ── Video buffer ──

/** Delay between two screenshot captures. */
const FRAME_INTERVAL_MS = 2000
/** Rolling window size: 30 frames × 2 s per frame = 60 seconds of history (0.5 fps). */
const MAX_BUFFER_FRAMES = 30

/**
 * Rolling buffer of periodic page screenshots that can be stitched into a
 * short MP4 clip with ffmpeg.
 */
class VideoBuffer {
  private frames: ScreenshotFrame[] = []
  private startMs = Date.now()
  private intervalId: ReturnType<typeof setInterval> | null = null
  private page: Page

  constructor(page: Page) {
    this.page = page
  }

  /**
   * Begins capturing one screenshot every `FRAME_INTERVAL_MS`.
   * Calling it again restarts capture instead of leaking the previous timer.
   */
  start() {
    this.stop()
    // Timestamps are relative to startMs, so frames from an earlier run
    // would be inconsistent with the new origin.
    this.frames = []
    this.startMs = Date.now()
    this.intervalId = setInterval(() => {
      void this.captureFrame()
    }, FRAME_INTERVAL_MS)
  }

  /** Stops capturing. Safe to call repeatedly or before `start()`. */
  stop() {
    if (this.intervalId !== null) {
      clearInterval(this.intervalId)
      this.intervalId = null
    }
  }

  /** Returns the buffered frames captured within the last `seconds` seconds. */
  getLastFrames(seconds: number): ScreenshotFrame[] {
    const cutoffMs = Date.now() - this.startMs - seconds * 1000
    return this.frames.filter((f) => f.timestampMs >= cutoffMs)
  }

  /**
   * Encodes the frames from the last `seconds` seconds into an MP4.
   *
   * @param seconds How far back to look.
   * @param outputDir Directory that receives the temporary frame files and clip.
   * @returns The encoded clip, or `null` when fewer than two frames are
   *   available or ffmpeg fails.
   */
  async buildVideoClip(
    seconds: number,
    outputDir: string
  ): Promise<Buffer | null> {
    const frames = this.getLastFrames(seconds)
    if (frames.length < 2) return null

    const clipDir = `${outputDir}/.clip-frames`
    mkdirSync(clipDir, { recursive: true })

    // Write frames as numbered JPEGs
    frames.forEach((frame, i) => {
      writeFileSync(
        `${clipDir}/frame-${String(i).padStart(4, '0')}.jpg`,
        Buffer.from(frame.base64, 'base64')
      )
    })

    // Compose into video with ffmpeg
    const clipPath = `${outputDir}/.observe-clip.mp4`
    try {
      const fps = Math.max(1, Math.round(frames.length / seconds))
      // The frame directory is reused between calls, so an earlier, longer
      // clip can leave higher-numbered JPEGs behind. `-frames:v` limits the
      // output to the frames written above so stale images are never appended.
      execSync(
        `ffmpeg -y -framerate ${fps} -i "${clipDir}/frame-%04d.jpg" ` +
          `-frames:v ${frames.length} ` +
          `-c:v libx264 -preset ultrafast -pix_fmt yuv420p "${clipPath}" 2>/dev/null`,
        { timeout: 10000 }
      )
      return readFileSync(clipPath)
    } catch {
      // ffmpeg missing, timed out, or failed — callers fall back to a still frame
      return null
    }
  }

  /** Takes one JPEG screenshot and appends it, evicting the oldest frame when full. */
  private async captureFrame(): Promise<void> {
    try {
      const buf = await this.page.screenshot({
        type: 'jpeg',
        quality: 60
      })
      this.frames.push({
        timestampMs: Date.now() - this.startMs,
        base64: buf.toString('base64')
      })
      if (this.frames.length > MAX_BUFFER_FRAMES) {
        this.frames.shift()
      }
    } catch {
      // page may be navigating
    }
  }
}
|
||||
|
||||
// ── Gemini Vision ──
|
||||
|
||||
/**
 * Asks Gemini to describe recent screen activity.
 *
 * A video clip built from the buffer is preferred; when no clip can be built
 * the newest single frame is sent instead.
 *
 * @param videoBuffer Source of buffered screenshots.
 * @param seconds How far back to look.
 * @param focus Question or topic Gemini should concentrate on.
 * @param outputDir Directory used for temporary clip files.
 * @param geminiApiKey API key for the Gemini client.
 * @returns Gemini's trimmed description, or an explanatory message when no
 *   frame has been captured yet.
 */
async function geminiObserve(
  videoBuffer: VideoBuffer,
  seconds: number,
  focus: string,
  outputDir: string,
  geminiApiKey: string
): Promise<string> {
  // Try video clip first, fall back to last frame
  const clip = await videoBuffer.buildVideoClip(seconds, outputDir)

  let media: { mimeType: string; data: string } | null = null
  if (clip) {
    media = { mimeType: 'video/mp4', data: clip.toString('base64') }
  } else {
    const recent = videoBuffer.getLastFrames(seconds)
    if (recent.length > 0) {
      media = {
        mimeType: 'image/jpeg',
        data: recent[recent.length - 1].base64
      }
    }
  }

  // Without any visual input the model could only invent an answer, so
  // report the situation instead of sending a text-only prompt.
  if (!media) {
    return `No screen frames were captured in the last ${seconds} seconds. Wait a few seconds and call observe() again, or use inspect().`
  }

  const genAI = new GoogleGenerativeAI(geminiApiKey)
  const model = genAI.getGenerativeModel({
    model: 'gemini-2.5-flash-preview-05-20'
  })

  const parts: Array<
    { text: string } | { inlineData: { mimeType: string; data: string } }
  > = [
    {
      text: `You are observing a ComfyUI frontend session. Focus on: ${focus}\n\nDescribe what happened in the last ${seconds} seconds. Be specific about UI state, actions taken, and results.`
    },
    { inlineData: media }
  ]

  const result = await model.generateContent(parts)
  return result.response.text().trim()
}
|
||||
|
||||
// ── Accessibility tree helpers ──
|
||||
|
||||
/** Subset of an accessibility-tree snapshot node used by the helpers below. */
interface A11yNode {
  role: string
  name: string
  value?: string
  checked?: boolean
  disabled?: boolean
  children?: A11yNode[]
}

/**
 * Depth-first search for the first node whose name or role contains
 * `selector` (case-insensitive substring match).
 *
 * @param node Root of the tree to search; `null` yields `null`.
 * @param selector Text to look for. Surrounding whitespace is ignored, and a
 *   blank selector matches nothing (it would otherwise match the root node).
 * @returns The first matching node in document order, or `null`.
 */
function searchA11y(node: A11yNode | null, selector: string): A11yNode | null {
  // Normalise once instead of on every recursive call.
  const needle = selector.trim().toLowerCase()
  if (!node || needle === '') return null

  const visit = (current: A11yNode): A11yNode | null => {
    // Match by name or role
    if (
      current.name?.toLowerCase().includes(needle) ||
      current.role?.toLowerCase().includes(needle)
    ) {
      return current
    }
    // Recurse into children
    for (const child of current.children ?? []) {
      const hit = visit(child)
      if (hit) return hit
    }
    return null
  }

  return visit(node)
}

/**
 * Renders the tree as indented `role: name [attrs]` lines.
 *
 * Nodes with neither a name nor any attribute are omitted, but their children
 * are still rendered.
 *
 * @param node Root node; `null` yields an empty string.
 * @param depth Current nesting level, used for indentation.
 * @param maxDepth Deepest level that is still rendered (default 3).
 * @returns Newline-separated text, or `''` when nothing is rendered.
 */
function flattenA11y(
  node: A11yNode | null,
  depth = 0,
  maxDepth = 3
): string {
  if (!node || depth > maxDepth) return ''

  const attrs: string[] = []
  if (node.value !== undefined) attrs.push(`value="${node.value}"`)
  if (node.checked !== undefined) attrs.push(`checked=${node.checked}`)
  if (node.disabled) attrs.push('disabled')

  const lines: string[] = []
  if (node.name || attrs.length) {
    const attrStr = attrs.length ? ` [${attrs.join(', ')}]` : ''
    lines.push(
      `${' '.repeat(depth)}${node.role}: ${node.name || '(unnamed)'}${attrStr}`
    )
  }

  for (const child of node.children ?? []) {
    const rendered = flattenA11y(child, depth + 1, maxDepth)
    // Skip subtrees that rendered nothing so no blank lines appear.
    if (rendered) lines.push(rendered)
  }
  return lines.join('\n')
}
|
||||
|
||||
// ── Subtitle overlay ──
|
||||
|
||||
/**
 * Shows (or updates) a fixed subtitle bar at the bottom of the page so the
 * recording explains what the agent is doing.
 *
 * @param page Page to inject the overlay into.
 * @param text Message to show; truncated to 120 characters, newlines become spaces.
 * @param turn Turn number shown as a `[n]` prefix.
 */
async function showSubtitle(page: Page, text: string, turn: number) {
  // The message is embedded in a single-quoted JS literal inside the injected
  // script. encodeURIComponent leaves apostrophes untouched, so they must be
  // percent-encoded explicitly or any "'" in the text ends the literal early.
  const encoded = encodeURIComponent(
    text.slice(0, 120).replace(/\n/g, ' ')
  ).replace(/'/g, '%27')
  await page.addScriptTag({
    content: `(function(){
  var id='qa-subtitle';
  var el=document.getElementById(id);
  if(!el){
    el=document.createElement('div');
    el.id=id;
    Object.assign(el.style,{position:'fixed',bottom:'32px',left:'50%',transform:'translateX(-50%)',zIndex:'2147483646',maxWidth:'90%',padding:'6px 14px',borderRadius:'6px',background:'rgba(0,0,0,0.8)',color:'rgba(255,255,255,0.95)',fontSize:'12px',fontFamily:'system-ui,sans-serif',fontWeight:'400',lineHeight:'1.4',pointerEvents:'none',textAlign:'center',transition:'opacity 0.3s',whiteSpace:'normal'});
    document.body.appendChild(el);
  }
  var msg=decodeURIComponent('${encoded}');
  el.textContent='['+${turn}+'] '+msg;
  el.style.opacity='1';
})()`
  })
}
|
||||
|
||||
// ── Main agent ──
|
||||
|
||||
/** Wraps plain text in the content shape every tool handler returns. */
function textResult(text: string) {
  return { content: [{ type: 'text' as const, text }] }
}

/** Renders a caught value as a short message. */
function describeError(e: unknown): string {
  return e instanceof Error ? e.message : String(e)
}

/**
 * Runs the hybrid QA agent: Claude plans and calls tools, Gemini describes the
 * buffered screen recording, and Playwright actions are executed through
 * `executeAction` from `qa-record`.
 *
 * @param opts Page, prompt context, API keys and optional budgets
 *   (`maxTurns` defaults to 30, `timeBudgetMs` to 120 000).
 * @returns The verdict and summary passed to the `done` tool. When the agent
 *   never calls `done`, the verdict is `INCONCLUSIVE` and the summary says it
 *   did not complete (including the error message if the run threw).
 */
export async function runHybridAgent(opts: AgentOptions): Promise<{
  verdict: string
  summary: string
}> {
  const {
    page,
    issueContext,
    qaGuide,
    outputDir,
    geminiApiKey,
    anthropicApiKey
  } = opts
  const maxTurns = opts.maxTurns ?? 30
  const timeBudgetMs = opts.timeBudgetMs ?? 120_000

  let agentDone = false
  let finalVerdict = 'INCONCLUSIVE'
  let finalSummary = 'Agent did not complete'
  let turnCount = 0

  // Start video buffer
  const videoBuffer = new VideoBuffer(page)
  videoBuffer.start()
  const startTime = Date.now()

  // Everything after start() runs inside try/finally so the capture timer is
  // cleared even when the import or tool setup throws.
  try {
    // Shared Playwright action executor from qa-record.ts
    const { executeAction } = await import('./qa-record.js')

    // Define tools
    const observeTool = tool(
      'observe',
      'Watch the last N seconds of screen recording through Gemini vision. Use this to verify visual state, check if actions had visible effect, or inspect visual bugs. Pass a focused question so Gemini knows what to look for.',
      {
        seconds: z
          .number()
          .min(3)
          .max(60)
          .default(10)
          .describe('How many seconds to look back'),
        focus: z
          .string()
          .describe(
            'What to look for — be specific, e.g. "Did the Nodes 2.0 toggle switch to ON?"'
          )
      },
      async (args) => {
        // Report Gemini failures as tool output, like inspect and perform do.
        try {
          const description = await geminiObserve(
            videoBuffer,
            args.seconds,
            args.focus,
            outputDir,
            geminiApiKey
          )
          return textResult(description)
        } catch (e) {
          return textResult(`observe failed: ${describeError(e)}`)
        }
      }
    )

    const inspectTool = tool(
      'inspect',
      'Search the accessibility tree for a specific UI element. Returns its role, name, value, checked state. Fast and precise — use this to verify element state without vision.',
      {
        selector: z
          .string()
          .describe(
            'Element name or role to search for, e.g. "Nodes 2.0", "KSampler seed", "Run button"'
          )
      },
      async (args) => {
        try {
          const snapshot =
            (await page.accessibility.snapshot()) as A11yNode | null
          const found = searchA11y(snapshot, args.selector)
          if (found) {
            return textResult(
              JSON.stringify({
                role: found.role,
                name: found.name,
                value: found.value,
                checked: found.checked,
                disabled: found.disabled,
                hasChildren: Boolean(found.children?.length)
              })
            )
          }
          // Return an overview of the tree if no match was found
          const tree = flattenA11y(snapshot, 0).slice(0, 2000)
          return textResult(
            `Element "${args.selector}" not found. Available elements:\n${tree}`
          )
        } catch (e) {
          return textResult(`inspect failed: ${describeError(e)}`)
        }
      }
    )

    const performTool = tool(
      'perform',
      `Execute a Playwright action on the ComfyUI page. Available actions:
- click(text): click element by visible text
- clickCanvas(x, y): click at coordinates
- rightClickCanvas(x, y): right-click at coordinates
- doubleClick(x, y): double-click at coordinates
- dragCanvas(fromX, fromY, toX, toY): drag between points
- scrollCanvas(x, y, deltaY): scroll wheel (negative=zoom in)
- pressKey(key): press keyboard key (Escape, Enter, Delete, Control+c, etc.)
- fillDialog(text): fill input and press Enter
- openMenu(): open hamburger menu
- hoverMenuItem(label): hover menu item
- clickMenuItem(label): click submenu item
- setSetting(id, value): change a ComfyUI setting
- loadDefaultWorkflow(): load the 7-node default workflow
- openSettings(): open Settings dialog
- reload(): reload the page
- addNode(nodeName, x, y): add a node via search
- copyPaste(x, y): Ctrl+C then Ctrl+V at coords
- holdKeyAndDrag(key, fromX, fromY, toX, toY): hold key while dragging
- screenshot(name): take a named screenshot`,
      {
        action: z.string().describe('Action name'),
        params: z
          // zod v4 requires both a key schema and a value schema
          .record(z.string(), z.unknown())
          .optional()
          .describe('Action parameters as key-value pairs')
      },
      async (args) => {
        turnCount++
        if (turnCount > maxTurns || Date.now() - startTime > timeBudgetMs) {
          return textResult(
            `Budget exceeded (${turnCount}/${maxTurns} turns, ${Math.round((Date.now() - startTime) / 1000)}s). Use done() now.`
          )
        }

        // Build TestAction object from args. `action` is spread last so a
        // stray "action" key inside params cannot replace the requested action.
        const actionObj = { ...args.params, action: args.action } as Parameters<
          typeof executeAction
        >[1]

        try {
          const result = await executeAction(page, actionObj, outputDir)
          // Show subtitle
          await showSubtitle(
            page,
            `${args.action}: ${result.success ? 'OK' : result.error}`,
            turnCount
          )
          return textResult(
            result.success
              ? `Action "${args.action}" succeeded.`
              : `Action "${args.action}" FAILED: ${result.error}`
          )
        } catch (e) {
          return textResult(
            `Action "${args.action}" threw: ${describeError(e)}`
          )
        }
      }
    )

    const doneTool = tool(
      'done',
      'Signal that reproduction is complete. Call this when you have either confirmed the bug or determined it cannot be reproduced.',
      {
        verdict: z
          .enum(['REPRODUCED', 'NOT_REPRODUCIBLE', 'INCONCLUSIVE'])
          .describe('Final verdict'),
        summary: z
          .string()
          .describe(
            'One paragraph: what you did, what you observed, and why you reached this verdict'
          )
      },
      async (args) => {
        agentDone = true
        finalVerdict = args.verdict
        finalSummary = args.summary
        await showSubtitle(page, `DONE: ${args.verdict}`, turnCount)
        return textResult(`Agent finished: ${args.verdict}`)
      }
    )

    // Create MCP server with our tools
    const server = createSdkMcpServer({
      name: 'qa-agent',
      version: '1.0.0',
      tools: [observeTool, inspectTool, performTool, doneTool]
    })

    // Build system prompt
    const systemPrompt = `You are a senior QA engineer reproducing a reported bug in ComfyUI, a node-based AI image generation tool.

## Your tools
- observe(seconds, focus) — Gemini AI watches the last N seconds of screen recording and answers your focused question. Use for visual verification.
- inspect(selector) — Search the accessibility tree for a specific element's state. Use for precise state checks (toggle on/off, value, disabled).
- perform(action, params) — Execute a Playwright action on the browser.
- done(verdict, summary) — Finish with your conclusion.

## Strategy
1. Start by understanding the issue, then plan your reproduction steps.
2. Use perform() to take actions. After each action, use inspect() to verify state or observe() for visual confirmation.
3. If a setting change doesn't seem to take effect, try reload() then verify again.
4. Focus on the specific bug — don't explore randomly.
5. Take screenshots at key moments for the video evidence.
6. When you've confirmed or ruled out the bug, call done().

## ComfyUI Layout (1280×720 viewport)
- Canvas with node graph centered at ~(640, 400)
- Hamburger menu top-left (C logo)
- Sidebar: Workflows, Node Library, Models
- Default workflow nodes: Load Checkpoint (~150,300), CLIP Text Encode (~450,250/450), Empty Latent (~450,600), KSampler (~750,350), VAE Decode (~1000,350), Save Image (~1200,350)

${qaGuide ? `## QA Guide\n${qaGuide}\n` : ''}
## Issue to Reproduce
${issueContext}`

    // Run the agent
    console.warn('Starting hybrid agent (Claude Sonnet 4.6 + Gemini vision)...')

    try {
      for await (const message of query({
        prompt:
          'Reproduce the reported bug. Start by reading the issue context in your system prompt, then use your tools to interact with the ComfyUI browser session.',
        options: {
          model: 'claude-sonnet-4-6-20250514',
          systemPrompt,
          // NOTE(review): confirm the installed SDK's query options accept
          // `apiKey`; the key may need to be supplied via the environment.
          apiKey: anthropicApiKey,
          maxTurns,
          mcpServers: { 'qa-agent': server },
          allowedTools: [
            'mcp__qa-agent__observe',
            'mcp__qa-agent__inspect',
            'mcp__qa-agent__perform',
            'mcp__qa-agent__done'
          ]
        }
      })) {
        if (message.type === 'assistant' && message.message?.content) {
          for (const block of message.message.content) {
            if ('text' in block && block.text) {
              console.warn(` Claude: ${block.text.slice(0, 200)}`)
            }
            if ('name' in block) {
              // JSON.stringify yields undefined for undefined input
              const inputJson = JSON.stringify(block.input) ?? ''
              console.warn(` Tool: ${block.name}(${inputJson.slice(0, 100)})`)
            }
          }
        }
        if (agentDone) break
      }
    } catch (e) {
      const reason = describeError(e)
      console.warn(`Agent error: ${reason}`)
      // Keep the failure reason in the result unless done() already ran.
      if (!agentDone) finalSummary = `Agent did not complete: ${reason}`
    }
  } finally {
    videoBuffer.stop()
  }

  return { verdict: finalVerdict, summary: finalSummary }
}
|
||||
@@ -726,7 +726,7 @@ async function waitForEditorReady(page: Page) {
|
||||
await sleep(1000)
|
||||
}
|
||||
|
||||
async function executeAction(
|
||||
export async function executeAction(
|
||||
page: Page,
|
||||
step: TestAction,
|
||||
outputDir: string
|
||||
@@ -1200,13 +1200,14 @@ function buildPreflightActions(context: string): TestAction[] {
|
||||
const ctx = context.toLowerCase()
|
||||
const actions: TestAction[] = []
|
||||
|
||||
// Enable Nodes 2.0 if issue mentions it
|
||||
// Enable Nodes 2.0 if issue mentions it — requires reload to take effect
|
||||
if (/nodes.*2\.0|vue.*node|new.*node|node.*beta/.test(ctx)) {
|
||||
actions.push({
|
||||
action: 'setSetting',
|
||||
id: 'Comfy.NodeBeta.Enabled',
|
||||
value: true
|
||||
})
|
||||
actions.push({ action: 'reload' })
|
||||
}
|
||||
|
||||
// Load default workflow for most reproduction scenarios
|
||||
@@ -1412,18 +1413,25 @@ async function runAgenticLoop(
|
||||
preflightNote
|
||||
)
|
||||
|
||||
const anthropicKey = process.env.ANTHROPIC_API_KEY
|
||||
const useHybrid = Boolean(anthropicKey)
|
||||
|
||||
const genAI = new GoogleGenerativeAI(opts.apiKey)
|
||||
// Use flash for agentic loop — rapid iteration matters more than reasoning
|
||||
const geminiVisionModel = genAI.getGenerativeModel({
|
||||
model: 'gemini-3-flash-preview'
|
||||
})
|
||||
|
||||
// Gemini-only fallback model (used when no ANTHROPIC_API_KEY)
|
||||
const agenticModel = opts.model.includes('flash')
|
||||
? opts.model
|
||||
: 'gemini-3-flash-preview'
|
||||
const model = genAI.getGenerativeModel({
|
||||
const geminiOnlyModel = genAI.getGenerativeModel({
|
||||
model: agenticModel,
|
||||
systemInstruction
|
||||
})
|
||||
|
||||
console.warn(
|
||||
`Starting agentic loop with ${agenticModel}` +
|
||||
`Starting ${useHybrid ? 'hybrid (Claude planner + Gemini vision)' : 'Gemini-only'} agentic loop` +
|
||||
(subIssue ? ` — focus: ${subIssue.title}` : '')
|
||||
)
|
||||
|
||||
@@ -1827,7 +1835,36 @@ async function main() {
|
||||
console.warn('Editor ready — starting agentic loop')
|
||||
recordingStartMs = Date.now()
|
||||
narrationSegments.length = 0
|
||||
await runAgenticLoop(page, opts, opts.outputDir, subIssue)
|
||||
|
||||
// Use hybrid agent (Claude + Gemini) if ANTHROPIC_API_KEY is available
|
||||
const anthropicKey = process.env.ANTHROPIC_API_KEY
|
||||
if (anthropicKey) {
|
||||
const { runHybridAgent } = await import('./qa-agent.js')
|
||||
const issueCtx = opts.diffFile
|
||||
? readFileSync(opts.diffFile, 'utf-8').slice(0, 6000)
|
||||
: 'No issue context provided'
|
||||
let qaGuideText = ''
|
||||
if (opts.qaGuideFile) {
|
||||
try {
|
||||
qaGuideText = readFileSync(opts.qaGuideFile, 'utf-8')
|
||||
} catch {
|
||||
// QA guide not available
|
||||
}
|
||||
}
|
||||
const result = await runHybridAgent({
|
||||
page,
|
||||
issueContext: issueCtx,
|
||||
qaGuide: qaGuideText,
|
||||
outputDir: opts.outputDir,
|
||||
geminiApiKey: opts.apiKey,
|
||||
anthropicApiKey: anthropicKey
|
||||
})
|
||||
console.warn(
|
||||
`Hybrid agent finished: ${result.verdict} — ${result.summary.slice(0, 100)}`
|
||||
)
|
||||
} else {
|
||||
await runAgenticLoop(page, opts, opts.outputDir, subIssue)
|
||||
}
|
||||
await sleep(2000)
|
||||
} finally {
|
||||
await context.close()
|
||||
|
||||
Reference in New Issue
Block a user