mirror of
https://github.com/Comfy-Org/ComfyUI_frontend.git
synced 2026-04-20 06:20:11 +00:00
feat: upgrade QA pipeline to Gemini 3.x models
- qa-record.ts, qa-analyze-pr.ts: gemini-2.5-flash/pro → gemini-3.1-pro-preview - qa-video-review.ts, qa-generate-test.ts: gemini-2.5-flash → gemini-3-flash-preview - pr-qa.yaml: update hardcoded model reference - Add docs/qa/models.md with model comparison and rationale
This commit is contained in:
2
.github/workflows/pr-qa.yaml
vendored
2
.github/workflows/pr-qa.yaml
vendored
@@ -672,7 +672,7 @@ jobs:
|
||||
--artifacts-dir qa-artifacts \
|
||||
--output-dir video-reviews \
|
||||
--video-file "$vid" \
|
||||
--model gemini-2.5-flash $PR_CTX_FLAG $BEFORE_FLAG $TARGET_URL_FLAG || true
|
||||
--model gemini-3-flash-preview $PR_CTX_FLAG $BEFORE_FLAG $TARGET_URL_FLAG || true
|
||||
echo "::endgroup::"
|
||||
done
|
||||
|
||||
|
||||
58
docs/qa/models.md
Normal file
58
docs/qa/models.md
Normal file
@@ -0,0 +1,58 @@
|
||||
# QA Pipeline Model Selection
|
||||
|
||||
## Current Configuration
|
||||
|
||||
| Script | Role | Model | Why |
|
||||
|---|---|---|---|
|
||||
| `qa-analyze-pr.ts` | PR/issue analysis, QA guide generation | `gemini-3.1-pro-preview` | Needs deep reasoning over PR diffs, screenshots, and issue threads |
|
||||
| `qa-record.ts` | Playwright step generation | `gemini-3.1-pro-preview` | Step quality is critical — must understand ComfyUI's canvas UI and produce precise action sequences |
|
||||
| `qa-video-review.ts` | Video comparison review | `gemini-3-flash-preview` | Video analysis with structured output; flash is sufficient and faster |
|
||||
| `qa-generate-test.ts` | Regression test generation | `gemini-3-flash-preview` | Code generation from QA reports; flash handles this well |
|
||||
|
||||
## Model Comparison
|
||||
|
||||
### Gemini 3.1 Pro vs GPT-5.4
|
||||
|
||||
| | Gemini 3.1 Pro Preview | GPT-5.4 |
|
||||
|---|---|---|
|
||||
| Context window | 1M tokens | 1M tokens |
|
||||
| Max output | 65K tokens | 128K tokens |
|
||||
| Video input | Yes | No |
|
||||
| Image input | Yes | Yes |
|
||||
| Audio input | Yes | No |
|
||||
| Pricing (input) | $2/1M tokens | $2.50/1M tokens |
|
||||
| Pricing (output) | $12/1M tokens | $15/1M tokens |
|
||||
| Function calling | Yes | Yes |
|
||||
| Code execution | Yes | Yes (interpreter) |
|
||||
| Structured output | Yes | Yes |
|
||||
|
||||
**Why Gemini over GPT for QA:**
|
||||
- Native video understanding (can review recordings directly)
|
||||
- Lower cost at comparable quality
|
||||
- Native multimodal input (screenshots, videos, audio from issue threads)
|
||||
- Better price/performance for high-volume CI usage
|
||||
|
||||
### Gemini 3 Flash vs GPT-5.4 Mini
|
||||
|
||||
| | Gemini 3 Flash Preview | GPT-5.4 Mini |
|
||||
|---|---|---|
|
||||
| Context window | 1M tokens | 1M tokens |
|
||||
| Pricing (input) | $0.50/1M tokens | $0.40/1M tokens |
|
||||
| Pricing (output) | $3/1M tokens | $1.60/1M tokens |
|
||||
| Video input | Yes | No |
|
||||
|
||||
**Why Gemini Flash for video review:**
|
||||
- Video input support is required — GPT models cannot process video files
|
||||
- Good enough quality for structured comparison reports
|
||||
|
||||
## Upgrade History
|
||||
|
||||
| Date | Change | Reason |
|
||||
|---|---|---|
|
||||
| 2026-03-24 | `gemini-2.5-flash` → `gemini-3.1-pro-preview` (record) | Shallow step generation; pro model needed for complex ComfyUI interactions |
|
||||
| 2026-03-24 | `gemini-2.5-pro` → `gemini-3.1-pro-preview` (analyze) | Keep analysis on latest pro |
|
||||
| 2026-03-24 | `gemini-2.5-flash` → `gemini-3-flash-preview` (review, test-gen) | Latest flash for cost-efficient tasks |
|
||||
|
||||
## Override
|
||||
|
||||
All scripts accept `--model <name>` to override the default. Pass any Gemini model ID.
|
||||
@@ -11,7 +11,7 @@
|
||||
* --pr-number 10270 \
|
||||
* --repo owner/repo \
|
||||
* --output-dir qa-guides/ \
|
||||
* [--model gemini-2.5-pro]
|
||||
* [--model gemini-3.1-pro-preview]
|
||||
*
|
||||
* Env: GEMINI_API_KEY (required)
|
||||
*/
|
||||
@@ -67,7 +67,7 @@ interface Options {
|
||||
function parseArgs(): Options {
|
||||
const args = process.argv.slice(2)
|
||||
const opts: Partial<Options> = {
|
||||
model: 'gemini-2.5-pro',
|
||||
model: 'gemini-3.1-pro-preview',
|
||||
apiKey: process.env.GEMINI_API_KEY || '',
|
||||
mediaBudgetBytes: 20 * 1024 * 1024,
|
||||
maxVideoBytes: 10 * 1024 * 1024,
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* --qa-report <path> QA video review report (markdown)
|
||||
* --pr-diff <path> PR diff file
|
||||
* --output <path> Output .spec.ts file path
|
||||
* --model <name> Gemini model (default: gemini-2.5-flash)
|
||||
* --model <name> Gemini model (default: gemini-3-flash-preview)
|
||||
*/
|
||||
import { readFile, writeFile } from 'node:fs/promises'
|
||||
import { basename, resolve } from 'node:path'
|
||||
@@ -26,7 +26,7 @@ const DEFAULTS: CliOptions = {
|
||||
qaReport: '',
|
||||
prDiff: '',
|
||||
output: '',
|
||||
model: 'gemini-2.5-flash'
|
||||
model: 'gemini-3-flash-preview'
|
||||
}
|
||||
|
||||
// ── Fixture API reference for the prompt ────────────────────────────
|
||||
@@ -171,7 +171,7 @@ Options:
|
||||
--qa-report <path> QA video review report (markdown) [required]
|
||||
--pr-diff <path> PR diff file [required]
|
||||
--output <path> Output .spec.ts path [required]
|
||||
--model <name> Gemini model (default: gemini-2.5-flash)`)
|
||||
--model <name> Gemini model (default: gemini-3-flash-preview)`)
|
||||
process.exit(0)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,7 +65,7 @@ interface Options {
|
||||
function parseArgs(): Options {
|
||||
const args = process.argv.slice(2)
|
||||
const opts: Partial<Options> = {
|
||||
model: 'gemini-2.5-flash',
|
||||
model: 'gemini-3.1-pro-preview',
|
||||
serverUrl: 'http://127.0.0.1:8188',
|
||||
apiKey: process.env.GEMINI_API_KEY || ''
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ const DEFAULT_OPTIONS: CliOptions = {
|
||||
videoFile: '',
|
||||
beforeVideo: '',
|
||||
outputDir: './tmp',
|
||||
model: 'gemini-2.5-flash',
|
||||
model: 'gemini-3-flash-preview',
|
||||
requestTimeoutMs: 300_000,
|
||||
dryRun: false,
|
||||
prContext: '',
|
||||
@@ -50,7 +50,7 @@ Options:
|
||||
--output-dir <path> Output directory for markdown reports
|
||||
(default: ./tmp)
|
||||
--model <name> Gemini model
|
||||
(default: gemini-2.5-flash)
|
||||
(default: gemini-3-flash-preview)
|
||||
--request-timeout-ms <n> Request timeout in milliseconds
|
||||
(default: 300000)
|
||||
--pr-context <file> File with PR context (title, body, diff)
|
||||
|
||||
Reference in New Issue
Block a user