mirror of
https://github.com/Comfy-Org/ComfyUI_frontend.git
synced 2026-04-20 06:20:11 +00:00
refactor: replace GPT frame extraction with Gemini native video analysis
Replace the OpenAI GPT-based frame extraction approach (ffmpeg + screenshots) with Gemini 2.5 Flash's native video understanding. This eliminates false positives from frame-based analysis (e.g. "black screen = critical bug" during page transitions) and produces dramatically better QA reviews.

Changes:
- Remove ffmpeg frame extraction, ffprobe duration detection, and all related logic (~365 lines removed)
- Add @google/generative-ai SDK for native video/mp4 upload to Gemini
- Update CLI: remove --max-frames, --min-interval-seconds, --keep-frames flags
- Update env: OPENAI_API_KEY → GEMINI_API_KEY
- Update workflow: swap API key secret and model in pr-qa.yaml
- Update report: replace "Frames analyzed" with "Video size"
- Add note in prompt that brief black frames during transitions are normal

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
8
.github/workflows/pr-qa.yaml
vendored
8
.github/workflows/pr-qa.yaml
vendored
@@ -372,7 +372,7 @@ jobs:
|
||||
|
||||
- name: Run video review
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
||||
run: |
|
||||
mkdir -p video-reviews
|
||||
for vid in qa-artifacts/qa-report-*/qa-session.mp4; do
|
||||
@@ -382,7 +382,7 @@ jobs:
|
||||
--artifacts-dir qa-artifacts \
|
||||
--output-dir video-reviews \
|
||||
--video-file "$vid" \
|
||||
--model gpt-4.1-mini || true
|
||||
--model gemini-2.5-flash || true
|
||||
echo "::endgroup::"
|
||||
done
|
||||
|
||||
@@ -428,14 +428,14 @@ jobs:
|
||||
REPORT_HTML=""
|
||||
if [ -f "$REPORT_FILE" ]; then
|
||||
cp "$REPORT_FILE" "$DEPLOY_DIR/report-${OS_LOWER}.md"
|
||||
REPORT_LINK="<a class=download href=report-${OS_LOWER}.md>GPT Report</a>"
|
||||
REPORT_LINK="<a class=download href=report-${OS_LOWER}.md>AI Report</a>"
|
||||
|
||||
# Convert markdown to basic HTML for inline display
|
||||
REPORT_CONTENT=$(sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g' "$REPORT_FILE" \
|
||||
| sed 's/^## \(.*\)/<h3>\1<\/h3>/; s/^# \(.*\)/<h2>\1<\/h2>/' \
|
||||
| sed 's/|\(.*\)|/<tr><td>\1<\/td><\/tr>/g' \
|
||||
| sed '/^$/s/.*/<br>/')
|
||||
REPORT_HTML="<details class=report><summary>GPT Video Review</summary><div class=report-body>${REPORT_CONTENT}</div></details>"
|
||||
REPORT_HTML="<details class=report><summary>AI Video Review</summary><div class=report-body>${REPORT_CONTENT}</div></details>"
|
||||
fi
|
||||
|
||||
if [ -f "$DEPLOY_DIR/qa-${os}.mp4" ]; then
|
||||
|
||||
@@ -122,6 +122,7 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@eslint/js": "catalog:",
|
||||
"@google/generative-ai": "catalog:",
|
||||
"@intlify/eslint-plugin-vue-i18n": "catalog:",
|
||||
"@lobehub/i18n-cli": "catalog:",
|
||||
"@nx/eslint": "catalog:",
|
||||
|
||||
17
pnpm-lock.yaml
generated
17
pnpm-lock.yaml
generated
@@ -21,6 +21,9 @@ catalogs:
|
||||
'@formkit/auto-animate':
|
||||
specifier: ^0.9.0
|
||||
version: 0.9.0
|
||||
'@google/generative-ai':
|
||||
specifier: ^0.24.1
|
||||
version: 0.24.1
|
||||
'@iconify-json/lucide':
|
||||
specifier: ^1.1.178
|
||||
version: 1.2.79
|
||||
@@ -597,6 +600,9 @@ importers:
|
||||
'@eslint/js':
|
||||
specifier: 'catalog:'
|
||||
version: 9.39.1
|
||||
'@google/generative-ai':
|
||||
specifier: 'catalog:'
|
||||
version: 0.24.1
|
||||
'@intlify/eslint-plugin-vue-i18n':
|
||||
specifier: 'catalog:'
|
||||
version: 4.1.1(eslint@9.39.1(jiti@2.6.1))(jsonc-eslint-parser@2.4.0)(vue-eslint-parser@10.4.0(eslint@9.39.1(jiti@2.6.1)))(yaml-eslint-parser@1.3.0)
|
||||
@@ -2305,6 +2311,10 @@ packages:
|
||||
'@formkit/auto-animate@0.9.0':
|
||||
resolution: {integrity: sha512-VhP4zEAacXS3dfTpJpJ88QdLqMTcabMg0jwpOSxZ/VzfQVfl3GkZSCZThhGC5uhq/TxPHPzW0dzr4H9Bb1OgKA==}
|
||||
|
||||
'@google/generative-ai@0.24.1':
|
||||
resolution: {integrity: sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==}
|
||||
engines: {node: '>=18.0.0'}
|
||||
|
||||
'@grpc/grpc-js@1.9.15':
|
||||
resolution: {integrity: sha512-nqE7Hc0AzI+euzUwDAy0aY5hCp10r734gMGRdU+qOPX0XSceI2ULrcXB5U2xSc5VkWwalCj4M7GzCAygZl2KoQ==}
|
||||
engines: {node: ^8.13.0 || >=10.10.0}
|
||||
@@ -9560,6 +9570,9 @@ packages:
|
||||
vue-component-type-helpers@3.2.6:
|
||||
resolution: {integrity: sha512-O02tnvIfOQVmnvoWwuSydwRoHjZVt8UEBR+2p4rT35p8GAy5VTlWP8o5qXfJR/GWCN0nVZoYWsVUvx2jwgdBmQ==}
|
||||
|
||||
vue-component-type-helpers@3.2.6:
|
||||
resolution: {integrity: sha512-O02tnvIfOQVmnvoWwuSydwRoHjZVt8UEBR+2p4rT35p8GAy5VTlWP8o5qXfJR/GWCN0nVZoYWsVUvx2jwgdBmQ==}
|
||||
|
||||
vue-demi@0.14.10:
|
||||
resolution: {integrity: sha512-nMZBOwuzabUO0nLgIcc6rycZEebF6eeUfaiQx9+WSk8e29IbLvPU9feI6tqW4kTo3hvoYAJkMh8n8D0fuISphg==}
|
||||
engines: {node: '>=12'}
|
||||
@@ -11455,6 +11468,8 @@ snapshots:
|
||||
|
||||
'@formkit/auto-animate@0.9.0': {}
|
||||
|
||||
'@google/generative-ai@0.24.1': {}
|
||||
|
||||
'@grpc/grpc-js@1.9.15':
|
||||
dependencies:
|
||||
'@grpc/proto-loader': 0.7.13
|
||||
@@ -19850,6 +19865,8 @@ snapshots:
|
||||
|
||||
vue-component-type-helpers@3.2.6: {}
|
||||
|
||||
vue-component-type-helpers@3.2.6: {}
|
||||
|
||||
vue-demi@0.14.10(vue@3.5.13(typescript@5.9.3)):
|
||||
dependencies:
|
||||
vue: 3.5.13(typescript@5.9.3)
|
||||
|
||||
@@ -8,6 +8,7 @@ catalog:
|
||||
'@comfyorg/comfyui-electron-types': 0.6.2
|
||||
'@eslint/js': ^9.39.1
|
||||
'@formkit/auto-animate': ^0.9.0
|
||||
'@google/generative-ai': ^0.24.1
|
||||
'@iconify-json/lucide': ^1.1.178
|
||||
'@iconify/json': ^2.2.380
|
||||
'@iconify/tailwind4': ^1.2.0
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import {
|
||||
extractOutputText,
|
||||
extractPlatformFromArtifactDirName,
|
||||
pickLatestVideosByPlatform,
|
||||
selectVideoCandidateByFile
|
||||
@@ -62,38 +61,6 @@ describe('pickLatestVideosByPlatform', () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe('extractOutputText', () => {
|
||||
it('reads output_text when available', () => {
|
||||
expect(extractOutputText({ output_text: 'hello world' })).toBe(
|
||||
'hello world'
|
||||
)
|
||||
})
|
||||
|
||||
it('reads nested output text chunks', () => {
|
||||
const responseBody = {
|
||||
output: [
|
||||
{
|
||||
content: [
|
||||
{ type: 'output_text', text: 'first block' },
|
||||
{ type: 'refusal', refusal: 'not relevant' }
|
||||
]
|
||||
},
|
||||
{
|
||||
content: [{ type: 'output_text', text: 'second block' }]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
expect(extractOutputText(responseBody)).toBe('first block\n\nsecond block')
|
||||
})
|
||||
|
||||
it('returns null when there is no usable text', () => {
|
||||
expect(
|
||||
extractOutputText({ output: [{ content: [{ type: 'tool_call' }] }] })
|
||||
).toBeNull()
|
||||
})
|
||||
})
|
||||
|
||||
describe('selectVideoCandidateByFile', () => {
|
||||
it('selects a single candidate by artifacts-relative path', () => {
|
||||
const selected = selectVideoCandidateByFile(
|
||||
|
||||
@@ -1,17 +1,9 @@
|
||||
#!/usr/bin/env tsx
|
||||
import { spawn } from 'node:child_process'
|
||||
import {
|
||||
mkdtemp,
|
||||
mkdir,
|
||||
readdir,
|
||||
readFile,
|
||||
rm,
|
||||
stat,
|
||||
writeFile
|
||||
} from 'node:fs/promises'
|
||||
import { basename, dirname, join, relative, resolve } from 'node:path'
|
||||
import { mkdir, readFile, stat, writeFile } from 'node:fs/promises'
|
||||
import { basename, dirname, extname, relative, resolve } from 'node:path'
|
||||
import { fileURLToPath } from 'node:url'
|
||||
|
||||
import { GoogleGenerativeAI } from '@google/generative-ai'
|
||||
import { globSync } from 'glob'
|
||||
|
||||
interface CliOptions {
|
||||
@@ -19,11 +11,8 @@ interface CliOptions {
|
||||
videoFile: string
|
||||
outputDir: string
|
||||
model: string
|
||||
minIntervalSeconds: number
|
||||
maxFrames: number
|
||||
requestTimeoutMs: number
|
||||
dryRun: boolean
|
||||
keepFrames: boolean
|
||||
}
|
||||
|
||||
interface VideoCandidate {
|
||||
@@ -32,49 +21,13 @@ interface VideoCandidate {
|
||||
mtimeMs: number
|
||||
}
|
||||
|
||||
interface ExtractedFrame {
|
||||
index: number
|
||||
timestampSeconds: number
|
||||
filePath: string
|
||||
dataUrl: string
|
||||
}
|
||||
|
||||
type ResponseInputContent =
|
||||
| { type: 'input_text'; text: string }
|
||||
| { type: 'input_image'; image_url: string }
|
||||
|
||||
interface ResponseInputMessage {
|
||||
role: 'system' | 'user'
|
||||
content: ResponseInputContent[]
|
||||
}
|
||||
|
||||
interface ResponsesCreatePayload {
|
||||
model: string
|
||||
input: ResponseInputMessage[]
|
||||
max_output_tokens: number
|
||||
}
|
||||
|
||||
interface OpenAIReviewRequest {
|
||||
apiKey: string
|
||||
baseUrl: string
|
||||
model: string
|
||||
platformName: string
|
||||
videoPath: string
|
||||
frames: ExtractedFrame[]
|
||||
samplingIntervalSeconds: number
|
||||
timeoutMs: number
|
||||
}
|
||||
|
||||
const DEFAULT_OPTIONS: CliOptions = {
|
||||
artifactsDir: './tmp/qa-artifacts',
|
||||
videoFile: '',
|
||||
outputDir: './tmp',
|
||||
model: 'gpt-4o',
|
||||
minIntervalSeconds: 5,
|
||||
maxFrames: 36,
|
||||
model: 'gemini-2.5-flash',
|
||||
requestTimeoutMs: 300_000,
|
||||
dryRun: false,
|
||||
keepFrames: false
|
||||
dryRun: false
|
||||
}
|
||||
|
||||
const USAGE = `Usage:
|
||||
@@ -87,31 +40,17 @@ Options:
|
||||
(supports basename or relative/absolute path)
|
||||
--output-dir <path> Output directory for markdown reports
|
||||
(default: ./tmp)
|
||||
--model <name> OpenAI model
|
||||
(default: gpt-4o)
|
||||
--min-interval-seconds <n> Minimum frame sampling interval in seconds
|
||||
(default: 5)
|
||||
--max-frames <n> Max frames analyzed per video
|
||||
(default: 36)
|
||||
--model <name> Gemini model
|
||||
(default: gemini-2.5-flash)
|
||||
--request-timeout-ms <n> Request timeout in milliseconds
|
||||
(default: 300000)
|
||||
--dry-run Discover videos and output targets only
|
||||
--keep-frames Keep extracted frames in ./tmp for inspection
|
||||
--help Show this help text
|
||||
|
||||
Environment:
|
||||
OPENAI_API_KEY Required unless --dry-run
|
||||
OPENAI_BASE_URL Optional override for API base URL
|
||||
GEMINI_API_KEY Required unless --dry-run
|
||||
`
|
||||
|
||||
function parsePositiveNumber(rawValue: string, flagName: string): number {
|
||||
const parsedValue = Number(rawValue)
|
||||
if (!Number.isFinite(parsedValue) || parsedValue <= 0) {
|
||||
throw new Error(`Invalid value for ${flagName}: "${rawValue}"`)
|
||||
}
|
||||
return parsedValue
|
||||
}
|
||||
|
||||
function parsePositiveInteger(rawValue: string, flagName: string): number {
|
||||
const parsedValue = Number.parseInt(rawValue, 10)
|
||||
if (!Number.isInteger(parsedValue) || parsedValue <= 0) {
|
||||
@@ -159,19 +98,6 @@ function parseCliOptions(args: string[]): CliOptions {
|
||||
continue
|
||||
}
|
||||
|
||||
if (argument === '--min-interval-seconds') {
|
||||
options.minIntervalSeconds = parsePositiveNumber(
|
||||
requireValue(argument),
|
||||
argument
|
||||
)
|
||||
continue
|
||||
}
|
||||
|
||||
if (argument === '--max-frames') {
|
||||
options.maxFrames = parsePositiveInteger(requireValue(argument), argument)
|
||||
continue
|
||||
}
|
||||
|
||||
if (argument === '--request-timeout-ms') {
|
||||
options.requestTimeoutMs = parsePositiveInteger(
|
||||
requireValue(argument),
|
||||
@@ -185,21 +111,12 @@ function parseCliOptions(args: string[]): CliOptions {
|
||||
continue
|
||||
}
|
||||
|
||||
if (argument === '--keep-frames') {
|
||||
options.keepFrames = true
|
||||
continue
|
||||
}
|
||||
|
||||
throw new Error(`Unknown argument: ${argument}`)
|
||||
}
|
||||
|
||||
return options
|
||||
}
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === 'object' && value !== null
|
||||
}
|
||||
|
||||
function normalizePlatformName(value: string): string {
|
||||
const slug = value
|
||||
.trim()
|
||||
@@ -316,84 +233,6 @@ export function selectVideoCandidateByFile(
|
||||
)
|
||||
}
|
||||
|
||||
async function runCommand(command: string, args: string[]): Promise<string> {
|
||||
return await new Promise<string>((resolvePromise, rejectPromise) => {
|
||||
const child = spawn(command, args, {
|
||||
stdio: ['ignore', 'pipe', 'pipe']
|
||||
})
|
||||
|
||||
let stdout = ''
|
||||
let stderr = ''
|
||||
|
||||
child.stdout.on('data', (chunk: Buffer | string) => {
|
||||
stdout += chunk.toString()
|
||||
})
|
||||
|
||||
child.stderr.on('data', (chunk: Buffer | string) => {
|
||||
stderr += chunk.toString()
|
||||
})
|
||||
|
||||
child.on('error', (error) => {
|
||||
rejectPromise(error)
|
||||
})
|
||||
|
||||
child.on('close', (code) => {
|
||||
if (code === 0) {
|
||||
resolvePromise(stdout)
|
||||
return
|
||||
}
|
||||
|
||||
const details = stderr.trim() || stdout.trim() || 'unknown failure'
|
||||
rejectPromise(
|
||||
new Error(
|
||||
`${command} ${args.join(' ')} failed with code ${String(code)}: ${details}`
|
||||
)
|
||||
)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
async function ensureBinariesAvailable(): Promise<void> {
|
||||
await runCommand('ffmpeg', ['-version'])
|
||||
await runCommand('ffprobe', ['-version'])
|
||||
}
|
||||
|
||||
async function getVideoDurationSeconds(
|
||||
videoPath: string
|
||||
): Promise<number | null> {
|
||||
try {
|
||||
const stdout = await runCommand('ffprobe', [
|
||||
'-v',
|
||||
'error',
|
||||
'-show_entries',
|
||||
'format=duration',
|
||||
'-of',
|
||||
'default=noprint_wrappers=1:nokey=1',
|
||||
videoPath
|
||||
])
|
||||
const durationSeconds = Number.parseFloat(stdout.trim())
|
||||
if (!Number.isFinite(durationSeconds) || durationSeconds <= 0) {
|
||||
return null
|
||||
}
|
||||
return durationSeconds
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
function computeSamplingIntervalSeconds(
|
||||
durationSeconds: number | null,
|
||||
minIntervalSeconds: number,
|
||||
maxFrames: number
|
||||
): number {
|
||||
if (durationSeconds === null) {
|
||||
return minIntervalSeconds
|
||||
}
|
||||
|
||||
const fullCoverageInterval = durationSeconds / maxFrames
|
||||
return Math.max(minIntervalSeconds, fullCoverageInterval)
|
||||
}
|
||||
|
||||
async function collectVideoCandidates(
|
||||
artifactsDir: string
|
||||
): Promise<VideoCandidate[]> {
|
||||
@@ -418,241 +257,87 @@ async function collectVideoCandidates(
|
||||
return candidates
|
||||
}
|
||||
|
||||
async function extractFramesFromVideo(
|
||||
videoPath: string,
|
||||
stagingDir: string,
|
||||
minIntervalSeconds: number,
|
||||
maxFrames: number
|
||||
): Promise<{ frames: ExtractedFrame[]; samplingIntervalSeconds: number }> {
|
||||
const durationSeconds = await getVideoDurationSeconds(videoPath)
|
||||
const samplingIntervalSeconds = computeSamplingIntervalSeconds(
|
||||
durationSeconds,
|
||||
minIntervalSeconds,
|
||||
maxFrames
|
||||
)
|
||||
const outputPattern = join(stagingDir, 'frame-%03d.jpg')
|
||||
|
||||
await runCommand('ffmpeg', [
|
||||
'-hide_banner',
|
||||
'-loglevel',
|
||||
'error',
|
||||
'-y',
|
||||
'-i',
|
||||
videoPath,
|
||||
'-vf',
|
||||
`fps=1/${samplingIntervalSeconds},scale=1024:-2:force_original_aspect_ratio=decrease`,
|
||||
'-frames:v',
|
||||
String(maxFrames),
|
||||
outputPattern
|
||||
])
|
||||
|
||||
const frameNames = (await readdir(stagingDir))
|
||||
.filter((name) => name.startsWith('frame-') && name.endsWith('.jpg'))
|
||||
.sort()
|
||||
|
||||
if (frameNames.length === 0) {
|
||||
throw new Error(`No frames were extracted from ${videoPath}`)
|
||||
function getMimeType(filePath: string): string {
|
||||
const ext = extname(filePath).toLowerCase()
|
||||
const mimeMap: Record<string, string> = {
|
||||
'.mp4': 'video/mp4',
|
||||
'.webm': 'video/webm',
|
||||
'.mov': 'video/quicktime',
|
||||
'.avi': 'video/x-msvideo',
|
||||
'.mkv': 'video/x-matroska',
|
||||
'.m4v': 'video/mp4'
|
||||
}
|
||||
|
||||
const frames = await Promise.all(
|
||||
frameNames.map(async (frameName, index) => {
|
||||
const framePath = join(stagingDir, frameName)
|
||||
const buffer = await readFile(framePath)
|
||||
return {
|
||||
index: index + 1,
|
||||
timestampSeconds: Number((index * samplingIntervalSeconds).toFixed(2)),
|
||||
filePath: framePath,
|
||||
dataUrl: `data:image/jpeg;base64,${buffer.toString('base64')}`
|
||||
}
|
||||
})
|
||||
)
|
||||
|
||||
return { frames, samplingIntervalSeconds }
|
||||
return mimeMap[ext] || 'video/mp4'
|
||||
}
|
||||
|
||||
function buildReviewMessages(
|
||||
request: OpenAIReviewRequest
|
||||
): ResponseInputMessage[] {
|
||||
const systemPrompt = [
|
||||
'You are a senior QA engineer reviewing a UI test session.',
|
||||
function buildReviewPrompt(platformName: string, videoPath: string): string {
|
||||
return [
|
||||
'You are a senior QA engineer reviewing a UI test session recording.',
|
||||
'Report only concrete, visible problems and avoid speculation.',
|
||||
'If confidence is low, mark it explicitly.'
|
||||
].join(' ')
|
||||
|
||||
const introPrompt = [
|
||||
`Review sampled frames from a QA session for platform "${request.platformName}".`,
|
||||
`Source video: ${toProjectRelativePath(request.videoPath)}.`,
|
||||
`Frames are presented in chronological order at ~${request.samplingIntervalSeconds.toFixed(2)}s intervals.`,
|
||||
'If confidence is low, mark it explicitly.',
|
||||
'',
|
||||
`Review this QA session video for platform "${platformName}".`,
|
||||
`Source video: ${toProjectRelativePath(videoPath)}.`,
|
||||
'The video shows the full test session — analyze it chronologically.',
|
||||
'Focus on UI regressions, broken states, visual glitches, unreadable text, missing labels/i18n, and clear workflow failures.',
|
||||
'Note: Brief black frames during page transitions are NORMAL and should NOT be reported as issues.',
|
||||
'',
|
||||
'Return markdown with these sections exactly:',
|
||||
'## Summary',
|
||||
'## Confirmed Issues',
|
||||
'## Possible Issues (Needs Human Verification)',
|
||||
'## Overall Risk',
|
||||
'Under Confirmed Issues include a markdown table with columns:',
|
||||
'Severity | Timestamp (s) | Issue | Evidence | Confidence | Suggested Fix'
|
||||
'Severity | Timestamp | Issue | Evidence | Confidence | Suggested Fix'
|
||||
].join('\n')
|
||||
}
|
||||
|
||||
const userContent: ResponseInputContent[] = [
|
||||
{ type: 'input_text', text: introPrompt }
|
||||
]
|
||||
async function requestGeminiReview(options: {
|
||||
apiKey: string
|
||||
model: string
|
||||
platformName: string
|
||||
videoPath: string
|
||||
timeoutMs: number
|
||||
}): Promise<string> {
|
||||
const genAI = new GoogleGenerativeAI(options.apiKey)
|
||||
const model = genAI.getGenerativeModel({ model: options.model })
|
||||
|
||||
for (const frame of request.frames) {
|
||||
userContent.push({
|
||||
type: 'input_text',
|
||||
text: `Frame ${frame.index} at approximately ${frame.timestampSeconds}s`
|
||||
})
|
||||
userContent.push({
|
||||
type: 'input_image',
|
||||
image_url: frame.dataUrl
|
||||
})
|
||||
}
|
||||
const videoBuffer = await readFile(options.videoPath)
|
||||
const base64Video = videoBuffer.toString('base64')
|
||||
const mimeType = getMimeType(options.videoPath)
|
||||
const prompt = buildReviewPrompt(options.platformName, options.videoPath)
|
||||
|
||||
return [
|
||||
const result = await model.generateContent([
|
||||
{ text: prompt },
|
||||
{
|
||||
role: 'system',
|
||||
content: [{ type: 'input_text', text: systemPrompt }]
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: userContent
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
function normalizeBaseUrl(baseUrl: string): string {
|
||||
return baseUrl.endsWith('/') ? baseUrl.slice(0, -1) : baseUrl
|
||||
}
|
||||
|
||||
function extractApiErrorMessage(responseBody: unknown): string {
|
||||
if (!isRecord(responseBody)) {
|
||||
return 'Unknown API error'
|
||||
}
|
||||
|
||||
const errorValue = responseBody.error
|
||||
if (!isRecord(errorValue)) {
|
||||
return 'Unknown API error'
|
||||
}
|
||||
|
||||
const messageValue = errorValue.message
|
||||
if (typeof messageValue !== 'string' || messageValue.length === 0) {
|
||||
return 'Unknown API error'
|
||||
}
|
||||
|
||||
return messageValue
|
||||
}
|
||||
|
||||
export function extractOutputText(responseBody: unknown): string | null {
|
||||
if (!isRecord(responseBody)) {
|
||||
return null
|
||||
}
|
||||
|
||||
const topLevelOutputText = responseBody.output_text
|
||||
if (
|
||||
typeof topLevelOutputText === 'string' &&
|
||||
topLevelOutputText.trim().length > 0
|
||||
) {
|
||||
return topLevelOutputText.trim()
|
||||
}
|
||||
|
||||
const outputValue = responseBody.output
|
||||
if (!Array.isArray(outputValue)) {
|
||||
return null
|
||||
}
|
||||
|
||||
const collectedParts: string[] = []
|
||||
for (const outputItem of outputValue) {
|
||||
if (!isRecord(outputItem)) {
|
||||
continue
|
||||
}
|
||||
|
||||
const contentValue = outputItem.content
|
||||
if (!Array.isArray(contentValue)) {
|
||||
continue
|
||||
}
|
||||
|
||||
for (const contentItem of contentValue) {
|
||||
if (!isRecord(contentItem)) {
|
||||
continue
|
||||
}
|
||||
|
||||
if (contentItem.type !== 'output_text') {
|
||||
continue
|
||||
}
|
||||
|
||||
const textValue = contentItem.text
|
||||
if (typeof textValue === 'string' && textValue.trim().length > 0) {
|
||||
collectedParts.push(textValue.trim())
|
||||
inlineData: {
|
||||
mimeType,
|
||||
data: base64Video
|
||||
}
|
||||
}
|
||||
])
|
||||
|
||||
const response = result.response
|
||||
const text = response.text()
|
||||
|
||||
if (!text || text.trim().length === 0) {
|
||||
throw new Error('Gemini API returned no output text')
|
||||
}
|
||||
|
||||
if (collectedParts.length === 0) {
|
||||
return null
|
||||
}
|
||||
|
||||
return collectedParts.join('\n\n')
|
||||
return text.trim()
|
||||
}
|
||||
|
||||
async function requestOpenAIReview(
|
||||
request: OpenAIReviewRequest
|
||||
): Promise<string> {
|
||||
const payload: ResponsesCreatePayload = {
|
||||
model: request.model,
|
||||
input: buildReviewMessages(request),
|
||||
max_output_tokens: 3_000
|
||||
}
|
||||
|
||||
const controller = new AbortController()
|
||||
const timeoutHandle = setTimeout(() => controller.abort(), request.timeoutMs)
|
||||
|
||||
try {
|
||||
const response = await fetch(
|
||||
`${normalizeBaseUrl(request.baseUrl)}/responses`,
|
||||
{
|
||||
method: 'POST',
|
||||
headers: {
|
||||
Authorization: `Bearer ${request.apiKey}`,
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
body: JSON.stringify(payload),
|
||||
signal: controller.signal
|
||||
}
|
||||
)
|
||||
|
||||
const rawResponse = await response.text()
|
||||
let responseBody: unknown
|
||||
try {
|
||||
responseBody = JSON.parse(rawResponse)
|
||||
} catch {
|
||||
throw new Error(
|
||||
`OpenAI API returned a non-JSON response: ${rawResponse.slice(0, 400)}`
|
||||
)
|
||||
}
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(
|
||||
`OpenAI API request failed (${response.status}): ${extractApiErrorMessage(responseBody)}`
|
||||
)
|
||||
}
|
||||
|
||||
const reviewText = extractOutputText(responseBody)
|
||||
if (!reviewText) {
|
||||
throw new Error('OpenAI API returned no output text')
|
||||
}
|
||||
|
||||
return reviewText
|
||||
} finally {
|
||||
clearTimeout(timeoutHandle)
|
||||
}
|
||||
/**
 * Formats a byte count as a short human-readable size string.
 *
 * Values below 1024 are printed verbatim with a `B` suffix. Larger values are
 * scaled by powers of 1024 and printed with one decimal place, using the
 * largest unit (KB, MB, GB, TB) that keeps the displayed number below 1024.
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size, e.g. `512 B`, `1.5 KB`, `12.3 MB`, `3.0 GB`.
 */
function formatBytes(bytes: number): string {
  if (bytes < 1024) return `${bytes} B`

  const units = ['KB', 'MB', 'GB', 'TB']
  let value = bytes / 1024
  let unitIndex = 0

  // Decide on the unit using the value as it will be *displayed*: 1048575
  // bytes is 1023.999 KB, which rounds to "1024.0 KB" unless we roll over to
  // the next unit first.
  while (Number(value.toFixed(1)) >= 1024 && unitIndex < units.length - 1) {
    value /= 1024
    unitIndex += 1
  }

  return `${value.toFixed(1)} ${units[unitIndex]}`
}
|
||||
|
||||
function buildReportMarkdown(input: {
|
||||
platformName: string
|
||||
model: string
|
||||
videoPath: string
|
||||
frames: ExtractedFrame[]
|
||||
samplingIntervalSeconds: number
|
||||
videoSizeBytes: number
|
||||
reviewText: string
|
||||
}): string {
|
||||
const header = [
|
||||
@@ -661,8 +346,7 @@ function buildReportMarkdown(input: {
|
||||
`- Generated at: ${new Date().toISOString()}`,
|
||||
`- Model: \`${input.model}\``,
|
||||
`- Source video: \`${toProjectRelativePath(input.videoPath)}\``,
|
||||
`- Frames analyzed: ${input.frames.length}`,
|
||||
`- Sampling interval: ${input.samplingIntervalSeconds.toFixed(2)}s`,
|
||||
`- Video size: ${formatBytes(input.videoSizeBytes)}`,
|
||||
'',
|
||||
'## AI Review',
|
||||
''
|
||||
@@ -674,74 +358,39 @@ function buildReportMarkdown(input: {
|
||||
async function reviewVideo(
|
||||
video: VideoCandidate,
|
||||
options: CliOptions,
|
||||
apiKey: string,
|
||||
baseUrl: string
|
||||
apiKey: string
|
||||
): Promise<void> {
|
||||
const stagingRoot = resolve('./tmp')
|
||||
await mkdir(stagingRoot, { recursive: true })
|
||||
const stagingDir = await mkdtemp(join(stagingRoot, 'qa-video-review-'))
|
||||
process.stdout.write(
|
||||
`[${video.platformName}] Sending video ${toProjectRelativePath(video.videoPath)} to ${options.model}\n`
|
||||
)
|
||||
|
||||
try {
|
||||
process.stdout.write(
|
||||
`[${video.platformName}] Extracting frames from ${toProjectRelativePath(video.videoPath)}\n`
|
||||
)
|
||||
const reviewText = await requestGeminiReview({
|
||||
apiKey,
|
||||
model: options.model,
|
||||
platformName: video.platformName,
|
||||
videoPath: video.videoPath,
|
||||
timeoutMs: options.requestTimeoutMs
|
||||
})
|
||||
|
||||
const { frames, samplingIntervalSeconds } = await extractFramesFromVideo(
|
||||
video.videoPath,
|
||||
stagingDir,
|
||||
options.minIntervalSeconds,
|
||||
options.maxFrames
|
||||
)
|
||||
const videoStat = await stat(video.videoPath)
|
||||
const outputPath = resolve(
|
||||
options.outputDir,
|
||||
`${video.platformName}-qa-video-report.md`
|
||||
)
|
||||
const reportMarkdown = buildReportMarkdown({
|
||||
platformName: video.platformName,
|
||||
model: options.model,
|
||||
videoPath: video.videoPath,
|
||||
videoSizeBytes: videoStat.size,
|
||||
reviewText
|
||||
})
|
||||
|
||||
process.stdout.write(
|
||||
`[${video.platformName}] Sending ${frames.length} frame(s) to ${options.model}\n`
|
||||
)
|
||||
await mkdir(dirname(outputPath), { recursive: true })
|
||||
await writeFile(outputPath, reportMarkdown, 'utf-8')
|
||||
|
||||
const reviewText = await requestOpenAIReview({
|
||||
apiKey,
|
||||
baseUrl,
|
||||
model: options.model,
|
||||
platformName: video.platformName,
|
||||
videoPath: video.videoPath,
|
||||
frames,
|
||||
samplingIntervalSeconds,
|
||||
timeoutMs: options.requestTimeoutMs
|
||||
})
|
||||
|
||||
const outputPath = resolve(
|
||||
options.outputDir,
|
||||
`${video.platformName}-qa-video-report.md`
|
||||
)
|
||||
const reportMarkdown = buildReportMarkdown({
|
||||
platformName: video.platformName,
|
||||
model: options.model,
|
||||
videoPath: video.videoPath,
|
||||
frames,
|
||||
samplingIntervalSeconds,
|
||||
reviewText
|
||||
})
|
||||
|
||||
await mkdir(dirname(outputPath), { recursive: true })
|
||||
await writeFile(outputPath, reportMarkdown, 'utf-8')
|
||||
|
||||
process.stdout.write(
|
||||
`[${video.platformName}] Wrote ${toProjectRelativePath(outputPath)}\n`
|
||||
)
|
||||
|
||||
if (options.keepFrames) {
|
||||
const stagedFrames = (await readdir(stagingDir)).filter((name) =>
|
||||
name.endsWith('.jpg')
|
||||
).length
|
||||
process.stdout.write(
|
||||
`[${video.platformName}] Kept ${stagedFrames} frame(s) in ${toProjectRelativePath(stagingDir)}\n`
|
||||
)
|
||||
return
|
||||
}
|
||||
} finally {
|
||||
if (!options.keepFrames) {
|
||||
await rm(stagingDir, { recursive: true, force: true })
|
||||
}
|
||||
}
|
||||
process.stdout.write(
|
||||
`[${video.platformName}] Wrote ${toProjectRelativePath(outputPath)}\n`
|
||||
)
|
||||
}
|
||||
|
||||
function isExecutedAsScript(metaUrl: string): boolean {
|
||||
@@ -775,15 +424,12 @@ async function main(): Promise<void> {
|
||||
return
|
||||
}
|
||||
|
||||
const apiKey = process.env.OPENAI_API_KEY
|
||||
const apiKey = process.env.GEMINI_API_KEY
|
||||
if (!apiKey) {
|
||||
throw new Error('OPENAI_API_KEY is required unless --dry-run is set')
|
||||
throw new Error('GEMINI_API_KEY is required unless --dry-run is set')
|
||||
}
|
||||
|
||||
await ensureBinariesAvailable()
|
||||
|
||||
const baseUrl = process.env.OPENAI_BASE_URL ?? 'https://api.openai.com/v1'
|
||||
await reviewVideo(selectedVideo, options, apiKey, baseUrl)
|
||||
await reviewVideo(selectedVideo, options, apiKey)
|
||||
}
|
||||
|
||||
if (isExecutedAsScript(import.meta.url)) {
|
||||
|
||||
Reference in New Issue
Block a user