refactor: replace GPT frame extraction with Gemini native video analysis

Replace the OpenAI GPT-based frame extraction approach (ffmpeg + screenshots) with Gemini 2.5 Flash's native video understanding. This eliminates false positives from frame-based analysis (e.g. "black screen = critical bug" during page transitions) and produces dramatically better QA reviews.

Changes:
- Remove ffmpeg frame extraction, ffprobe duration detection, and all related logic (~365 lines removed)
- Add @google/generative-ai SDK to send the recording to Gemini natively as inline video/mp4 data
- Update CLI: remove --max-frames, --min-interval-seconds, --keep-frames flags
- Update env: OPENAI_API_KEY → GEMINI_API_KEY (OPENAI_BASE_URL is no longer read)
- Update workflow: swap API key secret and model in pr-qa.yaml
- Update report: replace "Frames analyzed" and "Sampling interval" with "Video size"
- Add note in prompt that brief black frames during transitions are normal

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-05 20:54:56 +00:00 · 2026-03-19 08:33:07 +00:00
parent 2f30fbe060
commit 09a3c10d50
6 changed files with 116 additions and 484 deletions
--- a/.github/workflows/pr-qa.yaml
+++ b/.github/workflows/pr-qa.yaml
@@ -372,7 +372,7 @@ jobs:

      - name: Run video review
        env:
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
        run: |
          mkdir -p video-reviews
          for vid in qa-artifacts/qa-report-*/qa-session.mp4; do
@@ -382,7 +382,7 @@ jobs:
              --artifacts-dir qa-artifacts \
              --output-dir video-reviews \
              --video-file "$vid" \
-              --model gpt-4.1-mini || true
+              --model gemini-2.5-flash || true
            echo "::endgroup::"
          done

@@ -428,14 +428,14 @@ jobs:
            REPORT_HTML=""
            if [ -f "$REPORT_FILE" ]; then
              cp "$REPORT_FILE" "$DEPLOY_DIR/report-${OS_LOWER}.md"
-              REPORT_LINK="<a class=download href=report-${OS_LOWER}.md>GPT Report</a>"
+              REPORT_LINK="<a class=download href=report-${OS_LOWER}.md>AI Report</a>"

              # Convert markdown to basic HTML for inline display
              REPORT_CONTENT=$(sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g' "$REPORT_FILE" \
                | sed 's/^## \(.*\)/<h3>\1<\/h3>/; s/^# \(.*\)/<h2>\1<\/h2>/' \
                | sed 's/|\(.*\)|/<tr><td>\1<\/td><\/tr>/g' \
                | sed '/^$/s/.*/<br>/')
-              REPORT_HTML="<details class=report><summary>GPT Video Review</summary><div class=report-body>${REPORT_CONTENT}</div></details>"
+              REPORT_HTML="<details class=report><summary>AI Video Review</summary><div class=report-body>${REPORT_CONTENT}</div></details>"
            fi

            if [ -f "$DEPLOY_DIR/qa-${os}.mp4" ]; then
--- a/package.json
+++ b/package.json
@@ -122,6 +122,7 @@
  },
  "devDependencies": {
    "@eslint/js": "catalog:",
+    "@google/generative-ai": "catalog:",
    "@intlify/eslint-plugin-vue-i18n": "catalog:",
    "@lobehub/i18n-cli": "catalog:",
    "@nx/eslint": "catalog:",
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -21,6 +21,9 @@ catalogs:
    '@formkit/auto-animate':
      specifier: ^0.9.0
      version: 0.9.0
+    '@google/generative-ai':
+      specifier: ^0.24.1
+      version: 0.24.1
    '@iconify-json/lucide':
      specifier: ^1.1.178
      version: 1.2.79
@@ -597,6 +600,9 @@ importers:
      '@eslint/js':
        specifier: 'catalog:'
        version: 9.39.1
+      '@google/generative-ai':
+        specifier: 'catalog:'
+        version: 0.24.1
      '@intlify/eslint-plugin-vue-i18n':
        specifier: 'catalog:'
        version: 4.1.1(eslint@9.39.1(jiti@2.6.1))(jsonc-eslint-parser@2.4.0)(vue-eslint-parser@10.4.0(eslint@9.39.1(jiti@2.6.1)))(yaml-eslint-parser@1.3.0)
@@ -2305,6 +2311,10 @@ packages:
  '@formkit/auto-animate@0.9.0':
    resolution: {integrity: sha512-VhP4zEAacXS3dfTpJpJ88QdLqMTcabMg0jwpOSxZ/VzfQVfl3GkZSCZThhGC5uhq/TxPHPzW0dzr4H9Bb1OgKA==}

+  '@google/generative-ai@0.24.1':
+    resolution: {integrity: sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==}
+    engines: {node: '>=18.0.0'}
+
  '@grpc/grpc-js@1.9.15':
    resolution: {integrity: sha512-nqE7Hc0AzI+euzUwDAy0aY5hCp10r734gMGRdU+qOPX0XSceI2ULrcXB5U2xSc5VkWwalCj4M7GzCAygZl2KoQ==}
    engines: {node: ^8.13.0 || >=10.10.0}
@@ -9560,6 +9570,9 @@ packages:
  vue-component-type-helpers@3.2.6:
    resolution: {integrity: sha512-O02tnvIfOQVmnvoWwuSydwRoHjZVt8UEBR+2p4rT35p8GAy5VTlWP8o5qXfJR/GWCN0nVZoYWsVUvx2jwgdBmQ==}

+  vue-component-type-helpers@3.2.6:
+    resolution: {integrity: sha512-O02tnvIfOQVmnvoWwuSydwRoHjZVt8UEBR+2p4rT35p8GAy5VTlWP8o5qXfJR/GWCN0nVZoYWsVUvx2jwgdBmQ==}
+
  vue-demi@0.14.10:
    resolution: {integrity: sha512-nMZBOwuzabUO0nLgIcc6rycZEebF6eeUfaiQx9+WSk8e29IbLvPU9feI6tqW4kTo3hvoYAJkMh8n8D0fuISphg==}
    engines: {node: '>=12'}
@@ -11455,6 +11468,8 @@ snapshots:

  '@formkit/auto-animate@0.9.0': {}

+  '@google/generative-ai@0.24.1': {}
+
  '@grpc/grpc-js@1.9.15':
    dependencies:
      '@grpc/proto-loader': 0.7.13
@@ -19850,6 +19865,8 @@ snapshots:

  vue-component-type-helpers@3.2.6: {}

+  vue-component-type-helpers@3.2.6: {}
+
  vue-demi@0.14.10(vue@3.5.13(typescript@5.9.3)):
    dependencies:
      vue: 3.5.13(typescript@5.9.3)
--- a/pnpm-workspace.yaml
+++ b/pnpm-workspace.yaml
@@ -8,6 +8,7 @@ catalog:
  '@comfyorg/comfyui-electron-types': 0.6.2
  '@eslint/js': ^9.39.1
  '@formkit/auto-animate': ^0.9.0
+  '@google/generative-ai': ^0.24.1
  '@iconify-json/lucide': ^1.1.178
  '@iconify/json': ^2.2.380
  '@iconify/tailwind4': ^1.2.0
--- a/scripts/qa-video-review.test.ts
+++ b/scripts/qa-video-review.test.ts
@@ -1,7 +1,6 @@
 import { describe, expect, it } from 'vitest'

 import {
-  extractOutputText,
  extractPlatformFromArtifactDirName,
  pickLatestVideosByPlatform,
  selectVideoCandidateByFile
@@ -62,38 +61,6 @@ describe('pickLatestVideosByPlatform', () => {
  })
 })

-describe('extractOutputText', () => {
-  it('reads output_text when available', () => {
-    expect(extractOutputText({ output_text: 'hello world' })).toBe(
-      'hello world'
-    )
-  })
-
-  it('reads nested output text chunks', () => {
-    const responseBody = {
-      output: [
-        {
-          content: [
-            { type: 'output_text', text: 'first block' },
-            { type: 'refusal', refusal: 'not relevant' }
-          ]
-        },
-        {
-          content: [{ type: 'output_text', text: 'second block' }]
-        }
-      ]
-    }
-
-    expect(extractOutputText(responseBody)).toBe('first block\n\nsecond block')
-  })
-
-  it('returns null when there is no usable text', () => {
-    expect(
-      extractOutputText({ output: [{ content: [{ type: 'tool_call' }] }] })
-    ).toBeNull()
-  })
-})
-
 describe('selectVideoCandidateByFile', () => {
  it('selects a single candidate by artifacts-relative path', () => {
    const selected = selectVideoCandidateByFile(
--- a/scripts/qa-video-review.ts
+++ b/scripts/qa-video-review.ts
@@ -1,17 +1,9 @@
 #!/usr/bin/env tsx
-import { spawn } from 'node:child_process'
-import {
-  mkdtemp,
-  mkdir,
-  readdir,
-  readFile,
-  rm,
-  stat,
-  writeFile
-} from 'node:fs/promises'
-import { basename, dirname, join, relative, resolve } from 'node:path'
+import { mkdir, readFile, stat, writeFile } from 'node:fs/promises'
+import { basename, dirname, extname, relative, resolve } from 'node:path'
 import { fileURLToPath } from 'node:url'

+import { GoogleGenerativeAI } from '@google/generative-ai'
 import { globSync } from 'glob'

 interface CliOptions {
@@ -19,11 +11,8 @@ interface CliOptions {
  videoFile: string
  outputDir: string
  model: string
-  minIntervalSeconds: number
-  maxFrames: number
  requestTimeoutMs: number
  dryRun: boolean
-  keepFrames: boolean
 }

 interface VideoCandidate {
@@ -32,49 +21,13 @@ interface VideoCandidate {
  mtimeMs: number
 }

-interface ExtractedFrame {
-  index: number
-  timestampSeconds: number
-  filePath: string
-  dataUrl: string
-}
-
-type ResponseInputContent =
-  | { type: 'input_text'; text: string }
-  | { type: 'input_image'; image_url: string }
-
-interface ResponseInputMessage {
-  role: 'system' | 'user'
-  content: ResponseInputContent[]
-}
-
-interface ResponsesCreatePayload {
-  model: string
-  input: ResponseInputMessage[]
-  max_output_tokens: number
-}
-
-interface OpenAIReviewRequest {
-  apiKey: string
-  baseUrl: string
-  model: string
-  platformName: string
-  videoPath: string
-  frames: ExtractedFrame[]
-  samplingIntervalSeconds: number
-  timeoutMs: number
-}
-
 const DEFAULT_OPTIONS: CliOptions = {
  artifactsDir: './tmp/qa-artifacts',
  videoFile: '',
  outputDir: './tmp',
-  model: 'gpt-4o',
-  minIntervalSeconds: 5,
-  maxFrames: 36,
+  model: 'gemini-2.5-flash',
  requestTimeoutMs: 300_000,
-  dryRun: false,
-  keepFrames: false
+  dryRun: false
 }

 const USAGE = `Usage:
@@ -87,31 +40,17 @@ Options:
                                 (supports basename or relative/absolute path)
  --output-dir <path>           Output directory for markdown reports
                                 (default: ./tmp)
-  --model <name>                OpenAI model
-                                 (default: gpt-4o)
-  --min-interval-seconds <n>    Minimum frame sampling interval in seconds
-                                 (default: 5)
-  --max-frames <n>              Max frames analyzed per video
-                                 (default: 36)
+  --model <name>                Gemini model
+                                 (default: gemini-2.5-flash)
  --request-timeout-ms <n>      Request timeout in milliseconds
                                 (default: 300000)
  --dry-run                     Discover videos and output targets only
-  --keep-frames                 Keep extracted frames in ./tmp for inspection
  --help                        Show this help text

 Environment:
-  OPENAI_API_KEY                Required unless --dry-run
-  OPENAI_BASE_URL               Optional override for API base URL
+  GEMINI_API_KEY                Required unless --dry-run
 `

-function parsePositiveNumber(rawValue: string, flagName: string): number {
-  const parsedValue = Number(rawValue)
-  if (!Number.isFinite(parsedValue) || parsedValue <= 0) {
-    throw new Error(`Invalid value for ${flagName}: "${rawValue}"`)
-  }
-  return parsedValue
-}
-
 function parsePositiveInteger(rawValue: string, flagName: string): number {
  const parsedValue = Number.parseInt(rawValue, 10)
  if (!Number.isInteger(parsedValue) || parsedValue <= 0) {
@@ -159,19 +98,6 @@ function parseCliOptions(args: string[]): CliOptions {
      continue
    }

-    if (argument === '--min-interval-seconds') {
-      options.minIntervalSeconds = parsePositiveNumber(
-        requireValue(argument),
-        argument
-      )
-      continue
-    }
-
-    if (argument === '--max-frames') {
-      options.maxFrames = parsePositiveInteger(requireValue(argument), argument)
-      continue
-    }
-
    if (argument === '--request-timeout-ms') {
      options.requestTimeoutMs = parsePositiveInteger(
        requireValue(argument),
@@ -185,21 +111,12 @@ function parseCliOptions(args: string[]): CliOptions {
      continue
    }

-    if (argument === '--keep-frames') {
-      options.keepFrames = true
-      continue
-    }
-
    throw new Error(`Unknown argument: ${argument}`)
  }

  return options
 }

-function isRecord(value: unknown): value is Record<string, unknown> {
-  return typeof value === 'object' && value !== null
-}
-
 function normalizePlatformName(value: string): string {
  const slug = value
    .trim()
@@ -316,84 +233,6 @@ export function selectVideoCandidateByFile(
  )
 }

-async function runCommand(command: string, args: string[]): Promise<string> {
-  return await new Promise<string>((resolvePromise, rejectPromise) => {
-    const child = spawn(command, args, {
-      stdio: ['ignore', 'pipe', 'pipe']
-    })
-
-    let stdout = ''
-    let stderr = ''
-
-    child.stdout.on('data', (chunk: Buffer | string) => {
-      stdout += chunk.toString()
-    })
-
-    child.stderr.on('data', (chunk: Buffer | string) => {
-      stderr += chunk.toString()
-    })
-
-    child.on('error', (error) => {
-      rejectPromise(error)
-    })
-
-    child.on('close', (code) => {
-      if (code === 0) {
-        resolvePromise(stdout)
-        return
-      }
-
-      const details = stderr.trim() || stdout.trim() || 'unknown failure'
-      rejectPromise(
-        new Error(
-          `${command} ${args.join(' ')} failed with code ${String(code)}: ${details}`
-        )
-      )
-    })
-  })
-}
-
-async function ensureBinariesAvailable(): Promise<void> {
-  await runCommand('ffmpeg', ['-version'])
-  await runCommand('ffprobe', ['-version'])
-}
-
-async function getVideoDurationSeconds(
-  videoPath: string
-): Promise<number | null> {
-  try {
-    const stdout = await runCommand('ffprobe', [
-      '-v',
-      'error',
-      '-show_entries',
-      'format=duration',
-      '-of',
-      'default=noprint_wrappers=1:nokey=1',
-      videoPath
-    ])
-    const durationSeconds = Number.parseFloat(stdout.trim())
-    if (!Number.isFinite(durationSeconds) || durationSeconds <= 0) {
-      return null
-    }
-    return durationSeconds
-  } catch {
-    return null
-  }
-}
-
-function computeSamplingIntervalSeconds(
-  durationSeconds: number | null,
-  minIntervalSeconds: number,
-  maxFrames: number
-): number {
-  if (durationSeconds === null) {
-    return minIntervalSeconds
-  }
-
-  const fullCoverageInterval = durationSeconds / maxFrames
-  return Math.max(minIntervalSeconds, fullCoverageInterval)
-}
-
 async function collectVideoCandidates(
  artifactsDir: string
 ): Promise<VideoCandidate[]> {
@@ -418,241 +257,87 @@ async function collectVideoCandidates(
  return candidates
 }

-async function extractFramesFromVideo(
-  videoPath: string,
-  stagingDir: string,
-  minIntervalSeconds: number,
-  maxFrames: number
-): Promise<{ frames: ExtractedFrame[]; samplingIntervalSeconds: number }> {
-  const durationSeconds = await getVideoDurationSeconds(videoPath)
-  const samplingIntervalSeconds = computeSamplingIntervalSeconds(
-    durationSeconds,
-    minIntervalSeconds,
-    maxFrames
-  )
-  const outputPattern = join(stagingDir, 'frame-%03d.jpg')
-
-  await runCommand('ffmpeg', [
-    '-hide_banner',
-    '-loglevel',
-    'error',
-    '-y',
-    '-i',
-    videoPath,
-    '-vf',
-    `fps=1/${samplingIntervalSeconds},scale=1024:-2:force_original_aspect_ratio=decrease`,
-    '-frames:v',
-    String(maxFrames),
-    outputPattern
-  ])
-
-  const frameNames = (await readdir(stagingDir))
-    .filter((name) => name.startsWith('frame-') && name.endsWith('.jpg'))
-    .sort()
-
-  if (frameNames.length === 0) {
-    throw new Error(`No frames were extracted from ${videoPath}`)
+function getMimeType(filePath: string): string {
+  const ext = extname(filePath).toLowerCase()
+  const mimeMap: Record<string, string> = {
+    '.mp4': 'video/mp4',
+    '.webm': 'video/webm',
+    '.mov': 'video/quicktime',
+    '.avi': 'video/x-msvideo',
+    '.mkv': 'video/x-matroska',
+    '.m4v': 'video/mp4'
  }
-
-  const frames = await Promise.all(
-    frameNames.map(async (frameName, index) => {
-      const framePath = join(stagingDir, frameName)
-      const buffer = await readFile(framePath)
-      return {
-        index: index + 1,
-        timestampSeconds: Number((index * samplingIntervalSeconds).toFixed(2)),
-        filePath: framePath,
-        dataUrl: `data:image/jpeg;base64,${buffer.toString('base64')}`
-      }
-    })
-  )
-
-  return { frames, samplingIntervalSeconds }
+  return mimeMap[ext] || 'video/mp4'
 }

-function buildReviewMessages(
-  request: OpenAIReviewRequest
-): ResponseInputMessage[] {
-  const systemPrompt = [
-    'You are a senior QA engineer reviewing a UI test session.',
+function buildReviewPrompt(platformName: string, videoPath: string): string {
+  return [
+    'You are a senior QA engineer reviewing a UI test session recording.',
    'Report only concrete, visible problems and avoid speculation.',
-    'If confidence is low, mark it explicitly.'
-  ].join(' ')
-
-  const introPrompt = [
-    `Review sampled frames from a QA session for platform "${request.platformName}".`,
-    `Source video: ${toProjectRelativePath(request.videoPath)}.`,
-    `Frames are presented in chronological order at ~${request.samplingIntervalSeconds.toFixed(2)}s intervals.`,
+    'If confidence is low, mark it explicitly.',
+    '',
+    `Review this QA session video for platform "${platformName}".`,
+    `Source video: ${toProjectRelativePath(videoPath)}.`,
+    'The video shows the full test session — analyze it chronologically.',
    'Focus on UI regressions, broken states, visual glitches, unreadable text, missing labels/i18n, and clear workflow failures.',
+    'Note: Brief black frames during page transitions are NORMAL and should NOT be reported as issues.',
+    '',
    'Return markdown with these sections exactly:',
    '## Summary',
    '## Confirmed Issues',
    '## Possible Issues (Needs Human Verification)',
    '## Overall Risk',
    'Under Confirmed Issues include a markdown table with columns:',
-    'Severity | Timestamp (s) | Issue | Evidence | Confidence | Suggested Fix'
+    'Severity | Timestamp | Issue | Evidence | Confidence | Suggested Fix'
  ].join('\n')
+}

-  const userContent: ResponseInputContent[] = [
-    { type: 'input_text', text: introPrompt }
-  ]
+async function requestGeminiReview(options: {
+  apiKey: string
+  model: string
+  platformName: string
+  videoPath: string
+  timeoutMs: number
+}): Promise<string> {
+  const genAI = new GoogleGenerativeAI(options.apiKey)
+  const model = genAI.getGenerativeModel({ model: options.model })

-  for (const frame of request.frames) {
-    userContent.push({
-      type: 'input_text',
-      text: `Frame ${frame.index} at approximately ${frame.timestampSeconds}s`
-    })
-    userContent.push({
-      type: 'input_image',
-      image_url: frame.dataUrl
-    })
-  }
+  const videoBuffer = await readFile(options.videoPath)
+  const base64Video = videoBuffer.toString('base64')
+  const mimeType = getMimeType(options.videoPath)
+  const prompt = buildReviewPrompt(options.platformName, options.videoPath)

-  return [
+  const result = await model.generateContent([
+    { text: prompt },
    {
-      role: 'system',
-      content: [{ type: 'input_text', text: systemPrompt }]
-    },
-    {
-      role: 'user',
-      content: userContent
-    }
-  ]
-}
-
-function normalizeBaseUrl(baseUrl: string): string {
-  return baseUrl.endsWith('/') ? baseUrl.slice(0, -1) : baseUrl
-}
-
-function extractApiErrorMessage(responseBody: unknown): string {
-  if (!isRecord(responseBody)) {
-    return 'Unknown API error'
-  }
-
-  const errorValue = responseBody.error
-  if (!isRecord(errorValue)) {
-    return 'Unknown API error'
-  }
-
-  const messageValue = errorValue.message
-  if (typeof messageValue !== 'string' || messageValue.length === 0) {
-    return 'Unknown API error'
-  }
-
-  return messageValue
-}
-
-export function extractOutputText(responseBody: unknown): string | null {
-  if (!isRecord(responseBody)) {
-    return null
-  }
-
-  const topLevelOutputText = responseBody.output_text
-  if (
-    typeof topLevelOutputText === 'string' &&
-    topLevelOutputText.trim().length > 0
-  ) {
-    return topLevelOutputText.trim()
-  }
-
-  const outputValue = responseBody.output
-  if (!Array.isArray(outputValue)) {
-    return null
-  }
-
-  const collectedParts: string[] = []
-  for (const outputItem of outputValue) {
-    if (!isRecord(outputItem)) {
-      continue
-    }
-
-    const contentValue = outputItem.content
-    if (!Array.isArray(contentValue)) {
-      continue
-    }
-
-    for (const contentItem of contentValue) {
-      if (!isRecord(contentItem)) {
-        continue
-      }
-
-      if (contentItem.type !== 'output_text') {
-        continue
-      }
-
-      const textValue = contentItem.text
-      if (typeof textValue === 'string' && textValue.trim().length > 0) {
-        collectedParts.push(textValue.trim())
+      inlineData: {
+        mimeType,
+        data: base64Video
      }
    }
+  ])
+
+  const response = result.response
+  const text = response.text()
+
+  if (!text || text.trim().length === 0) {
+    throw new Error('Gemini API returned no output text')
  }

-  if (collectedParts.length === 0) {
-    return null
-  }
-
-  return collectedParts.join('\n\n')
+  return text.trim()
 }

-async function requestOpenAIReview(
-  request: OpenAIReviewRequest
-): Promise<string> {
-  const payload: ResponsesCreatePayload = {
-    model: request.model,
-    input: buildReviewMessages(request),
-    max_output_tokens: 3_000
-  }
-
-  const controller = new AbortController()
-  const timeoutHandle = setTimeout(() => controller.abort(), request.timeoutMs)
-
-  try {
-    const response = await fetch(
-      `${normalizeBaseUrl(request.baseUrl)}/responses`,
-      {
-        method: 'POST',
-        headers: {
-          Authorization: `Bearer ${request.apiKey}`,
-          'Content-Type': 'application/json'
-        },
-        body: JSON.stringify(payload),
-        signal: controller.signal
-      }
-    )
-
-    const rawResponse = await response.text()
-    let responseBody: unknown
-    try {
-      responseBody = JSON.parse(rawResponse)
-    } catch {
-      throw new Error(
-        `OpenAI API returned a non-JSON response: ${rawResponse.slice(0, 400)}`
-      )
-    }
-
-    if (!response.ok) {
-      throw new Error(
-        `OpenAI API request failed (${response.status}): ${extractApiErrorMessage(responseBody)}`
-      )
-    }
-
-    const reviewText = extractOutputText(responseBody)
-    if (!reviewText) {
-      throw new Error('OpenAI API returned no output text')
-    }
-
-    return reviewText
-  } finally {
-    clearTimeout(timeoutHandle)
-  }
+function formatBytes(bytes: number): string {
+  if (bytes < 1024) return `${bytes} B`
+  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`
+  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`
 }

 function buildReportMarkdown(input: {
  platformName: string
  model: string
  videoPath: string
-  frames: ExtractedFrame[]
-  samplingIntervalSeconds: number
+  videoSizeBytes: number
  reviewText: string
 }): string {
  const header = [
@@ -661,8 +346,7 @@ function buildReportMarkdown(input: {
    `- Generated at: ${new Date().toISOString()}`,
    `- Model: \`${input.model}\``,
    `- Source video: \`${toProjectRelativePath(input.videoPath)}\``,
-    `- Frames analyzed: ${input.frames.length}`,
-    `- Sampling interval: ${input.samplingIntervalSeconds.toFixed(2)}s`,
+    `- Video size: ${formatBytes(input.videoSizeBytes)}`,
    '',
    '## AI Review',
    ''
@@ -674,74 +358,39 @@ function buildReportMarkdown(input: {
 async function reviewVideo(
  video: VideoCandidate,
  options: CliOptions,
-  apiKey: string,
-  baseUrl: string
+  apiKey: string
 ): Promise<void> {
-  const stagingRoot = resolve('./tmp')
-  await mkdir(stagingRoot, { recursive: true })
-  const stagingDir = await mkdtemp(join(stagingRoot, 'qa-video-review-'))
+  process.stdout.write(
+    `[${video.platformName}] Sending video ${toProjectRelativePath(video.videoPath)} to ${options.model}\n`
+  )

-  try {
-    process.stdout.write(
-      `[${video.platformName}] Extracting frames from ${toProjectRelativePath(video.videoPath)}\n`
-    )
+  const reviewText = await requestGeminiReview({
+    apiKey,
+    model: options.model,
+    platformName: video.platformName,
+    videoPath: video.videoPath,
+    timeoutMs: options.requestTimeoutMs
+  })

-    const { frames, samplingIntervalSeconds } = await extractFramesFromVideo(
-      video.videoPath,
-      stagingDir,
-      options.minIntervalSeconds,
-      options.maxFrames
-    )
+  const videoStat = await stat(video.videoPath)
+  const outputPath = resolve(
+    options.outputDir,
+    `${video.platformName}-qa-video-report.md`
+  )
+  const reportMarkdown = buildReportMarkdown({
+    platformName: video.platformName,
+    model: options.model,
+    videoPath: video.videoPath,
+    videoSizeBytes: videoStat.size,
+    reviewText
+  })

-    process.stdout.write(
-      `[${video.platformName}] Sending ${frames.length} frame(s) to ${options.model}\n`
-    )
+  await mkdir(dirname(outputPath), { recursive: true })
+  await writeFile(outputPath, reportMarkdown, 'utf-8')

-    const reviewText = await requestOpenAIReview({
-      apiKey,
-      baseUrl,
-      model: options.model,
-      platformName: video.platformName,
-      videoPath: video.videoPath,
-      frames,
-      samplingIntervalSeconds,
-      timeoutMs: options.requestTimeoutMs
-    })
-
-    const outputPath = resolve(
-      options.outputDir,
-      `${video.platformName}-qa-video-report.md`
-    )
-    const reportMarkdown = buildReportMarkdown({
-      platformName: video.platformName,
-      model: options.model,
-      videoPath: video.videoPath,
-      frames,
-      samplingIntervalSeconds,
-      reviewText
-    })
-
-    await mkdir(dirname(outputPath), { recursive: true })
-    await writeFile(outputPath, reportMarkdown, 'utf-8')
-
-    process.stdout.write(
-      `[${video.platformName}] Wrote ${toProjectRelativePath(outputPath)}\n`
-    )
-
-    if (options.keepFrames) {
-      const stagedFrames = (await readdir(stagingDir)).filter((name) =>
-        name.endsWith('.jpg')
-      ).length
-      process.stdout.write(
-        `[${video.platformName}] Kept ${stagedFrames} frame(s) in ${toProjectRelativePath(stagingDir)}\n`
-      )
-      return
-    }
-  } finally {
-    if (!options.keepFrames) {
-      await rm(stagingDir, { recursive: true, force: true })
-    }
-  }
+  process.stdout.write(
+    `[${video.platformName}] Wrote ${toProjectRelativePath(outputPath)}\n`
+  )
 }

 function isExecutedAsScript(metaUrl: string): boolean {
@@ -775,15 +424,12 @@ async function main(): Promise<void> {
    return
  }

-  const apiKey = process.env.OPENAI_API_KEY
+  const apiKey = process.env.GEMINI_API_KEY
  if (!apiKey) {
-    throw new Error('OPENAI_API_KEY is required unless --dry-run is set')
+    throw new Error('GEMINI_API_KEY is required unless --dry-run is set')
  }

-  await ensureBinariesAvailable()
-
-  const baseUrl = process.env.OPENAI_BASE_URL ?? 'https://api.openai.com/v1'
-  await reviewVideo(selectedVideo, options, apiKey, baseUrl)
+  await reviewVideo(selectedVideo, options, apiKey)
 }

 if (isExecutedAsScript(import.meta.url)) {