feat: before/after video comparison for QA pipeline

- Build both main (dist-before/) and PR (dist/) frontends in focused mode
- Run QA twice: BEFORE on main branch frontend, AFTER on PR branch
- Send both videos to Gemini in one request for comparative analysis
- Side-by-side dashboard layout with Before (main) / After (PR) panels
- Comparative prompt evaluates whether before confirms old behavior
  and after proves the fix works
- Falls back to single-video mode when no before video is available
This commit is contained in:
snomiao
2026-03-20 16:46:51 +00:00
parent 06e4a512f0
commit eb0ce5ed4e
2 changed files with 376 additions and 150 deletions

View File

@@ -90,44 +90,38 @@ jobs:
ref: ${{ github.head_ref || github.ref }}
token: ${{ secrets.GITHUB_TOKEN }}
- name: Setup frontend
- name: Setup frontend (PR branch)
uses: ./.github/actions/setup-frontend
with:
include_build_step: true
- name: Setup and start ComfyUI server
- name: Build main branch frontend for before comparison
  if: needs.resolve-matrix.outputs.mode == 'focused'
  shell: bash
  run: |
    # Produce two builds side by side: dist-before/ (main) and dist/ (PR).
    # The PR build already exists in dist/ at this point, so park it first.
    mv dist dist-after
    # Set aside local modifications and untracked files so main's sources
    # can be laid over the working tree.
    git stash --include-untracked || true
    # Path checkout: overlays main's files on the working tree and index.
    # HEAD itself stays on the PR commit.
    git checkout origin/main -- .
    pnpm install --frozen-lockfile
    pnpm build
    mv dist dist-before
    # Restore the PR sources. HEAD never moved, so reset to it directly.
    # `git checkout - -- .` would resolve `-` as @{-1} (the previously
    # checked-out branch), which may be missing or unrelated in a fresh CI
    # clone. --hard also removes files that exist only on main and were
    # staged by the overlay above; untracked build outputs are left alone.
    git reset --hard HEAD
    git stash pop || true
    mv dist-after dist
    echo "Built both: dist-before/ (main) and dist/ (PR)"
    # Fails the step if either build did not produce an index.html.
    ls -la dist-before/index.html dist/index.html
- name: Setup ComfyUI server (no launch)
uses: ./.github/actions/setup-comfyui-server
with:
launch_server: 'true'
- name: Wait for ComfyUI server
shell: bash
run: |
echo "Waiting for ComfyUI server..."
for i in $(seq 1 60); do
if curl -sf http://127.0.0.1:8188/api/system_stats >/dev/null 2>&1; then
echo "Server ready"; exit 0
fi; sleep 2
done
echo "::error::Server timeout"; exit 1
- name: Pre-seed settings to skip onboarding
shell: bash
run: |
# Mark tutorial as completed so the template gallery doesn't appear.
# Try both /api/settings (new) and /settings (legacy) endpoints.
BODY='{"Comfy.TutorialCompleted": true}'
curl -sv -X POST http://127.0.0.1:8188/api/settings \
-H 'Content-Type: application/json' -d "$BODY" 2>&1 || true
curl -sv -X POST http://127.0.0.1:8188/settings \
-H 'Content-Type: application/json' -d "$BODY" 2>&1 || true
echo "Pre-seed attempted on both endpoints"
launch_server: 'false'
- name: Install playwright-cli and Codex CLI
shell: bash
run: |
npm install -g @playwright/cli@latest @openai/codex@latest
# Verify playwright-cli is in PATH and install browser
which playwright-cli
playwright-cli --version || true
npx playwright install chromium
@@ -136,16 +130,12 @@ jobs:
shell: bash
run: |
mkdir -p "$QA_ARTIFACTS" .playwright
# Auto-record video + save screenshots to artifacts dir
cat > .playwright/cli.config.json <<CEOF
{
"outputDir": "$QA_ARTIFACTS",
"saveVideo": { "width": 1280, "height": 720 }
}
CEOF
echo "playwright-cli config:"
cat .playwright/cli.config.json
- name: Get PR diff for focused QA
if: needs.resolve-matrix.outputs.mode == 'focused'
@@ -157,12 +147,11 @@ jobs:
--repo ${{ github.repository }} > "${{ runner.temp }}/pr-diff.txt" 2>/dev/null || \
git diff origin/main...HEAD > "${{ runner.temp }}/pr-diff.txt"
# Summarize changed files for the prompt
echo "Changed files:"
grep '^diff --git' "${{ runner.temp }}/pr-diff.txt" | \
sed 's|diff --git a/||;s| b/.*||' | sort -u | tee "${{ runner.temp }}/changed-files.txt"
- name: Write QA prompt
- name: Write QA prompts
shell: bash
env:
BRANCH: ${{ github.head_ref || github.ref_name }}
@@ -171,6 +160,20 @@ jobs:
run: |
OS_LOWER=$(echo "$RUNNER_OS" | tr '[:upper:]' '[:lower:]')
COMMON_HEADER="CRITICAL: \"playwright-cli\" is already installed globally in PATH. Do NOT use pnpm dlx or npx.
Chromium is already installed. Just run the commands directly."
COMMON_STEPS="You MUST follow these exact steps in order:
1. playwright-cli open http://127.0.0.1:8188
2. QUICK LOGIN (before video): snapshot, fill the username input with \"qa-ci\", click Next button, wait for graph editor to load
3. playwright-cli snapshot — verify graph editor is loaded
4. playwright-cli video-start"
COMMON_RULES="RULES:
- Do NOT browse templates, explore sidebar panels, or test unrelated features
- Do NOT use pnpm/npx to run playwright-cli
- Do NOT create a PR, post PR comments, commit, or push anything"
if [ "$QA_MODE" = "full" ]; then
cat > "${{ runner.temp }}/qa-prompt.txt" <<PROMPT
You are running a FULL automated QA pass on the ComfyUI frontend.
@@ -178,78 +181,138 @@ jobs:
Environment: CI=true, OS=${{ runner.os }}
Server URL: http://127.0.0.1:8188
Branch: ${BRANCH}
PR: #${PR_NUM}
Commit: ${SHA}
Branch: ${BRANCH}, PR: #${PR_NUM}, Commit: ${SHA}
CRITICAL: "playwright-cli" is already installed globally in PATH. Do NOT use pnpm dlx or npx.
Chromium is already installed. Just run the commands directly.
${COMMON_HEADER}
You MUST follow these exact steps in order:
1. playwright-cli open http://127.0.0.1:8188
2. QUICK LOGIN (before video): snapshot, fill the username input with "qa-ci", click Next button, wait for graph editor to load
3. playwright-cli video-start
4. playwright-cli snapshot (you should see the graph editor now)
${COMMON_STEPS}
5. Test the UI (click, fill, navigate — use snapshot between actions to get refs)
6. playwright-cli video-stop ${QA_ARTIFACTS}/qa-session.webm
7. Write report to ${QA_ARTIFACTS}/$(date +%Y-%m-%d)-001-${OS_LOWER}-report.md
Do NOT skip any steps. Do NOT use pnpm/npx to run playwright-cli.
Do NOT create a PR, post PR comments, commit, or push anything.
Skip tests not available in CI (file dialogs, GPU execution).
Do NOT skip any steps. Skip tests not available in CI (file dialogs, GPU execution).
PROMPT
else
cat > "${{ runner.temp }}/qa-prompt.txt" <<PROMPT
You are running a FOCUSED QA pass on PR #${PR_NUM} to the ComfyUI frontend.
Your ONLY goal is to test the SPECIFIC BEHAVIOR this PR changes — nothing else.
Environment: CI=true, OS=${{ runner.os }}
Server URL: http://127.0.0.1:8188
Branch: ${BRANCH}
CHANGED FILES:
# Focused QA — write separate before/after prompts with identical test steps
DIFF_CONTEXT="CHANGED FILES:
$(cat "${{ runner.temp }}/changed-files.txt" 2>/dev/null || echo "Unknown")
DIFF (truncated to 500 lines):
$(head -500 "${{ runner.temp }}/pr-diff.txt" 2>/dev/null || echo "No diff available")
## Instructions
$(head -500 "${{ runner.temp }}/pr-diff.txt" 2>/dev/null || echo "No diff available")"
TEST_DESIGN="## Instructions
1. Read the diff above carefully. Identify what UI behavior changed.
2. Design 3-6 targeted test steps that exercise EXACTLY that behavior.
3. Execute ONLY those steps. Do NOT do general smoke testing, template
browsing, sidebar exploration, or anything unrelated to the PR.
3. Execute ONLY those steps.
Examples:
- PR changes a menu item → find and click that menu item, verify the change
- PR fixes a bug → reproduce the bug scenario, confirm it's fixed
- PR adds a feature → use that feature end-to-end
## Time budget: keep the video recording under 30 seconds."
## Time budget: keep the video recording under 30 seconds.
Only record the PR-relevant interactions. Login happens BEFORE recording.
# BEFORE prompt (main branch — demonstrate the old behavior / bug)
cat > "${{ runner.temp }}/qa-before-prompt.txt" <<PROMPT
You are running the BEFORE pass of a focused QA comparison on PR #${PR_NUM}.
This is the MAIN branch (before the PR). Your goal is to demonstrate the
OLD behavior that this PR intends to change or fix.
CRITICAL: "playwright-cli" is already installed globally in PATH. Do NOT use pnpm dlx or npx.
Chromium is already installed. Just run the commands directly.
Environment: CI=true, OS=${{ runner.os }}
Server URL: http://127.0.0.1:8188
Branch: main (before PR)
You MUST follow these exact steps in order:
1. playwright-cli open http://127.0.0.1:8188
2. QUICK LOGIN (before video): snapshot, fill the username input with "qa-ci",
click Next button, wait for graph editor to load
3. playwright-cli snapshot — verify graph editor is loaded
4. playwright-cli video-start
${DIFF_CONTEXT}
${TEST_DESIGN}
${COMMON_HEADER}
${COMMON_STEPS}
5. Execute ONLY your PR-targeted test steps (snapshot between each action)
6. playwright-cli video-stop ${QA_ARTIFACTS}/qa-before-session.webm
7. Write report to ${QA_ARTIFACTS}/$(date +%Y-%m-%d)-001-before-${OS_LOWER}-report.md
Include PASS/FAIL for each test step.
${COMMON_RULES}
PROMPT
# AFTER prompt (PR branch — prove the fix works)
cat > "${{ runner.temp }}/qa-prompt.txt" <<PROMPT
You are running the AFTER pass of a focused QA comparison on PR #${PR_NUM}.
This is the PR branch (after the changes). Your goal is to prove the PR's
changes work correctly and the intended behavior is now in place.
Environment: CI=true, OS=${{ runner.os }}
Server URL: http://127.0.0.1:8188
Branch: ${BRANCH} (PR)
${DIFF_CONTEXT}
${TEST_DESIGN}
${COMMON_HEADER}
${COMMON_STEPS}
5. Execute ONLY your PR-targeted test steps (snapshot between each action)
6. playwright-cli video-stop ${QA_ARTIFACTS}/qa-session.webm
7. Write report to ${QA_ARTIFACTS}/$(date +%Y-%m-%d)-001-${OS_LOWER}-report.md
Include PASS/FAIL for each test step in the report.
Include PASS/FAIL for each test step.
RULES:
- Do NOT browse templates, explore sidebar panels, or test unrelated features
- Do NOT use pnpm/npx to run playwright-cli
- Do NOT create a PR, post PR comments, commit, or push anything
${COMMON_RULES}
PROMPT
fi
- name: Run Codex QA
# ── BEFORE run (main branch) ──
- name: Start server with main branch frontend
  if: needs.resolve-matrix.outputs.mode == 'focused'
  shell: bash
  working-directory: ComfyUI
  run: |
    # Serve the main-branch build (dist-before/) for the BEFORE pass.
    python main.py --cpu --multi-user --front-end-root ../dist-before &
    SERVER_PID=$!
    # Persist the PID so a later step can stop this server.
    echo "$SERVER_PID" > /tmp/comfyui-server.pid
    # Poll the stats endpoint: 60 attempts, 2 seconds apart.
    for i in $(seq 1 60); do
      if curl -sf http://127.0.0.1:8188/api/system_stats >/dev/null 2>&1; then
        echo "Server ready (main)"; exit 0
      fi
      # Fail fast when the process has already died instead of waiting
      # out the whole polling window.
      if ! kill -0 "$SERVER_PID" 2>/dev/null; then
        echo "::error::Server exited during startup (main)"; exit 1
      fi
      sleep 2
    done
    echo "::error::Server timeout (main)"; exit 1
# BEFORE pass (focused mode only): run the Codex agent with the prompt written
# to qa-before-prompt.txt, while the server is serving the main-branch frontend.
- name: Run BEFORE QA (main branch)
if: needs.resolve-matrix.outputs.mode == 'focused'
shell: bash
env:
CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }}
CI: 'true'
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# The prompt file is redirected to stdin; the trailing `-` presumably tells
# codex to read the prompt from stdin (confirm against the Codex CLI docs).
# NOTE(review): the agent runs with --sandbox danger-full-access and has
# GITHUB_TOKEN/GH_TOKEN in its environment; only the prompt's RULES section
# tells it not to commit, push or comment. Confirm the token permissions are
# scoped accordingly.
codex exec \
--model gpt-5.4-mini \
--sandbox danger-full-access \
- < "${{ runner.temp }}/qa-before-prompt.txt"
- name: Stop server after BEFORE run
  if: needs.resolve-matrix.outputs.mode == 'focused'
  shell: bash
  run: |
    # Stop the main-branch server so the PR-branch server can bind port 8188.
    PID="$(cat /tmp/comfyui-server.pid 2>/dev/null || true)"
    [ -n "$PID" ] && kill "$PID" 2>/dev/null || true
    sleep 2
    # Wait until nothing answers on the port. Without -f, curl only fails
    # when the connection itself fails, so an HTTP error response is not
    # mistaken for a free port.
    for i in $(seq 1 10); do
      if ! curl -s -o /dev/null http://127.0.0.1:8188/; then
        echo "Server stopped"; exit 0
      fi
      sleep 1
    done
    # Still answering after SIGTERM: force it down.
    echo "::warning::Server still responding after SIGTERM; sending SIGKILL"
    [ -n "$PID" ] && kill -9 "$PID" 2>/dev/null || true
    sleep 1
    # If the old server were left running, the AFTER readiness probe would
    # succeed against it and the AFTER pass would silently test main again.
    if curl -s -o /dev/null http://127.0.0.1:8188/; then
      echo "::error::Port 8188 is still in use after stopping the BEFORE server"
      exit 1
    fi
    echo "Server stopped"
# ── AFTER run (PR branch) ──
- name: Start server with PR branch frontend
  shell: bash
  working-directory: ComfyUI
  run: |
    # Serve the PR-branch build (dist/) for the AFTER pass.
    python main.py --cpu --multi-user --front-end-root ../dist &
    SERVER_PID=$!
    # Persist the PID so a later step can stop this server.
    echo "$SERVER_PID" > /tmp/comfyui-server.pid
    # Poll the stats endpoint: 60 attempts, 2 seconds apart.
    for i in $(seq 1 60); do
      if curl -sf http://127.0.0.1:8188/api/system_stats >/dev/null 2>&1; then
        echo "Server ready (PR)"; exit 0
      fi
      # Fail fast when the process has already died instead of waiting
      # out the whole polling window.
      if ! kill -0 "$SERVER_PID" 2>/dev/null; then
        echo "::error::Server exited during startup (PR)"; exit 1
      fi
      sleep 2
    done
    echo "::error::Server timeout (PR)"; exit 1
- name: Run AFTER QA (PR branch)
shell: bash
env:
CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -266,16 +329,16 @@ jobs:
if: always()
shell: bash
run: |
kill "$(cat /tmp/comfyui-server.pid)" 2>/dev/null || true
mkdir -p "$QA_ARTIFACTS"
echo "=== QA artifacts ==="
ls -la "$QA_ARTIFACTS/" 2>/dev/null | head -30
# Check for video from explicit video-stop command
# Check for after video
if [ -f "$QA_ARTIFACTS/qa-session.webm" ]; then
echo "Found video: $QA_ARTIFACTS/qa-session.webm ($(du -h "$QA_ARTIFACTS/qa-session.webm" | cut -f1))"
echo "Found after video: $(du -h "$QA_ARTIFACTS/qa-session.webm" | cut -f1)"
else
echo "No qa-session.webm found at expected path"
# Search for any .webm in artifacts dir or playwright-cli output
echo "No qa-session.webm found"
VIDEO=$(find "$QA_ARTIFACTS" . -maxdepth 3 -name '*.webm' -not -path '*/node_modules/*' 2>/dev/null | head -1)
if [ -n "$VIDEO" ]; then
echo "Found fallback video: $VIDEO ($(du -h "$VIDEO" | cut -f1))"
@@ -369,28 +432,29 @@ jobs:
- name: Convert videos to mp4
run: |
for dir in qa-artifacts/qa-report-*; do
[ -d "$dir" ] || continue
# Prefer explicitly-saved qa-session.webm over auto-recorded files
if [ -f "$dir/qa-session.webm" ] && [ -s "$dir/qa-session.webm" ]; then
WEBM="$dir/qa-session.webm"
else
# Fallback: find any non-empty webm
WEBM=$(find "$dir" -name '*.webm' -type f -size +0c | head -1)
fi
if [ -z "$WEBM" ]; then
echo "No valid .webm video in $dir, skipping"
continue
fi
echo "Converting $WEBM ($(du -h "$WEBM" | cut -f1)) to mp4"
convert_video() {
local WEBM="$1" MP4="$2"
echo "Converting $WEBM ($(du -h "$WEBM" | cut -f1)) to $MP4"
ffmpeg -y -i "$WEBM" \
-c:v libx264 -preset ultrafast -crf 23 -pix_fmt yuv420p \
-movflags +faststart -g 60 \
"$dir/qa-session.mp4" 2>&1 | tail -5 \
"$MP4" 2>&1 | tail -5 \
|| echo "ffmpeg conversion failed for $WEBM (non-fatal)"
[ -f "$MP4" ] && echo "Created: $MP4 ($(du -h "$MP4" | cut -f1))"
}
if [ -f "$dir/qa-session.mp4" ]; then
echo "Created: $dir/qa-session.mp4 ($(du -h "$dir/qa-session.mp4" | cut -f1))"
for dir in qa-artifacts/qa-report-*; do
[ -d "$dir" ] || continue
# Convert after video (qa-session.webm)
for name in qa-session qa-before-session; do
if [ -f "$dir/${name}.webm" ] && [ -s "$dir/${name}.webm" ]; then
convert_video "$dir/${name}.webm" "$dir/${name}.mp4"
fi
done
# Fallback: find any non-empty webm not yet converted
if [ ! -f "$dir/qa-session.mp4" ]; then
WEBM=$(find "$dir" -name '*.webm' -type f -size +0c | head -1)
[ -n "$WEBM" ] && convert_video "$WEBM" "$dir/qa-session.mp4"
fi
done
@@ -429,12 +493,17 @@ jobs:
fi
for vid in qa-artifacts/qa-report-*/qa-session.mp4; do
[ -f "$vid" ] || continue
DIR=$(dirname "$vid")
BEFORE_FLAG=""
if [ -f "$DIR/qa-before-session.mp4" ]; then
BEFORE_FLAG="--before-video $DIR/qa-before-session.mp4"
fi
echo "::group::Reviewing $vid"
pnpm exec tsx scripts/qa-video-review.ts \
--artifacts-dir qa-artifacts \
--output-dir video-reviews \
--video-file "$vid" \
--model gemini-2.5-flash $PR_CTX_FLAG || true
--model gemini-2.5-flash $PR_CTX_FLAG $BEFORE_FLAG || true
echo "::endgroup::"
done
@@ -451,19 +520,21 @@ jobs:
mkdir -p "$DEPLOY_DIR"
for os in Linux macOS Windows; do
VID="qa-artifacts/qa-report-${os}-${{ github.run_id }}/qa-session.mp4"
if [ -f "$VID" ]; then
cp "$VID" "$DEPLOY_DIR/qa-${os}.mp4"
echo "Found ${os} video ($(du -h "$VID" | cut -f1))"
# Generate GIF thumbnail
ffmpeg -y -ss 10 -i "$VID" -t 8 \
DIR="qa-artifacts/qa-report-${os}-${{ github.run_id }}"
for prefix in qa qa-before; do
VID="${DIR}/${prefix}-session.mp4"
if [ -f "$VID" ]; then
DEST="$DEPLOY_DIR/${prefix}-${os}.mp4"
cp "$VID" "$DEST"
echo "Found ${prefix} ${os} video ($(du -h "$VID" | cut -f1))"
fi
done
# Generate GIF thumbnail from after video
if [ -f "$DEPLOY_DIR/qa-${os}.mp4" ]; then
ffmpeg -y -ss 10 -i "$DEPLOY_DIR/qa-${os}.mp4" -t 8 \
-vf "fps=8,scale=480:-1:flags=lanczos,split[s0][s1];[s0]palettegen=max_colors=64[p];[s1][p]paletteuse=dither=bayer" \
-loop 0 "$DEPLOY_DIR/qa-${os}-thumb.gif" 2>/dev/null \
|| ffmpeg -y -i "$VID" -t 8 \
-vf "fps=8,scale=480:-1:flags=lanczos,split[s0][s1];[s0]palettegen=max_colors=64[p];[s1][p]paletteuse=dither=bayer" \
-loop 0 "$DEPLOY_DIR/qa-${os}-thumb.gif" 2>/dev/null \
|| echo "GIF generation failed for ${os} (non-fatal)"
|| echo "GIF generation failed for ${os} (non-fatal)"
fi
done
@@ -471,9 +542,14 @@ jobs:
CARDS=""
ICONS_Linux="&#x1F427;" ICONS_macOS="&#x1F34E;" ICONS_Windows="&#x1FA9F;"
CARD_COUNT=0
DL_ICON="<svg width=14 height=14 viewBox='0 0 24 24' fill=none stroke=currentColor stroke-width=2><path d='M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4'/><polyline points='7 10 12 15 17 10'/><line x1=12 y1=15 x2=12 y2=3'/></svg>"
for os in Linux macOS Windows; do
eval "ICON=\$ICONS_${os}"
OS_LOWER=$(echo "$os" | tr '[:upper:]' '[:lower:]')
HAS_BEFORE=$([ -f "$DEPLOY_DIR/qa-before-${os}.mp4" ] && echo 1 || echo 0)
HAS_AFTER=$([ -f "$DEPLOY_DIR/qa-${os}.mp4" ] && echo 1 || echo 0)
[ "$HAS_AFTER" = "0" ] && continue
REPORT_FILE="video-reviews/${OS_LOWER}-qa-video-report.md"
REPORT_LINK=""
@@ -483,13 +559,17 @@ jobs:
REPORT_LINK="<a class=dl href=report-${OS_LOWER}.md><svg width=14 height=14 viewBox='0 0 24 24' fill=none stroke=currentColor stroke-width=2><path d='M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z'/><polyline points='14 2 14 8 20 8'/><line x1=16 y1=13 x2=8 y2=13/><line x1=16 y1=17 x2=8 y2=17'/></svg>Report</a>"
REPORT_MD=$(cat "$REPORT_FILE" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g')
REPORT_HTML="<details class=report><summary><svg width=14 height=14 viewBox='0 0 24 24' fill=none stroke=currentColor stroke-width=2><circle cx=12 cy=12 r=10/><line x1=12 y1=16 x2=12 y2=12/><line x1=12 y1=8 x2=12.01 y2=8'/></svg> AI Video Review</summary><div class=report-body data-md>${REPORT_MD}</div></details>"
REPORT_HTML="<details class=report open><summary><svg width=14 height=14 viewBox='0 0 24 24' fill=none stroke=currentColor stroke-width=2><circle cx=12 cy=12 r=10/><line x1=12 y1=16 x2=12 y2=12/><line x1=12 y1=8 x2=12.01 y2=8'/></svg> AI Comparative Review</summary><div class=report-body data-md>${REPORT_MD}</div></details>"
fi
if [ -f "$DEPLOY_DIR/qa-${os}.mp4" ]; then
CARDS="${CARDS}<div class='card reveal' style='--i:${CARD_COUNT}'><div class=video-wrap><video controls muted preload=metadata><source src=qa-${os}.mp4 type=video/mp4></video></div><div class=card-body><span class=platform><span class=icon>${ICON}</span>${os}</span><span class=links><a class=dl href=qa-${os}.mp4 download><svg width=14 height=14 viewBox='0 0 24 24' fill=none stroke=currentColor stroke-width=2><path d='M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4'/><polyline points='7 10 12 15 17 10'/><line x1=12 y1=15 x2=12 y2=3'/></svg>Download</a>${REPORT_LINK}</span></div>${REPORT_HTML}</div>"
CARD_COUNT=$((CARD_COUNT + 1))
if [ "$HAS_BEFORE" = "1" ]; then
# Side-by-side before/after layout
CARDS="${CARDS}<div class='card reveal' style='--i:${CARD_COUNT}'><div class=card-header><span class=platform><span class=icon>${ICON}</span>${os}</span><span class=links>${REPORT_LINK}</span></div><div class=comparison><div class=comp-panel><div class=comp-label>Before <span class=comp-tag>main</span></div><div class=video-wrap><video controls muted preload=metadata><source src=qa-before-${os}.mp4 type=video/mp4></video></div><div class=comp-dl><a class=dl href=qa-before-${os}.mp4 download>${DL_ICON}Before</a></div></div><div class=comp-panel><div class=comp-label>After <span class=comp-tag>PR</span></div><div class=video-wrap><video controls muted preload=metadata><source src=qa-${os}.mp4 type=video/mp4></video></div><div class=comp-dl><a class=dl href=qa-${os}.mp4 download>${DL_ICON}After</a></div></div></div>${REPORT_HTML}</div>"
else
# Single video (full QA mode or no before available)
CARDS="${CARDS}<div class='card reveal' style='--i:${CARD_COUNT}'><div class=video-wrap><video controls muted preload=metadata><source src=qa-${os}.mp4 type=video/mp4></video></div><div class=card-body><span class=platform><span class=icon>${ICON}</span>${os}</span><span class=links><a class=dl href=qa-${os}.mp4 download>${DL_ICON}Download</a>${REPORT_LINK}</span></div>${REPORT_HTML}</div>"
fi
CARD_COUNT=$((CARD_COUNT + 1))
done
cat > "$DEPLOY_DIR/index.html" <<INDEXEOF
@@ -519,6 +599,15 @@ jobs:
.dl{color:var(--fg-muted);text-decoration:none;font-size:.75rem;font-weight:500;display:inline-flex;align-items:center;gap:.3rem;padding:.25rem .6rem;border-radius:9999px;border:1px solid var(--border);background:oklch(100% 0 0/.03);transition:all var(--dur-base) var(--ease-out)}
.dl:hover{color:var(--primary-up);border-color:var(--primary);background:oklch(62% 0.21 265/.08)}
.badge{font-size:.6875rem;font-weight:600;padding:.2rem .625rem;border-radius:9999px;text-transform:uppercase;letter-spacing:.05em}
.card-header{padding:.75rem 1rem;display:flex;align-items:center;justify-content:space-between;border-bottom:1px solid var(--border-faint)}
.comparison{display:grid;grid-template-columns:1fr 1fr;gap:0}
.comp-panel{border-right:1px solid var(--border-faint)}
.comp-panel:last-child{border-right:none}
.comp-label{padding:.4rem .75rem;font-size:.7rem;font-weight:600;text-transform:uppercase;letter-spacing:.05em;color:var(--fg-muted);background:oklch(10% 0.01 265);display:flex;align-items:center;gap:.4rem}
.comp-tag{font-size:.6rem;padding:.1rem .4rem;border-radius:9999px;font-weight:600}
.comp-panel:first-child .comp-tag{background:oklch(65% 0.01 265/.15);color:var(--fg-muted);border:1px solid var(--border)}
.comp-panel:last-child .comp-tag{background:oklch(62% 0.18 155/.15);color:var(--ok);border:1px solid oklch(62% 0.18 155/.25)}
.comp-dl{padding:.4rem .75rem;display:flex;justify-content:center}
.report{border-top:1px solid var(--border-faint);padding:.75rem 1rem;font-size:.8125rem}
.report summary{cursor:pointer;color:var(--fg-muted);font-weight:500;display:flex;align-items:center;gap:.4rem;user-select:none;transition:color var(--dur-base) var(--ease-out)}
.report summary:hover{color:var(--fg)}

View File

@@ -9,6 +9,7 @@ import { globSync } from 'glob'
interface CliOptions {
artifactsDir: string
videoFile: string
beforeVideo: string
outputDir: string
model: string
requestTimeoutMs: number
@@ -25,6 +26,7 @@ interface VideoCandidate {
const DEFAULT_OPTIONS: CliOptions = {
artifactsDir: './tmp/qa-artifacts',
videoFile: '',
beforeVideo: '',
outputDir: './tmp',
model: 'gemini-2.5-flash',
requestTimeoutMs: 300_000,
@@ -40,6 +42,9 @@ Options:
(default: ./tmp/qa-artifacts)
--video-file <name-or-path> Video file to analyze (required)
(supports basename or relative/absolute path)
--before-video <path> Before video (main branch) for comparison
When provided, sends both videos to Gemini
for comparative before/after analysis
--output-dir <path> Output directory for markdown reports
(default: ./tmp)
--model <name> Gemini model
@@ -110,6 +115,11 @@ function parseCliOptions(args: string[]): CliOptions {
continue
}
if (argument === '--before-video') {
options.beforeVideo = requireValue(argument)
continue
}
if (argument === '--pr-context') {
options.prContext = requireValue(argument)
continue
@@ -279,18 +289,94 @@ function getMimeType(filePath: string): string {
return mimeMap[ext] || 'video/mp4'
}
function buildReviewPrompt(
/**
 * Builds the text prompt sent alongside the video(s).
 *
 * Delegates to the comparative (before/after) prompt builder when
 * `isComparative` is true, otherwise to the single-video prompt builder.
 * Both receive the same platform name, video path and PR context.
 *
 * @param options.platformName - Platform label passed through to the prompt builder
 * @param options.videoPath - Path of the video passed through to the prompt builder
 * @param options.prContext - PR context text; may be empty
 * @param options.isComparative - Selects the before/after prompt when true
 * @returns The prompt text produced by the selected builder
 */
function buildReviewPrompt(options: {
  platformName: string
  videoPath: string
  prContext: string
  isComparative: boolean
}): string {
  const selectedBuilder = options.isComparative
    ? buildComparativePrompt
    : buildSingleVideoPrompt
  return selectedBuilder(
    options.platformName,
    options.videoPath,
    options.prContext
  )
}
function buildComparativePrompt(
platformName: string,
videoPath: string,
prContext: string
): string {
const basePrompt = [
const lines = [
'You are a senior QA engineer performing a BEFORE/AFTER comparison review.',
'',
'You are given TWO videos:',
'- **Video 1 (BEFORE)**: The main branch BEFORE the PR. This shows the OLD behavior.',
'- **Video 2 (AFTER)**: The PR branch AFTER the changes. This shows the NEW behavior.',
'',
'Both videos show the same test steps executed on different code versions.',
''
]
if (prContext) {
lines.push('## PR Context', prContext, '')
}
lines.push(
'## Your Task',
`Platform: "${platformName}". After video: ${toProjectRelativePath(videoPath)}.`,
'',
'1. **BEFORE video**: Does it demonstrate the old behavior or bug that the PR aims to fix?',
' Describe what you observe — this establishes the baseline.',
'2. **AFTER video**: Does it prove the PR fix works? Is the intended new behavior visible?',
'3. **Comparison**: What specifically changed between before and after?',
'4. **Regressions**: Did the PR introduce any new problems visible in the AFTER video',
' that were NOT present in the BEFORE video?',
'',
'Note: Brief black frames during page transitions are NORMAL.',
'Report only concrete, visible differences. Avoid speculation.',
'',
'Return markdown with these sections exactly:',
'## Summary',
'(What the PR changes, whether BEFORE confirms the old behavior, whether AFTER proves the fix)',
'## Before vs After',
'(Side-by-side behavioral comparison with timestamps from both videos)',
'## Confirmed Issues',
'For each issue, use this exact format:',
'',
'### [Short issue title]',
'`SEVERITY` `TIMESTAMP` `Confidence: LEVEL`',
'',
'[Description — specify whether it appears in BEFORE, AFTER, or both]',
'',
'**Evidence:** [What you observed at the given timestamp in which video]',
'',
'**Suggested Fix:** [Actionable recommendation]',
'',
'---',
'',
'## Possible Issues (Needs Human Verification)',
'## Overall Risk',
'(Assess whether the PR achieves its goal based on the before/after comparison)'
)
return lines.filter(Boolean).join('\n')
}
function buildSingleVideoPrompt(
platformName: string,
videoPath: string,
prContext: string
): string {
const lines = [
'You are a senior QA engineer reviewing a UI test session recording.',
''
]
if (prContext) {
basePrompt.push(
lines.push(
'## PR Context',
'The video is a QA session testing a specific pull request.',
'Your review MUST evaluate whether the PR achieves its stated purpose.',
@@ -305,7 +391,7 @@ function buildReviewPrompt(
)
}
basePrompt.push(
lines.push(
`Review this QA session video for platform "${platformName}".`,
`Source video: ${toProjectRelativePath(videoPath)}.`,
'The video shows the full test session — analyze it chronologically.',
@@ -340,7 +426,7 @@ function buildReviewPrompt(
'## Overall Risk'
)
return basePrompt.filter(Boolean).join('\n')
return lines.filter(Boolean).join('\n')
}
async function requestGeminiReview(options: {
@@ -348,31 +434,50 @@ async function requestGeminiReview(options: {
model: string
platformName: string
videoPath: string
beforeVideoPath: string
timeoutMs: number
prContext: string
}): Promise<string> {
const genAI = new GoogleGenerativeAI(options.apiKey)
const model = genAI.getGenerativeModel({ model: options.model })
const videoBuffer = await readFile(options.videoPath)
const base64Video = videoBuffer.toString('base64')
const mimeType = getMimeType(options.videoPath)
const prompt = buildReviewPrompt(
options.platformName,
options.videoPath,
options.prContext
)
const isComparative = options.beforeVideoPath.length > 0
const prompt = buildReviewPrompt({
platformName: options.platformName,
videoPath: options.videoPath,
prContext: options.prContext,
isComparative
})
const result = await model.generateContent([
{ text: prompt },
{
inlineData: {
mimeType,
data: base64Video
const parts: Array<
{ text: string } | { inlineData: { mimeType: string; data: string } }
> = [{ text: prompt }]
if (isComparative) {
const beforeBuffer = await readFile(options.beforeVideoPath)
parts.push(
{ text: 'Video 1 — BEFORE (main branch):' },
{
inlineData: {
mimeType: getMimeType(options.beforeVideoPath),
data: beforeBuffer.toString('base64')
}
}
}
])
)
}
const afterBuffer = await readFile(options.videoPath)
if (isComparative) {
parts.push({ text: 'Video 2 — AFTER (PR branch):' })
}
parts.push({
inlineData: {
mimeType: getMimeType(options.videoPath),
data: afterBuffer.toString('base64')
}
})
const result = await model.generateContent(parts)
const response = result.response
const text = response.text()
@@ -394,21 +499,32 @@ function buildReportMarkdown(input: {
model: string
videoPath: string
videoSizeBytes: number
beforeVideoPath?: string
beforeVideoSizeBytes?: number
reviewText: string
}): string {
const header = [
const headerLines = [
`# ${input.platformName} QA Video Report`,
'',
`- Generated at: ${new Date().toISOString()}`,
`- Model: \`${input.model}\``,
`- Source video: \`${toProjectRelativePath(input.videoPath)}\``,
`- Video size: ${formatBytes(input.videoSizeBytes)}`,
'',
'## AI Review',
''
].join('\n')
`- Model: \`${input.model}\``
]
return `${header}${input.reviewText.trim()}\n`
if (input.beforeVideoPath) {
headerLines.push(
`- Before video: \`${toProjectRelativePath(input.beforeVideoPath)}\` (${formatBytes(input.beforeVideoSizeBytes ?? 0)})`,
`- After video: \`${toProjectRelativePath(input.videoPath)}\` (${formatBytes(input.videoSizeBytes)})`,
'- Mode: **Comparative (before/after)**'
)
} else {
headerLines.push(
`- Source video: \`${toProjectRelativePath(input.videoPath)}\``,
`- Video size: ${formatBytes(input.videoSizeBytes)}`
)
}
headerLines.push('', '## AI Review', '')
return `${headerLines.join('\n')}${input.reviewText.trim()}\n`
}
async function reviewVideo(
@@ -430,8 +546,19 @@ async function reviewVideo(
}
}
const beforeVideoPath = options.beforeVideo
? resolve(options.beforeVideo)
: ''
if (beforeVideoPath) {
const beforeStat = await stat(beforeVideoPath)
process.stdout.write(
`[${video.platformName}] Before video: ${toProjectRelativePath(beforeVideoPath)} (${formatBytes(beforeStat.size)})\n`
)
}
process.stdout.write(
`[${video.platformName}] Sending video ${toProjectRelativePath(video.videoPath)} to ${options.model}\n`
`[${video.platformName}] Sending ${beforeVideoPath ? '2 videos (comparative)' : 'video'} to ${options.model}\n`
)
const reviewText = await requestGeminiReview({
@@ -439,6 +566,7 @@ async function reviewVideo(
model: options.model,
platformName: video.platformName,
videoPath: video.videoPath,
beforeVideoPath,
timeoutMs: options.requestTimeoutMs,
prContext
})
@@ -448,13 +576,22 @@ async function reviewVideo(
options.outputDir,
`${video.platformName}-qa-video-report.md`
)
const reportMarkdown = buildReportMarkdown({
const reportInput: Parameters<typeof buildReportMarkdown>[0] = {
platformName: video.platformName,
model: options.model,
videoPath: video.videoPath,
videoSizeBytes: videoStat.size,
reviewText
})
}
if (beforeVideoPath) {
const beforeStat = await stat(beforeVideoPath)
reportInput.beforeVideoPath = beforeVideoPath
reportInput.beforeVideoSizeBytes = beforeStat.size
}
const reportMarkdown = buildReportMarkdown(reportInput)
await mkdir(dirname(outputPath), { recursive: true })
await writeFile(outputPath, reportMarkdown, 'utf-8')