Files
ComfyUI_frontend/browser_tests/fixtures/helpers/PerformanceHelper.ts
Christian Byrne 437f41c553 perf: add layout/GC metrics + reduce false positives in regression detection (#10477)
## Summary

Add layout duration, style recalc duration, and heap usage metrics to CI
perf reports, while improving statistical reliability to reduce false
positive regressions.

## Changes

- **What**:
- Collect `layoutDurationMs`, `styleRecalcDurationMs`, `heapUsedBytes`
(absolute snapshot) alongside existing metrics
- Add effect size gate (`minAbsDelta`) for integer-quantized count
metrics (style recalcs, layouts, DOM nodes, event listeners) — prevents
z=7.2 false positives from e.g. 11→12 style recalcs
- Switch from mean to **median** for PR metric aggregation — robust to
outlier CI runs that dominate n=3 mean
- Increase historical baseline window from **5 to 15 runs** for more
stable σ estimates
- Reorder reported metrics: layout/style duration first (actionable),
counts and heap after (informational)

## Review Focus

The effect size gate in `classifyChange()` — it now requires both z > 2
AND absolute delta ≥ `minAbsDelta` (when configured) to flag a
regression. This addresses the core false positive issue where integer
metrics with near-zero historical variance produce extreme z-scores for
trivial changes.

Median vs mean tradeoff: median is more robust to outliers but less
sensitive to real shifts — acceptable given n=3 and CI noise levels.

┆Issue is synchronized with this [Notion
page](https://www.notion.so/PR-10477-perf-add-layout-GC-metrics-reduce-false-positives-in-regression-detection-32d6d73d365081daa72cec96d8a07b90)
by [Unito](https://www.unito.io)
2026-03-25 10:16:56 -07:00

204 lines
6.2 KiB
TypeScript

import type { CDPSession, Page } from '@playwright/test'
interface PerfSnapshot {
RecalcStyleCount: number
RecalcStyleDuration: number
LayoutCount: number
LayoutDuration: number
TaskDuration: number
JSHeapUsedSize: number
Timestamp: number
Nodes: number
JSHeapTotalSize: number
ScriptDuration: number
JSEventListeners: number
}
export interface PerfMeasurement {
name: string
durationMs: number
styleRecalcs: number
styleRecalcDurationMs: number
layouts: number
layoutDurationMs: number
taskDurationMs: number
heapDeltaBytes: number
heapUsedBytes: number
domNodes: number
jsHeapTotalBytes: number
scriptDurationMs: number
eventListeners: number
totalBlockingTimeMs: number
frameDurationMs: number
}
export class PerformanceHelper {
private cdp: CDPSession | null = null
private snapshot: PerfSnapshot | null = null
constructor(private readonly page: Page) {}
async init(): Promise<void> {
this.cdp = await this.page.context().newCDPSession(this.page)
await this.cdp.send('Performance.enable')
}
async dispose(): Promise<void> {
this.snapshot = null
if (this.cdp) {
try {
await this.cdp.send('Performance.disable')
} finally {
await this.cdp.detach()
this.cdp = null
}
}
}
private async getSnapshot(): Promise<PerfSnapshot> {
if (!this.cdp) throw new Error('PerformanceHelper not initialized')
const { metrics } = (await this.cdp.send('Performance.getMetrics')) as {
metrics: { name: string; value: number }[]
}
function get(name: string): number {
return metrics.find((m) => m.name === name)?.value ?? 0
}
return {
RecalcStyleCount: get('RecalcStyleCount'),
RecalcStyleDuration: get('RecalcStyleDuration'),
LayoutCount: get('LayoutCount'),
LayoutDuration: get('LayoutDuration'),
TaskDuration: get('TaskDuration'),
JSHeapUsedSize: get('JSHeapUsedSize'),
Timestamp: get('Timestamp'),
Nodes: get('Nodes'),
JSHeapTotalSize: get('JSHeapTotalSize'),
ScriptDuration: get('ScriptDuration'),
JSEventListeners: get('JSEventListeners')
}
}
/**
* Collect longtask entries from PerformanceObserver and compute TBT.
* TBT = sum of (duration - 50ms) for every task longer than 50ms.
*/
private async collectTBT(): Promise<number> {
return this.page.evaluate(() => {
const state = (window as unknown as Record<string, unknown>)
.__perfLongtaskState as
| { observer: PerformanceObserver; tbtMs: number }
| undefined
if (!state) return 0
// Flush any queued-but-undelivered entries into our accumulator
for (const entry of state.observer.takeRecords()) {
if (entry.duration > 50) state.tbtMs += entry.duration - 50
}
const result = state.tbtMs
state.tbtMs = 0
return result
})
}
/**
* Measure average frame duration via rAF timing over a sample window.
* Returns average ms per frame (lower = better, 16.67 = 60fps).
*/
private async measureFrameDuration(sampleFrames = 10): Promise<number> {
return this.page.evaluate((frames) => {
return new Promise<number>((resolve) => {
const timeout = setTimeout(() => resolve(0), 5000)
const timestamps: number[] = []
let count = 0
function tick(ts: number) {
timestamps.push(ts)
count++
if (count <= frames) {
requestAnimationFrame(tick)
} else {
clearTimeout(timeout)
if (timestamps.length < 2) {
resolve(0)
return
}
const total = timestamps[timestamps.length - 1] - timestamps[0]
resolve(total / (timestamps.length - 1))
}
}
requestAnimationFrame(tick)
})
}, sampleFrames)
}
async startMeasuring(): Promise<void> {
if (this.snapshot) {
throw new Error(
'Measurement already in progress — call stopMeasuring() first'
)
}
// Install longtask observer if not already present, then reset the
// accumulator so old longtasks don't bleed into the new measurement window.
await this.page.evaluate(() => {
const win = window as unknown as Record<string, unknown>
if (!win.__perfLongtaskState) {
const state: { observer: PerformanceObserver; tbtMs: number } = {
observer: new PerformanceObserver((list) => {
const self = (window as unknown as Record<string, unknown>)
.__perfLongtaskState as {
observer: PerformanceObserver
tbtMs: number
}
for (const entry of list.getEntries()) {
if (entry.duration > 50) self.tbtMs += entry.duration - 50
}
}),
tbtMs: 0
}
state.observer.observe({ type: 'longtask', buffered: true })
win.__perfLongtaskState = state
}
const state = win.__perfLongtaskState as {
observer: PerformanceObserver
tbtMs: number
}
state.tbtMs = 0
state.observer.takeRecords()
})
this.snapshot = await this.getSnapshot()
}
async stopMeasuring(name: string): Promise<PerfMeasurement> {
if (!this.snapshot) throw new Error('Call startMeasuring() first')
const after = await this.getSnapshot()
const before = this.snapshot
this.snapshot = null
function delta(key: keyof PerfSnapshot): number {
return after[key] - before[key]
}
const [totalBlockingTimeMs, frameDurationMs] = await Promise.all([
this.collectTBT(),
this.measureFrameDuration()
])
return {
name,
durationMs: delta('Timestamp') * 1000,
styleRecalcs: delta('RecalcStyleCount'),
styleRecalcDurationMs: delta('RecalcStyleDuration') * 1000,
layouts: delta('LayoutCount'),
layoutDurationMs: delta('LayoutDuration') * 1000,
taskDurationMs: delta('TaskDuration') * 1000,
heapDeltaBytes: delta('JSHeapUsedSize'),
heapUsedBytes: after.JSHeapUsedSize,
domNodes: delta('Nodes'),
jsHeapTotalBytes: delta('JSHeapTotalSize'),
scriptDurationMs: delta('ScriptDuration') * 1000,
eventListeners: delta('JSEventListeners'),
totalBlockingTimeMs,
frameDurationMs
}
}
}