Compare commits

...

11 Commits

Author SHA1 Message Date
Matt Miller
c6a2645191 Merge branch 'main' into feat/websocket-reconnect-telemetry 2026-03-26 09:58:37 -07:00
Matt Miller
79a029d233 refactor: move WebSocket reconnect tracking from PostHog/Mixpanel to Sentry
Reconnect events are operational/debug signals, not product analytics.
Sentry is the right home for this — breadcrumbs for all reconnects,
captureMessage (warning) when jobs were in flight.

Also addresses review feedback:
- Extract to useWebSocketReconnectTracking composable
- Use performance.now() instead of Date.now()
- Add tests for the composable (6 cases)
- Remove PostHog/Mixpanel telemetry plumbing

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-24 18:38:49 -07:00
Matt Miller
1dbb26bb9a Merge branch 'main' into feat/websocket-reconnect-telemetry 2026-03-23 10:54:22 -07:00
Matt Miller
b5ba737f43 Merge branch 'main' into feat/websocket-reconnect-telemetry 2026-03-23 09:53:11 -07:00
Matt Miller
4e8729bc61 Merge branch 'main' into feat/websocket-reconnect-telemetry 2026-03-21 11:04:34 -07:00
Matt Miller
251558bd2c Merge branch 'main' into feat/websocket-reconnect-telemetry 2026-03-20 16:54:00 -07:00
Matt Miller
515f69c02f Merge branch 'main' into feat/websocket-reconnect-telemetry 2026-03-20 16:36:14 -07:00
Matt Miller
b344883871 fix: reset disconnect state on GraphView unmount
Prevents stale disconnectedAt timestamps from producing inflated
disconnect_duration_ms if the component remounts mid-disconnect.
2026-03-20 10:12:08 -07:00
Matt Miller
aec5449f62 docs: clarify activeJobsCount includes pending tasks intentionally 2026-03-20 10:12:08 -07:00
Matt Miller
53d09ab869 fix: address PR review feedback on websocket reconnect telemetry
1. Fire reconnect event unconditionally so had_active_jobs varies
   (was always true since it was inside an activeJobCount > 0 guard)
2. Capture activeJobsCount at disconnect time, not reconnect time,
   since the queue store hasn't refreshed yet on reconnect
3. Add PostHog provider tests for trackWebSocketReconnected
2026-03-20 10:12:08 -07:00
Matt Miller
92abd71213 feat: add WebSocket reconnect telemetry for incident-39 observability
Track WebSocket disconnect episodes to quantify how often users lose
real-time updates while jobs are running. Fires a single
app:websocket_reconnected event per disconnect episode, gated on
active jobs to control volume.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 10:12:08 -07:00
3 changed files with 173 additions and 0 deletions

View File

@@ -0,0 +1,101 @@
import * as Sentry from '@sentry/vue'
import { createPinia, setActivePinia } from 'pinia'
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { useWebSocketReconnectTracking } from './useWebSocketReconnectTracking'
// Job count reported by the mocked queue store. Tests reassign this to
// simulate the queue changing over time.
let mockActiveJobsCount = 0

// Replace the Sentry SDK with spies so the tests can assert on what the
// composable reports without sending anything.
vi.mock('@sentry/vue', () => ({
  addBreadcrumb: vi.fn(),
  captureMessage: vi.fn()
}))

vi.mock('@/stores/queueStore', () => ({
  useQueueStore: () => ({
    // Exposed as a getter so the value is read when the composable accesses
    // it, not when the store object is created. A plain property would freeze
    // the count at composable construction, and tests that change the count
    // between calls (e.g. the duplicate-disconnect case) would pass even if
    // the behavior under test were broken.
    get activeJobsCount() {
      return mockActiveJobsCount
    }
  })
}))
describe('useWebSocketReconnectTracking', () => {
  // Every case starts from clean spies, an empty queue, and a fresh Pinia.
  beforeEach(() => {
    vi.clearAllMocks()
    mockActiveJobsCount = 0
    setActivePinia(createPinia())
  })

  it('does nothing on reconnect without prior disconnect', () => {
    const tracker = useWebSocketReconnectTracking()

    tracker.onReconnect()

    expect(Sentry.addBreadcrumb).not.toHaveBeenCalled()
    expect(Sentry.captureMessage).not.toHaveBeenCalled()
  })

  it('adds breadcrumb on reconnect after disconnect', () => {
    const tracker = useWebSocketReconnectTracking()
    const expectedBreadcrumb = expect.objectContaining({
      category: 'websocket',
      message: 'WebSocket reconnected',
      level: 'info'
    })

    tracker.onDisconnect()
    tracker.onReconnect()

    expect(Sentry.addBreadcrumb).toHaveBeenCalledWith(expectedBreadcrumb)
  })

  it('captures warning when jobs were active at disconnect', () => {
    mockActiveJobsCount = 3
    const tracker = useWebSocketReconnectTracking()
    const expectedContext = expect.objectContaining({
      level: 'warning',
      tags: { incident: 'incident-39' },
      extra: expect.objectContaining({ active_job_count: 3 })
    })

    tracker.onDisconnect()
    tracker.onReconnect()

    expect(Sentry.captureMessage).toHaveBeenCalledWith(
      'WebSocket reconnected with active jobs',
      expectedContext
    )
  })

  it('does not capture warning when no jobs were active', () => {
    mockActiveJobsCount = 0
    const tracker = useWebSocketReconnectTracking()

    tracker.onDisconnect()
    tracker.onReconnect()

    expect(Sentry.addBreadcrumb).toHaveBeenCalled()
    expect(Sentry.captureMessage).not.toHaveBeenCalled()
  })

  it('ignores duplicate disconnect calls', () => {
    mockActiveJobsCount = 2
    const tracker = useWebSocketReconnectTracking()

    tracker.onDisconnect()
    // The queue changes before the second (redundant) disconnect signal.
    mockActiveJobsCount = 5
    tracker.onDisconnect()
    tracker.onReconnect()

    // The reported count must be the one seen at the first disconnect (2),
    // not the one present at the second (5).
    const breadcrumbWithFirstCount = expect.objectContaining({
      data: expect.objectContaining({ active_job_count: 2 })
    })
    expect(Sentry.addBreadcrumb).toHaveBeenCalledWith(breadcrumbWithFirstCount)
  })

  it('resets state and prevents stale reconnect', () => {
    const tracker = useWebSocketReconnectTracking()

    tracker.onDisconnect()
    tracker.reset()
    tracker.onReconnect()

    expect(Sentry.addBreadcrumb).not.toHaveBeenCalled()
  })
})

View File

@@ -0,0 +1,66 @@
import * as Sentry from '@sentry/vue'
import { useQueueStore } from '@/stores/queueStore'
/**
 * Tracks WebSocket disconnect/reconnect episodes via Sentry for incident-39
 * observability. Captures disconnect duration and whether jobs were in flight
 * when the connection dropped.
 *
 * One episode starts at the first `onDisconnect()` and ends at the next
 * `onReconnect()`. Every completed episode adds a Sentry breadcrumb; episodes
 * that began with active jobs additionally capture a warning-level message.
 *
 * @returns Handlers to wire to the connection lifecycle:
 * - `onDisconnect` marks the start of an episode (repeat calls are ignored
 *   until the episode ends).
 * - `onReconnect` reports and ends the current episode (no-op if none).
 * - `reset` discards the current episode without reporting it.
 */
export function useWebSocketReconnectTracking(): {
  onDisconnect: () => void
  onReconnect: () => void
  reset: () => void
} {
  const queueStore = useQueueStore()
  // performance.now() timestamp of the first disconnect in the current
  // episode, or null when no episode is in progress.
  let disconnectedAt: number | null = null
  let activeJobCountAtDisconnect = 0

  function reset(): void {
    disconnectedAt = null
    activeJobCountAtDisconnect = 0
  }

  function onDisconnect(): void {
    // Only the first disconnect of an episode counts; later signals must not
    // move the start time or overwrite the captured job count.
    if (disconnectedAt !== null) return
    disconnectedAt = performance.now()
    // Includes both pending and running tasks. Pending tasks matter because
    // their pending->running transition is delivered via WebSocket -- if the
    // connection drops while tasks are queued, the user never sees them start.
    activeJobCountAtDisconnect = queueStore.activeJobsCount
  }

  function onReconnect(): void {
    if (disconnectedAt === null) return

    const durationMs = Math.round(performance.now() - disconnectedAt)
    const activeJobCount = activeJobCountAtDisconnect
    const hadActiveJobs = activeJobCount > 0

    // End the episode before reporting. If a Sentry call throws, the tracker
    // must not stay stuck in the "disconnected" state, which would make later
    // disconnects be ignored and the next report carry a stale duration and
    // job count.
    reset()

    Sentry.addBreadcrumb({
      category: 'websocket',
      message: 'WebSocket reconnected',
      level: 'info',
      data: {
        disconnect_duration_ms: durationMs,
        had_active_jobs: hadActiveJobs,
        active_job_count: activeJobCount
      }
    })

    if (hadActiveJobs) {
      Sentry.captureMessage('WebSocket reconnected with active jobs', {
        level: 'warning',
        tags: {
          incident: 'incident-39'
        },
        extra: {
          disconnect_duration_ms: durationMs,
          active_job_count: activeJobCount
        }
      })
    }
  }

  return { onDisconnect, onReconnect, reset }
}

View File

@@ -66,6 +66,7 @@ import ModelImportProgressDialog from '@/platform/assets/components/ModelImportP
import { isCloud, isDesktop } from '@/platform/distribution/types'
import { useSettingStore } from '@/platform/settings/settingStore'
import { useTelemetry } from '@/platform/telemetry'
import { useWebSocketReconnectTracking } from '@/composables/useWebSocketReconnectTracking'
import { useFrontendVersionMismatchWarning } from '@/platform/updates/common/useFrontendVersionMismatchWarning'
import { useVersionCompatibilityStore } from '@/platform/updates/common/versionCompatibilityStore'
import { useCanvasStore } from '@/renderer/core/canvas/canvasStore'
@@ -253,7 +254,10 @@ const reconnectingMessage: ToastMessageOptions = {
summary: t('g.reconnecting')
}
const wsReconnectTracking = useWebSocketReconnectTracking()
const onReconnecting = () => {
wsReconnectTracking.onDisconnect()
if (!settingStore.get('Comfy.Toast.DisableReconnectingToast')) {
toast.remove(reconnectingMessage)
toast.add(reconnectingMessage)
@@ -261,6 +265,7 @@ const onReconnecting = () => {
}
const onReconnected = () => {
wsReconnectTracking.onReconnect()
if (!settingStore.get('Comfy.Toast.DisableReconnectingToast')) {
toast.remove(reconnectingMessage)
toast.add({
@@ -288,6 +293,7 @@ onMounted(() => {
})
// Teardown that runs just before this component is unmounted.
onBeforeUnmount(() => {
// Discard any in-progress disconnect episode so that, if the component
// remounts mid-disconnect, a stale disconnect timestamp cannot produce an
// inflated disconnect duration on the next reconnect.
wsReconnectTracking.reset()
executionStore.unbindExecutionEvents()
})