feat: add WebSocket reconnect telemetry for incident-39 observability

Track WebSocket disconnect episodes to quantify how often users lose
real-time updates while jobs are running. Fires a single
app:websocket_reconnected event per disconnect episode, gated on
active jobs to control volume.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Matt Miller
2026-03-19 11:15:56 -07:00
parent 944f78adf4
commit 92abd71213
5 changed files with 48 additions and 1 deletions

View File

@@ -27,6 +27,7 @@ import type {
TemplateLibraryMetadata,
TemplateMetadata,
UiButtonClickMetadata,
WebSocketReconnectedMetadata,
WorkflowCreatedMetadata,
WorkflowImportMetadata,
WorkflowSavedMetadata
@@ -236,4 +237,8 @@ export class TelemetryRegistry implements TelemetryDispatcher {
/**
 * Reports a page view by handing `dispatch` a callback that invokes each
 * provider's optional `trackPageView` hook; providers without the hook are
 * skipped by the optional call.
 *
 * @param pageName - Name of the page being reported.
 * @param properties - Optional page-view metadata, forwarded unchanged.
 */
trackPageView(pageName: string, properties?: PageViewMetadata): void {
  this.dispatch((target) => {
    // Keep returning the hook's result so `dispatch` sees the same value.
    return target.trackPageView?.(pageName, properties)
  })
}
/**
 * Reports a WebSocket reconnect by handing `dispatch` a callback that invokes
 * each provider's optional `trackWebSocketReconnected` hook; providers
 * without the hook are skipped by the optional call.
 *
 * @param metadata - Reconnect details, forwarded unchanged.
 */
trackWebSocketReconnected(metadata: WebSocketReconnectedMetadata): void {
  this.dispatch((target) => {
    // Keep returning the hook's result so `dispatch` sees the same value.
    return target.trackWebSocketReconnected?.(metadata)
  })
}
}

View File

@@ -41,6 +41,7 @@ import type {
TemplateLibraryMetadata,
TemplateMetadata,
UiButtonClickMetadata,
WebSocketReconnectedMetadata,
WorkflowCreatedMetadata,
WorkflowImportMetadata,
WorkflowSavedMetadata
@@ -442,4 +443,8 @@ export class MixpanelTelemetryProvider implements TelemetryProvider {
/**
 * Records a UI button click under the `UI_BUTTON_CLICKED` event name.
 *
 * @param metadata - Button-click details passed to `trackEvent` as the
 *   event properties.
 */
trackUiButtonClicked(metadata: UiButtonClickMetadata): void {
  const eventName = TelemetryEvents.UI_BUTTON_CLICKED
  this.trackEvent(eventName, metadata)
}
/**
 * Records a WebSocket reconnect under the `WEBSOCKET_RECONNECTED` event name.
 *
 * @param metadata - Reconnect details passed to `trackEvent` as the event
 *   properties.
 */
trackWebSocketReconnected(metadata: WebSocketReconnectedMetadata): void {
  const eventName = TelemetryEvents.WEBSOCKET_RECONNECTED
  this.trackEvent(eventName, metadata)
}
}

View File

@@ -36,6 +36,7 @@ import type {
TemplateLibraryMetadata,
TemplateMetadata,
UiButtonClickMetadata,
WebSocketReconnectedMetadata,
WorkflowCreatedMetadata,
WorkflowImportMetadata,
WorkflowSavedMetadata
@@ -452,4 +453,8 @@ export class PostHogTelemetryProvider implements TelemetryProvider {
...properties
})
}
/**
 * Records a WebSocket reconnect under the `WEBSOCKET_RECONNECTED` event name.
 *
 * @param metadata - Reconnect details passed to `trackEvent` as the event
 *   properties.
 */
trackWebSocketReconnected(metadata: WebSocketReconnectedMetadata): void {
  const eventName = TelemetryEvents.WEBSOCKET_RECONNECTED
  this.trackEvent(eventName, metadata)
}
}

View File

@@ -304,6 +304,15 @@ export interface WorkflowCreatedMetadata {
previous_workflow_had_nodes: boolean
}
/**
 * WebSocket reconnection metadata
 *
 * Properties sent with the `app:websocket_reconnected` event, describing a
 * single disconnect episode that has just ended.
 */
export interface WebSocketReconnectedMetadata {
/**
 * Milliseconds between the first reconnecting notification of the episode
 * and the reconnect, measured with `Date.now()` by the visible caller.
 */
disconnect_duration_ms: number
/**
 * Whether jobs were active at reconnect time.
 * NOTE(review): the only visible caller sends the event solely when the
 * active job count is above zero, so it always passes `true` — confirm
 * whether other callers exist.
 */
had_active_jobs: boolean
/** Number of active jobs read from the queue store at reconnect time. */
active_job_count: number
}
/**
* Page view metadata for route tracking
*/
@@ -427,6 +436,9 @@ export interface TelemetryProvider {
// Page view tracking
trackPageView?(pageName: string, properties?: PageViewMetadata): void
// WebSocket lifecycle events
trackWebSocketReconnected?(metadata: WebSocketReconnectedMetadata): void
}
/**
@@ -514,7 +526,10 @@ export const TelemetryEvents = {
UI_BUTTON_CLICKED: 'app:ui_button_clicked',
// Page View
PAGE_VIEW: 'app:page_view'
PAGE_VIEW: 'app:page_view',
// WebSocket Lifecycle
WEBSOCKET_RECONNECTED: 'app:websocket_reconnected'
} as const
export type TelemetryEventName =
@@ -558,3 +573,4 @@ export type TelemetryEventProperties =
| WorkflowSavedMetadata
| DefaultViewSetMetadata
| SubscriptionMetadata
| WebSocketReconnectedMetadata

View File

@@ -253,7 +253,12 @@ const reconnectingMessage: ToastMessageOptions = {
summary: t('g.reconnecting')
}
// `Date.now()` timestamp marking the start of the current disconnect episode,
// or null when no episode is being tracked. `onReconnecting` sets it only if
// it is still null, so repeated reconnect attempts keep the original start
// time; `onReconnected` resets it to null.
let disconnectedAt: number | null = null
const onReconnecting = () => {
if (disconnectedAt === null) {
disconnectedAt = Date.now()
}
if (!settingStore.get('Comfy.Toast.DisableReconnectingToast')) {
toast.remove(reconnectingMessage)
toast.add(reconnectingMessage)
@@ -261,6 +266,17 @@ const onReconnecting = () => {
}
const onReconnected = () => {
if (disconnectedAt !== null) {
const activeJobCount = queueStore.activeJobsCount
if (activeJobCount > 0) {
telemetry?.trackWebSocketReconnected({
disconnect_duration_ms: Date.now() - disconnectedAt,
had_active_jobs: true,
active_job_count: activeJobCount
})
}
disconnectedAt = null
}
if (!settingStore.get('Comfy.Toast.DisableReconnectingToast')) {
toast.remove(reconnectingMessage)
toast.add({