// tests/offscreen/port-reconnect-race.test.ts // // RED-gate tests for debug session empty-archive-port-race // (.planning/debug/empty-archive-port-race.md). // // Empirically pins the two coupled defects observed in Phase 1 UAT Test 3: // // H1 Pre-emptive reconnect at 290 s leaves the ping interval briefly // able to fire against a Port whose remote end has been disconnected, // throwing "Attempting to use a disconnected port object" — 3 // uncaught errors observed in the offscreen console. // // H2 REQUEST_BUFFER round-trip silently drops BUFFER when the port is // replaced mid-encode (src/offscreen/recorder.ts:597 stale-port // refuse) AND when the SW's videoPort is null at request time // (src/background/index.ts:128 fast-bailout). The SW then times // out (BUFFER_FETCH_TIMEOUT_MS = 2 s) and resolves // `{ segments: [] }`, which createArchive's branch at // src/background/index.ts:346 silently elides into a video-less // archive. Operator-visible result: 88 KB zip with NO // video/last_30sec.webm. // // These tests drive the REAL src/offscreen/recorder.ts module via the // established chrome-stub harness (same scaffold as // tests/offscreen/port.test.ts + handshake.test.ts). No source-code // re-implementation — the production module is what we're testing. import { describe, it, expect, vi, beforeEach } from 'vitest'; interface PortStub { name: string; // Throw-on-disconnected-postMessage: simulates the real Chrome behaviour // documented at https://developer.chrome.com/docs/extensions/develop/concepts/messaging // — postMessage on a disconnected Port throws synchronously with // "Attempting to use a disconnected port object". postMessage: ReturnType; onMessage: { addListener: ReturnType; // capture for synthetic dispatch _listeners: Array<(msg: unknown) => void>; }; onDisconnect: { addListener: (fn: () => void) => void; // capture for synthetic dispatch _listeners: Array<() => void>; }; disconnect: ReturnType; // test seam — flip this to true to simulate the remote disconnect // having taken effect before the next postMessage runs. _disconnected: boolean; } interface ChromeStub { runtime: { id: string; sendMessage: ReturnType; onMessage: { addListener: ReturnType }; connect: () => PortStub; }; } interface GlobalWithChrome { chrome?: ChromeStub; MediaRecorder?: { isTypeSupported: (mime: string) => boolean }; } // Build a PortStub whose postMessage throws once `_disconnected` flips true, // matching Chrome's runtime semantics. The throw is the exact error string // the offscreen console showed in UAT Test 3 ("Attempting to use a // disconnected port object"), so the test fails loudly when the bug is // reproduced — and is silent (pass) once the production code clears the // ping interval BEFORE the disconnect-detection race window. function makePortStub(): PortStub { const port: PortStub = { name: 'video-keepalive', postMessage: vi.fn(), onMessage: { addListener: vi.fn(), _listeners: [], }, onDisconnect: { addListener: (fn: () => void) => { port.onDisconnect._listeners.push(fn); }, _listeners: [], }, disconnect: vi.fn(() => { port._disconnected = true; }), _disconnected: false, }; // Wire onMessage.addListener to capture into _listeners so the test can // dispatch REQUEST_BUFFER synthetically. port.onMessage.addListener.mockImplementation( (fn: (msg: unknown) => void) => { port.onMessage._listeners.push(fn); } ); // Wire postMessage to throw if the port is disconnected — Chrome's // documented behaviour. This is the smoking gun for H1. port.postMessage.mockImplementation((msg: unknown) => { if (port._disconnected) { throw new Error('Attempting to use a disconnected port object'); } return msg; }); return port; } function buildChromeStub(ports: PortStub[]): ChromeStub { return { runtime: { id: 'ext-id-test', sendMessage: vi.fn(), onMessage: { addListener: vi.fn() }, connect: () => { const port = makePortStub(); ports.push(port); return port; }, }, }; } describe('port reconnect race (RED — confirms empty-archive-port-race H1+H2)', () => { let originalSetInterval: typeof globalThis.setInterval; let originalClearInterval: typeof globalThis.clearInterval; let originalSetTimeout: typeof globalThis.setTimeout; let originalClearTimeout: typeof globalThis.clearTimeout; beforeEach(() => { vi.resetModules(); (globalThis as unknown as GlobalWithChrome).MediaRecorder = { isTypeSupported: vi.fn().mockReturnValue(true), }; originalSetInterval = globalThis.setInterval; originalClearInterval = globalThis.clearInterval; originalSetTimeout = globalThis.setTimeout; originalClearTimeout = globalThis.clearTimeout; }); // ────────────────────────────────────────────────────────────────────── // H1 — ping fires against disconnected port and throws // ────────────────────────────────────────────────────────────────────── // // Production code path (src/offscreen/recorder.ts:623-625): // pingIntervalId = setInterval(() => { // keepalivePort?.postMessage({ type: 'PING' }); // throws if disconnected // }, PORT_PING_MS); // // The `?.` only guards against keepalivePort === null. After the SW // (or the SW's idle-eviction, or a remote disconnect) tears down the // port, the offscreen-side Port reference is still non-null but // postMessage throws synchronously with "Attempting to use a // disconnected port object". The interval is cleared only in the // onDisconnect handler — there's a race window during which the next // queued interval callback fires before onDisconnect runs. // // Test strategy: intercept setInterval to capture the ping callback, // then synthetically disconnect the port (without firing onDisconnect // first — mimicking the race), then invoke the captured ping callback // and assert it does NOT throw an uncaught error. it('H1: ping callback handles a remotely-disconnected port without throwing', async () => { const ports: PortStub[] = []; const stub = buildChromeStub(ports); (globalThis as unknown as GlobalWithChrome).chrome = stub; // Intercept setInterval to capture the ping callback for direct invocation. const intervalCallbacks: Array<() => void> = []; globalThis.setInterval = ((cb: () => void, _ms: number) => { intervalCallbacks.push(cb); return 999 as unknown as ReturnType; }) as typeof globalThis.setInterval; // No-op clearInterval — the test wants to keep the callback callable // even after the production code attempts a clear, to expose the // race-window assumption. globalThis.clearInterval = (() => {}) as typeof globalThis.clearInterval; await import('../../src/offscreen/recorder'); expect(ports.length).toBe(1); expect(intervalCallbacks.length).toBeGreaterThanOrEqual(1); const port = ports[0]; const pingCb = intervalCallbacks[0]; // Simulate the race: the remote side of the port has been // disconnected (e.g. SW eviction, ~5 min cap, port glitch) but the // offscreen's onDisconnect handler has NOT yet run, so the interval // is still installed and keepalivePort is still non-null on the // offscreen side. port._disconnected = true; // The PRODUCTION ping callback must not throw — either by checking // port.postMessage safety first, or by wrapping the call in // try/catch, or by removing the interval at first postMessage // failure. Whatever the fix, an uncaught throw here would surface // as the "Attempting to use a disconnected port object" error the // operator observed in UAT Test 3. expect(() => pingCb()).not.toThrow(); // Restore globals. globalThis.setInterval = originalSetInterval; globalThis.clearInterval = originalClearInterval; }); // ────────────────────────────────────────────────────────────────────── // H2 — BUFFER lost on stale-port reconnect; SW left with empty segments // ────────────────────────────────────────────────────────────────────── // // Production code path (src/offscreen/recorder.ts:547-604): // const finalized = getSegments(); // ... // for (let idx = 0; idx < allSegments.length; idx++) { // const data = await blobToBase64(segment); // ←── async window // transferred.push(...); // } // if (keepalivePort !== portAtRequest) { // logger.warn('port replaced during encode; dropping BUFFER response'); // return; // } // portAtRequest.postMessage({ type: 'BUFFER', segments: transferred }); // // When the SW issues REQUEST_BUFFER during the ~10-50 ms reconnect // window, the offscreen captures `portAtRequest = keepalivePort`, // awaits the per-segment base64 encode (~150 ms for 3 segments), and // by the time the encode completes the pre-emptive reconnect has // swapped `keepalivePort` to a fresh Port. The guard correctly // refuses to post on the stale Port. HOWEVER, no retry is issued — // the SW's getVideoBufferFromOffscreen times out after 2 s and // resolves `{ segments: [] }`. createArchive then takes the silent // no-video branch (src/background/index.ts:350-352). The archive // ships without video. // // This RED test pins the contract: when REQUEST_BUFFER arrives AND // segments exist, BUFFER (with the encoded segments) MUST reach SOME // live port, either: // (a) by retrying on the freshly-connected port, OR // (b) by re-issuing REQUEST_BUFFER on the new port from the // reconnect handler when there's an unanswered request in // flight. // Whatever the strategy, silent loss with non-empty buffer is the // bug. The current production code drops the response on the floor. it('H2: REQUEST_BUFFER mid-reconnect must NOT drop BUFFER when segments exist', async () => { const ports: PortStub[] = []; const stub = buildChromeStub(ports); (globalThis as unknown as GlobalWithChrome).chrome = stub; const recorder = await import('../../src/offscreen/recorder'); // 1) Seed the ring buffer with 3 real Blob segments via the test // seam (recorder.ts:85 — pushSegmentForTest). Each segment is a // plausible WebM head (EBML magic + filler bytes). The 3 × // payload ensures the for-loop body actually awaits — opening // the real reconnect race window. const ebmlMagic = new Uint8Array([0x1a, 0x45, 0xdf, 0xa3]); recorder.resetBuffer(); for (let i = 0; i < 3; i++) { const payload = new Uint8Array(1024); payload.set(ebmlMagic, 0); payload.fill(i + 1, ebmlMagic.length); recorder.pushSegmentForTest( new Blob([payload], { type: 'video/webm' }) ); } expect(recorder.getSegments().length).toBe(3); expect(ports.length).toBe(1); const oldPort = ports[0]; expect(oldPort.onMessage._listeners.length).toBeGreaterThanOrEqual(1); // 2) SW issues REQUEST_BUFFER — synthetically dispatch to the // captured onPortMessage listener. The production code kicks off // encodeAndSendBuffer which awaits 3 blobToBase64 calls. oldPort.onMessage._listeners[0]({ type: 'REQUEST_BUFFER' }); // 3) Mid-encode, the pre-emptive reconnect fires. Simulate by // firing onDisconnect on the old port — production code's // handler runs `keepalivePort = null` then `connectPort()` // which spawns a new PortStub. The keepalivePort reference now // points to the new port; the in-flight portAtRequest in the // encode loop still points to oldPort, so the stale-port guard // at line 597 will trigger. oldPort.onDisconnect._listeners.forEach((fn) => fn()); // After reconnect a NEW port exists. expect(ports.length).toBeGreaterThanOrEqual(2); const newPort = ports[ports.length - 1]; // 4) Drain enough microtasks for the encode loop to complete and // the post-await guard to evaluate. blobToBase64 awaits // `blob.arrayBuffer()` (1 microtask) ×3 + the guard check. // Generous: drain 32 microtasks then a macrotask. This is // deterministic — vitest's Node env exposes setImmediate // semantics via setTimeout(_, 0). for (let i = 0; i < 32; i++) { await Promise.resolve(); } await new Promise((resolve) => setTimeout(resolve, 0)); for (let i = 0; i < 32; i++) { await Promise.resolve(); } // CONTRACT — pick exactly one: // // (a) BUFFER posted on the NEW port (segments encoded, retry on // fresh port works), OR // (b) BUFFER posted on the OLD port (encode completed and the // post squeezed in before the swap took effect — acceptable // because the SW's per-request listener is still bound to it // and the SW gets the data). // // The current production code posts NEITHER when the reconnect // arrives between the await and the post (the line 597 guard // returns silently). This assertion FAILS today, proving the H2 // bug. The fix must surface SOME signal that the SW can act on — // silent loss when the buffer is non-empty is the bug. const oldPortBuffers = oldPort.postMessage.mock.calls.filter( (call: unknown[]) => typeof call[0] === 'object' && call[0] !== null && (call[0] as { type?: unknown }).type === 'BUFFER' ); const newPortBuffers = newPort.postMessage.mock.calls.filter( (call: unknown[]) => typeof call[0] === 'object' && call[0] !== null && (call[0] as { type?: unknown }).type === 'BUFFER' ); const totalBuffersPosted = oldPortBuffers.length + newPortBuffers.length; // Stronger assertion: when BUFFER reaches a port, it must carry // the 3 segments we seeded. A 0-segment BUFFER is the silent-loss // shape and should not satisfy the contract. const allBufferCalls = [...oldPortBuffers, ...newPortBuffers]; const segmentBearingBuffers = allBufferCalls.filter((call) => { const msg = call[0] as { segments?: unknown }; return Array.isArray(msg.segments) && msg.segments.length > 0; }); expect(totalBuffersPosted).toBeGreaterThanOrEqual(1); expect(segmentBearingBuffers.length).toBeGreaterThanOrEqual(1); // Cleanup test seam state so we don't bleed segments across tests. recorder.resetBuffer(); }); // ────────────────────────────────────────────────────────────────────── // H1.b — pre-emptive reconnect path specifically. Walk the same // sequence the operator hit at 290 s: the production code calls // `keepalivePort?.disconnect()` and the test then invokes the ping // callback that was installed BEFORE the reconnect. This proves the // race window in the production timer-clear ordering. // ────────────────────────────────────────────────────────────────────── it('H1.b: pre-emptive reconnect path — ping does not throw against just-disconnected port', async () => { const ports: PortStub[] = []; const stub = buildChromeStub(ports); (globalThis as unknown as GlobalWithChrome).chrome = stub; // Capture both interval callbacks AND timeout callbacks for direct // invocation. We need to fire the pre-emptive setTimeout // synthetically (it would otherwise wait 290 s of wall-clock time). const intervalCallbacks: Array<() => void> = []; const timeoutCallbacks: Array<() => void> = []; globalThis.setInterval = ((cb: () => void, _ms: number) => { intervalCallbacks.push(cb); return 100 as unknown as ReturnType; }) as typeof globalThis.setInterval; globalThis.setTimeout = ((cb: () => void, _ms: number) => { timeoutCallbacks.push(cb); return 200 as unknown as ReturnType; }) as typeof globalThis.setTimeout; // No-op the clear* so the captured callbacks remain callable — // surfaces the race-window assumption in the production code. globalThis.clearInterval = (() => {}) as typeof globalThis.clearInterval; globalThis.clearTimeout = (() => {}) as typeof globalThis.clearTimeout; await import('../../src/offscreen/recorder'); expect(ports.length).toBe(1); const port = ports[0]; // intervalCallbacks[0] is the ping callback (first setInterval call). // timeoutCallbacks contains at least the pre-emptive reconnect timer // (recorder.ts:626). Other setTimeouts may exist (e.g. queueMicrotask // shims, scheduleRotation — but the latter is gated on mediaStream // which is null in this test). expect(intervalCallbacks.length).toBeGreaterThanOrEqual(1); expect(timeoutCallbacks.length).toBeGreaterThanOrEqual(1); const pingCb = intervalCallbacks[0]; // The pre-emptive reconnect timeout is the only setTimeout the // bootstrap installs (rotation is gated on mediaStream). Pick the // first to keep the test resilient. const preemptiveReconnect = timeoutCallbacks[0]; // 1) Fire pre-emptive reconnect — the production code calls // keepalivePort?.disconnect(). Our stub flips _disconnected=true // synchronously (matching Chrome's local disconnect semantics). preemptiveReconnect(); expect(port._disconnected).toBe(true); expect(port.disconnect).toHaveBeenCalled(); // 2) Now the race: an already-queued ping callback (e.g. scheduled // before the reconnect) fires against the just-disconnected port. // Production code: keepalivePort?.postMessage({type:'PING'}) — // keepalivePort is still the old non-null reference at this point // (the onDisconnect handler may not have run yet in the same // macrotask), so postMessage runs against the disconnected port // and our stub throws — exactly matching the UAT error message. // // The fix must make this safe. Acceptable strategies: // - Clear the interval as the FIRST step of the pre-emptive // reconnect, before .disconnect(). // - Wrap postMessage in try/catch inside the ping callback. // - Switch to a per-port closure (capturedPort) so a stale ping // can detect it's pinging a swapped port. expect(() => pingCb()).not.toThrow(); // Restore globals. globalThis.setInterval = originalSetInterval; globalThis.clearInterval = originalClearInterval; globalThis.setTimeout = originalSetTimeout; globalThis.clearTimeout = originalClearTimeout; }); });