Files
mokosh/tests/offscreen/port-reconnect-race.test.ts
Mark 674c415945 test(debug-empty-archive): RED gate for empty-archive-port-race (H1 + H1.b + H2)
Phase 1 UAT Test 3 surfaced a two-headed BLOCKER:
(a) silent empty-video archive when save crosses a port-reconnect window,
(b) 3x "Attempting to use a disconnected port object" Uncaught Errors
starting at the 290 s pre-emptive reconnect mark.

Bisect confirmed: H1 (port lifecycle race) was introduced by Plan 01-04
(b064a21); H2 (createArchive silent-skip on empty segments) is an upstream
defect (555eb05) that became fatal once CR-01 + sweep #5 guaranteed the
silent-skip branch would fire on every save during a reconnect window.

This commit lands the 3 RED tests at the unit-test level — they match the
UAT error string byte-for-byte for H1/H1.b and pin the silent-drop
contract for H2. They will flip GREEN as the Option C architectural
refactor (request-id'd port protocol + port-health probe + retry +
operator-visible error surface) lands across the next commits.

Baseline: 8 files / 43 tests (40 GREEN, 3 RED). tsc --noEmit exit 0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 14:17:45 +02:00

428 lines
19 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// tests/offscreen/port-reconnect-race.test.ts
//
// RED-gate tests for debug session empty-archive-port-race
// (.planning/debug/empty-archive-port-race.md).
//
// Empirically pins the two coupled defects observed in Phase 1 UAT Test 3:
//
// H1 Pre-emptive reconnect at 290 s leaves the ping interval briefly
// able to fire against a Port whose remote end has been disconnected,
// throwing "Attempting to use a disconnected port object" — 3
// uncaught errors observed in the offscreen console.
//
// H2 REQUEST_BUFFER round-trip silently drops BUFFER when the port is
// replaced mid-encode (src/offscreen/recorder.ts:597 stale-port
// refuse) AND when the SW's videoPort is null at request time
// (src/background/index.ts:128 fast-bailout). The SW then times
// out (BUFFER_FETCH_TIMEOUT_MS = 2 s) and resolves
// `{ segments: [] }`, which createArchive's branch at
// src/background/index.ts:346 silently elides into a video-less
// archive. Operator-visible result: 88 KB zip with NO
// video/last_30sec.webm.
//
// These tests drive the REAL src/offscreen/recorder.ts module via the
// established chrome-stub harness (same scaffold as
// tests/offscreen/port.test.ts + handshake.test.ts). No source-code
// re-implementation — the production module is what we're testing.
import { describe, it, expect, vi, beforeEach } from 'vitest';
interface PortStub {
name: string;
// Throw-on-disconnected-postMessage: simulates the real Chrome behaviour
// documented at https://developer.chrome.com/docs/extensions/develop/concepts/messaging
// — postMessage on a disconnected Port throws synchronously with
// "Attempting to use a disconnected port object".
postMessage: ReturnType<typeof vi.fn>;
onMessage: {
addListener: ReturnType<typeof vi.fn>;
// capture for synthetic dispatch
_listeners: Array<(msg: unknown) => void>;
};
onDisconnect: {
addListener: (fn: () => void) => void;
// capture for synthetic dispatch
_listeners: Array<() => void>;
};
disconnect: ReturnType<typeof vi.fn>;
// test seam — flip this to true to simulate the remote disconnect
// having taken effect before the next postMessage runs.
_disconnected: boolean;
}
interface ChromeStub {
runtime: {
id: string;
sendMessage: ReturnType<typeof vi.fn>;
onMessage: { addListener: ReturnType<typeof vi.fn> };
connect: () => PortStub;
};
}
interface GlobalWithChrome {
chrome?: ChromeStub;
MediaRecorder?: { isTypeSupported: (mime: string) => boolean };
}
// Build a PortStub whose postMessage throws once `_disconnected` flips true,
// matching Chrome's runtime semantics. The throw is the exact error string
// the offscreen console showed in UAT Test 3 ("Attempting to use a
// disconnected port object"), so the test fails loudly when the bug is
// reproduced — and is silent (pass) once the production code clears the
// ping interval BEFORE the disconnect-detection race window.
function makePortStub(): PortStub {
const port: PortStub = {
name: 'video-keepalive',
postMessage: vi.fn(),
onMessage: {
addListener: vi.fn(),
_listeners: [],
},
onDisconnect: {
addListener: (fn: () => void) => {
port.onDisconnect._listeners.push(fn);
},
_listeners: [],
},
disconnect: vi.fn(() => {
port._disconnected = true;
}),
_disconnected: false,
};
// Wire onMessage.addListener to capture into _listeners so the test can
// dispatch REQUEST_BUFFER synthetically.
port.onMessage.addListener.mockImplementation(
(fn: (msg: unknown) => void) => {
port.onMessage._listeners.push(fn);
}
);
// Wire postMessage to throw if the port is disconnected — Chrome's
// documented behaviour. This is the smoking gun for H1.
port.postMessage.mockImplementation((msg: unknown) => {
if (port._disconnected) {
throw new Error('Attempting to use a disconnected port object');
}
return msg;
});
return port;
}
function buildChromeStub(ports: PortStub[]): ChromeStub {
return {
runtime: {
id: 'ext-id-test',
sendMessage: vi.fn(),
onMessage: { addListener: vi.fn() },
connect: () => {
const port = makePortStub();
ports.push(port);
return port;
},
},
};
}
describe('port reconnect race (RED — confirms empty-archive-port-race H1+H2)', () => {
let originalSetInterval: typeof globalThis.setInterval;
let originalClearInterval: typeof globalThis.clearInterval;
let originalSetTimeout: typeof globalThis.setTimeout;
let originalClearTimeout: typeof globalThis.clearTimeout;
beforeEach(() => {
vi.resetModules();
(globalThis as unknown as GlobalWithChrome).MediaRecorder = {
isTypeSupported: vi.fn().mockReturnValue(true),
};
originalSetInterval = globalThis.setInterval;
originalClearInterval = globalThis.clearInterval;
originalSetTimeout = globalThis.setTimeout;
originalClearTimeout = globalThis.clearTimeout;
});
// ──────────────────────────────────────────────────────────────────────
// H1 — ping fires against disconnected port and throws
// ──────────────────────────────────────────────────────────────────────
//
// Production code path (src/offscreen/recorder.ts:623-625):
// pingIntervalId = setInterval(() => {
// keepalivePort?.postMessage({ type: 'PING' }); // throws if disconnected
// }, PORT_PING_MS);
//
// The `?.` only guards against keepalivePort === null. After the SW
// (or the SW's idle-eviction, or a remote disconnect) tears down the
// port, the offscreen-side Port reference is still non-null but
// postMessage throws synchronously with "Attempting to use a
// disconnected port object". The interval is cleared only in the
// onDisconnect handler — there's a race window during which the next
// queued interval callback fires before onDisconnect runs.
//
// Test strategy: intercept setInterval to capture the ping callback,
// then synthetically disconnect the port (without firing onDisconnect
// first — mimicking the race), then invoke the captured ping callback
// and assert it does NOT throw an uncaught error.
it('H1: ping callback handles a remotely-disconnected port without throwing', async () => {
const ports: PortStub[] = [];
const stub = buildChromeStub(ports);
(globalThis as unknown as GlobalWithChrome).chrome = stub;
// Intercept setInterval to capture the ping callback for direct invocation.
const intervalCallbacks: Array<() => void> = [];
globalThis.setInterval = ((cb: () => void, _ms: number) => {
intervalCallbacks.push(cb);
return 999 as unknown as ReturnType<typeof setInterval>;
}) as typeof globalThis.setInterval;
// No-op clearInterval — the test wants to keep the callback callable
// even after the production code attempts a clear, to expose the
// race-window assumption.
globalThis.clearInterval = (() => {}) as typeof globalThis.clearInterval;
await import('../../src/offscreen/recorder');
expect(ports.length).toBe(1);
expect(intervalCallbacks.length).toBeGreaterThanOrEqual(1);
const port = ports[0];
const pingCb = intervalCallbacks[0];
// Simulate the race: the remote side of the port has been
// disconnected (e.g. SW eviction, ~5 min cap, port glitch) but the
// offscreen's onDisconnect handler has NOT yet run, so the interval
// is still installed and keepalivePort is still non-null on the
// offscreen side.
port._disconnected = true;
// The PRODUCTION ping callback must not throw — either by checking
// port.postMessage safety first, or by wrapping the call in
// try/catch, or by removing the interval at first postMessage
// failure. Whatever the fix, an uncaught throw here would surface
// as the "Attempting to use a disconnected port object" error the
// operator observed in UAT Test 3.
expect(() => pingCb()).not.toThrow();
// Restore globals.
globalThis.setInterval = originalSetInterval;
globalThis.clearInterval = originalClearInterval;
});
// ──────────────────────────────────────────────────────────────────────
// H2 — BUFFER lost on stale-port reconnect; SW left with empty segments
// ──────────────────────────────────────────────────────────────────────
//
// Production code path (src/offscreen/recorder.ts:547-604):
// const finalized = getSegments();
// ...
// for (let idx = 0; idx < allSegments.length; idx++) {
// const data = await blobToBase64(segment); // ←── async window
// transferred.push(...);
// }
// if (keepalivePort !== portAtRequest) {
// logger.warn('port replaced during encode; dropping BUFFER response');
// return;
// }
// portAtRequest.postMessage({ type: 'BUFFER', segments: transferred });
//
// When the SW issues REQUEST_BUFFER during the ~10-50 ms reconnect
// window, the offscreen captures `portAtRequest = keepalivePort`,
// awaits the per-segment base64 encode (~150 ms for 3 segments), and
// by the time the encode completes the pre-emptive reconnect has
// swapped `keepalivePort` to a fresh Port. The guard correctly
// refuses to post on the stale Port. HOWEVER, no retry is issued —
// the SW's getVideoBufferFromOffscreen times out after 2 s and
// resolves `{ segments: [] }`. createArchive then takes the silent
// no-video branch (src/background/index.ts:350-352). The archive
// ships without video.
//
// This RED test pins the contract: when REQUEST_BUFFER arrives AND
// segments exist, BUFFER (with the encoded segments) MUST reach SOME
// live port, either:
// (a) by retrying on the freshly-connected port, OR
// (b) by re-issuing REQUEST_BUFFER on the new port from the
// reconnect handler when there's an unanswered request in
// flight.
// Whatever the strategy, silent loss with non-empty buffer is the
// bug. The current production code drops the response on the floor.
it('H2: REQUEST_BUFFER mid-reconnect must NOT drop BUFFER when segments exist', async () => {
const ports: PortStub[] = [];
const stub = buildChromeStub(ports);
(globalThis as unknown as GlobalWithChrome).chrome = stub;
const recorder = await import('../../src/offscreen/recorder');
// 1) Seed the ring buffer with 3 real Blob segments via the test
// seam (recorder.ts:85 — pushSegmentForTest). Each segment is a
// plausible WebM head (EBML magic + filler bytes). The 3 ×
// payload ensures the for-loop body actually awaits — opening
// the real reconnect race window.
const ebmlMagic = new Uint8Array([0x1a, 0x45, 0xdf, 0xa3]);
recorder.resetBuffer();
for (let i = 0; i < 3; i++) {
const payload = new Uint8Array(1024);
payload.set(ebmlMagic, 0);
payload.fill(i + 1, ebmlMagic.length);
recorder.pushSegmentForTest(
new Blob([payload], { type: 'video/webm' })
);
}
expect(recorder.getSegments().length).toBe(3);
expect(ports.length).toBe(1);
const oldPort = ports[0];
expect(oldPort.onMessage._listeners.length).toBeGreaterThanOrEqual(1);
// 2) SW issues REQUEST_BUFFER — synthetically dispatch to the
// captured onPortMessage listener. The production code kicks off
// encodeAndSendBuffer which awaits 3 blobToBase64 calls.
oldPort.onMessage._listeners[0]({ type: 'REQUEST_BUFFER' });
// 3) Mid-encode, the pre-emptive reconnect fires. Simulate by
// firing onDisconnect on the old port — production code's
// handler runs `keepalivePort = null` then `connectPort()`
// which spawns a new PortStub. The keepalivePort reference now
// points to the new port; the in-flight portAtRequest in the
// encode loop still points to oldPort, so the stale-port guard
// at line 597 will trigger.
oldPort.onDisconnect._listeners.forEach((fn) => fn());
// After reconnect a NEW port exists.
expect(ports.length).toBeGreaterThanOrEqual(2);
const newPort = ports[ports.length - 1];
// 4) Drain enough microtasks for the encode loop to complete and
// the post-await guard to evaluate. blobToBase64 awaits
// `blob.arrayBuffer()` (1 microtask) ×3 + the guard check.
// Generous: drain 32 microtasks then a macrotask. This is
// deterministic — vitest's Node env exposes setImmediate
// semantics via setTimeout(_, 0).
for (let i = 0; i < 32; i++) {
await Promise.resolve();
}
await new Promise<void>((resolve) => setTimeout(resolve, 0));
for (let i = 0; i < 32; i++) {
await Promise.resolve();
}
// CONTRACT — pick exactly one:
//
// (a) BUFFER posted on the NEW port (segments encoded, retry on
// fresh port works), OR
// (b) BUFFER posted on the OLD port (encode completed and the
// post squeezed in before the swap took effect — acceptable
// because the SW's per-request listener is still bound to it
// and the SW gets the data).
//
// The current production code posts NEITHER when the reconnect
// arrives between the await and the post (the line 597 guard
// returns silently). This assertion FAILS today, proving the H2
// bug. The fix must surface SOME signal that the SW can act on —
// silent loss when the buffer is non-empty is the bug.
const oldPortBuffers = oldPort.postMessage.mock.calls.filter(
(call: unknown[]) =>
typeof call[0] === 'object' &&
call[0] !== null &&
(call[0] as { type?: unknown }).type === 'BUFFER'
);
const newPortBuffers = newPort.postMessage.mock.calls.filter(
(call: unknown[]) =>
typeof call[0] === 'object' &&
call[0] !== null &&
(call[0] as { type?: unknown }).type === 'BUFFER'
);
const totalBuffersPosted = oldPortBuffers.length + newPortBuffers.length;
// Stronger assertion: when BUFFER reaches a port, it must carry
// the 3 segments we seeded. A 0-segment BUFFER is the silent-loss
// shape and should not satisfy the contract.
const allBufferCalls = [...oldPortBuffers, ...newPortBuffers];
const segmentBearingBuffers = allBufferCalls.filter((call) => {
const msg = call[0] as { segments?: unknown };
return Array.isArray(msg.segments) && msg.segments.length > 0;
});
expect(totalBuffersPosted).toBeGreaterThanOrEqual(1);
expect(segmentBearingBuffers.length).toBeGreaterThanOrEqual(1);
// Cleanup test seam state so we don't bleed segments across tests.
recorder.resetBuffer();
});
// ──────────────────────────────────────────────────────────────────────
// H1.b — pre-emptive reconnect path specifically. Walk the same
// sequence the operator hit at 290 s: the production code calls
// `keepalivePort?.disconnect()` and the test then invokes the ping
// callback that was installed BEFORE the reconnect. This proves the
// race window in the production timer-clear ordering.
// ──────────────────────────────────────────────────────────────────────
it('H1.b: pre-emptive reconnect path — ping does not throw against just-disconnected port', async () => {
const ports: PortStub[] = [];
const stub = buildChromeStub(ports);
(globalThis as unknown as GlobalWithChrome).chrome = stub;
// Capture both interval callbacks AND timeout callbacks for direct
// invocation. We need to fire the pre-emptive setTimeout
// synthetically (it would otherwise wait 290 s of wall-clock time).
const intervalCallbacks: Array<() => void> = [];
const timeoutCallbacks: Array<() => void> = [];
globalThis.setInterval = ((cb: () => void, _ms: number) => {
intervalCallbacks.push(cb);
return 100 as unknown as ReturnType<typeof setInterval>;
}) as typeof globalThis.setInterval;
globalThis.setTimeout = ((cb: () => void, _ms: number) => {
timeoutCallbacks.push(cb);
return 200 as unknown as ReturnType<typeof setTimeout>;
}) as typeof globalThis.setTimeout;
// No-op the clear* so the captured callbacks remain callable —
// surfaces the race-window assumption in the production code.
globalThis.clearInterval = (() => {}) as typeof globalThis.clearInterval;
globalThis.clearTimeout = (() => {}) as typeof globalThis.clearTimeout;
await import('../../src/offscreen/recorder');
expect(ports.length).toBe(1);
const port = ports[0];
// intervalCallbacks[0] is the ping callback (first setInterval call).
// timeoutCallbacks contains at least the pre-emptive reconnect timer
// (recorder.ts:626). Other setTimeouts may exist (e.g. queueMicrotask
// shims, scheduleRotation — but the latter is gated on mediaStream
// which is null in this test).
expect(intervalCallbacks.length).toBeGreaterThanOrEqual(1);
expect(timeoutCallbacks.length).toBeGreaterThanOrEqual(1);
const pingCb = intervalCallbacks[0];
// The pre-emptive reconnect timeout is the only setTimeout the
// bootstrap installs (rotation is gated on mediaStream). Pick the
// first to keep the test resilient.
const preemptiveReconnect = timeoutCallbacks[0];
// 1) Fire pre-emptive reconnect — the production code calls
// keepalivePort?.disconnect(). Our stub flips _disconnected=true
// synchronously (matching Chrome's local disconnect semantics).
preemptiveReconnect();
expect(port._disconnected).toBe(true);
expect(port.disconnect).toHaveBeenCalled();
// 2) Now the race: an already-queued ping callback (e.g. scheduled
// before the reconnect) fires against the just-disconnected port.
// Production code: keepalivePort?.postMessage({type:'PING'}) —
// keepalivePort is still the old non-null reference at this point
// (the onDisconnect handler may not have run yet in the same
// macrotask), so postMessage runs against the disconnected port
// and our stub throws — exactly matching the UAT error message.
//
// The fix must make this safe. Acceptable strategies:
// - Clear the interval as the FIRST step of the pre-emptive
// reconnect, before .disconnect().
// - Wrap postMessage in try/catch inside the ping callback.
// - Switch to a per-port closure (capturedPort) so a stale ping
// can detect it's pinging a swapped port.
expect(() => pingCb()).not.toThrow();
// Restore globals.
globalThis.setInterval = originalSetInterval;
globalThis.clearInterval = originalClearInterval;
globalThis.setTimeout = originalSetTimeout;
globalThis.clearTimeout = originalClearTimeout;
});
});