Implements the offscreen-side architectural refactor per
.planning/debug/empty-archive-port-race.md "Fix Strategy: Option C":
1. **Retired** the 290_000 ms pre-emptive reconnect setTimeout. Its race
window between the synchronous .disconnect() and the onDisconnect
handler running was the bisect-confirmed proximate cause of the H1
"Attempting to use a disconnected port object" Uncaught Errors.
2. **Added** PONG-based health probe: each ping increments missedPongs;
if MAX_MISSED_PONGS (3) consecutive PINGs go without echo, reconnect
via the same clean teardown path the onDisconnect handler uses.
PONG receipt resets the counter. Liveness-based replacement for the
time-based pre-emptive rotation.
3. **H1 fix** — wrap PING postMessage in try/catch. The port object can
transition to disconnected synchronously (SW eviction, port glitch)
between the interval-callback being queued and it running. The catch
absorbs the throw and routes through reconnectPort() — no more
uncaught throws bubble out to the offscreen console.
4. **Request-id'd protocol** — REQUEST_BUFFER carries the SW-generated
requestId; BUFFER response echoes it. The offscreen now posts on the
CURRENT keepalivePort (no more portAtRequest stale-port refuse-to-
post). The SW matches BUFFER → request by id, so port replacement
mid-encode no longer drops the response — the SW retries on the new
port and the matching BUFFER routes correctly.
5. **reconnectPort(reason)** — new helper consolidating the
teardown+disconnect+reconnect dance used by both the missed-PONG
path and the synchronous-throw path. Idempotent w.r.t. the chained
onDisconnect callback.
Test updates:
- H2 now sends REQUEST_BUFFER with a requestId (Option C contract).
- H1.b refactored to test the externally-disconnected path (since the
pre-emptive timeout path is gone): port._disconnected=true, fire
ping, assert no throw + a fresh port appears.
- Top-level snapshots of timer globals + afterEach restoration so a
failing test doesn't leak overridden globals into the next test.
Status: 48 GREEN, 4 RED (the remaining RED is all SW-side — addressed
in next commit). All H1 + H1.b + H2 contracts now GREEN. Pinning
contracts (D-12 port-serialization, D-13 segment-rotation, A3 webm-
playback) untouched. tsc --noEmit exit 0; type-safety grep clean.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
407 lines
18 KiB
TypeScript
407 lines
18 KiB
TypeScript
// tests/offscreen/port-reconnect-race.test.ts
|
||
//
|
||
// RED-gate tests for debug session empty-archive-port-race
|
||
// (.planning/debug/empty-archive-port-race.md).
|
||
//
|
||
// Empirically pins the two coupled defects observed in Phase 1 UAT Test 3:
|
||
//
|
||
// H1 Pre-emptive reconnect at 290 s leaves the ping interval briefly
|
||
// able to fire against a Port whose remote end has been disconnected,
|
||
// throwing "Attempting to use a disconnected port object" — 3
|
||
// uncaught errors observed in the offscreen console.
|
||
//
|
||
// H2 REQUEST_BUFFER round-trip silently drops BUFFER when the port is
|
||
// replaced mid-encode (src/offscreen/recorder.ts:597 stale-port
|
||
// refuse) AND when the SW's videoPort is null at request time
|
||
// (src/background/index.ts:128 fast-bailout). The SW then times
|
||
// out (BUFFER_FETCH_TIMEOUT_MS = 2 s) and resolves
|
||
// `{ segments: [] }`, which createArchive's branch at
|
||
// src/background/index.ts:346 silently elides into a video-less
|
||
// archive. Operator-visible result: 88 KB zip with NO
|
||
// video/last_30sec.webm.
|
||
//
|
||
// These tests drive the REAL src/offscreen/recorder.ts module via the
|
||
// established chrome-stub harness (same scaffold as
|
||
// tests/offscreen/port.test.ts + handshake.test.ts). No source-code
|
||
// re-implementation — the production module is what we're testing.
|
||
|
||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||
|
||
interface PortStub {
|
||
name: string;
|
||
// Throw-on-disconnected-postMessage: simulates the real Chrome behaviour
|
||
// documented at https://developer.chrome.com/docs/extensions/develop/concepts/messaging
|
||
// — postMessage on a disconnected Port throws synchronously with
|
||
// "Attempting to use a disconnected port object".
|
||
postMessage: ReturnType<typeof vi.fn>;
|
||
onMessage: {
|
||
addListener: ReturnType<typeof vi.fn>;
|
||
// capture for synthetic dispatch
|
||
_listeners: Array<(msg: unknown) => void>;
|
||
};
|
||
onDisconnect: {
|
||
addListener: (fn: () => void) => void;
|
||
// capture for synthetic dispatch
|
||
_listeners: Array<() => void>;
|
||
};
|
||
disconnect: ReturnType<typeof vi.fn>;
|
||
// test seam — flip this to true to simulate the remote disconnect
|
||
// having taken effect before the next postMessage runs.
|
||
_disconnected: boolean;
|
||
}
|
||
|
||
interface ChromeStub {
|
||
runtime: {
|
||
id: string;
|
||
sendMessage: ReturnType<typeof vi.fn>;
|
||
onMessage: { addListener: ReturnType<typeof vi.fn> };
|
||
connect: () => PortStub;
|
||
};
|
||
}
|
||
|
||
interface GlobalWithChrome {
|
||
chrome?: ChromeStub;
|
||
MediaRecorder?: { isTypeSupported: (mime: string) => boolean };
|
||
}
|
||
|
||
// Build a PortStub whose postMessage throws once `_disconnected` flips true,
|
||
// matching Chrome's runtime semantics. The throw is the exact error string
|
||
// the offscreen console showed in UAT Test 3 ("Attempting to use a
|
||
// disconnected port object"), so the test fails loudly when the bug is
|
||
// reproduced — and is silent (pass) once the production code clears the
|
||
// ping interval BEFORE the disconnect-detection race window.
|
||
function makePortStub(): PortStub {
|
||
const port: PortStub = {
|
||
name: 'video-keepalive',
|
||
postMessage: vi.fn(),
|
||
onMessage: {
|
||
addListener: vi.fn(),
|
||
_listeners: [],
|
||
},
|
||
onDisconnect: {
|
||
addListener: (fn: () => void) => {
|
||
port.onDisconnect._listeners.push(fn);
|
||
},
|
||
_listeners: [],
|
||
},
|
||
disconnect: vi.fn(() => {
|
||
port._disconnected = true;
|
||
}),
|
||
_disconnected: false,
|
||
};
|
||
// Wire onMessage.addListener to capture into _listeners so the test can
|
||
// dispatch REQUEST_BUFFER synthetically.
|
||
port.onMessage.addListener.mockImplementation(
|
||
(fn: (msg: unknown) => void) => {
|
||
port.onMessage._listeners.push(fn);
|
||
}
|
||
);
|
||
// Wire postMessage to throw if the port is disconnected — Chrome's
|
||
// documented behaviour. This is the smoking gun for H1.
|
||
port.postMessage.mockImplementation((msg: unknown) => {
|
||
if (port._disconnected) {
|
||
throw new Error('Attempting to use a disconnected port object');
|
||
}
|
||
return msg;
|
||
});
|
||
return port;
|
||
}
|
||
|
||
function buildChromeStub(ports: PortStub[]): ChromeStub {
|
||
return {
|
||
runtime: {
|
||
id: 'ext-id-test',
|
||
sendMessage: vi.fn(),
|
||
onMessage: { addListener: vi.fn() },
|
||
connect: () => {
|
||
const port = makePortStub();
|
||
ports.push(port);
|
||
return port;
|
||
},
|
||
},
|
||
};
|
||
}
|
||
|
||
// Top-level snapshots of the real timer APIs — captured at module load,
|
||
// BEFORE any test runs, so they survive a test that fails before its
|
||
// inline restore code. The afterEach below restores unconditionally.
|
||
const ORIGINAL_SET_INTERVAL = globalThis.setInterval;
|
||
const ORIGINAL_CLEAR_INTERVAL = globalThis.clearInterval;
|
||
const ORIGINAL_SET_TIMEOUT = globalThis.setTimeout;
|
||
const ORIGINAL_CLEAR_TIMEOUT = globalThis.clearTimeout;
|
||
|
||
describe('port reconnect race (RED — confirms empty-archive-port-race H1+H2)', () => {
|
||
beforeEach(() => {
|
||
vi.resetModules();
|
||
(globalThis as unknown as GlobalWithChrome).MediaRecorder = {
|
||
isTypeSupported: vi.fn().mockReturnValue(true),
|
||
};
|
||
});
|
||
|
||
afterEach(() => {
|
||
globalThis.setInterval = ORIGINAL_SET_INTERVAL;
|
||
globalThis.clearInterval = ORIGINAL_CLEAR_INTERVAL;
|
||
globalThis.setTimeout = ORIGINAL_SET_TIMEOUT;
|
||
globalThis.clearTimeout = ORIGINAL_CLEAR_TIMEOUT;
|
||
});
|
||
|
||
// ──────────────────────────────────────────────────────────────────────
|
||
// H1 — ping fires against disconnected port and throws
|
||
// ──────────────────────────────────────────────────────────────────────
|
||
//
|
||
// Production code path (src/offscreen/recorder.ts:623-625):
|
||
// pingIntervalId = setInterval(() => {
|
||
// keepalivePort?.postMessage({ type: 'PING' }); // throws if disconnected
|
||
// }, PORT_PING_MS);
|
||
//
|
||
// The `?.` only guards against keepalivePort === null. After the SW
|
||
// (or the SW's idle-eviction, or a remote disconnect) tears down the
|
||
// port, the offscreen-side Port reference is still non-null but
|
||
// postMessage throws synchronously with "Attempting to use a
|
||
// disconnected port object". The interval is cleared only in the
|
||
// onDisconnect handler — there's a race window during which the next
|
||
// queued interval callback fires before onDisconnect runs.
|
||
//
|
||
// Test strategy: intercept setInterval to capture the ping callback,
|
||
// then synthetically disconnect the port (without firing onDisconnect
|
||
// first — mimicking the race), then invoke the captured ping callback
|
||
// and assert it does NOT throw an uncaught error.
|
||
it('H1: ping callback handles a remotely-disconnected port without throwing', async () => {
|
||
const ports: PortStub[] = [];
|
||
const stub = buildChromeStub(ports);
|
||
(globalThis as unknown as GlobalWithChrome).chrome = stub;
|
||
|
||
// Intercept setInterval to capture the ping callback for direct invocation.
|
||
const intervalCallbacks: Array<() => void> = [];
|
||
globalThis.setInterval = ((cb: () => void, _ms: number) => {
|
||
intervalCallbacks.push(cb);
|
||
return 999 as unknown as ReturnType<typeof setInterval>;
|
||
}) as typeof globalThis.setInterval;
|
||
// No-op clearInterval — the test wants to keep the callback callable
|
||
// even after the production code attempts a clear, to expose the
|
||
// race-window assumption.
|
||
globalThis.clearInterval = (() => {}) as typeof globalThis.clearInterval;
|
||
|
||
await import('../../src/offscreen/recorder');
|
||
|
||
expect(ports.length).toBe(1);
|
||
expect(intervalCallbacks.length).toBeGreaterThanOrEqual(1);
|
||
|
||
const port = ports[0];
|
||
const pingCb = intervalCallbacks[0];
|
||
|
||
// Simulate the race: the remote side of the port has been
|
||
// disconnected (e.g. SW eviction, ~5 min cap, port glitch) but the
|
||
// offscreen's onDisconnect handler has NOT yet run, so the interval
|
||
// is still installed and keepalivePort is still non-null on the
|
||
// offscreen side.
|
||
port._disconnected = true;
|
||
|
||
// The PRODUCTION ping callback must not throw — either by checking
|
||
// port.postMessage safety first, or by wrapping the call in
|
||
// try/catch, or by removing the interval at first postMessage
|
||
// failure. Whatever the fix, an uncaught throw here would surface
|
||
// as the "Attempting to use a disconnected port object" error the
|
||
// operator observed in UAT Test 3.
|
||
expect(() => pingCb()).not.toThrow();
|
||
|
||
});
|
||
|
||
// ──────────────────────────────────────────────────────────────────────
|
||
// H2 — BUFFER lost on stale-port reconnect; SW left with empty segments
|
||
// ──────────────────────────────────────────────────────────────────────
|
||
//
|
||
// Production code path (src/offscreen/recorder.ts:547-604):
|
||
// const finalized = getSegments();
|
||
// ...
|
||
// for (let idx = 0; idx < allSegments.length; idx++) {
|
||
// const data = await blobToBase64(segment); // ←── async window
|
||
// transferred.push(...);
|
||
// }
|
||
// if (keepalivePort !== portAtRequest) {
|
||
// logger.warn('port replaced during encode; dropping BUFFER response');
|
||
// return;
|
||
// }
|
||
// portAtRequest.postMessage({ type: 'BUFFER', segments: transferred });
|
||
//
|
||
// When the SW issues REQUEST_BUFFER during the ~10-50 ms reconnect
|
||
// window, the offscreen captures `portAtRequest = keepalivePort`,
|
||
// awaits the per-segment base64 encode (~150 ms for 3 segments), and
|
||
// by the time the encode completes the pre-emptive reconnect has
|
||
// swapped `keepalivePort` to a fresh Port. The guard correctly
|
||
// refuses to post on the stale Port. HOWEVER, no retry is issued —
|
||
// the SW's getVideoBufferFromOffscreen times out after 2 s and
|
||
// resolves `{ segments: [] }`. createArchive then takes the silent
|
||
// no-video branch (src/background/index.ts:350-352). The archive
|
||
// ships without video.
|
||
//
|
||
// This RED test pins the contract: when REQUEST_BUFFER arrives AND
|
||
// segments exist, BUFFER (with the encoded segments) MUST reach SOME
|
||
// live port, either:
|
||
// (a) by retrying on the freshly-connected port, OR
|
||
// (b) by re-issuing REQUEST_BUFFER on the new port from the
|
||
// reconnect handler when there's an unanswered request in
|
||
// flight.
|
||
// Whatever the strategy, silent loss with non-empty buffer is the
|
||
// bug. The current production code drops the response on the floor.
|
||
it('H2: REQUEST_BUFFER mid-reconnect must NOT drop BUFFER when segments exist', async () => {
|
||
const ports: PortStub[] = [];
|
||
const stub = buildChromeStub(ports);
|
||
(globalThis as unknown as GlobalWithChrome).chrome = stub;
|
||
|
||
const recorder = await import('../../src/offscreen/recorder');
|
||
|
||
// 1) Seed the ring buffer with 3 real Blob segments via the test
|
||
// seam (recorder.ts:85 — pushSegmentForTest). Each segment is a
|
||
// plausible WebM head (EBML magic + filler bytes). The 3 ×
|
||
// payload ensures the for-loop body actually awaits — opening
|
||
// the real reconnect race window.
|
||
const ebmlMagic = new Uint8Array([0x1a, 0x45, 0xdf, 0xa3]);
|
||
recorder.resetBuffer();
|
||
for (let i = 0; i < 3; i++) {
|
||
const payload = new Uint8Array(1024);
|
||
payload.set(ebmlMagic, 0);
|
||
payload.fill(i + 1, ebmlMagic.length);
|
||
recorder.pushSegmentForTest(
|
||
new Blob([payload], { type: 'video/webm' })
|
||
);
|
||
}
|
||
expect(recorder.getSegments().length).toBe(3);
|
||
|
||
expect(ports.length).toBe(1);
|
||
const oldPort = ports[0];
|
||
expect(oldPort.onMessage._listeners.length).toBeGreaterThanOrEqual(1);
|
||
|
||
// 2) SW issues REQUEST_BUFFER — synthetically dispatch to the
|
||
// captured onPortMessage listener. The production code kicks off
|
||
// encodeAndSendBuffer which awaits 3 blobToBase64 calls.
|
||
//
|
||
// Under the Option C protocol the request carries a requestId the
|
||
// SW generates per call; the offscreen echoes it on the BUFFER
|
||
// response. This test uses a literal id since it's not driving
|
||
// the SW — only the recorder's onPortMessage path.
|
||
const requestId = 'h2-test-id';
|
||
oldPort.onMessage._listeners[0]({
|
||
type: 'REQUEST_BUFFER',
|
||
requestId,
|
||
});
|
||
|
||
// 3) Mid-encode, the pre-emptive reconnect fires. Simulate by
|
||
// firing onDisconnect on the old port — production code's
|
||
// handler runs `keepalivePort = null` then `connectPort()`
|
||
// which spawns a new PortStub. The keepalivePort reference now
|
||
// points to the new port; the in-flight portAtRequest in the
|
||
// encode loop still points to oldPort, so the stale-port guard
|
||
// at line 597 will trigger.
|
||
oldPort.onDisconnect._listeners.forEach((fn) => fn());
|
||
|
||
// After reconnect a NEW port exists.
|
||
expect(ports.length).toBeGreaterThanOrEqual(2);
|
||
const newPort = ports[ports.length - 1];
|
||
|
||
// 4) Drain enough microtasks for the encode loop to complete and
|
||
// the post-await guard to evaluate. blobToBase64 awaits
|
||
// `blob.arrayBuffer()` (1 microtask) ×3 + the guard check.
|
||
// Generous: drain 32 microtasks then a macrotask. This is
|
||
// deterministic — vitest's Node env exposes setImmediate
|
||
// semantics via setTimeout(_, 0).
|
||
for (let i = 0; i < 32; i++) {
|
||
await Promise.resolve();
|
||
}
|
||
await new Promise<void>((resolve) => setTimeout(resolve, 0));
|
||
for (let i = 0; i < 32; i++) {
|
||
await Promise.resolve();
|
||
}
|
||
|
||
// CONTRACT — pick exactly one:
|
||
//
|
||
// (a) BUFFER posted on the NEW port (segments encoded, retry on
|
||
// fresh port works), OR
|
||
// (b) BUFFER posted on the OLD port (encode completed and the
|
||
// post squeezed in before the swap took effect — acceptable
|
||
// because the SW's per-request listener is still bound to it
|
||
// and the SW gets the data).
|
||
//
|
||
// The current production code posts NEITHER when the reconnect
|
||
// arrives between the await and the post (the line 597 guard
|
||
// returns silently). This assertion FAILS today, proving the H2
|
||
// bug. The fix must surface SOME signal that the SW can act on —
|
||
// silent loss when the buffer is non-empty is the bug.
|
||
|
||
const oldPortBuffers = oldPort.postMessage.mock.calls.filter(
|
||
(call: unknown[]) =>
|
||
typeof call[0] === 'object' &&
|
||
call[0] !== null &&
|
||
(call[0] as { type?: unknown }).type === 'BUFFER'
|
||
);
|
||
const newPortBuffers = newPort.postMessage.mock.calls.filter(
|
||
(call: unknown[]) =>
|
||
typeof call[0] === 'object' &&
|
||
call[0] !== null &&
|
||
(call[0] as { type?: unknown }).type === 'BUFFER'
|
||
);
|
||
|
||
const totalBuffersPosted = oldPortBuffers.length + newPortBuffers.length;
|
||
// Stronger assertion: when BUFFER reaches a port, it must carry
|
||
// the 3 segments we seeded. A 0-segment BUFFER is the silent-loss
|
||
// shape and should not satisfy the contract.
|
||
const allBufferCalls = [...oldPortBuffers, ...newPortBuffers];
|
||
const segmentBearingBuffers = allBufferCalls.filter((call) => {
|
||
const msg = call[0] as { segments?: unknown };
|
||
return Array.isArray(msg.segments) && msg.segments.length > 0;
|
||
});
|
||
expect(totalBuffersPosted).toBeGreaterThanOrEqual(1);
|
||
expect(segmentBearingBuffers.length).toBeGreaterThanOrEqual(1);
|
||
|
||
// Cleanup test seam state so we don't bleed segments across tests.
|
||
recorder.resetBuffer();
|
||
});
|
||
|
||
// ──────────────────────────────────────────────────────────────────────
|
||
// H1.b — externally-disconnected port path. The original 290 s
|
||
// pre-emptive setTimeout is gone (Option C retired it — see the
|
||
// "Bisect Results" section of empty-archive-port-race.md). What
|
||
// remains is the same H1 race shape, just triggered by a different
|
||
// path: the SW side (or Chrome's idle eviction, or a network glitch)
|
||
// can mark the remote end of the port as disconnected; the local
|
||
// postMessage then throws synchronously with the same Chrome error
|
||
// string. The fix's try/catch + reconnect path must absorb that.
|
||
// ──────────────────────────────────────────────────────────────────────
|
||
it('H1.b: externally-disconnected port — ping path absorbs the throw and reconnects cleanly', async () => {
|
||
const ports: PortStub[] = [];
|
||
const stub = buildChromeStub(ports);
|
||
(globalThis as unknown as GlobalWithChrome).chrome = stub;
|
||
|
||
const intervalCallbacks: Array<() => void> = [];
|
||
globalThis.setInterval = ((cb: () => void, _ms: number) => {
|
||
intervalCallbacks.push(cb);
|
||
return 100 as unknown as ReturnType<typeof setInterval>;
|
||
}) as typeof globalThis.setInterval;
|
||
globalThis.clearInterval = (() => {}) as typeof globalThis.clearInterval;
|
||
|
||
await import('../../src/offscreen/recorder');
|
||
|
||
expect(ports.length).toBe(1);
|
||
expect(intervalCallbacks.length).toBeGreaterThanOrEqual(1);
|
||
|
||
const oldPort = ports[0];
|
||
const pingCb = intervalCallbacks[0];
|
||
|
||
// Simulate an EXTERNAL disconnect: the remote end of the port has
|
||
// been torn down (SW eviction, port glitch, network blip), but the
|
||
// local onDisconnect handler hasn't fired yet — this is exactly the
|
||
// half-open state that the original H1 race weaponised.
|
||
oldPort._disconnected = true;
|
||
|
||
// Fire the ping. Under the legacy code this threw with "Attempting
|
||
// to use a disconnected port object"; under the Option C fix the
|
||
// ping callback wraps postMessage in try/catch and initiates a
|
||
// clean reconnect via reconnectPort(). The test asserts BOTH that
|
||
// no Uncaught Error escapes AND that a fresh port has been
|
||
// requested (the visible side-effect of the reconnect path).
|
||
expect(() => pingCb()).not.toThrow();
|
||
expect(ports.length).toBeGreaterThanOrEqual(2);
|
||
|
||
});
|
||
});
|