test(debug-empty-archive): RED gate for empty-archive-port-race (H1 + H1.b + H2)

Phase 1 UAT Test 3 surfaced a two-headed BLOCKER:
(a) silent empty-video archive when save crosses a port-reconnect window,
(b) 3x "Attempting to use a disconnected port object" Uncaught Errors
starting at the 290 s pre-emptive reconnect mark.

Bisect confirmed: H1 (port lifecycle race) was introduced by Plan 01-04
(b064a21); H2 (createArchive silent-skip on empty segments) is an upstream
defect (555eb05) that became fatal once CR-01 + sweep #5 guaranteed the
silent-skip branch would fire on every save during a reconnect window.

This commit lands the 3 RED tests at the unit-test level — they match the
UAT error string byte-for-byte for H1/H1.b and pin the silent-drop
contract for H2. They will flip GREEN as the Option C architectural
refactor (request-id'd port protocol + port-health probe + retry +
operator-visible error surface) lands across the next commits.

Baseline: 8 files / 43 tests (40 GREEN, 3 RED). tsc --noEmit exit 0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-16 14:17:45 +02:00
parent 13094d4248
commit 674c415945
2 changed files with 833 additions and 0 deletions

View File

@@ -0,0 +1,427 @@
// tests/offscreen/port-reconnect-race.test.ts
//
// RED-gate tests for debug session empty-archive-port-race
// (.planning/debug/empty-archive-port-race.md).
//
// Empirically pins the two coupled defects observed in Phase 1 UAT Test 3:
//
// H1 Pre-emptive reconnect at 290 s leaves the ping interval briefly
// able to fire against a Port whose remote end has been disconnected,
// throwing "Attempting to use a disconnected port object" — 3
// uncaught errors observed in the offscreen console.
//
// H2 REQUEST_BUFFER round-trip silently drops BUFFER when the port is
// replaced mid-encode (src/offscreen/recorder.ts:597 stale-port
// refuse) AND when the SW's videoPort is null at request time
// (src/background/index.ts:128 fast-bailout). The SW then times
// out (BUFFER_FETCH_TIMEOUT_MS = 2 s) and resolves
// `{ segments: [] }`, which createArchive's branch at
// src/background/index.ts:346 silently elides into a video-less
// archive. Operator-visible result: 88 KB zip with NO
// video/last_30sec.webm.
//
// These tests drive the REAL src/offscreen/recorder.ts module via the
// established chrome-stub harness (same scaffold as
// tests/offscreen/port.test.ts + handshake.test.ts). No source-code
// re-implementation — the production module is what we're testing.
import { describe, it, expect, vi, beforeEach } from 'vitest';
interface PortStub {
name: string;
// Throw-on-disconnected-postMessage: simulates the real Chrome behaviour
// documented at https://developer.chrome.com/docs/extensions/develop/concepts/messaging
// — postMessage on a disconnected Port throws synchronously with
// "Attempting to use a disconnected port object".
postMessage: ReturnType<typeof vi.fn>;
onMessage: {
addListener: ReturnType<typeof vi.fn>;
// capture for synthetic dispatch
_listeners: Array<(msg: unknown) => void>;
};
onDisconnect: {
addListener: (fn: () => void) => void;
// capture for synthetic dispatch
_listeners: Array<() => void>;
};
disconnect: ReturnType<typeof vi.fn>;
// test seam — flip this to true to simulate the remote disconnect
// having taken effect before the next postMessage runs.
_disconnected: boolean;
}
interface ChromeStub {
runtime: {
id: string;
sendMessage: ReturnType<typeof vi.fn>;
onMessage: { addListener: ReturnType<typeof vi.fn> };
connect: () => PortStub;
};
}
interface GlobalWithChrome {
chrome?: ChromeStub;
MediaRecorder?: { isTypeSupported: (mime: string) => boolean };
}
// Build a PortStub whose postMessage throws once `_disconnected` flips true,
// matching Chrome's runtime semantics. The throw is the exact error string
// the offscreen console showed in UAT Test 3 ("Attempting to use a
// disconnected port object"), so the test fails loudly when the bug is
// reproduced — and is silent (pass) once the production code clears the
// ping interval BEFORE the disconnect-detection race window.
function makePortStub(): PortStub {
const port: PortStub = {
name: 'video-keepalive',
postMessage: vi.fn(),
onMessage: {
addListener: vi.fn(),
_listeners: [],
},
onDisconnect: {
addListener: (fn: () => void) => {
port.onDisconnect._listeners.push(fn);
},
_listeners: [],
},
disconnect: vi.fn(() => {
port._disconnected = true;
}),
_disconnected: false,
};
// Wire onMessage.addListener to capture into _listeners so the test can
// dispatch REQUEST_BUFFER synthetically.
port.onMessage.addListener.mockImplementation(
(fn: (msg: unknown) => void) => {
port.onMessage._listeners.push(fn);
}
);
// Wire postMessage to throw if the port is disconnected — Chrome's
// documented behaviour. This is the smoking gun for H1.
port.postMessage.mockImplementation((msg: unknown) => {
if (port._disconnected) {
throw new Error('Attempting to use a disconnected port object');
}
return msg;
});
return port;
}
function buildChromeStub(ports: PortStub[]): ChromeStub {
return {
runtime: {
id: 'ext-id-test',
sendMessage: vi.fn(),
onMessage: { addListener: vi.fn() },
connect: () => {
const port = makePortStub();
ports.push(port);
return port;
},
},
};
}
describe('port reconnect race (RED — confirms empty-archive-port-race H1+H2)', () => {
let originalSetInterval: typeof globalThis.setInterval;
let originalClearInterval: typeof globalThis.clearInterval;
let originalSetTimeout: typeof globalThis.setTimeout;
let originalClearTimeout: typeof globalThis.clearTimeout;
beforeEach(() => {
vi.resetModules();
(globalThis as unknown as GlobalWithChrome).MediaRecorder = {
isTypeSupported: vi.fn().mockReturnValue(true),
};
originalSetInterval = globalThis.setInterval;
originalClearInterval = globalThis.clearInterval;
originalSetTimeout = globalThis.setTimeout;
originalClearTimeout = globalThis.clearTimeout;
});
// ──────────────────────────────────────────────────────────────────────
// H1 — ping fires against disconnected port and throws
// ──────────────────────────────────────────────────────────────────────
//
// Production code path (src/offscreen/recorder.ts:623-625):
// pingIntervalId = setInterval(() => {
// keepalivePort?.postMessage({ type: 'PING' }); // throws if disconnected
// }, PORT_PING_MS);
//
// The `?.` only guards against keepalivePort === null. After the SW
// (or the SW's idle-eviction, or a remote disconnect) tears down the
// port, the offscreen-side Port reference is still non-null but
// postMessage throws synchronously with "Attempting to use a
// disconnected port object". The interval is cleared only in the
// onDisconnect handler — there's a race window during which the next
// queued interval callback fires before onDisconnect runs.
//
// Test strategy: intercept setInterval to capture the ping callback,
// then synthetically disconnect the port (without firing onDisconnect
// first — mimicking the race), then invoke the captured ping callback
// and assert it does NOT throw an uncaught error.
it('H1: ping callback handles a remotely-disconnected port without throwing', async () => {
const ports: PortStub[] = [];
const stub = buildChromeStub(ports);
(globalThis as unknown as GlobalWithChrome).chrome = stub;
// Intercept setInterval to capture the ping callback for direct invocation.
const intervalCallbacks: Array<() => void> = [];
globalThis.setInterval = ((cb: () => void, _ms: number) => {
intervalCallbacks.push(cb);
return 999 as unknown as ReturnType<typeof setInterval>;
}) as typeof globalThis.setInterval;
// No-op clearInterval — the test wants to keep the callback callable
// even after the production code attempts a clear, to expose the
// race-window assumption.
globalThis.clearInterval = (() => {}) as typeof globalThis.clearInterval;
await import('../../src/offscreen/recorder');
expect(ports.length).toBe(1);
expect(intervalCallbacks.length).toBeGreaterThanOrEqual(1);
const port = ports[0];
const pingCb = intervalCallbacks[0];
// Simulate the race: the remote side of the port has been
// disconnected (e.g. SW eviction, ~5 min cap, port glitch) but the
// offscreen's onDisconnect handler has NOT yet run, so the interval
// is still installed and keepalivePort is still non-null on the
// offscreen side.
port._disconnected = true;
// The PRODUCTION ping callback must not throw — either by checking
// port.postMessage safety first, or by wrapping the call in
// try/catch, or by removing the interval at first postMessage
// failure. Whatever the fix, an uncaught throw here would surface
// as the "Attempting to use a disconnected port object" error the
// operator observed in UAT Test 3.
expect(() => pingCb()).not.toThrow();
// Restore globals.
globalThis.setInterval = originalSetInterval;
globalThis.clearInterval = originalClearInterval;
});
// ──────────────────────────────────────────────────────────────────────
// H2 — BUFFER lost on stale-port reconnect; SW left with empty segments
// ──────────────────────────────────────────────────────────────────────
//
// Production code path (src/offscreen/recorder.ts:547-604):
// const finalized = getSegments();
// ...
// for (let idx = 0; idx < allSegments.length; idx++) {
// const data = await blobToBase64(segment); // ←── async window
// transferred.push(...);
// }
// if (keepalivePort !== portAtRequest) {
// logger.warn('port replaced during encode; dropping BUFFER response');
// return;
// }
// portAtRequest.postMessage({ type: 'BUFFER', segments: transferred });
//
// When the SW issues REQUEST_BUFFER during the ~10-50 ms reconnect
// window, the offscreen captures `portAtRequest = keepalivePort`,
// awaits the per-segment base64 encode (~150 ms for 3 segments), and
// by the time the encode completes the pre-emptive reconnect has
// swapped `keepalivePort` to a fresh Port. The guard correctly
// refuses to post on the stale Port. HOWEVER, no retry is issued —
// the SW's getVideoBufferFromOffscreen times out after 2 s and
// resolves `{ segments: [] }`. createArchive then takes the silent
// no-video branch (src/background/index.ts:350-352). The archive
// ships without video.
//
// This RED test pins the contract: when REQUEST_BUFFER arrives AND
// segments exist, BUFFER (with the encoded segments) MUST reach SOME
// live port, either:
// (a) by retrying on the freshly-connected port, OR
// (b) by re-issuing REQUEST_BUFFER on the new port from the
// reconnect handler when there's an unanswered request in
// flight.
// Whatever the strategy, silent loss with non-empty buffer is the
// bug. The current production code drops the response on the floor.
it('H2: REQUEST_BUFFER mid-reconnect must NOT drop BUFFER when segments exist', async () => {
const ports: PortStub[] = [];
const stub = buildChromeStub(ports);
(globalThis as unknown as GlobalWithChrome).chrome = stub;
const recorder = await import('../../src/offscreen/recorder');
// 1) Seed the ring buffer with 3 real Blob segments via the test
// seam (recorder.ts:85 — pushSegmentForTest). Each segment is a
// plausible WebM head (EBML magic + filler bytes). The 3 ×
// payload ensures the for-loop body actually awaits — opening
// the real reconnect race window.
const ebmlMagic = new Uint8Array([0x1a, 0x45, 0xdf, 0xa3]);
recorder.resetBuffer();
for (let i = 0; i < 3; i++) {
const payload = new Uint8Array(1024);
payload.set(ebmlMagic, 0);
payload.fill(i + 1, ebmlMagic.length);
recorder.pushSegmentForTest(
new Blob([payload], { type: 'video/webm' })
);
}
expect(recorder.getSegments().length).toBe(3);
expect(ports.length).toBe(1);
const oldPort = ports[0];
expect(oldPort.onMessage._listeners.length).toBeGreaterThanOrEqual(1);
// 2) SW issues REQUEST_BUFFER — synthetically dispatch to the
// captured onPortMessage listener. The production code kicks off
// encodeAndSendBuffer which awaits 3 blobToBase64 calls.
oldPort.onMessage._listeners[0]({ type: 'REQUEST_BUFFER' });
// 3) Mid-encode, the pre-emptive reconnect fires. Simulate by
// firing onDisconnect on the old port — production code's
// handler runs `keepalivePort = null` then `connectPort()`
// which spawns a new PortStub. The keepalivePort reference now
// points to the new port; the in-flight portAtRequest in the
// encode loop still points to oldPort, so the stale-port guard
// at line 597 will trigger.
oldPort.onDisconnect._listeners.forEach((fn) => fn());
// After reconnect a NEW port exists.
expect(ports.length).toBeGreaterThanOrEqual(2);
const newPort = ports[ports.length - 1];
// 4) Drain enough microtasks for the encode loop to complete and
// the post-await guard to evaluate. blobToBase64 awaits
// `blob.arrayBuffer()` (1 microtask) ×3 + the guard check.
// Generous: drain 32 microtasks then a macrotask. This is
// deterministic — vitest's Node env exposes setImmediate
// semantics via setTimeout(_, 0).
for (let i = 0; i < 32; i++) {
await Promise.resolve();
}
await new Promise<void>((resolve) => setTimeout(resolve, 0));
for (let i = 0; i < 32; i++) {
await Promise.resolve();
}
// CONTRACT — pick exactly one:
//
// (a) BUFFER posted on the NEW port (segments encoded, retry on
// fresh port works), OR
// (b) BUFFER posted on the OLD port (encode completed and the
// post squeezed in before the swap took effect — acceptable
// because the SW's per-request listener is still bound to it
// and the SW gets the data).
//
// The current production code posts NEITHER when the reconnect
// arrives between the await and the post (the line 597 guard
// returns silently). This assertion FAILS today, proving the H2
// bug. The fix must surface SOME signal that the SW can act on —
// silent loss when the buffer is non-empty is the bug.
const oldPortBuffers = oldPort.postMessage.mock.calls.filter(
(call: unknown[]) =>
typeof call[0] === 'object' &&
call[0] !== null &&
(call[0] as { type?: unknown }).type === 'BUFFER'
);
const newPortBuffers = newPort.postMessage.mock.calls.filter(
(call: unknown[]) =>
typeof call[0] === 'object' &&
call[0] !== null &&
(call[0] as { type?: unknown }).type === 'BUFFER'
);
const totalBuffersPosted = oldPortBuffers.length + newPortBuffers.length;
// Stronger assertion: when BUFFER reaches a port, it must carry
// the 3 segments we seeded. A 0-segment BUFFER is the silent-loss
// shape and should not satisfy the contract.
const allBufferCalls = [...oldPortBuffers, ...newPortBuffers];
const segmentBearingBuffers = allBufferCalls.filter((call) => {
const msg = call[0] as { segments?: unknown };
return Array.isArray(msg.segments) && msg.segments.length > 0;
});
expect(totalBuffersPosted).toBeGreaterThanOrEqual(1);
expect(segmentBearingBuffers.length).toBeGreaterThanOrEqual(1);
// Cleanup test seam state so we don't bleed segments across tests.
recorder.resetBuffer();
});
// ──────────────────────────────────────────────────────────────────────
// H1.b — pre-emptive reconnect path specifically. Walk the same
// sequence the operator hit at 290 s: the production code calls
// `keepalivePort?.disconnect()` and the test then invokes the ping
// callback that was installed BEFORE the reconnect. This proves the
// race window in the production timer-clear ordering.
// ──────────────────────────────────────────────────────────────────────
it('H1.b: pre-emptive reconnect path — ping does not throw against just-disconnected port', async () => {
const ports: PortStub[] = [];
const stub = buildChromeStub(ports);
(globalThis as unknown as GlobalWithChrome).chrome = stub;
// Capture both interval callbacks AND timeout callbacks for direct
// invocation. We need to fire the pre-emptive setTimeout
// synthetically (it would otherwise wait 290 s of wall-clock time).
const intervalCallbacks: Array<() => void> = [];
const timeoutCallbacks: Array<() => void> = [];
globalThis.setInterval = ((cb: () => void, _ms: number) => {
intervalCallbacks.push(cb);
return 100 as unknown as ReturnType<typeof setInterval>;
}) as typeof globalThis.setInterval;
globalThis.setTimeout = ((cb: () => void, _ms: number) => {
timeoutCallbacks.push(cb);
return 200 as unknown as ReturnType<typeof setTimeout>;
}) as typeof globalThis.setTimeout;
// No-op the clear* so the captured callbacks remain callable —
// surfaces the race-window assumption in the production code.
globalThis.clearInterval = (() => {}) as typeof globalThis.clearInterval;
globalThis.clearTimeout = (() => {}) as typeof globalThis.clearTimeout;
await import('../../src/offscreen/recorder');
expect(ports.length).toBe(1);
const port = ports[0];
// intervalCallbacks[0] is the ping callback (first setInterval call).
// timeoutCallbacks contains at least the pre-emptive reconnect timer
// (recorder.ts:626). Other setTimeouts may exist (e.g. queueMicrotask
// shims, scheduleRotation — but the latter is gated on mediaStream
// which is null in this test).
expect(intervalCallbacks.length).toBeGreaterThanOrEqual(1);
expect(timeoutCallbacks.length).toBeGreaterThanOrEqual(1);
const pingCb = intervalCallbacks[0];
// The pre-emptive reconnect timeout is the only setTimeout the
// bootstrap installs (rotation is gated on mediaStream). Pick the
// first to keep the test resilient.
const preemptiveReconnect = timeoutCallbacks[0];
// 1) Fire pre-emptive reconnect — the production code calls
// keepalivePort?.disconnect(). Our stub flips _disconnected=true
// synchronously (matching Chrome's local disconnect semantics).
preemptiveReconnect();
expect(port._disconnected).toBe(true);
expect(port.disconnect).toHaveBeenCalled();
// 2) Now the race: an already-queued ping callback (e.g. scheduled
// before the reconnect) fires against the just-disconnected port.
// Production code: keepalivePort?.postMessage({type:'PING'}) —
// keepalivePort is still the old non-null reference at this point
// (the onDisconnect handler may not have run yet in the same
// macrotask), so postMessage runs against the disconnected port
// and our stub throws — exactly matching the UAT error message.
//
// The fix must make this safe. Acceptable strategies:
// - Clear the interval as the FIRST step of the pre-emptive
// reconnect, before .disconnect().
// - Wrap postMessage in try/catch inside the ping callback.
// - Switch to a per-port closure (capturedPort) so a stale ping
// can detect it's pinging a swapped port.
expect(() => pingCb()).not.toThrow();
// Restore globals.
globalThis.setInterval = originalSetInterval;
globalThis.clearInterval = originalClearInterval;
globalThis.setTimeout = originalSetTimeout;
globalThis.clearTimeout = originalClearTimeout;
});
});