feat(option-c-sw): request-id'd BUFFER routing + retry on port replacement + PONG echo

Implements the SW-side architectural refactor per
.planning/debug/empty-archive-port-race.md "Fix Strategy: Option C":

1. **Request-id'd protocol** — getVideoBufferFromOffscreen generates a
   uuid (crypto.randomUUID with Math.random fallback) and sends
   {type:'REQUEST_BUFFER', requestId} on the live videoPort. The
   per-request listener pattern is GONE; replaced by a module-level
   pendingBufferRequests Map<requestId, PendingBufferRequest>. The
   onConnect-level message sink routes BUFFER -> resolve by id.

2. **Stale BUFFER routing** — BUFFER messages without a matching
   requestId in the Map are silently dropped (no cross-talk). BUFFER
   without a valid requestId at all is rejected with a warn (Option C
   protocol requires the id).

3. **Retry on port replacement** — every onConnect (post-bootstrap)
   scans pendingBufferRequests and re-issues REQUEST_BUFFER on the
   fresh port with the SAME requestId. The offscreen posts BUFFER on
   the current keepalivePort (see prior offscreen commit), the sink
   matches by id, and the request resolves. This retires the H2
   silent-drop class architecturally — the BUFFER reaches the SW
   regardless of port-replacement timing.

4. **PING -> PONG echo** — the sink replies to every PING with PONG.
   Closes the offscreen's health-probe loop (it counts missed PONGs
   and reconnects when MAX_MISSED_PONGS exceeded — see prior offscreen
   commit). The PONG post is wrapped in try/catch to absorb the same
   port-closed-mid-response race the offscreen ping path handles.

5. **Outer hard-timeout bumped 2s -> 10s** — the legacy per-port
   BUFFER_FETCH_TIMEOUT_MS = 2000 was too tight to retry across a
   reconnect. The new outer budget covers EVERY retry across port
   replacements; the inner round-trip is still ~100-200 ms.

6. **decodeBufferSegments extracted** — pulled out of the legacy
   inline handler so the new onConnect sink can decode wire segments
   without duplicating the logic. Preserves WR-07 (empty wire segment
   filter) and base64ToBlob defensive catch behaviour. Closes the
   pre-existing implicit-undefined-return path the legacy flatMap
   catch had (tsc happy but semantically ambiguous).

Status: 51 GREEN, 1 RED. The remaining RED (createArchive must throw
on empty video, surfacing to operator) is addressed in the next commit.

Pinning contracts (D-12 port-serialization, D-13 segment-rotation,
A3 webm-playback) untouched. tsc --noEmit exit 0; type-safety grep clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-16 14:43:12 +02:00
parent 1fb3e978cb
commit 6ffa242cb9

View File

@@ -71,12 +71,97 @@ async function ensureOffscreen() {
}
}
// Outer-bound buffer fetch budget. Larger than the legacy
// BUFFER_FETCH_TIMEOUT_MS (was 2 s; per-port-attempt) because the new
// architecture covers MULTIPLE port-replacement retries inside one outer
// budget. 10 s is generous: the inner per-port encode round-trip is
// still ~100-200 ms; the extra headroom covers up to ~50 reconnect
// cycles before the operator-visible error surfaces.
const BUFFER_FETCH_TIMEOUT_MS = 10_000;
// Option C: in-flight REQUEST_BUFFER requests keyed by requestId. The
// onConnect-level message sink routes BUFFER -> resolve by id, so port
// replacement (videoPort changes mid-request) does NOT lose the
// response — the offscreen posts BUFFER on the CURRENT port (whichever
// that is) and our sink picks it up regardless of which Port object it
// arrives on.
interface PendingBufferRequest {
resolve: (resp: VideoBufferResponse) => void;
hardTimer: ReturnType<typeof setTimeout>;
requestId: string;
}
const pendingBufferRequests: Map<string, PendingBufferRequest> = new Map();
// Generate a per-request correlation id. Uses crypto.randomUUID when
// available (Chrome 92+ in SW context per
// https://developer.chrome.com/docs/extensions/reference/api/runtime#secure_origin),
// with a Math.random fallback that's still unique enough for in-process
// routing — collisions would require simultaneous in-flight requests
// within the same millisecond on the same SW lifetime, vanishingly
// improbable for this UI flow.
function generateRequestId(): string {
if (
typeof crypto !== 'undefined' &&
typeof crypto.randomUUID === 'function'
) {
return crypto.randomUUID();
}
return `req-${Date.now()}-${Math.random().toString(36).slice(2)}`;
}
// Decodes a BUFFER message's wire-format segments into VideoSegment[].
// Extracted from the legacy inline handler so the onConnect sink can
// resolve a pending request without duplicating the decode logic.
function decodeBufferSegments(
wireSegments: TransferredVideoSegment[],
): VideoSegment[] {
// WR-07 fix: filter empty wire segments BEFORE base64 decode. An empty
// wire.data would decode to a zero-byte Blob; mergeVideoSegments would
// then concat it into the output WebM, producing a stray empty EBML
// segment that breaks Chrome playback. Two passes (filter -> decode ->
// filter-non-empty) keep the iteration semantics declarative.
const nonEmptyWires = wireSegments.filter((wire) => {
const isEmpty = !wire.data || wire.data.length === 0;
if (isEmpty) {
logger.warn(
'Skipping empty wire segment (zero-length base64)',
'timestamp:', wire.timestamp,
);
}
return !isEmpty;
});
const segments: VideoSegment[] = nonEmptyWires.flatMap((wire) => {
try {
const blob = base64ToBlob(wire.data, wire.type || VIDEO_MIME_FALLBACK);
if (blob.size === 0) {
logger.warn(
'Skipping segment that decoded to zero bytes',
'timestamp:', wire.timestamp,
);
return [];
}
return [{ data: blob, timestamp: wire.timestamp }];
} catch (err) {
logger.warn(
'base64ToBlob failed; skipping segment',
'timestamp:', wire.timestamp,
'error:', err,
);
return [];
}
});
return segments;
}
// SW-side port host (D-17, RESEARCH.md Pattern 5). The offscreen opens this
// port on bootstrap and reconnects on disconnect. We use it for: (a)
// keepalive traffic (PING) — Chrome 110+ resets the SW idle timer on every
// port message; (b) on-demand REQUEST_BUFFER round-trip during SAVE_ARCHIVE.
// keepalive traffic (PING/PONG health probe — Option C) — Chrome 110+
// resets the SW idle timer on every port message, AND the PONG reply
// closes the offscreen's health-probe loop; (b) on-demand REQUEST_BUFFER
// round-trip during SAVE_ARCHIVE, routed by requestId so port
// replacement mid-request does not drop the response.
chrome.runtime.onConnect.addListener((port) => {
// T-1-04 mitigation: only accept ports from this extension
// T-1-04 mitigation: only accept ports from this extension.
if (port.name !== 'video-keepalive') {
return;
}
@@ -91,123 +176,114 @@ chrome.runtime.onConnect.addListener((port) => {
// Chrome 110+ resets the SW idle-timer on any inbound port message, BUT
// in the field, behaviour has been observed to differ subtly when no
// listener is attached at all — Chrome may skip the idle-timer reset
// path entirely on unrouted messages. A no-op listener guarantees the
// PING traffic is consumed and the timer reset is unconditional. The
// per-request listener installed by `getVideoBufferFromOffscreen` still
// handles BUFFER routing; this sink only drains PING and any unknown
// traffic so it doesn't accumulate or surprise us later.
// path entirely on unrouted messages.
//
// Option C: this sink ALSO routes BUFFER responses to the matching
// pending request by requestId (the per-request listener pattern is
// gone — it could not handle port replacement). And it echoes PONG on
// every PING so the offscreen's health probe sees life.
port.onMessage.addListener((msg) => {
if (
typeof msg === 'object' &&
msg !== null &&
(msg as { type?: unknown }).type === 'PING'
) {
// Explicit drain — silences "no listener" semantics in Chrome's
// port-message dispatch and keeps the SW idle-timer reset reliable.
if (typeof msg !== 'object' || msg === null) {
return;
}
const type = (msg as { type?: unknown }).type;
if (type === 'PING') {
// Health-probe echo (Option C). Wrapped in try/catch because the
// port may have been disconnected between the inbound PING and
// our response — silently drop in that race window.
try {
port.postMessage({ type: 'PONG' });
} catch (err) {
logger.warn('PONG postMessage failed (port closed):', err);
}
return;
}
if (type === 'BUFFER') {
const requestId = (msg as { requestId?: unknown }).requestId;
if (typeof requestId !== 'string' || requestId.length === 0) {
// Defense-in-depth: BUFFER without a valid requestId is invalid
// under the Option C protocol — drop with a warn. (Legacy
// offscreen code that didn't carry requestId is gone.)
logger.warn('BUFFER without a valid requestId — dropping');
return;
}
const pending = pendingBufferRequests.get(requestId);
if (pending === undefined) {
// Stale BUFFER (request already resolved or timed out). Silently
// drop — this is the no-cross-talk property the request-id
// routing guarantees.
return;
}
const wireSegments =
(msg as { segments?: TransferredVideoSegment[] }).segments ?? [];
const segments = decodeBufferSegments(wireSegments);
clearTimeout(pending.hardTimer);
pendingBufferRequests.delete(requestId);
pending.resolve({ segments });
return;
}
// Unknown traffic — drop silently (T-1-04 defense-in-depth).
// BUFFER is routed by the per-request listener in
// getVideoBufferFromOffscreen; that listener fires first when
// attached, so this branch never observes BUFFER in practice.
});
port.onDisconnect.addListener(() => {
logger.log('Offscreen port disconnected; offscreen will reconnect');
videoPort = null;
if (videoPort === port) {
videoPort = null;
}
});
// If there are pending REQUEST_BUFFER requests at the moment this port
// connects, re-issue them on the fresh port with the SAME requestId.
// This is the architectural mechanism that retires the H2 silent-drop
// class — the BUFFER reaches the SW regardless of port-replacement
// timing. (Note: the FIRST onConnect has pendingBufferRequests.size
// === 0 so this branch correctly does nothing on bootstrap.)
if (pendingBufferRequests.size > 0) {
for (const pending of pendingBufferRequests.values()) {
try {
port.postMessage({
type: 'REQUEST_BUFFER',
requestId: pending.requestId,
});
} catch (err) {
// The fresh port disconnected synchronously — the outer hard
// timer will fire and surface the error.
logger.warn('REQUEST_BUFFER retry post failed:', err);
}
}
}
});
// 2 s budget covers the worst-case round-trip: offscreen base64-encodes
// up to ~15 chunks of ~100 KB each (~1.5 MB raw → ~2 MB base64) in
// well under 100 ms, post-message + JSON parse adds < 50 ms, leaving
// plenty of headroom. Bumping later is cheap if real-world recordings
// produce significantly larger buffers; today this is sufficient.
const BUFFER_FETCH_TIMEOUT_MS = 2_000;
async function getVideoBufferFromOffscreen(): Promise<VideoBufferResponse> {
if (videoPort === null) {
logger.warn('No offscreen port available; returning empty buffer');
return { segments: [] };
}
const port = videoPort;
const requestId = generateRequestId();
return new Promise<VideoBufferResponse>((resolve) => {
const timer = setTimeout(() => {
port.onMessage.removeListener(handler);
// Sweep #5 fix: surface the diagnostic when the timeout fires
// because the port was replaced by a reconnect mid-request.
// The OLD port (captured as `port`) has a dead listener; the
// offscreen will encode-and-send on the NEW port but the
// listener installed there belongs to a different
// getVideoBufferFromOffscreen call (if any). Without this
// diagnostic the operator sees a silent timeout that masquerades
// as an offscreen-side problem. With it, the SW log shows the
// reconnect timing was the proximate cause.
const portReplaced = videoPort !== port;
const hardTimer = setTimeout(() => {
pendingBufferRequests.delete(requestId);
// Outer hard-timeout: covers EVERY retry across port replacements
// (the legacy per-port BUFFER_FETCH_TIMEOUT_MS was 2 s per
// attempt — too tight to retry across a reconnect). 10 s is
// generous; the inner round-trip is still ~100-200 ms.
logger.warn(
`Buffer fetch timed out after ${BUFFER_FETCH_TIMEOUT_MS} ms`,
'port_replaced_during_fetch:', portReplaced,
`Buffer fetch outer timeout (${BUFFER_FETCH_TIMEOUT_MS} ms) — no BUFFER for requestId ${requestId}`,
);
resolve({ segments: [] });
}, BUFFER_FETCH_TIMEOUT_MS);
const handler = (msg: unknown) => {
if (
typeof msg === 'object' &&
msg !== null &&
(msg as { type?: unknown }).type === 'BUFFER'
) {
clearTimeout(timer);
port.onMessage.removeListener(handler);
// D-12 wire format + D-13 segment lifecycle: payload arrives
// as TransferredVideoSegment[] (base64 string + MIME). Decode
// each entry back into a VideoSegment — each is a
// self-contained ~10 s WebM (EBML header + seed keyframe).
// Concatenating them sequentially produces a multi-EBML-header
// file Chrome plays natively. See src/shared/binary.ts +
// RESEARCH.md Pattern 3.
const wireSegments =
(msg as { segments?: TransferredVideoSegment[] }).segments ?? [];
// WR-07 fix: filter empty wire segments BEFORE base64 decode.
// An empty wire.data would decode to a zero-byte Blob; the
// SW-side mergeVideoSegments would then concat it into the
// output WebM, producing a stray empty EBML segment that
// breaks Chrome playback. We split into two passes (filter →
// decode → filter-non-empty) so the iteration semantics stay
// declarative (no early-return in the loop body).
const nonEmptyWires = wireSegments.filter((wire) => {
const isEmpty = !wire.data || wire.data.length === 0;
if (isEmpty) {
logger.warn(
'Skipping empty wire segment (zero-length base64)',
'timestamp:', wire.timestamp,
);
}
return !isEmpty;
});
const segments: VideoSegment[] = nonEmptyWires.flatMap((wire) => {
try {
const blob = base64ToBlob(wire.data, wire.type || VIDEO_MIME_FALLBACK);
if (blob.size === 0) {
logger.warn(
'Skipping segment that decoded to zero bytes',
'timestamp:', wire.timestamp,
);
return [];
}
return [{ data: blob, timestamp: wire.timestamp }];
} catch (err) {
logger.warn(
'base64ToBlob failed; skipping segment',
'timestamp:', wire.timestamp,
'error:', err,
);
return [];
}
});
resolve({ segments });
}
};
port.onMessage.addListener(handler);
port.postMessage({ type: 'REQUEST_BUFFER' });
pendingBufferRequests.set(requestId, {
resolve,
hardTimer,
requestId,
});
try {
videoPort?.postMessage({ type: 'REQUEST_BUFFER', requestId });
} catch (err) {
// The current port disconnected synchronously. Don't resolve here
// — the offscreen's reconnect will fire a fresh onConnect, the
// sink will detect the in-flight request, and the retry path will
// re-post REQUEST_BUFFER on the new port.
logger.warn('Initial REQUEST_BUFFER post failed (port disconnected):', err);
}
});
}