From 6ffa242cb921ad3498aadf30c585a277dc4f95c7 Mon Sep 17 00:00:00 2001 From: Mark Date: Sat, 16 May 2026 14:43:12 +0200 Subject: [PATCH] feat(option-c-sw): request-id'd BUFFER routing + retry on port replacement + PONG echo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the SW-side architectural refactor per .planning/debug/empty-archive-port-race.md "Fix Strategy: Option C": 1. **Request-id'd protocol** — getVideoBufferFromOffscreen generates a uuid (crypto.randomUUID with Math.random fallback) and sends {type:'REQUEST_BUFFER', requestId} on the live videoPort. The per-request listener pattern is GONE; replaced by a module-level pendingBufferRequests Map. The onConnect-level message sink routes BUFFER -> resolve by id. 2. **Stale BUFFER routing** — BUFFER messages without a matching requestId in the Map are silently dropped (no cross-talk). BUFFER without a valid requestId at all is rejected with a warn (Option C protocol requires the id). 3. **Retry on port replacement** — every onConnect (post-bootstrap) scans pendingBufferRequests and re-issues REQUEST_BUFFER on the fresh port with the SAME requestId. The offscreen posts BUFFER on the current keepalivePort (see prior offscreen commit), the sink matches by id, and the request resolves. This retires the H2 silent-drop class architecturally — the BUFFER reaches the SW regardless of port-replacement timing. 4. **PING -> PONG echo** — the sink replies to every PING with PONG. Closes the offscreen's health-probe loop (it counts missed PONGs and reconnects when MAX_MISSED_PONGS exceeded — see prior offscreen commit). The PONG post is wrapped in try/catch to absorb the same port-closed-mid-response race the offscreen ping path handles. 5. **Outer hard-timeout bumped 2s -> 10s** — the legacy per-port BUFFER_FETCH_TIMEOUT_MS = 2000 was too tight to retry across a reconnect. The new outer budget covers EVERY retry across port replacements; the inner round-trip is still ~100-200 ms. 6. **decodeBufferSegments extracted** — pulled out of the legacy inline handler so the new onConnect sink can decode wire segments without duplicating the logic. Preserves WR-07 (empty wire segment filter) and base64ToBlob defensive catch behaviour. Closes the pre-existing implicit-undefined-return path the legacy flatMap catch had (tsc happy but semantically ambiguous). Status: 51 GREEN, 1 RED. The remaining RED (createArchive must throw on empty video, surfacing to operator) is addressed in the next commit. Pinning contracts (D-12 port-serialization, D-13 segment-rotation, A3 webm-playback) untouched. tsc --noEmit exit 0; type-safety grep clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/background/index.ts | 276 +++++++++++++++++++++++++--------------- 1 file changed, 176 insertions(+), 100 deletions(-) diff --git a/src/background/index.ts b/src/background/index.ts index 9436d35..21b184f 100644 --- a/src/background/index.ts +++ b/src/background/index.ts @@ -71,12 +71,97 @@ async function ensureOffscreen() { } } +// Outer-bound buffer fetch budget. Larger than the legacy +// BUFFER_FETCH_TIMEOUT_MS (was 2 s; per-port-attempt) because the new +// architecture covers MULTIPLE port-replacement retries inside one outer +// budget. 10 s is generous: the inner per-port encode round-trip is +// still ~100-200 ms; the extra headroom covers up to ~50 reconnect +// cycles before the operator-visible error surfaces. +const BUFFER_FETCH_TIMEOUT_MS = 10_000; + +// Option C: in-flight REQUEST_BUFFER requests keyed by requestId. The +// onConnect-level message sink routes BUFFER -> resolve by id, so port +// replacement (videoPort changes mid-request) does NOT lose the +// response — the offscreen posts BUFFER on the CURRENT port (whichever +// that is) and our sink picks it up regardless of which Port object it +// arrives on. +interface PendingBufferRequest { + resolve: (resp: VideoBufferResponse) => void; + hardTimer: ReturnType; + requestId: string; +} +const pendingBufferRequests: Map = new Map(); + +// Generate a per-request correlation id. Uses crypto.randomUUID when +// available (Chrome 92+ in SW context per +// https://developer.chrome.com/docs/extensions/reference/api/runtime#secure_origin), +// with a Math.random fallback that's still unique enough for in-process +// routing — collisions would require simultaneous in-flight requests +// within the same millisecond on the same SW lifetime, vanishingly +// improbable for this UI flow. +function generateRequestId(): string { + if ( + typeof crypto !== 'undefined' && + typeof crypto.randomUUID === 'function' + ) { + return crypto.randomUUID(); + } + return `req-${Date.now()}-${Math.random().toString(36).slice(2)}`; +} + +// Decodes a BUFFER message's wire-format segments into VideoSegment[]. +// Extracted from the legacy inline handler so the onConnect sink can +// resolve a pending request without duplicating the decode logic. +function decodeBufferSegments( + wireSegments: TransferredVideoSegment[], +): VideoSegment[] { + // WR-07 fix: filter empty wire segments BEFORE base64 decode. An empty + // wire.data would decode to a zero-byte Blob; mergeVideoSegments would + // then concat it into the output WebM, producing a stray empty EBML + // segment that breaks Chrome playback. Two passes (filter -> decode -> + // filter-non-empty) keep the iteration semantics declarative. + const nonEmptyWires = wireSegments.filter((wire) => { + const isEmpty = !wire.data || wire.data.length === 0; + if (isEmpty) { + logger.warn( + 'Skipping empty wire segment (zero-length base64)', + 'timestamp:', wire.timestamp, + ); + } + return !isEmpty; + }); + const segments: VideoSegment[] = nonEmptyWires.flatMap((wire) => { + try { + const blob = base64ToBlob(wire.data, wire.type || VIDEO_MIME_FALLBACK); + if (blob.size === 0) { + logger.warn( + 'Skipping segment that decoded to zero bytes', + 'timestamp:', wire.timestamp, + ); + return []; + } + return [{ data: blob, timestamp: wire.timestamp }]; + } catch (err) { + logger.warn( + 'base64ToBlob failed; skipping segment', + 'timestamp:', wire.timestamp, + 'error:', err, + ); + return []; + } + }); + return segments; +} + // SW-side port host (D-17, RESEARCH.md Pattern 5). The offscreen opens this // port on bootstrap and reconnects on disconnect. We use it for: (a) -// keepalive traffic (PING) — Chrome 110+ resets the SW idle timer on every -// port message; (b) on-demand REQUEST_BUFFER round-trip during SAVE_ARCHIVE. +// keepalive traffic (PING/PONG health probe — Option C) — Chrome 110+ +// resets the SW idle timer on every port message, AND the PONG reply +// closes the offscreen's health-probe loop; (b) on-demand REQUEST_BUFFER +// round-trip during SAVE_ARCHIVE, routed by requestId so port +// replacement mid-request does not drop the response. chrome.runtime.onConnect.addListener((port) => { - // T-1-04 mitigation: only accept ports from this extension + // T-1-04 mitigation: only accept ports from this extension. if (port.name !== 'video-keepalive') { return; } @@ -91,123 +176,114 @@ chrome.runtime.onConnect.addListener((port) => { // Chrome 110+ resets the SW idle-timer on any inbound port message, BUT // in the field, behaviour has been observed to differ subtly when no // listener is attached at all — Chrome may skip the idle-timer reset - // path entirely on unrouted messages. A no-op listener guarantees the - // PING traffic is consumed and the timer reset is unconditional. The - // per-request listener installed by `getVideoBufferFromOffscreen` still - // handles BUFFER routing; this sink only drains PING and any unknown - // traffic so it doesn't accumulate or surprise us later. + // path entirely on unrouted messages. + // + // Option C: this sink ALSO routes BUFFER responses to the matching + // pending request by requestId (the per-request listener pattern is + // gone — it could not handle port replacement). And it echoes PONG on + // every PING so the offscreen's health probe sees life. port.onMessage.addListener((msg) => { - if ( - typeof msg === 'object' && - msg !== null && - (msg as { type?: unknown }).type === 'PING' - ) { - // Explicit drain — silences "no listener" semantics in Chrome's - // port-message dispatch and keeps the SW idle-timer reset reliable. + if (typeof msg !== 'object' || msg === null) { + return; + } + const type = (msg as { type?: unknown }).type; + if (type === 'PING') { + // Health-probe echo (Option C). Wrapped in try/catch because the + // port may have been disconnected between the inbound PING and + // our response — silently drop in that race window. + try { + port.postMessage({ type: 'PONG' }); + } catch (err) { + logger.warn('PONG postMessage failed (port closed):', err); + } + return; + } + if (type === 'BUFFER') { + const requestId = (msg as { requestId?: unknown }).requestId; + if (typeof requestId !== 'string' || requestId.length === 0) { + // Defense-in-depth: BUFFER without a valid requestId is invalid + // under the Option C protocol — drop with a warn. (Legacy + // offscreen code that didn't carry requestId is gone.) + logger.warn('BUFFER without a valid requestId — dropping'); + return; + } + const pending = pendingBufferRequests.get(requestId); + if (pending === undefined) { + // Stale BUFFER (request already resolved or timed out). Silently + // drop — this is the no-cross-talk property the request-id + // routing guarantees. + return; + } + const wireSegments = + (msg as { segments?: TransferredVideoSegment[] }).segments ?? []; + const segments = decodeBufferSegments(wireSegments); + clearTimeout(pending.hardTimer); + pendingBufferRequests.delete(requestId); + pending.resolve({ segments }); return; } // Unknown traffic — drop silently (T-1-04 defense-in-depth). - // BUFFER is routed by the per-request listener in - // getVideoBufferFromOffscreen; that listener fires first when - // attached, so this branch never observes BUFFER in practice. }); port.onDisconnect.addListener(() => { logger.log('Offscreen port disconnected; offscreen will reconnect'); - videoPort = null; + if (videoPort === port) { + videoPort = null; + } }); + // If there are pending REQUEST_BUFFER requests at the moment this port + // connects, re-issue them on the fresh port with the SAME requestId. + // This is the architectural mechanism that retires the H2 silent-drop + // class — the BUFFER reaches the SW regardless of port-replacement + // timing. (Note: the FIRST onConnect has pendingBufferRequests.size + // === 0 so this branch correctly does nothing on bootstrap.) + if (pendingBufferRequests.size > 0) { + for (const pending of pendingBufferRequests.values()) { + try { + port.postMessage({ + type: 'REQUEST_BUFFER', + requestId: pending.requestId, + }); + } catch (err) { + // The fresh port disconnected synchronously — the outer hard + // timer will fire and surface the error. + logger.warn('REQUEST_BUFFER retry post failed:', err); + } + } + } }); -// 2 s budget covers the worst-case round-trip: offscreen base64-encodes -// up to ~15 chunks of ~100 KB each (~1.5 MB raw → ~2 MB base64) in -// well under 100 ms, post-message + JSON parse adds < 50 ms, leaving -// plenty of headroom. Bumping later is cheap if real-world recordings -// produce significantly larger buffers; today this is sufficient. -const BUFFER_FETCH_TIMEOUT_MS = 2_000; - async function getVideoBufferFromOffscreen(): Promise { if (videoPort === null) { logger.warn('No offscreen port available; returning empty buffer'); return { segments: [] }; } - const port = videoPort; + const requestId = generateRequestId(); return new Promise((resolve) => { - const timer = setTimeout(() => { - port.onMessage.removeListener(handler); - // Sweep #5 fix: surface the diagnostic when the timeout fires - // because the port was replaced by a reconnect mid-request. - // The OLD port (captured as `port`) has a dead listener; the - // offscreen will encode-and-send on the NEW port but the - // listener installed there belongs to a different - // getVideoBufferFromOffscreen call (if any). Without this - // diagnostic the operator sees a silent timeout that masquerades - // as an offscreen-side problem. With it, the SW log shows the - // reconnect timing was the proximate cause. - const portReplaced = videoPort !== port; + const hardTimer = setTimeout(() => { + pendingBufferRequests.delete(requestId); + // Outer hard-timeout: covers EVERY retry across port replacements + // (the legacy per-port BUFFER_FETCH_TIMEOUT_MS was 2 s per + // attempt — too tight to retry across a reconnect). 10 s is + // generous; the inner round-trip is still ~100-200 ms. logger.warn( - `Buffer fetch timed out after ${BUFFER_FETCH_TIMEOUT_MS} ms`, - 'port_replaced_during_fetch:', portReplaced, + `Buffer fetch outer timeout (${BUFFER_FETCH_TIMEOUT_MS} ms) — no BUFFER for requestId ${requestId}`, ); resolve({ segments: [] }); }, BUFFER_FETCH_TIMEOUT_MS); - const handler = (msg: unknown) => { - if ( - typeof msg === 'object' && - msg !== null && - (msg as { type?: unknown }).type === 'BUFFER' - ) { - clearTimeout(timer); - port.onMessage.removeListener(handler); - // D-12 wire format + D-13 segment lifecycle: payload arrives - // as TransferredVideoSegment[] (base64 string + MIME). Decode - // each entry back into a VideoSegment — each is a - // self-contained ~10 s WebM (EBML header + seed keyframe). - // Concatenating them sequentially produces a multi-EBML-header - // file Chrome plays natively. See src/shared/binary.ts + - // RESEARCH.md Pattern 3. - const wireSegments = - (msg as { segments?: TransferredVideoSegment[] }).segments ?? []; - // WR-07 fix: filter empty wire segments BEFORE base64 decode. - // An empty wire.data would decode to a zero-byte Blob; the - // SW-side mergeVideoSegments would then concat it into the - // output WebM, producing a stray empty EBML segment that - // breaks Chrome playback. We split into two passes (filter → - // decode → filter-non-empty) so the iteration semantics stay - // declarative (no early-return in the loop body). - const nonEmptyWires = wireSegments.filter((wire) => { - const isEmpty = !wire.data || wire.data.length === 0; - if (isEmpty) { - logger.warn( - 'Skipping empty wire segment (zero-length base64)', - 'timestamp:', wire.timestamp, - ); - } - return !isEmpty; - }); - const segments: VideoSegment[] = nonEmptyWires.flatMap((wire) => { - try { - const blob = base64ToBlob(wire.data, wire.type || VIDEO_MIME_FALLBACK); - if (blob.size === 0) { - logger.warn( - 'Skipping segment that decoded to zero bytes', - 'timestamp:', wire.timestamp, - ); - return []; - } - return [{ data: blob, timestamp: wire.timestamp }]; - } catch (err) { - logger.warn( - 'base64ToBlob failed; skipping segment', - 'timestamp:', wire.timestamp, - 'error:', err, - ); - return []; - } - }); - resolve({ segments }); - } - }; - port.onMessage.addListener(handler); - port.postMessage({ type: 'REQUEST_BUFFER' }); + pendingBufferRequests.set(requestId, { + resolve, + hardTimer, + requestId, + }); + try { + videoPort?.postMessage({ type: 'REQUEST_BUFFER', requestId }); + } catch (err) { + // The current port disconnected synchronously. Don't resolve here + // — the offscreen's reconnect will fire a fresh onConnect, the + // sink will detect the in-flight request, and the retry path will + // re-post REQUEST_BUFFER on the new port. + logger.warn('Initial REQUEST_BUFFER post failed (port disconnected):', err); + } }); }