/**
 * @file src/background/webm-remux.ts
 *
 * WebM remux pipeline for Plan 01-08 (CONTEXT.md amendment D-14-remux —
 * disambiguated from the historical "D-14: Not applicable" tab-switch
 * decision recorded in the original decisions block; see CONTEXT.md
 * §"Amendment ... D-14-remux: WebM remux via ts-ebml + webm-muxer"
 * for the canonical statement, B-02 fix). Replaces
 * `mergeVideoSegments()` in `src/background/index.ts` (which
 * file-concatenated the offscreen recorder's 3 self-contained ~10 s
 * WebM segments — producing a multi-EBML-header file that mpv,
 * Chrome's HTMLMediaElement, and ffprobe's `format=duration` all
 * truncate to the first segment's local Info.Duration ~9.94 s).
 *
 * The new pipeline produces a single-EBML-headered WebM that any
 * standards-compliant Matroska parser reads as the full ~30 s
 * timeline. See `.planning/debug/d13-multi-ebml-concat-unplayable.md`
 * for the byte-level evidence and library-survey rationale that
 * locked the `ts-ebml` (parse) + `webm-muxer` (write) choice.
 *
 * ## Algorithm
 *
 * 1. Sort input segments by `timestamp` ascending (defensive — the
 *    offscreen recorder already emits in order, but a copy+sort is
 *    cheap relative to the parse/mux pass).
 * 2. Parse every segment via `ts-ebml.Decoder` and take track info
 *    (PixelWidth, PixelHeight, optional CodecPrivate) from the first
 *    segment that exposes a Video subtree. Needed for the muxer's
 *    `video` config.
 * 3. Create one `Muxer` configured for VP9
 *    (`codec: 'V_VP9'`) with `type: 'webm'` and
 *    `firstTimestampBehavior: 'offset'` (forgives a non-zero first
 *    frame timestamp).
 * 4. For each segment, accumulate a `segmentBaseMs` counter and
 *    feed every SimpleBlock frame through `addVideoChunkRaw(data,
 *    type, globalUs)` where `globalUs = (segmentBaseMs + clusterTs +
 *    blockOffset) * 1_000`. Frame data comes from
 *    `tools.ebmlBlock(simpleBlock.data).frames` (each SimpleBlock
 *    typically carries 1 VP9 frame; multi-frame lacing is rare in
 *    MediaRecorder output but supported here).
 * 5. After every segment, advance `segmentBaseMs` by that
 *    segment's measured content duration (greatest frame timestamp
 *    + nominal frame interval).
 * 6. `muxer.finalize()` → wrap `target.buffer` in a `Blob`.
 *
 * ## Style notes
 *
 * - Extensive JSDoc per the project's global style guide.
 * - No `as any`, no `@ts-ignore`. The two libraries' published
 *   type surfaces (see `node_modules/{ts-ebml,webm-muxer}/`)
 *   are used directly.
 * - `if (cond) return ...` guard-clause exceptions to the
 *   "prefer if-else over early return" project rule are documented
 *   inline where they appear (empty input, no decodable frames).
 * - All diagnostics via `Logger('Remux')` — no bare `console.log`.
 * - Threat-model note (T-1-08-01, T-1-08-03 in PLAN.md §threat_model):
 *   ts-ebml + webm-muxer process attacker-influenced bytes (the input
 *   segments come from the offscreen MediaRecorder, which captures
 *   whatever screen content the operator picked). When no segment
 *   yields a single frame, `remuxSegments` returns an EMPTY Blob so
 *   the upstream `output.size === 0` branch in `createArchive` can
 *   raise `EmptyVideoBufferError` instead of archiving a WebM that
 *   contains no video. Exceptions thrown by the decoder or the muxer
 *   are not caught here; they propagate as a rejected promise.
 */

import { Decoder, tools } from 'ts-ebml';
import { Muxer, ArrayBufferTarget } from 'webm-muxer';

import { Logger } from '../shared/logger';
import type { VideoSegment } from '../shared/types';

const logger = new Logger('Remux');

/**
 * Codec identifier the muxer expects for VP9 in the Matroska
 * codec-id taxonomy. See https://www.matroska.org/technical/codec_specs.html
 */
const VP9_MATROSKA_CODEC = 'V_VP9';

/**
 * WebCodecs codec string attached to chunk metadata when a
 * CodecPrivate was extracted (VP9 Profile 0, level 1.0, 8-bit).
 */
const VP9_WEBCODECS_CODEC = 'vp09.00.10.08';

/** MIME type of every Blob this module returns. */
const OUTPUT_MIME_TYPE = 'video/webm';

/**
 * Nominal frame interval added after the last frame of each segment
 * to advance `segmentBaseMs` so the next segment's first frame slots
 * in just after the prior segment's last frame. Mirrors the
 * MediaRecorder cadence (`getDisplayMedia` at ~30 fps → 33 ms/frame).
 * The exact value matters only for the inter-segment gap; +/-3 ms
 * is invisible at human playback timescales.
 */
const NOMINAL_FRAME_INTERVAL_MS = 33;

/**
 * Fallback frame rate hint for the muxer's `video.frameRate`
 * field. Used as metadata only.
 */
const DEFAULT_FRAME_RATE = 30;

/**
 * Default pixel dimensions when no segment exposes a usable Video
 * element. Defense-in-depth fallback so the muxer still receives
 * *some* width/height.
 */
const FALLBACK_PIXEL_WIDTH = 1024;
const FALLBACK_PIXEL_HEIGHT = 768;

/**
 * Track info extracted from a segment's Tracks → TrackEntry → Video
 * subtree. `codecPrivate` is optional because a segment may not
 * carry one.
 */
interface TrackInfo {
  width: number;
  height: number;
  codecPrivate?: Uint8Array;
}

/**
 * Read the contents of a Blob into an ArrayBuffer via the standard
 * `Blob.arrayBuffer()`.
 *
 * @param blob - Source Blob.
 * @returns Promise resolving to the blob's bytes as an ArrayBuffer.
 */
async function blobToArrayBuffer(blob: Blob): Promise<ArrayBuffer> {
  return blob.arrayBuffer();
}

/**
 * Identity helper that presents a ts-ebml `Buffer` (a `Uint8Array`
 * subclass) as a plain `Uint8Array` at the call site. No copy is
 * made; the returned value is the very same object.
 *
 * @param buf - Node Buffer or already-Uint8Array.
 * @returns The same bytes as a `Uint8Array`.
 */
function asUint8Array(buf: Uint8Array): Uint8Array {
  return buf;
}

/**
 * Walk the decoded EBML element list for a segment and pull out
 * the first Video subtree's PixelWidth and PixelHeight, plus the
 * first CodecPrivate seen anywhere before that subtree closes.
 * Stops at the first Video subtree that supplied both dimensions.
 *
 * @param elements - Output of `Decoder.decode(buffer)`.
 * @returns Track info, or `null` when no Video subtree supplied
 *   both dimensions (caller falls back to
 *   {@link FALLBACK_PIXEL_WIDTH} / {@link FALLBACK_PIXEL_HEIGHT}).
 */
function pickTrackInfoFromSegment(
  elements: ReturnType<Decoder['decode']>,
): TrackInfo | null {
  let inVideo = false;
  let width: number | null = null;
  let height: number | null = null;
  let codecPrivate: Uint8Array | undefined;

  for (const el of elements) {
    if (el.name === 'Video' && el.type === 'm') {
      if (el.isEnd) {
        // Leaving the Video subtree — done as soon as both
        // dimensions are known.
        if (width !== null && height !== null) {
          return { width, height, codecPrivate };
        }
        inVideo = false;
      } else {
        inVideo = true;
      }
    } else if (inVideo && el.name === 'PixelWidth' && el.type === 'u') {
      width = el.value;
    } else if (inVideo && el.name === 'PixelHeight' && el.type === 'u') {
      height = el.value;
    } else if (el.name === 'CodecPrivate' && el.type === 'b') {
      // CodecPrivate lives at TrackEntry level (sibling of Video),
      // so it is matched regardless of `inVideo`. First one wins.
      if (codecPrivate === undefined && el.data) {
        codecPrivate = asUint8Array(el.data);
      }
    }
  }

  // Element list ended without a closing Video tag; still usable
  // if both dimensions were collected.
  if (width !== null && height !== null) {
    return { width, height, codecPrivate };
  }
  return null;
}

/**
 * A single frame extracted from a segment's SimpleBlock, paired
 * with its keyframe flag and a per-segment-local timestamp.
 */
interface ExtractedFrame {
  data: Uint8Array;
  isKey: boolean;
  /** Per-segment-local timestamp in milliseconds. */
  localTimestampMs: number;
}

/**
 * Result of {@link extractFramesFromSegment}. `segmentDurationMs`
 * is the span the segment occupies on its own local timeline
 * (greatest frame timestamp + {@link NOMINAL_FRAME_INTERVAL_MS}),
 * or 0 when the segment produced no frames. The caller adds it to
 * `segmentBaseMs` so the next segment's first frame lands after
 * this segment's last.
 */
interface SegmentExtraction {
  frames: ExtractedFrame[];
  segmentDurationMs: number;
  trackInfo: TrackInfo | null;
}

/**
 * Parse a segment's bytes via ts-ebml and emit one
 * {@link ExtractedFrame} per frame inside each SimpleBlock. The
 * current Cluster timestamp is tracked so every frame's
 * `localTimestampMs` is cluster timestamp + per-block offset.
 *
 * Multi-frame SimpleBlocks (lacing) are flattened: each frame of a
 * block becomes its own entry sharing the block's timestamp and
 * keyframe flag.
 *
 * @param buffer - The segment's bytes.
 * @returns Frames, measured segment duration, and extracted track info.
 */
function extractFramesFromSegment(buffer: ArrayBuffer): SegmentExtraction {
  const decoder = new Decoder();
  const elements = decoder.decode(buffer);
  const trackInfo = pickTrackInfoFromSegment(elements);

  const frames: ExtractedFrame[] = [];
  let currentClusterTs = 0;
  let maxFrameTimestampMs = 0;
  let inCluster = false;

  for (const el of elements) {
    if (el.name === 'Cluster' && el.type === 'm') {
      inCluster = !el.isEnd;
    } else if (
      inCluster &&
      el.type === 'u' &&
      (el.name === 'Timestamp' || el.name === 'Timecode')
    ) {
      // Matroska v4 renamed Cluster.Timecode → Cluster.Timestamp.
      // NOTE(review): which spelling the installed ts-ebml schema
      // emits should be confirmed; both are accepted so the cluster
      // base cannot silently stay at 0 (which would stack every
      // cluster's frames on top of each other).
      currentClusterTs = el.value;
    } else if (el.name === 'SimpleBlock' && el.type === 'b' && el.data) {
      const block = tools.ebmlBlock(el.data);
      const blockLocalMs = currentClusterTs + block.timecode;
      for (const frame of block.frames) {
        frames.push({
          data: asUint8Array(frame),
          isKey: block.keyframe,
          localTimestampMs: blockLocalMs,
        });
      }
      // Track the maximum rather than the last-seen value so an
      // out-of-order block cannot shrink the measured duration.
      if (block.frames.length > 0) {
        maxFrameTimestampMs = Math.max(maxFrameTimestampMs, blockLocalMs);
      }
    }
  }

  // Duration covers the latest frame plus one nominal interval —
  // the next segment's first frame slots in just after.
  const segmentDurationMs =
    frames.length === 0 ? 0 : maxFrameTimestampMs + NOMINAL_FRAME_INTERVAL_MS;
  return { frames, segmentDurationMs, trackInfo };
}

/**
 * Remux a sequence of self-contained WebM `VideoSegment` blobs
 * into a single WebM Blob with one EBML header and one Segment
 * element. Every frame of every input segment is re-emitted on a
 * single timeline: segment N's frames are shifted by the summed
 * measured durations of segments 0..N-1.
 *
 * Returns an EMPTY Blob (size 0) in two cases, so the upstream
 * `output.size === 0` check in `createArchive` can surface the
 * failure:
 *   - `segments` is empty;
 *   - no segment yielded a single frame (e.g. undecodable input).
 *
 * Caller contract:
 *   - Input segments must be self-contained WebM bytes per
 *     D-13's restart-segments lifecycle.
 *   - Input order is normalized internally (sorted by
 *     `timestamp` ascending on a copy; the argument is not mutated).
 *   - Output type is `video/webm`.
 *
 * @param segments - Sequence of WebM segments produced by the
 *   offscreen MediaRecorder rotation lifecycle.
 * @returns Single-EBML-headered WebM Blob, or an empty Blob when
 *   there is nothing to mux.
 * @throws Propagates any error thrown by `Blob.arrayBuffer()`,
 *   the ts-ebml decoder, or the muxer.
 */
export async function remuxSegments(segments: VideoSegment[]): Promise<Blob> {
  // Guard clause exception: empty input is the most common
  // failure surface in the saveArchive path, and the early-return
  // body is one statement long — clearer than nesting the entire
  // remux inside an else.
  if (segments.length === 0) {
    logger.log('Empty input — returning empty Blob');
    return new Blob([], { type: OUTPUT_MIME_TYPE });
  }

  const sorted = [...segments].sort((a, b) => a.timestamp - b.timestamp);
  logger.log(
    `Remuxing ${sorted.length} segments; sizes:`,
    sorted.map((s) => s.data.size),
  );

  // First pass: extract everything up front. Track info is needed
  // for the muxer config before any chunk can be pushed, so the
  // muxer is driven in a second pass.
  const extractions: SegmentExtraction[] = [];
  for (const seg of sorted) {
    const bytes = await blobToArrayBuffer(seg.data);
    const extraction = extractFramesFromSegment(bytes);
    extractions.push(extraction);
    logger.log(
      `Segment ts=${seg.timestamp}: ${extraction.frames.length} frames, ` +
        `duration=${extraction.segmentDurationMs}ms, ` +
        `trackInfo=${
          extraction.trackInfo
            ? `${extraction.trackInfo.width}x${extraction.trackInfo.height}`
            : 'null'
        }`,
    );
  }

  // Guard clause exception: with zero frames there is nothing to
  // mux. Finalizing a muxer anyway would hand back a container
  // with no video, which the upstream size-0 check could not
  // detect — return a genuinely empty Blob instead.
  const totalFramesAvailable = extractions.reduce(
    (sum, extraction) => sum + extraction.frames.length,
    0,
  );
  if (totalFramesAvailable === 0) {
    logger.warn(
      `No frames decoded from ${extractions.length} segments — returning empty Blob`,
    );
    return new Blob([], { type: OUTPUT_MIME_TYPE });
  }

  // Track info comes from the FIRST segment that exposes one;
  // otherwise fall back to conservative defaults.
  const pickedTrackInfo =
    extractions.find((extraction) => extraction.trackInfo !== null)
      ?.trackInfo ?? null;
  if (pickedTrackInfo === null) {
    logger.warn(
      `pickTrackInfoFromSegment returned null for all ${extractions.length} segments — ` +
        `falling back to ${FALLBACK_PIXEL_WIDTH}x${FALLBACK_PIXEL_HEIGHT}`,
    );
  }
  const trackInfo: TrackInfo = pickedTrackInfo ?? {
    width: FALLBACK_PIXEL_WIDTH,
    height: FALLBACK_PIXEL_HEIGHT,
  };

  const target = new ArrayBufferTarget();
  const muxer = new Muxer({
    target,
    video: {
      codec: VP9_MATROSKA_CODEC,
      width: trackInfo.width,
      height: trackInfo.height,
      frameRate: DEFAULT_FRAME_RATE,
    },
    // No `audio` block — Phase 1 SPEC §9 / CAP-01 excludes audio.
    type: 'webm',
    firstTimestampBehavior: 'offset',
  });

  // Chunk metadata is identical for every frame, so build it once.
  // Only attached when a CodecPrivate was actually extracted.
  const meta = trackInfo.codecPrivate
    ? {
        decoderConfig: {
          codec: VP9_WEBCODECS_CODEC,
          description: trackInfo.codecPrivate,
        },
      }
    : undefined;

  let segmentBaseMs = 0;
  let totalFramesEmitted = 0;
  for (const extraction of extractions) {
    for (const frame of extraction.frames) {
      // Segment-local ms → global ms → microseconds for the muxer.
      const globalUs = (segmentBaseMs + frame.localTimestampMs) * 1_000;
      muxer.addVideoChunkRaw(
        frame.data,
        frame.isKey ? 'key' : 'delta',
        globalUs,
        meta,
      );
      totalFramesEmitted++;
    }
    segmentBaseMs += extraction.segmentDurationMs;
  }

  muxer.finalize();
  const outputBlob = new Blob([target.buffer], { type: OUTPUT_MIME_TYPE });
  logger.log(
    `Remux complete: ${totalFramesEmitted} frames, ` +
      `total timeline=${segmentBaseMs}ms, output=${outputBlob.size} bytes`,
  );
  return outputBlob;
}