2026-05-31 15:34:17 +00:00
1 changed files with 434 additions and 0 deletions
--- a/src/background/webm-remux.ts
+++ b/src/background/webm-remux.ts
@@ -0,0 +1,434 @@
 /**
 * @file src/background/webm-remux.ts
 *
 * WebM remux pipeline for Plan 01-08 (CONTEXT.md amendment D-14-remux —
 * disambiguated from the historical "D-14: Not applicable" tab-switch
 * decision recorded in the original decisions block; see CONTEXT.md
 * §"Amendment ... D-14-remux: WebM remux via ts-ebml + webm-muxer"
 * for the canonical statement, B-02 fix). Replaces
 * `mergeVideoSegments()` in `src/background/index.ts` (which
 * file-concatenated the offscreen recorder's 3 self-contained ~10 s
 * WebM segments — producing a multi-EBML-header file that mpv,
 * Chrome's HTMLMediaElement, and ffprobe's `format=duration` all
 * truncate to the first segment's local Info.Duration ~9.94 s).
 *
 * The new pipeline produces a single-EBML-headered WebM that any
 * standards-compliant Matroska parser reads as the full ~30 s
 * timeline. See `.planning/debug/d13-multi-ebml-concat-unplayable.md`
 * for the byte-level evidence and library-survey rationale that
 * locked the `ts-ebml` (parse) + `webm-muxer` (write) choice.
 *
 * ## Algorithm
 *
 * 1. Sort input segments by `timestamp` ascending (defensive — the
 *    offscreen recorder already emits in order, but a copy+sort is
 *    cheap relative to the parse/mux pass).
 * 2. Parse every segment via `ts-ebml.Decoder` and take track info
 *    (PixelWidth, PixelHeight, optional CodecPrivate) from the first
 *    segment that exposes it, falling back to fixed default
 *    dimensions if none does. Needed for the muxer's `video` config.
 * 3. Create one `Muxer<ArrayBufferTarget>` configured for VP9
 *    (`codec: 'V_VP9'`) with `type: 'webm'` and
 *    `firstTimestampBehavior: 'offset'`, so the muxer accepts a
 *    first frame whose timestamp is not exactly 0 and shifts the
 *    timeline to start there instead of rejecting the chunk.
 * 4. For each segment, accumulate a `segmentBaseMs` counter and
 *    feed every SimpleBlock through `addVideoChunkRaw(data, type,
 *    globalUs)` where `globalUs = (segmentBaseMs + clusterTs +
 *    blockOffset) * 1_000`. Frame data comes from
 *    `tools.ebmlBlock(simpleBlock.data).frames` (each SimpleBlock
 *    typically carries 1 VP9 frame; multi-frame lacing is rare in
 *    MediaRecorder output but supported here).
 * 5. After every segment, advance `segmentBaseMs` by that
 *    segment's measured content duration (last-frame timestamp
 *    + nominal frame interval).
 * 6. `muxer.finalize()` → wrap `target.buffer` in a `Blob`.
 *
 * ## Style notes
 *
 * - Extensive JSDoc per the project's global style guide.
 * - No `as any`, no `@ts-ignore`. The two libraries' published
 *   type surfaces (see `node_modules/{ts-ebml,webm-muxer}/`)
 *   are used directly.
 * - `if (cond) return ...` guard-clause exceptions to the
 *   "prefer if-else over early return" project rule are documented
 *   inline where they appear (empty input, missing track info).
 *   The user's CLAUDE.md acknowledges guard clauses as a clarity
 *   exception worth preserving over a deeper indent.
 * - All diagnostics via `Logger('Remux')` — no bare `console.log`.
 * - Threat-model note (T-1-08-01, T-1-08-03 in PLAN.md §threat_model):
 *   ts-ebml + webm-muxer process attacker-influenced bytes (the input
 *   segments come from the offscreen MediaRecorder, which captures
 *   whatever screen content the operator picked). Parse failures
 *   are surfaced as `EmptyVideoBufferError` upstream (via the
 *   `output.size === 0` branch in `createArchive`), giving the
 *   operator a clear failure surface rather than a corrupt archive.
 */
 import { Decoder, tools } from 'ts-ebml';
 import { Muxer, ArrayBufferTarget } from 'webm-muxer';
 import { Logger } from '../shared/logger';
 import type { VideoSegment } from '../shared/types';
 // Module-scoped logger; every diagnostic emitted by this file is
 // tagged 'Remux'.
 const logger = new Logger('Remux');
 /**
 * Codec identifier passed as the muxer's `video.codec` option: the
 * Matroska codec ID for VP9.
 * See https://www.matroska.org/technical/codec_specs.html
 */
 const VP9_MATROSKA_CODEC = 'V_VP9';
 /**
 * Nominal frame interval, in milliseconds, added after the last
 * frame of each segment when computing that segment's duration, so
 * the next segment's first frame slots in just after the prior
 * segment's last frame instead of colliding with it. Corresponds to
 * a ~30 fps cadence (1000 ms / 30 ≈ 33 ms). The exact value only
 * affects the inter-segment gap; a few milliseconds either way is
 * invisible at human playback timescales.
 */
 const NOMINAL_FRAME_INTERVAL_MS = 33;
 /**
 * Frame rate passed as the muxer's `video.frameRate` option.
 * Intended as a metadata hint only — NOTE(review): confirm the
 * muxer does not enforce it.
 */
 const DEFAULT_FRAME_RATE = 30;
 /**
 * Pixel dimensions used for the muxer's video config when no input
 * segment yields a usable Video element (width and height). This is
 * a defense-in-depth fallback: the recorder is expected to emit
 * PixelWidth/PixelHeight at the head of every segment, so the
 * values below should only matter for malformed input.
 */
 const FALLBACK_PIXEL_WIDTH = 1024;
 const FALLBACK_PIXEL_HEIGHT = 768;
 /**
 * Track info extracted from a segment's Tracks → TrackEntry → Video
 * subtree. `codecPrivate` is optional because VP9 MediaRecorder
 * streams generally do not ship one (Chrome derives the VP9 config
 * from the first keyframe's superframe header).
 */
 interface TrackInfo {
  /** Value of the track's PixelWidth element. */
  width: number;
  /** Value of the track's PixelHeight element. */
  height: number;
  /** Raw bytes of the CodecPrivate element, when one was found. */
  codecPrivate?: Uint8Array;
 }
 /**
 * Load the full contents of a Blob into memory.
 *
 * Thin wrapper over the standard `Blob.arrayBuffer()` method; it
 * exists so call sites read as an explicit "Blob → bytes" step.
 *
 * @param blob - Blob whose bytes should be read.
 * @returns Promise that resolves with the blob's bytes.
 */
 async function blobToArrayBuffer(blob: Blob): Promise<ArrayBuffer> {
  const bytes = await blob.arrayBuffer();
  return bytes;
 }
 /**
 * Present a byte container under the plain `Uint8Array` type.
 *
 * ts-ebml hands out Node `Buffer` instances. `Buffer` is a subclass
 * of `Uint8Array`, so a Buffer is accepted by the parameter type and
 * returned as the very same object — nothing is copied and the
 * underlying memory is shared. The helper exists purely so call
 * sites that feed `webm-muxer` (which takes `Uint8Array`) state the
 * intended type explicitly.
 *
 * @param buf - Node Buffer or already-Uint8Array.
 * @returns The identical object, typed as `Uint8Array`.
 */
 function asUint8Array(buf: Uint8Array): Uint8Array {
  // Pure type-level upcast: the value is returned untouched.
  const view: Uint8Array = buf;
  return view;
 }
 /**
 * Scan a segment's decoded EBML element list for the first
 * TrackEntry that carries a complete Video subtree and return its
 * PixelWidth, PixelHeight, and (if present) CodecPrivate.
 *
 * CodecPrivate is a child of TrackEntry — a sibling of Video, not a
 * child of it — and Matroska does not fix the order of siblings, so
 * it may appear before OR after the Video element. The result is
 * therefore finalized at the TrackEntry boundary rather than at the
 * end of Video; returning at the end of Video would drop a
 * CodecPrivate that follows it. State is also reset at every
 * TrackEntry boundary so dimensions or CodecPrivate bytes from one
 * track are never combined with those of another.
 *
 * Only the first qualifying TrackEntry is reported; later video
 * tracks are ignored.
 *
 * @param elements - Output of `Decoder.decode(buffer)`.
 * @returns Track info for the first TrackEntry exposing both pixel
 *   dimensions, or `null` when there is none — the caller should
 *   then fall back to {@link FALLBACK_PIXEL_WIDTH} / {@link
 *   FALLBACK_PIXEL_HEIGHT} so the muxer can still produce output.
 */
 function pickTrackInfoFromSegment(
  elements: ReturnType<Decoder['decode']>,
 ): TrackInfo | null {
  let inVideo = false;
  let width: number | null = null;
  let height: number | null = null;
  let codecPrivate: Uint8Array | undefined;
  for (const el of elements) {
    if (el.name === 'TrackEntry' && el.type === 'm') {
      // Any TrackEntry edge (its own end, or the start of the next
      // entry should an end marker be missing) closes the entry
      // collected so far. Complete dimensions mean we are done.
      if (width !== null && height !== null) {
        return { width, height, codecPrivate };
      }
      // Otherwise discard the partial state so nothing leaks into
      // the next TrackEntry.
      inVideo = false;
      width = null;
      height = null;
      codecPrivate = undefined;
    } else if (el.name === 'Video' && el.type === 'm') {
      // Master element: track enter/exit of the Video subtree.
      inVideo = !el.isEnd;
    } else if (inVideo && el.name === 'PixelWidth' && el.type === 'u') {
      width = el.value;
    } else if (inVideo && el.name === 'PixelHeight' && el.type === 'u') {
      height = el.value;
    } else if (el.name === 'CodecPrivate' && el.type === 'b') {
      // Lives at TrackEntry level; keep the first one seen within
      // the current entry.
      if (codecPrivate === undefined && el.data) {
        codecPrivate = asUint8Array(el.data);
      }
    }
  }
  // The list ended without a closing TrackEntry edge (e.g. truncated
  // input): still report whatever complete dimensions were gathered.
  if (width !== null && height !== null) {
    return { width, height, codecPrivate };
  }
  return null;
 }
 /**
 * A single VP9 frame extracted from a segment's SimpleBlock,
 * paired with its keyframe flag and a per-segment-local
 * timestamp in milliseconds.
 */
 interface ExtractedFrame {
  /** Encoded frame bytes as carried inside the SimpleBlock. */
  data: Uint8Array;
  /** Keyframe flag of the SimpleBlock this frame came from. */
  isKey: boolean;
  /**
   * Per-segment-local timestamp in milliseconds (cluster timestamp
   * plus the block's relative offset).
   */
  localTimestampMs: number;
 }
 /**
 * Result of {@link extractFramesFromSegment}. `segmentDurationMs`
 * is the duration the SEGMENT consumed on its own local timeline
 * (its final frame timestamp + {@link NOMINAL_FRAME_INTERVAL_MS}),
 * or 0 when the segment yielded no frames. The caller adds this to
 * `segmentBaseMs` so the next segment's first frame doesn't collide
 * with this segment's last.
 */
 interface SegmentExtraction {
  /** Frames in the order they were encountered in the segment. */
  frames: ExtractedFrame[];
  /** Span of the segment's local timeline in ms; 0 if no frames. */
  segmentDurationMs: number;
  /** Track info found in this segment, or null if none was found. */
  trackInfo: TrackInfo | null;
 }
 /**
 * Parse a segment's ArrayBuffer via ts-ebml and walk its element
 * list, producing one {@link ExtractedFrame} per frame found inside
 * each SimpleBlock. The current Cluster's base timestamp is tracked
 * so each frame's `localTimestampMs` is the segment-local time
 * (cluster timestamp + the block's relative offset).
 *
 * The keyframe flag is taken from `tools.ebmlBlock(buf).keyframe`.
 *
 * Multi-frame SimpleBlocks (lacing) are flattened: every frame of a
 * block becomes its own entry sharing the block's timestamp.
 *
 * Assumptions (NOTE(review): confirm against the recorder output):
 * - Cluster timestamps and block offsets are in milliseconds, i.e.
 *   the segment uses the default timestamp scale; no scale element
 *   is read here.
 * - Every SimpleBlock belongs to the video track; blocks are not
 *   filtered by track number.
 *
 * @param buffer - The segment's bytes.
 * @returns Frames, the measured segment duration (largest frame
 *   timestamp + {@link NOMINAL_FRAME_INTERVAL_MS}, or 0 when no
 *   frames were found), and the extracted track info.
 */
 function extractFramesFromSegment(
  buffer: ArrayBuffer,
 ): SegmentExtraction {
  const decoder = new Decoder();
  const elements = decoder.decode(buffer);
  const trackInfo = pickTrackInfoFromSegment(elements);
  const frames: ExtractedFrame[] = [];
  let currentClusterTs = 0;
  let maxFrameTimestampMs = 0;
  let inCluster = false;
  for (const el of elements) {
    if (el.name === 'Cluster' && el.type === 'm') {
      inCluster = !el.isEnd;
    } else if (
      inCluster &&
      el.type === 'u' &&
      (el.name === 'Timestamp' || el.name === 'Timecode')
    ) {
      // Matroska v4 renamed Cluster.Timecode → Cluster.Timestamp,
      // and which name the decoder reports depends on the schema it
      // bundles. Accept both: matching only one would silently pin
      // every cluster base to 0 under the other schema and make
      // frames from later clusters collide with earlier ones.
      currentClusterTs = el.value;
    } else if (el.name === 'SimpleBlock' && el.type === 'b' && el.data) {
      const sb = tools.ebmlBlock(el.data);
      const blockLocalMs = currentClusterTs + sb.timecode;
      for (const frame of sb.frames) {
        frames.push({
          data: asUint8Array(frame),
          isKey: sb.keyframe,
          localTimestampMs: blockLocalMs,
        });
      }
      if (sb.frames.length > 0) {
        // Track the largest timestamp rather than the last one seen
        // so an out-of-order final block cannot shrink the duration.
        maxFrameTimestampMs = Math.max(maxFrameTimestampMs, blockLocalMs);
      }
    }
  }
  // Segment duration covers the latest frame plus a nominal interval
  // — the next segment's first frame slots in just after.
  const segmentDurationMs =
    frames.length === 0 ? 0 : maxFrameTimestampMs + NOMINAL_FRAME_INTERVAL_MS;
  return { frames, segmentDurationMs, trackInfo };
 }
 /**
 * Remux a sequence of self-contained WebM `VideoSegment` blobs
 * into a single WebM Blob with one EBML header and one Segment
 * element. Every frame extracted from the inputs is re-emitted on
 * one timeline: each segment's frames are shifted by the summed
 * durations of the segments before it.
 *
 * An empty Blob (size 0, type `video/webm`) is returned in two
 * cases so that a caller's "output is empty" check can treat both
 * as failure:
 *   - `segments` is empty;
 *   - no frame could be extracted from any segment. Without this
 *     guard the muxer would still be finalized and return whatever
 *     container bytes it writes on its own, i.e. a non-empty blob
 *     holding no video.
 *
 * Caller contract:
 *   - Input segments must be self-contained WebM bytes.
 *   - Input order is normalized internally (sorted by `timestamp`
 *     ascending on a copy; the argument array is not mutated).
 *   - Output type is `video/webm`.
 *
 * NOTE(review): exceptions raised by the decoder or the muxer are
 * not caught here and reject the returned promise — confirm the
 * caller handles rejection as well as the empty-Blob result.
 *
 * @param segments - Sequence of WebM segments produced by the
 *   offscreen MediaRecorder rotation lifecycle.
 * @returns Single-EBML-headered WebM Blob covering every extracted
 *   frame, or an empty Blob when there is nothing to mux.
 */
 export async function remuxSegments(segments: VideoSegment[]): Promise<Blob> {
  // Guard clause exception: empty input is the most common
  // failure surface in the saveArchive path, and the early-return
  // body is one statement long — clearer than nesting the entire
  // remux inside an else.
  if (segments.length === 0) {
    logger.log('Empty input — returning empty Blob');
    return new Blob([], { type: 'video/webm' });
  }
  const sorted = [...segments].sort((a, b) => a.timestamp - b.timestamp);
  logger.log(
    `Remuxing ${sorted.length} segments; sizes:`,
    sorted.map((s) => s.data.size),
  );
  // First pass: extract every segment up front. The muxer config
  // needs track info before the first chunk can be pushed, so the
  // muxer is driven in a second pass over the extracted frames.
  const extractions: SegmentExtraction[] = [];
  for (const seg of sorted) {
    const ab = await blobToArrayBuffer(seg.data);
    const extraction = extractFramesFromSegment(ab);
    extractions.push(extraction);
    logger.log(
      `Segment ts=${seg.timestamp}: ${extraction.frames.length} frames, ` +
        `duration=${extraction.segmentDurationMs}ms, ` +
        `trackInfo=${
          extraction.trackInfo
            ? `${extraction.trackInfo.width}x${extraction.trackInfo.height}`
            : 'null'
        }`,
    );
  }
  // Guard clause exception (same rationale as the empty-input guard):
  // with nothing to mux, hand back an empty Blob instead of a
  // header-only container that would slip past size-based checks.
  const availableFrames = extractions.reduce(
    (sum, extraction) => sum + extraction.frames.length,
    0,
  );
  if (availableFrames === 0) {
    logger.warn(
      `No frames extracted from ${extractions.length} segments — returning empty Blob`,
    );
    return new Blob([], { type: 'video/webm' });
  }
  // Pick track info from the FIRST segment that exposes one.
  // Fallback to conservative defaults — the muxer needs *some*
  // width/height in its video config.
  const pickedTrackInfo =
    extractions.find((extraction) => extraction.trackInfo !== null)
      ?.trackInfo ?? null;
  if (pickedTrackInfo === null) {
    logger.warn(
      `pickTrackInfoFromSegment returned null for all ${extractions.length} segments — ` +
        `falling back to ${FALLBACK_PIXEL_WIDTH}x${FALLBACK_PIXEL_HEIGHT}`,
    );
  }
  const trackInfo: TrackInfo = pickedTrackInfo ?? {
    width: FALLBACK_PIXEL_WIDTH,
    height: FALLBACK_PIXEL_HEIGHT,
  };
  const target = new ArrayBufferTarget();
  const muxer = new Muxer({
    target,
    video: {
      codec: VP9_MATROSKA_CODEC,
      width: trackInfo.width,
      height: trackInfo.height,
      frameRate: DEFAULT_FRAME_RATE,
    },
    // No `audio` block — Phase 1 SPEC §9 / CAP-01 excludes audio.
    type: 'webm',
    firstTimestampBehavior: 'offset',
  });
  // Chunk metadata is identical for every frame, so build it once.
  // It is only attached when a CodecPrivate was actually extracted;
  // `decoderConfig.codec` is required by the WebCodecs typing, and
  // 'vp09.00.10.08' is the WebCodecs codec string for VP9 Profile 0,
  // level 1.0, 8-bit.
  const meta = trackInfo.codecPrivate
    ? {
        decoderConfig: {
          codec: 'vp09.00.10.08',
          description: trackInfo.codecPrivate,
        },
      }
    : undefined;
  let segmentBaseMs = 0;
  let totalFramesEmitted = 0;
  for (const extraction of extractions) {
    for (const frame of extraction.frames) {
      // Segment-local ms → global µs (the unit passed to the muxer).
      const globalUs = (segmentBaseMs + frame.localTimestampMs) * 1_000;
      muxer.addVideoChunkRaw(
        frame.data,
        frame.isKey ? 'key' : 'delta',
        globalUs,
        meta,
      );
      totalFramesEmitted++;
    }
    // Advance the base so the next segment starts after this one.
    segmentBaseMs += extraction.segmentDurationMs;
  }
  muxer.finalize();
  const outputBlob = new Blob([target.buffer], { type: 'video/webm' });
  logger.log(
    `Remux complete: ${totalFramesEmitted} frames, ` +
      `total timeline=${segmentBaseMs}ms, output=${outputBlob.size} bytes`,
  );
  return outputBlob;
 }