Milestone v1 (v2.0.0): Mokosh — Session Capture #1
434
src/background/webm-remux.ts
Normal file
434
src/background/webm-remux.ts
Normal file
@@ -0,0 +1,434 @@
|
||||
/**
 * @file src/background/webm-remux.ts
 *
 * WebM remux pipeline for Plan 01-08 (CONTEXT.md amendment D-14-remux —
 * disambiguated from the historical "D-14: Not applicable" tab-switch
 * decision recorded in the original decisions block; see CONTEXT.md
 * §"Amendment ... D-14-remux: WebM remux via ts-ebml + webm-muxer"
 * for the canonical statement, B-02 fix). Replaces
 * `mergeVideoSegments()` in `src/background/index.ts` (which
 * file-concatenated the offscreen recorder's 3 self-contained ~10 s
 * WebM segments — producing a multi-EBML-header file that mpv,
 * Chrome's HTMLMediaElement, and ffprobe's `format=duration` all
 * truncate to the first segment's local Info.Duration ~9.94 s).
 *
 * The new pipeline produces a single-EBML-headered WebM that any
 * standards-compliant Matroska parser reads as the full ~30 s
 * timeline. See `.planning/debug/d13-multi-ebml-concat-unplayable.md`
 * for the byte-level evidence and library-survey rationale that
 * locked the `ts-ebml` (parse) + `webm-muxer` (write) choice.
 *
 * ## Algorithm
 *
 * 1. Sort input segments by `timestamp` ascending (defensive — the
 *    offscreen recorder already emits in order, but a copy+sort is
 *    cheap relative to the parse/mux pass).
 * 2. Parse the FIRST segment via `ts-ebml.Decoder` to derive track
 *    info: PixelWidth, PixelHeight, optional CodecPrivate. Needed
 *    for the muxer's `video` config.
 * 3. Create one `Muxer<ArrayBufferTarget>` configured for VP9
 *    (`codec: 'V_VP9'`) with `type: 'webm'` and
 *    `firstTimestampBehavior: 'offset'` (forgives a non-zero first
 *    frame, in case the muxer rejects the first segment's "start
 *    at 0" timestamp after we add the prior-segment offset).
 * 4. For each segment, accumulate a `segmentBaseMs` counter and
 *    feed every SimpleBlock through `addVideoChunkRaw(data, type,
 *    globalUs)` where `globalUs = (segmentBaseMs + clusterTs +
 *    blockOffset) * 1_000`. Frame data comes from
 *    `tools.ebmlBlock(simpleBlock.data).frames` (each SimpleBlock
 *    typically carries 1 VP9 frame; multi-frame lacing is rare in
 *    MediaRecorder output but supported here).
 * 5. After every segment, advance `segmentBaseMs` by that
 *    segment's measured content duration (last-frame timestamp
 *    + nominal frame interval).
 * 6. `muxer.finalize()` → wrap `target.buffer` in a `Blob`.
 *
 * ## Style notes
 *
 * - Extensive JSDoc per the project's global style guide.
 * - No `as any`, no `@ts-ignore`. The two libraries' published
 *   type surfaces (see `node_modules/{ts-ebml,webm-muxer}/`)
 *   are used directly.
 * - `if (cond) return ...` guard-clause exceptions to the
 *   "prefer if-else over early return" project rule are documented
 *   inline where they appear (empty input, missing track info).
 *   The user's CLAUDE.md acknowledges guard clauses as a clarity
 *   exception worth preserving over a deeper indent.
 * - All diagnostics via `Logger('Remux')` — no bare `console.log`.
 * - Threat-model note (T-1-08-01, T-1-08-03 in PLAN.md §threat_model):
 *   ts-ebml + webm-muxer process attacker-influenced bytes (the input
 *   segments come from the offscreen MediaRecorder, which captures
 *   whatever screen content the operator picked). Parse failures
 *   are surfaced as `EmptyVideoBufferError` upstream (via the
 *   `output.size === 0` branch in `createArchive`), giving the
 *   operator a clear failure surface rather than a corrupt archive.
 */
|
||||
|
||||
import { Decoder, tools } from 'ts-ebml';
|
||||
import { Muxer, ArrayBufferTarget } from 'webm-muxer';
|
||||
|
||||
import { Logger } from '../shared/logger';
|
||||
import type { VideoSegment } from '../shared/types';
|
||||
|
||||
// Module-scoped logger tagged 'Remux'; diagnostics in this file go through it.
const logger = new Logger('Remux');
|
||||
|
||||
/**
 * Codec identifier the muxer expects for VP9 in the Matroska
 * codec-id taxonomy. See https://www.matroska.org/technical/codec_specs.html
 */
const VP9_MATROSKA_CODEC = 'V_VP9';

/**
 * Nominal frame interval (milliseconds) added after the last frame of
 * each segment when advancing the running segment base, so the next
 * segment's first frame slots in just after the prior segment's last
 * frame. Mirrors the MediaRecorder cadence (`getDisplayMedia` at
 * ~30 fps → 33 ms/frame). The exact value matters only for the
 * inter-segment gap; +/-3 ms is invisible at human playback timescales.
 */
const NOMINAL_FRAME_INTERVAL_MS = 33;

/**
 * Fallback frame rate hint for the muxer's `video.frameRate`
 * field. Used as metadata only — the muxer does not enforce it.
 */
const DEFAULT_FRAME_RATE = 30;

/**
 * Default pixel dimensions used when no segment yields a usable
 * Video element. These are conservative — they keep the muxer
 * happy even if the first segment is malformed. In practice the
 * MediaRecorder always emits PixelWidth/PixelHeight at segment
 * head, so this is a defense-in-depth fallback.
 */
const FALLBACK_PIXEL_WIDTH = 1024;
const FALLBACK_PIXEL_HEIGHT = 768;
|
||||
|
||||
/**
 * Track info extracted from a segment's Tracks → TrackEntry → Video
 * subtree. `codecPrivate` is optional because VP9 MediaRecorder
 * streams generally do not ship one (Chrome derives the VP9 config
 * from the first keyframe's superframe header).
 */
interface TrackInfo {
  /** Value of the Video element's PixelWidth. */
  width: number;
  /** Value of the Video element's PixelHeight. */
  height: number;
  /** Raw CodecPrivate bytes, when the stream carried any. */
  codecPrivate?: Uint8Array;
}
|
||||
|
||||
/**
 * Read the contents of a Blob into an ArrayBuffer. Uses the web
 * standard `Blob.arrayBuffer()` which is available in Chrome
 * service-worker context (Chrome 76+) — no fallback needed.
 *
 * @param blob - Source Blob.
 * @returns Promise resolving to the blob's bytes as an ArrayBuffer.
 */
async function blobToArrayBuffer(blob: Blob): Promise<ArrayBuffer> {
  return blob.arrayBuffer();
}
|
||||
|
||||
/**
 * View a Node `Buffer` (which ts-ebml uses internally because its
 * `ebml` dep is built for Node Buffer) as a plain `Uint8Array`,
 * without copying. The returned value is the very same object, so
 * both share the same underlying memory.
 *
 * Why: `webm-muxer.addVideoChunkRaw` takes `Uint8Array`. `Buffer`
 * IS a `Uint8Array` subclass (the relation is declared by the Node
 * typings, not by the DOM lib), but to keep call-site types crisp
 * the widening to `Uint8Array` is made explicit here.
 *
 * @param buf - Node Buffer or already-Uint8Array.
 * @returns The same object, typed as `Uint8Array`.
 */
function asUint8Array(buf: Uint8Array): Uint8Array {
  // Identity pass-through: this exists purely for the static type.
  return buf;
}
|
||||
|
||||
/**
 * Walk the decoded EBML element list for a segment and pull out
 * the first Tracks → TrackEntry → Video subtree's PixelWidth,
 * PixelHeight, and (optional) CodecPrivate. Stops at the first
 * complete Video subtree; subsequent video tracks are ignored
 * because Phase 1's MediaRecorder produces exactly one track.
 *
 * Dimensions are reset whenever a new Video subtree opens, so a
 * width from one incomplete subtree is never paired with a height
 * from another.
 *
 * Returns `null` if no Video subtree with both dimensions was found —
 * caller should fall back to {@link FALLBACK_PIXEL_WIDTH} /
 * {@link FALLBACK_PIXEL_HEIGHT} so the muxer can still produce output.
 *
 * @param elements - Output of `Decoder.decode(buffer)`.
 * @returns Track info or null if not derivable.
 */
function pickTrackInfoFromSegment(
  elements: ReturnType<Decoder['decode']>,
): TrackInfo | null {
  let inVideo = false;
  let width: number | null = null;
  let height: number | null = null;
  let codecPrivate: Uint8Array | undefined;

  for (const el of elements) {
    if (el.name === 'Video' && el.type === 'm') {
      // Master element: the decoder reports it on enter and on exit.
      if (el.isEnd) {
        // Leaving the Video subtree — if we got both dimensions we're done.
        if (width !== null && height !== null) {
          return { width, height, codecPrivate };
        }
        inVideo = false;
      } else {
        // Entering a Video subtree: discard any partial dimensions left
        // over from an earlier, incomplete subtree.
        inVideo = true;
        width = null;
        height = null;
      }
    } else if (inVideo && el.name === 'PixelWidth' && el.type === 'u') {
      width = el.value;
    } else if (inVideo && el.name === 'PixelHeight' && el.type === 'u') {
      height = el.value;
    } else if (el.name === 'CodecPrivate' && el.type === 'b') {
      // CodecPrivate lives at TrackEntry level (sibling of Video),
      // not inside Video. Pick the first one seen.
      // NOTE(review): a CodecPrivate that appears AFTER the Video
      // subtree closes is never reached, because we return at Video end.
      if (codecPrivate === undefined && el.data) {
        codecPrivate = asUint8Array(el.data);
      }
    }
  }

  // Element list ended without a Video end marker (e.g. truncated
  // input): still report dimensions if both were seen.
  if (width !== null && height !== null) {
    return { width, height, codecPrivate };
  }
  return null;
}
|
||||
|
||||
/**
 * A single VP9 frame extracted from a segment's SimpleBlock,
 * paired with its keyframe flag and a per-segment-local
 * timestamp in milliseconds.
 */
interface ExtractedFrame {
  /** Raw frame bytes taken from the SimpleBlock. */
  data: Uint8Array;
  /** Keyframe flag of the SimpleBlock the frame came from. */
  isKey: boolean;
  /** Per-segment-local timestamp in milliseconds. */
  localTimestampMs: number;
}
|
||||
|
||||
/**
 * Result of {@link extractFramesFromSegment}. `segmentDurationMs`
 * is the duration the SEGMENT consumed on its own local timeline
 * (last frame's timestamp + {@link NOMINAL_FRAME_INTERVAL_MS}, or 0
 * when the segment yielded no frames). The caller adds this to
 * `segmentBaseMs` so the next segment's first frame doesn't collide
 * with this segment's last.
 */
interface SegmentExtraction {
  /** Frames in the order they were encountered in the segment. */
  frames: ExtractedFrame[];
  /** Span of this segment on its own timeline, in milliseconds. */
  segmentDurationMs: number;
  /** Track info found in this segment, or null if none was derivable. */
  trackInfo: TrackInfo | null;
}
|
||||
|
||||
/**
 * Parse a segment's ArrayBuffer via ts-ebml and walk its element
 * list extracting one {@link ExtractedFrame} per frame inside
 * each SimpleBlock. Tracks the current Cluster Timestamp so each
 * frame's `localTimestampMs` is the absolute segment-local time
 * (cluster timestamp + per-block offset).
 *
 * The keyframe flag is taken from `tools.ebmlBlock(buf).keyframe`
 * which decodes the SimpleBlock's flags byte per the Matroska
 * spec (bit 7 of the byte after the variable-length track number
 * and the 16-bit timestamp delta).
 *
 * Multi-frame SimpleBlocks (lacing) are flattened — each frame
 * becomes its own entry sharing the same `localTimestampMs`.
 * MediaRecorder under Chrome rarely uses lacing for VP9 (typical
 * SimpleBlock = 1 frame).
 *
 * NOTE(review): cluster/block timestamps are used as milliseconds
 * without reading the segment's TimestampScale — presumably the
 * recorder always emits the 1 ms default; confirm.
 *
 * @param buffer - The segment's bytes.
 * @returns Frames + measured segment duration + extracted track info.
 */
function extractFramesFromSegment(
  buffer: ArrayBuffer,
): SegmentExtraction {
  const decoder = new Decoder();
  const elements = decoder.decode(buffer);
  const trackInfo = pickTrackInfoFromSegment(elements);

  const frames: ExtractedFrame[] = [];
  let currentClusterTs = 0;
  let lastFrameTimestampMs = 0;
  let inCluster = false;

  for (const el of elements) {
    if (el.name === 'Cluster' && el.type === 'm') {
      // Master element: open marker enters the cluster, end marker leaves it.
      inCluster = !el.isEnd;
    } else if (inCluster && el.name === 'Timestamp' && el.type === 'u') {
      // Matroska v4 renamed Cluster.Timecode → Cluster.Timestamp.
      // ts-ebml's schema reflects the rename, so this is the
      // correct name (not 'Timecode').
      currentClusterTs = el.value;
    } else if (el.name === 'SimpleBlock' && el.type === 'b' && el.data) {
      const sb = tools.ebmlBlock(el.data);
      // Block timecode is relative to the enclosing cluster's timestamp.
      const blockGlobalMs = currentClusterTs + sb.timecode;
      for (const frame of sb.frames) {
        frames.push({
          data: asUint8Array(frame),
          isKey: sb.keyframe,
          localTimestampMs: blockGlobalMs,
        });
        lastFrameTimestampMs = blockGlobalMs;
      }
    }
  }

  // Segment duration covers the last frame plus a nominal interval
  // — the next segment's first frame slots in just after. A segment
  // with no frames contributes nothing to the timeline.
  const segmentDurationMs =
    frames.length === 0 ? 0 : lastFrameTimestampMs + NOMINAL_FRAME_INTERVAL_MS;

  return { frames, segmentDurationMs, trackInfo };
}
|
||||
|
||||
/**
 * Remux a sequence of self-contained WebM `VideoSegment` blobs
 * into a single WebM Blob with one EBML header and one Segment
 * element. Each input segment carries its own EBML+Segment+
 * Cluster tree (output of `MediaRecorder.start()` →
 * `dataavailable` → `MediaRecorder.stop()` cycle); the output
 * concatenates every VP9 frame across all input segments into a
 * single Matroska timeline with monotonically increasing
 * timestamps.
 *
 * Empty results are reported as an EMPTY Blob (size 0) in two cases:
 *   - `segments` is empty;
 *   - no segment yielded any frame. Finalizing the muxer here would
 *     produce a header-only, non-empty file, which would bypass the
 *     caller's `output.size === 0` check (the one that raises
 *     `EmptyVideoBufferError` in `createArchive`).
 *
 * Caller contract:
 *   - Input segments must be self-contained WebM bytes per
 *     D-13's restart-segments lifecycle.
 *   - Input order is normalized internally (sorted by
 *     `timestamp` ascending — defensive copy; the input array is
 *     not mutated).
 *   - Output type is `video/webm`.
 *
 * @param segments - Sequence of WebM segments produced by the
 *   offscreen MediaRecorder rotation lifecycle.
 * @returns Single-EBML-headered WebM Blob covering every VP9
 *   frame across all input segments with adjusted monotonic
 *   timestamps, or an empty Blob when there is nothing to mux.
 */
export async function remuxSegments(segments: VideoSegment[]): Promise<Blob> {
  // Guard clause exception: empty input is the most common
  // failure surface in the saveArchive path, and the early-return
  // body is one statement long — clearer than nesting the entire
  // remux inside an else.
  if (segments.length === 0) {
    logger.log('Empty input — returning empty Blob');
    return new Blob([], { type: 'video/webm' });
  }

  const sorted = [...segments].sort((a, b) => a.timestamp - b.timestamp);
  logger.log(
    `Remuxing ${sorted.length} segments; sizes:`,
    sorted.map((s) => s.data.size),
  );

  // First pass: extract every segment up front. The muxer config
  // needs track info before any chunk can be pushed, so the muxer
  // is driven in a second pass once all frames are in memory.
  const extractions: SegmentExtraction[] = [];
  for (const seg of sorted) {
    const ab = await blobToArrayBuffer(seg.data);
    const extraction = extractFramesFromSegment(ab);
    extractions.push(extraction);
    logger.log(
      `Segment ts=${seg.timestamp}: ${extraction.frames.length} frames, ` +
        `duration=${extraction.segmentDurationMs}ms, ` +
        `trackInfo=${
          extraction.trackInfo
            ? `${extraction.trackInfo.width}x${extraction.trackInfo.height}`
            : 'null'
        }`,
    );
  }

  // Guard clause exception: with zero frames the muxer would still
  // emit a header-only file. Return an empty Blob instead so the
  // caller's size check reports the failure.
  const availableFrames = extractions.reduce(
    (sum, extraction) => sum + extraction.frames.length,
    0,
  );
  if (availableFrames === 0) {
    logger.warn(
      `No frames extracted from ${extractions.length} segments — returning empty Blob`,
    );
    return new Blob([], { type: 'video/webm' });
  }

  // Pick track info from the FIRST segment that exposes one.
  // Fall back to conservative defaults — the muxer needs *some*
  // width/height in its video config.
  const pickedTrackInfo =
    extractions.find((extraction) => extraction.trackInfo !== null)
      ?.trackInfo ?? null;
  if (pickedTrackInfo === null) {
    logger.warn(
      `pickTrackInfoFromSegment returned null for all ${extractions.length} segments — ` +
        `falling back to ${FALLBACK_PIXEL_WIDTH}x${FALLBACK_PIXEL_HEIGHT}`,
    );
  }
  const trackInfo: TrackInfo = pickedTrackInfo ?? {
    width: FALLBACK_PIXEL_WIDTH,
    height: FALLBACK_PIXEL_HEIGHT,
  };

  const target = new ArrayBufferTarget();
  const muxer = new Muxer({
    target,
    video: {
      codec: VP9_MATROSKA_CODEC,
      width: trackInfo.width,
      height: trackInfo.height,
      frameRate: DEFAULT_FRAME_RATE,
    },
    // No `audio` block — Phase 1 SPEC §9 / CAP-01 excludes audio.
    type: 'webm',
    firstTimestampBehavior: 'offset',
  });

  // Chunk metadata is identical for every frame, so build it once.
  // EncodedVideoChunkMetadata.decoderConfig.codec is required by the
  // WebCodecs typing; 'vp09.00.10.08' is the WebCodecs codec string
  // for VP9 Profile 0, level 1.0, 8-bit. Only attached when a
  // CodecPrivate was actually extracted — rare for MediaRecorder
  // output, which per the notes in this file omits CodecPrivate.
  const chunkMeta = trackInfo.codecPrivate
    ? {
        decoderConfig: {
          codec: 'vp09.00.10.08',
          description: trackInfo.codecPrivate,
        },
      }
    : undefined;

  // Second pass: shift each segment's local timestamps by the
  // accumulated duration of all prior segments and feed the muxer.
  let segmentBaseMs = 0;
  let totalFramesEmitted = 0;
  for (const extraction of extractions) {
    for (const frame of extraction.frames) {
      const globalMs = segmentBaseMs + frame.localTimestampMs;
      // The muxer takes microseconds.
      const globalUs = globalMs * 1_000;
      muxer.addVideoChunkRaw(
        frame.data,
        frame.isKey ? 'key' : 'delta',
        globalUs,
        chunkMeta,
      );
      totalFramesEmitted++;
    }
    segmentBaseMs += extraction.segmentDurationMs;
  }

  muxer.finalize();
  const outputBuffer = target.buffer;
  const outputBlob = new Blob([outputBuffer], { type: 'video/webm' });
  logger.log(
    `Remux complete: ${totalFramesEmitted} frames, ` +
      `total timeline=${segmentBaseMs}ms, output=${outputBlob.size} bytes`,
  );
  return outputBlob;
}
|
||||
Reference in New Issue
Block a user