Milestone v1 (v2.0.0): Mokosh — Session Capture #1

Merged
strategy155 merged 297 commits from gsd/phase-04-harden-clean-up-optional into main 2026-05-31 15:34:17 +00:00
Showing only changes of commit 41e94d5daa - Show all commits

View File

@@ -0,0 +1,434 @@
/**
* @file src/background/webm-remux.ts
*
* WebM remux pipeline for Plan 01-08 (CONTEXT.md amendment D-14-remux —
* disambiguated from the historical "D-14: Not applicable" tab-switch
* decision recorded in the original decisions block; see CONTEXT.md
* §"Amendment ... D-14-remux: WebM remux via ts-ebml + webm-muxer"
* for the canonical statement, B-02 fix). Replaces
* `mergeVideoSegments()` in `src/background/index.ts` (which
* file-concatenated the offscreen recorder's 3 self-contained ~10 s
* WebM segments — producing a multi-EBML-header file that mpv,
* Chrome's HTMLMediaElement, and ffprobe's `format=duration` all
* truncate to the first segment's local Info.Duration ~9.94 s).
*
* The new pipeline produces a single-EBML-headered WebM that any
* standards-compliant Matroska parser reads as the full ~30 s
* timeline. See `.planning/debug/d13-multi-ebml-concat-unplayable.md`
* for the byte-level evidence and library-survey rationale that
* locked the `ts-ebml` (parse) + `webm-muxer` (write) choice.
*
* ## Algorithm
*
* 1. Sort input segments by `timestamp` ascending (defensive — the
* offscreen recorder already emits in order, but a copy+sort is
* cheap relative to the parse/mux pass).
* 2. Parse the FIRST segment via `ts-ebml.Decoder` to derive track
* info: PixelWidth, PixelHeight, optional CodecPrivate. Needed
* for the muxer's `video` config.
* 3. Create one `Muxer<ArrayBufferTarget>` configured for VP9
* (`codec: 'V_VP9'`) with `type: 'webm'` and
* `firstTimestampBehavior: 'offset'` (forgives a non-zero first
* frame, in case the muxer rejects the first segment's "start
* at 0" timestamp after we add the prior-segment offset).
* 4. For each segment, accumulate a `segmentBaseMs` counter and
* feed every SimpleBlock through `addVideoChunkRaw(data, type,
* globalUs)` where `globalUs = (segmentBaseMs + clusterTs +
* blockOffset) * 1_000`. Frame data comes from
* `tools.ebmlBlock(simpleBlock.data).frames` (each SimpleBlock
* typically carries 1 VP9 frame; multi-frame lacing is rare in
* MediaRecorder output but supported here).
* 5. After every segment, advance `segmentBaseMs` by that
* segment's measured content duration (last-frame timestamp
* + nominal frame interval).
* 6. `muxer.finalize()` → wrap `target.buffer` in a `Blob`.
*
* ## Style notes
*
* - Extensive JSDoc per the project's global style guide.
* - No `as any`, no `@ts-ignore`. The two libraries' published
* type surfaces (see `node_modules/{ts-ebml,webm-muxer}/`)
* are used directly.
* - `if (cond) return ...` guard-clause exceptions to the
* "prefer if-else over early return" project rule are documented
* inline where they appear (empty input, missing track info).
* The user's CLAUDE.md acknowledges guard clauses as a clarity
* exception worth preserving over a deeper indent.
* - All diagnostics via `Logger('Remux')` — no bare `console.log`.
* - Threat-model note (T-1-08-01, T-1-08-03 in PLAN.md §threat_model):
* ts-ebml + webm-muxer process attacker-influenced bytes (the input
* segments come from the offscreen MediaRecorder, which captures
* whatever screen content the operator picked). Parse failures
* are surfaced as `EmptyVideoBufferError` upstream (via the
* `output.size === 0` branch in `createArchive`), giving the
* operator a clear failure surface rather than a corrupt archive.
*/
import { Decoder, tools } from 'ts-ebml';
import { Muxer, ArrayBufferTarget } from 'webm-muxer';
import { Logger } from '../shared/logger';
import type { VideoSegment } from '../shared/types';
/**
* Module-scoped logger instance constructed with the 'Remux' label.
* Every diagnostic emitted by this file goes through it rather than
* through bare `console.*` calls.
*/
const logger = new Logger('Remux');
/**
* Codec identifier passed as `video.codec` when the muxer is
* constructed. 'V_VP9' is the VP9 entry in the Matroska codec-id
* taxonomy. See https://www.matroska.org/technical/codec_specs.html
*/
const VP9_MATROSKA_CODEC = 'V_VP9';
/**
* Nominal frame interval, in milliseconds, added after the last
* frame of each segment when computing that segment's duration, so
* the next segment's first frame slots in just after the prior
* segment's last frame. 1000 ms / 30 fps ≈ 33.3 ms, rounded down to
* a whole millisecond; intended to mirror the MediaRecorder cadence
* (`getDisplayMedia` at ~30 fps).
* The exact value matters only for the inter-segment gap; +/-3 ms
* is invisible at human playback timescales.
*/
const NOMINAL_FRAME_INTERVAL_MS = 33;
/**
* Fallback frame rate hint, in frames per second, passed as the
* muxer's `video.frameRate` field. Intended as metadata only.
* NOTE(review): the statement that the muxer does not enforce this
* value comes from the original author — confirm against the
* webm-muxer documentation.
*/
const DEFAULT_FRAME_RATE = 30;
/**
* Default pixel dimensions used when no segment yields a usable
* Video element. These are conservative — they let the muxer be
* configured even if the first segment is malformed. The original
* author notes that MediaRecorder normally emits
* PixelWidth/PixelHeight at segment head, so this is a
* defense-in-depth fallback rather than an expected path.
*/
const FALLBACK_PIXEL_WIDTH = 1024;
/** Height counterpart of {@link FALLBACK_PIXEL_WIDTH}. */
const FALLBACK_PIXEL_HEIGHT = 768;
/**
 * Video-track parameters read from a segment's
 * Tracks → TrackEntry → Video subtree and used to configure the
 * output muxer.
 */
interface TrackInfo {
  /** Frame width in pixels (Matroska `PixelWidth`). */
  width: number;
  /** Frame height in pixels (Matroska `PixelHeight`). */
  height: number;
  /**
   * Raw `CodecPrivate` payload, when the track carries one. Optional
   * because, per the original author's notes, VP9 MediaRecorder
   * streams generally omit it.
   */
  codecPrivate?: Uint8Array;
}
/**
 * Materialize a Blob's bytes as an ArrayBuffer.
 *
 * Thin wrapper over the standard `Blob.arrayBuffer()` method; the
 * original author notes it is available in Chrome's service-worker
 * context (Chrome 76+), so no FileReader fallback is provided.
 *
 * @param blob - Blob whose contents should be read.
 * @returns Promise that resolves with the blob's bytes.
 */
async function blobToArrayBuffer(blob: Blob): Promise<ArrayBuffer> {
  const bytes = await blob.arrayBuffer();
  return bytes;
}
/**
 * Present a byte container as a plain `Uint8Array` without copying.
 *
 * ts-ebml hands back Node `Buffer` objects; a `Buffer` is already a
 * `Uint8Array` subclass, so the very same object is returned and it
 * keeps sharing its underlying memory. The function exists purely so
 * call sites that feed the muxer read as dealing in `Uint8Array`.
 *
 * @param buf - A `Uint8Array` (including a Node `Buffer`).
 * @returns The identical object, typed as `Uint8Array`.
 */
function asUint8Array(buf: Uint8Array): Uint8Array {
  // Identity at runtime — only the static type is narrowed.
  const view: Uint8Array = buf;
  return view;
}
/**
 * Scan a decoded EBML element list and return the pixel dimensions
 * (plus optional CodecPrivate) of the first video track found.
 *
 * State is scoped to the enclosing TrackEntry:
 *
 * - PixelWidth / PixelHeight are only accepted while inside a Video
 *   master element.
 * - CodecPrivate is a TrackEntry-level sibling of Video and may be
 *   written before OR after it, so the result is produced when the
 *   TrackEntry closes rather than when Video closes.
 * - A CodecPrivate seen in a TrackEntry that turns out to have no
 *   complete Video subtree is discarded at the TrackEntry boundary,
 *   so it cannot be attributed to a later video track.
 * - A Video element that supplied only one dimension does not leak
 *   that value into a later Video element.
 *
 * Streams that carry no TrackEntry markers (or no closing markers)
 * still work: the first complete Video subtree wins, resolved either
 * when another Video starts or when the element list is exhausted.
 *
 * Returns `null` if no complete Video subtree was found — the caller
 * should fall back to {@link FALLBACK_PIXEL_WIDTH} /
 * {@link FALLBACK_PIXEL_HEIGHT} so the muxer can still be configured.
 *
 * @param elements - Output of `Decoder.decode(buffer)`.
 * @returns Track info, or null if not derivable.
 */
function pickTrackInfoFromSegment(
  elements: ReturnType<Decoder['decode']>,
): TrackInfo | null {
  let inVideo = false;
  let width: number | null = null;
  let height: number | null = null;
  let codecPrivate: Uint8Array | undefined;

  // Current state as a TrackInfo, or null while a dimension is missing.
  const snapshot = (): TrackInfo | null =>
    width !== null && height !== null ? { width, height, codecPrivate } : null;

  for (const el of elements) {
    if (el.type === 'm' && el.name === 'TrackEntry') {
      // Any TrackEntry boundary (opening or closing marker) settles
      // the previous entry: emit it if complete, otherwise drop what
      // was gathered so it cannot bleed into the next track.
      const found = snapshot();
      if (found !== null) {
        return found;
      }
      inVideo = false;
      width = null;
      height = null;
      codecPrivate = undefined;
    } else if (el.type === 'm' && el.name === 'Video') {
      if (el.isEnd) {
        inVideo = false;
      } else {
        // A new Video subtree begins. If an earlier one was already
        // complete (stream without TrackEntry markers) it wins.
        const found = snapshot();
        if (found !== null) {
          return found;
        }
        inVideo = true;
        width = null;
        height = null;
      }
    } else if (inVideo && el.type === 'u' && el.name === 'PixelWidth') {
      width = el.value;
    } else if (inVideo && el.type === 'u' && el.name === 'PixelHeight') {
      height = el.value;
    } else if (el.type === 'b' && el.name === 'CodecPrivate') {
      // First CodecPrivate within the current TrackEntry scope. The
      // decoder's Buffer is already a Uint8Array, so no conversion
      // is needed.
      if (codecPrivate === undefined && el.data) {
        codecPrivate = el.data;
      }
    }
  }

  // No closing marker arrived — return whatever is complete.
  return snapshot();
}
/**
 * One encoded video frame lifted out of a segment's SimpleBlock,
 * together with the metadata needed to re-submit it to the muxer.
 */
interface ExtractedFrame {
  /** Encoded frame payload. */
  data: Uint8Array;
  /** True when the owning SimpleBlock was flagged as a keyframe. */
  isKey: boolean;
  /**
   * Timestamp in milliseconds on the segment's own timeline
   * (cluster timestamp plus the block's offset).
   */
  localTimestampMs: number;
}
/**
 * Everything {@link extractFramesFromSegment} learns about one
 * input segment.
 */
interface SegmentExtraction {
  /** Frames in the order their SimpleBlocks appeared in the segment. */
  frames: ExtractedFrame[];
  /**
   * Time the segment occupies on its own local timeline: the last
   * frame's timestamp plus {@link NOMINAL_FRAME_INTERVAL_MS}, or 0
   * when the segment held no frames. The caller adds this to its
   * running base offset so the next segment's first frame does not
   * collide with this segment's last.
   */
  segmentDurationMs: number;
  /** Track parameters found in this segment, or null if none were. */
  trackInfo: TrackInfo | null;
}
/**
 * Decode one segment with ts-ebml and collect every frame carried
 * by its SimpleBlocks.
 *
 * While walking the element list the most recent Cluster-level
 * `Timestamp` is remembered; each SimpleBlock's own `timecode`
 * offset is added to it to obtain the frame's segment-local time in
 * milliseconds. (The element name is 'Timestamp', not the older
 * 'Timecode' — the original author notes ts-ebml's schema follows
 * the Matroska v4 rename.)
 *
 * The keyframe flag comes from `tools.ebmlBlock(...).keyframe`.
 * A SimpleBlock holding several frames (lacing) is flattened: every
 * frame becomes its own entry and all of them share the block's
 * timestamp.
 *
 * @param buffer - The segment's bytes.
 * @returns Extracted frames, the segment's measured duration, and
 *   any track info found in the segment.
 */
function extractFramesFromSegment(
  buffer: ArrayBuffer,
): SegmentExtraction {
  const elements = new Decoder().decode(buffer);
  const trackInfo = pickTrackInfoFromSegment(elements);

  const collected: ExtractedFrame[] = [];
  let clusterMs = 0;
  let lastMs = 0;
  let insideCluster = false;

  for (const el of elements) {
    if (el.type === 'm' && el.name === 'Cluster') {
      // Opening marker enters the cluster, closing marker leaves it.
      insideCluster = !el.isEnd;
      continue;
    }
    if (insideCluster && el.type === 'u' && el.name === 'Timestamp') {
      clusterMs = el.value;
      continue;
    }
    if (el.type === 'b' && el.name === 'SimpleBlock' && el.data) {
      const block = tools.ebmlBlock(el.data);
      const blockMs = clusterMs + block.timecode;
      const blockFrames = block.frames.map(
        (payload): ExtractedFrame => ({
          data: asUint8Array(payload),
          isKey: block.keyframe,
          localTimestampMs: blockMs,
        }),
      );
      // Only a block that actually contributed frames moves the
      // "last frame" marker forward.
      if (blockFrames.length > 0) {
        collected.push(...blockFrames);
        lastMs = blockMs;
      }
    }
  }

  // The segment spans up to its last frame plus one nominal frame
  // interval; a segment without frames contributes no time at all.
  let segmentDurationMs = 0;
  if (collected.length !== 0) {
    segmentDurationMs = lastMs + NOMINAL_FRAME_INTERVAL_MS;
  }

  return { frames: collected, segmentDurationMs, trackInfo };
}
/**
 * Remux a sequence of self-contained WebM `VideoSegment` blobs into
 * a single WebM Blob with one EBML header and one Segment element.
 *
 * Each input segment carries its own EBML+Segment+Cluster tree. The
 * frames of all segments are re-submitted to one muxer on a single
 * timeline: a running base offset is advanced by each segment's
 * measured duration, and every frame's global timestamp is that
 * base plus its segment-local timestamp.
 *
 * An empty Blob is returned in two situations, so that the upstream
 * empty-output check described in this file's header can reject the
 * result instead of archiving an unplayable file:
 *
 * - `segments` is empty;
 * - the segments decode but contain no frame at all (otherwise the
 *   muxer would be finalized with headers only, yielding a non-empty
 *   blob that holds no video).
 *
 * Decoder exceptions are not caught here; they propagate to the
 * caller.
 *
 * Caller contract:
 * - Input segments must be self-contained WebM bytes.
 * - Input order is normalized internally (a copy is sorted by
 *   `timestamp` ascending; the caller's array is not mutated).
 * - Output type is `video/webm`.
 *
 * @param segments - WebM segments produced by the offscreen
 *   MediaRecorder rotation lifecycle.
 * @returns Single-EBML-headered WebM Blob covering every frame of
 *   every input segment, or an empty Blob when there is nothing to
 *   mux.
 */
export async function remuxSegments(segments: VideoSegment[]): Promise<Blob> {
  // Guard clause exception: the empty-input body is one statement
  // long — clearer than nesting the entire remux inside an else.
  if (segments.length === 0) {
    logger.log('Empty input — returning empty Blob');
    return new Blob([], { type: 'video/webm' });
  }

  const sorted = [...segments].sort((a, b) => a.timestamp - b.timestamp);
  logger.log(
    `Remuxing ${sorted.length} segments; sizes:`,
    sorted.map((s) => s.data.size),
  );

  // First pass: extract everything. The muxer config needs track
  // info before any chunk can be pushed, so extraction happens up
  // front and the muxer is driven in a second pass. Segments are
  // read one at a time to keep peak memory bounded.
  const extractions: SegmentExtraction[] = [];
  for (const seg of sorted) {
    const ab = await blobToArrayBuffer(seg.data);
    const extraction = extractFramesFromSegment(ab);
    extractions.push(extraction);
    logger.log(
      `Segment ts=${seg.timestamp}: ${extraction.frames.length} frames, ` +
        `duration=${extraction.segmentDurationMs}ms, ` +
        `trackInfo=${
          extraction.trackInfo
            ? `${extraction.trackInfo.width}x${extraction.trackInfo.height}`
            : 'null'
        }`,
    );
  }

  // Guard clause exception: with nothing to mux, finalizing would
  // still emit container headers — a non-empty blob with no video.
  // Return an empty Blob so the failure is visible by size.
  const totalFrames = extractions.reduce(
    (sum, extraction) => sum + extraction.frames.length,
    0,
  );
  if (totalFrames === 0) {
    logger.warn(
      `No video frames found in ${extractions.length} segments — returning empty Blob`,
    );
    return new Blob([], { type: 'video/webm' });
  }

  // Track info comes from the FIRST segment that exposes one, with
  // conservative defaults as a last resort — the muxer needs some
  // width/height in its video config.
  const pickedTrackInfo =
    extractions.find((extraction) => extraction.trackInfo !== null)
      ?.trackInfo ?? null;
  if (pickedTrackInfo === null) {
    logger.warn(
      `pickTrackInfoFromSegment returned null for all ${extractions.length} segments — ` +
        `falling back to ${FALLBACK_PIXEL_WIDTH}x${FALLBACK_PIXEL_HEIGHT}`,
    );
  }
  const trackInfo: TrackInfo = pickedTrackInfo ?? {
    width: FALLBACK_PIXEL_WIDTH,
    height: FALLBACK_PIXEL_HEIGHT,
  };

  const target = new ArrayBufferTarget();
  const muxer = new Muxer({
    target,
    video: {
      codec: VP9_MATROSKA_CODEC,
      width: trackInfo.width,
      height: trackInfo.height,
      frameRate: DEFAULT_FRAME_RATE,
    },
    // No `audio` block — Phase 1 SPEC §9 / CAP-01 excludes audio.
    type: 'webm',
    firstTimestampBehavior: 'offset',
  });

  // Chunk metadata is identical for every frame, so build it once.
  // It is only supplied when a CodecPrivate was actually extracted;
  // 'vp09.00.10.08' is the WebCodecs codec string for VP9 profile 0,
  // level 1.0, 8-bit (required by the decoderConfig typing).
  const chunkMeta = trackInfo.codecPrivate
    ? {
        decoderConfig: {
          codec: 'vp09.00.10.08',
          description: trackInfo.codecPrivate,
        },
      }
    : undefined;

  let segmentBaseMs = 0;
  for (const extraction of extractions) {
    for (const frame of extraction.frames) {
      // The muxer takes microseconds; extraction works in ms.
      const globalUs = (segmentBaseMs + frame.localTimestampMs) * 1_000;
      muxer.addVideoChunkRaw(
        frame.data,
        frame.isKey ? 'key' : 'delta',
        globalUs,
        chunkMeta,
      );
    }
    // The next segment starts where this one ended.
    segmentBaseMs += extraction.segmentDurationMs;
  }

  muxer.finalize();
  const outputBlob = new Blob([target.buffer], { type: 'video/webm' });
  logger.log(
    `Remux complete: ${totalFrames} frames, ` +
      `total timeline=${segmentBaseMs}ms, output=${outputBlob.size} bytes`,
  );
  return outputBlob;
}