// tests/offscreen/webm-playback.test.ts
//
// RED-gate test for debug session webm-playback-freeze.
//
// Empirically proves the playback freeze observed in
// tests/fixtures/last_30sec.webm (Phase 1, Plan 01-07 smoke retest after the
// D-12 base64-transfer fix landed at commit bf07619).
//
// Hypothesis under test (per .planning/debug/webm-playback-freeze.md):
//
//   The single-continuous MediaRecorder + 30 s age-trim approach (D-09..D-11)
//   drops VP9 P-frames' keyframe references when the buffer trims out the
//   middle of the recording. VP9's `kf_max_dist=100` (Chrome default) puts
//   keyframes every ~3-5 s. With chunks emitted every 2 s (D-09 timeslice),
//   the boundary chunks contain only P-frames referencing keyframes that have
//   been evicted. The decoder therefore fails ~1 s into playback in Chrome,
//   and `ffmpeg -v warning -i <file> -f null -` emits multiple
//   "Error submitting packet to decoder: Invalid data found" lines plus a
//   "File ended prematurely" tail-error.
//
// This test runs ffmpeg's CLI (an external dependency — /usr/bin/ffmpeg)
// over the COMMITTED fixture and asserts:
//   * zero "Error submitting packet to decoder" lines, AND
//   * no "File ended prematurely" line.
//
// Today (commit bf07619) the test goes RED because the fixture was produced
// by the single-continuous-recorder path. The D-13 fix (restart-segments,
// activate the pre-staged skeleton in src/offscreen/recorder.ts) will produce
// a fresh fixture whose decode is clean — at which point this test flips
// GREEN. See `tests/offscreen/segment-keyframes.test.ts` for the unit-level
// algorithmic guard that does NOT require regenerating the fixture.
//
// Skip discipline: if ffmpeg is missing from the environment the test
// auto-skips rather than failing. CI ships ffmpeg per `smoke.sh` so this is
// a developer-convenience fence, not a behavioural softening.
//
// --- 2026-05-16 amendment: D-13 architecture failure RED tests ---
//
// Debug session `.planning/debug/d13-multi-ebml-concat-unplayable.md` proved
// the existing two assertions ABOVE pass under D-13 only because they check
// structural validity (ffmpeg null-decode tolerates the multi-EBML-header
// concat by silently reading segments 1+2 and dropping segment 3, and by
// collapsing all segments onto seg1's local timestamp axis so no muxer
// "File ended prematurely" warning fires). Players that respect Matroska's
// segment-info Duration element (mpv, Chrome's HTMLMediaElement, ffprobe's
// `format=duration`) read 9.94 s — the FIRST segment's metadata duration —
// and stop. The committed 1.6 MB fixture contains ~30 s of valid VP9 frames
// but presents as ~10 s of content to operators and tests.
//
// The "container-level playable duration" describe block below adds the
// assertion the closure check missed on 2026-05-15: that ffprobe-reported
// format duration EXCEEDS 25_000 ms for the canonical fixture. This is
// RED today under D-13 and stays RED until the multi-EBML concat at
// src/background/index.ts mergeVideoSegments() is replaced with a true
// remux that writes a single EBML header whose Info.Duration covers the
// whole ~30 s span.

import { describe, it, expect } from 'vitest';
import { existsSync, statSync } from 'node:fs';
import { spawnSync } from 'node:child_process';
import { fileURLToPath } from 'node:url';
import { dirname, resolve } from 'node:path';

const here = dirname(fileURLToPath(import.meta.url));
const FIXTURE_PATH = resolve(here, '..', 'fixtures', 'last_30sec.webm');
const FFMPEG_BIN = '/usr/bin/ffmpeg';
const FFPROBE_BIN = '/usr/bin/ffprobe';

// Cap: a clean 30-second WebM decoded with `-f null` finishes well under
// 10 s on commodity hardware. If we ever exceed this we want a hard failure,
// not a hung CI job.
const FFMPEG_TIMEOUT_MS = 30_000;

// Playable-duration floor. The recorder rotates every 10 s and keeps 3
// segments (D-13 / SEGMENT_DURATION_MS × MAX_SEGMENTS = 30_000 ms). The
// rotation lifecycle can drop a partial sub-second at each boundary so the
// final remux file is bounded by [~27_000, ~30_000] ms in steady state. We
// gate at 25_000 ms to keep slack for boundary noise but still firmly above
// the broken-architecture failure mode (9_940 ms — first segment only).
const MIN_PLAYABLE_DURATION_MS = 25_000;

// Matches the `time=HH:MM:SS.cc` field of an ffmpeg `-stats` progress line.
const STATS_TIME_PATTERN = /time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})/g;

/** True when the ffmpeg binary exists at FFMPEG_BIN and is a regular file. */
function ffmpegAvailable(): boolean {
  try {
    return existsSync(FFMPEG_BIN) && statSync(FFMPEG_BIN).isFile();
  } catch {
    return false;
  }
}

/** True when the ffprobe binary exists at FFPROBE_BIN and is a regular file. */
function ffprobeAvailable(): boolean {
  try {
    return existsSync(FFPROBE_BIN) && statSync(FFPROBE_BIN).isFile();
  } catch {
    return false;
  }
}

interface DecodeResult {
  stderr: string;
  packetErrorCount: number;
  endedPrematurely: boolean;
}

/**
 * Fail loudly when a `spawnSync` child did not run to a normal exit.
 *
 * The signal check comes first so a timeout (which kills the child with a
 * signal) keeps reporting the "killed by signal" message. The `error` check
 * then covers the case where the binary could not be executed at all: there
 * `signal` is null and the captured pipes are null, so without this guard the
 * callers would parse an empty string and report "no problems found".
 *
 * @param tool - Human-readable tool name used in the error message.
 * @param proc - The `signal` / `error` fields of a `spawnSync` result.
 * @throws If the child was killed by a signal or could not be executed.
 */
function assertSpawnCompleted(
  tool: string,
  proc: { signal: string | null; error?: Error | undefined },
): void {
  if (proc.signal !== null) {
    throw new Error(`${tool} was killed by signal ${proc.signal}`);
  }
  if (proc.error !== undefined) {
    throw new Error(`${tool} could not be executed: ${proc.error.message}`);
  }
}

/**
 * Run ffmpeg in `-f null` mode to dry-decode a WebM fixture without writing
 * any output. Returns the captured stderr plus parsed counters for the two
 * signals we care about: per-packet decoder errors and the
 * "File ended prematurely" tail-error.
 *
 * Why spawnSync (and not execFileSync):
 *   execFileSync returns ONLY stdout — we cannot read the stderr pipe on the
 *   success path. ffmpeg exits 0 even when it emitted per-packet decode
 *   errors (with `-f null -`), so the diagnostic signal lives on stderr
 *   regardless of exit code. spawnSync exposes both pipes uniformly.
 *
 * The IN-04 fix retired the parallel `decodeDryRun(execFileSync)` helper —
 * the spawnSync path was always the actual code path used by the assertions
 * below; the execFile variant existed only as a documentation foil and
 * required a `void decodeDryRun` noUnusedLocals appeasement hack.
 *
 * Flags:
 *   -nostdin     — never block on a TTY (vitest doesn't provide one)
 *   -v warning   — drop the noise floor; signals we care about are emitted
 *                  at warning level or above
 *   -i <file>    — input file
 *   -f null -    — swallow decoded output; stderr still carries diagnostics
 *
 * @param fixturePath - Absolute path to the WebM file under test.
 * @returns DecodeResult with `stderr`, `packetErrorCount`, `endedPrematurely`.
 * @throws If ffmpeg was killed by a signal (not a clean exit) or could not
 *   be executed at all.
 */
function decodeDryRunStrict(fixturePath: string): DecodeResult {
  const proc = spawnSync(
    FFMPEG_BIN,
    ['-nostdin', '-v', 'warning', '-i', fixturePath, '-f', 'null', '-'],
    {
      stdio: ['ignore', 'ignore', 'pipe'],
      encoding: 'utf-8',
      timeout: FFMPEG_TIMEOUT_MS,
      maxBuffer: 4 * 1024 * 1024,
    },
  );
  assertSpawnCompleted('ffmpeg', proc);
  const stderr = proc.stderr ?? '';
  return {
    stderr,
    packetErrorCount: (stderr.match(/Error submitting packet to decoder/g) ?? []).length,
    endedPrematurely: /File ended prematurely/.test(stderr),
  };
}

/**
 * Read the container-level `format=duration` value from a WebM file via
 * ffprobe. This is the value that mpv, Chrome's HTMLMediaElement, and most
 * Matroska parsers honor when deciding "how long is this file?" — they pick
 * up the first Segment's Info.Duration EBML element and stop seeking past
 * the EBML header's reported length.
 *
 * Returns NaN on parse failure (ffprobe missing input track, malformed
 * float, etc.) so the assertion downstream can produce a precise error
 * message rather than masking a probe-side failure as a duration check.
 *
 * @param fixturePath - Absolute path to the WebM file under test.
 * @returns Container-level duration in milliseconds.
 * @throws If ffprobe was killed by a signal or could not be executed at all.
 */
function probeContainerDurationMs(fixturePath: string): number {
  const proc = spawnSync(
    FFPROBE_BIN,
    [
      '-v', 'error',
      '-show_entries', 'format=duration',
      '-of', 'default=noprint_wrappers=1:nokey=1',
      '-i', fixturePath,
    ],
    {
      stdio: ['ignore', 'pipe', 'pipe'],
      encoding: 'utf-8',
      timeout: FFMPEG_TIMEOUT_MS,
      maxBuffer: 1 * 1024 * 1024,
    },
  );
  assertSpawnCompleted('ffprobe', proc);
  const stdout = (proc.stdout ?? '').trim();
  const seconds = parseFloat(stdout);
  return Number.isFinite(seconds) ? Math.round(seconds * 1000) : Number.NaN;
}

/**
 * Extract the LAST `time=HH:MM:SS.cc` value from ffmpeg `-stats` output and
 * convert it to milliseconds.
 *
 * ffmpeg's `-stats` line on the final frame looks like:
 *   frame=  601 fps=0.0 q=-0.0 Lsize=N/A time=00:00:09.96 bitrate=N/A ...
 * Subsequent stats lines overwrite the earlier ones with monotonically
 * increasing time values, so the last match is the decoded end of timeline.
 *
 * @param stderr - Captured ffmpeg stderr.
 * @returns Decoded timeline position in milliseconds, or NaN when no
 *   `time=` field is present.
 */
function parseLastStatsTimeMs(stderr: string): number {
  const timeMatches = [...stderr.matchAll(STATS_TIME_PATTERN)];
  const last = timeMatches[timeMatches.length - 1];
  if (last === undefined) {
    return Number.NaN;
  }
  // All four groups are mandatory in the pattern; the `?? ''` only satisfies
  // index-access strictness and would surface as NaN if ever hit.
  const field = (index: number): number => parseInt(last[index] ?? '', 10);
  const wholeSeconds = field(1) * 3600 + field(2) * 60 + field(3);
  return wholeSeconds * 1000 + field(4) * 10;
}

describe('webm playback (RED — confirms webm-playback-freeze bug)', () => {
  it.skipIf(!ffmpegAvailable())(
    'ffmpeg dry-run on last_30sec.webm produces zero decoder packet errors',
    () => {
      expect(existsSync(FIXTURE_PATH)).toBe(true);
      const result = decodeDryRunStrict(FIXTURE_PATH);
      // Document the failure in the assertion message so a regression
      // bisect lands on a useful diff, not just "expected 0 received N".
      expect(
        result.packetErrorCount,
        `ffmpeg reported ${result.packetErrorCount} "Error submitting packet to decoder" line(s). ` +
          `This means the VP9 decoder hit P-frames whose reference keyframe was missing from the ` +
          `stream — the symptom of the single-continuous-recorder + 30 s age-trim approach (D-09..D-11). ` +
          `Fix: activate the D-13 restart-segments skeleton at src/offscreen/recorder.ts:298-316 and ` +
          `regenerate the fixture via ./smoke.sh. Full ffmpeg stderr:\n${result.stderr}`,
      ).toBe(0);
    },
  );

  it.skipIf(!ffmpegAvailable())(
    'ffmpeg dry-run on last_30sec.webm does not end prematurely',
    () => {
      expect(existsSync(FIXTURE_PATH)).toBe(true);
      const result = decodeDryRunStrict(FIXTURE_PATH);
      // The "File ended prematurely" line indicates the WebM lacks proper
      // Matroska SegmentSize / Cues finalization because the SW reads the
      // in-memory buffer while the MediaRecorder is still active (no .stop()).
      // The D-13 restart-segments approach fixes this as a side effect —
      // each rotated segment gets a proper .stop() and is therefore finalized.
      expect(
        result.endedPrematurely,
        `ffmpeg reported "File ended prematurely". The WebM container was read mid-stream ` +
          `without calling MediaRecorder.stop(), so SegmentSize/Cues are unwritten. The D-13 ` +
          `restart-segments fix finalizes each segment naturally. Full ffmpeg stderr:\n${result.stderr}`,
      ).toBe(false);
    },
  );
});

describe('webm playable duration (RED — confirms d13-multi-ebml-concat-unplayable bug)', () => {
  it.skipIf(!ffprobeAvailable())(
    'container-level format=duration on last_30sec.webm exceeds 25 s',
    () => {
      // SPEC §10 #7 requires last_30sec.webm to "play back in a browser"
      // covering the most recent ~30 s. Both mpv and Chrome's HTMLMediaElement
      // honor the first Segment's Info.Duration EBML element — which under
      // D-13's multi-EBML concat is hardcoded to the FIRST segment's local
      // duration (~9.94 s for the canonical fixture). That bug means the
      // canonical Phase 1 closure fixture (committed 2026-05-15) presents
      // as ~10 s of content to any standards-compliant Matroska parser,
      // even though segments 2+3 are physically present in the bytes.
      //
      // The fix is a true WebM REMUX of the concatenated segments: parse
      // each segment's clusters via an EBML library, extract the VP9
      // frame payloads with their keyframe/delta flags, and re-mux into
      // a single-EBML-header WebM whose clusters carry monotonically
      // increasing timestamps. The resulting file's Info.Duration will
      // span the full ~30 s window.
      //
      // Floor of MIN_PLAYABLE_DURATION_MS (25_000) accommodates the
      // ~3 s boundary slack from segment rotation while remaining well
      // above the broken-architecture failure mode (9_940 ms).
      expect(existsSync(FIXTURE_PATH)).toBe(true);
      const durationMs = probeContainerDurationMs(FIXTURE_PATH);
      expect(
        durationMs,
        `ffprobe reported container duration=${durationMs} ms for ${FIXTURE_PATH}. ` +
          `Under SPEC §10 #7 the file must present at least ${MIN_PLAYABLE_DURATION_MS} ms ` +
          `of playable content to standards-compliant Matroska parsers (mpv, Chrome). ` +
          `If this value is ~9_940 ms the file is a multi-EBML-header concat (D-13 raw output) ` +
          `where players honor only the first segment's local Info.Duration metadata. ` +
          `Fix: replace mergeVideoSegments() in src/background/index.ts with a true WebM remux ` +
          `(parse + rewrite into a single-EBML-headered WebM with adjusted monotonic timestamps).`,
      ).toBeGreaterThanOrEqual(MIN_PLAYABLE_DURATION_MS);
    },
  );

  it.skipIf(!ffmpegAvailable())(
    'ffmpeg full decode of last_30sec.webm reaches at least 25 s of timeline',
    () => {
      // Defense-in-depth: even if a future ffprobe quirk computes
      // format=duration by summing all reachable cluster timestamps,
      // ffmpeg's full null-decode of the concatenated file collapses
      // segments 2..N onto the first segment's local timestamp axis
      // (verified empirically 2026-05-16: 601 frames decoded, time=09.96)
      // because the multi-EBML format provides no segment-level offset.
      // The remux fix will produce a stream whose decoded `time=...`
      // reaches at least 25 s end-to-end.
      expect(existsSync(FIXTURE_PATH)).toBe(true);
      const proc = spawnSync(
        FFMPEG_BIN,
        ['-nostdin', '-v', 'error', '-stats', '-i', FIXTURE_PATH, '-f', 'null', '-'],
        {
          stdio: ['ignore', 'ignore', 'pipe'],
          encoding: 'utf-8',
          timeout: FFMPEG_TIMEOUT_MS,
          maxBuffer: 4 * 1024 * 1024,
        },
      );
      assertSpawnCompleted('ffmpeg', proc);
      const stderr = proc.stderr ?? '';
      // We want the LAST time= match; see parseLastStatsTimeMs for the
      // shape of the `-stats` line being parsed.
      const decodedMs = parseLastStatsTimeMs(stderr);
      expect(
        decodedMs,
        `ffmpeg decoded only ${decodedMs} ms of timeline from ${FIXTURE_PATH}. ` +
          `SPEC §10 #7 requires at least ${MIN_PLAYABLE_DURATION_MS} ms of decoded content. ` +
          `If decoded duration is ~9_960 ms the multi-EBML concat is collapsing all segments ` +
          `onto seg1's local timestamp axis (the timestamp-collision symptom). ` +
          `Fix: real WebM remux per d13-multi-ebml-concat-unplayable debug session. ` +
          `Full ffmpeg stderr:\n${stderr}`,
      ).toBeGreaterThanOrEqual(MIN_PLAYABLE_DURATION_MS);
    },
  );
});