diff --git a/tests/fixtures/last_30sec.webm b/tests/fixtures/last_30sec.webm new file mode 100644 index 0000000..363e47b Binary files /dev/null and b/tests/fixtures/last_30sec.webm differ diff --git a/tests/offscreen/segment-keyframes.test.ts b/tests/offscreen/segment-keyframes.test.ts new file mode 100644 index 0000000..1881e6d --- /dev/null +++ b/tests/offscreen/segment-keyframes.test.ts @@ -0,0 +1,346 @@ +// tests/offscreen/segment-keyframes.test.ts +// +// RED-gate test for debug session webm-playback-freeze. +// +// Algorithmic / unit-level companion to webm-playback.test.ts. Where that +// test runs ffmpeg over the committed fixture (empirical, requires ffmpeg in +// PATH, requires the fixture to be regenerated after the fix), THIS test +// works against a pure-data model of the recorder behaviour and runs in any +// vitest environment without external tooling. +// +// Model +// ----- +// +// We simulate a 30 fps capture in which Chrome emits a VP9 keyframe every +// `KF_PERIOD_S = 3` seconds (kf_max_dist=100 ≈ 3.33 s; we round down for a +// conservative test). The recorder is configured with +// `MediaRecorder.start(TIMESLICE_MS)`, so chunks fire every 2 s — NOT aligned +// to keyframes. We classify each emitted chunk by whether it contains a +// keyframe ("kf-bearing") or only P-frames ("p-only"). +// +// Failure mode (D-09..D-11 — current behaviour) +// --------------------------------------------- +// +// `addChunk` from src/offscreen/recorder.ts pins the FIRST chunk (which holds +// the WebM header + an initial keyframe) and then ages out chunks older than +// 30 s. After ~30 s of recording, the kept set is: +// +// [chunk_0 (header, kf)] + [chunks emitted in the last 30 s] +// +// The last-30-s tail contains chunks that may have started mid-GOP. When the +// SW concatenates `chunk_0` with the tail, the tail's first P-frames +// reference keyframes that lived in trimmed-out middle chunks. Result: +// decoder error ~1 s past `chunk_0`'s end. 
+// +// Fix (D-13 restart-segments) +// --------------------------- +// +// Stop + restart the MediaRecorder every SEGMENT_MS = 10 s on the same +// MediaStream. Each restart forces a new WebM header AND a new keyframe at +// the segment's start (since the encoder is freshly initialized). Keep the +// last `MAX_SEGMENTS = 3` segments (= 30 s). Each segment in the kept window +// is self-contained — its first chunk is kf-bearing. +// +// Test structure +// -------------- +// +// block 1 — "RED — D-09..D-11 leaks P-only chunks past trim": +// Pure-simulation tests that document the current bug. Pass today; +// they encode the failure mode as executable evidence. (They will keep +// passing post-fix; their purpose is documentation + regression guard +// against re-introducing single-continuous-recorder semantics.) +// +// block 2 — "GREEN-pinning — D-13 contract for restart-segments": +// Pure-simulation tests that pin the segment-based fix's contract. +// Pass today; their purpose is to give the fix's reviewer an +// algorithmic spec to check against before reading code. +// +// block 3 — "production recorder must expose segment-aware buffer (RED)": +// Imports src/offscreen/recorder.ts and asserts a `getSegments` API +// exists with the D-13 shape. GOES RED TODAY because the production +// code only exposes `getBuffer()` (chunk-level). FLIPS GREEN when the +// D-13 skeleton at src/offscreen/recorder.ts:298-316 is activated and +// a `getSegments` export is added. This is the genuine TDD anchor. 
import { describe, it, expect, beforeEach } from 'vitest';

// ─── Recorder model parameters ──────────────────────────────────────────
const TIMESLICE_MS = 2_000; // matches src/offscreen/recorder.ts TIMESLICE_MS
const VIDEO_BUFFER_DURATION_MS = 30_000; // matches VIDEO_BUFFER_DURATION_MS
const KF_PERIOD_MS = 3_000; // Chrome VP9 default kf_max_dist=100 ≈ 3 s @ 30 fps
const SEGMENT_MS = 10_000; // D-13 design — see CONTEXT.md
const MAX_SEGMENTS = 3; // D-13 design — keep last 3 segments (30 s)

/** One simulated `dataavailable` chunk. */
interface SimChunk {
  /** Zero-based position of the chunk within its recorder (or segment). */
  index: number;
  /** Model time at which the chunk was emitted, i.e. the END of the span it covers. */
  emittedAtMs: number;
  /** True when the model places a keyframe inside the span the chunk covers. */
  hasKeyframe: boolean;
  /** True only for the very first chunk a recorder emits. */
  isFirstEmitted: boolean;
}

/** One simulated restart-segment: a contiguous run of chunks from a single recorder. */
interface SimSegment {
  startMs: number;
  endMs: number;
  chunks: SimChunk[];
}

// ─── Simulation: single continuous MediaRecorder (D-09..D-11) ──────────
/**
 * Simulates one uninterrupted recorder that emits a chunk every `timesliceMs`
 * while the encoder produces a keyframe every `kfPeriodMs` (first one at t=0).
 *
 * Only whole chunks are emitted: a trailing partial timeslice is dropped.
 *
 * @param totalDurationMs Length of the simulated recording in milliseconds.
 * @param timesliceMs Chunk period; defaults to `TIMESLICE_MS`.
 * @param kfPeriodMs Keyframe period; defaults to `KF_PERIOD_MS`.
 * @returns Chunks in emission order.
 * @throws RangeError when `timesliceMs` or `kfPeriodMs` is not a positive number.
 */
function simulateContinuousRecorder(
  totalDurationMs: number,
  timesliceMs: number = TIMESLICE_MS,
  kfPeriodMs: number = KF_PERIOD_MS,
): SimChunk[] {
  // A non-positive (or NaN) period would turn the arithmetic below into
  // Infinity/NaN and either loop forever or silently emit nothing.
  if (!(timesliceMs > 0) || !(kfPeriodMs > 0)) {
    throw new RangeError('timesliceMs and kfPeriodMs must be positive numbers');
  }

  const chunks: SimChunk[] = [];
  const totalChunks = Math.floor(totalDurationMs / timesliceMs);
  for (let i = 0; i < totalChunks; i++) {
    // Chunk i covers the half-open interval [intervalStart, emittedAt): a
    // keyframe landing exactly on the start boundary belongs to this chunk,
    // one landing exactly on the end boundary belongs to the next chunk.
    const intervalStart = i * timesliceMs;
    const emittedAt = intervalStart + timesliceMs;
    // First keyframe at-or-after intervalStart. Math.ceil already guarantees
    // firstKfMs >= intervalStart, so only the upper bound needs checking.
    const firstKfMs = Math.ceil(intervalStart / kfPeriodMs) * kfPeriodMs;
    chunks.push({
      index: i,
      emittedAtMs: emittedAt,
      hasKeyframe: firstKfMs < emittedAt,
      isFirstEmitted: i === 0,
    });
  }
  return chunks;
}

// Mirrors trimAged() from src/offscreen/recorder.ts: pin the first-flagged
// chunk, drop everything else older than VIDEO_BUFFER_DURATION_MS.
function trimContinuousBuffer(chunks: SimChunk[], nowMs: number): SimChunk[] {
  // The cutoff is inclusive: a chunk emitted exactly VIDEO_BUFFER_DURATION_MS
  // before `nowMs` is still kept. The first-emitted chunk is kept regardless
  // of its age.
  const cutoff = nowMs - VIDEO_BUFFER_DURATION_MS;
  return chunks.filter((c) => c.isFirstEmitted || c.emittedAtMs >= cutoff);
}

// ─── Simulation: restart-segments (D-13) ──────────────────────────────
/**
 * Simulates a recorder that is stopped and restarted every SEGMENT_MS.
 *
 * Only whole segments are produced (a trailing partial segment is dropped),
 * and each segment holds floor(SEGMENT_MS / TIMESLICE_MS) chunks whose
 * `index` restarts at 0.
 *
 * @param totalDurationMs Length of the simulated recording in milliseconds.
 * @returns Segments in chronological order.
 */
function simulateSegmentRecorder(totalDurationMs: number): SimSegment[] {
  const segments: SimSegment[] = [];
  const totalSegments = Math.floor(totalDurationMs / SEGMENT_MS);
  // Identical for every segment, so computed once outside the loop.
  const chunksPerSegment = Math.floor(SEGMENT_MS / TIMESLICE_MS);
  for (let s = 0; s < totalSegments; s++) {
    const segStart = s * SEGMENT_MS;
    const segEnd = segStart + SEGMENT_MS;
    const chunks: SimChunk[] = [];
    // Each segment's first chunk is always kf-bearing because the MediaRecorder
    // is freshly constructed on segment rotation — the encoder always emits
    // an initial keyframe. The model marks ONLY that first chunk as kf-bearing.
    for (let i = 0; i < chunksPerSegment; i++) {
      const emittedAt = segStart + (i + 1) * TIMESLICE_MS;
      chunks.push({
        index: i,
        emittedAtMs: emittedAt,
        hasKeyframe: i === 0, // the fresh recorder always seeds a keyframe
        isFirstEmitted: i === 0,
      });
    }
    segments.push({ startMs: segStart, endMs: segEnd, chunks });
  }
  return segments;
}

/**
 * Returns the newest `maxSegments` segments, oldest first.
 *
 * @param segments Segments in chronological order.
 * @param maxSegments How many trailing segments to keep; defaults to MAX_SEGMENTS.
 * @returns A new array; the input is not mutated.
 */
function keepLastSegments(
  segments: SimSegment[],
  maxSegments: number = MAX_SEGMENTS,
): SimSegment[] {
  // `slice(-0)` is `slice(0)` and would return EVERYTHING, so a non-positive
  // count has to be handled explicitly.
  if (!(maxSegments > 0)) {
    return [];
  }
  return segments.slice(-maxSegments);
}

// ─── Tests ──────────────────────────────────────────────────────────────

describe('segment keyframes (documentation — D-09..D-11 leaks P-only chunks past trim)', () => {
  it('continuous-recorder model has chunks with no keyframe (proves the gap exists)', () => {
    // Sanity check the model: with TIMESLICE_MS=2000 and KF_PERIOD_MS=3000,
    // a 2-s chunk window can sometimes contain no keyframe at all.
    const chunks = simulateContinuousRecorder(60_000);
    const pOnly = chunks.filter((c) => !c.hasKeyframe);
    expect(pOnly.length).toBeGreaterThan(0);
    // And the count is meaningful — significantly more than just the
    // boundary between two 3-s GOPs. Model integrity check.
    expect(pOnly.length / chunks.length).toBeGreaterThan(0.25);
  });

  it('after 60 s, trimming to 30 s leaves the pinned first chunk + P-only tail chunks orphaned from their keyframes', () => {
    const allChunks = simulateContinuousRecorder(60_000);
    const kept = trimContinuousBuffer(allChunks, 60_000);

    // The pinned first chunk is still there.
    expect(kept[0].isFirstEmitted).toBe(true);
    expect(kept[0].hasKeyframe).toBe(true);

    // The tail (everything after the pinned first chunk) contains AT LEAST
    // one P-only chunk that immediately follows the pinned header, with
    // no kf-bearing chunk in between to anchor it. THIS is the freeze
    // mechanism: the decoder accepts the pinned header + its keyframe,
    // then hits the tail's first P-frame whose reference keyframe lived
    // in a trimmed-out chunk.
    const tail = kept.slice(1);
    const firstTailChunkIsPOnly = tail.length > 0 && !tail[0].hasKeyframe;
    // Pin the failure: the tail does start with a P-only chunk.
    expect(firstTailChunkIsPOnly).toBe(true);

    // The gap between the pinned chunk and the next kf-bearing chunk in the
    // tail is measured on chunk EMISSION times (the model does not track
    // individual keyframe timestamps).
    const pinnedKfMs = kept[0].emittedAtMs;
    const firstTailKfChunk = tail.find((c) => c.hasKeyframe);
    expect(firstTailKfChunk).toBeDefined();
    // The decoder needs every P-frame's reference keyframe present.
    // Between pinnedKfMs and firstTailKfChunk.emittedAtMs there are
    // P-only chunks whose references were trimmed → freeze.
    const orphanGapMs = firstTailKfChunk!.emittedAtMs - pinnedKfMs;
    expect(orphanGapMs).toBeGreaterThan(KF_PERIOD_MS);
  });
});

describe('segment keyframes (GREEN-pinning — D-13 contract for restart-segments)', () => {
  it('each retained segment starts with a keyframe', () => {
    const allSegments = simulateSegmentRecorder(60_000);
    const kept = keepLastSegments(allSegments);
    expect(kept).toHaveLength(MAX_SEGMENTS);
    for (const seg of kept) {
      expect(seg.chunks.length).toBeGreaterThan(0);
      expect(
        seg.chunks[0].hasKeyframe,
        `Segment starting at ${seg.startMs}ms is missing a keyframe in its first chunk. ` +
          `Under D-13 the MediaRecorder must be freshly constructed on each rotation so ` +
          `the encoder seeds a keyframe at segment t=0.`,
      ).toBe(true);
    }
  });

  it('kept window spans exactly MAX_SEGMENTS * SEGMENT_MS = 30 s', () => {
    const allSegments = simulateSegmentRecorder(60_000);
    const kept = keepLastSegments(allSegments);
    const spanMs = kept[kept.length - 1].endMs - kept[0].startMs;
    expect(spanMs).toBe(MAX_SEGMENTS * SEGMENT_MS);
    expect(spanMs).toBe(VIDEO_BUFFER_DURATION_MS);
  });

  it('concatenating retained segments yields a fully decodable timeline (no orphan P-frames)', () => {
    // Decodability invariant: every chunk in the concatenated stream either
    // IS kf-bearing or is preceded (within the SAME segment) by a kf-bearing
    // chunk. Under D-13 this is satisfied trivially because each segment's
    // first chunk is kf-bearing and the segment is self-contained.
    const allSegments = simulateSegmentRecorder(60_000);
    const kept = keepLastSegments(allSegments);

    for (const seg of kept) {
      let lastKfBearingInSegment = -1;
      for (let i = 0; i < seg.chunks.length; i++) {
        if (seg.chunks[i].hasKeyframe) {
          lastKfBearingInSegment = i;
        }
        // Every chunk must have a kf-bearing predecessor (or itself) inside
        // the segment. If lastKfBearingInSegment is still -1 we've found a
        // P-only chunk with no anchoring keyframe — the freeze condition.
        expect(
          lastKfBearingInSegment,
          `Chunk ${i} of segment ${seg.startMs}ms has no preceding keyframe in its segment.`,
        ).toBeGreaterThanOrEqual(0);
      }
    }
  });

  it('a continuous-recorder buffer that trims out middle chunks DOES exhibit the orphan-keyframe gap (the bug, restated as code)', () => {
    // This is the mirror image of the D-13 invariant test above: prove that
    // the D-09..D-11 approach explicitly exhibits the orphan-keyframe gap.
    // That empirically lock-steps the test pair: GREEN on D-13 ⇔ orphan-gap on D-09..D-11.
    const allChunks = simulateContinuousRecorder(60_000);
    const kept = trimContinuousBuffer(allChunks, 60_000);

    // Note: under D-09..D-11 the pinned first chunk IS kf-bearing, so a naive
    // "every chunk has a preceding kf in the kept buffer" check passes. The
    // real bug is that the tail's P-frames reference KEYFRAMES THAT WERE
    // TRIMMED FROM THE MIDDLE OF THE TIMELINE — those keyframes are not in
    // `kept` because they came from chunks evicted by the age trim. We
    // assert this via the gap evidence: there is a stretch in the kept
    // timeline where no kf-bearing chunk appears between the pinned header
    // and the recent tail.
    const pinnedKfMs = kept[0].emittedAtMs;
    const firstTailKfChunk = kept.slice(1).find((c) => c.hasKeyframe);
    expect(firstTailKfChunk).toBeDefined();
    const orphanGapMs = firstTailKfChunk!.emittedAtMs - pinnedKfMs;
    // We require the gap to be much larger than KF_PERIOD_MS — i.e. trimmed
    // material contained keyframes that the kept material depends on.
    expect(orphanGapMs).toBeGreaterThan(KF_PERIOD_MS * 2);
  });
});

describe('production recorder must expose segment-aware buffer (RED — pins D-13)', () => {
  // This block is the genuine TDD anchor. It drives an import of the real
  // src/offscreen/recorder.ts and asserts that a `getSegments` export exists
  // with a shape consistent with the D-13 contract.
  //
  // Today this is RED: the module exports `getBuffer()` (chunk-level), not
  // `getSegments()` (segment-level). The activation of the D-13 skeleton at
  // src/offscreen/recorder.ts:298-316 must:
  //   1. Maintain a `segments: Blob[]` array (each entry = one finalized
  //      ~10 s self-contained WebM).
  //   2. Rotate segments via stop+restart-on-same-MediaStream every
  //      SEGMENT_MS, keeping at most MAX_SEGMENTS.
  //   3. Export a `getSegments(): Blob[]` function. (The wire format on the
  //      port stays base64-per-segment per D-12.)
  //
  // We use vitest's beforeEach + vi.resetModules pattern from
  // codec-check.test.ts so the module's bootstrap side-effects don't poison
  // the test environment.

  interface ChromeStub {
    runtime: {
      sendMessage?: (msg: unknown) => void;
      onMessage?: { addListener?: (cb: unknown) => void };
      connect?: () => unknown;
      id?: string;
    };
  }
  interface GlobalWithChrome {
    chrome?: ChromeStub;
    MediaRecorder?: { isTypeSupported: (mime: string) => boolean };
  }

  beforeEach(async () => {
    // Reset the module registry so every test re-evaluates the recorder
    // module, then install a minimal `chrome` global before it is imported.
    const { vi } = await import('vitest');
    vi.resetModules();
    (globalThis as unknown as GlobalWithChrome).chrome = {
      runtime: { id: 'test', sendMessage: () => {} },
    };
  });

  it('src/offscreen/recorder exports a getSegments function', async () => {
    const mod = (await import('../../src/offscreen/recorder')) as Record<
      string,
      unknown
    >;
    // RED today — recorder.ts only exports getBuffer/addChunk/trimAged/etc.
    // GREEN when D-13 lands and getSegments is added.
    expect(
      typeof mod.getSegments,
      'src/offscreen/recorder.ts must export `getSegments(): Blob[]` once ' +
        'the D-13 restart-segments skeleton is activated. Today it only ' +
        'exports the chunk-level `getBuffer()`, which is the API responsible ' +
        'for the orphan-keyframe gap in tests/fixtures/last_30sec.webm. See ' +
        '.planning/debug/webm-playback-freeze.md and the commented skeleton ' +
        'at src/offscreen/recorder.ts:298-316.',
    ).toBe('function');
  });

  it('getSegments returns at most MAX_SEGMENTS=3 Blobs', async () => {
    const mod = (await import('../../src/offscreen/recorder')) as {
      getSegments?: () => Blob[];
    };
    const { getSegments } = mod;
    if (typeof getSegments === 'function') {
      const segments = getSegments();
      expect(Array.isArray(segments)).toBe(true);
      expect(segments.length).toBeLessThanOrEqual(MAX_SEGMENTS);
    } else {
      // Deliberately FAIL (not skip) while the export is missing, so the
      // contract stays visible as RED. The structural test above is the one
      // that carries the activation instructions.
      expect.fail(
        'getSegments not exported yet; see the previous test in this block ' +
          'for the activation instructions.',
      );
    }
  });
});
diff --git a/tests/offscreen/webm-playback.test.ts b/tests/offscreen/webm-playback.test.ts new file mode 100644 index 0000000..ebddc29 --- /dev/null +++ b/tests/offscreen/webm-playback.test.ts @@ -0,0 +1,179 @@ +// tests/offscreen/webm-playback.test.ts +// +// RED-gate test for debug session webm-playback-freeze. +// +// Empirically proves the playback freeze observed in +// tests/fixtures/last_30sec.webm (Phase 1, Plan 01-07 smoke retest after the +// D-12 base64-transfer fix landed at commit bf07619). +// +// Hypothesis under test (per .planning/debug/webm-playback-freeze.md): +// +// The single-continuous MediaRecorder + 30 s age-trim approach (D-09..D-11) +// drops VP9 P-frames' keyframe references when the buffer trims out the +// middle of the recording. VP9's `kf_max_dist=100` (Chrome default) puts +// keyframes every ~3-5 s.
// With chunks emitted every 2 s (D-09 timeslice),
// the boundary chunks contain only P-frames referencing keyframes that have
// been evicted. The decoder therefore fails ~1 s into playback in Chrome,
// and `ffmpeg -v warning -i <fixture> -f null -` emits multiple
// "Error submitting packet to decoder: Invalid data found" lines plus a
// "File ended prematurely" tail-error.
//
// This test runs ffmpeg's CLI (an external dependency — /usr/bin/ffmpeg)
// over the COMMITTED fixture and asserts:
//   * zero "Error submitting packet to decoder" lines, AND
//   * no "File ended prematurely" line.
//
// Today (commit bf07619) the test goes RED because the fixture was produced
// by the single-continuous-recorder path. The D-13 fix (restart-segments,
// activate the pre-staged skeleton in src/offscreen/recorder.ts) will produce
// a fresh fixture whose decode is clean — at which point this test flips
// GREEN. See `tests/offscreen/segment-keyframes.test.ts` for the unit-level
// algorithmic guard that does NOT require regenerating the fixture.
//
// Skip discipline: if ffmpeg is missing from the environment the test
// auto-skips rather than failing. CI ships ffmpeg per `smoke.sh` so this is
// a developer-convenience fence, not a behavioural softening.

import { describe, it, expect } from 'vitest';
import { existsSync, statSync } from 'node:fs';
import { execFileSync, spawnSync } from 'node:child_process';
import { fileURLToPath } from 'node:url';
import { dirname, resolve } from 'node:path';

const here = dirname(fileURLToPath(import.meta.url));
const FIXTURE_PATH = resolve(here, '..', 'fixtures', 'last_30sec.webm');
const FFMPEG_BIN = '/usr/bin/ffmpeg';

// Cap: a clean 30-second WebM decoded with `-f null` finishes well under
// 10 s on commodity hardware. If we ever exceed this we want a hard failure,
// not a hung CI job.
const FFMPEG_TIMEOUT_MS = 30_000;

// 4 MiB is comfortable for warning-level logs.
const FFMPEG_MAX_STDERR_BYTES = 4 * 1024 * 1024;

/** True when FFMPEG_BIN exists and is a regular file; never throws. */
function ffmpegAvailable(): boolean {
  try {
    return existsSync(FFMPEG_BIN) && statSync(FFMPEG_BIN).isFile();
  } catch {
    return false;
  }
}

/** What a decode dry-run reported on stderr. */
interface DecodeResult {
  /** Raw ffmpeg stderr. */
  stderr: string;
  /** Number of "Error submitting packet to decoder" occurrences in stderr. */
  packetErrorCount: number;
  /** True when stderr contains "File ended prematurely". */
  endedPrematurely: boolean;
}

/**
 * Builds the ffmpeg argument list for a decode-only dry run.
 *
 * `-f null -` swallows the decoded output but still surfaces every per-packet
 * decoder error to stderr. `-nostdin` prevents ffmpeg from blocking on a TTY
 * that vitest does not provide. `-v warning` filters the noise floor; the
 * signals we care about (`Error submitting packet to decoder`,
 * `File ended prematurely`) are emitted at warning level or above.
 */
function dryRunArgs(fixturePath: string): string[] {
  return ['-nostdin', '-v', 'warning', '-i', fixturePath, '-f', 'null', '-'];
}

/** Extracts the two signals this suite asserts on from raw ffmpeg stderr. */
function summarizeStderr(stderr: string): DecodeResult {
  return {
    stderr,
    packetErrorCount: (stderr.match(/Error submitting packet to decoder/g) ?? []).length,
    endedPrematurely: /File ended prematurely/.test(stderr),
  };
}

/**
 * execFileSync-based variant, kept ONLY as documentation of why it is not
 * used by the assertions below.
 *
 * execFileSync returns stdout only. With stderr piped, the child's stderr is
 * available solely on the error object thrown for a failed run, so when
 * ffmpeg exits 0 the diagnostics are lost and this function reports a clean
 * decode (`stderr === ''`).
 *
 * @throws The original execFileSync error when the run failed without
 *   producing any stderr (e.g. the binary could not be started).
 */
function decodeDryRun(fixturePath: string): DecodeResult {
  let stderr = '';
  try {
    execFileSync(FFMPEG_BIN, dryRunArgs(fixturePath), {
      stdio: ['ignore', 'ignore', 'pipe'],
      encoding: 'utf-8',
      timeout: FFMPEG_TIMEOUT_MS,
      maxBuffer: FFMPEG_MAX_STDERR_BYTES,
    });
  } catch (err: unknown) {
    // A failed run that produced stderr is still analysable; one that
    // produced nothing means the binary is genuinely broken or the file is
    // unreadable, so re-throw to fail loudly with full context.
    const captured = (err as { stderr?: unknown }).stderr;
    if (typeof captured !== 'string' || captured === '') {
      throw err;
    }
    stderr = captured;
  }
  return summarizeStderr(stderr);
}

/**
 * spawnSync-based variant that can read stderr on the success path too.
 * This is the helper the assertions actually use.
 *
 * @throws Error when ffmpeg was killed by a signal, when the process could
 *   not be run at all, or when it exited non-zero without printing either of
 *   the diagnostics this suite understands. In each of those cases an empty
 *   or unrelated stderr would otherwise be misread as a clean decode.
 */
function decodeDryRunStrict(fixturePath: string): DecodeResult {
  const proc = spawnSync(FFMPEG_BIN, dryRunArgs(fixturePath), {
    stdio: ['ignore', 'ignore', 'pipe'],
    encoding: 'utf-8',
    timeout: FFMPEG_TIMEOUT_MS,
    maxBuffer: FFMPEG_MAX_STDERR_BYTES,
  });
  if (proc.signal !== null) {
    throw new Error(`ffmpeg was killed by signal ${proc.signal}`);
  }
  // spawnSync reports spawn failures via `error` instead of throwing;
  // without this check a failed spawn yields empty stderr and a false GREEN.
  if (proc.error) {
    throw new Error(`ffmpeg could not be run: ${proc.error.message}`);
  }
  const result = summarizeStderr(proc.stderr ?? '');
  // A non-zero exit that carries one of the known diagnostics is left to the
  // assertions (they print a far more useful message). A non-zero exit with
  // NO known diagnostic means ffmpeg failed for some other reason and must
  // not be reported as a clean decode.
  if (proc.status !== 0 && result.packetErrorCount === 0 && !result.endedPrematurely) {
    throw new Error(
      `ffmpeg exited with status ${proc.status} without a recognised decoder diagnostic. ` +
        `Full ffmpeg stderr:\n${result.stderr}`,
    );
  }
  return result;
}

describe('webm playback (RED — confirms webm-playback-freeze bug)', () => {
  it.skipIf(!ffmpegAvailable())(
    'ffmpeg dry-run on last_30sec.webm produces zero decoder packet errors',
    () => {
      expect(existsSync(FIXTURE_PATH)).toBe(true);
      const result = decodeDryRunStrict(FIXTURE_PATH);
      // Document the failure in the assertion message so a regression
      // bisect lands on a useful diff, not just "expected 0 received N".
      expect(
        result.packetErrorCount,
        `ffmpeg reported ${result.packetErrorCount} "Error submitting packet to decoder" line(s). ` +
          `This means the VP9 decoder hit P-frames whose reference keyframe was missing from the ` +
          `stream — the symptom of the single-continuous-recorder + 30 s age-trim approach (D-09..D-11). ` +
          `Fix: activate the D-13 restart-segments skeleton at src/offscreen/recorder.ts:298-316 and ` +
          `regenerate the fixture via ./smoke.sh. Full ffmpeg stderr:\n${result.stderr}`,
      ).toBe(0);
    },
  );

  it.skipIf(!ffmpegAvailable())(
    'ffmpeg dry-run on last_30sec.webm does not end prematurely',
    () => {
      expect(existsSync(FIXTURE_PATH)).toBe(true);
      const result = decodeDryRunStrict(FIXTURE_PATH);
      // The "File ended prematurely" line indicates the WebM lacks proper
      // Matroska SegmentSize / Cues finalization because the SW reads the
      // in-memory buffer while the MediaRecorder is still active (no .stop()).
      // The D-13 restart-segments approach fixes this as a side effect —
      // each rotated segment gets a proper .stop() and is therefore finalized.
      expect(
        result.endedPrematurely,
        `ffmpeg reported "File ended prematurely". The WebM container was read mid-stream ` +
          `without calling MediaRecorder.stop(), so SegmentSize/Cues are unwritten. The D-13 ` +
          `restart-segments fix finalizes each segment naturally. Full ffmpeg stderr:\n${result.stderr}`,
      ).toBe(false);
    },
  );

  // Touch the otherwise-unused decodeDryRun symbol so the file stays
  // compilable under noUnusedLocals. The intent is to leave both helpers
  // documented side-by-side: one shows the execFileSync semantics (succeeds
  // quietly on decode errors) and the other shows the spawnSync approach
  // actually used. `void` only references the function; it is never called.
  // eslint-disable-next-line @typescript-eslint/no-unused-expressions
  void decodeDryRun;
});