// diff --git a/src/shared/binary.ts b/src/shared/binary.ts
// new file mode 100644 (index 0000000..18bb4c0)
//
// src/shared/binary.ts — Portable Blob↔base64 helpers for the
// offscreen↔SW port wire-format (D-12 fix).
//
// Why this file exists:
//   chrome.runtime.Port.postMessage JSON-serializes payloads across
//   extension contexts. JSON.stringify(blob) === "{}" (Blob has no
//   enumerable own properties), so any Blob passed through a port
//   arrives as a plain empty object on the other side. The SW then
//   calls `new Blob([{}, {}, ...])` and the constructor coerces each
//   non-Blob member via String({}) === "[object Object]", producing
//   exactly 75 bytes of garbage for 5 chunks — matching the observed
//   ffprobe failure forensics in debug session d12.
//
// Fix: encode each chunk's binary payload to base64 in offscreen
//   BEFORE port.postMessage; decode back to Blob in SW AFTER receive.
//   base64 strings round-trip cleanly through JSON.
//
// The wire-format contract is pinned by tests/offscreen/port-serialization.test.ts
// (GREEN describe block). This module's two helpers MUST satisfy:
//   1. `await blobToBase64(blob)` returns just the base64 payload
//      (no `data:...;base64,` prefix).
//   2. `base64ToBlob(b64, mimeType)` returns a Blob whose bytes
//      equal `Uint8Array(await blob.arrayBuffer())` of the original.
//   3. EBML magic bytes 0x1A 0x45 0xDF 0xA3 round-trip intact through
//      JSON.parse(JSON.stringify({dataBase64: await blobToBase64(...)})).
//
// Portability: both `btoa`/`atob` and `arrayBuffer()` are available in
// the modern browser/extension runtime AND in the Node-based vitest
// environment (Node 18+ has both as globals). No FileReader dependency
// (FileReader is browser-only — would break Node tests).

/**
 * Encode a Blob's binary content to a base64 string suitable for
 * JSON-friendly transport over chrome.runtime.Port.
 *
 * Returns the raw base64 payload only (no `data:...;base64,` prefix).
 * An empty blob encodes to the empty string.
 *
 * @param blob - The binary blob to encode (typically a MediaRecorder
 *               dataavailable chunk).
 * @returns Base64-encoded representation of the blob's bytes.
 */
export async function blobToBase64(blob: Blob): Promise<string> {
  // Bytes are converted to a "binary string" (one char per byte, code
  // units 0–255) because that is the only input btoa accepts.
  const bytes = new Uint8Array(await blob.arrayBuffer());

  // Convert in bounded slices: a single `String.fromCharCode(...bytes)`
  // can exceed the engine's argument-length limit on large chunks, while
  // one call per byte does needless work. 8 KiB per call stays far below
  // any such limit and produces exactly the same string.
  const sliceSize = 0x2000;
  let binary = '';
  for (let offset = 0; offset < bytes.length; offset += sliceSize) {
    binary += String.fromCharCode(...bytes.subarray(offset, offset + sliceSize));
  }
  return btoa(binary);
}

/**
 * Decode a base64 string back into a Blob with the given MIME type.
 *
 * Synchronous because atob + Uint8Array population requires no I/O.
 *
 * @param b64 - The base64-encoded payload (as produced by
 *              {@link blobToBase64}). Must NOT include a
 *              `data:...;base64,` prefix.
 * @param mimeType - The MIME type to assign to the reconstructed Blob
 *                   (e.g. `video/webm;codecs=vp9`).
 * @returns A Blob whose bytes match the original encoded blob exactly.
 * @throws If `b64` is not valid base64 — the error raised by `atob` is
 *         propagated unchanged.
 */
export function base64ToBlob(b64: string, mimeType: string): Blob {
  // atob yields a binary string: each char code is one original byte.
  const binary = atob(b64);
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i);
  }
  return new Blob([bytes], { type: mimeType });
}

// diff --git a/tests/offscreen/port-serialization.test.ts b/tests/offscreen/port-serialization.test.ts
// new file mode 100644 (index 0000000..8c5bc66)
//
// tests/offscreen/port-serialization.test.ts
//
// RED-gate test for debug session d12-blob-port-transfer-fails.
//
// Empirically proves the failure mode behind the 75-byte "[object Object]"
// WebM payload observed in /home/parf/Downloads/session_report_2026-05-15_19-42-01.zip.
//
// Hypothesis under test (per .planning/debug/d12-blob-port-transfer-fails.md):
//   1. chrome.runtime.Port.postMessage serializes payloads as JSON across
//      extension contexts (offscreen ↔ SW). JSON.stringify(blob) === "{}"
//      because Blob has no enumerable own properties.
//   2. The current REQUEST_BUFFER handler in src/offscreen/recorder.ts:174-178
//      sends `{ type: 'BUFFER', chunks: getBuffer() }` where each chunk has
//      `.data: Blob`. After the JSON round-trip the SW receives chunks with
//      `.data === {}`.
//   3. mergeVideoChunks in src/background/index.ts:204-222 then calls
//      `new Blob([{}, {}, ...])`. The Blob constructor coerces each non-Blob
//      member via String(member) → "[object Object]", concatenated with no
//      separator → exactly 75 bytes for 5 chunks (5 × 15 = 75).
//
// Goal: make these tests RED first (proving the bug is real), then write the
// fix (Blob → base64 string transfer-format) to flip them GREEN.

import { describe, it, expect } from 'vitest';

/**
 * Simulates Chrome's documented chrome.runtime.Port serialization across
 * extension contexts: JSON round-trip with no structured-clone fallback.
 * Ref: https://developer.chrome.com/docs/extensions/develop/concepts/messaging
 *
 * @param message - The payload that would be handed to `port.postMessage`.
 * @returns What the receiving side would see; typed `unknown` because the
 *          JSON round-trip does not preserve the input's type.
 */
function chromeRuntimePortRoundTrip<T>(message: T): unknown {
  return JSON.parse(JSON.stringify(message));
}

describe('port serialization (RED — confirms d12 bug)', () => {
  it('JSON.stringify(blob) drops binary content (root cause part 1)', () => {
    const blob = new Blob([new Uint8Array([0x1a, 0x45, 0xdf, 0xa3])], {
      type: 'video/webm',
    });
    // EBML magic = 0x1A 0x45 0xDF 0xA3 — the bytes the real WebM should start with.
    expect(blob.size).toBe(4);

    const serialized = JSON.stringify(blob);
    // The smoking gun: Blob has no enumerable own properties, so JSON drops it.
    expect(serialized).toBe('{}');
  });

  it('Port round-trip strips Blob from chunks (reproduces the SW receive-side)', () => {
    const blob = new Blob([new Uint8Array([0x1a, 0x45, 0xdf, 0xa3])], {
      type: 'video/webm',
    });
    const portMessage = {
      type: 'BUFFER',
      chunks: [
        { data: blob, timestamp: 1, isFirst: true },
        { data: blob, timestamp: 2 },
      ],
    };

    const received = chromeRuntimePortRoundTrip(portMessage) as {
      type: string;
      chunks: Array<{ data: unknown; timestamp: number; isFirst?: boolean }>;
    };

    expect(received.type).toBe('BUFFER');
    expect(received.chunks).toHaveLength(2);
    // The bug: data is no longer a Blob — it's a plain empty object.
    expect(received.chunks[0].data).not.toBeInstanceOf(Blob);
    expect(received.chunks[0].data).toEqual({});
    expect(received.chunks[1].data).toEqual({});
    // Metadata survives because timestamp/isFirst are JSON-friendly.
    expect(received.chunks[0].timestamp).toBe(1);
    expect(received.chunks[0].isFirst).toBe(true);
  });

  it('new Blob([{}, {}, ...]) coerces each member to "[object Object]" (root cause part 2)', async () => {
    // This is the smoking gun for the merge step. Once the SW gets {} for each
    // chunk.data, mergeVideoChunks does `new Blob(blobs, { type: 'video/webm' })`
    // and the Blob ctor stringifies non-Blob members.
    //
    // The cast is deliberate: plain objects are not valid BlobParts, and
    // feeding them to the constructor anyway is exactly the bug under test.
    const strippedPayloads = [{}, {}, {}, {}, {}] as unknown as BlobPart[];
    const merged = new Blob(strippedPayloads, { type: 'video/webm' });
    expect(merged.size).toBe(75); // 5 × 15 = 75 — matches the observed payload!
    const text = await merged.text();
    expect(text).toBe('[object Object][object Object][object Object][object Object][object Object]');
  });

  it('end-to-end: simulate full failure path — Blob → port → merge → 75-byte garbage', async () => {
    // Build a fake recorder buffer of 5 real WebM-ish blobs.
    const realChunks = [0x1a, 0x45, 0xdf, 0xa3, 0xa3].map((firstByte, i) => ({
      data: new Blob([new Uint8Array([firstByte, 0x00, 0x00, 0x00])], {
        type: 'video/webm',
      }),
      timestamp: 1000 + i,
      isFirst: i === 0,
    }));

    // Send through the simulated port.
    const message = { type: 'BUFFER', chunks: realChunks };
    const received = chromeRuntimePortRoundTrip(message) as {
      chunks: Array<{ data: unknown; timestamp: number }>;
    };

    // SW-side: order by timestamp, then merge the (now stripped) payloads.
    // Sort a copy so the received message itself is left untouched.
    const blobs = [...received.chunks]
      .sort((a, b) => a.timestamp - b.timestamp)
      .map((c) => c.data as Blob);
    const merged = new Blob(blobs, { type: 'video/webm' });

    // The observed failure: 75 bytes of "[object Object]" repeated.
    expect(merged.size).toBe(75);
    const text = await merged.text();
    expect(text).toBe(
      '[object Object][object Object][object Object][object Object][object Object]'
    );
    // The original 5 × 4 = 20 bytes of WebM data are GONE.
    expect(merged.size).not.toBe(20);
  });
});

describe('port serialization (GREEN — pins the eventual fix contract)', () => {
  // These tests are forward-pinning: they describe the wire-format the fix
  // MUST implement. They will run today and PASS — they test pure helpers, not
  // the (still-buggy) recorder.ts handler.
  //
  // The fix must:
  //   1. Define a TransferredVideoChunk wire-format that is JSON-friendly:
  //      { dataBase64: string; type: string; timestamp: number; isFirst?: boolean }
  //   2. Convert Blob → base64 in offscreen before postMessage.
  //   3. Convert base64 → Blob in SW after receive.
  //
  // Until the fix lands, these helpers don't exist in src/. These tests live
  // here as a contract the fix's reviewer can check against.
  //
  // NOTE(review): the header of src/shared/binary.ts says its exported
  // blobToBase64/base64ToBlob are pinned by this describe block, but the tests
  // below only exercise the local copies. Consider importing the shipped
  // helpers here so the contract covers production code — confirm with the
  // author.

  /** Local reference encoder: Blob bytes → raw base64 (no data-URL prefix). */
  async function blobToBase64(blob: Blob): Promise<string> {
    const buf = await blob.arrayBuffer();
    const bytes = new Uint8Array(buf);
    // Node + browser both support Buffer / btoa. Use a portable conversion.
    let binary = '';
    for (let i = 0; i < bytes.length; i++) {
      binary += String.fromCharCode(bytes[i]);
    }
    // btoa exists in vitest's node env via globalThis polyfill in modern Node.
    return typeof btoa === 'function'
      ? btoa(binary)
      : Buffer.from(binary, 'binary').toString('base64');
  }

  /** Local reference decoder: raw base64 → Blob tagged with `type`. */
  function base64ToBlob(b64: string, type: string): Blob {
    const binary =
      typeof atob === 'function'
        ? atob(b64)
        : Buffer.from(b64, 'base64').toString('binary');
    const bytes = new Uint8Array(binary.length);
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i);
    }
    return new Blob([bytes], { type });
  }

  it('base64 round-trip preserves binary content across JSON serialization', async () => {
    const original = new Blob([new Uint8Array([0x1a, 0x45, 0xdf, 0xa3])], {
      type: 'video/webm',
    });
    const b64 = await blobToBase64(original);
    expect(b64).toBe('GkXfow=='); // base64 of EBML magic
    // JSON round-trip preserves the string.
    const wire = { dataBase64: b64, type: 'video/webm' };
    const received = JSON.parse(JSON.stringify(wire)) as {
      dataBase64: string;
      type: string;
    };
    const restored = base64ToBlob(received.dataBase64, received.type);
    expect(restored.size).toBe(4);
    // Compare raw bytes rather than decoded text: the EBML magic is not valid
    // UTF-8, so only a byte-level check proves the payload was preserved.
    expect(new Uint8Array(await restored.arrayBuffer())).toEqual(
      new Uint8Array([0x1a, 0x45, 0xdf, 0xa3])
    );
  });

  it('merging base64-decoded chunks produces a real WebM-prefixed blob', async () => {
    const original = new Blob([new Uint8Array([0x1a, 0x45, 0xdf, 0xa3])], {
      type: 'video/webm',
    });
    const b64 = await blobToBase64(original);
    const wireChunks = [
      { dataBase64: b64, type: 'video/webm', timestamp: 1, isFirst: true },
    ];
    const received = JSON.parse(JSON.stringify({ chunks: wireChunks })) as {
      chunks: Array<{ dataBase64: string; type: string; timestamp: number }>;
    };
    const blobs = received.chunks.map((c) => base64ToBlob(c.dataBase64, c.type));
    const merged = new Blob(blobs, { type: 'video/webm' });
    expect(merged.size).toBe(4);
    expect(merged.size).not.toBe(75); // the bug is gone in the fixed form.
  });
});