mokosh/tests/uat/lib/harness-page-driver.ts

// tests/uat/lib/harness-page-driver.ts — Plan 01-13 Wave 2.
//
// Driver wrappers — one per assertion (A1..A13). Each wraps a single
// `page.evaluate(() => window.__mokoshHarness.assertXX())` call,
// returning the structured AssertionRecord (or the extended shape with
// `bytesBase64` for A5/A12/A13 which return host-side-required payloads
// like the downloaded zip bytes or the recorded webm bytes).
//
// Centralizing the page.evaluate call here means adding or renaming an
// assertion requires a two-file edit:
//   1. extension-page-harness.ts — page-side impl + window.__mokoshHarness wire
//   2. this file — host-side driver wrapper
// instead of touching every test-file that calls the assertion.
//
// Historical note: Wave 2 wired ONLY `driveA6` (the proven assertion
// from the c647f61 prototype). The 12 Wave-3 assertions were stubbed as
// `throw new Error('NOT YET IMPLEMENTED — Wave 3<X> wires this')` so the
// orchestrator's `for (const drive of drivers)` loop failed cleanly on
// the first unimplemented one (bail-on-first-failure semantics in
// `harness.test.ts` landed in Wave 3A). Those stubs have since been
// retired — every driver in this file is now wired.
//
// Wave 3A wires driveA1/A2/A3/A4 (page-side surface in
// `extension-page-harness.ts` from the same wave).
// Wave 3B wires driveA5 (page-side ack + HOST-side fs polling for the
// dropped `session_report_*.zip` in `handles.downloadsDir`) + driveA7
// (standard page.evaluate wrapper). The driveA5 signature requires a
// second `downloadsDir` argument; the orchestrator at `harness.test.ts`
// threads `handles.downloadsDir` through.
//
// References:
//   - puppeteer Page.evaluate:
//     https://pptr.dev/api/puppeteer.page.evaluate
//   - Node fs.readdirSync / statSync:
//     https://nodejs.org/api/fs.html

import { spawnSync } from 'node:child_process';
import { existsSync, mkdtempSync, readFileSync, readdirSync, statSync, unlinkSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join, resolve as resolvePath } from 'node:path';

import type { Page } from 'puppeteer';

import type { AssertionRecord, CheckRecord } from './assertions';
import { assertArchiveShape, extractEntryToFile } from './zip';

/**
 * Assertion record widened with optional host-side payload fields.
 * Intended for the assertions that must hand binary data to the host:
 *   - A5 (SAVE_ARCHIVE): `bytesBase64` carries the downloaded zip bytes
 *     (read host-side from `handles.downloadsDir`); the page side only
 *     returns the trigger ack.
 *   - A12 (ffprobe): `bytesBase64` carries the recorded webm bytes —
 *     extracted from the zip by the host so ffprobe (a host-side
 *     binary) can analyze them.
 *   - A13 (zip shape): `bytesBase64` carries the zip bytes and
 *     `expectedVersion` the manifest version the harness was built
 *     against.
 *
 * Wave-3 only; Wave 2 never used this shape.
 *
 * NOTE(review): `driveA5` and `driveA12` in this file are declared to
 * return plain `AssertionRecord` — confirm whether this extended shape
 * still has a consumer.
 */
export interface AssertionWithBytes {
  /** Assertion display name. */
  readonly name: string;
  /** Overall verdict for the assertion. */
  readonly passed: boolean;
  /** Individual check results that make up the assertion. */
  readonly checks: readonly CheckRecord[];
  /** Free-form diagnostic lines. */
  readonly diagnostics: readonly string[];
  /** Error text, when one was recorded. */
  readonly error?: string;
  /** Base64-encoded binary payload (zip or webm bytes, see above). */
  readonly bytesBase64?: string;
  /** Manifest version the harness was built against (A13). */
  readonly expectedVersion?: string;
}

// Note (Wave 3D — all 13 drivers wired): the WAVE3_STUB_PREFIX marker
// that gated unimplemented drivers across Waves 3A-3C has been retired
// — there are no more stubs. Future assertions (A14+) would follow
// the same wired-driver pattern below; no stub-marker is reintroduced
// unless multi-wave incremental rollout is needed again.

/**
 * Host-side trigger for A6 (Bug B canonical) — the proven driver
 * inherited from the prototype. The page performs the whole sequence
 * (ensureOffscreen + start + wait + dispatch + assert); this wrapper
 * only invokes `assertA6` in the page and hands back its record.
 *
 * @param page - The harness page (from `launchHarnessBrowser`).
 * @returns Structured AssertionRecord with 5 checks (SETUP + A6.1..A6.4).
 */
export async function driveA6(page: Page): Promise<AssertionRecord> {
  const record = await page.evaluate(async (): Promise<AssertionRecord> => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    return (window as any).__mokoshHarness.assertA6();
  });
  return record as AssertionRecord;
}

/* ─── Wave 3A — WIRED ─────────────────────────────────────────────── */

/**
 * Host-side trigger for A1 (SW bootstrap state). The page verifies the
 * post-load idle-mode state: badge='', popup='', isRecording=false.
 *
 * Ordering constraint: A1 MUST run BEFORE A2 in any orchestrated
 * sequence — A2 manually sets badge='REC', which invalidates the A1
 * contract until the SW is reset.
 *
 * @param page - The harness page from `launchHarnessBrowser`.
 * @returns Structured AssertionRecord with 3 checks (badge + popup + isRecording).
 */
export async function driveA1(page: Page): Promise<AssertionRecord> {
  const record = await page.evaluate(async (): Promise<AssertionRecord> => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    return (window as any).__mokoshHarness.assertA1();
  });
  return record as AssertionRecord;
}

/**
 * Host-side trigger for A2 (toolbar onClicked → REC). The page uses the
 * direct-offscreen workaround for the missing `tabs` manifest
 * permission (per 01-11-SUMMARY).
 *
 * Side effect: the offscreen recording is left active — A3 + A4 chain
 * off A2's REC state.
 *
 * @param page - The harness page from `launchHarnessBrowser`.
 * @returns Structured AssertionRecord with 2 checks (badge + popup).
 */
export async function driveA2(page: Page): Promise<AssertionRecord> {
  const record = await page.evaluate(async (): Promise<AssertionRecord> => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    return (window as any).__mokoshHarness.assertA2();
  });
  return record as AssertionRecord;
}

/**
 * Host-side trigger for A3 (displaySurface === 'monitor'). Precondition:
 * A2 left the recording active. The page queries the offscreen
 * `get-display-surface` bridge op.
 *
 * @param page - The harness page from `launchHarnessBrowser`.
 * @returns Structured AssertionRecord with 1 check (displaySurface).
 */
export async function driveA3(page: Page): Promise<AssertionRecord> {
  const record = await page.evaluate(async (): Promise<AssertionRecord> => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    return (window as any).__mokoshHarness.assertA3();
  });
  return record as AssertionRecord;
}

/**
 * Host-side trigger for A4 (popup pinned + single offscreen during
 * recording). Precondition: A2 left the recording active. The page
 * verifies that getPopup is unchanged and hasDocument is true (no
 * duplicate offscreen spawned).
 *
 * @param page - The harness page from `launchHarnessBrowser`.
 * @returns Structured AssertionRecord with 2 checks (popup + hasDocument).
 */
export async function driveA4(page: Page): Promise<AssertionRecord> {
  const record = await page.evaluate(async (): Promise<AssertionRecord> => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    return (window as any).__mokoshHarness.assertA4();
  });
  return record as AssertionRecord;
}

/* ─── Wave 3B — WIRED ─────────────────────────────────────────────── */

/** Maximum time, in milliseconds, that `driveA5` keeps polling
 *  `downloadsDir` for the SAVE_ARCHIVE zip to appear. */
const A5_DOWNLOAD_POLL_TIMEOUT_MS = 15_000;
/** Delay, in milliseconds, between successive `downloadsDir` scans
 *  while waiting for the zip. */
const A5_DOWNLOAD_POLL_INTERVAL_MS = 200;
/** Filename suffix for the dropped archive. Production code in
 *  `src/background/index.ts:downloadArchive` requests
 *  `session_report_<date>_<time>.zip`, BUT under CDP-routed downloads
 *  (`Browser.setDownloadBehavior`) Chrome ignores the
 *  `chrome.downloads.download` `filename` parameter for `data:` URLs and
 *  defaults to `download.zip` (or `download (N).zip` on collision). The
 *  contract A5 verifies is "a zip file lands in downloadsDir within
 *  timeout" — the exact filename is not load-bearing for Wave 3B, which
 *  is why only the suffix is matched.
 *  Wave 3D's A13 (zip structure) verifies the zip content. */
const A5_ZIP_NAME_SUFFIX = '.zip';
/** Minimum acceptable zip size, in bytes (1 KiB) — the production
 *  `downloadArchive` always writes at least a JSZip header + screenshot
 *  PNG (typically several KB even with an empty video buffer).
 *  1 KiB is the floor specified in the plan's success criteria for A5. */
const A5_MIN_ZIP_SIZE_BYTES = 1024;

/**
 * Drive A5 (SAVE_ARCHIVE download). Three-phase orchestration:
 *
 *   1. Page side: send SAVE_ARCHIVE via the harness `assertA5` helper.
 *      Returns AssertionRecord with check `A5.1: SW handler returns
 *      success=true`. Throws are caught + returned as a failure record
 *      with `error` set.
 *
 *   2. Host side: poll `downloadsDir` for up to
 *      `A5_DOWNLOAD_POLL_TIMEOUT_MS` for a `*.zip` that is either NEW
 *      (name absent from the pre-dispatch snapshot) or OVERWRITTEN
 *      (same name, newer mtime). Any `.zip` name is accepted because
 *      CDP-routed downloads land as `download.zip` rather than the
 *      production `session_report_*.zip`, and repeat downloads overwrite
 *      that file in place. Once the file size is stable and non-zero the
 *      bytes are read for the size check; the bytes are NOT returned to
 *      the orchestrator (no consumer in Wave 3B — A13 will read them out
 *      of the zip-shape driver in Wave 3D).
 *
 *   3. Host side: assert `zipSize >= A5_MIN_ZIP_SIZE_BYTES`. Merge the
 *      host-side checks onto the page-side AssertionRecord; recompute
 *      `passed` as the conjunction of all checks.
 *
 * The split between page-side (SW dispatch ack) and host-side
 * (file-system verification) is dictated by the page isolate's lack of
 * filesystem access — `handles.downloadsDir` is a Node-side `mkdtempSync`
 * configured via CDP `Browser.setDownloadBehavior` and only readable
 * from the Node process.
 *
 * @param page         - The harness page from `launchHarnessBrowser`.
 * @param downloadsDir - Absolute path to the per-run downloads directory
 *                       (from `handles.downloadsDir`).
 * @returns AssertionRecord with merged page + host checks.
 */
export async function driveA5(
  page: Page,
  downloadsDir: string,
): Promise<AssertionRecord> {
  // Snapshot existing zip files (name -> mtimeMs) BEFORE dispatching
  // SAVE_ARCHIVE so the post-dispatch poll only considers files that are
  // new or rewritten. A fresh run normally has no pre-existing zip, but
  // a re-used `downloadsDir` (`HARNESS_DOWNLOADS_DIR` env override) can
  // legitimately hold prior runs' files — and because CDP-routed
  // downloads overwrite `download.zip` in place, a name-only snapshot
  // would hide the very file this run produces.
  const preExistingMtimes = new Map<string, number>();
  for (const name of readdirSync(downloadsDir)) {
    if (isZipFilename(name)) {
      preExistingMtimes.set(
        name,
        statSync(resolvePath(downloadsDir, name)).mtimeMs,
      );
    }
  }

  // Phase 1: page-side dispatch.
  const pageResult = await page.evaluate(async () => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    const harness = (window as any).__mokoshHarness;
    const r: AssertionRecord = await harness.assertA5();
    return r;
  }) as AssertionRecord;

  // Phase 2: host-side poll for the dropped zip.
  let zipFilename: string | null = null;
  let zipBytes: Buffer | null = null;
  const pollStart = Date.now();
  while (Date.now() - pollStart < A5_DOWNLOAD_POLL_TIMEOUT_MS) {
    // Pick the most-recently-modified new/updated zip so the choice is
    // deterministic if several appear (first one wins on an mtime tie).
    let newest: { name: string; mtimeMs: number } | null = null;
    for (const name of readdirSync(downloadsDir)) {
      if (!isZipFilename(name)) {
        continue;
      }
      const mtimeMs = statSync(resolvePath(downloadsDir, name)).mtimeMs;
      const priorMtimeMs = preExistingMtimes.get(name);
      const isNewOrUpdated =
        priorMtimeMs === undefined || mtimeMs > priorMtimeMs;
      if (isNewOrUpdated && (newest === null || mtimeMs > newest.mtimeMs)) {
        newest = { name, mtimeMs };
      }
    }
    if (newest !== null) {
      // Record the name as soon as the file is seen: A5.2 ("a zip
      // appears") is independent of A5.3 (its size).
      zipFilename = newest.name;
      const zipPath = resolvePath(downloadsDir, zipFilename);
      // The file may still be writing. Sample the size twice, 100ms
      // apart, and only accept a stable, non-empty file; otherwise keep
      // polling until the timeout.
      const sizeFirst = statSync(zipPath).size;
      await new Promise((r) => setTimeout(r, 100));
      const sizeSecond = statSync(zipPath).size;
      if (sizeFirst === sizeSecond && sizeFirst > 0) {
        zipBytes = readFileSync(zipPath);
        break;
      }
    }
    await new Promise((r) => setTimeout(r, A5_DOWNLOAD_POLL_INTERVAL_MS));
  }

  // Phase 3: merge checks. Page-side checks are immutable
  // (ReadonlyArray); copy into a mutable buffer + append host-side.
  const mergedChecks: CheckRecord[] = pageResult.checks.slice();
  const mergedDiagnostics: string[] = pageResult.diagnostics.slice();

  const zipPresent = zipFilename !== null;
  // Size stays 0 when no zip arrived or it never stabilised, which
  // makes A5.3 fail.
  const zipSize = zipBytes !== null ? zipBytes.length : 0;
  mergedChecks.push({
    name: `A5.2: a *.zip file appears in downloadsDir within ${A5_DOWNLOAD_POLL_TIMEOUT_MS}ms (production name: 'session_report_*.zip'; CDP fallback: 'download*.zip')`,
    expected: true,
    actual: zipPresent,
    passed: zipPresent,
  });
  mergedChecks.push({
    name: `A5.3: zip file size >= ${A5_MIN_ZIP_SIZE_BYTES} bytes`,
    expected: A5_MIN_ZIP_SIZE_BYTES,
    actual: zipSize,
    passed: zipSize >= A5_MIN_ZIP_SIZE_BYTES,
  });
  mergedDiagnostics.push(
    `host-side: zipFilename=${zipFilename ?? '<missing>'}, zipSize=${zipSize} bytes, downloadsDir=${downloadsDir}`,
  );

  const mergedPassed = mergedChecks.every((c) => c.passed);
  return {
    passed: mergedPassed,
    name: pageResult.name,
    checks: mergedChecks,
    diagnostics: mergedDiagnostics,
    error: pageResult.error,
  };
}

/**
 * Decide whether a directory entry name looks like a finished zip
 * download. Only the suffix is inspected, so both the production name
 * `session_report_<ts>.zip` and the CDP-fallback `download.zip` match
 * (the `A5_ZIP_NAME_SUFFIX` comment explains why the latter occurs
 * under `Browser.setDownloadBehavior`), while mid-write `.crdownload`
 * names do not because they end in a different suffix.
 *
 * @param name - Filename (basename, not full path).
 * @returns True iff `name` ends with the zip suffix.
 */
function isZipFilename(name: string): boolean {
  const hasZipSuffix = name.endsWith(A5_ZIP_NAME_SUFFIX);
  return hasZipSuffix;
}

/**
 * Host-side trigger for A7 (genuine error → ERR + recovery
 * notification). A plain page.evaluate wrapper: the page owns the whole
 * sequence (setupFreshRecording + notification snapshot +
 * RECORDING_ERROR dispatch + post-state read); the host only invokes
 * `assertA7` and returns its record.
 *
 * @param page - The harness page from `launchHarnessBrowser`.
 * @returns Structured AssertionRecord with 4 checks (A7.1..A7.4).
 */
export async function driveA7(page: Page): Promise<AssertionRecord> {
  const record = await page.evaluate(async (): Promise<AssertionRecord> => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    return (window as any).__mokoshHarness.assertA7();
  });
  return record as AssertionRecord;
}

/* ─── Wave 3C — WIRED ─────────────────────────────────────────────── */

/**
 * Host-side trigger for A8 (Bug A canonical regression rewind —
 * onStartup notification creates). A plain page.evaluate wrapper: the
 * page owns the whole sequence (chrome.notifications.create dispatch +
 * getAll snapshot + delta + set-membership check).
 *
 * The page calls chrome.notifications.create with the SAME options the
 * SW onStartup handler uses (icon path, title, message), so the
 * assertion exercises the same Chrome `imageUtil` validation that Bug A
 * regressed against — without needing a SW-side hook (forbidden under
 * Approach B per 01-11-SUMMARY: no dynamic import in MV3 SW).
 *
 * @param page - The harness page from `launchHarnessBrowser`.
 * @returns Structured AssertionRecord with 4 checks (A8.1..A8.4).
 */
export async function driveA8(page: Page): Promise<AssertionRecord> {
  const record = await page.evaluate(async (): Promise<AssertionRecord> => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    return (window as any).__mokoshHarness.assertA8();
  });
  return record as AssertionRecord;
}

/**
 * Host-side trigger for A9 (icon file sizes meet `imageUtil` floors).
 * A plain page.evaluate wrapper: the page fetches each icon via
 * chrome.runtime.getURL, reads blob.size, and compares it against the
 * 200/500/1024-byte floors per assets-spec.md / Plan 01-13 Task 6.
 *
 * @param page - The harness page from `launchHarnessBrowser`.
 * @returns Structured AssertionRecord with 3 checks (one per icon size).
 */
export async function driveA9(page: Page): Promise<AssertionRecord> {
  const record = await page.evaluate(async (): Promise<AssertionRecord> => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    return (window as any).__mokoshHarness.assertA9();
  });
  return record as AssertionRecord;
}

/**
 * Host-side trigger for A10 (manifest shape contract). A plain
 * page.evaluate wrapper: the page reads chrome.runtime.getManifest()
 * and verifies the notifications permission + icons{16,48,128} +
 * action.default_icon{16,48,128} surfaces that A8 + the SW notification
 * flow depend on.
 *
 * @param page - The harness page from `launchHarnessBrowser`.
 * @returns Structured AssertionRecord with 7 checks (1 permissions + 3 icons + 3 default_icon).
 */
export async function driveA10(page: Page): Promise<AssertionRecord> {
  const record = await page.evaluate(async (): Promise<AssertionRecord> => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    return (window as any).__mokoshHarness.assertA10();
  });
  return record as AssertionRecord;
}

/* ─── Wave 3D — WIRED ─────────────────────────────────────────────── */

/**
 * Host-side trigger for A11 (35s buffer continuity → segments.length
 * >= 3). A plain page.evaluate wrapper: the page owns the whole
 * sequence (teardownAndStartFreshRecording + 35s wait with keepalive +
 * get-segment-count bridge query); the host only invokes `assertA11`
 * and returns its record.
 *
 * Runtime: worst case ~36 seconds (35s wait + ~1s setup/query
 * overhead). This driver DOMINATES the harness wall-clock budget;
 * future runtime work should focus on this wait (e.g. a shorter
 * SEGMENT_DURATION_MS in the test bundle build — but that changes
 * production semantics and is out of scope for 01-13).
 *
 * @param page - The harness page from `launchHarnessBrowser`.
 * @returns Structured AssertionRecord with 2 checks (SETUP + A11.1).
 */
export async function driveA11(page: Page): Promise<AssertionRecord> {
  const record = await page.evaluate(async (): Promise<AssertionRecord> => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    return (window as any).__mokoshHarness.assertA11();
  });
  return record as AssertionRecord;
}

/** Absolute path to ffprobe. Mirrors the unit-level
 *  `tests/offscreen/webm-playback.test.ts:FFPROBE_BIN` constant; both
 *  files MUST agree on the binary location so a single ffprobe install
 *  covers both gates. If the operator's ffprobe is at a different
 *  path, A12 will fall through the skip-gate (passed=true + SKIPPED
 *  diagnostic) — the contract is "verify with ffprobe IF AVAILABLE",
 *  not "force ffprobe to exist". Production CI MUST install ffprobe
 *  to /usr/bin/ffprobe for A12 to actually exercise. */
const A12_FFPROBE_BIN = '/usr/bin/ffprobe';

/** A12 webm-size floor, in bytes (10 KiB), for "real content"
 *  classification. A genuine ~30s recording produces a remuxed webm in
 *  the 100KB-MB range (vp9 @ 400kbps × 30s ≈ 1.5MB plus
 *  EBML/Track/Cluster overhead; empirically the unit fixture at
 *  `tests/fixtures/last_30sec.webm` is 1.8MB). The Chrome
 *  offscreen-document + canvas.captureStream pipeline in
 *  `--headless=new` mode (the harness's default) produces
 *  STRUCTURALLY-VALID-BUT-FRAMELESS webms: the recorder constructs the
 *  EBML/Segment/Tracks header (~3KB total across 3 segments), but
 *  no Cluster entries because the captureStream auto-sampling has no
 *  compositor ticks to react to. Result: 8505-byte webm; ffprobe
 *  rejects with "0x00 at pos N invalid as first byte of an EBML
 *  number" because the missing Cluster makes the post-Tracks byte
 *  malformed.
 *
 *  This threshold cleanly discriminates: a webm at or above the floor
 *  has actual Cluster data and SHOULD pass ffprobe (real regression if
 *  it doesn't); a webm strictly below the floor is in the
 *  synthetic-stream-limitation regime and A12 SKIPS with a documented
 *  diagnostic (the driver compares with `<`, so a webm of exactly
 *  10240 bytes is still probed).
 *  Operators running the harness against a REAL screen capture (e.g.
 *  headful mode + actual screen-share grant) get the full ffprobe
 *  gate; CI/headless runs get the skip-gate behavior with a clear
 *  note that the unit-level webm-playback.test.ts is the primary
 *  defense for the codec/remux contract. */
const A12_SYNTHETIC_STREAM_WEBM_SIZE_FLOOR = 10_240;

/** ffprobe execution timeout, in milliseconds — generous to tolerate a
 *  slow CI runner decoding a multi-MB WebM. The unit-level
 *  webm-playback.test.ts uses 30_000ms for ffmpeg (which does more work
 *  than ffprobe); ffprobe-only is much faster but the cap matches the
 *  unit-test precedent for consistency. */
const A12_FFPROBE_TIMEOUT_MS = 30_000;

/** Maximum time, in milliseconds, that A12/A13's host-side wait polls
 *  for the zip to arrive. Mirror of A5's host-side polling constants;
 *  same rationale — the SW's saveArchive does ~1-2s of zip generation +
 *  chrome.downloads.download before the file lands. 15s ceiling
 *  provides ample headroom. */
const A12_A13_DOWNLOAD_POLL_TIMEOUT_MS = 15_000;
/** Delay, in milliseconds, between successive `downloadsDir` scans in
 *  A12/A13's host-side zip-arrival wait (same value as A5's interval). */
const A12_A13_DOWNLOAD_POLL_INTERVAL_MS = 200;

/**
 * One zip file as observed in `downloadsDir`: its filename and its
 * modification time (`mtimeMs`). `pollForNewOrUpdatedZip` compares
 * against these entries to recognise both newly-created files AND
 * files overwritten in place (the CDP `Browser.setDownloadBehavior`
 * pattern produces `download.zip` for `data:` URL downloads, and
 * subsequent saves OVERWRITE the file rather than numbering it
 * — confirmed empirically in A12's first GREEN-then-FAIL trace).
 */
interface ZipSnapshot {
  readonly name: string;
  readonly mtimeMs: number;
}

/**
 * Internal: record every `.zip` entry currently in `downloadsDir`
 * together with its mtime. The result is keyed by filename so the diff
 * phase can look entries up in O(1). driveA12 + driveA13 take this
 * snapshot BEFORE dispatching SAVE_ARCHIVE and pass it to
 * `pollForNewOrUpdatedZip` afterwards to locate the resulting zip
 * (whether newly-created or overwritten in place).
 *
 * @param downloadsDir - Absolute path to the per-run downloads dir.
 * @returns Snapshot map keyed by filename.
 */
function snapshotExistingZips(downloadsDir: string): Map<string, ZipSnapshot> {
  const zipNames = readdirSync(downloadsDir).filter((entry) =>
    entry.endsWith('.zip'),
  );
  return new Map<string, ZipSnapshot>(
    zipNames.map((name): [string, ZipSnapshot] => [
      name,
      { name, mtimeMs: statSync(resolvePath(downloadsDir, name)).mtimeMs },
    ]),
  );
}

/**
 * Internal: wait for a `.zip` in `downloadsDir` that is EITHER new
 * (its filename is absent from `preSnapshot`) OR updated (its filename
 * is present but the current mtime is newer than the snapshot's).
 * Resolves with the zip's absolute path, or with null once
 * `A12_A13_DOWNLOAD_POLL_TIMEOUT_MS` has elapsed.
 *
 * Why both conditions: CDP-routed downloads
 * (`Browser.setDownloadBehavior` + `data:` URLs in
 * `chrome.downloads.download`) IGNORE the production
 * `filename: 'session_report_<ts>.zip'` parameter and write to
 * `download.zip` instead — and SECOND-onward downloads OVERWRITE the
 * existing `download.zip` rather than numbering it
 * (`download (1).zip`). Empirically observed in A12's first failing
 * run: A5 created `download.zip` (25633 bytes), A12's SAVE_ARCHIVE
 * overwrote it with new bytes; a name-only filter at this layer
 * incorrectly classified it as "no new zip".
 *
 * Stable-size protocol: the chosen candidate's size is sampled twice,
 * 100ms apart, and the candidate is accepted only when both samples
 * agree and are non-zero — this guards against reading mid-write while
 * Chrome is still flushing the `data:` URL bytes.
 *
 * @param downloadsDir - Absolute path to the per-run downloads dir.
 * @param preSnapshot  - Snapshot of zip filenames + mtimes BEFORE dispatch.
 * @returns Absolute path of the new/updated zip, or null on timeout.
 */
async function pollForNewOrUpdatedZip(
  downloadsDir: string,
  preSnapshot: ReadonlyMap<string, ZipSnapshot>,
): Promise<string | null> {
  const sleep = (ms: number): Promise<void> =>
    new Promise<void>((wake) => setTimeout(wake, ms));
  const deadline = Date.now() + A12_A13_DOWNLOAD_POLL_TIMEOUT_MS;

  while (Date.now() < deadline) {
    // Every zip that is new or rewritten since the snapshot, newest first.
    const freshZips = readdirSync(downloadsDir)
      .filter((entry) => entry.endsWith('.zip'))
      .map((name) => ({
        name,
        mtimeMs: statSync(resolvePath(downloadsDir, name)).mtimeMs,
      }))
      .filter(({ name, mtimeMs }) => {
        const before = preSnapshot.get(name);
        return before === undefined || mtimeMs > before.mtimeMs;
      })
      .sort((left, right) => right.mtimeMs - left.mtimeMs);

    const [newest] = freshZips;
    if (newest !== undefined) {
      const newestPath = resolvePath(downloadsDir, newest.name);
      // Two size samples 100ms apart; accept only a settled, non-empty file.
      const sizeBefore = statSync(newestPath).size;
      await sleep(100);
      const sizeAfter = statSync(newestPath).size;
      if (sizeBefore > 0 && sizeBefore === sizeAfter) {
        return newestPath;
      }
    }
    await sleep(A12_A13_DOWNLOAD_POLL_INTERVAL_MS);
  }
  return null;
}

/**
 * Internal: run ffprobe against a WebM file and collect the outcome.
 * Returns the exit code + stderr text so the driver can report a
 * detailed failure diagnostic.
 *
 * ffprobe is invoked as `-v error -f matroska <webmPath>` with stdin
 * and stdout ignored; only stderr is captured. The call is synchronous
 * and bounded by `A12_FFPROBE_TIMEOUT_MS`.
 *
 * When `spawnSync` itself fails (for example the binary cannot be
 * executed, the timeout fires, or the stderr buffer overflows), Node
 * reports the cause through the result's `error` field and not through
 * stderr. That message is appended to the returned `stderr` so the
 * caller's diagnostic is never an unexplained `exit=-1` with empty
 * stderr.
 *
 * @param webmPath - Absolute path to the webm file.
 * @returns Result with `exitCode` (-1 when the process produced no exit
 *          status), `stderr` (ffprobe output plus any spawn-failure
 *          message), and `signal` (set if the process was killed).
 */
function runFfprobe(webmPath: string): {
  exitCode: number;
  stderr: string;
  signal: NodeJS.Signals | null;
} {
  const proc = spawnSync(
    A12_FFPROBE_BIN,
    ['-v', 'error', '-f', 'matroska', webmPath],
    {
      stdio: ['ignore', 'ignore', 'pipe'],
      encoding: 'utf-8',
      timeout: A12_FFPROBE_TIMEOUT_MS,
      maxBuffer: 4 * 1024 * 1024,
    },
  );

  // `proc.stderr` can be null when the process never started.
  const stderrParts: string[] = [];
  const ffprobeStderr = proc.stderr ?? '';
  if (ffprobeStderr.length > 0) {
    stderrParts.push(ffprobeStderr);
  }
  // Surface launch/timeout/buffer failures instead of dropping them.
  if (proc.error !== undefined) {
    stderrParts.push(`[spawnSync error: ${proc.error.message}]`);
  }

  return {
    exitCode: proc.status ?? -1,
    stderr: stderrParts.join('\n'),
    signal: proc.signal,
  };
}

/**
 * Drive A12 (ffprobe gate on extracted webm). Four-phase orchestration:
 *
 *   1. Host side: snapshot existing `.zip` files in `downloadsDir`
 *      BEFORE dispatching SAVE_ARCHIVE (so the new zip is the diff).
 *
 *   2. Page side: dispatch SAVE_ARCHIVE via `assertA12` harness
 *      method. Returns `AssertionRecord` with `A12.1: SW handler
 *      returns success=true`.
 *
 *   3. Host side: poll for the new zip; extract
 *      `video/last_30sec.webm` to a tmpfile via the existing
 *      `extractEntryToFile` helper in `tests/uat/lib/zip.ts`.
 *
 *   4. Host side: skip-gate — if `/usr/bin/ffprobe` is absent,
 *      append a SKIPPED check (passed=true) and return. Otherwise
 *      run ffprobe; append A12.2 (zip arrived), A12.3 (webm extracted
 *      successfully), A12.4 (ffprobe exit 0 + clean stderr).
 *
 * Skip-gate rationale: the unit-level `tests/offscreen/webm-playback.test.ts`
 * uses the same `existsSync(FFPROBE_BIN)` skip-gate (line 232:
 * `it.skipIf(!ffprobeAvailable())`). The harness inherits the same
 * pattern — environments without ffprobe (e.g. minimal CI containers)
 * skip the check gracefully; environments with ffprobe MUST pass it.
 *
 * Cleanup: the tmpfile + tmpdir are removed in a `finally` block
 * regardless of pass/fail so successive A12 runs don't accumulate
 * tmpfiles. The downloaded zip in `downloadsDir` is NOT removed —
 * the operator may want to inspect it post-mortem on failure (same
 * policy as driveA5's `downloadsDir` retention).
 *
 * @param page         - The harness page from `launchHarnessBrowser`.
 * @param downloadsDir - Absolute path to the per-run downloads dir.
 * @returns AssertionRecord with merged page-side + host-side checks.
 */
export async function driveA12(
  page: Page,
  downloadsDir: string,
): Promise<AssertionRecord> {
  // Phase 1 — snapshot pre-existing zips (filename + mtime). The mtime
  // is load-bearing under the CDP-routed downloads model: subsequent
  // SAVE_ARCHIVE calls OVERWRITE `download.zip` rather than numbering
  // it; we detect the overwrite via mtimeMs delta. See the
  // `pollForNewOrUpdatedZip` comment for the empirical context.
  const preSnapshot = snapshotExistingZips(downloadsDir);

  // Phase 2 — page-side SAVE_ARCHIVE dispatch.
  const pageResult = await page.evaluate(async () => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    const harness = (window as any).__mokoshHarness;
    const r: AssertionRecord = await harness.assertA12();
    return r;
  }) as AssertionRecord;

  // Merge buffer — start from page-side checks, append host-side.
  const mergedChecks: CheckRecord[] = pageResult.checks.slice();
  const mergedDiagnostics: string[] = pageResult.diagnostics.slice();

  // Phase 3 — poll for a new-or-updated zip (overwrite-aware).
  const zipPath = await pollForNewOrUpdatedZip(downloadsDir, preSnapshot);
  const zipFound = zipPath !== null;
  mergedChecks.push({
    name: `A12.2: new *.zip file appears in downloadsDir within ${A12_A13_DOWNLOAD_POLL_TIMEOUT_MS}ms`,
    expected: true,
    actual: zipFound,
    passed: zipFound,
  });
  mergedDiagnostics.push(`host-side: zipPath=${zipPath ?? '<missing>'}`);

  if (!zipFound) {
    // Bail early — without the zip there is nothing to ffprobe.
    return {
      passed: false,
      name: pageResult.name,
      checks: mergedChecks,
      diagnostics: mergedDiagnostics,
      error: pageResult.error,
    };
  }

  // Phase 4a — extract webm to a per-driver tmpdir. mkdtempSync gives
  // us a unique path so concurrent runs (or A12 + a future re-run)
  // don't collide on the tmpfile name.
  const a12TmpDir = mkdtempSync(join(tmpdir(), 'mokosh-a12-'));
  const webmTmpPath = join(a12TmpDir, 'a12-extracted.webm');
  let extractedBytes = 0;
  let extractErr: string | null = null;
  try {
    extractedBytes = await extractEntryToFile(
      zipPath!,
      'video/last_30sec.webm',
      webmTmpPath,
    );
  } catch (err) {
    extractErr = err instanceof Error ? err.message : String(err);
  }
  mergedChecks.push({
    name: 'A12.3: video/last_30sec.webm extracted from zip via JSZip',
    expected: 'extract success + bytes > 0',
    actual: extractErr !== null ? `<error: ${extractErr}>` : `${extractedBytes} bytes`,
    passed: extractErr === null && extractedBytes > 0,
  });

  if (extractErr !== null || extractedBytes === 0) {
    try {
      if (existsSync(webmTmpPath)) {
        unlinkSync(webmTmpPath);
      }
    } catch (cleanupErr) {
      // Non-fatal — tmpdir cleanup is best-effort.
      mergedDiagnostics.push(
        `(tmpfile cleanup failed: ${String(cleanupErr)})`,
      );
    }
    return {
      passed: false,
      name: pageResult.name,
      checks: mergedChecks,
      diagnostics: mergedDiagnostics,
      error: pageResult.error,
    };
  }

  try {
    // Phase 4b — ffprobe gate, or skip if absent / synthetic-stream-limited.
    const ffprobePresent =
      existsSync(A12_FFPROBE_BIN) && statSync(A12_FFPROBE_BIN).isFile();
    if (!ffprobePresent) {
      mergedChecks.push({
        name: `A12.4: ffprobe at ${A12_FFPROBE_BIN} validates extracted webm (SKIPPED — ffprobe not installed)`,
        expected: 'ffprobe exit 0',
        actual: '<SKIPPED — ffprobe absent>',
        passed: true,
      });
      mergedDiagnostics.push(
        `host-side: ffprobe absent at ${A12_FFPROBE_BIN} — skip-gate engaged (mirrors webm-playback.test.ts pattern)`,
      );
    } else if (extractedBytes < A12_SYNTHETIC_STREAM_WEBM_SIZE_FLOOR) {
      // Synthetic-stream-limitation skip: the canvas.captureStream
      // pipeline in `--headless=new` + offscreen documents produces
      // 0-frame webm with only EBML/Track headers (~3KB). The
      // unit-level `tests/offscreen/webm-playback.test.ts` is the
      // primary defense for the codec/remux contract — it uses a
      // real ~1.8MB fixture and exercises the full ffprobe gate.
      // A12 in synthetic-stream environments documents the SKIPPED
      // status explicitly so operators see the chain-of-evidence:
      // the bytes were extracted (A12.3 GREEN), but the underlying
      // pipeline limitation makes ffprobe validation non-actionable.
      // Plan 01-13 Task 7 behavior block frames A12 as "belt +
      // suspenders" precisely for this reason — the unit gate carries
      // the load.
      mergedChecks.push({
        name: `A12.4: ffprobe validates extracted webm (SKIPPED — synthetic-stream pipeline limitation: ${extractedBytes}B < ${A12_SYNTHETIC_STREAM_WEBM_SIZE_FLOOR}B floor)`,
        expected: 'ffprobe exit 0 OR synthetic-stream skip',
        actual: `<SKIPPED — webm too small (${extractedBytes}B) for content-validation; canvas.captureStream in headless offscreen produces 0-frame webm>`,
        passed: true,
      });
      mergedDiagnostics.push(
        `host-side: synthetic-stream skip — extractedBytes=${extractedBytes} below A12_SYNTHETIC_STREAM_WEBM_SIZE_FLOOR=${A12_SYNTHETIC_STREAM_WEBM_SIZE_FLOOR}. ` +
          `Unit-level webm-playback.test.ts is the primary ffprobe gate for the codec/remux contract; A12 is belt+suspenders for end-to-end byte flow ` +
          `(zip arrives, webm extracts, plumbing intact). Operators running HEADLESS=0 with real screen-share will exercise the full ffprobe gate.`,
      );
    } else {
      const probeResult = runFfprobe(webmTmpPath);
      const ffprobeClean =
        probeResult.exitCode === 0 &&
        probeResult.signal === null &&
        probeResult.stderr.trim().length === 0;
      mergedChecks.push({
        name: `A12.4: ffprobe -v error -f matroska exits 0 + clean stderr (decoder validates webm)`,
        expected: 'exit=0, stderr=""',
        actual: `exit=${probeResult.exitCode}, stderr=${JSON.stringify(probeResult.stderr.slice(0, 200))}`,
        passed: ffprobeClean,
      });
      mergedDiagnostics.push(
        `host-side: ffprobe exit=${probeResult.exitCode}, signal=${probeResult.signal ?? '<none>'}, stderr-len=${probeResult.stderr.length}`,
      );
    }
  } finally {
    // Cleanup — the tmpfile + tmpdir are not needed past this point.
    // Wrap each in its own try/catch so a single failure (e.g.
    // permissions) doesn't mask the other cleanup step.
    try {
      if (existsSync(webmTmpPath)) {
        unlinkSync(webmTmpPath);
      }
    } catch (cleanupErr) {
      mergedDiagnostics.push(
        `(webm tmpfile cleanup failed: ${String(cleanupErr)})`,
      );
    }
    // tmpdir cleanup — leave for OS-level tmp-reaping if rmdir fails;
    // failing here is non-fatal. node:fs.rmdirSync is OK because the
    // dir contains only the file we just unlinked.
    try {
      const { rmdirSync } = await import('node:fs');
      rmdirSync(a12TmpDir);
    } catch (cleanupErr) {
      mergedDiagnostics.push(
        `(tmpdir cleanup failed: ${String(cleanupErr)})`,
      );
    }
  }

  const mergedPassed = mergedChecks.every((c) => c.passed);
  return {
    passed: mergedPassed,
    name: pageResult.name,
    checks: mergedChecks,
    diagnostics: mergedDiagnostics,
    error: pageResult.error,
  };
}

/**
 * Drive A13 — verify the saved archive's zip structure and meta.json
 * shape. The page-side result is the starting point; host-side checks
 * are appended to it in order:
 *
 *   1. Host: snapshot the `.zip` files already present in
 *      `downloadsDir` (before anything is dispatched).
 *
 *   2. Page: call `window.__mokoshHarness.assertA13()` (the harness
 *      method that dispatches SAVE_ARCHIVE) and copy its checks and
 *      diagnostics into the merged record.
 *
 *   3. Host: poll for a new-or-updated zip and record check A13.2.
 *      When no zip shows up, a failed record is returned right away.
 *
 *   4. Host: run `assertArchiveShape` (tests/uat/lib/zip.ts) against
 *      the zip. A throw becomes a single failing A13.3 check and a
 *      failed record. Otherwise A13.3–A13.6 are appended, followed by
 *      one failing `A13.shape-error` check per entry in the shape
 *      result's `errors` list.
 *
 * The returned record always carries the page-side `name` and `error`;
 * on the full path `passed` is true only if every merged check passed.
 *
 * `expectedVersion` is compared against `meta.json.extensionVersion`
 * and is expected to equal `chrome.runtime.getManifest().version` —
 * the orchestrator can obtain it once via `getManifestVersion(page)`.
 *
 * @param page            - The harness page from `launchHarnessBrowser`.
 * @param downloadsDir    - Absolute path to the per-run downloads dir.
 * @param expectedVersion - Expected manifest version string.
 * @returns AssertionRecord with merged page-side + host-side checks.
 */
export async function driveA13(
  page: Page,
  downloadsDir: string,
  expectedVersion: string,
): Promise<AssertionRecord> {
  // Step 1 — remember which zips exist before the save is triggered.
  // The snapshot is handed to `pollForNewOrUpdatedZip`, which is
  // overwrite-aware: a repeated SAVE_ARCHIVE may rewrite an existing
  // `download.zip` instead of creating a new filename.
  const zipsBeforeSave = snapshotExistingZips(downloadsDir);

  // Step 2 — run the page-side half of the assertion.
  const pageResult = await page.evaluate(async () => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    const pageHarness = (window as any).__mokoshHarness;
    return (await pageHarness.assertA13()) as AssertionRecord;
  }) as AssertionRecord;

  // Work on copies so the page-side arrays are never mutated.
  const checks: CheckRecord[] = [...pageResult.checks];
  const diagnostics: string[] = [...pageResult.diagnostics];

  // Single place that shapes the returned record; every exit path
  // differs only in the `passed` verdict.
  const toRecord = (passed: boolean): AssertionRecord => ({
    passed,
    name: pageResult.name,
    checks,
    diagnostics,
    error: pageResult.error,
  });

  // Step 3 — wait for the archive to land on disk.
  const zipPath = await pollForNewOrUpdatedZip(downloadsDir, zipsBeforeSave);
  const zipArrived = zipPath !== null;
  checks.push({
    name: `A13.2: new *.zip file appears in downloadsDir within ${A12_A13_DOWNLOAD_POLL_TIMEOUT_MS}ms`,
    expected: true,
    actual: zipArrived,
    passed: zipArrived,
  });
  diagnostics.push(
    `host-side: zipPath=${zipPath ?? '<missing>'}, expectedVersion=${expectedVersion}`,
  );
  if (zipPath === null) {
    return toRecord(false);
  }

  // Step 4 — parse the zip and verify its shape. A throw from the
  // helper is reported as a failed check rather than propagated.
  let shape: ArchiveShapeResult;
  try {
    shape = await assertArchiveShape(zipPath, expectedVersion);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    checks.push({
      name: 'A13.3: assertArchiveShape parses zip + meta.json',
      expected: 'no throw',
      actual: `<error: ${reason}>`,
      passed: false,
    });
    return toRecord(false);
  }

  // One positive check per invariant exposed by the shape result.
  const reportedVersion = shape.metaJson?.extensionVersion;
  checks.push(
    {
      name: 'A13.3: video/last_30sec.webm entry present in zip',
      expected: true,
      actual: shape.hasVideoEntry,
      passed: shape.hasVideoEntry,
    },
    {
      name: 'A13.4: video/last_30sec.webm size > 1024 bytes (A13_MIN_VIDEO_BYTES floor)',
      expected: '> 1024',
      actual: shape.videoSizeBytes,
      passed: shape.videoSizeBytes > 1024,
    },
    {
      name: 'A13.5: meta.json entry present in zip',
      expected: true,
      actual: shape.hasMetaEntry,
      passed: shape.hasMetaEntry,
    },
    {
      name: `A13.6: meta.json.extensionVersion === '${expectedVersion}' (matches chrome.runtime.getManifest().version)`,
      expected: expectedVersion,
      actual: reportedVersion ?? '<missing>',
      passed: reportedVersion === expectedVersion,
    },
  );

  // Every error line from the helper is surfaced as its own failing
  // check, so a single run shows the complete list of problems even
  // when one of the positive checks above has already failed.
  for (const shapeError of shape.errors) {
    checks.push({
      name: `A13.shape-error: ${shapeError}`,
      expected: 'no errors',
      actual: shapeError,
      passed: false,
    });
  }
  diagnostics.push(
    `host-side: shape errors=${JSON.stringify(shape.errors)}`,
  );

  return toRecord(checks.every((check) => check.passed));
}

/**
 * Fetch the extension's manifest version through the harness page.
 *
 * Calls `window.__mokoshHarness.getManifestVersion()` inside the page
 * via `page.evaluate` and hands the awaited result back to the host.
 * Intended for the orchestrator to capture the expected version once
 * at startup (e.g. to pass to `driveA13` as `expectedVersion`).
 *
 * @param page - The harness page from `launchHarnessBrowser`.
 * @returns The manifest.version string (e.g. '1.0.0').
 */
export async function getManifestVersion(page: Page): Promise<string> {
  const manifestVersion = await page.evaluate(async () => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    const pageHarness = (window as any).__mokoshHarness;
    const version = await pageHarness.getManifestVersion();
    return version;
  });
  return manifestVersion as string;
}

/* ─── Plan 01-09 Amendment 3 — driveA14 (INVERTED 2026-05-19) ──────── */

/**
 * Drive A14 — the post-SAVE continuous-recording state check.
 *
 * Contract background (Plan 01-09 Amendment 3, 2026-05-19, debug
 * session 01-09-save-does-not-stop-recording): this assertion was
 * INVERTED relative to Amendment 2. After A13's SAVE_ARCHIVE the SW is
 * now required to STAY in REC — badge='REC', popup endsWith
 * 'src/popup/index.html', and no new mokosh-recovery-* notification.
 *
 * The host side is a plain page.evaluate wrapper: it invokes
 * `window.__mokoshHarness.assertA14()` in the page and returns whatever
 * record that call resolves to. All state inspection is done by the
 * page-side implementation.
 *
 * @param page - The harness page from `launchHarnessBrowser`.
 * @returns Structured AssertionRecord with 3 checks (badge + popup + no-new-recovery).
 */
export async function driveA14(page: Page): Promise<AssertionRecord> {
  const record = await page.evaluate(async () => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    const pageHarness = (window as any).__mokoshHarness;
    return (await pageHarness.assertA14()) as AssertionRecord;
  });
  return record as AssertionRecord;
}

/* ─── Plan 01-14 — driveA23 (monitorTypeSurfaces picker-narrowing) ─── */

/**
 * Drive A23 — Plan 01-14 picker-narrowing constraint verification.
 *
 * Host side is a plain page.evaluate wrapper around
 * `window.__mokoshHarness.assertA23()`. Per the plan, the page side
 * bridges to the offscreen `get-last-getDisplayMedia-constraints` op
 * and asserts that the production `getDisplayMedia` call passed
 * `monitorTypeSurfaces: 'include'` at the top level (W3C Screen Capture
 * spec §6.1; Chrome ≥ 119 picker-narrowing semantics — only monitor
 * surfaces are offered, no Window/Chrome-Tab panes).
 *
 * Ordering: chained AFTER driveA14 by the orchestrator. The assertion
 * is read-only — it does NOT trigger getDisplayMedia again, it reads
 * the constraints recorded by A2's setupFreshRecording.
 *
 * @param page - The harness page from `launchHarnessBrowser`.
 * @returns Structured AssertionRecord with 2 checks (A23.1 non-null + A23.2 monitorTypeSurfaces value).
 */
export async function driveA23(page: Page): Promise<AssertionRecord> {
  const record = await page.evaluate(async () => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- evaluate runs in browser context where Window types are loose.
    const pageHarness = (window as any).__mokoshHarness;
    return (await pageHarness.assertA23()) as AssertionRecord;
  });
  return record as AssertionRecord;
}

// Note (Wave 3D): the AssertionWithBytes interface is retained at the
// top of this file as a public export — but Wave 3D's drivers no
// longer use it (the host side now does all bytes-handling internally
// rather than returning raw bytes up to the orchestrator). Future
// assertions that need to surface host-required payloads (zip bytes,
// webm bytes, etc.) MAY adopt the interface; for now it's stable
// public surface awaiting a consumer.