// mokosh/tests/uat/harness.test.ts

// tests/uat/harness.test.ts — Plan 01-13 Wave 3A orchestrator.
//
// Top-level entry for the production UAT harness. Drives all 14
// assertions sequentially against a SINGLE launched Chrome instance with
// a SINGLE harness page; bails on the first failure with a structured
// diagnostic dump. Exits 0 only when 14/14 GREEN.
//
// Wave 3A scope — wires A0+A1+A2+A3+A4+A6 (A6 via the proven Wave-2
// driver). A5+A7..A13 throw `NOT YET IMPLEMENTED — Wave 3<X> wires this`
// from `tests/uat/lib/harness-page-driver.ts`; the bail-on-first-failure
// loop stops at the first such throw.
//
// Wave 3B wires A5 (SAVE_ARCHIVE → zip on disk) + A7 (genuine
// RECORDING_ERROR → ERR + recovery notification). Wave 3C (this file's
// current state) wires A8 (Bug A canonical onStartup-notification
// regression rewind) + A9 (icon file sizes meet imageUtil floors) +
// A10 (manifest shape contract). Expected diagnostic:
// "11/14 GREEN: A0+A1+A2+A3+A4+A5+A6+A7+A8+A9+A10; bail at A11".
// Wave 3D wires A11+A12+A13 for 14/14 GREEN.
//
// The orchestrator structure is final from Wave 3A onward; future waves
// only fill in the assertion-driver stubs.
//
// Architectural commitments (per 01-11-SUMMARY.md, DO NOT REGRESS):
//   - Single browser, single recording per run (state machine: idle →
//     A1 reads idle → A2 transitions to REC → A3+A4 read REC →
//     A5 saves archive → A6 simulates user-stop → A7 surfaces ERR → ...).
//   - A0 (Tier-1 grep gate) runs PRE-FLIGHT before any Chrome launch.
//     Mirrors `tests/background/no-test-hooks-in-prod-bundle.test.ts`
//     FORBIDDEN_HOOK_STRINGS inventory. Belt-and-suspenders: the unit
//     test gate runs in `npm test` (~15s); the UAT-level A0 runs in
//     `npm run test:uat` (~60-90s). Same invariant; two independent
//     verification paths.
//   - Drive Chrome FROM INSIDE: each assertion is a single
//     `page.evaluate(() => window.__mokoshHarness.assertXX())` call;
//     no SW.evaluate, no popup-bridge (both falsified per 01-11-SUMMARY).
//
// References:
//   - puppeteer.launch + extension loading:
//     https://pptr.dev/api/puppeteer.launchoptions
//   - Node fs.readdirSync recursive walk:
//     https://nodejs.org/api/fs.html#fsreaddirsyncpath-options
//   - Node child_process.execFileSync:
//     https://nodejs.org/api/child_process.html#child_processexecfilesyncfile-args-options

import { execFileSync } from 'node:child_process';
import { existsSync, readFileSync, readdirSync, statSync } from 'node:fs';
import { dirname, resolve as resolvePath } from 'node:path';
import { fileURLToPath } from 'node:url';

import { launchHarnessBrowser } from './lib/launch';
import {
  driveA1,
  driveA2,
  driveA3,
  driveA4,
  driveA5,
  driveA6,
  driveA7,
  driveA8,
  driveA9,
  driveA10,
  driveA11,
  driveA12,
  driveA13,
} from './lib/harness-page-driver';
import {
  printAssertionResult,
  runAssertion,
  type AssertionRecord,
} from './lib/assertions';

/**
 * Literal strings that must never appear anywhere in the production
 * `dist/` output; the A0 pre-flight gate searches every file for each one.
 *
 * This list intentionally duplicates `FORBIDDEN_HOOK_STRINGS` in
 * `tests/background/no-test-hooks-in-prod-bundle.test.ts`
 * (belt-and-suspenders per `feedback-pre-checkpoint-bundle-gates.md`):
 * the unit-test gate fires under `npm test`, this one under
 * `npm run test:uat`. Update both lists together.
 */
const FORBIDDEN_HOOK_STRINGS: readonly string[] = [
  '__mokoshTest',
  'setCurrentStream',
  'setSegmentCountGetter',
  'installFakeDisplayMedia',
  'uninstallFakeDisplayMedia',
  'dispatchEndedOnTrack',
  'getSegmentCount',
  '__mokoshOffscreenQuery',
  'get-display-surface',
];

/**
 * Upper bound, in milliseconds, for the pre-flight production rebuild
 * (passed as the `timeout` option of the `npm run build` child process).
 * Matches the unit-gate value.
 */
const PROD_BUILD_TIMEOUT_MS = 60_000;

/**
 * Repo-root paths, derived from this file's own location rather than the
 * process working directory. This file lives under `tests/uat/`, so the
 * repo root is two directories up and the build output is `<repo>/dist`.
 */
const HARNESS_FILE_DIR = dirname(fileURLToPath(import.meta.url));
const REPO_ROOT = resolvePath(HARNESS_FILE_DIR, '..', '..');
const DIST_DIR = resolvePath(REPO_ROOT, 'dist');

/**
 * Lower-cased file extensions (leading dot included) that the grep walk
 * treats as binary and never scans for text. Mirrors the unit gate.
 */
const BINARY_EXTENSIONS: ReadonlySet<string> = new Set<string>([
  // Images and icons.
  '.png',
  '.jpg',
  '.jpeg',
  '.gif',
  '.ico',
  '.webp',
  // Fonts.
  '.woff',
  '.woff2',
  '.ttf',
  '.otf',
]);

/**
 * Walk the directory tree rooted at `root` and gather every regular file
 * in it. Symbolic links are ignored entirely (neither followed nor
 * listed). The result is sorted so diagnostics are stable across runs.
 *
 * @param root - Absolute directory path to walk.
 * @returns Sorted list of absolute file paths under `root`.
 */
function listAllFilesRecursive(root: string): ReadonlyArray<string> {
  const found: string[] = [];
  const pending: string[] = [root];

  // Explicit work-list instead of recursion; traversal order does not
  // matter because the result is sorted before returning.
  for (let current = pending.pop(); current !== undefined; current = pending.pop()) {
    for (const dirent of readdirSync(current, { withFileTypes: true })) {
      if (dirent.isSymbolicLink()) {
        continue;
      }
      const absolute = resolvePath(current, dirent.name);
      if (dirent.isDirectory()) {
        pending.push(absolute);
        continue;
      }
      if (dirent.isFile()) {
        found.push(absolute);
      }
    }
  }

  found.sort();
  return found;
}

/**
 * Count non-overlapping occurrences of `needle` in the given file.
 *
 * Returns 0 without reading the file when its extension is listed in
 * `BINARY_EXTENSIONS` (text matching against the UTF-8 decoding of a PNG
 * would be meaningless and could yield spurious matches), when the file
 * is empty, or when `needle` is the empty string.
 *
 * @param filePath - Absolute file path to scan.
 * @param needle   - Literal substring to count.
 * @returns Total occurrences in the file's text.
 * @throws If the file cannot be stat'ed or read.
 */
function countOccurrencesInFile(filePath: string, needle: string): number {
  // An empty needle "matches" at every index and never advances the scan
  // cursor below, so the loop would spin forever. Nothing to count.
  if (needle.length === 0) {
    return 0;
  }
  // Extension = everything from the last '.' in the path, lower-cased so
  // `.PNG` and `.png` are treated alike.
  const dotIdx = filePath.lastIndexOf('.');
  const ext = dotIdx >= 0 ? filePath.substring(dotIdx).toLowerCase() : '';
  if (BINARY_EXTENSIONS.has(ext)) {
    return 0;
  }
  const stat = statSync(filePath);
  if (stat.size === 0) {
    return 0;
  }
  const text = readFileSync(filePath, 'utf8');
  let count = 0;
  let from = 0;
  for (;;) {
    const idx = text.indexOf(needle, from);
    if (idx < 0) {
      break;
    }
    count += 1;
    // Resume after the whole match so occurrences never overlap.
    from = idx + needle.length;
  }
  return count;
}

/**
 * A0 — Tier-1 grep gate (UAT-level mirror of the unit-gate).
 *
 * Unless `SKIP_PROD_REBUILD === '1'`, first runs `npm run build` in the
 * repo root (so the build always targets the same checkout that
 * `DIST_DIR` points at, whatever the caller's working directory). Then
 * walks `dist/` and checks every file for every forbidden string. All
 * matches are reported in one pass (full enumeration, not bail-on-first)
 * so the operator sees the entire leak surface in a single failure.
 *
 * The gate fails — with a single sentinel entry whose `count` is 0 — when
 * `dist/` is missing or contains no regular files: an absent or empty
 * bundle must not be reported as "hook-free".
 *
 * @returns Structured A0 result: passed flag + list of (string, file) matches.
 * @throws If the build exits non-zero or exceeds `PROD_BUILD_TIMEOUT_MS`
 *         (propagated from `execFileSync`), or if `dist/` cannot be read.
 */
async function assertA0_GrepGate(): Promise<{
  passed: boolean;
  matches: Array<{ needle: string; filePath: string; count: number }>;
}> {
  if (process.env.SKIP_PROD_REBUILD !== '1') {
    process.stdout.write('A0: running `npm run build` (set SKIP_PROD_REBUILD=1 to skip)...\n');
    execFileSync('npm', ['run', 'build'], {
      // Anchor the build to the repo this file belongs to; DIST_DIR is
      // resolved the same way, so build output and scan target agree.
      cwd: REPO_ROOT,
      stdio: 'inherit',
      timeout: PROD_BUILD_TIMEOUT_MS,
    });
  } else {
    process.stdout.write('A0: SKIP_PROD_REBUILD=1 — using existing dist/\n');
  }

  if (!existsSync(DIST_DIR)) {
    return {
      passed: false,
      matches: [
        {
          needle: '<missing dist/>',
          filePath: DIST_DIR,
          count: 0,
        },
      ],
    };
  }

  const files = listAllFilesRecursive(DIST_DIR);
  if (files.length === 0) {
    // Nothing to scan means nothing was verified; do not pass vacuously.
    return {
      passed: false,
      matches: [
        {
          needle: '<empty dist/>',
          filePath: DIST_DIR,
          count: 0,
        },
      ],
    };
  }

  // Needle-major order: matches are grouped by forbidden string, with
  // files in sorted order inside each group.
  const matches: Array<{ needle: string; filePath: string; count: number }> = [];
  for (const needle of FORBIDDEN_HOOK_STRINGS) {
    for (const filePath of files) {
      const count = countOccurrencesInFile(filePath, needle);
      if (count > 0) {
        matches.push({ needle, filePath, count });
      }
    }
  }
  return { passed: matches.length === 0, matches };
}

/**
 * Orchestrator entry point. Runs the A0 pre-flight, launches the browser,
 * walks the A1..A13 driver list in order, stops at the first assertion
 * that fails or throws, always closes the browser, then prints a summary.
 *
 * @returns Process exit code: 0 on 14/14 GREEN, 1 on any failure.
 */
async function main(): Promise<number> {
  type HarnessPage = import('puppeteer').Page;
  type PageDriver = (page: HarnessPage) => Promise<AssertionRecord>;

  const rule = '='.repeat(72) + '\n';

  process.stdout.write('\nMokosh Plan 01-13 — UAT harness orchestrator\n');
  process.stdout.write('Architecture: A0 pre-flight + extension-internal page driver (A1..A13)\n');
  process.stdout.write(rule);

  // A0 runs against the built dist/ and needs no browser, so it goes first.
  const grepGate = await assertA0_GrepGate();
  if (!grepGate.passed) {
    process.stderr.write('\nA0 FAIL: production bundle hook-string leak detected.\n');
    grepGate.matches.forEach(({ needle, filePath, count }) => {
      const plural = count === 1 ? '' : 's';
      process.stderr.write(`  - '${needle}' in ${filePath} (${count} occurrence${plural})\n`);
    });
    process.stderr.write(
      '\nThe Vite mode gate on the test-hook imports has regressed; verify\n' +
        'src/background/index.ts + src/offscreen/recorder.ts still gate via `__MOKOSH_UAT__`.\n',
    );
    return 1;
  }
  process.stdout.write('A0: GREEN (production bundle hook-free)\n\n');

  process.stdout.write('Launching Chrome + opening harness page...\n');
  const handles = await launchHarnessBrowser();
  process.stdout.write(`Extension id: ${handles.extensionId}\n`);
  process.stdout.write(`Downloads dir: ${handles.downloadsDir}\n\n`);

  // Driver registry. It is built only after the launch resolves so the A5
  // adapter can close over `handles.downloadsDir`.
  //
  // Order is significant — one browser, one recording per run:
  //   A1 (idle) → A2 (REC start) → A3 (displaySurface) → A4 (popup pinned)
  //   → A5 (SAVE_ARCHIVE) → A6 (Bug B dispatch-ended) → A7 (genuine error)
  //   → A8 (Bug A onStartup) → A9 (icon sizes) → A10 (manifest)
  //   → A11 (35s segments) → A12 (ffprobe) → A13 (zip shape).
  //
  // A6 sits mid-list: the prototype's assertA6 performs its own
  // ensureOffscreen + START_RECORDING (idempotent w.r.t. A2's recording)
  // before dispatch-ended, and the recording is torn down afterwards, so
  // A7 onward run against post-stop state.
  //
  // Signature adapters keep every entry a uniform `Page -> AssertionRecord`:
  //   - A5 additionally needs `downloadsDir` for host-side fs polling.
  //   - A12/A13 return `AssertionWithBytes`; their extra fields are not
  //     surfaced by the orchestrator yet, hence the assertions below.
  //
  // Any driver that is still a stub throws NOT YET IMPLEMENTED, which the
  // loop below treats like any other failure. `npx tsx tests/uat/a6.test.ts`
  // remains the standalone entry for iterating on A6 in isolation.
  const driveA5WithDownloads: PageDriver = (page) => driveA5(page, handles.downloadsDir);

  const drivers: ReadonlyArray<{ readonly name: string; readonly drive: PageDriver }> = [
    { name: 'A1', drive: driveA1 },
    { name: 'A2', drive: driveA2 },
    { name: 'A3', drive: driveA3 },
    { name: 'A4', drive: driveA4 },
    { name: 'A5', drive: driveA5WithDownloads },
    { name: 'A6', drive: driveA6 },
    { name: 'A7', drive: driveA7 },
    { name: 'A8', drive: driveA8 },
    { name: 'A9', drive: driveA9 },
    { name: 'A10', drive: driveA10 },
    { name: 'A11', drive: driveA11 },
    { name: 'A12', drive: driveA12 as PageDriver },
    { name: 'A13', drive: driveA13 as PageDriver },
  ];

  const consoleBuffers = { swConsole: handles.swConsole, offConsole: handles.offConsole };
  const outcomes: Array<{ name: string; passed: boolean; error?: string }> = [];
  let bailReason: string | null = null;
  let greenCount = 0;

  try {
    for (const { name: label, drive: runDriver } of drivers) {
      process.stdout.write(`--- ${label} ---\n`);

      let record: AssertionRecord | null = null;
      let thrownMessage: string | undefined;
      try {
        record = await runAssertion(label, () => runDriver(handles.harnessPage), consoleBuffers);
        printAssertionResult(record);
      } catch (caught) {
        // Either an unimplemented stub (NOT YET IMPLEMENTED) or a genuine
        // CDP/Puppeteer failure such as a closed page or a timeout. The
        // two are not distinguished: both end the run.
        thrownMessage = caught instanceof Error ? caught.message : String(caught);
        process.stderr.write(`*** ${label} THREW: ${thrownMessage}\n`);
      }

      const green = record !== null && record.passed && thrownMessage === undefined;
      outcomes.push({ name: label, passed: green, error: thrownMessage });
      if (green) {
        greenCount += 1;
        continue;
      }
      bailReason = thrownMessage ?? `${label} failed; see structured checks above`;
      break;
    }
  } finally {
    // Best-effort shutdown: a failing close must not mask the real result.
    try {
      await handles.browser.close();
    } catch (closeErr) {
      process.stderr.write(`(non-fatal: browser close threw: ${String(closeErr)})\n`);
    }
  }

  // A0 is not in `drivers`, so it adds one to both sides of the tally
  // (it must have passed for execution to get this far): 1 + 13 = 14.
  const total = drivers.length + 1;
  const finalPassed = greenCount + 1;
  const bailSuffix = bailReason !== null ? ` (bailed: ${bailReason})` : '';

  process.stdout.write('\n' + rule);
  process.stdout.write(`UAT harness: ${finalPassed}/${total} assertions passed${bailSuffix}\n`);
  for (const outcome of outcomes) {
    const mark = outcome.passed ? '[PASS]' : '[FAIL]';
    const tail = outcome.error !== undefined ? ` — ${outcome.error}` : '';
    process.stdout.write(`  ${mark} ${outcome.name}${tail}\n`);
  }
  if (bailReason !== null) {
    // A bail means at least one outcome was recorded; the last one is the
    // assertion that stopped the run, and everything after it was skipped.
    const bailedAt = outcomes[outcomes.length - 1].name;
    for (const skipped of drivers.slice(outcomes.length)) {
      process.stdout.write(`  [SKIP] ${skipped.name} (not reached — bailed at ${bailedAt})\n`);
    }
  }
  process.stdout.write(rule);

  return finalPassed === total ? 0 : 1;
}

// Script entry: run the orchestrator and exit with the status it reports.
process.exit(await main());