Milestone v1 (v2.0.0): Mokosh — Session Capture #1

Merged
strategy155 merged 297 commits from gsd/phase-04-harden-clean-up-optional into main 2026-05-31 15:34:17 +00:00
2 changed files with 302 additions and 85 deletions
Showing only changes of commit 7beb69059e - Show all commits

View File

@@ -0,0 +1,246 @@
// src/background/tab-url-tracker.ts
//
// Phase 2 Plan 02-03 — D-P2-02 tab-URL tracker.
//
// Maintains an internal, deduplicated, first-seen-ordered Set of tab URLs
// observed during the SW's lifetime via chrome.tabs.onActivated +
// chrome.tabs.onUpdated listeners. Feeds the multi-tab `meta.urls` array
// in `createArchive()` (closes audit P1 #10).
//
// Architectural decisions (frozen by .planning/phases/02-stabilize-export-
// pipeline/02-03-PLAN.md):
//
// - The tracker is FED PASSIVELY by Chrome's tab events: every time the
// operator switches tabs OR a tab navigates (changeInfo.url present),
// we get a URL — pass it through the filter, dedup, append.
//
// - It is SAVE-time AUGMENTED via `snapshotOpenTabs()` which calls
// `chrome.tabs.query({})` (requires the `tabs` permission per
// DEC-011 Amendment 1, 2026-05-20) to catch tabs the operator opened
// during the 30 s window but never activated.
//
// - Always-on charter (Plan 01-09 Amendment 3): `clearTabUrlsSeen()` is
// NOT called by createArchive. The tracker keeps accumulating across
// saves so the next save captures any tabs activated AFTER the prior
// save fired.
//
// - F2 (plan-checker iteration 1): no sentinel-URL fallback. If the
// tracker is empty (whole-desktop-no-tab session — operator captured
// a non-Chrome surface via desktopCapture only), `getTabUrlsSeen()`
// returns `[]` faithfully. createArchive emits `urls: []`.
//
// URL filter (per .planning/phases/02-stabilize-export-pipeline/02-CONTEXT.md
// `<specifics>`):
//
// INCLUDE: https://, http://, chrome-extension://
// EXCLUDE: chrome://, about:, devtools://, file://, blob:, data:, edge://
//
// (Implemented via a single positive-allow regex; default-deny everything
// else.)
//
// Tier-1 grep-gate compliance: the module exposes NO `_resetForTesting`
// / `_observeForTesting` ergonomic test hooks (which would have leaked
// into production bundles and violated the
// `tests/background/no-test-hooks-in-prod-bundle.test.ts` 13-entry gate).
// Unit tests drive the registered chrome.tabs.onUpdated callbacks
// directly via the chrome stub's `_callbacks` array; module state is
// reset between tests via vitest's `vi.resetModules()` in beforeEach.
//
// References:
// - .planning/phases/02-stabilize-export-pipeline/02-CONTEXT.md
// <decisions> D-P2-02 + <specifics> URL filter clause
// - .planning/PROJECT.md DEC-011 Amendment 1 (`tabs` permission)
// - Chrome tabs API: https://developer.chrome.com/docs/extensions/reference/api/tabs
import { Logger } from '../shared/logger';
// Module-scoped logger shared by every function in this file. It is
// constructed with the 'TabUrlTracker' label (presumably used as the log
// prefix — confirm in ../shared/logger). Only `.warn(...)` is used below.
const logger = new Logger('TabUrlTracker');
// ─── Module state ───────────────────────────────────────────────────────
// `tabUrlsSeen` is the dedup Set (O(1) membership checks).
// `firstSeenOrder` is the append-only list preserving first-seen ordering.
// Both stay in lockstep: a URL is added to BOTH only on its first
// observation (the Set membership check gates the array push).
//
// `initialized` guards against double-listener-registration if some caller
// (a unit test, a future re-init path) calls `initTabUrlTracker()` twice.
// Dedup index: answers "have we already recorded this URL?".
let tabUrlsSeen = new Set<string>();

// Append-only record of URLs in the order they were first observed.
let firstSeenOrder: string[] = [];

// Flips to true on the first initTabUrlTracker() call so that listeners
// are never registered twice.
let initialized = false;
// ─── URL filter ─────────────────────────────────────────────────────────
// Positive-allow: anything matching the regex is INCLUDED; everything else
// is DROPPED. Equivalent to the long-form switch over schemes but
// preserves a single canonical pattern that's easy to grep for.
//
// Test 4 of tests/background/meta-json-urls-schema.test.ts asserts this
// exact filter behaviour (https://example.com + chrome-extension://abc/...
// IN; chrome://newtab + about:blank OUT).
// Allow-list of URL schemes: http://, https:// and chrome-extension://.
// Anything that does not start with one of these is rejected.
const URL_SCHEME_ALLOW = /^(https?|chrome-extension):\/\//;

/**
 * Decide whether a candidate URL is eligible for tracking.
 *
 * A candidate is accepted only when it is a non-empty string AND its
 * prefix matches `URL_SCHEME_ALLOW`. The `typeof` guard is deliberate:
 * callers forward values straight from Chrome event payloads, which may
 * not honour the declared `string` type at runtime.
 *
 * @param url - Candidate URL string.
 * @returns `true` when the scheme is on the allow-list; `false` for
 *   empty, non-string, or any other scheme.
 */
function passesFilter(url: string): boolean {
  const isNonEmptyString = typeof url === 'string' && url.length > 0;
  return isNonEmptyString && URL_SCHEME_ALLOW.test(url);
}
/**
 * Record a URL the first time it is observed.
 *
 * The URL is stored only when it clears `passesFilter` and is not yet in
 * the dedup Set; in that case it is added to the Set and appended to the
 * first-seen list together, keeping the two containers in lockstep.
 * Any other call is a no-op, so repeated observations are harmless.
 *
 * @param url - Candidate URL string (may be empty or malformed; the
 *   filter rejects those).
 */
function addUrl(url: string): void {
  const isFirstSighting = passesFilter(url) && !tabUrlsSeen.has(url);
  if (isFirstSighting) {
    tabUrlsSeen.add(url);
    firstSeenOrder.push(url);
  }
}
/**
 * Hook the tracker up to Chrome's tab events.
 *
 * Registers one `chrome.tabs.onActivated` listener and one
 * `chrome.tabs.onUpdated` listener; each forwards the URL it learns about
 * to `addUrl`, which applies the scheme filter and dedup. Intended to be
 * called once at service-worker start-up.
 *
 * Calling it again is safe: the `initialized` flag makes every later call
 * log a warning and return without registering anything. The flag is set
 * BEFORE registration is attempted, so a failed registration is logged
 * but not retried by a subsequent call.
 *
 * Each registration sits in its own try/catch so that a failure on one
 * event does not prevent the other from being wired.
 */
export function initTabUrlTracker(): void {
  if (initialized) {
    logger.warn('initTabUrlTracker called twice — second call ignored');
    return;
  }
  initialized = true;

  // Tab switch. The event payload carries tabId + windowId only, so the
  // URL has to be looked up with chrome.tabs.get.
  const handleActivated = (activeInfo: { tabId: number; windowId: number }): void => {
    const reportLookupFailure = (reason: unknown): void => {
      logger.warn(`tabs.onActivated: chrome.tabs.get(${activeInfo.tabId}) failed:`, reason);
    };

    const recordResolvedTab = (tab: { url?: string } | undefined): void => {
      if (tab == null) return;
      const { url } = tab;
      if (typeof url === 'string' && url.length > 0) {
        addUrl(url);
        return;
      }
      logger.warn(`tabs.onActivated: tab ${activeInfo.tabId} has no .url (permission gap?)`);
    };

    try {
      // Promise.resolve tolerates both a Promise and a plain value, so a
      // stub that answers synchronously is handled the same way. The
      // trailing catch also covers anything thrown by recordResolvedTab.
      const lookup = chrome.tabs.get(activeInfo.tabId);
      Promise.resolve(lookup).then(recordResolvedTab).catch(reportLookupFailure);
    } catch (reason) {
      // chrome.tabs.get itself threw synchronously.
      reportLookupFailure(reason);
    }
  };

  // Tab state change. Deliberately not gated on status === 'complete', so
  // a URL change reported without a full page load is still recorded.
  const handleUpdated = (
    _tabId: number,
    changeInfo: { url?: string },
    tab: { url?: string },
  ): void => {
    // changeInfo.url wins when present; tab.url is the fallback for
    // payloads that leave changeInfo.url unset.
    let candidate = '';
    if (typeof changeInfo.url === 'string' && changeInfo.url.length > 0) {
      candidate = changeInfo.url;
    } else if (typeof tab.url === 'string') {
      candidate = tab.url;
    }
    if (candidate.length > 0) addUrl(candidate);
  };

  try {
    chrome.tabs.onActivated.addListener(handleActivated);
  } catch (err) {
    logger.warn('chrome.tabs.onActivated.addListener failed:', err);
  }

  try {
    chrome.tabs.onUpdated.addListener(handleUpdated);
  } catch (err) {
    logger.warn('chrome.tabs.onUpdated.addListener failed:', err);
  }
}
/**
 * Read the tracked URL list.
 *
 * The result holds every URL accepted since module load, or since the
 * most recent `clearTabUrlsSeen()` call, with these guarantees:
 *
 *  - Filtered: only https://, http:// and chrome-extension:// URLs are
 *    ever stored; chrome://, about:, devtools://, file://, blob:, data:
 *    and edge:// (and any other scheme) are rejected on the way in.
 *  - Deduplicated: each URL appears exactly once.
 *  - Ordered: earliest first-observed URL comes first.
 *
 * A fresh array is handed out on every call, so mutating the result has
 * no effect on the tracker's internal state.
 *
 * @returns A copy of the observed URL list. An empty array is the
 *   canonical answer when nothing has been observed (per the F2
 *   resolution: no sentinel URL is substituted).
 */
export function getTabUrlsSeen(): string[] {
  return [...firstSeenOrder];
}
/**
 * Feed the URL of every currently open tab into the tracker.
 *
 * Queries `chrome.tabs.query({})` and passes each non-empty string `url`
 * to `addUrl`. URLs already tracked keep their position; new ones are
 * appended in the order the query returned them. Per the module header,
 * this is meant to run at save time to pick up tabs that were opened but
 * never activated, and it relies on the `tabs` permission.
 *
 * Never rejects: a non-array query result is logged and ignored, and any
 * exception raised by the query or while processing its result is caught
 * and logged. The tracker keeps whatever it held before (plus any URLs
 * added before the failure); no placeholder URL is inserted.
 *
 * @returns Promise that settles once the snapshot has finished or failed
 *   softly.
 */
export async function snapshotOpenTabs(): Promise<void> {
  try {
    const openTabs = await chrome.tabs.query({});
    if (Array.isArray(openTabs)) {
      for (const entry of openTabs) {
        const { url: candidate } = entry as { url?: string };
        const hasUrl = typeof candidate === 'string' && candidate.length > 0;
        if (hasUrl) addUrl(candidate);
      }
    } else {
      logger.warn('snapshotOpenTabs: chrome.tabs.query did not return an Array');
    }
  } catch (err) {
    logger.warn('snapshotOpenTabs: chrome.tabs.query failed:', err);
  }
}
/**
 * Forget every URL recorded so far.
 *
 * Both containers are replaced with fresh empty instances (not emptied
 * in place), so the dedup Set and the first-seen list restart together.
 * Listener registration and the `initialized` flag are left untouched.
 *
 * Per the always-on charter (Plan 01-09 Amendment 3) the save path is not
 * meant to call this — the tracker keeps accumulating across saves. It is
 * kept for future use (manual session reset, test isolation outside
 * vi.resetModules).
 */
export function clearTabUrlsSeen(): void {
  firstSeenOrder = [];
  tabUrlsSeen = new Set<string>();
}

View File

@@ -595,45 +595,37 @@ describe('Plan 02-01 Task 2 RED: meta.json urls[] schema + dedup/filter + empty-
// it pins the dedup-and-order CONTRACT of getTabUrlsSeen() directly. // it pins the dedup-and-order CONTRACT of getTabUrlsSeen() directly.
// ──────────────────────────────────────────────────────────────────── // ────────────────────────────────────────────────────────────────────
it('meta.urls deduplicates repeated URLs and preserves first-seen order', async () => { it('meta.urls deduplicates repeated URLs and preserves first-seen order', async () => {
// RED gate: the tab-url-tracker module does not exist yet. The // Plan 02-03 GREEN path: tab-url-tracker landed without the optional
// dynamic import throws and expect.fail emits the precise marker // `_resetForTesting` / `_observeForTesting` ergonomic hooks (those
// for the Plan 02-03 GREEN-flip. // would have leaked into production bundles and violated the Tier-1
let tracker: typeof import('../../src/background/tab-url-tracker'); // 13-entry FORBIDDEN_HOOK_STRINGS gate at
try { // tests/background/no-test-hooks-in-prod-bundle.test.ts:108). The
tracker = await import('../../src/background/tab-url-tracker'); // canonical Plan-02-01-SUMMARY-anticipated alternative is to wire
} catch (e) { // chrome.tabs.onUpdated callbacks directly via the chrome stub.
expect.fail(
`src/background/tab-url-tracker.ts does not exist yet — this is the Plan 02-03 ` +
`GREEN gate. The module MUST export a 'getTabUrlsSeen(): string[]' function ` +
`fed by chrome.tabs.onUpdated + chrome.tabs.onActivated listeners with dedup ` +
`Set semantics + first-seen-first iteration order. Module import error: ${String(e)}`,
);
return;
}
// Below: GREEN-side contract (Plan 02-03 implementer codes against
// this). Tracker exposes a way to inject observations for testing,
// OR the test wires chrome.tabs.onUpdated callbacks directly.
// Both shapes are acceptable per the planner's "Claude's Discretion"
// delegation in CONTEXT.md `<decisions>`. For now we document the
// expectation and let the implementer pick.
// //
// The skeleton below assumes a `reset()` + observation API for // Mechanic: build the chrome stub, install it as globalThis.chrome,
// ergonomic test wiring. Plan 02-03's implementer SHOULD provide // import the tracker, call initTabUrlTracker() to register listeners
// such an API OR amend this test to wire callbacks directly. // on the stub, then invoke the captured callbacks with synthetic
type TrackerModule = { // tab-update events. The tracker treats each callback invocation as
getTabUrlsSeen: () => string[]; // a real Chrome event; dedup + ordering invariants kick in naturally.
_resetForTesting?: () => void; const stub = buildBgStub();
_observeForTesting?: (url: string) => void; (globalThis as unknown as GlobalWithBgChrome).chrome = stub;
const tracker = await import('../../src/background/tab-url-tracker');
tracker.initTabUrlTracker();
// Synthetic chrome.tabs.onUpdated events. The first 'A' establishes
// first-seen ordering; 'B' appends second; the repeated 'A' is
// deduplicated. Mirrors the Plan 02-01 RED-test contract verbatim.
const fireUpdate = (tabId: number, url: string): void => {
stub.tabs.onUpdated._callbacks.forEach((cb) =>
cb(tabId, { url }, { url }),
);
}; };
const t = tracker as TrackerModule; fireUpdate(1, 'https://a.example.com');
if (typeof t._resetForTesting === 'function') t._resetForTesting(); fireUpdate(2, 'https://b.example.com');
if (typeof t._observeForTesting === 'function') { fireUpdate(1, 'https://a.example.com');
t._observeForTesting('https://a.example.com');
t._observeForTesting('https://b.example.com'); expect(tracker.getTabUrlsSeen()).toEqual([
t._observeForTesting('https://a.example.com');
}
expect(t.getTabUrlsSeen()).toEqual([
'https://a.example.com', 'https://a.example.com',
'https://b.example.com', 'https://b.example.com',
]); ]);
@@ -646,33 +638,27 @@ describe('Plan 02-01 Task 2 RED: meta.json urls[] schema + dedup/filter + empty-
// RED today: missing module. GREEN after Plan 02-03. // RED today: missing module. GREEN after Plan 02-03.
// ──────────────────────────────────────────────────────────────────── // ────────────────────────────────────────────────────────────────────
it('meta.urls filters chrome:// and about: URLs and includes chrome-extension://', async () => { it('meta.urls filters chrome:// and about: URLs and includes chrome-extension://', async () => {
let tracker: typeof import('../../src/background/tab-url-tracker'); // Plan 02-03 GREEN path: same chrome.tabs.onUpdated driver pattern
try { // as Test 3 (preserves the 13-entry Tier-1 grep gate by not
tracker = await import('../../src/background/tab-url-tracker'); // requiring `_observeForTesting` hooks on the tracker module).
} catch (e) { const stub = buildBgStub();
expect.fail( (globalThis as unknown as GlobalWithBgChrome).chrome = stub;
`src/background/tab-url-tracker.ts does not exist yet — Plan 02-03 GREEN gate. ` + const tracker = await import('../../src/background/tab-url-tracker');
`Filter contract: include https + http + chrome-extension://; exclude chrome:// + ` + tracker.initTabUrlTracker();
`about: + (default-deny on devtools://, file://, edge://). Per CONTEXT.md ` +
`<specifics> URL filter clause. Module import error: ${String(e)}`,
);
return;
}
type TrackerModule = { const fireUpdate = (tabId: number, url: string): void => {
getTabUrlsSeen: () => string[]; stub.tabs.onUpdated._callbacks.forEach((cb) =>
_resetForTesting?: () => void; cb(tabId, { url }, { url }),
_observeForTesting?: (url: string) => void; );
}; };
const t = tracker as TrackerModule; // Filter test: include https + chrome-extension://; exclude chrome:// +
if (typeof t._resetForTesting === 'function') t._resetForTesting(); // about: per CONTEXT.md `<specifics>` URL filter clause.
if (typeof t._observeForTesting === 'function') { fireUpdate(1, 'https://example.com');
t._observeForTesting('https://example.com'); fireUpdate(2, 'chrome://newtab');
t._observeForTesting('chrome://newtab'); fireUpdate(3, 'about:blank');
t._observeForTesting('about:blank'); fireUpdate(4, 'chrome-extension://abc/popup.html');
t._observeForTesting('chrome-extension://abc/popup.html');
} expect(tracker.getTabUrlsSeen()).toEqual([
expect(t.getTabUrlsSeen()).toEqual([
'https://example.com', 'https://example.com',
'chrome-extension://abc/popup.html', 'chrome-extension://abc/popup.html',
]); ]);
@@ -692,30 +678,15 @@ describe('Plan 02-01 Task 2 RED: meta.json urls[] schema + dedup/filter + empty-
// Revision Log 2026-05-20). // Revision Log 2026-05-20).
// ──────────────────────────────────────────────────────────────────── // ────────────────────────────────────────────────────────────────────
it('meta.urls is exactly [] when the tracker observed no browser tabs (F2)', async () => { it('meta.urls is exactly [] when the tracker observed no browser tabs (F2)', async () => {
let tracker: typeof import('../../src/background/tab-url-tracker'); // Plan 02-03 GREEN path: import the tracker on a freshly-reset
try { // module graph (vi.resetModules in beforeEach), then immediately
tracker = await import('../../src/background/tab-url-tracker'); // query without firing any chrome.tabs.* callbacks. The empty
} catch (e) { // representation is the canonical whole-desktop-no-tab state.
expect.fail( const tracker = await import('../../src/background/tab-url-tracker');
`src/background/tab-url-tracker.ts does not exist yet — Plan 02-03 GREEN gate. ` +
`F2 contract: empty-tracker → meta.urls === [] (empty Array; NOT undefined; ` +
`NOT [extension-origin]; NOT null). Whole-desktop-no-tab recording is ` +
`meaningful per CONTEXT.md Revision Log 2026-05-20. Module import error: ${String(e)}`,
);
return;
}
type TrackerModule = {
getTabUrlsSeen: () => string[];
_resetForTesting?: () => void;
_observeForTesting?: (url: string) => void;
};
const t = tracker as TrackerModule;
if (typeof t._resetForTesting === 'function') t._resetForTesting();
// Deliberately observe nothing — simulating a session where no tab // Deliberately observe nothing — simulating a session where no tab
// events fired during the 30 s window. // events fired during the 30 s window.
expect(t.getTabUrlsSeen()).toEqual([]); expect(tracker.getTabUrlsSeen()).toEqual([]);
expect(t.getTabUrlsSeen()).not.toBeUndefined(); expect(tracker.getTabUrlsSeen()).not.toBeUndefined();
expect(t.getTabUrlsSeen()).not.toBeNull(); expect(tracker.getTabUrlsSeen()).not.toBeNull();
}); });
}); });