diff --git a/src/background/tab-url-tracker.ts b/src/background/tab-url-tracker.ts new file mode 100644 index 0000000..64811e6 --- /dev/null +++ b/src/background/tab-url-tracker.ts @@ -0,0 +1,246 @@ +// src/background/tab-url-tracker.ts +// +// Phase 2 Plan 02-03 — D-P2-02 tab-URL tracker. +// +// Maintains an internal, deduplicated, first-seen-ordered Set of tab URLs +// observed during the SW's lifetime via chrome.tabs.onActivated + +// chrome.tabs.onUpdated listeners. Feeds the multi-tab `meta.urls` array +// in `createArchive()` (closes audit P1 #10). +// +// Architectural decisions (frozen by .planning/phases/02-stabilize-export- +// pipeline/02-03-PLAN.md): +// +// - The tracker is FED PASSIVELY by Chrome's tab events: every time the +// operator switches tabs OR a tab navigates (changeInfo.url present), +// we get a URL — pass it through the filter, dedup, append. +// +// - It is SAVE-time AUGMENTED via `snapshotOpenTabs()` which calls +// `chrome.tabs.query({})` (requires the `tabs` permission per +// DEC-011 Amendment 1, 2026-05-20) to catch tabs the operator opened +// during the 30 s window but never activated. +// +// - Always-on charter (Plan 01-09 Amendment 3): `clearTabUrlsSeen()` is +// NOT called by createArchive. The tracker keeps accumulating across +// saves so the next save captures any tabs activated AFTER the prior +// save fired. +// +// - F2 (plan-checker iteration 1): no sentinel-URL fallback. If the +// tracker is empty (whole-desktop-no-tab session — operator captured +// a non-Chrome surface via desktopCapture only), `getTabUrlsSeen()` +// returns `[]` faithfully. createArchive emits `urls: []`. +// +// URL filter (per .planning/phases/02-stabilize-export-pipeline/02-CONTEXT.md +// ``): +// +// INCLUDE: https://, http://, chrome-extension:// +// EXCLUDE: chrome://, about:, devtools://, file://, blob:, data:, edge:// +// +// (Implemented via a single positive-allow regex; default-deny everything +// else.) 
//
// Tier-1 grep-gate compliance: the module exposes NO `_resetForTesting`
// / `_observeForTesting` ergonomic test hooks (which would have leaked
// into production bundles and violated the
// `tests/background/no-test-hooks-in-prod-bundle.test.ts` 13-entry gate).
// Unit tests drive the registered chrome.tabs.onUpdated callbacks
// directly via the chrome stub's `_callbacks` array; module state is
// reset between tests via vitest's `vi.resetModules()` in beforeEach.
//
// References:
//   - .planning/phases/02-stabilize-export-pipeline/02-CONTEXT.md
//     D-P2-02 + URL filter clause
//   - .planning/PROJECT.md DEC-011 Amendment 1 (`tabs` permission)
//   - Chrome tabs API: https://developer.chrome.com/docs/extensions/reference/api/tabs

import { Logger } from '../shared/logger';

const logger = new Logger('TabUrlTracker');

// ─── Module state ───────────────────────────────────────────────────────
// `tabUrlsSeen` is the dedup Set (O(1) membership checks).
// `firstSeenOrder` is the append-only list preserving first-seen ordering.
// Both stay in lockstep: a URL is added to BOTH only on its first
// observation (the Set membership check gates the array push).
//
// `initialized` guards against double-listener-registration if some caller
// (a unit test, a future re-init path) calls `initTabUrlTracker()` twice.
//
// The element type is spelled out on both sides: a bare `Set` annotation
// does not compile (generic type requires a type argument), and a bare
// `new Set()` would otherwise be inferred as `Set<unknown>`.
let tabUrlsSeen: Set<string> = new Set<string>();
let firstSeenOrder: string[] = [];
let initialized = false;

// ─── URL filter ─────────────────────────────────────────────────────────
// Positive-allow: anything matching the regex is INCLUDED; everything else
// is DROPPED. Equivalent to the long-form switch over schemes but
// preserves a single canonical pattern that's easy to grep for.
//
// Test 4 of tests/background/meta-json-urls-schema.test.ts asserts this
// exact filter behaviour (https://example.com + chrome-extension://abc/...
// IN; chrome://newtab + about:blank OUT).
// Single positive-allow pattern: a URL is kept only when it starts with
// `http://`, `https://` or `chrome-extension://`. Every other scheme
// (chrome://, about:, devtools://, file://, blob:, data:, edge://, …)
// fails the match and is therefore dropped by default.
const URL_SCHEME_ALLOW = /^(https?|chrome-extension):\/\//;

/**
 * Whether a URL passes the inclusion filter.
 *
 * @param url - Candidate URL string.
 * @returns true if the URL's scheme is in the allow-list; false otherwise
 *          (including for empty or non-string input).
 */
function passesFilter(url: string): boolean {
  // Runtime type check is deliberate: values arrive from Chrome event
  // payloads and test stubs, so the static `string` type is not trusted.
  if (typeof url !== 'string' || url.length === 0) return false;
  return URL_SCHEME_ALLOW.test(url);
}

/**
 * Add a URL to the tracker if it passes the filter and is not already
 * present. Idempotent: re-observation of an existing URL is a no-op.
 *
 * @param url - Candidate URL string (may be empty or malformed —
 *              defensively filtered).
 */
function addUrl(url: string): void {
  if (!passesFilter(url)) return;
  if (tabUrlsSeen.has(url)) return;
  // Set and array are updated together so they never drift apart.
  tabUrlsSeen.add(url);
  firstSeenOrder.push(url);
}

/**
 * chrome.tabs.onActivated handler: fires when the user switches to a
 * different tab. The event payload carries only tabId + windowId, so the
 * tab is looked up via chrome.tabs.get to obtain its URL.
 *
 * Failures (sync throw or rejected lookup) are logged and swallowed; a
 * resolved tab without a usable `.url` is logged as a possible
 * permission gap.
 *
 * @param activeInfo - Payload of the onActivated event.
 */
function handleTabActivated(activeInfo: { tabId: number; windowId: number }): void {
  const onTabResolved = (tab: { url?: string } | null | undefined): void => {
    if (tab == null) return;
    if (typeof tab.url !== 'string' || tab.url.length === 0) {
      logger.warn(`tabs.onActivated: tab ${activeInfo.tabId} has no .url (permission gap?)`);
      return;
    }
    addUrl(tab.url);
  };
  const onTabFailed = (err: unknown): void => {
    logger.warn(`tabs.onActivated: chrome.tabs.get(${activeInfo.tabId}) failed:`, err);
  };
  try {
    // chrome.tabs.get returns a Promise in MV3. The Promise.resolve
    // wrapper accepts both Promise and plain values (defensive against
    // older stubs that might return synchronously).
    void Promise.resolve(chrome.tabs.get(activeInfo.tabId))
      .then(onTabResolved)
      .catch(onTabFailed);
  } catch (err) {
    onTabFailed(err);
  }
}

/**
 * chrome.tabs.onUpdated handler. NOT gated on
 * `changeInfo.status === 'complete'`, so SPA-style routing changes that
 * emit `changeInfo.url` without a top-level load event are still captured.
 *
 * Prefers `changeInfo.url` (the transition URL) and falls back to
 * `tab.url` when the change payload carries no URL. The `tab` argument is
 * treated as optional so a caller/stub that omits it cannot make the
 * listener throw.
 *
 * @param _tabId - Id of the updated tab (unused).
 * @param changeInfo - Change payload; only `.url` is read.
 * @param tab - Current tab snapshot; only `.url` is read.
 */
function handleTabUpdated(
  _tabId: number,
  changeInfo: { url?: string },
  tab?: { url?: string },
): void {
  const transitionUrl = changeInfo.url;
  const fallbackUrl = tab?.url;
  const candidate =
    typeof transitionUrl === 'string' && transitionUrl.length > 0
      ? transitionUrl
      : typeof fallbackUrl === 'string'
        ? fallbackUrl
        : '';
  if (candidate.length > 0) addUrl(candidate);
}

/**
 * Initialize the tab-URL tracker. Registers chrome.tabs.onActivated +
 * chrome.tabs.onUpdated listeners that maintain an internal Set of URLs
 * observed during the SW's lifetime. Must be called once at SW init.
 *
 * Idempotent — subsequent calls return early after logging a warning.
 * Each listener registration is wrapped in its own try/catch so a failure
 * of one does not prevent the other from being registered.
 *
 * D-P2-02 binding: captures the operator's multi-tab context, not just
 * the active-at-save tab.
 */
export function initTabUrlTracker(): void {
  if (initialized) {
    logger.warn('initTabUrlTracker called twice — second call ignored');
    return;
  }
  initialized = true;

  try {
    chrome.tabs.onActivated.addListener(handleTabActivated);
  } catch (err) {
    logger.warn('chrome.tabs.onActivated.addListener failed:', err);
  }

  try {
    chrome.tabs.onUpdated.addListener(handleTabUpdated);
  } catch (err) {
    logger.warn('chrome.tabs.onUpdated.addListener failed:', err);
  }
}

/**
 * Return the deduplicated, first-seen-ordered, filtered list of tab URLs
 * observed since module init OR since the last `clearTabUrlsSeen()` call.
 *
 * Filter (URL filter clause of
 * .planning/phases/02-stabilize-export-pipeline/02-CONTEXT.md):
 *   - INCLUDE: https://, http://, chrome-extension://
 *   - EXCLUDE: everything else, e.g. chrome://, about:, devtools://,
 *     file://, blob:, data:, edge://
 *
 * Dedup: each URL appears exactly once.
 * Order: first-seen-first.
 *
 * Always returns a NEW array (slice) — caller cannot mutate the internal
 * state.
 *
 * @returns A copy of the observed URL list. Empty array IS the canonical
 *          representation of a whole-desktop-no-tab session (per F2
 *          resolution from plan-checker iteration 1).
 */
export function getTabUrlsSeen(): string[] {
  return firstSeenOrder.slice();
}

/**
 * Snapshot every currently-open tab's URL into the tracker. Intended to be
 * called at save time as a defensive fallback: captures tabs that were
 * opened but never activated (so the onActivated listener never fired).
 * Dedup + first-seen ordering are preserved — URLs already tracked keep
 * their position; new ones append in chrome.tabs.query() result order.
 *
 * Requires `tabs` permission (DEC-011 Amendment 1, 2026-05-20).
 *
 * Errors are caught + logged; the tracker keeps whatever state it already
 * had. Per F2: if the snapshot leaves the tracker empty, nothing is
 * inserted as a placeholder.
 *
 * @returns Promise that resolves once the snapshot completes (or fails
 *          softly via the catch path). Never rejects.
 */
export async function snapshotOpenTabs(): Promise<void> {
  try {
    const tabs = await chrome.tabs.query({});
    if (!Array.isArray(tabs)) {
      logger.warn('snapshotOpenTabs: chrome.tabs.query did not return an Array');
      return;
    }
    for (const tab of tabs) {
      const url = (tab as { url?: string } | null | undefined)?.url;
      if (typeof url === 'string' && url.length > 0) addUrl(url);
    }
  } catch (err) {
    logger.warn('snapshotOpenTabs: chrome.tabs.query failed:', err);
  }
}

/**
 * Clear the internal Set + first-seen array by replacing both with fresh
 * empty instances. Listener registration is not affected. Reserved for
 * future use (manual session-reset, test isolation outside
 * vi.resetModules).
 */
export function clearTabUrlsSeen(): void {
  tabUrlsSeen = new Set<string>();
  firstSeenOrder = [];
}
The module MUST export a 'getTabUrlsSeen(): string[]' function ` + - `fed by chrome.tabs.onUpdated + chrome.tabs.onActivated listeners with dedup ` + - `Set semantics + first-seen-first iteration order. Module import error: ${String(e)}`, - ); - return; - } - - // Below: GREEN-side contract (Plan 02-03 implementer codes against - // this). Tracker exposes a way to inject observations for testing, - // OR the test wires chrome.tabs.onUpdated callbacks directly. - // Both shapes are acceptable per the planner's "Claude's Discretion" - // delegation in CONTEXT.md ``. For now we document the - // expectation and let the implementer pick. + // Plan 02-03 GREEN path: tab-url-tracker landed without the optional + // `_resetForTesting` / `_observeForTesting` ergonomic hooks (those + // would have leaked into production bundles and violated the Tier-1 + // 13-entry FORBIDDEN_HOOK_STRINGS gate at + // tests/background/no-test-hooks-in-prod-bundle.test.ts:108). The + // canonical Plan-02-01-SUMMARY-anticipated alternative is to wire + // chrome.tabs.onUpdated callbacks directly via the chrome stub. // - // The skeleton below assumes a `reset()` + observation API for - // ergonomic test wiring. Plan 02-03's implementer SHOULD provide - // such an API OR amend this test to wire callbacks directly. - type TrackerModule = { - getTabUrlsSeen: () => string[]; - _resetForTesting?: () => void; - _observeForTesting?: (url: string) => void; + // Mechanic: build the chrome stub, install it as globalThis.chrome, + // import the tracker, call initTabUrlTracker() to register listeners + // on the stub, then invoke the captured callbacks with synthetic + // tab-update events. The tracker treats each callback invocation as + // a real Chrome event; dedup + ordering invariants kick in naturally. 
+ const stub = buildBgStub(); + (globalThis as unknown as GlobalWithBgChrome).chrome = stub; + const tracker = await import('../../src/background/tab-url-tracker'); + tracker.initTabUrlTracker(); + + // Synthetic chrome.tabs.onUpdated events. The first 'A' establishes + // first-seen ordering; 'B' appends second; the repeated 'A' is + // deduplicated. Mirrors the Plan 02-01 RED-test contract verbatim. + const fireUpdate = (tabId: number, url: string): void => { + stub.tabs.onUpdated._callbacks.forEach((cb) => + cb(tabId, { url }, { url }), + ); }; - const t = tracker as TrackerModule; - if (typeof t._resetForTesting === 'function') t._resetForTesting(); - if (typeof t._observeForTesting === 'function') { - t._observeForTesting('https://a.example.com'); - t._observeForTesting('https://b.example.com'); - t._observeForTesting('https://a.example.com'); - } - expect(t.getTabUrlsSeen()).toEqual([ + fireUpdate(1, 'https://a.example.com'); + fireUpdate(2, 'https://b.example.com'); + fireUpdate(1, 'https://a.example.com'); + + expect(tracker.getTabUrlsSeen()).toEqual([ 'https://a.example.com', 'https://b.example.com', ]); @@ -646,33 +638,27 @@ describe('Plan 02-01 Task 2 RED: meta.json urls[] schema + dedup/filter + empty- // RED today: missing module. GREEN after Plan 02-03. // ──────────────────────────────────────────────────────────────────── it('meta.urls filters chrome:// and about: URLs and includes chrome-extension://', async () => { - let tracker: typeof import('../../src/background/tab-url-tracker'); - try { - tracker = await import('../../src/background/tab-url-tracker'); - } catch (e) { - expect.fail( - `src/background/tab-url-tracker.ts does not exist yet — Plan 02-03 GREEN gate. ` + - `Filter contract: include https + http + chrome-extension://; exclude chrome:// + ` + - `about: + (default-deny on devtools://, file://, edge://). Per CONTEXT.md ` + - ` URL filter clause. 
Module import error: ${String(e)}`, - ); - return; - } + // Plan 02-03 GREEN path: same chrome.tabs.onUpdated driver pattern + // as Test 3 (preserves the 13-entry Tier-1 grep gate by not + // requiring `_observeForTesting` hooks on the tracker module). + const stub = buildBgStub(); + (globalThis as unknown as GlobalWithBgChrome).chrome = stub; + const tracker = await import('../../src/background/tab-url-tracker'); + tracker.initTabUrlTracker(); - type TrackerModule = { - getTabUrlsSeen: () => string[]; - _resetForTesting?: () => void; - _observeForTesting?: (url: string) => void; + const fireUpdate = (tabId: number, url: string): void => { + stub.tabs.onUpdated._callbacks.forEach((cb) => + cb(tabId, { url }, { url }), + ); }; - const t = tracker as TrackerModule; - if (typeof t._resetForTesting === 'function') t._resetForTesting(); - if (typeof t._observeForTesting === 'function') { - t._observeForTesting('https://example.com'); - t._observeForTesting('chrome://newtab'); - t._observeForTesting('about:blank'); - t._observeForTesting('chrome-extension://abc/popup.html'); - } - expect(t.getTabUrlsSeen()).toEqual([ + // Filter test: include https + chrome-extension://; exclude chrome:// + + // about: per CONTEXT.md `` URL filter clause. + fireUpdate(1, 'https://example.com'); + fireUpdate(2, 'chrome://newtab'); + fireUpdate(3, 'about:blank'); + fireUpdate(4, 'chrome-extension://abc/popup.html'); + + expect(tracker.getTabUrlsSeen()).toEqual([ 'https://example.com', 'chrome-extension://abc/popup.html', ]); @@ -692,30 +678,15 @@ describe('Plan 02-01 Task 2 RED: meta.json urls[] schema + dedup/filter + empty- // Revision Log 2026-05-20). 
// ──────────────────────────────────────────────────────────────────── it('meta.urls is exactly [] when the tracker observed no browser tabs (F2)', async () => { - let tracker: typeof import('../../src/background/tab-url-tracker'); - try { - tracker = await import('../../src/background/tab-url-tracker'); - } catch (e) { - expect.fail( - `src/background/tab-url-tracker.ts does not exist yet — Plan 02-03 GREEN gate. ` + - `F2 contract: empty-tracker → meta.urls === [] (empty Array; NOT undefined; ` + - `NOT [extension-origin]; NOT null). Whole-desktop-no-tab recording is ` + - `meaningful per CONTEXT.md Revision Log 2026-05-20. Module import error: ${String(e)}`, - ); - return; - } - - type TrackerModule = { - getTabUrlsSeen: () => string[]; - _resetForTesting?: () => void; - _observeForTesting?: (url: string) => void; - }; - const t = tracker as TrackerModule; - if (typeof t._resetForTesting === 'function') t._resetForTesting(); + // Plan 02-03 GREEN path: import the tracker on a freshly-reset + // module graph (vi.resetModules in beforeEach), then immediately + // query without firing any chrome.tabs.* callbacks. The empty + // representation is the canonical whole-desktop-no-tab state. + const tracker = await import('../../src/background/tab-url-tracker'); // Deliberately observe nothing — simulating a session where no tab // events fired during the 30 s window. - expect(t.getTabUrlsSeen()).toEqual([]); - expect(t.getTabUrlsSeen()).not.toBeUndefined(); - expect(t.getTabUrlsSeen()).not.toBeNull(); + expect(tracker.getTabUrlsSeen()).toEqual([]); + expect(tracker.getTabUrlsSeen()).not.toBeUndefined(); + expect(tracker.getTabUrlsSeen()).not.toBeNull(); }); });