Milestone v1 (v2.0.0): Mokosh — Session Capture #1

Merged
strategy155 merged 297 commits from gsd/phase-04-harden-clean-up-optional into main 2026-05-31 15:34:17 +00:00
Showing only changes of commit 349ae88a8e - Show all commits

View File

@@ -60,6 +60,12 @@ echo
[[ -x "${CHROME_BIN}" ]] || { red "FAIL: ${CHROME_BIN} not found (set CHROME_BIN=...)"; exit 1; } [[ -x "${CHROME_BIN}" ]] || { red "FAIL: ${CHROME_BIN} not found (set CHROME_BIN=...)"; exit 1; }
command -v ffprobe >/dev/null || { red "FAIL: ffprobe not installed"; exit 1; } command -v ffprobe >/dev/null || { red "FAIL: ffprobe not installed"; exit 1; }
command -v unzip >/dev/null || { red "FAIL: unzip not installed"; exit 1; } command -v unzip >/dev/null || { red "FAIL: unzip not installed"; exit 1; }
# WR-04 fix: python3 is REQUIRED for URL encoding of the smoke-tab data: URL.
# Previously a fallback `|| printf '%s' "${SMOKE_HTML}"` would emit raw HTML
# into the data URL on python3-missing systems — Chrome silently failed to
# parse those URLs (containing literal `<`, `>`, spaces, quotes) and the
# operator saw a blank tab with no diagnostic. Better to fail loud early.
command -v python3 >/dev/null || { red "FAIL: python3 not installed (needed for URL encoding the smoke tab data URL)"; exit 1; }
grep -q '"desktopCapture"' "${DIST_DIR}/manifest.json" || { red "FAIL: dist/manifest.json missing desktopCapture"; exit 1; } grep -q '"desktopCapture"' "${DIST_DIR}/manifest.json" || { red "FAIL: dist/manifest.json missing desktopCapture"; exit 1; }
! grep -q '"tabCapture"' "${DIST_DIR}/manifest.json" || { red "FAIL: dist/manifest.json still has tabCapture"; exit 1; } ! grep -q '"tabCapture"' "${DIST_DIR}/manifest.json" || { red "FAIL: dist/manifest.json still has tabCapture"; exit 1; }
green "✓ pre-flight checks passed" green "✓ pre-flight checks passed"
@@ -71,16 +77,24 @@ echo " downloads: ${DOWNLOADS_DIR}"
echo echo
# --- snapshot Downloads --- # --- snapshot Downloads ---
BEFORE_LIST=$(find "${DOWNLOADS_DIR}" -maxdepth 1 -name 'session_report_*.zip' 2>/dev/null | wc -l) # WR-05 fix: snapshot the FULL list of pre-existing zips, not just the
echo " existing session_report_*.zip in Downloads: ${BEFORE_LIST}" # count. The count-only approach falsely succeeded when an unrelated
# session_report appeared in Downloads (operator running the extension
# in another window, etc.) — the script would then ffprobe the WRONG
# file. comm -13 against the post-recording list yields the genuinely
# new file by identity. The mtime comparison in the poll loop further down
# still picks the latest if multiple new zips appear (unlikely but defensive).
BEFORE_ZIPS=$(find "${DOWNLOADS_DIR}" -maxdepth 1 -name 'session_report_*.zip' 2>/dev/null | sort)
BEFORE_COUNT=$(printf '%s\n' "${BEFORE_ZIPS}" | grep -c . || true)
echo " existing session_report_*.zip in Downloads: ${BEFORE_COUNT}"
echo echo
# --- profile prep --- # --- profile prep ---
PROFILE_HAS_EXTENSION=0 PROFILE_HAS_EXTENSION=0
if [[ "${KEEP_PROFILE}" != "1" ]]; then if [[ "${KEEP_PROFILE}" != "1" ]]; then
rm -rf "${PROFILE_DIR}" rm -rf -- "${PROFILE_DIR}"
fi fi
mkdir -p "${PROFILE_DIR}" mkdir -p -- "${PROFILE_DIR}"
# Detect if a previous run already loaded the extension into this profile # Detect if a previous run already loaded the extension into this profile
if [[ -d "${PROFILE_DIR}/Default/Extensions" ]] && \ if [[ -d "${PROFILE_DIR}/Default/Extensions" ]] && \
find "${PROFILE_DIR}/Default/Extensions" -maxdepth 3 -name 'manifest.json' 2>/dev/null | head -1 | xargs -r grep -q 'AI Call Recorder' 2>/dev/null; then find "${PROFILE_DIR}/Default/Extensions" -maxdepth 3 -name 'manifest.json' 2>/dev/null | head -1 | xargs -r grep -q 'AI Call Recorder' 2>/dev/null; then
@@ -106,7 +120,11 @@ read -r -d '' SMOKE_HTML <<'EOF' || true
<p>The script in your terminal will detect the download and finish the ffprobe gate automatically.</p> <p>The script in your terminal will detect the download and finish the ffprobe gate automatically.</p>
</body> </body>
EOF EOF
SMOKE_DATA_URL="data:text/html,$(printf '%s' "${SMOKE_HTML}" | python3 -c 'import sys,urllib.parse;print(urllib.parse.quote(sys.stdin.read(), safe=""))' 2>/dev/null || printf '%s' "${SMOKE_HTML}")" # WR-04 fix: python3 is required (asserted in pre-flight). NO fallback —
# the previous `|| printf '%s' "${SMOKE_HTML}"` would emit unencoded HTML
# into the data URL, causing Chrome to silently fail to parse the URL and
# the operator to see a blank tab. Fail loudly if URL encoding fails.
SMOKE_DATA_URL="data:text/html,$(printf '%s' "${SMOKE_HTML}" | python3 -c 'import sys,urllib.parse;print(urllib.parse.quote(sys.stdin.read(), safe=""))')"
# --- launch Chrome --- # --- launch Chrome ---
blue "==> launching Chrome with smoke profile + auto-accept picker..." blue "==> launching Chrome with smoke profile + auto-accept picker..."
@@ -152,22 +170,52 @@ echo " (Ctrl+C aborts. Auto-detects, ffprobes, stages fixture, opens WebM.)"
echo echo
# --- poll Downloads --- # --- poll Downloads ---
# WR-05 fix: detect by IDENTITY via `comm -13 <before> <after>`, not by
# count comparison. The count approach false-positives when ANY
# session_report appears (e.g. the operator's daily extension in another
# window). `comm -13` returns lines present in <after> but not in
# <before> — the genuine new file(s). If multiple new zips materialize
# (e.g., overlapping operator activity), we still pick the latest by
# comparing `stat` mtimes in a `while read` loop, but the candidate set is
# now restricted to actually-new files.
NEW_ARCHIVE="" NEW_ARCHIVE=""
WAITED=0 WAITED=0
while [[ ${WAITED} -lt ${POLL_TIMEOUT} ]]; do while [[ ${WAITED} -lt ${POLL_TIMEOUT} ]]; do
NEW_COUNT=$(find "${DOWNLOADS_DIR}" -maxdepth 1 -name 'session_report_*.zip' 2>/dev/null | wc -l) AFTER_ZIPS=$(find "${DOWNLOADS_DIR}" -maxdepth 1 -name 'session_report_*.zip' 2>/dev/null | sort)
if [[ ${NEW_COUNT} -gt ${BEFORE_LIST} ]]; then # comm requires sorted streams; both inputs above are pre-sorted.
# `-13` keeps only lines unique to file2 (AFTER), suppressing common
# lines and lines unique to BEFORE.
NEW_ZIPS=$(comm -13 <(printf '%s\n' "${BEFORE_ZIPS}") <(printf '%s\n' "${AFTER_ZIPS}") | grep -v '^$' || true)
if [[ -n "${NEW_ZIPS}" ]]; then
sleep 2 # let the download settle sleep 2 # let the download settle
LATEST=$(find "${DOWNLOADS_DIR}" -maxdepth 1 -name 'session_report_*.zip' -printf '%T@ %p\n' 2>/dev/null | sort -rn | head -1 | cut -d' ' -f2- || true) # Re-snapshot after settle, recompute identity diff, pick latest by mtime
if [[ -n "${LATEST}" ]]; then AFTER_ZIPS=$(find "${DOWNLOADS_DIR}" -maxdepth 1 -name 'session_report_*.zip' 2>/dev/null | sort)
NEW_ARCHIVE="${LATEST}" NEW_ZIPS=$(comm -13 <(printf '%s\n' "${BEFORE_ZIPS}") <(printf '%s\n' "${AFTER_ZIPS}") | grep -v '^$' || true)
break if [[ -n "${NEW_ZIPS}" ]]; then
# Pick the latest among the genuinely-new zips by mtime. Quoting note:
# NEW_ZIPS is a newline-separated list of full paths from `find`; we
# iterate via `while read` to preserve paths with embedded spaces.
LATEST=""
LATEST_MTIME=0
while IFS= read -r zip_path; do
[[ -z "${zip_path}" ]] && continue
mtime=$(stat -c %Y -- "${zip_path}" 2>/dev/null || echo 0)
if [[ "${mtime}" -gt "${LATEST_MTIME}" ]]; then
LATEST="${zip_path}"
LATEST_MTIME="${mtime}"
fi
done <<<"${NEW_ZIPS}"
if [[ -n "${LATEST}" ]]; then
NEW_ARCHIVE="${LATEST}"
break
fi
fi fi
fi fi
sleep 1 sleep 1
WAITED=$((WAITED + 1)) WAITED=$((WAITED + 1))
if [[ $((WAITED % 30)) -eq 0 ]]; then if [[ $((WAITED % 30)) -eq 0 ]]; then
yellow " ...still waiting (${WAITED}s elapsed, count=${NEW_COUNT}/baseline=${BEFORE_LIST})" after_count=$(printf '%s\n' "${AFTER_ZIPS}" | grep -c . || true)
yellow " ...still waiting (${WAITED}s elapsed, count=${after_count}/baseline=${BEFORE_COUNT})"
fi fi
done done
@@ -183,9 +231,12 @@ green "✓ archive detected: ${NEW_ARCHIVE}"
echo echo
# --- extract + ffprobe gate --- # --- extract + ffprobe gate ---
unzip -p "${NEW_ARCHIVE}" video/last_30sec.webm > "${WEBM_TMP}" # Bash-style sweep: pass `--` to terminate options on file-taking commands
SIZE_BYTES=$(stat -c %s "${WEBM_TMP}") # that accept user-controlled paths (Google shell style guide §"Special
SIZE_HUMAN=$(ls -lh "${WEBM_TMP}" | awk '{print $5}') # considerations" — defensive against filenames starting with `-`).
unzip -p -- "${NEW_ARCHIVE}" video/last_30sec.webm > "${WEBM_TMP}"
SIZE_BYTES=$(stat -c %s -- "${WEBM_TMP}")
SIZE_HUMAN=$(ls -lh -- "${WEBM_TMP}" | awk '{print $5}')
echo " WebM size: ${SIZE_HUMAN} (${SIZE_BYTES} bytes)" echo " WebM size: ${SIZE_HUMAN} (${SIZE_BYTES} bytes)"
if [[ ${SIZE_BYTES} -lt 100000 ]]; then if [[ ${SIZE_BYTES} -lt 100000 ]]; then
yellow "⚠ WebM is smaller than 100 KB — buffer may not have rotated; capture longer" yellow "⚠ WebM is smaller than 100 KB — buffer may not have rotated; capture longer"
@@ -194,6 +245,12 @@ fi
echo echo
blue "==> D-12 ACCEPTANCE GATE — ffprobe -v error" blue "==> D-12 ACCEPTANCE GATE — ffprobe -v error"
echo "---" echo "---"
# The `&& GATE=0 || GATE=$?` chain is correct: under `set -e`, ffprobe's
# non-zero exit doesn't terminate the script because ffprobe is not the
# final command of the `&&`/`||` list (errexit only fires for a failure of
# the last command in such a list).
# When ffprobe succeeds, `GATE=0` (an assignment returning 0) is executed
# and the `||` branch is skipped. When ffprobe fails, the `&&` chain is
# bypassed and `GATE=$?` captures ffprobe's exit. The earlier review note
# WR-04 confirmed this is NOT broken.
ffprobe -v error -f matroska -i "${WEBM_TMP}" && GATE=0 || GATE=$? ffprobe -v error -f matroska -i "${WEBM_TMP}" && GATE=0 || GATE=$?
echo "---" echo "---"
echo "ffprobe exit: ${GATE}" echo "ffprobe exit: ${GATE}"
@@ -215,9 +272,9 @@ echo
# --- stage fixture --- # --- stage fixture ---
if [[ ${GATE} -eq 0 ]]; then if [[ ${GATE} -eq 0 ]]; then
mkdir -p "$(dirname "${FIXTURE_DEST}")" mkdir -p -- "$(dirname -- "${FIXTURE_DEST}")"
cp "${WEBM_TMP}" "${FIXTURE_DEST}" cp -- "${WEBM_TMP}" "${FIXTURE_DEST}"
green "✓ fixture staged: ${FIXTURE_DEST} ($(ls -lh "${FIXTURE_DEST}" | awk '{print $5}'))" green "✓ fixture staged: ${FIXTURE_DEST} ($(ls -lh -- "${FIXTURE_DEST}" | awk '{print $5}'))"
fi fi
# --- open the WebM for visual check --- # --- open the WebM for visual check ---