fix(01-review): WR-04+WR-05 smoke.sh require python3 + identity-based zip detection

This commit is contained in:
2026-05-16 10:19:46 +02:00
parent 650c546a6e
commit 349ae88a8e

View File

@@ -60,6 +60,12 @@ echo
[[ -x "${CHROME_BIN}" ]] || { red "FAIL: ${CHROME_BIN} not found (set CHROME_BIN=...)"; exit 1; }
command -v ffprobe >/dev/null || { red "FAIL: ffprobe not installed"; exit 1; }
command -v unzip >/dev/null || { red "FAIL: unzip not installed"; exit 1; }
# WR-04 fix: python3 is REQUIRED for URL encoding of the smoke-tab data: URL.
# Previously a fallback `|| printf '%s' "${SMOKE_HTML}"` would emit raw HTML
# into the data URL on python3-missing systems — Chrome silently failed to
# parse those URLs (containing literal `<`, `>`, spaces, quotes) and the
# operator saw a blank tab with no diagnostic. Better to fail loud early.
command -v python3 >/dev/null || { red "FAIL: python3 not installed (needed for URL encoding the smoke tab data URL)"; exit 1; }
grep -q '"desktopCapture"' "${DIST_DIR}/manifest.json" || { red "FAIL: dist/manifest.json missing desktopCapture"; exit 1; }
! grep -q '"tabCapture"' "${DIST_DIR}/manifest.json" || { red "FAIL: dist/manifest.json still has tabCapture"; exit 1; }
green "✓ pre-flight checks passed"
@@ -71,16 +77,24 @@ echo " downloads: ${DOWNLOADS_DIR}"
echo
# --- snapshot Downloads ---
BEFORE_LIST=$(find "${DOWNLOADS_DIR}" -maxdepth 1 -name 'session_report_*.zip' 2>/dev/null | wc -l)
echo " existing session_report_*.zip in Downloads: ${BEFORE_LIST}"
# WR-05 fix: snapshot the FULL list of pre-existing zips, not just the
# count. The count-only approach falsely succeeded when an unrelated
# session_report appeared in Downloads (operator running the extension
# in another window, etc.) — the script would then ffprobe the WRONG
# file. `comm -13` against the post-recording list yields the genuinely
# new file(s) by identity. The per-file mtime comparison in the poll loop
# below still picks the latest if multiple new zips appear (unlikely but
# defensive).
BEFORE_ZIPS=$(find "${DOWNLOADS_DIR}" -maxdepth 1 -name 'session_report_*.zip' 2>/dev/null | sort)
BEFORE_COUNT=$(printf '%s\n' "${BEFORE_ZIPS}" | grep -c . || true)
echo " existing session_report_*.zip in Downloads: ${BEFORE_COUNT}"
echo
# --- profile prep ---
PROFILE_HAS_EXTENSION=0
if [[ "${KEEP_PROFILE}" != "1" ]]; then
rm -rf "${PROFILE_DIR}"
rm -rf -- "${PROFILE_DIR}"
fi
mkdir -p "${PROFILE_DIR}"
mkdir -p -- "${PROFILE_DIR}"
# Detect if a previous run already loaded the extension into this profile
if [[ -d "${PROFILE_DIR}/Default/Extensions" ]] && \
find "${PROFILE_DIR}/Default/Extensions" -maxdepth 3 -name 'manifest.json' 2>/dev/null | head -1 | xargs -r grep -q 'AI Call Recorder' 2>/dev/null; then
@@ -106,7 +120,11 @@ read -r -d '' SMOKE_HTML <<'EOF' || true
<p>The script in your terminal will detect the download and finish the ffprobe gate automatically.</p>
</body>
EOF
SMOKE_DATA_URL="data:text/html,$(printf '%s' "${SMOKE_HTML}" | python3 -c 'import sys,urllib.parse;print(urllib.parse.quote(sys.stdin.read(), safe=""))' 2>/dev/null || printf '%s' "${SMOKE_HTML}")"
# WR-04 fix: python3 is required (asserted in pre-flight). NO fallback —
# the previous `|| printf '%s' "${SMOKE_HTML}"` would emit unencoded HTML
# into the data URL, causing Chrome to fail to parse the URL silently and
# the operator to see a blank tab. Fail loudly if URL encoding fails.
SMOKE_DATA_URL="data:text/html,$(printf '%s' "${SMOKE_HTML}" | python3 -c 'import sys,urllib.parse;print(urllib.parse.quote(sys.stdin.read(), safe=""))')"
# --- launch Chrome ---
blue "==> launching Chrome with smoke profile + auto-accept picker..."
@@ -152,22 +170,52 @@ echo " (Ctrl+C aborts. Auto-detects, ffprobes, stages fixture, opens WebM.)"
echo
# --- poll Downloads ---
# WR-05 fix: detect by IDENTITY via `comm -13 <before> <after>`, not by
# count comparison. The count approach false-positives when ANY
# session_report appears (e.g. the operator's daily extension in another
# window). `comm -13` returns lines present in <after> but not in
# <before> — the genuine new file(s). We still pick the latest by mtime
# if multiple new zips materialize (e.g., overlapping operator activity),
# now via a `stat -c %Y` comparison loop instead of `sort -rn | head -1`,
# and the candidate set is restricted to actually-new files.
NEW_ARCHIVE=""
WAITED=0
while [[ ${WAITED} -lt ${POLL_TIMEOUT} ]]; do
NEW_COUNT=$(find "${DOWNLOADS_DIR}" -maxdepth 1 -name 'session_report_*.zip' 2>/dev/null | wc -l)
if [[ ${NEW_COUNT} -gt ${BEFORE_LIST} ]]; then
AFTER_ZIPS=$(find "${DOWNLOADS_DIR}" -maxdepth 1 -name 'session_report_*.zip' 2>/dev/null | sort)
# comm requires sorted streams; both inputs above are pre-sorted.
# `-13` keeps only lines unique to file2 (AFTER), suppressing common
# lines and lines unique to BEFORE.
NEW_ZIPS=$(comm -13 <(printf '%s\n' "${BEFORE_ZIPS}") <(printf '%s\n' "${AFTER_ZIPS}") | grep -v '^$' || true)
if [[ -n "${NEW_ZIPS}" ]]; then
sleep 2 # let the download settle
LATEST=$(find "${DOWNLOADS_DIR}" -maxdepth 1 -name 'session_report_*.zip' -printf '%T@ %p\n' 2>/dev/null | sort -rn | head -1 | cut -d' ' -f2- || true)
# Re-snapshot after settle, recompute identity diff, pick latest by mtime
AFTER_ZIPS=$(find "${DOWNLOADS_DIR}" -maxdepth 1 -name 'session_report_*.zip' 2>/dev/null | sort)
NEW_ZIPS=$(comm -13 <(printf '%s\n' "${BEFORE_ZIPS}") <(printf '%s\n' "${AFTER_ZIPS}") | grep -v '^$' || true)
if [[ -n "${NEW_ZIPS}" ]]; then
# Pick the latest among the genuinely-new zips by mtime. Quoting note:
# NEW_ZIPS is a newline-separated list of full paths from `find`; we
# iterate via `while read` to preserve paths with embedded spaces.
LATEST=""
LATEST_MTIME=0
while IFS= read -r zip_path; do
[[ -z "${zip_path}" ]] && continue
mtime=$(stat -c %Y -- "${zip_path}" 2>/dev/null || echo 0)
if [[ "${mtime}" -gt "${LATEST_MTIME}" ]]; then
LATEST="${zip_path}"
LATEST_MTIME="${mtime}"
fi
done <<<"${NEW_ZIPS}"
if [[ -n "${LATEST}" ]]; then
NEW_ARCHIVE="${LATEST}"
break
fi
fi
fi
sleep 1
WAITED=$((WAITED + 1))
if [[ $((WAITED % 30)) -eq 0 ]]; then
yellow " ...still waiting (${WAITED}s elapsed, count=${NEW_COUNT}/baseline=${BEFORE_LIST})"
after_count=$(printf '%s\n' "${AFTER_ZIPS}" | grep -c . || true)
yellow " ...still waiting (${WAITED}s elapsed, count=${after_count}/baseline=${BEFORE_COUNT})"
fi
done
@@ -183,9 +231,12 @@ green "✓ archive detected: ${NEW_ARCHIVE}"
echo
# --- extract + ffprobe gate ---
unzip -p "${NEW_ARCHIVE}" video/last_30sec.webm > "${WEBM_TMP}"
SIZE_BYTES=$(stat -c %s "${WEBM_TMP}")
SIZE_HUMAN=$(ls -lh "${WEBM_TMP}" | awk '{print $5}')
# Bash-style sweep: pass `--` to terminate options on file-taking commands
# that accept user-controlled paths (Google shell style guide §"Special
# considerations" — defensive against filenames starting with `-`).
unzip -p -- "${NEW_ARCHIVE}" video/last_30sec.webm > "${WEBM_TMP}"
SIZE_BYTES=$(stat -c %s -- "${WEBM_TMP}")
SIZE_HUMAN=$(ls -lh -- "${WEBM_TMP}" | awk '{print $5}')
echo " WebM size: ${SIZE_HUMAN} (${SIZE_BYTES} bytes)"
if [[ ${SIZE_BYTES} -lt 100000 ]]; then
yellow "⚠ WebM is smaller than 100 KB — buffer may not have rotated; capture longer"
@@ -194,6 +245,12 @@ fi
echo
blue "==> D-12 ACCEPTANCE GATE — ffprobe -v error"
echo "---"
# The `&& GATE=0 || GATE=$?` chain is correct: under `set -e`, ffprobe's
# non-zero exit doesn't terminate the script because ffprobe is not the
# final command of the `&&`/`||` list (errexit ignores failures there).
# When ffprobe succeeds, `GATE=0` (an assignment, which always returns 0)
# runs and the `||` branch is skipped. When ffprobe fails, `GATE=0` is
# skipped and `GATE=$?` captures ffprobe's exit status. The earlier review
# note WR-04 confirmed this is NOT broken.
ffprobe -v error -f matroska -i "${WEBM_TMP}" && GATE=0 || GATE=$?
echo "---"
echo "ffprobe exit: ${GATE}"
@@ -215,9 +272,9 @@ echo
# --- stage fixture ---
if [[ ${GATE} -eq 0 ]]; then
mkdir -p "$(dirname "${FIXTURE_DEST}")"
cp "${WEBM_TMP}" "${FIXTURE_DEST}"
green "✓ fixture staged: ${FIXTURE_DEST} ($(ls -lh "${FIXTURE_DEST}" | awk '{print $5}'))"
mkdir -p -- "$(dirname -- "${FIXTURE_DEST}")"
cp -- "${WEBM_TMP}" "${FIXTURE_DEST}"
green "✓ fixture staged: ${FIXTURE_DEST} ($(ls -lh -- "${FIXTURE_DEST}" | awk '{print $5}'))"
fi
# --- open the WebM for visual check ---