From 2e756557ffd515580cd58e4077b348d42c462036 Mon Sep 17 00:00:00 2001 From: le king fu Date: Sun, 10 May 2026 16:53:14 -0400 Subject: [PATCH] feat(reports): scan archive/ subdir as fallback to handle post-07:30 UTC window Sergent renameSync() rotates reports/ -> reports/archive/ at 07:30 UTC daily, so for ~22h per day the only copy of a fresh scan lives in archive/. The handler now scans both directories and concatenates with top-level priority on filename collision. archive/ missing is a silent skip. Tests : 17/17 in test-curl.sh (11 existing + 6 new for archive coverage). Co-Authored-By: Claude Opus 4.7 (1M context) --- index.js | 46 ++++++++++++++++++++++++++---------- test-curl.sh | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 100 insertions(+), 13 deletions(-) diff --git a/index.js b/index.js index 0c0a041..a70f631 100644 --- a/index.js +++ b/index.js @@ -95,34 +95,56 @@ function isScanReport(value) { ); } -// Read all `defenseur-_*.json` files under REPORTS_DIR for the -// given UTC date. The scan reports use an ISO timestamp with `:` and `.` -// rewritten as `-` in the filename (e.g. defenseur-booking_2026-05-06T05-30-11-249Z.json). -// We match `_` then re-confirm via parsed.timestamp.startsWith(date). -function readScanReportsForDate(date) { - const out = []; - if (!existsSync(REPORTS_DIR)) return out; +// Read all `defenseur-_*.json` files under `dir` matching the +// given UTC date. Returns parsed scan reports keyed by filename so the caller +// can dedupe across REPORTS_DIR + REPORTS_DIR/archive. 
+function collectScanReportsFromDir(dir, date) { + const collected = new Map(); + if (!existsSync(dir)) return collected; - const files = readdirSync(REPORTS_DIR).filter( + const files = readdirSync(dir).filter( (f) => f.startsWith("defenseur-") && f.includes(`_${date}`) && f.endsWith(".json"), ); for (const file of files) { try { - const raw = readFileSync(path.join(REPORTS_DIR, file), "utf-8"); + const raw = readFileSync(path.join(dir, file), "utf-8"); const parsed = JSON.parse(raw); if (!isScanReport(parsed)) continue; if (!parsed.timestamp.startsWith(date)) continue; - out.push(parsed); + collected.set(file, parsed); } catch (err) { console.error(`[reports/scans] failed to parse ${file}:`, err.message); } } + return collected; +} + +// Read all `defenseur-<name>_*.json` files under REPORTS_DIR for the +// given UTC date. The scan reports use an ISO timestamp with `:` and `.` +// rewritten as `-` in the filename (e.g. defenseur-booking_2026-05-06T05-30-11-249Z.json). +// We match `_<date>` then re-confirm via parsed.timestamp.startsWith(date). +// +// The Sergent rotates fresh reports out of REPORTS_DIR into REPORTS_DIR/archive +// at 07:30 UTC daily (cf. defenseurs/src/sergent.ts renameSync). For ~22h/day +// the only copy lives in archive/ — so we scan both and concatenate. Top-level +// files take precedence on filename collision (more recent by definition). +function readScanReportsForDate(date) { + const topLevel = collectScanReportsFromDir(REPORTS_DIR, date); + const archive = collectScanReportsFromDir(path.join(REPORTS_DIR, "archive"), date); + + // Merge with top-level priority — only insert archive entries whose filename + // is not already present at the top level. + for (const [file, report] of archive) { + if (!topLevel.has(file)) topLevel.set(file, report); + } + // Stable sort by timestamp asc — same convention as readReports() in // defenseurs/src/report.ts. 
- out.sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()); - return out; + return [...topLevel.values()].sort( + (a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime(), + ); } async function getHealth() { diff --git a/test-curl.sh b/test-curl.sh index 0bd1be1..ecd1bf8 100755 --- a/test-curl.sh +++ b/test-curl.sh @@ -10,7 +10,7 @@ # Usage : # bash test-curl.sh # -# Exit 0 if all 10 cases pass, exit 1 on first failure (fail-fast). +# Exit 0 if all cases pass, exit 1 on first failure (fail-fast). set -euo pipefail @@ -23,7 +23,11 @@ trap 'rm -rf "$TMP_DIR"; kill "$SERVER_PID" 2>/dev/null || true' EXIT # - 3 scan reports on 2026-05-07 (booking, simpl-liste, maximus) # - 1 defenseur-auto run report on 2026-05-07 (must be filtered out) # - 1 booking scan report on 2026-05-06 (must be excluded by date filter) +# - 1 archived scan report on 2026-05-04 (sergent rotated it post-07:30 UTC) +# - 1 archived scan report on 2026-05-07 used to assert top-level priority +# when the same filename also exists in REPORTS_DIR (defensive dedupe). mkdir -p "$TMP_DIR/reports" +mkdir -p "$TMP_DIR/reports/archive" cat > "$TMP_DIR/reports/defenseur-booking_2026-05-07T05-30-11-249Z.json" <<'JSON' { @@ -80,6 +84,33 @@ cat > "$TMP_DIR/reports/defenseur-booking_2026-05-06T05-30-00-000Z.json" <<'JSON } JSON +# Archived scan report (sergent renameSync at 07:30 UTC moves files here). +cat > "$TMP_DIR/reports/archive/defenseur-vps_2026-05-04T05-15-00-000Z.json" <<'JSON' +{ + "agent": "defenseur-vps", + "timestamp": "2026-05-04T05:15:00.000Z", + "project": "vps", + "checksRun": 10, + "checksPassed": 10, + "findings": [] +} +JSON + +# Same filename present at top-level (already created above) AND in archive/. +# Top-level wins (more recent — the archive copy is the stale one). The +# archive copy carries agent="defenseur-maximus-STALE" so the dedupe +# regression case can detect a leak. 
+cat > "$TMP_DIR/reports/archive/defenseur-maximus_2026-05-07T05-00-12-100Z.json" <<'JSON' +{ + "agent": "defenseur-maximus-STALE", + "timestamp": "2026-05-07T05:00:12.100Z", + "project": "la-compagnie-maximus", + "checksRun": 1, + "checksPassed": 0, + "findings": [] +} +JSON + # Boot the server with the temp REPORTS_DIR. PORT=3099 \ HEALTH_TOKEN="$TOKEN" \ @@ -163,6 +194,40 @@ code=$(curl -s -o /dev/null -w '%{http_code}' \ -H "Authorization: Bearer $TOKEN" "$BASE_URL/reports/nope") [[ "$code" == "404" ]] && pass "wrong path -> 404" || fail "wrong path -> got $code" +# Case 11 : archive-only date -> 200 count=1, returns the archived report. +# Reproduces the post-07:30 UTC window (sergent rotated all reports out of +# REPORTS_DIR into REPORTS_DIR/archive). +body=$(curl -s -H "Authorization: Bearer $TOKEN" \ + "$BASE_URL/reports/scans?date=2026-05-04") +count=$(echo "$body" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{console.log(JSON.parse(s).count);});') +agent=$(echo "$body" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{console.log(JSON.parse(s).reports[0]?.agent||"");});') +[[ "$count" == "1" ]] && pass "archive-only date 2026-05-04 -> count=1" || fail "archive-only date 2026-05-04 -> count=$count" +[[ "$agent" == "defenseur-vps" ]] && pass "archive report agent matches" || fail "archive report agent mismatch -> $agent" + +# Case 12 : top-level + archive same filename -> top-level wins (defensive +# dedupe). The archive copy carries agent="defenseur-maximus-STALE" — if we +# see that string in the response we picked the wrong copy. 
+body=$(curl -s -H "Authorization: Bearer $TOKEN" \ + "$BASE_URL/reports/scans?date=2026-05-07") +stale=$(echo "$body" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{const a=JSON.parse(s).reports.map(r=>r.agent);console.log(a.includes("defenseur-maximus-STALE")?"yes":"no");});') +[[ "$stale" == "no" ]] && pass "top-level priority over archive (no STALE)" || fail "archive copy leaked -> reports include STALE" +# Also assert count is still 3 — no duplication of the maximus report. +count=$(echo "$body" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{console.log(JSON.parse(s).count);});') +[[ "$count" == "3" ]] && pass "no dedupe duplication on 2026-05-07" || fail "dedupe duplication -> count=$count" + +# Case 13 : missing archive/ subdir is OK (silent skip). Remove the directory +# and re-query 2026-05-07 — should still return the 3 top-level reports. +rm -rf "$TMP_DIR/reports/archive" +body=$(curl -s -H "Authorization: Bearer $TOKEN" \ + "$BASE_URL/reports/scans?date=2026-05-07") +count=$(echo "$body" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{console.log(JSON.parse(s).count);});') +[[ "$count" == "3" ]] && pass "missing archive/ -> still count=3 from top-level" || fail "missing archive/ -> count=$count" +# And the archive-only date now collapses to 0 silently. +body=$(curl -s -H "Authorization: Bearer $TOKEN" \ + "$BASE_URL/reports/scans?date=2026-05-04") +count=$(echo "$body" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{console.log(JSON.parse(s).count);});') +[[ "$count" == "0" ]] && pass "missing archive/ + archive-only date -> count=0" || fail "missing archive/ archive-only -> count=$count" + echo echo "=== Results: $PASS passed, $FAIL failed ===" [[ "$FAIL" == "0" ]] || exit 1