feat(reports): scan archive/ subdir as fallback to handle post-07:30 UTC window #8

Merged
maximus merged 1 commit from feat/reports-scans-archive-fallback into main 2026-05-11 00:44:26 +00:00
2 changed files with 100 additions and 13 deletions
Showing only changes of commit 2e756557ff - Show all commits

View file

@ -95,34 +95,56 @@ function isScanReport(value) {
); );
} }
// Read all `defenseur-<agent>_<date>*.json` files under REPORTS_DIR for the // Read all `defenseur-<agent>_<date>*.json` files under `dir` matching the
// given UTC date. The scan reports use an ISO timestamp with `:` and `.` // given UTC date. Returns parsed scan reports keyed by filename so the caller
// rewritten as `-` in the filename (e.g. defenseur-booking_2026-05-06T05-30-11-249Z.json). // can dedupe across REPORTS_DIR + REPORTS_DIR/archive.
// We match `_<date>` then re-confirm via parsed.timestamp.startsWith(date). function collectScanReportsFromDir(dir, date) {
function readScanReportsForDate(date) { const collected = new Map();
const out = []; if (!existsSync(dir)) return collected;
if (!existsSync(REPORTS_DIR)) return out;
const files = readdirSync(REPORTS_DIR).filter( const files = readdirSync(dir).filter(
(f) => f.startsWith("defenseur-") && f.includes(`_${date}`) && f.endsWith(".json"), (f) => f.startsWith("defenseur-") && f.includes(`_${date}`) && f.endsWith(".json"),
); );
for (const file of files) { for (const file of files) {
try { try {
const raw = readFileSync(path.join(REPORTS_DIR, file), "utf-8"); const raw = readFileSync(path.join(dir, file), "utf-8");
const parsed = JSON.parse(raw); const parsed = JSON.parse(raw);
if (!isScanReport(parsed)) continue; if (!isScanReport(parsed)) continue;
if (!parsed.timestamp.startsWith(date)) continue; if (!parsed.timestamp.startsWith(date)) continue;
out.push(parsed); collected.set(file, parsed);
} catch (err) { } catch (err) {
console.error(`[reports/scans] failed to parse ${file}:`, err.message); console.error(`[reports/scans] failed to parse ${file}:`, err.message);
} }
} }
return collected;
}
// Read all `defenseur-<agent>_<date>*.json` files under REPORTS_DIR and
// REPORTS_DIR/archive for the given UTC date. The scan reports use an ISO timestamp with `:` and `.`
// rewritten as `-` in the filename (e.g. defenseur-booking_2026-05-06T05-30-11-249Z.json).
// We match `_<date>` then re-confirm via parsed.timestamp.startsWith(date).
//
// The Sergent rotates fresh reports out of REPORTS_DIR into REPORTS_DIR/archive
// at 07:30 UTC daily (cf. defenseurs/src/sergent.ts renameSync). For ~22h/day
// the only copy lives in archive/ — so we scan both and merge them by filename.
// Top-level files take precedence on filename collision (more recent by definition).
function readScanReportsForDate(date) {
const topLevel = collectScanReportsFromDir(REPORTS_DIR, date);
const archive = collectScanReportsFromDir(path.join(REPORTS_DIR, "archive"), date);
// Merge with top-level priority — only insert archive entries whose filename
// is not already present at the top level.
for (const [file, report] of archive) {
if (!topLevel.has(file)) topLevel.set(file, report);
}
// Stable sort by timestamp asc — same convention as readReports() in // Stable sort by timestamp asc — same convention as readReports() in
// defenseurs/src/report.ts. // defenseurs/src/report.ts.
out.sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()); return [...topLevel.values()].sort(
return out; (a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime(),
);
} }
async function getHealth() { async function getHealth() {

View file

@ -10,7 +10,7 @@
# Usage : # Usage :
# bash test-curl.sh # bash test-curl.sh
# #
# Exit 0 if all 10 cases pass, exit 1 on first failure (fail-fast). # Exit 0 if all cases pass, exit 1 on first failure (fail-fast).
set -euo pipefail set -euo pipefail
@ -23,7 +23,11 @@ trap 'rm -rf "$TMP_DIR"; kill "$SERVER_PID" 2>/dev/null || true' EXIT
# - 3 scan reports on 2026-05-07 (booking, simpl-liste, maximus) # - 3 scan reports on 2026-05-07 (booking, simpl-liste, maximus)
# - 1 defenseur-auto run report on 2026-05-07 (must be filtered out) # - 1 defenseur-auto run report on 2026-05-07 (must be filtered out)
# - 1 booking scan report on 2026-05-06 (must be excluded by date filter) # - 1 booking scan report on 2026-05-06 (must be excluded by date filter)
# - 1 archived scan report on 2026-05-04 (sergent rotated it post-07:30 UTC)
# - 1 archived scan report on 2026-05-07 used to assert top-level priority
# when the same filename also exists in REPORTS_DIR (defensive dedupe).
mkdir -p "$TMP_DIR/reports" mkdir -p "$TMP_DIR/reports"
mkdir -p "$TMP_DIR/reports/archive"
cat > "$TMP_DIR/reports/defenseur-booking_2026-05-07T05-30-11-249Z.json" <<'JSON' cat > "$TMP_DIR/reports/defenseur-booking_2026-05-07T05-30-11-249Z.json" <<'JSON'
{ {
@ -80,6 +84,33 @@ cat > "$TMP_DIR/reports/defenseur-booking_2026-05-06T05-30-00-000Z.json" <<'JSON
} }
JSON JSON
# Archived scan report (sergent renameSync at 07:30 UTC moves files here).
cat > "$TMP_DIR/reports/archive/defenseur-vps_2026-05-04T05-15-00-000Z.json" <<'JSON'
{
"agent": "defenseur-vps",
"timestamp": "2026-05-04T05:15:00.000Z",
"project": "vps",
"checksRun": 10,
"checksPassed": 10,
"findings": []
}
JSON
# Same filename present at top-level (already created above) AND in archive/.
# Top-level wins (more recent — the archive copy is the stale one). The
# archive copy carries agent="defenseur-maximus-STALE" so the dedupe
# regression case can detect a leak.
cat > "$TMP_DIR/reports/archive/defenseur-maximus_2026-05-07T05-00-12-100Z.json" <<'JSON'
{
"agent": "defenseur-maximus-STALE",
"timestamp": "2026-05-07T05:00:12.100Z",
"project": "la-compagnie-maximus",
"checksRun": 1,
"checksPassed": 0,
"findings": []
}
JSON
# Boot the server with the temp REPORTS_DIR. # Boot the server with the temp REPORTS_DIR.
PORT=3099 \ PORT=3099 \
HEALTH_TOKEN="$TOKEN" \ HEALTH_TOKEN="$TOKEN" \
@ -163,6 +194,40 @@ code=$(curl -s -o /dev/null -w '%{http_code}' \
-H "Authorization: Bearer $TOKEN" "$BASE_URL/reports/nope") -H "Authorization: Bearer $TOKEN" "$BASE_URL/reports/nope")
[[ "$code" == "404" ]] && pass "wrong path -> 404" || fail "wrong path -> got $code" [[ "$code" == "404" ]] && pass "wrong path -> 404" || fail "wrong path -> got $code"
# Case 11 : archive-only date -> 200 count=1, returns the archived report.
# Reproduces the post-07:30 UTC window (sergent rotated all reports out of
# REPORTS_DIR into REPORTS_DIR/archive).
body=$(curl -s -H "Authorization: Bearer $TOKEN" \
"$BASE_URL/reports/scans?date=2026-05-04")
count=$(echo "$body" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{console.log(JSON.parse(s).count);});')
agent=$(echo "$body" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{console.log(JSON.parse(s).reports[0]?.agent||"");});')
[[ "$count" == "1" ]] && pass "archive-only date 2026-05-04 -> count=1" || fail "archive-only date 2026-05-04 -> count=$count"
[[ "$agent" == "defenseur-vps" ]] && pass "archive report agent matches" || fail "archive report agent mismatch -> $agent"
# Case 12 : top-level + archive same filename -> top-level wins (defensive
# dedupe). The archive copy carries agent="defenseur-maximus-STALE" — if we
# see that string in the response we picked the wrong copy.
body=$(curl -s -H "Authorization: Bearer $TOKEN" \
"$BASE_URL/reports/scans?date=2026-05-07")
stale=$(echo "$body" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{const a=JSON.parse(s).reports.map(r=>r.agent);console.log(a.includes("defenseur-maximus-STALE")?"yes":"no");});')
[[ "$stale" == "no" ]] && pass "top-level priority over archive (no STALE)" || fail "archive copy leaked -> reports include STALE"
# Also assert count is still 3 — no duplication of the maximus report.
count=$(echo "$body" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{console.log(JSON.parse(s).count);});')
[[ "$count" == "3" ]] && pass "no dedupe duplication on 2026-05-07" || fail "dedupe duplication -> count=$count"
# Case 13 : missing archive/ subdir is OK (silent skip). Remove the directory
# and re-query 2026-05-07 — should still return the 3 top-level reports.
rm -rf "$TMP_DIR/reports/archive"
body=$(curl -s -H "Authorization: Bearer $TOKEN" \
"$BASE_URL/reports/scans?date=2026-05-07")
count=$(echo "$body" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{console.log(JSON.parse(s).count);});')
[[ "$count" == "3" ]] && pass "missing archive/ -> still count=3 from top-level" || fail "missing archive/ -> count=$count"
# And the archive-only date now collapses to 0 silently.
body=$(curl -s -H "Authorization: Bearer $TOKEN" \
"$BASE_URL/reports/scans?date=2026-05-04")
count=$(echo "$body" | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{console.log(JSON.parse(s).count);});')
[[ "$count" == "0" ]] && pass "missing archive/ + archive-only date -> count=0" || fail "missing archive/ archive-only -> count=$count"
echo echo
echo "=== Results: $PASS passed, $FAIL failed ===" echo "=== Results: $PASS passed, $FAIL failed ==="
[[ "$FAIL" == "0" ]] || exit 1 [[ "$FAIL" == "0" ]] || exit 1