|
| 1 | +"""Docker integration snapshot helper. |
| 2 | +
|
| 3 | +Reads the container's output directory, builds the same snapshot structure |
| 4 | +as test_output_snapshot.py, then either updates the baseline or compares |
| 5 | +against it. |
| 6 | +
|
| 7 | +Usage: |
| 8 | + # Compare against existing snapshot: |
| 9 | + python3 docker_snapshot.py <output_dir> <snapshot_file> |
| 10 | +
|
| 11 | + # Update (or create) the snapshot baseline: |
| 12 | + python3 docker_snapshot.py <output_dir> <snapshot_file> --update |
| 13 | +""" |
| 14 | + |
| 15 | +from __future__ import annotations |
| 16 | + |
| 17 | +import json |
| 18 | +import sys |
| 19 | +from pathlib import Path |
| 20 | + |
| 21 | + |
def _build_snapshot(output_dir: Path) -> dict:
    """Collect comparable metrics from the files directly inside *output_dir*.

    The original note on this helper says it mirrors
    ``test_output_snapshot._build_snapshot()``; that module is not visible
    here, so keep the two in sync by hand when either one changes.

    Hidden entries (names starting with ".") and sub-directories are skipped.
    Every remaining file gets an entry with its ``size_bytes``, plus:

    * ``.json`` files: ``record_count`` when the top-level value is a list,
      or the sorted ``keys`` when it is an object. Files that cannot be
      decoded or parsed keep only ``size_bytes``.
    * ``.csv`` files: ``row_count``, the number of non-blank lines minus one
      header line, never less than zero.

    Args:
        output_dir: Directory whose immediate children are inspected.

    Returns:
        A dict ``{"files": {filename: entry}, "summary": {...}}``. The summary
        holds ``total_files``, ``csv_outputs`` (CSV files whose name does not
        contain "duplicate") and the sorted ``output_filenames``.
    """
    snapshot: dict = {"files": {}}

    for path in sorted(output_dir.iterdir()):
        if path.name.startswith(".") or path.is_dir():
            continue

        entry: dict = {"size_bytes": path.stat().st_size}

        if path.suffix == ".json":
            try:
                data = json.loads(path.read_text(encoding="utf-8"))
            except (json.JSONDecodeError, UnicodeDecodeError):
                # Best effort: an unreadable JSON file is still listed with
                # its size, it just carries no structural metrics.
                data = None
            if isinstance(data, list):
                entry["record_count"] = len(data)
            elif isinstance(data, dict):
                entry["keys"] = sorted(data)
        elif path.suffix == ".csv":
            lines = path.read_text(encoding="utf-8").splitlines()
            non_blank = sum(1 for line in lines if line.strip())
            # Subtract the header line, but never report a negative count
            # for an empty (or blank-only) CSV file.
            entry["row_count"] = max(non_blank - 1, 0)

        snapshot["files"][path.name] = entry

    csv_files = [
        name
        for name in snapshot["files"]
        if name.endswith(".csv") and "duplicate" not in name
    ]
    snapshot["summary"] = {
        "total_files": len(snapshot["files"]),
        "csv_outputs": len(csv_files),
        "output_filenames": sorted(snapshot["files"]),
    }

    return snapshot
| 61 | + |
| 62 | + |
def _diff_snapshots(baseline: dict, current: dict) -> list[str]:
    """Return one human-readable line per difference between two snapshots.

    Reports output files that were added or removed, and for files present in
    both snapshots, any change in ``row_count`` / ``record_count``. A metric
    is only compared when the baseline recorded a value for it.
    """
    base_files = set(baseline.get("summary", {}).get("output_filenames", []))
    curr_files = set(current.get("summary", {}).get("output_filenames", []))

    diffs = [f" new output file: {name}" for name in sorted(curr_files - base_files)]
    diffs.extend(
        f" removed output file: {name}" for name in sorted(base_files - curr_files)
    )

    # Read "files" defensively, the same way "summary" is read above, so a
    # baseline without that key does not raise KeyError.
    base_entries = baseline.get("files", {})
    curr_entries = current.get("files", {})
    for fname in sorted(base_files & curr_files):
        base_entry = base_entries.get(fname, {})
        curr_entry = curr_entries.get(fname, {})
        for metric in ("row_count", "record_count"):
            bv = base_entry.get(metric)
            cv = curr_entry.get(metric)
            if bv is not None and bv != cv:
                diffs.append(f" {fname}.{metric}: {bv} → {cv}")

    return diffs


def main() -> None:
    """Command-line entry point: update or verify the snapshot baseline.

    Reads ``<output_dir>`` and ``<snapshot_file>`` from ``sys.argv``. With
    ``--update`` the current snapshot is written to ``<snapshot_file>``
    (parent directories are created as needed). Otherwise the current
    snapshot is compared against the stored baseline.

    Exits with status 1 (message on stderr) when the arguments are missing,
    the output directory is missing or not a directory, the baseline file is
    missing or unreadable, or the snapshot differs from the baseline. Returns
    normally after a successful update or a matching comparison.
    """
    if len(sys.argv) < 3:
        print(
            "Usage: docker_snapshot.py <output_dir> <snapshot_file> [--update]",
            file=sys.stderr,
        )
        sys.exit(1)

    output_dir = Path(sys.argv[1])
    snapshot_file = Path(sys.argv[2])
    update = "--update" in sys.argv

    if not output_dir.exists():
        print(f"❌ Output directory not found: {output_dir}", file=sys.stderr)
        sys.exit(1)
    if not output_dir.is_dir():
        # A regular file would otherwise fail later inside iterdir() with a
        # NotADirectoryError traceback.
        print(f"❌ Output path is not a directory: {output_dir}", file=sys.stderr)
        sys.exit(1)

    current = _build_snapshot(output_dir)

    if update:
        snapshot_file.parent.mkdir(parents=True, exist_ok=True)
        snapshot_file.write_text(
            json.dumps(current, indent=2, sort_keys=True), encoding="utf-8"
        )
        print(f"✅ Snapshot updated: {snapshot_file}")
        return

    if not snapshot_file.exists():
        print(
            f"❌ No snapshot found at {snapshot_file}.\n"
            "Run with UPDATE=1 to create your baseline:\n"
            " make docker-integration UPDATE=1",
            file=sys.stderr,
        )
        sys.exit(1)

    try:
        baseline = json.loads(snapshot_file.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        print(
            f"❌ Snapshot file {snapshot_file} is not valid JSON: {exc}",
            file=sys.stderr,
        )
        sys.exit(1)
    if not isinstance(baseline, dict):
        print(
            f"❌ Snapshot file {snapshot_file} does not contain a JSON object.",
            file=sys.stderr,
        )
        sys.exit(1)

    diffs = _diff_snapshots(baseline, current)
    if diffs:
        diff_text = "\n".join(diffs)
        print(
            f"❌ Snapshot mismatch — {len(diffs)} change(s) detected:\n"
            f"{diff_text}\n\n"
            "If intentional, re-run with UPDATE=1 to accept:\n"
            " make docker-integration UPDATE=1",
            file=sys.stderr,
        )
        sys.exit(1)

    # Prefer record_count (JSON lists) and fall back to row_count (CSV);
    # files that carry neither metric are left out of the total.
    total = sum(
        entry.get("record_count", entry.get("row_count", 0))
        for entry in current["files"].values()
        if "record_count" in entry or "row_count" in entry
    )
    file_count = len(set(current.get("summary", {}).get("output_filenames", [])))
    print(f"✅ Snapshot matches baseline ({total} records across {file_count} files)")
| 137 | + |
| 138 | + |
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments