Skip to content

Commit b50a01a

Browse files
committed
Tighten v3 AddSnapshotUpdate validation invariants
1 parent 9adfd5b commit b50a01a

File tree

2 files changed

+57
-13
lines changed

2 files changed

+57
-13
lines changed

pyiceberg/table/update/__init__.py

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ def _(update: AddSnapshotUpdate, base_metadata: TableMetadata, context: _TableMe
441441
elif base_metadata.snapshot_by_id(update.snapshot.snapshot_id) is not None:
442442
raise ValueError(f"Snapshot with id {update.snapshot.snapshot_id} already exists")
443443
elif (
444-
base_metadata.format_version == 2
444+
base_metadata.format_version >= 2
445445
and update.snapshot.sequence_number is not None
446446
and update.snapshot.sequence_number <= base_metadata.last_sequence_number
447447
and update.snapshot.parent_snapshot_id is not None
@@ -462,20 +462,25 @@ def _(update: AddSnapshotUpdate, base_metadata: TableMetadata, context: _TableMe
462462
f"Cannot add a snapshot with first row id smaller than the table's next-row-id "
463463
f"{update.snapshot.first_row_id} < {base_metadata.next_row_id}"
464464
)
465+
elif base_metadata.format_version >= 3 and update.snapshot.added_rows is None:
466+
raise ValueError("Cannot add snapshot without added rows")
467+
elif base_metadata.format_version >= 3 and base_metadata.next_row_id is None:
468+
raise ValueError("Cannot add a snapshot when table next-row-id is null")
469+
470+
metadata_updates: dict[str, Any] = {
471+
"last_updated_ms": update.snapshot.timestamp_ms,
472+
"last_sequence_number": update.snapshot.sequence_number,
473+
"snapshots": base_metadata.snapshots + [update.snapshot],
474+
}
475+
if base_metadata.format_version >= 3:
476+
next_row_id = base_metadata.next_row_id
477+
added_rows = update.snapshot.added_rows
478+
if next_row_id is None or added_rows is None:
479+
raise ValueError("Cannot compute next-row-id for v3 snapshot update")
480+
metadata_updates["next_row_id"] = next_row_id + added_rows
465481

466482
context.add_update(update)
467-
return base_metadata.model_copy(
468-
update={
469-
"last_updated_ms": update.snapshot.timestamp_ms,
470-
"last_sequence_number": update.snapshot.sequence_number,
471-
"snapshots": base_metadata.snapshots + [update.snapshot],
472-
"next_row_id": base_metadata.next_row_id + update.snapshot.added_rows
473-
if base_metadata.format_version >= 3
474-
and base_metadata.next_row_id is not None
475-
and update.snapshot.added_rows is not None
476-
else None,
477-
}
478-
)
483+
return base_metadata.model_copy(update=metadata_updates)
479484

480485

481486
@_apply_table_update.register(SetSnapshotRefUpdate)

tests/table/test_init.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1714,6 +1714,45 @@ def test_add_snapshot_update_fails_with_smaller_first_row_id(table_v3: Table) ->
17141714
update_table_metadata(table_v3.metadata, (AddSnapshotUpdate(snapshot=new_snapshot),))
17151715

17161716

1717+
def test_add_snapshot_update_fails_without_added_rows(table_v3: Table) -> None:
1718+
new_snapshot = Snapshot(
1719+
snapshot_id=25,
1720+
parent_snapshot_id=19,
1721+
sequence_number=200,
1722+
timestamp_ms=1602638593590,
1723+
manifest_list="s3:/a/b/c.avro",
1724+
summary=Summary(Operation.APPEND),
1725+
schema_id=3,
1726+
first_row_id=2,
1727+
)
1728+
1729+
with pytest.raises(
1730+
ValueError,
1731+
match="Cannot add snapshot without added rows",
1732+
):
1733+
update_table_metadata(table_v3.metadata, (AddSnapshotUpdate(snapshot=new_snapshot),))
1734+
1735+
1736+
def test_add_snapshot_update_fails_with_stale_sequence_number_in_v3(table_v3: Table) -> None:
1737+
new_snapshot = Snapshot(
1738+
snapshot_id=25,
1739+
parent_snapshot_id=19,
1740+
sequence_number=34,
1741+
timestamp_ms=1602638593590,
1742+
manifest_list="s3:/a/b/c.avro",
1743+
summary=Summary(Operation.APPEND),
1744+
schema_id=3,
1745+
first_row_id=2,
1746+
added_rows=1,
1747+
)
1748+
1749+
with pytest.raises(
1750+
ValueError,
1751+
match="Cannot add snapshot with sequence number 34 older than last sequence number 34",
1752+
):
1753+
update_table_metadata(table_v3.metadata, (AddSnapshotUpdate(snapshot=new_snapshot),))
1754+
1755+
17171756
def test_add_snapshot_update_updates_next_row_id(table_v3: Table) -> None:
17181757
new_snapshot = Snapshot(
17191758
snapshot_id=25,

0 commit comments

Comments
 (0)