diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py index b189b4094d..efd61c7362 100644 --- a/pyiceberg/catalog/__init__.py +++ b/pyiceberg/catalog/__init__.py @@ -818,8 +818,7 @@ def purge_table(self, identifier: Union[str, Identifier]) -> None: manifests_to_delete: List[ManifestFile] = [] for snapshot in metadata.snapshots: manifests_to_delete += snapshot.manifests(io) - if snapshot.manifest_list is not None: - manifest_lists_to_delete.add(snapshot.manifest_list) + manifest_lists_to_delete.add(snapshot.manifest_list) manifest_paths_to_delete = {manifest.manifest_path for manifest in manifests_to_delete} prev_metadata_files = {log.metadata_file for log in metadata.metadata_log} diff --git a/pyiceberg/cli/output.py b/pyiceberg/cli/output.py index 13a15c53f9..a4183c32bd 100644 --- a/pyiceberg/cli/output.py +++ b/pyiceberg/cli/output.py @@ -112,8 +112,7 @@ def describe_table(self, table: Table) -> None: snapshot_tree = Tree("Snapshots") for snapshot in metadata.snapshots: - manifest_list_str = f": {snapshot.manifest_list}" if snapshot.manifest_list else "" - snapshot_tree.add(f"Snapshot {snapshot.snapshot_id}, schema {snapshot.schema_id}{manifest_list_str}") + snapshot_tree.add(f"Snapshot {snapshot.snapshot_id}, schema {snapshot.schema_id}: {snapshot.manifest_list}") output_table = self._table output_table.add_row("Table format version", str(metadata.format_version)) @@ -141,8 +140,9 @@ def files(self, table: Table, history: bool) -> None: io = table.io for snapshot in snapshots: - manifest_list_str = f": {snapshot.manifest_list}" if snapshot.manifest_list else "" - list_tree = snapshot_tree.add(f"Snapshot {snapshot.snapshot_id}, schema {snapshot.schema_id}{manifest_list_str}") + list_tree = snapshot_tree.add( + f"Snapshot {snapshot.snapshot_id}, schema {snapshot.schema_id}: {snapshot.manifest_list}" + ) manifest_list = snapshot.manifests(io) for manifest in manifest_list: diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py index c5cb57e691..a5515f12b0 100644 --- a/pyiceberg/table/snapshots.py +++ b/pyiceberg/table/snapshots.py @@ -239,9 +239,7 @@ class Snapshot(IcebergBaseModel): parent_snapshot_id: Optional[int] = Field(alias="parent-snapshot-id", default=None) sequence_number: Optional[int] = Field(alias="sequence-number", default=INITIAL_SEQUENCE_NUMBER) timestamp_ms: int = Field(alias="timestamp-ms", default_factory=lambda: int(time.time() * 1000)) - manifest_list: Optional[str] = Field( - alias="manifest-list", description="Location of the snapshot's manifest list file", default=None - ) + manifest_list: str = Field(alias="manifest-list", description="Location of the snapshot's manifest list file") summary: Optional[Summary] = Field(default=None) schema_id: Optional[int] = Field(alias="schema-id", default=None) @@ -255,9 +253,7 @@ def __str__(self) -> str: def manifests(self, io: FileIO) -> List[ManifestFile]: """Return the manifests for the given snapshot.""" - if self.manifest_list: - return list(_manifests(io, self.manifest_list)) - return [] + return list(_manifests(io, self.manifest_list)) class MetadataLogEntry(IcebergBaseModel): diff --git a/tests/conftest.py b/tests/conftest.py index 9160a1435d..842e0f2717 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -596,7 +596,7 @@ def all_avro_types() -> Dict[str, Any]: "partition-spec": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}], "properties": {}, "current-snapshot-id": -1, - "snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822}], + "snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822, "manifest-list": "s3://bucket/test/manifest-list"}], } diff --git a/tests/table/test_metadata.py b/tests/table/test_metadata.py index 0e2b91f24b..3b7ccf7c10 100644 --- a/tests/table/test_metadata.py +++ b/tests/table/test_metadata.py @@ -168,7 +168,7 @@ def test_updating_metadata(example_table_metadata_v2: Dict[str, Any]) -> None: def test_serialize_v1(example_table_metadata_v1: Dict[str, Any]) -> None: table_metadata = TableMetadataV1(**example_table_metadata_v1) table_metadata_json = table_metadata.model_dump_json() - expected = """{"location":"s3://bucket/test/location","table-uuid":"d20125c8-7284-442c-9aea-15fee620737c","last-updated-ms":1602638573874,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}],"default-spec-id":0,"last-partition-id":1000,"properties":{},"snapshots":[{"snapshot-id":1925,"timestamp-ms":1602638573822}],"snapshot-log":[],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{},"format-version":1,"schema":{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]},"partition-spec":[{"name":"x","transform":"identity","source-id":1,"field-id":1000}]}""" + expected = """{"location":"s3://bucket/test/location","table-uuid":"d20125c8-7284-442c-9aea-15fee620737c","last-updated-ms":1602638573874,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}],"default-spec-id":0,"last-partition-id":1000,"properties":{},"snapshots":[{"snapshot-id":1925,"timestamp-ms":1602638573822,"manifest-list":"s3://bucket/test/manifest-list"}],"snapshot-log":[],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{},"format-version":1,"schema":{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]},"partition-spec":[{"name":"x","transform":"identity","source-id":1,"field-id":1000}]}""" assert table_metadata_json == expected @@ -497,7 +497,7 @@ def test_v1_write_metadata_for_v2() -> None: "partition-spec": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}], "properties": {}, "current-snapshot-id": -1, - "snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822}], + "snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822, "manifest-list": "s3://bucket/test/manifests"}], } table_metadata = TableMetadataV1(**minimal_example_v1).to_v2()