Skip to content

Commit 8cf6e6e

Browse files
ikyasam18Masayuki-Kamoda
andauthored
Fix: dictionary changed size during iteration in GCSObjectMetadataClient (#468)
* Fix: dictionary changed size during iteration in GCSObjectMetadataClient * chroe:ruff * refactor: Convert lists to tuples to reduce memory usage * fix: Use constant for GCS metadata size * fix: Remove unnecessary result assignment in metadata size adjustment test --------- Co-authored-by: Masayuki-Kamoda <[email protected]>
1 parent 855ef94 commit 8cf6e6e

File tree

2 files changed

+19
-4
lines changed

2 files changed

+19
-4
lines changed

gokart/gcs_obj_metadata_client.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import re
77
from collections.abc import Iterable
88
from logging import getLogger
9-
from typing import Any
9+
from typing import Any, Final
1010
from urllib.parse import urlsplit
1111

1212
from googleapiclient.model import makepatch
@@ -24,6 +24,9 @@ class GCSObjectMetadataClient:
2424
This class used for adding metadata as labels.
2525
"""
2626

27+
# Maximum metadata size for GCS objects (8 KiB)
28+
MAX_GCS_METADATA_SIZE: Final[int] = 8 * 1024
29+
2730
@staticmethod
2831
def _is_log_related_path(path: str) -> bool:
2932
return re.match(r'^gs://.+?/log/(processing_time/|task_info/|task_log/|module_versions/|random_seed/|task_params/).+', path) is not None
@@ -154,15 +157,20 @@ def _get_label_size(label_name: str, label_value: str) -> int:
154157

155158
labels = copy.deepcopy(_labels)
156159
max_gcs_metadata_size, current_total_metadata_size = (
157-
8 * 1024,
160+
GCSObjectMetadataClient.MAX_GCS_METADATA_SIZE,
158161
sum(_get_label_size(label_name, label_value) for label_name, label_value in labels.items()),
159162
)
160163
if current_total_metadata_size <= max_gcs_metadata_size:
161164
return labels
162-
for label_name, label_value in reversed(labels.items()):
165+
# NOTE: remove labels to stay within max metadata size.
166+
to_remove = []
167+
for label_name, label_value in reversed(tuple(labels.items())):
163168
size = _get_label_size(label_name, label_value)
164-
del labels[label_name]
169+
to_remove.append(label_name)
165170
current_total_metadata_size -= size
166171
if current_total_metadata_size <= max_gcs_metadata_size:
167172
break
173+
174+
for key in to_remove:
175+
del labels[key]
168176
return labels

test/test_gcs_obj_metadata_client.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,13 @@ def test_get_patched_obj_metadata_with_nested_required_task_outputs(self):
140140
got['__required_task_outputs'], '{"nested_task": {"nest": {"__gokart_task_name": "task1", "__gokart_output_path": "path/to/output1"}}}'
141141
)
142142

143+
def test_adjust_gcs_metadata_limit_size_runtime_error(self):
144+
large_labels = {}
145+
for i in range(100):
146+
large_labels[f'key_{i}'] = 'x' * 1000
147+
148+
GCSObjectMetadataClient._adjust_gcs_metadata_limit_size(large_labels)
149+
143150

144151
class TestGokartTask(unittest.TestCase):
145152
@patch.object(_DummyTaskOnKart, '_get_output_target')

0 commit comments

Comments
 (0)