Commit 01f6a6a

Revert "Change delete to load only undeleted versions (#2375)" (#2401)
This reverts commit 4c596f2.
1 parent c7875d8 commit 01f6a6a

7 files changed: +133 additions, −344 deletions

cpp/arcticdb/version/local_versioned_engine.cpp

Lines changed: 2 additions & 2 deletions
@@ -890,8 +890,8 @@ folly::Future<folly::Unit> delete_trees_responsibly(
     auto min_versions = min_versions_for_each_stream(orig_keys_to_delete);
     for (const auto& min : min_versions) {
         auto load_strategy = load_type == LoadType::DOWNTO
-            ? LoadStrategy{load_type, LoadObjective::UNDELETED_ONLY, static_cast<SignedVersionId>(min.second)}
-            : LoadStrategy{load_type, LoadObjective::UNDELETED_ONLY};
+            ? LoadStrategy{load_type, LoadObjective::INCLUDE_DELETED, static_cast<SignedVersionId>(min.second)}
+            : LoadStrategy{load_type, LoadObjective::INCLUDE_DELETED};
         const auto entry = version_map->check_reload(store, min.first, load_strategy, __FUNCTION__);
         entry_map.try_emplace(std::move(min.first), entry);
     }
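
The only functional change in this revert is the hunk above: delete_trees_responsibly goes back to building its LoadStrategy with LoadObjective::INCLUDE_DELETED, so the delete path once again reloads version chains including tombstoned versions when deciding which keys it may remove. At the Python layer this code is driven by the ordinary write/delete calls; a minimal sketch of that path is below, assuming a local LMDB-backed Arctic instance (the URI, library name and symbol are illustrative, not part of this commit).

# Minimal sketch of the user-level calls that exercise delete_trees_responsibly.
# The URI, library name and symbol are illustrative assumptions.
import pandas as pd
from arcticdb import Arctic

ac = Arctic("lmdb:///tmp/arcticdb_demo")
lib = ac.get_library("demo", create_if_missing=True)

# Build a short version chain, then delete the symbol. The delete path reloads
# the symbol's version chain (with this revert, including deleted versions)
# to decide which index and data keys can safely be removed.
for i in range(3):
    lib.write("demo_sym", pd.DataFrame({"col": [i]}))
lib.delete("demo_sym")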

python/.asv/results/benchmarks.json

Lines changed: 75 additions & 95 deletions
Large diffs are not rendered by default.

python/benchmarks/basic_functions.py

Lines changed: 0 additions & 8 deletions
@@ -10,8 +10,6 @@
 from typing import List
 from arcticdb import Arctic
 from arcticdb.version_store.library import WritePayload, ReadRequest
-from arcticdb.util.test import config_context
-
 import pandas as pd
 
 from benchmarks.common import *
@@ -363,9 +361,3 @@ def time_delete_multiple_versions(self, lad: LargeAppendDataModify, rows):
 
     def time_delete_short_wide(self, lad: LargeAppendDataModify, rows):
         self.lib_short_wide.delete("short_wide_sym")
-
-    def time_delete_over_time(self, lad: LargeAppendDataModify, rows):
-        with config_context("VersionMap.ReloadInterval", 0):
-            for i in range(100):
-                self.lib.write("delete_over_time", pd.DataFrame())
-                self.lib.delete("delete_over_time")

python/benchmarks/real_modification_functions.py

Lines changed: 0 additions & 8 deletions
@@ -14,7 +14,6 @@
 from arcticdb.util.utils import TimestampNumber
 from arcticdb.version_store.library import Library
 from benchmarks.common import AsvBase
-from arcticdb.util.test import config_context
 
 
 # region Setup classes
@@ -315,10 +314,3 @@ def teardown(self, cache, num_rows):
 
     def time_delete(self, cache, num_rows):
         self.lib.delete(self.symbol)
-        self.symbol_deleted = True
-
-    def time_delete_over_time(self, cache, num_rows):
-        with config_context("VersionMap.ReloadInterval", 0):
-            for i in range(25):
-                self.lib.write("delete_over_time", pd.DataFrame())
-                self.lib.delete("delete_over_time")

python/tests/conftest.py

Lines changed: 6 additions & 18 deletions
@@ -83,14 +83,6 @@
 # Use a smaller memory mapped limit for all tests
 MsgPackNormalizer.MMAP_DEFAULT_SIZE = 20 * (1 << 20)
 
-
-# silence warnings about custom markers
-def pytest_configure(config):
-    config.addinivalue_line("markers", "storage: Mark tests related to storage functionality")
-    config.addinivalue_line("markers", "authentication: Mark tests related to authentication functionality")
-    config.addinivalue_line("markers", "pipeline: Mark tests related to pipeline functionality")
-
-
 if platform.system() == "Linux":
     try:
         from ctypes import cdll
@@ -110,7 +102,7 @@ def lib_name(request: "pytest.FixtureRequest") -> str:
     name = re.sub(r"[^\w]", "_", request.node.name)[:30]
     pid = os.getpid()
     thread_id = threading.get_ident()
-    # There is limit to the name length, and note that without
+    # There is limit to the name length, and note that without
     # the dot (.) in the name mongo will not work!
     return f"{name}.{pid}_{thread_id}_{datetime.utcnow().strftime('%Y-%m-%dT%H_%M_%S_')}_{uuid.uuid4()}"
 
@@ -181,7 +173,6 @@ def lmdb_library_static_dynamic(request):
 def lmdb_library_factory(lmdb_storage, lib_name):
     def f(library_options: LibraryOptions = LibraryOptions()):
         return lmdb_storage.create_arctic().create_library(lib_name, library_options=library_options)
-
     return f
 
 
@@ -675,9 +666,7 @@ def s3_no_ssl_store_factory(lib_name, s3_no_ssl_storage) -> Callable[..., NativeVersionStore]:
 
 
 @pytest.fixture
-def mock_s3_store_with_error_simulation_factory(
-    lib_name, mock_s3_storage_with_error_simulation
-) -> Callable[..., NativeVersionStore]:
+def mock_s3_store_with_error_simulation_factory(lib_name, mock_s3_storage_with_error_simulation) -> Callable[..., NativeVersionStore]:
     return mock_s3_storage_with_error_simulation.create_version_store_factory(lib_name)
 
 
@@ -761,9 +750,8 @@ def nfs_backed_s3_version_store_v1(nfs_backed_s3_store_factory):
 @pytest.fixture
 def nfs_backed_s3_version_store_v2(nfs_backed_s3_store_factory, lib_name):
     library_name = lib_name + "_v2"
-    return nfs_backed_s3_store_factory(
-        dynamic_strings=True, encoding_version=int(EncodingVersion.V2), name=library_name
-    )
+    return nfs_backed_s3_store_factory(dynamic_strings=True,
+                                       encoding_version=int(EncodingVersion.V2), name=library_name)
 
 
 @pytest.fixture
@@ -821,7 +809,7 @@ def nfs_backed_s3_version_store(nfs_backed_s3_version_store_v1, nfs_backed_s3_version_store_v2):
         return nfs_backed_s3_version_store_v2
     else:
         raise ValueError(f"Unexpected encoding version: {encoding_version}")
-
+
 
 @pytest.fixture(scope="function")
 def mongo_version_store(mongo_store_factory):
@@ -1403,7 +1391,7 @@ def old_venv_and_arctic_uri(old_venv, arctic_uri):
 
     yield old_venv, arctic_uri
 
-
+
 @pytest.fixture
 def clear_query_stats():
     yield

python/tests/integration/arcticdb/test_s3.py

Lines changed: 0 additions & 162 deletions
@@ -9,8 +9,6 @@
 import re
 import time
 from multiprocessing import Queue, Process
-from difflib import unified_diff
-from collections import defaultdict
 
 import pytest
 import pandas as pd
@@ -26,8 +24,6 @@
 from arcticdb.storage_fixtures.s3 import MotoNfsBackedS3StorageFixtureFactory
 from arcticdb.storage_fixtures.s3 import MotoS3StorageFixtureFactory
 
-import arcticdb.toolbox.query_stats as qs
-
 from arcticdb.util.test import config_context, config_context_string
 
 pytestmark = pytest.mark.skipif(
@@ -226,161 +222,3 @@ def test_library_get_key_path(lib_name, storage_bucket, test_prefix):
         assert path.startswith(test_prefix)
 
     assert keys_count > 0
-
-
-def sum_operations(stats):
-    """Sum up all operations from query stats.
-
-    Args:
-        stats: Dictionary containing query stats
-
-    Returns:
-        Dictionary with total counts, sizes, and times for each operation type
-    """
-    totals = {}
-
-    for op_type, key_types in stats["storage_operations"].items():
-        totals[op_type] = {"count": 0, "size_bytes": 0, "total_time_ms": 0}
-
-        for key_type, metrics in key_types.items():
-            totals[op_type]["count"] += metrics["count"]
-            totals[op_type]["size_bytes"] += metrics["size_bytes"]
-            totals[op_type]["total_time_ms"] += metrics["total_time_ms"]
-
-    return totals
-
-
-def sum_all_operations(stats):
-    totals = sum_operations(stats)
-    total_count = 0
-    for op_type, metrics in totals.items():
-        total_count += metrics["count"]
-    return total_count
-
-
-def visualize_stats_diff(stats1, stats2):
-    """Visualize count differences between two stats dictionaries in a table format.
-
-    Args:
-        stats1: First stats dictionary
-        stats2: Second stats dictionary
-
-    Returns:
-        String containing formatted count differences in a table
-    """
-
-    def get_counts(stats):
-        counts = defaultdict(lambda: defaultdict(int))
-        for op_type, key_types in stats.get("storage_operations", {}).items():
-            for key_type, metrics in key_types.items():
-                counts[op_type][key_type] = metrics.get("count", 0)
-        return counts
-
-    counts1 = get_counts(stats1)
-    counts2 = get_counts(stats2)
-
-    # Get all unique operations and key types
-    all_ops = sorted(set(counts1.keys()) | set(counts2.keys()))
-    all_key_types = set()
-    for op in all_ops:
-        all_key_types.update(counts1[op].keys())
-        all_key_types.update(counts2[op].keys())
-    all_key_types = sorted(all_key_types)
-
-    # Build the table
-    output = []
-    output.append("Count Differences:")
-    output.append("=" * 80)
-
-    # Header
-    header = "Operation".ljust(30) + "Key Type".ljust(20) + "Before".rjust(10) + "After".rjust(10) + "Diff".rjust(10)
-    output.append(header)
-    output.append("-" * 80)
-
-    # Table rows
-    for op in all_ops:
-        for key_type in all_key_types:
-            count1 = counts1[op][key_type]
-            count2 = counts2[op][key_type]
-            if count1 != count2:
-                diff = count2 - count1
-                diff_str = f"{diff:+d}" if diff != 0 else "0"
-                row = f"{op[:28]:<30} {key_type[:18]:<20} {count1:>10} {count2:>10} {diff_str:>10}"
-                output.append(row)
-
-    if len(output) == 3:  # Only header, separator, and header row
-        return "No count differences found"
-
-    # Add summary
-    output.append("-" * 80)
-    total1 = sum(sum(counts.values()) for counts in counts1.values())
-    total2 = sum(sum(counts.values()) for counts in counts2.values())
-    total_diff = total2 - total1
-    diff_str = f"{total_diff:+d}" if total_diff != 0 else "0"
-    summary = f"Total:".ljust(50) + f"{total1:>10} {total2:>10} {diff_str:>10}"
-    output.append(summary)
-
-    return "\n".join(output)
-
-
-def test_delete_over_time(lib_name, storage_bucket, clear_query_stats):
-    qs.enable()
-    expected_ops = 14
-    lib = storage_bucket.create_version_store_factory(lib_name)()
-
-    with config_context("VersionMap.ReloadInterval", 0):
-        # Setup
-        # First write and delete will add an extra couple of version keys
-        lib.write("s", data=create_df())
-        qs.reset_stats()
-        lib.delete("s")
-
-        assert sum_all_operations(qs.get_query_stats()) == expected_ops
-        lib.write("s", data=create_df())
-        qs.reset_stats()
-
-        lib.delete("s")
-        base_stats = qs.get_query_stats()
-        base_ops_count = sum_all_operations(base_stats)
-        # expected_ops + 2 (read the new version and the tombstone all key)
-        assert base_ops_count == (expected_ops + 2)
-        qs.reset_stats()
-
-        iters = 10
-
-        # make sure that the delete makes a constant number of operations
-        for i in range(iters):
-            lib.write("s", data=create_df())
-            qs.reset_stats()
-
-            lib.delete("s")
-            stats = qs.get_query_stats()
-            qs.reset_stats()
-            assert sum_all_operations(stats) == base_ops_count == (expected_ops + 2), visualize_stats_diff(
-                base_stats, stats
-            )
-
-
-def test_wrute_and_prune_previous_over_time(lib_name, storage_bucket, clear_query_stats):
-    expected_ops = 9
-    with config_context("VersionMap.ReloadInterval", 0):
-        lib = storage_bucket.create_version_store_factory(lib_name)()
-        qs.enable()
-        lib.write("s", data=create_df())
-        qs.reset_stats()
-
-        lib.write("s", data=create_df(), prune_previous=True)
-
-        base_stats = qs.get_query_stats()
-        base_ops_count = sum_all_operations(base_stats)
-        assert base_ops_count == expected_ops
-        qs.reset_stats()
-
-        iters = 10
-
-        # make sure that the write and prune makes a constant number of operations
-        for i in range(iters):
-            lib.write("s", data=create_df(), prune_previous=True)
-            stats = qs.get_query_stats()
-            qs.reset_stats()
-            assert sum_all_operations(stats) == base_ops_count == expected_ops, visualize_stats_diff(base_stats, stats)
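
The tests removed above used arcticdb.toolbox.query_stats to assert that delete and write-with-prune issue a constant number of storage operations as a symbol's history grows. A minimal sketch of that counting pattern is below; the helper total_ops is a simplified stand-in for the removed sum_all_operations, and the library handle, symbol name and DataFrame contents are illustrative assumptions rather than part of this commit.

# Minimal sketch of the query-stats counting pattern from the removed tests.
# Assumes an already-created version store / library object `lib`; the symbol
# name and DataFrame contents are illustrative assumptions.
import pandas as pd

import arcticdb.toolbox.query_stats as qs
from arcticdb.util.test import config_context


def total_ops(stats):
    # Simplified stand-in for the removed sum_all_operations helper: add up
    # the per-key-type operation counts across all operation types.
    return sum(
        metrics["count"]
        for key_types in stats["storage_operations"].values()
        for metrics in key_types.values()
    )


def count_delete_ops(lib):
    # Force version-map reloads on every access, as the removed tests did,
    # then count the storage operations issued by a single delete.
    with config_context("VersionMap.ReloadInterval", 0):
        lib.write("s", pd.DataFrame({"col": [1]}))
        qs.enable()
        qs.reset_stats()
        lib.delete("s")
        return total_ops(qs.get_query_stats())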
