Commit 01f6a6a

Revert "Change delete to load only undeleted versions (#2375)" (#2401)
This reverts commit 4c596f2.
1 parent c7875d8 commit 01f6a6a

7 files changed: +133 additions, −344 deletions

cpp/arcticdb/version/local_versioned_engine.cpp

Lines changed: 2 additions & 2 deletions
@@ -890,8 +890,8 @@ folly::Future<folly::Unit> delete_trees_responsibly(
     auto min_versions = min_versions_for_each_stream(orig_keys_to_delete);
     for (const auto& min : min_versions) {
         auto load_strategy = load_type == LoadType::DOWNTO
-            ? LoadStrategy{load_type, LoadObjective::UNDELETED_ONLY, static_cast<SignedVersionId>(min.second)}
-            : LoadStrategy{load_type, LoadObjective::UNDELETED_ONLY};
+            ? LoadStrategy{load_type, LoadObjective::INCLUDE_DELETED, static_cast<SignedVersionId>(min.second)}
+            : LoadStrategy{load_type, LoadObjective::INCLUDE_DELETED};
         const auto entry = version_map->check_reload(store, min.first, load_strategy, __FUNCTION__);
         entry_map.try_emplace(std::move(min.first), entry);
     }
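
The only functional change in this revert is the hunk above: delete_trees_responsibly goes back to building its LoadStrategy with LoadObjective::INCLUDE_DELETED, so the delete path once again reloads version chains including tombstoned versions when deciding which keys it may remove. At the Python layer this code is driven by the ordinary write/delete calls; a minimal sketch of that path is below, assuming a local LMDB-backed Arctic instance (the URI, library name and symbol are illustrative, not part of this commit).

# Minimal sketch of the user-level calls that exercise delete_trees_responsibly.
# The URI, library name and symbol are illustrative assumptions.
import pandas as pd
from arcticdb import Arctic

ac = Arctic("lmdb:///tmp/arcticdb_demo")
lib = ac.get_library("demo", create_if_missing=True)

# Build a short version chain, then delete the symbol. The delete path reloads
# the symbol's version chain (with this revert, including deleted versions)
# to decide which index and data keys can safely be removed.
for i in range(3):
    lib.write("demo_sym", pd.DataFrame({"col": [i]}))
lib.delete("demo_sym")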

python/.asv/results/benchmarks.json

Lines changed: 75 additions & 95 deletions
Large diffs are not rendered by default.

python/benchmarks/basic_functions.py

Lines changed: 0 additions & 8 deletions
@@ -10,8 +10,6 @@
 from typing import List
 from arcticdb import Arctic
 from arcticdb.version_store.library import WritePayload, ReadRequest
-from arcticdb.util.test import config_context
-
 import pandas as pd
 
 from benchmarks.common import *
@@ -363,9 +361,3 @@ def time_delete_multiple_versions(self, lad: LargeAppendDataModify, rows):
 
     def time_delete_short_wide(self, lad: LargeAppendDataModify, rows):
         self.lib_short_wide.delete("short_wide_sym")
-
-    def time_delete_over_time(self, lad: LargeAppendDataModify, rows):
-        with config_context("VersionMap.ReloadInterval", 0):
-            for i in range(100):
-                self.lib.write("delete_over_time", pd.DataFrame())
-                self.lib.delete("delete_over_time")

python/benchmarks/real_modification_functions.py

Lines changed: 0 additions & 8 deletions
@@ -14,7 +14,6 @@
 from arcticdb.util.utils import TimestampNumber
 from arcticdb.version_store.library import Library
 from benchmarks.common import AsvBase
-from arcticdb.util.test import config_context
 
 
 # region Setup classes
@@ -315,10 +314,3 @@ def teardown(self, cache, num_rows):
 
     def time_delete(self, cache, num_rows):
         self.lib.delete(self.symbol)
-        self.symbol_deleted = True
-
-    def time_delete_over_time(self, cache, num_rows):
-        with config_context("VersionMap.ReloadInterval", 0):
-            for i in range(25):
-                self.lib.write("delete_over_time", pd.DataFrame())
-                self.lib.delete("delete_over_time")

python/tests/conftest.py

Lines changed: 6 additions & 18 deletions
@@ -83,14 +83,6 @@
 # Use a smaller memory mapped limit for all tests
 MsgPackNormalizer.MMAP_DEFAULT_SIZE = 20 * (1 << 20)
 
-
-# silence warnings about custom markers
-def pytest_configure(config):
-    config.addinivalue_line("markers", "storage: Mark tests related to storage functionality")
-    config.addinivalue_line("markers", "authentication: Mark tests related to authentication functionality")
-    config.addinivalue_line("markers", "pipeline: Mark tests related to pipeline functionality")
-
-
 if platform.system() == "Linux":
     try:
         from ctypes import cdll
@@ -110,7 +102,7 @@ def lib_name(request: "pytest.FixtureRequest") -> str:
     name = re.sub(r"[^\w]", "_", request.node.name)[:30]
     pid = os.getpid()
     thread_id = threading.get_ident()
-    # There is limit to the name length, and note that without
+    # There is limit to the name length, and note that without
     # the dot (.) in the name mongo will not work!
     return f"{name}.{pid}_{thread_id}_{datetime.utcnow().strftime('%Y-%m-%dT%H_%M_%S_')}_{uuid.uuid4()}"
 
@@ -181,7 +173,6 @@ def lmdb_library_static_dynamic(request):
 def lmdb_library_factory(lmdb_storage, lib_name):
     def f(library_options: LibraryOptions = LibraryOptions()):
         return lmdb_storage.create_arctic().create_library(lib_name, library_options=library_options)
-
     return f
 
 
@@ -675,9 +666,7 @@ def s3_no_ssl_store_factory(lib_name, s3_no_ssl_storage) -> Callable[..., NativeVersionStore]:
 
 
 @pytest.fixture
-def mock_s3_store_with_error_simulation_factory(
-    lib_name, mock_s3_storage_with_error_simulation
-) -> Callable[..., NativeVersionStore]:
+def mock_s3_store_with_error_simulation_factory(lib_name, mock_s3_storage_with_error_simulation) -> Callable[..., NativeVersionStore]:
     return mock_s3_storage_with_error_simulation.create_version_store_factory(lib_name)
 
 
@@ -761,9 +750,8 @@ def nfs_backed_s3_version_store_v1(nfs_backed_s3_store_factory):
 @pytest.fixture
 def nfs_backed_s3_version_store_v2(nfs_backed_s3_store_factory, lib_name):
     library_name = lib_name + "_v2"
-    return nfs_backed_s3_store_factory(
-        dynamic_strings=True, encoding_version=int(EncodingVersion.V2), name=library_name
-    )
+    return nfs_backed_s3_store_factory(dynamic_strings=True,
+                                       encoding_version=int(EncodingVersion.V2), name=library_name)
 
 
 @pytest.fixture
@@ -821,7 +809,7 @@ def nfs_backed_s3_version_store(nfs_backed_s3_version_store_v1, nfs_backed_s3_version_store_v2):
         return nfs_backed_s3_version_store_v2
     else:
         raise ValueError(f"Unexpected encoding version: {encoding_version}")
-
+
 
 @pytest.fixture(scope="function")
 def mongo_version_store(mongo_store_factory):
@@ -1403,7 +1391,7 @@ def old_venv_and_arctic_uri(old_venv, arctic_uri):
 
     yield old_venv, arctic_uri
 
-
+
 @pytest.fixture
 def clear_query_stats():
     yield

python/tests/integration/arcticdb/test_s3.py

Lines changed: 0 additions & 162 deletions
@@ -9,8 +9,6 @@
 import re
 import time
 from multiprocessing import Queue, Process
-from difflib import unified_diff
-from collections import defaultdict
 
 import pytest
 import pandas as pd
@@ -26,8 +24,6 @@
 from arcticdb.storage_fixtures.s3 import MotoNfsBackedS3StorageFixtureFactory
 from arcticdb.storage_fixtures.s3 import MotoS3StorageFixtureFactory
 
-import arcticdb.toolbox.query_stats as qs
-
 from arcticdb.util.test import config_context, config_context_string
 
 pytestmark = pytest.mark.skipif(
@@ -226,161 +222,3 @@ def test_library_get_key_path(lib_name, storage_bucket, test_prefix):
         assert path.startswith(test_prefix)
 
     assert keys_count > 0
-
-
-def sum_operations(stats):
-    """Sum up all operations from query stats.
-
-    Args:
-        stats: Dictionary containing query stats
-
-    Returns:
-        Dictionary with total counts, sizes, and times for each operation type
-    """
-    totals = {}
-
-    for op_type, key_types in stats["storage_operations"].items():
-        totals[op_type] = {"count": 0, "size_bytes": 0, "total_time_ms": 0}
-
-        for key_type, metrics in key_types.items():
-            totals[op_type]["count"] += metrics["count"]
-            totals[op_type]["size_bytes"] += metrics["size_bytes"]
-            totals[op_type]["total_time_ms"] += metrics["total_time_ms"]
-
-    return totals
-
-
-def sum_all_operations(stats):
-    totals = sum_operations(stats)
-    total_count = 0
-    for op_type, metrics in totals.items():
-        total_count += metrics["count"]
-    return total_count
-
-
-def visualize_stats_diff(stats1, stats2):
-    """Visualize count differences between two stats dictionaries in a table format.
-
-    Args:
-        stats1: First stats dictionary
-        stats2: Second stats dictionary
-
-    Returns:
-        String containing formatted count differences in a table
-    """
-
-    def get_counts(stats):
-        counts = defaultdict(lambda: defaultdict(int))
-        for op_type, key_types in stats.get("storage_operations", {}).items():
-            for key_type, metrics in key_types.items():
-                counts[op_type][key_type] = metrics.get("count", 0)
-        return counts
-
-    counts1 = get_counts(stats1)
-    counts2 = get_counts(stats2)
-
-    # Get all unique operations and key types
-    all_ops = sorted(set(counts1.keys()) | set(counts2.keys()))
-    all_key_types = set()
-    for op in all_ops:
-        all_key_types.update(counts1[op].keys())
-        all_key_types.update(counts2[op].keys())
-    all_key_types = sorted(all_key_types)
-
-    # Build the table
-    output = []
-    output.append("Count Differences:")
-    output.append("=" * 80)
-
-    # Header
-    header = "Operation".ljust(30) + "Key Type".ljust(20) + "Before".rjust(10) + "After".rjust(10) + "Diff".rjust(10)
-    output.append(header)
-    output.append("-" * 80)
-
-    # Table rows
-    for op in all_ops:
-        for key_type in all_key_types:
-            count1 = counts1[op][key_type]
-            count2 = counts2[op][key_type]
-            if count1 != count2:
-                diff = count2 - count1
-                diff_str = f"{diff:+d}" if diff != 0 else "0"
-                row = f"{op[:28]:<30} {key_type[:18]:<20} {count1:>10} {count2:>10} {diff_str:>10}"
-                output.append(row)
-
-    if len(output) == 3:  # Only header, separator, and header row
-        return "No count differences found"
-
-    # Add summary
-    output.append("-" * 80)
-    total1 = sum(sum(counts.values()) for counts in counts1.values())
-    total2 = sum(sum(counts.values()) for counts in counts2.values())
-    total_diff = total2 - total1
-    diff_str = f"{total_diff:+d}" if total_diff != 0 else "0"
-    summary = f"Total:".ljust(50) + f"{total1:>10} {total2:>10} {diff_str:>10}"
-    output.append(summary)
-
-    return "\n".join(output)
-
-
-def test_delete_over_time(lib_name, storage_bucket, clear_query_stats):
-    qs.enable()
-    expected_ops = 14
-    lib = storage_bucket.create_version_store_factory(lib_name)()
-
-    with config_context("VersionMap.ReloadInterval", 0):
-        # Setup
-        # First write and delete will add an extra couple of version keys
-        lib.write("s", data=create_df())
-        qs.reset_stats()
-        lib.delete("s")
-
-        assert sum_all_operations(qs.get_query_stats()) == expected_ops
-        lib.write("s", data=create_df())
-        qs.reset_stats()
-
-        lib.delete("s")
-        base_stats = qs.get_query_stats()
-        base_ops_count = sum_all_operations(base_stats)
-        # expected_ops + 2 (read the new version and the tombstone all key)
-        assert base_ops_count == (expected_ops + 2)
-        qs.reset_stats()
-
-        iters = 10
-
-        # make sure that the delete makes a constant number of operations
-        for i in range(iters):
-            lib.write("s", data=create_df())
-            qs.reset_stats()
-
-            lib.delete("s")
-            stats = qs.get_query_stats()
-            qs.reset_stats()
-            assert sum_all_operations(stats) == base_ops_count == (expected_ops + 2), visualize_stats_diff(
-                base_stats, stats
-            )
-
-
-def test_wrute_and_prune_previous_over_time(lib_name, storage_bucket, clear_query_stats):
-    expected_ops = 9
-    with config_context("VersionMap.ReloadInterval", 0):
-        lib = storage_bucket.create_version_store_factory(lib_name)()
-        qs.enable()
-        lib.write("s", data=create_df())
-        qs.reset_stats()
-
-        lib.write("s", data=create_df(), prune_previous=True)
-
-        base_stats = qs.get_query_stats()
-        base_ops_count = sum_all_operations(base_stats)
-        assert base_ops_count == expected_ops
-        qs.reset_stats()
-
-        iters = 10
-
-        # make sure that the write and prune makes a constant number of operations
-        for i in range(iters):
-            lib.write("s", data=create_df(), prune_previous=True)
-            stats = qs.get_query_stats()
-            qs.reset_stats()
-            assert sum_all_operations(stats) == base_ops_count == expected_ops, visualize_stats_diff(base_stats, stats)
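
The tests removed above used arcticdb.toolbox.query_stats to assert that delete and write-with-prune issue a constant number of storage operations as a symbol's history grows. A minimal sketch of that counting pattern is below; the helper total_ops is a simplified stand-in for the removed sum_all_operations, and the library handle, symbol name and DataFrame contents are illustrative assumptions rather than part of this commit.

# Minimal sketch of the query-stats counting pattern from the removed tests.
# Assumes an already-created version store / library object `lib`; the symbol
# name and DataFrame contents are illustrative assumptions.
import pandas as pd

import arcticdb.toolbox.query_stats as qs
from arcticdb.util.test import config_context


def total_ops(stats):
    # Simplified stand-in for the removed sum_all_operations helper: add up
    # the per-key-type operation counts across all operation types.
    return sum(
        metrics["count"]
        for key_types in stats["storage_operations"].values()
        for metrics in key_types.values()
    )


def count_delete_ops(lib):
    # Force version-map reloads on every access, as the removed tests did,
    # then count the storage operations issued by a single delete.
    with config_context("VersionMap.ReloadInterval", 0):
        lib.write("s", pd.DataFrame({"col": [1]}))
        qs.enable()
        qs.reset_stats()
        lib.delete("s")
        return total_ops(qs.get_query_stats())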
