diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9d685f684c..fecccf67f5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -9,7 +9,7 @@ on: branches: ["**"] schedule: # IMPORTANT: For scheduled job we execute AWS_S3 - - cron: '0 23 * * 0,1,2,3,4' # Start previous dat at 23:00 to finish next day + - cron: '0 1 * * 1,2,3,4' # Mon-Thu at 1 am workflow_dispatch: inputs: persistent_storage: @@ -69,9 +69,9 @@ jobs: _storage='not_scheduled' day=$(date +'%a') echo "Today is $day" - if [[ "$day" == "Sun" || "$day" == "Tue" || "$day" == "Thu" ]]; then + if [[ "$day" == "Mon" || "$day" == "Wed" ]]; then _storage='AWS_S3' - elif [[ "$day" == "Mon" || "$day" == "Wed" ]]; then + elif [[ "$day" == "Tue" || "$day" == "Thu" ]]; then _storage='GCPXML' else echo "UNSPECIFIED RESULT for this day (assumed LMDB)" diff --git a/.github/workflows/build_steps.yml b/.github/workflows/build_steps.yml index 4c11fb4f02..fd398b217b 100644 --- a/.github/workflows/build_steps.yml +++ b/.github/workflows/build_steps.yml @@ -400,7 +400,6 @@ jobs: - name: Run test run: | - ulimit -a if [[ "$(echo "$ARCTICDB_PYTEST_ARGS" | xargs)" == pytest* ]]; then python -m pip install pytest-repeat setuptools wheel python setup.py protoc --build-lib python diff --git a/python/arcticdb/storage_fixtures/s3.py b/python/arcticdb/storage_fixtures/s3.py index 1c7a3f3eb9..a1498d3212 100644 --- a/python/arcticdb/storage_fixtures/s3.py +++ b/python/arcticdb/storage_fixtures/s3.py @@ -27,8 +27,6 @@ import botocore.exceptions from moto.server import DomainDispatcherApplication, create_backend_app -from arcticdb.util.utils import get_logger - from .api import * from .utils import ( get_ephemeral_port, @@ -40,7 +38,6 @@ from arcticc.pb2.storage_pb2 import EnvironmentConfigsMap from arcticdb.version_store.helper import add_gcp_library_to_env, add_s3_library_to_env from arcticdb_ext.storage import AWSAuthMethod, NativeVariantStorage, GCPXMLSettings as NativeGCPXMLSettings -from arcticdb_ext.tools import S3Tool # All storage client libraries to be imported on-demand to speed up start-up of ad-hoc test runs @@ -116,8 +113,6 @@ def __init__( def __exit__(self, exc_type, exc_value, traceback): if self.factory.clean_bucket_on_fixture_exit: self.factory.cleanup_bucket(self) - if len(self.libs_from_factory) > 0: - get_logger().warning(f"Libraries not cleared remaining {self.libs_from_factory.keys()}") def create_test_cfg(self, lib_name: str) -> EnvironmentConfigsMap: cfg = EnvironmentConfigsMap() @@ -177,21 +172,6 @@ def copy_underlying_objects_to(self, destination: "S3Bucket"): for key in self.iter_underlying_object_names(): dest.copy({"Bucket": self.bucket, "Key": key}, key, SourceClient=source_client) - def check_bucket(self, assert_on_fail = True): - s3_tool = S3Tool(self.bucket, self.factory.default_key.id, - self.factory.default_key.secret, self.factory.endpoint) - content = s3_tool.list_bucket(self.bucket) - - logger.warning(f"Total objects left: {len(content)}") - logger.warning(f"First 100: {content[0:100]}") - logger.warning(f"BUCKET: {self.bucket}") - left_from = set() - for key in content: - library_name = key.split("/")[1] # get the name from object - left_from.add(library_name) - logger.warning(f"Left overs from libraries: {left_from}") - if assert_on_fail: - assert len(content) < 1 class NfsS3Bucket(S3Bucket): def create_test_cfg(self, lib_name: str) -> EnvironmentConfigsMap: @@ -321,7 +301,6 @@ def cleanup_bucket(self, b: S3Bucket): # We are not writing to buckets in this case # and if we 
try to delete the bucket, it will fail b.slow_cleanup(failure_consequence="The following delete bucket call will also fail. ") - b.check_bucket(assert_on_fail=True) class BaseGCPStorageFixtureFactory(StorageFixtureFactory): @@ -356,7 +335,6 @@ def cleanup_bucket(self, b: GcpS3Bucket): # We are not writing to buckets in this case # and if we try to delete the bucket, it will fail b.slow_cleanup(failure_consequence="The following delete bucket call will also fail. ") - b.check_bucket(assert_on_fail=True) def real_s3_from_environment_variables( diff --git a/python/tests/compat/arcticdb/test_lib_naming.py b/python/tests/compat/arcticdb/test_lib_naming.py index fae3502183..9e741631a1 100644 --- a/python/tests/compat/arcticdb/test_lib_naming.py +++ b/python/tests/compat/arcticdb/test_lib_naming.py @@ -38,17 +38,9 @@ def test_create_library_with_all_chars(arctic_client_v1, prefix, suffix): assert all(name in result for name in created_libraries) finally: logger.info("Delete started") - failed_to_delete = [] for cnt, lib in enumerate(created_libraries): logger.info(f"Deletion: {cnt}/{len(created_libraries)} lib_name [{repr(lib)}] ") - try: - ac.delete_library(lib) - except Exception as e: - try: - ac.delete_library(lib) - except Exception as e: - failed_to_delete.append(lib) - assert len(failed_to_delete) < 1, f"Following libraries failed to delete: {failed_to_delete}" + ac.delete_library(lib) logger.info("Delete ended") assert not failed, "There is at least one failure look at the result" diff --git a/python/tests/conftest.py b/python/tests/conftest.py index a0f220b606..bbdddcd4da 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -8,7 +8,6 @@ import enum from typing import Callable, Generator, Union -from arcticdb.util.environment_setup import get_console_logger from arcticdb.version_store._store import NativeVersionStore from arcticdb.version_store.library import Library import hypothesis @@ -55,7 +54,6 @@ from arcticdb.arctic import Arctic from .util.mark import ( LMDB_TESTS_MARK, - MACOS_WHEEL_BUILD, MEM_TESTS_MARK, SIM_GCP_TESTS_MARK, SIM_NFS_TESTS_MARK, @@ -151,7 +149,6 @@ def pytest_generate_tests(metafunc): # endregion # region ======================================= Storage Fixtures ======================================= - @pytest.fixture(scope="session") def lmdb_shared_storage(tmp_path_factory) -> Generator[LmdbStorageFixture, None, None]: tmp_path = tmp_path_factory.mktemp("lmdb") @@ -576,6 +573,26 @@ def arctic_client_v1(request) -> Arctic: return ac +@pytest.fixture( + scope="function", + params=[ + pytest.param("s3", marks=SIM_S3_TESTS_MARK), + pytest.param("nfs_backed_s3", marks=SIM_NFS_TESTS_MARK), + pytest.param("gcp", marks=SIM_GCP_TESTS_MARK), + pytest.param("mem", marks=MEM_TESTS_MARK), + pytest.param("azurite", marks=AZURE_TESTS_MARK), + pytest.param("mongo", marks=MONGO_TESTS_MARK), + pytest.param("real_s3", marks=REAL_S3_TESTS_MARK), + pytest.param("real_gcp", marks=REAL_GCP_TESTS_MARK), + ], +) +def arctic_client_no_lmdb(request, encoding_version) -> Arctic: + filter_out_unwanted_mark(request, request.param) + storage_fixture: StorageFixture = request.getfixturevalue(request.param + "_storage") + ac = storage_fixture.create_arctic(encoding_version=encoding_version) + return ac + + @pytest.fixture( scope="function", params=["lmdb"], @@ -587,26 +604,26 @@ def arctic_client_lmdb(request, encoding_version) -> Arctic: @pytest.fixture -def arctic_library(arctic_client, lib_name) -> Generator[Library, None, None]: +def arctic_library(arctic_client, 
lib_name) -> Library: yield arctic_client.create_library(lib_name) arctic_client.delete_library(lib_name) @pytest.fixture -def arctic_library_dynamic(arctic_client, lib_name) -> Generator[Library, None, None]: +def arctic_library_dynamic(arctic_client, lib_name) -> Library: lib_opts = LibraryOptions(dynamic_schema=True) yield arctic_client.create_library(lib_name, library_options=lib_opts) arctic_client.delete_library(lib_name) @pytest.fixture -def arctic_library_v1(arctic_client_v1, lib_name) -> Generator[Library, None, None]: +def arctic_library_v1(arctic_client_v1, lib_name) -> Library: yield arctic_client_v1.create_library(lib_name) arctic_client_v1.delete_library(lib_name) @pytest.fixture -def arctic_library_lmdb(arctic_client_lmdb, lib_name) -> Generator[Library, None, None]: +def arctic_library_lmdb(arctic_client_lmdb, lib_name) -> Library: yield arctic_client_lmdb.create_library(lib_name) arctic_client_lmdb.delete_library(lib_name) @@ -647,22 +664,9 @@ def basic_arctic_library(basic_arctic_client, lib_name) -> Library: # endregion # region ============================ `NativeVersionStore` Fixture Factories ============================ -def _store_factory(lib_name, bucket, delete_bucket = True) -> Generator[Callable[..., NativeVersionStore], None, None]: - yield bucket.create_version_store_factory(lib_name) - if delete_bucket: - try: - bucket.slow_cleanup() - except Exception as e: - get_console_logger().warning(f"Exception caught during NativeVersionStore clear: {repr(e)}") - - @pytest.fixture -def version_store_factory(lib_name, lmdb_storage) -> Generator[Callable[..., NativeVersionStore], None, None]: - # Do not delete LMDB library on windows - # Otherwise there will be no storage space left for unit tests - # very peculiar behavior for LMDB, not investigated yet - # On MacOS ARM build this will sometimes hang test execution, so no clearing there either - yield from _store_factory(lib_name, lmdb_storage, not (WINDOWS or MACOS_WHEEL_BUILD)) +def version_store_factory(lib_name, lmdb_storage) -> Callable[..., NativeVersionStore]: + return lmdb_storage.create_version_store_factory(lib_name) @pytest.fixture @@ -679,55 +683,55 @@ def s3_store_factory_mock_storage_exception(lib_name, s3_storage): @pytest.fixture -def s3_store_factory(lib_name, s3_storage) -> Generator[Callable[..., NativeVersionStore], None, None]: - yield from _store_factory(lib_name, s3_storage) +def s3_store_factory(lib_name, s3_storage) -> Callable[..., NativeVersionStore]: + return s3_storage.create_version_store_factory(lib_name) @pytest.fixture -def s3_no_ssl_store_factory(lib_name, s3_no_ssl_storage) -> Generator[Callable[..., NativeVersionStore], None, None]: - yield from _store_factory(lib_name, s3_no_ssl_storage) +def s3_no_ssl_store_factory(lib_name, s3_no_ssl_storage) -> Callable[..., NativeVersionStore]: + return s3_no_ssl_storage.create_version_store_factory(lib_name) + @pytest.fixture def mock_s3_store_with_error_simulation_factory( lib_name, mock_s3_storage_with_error_simulation -) -> Callable[..., NativeVersionStore]: - # NOTE: this store simulates errors, therefore there is no way to delete it +) -> Callable[..., NativeVersionStore]: return mock_s3_storage_with_error_simulation.create_version_store_factory(lib_name) @pytest.fixture -def real_s3_store_factory(lib_name, real_s3_storage) -> Generator[Callable[..., NativeVersionStore], None, None]: - yield from _store_factory(lib_name, real_s3_storage) +def real_s3_store_factory(lib_name, real_s3_storage) -> Callable[..., NativeVersionStore]: + return 
real_s3_storage.create_version_store_factory(lib_name) @pytest.fixture -def nfs_backed_s3_store_factory(lib_name, nfs_backed_s3_storage) -> Generator[Callable[..., NativeVersionStore], None, None]: - yield from _store_factory(lib_name, nfs_backed_s3_storage) +def nfs_backed_s3_store_factory(lib_name, nfs_backed_s3_storage) -> Callable[..., NativeVersionStore]: + return nfs_backed_s3_storage.create_version_store_factory(lib_name) @pytest.fixture -def real_gcp_store_factory(lib_name, real_gcp_storage) -> Generator[Callable[..., NativeVersionStore], None, None]: - yield from _store_factory(lib_name, real_gcp_storage) +def real_gcp_store_factory(lib_name, real_gcp_storage) -> Callable[..., NativeVersionStore]: + return real_gcp_storage.create_version_store_factory(lib_name) @pytest.fixture -def real_s3_sts_store_factory(lib_name, real_s3_sts_storage) -> Generator[Callable[..., NativeVersionStore], None, None]: - yield from _store_factory(lib_name, real_s3_sts_storage) +def real_s3_sts_store_factory(lib_name, real_s3_sts_storage) -> Callable[..., NativeVersionStore]: + return real_s3_sts_storage.create_version_store_factory(lib_name) @pytest.fixture -def azure_store_factory(lib_name, azurite_storage) -> Generator[Callable[..., NativeVersionStore], None, None]: - yield from _store_factory(lib_name, azurite_storage) +def azure_store_factory(lib_name, azurite_storage) -> Callable[..., NativeVersionStore]: + return azurite_storage.create_version_store_factory(lib_name) @pytest.fixture -def mongo_store_factory(mongo_storage, lib_name) -> Generator[Callable[..., NativeVersionStore], None, None]: - yield from _store_factory(lib_name, mongo_storage) +def mongo_store_factory(mongo_storage, lib_name): + return mongo_storage.create_version_store_factory(lib_name) @pytest.fixture -def in_memory_store_factory(mem_storage, lib_name) -> Generator[Callable[..., NativeVersionStore], None, None]: - yield from _store_factory(lib_name, mem_storage) +def in_memory_store_factory(mem_storage, lib_name) -> Callable[..., NativeVersionStore]: + return mem_storage.create_version_store_factory(lib_name) # endregion @@ -738,42 +742,42 @@ def real_s3_version_store(real_s3_store_factory): @pytest.fixture -def real_s3_version_store_dynamic_schema(real_s3_store_factory) -> NativeVersionStore: +def real_s3_version_store_dynamic_schema(real_s3_store_factory): return real_s3_store_factory(dynamic_strings=True, dynamic_schema=True) @pytest.fixture -def real_gcp_version_store(real_gcp_store_factory) -> NativeVersionStore: +def real_gcp_version_store(real_gcp_store_factory): return real_gcp_store_factory() @pytest.fixture -def real_gcp_version_store_dynamic_schema(real_gcp_store_factory) -> NativeVersionStore: +def real_gcp_version_store_dynamic_schema(real_gcp_store_factory): return real_gcp_store_factory(dynamic_strings=True, dynamic_schema=True) @pytest.fixture -def real_s3_sts_version_store(real_s3_sts_store_factory) -> NativeVersionStore: +def real_s3_sts_version_store(real_s3_sts_store_factory): return real_s3_sts_store_factory() @pytest.fixture -def mock_s3_store_with_error_simulation(mock_s3_store_with_error_simulation_factory) -> NativeVersionStore: +def mock_s3_store_with_error_simulation(mock_s3_store_with_error_simulation_factory): return mock_s3_store_with_error_simulation_factory() @pytest.fixture -def mock_s3_store_with_mock_storage_exception(s3_store_factory_mock_storage_exception) -> NativeVersionStore: +def mock_s3_store_with_mock_storage_exception(s3_store_factory_mock_storage_exception): return 
s3_store_factory_mock_storage_exception() @pytest.fixture -def nfs_backed_s3_version_store_v1(nfs_backed_s3_store_factory) -> NativeVersionStore: +def nfs_backed_s3_version_store_v1(nfs_backed_s3_store_factory): return nfs_backed_s3_store_factory(dynamic_strings=True) @pytest.fixture -def nfs_backed_s3_version_store_v2(nfs_backed_s3_store_factory, lib_name) -> NativeVersionStore: +def nfs_backed_s3_version_store_v2(nfs_backed_s3_store_factory, lib_name): library_name = lib_name + "_v2" return nfs_backed_s3_store_factory( dynamic_strings=True, encoding_version=int(EncodingVersion.V2), name=library_name @@ -781,23 +785,23 @@ def nfs_backed_s3_version_store_v2(nfs_backed_s3_store_factory, lib_name) -> Nat @pytest.fixture -def s3_version_store_v1(s3_store_factory) -> NativeVersionStore: +def s3_version_store_v1(s3_store_factory): return s3_store_factory(dynamic_strings=True) @pytest.fixture -def s3_version_store_v2(s3_store_factory, lib_name) -> NativeVersionStore: +def s3_version_store_v2(s3_store_factory, lib_name): library_name = lib_name + "_v2" return s3_store_factory(dynamic_strings=True, encoding_version=int(EncodingVersion.V2), name=library_name) @pytest.fixture -def s3_version_store_dynamic_schema_v1(s3_store_factory) -> NativeVersionStore: +def s3_version_store_dynamic_schema_v1(s3_store_factory): return s3_store_factory(dynamic_strings=True, dynamic_schema=True) @pytest.fixture -def s3_version_store_dynamic_schema_v2(s3_store_factory, lib_name) -> NativeVersionStore: +def s3_version_store_dynamic_schema_v2(s3_store_factory, lib_name): library_name = lib_name + "_v2" return s3_store_factory( dynamic_strings=True, dynamic_schema=True, encoding_version=int(EncodingVersion.V2), name=library_name @@ -805,7 +809,7 @@ def s3_version_store_dynamic_schema_v2(s3_store_factory, lib_name) -> NativeVers @pytest.fixture -def s3_version_store(s3_version_store_v1, s3_version_store_v2, encoding_version) -> NativeVersionStore: +def s3_version_store(s3_version_store_v1, s3_version_store_v2, encoding_version): if encoding_version == EncodingVersion.V1: return s3_version_store_v1 elif encoding_version == EncodingVersion.V2: @@ -815,12 +819,12 @@ def s3_version_store(s3_version_store_v1, s3_version_store_v2, encoding_version) @pytest.fixture -def nfs_backed_s3_version_store_dynamic_schema_v1(nfs_backed_s3_store_factory) -> NativeVersionStore: +def nfs_backed_s3_version_store_dynamic_schema_v1(nfs_backed_s3_store_factory): return nfs_backed_s3_store_factory(dynamic_strings=True, dynamic_schema=True) @pytest.fixture -def nfs_backed_s3_version_store_dynamic_schema_v2(nfs_backed_s3_store_factory, lib_name) -> NativeVersionStore: +def nfs_backed_s3_version_store_dynamic_schema_v2(nfs_backed_s3_store_factory, lib_name): library_name = lib_name + "_v2" return nfs_backed_s3_store_factory( dynamic_strings=True, dynamic_schema=True, encoding_version=int(EncodingVersion.V2), name=library_name @@ -828,7 +832,7 @@ def nfs_backed_s3_version_store_dynamic_schema_v2(nfs_backed_s3_store_factory, l @pytest.fixture -def nfs_backed_s3_version_store(nfs_backed_s3_version_store_v1, nfs_backed_s3_version_store_v2, encoding_version) -> NativeVersionStore: +def nfs_backed_s3_version_store(nfs_backed_s3_version_store_v1, nfs_backed_s3_version_store_v2, encoding_version): if encoding_version == EncodingVersion.V1: return nfs_backed_s3_version_store_v1 elif encoding_version == EncodingVersion.V2: @@ -838,7 +842,7 @@ def nfs_backed_s3_version_store(nfs_backed_s3_version_store_v1, nfs_backed_s3_ve @pytest.fixture(scope="function") 
-def mongo_version_store(mongo_store_factory) -> NativeVersionStore: +def mongo_version_store(mongo_store_factory): return mongo_store_factory() @@ -892,7 +896,7 @@ def local_object_store_factory(request): @pytest.fixture -def local_object_version_store(local_object_store_factory) -> NativeVersionStore: +def local_object_version_store(local_object_store_factory): """ Designed to test all local object stores and their simulations Doesn't support LMDB or persistent storages @@ -901,7 +905,7 @@ def local_object_version_store(local_object_store_factory) -> NativeVersionStore @pytest.fixture -def local_object_version_store_prune_previous(local_object_store_factory) -> NativeVersionStore: +def local_object_version_store_prune_previous(local_object_store_factory): """ Designed to test all local object stores and their simulations Doesn't support LMDB or persistent storages @@ -951,12 +955,12 @@ def basic_store(basic_store_factory) -> NativeVersionStore: @pytest.fixture -def azure_version_store(azure_store_factory) -> NativeVersionStore: +def azure_version_store(azure_store_factory): return azure_store_factory() @pytest.fixture -def azure_version_store_dynamic_schema(azure_store_factory) -> NativeVersionStore: +def azure_version_store_dynamic_schema(azure_store_factory): return azure_store_factory(dynamic_schema=True, dynamic_strings=True) @@ -977,7 +981,7 @@ def lmdb_version_store_v2(version_store_factory, lib_name) -> NativeVersionStore @pytest.fixture(scope="function", params=("lmdb_version_store_v1", "lmdb_version_store_v2")) -def lmdb_version_store(request) -> Generator[NativeVersionStore, None, None]: +def lmdb_version_store(request): yield request.getfixturevalue(request.param) @@ -1017,7 +1021,7 @@ def lmdb_version_store_dynamic_schema_v2(version_store_factory, lib_name) -> Nat @pytest.fixture def lmdb_version_store_dynamic_schema( lmdb_version_store_dynamic_schema_v1, lmdb_version_store_dynamic_schema_v2, encoding_version -) -> NativeVersionStore: +): if encoding_version == EncodingVersion.V1: return lmdb_version_store_dynamic_schema_v1 elif encoding_version == EncodingVersion.V2: @@ -1279,7 +1283,7 @@ def get_df(ts, width, max_col_width): "lmdb_version_store_empty_types_dynamic_schema_v2", ), ) -def lmdb_version_store_static_and_dynamic(request) -> Generator[NativeVersionStore, None, None]: +def lmdb_version_store_static_and_dynamic(request): """ Designed to test all combinations between schema and encoding version for LMDB """ @@ -1302,7 +1306,7 @@ def lmdb_version_store_static_and_dynamic(request) -> Generator[NativeVersionSto pytest.param("real_gcp_version_store", marks=REAL_GCP_TESTS_MARK), ), ) -def object_and_mem_and_lmdb_version_store(request) -> Generator[NativeVersionStore, None, None]: +def object_and_mem_and_lmdb_version_store(request): """ Designed to test all supported stores """ @@ -1325,7 +1329,7 @@ def object_and_mem_and_lmdb_version_store(request) -> Generator[NativeVersionSto pytest.param("real_gcp_version_store_dynamic_schema", marks=REAL_GCP_TESTS_MARK), ), ) -def object_and_mem_and_lmdb_version_store_dynamic_schema(request) -> Generator[NativeVersionStore, None, None]: +def object_and_mem_and_lmdb_version_store_dynamic_schema(request): filter_out_unwanted_mark(request, request.param) """ Designed to test all supported stores @@ -1334,22 +1338,22 @@ def object_and_mem_and_lmdb_version_store_dynamic_schema(request) -> Generator[N @pytest.fixture -def in_memory_version_store(in_memory_store_factory) -> NativeVersionStore: +def 
in_memory_version_store(in_memory_store_factory): return in_memory_store_factory() @pytest.fixture -def in_memory_version_store_dynamic_schema(in_memory_store_factory) -> NativeVersionStore: +def in_memory_version_store_dynamic_schema(in_memory_store_factory): return in_memory_store_factory(dynamic_schema=True) @pytest.fixture -def in_memory_version_store_tiny_segment(in_memory_store_factory) -> NativeVersionStore: +def in_memory_version_store_tiny_segment(in_memory_store_factory): return in_memory_store_factory(column_group_size=2, segment_row_size=2) @pytest.fixture(params=["lmdb_version_store_tiny_segment", "in_memory_version_store_tiny_segment"]) -def lmdb_or_in_memory_version_store_tiny_segment(request) -> NativeVersionStore: +def lmdb_or_in_memory_version_store_tiny_segment(request): return request.getfixturevalue(request.param) diff --git a/python/tests/integration/arcticdb/test_config.py b/python/tests/integration/arcticdb/test_config.py index 36857292ab..345310e29b 100644 --- a/python/tests/integration/arcticdb/test_config.py +++ b/python/tests/integration/arcticdb/test_config.py @@ -3,8 +3,6 @@ from subprocess import run, PIPE from arcticdb_ext.log import LogLevel -from tests.util.mark import MACOS - _LEVELS = tuple(LogLevel.__entries) diff --git a/python/tests/integration/arcticdb/version_store/test_basic_version_store.py b/python/tests/integration/arcticdb/version_store/test_basic_version_store.py index 3d0bbb2976..d4af0f905f 100644 --- a/python/tests/integration/arcticdb/version_store/test_basic_version_store.py +++ b/python/tests/integration/arcticdb/version_store/test_basic_version_store.py @@ -47,7 +47,6 @@ from tests.util.date import DateRange from arcticdb.util.test import equals from arcticdb.version_store._store import resolve_defaults -from tests.util.mark import MACOS_WHEEL_BUILD @pytest.fixture() @@ -101,7 +100,6 @@ def test_special_chars(object_version_store): @pytest.mark.parametrize("breaking_char", [chr(0), "\0", "*", "<", ">"]) @pytest.mark.storage -@pytest.mark.skipif(MACOS_WHEEL_BUILD, reason="Halts execution on MacOS ARM") def test_s3_breaking_chars(object_version_store, breaking_char): """Test that chars that are not supported are raising the appropriate exception and that we fail on write without corrupting the db. @@ -116,7 +114,6 @@ def test_s3_breaking_chars(object_version_store, breaking_char): @pytest.mark.parametrize("breaking_char", [chr(0), "\0", "*", "<", ">"]) @pytest.mark.storage -@pytest.mark.skipif(MACOS_WHEEL_BUILD, reason="Halts execution on MacOS ARM") def test_s3_breaking_chars_staged(object_version_store, breaking_char): """Test that chars that are not supported are raising the appropriate exception and that we fail on write without corrupting the db. 
@@ -131,7 +128,6 @@ def test_s3_breaking_chars_staged(object_version_store, breaking_char): @pytest.mark.parametrize("unhandled_char", [chr(0), chr(30), chr(127), chr(128)]) @pytest.mark.storage -@pytest.mark.skipif(MACOS_WHEEL_BUILD, reason="Halts execution on MacOS ARM") def test_unhandled_chars_default(object_version_store, unhandled_char): """Test that by default, the problematic chars are raising an exception""" sym = f"prefix{unhandled_char}postfix" @@ -144,7 +140,6 @@ def test_unhandled_chars_default(object_version_store, unhandled_char): @pytest.mark.parametrize("sym", [chr(0), chr(30), chr(127), chr(128), "", "l" * 255]) @pytest.mark.storage -@pytest.mark.skipif(MACOS_WHEEL_BUILD, reason="Halts execution on MacOS ARM") def test_unhandled_chars_staged_data(object_version_store, sym): """Test that by default, the problematic chars are raising an exception at staging time.""" df = sample_dataframe() diff --git a/python/tests/nonreg/arcticdb/version_store/test_nonreg_specific.py b/python/tests/nonreg/arcticdb/version_store/test_nonreg_specific.py index 662804fbad..93bea4430a 100644 --- a/python/tests/nonreg/arcticdb/version_store/test_nonreg_specific.py +++ b/python/tests/nonreg/arcticdb/version_store/test_nonreg_specific.py @@ -19,7 +19,6 @@ from arcticdb_ext.storage import KeyType from arcticc.pb2.descriptors_pb2 import TypeDescriptor from tests.util.date import DateRange -from tests.util.mark import MACOS_WHEEL_BUILD @pytest.mark.storage @@ -74,7 +73,6 @@ def test_update_nan_int(object_and_mem_and_lmdb_version_store_dynamic_schema): @pytest.mark.storage -@pytest.mark.skipif(MACOS_WHEEL_BUILD, reason="Fatal Python error: Segmentation fault (monday:9520391456)") def test_update_int_nan(object_and_mem_and_lmdb_version_store_dynamic_schema): lib = object_and_mem_and_lmdb_version_store_dynamic_schema symbol = "test_update_int_nan" diff --git a/python/tests/stress/arcticdb/version_store/test_stress_finalize_staged_data.py b/python/tests/stress/arcticdb/version_store/test_stress_finalize_staged_data.py index 77d2886a53..3e3737f8d4 100644 --- a/python/tests/stress/arcticdb/version_store/test_stress_finalize_staged_data.py +++ b/python/tests/stress/arcticdb/version_store/test_stress_finalize_staged_data.py @@ -20,7 +20,7 @@ from arcticdb.util.utils import CachedDFGenerator, TimestampNumber, stage_chunks -from tests.util.mark import MACOS, REAL_S3_TESTS_MARK, SKIP_CONDA_MARK, SLOW_TESTS_MARK +from tests.util.mark import REAL_S3_TESTS_MARK, SKIP_CONDA_MARK, SLOW_TESTS_MARK # Uncomment for logging # set_log_level(default_level="DEBUG", console_output=False, file_output_path="/tmp/arcticdb.log") @@ -90,9 +90,7 @@ def test_finalize_monotonic_unique_chunks(arctic_library_lmdb): df = cachedDF.generate_dataframe_timestamp_indexed(num_rows_initially, total_number_rows, cachedDF.TIME_UNIT) cnt = 0 - # On MacOS there is segmentation fault or not enough memory of more chunks are staged - iterations = [500, 1000] if MACOS else [500, 1000, 1500, 2000] - for iter in iterations: + for iter in [500, 1000, 1500, 2000]: res = Results() total_number_rows = INITIAL_TIMESTAMP + num_rows_initially diff --git a/python/tests/unit/arcticdb/version_store/test_column_type_changes.py b/python/tests/unit/arcticdb/version_store/test_column_type_changes.py index 988c4eec6e..6c47d2033f 100644 --- a/python/tests/unit/arcticdb/version_store/test_column_type_changes.py +++ b/python/tests/unit/arcticdb/version_store/test_column_type_changes.py @@ -5,6 +5,7 @@ As of the Change Date specified in that file, in accordance with the 
Business Source License, use of this software will be governed by the Apache License, version 2.0. """ + import numpy as np import pandas as pd import pytest @@ -17,7 +18,7 @@ from arcticdb.util.test import assert_frame_equal from arcticdb_ext.types import DataType -from tests.util.mark import MACOS_WHEEL_BUILD +from tests.util.mark import MACOS @pytest.mark.parametrize("dynamic_schema", [True, False]) @@ -45,7 +46,9 @@ def test_changing_numeric_type(version_store_factory, dynamic_schema): received_append = lib.read(sym_append).data assert_frame_equal(expected_append, received_append) - expected_update = pd.DataFrame({"col": np.array([0, 0, 2], dtype=np.int64)}, index=pd.date_range("2024-01-01", periods=3)) + expected_update = pd.DataFrame( + {"col": np.array([0, 0, 2], dtype=np.int64)}, index=pd.date_range("2024-01-01", periods=3) + ) received_update = lib.read(sym_update).data assert_frame_equal(expected_update, received_update) @@ -82,8 +85,12 @@ def test_changing_fixed_string_width(version_store_factory, dynamic_schema, wide sym_append = "test_changing_fixed_string_width_append" sym_update = "test_changing_fixed_string_width_update" df_write = pd.DataFrame({"col": ["aa", "bb", "cc"]}, index=pd.date_range("2024-01-01", periods=3)) - df_append = pd.DataFrame({"col": ["d" * (1 if wider_strings_first else 3)]}, index=pd.date_range("2024-01-04", periods=1)) - df_update = pd.DataFrame({"col": ["d" * (1 if wider_strings_first else 3)]}, index=pd.date_range("2024-01-02", periods=1)) + df_append = pd.DataFrame( + {"col": ["d" * (1 if wider_strings_first else 3)]}, index=pd.date_range("2024-01-04", periods=1) + ) + df_update = pd.DataFrame( + {"col": ["d" * (1 if wider_strings_first else 3)]}, index=pd.date_range("2024-01-02", periods=1) + ) lib.write(sym_append, df_write) lib.write(sym_update, df_write) @@ -95,7 +102,9 @@ def test_changing_fixed_string_width(version_store_factory, dynamic_schema, wide received_append = lib.read(sym_append).data assert_frame_equal(expected_append, received_append) - expected_update = pd.DataFrame({"col": ["aa", "d" * (1 if wider_strings_first else 3), "cc"]}, index=pd.date_range("2024-01-01", periods=3)) + expected_update = pd.DataFrame( + {"col": ["aa", "d" * (1 if wider_strings_first else 3), "cc"]}, index=pd.date_range("2024-01-01", periods=3) + ) received_update = lib.read(sym_update).data assert_frame_equal(expected_update, received_update) @@ -133,7 +142,9 @@ def get_type_of_column(): @pytest.mark.parametrize("float_type", float_types) @pytest.mark.parametrize("second_append_type", [np.int64, np.uint64, np.int32, np.uint32]) @pytest.mark.parametrize("int_first", (True, False)) -def test_type_promotion_ints_and_floats_up_to_float64(lmdb_version_store_dynamic_schema, int_type, float_type, second_append_type, int_first): +def test_type_promotion_ints_and_floats_up_to_float64( + lmdb_version_store_dynamic_schema, int_type, float_type, second_append_type, int_first +): # Given lib = lmdb_version_store_dynamic_schema @@ -169,7 +180,9 @@ def test_type_promotion_ints_and_floats_up_to_float64(lmdb_version_store_dynamic @pytest.mark.parametrize("original_type", [np.int8, np.uint8, np.int16, np.uint16]) @pytest.mark.parametrize("second_append_type", [np.int8, np.uint8, np.int16, np.uint16]) -def test_type_promotion_ints_and_floats_up_to_float32(lmdb_version_store_dynamic_schema, original_type, second_append_type): +def test_type_promotion_ints_and_floats_up_to_float32( + lmdb_version_store_dynamic_schema, original_type, second_append_type +): """Cases where we 
promote an integral type and a float32 to a float32""" # Given lib = lmdb_version_store_dynamic_schema @@ -198,12 +211,16 @@ def test_type_promotion_ints_and_floats_up_to_float32(lmdb_version_store_dynamic @pytest.mark.parametrize("original_type", [np.int32, np.uint32]) def test_type_promotion_int32_and_float32_up_to_float64(lmdb_version_store_dynamic_schema, original_type): - """We promote int32 and float32 up to float64 so we can save the int32 without a loss of precision. """ + """We promote int32 and float32 up to float64 so we can save the int32 without a loss of precision.""" # Given lib = lmdb_version_store_dynamic_schema - original_data = pd.DataFrame({"a": np.array([0, np.iinfo(original_type).min, np.iinfo(original_type).max], original_type)}, index=[0, 1, 2]) - first_append = pd.DataFrame({"a": np.array([0, np.finfo(np.float32).min, np.finfo(np.float32).max], np.float32)}, index=[3, 4, 5]) + original_data = pd.DataFrame( + {"a": np.array([0, np.iinfo(original_type).min, np.iinfo(original_type).max], original_type)}, index=[0, 1, 2] + ) + first_append = pd.DataFrame( + {"a": np.array([0, np.finfo(np.float32).min, np.finfo(np.float32).max], np.float32)}, index=[3, 4, 5] + ) lib.write("test", original_data) lib.append("test", first_append) @@ -227,14 +244,18 @@ def test_type_promotion_int64_and_float64_up_to_float64(lmdb_version_store_dynam lib = lmdb_version_store_dynamic_schema original_type = np.int64 - original_data = pd.DataFrame({"a": np.array([ - np.iinfo(original_type).min + 1, - np.iinfo(original_type).max - 1, - 2 ** 53 - 1, - 2 ** 53, - 2 ** 53 + 1 - ], original_type)}, index=[0, 1, 2, 3, 4]) - append = pd.DataFrame({"a": np.array([np.finfo(np.float64).min, np.finfo(np.float64).max], np.float64)}, index=[5, 6]) + original_data = pd.DataFrame( + { + "a": np.array( + [np.iinfo(original_type).min + 1, np.iinfo(original_type).max - 1, 2**53 - 1, 2**53, 2**53 + 1], + original_type, + ) + }, + index=[0, 1, 2, 3, 4], + ) + append = pd.DataFrame( + {"a": np.array([np.finfo(np.float64).min, np.finfo(np.float64).max], np.float64)}, index=[5, 6] + ) lib.write("test", original_data) lib.append("test", append) @@ -242,16 +263,16 @@ def test_type_promotion_int64_and_float64_up_to_float64(lmdb_version_store_dynam data = lib.read("test").data.astype(original_type) # Then - if MACOS_WHEEL_BUILD: + if MACOS: # This test gives other results on MacOS, but it's not a problem for us as the assertions below are meant # for illustrating the issue, not for testing the behaviour strictly. 
return assert data.iloc[0, 0] == np.iinfo(original_type).min # out by one compared to original assert data.iloc[1, 0] == np.iinfo(original_type).min # overflowed - assert data.iloc[2, 0] == 2 ** 53 - 1 # fine, this fits in float64 which has an 11 bit exponent - assert data.iloc[3, 0] == 2 ** 53 # also fine - assert data.iloc[4, 0] == 2 ** 53 # off by one, should be 2 ** 53 + 1 but we lost precision + assert data.iloc[2, 0] == 2**53 - 1 # fine, this fits in float64 which has an 11 bit exponent + assert data.iloc[3, 0] == 2**53 # also fine + assert data.iloc[4, 0] == 2**53 # off by one, should be 2 ** 53 + 1 but we lost precision @pytest.mark.parametrize("integral_type", [np.int64, np.int32, np.uint64, np.uint32]) @@ -260,10 +281,13 @@ def test_querybuilder_project_int_gt_32_float(lmdb_version_store_tiny_segment, i # Given lib = lmdb_version_store_tiny_segment symbol = "test" - df = pd.DataFrame({ - "col1": np.array([1, 2, 3, 4], dtype=integral_type), - "col2": np.array([-1.0, 2.0, 0.0, 1.0], dtype=float_type) - }, index=np.arange(4)) + df = pd.DataFrame( + { + "col1": np.array([1, 2, 3, 4], dtype=integral_type), + "col2": np.array([-1.0, 2.0, 0.0, 1.0], dtype=float_type), + }, + index=np.arange(4), + ) lib.write(symbol, df) # When @@ -293,10 +317,13 @@ def test_querybuilder_project_int32_float32_boundary(lmdb_version_store_tiny_seg min_int = np.iinfo(integral_type).min max_float32 = np.finfo(np.float32).max min_float32 = np.finfo(np.float32).min - df = pd.DataFrame({ - "col1": np.array([min_int, min_int + 1, 0, max_int - 1, max_int], dtype=integral_type), - "col2": np.array([min_float32, min_float32 + 1, 0, max_float32 - 1, max_float32], dtype=np.float32) - }, index=np.arange(5)) + df = pd.DataFrame( + { + "col1": np.array([min_int, min_int + 1, 0, max_int - 1, max_int], dtype=integral_type), + "col2": np.array([min_float32, min_float32 + 1, 0, max_float32 - 1, max_float32], dtype=np.float32), + }, + index=np.arange(5), + ) lib.write(symbol, df) # When @@ -325,10 +352,10 @@ def test_querybuilder_project_int_lt_16_float(lmdb_version_store_tiny_segment, i # Given lib = lmdb_version_store_tiny_segment symbol = "test" - df = pd.DataFrame({ - "col1": np.array([1, 2, 3, 4], dtype=np.int64), - "col2": np.array([-1.0, 2.0, 0.0, 1.0], dtype=float_type) - }, index=np.arange(4)) + df = pd.DataFrame( + {"col1": np.array([1, 2, 3, 4], dtype=np.int64), "col2": np.array([-1.0, 2.0, 0.0, 1.0], dtype=float_type)}, + index=np.arange(4), + ) lib.write(symbol, df) # When @@ -348,7 +375,9 @@ def test_querybuilder_project_int_lt_16_float(lmdb_version_store_tiny_segment, i @pytest.mark.parametrize("original_type", [np.int16, np.uint16, np.int32, np.uint32, np.int64, np.uint64]) @pytest.mark.parametrize("append_type", [np.float32, np.float64]) -def test_type_promotion_ints_and_floats_then_project_float64_result(lmdb_version_store_dynamic_schema_v1, original_type, append_type): +def test_type_promotion_ints_and_floats_then_project_float64_result( + lmdb_version_store_dynamic_schema_v1, original_type, append_type +): # Given lib = lmdb_version_store_dynamic_schema_v1 @@ -379,7 +408,9 @@ def test_type_promotion_ints_and_floats_then_project_float64_result(lmdb_version @pytest.mark.parametrize("original_type", [np.int8, np.uint8]) -def test_type_promotion_ints_and_floats_then_project_float32_result(lmdb_version_store_dynamic_schema_v1, original_type): +def test_type_promotion_ints_and_floats_then_project_float32_result( + lmdb_version_store_dynamic_schema_v1, original_type +): # Given lib = 
lmdb_version_store_dynamic_schema_v1 diff --git a/python/tests/unit/arcticdb/version_store/test_symbol_concatenation.py b/python/tests/unit/arcticdb/version_store/test_symbol_concatenation.py index aeaf95002d..9c788d60db 100644 --- a/python/tests/unit/arcticdb/version_store/test_symbol_concatenation.py +++ b/python/tests/unit/arcticdb/version_store/test_symbol_concatenation.py @@ -5,6 +5,7 @@ As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. """ + import numpy as np import pandas as pd import pytest @@ -24,7 +25,11 @@ @pytest.mark.parametrize("index", [None, pd.date_range("2025-01-01", periods=12)]) @pytest.mark.parametrize("join", ["inner", "outer"]) def test_symbol_concat_basic(lmdb_library_factory, dynamic_schema, rows_per_segment, columns_per_segment, index, join): - lib = lmdb_library_factory(LibraryOptions(dynamic_schema=dynamic_schema, rows_per_segment=rows_per_segment, columns_per_segment=columns_per_segment)) + lib = lmdb_library_factory( + LibraryOptions( + dynamic_schema=dynamic_schema, rows_per_segment=rows_per_segment, columns_per_segment=columns_per_segment + ) + ) df_0 = pd.DataFrame( { "col1": np.arange(3, dtype=np.int64), @@ -65,8 +70,12 @@ def test_symbol_concat_basic(lmdb_library_factory, dynamic_schema, rows_per_segm assert version.metadata == (None if idx == 1 else idx) -@pytest.mark.parametrize("first_type", ["uint8", "uint16", "uint32", "uint64", "int8", "int16", "int32", "int64", "float32", "float64"]) -@pytest.mark.parametrize("second_type", ["uint8", "uint16", "uint32", "uint64", "int8", "int16", "int32", "int64", "float32", "float64"]) +@pytest.mark.parametrize( + "first_type", ["uint8", "uint16", "uint32", "uint64", "int8", "int16", "int32", "int64", "float32", "float64"] +) +@pytest.mark.parametrize( + "second_type", ["uint8", "uint16", "uint32", "uint64", "int8", "int16", "int32", "int64", "float32", "float64"] +) def test_symbol_concat_type_promotion(lmdb_library, first_type, second_type): lib = lmdb_library df0 = pd.DataFrame({"col": np.arange(1, dtype=np.dtype(first_type))}) @@ -85,7 +94,7 @@ def test_symbol_concat_type_promotion(lmdb_library, first_type, second_type): None, pd.date_range("2025-01-01", periods=8), pd.MultiIndex.from_product([pd.date_range("2025-01-01", periods=2), [0, 1], ["hello", "goodbye"]]), - ] + ], ) @pytest.mark.parametrize("name_0", [None, "", "s1", "s2"]) @pytest.mark.parametrize("name_1", [None, "", "s1", "s2"]) @@ -217,7 +226,9 @@ def test_symbol_concat_dynamic_schema_missing_columns(lmdb_library_factory, join @pytest.mark.parametrize("columns_per_segment", [2, 100_000]) @pytest.mark.parametrize("index", [None, pd.date_range("2025-01-01", periods=5)]) @pytest.mark.parametrize("join", ["inner", "outer"]) -def test_symbol_concat_empty_column_intersection(lmdb_library_factory, dynamic_schema, columns_per_segment, index, join): +def test_symbol_concat_empty_column_intersection( + lmdb_library_factory, dynamic_schema, columns_per_segment, index, join +): lib = lmdb_library_factory(LibraryOptions(dynamic_schema=dynamic_schema, columns_per_segment=columns_per_segment)) df_0 = pd.DataFrame( { @@ -253,10 +264,18 @@ def test_symbol_concat_empty_column_intersection(lmdb_library_factory, dynamic_s @pytest.mark.parametrize("dynamic_schema", [True, False]) @pytest.mark.parametrize("rows_per_segment", [2, 100_000]) @pytest.mark.parametrize("columns_per_segment", [2, 100_000]) -@pytest.mark.parametrize("columns", [["col1"], 
["col2"], ["col3"], ["col1", "col2"], ["col1", "col3"], ["col2", "col3"]]) +@pytest.mark.parametrize( + "columns", [["col1"], ["col2"], ["col3"], ["col1", "col2"], ["col1", "col3"], ["col2", "col3"]] +) @pytest.mark.parametrize("join", ["inner", "outer"]) -def test_symbol_concat_column_slicing(lmdb_library_factory, dynamic_schema, rows_per_segment, columns_per_segment, columns, join): - lib = lmdb_library_factory(LibraryOptions(dynamic_schema=dynamic_schema, rows_per_segment=rows_per_segment, columns_per_segment=columns_per_segment)) +def test_symbol_concat_column_slicing( + lmdb_library_factory, dynamic_schema, rows_per_segment, columns_per_segment, columns, join +): + lib = lmdb_library_factory( + LibraryOptions( + dynamic_schema=dynamic_schema, rows_per_segment=rows_per_segment, columns_per_segment=columns_per_segment + ) + ) df_0 = pd.DataFrame( { "col1": np.arange(3, dtype=np.int64), @@ -289,11 +308,20 @@ def test_symbol_concat_column_slicing(lmdb_library_factory, dynamic_schema, rows def test_symbol_concat_with_streaming_incompletes(lmdb_library, only_incompletes, join): lib = lmdb_library if not only_incompletes: - df_0 = pd.DataFrame({"col1": np.arange(3, dtype=np.float64), "col2": np.arange(3, 6, dtype=np.float64)}, index=pd.date_range("2025-01-01", periods=3)) + df_0 = pd.DataFrame( + {"col1": np.arange(3, dtype=np.float64), "col2": np.arange(3, 6, dtype=np.float64)}, + index=pd.date_range("2025-01-01", periods=3), + ) lib.write("sym0", df_0) - df_1 = pd.DataFrame({"col1": np.arange(6, 9, dtype=np.float64), "col2": np.arange(9, 12, dtype=np.float64)}, index=pd.date_range("2025-01-04", periods=3)) + df_1 = pd.DataFrame( + {"col1": np.arange(6, 9, dtype=np.float64), "col2": np.arange(9, 12, dtype=np.float64)}, + index=pd.date_range("2025-01-04", periods=3), + ) lib._dev_tools.library_tool().append_incomplete("sym0", df_1) - df_2 = pd.DataFrame({"col2": np.arange(12, 15, dtype=np.float64), "col3": np.arange(15, 18, dtype=np.float64)}, index=pd.date_range("2025-01-07", periods=3)) + df_2 = pd.DataFrame( + {"col2": np.arange(12, 15, dtype=np.float64), "col3": np.arange(15, 18, dtype=np.float64)}, + index=pd.date_range("2025-01-07", periods=3), + ) lib.write("sym1", df_2) # incomplete kwarg Not part of the V2 API received = lib._nvs.batch_read_and_join( @@ -304,7 +332,7 @@ def test_symbol_concat_with_streaming_incompletes(lmdb_library, only_incompletes [None, None], [None, None], [None, None], - incomplete=True + incomplete=True, ) if only_incompletes: expected = pd.concat([df_1, df_2], join=join) @@ -323,15 +351,23 @@ def test_symbol_concat_with_streaming_incompletes(lmdb_library, only_incompletes @pytest.mark.parametrize("rows_per_segment", [2, 100_000]) @pytest.mark.parametrize("columns_per_segment", [2, 100_000]) @pytest.mark.parametrize("join", ["inner", "outer"]) -def test_symbol_concat_multiindex_basic(lmdb_library_factory, dynamic_schema, rows_per_segment, columns_per_segment, join): - lib = lmdb_library_factory(LibraryOptions(dynamic_schema=dynamic_schema, rows_per_segment=rows_per_segment, columns_per_segment=columns_per_segment)) +def test_symbol_concat_multiindex_basic( + lmdb_library_factory, dynamic_schema, rows_per_segment, columns_per_segment, join +): + lib = lmdb_library_factory( + LibraryOptions( + dynamic_schema=dynamic_schema, rows_per_segment=rows_per_segment, columns_per_segment=columns_per_segment + ) + ) df = pd.DataFrame( { "col1": np.arange(12, dtype=np.int64), "col2": np.arange(100, 112, dtype=np.int64), "col3": np.arange(1000, 1012, dtype=np.int64), }, - 
index=pd.MultiIndex.from_product([pd.date_range("2025-01-01", periods=4), [0, 1, 2]], names=["datetime", "level"]), + index=pd.MultiIndex.from_product( + [pd.date_range("2025-01-01", periods=4), [0, 1, 2]], names=["datetime", "level"] + ), ) lib.write("sym0", df[:3]) lib.write("sym1", df[3:7]) @@ -375,28 +411,32 @@ def test_symbol_concat_with_date_range(lmdb_library, join): @pytest.mark.parametrize("columns_per_segment", [2, 100_000]) @pytest.mark.parametrize("join", ["inner", "outer"]) def test_symbol_concat_complex(lmdb_library_factory, dynamic_schema, rows_per_segment, columns_per_segment, join): - lib = lmdb_library_factory(LibraryOptions(dynamic_schema=dynamic_schema, rows_per_segment=rows_per_segment, columns_per_segment=columns_per_segment)) + lib = lmdb_library_factory( + LibraryOptions( + dynamic_schema=dynamic_schema, rows_per_segment=rows_per_segment, columns_per_segment=columns_per_segment + ) + ) df_0 = pd.DataFrame( { - "col1": np.arange(3, dtype=np.int64), - "col2": np.arange(100, 103, dtype=np.int64), - "col3": np.arange(1000, 1003, dtype=np.int64), + "col1": np.arange(3, dtype=np.int64), + "col2": np.arange(100, 103, dtype=np.int64), + "col3": np.arange(1000, 1003, dtype=np.int64), }, index=pd.date_range(pd.Timestamp(0), freq="1000ns", periods=3), ) df_1 = pd.DataFrame( { - "col1": np.arange(4, dtype=np.int64), - "col2": np.arange(200, 204, dtype=np.int64), - "col3": np.arange(2000, 2004, dtype=np.int64), + "col1": np.arange(4, dtype=np.int64), + "col2": np.arange(200, 204, dtype=np.int64), + "col3": np.arange(2000, 2004, dtype=np.int64), }, index=pd.date_range(pd.Timestamp(2000), freq="1000ns", periods=4), ) df_2 = pd.DataFrame( { - "col1": np.arange(5, dtype=np.int64), - "col2": np.arange(300, 305, dtype=np.int64), - "col3": np.arange(3000, 3005, dtype=np.int64), + "col1": np.arange(5, dtype=np.int64), + "col2": np.arange(300, 305, dtype=np.int64), + "col3": np.arange(3000, 3005, dtype=np.int64), }, index=pd.date_range(pd.Timestamp(6000), freq="1000ns", periods=5), ) @@ -415,7 +455,9 @@ def test_symbol_concat_complex(lmdb_library_factory, dynamic_schema, rows_per_se received = lazy_df.collect().data received = received.reindex(columns=sorted(received.columns)) - expected = pd.concat([df_0, df_1[1:], df_2[:4]]).resample("2000ns").agg({"col1": "sum", "col2": "mean", "col3": "min"}) + expected = ( + pd.concat([df_0, df_1[1:], df_2[:4]]).resample("2000ns").agg({"col1": "sum", "col2": "mean", "col3": "min"}) + ) assert_frame_equal(expected, received) @@ -458,16 +500,23 @@ def test_symbol_concat_querybuilder_syntax(lmdb_library): received = lib.read_batch_and_join([sym_0, read_request_1, read_request_2], query_builder=q).data received = received.reindex(columns=sorted(received.columns)) - expected = pd.concat([df_0, df_1[1:], df_2[:4]]).resample("2000ns").agg({"col1": "sum", "col2": "mean", "col3": "min"}) + expected = ( + pd.concat([df_0, df_1[1:], df_2[:4]]).resample("2000ns").agg({"col1": "sum", "col2": "mean", "col3": "min"}) + ) assert_frame_equal(expected, received) + @pytest.mark.parametrize("index_name_0", [None, "ts1", "ts2"]) @pytest.mark.parametrize("index_name_1", [None, "ts1", "ts2"]) @pytest.mark.parametrize("join", ["inner", "outer"]) def test_symbol_concat_differently_named_timeseries(lmdb_library, index_name_0, index_name_1, join): lib = lmdb_library - df_0 = pd.DataFrame({"col1": np.arange(1, dtype=np.float64), "col2": np.arange(1, 2, dtype=np.float64)}, index=[pd.Timestamp(0)]) - df_1 = pd.DataFrame({"col1": np.arange(2, 3, dtype=np.float64), "col3": 
np.arange(3, 4, dtype=np.float64)}, index=[pd.Timestamp(1)]) + df_0 = pd.DataFrame( + {"col1": np.arange(1, dtype=np.float64), "col2": np.arange(1, 2, dtype=np.float64)}, index=[pd.Timestamp(0)] + ) + df_1 = pd.DataFrame( + {"col1": np.arange(2, 3, dtype=np.float64), "col3": np.arange(3, 4, dtype=np.float64)}, index=[pd.Timestamp(1)] + ) df_0.index.name = index_name_0 df_1.index.name = index_name_1 lib.write("sym0", df_0) @@ -484,27 +533,28 @@ def test_symbol_concat_differently_named_timeseries(lmdb_library, index_name_0, @pytest.mark.parametrize("index_name_1_level_1", [None, "hello", "goodbye"]) @pytest.mark.parametrize("join", ["inner", "outer"]) def test_symbol_concat_differently_named_multiindexes( - lmdb_library, - index_name_0_level_0, - index_name_0_level_1, - index_name_1_level_0, - index_name_1_level_1, - join + lmdb_library, index_name_0_level_0, index_name_0_level_1, index_name_1_level_0, index_name_1_level_1, join ): lib = lmdb_library df_0 = pd.DataFrame( { "col1": np.arange(1, dtype=np.float64), "col2": np.arange(1, 2, dtype=np.float64), - }, - index=pd.MultiIndex.from_product([pd.date_range("2025-01-01", periods=4), ["hello", None, "goodbye"]], names=[index_name_0_level_0, index_name_0_level_1]) + }, + index=pd.MultiIndex.from_product( + [pd.date_range("2025-01-01", periods=4), ["hello", None, "goodbye"]], + names=[index_name_0_level_0, index_name_0_level_1], + ), ) df_1 = pd.DataFrame( { "col1": np.arange(2, 3, dtype=np.float64), "col2": np.arange(3, 4, dtype=np.float64), - }, - index=pd.MultiIndex.from_product([pd.date_range("2025-01-01", periods=4), ["bonjour", "au revoir", None]], names=[index_name_1_level_0, index_name_1_level_1]) + }, + index=pd.MultiIndex.from_product( + [pd.date_range("2025-01-01", periods=4), ["bonjour", "au revoir", None]], + names=[index_name_1_level_0, index_name_1_level_1], + ), ) lib.write("sym0", df_0) lib.write("sym1", df_1) @@ -519,12 +569,7 @@ def test_symbol_concat_differently_named_multiindexes( @pytest.mark.parametrize("tz_0", [None, "Europe/Amsterdam", "US/Eastern"]) @pytest.mark.parametrize("tz_1", [None, "Europe/Amsterdam", "US/Eastern"]) @pytest.mark.parametrize("join", ["inner", "outer"]) -def test_symbol_concat_timezone_handling( - lmdb_library, - tz_0, - tz_1, - join -): +def test_symbol_concat_timezone_handling(lmdb_library, tz_0, tz_1, join): lib = lmdb_library df_0 = pd.DataFrame( { @@ -554,12 +599,7 @@ def test_symbol_concat_timezone_handling( @pytest.mark.parametrize("tz_1_level_1", [None, "Europe/Amsterdam", "Australia/Sydney"]) @pytest.mark.parametrize("join", ["inner", "outer"]) def test_symbol_concat_multiindex_timezone_handling( - lmdb_library, - tz_0_level_0, - tz_0_level_1, - tz_1_level_0, - tz_1_level_1, - join + lmdb_library, tz_0_level_0, tz_0_level_1, tz_1_level_0, tz_1_level_1, join ): lib = lmdb_library df_0 = pd.DataFrame( @@ -567,21 +607,39 @@ def test_symbol_concat_multiindex_timezone_handling( "col1": np.arange(1, dtype=np.float64), "col2": np.arange(1, 2, dtype=np.float64), }, - index=pd.MultiIndex.from_product([pd.date_range("2025-01-01", periods=4, tz=tz_0_level_0), pd.date_range("2025-01-01", periods=3, tz=tz_0_level_1)]) + index=pd.MultiIndex.from_product( + [ + pd.date_range("2025-01-01", periods=4, tz=tz_0_level_0), + pd.date_range("2025-01-01", periods=3, tz=tz_0_level_1), + ] + ), ) df_1 = pd.DataFrame( { "col1": np.arange(2, 3, dtype=np.float64), "col2": np.arange(3, 4, dtype=np.float64), }, - index=pd.MultiIndex.from_product([pd.date_range("2025-01-01", periods=4, tz=tz_1_level_0), 
pd.date_range("2025-01-01", periods=3, tz=tz_1_level_1)]) + index=pd.MultiIndex.from_product( + [ + pd.date_range("2025-01-01", periods=4, tz=tz_1_level_0), + pd.date_range("2025-01-01", periods=3, tz=tz_1_level_1), + ] + ), ) lib.write("sym0", df_0) lib.write("sym1", df_1) received = concat(lib.read_batch(["sym0", "sym1"], lazy=True), join).collect().data - expected_level_0_tz = f"datetime64[ns, {tz_0_level_0}]" if (tz_0_level_0 == tz_1_level_0 and tz_0_level_0 is not None) else "datetime64[ns]" - expected_level_1_tz = f"datetime64[ns, {tz_0_level_1}]" if (tz_0_level_1 == tz_1_level_1 and tz_0_level_1 is not None) else "datetime64[ns]" + expected_level_0_tz = ( + f"datetime64[ns, {tz_0_level_0}]" + if (tz_0_level_0 == tz_1_level_0 and tz_0_level_0 is not None) + else "datetime64[ns]" + ) + expected_level_1_tz = ( + f"datetime64[ns, {tz_0_level_1}]" + if (tz_0_level_1 == tz_1_level_1 and tz_0_level_1 is not None) + else "datetime64[ns]" + ) assert str(received.index.dtypes[0]) == expected_level_0_tz assert str(received.index.dtypes[1]) == expected_level_1_tz @@ -661,4 +719,4 @@ def test_symbol_concat_docstring_example(lmdb_library): lazy_df = concat([lazy_df0, lazy_df1]) lazy_df = lazy_df.resample("10min").agg({"col": "sum"}) received = lazy_df.collect().data - assert_frame_equal(pd.DataFrame({"col": [14]}, index=[pd.Timestamp("2025-01-01")]), received) \ No newline at end of file + assert_frame_equal(pd.DataFrame({"col": [14]}, index=[pd.Timestamp("2025-01-01")]), received)
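Reviewer note (illustrative only, not part of the diff): a minimal sketch of how the newly added arctic_client_no_lmdb fixture is expected to be consumed, assuming a hypothetical test name and relying on the existing lib_name fixture. Since the factory fixtures no longer clear libraries on teardown, the example cleans up explicitly.

    # Hypothetical usage sketch (not in this diff): a test parametrised over all
    # non-LMDB backends via the new arctic_client_no_lmdb fixture.
    import pandas as pd

    def test_round_trip_no_lmdb(arctic_client_no_lmdb, lib_name):
        ac = arctic_client_no_lmdb
        lib = ac.create_library(lib_name)
        try:
            df = pd.DataFrame(
                {"col": [1, 2, 3]}, index=pd.date_range("2025-01-01", periods=3)
            )
            lib.write("sym", df)
            assert lib.read("sym").data.equals(df)
        finally:
            # Explicit cleanup: the fixtures in this PR no longer delete leftover libraries.
            ac.delete_library(lib_name)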