Skip to content

Commit f0a5cc3

Browse files
authored
Support recursive normalizer in v2 API write (#2736)
#### Reference Issues/PRs <!--Example: Fixes #1234. See also #3456.--> https://man312219.monday.com/boards/7852509418/pulses/18298965201 #### What does this implement or fix? 1. Makes `def write` and `def write_pickle` in v2 API support recursive normalizer - Recursive normalizer enables ArcricDB to write and read nest data structures (`dict`, `list`, `tuple`) of dataframes and arrays without having to pickling the entire structure - The data stored is not filterable, as is pickled data - Please refer to https://docs.arcticdb.io/latest/notebooks/arcticdb_demo_recursive_normalizers for more details 3. Minor changes on formatter to support formatting individual files Per discussed, in v2 API, recursive normalizer setting in LibraryOption will be respected. Default library option of recursive normalizer will be `False`. Existing libraries are unaffected. #### Any other comments? `def batch_write` in v1 API doesn't support recursive normalizer. Ticket: https://man312219.monday.com/boards/7852509418/pulses/7855436309 Therefore the support of recursive normalizer in corresponding v2 API will not be covered in this PR. ##### Pickling `pickling` is a bit of a mess in V1 API. For `arrow` and `pandas` data, if the normalization fails, it can fallback to msgpack and pickling, depending on whether `pickle_on_failure` is `True`. However, for other kinds of data, it almost certainly fallback to msgpack and pickling. The only option to prevent pickling is a library config `strict_mode`. But I don't see there is any API to enable this option, in V1/V2 nor internally. #### Checklist <details> <summary> Checklist for code changes... </summary> - [ ] Have you updated the relevant docstrings, documentation and copyright notice? - [ ] Is this contribution tested against [all ArcticDB's features](../docs/mkdocs/docs/technical/contributing.md)? - [ ] Do all exceptions introduced raise appropriate [error messages](https://docs.arcticdb.io/error_messages/)? - [ ] Are API changes highlighted in the PR description? - [ ] Is the PR labelled as enhancement or bug so it appears in autogenerated release notes? </details> <!-- Thanks for contributing a Pull Request to ArcticDB! Please ensure you have taken a look at: - ArcticDB's Code of Conduct: https://github.com/man-group/ArcticDB/blob/master/CODE_OF_CONDUCT.md - ArcticDB's Contribution Licensing: https://github.com/man-group/ArcticDB/blob/master/docs/mkdocs/docs/technical/contributing.md#contribution-licensing -->
1 parent d89bf57 commit f0a5cc3

File tree

15 files changed

+861
-42
lines changed

15 files changed

+861
-42
lines changed

build_tooling/format.py

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,30 +31,38 @@ def install_tools():
3131
return black or clang
3232

3333

34-
def lint_python(in_place: bool):
34+
def lint_python(in_place: bool, specific_file: str = None):
3535
try:
3636
import black
3737
assert black.__version__ == black_version
3838
except ImportError:
3939
raise RuntimeError("black not installed. Run this script with --install-tools then try again")
4040

41+
if specific_file:
42+
path = specific_file
43+
else:
44+
path = "python/"
45+
4146
if in_place:
42-
return subprocess.run(["black", "-l", "120", "python/"]).returncode
47+
return subprocess.run(["black", "-l", "120", path]).returncode
4348
else:
44-
return subprocess.run(["black", "-l", "120", "--check", "python/"]).returncode
49+
return subprocess.run(["black", "-l", "120", "--check", path]).returncode
4550

4651

47-
def lint_cpp(in_place: bool):
52+
def lint_cpp(in_place: bool, specific_file: str = None):
4853
try:
4954
import clang_format
5055
except ImportError:
5156
raise RuntimeError("clang-format not installed. Run this script with --install-tools then try again")
5257

5358
files = []
54-
root = pathlib.Path("cpp", "arcticdb")
55-
for e in ("*.cpp", "*.hpp"):
56-
for f in root.rglob(e):
57-
files.append(str(f))
59+
if specific_file:
60+
files.append(specific_file)
61+
else:
62+
root = pathlib.Path("cpp", "arcticdb")
63+
for e in ("*.cpp", "*.hpp"):
64+
for f in root.rglob(e):
65+
files.append(str(f))
5866

5967
args = ["clang-format"]
6068
if in_place:
@@ -69,11 +77,11 @@ def lint_cpp(in_place: bool):
6977
return subprocess.run(args).returncode
7078

7179

72-
def main(type: str, in_place: bool):
80+
def main(type: str, in_place: bool, specific_file: str):
7381
if type == "python":
74-
return lint_python(in_place)
82+
return lint_python(in_place, specific_file)
7583
elif type == "cpp":
76-
return lint_cpp(in_place)
84+
return lint_cpp(in_place, specific_file)
7785
else:
7886
return lint_python(in_place) or lint_cpp(in_place)
7987

@@ -105,6 +113,11 @@ def main(type: str, in_place: bool):
105113
action='store_true',
106114
help="Apply linting rules to your working copy. Changes files."
107115
)
116+
parser.add_argument(
117+
"-f",
118+
"--file",
119+
help="Apply linting rules to a specific file."
120+
)
108121
args = parser.parse_args()
109122

110123
if args.install_tools:
@@ -122,6 +135,7 @@ def main(type: str, in_place: bool):
122135
return_code = main(
123136
type=args.type,
124137
in_place=args.in_place,
138+
specific_file=args.file,
125139
)
126140

127141
sys.exit(return_code)

cpp/arcticdb/storage/library_manager.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,9 @@ void LibraryManager::modify_library_option(
155155
case ModifiableLibraryOption::COLUMNS_PER_SEGMENT:
156156
mutable_write_options->set_column_group_size(get_positive_int(new_value));
157157
break;
158+
case ModifiableLibraryOption::RECURSIVE_NORMALIZERS:
159+
mutable_write_options->set_recursive_normalizers(get_bool(new_value));
160+
break;
158161
default:
159162
throw UnsupportedLibraryOptionValue(fmt::format("Invalid library option: {}", option));
160163
}

cpp/arcticdb/storage/library_manager.hpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,12 @@
1616
#include <arcticdb/entity/protobufs.hpp>
1717

1818
namespace arcticdb::storage {
19-
enum class ModifiableLibraryOption { DEDUP = 1, ROWS_PER_SEGMENT = 2, COLUMNS_PER_SEGMENT = 3 };
19+
enum class ModifiableLibraryOption {
20+
DEDUP = 1,
21+
ROWS_PER_SEGMENT = 2,
22+
COLUMNS_PER_SEGMENT = 3,
23+
RECURSIVE_NORMALIZERS = 4
24+
};
2025
enum class ModifiableEnterpriseLibraryOption { REPLICATION = 1, BACKGROUND_DELETION = 2 };
2126
using LibraryOptionValue = std::variant<bool, int64_t>;
2227

@@ -101,6 +106,8 @@ struct formatter<arcticdb::storage::ModifiableLibraryOption> {
101106
return fmt::format_to(ctx.out(), "ROWS_PER_SEGMENT");
102107
case arcticdb::storage::ModifiableLibraryOption::COLUMNS_PER_SEGMENT:
103108
return fmt::format_to(ctx.out(), "COLUMNS_PER_SEGMENT");
109+
case arcticdb::storage::ModifiableLibraryOption::RECURSIVE_NORMALIZERS:
110+
return fmt::format_to(ctx.out(), "RECURSIVE_NORMALIZERS");
104111
default:
105112
arcticdb::util::raise_rte("Unrecognized modifiable option {}", int(o));
106113
}

cpp/arcticdb/storage/python_bindings.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,8 @@ void register_bindings(py::module& storage, py::exception<arcticdb::ArcticExcept
148148
)pbdoc")
149149
.value("DEDUP", ModifiableLibraryOption::DEDUP)
150150
.value("ROWS_PER_SEGMENT", ModifiableLibraryOption::ROWS_PER_SEGMENT)
151-
.value("COLUMNS_PER_SEGMENT", ModifiableLibraryOption::COLUMNS_PER_SEGMENT);
151+
.value("COLUMNS_PER_SEGMENT", ModifiableLibraryOption::COLUMNS_PER_SEGMENT)
152+
.value("RECURSIVE_NORMALIZERS", ModifiableLibraryOption::RECURSIVE_NORMALIZERS);
152153

153154
py::enum_<ModifiableEnterpriseLibraryOption>(storage, "ModifiableEnterpriseLibraryOption", R"pbdoc(
154155
Enterprise library options that can be modified after library creation.

0 commit comments

Comments
 (0)