Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions tree/ntuple/doc/BinaryFormatSpecification.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# RNTuple Binary Format Specification 1.0.2.0
# RNTuple Binary Format Specification 1.1.0.0

## Versioning Notes

Expand Down Expand Up @@ -167,7 +167,12 @@ That means that readers need to continue reading feature flags as long as their

Readers should gracefully abort reading when they encounter unknown bits set.

At the moment, there are no feature flag bits defined.
The following table lists all currently defined feature flags. Note that the flag name is given for informational
purposes only and is not normative.

| Flag Bit | Introduced in | Name | Meaning |
|----------|---------------|-------------------------|----------------------------------------------|
| 0 | 1.1.0.0 | Nested Deferred Columns | Signals that the RNTuple contains at least one deferred column that is part of a collection and was extended<br>(i.e. it appears in the footer). This can happen when merging two RNTuples that have the same collection field<br>backed by columns with different encodings, e.g. a `vector<float>` whose elements are represented by SplitReal32<br>in the first RNTuple and by Real32 in the second. |


## Frames
Expand Down
4 changes: 2 additions & 2 deletions tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -399,11 +399,11 @@ protected:
fAvailableColumns.emplace_back(ROOT::Internal::RColumn::Create<T>(onDiskTypes[0], 0, representationIndex));
if (onDiskTypes[0] == ROOT::ENTupleColumnType::kReal32Trunc) {
const auto &fdesc = desc.GetFieldDescriptor(Base::GetOnDiskId());
const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]);
const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[representationIndex]);
column->SetBitsOnStorage(coldesc.GetBitsOnStorage());
} else if (onDiskTypes[0] == ROOT::ENTupleColumnType::kReal32Quant) {
const auto &fdesc = desc.GetFieldDescriptor(Base::GetOnDiskId());
const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]);
const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[representationIndex]);
assert(coldesc.GetValueRange().has_value());
const auto [valMin, valMax] = *coldesc.GetValueRange();
column->SetBitsOnStorage(coldesc.GetBitsOnStorage());
Expand Down
7 changes: 4 additions & 3 deletions tree/ntuple/inc/ROOT/RFieldBase.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -260,14 +260,15 @@ private:
func(target);
}

/// Translate an entry index to a column element index of the principal column and vice versa. These functions
/// take into account the role and number of repetitions on each level of the field hierarchy as follows:
/// Translate an entry index to a column element index of the principal column. This function
/// takes into account the role and number of repetitions on each level of the field hierarchy as follows:
/// - Top level fields: element index == entry index
/// - Record fields propagate their principal column index to the principal columns of direct descendant fields
/// - Collection and variant fields set the principal column index of their children to 0
///
/// The column element index also depends on the number of repetitions of each field in the hierarchy, e.g., given a
/// field with type `std::array<std::array<float, 4>, 2>`, this function returns 8 for the innermost field.
/// field with type `std::array<std::array<float, 4>, 2>`, this function called with `globalIndex == 1`
/// returns 8 for the innermost field.
ROOT::NTupleSize_t EntryToColumnElementIndex(ROOT::NTupleSize_t globalIndex) const;

/// Flushes data from active columns
Expand Down
6 changes: 6 additions & 0 deletions tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,12 @@ public:
/// All known feature flags.
/// Note that the flag values represent the bit _index_, not the already-bitshifted integer.
enum EFeatureFlags {
/// Signals that the RNTuple contains at least one deferred column that is part of a collection and was extended
/// (i.e. it appears in the footer). This can happen when merging two RNTuples that have the same collection field
/// backed by columns with different encoding, e.g. a vector<float> whose elements are represented by SplitReal32
/// in the first ntuple and by Real32 in the second.
/// Added in version 1.1.0.0 of the binary format.
kFeatureFlag_NestedDeferredColumns = 0,
// Insert new feature flags here, with contiguous values. If at any point a "hole" appears in the valid feature
// flags values, the check in RNTupleSerialize must be updated.

Expand Down
9 changes: 3 additions & 6 deletions tree/ntuple/src/RFieldBase.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -683,14 +683,14 @@ void ROOT::RFieldBase::Attach(std::unique_ptr<ROOT::RFieldBase> child, std::stri

ROOT::NTupleSize_t ROOT::RFieldBase::EntryToColumnElementIndex(ROOT::NTupleSize_t globalIndex) const
{
std::size_t result = globalIndex;
ROOT::NTupleSize_t result = globalIndex;
for (auto f = this; f != nullptr; f = f->GetParent()) {
auto parent = f->GetParent();
if (parent && (parent->GetStructure() == ROOT::ENTupleStructure::kCollection ||
parent->GetStructure() == ROOT::ENTupleStructure::kVariant)) {
return 0U;
}
result *= std::max(f->GetNRepetitions(), std::size_t{1U});
result *= std::max<ROOT::NTupleSize_t>(f->GetNRepetitions(), ROOT::NTupleSize_t{1U});
}
return result;
}
Expand Down Expand Up @@ -850,10 +850,7 @@ void ROOT::RFieldBase::SetColumnRepresentatives(const RColumnRepresentations::Se
if (itRepresentative == std::end(validTypes))
throw RException(R__FAIL("invalid column representative"));

// don't add a duplicate representation
if (std::find_if(fColumnRepresentatives.begin(), fColumnRepresentatives.end(),
[&r](const auto &rep) { return r == rep.get(); }) == fColumnRepresentatives.end())
fColumnRepresentatives.emplace_back(*itRepresentative);
fColumnRepresentatives.emplace_back(*itRepresentative);
}
}

Expand Down
25 changes: 23 additions & 2 deletions tree/ntuple/src/RNTupleDescriptor.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -931,8 +931,21 @@ ROOT::Internal::RClusterDescriptorBuilder::AddExtendedColumnRanges(const RNTuple
// `ROOT::RFieldBase::EntryToColumnElementIndex()`, i.e. it is a principal column reachable from the
// field zero excluding subfields of collection and variant fields.
if (c.IsDeferredColumn()) {
columnRange.SetFirstElementIndex(fCluster.GetFirstEntryIndex() * nRepetitions);
columnRange.SetNElements(fCluster.GetNEntries() * nRepetitions);
if (c.GetRepresentationIndex() == 0) {
columnRange.SetFirstElementIndex(fCluster.GetFirstEntryIndex() * nRepetitions);
columnRange.SetNElements(fCluster.GetNEntries() * nRepetitions);
} else {
// Deferred representations which are not the first cannot count on the number of elements being
// equal to Entries * nRepetitions because they might have been added in a later cluster. But they
// can rely on the first representation having the correct FirstElement/NElements (by definition
// the first representation cannot be an "extended" one), therefore they can just copy the value
// from it.
const auto &field = desc.GetFieldDescriptor(fieldId);
const auto firstReprColumnId = field.GetLogicalColumnIds()[c.GetIndex()];
const auto &firstReprColumnRange = fCluster.fColumnRanges[firstReprColumnId];
columnRange.SetFirstElementIndex(firstReprColumnRange.GetFirstElementIndex());
columnRange.SetNElements(firstReprColumnRange.GetNElements());
}
if (!columnRange.IsSuppressed()) {
auto &pageRange = fCluster.fPageRanges[physicalId];
pageRange.fPhysicalColumnId = physicalId;
Expand Down Expand Up @@ -1350,6 +1363,14 @@ void ROOT::Internal::RNTupleDescriptorBuilder::ShiftAliasColumns(std::uint32_t o
R__ASSERT(fDescriptor.fColumnDescriptors.count(c.fLogicalColumnId) == 0);
fDescriptor.fColumnDescriptors.emplace(c.fLogicalColumnId, std::move(c));
}

// Patch up column ids in the header extension
if (auto &xHeader = fDescriptor.fHeaderExtension) {
for (auto &columnId : xHeader->fExtendedColumnRepresentations) {
if (columnId >= fDescriptor.GetNPhysicalColumns())
columnId += offset;
}
}
}

ROOT::RResult<void> ROOT::Internal::RNTupleDescriptorBuilder::AddCluster(RClusterDescriptor &&clusterDesc)
Expand Down
20 changes: 13 additions & 7 deletions tree/ntuple/test/ntuple_merger.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1015,12 +1015,13 @@ TEST(RNTupleMerger, MergeLateModelExtension)
{
auto model = RNTupleModel::Create();
auto fieldFoo = model->MakeField<std::unordered_map<std::string, int>>("foo");
auto fieldVfoo = model->MakeField<std::vector<int>>("vfoo");
auto fieldVfoo = model->MakeField<std::vector<int>[3]>("vfoo");
auto fieldBar = model->MakeField<int>("bar");
auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard1.GetPath(), RNTupleWriteOptions());
for (size_t i = 0; i < 10; ++i) {
fieldFoo->insert(std::make_pair(std::to_string(i), i * 123));
*fieldVfoo = {(int)i * 123};
fieldVfoo[0] = {(int)i * 123};
fieldVfoo[2] = {(int)i * 345};
*fieldBar = i * 321;
ntuple->Fill();
}
Expand All @@ -1031,14 +1032,15 @@ TEST(RNTupleMerger, MergeLateModelExtension)
auto model = RNTupleModel::Create();
auto fieldBaz = model->MakeField<int>("baz");
auto fieldFoo = model->MakeField<std::unordered_map<std::string, int>>("foo");
auto fieldVfoo = model->MakeField<std::vector<int>>("vfoo");
auto fieldVfoo = model->MakeField<std::vector<int>[3]>("vfoo");
auto wopts = RNTupleWriteOptions();
wopts.SetCompression(0);
auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard2.GetPath(), wopts);
for (size_t i = 0; i < 10; ++i) {
*fieldBaz = i * 567;
fieldFoo->insert(std::make_pair(std::to_string(i), i * 765));
*fieldVfoo = {(int)i * 765};
fieldVfoo[0] = {(int)i * 765};
fieldVfoo[2] = {(int)i * 987};
ntuple->Fill();
}
}
Expand Down Expand Up @@ -1072,21 +1074,25 @@ TEST(RNTupleMerger, MergeLateModelExtension)
auto ntuple = RNTupleReader::Open("ntuple", fileGuard3.GetPath());
EXPECT_EQ(ntuple->GetNEntries(), 20);
auto foo = ntuple->GetModel().GetDefaultEntry().GetPtr<std::unordered_map<std::string, int>>("foo");
auto vfoo = ntuple->GetModel().GetDefaultEntry().GetPtr<std::vector<int>>("vfoo");
auto vfoo = ntuple->GetModel().GetDefaultEntry().GetPtr<std::vector<int>[3]>("vfoo");
auto bar = ntuple->GetModel().GetDefaultEntry().GetPtr<int>("bar");
auto baz = ntuple->GetModel().GetDefaultEntry().GetPtr<int>("baz");

for (int i = 0; i < 10; ++i) {
ntuple->LoadEntry(i);
ASSERT_EQ((*foo)[std::to_string(i)], i * 123);
ASSERT_EQ((*vfoo)[0], i * 123);
ASSERT_EQ(vfoo[0][0], i * 123);
ASSERT_EQ(vfoo[2][0], i * 345);
ASSERT_TRUE(vfoo[1].empty());
ASSERT_EQ(*bar, i * 321);
ASSERT_EQ(*baz, 0);
}
for (int i = 10; i < 20; ++i) {
ntuple->LoadEntry(i);
ASSERT_EQ((*foo)[std::to_string(i - 10)], (i - 10) * 765);
ASSERT_EQ((*vfoo)[0], (i - 10) * 765);
ASSERT_EQ(vfoo[0][0], (i - 10) * 765);
ASSERT_EQ(vfoo[2][0], (i - 10) * 987);
ASSERT_TRUE(vfoo[1].empty());
ASSERT_EQ(*bar, 0);
ASSERT_EQ(*baz, (i - 10) * 567);
}
Expand Down
31 changes: 26 additions & 5 deletions tree/ntuple/test/ntuple_multi_column.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -377,11 +377,32 @@ TEST(RNTuple, MultiColumnRepresentationBulk)
EXPECT_FLOAT_EQ(2.0, arr[0]);
}

TEST(RNTuple, MultiColumnRepresentationDedup)
TEST(RNTuple, MultiColumnRepresentationVariableBitWidth)
{
FileRaii fileGuard("test_ntuple_multi_column_representation_dedup.root");
FileRaii fileGuard("test_ntuple_multi_column_representation_varbitwidth.root");

auto fldPx = RFieldBase::Create("px", "float").Unwrap();
fldPx->SetColumnRepresentatives({{ROOT::ENTupleColumnType::kReal16}, {ROOT::ENTupleColumnType::kReal16}});
EXPECT_EQ(fldPx->GetColumnRepresentatives().size(), 1);
{
auto model = RNTupleModel::Create();
auto fldPx = std::make_unique<RField<float>>("px");
fldPx->SetTruncated(26);
fldPx->SetColumnRepresentatives({{ROOT::ENTupleColumnType::kReal32}, {ROOT::ENTupleColumnType::kReal32Trunc}});
model->AddField(std::move(fldPx));
auto ptrPx = model->GetDefaultEntry().GetPtr<float>("px");
auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath());
*ptrPx = 1.0;
writer->Fill();
writer->CommitCluster();
ROOT::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation(
const_cast<RFieldBase &>(writer->GetModel().GetConstField("px")), 1);
*ptrPx = 2.0;
writer->Fill();
}

auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath());
auto fldPx = reader->GetModel().GetDefaultEntry().GetPtr<float>("px");

reader->LoadEntry(0);
EXPECT_FLOAT_EQ(1.0, *fldPx);
reader->LoadEntry(1);
EXPECT_FLOAT_EQ(2.0, *fldPx);
}
Loading