Skip to content

Commit 5bf81dd

Browse files
committed
[ntuple] conditionally create page lookup data structure
To save space in the cluster descriptor, create the lookup data structure used for searching a large number of pages in a cluster+column only if that cluster+column actually has many (>10) pages. Reduces the typical size of RPageRange from 56B to 40B.
1 parent 01b0039 commit 5bf81dd

File tree

2 files changed

+38
-13
lines changed

2 files changed

+38
-13
lines changed

tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -445,14 +445,19 @@ public:
445445
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange,
446446
const ROOT::Internal::RColumnElementBase &element, std::size_t pageSize);
447447

448+
std::vector<RPageInfo> fPageInfos;
449+
448450
/// Has the same length as fPageInfos and stores the sum of the number of elements of all the pages
449451
/// up to and including a given index. Used for binary search in Find().
450-
std::vector<ROOT::NTupleSize_t> fCumulativeNElements;
452+
/// This vector is only created if fPageInfos has more than kLargeRangeThreshold elements.
453+
std::unique_ptr<std::vector<ROOT::NTupleSize_t>> fCumulativeNElements;
451454

452455
ROOT::DescriptorId_t fPhysicalColumnId = ROOT::kInvalidDescriptorId;
453-
std::vector<RPageInfo> fPageInfos;
454456

455457
public:
458+
/// Create fCumulativeNElements only when it's needed, i.e. when there are many pages to search through.
459+
static constexpr std::size_t kLargeRangeThreshold = 10;
460+
456461
RPageRange() = default;
457462
RPageRange(const RPageRange &other) = delete;
458463
RPageRange &operator=(const RPageRange &other) = delete;
@@ -464,7 +469,9 @@ public:
464469
RPageRange clone;
465470
clone.fPhysicalColumnId = fPhysicalColumnId;
466471
clone.fPageInfos = fPageInfos;
467-
clone.fCumulativeNElements = fCumulativeNElements;
472+
if (fCumulativeNElements) {
473+
clone.fCumulativeNElements = std::make_unique<std::vector<ROOT::NTupleSize_t>>(*fCumulativeNElements);
474+
}
468475
return clone;
469476
}
470477

tree/ntuple/src/RNTupleDescriptor.cxx

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,21 @@ ROOT::RColumnDescriptor ROOT::RColumnDescriptor::Clone() const
208208
ROOT::RClusterDescriptor::RPageInfoExtended
209209
ROOT::RClusterDescriptor::RPageRange::Find(ROOT::NTupleSize_t idxInCluster) const
210210
{
211-
const auto N = fCumulativeNElements.size();
211+
if (!fCumulativeNElements) {
212+
// Small range, just iterate through fPageInfos
213+
NTupleSize_t pageNumber = 0;
214+
NTupleSize_t firstInPage = 0;
215+
for (const auto &pi : fPageInfos) {
216+
if (firstInPage + pi.GetNElements() > idxInCluster) {
217+
return RPageInfoExtended{pi, firstInPage, pageNumber};
218+
}
219+
pageNumber++;
220+
firstInPage += pi.GetNElements();
221+
}
222+
R__ASSERT(false);
223+
}
224+
225+
const auto N = fCumulativeNElements->size();
212226
R__ASSERT(N > 0);
213227
R__ASSERT(N == fPageInfos.size());
214228

@@ -217,20 +231,20 @@ ROOT::RClusterDescriptor::RPageRange::Find(ROOT::NTupleSize_t idxInCluster) cons
217231
std::size_t midpoint = N;
218232
while (left <= right) {
219233
midpoint = (left + right) / 2;
220-
if (fCumulativeNElements[midpoint] <= idxInCluster) {
234+
if ((*fCumulativeNElements)[midpoint] <= idxInCluster) {
221235
left = midpoint + 1;
222236
continue;
223237
}
224238

225-
if ((midpoint == 0) || (fCumulativeNElements[midpoint - 1] <= idxInCluster))
239+
if ((midpoint == 0) || ((*fCumulativeNElements)[midpoint - 1] <= idxInCluster))
226240
break;
227241

228242
right = midpoint - 1;
229243
}
230244
R__ASSERT(midpoint < N);
231245

232246
auto pageInfo = fPageInfos[midpoint];
233-
decltype(idxInCluster) firstInPage = (midpoint == 0) ? 0 : fCumulativeNElements[midpoint - 1];
247+
decltype(idxInCluster) firstInPage = (midpoint == 0) ? 0 : (*fCumulativeNElements)[midpoint - 1];
234248
R__ASSERT(firstInPage <= idxInCluster);
235249
R__ASSERT((firstInPage + pageInfo.GetNElements()) > idxInCluster);
236250
return RPageInfoExtended{pageInfo, firstInPage, midpoint};
@@ -975,12 +989,16 @@ ROOT::RResult<ROOT::RClusterDescriptor> ROOT::Internal::RClusterDescriptorBuilde
975989
if (fCluster.fColumnRanges.count(pr.first) == 0) {
976990
return R__FAIL("missing column range");
977991
}
978-
pr.second.fCumulativeNElements.clear();
979-
pr.second.fCumulativeNElements.reserve(pr.second.fPageInfos.size());
980-
ROOT::NTupleSize_t sum = 0;
981-
for (const auto &pi : pr.second.fPageInfos) {
982-
sum += pi.GetNElements();
983-
pr.second.fCumulativeNElements.emplace_back(sum);
992+
pr.second.fCumulativeNElements.reset();
993+
const auto nPages = pr.second.fPageInfos.size();
994+
if (nPages > RClusterDescriptor::RPageRange::kLargeRangeThreshold) {
995+
pr.second.fCumulativeNElements = std::make_unique<std::vector<NTupleSize_t>>();
996+
pr.second.fCumulativeNElements->reserve(nPages);
997+
ROOT::NTupleSize_t sum = 0;
998+
for (const auto &pi : pr.second.fPageInfos) {
999+
sum += pi.GetNElements();
1000+
pr.second.fCumulativeNElements->emplace_back(sum);
1001+
}
9841002
}
9851003
}
9861004
RClusterDescriptor result;

0 commit comments

Comments
 (0)