Skip to content

Commit f5fd866

Browse files
author
Grok Compression
committed
TagTree: micro optimization
1 parent 404e1b1 commit f5fd866

File tree

2 files changed

+141
-130
lines changed

2 files changed

+141
-130
lines changed

.vscode/launch.json

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,26 @@
485485
"MIMode": "gdb",
486486
"miDebuggerPath": "/usr/bin/gdb"
487487
},
488-
{
488+
{
489+
"name": "Excalibur Test",
490+
"type": "cppdbg",
491+
"request": "launch",
492+
"program": "/usr/bin/time",
493+
"args": [
494+
"-v",
495+
"${workspaceFolder}/build/bin/core_decompress",
496+
"-i",
497+
"$HOME/temp/2048x2048.jp2",
498+
],
499+
"cwd": "${workspaceFolder}",
500+
"environment": [
501+
{ "name": "GRK_DEBUG", "value": "3" },
502+
{ "name": "GRK_EXCALIBUR", "value": "1" }
503+
],
504+
"MIMode": "gdb",
505+
"miDebuggerPath": "/usr/bin/gdb"
506+
},
507+
{
489508
"name": "Pleiades",
490509
"type": "cppdbg",
491510
"request": "launch",

src/lib/core/t2/TagTree.h

Lines changed: 121 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,11 @@
2020
#include <limits>
2121
#include <stdexcept>
2222
#include <iostream>
23+
#include <vector>
2324

2425
namespace grk
2526
{
2627

27-
/**
28-
Tag node
29-
*/
30-
template<typename T>
31-
struct TagTreeNode
32-
{
33-
TagTreeNode() : parent(nullptr), value(0), low(0), known(false) {}
34-
35-
TagTreeNode* parent;
36-
T value;
37-
T low;
38-
bool known;
39-
};
40-
41-
/**
42-
Tag tree
43-
*/
4428
template<typename T>
4529
class TagTree
4630
{
@@ -53,79 +37,17 @@ class TagTree
5337
*
5438
* @return a new tag tree if successful, otherwise nullptr
5539
*/
40+
5641
TagTree(uint16_t leavesWidth, uint16_t leavesHeight)
57-
: leavesWidth_(leavesWidth), leavesHeight_(leavesHeight), nodeCount(0), nodes(nullptr)
42+
: leavesWidth_(leavesWidth), leavesHeight_(leavesHeight)
5843
{
59-
uint16_t resLeavesWidth[16];
60-
uint16_t resLeavesHeight[16];
61-
int8_t numLevels = 0;
62-
resLeavesWidth[0] = leavesWidth_;
63-
resLeavesHeight[0] = leavesHeight_;
64-
nodeCount = 0;
65-
uint32_t nodesPerLevel;
66-
67-
do
68-
{
69-
if(numLevels == 16)
70-
{
71-
grklog.error("TagTree constructor: num level overflow");
72-
throw std::runtime_error("TagTree constructor: num level overflow");
73-
}
74-
nodesPerLevel = static_cast<uint32_t>(resLeavesWidth[numLevels]) * resLeavesHeight[numLevels];
75-
resLeavesWidth[numLevels + 1] = (uint16_t)((resLeavesWidth[numLevels] + 1) >> 1);
76-
resLeavesHeight[numLevels + 1] = (uint16_t)((resLeavesHeight[numLevels] + 1) >> 1);
77-
nodeCount += nodesPerLevel;
78-
++numLevels;
79-
} while(nodesPerLevel > 1);
80-
81-
if(nodeCount == 0)
82-
{
83-
grklog.warn("tgt_create numnodes == 0, no tree created.");
84-
throw std::runtime_error("tgt_create numnodes == 0, no tree created");
85-
}
86-
87-
nodes = new TagTreeNode<T>[nodeCount];
88-
auto currentNode = nodes;
89-
auto parentNode = nodes + static_cast<uint32_t>(leavesWidth_) * leavesHeight_;
90-
auto parentNodeNext = parentNode;
91-
92-
for(int8_t i = 0; i < numLevels - 1; ++i)
93-
{
94-
for(uint16_t j = 0U; j < resLeavesHeight[i]; ++j)
95-
{
96-
int64_t k = resLeavesWidth[i];
97-
while(--k >= 0)
98-
{
99-
currentNode->parent = parentNode;
100-
++currentNode;
101-
if(--k >= 0)
102-
{
103-
currentNode->parent = parentNode;
104-
++currentNode;
105-
}
106-
++parentNode;
107-
}
108-
if((j & 1) || j == resLeavesHeight[i] - 1)
109-
{
110-
parentNodeNext = parentNode;
111-
}
112-
else
113-
{
114-
parentNode = parentNodeNext;
115-
parentNodeNext += resLeavesWidth[i];
116-
}
117-
}
118-
}
119-
currentNode->parent = nullptr;
44+
buildTree();
12045
reset();
12146
}
12247

123-
~TagTree()
124-
{
125-
delete[] nodes;
126-
}
48+
~TagTree() = default;
12749

128-
constexpr T getUninitializedValue(void)
50+
constexpr T getUninitializedValue() const noexcept
12951
{
13052
return (std::numeric_limits<T>::max)();
13153
}
@@ -135,13 +57,14 @@ class TagTree
13557
*/
13658
void reset()
13759
{
138-
for(auto i = 0U; i < nodeCount; ++i)
60+
for(auto& n : nodes_)
13961
{
140-
auto current_node = nodes + i;
141-
current_node->value = getUninitializedValue();
142-
current_node->low = 0;
143-
current_node->known = false;
62+
n.value = getUninitializedValue();
63+
n.low = 0;
64+
n.known = false;
14465
}
66+
for(auto& v : leafCache_)
67+
v = getUninitializedValue();
14568
}
14669

14770
/**
@@ -151,11 +74,11 @@ class TagTree
15174
*/
15275
void set(uint64_t leafno, T value)
15376
{
154-
auto node = nodes + leafno;
155-
while(node && node->value > value)
77+
uint32_t node = static_cast<uint32_t>(leafno);
78+
while(node != UINT32_MAX && nodes_[node].value > value)
15679
{
157-
node->value = value;
158-
node = node->parent;
80+
nodes_[node].value = value;
81+
node = parents_[node];
15982
}
16083
}
16184

@@ -168,42 +91,44 @@ class TagTree
16891
*/
16992
bool encode(t1_t2::BitIO* bio, uint64_t leafno, T threshold)
17093
{
171-
TagTreeNode<T>* nodeStack[15];
172-
auto nodeStackPtr = nodeStack;
173-
auto node = nodes + leafno;
174-
while(node->parent)
94+
// exact original encode logic, using flat indices
95+
uint32_t nodeStack[16];
96+
int stackPtr = 0;
97+
uint32_t node = static_cast<uint32_t>(leafno);
98+
while(parents_[node] != UINT32_MAX)
17599
{
176-
*nodeStackPtr++ = node;
177-
node = node->parent;
100+
nodeStack[stackPtr++] = node;
101+
node = parents_[node];
178102
}
179103
T low = 0;
180104
while(true)
181105
{
182-
if(node->low < low)
183-
node->low = low;
106+
auto& n = nodes_[node];
107+
if(n.low < low)
108+
n.low = low;
184109
else
185-
low = node->low;
110+
low = n.low;
186111

187112
while(low < threshold)
188113
{
189-
if(low >= node->value)
114+
if(low >= n.value)
190115
{
191-
if(!node->known)
116+
if(!n.known)
192117
{
193118
if(!bio->write(1))
194119
return false;
195-
node->known = true;
120+
n.known = true;
196121
}
197122
break;
198123
}
199124
if(!bio->write(0))
200125
return false;
201126
++low;
202127
}
203-
node->low = low;
204-
if(nodeStackPtr == nodeStack)
128+
n.low = low;
129+
if(stackPtr == 0)
205130
break;
206-
node = *--nodeStackPtr;
131+
node = nodeStack[--stackPtr];
207132
}
208133
return true;
209134
}
@@ -217,49 +142,116 @@ class TagTree
217142
*/
218143
void decode(t1_t2::BitIO* bio, uint64_t leafno, T threshold, T* value)
219144
{
220-
TagTreeNode<T>* nodeStack[15];
145+
if(leafCache_[leafno] < threshold) [[likely]]
146+
{
147+
*value = leafCache_[leafno];
148+
return;
149+
}
150+
221151
*value = getUninitializedValue();
222-
auto nodeStackPtr = nodeStack;
223-
auto node = nodes + leafno;
224-
// climb to top of tree
225-
while(node->parent)
152+
153+
uint32_t nodeStack[16];
154+
int stackPtr = 0;
155+
uint32_t node = static_cast<uint32_t>(leafno);
156+
157+
// climb to root (exact same path as encode)
158+
while(parents_[node] != UINT32_MAX)
226159
{
227-
*nodeStackPtr++ = node;
228-
node = node->parent;
160+
nodeStack[stackPtr++] = node;
161+
node = parents_[node];
229162
}
230-
// descend to bottom of tree
163+
231164
T low = 0;
232165
while(true)
233166
{
234-
if(node->low < low)
235-
node->low = low;
167+
auto& n = nodes_[node];
168+
169+
if(n.low < low)
170+
n.low = low;
236171
else
237-
low = node->low;
238-
while(low < threshold && low < node->value)
172+
low = n.low;
173+
174+
while(low < threshold && low < n.value) [[likely]]
239175
{
240176
if(bio->read())
241177
{
242-
node->value = low;
178+
n.value = low;
243179
break;
244180
}
245-
low++;
181+
++low;
246182
}
247-
node->low = low;
248-
if(nodeStackPtr == nodeStack)
183+
n.low = low;
184+
185+
if(stackPtr == 0) [[unlikely]]
249186
break;
250-
node = *--nodeStackPtr;
187+
188+
node = nodeStack[--stackPtr]; // descend to child
251189
}
252-
*value = node->value;
190+
191+
*value = nodes_[node].value; // now guaranteed to be the leaf node
192+
if(*value < threshold)
193+
leafCache_[leafno] = *value;
253194
}
254195

255196
private:
197+
struct Node
198+
{
199+
T value;
200+
T low;
201+
bool known;
202+
};
203+
204+
void buildTree()
205+
{
206+
// same level calculation as original
207+
uint16_t resW[16]{}, resH[16]{};
208+
int8_t levels = 0;
209+
resW[0] = leavesWidth_;
210+
resH[0] = leavesHeight_;
211+
uint64_t totalNodes = 0;
212+
uint32_t nodesPerLevel;
213+
214+
do
215+
{
216+
nodesPerLevel = static_cast<uint32_t>(resW[levels]) * resH[levels];
217+
resW[levels + 1] = (uint16_t)((resW[levels] + 1) >> 1);
218+
resH[levels + 1] = (uint16_t)((resH[levels] + 1) >> 1);
219+
totalNodes += nodesPerLevel;
220+
++levels;
221+
} while(nodesPerLevel > 1);
222+
223+
nodes_.resize(totalNodes);
224+
parents_.resize(totalNodes, UINT32_MAX);
225+
leafCache_.resize(static_cast<uint64_t>(leavesWidth_) * leavesHeight_);
226+
227+
// build parents (exact same linking logic as original, but with indices)
228+
uint64_t parentBase = static_cast<uint64_t>(leavesWidth_) * leavesHeight_;
229+
uint64_t cur = 0;
230+
231+
for(int8_t lvl = 0; lvl < levels - 1; ++lvl)
232+
{
233+
uint32_t w = resW[lvl];
234+
uint32_t h = resH[lvl];
235+
for(uint32_t j = 0; j < h; ++j)
236+
{
237+
for(uint32_t k = 0; k < w; ++k)
238+
{
239+
parents_[cur] = static_cast<uint32_t>(parentBase + (j >> 1) * resW[lvl + 1] + (k >> 1));
240+
++cur;
241+
}
242+
}
243+
parentBase += static_cast<uint64_t>(resW[lvl + 1]) * resH[lvl + 1];
244+
}
245+
}
246+
256247
uint16_t leavesWidth_;
257248
uint16_t leavesHeight_;
258-
uint64_t nodeCount;
259-
TagTreeNode<T>* nodes;
249+
std::vector<Node> nodes_;
250+
std::vector<uint32_t> parents_; // UINT32_MAX = root
251+
std::vector<T> leafCache_;
260252
};
261253

262254
using TagTreeU8 = TagTree<uint8_t>;
263255
using TagTreeU16 = TagTree<uint16_t>;
264256

265-
} // namespace grk
257+
} // namespace grk

0 commit comments

Comments
 (0)