Skip to content

Commit 5c1088c

Browse files
author
Grok Compression
committed
compress 16 bit: II
1 parent 2cf8b8f commit 5c1088c

7 files changed

Lines changed: 383 additions & 90 deletions

File tree

src/lib/core/scheduling/standard/CompressScheduler.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ bool CompressScheduler::scheduleT1(ITileProcessor* proc)
119119
block->mct_norms = mct_norms_;
120120
block->mct_numcomps = mct_numcomps_;
121121
block->k_msbs = (uint8_t)(band->maxBitPlanes_ - cblk->numbps());
122+
block->use16BitDwt = tilec->is16BitDwt();
122123
blocks.push_back(block);
123124
}
124125
}
@@ -206,6 +207,7 @@ bool CompressScheduler::populateT1Flow(FlowComponent* flow)
206207
block->mct_norms = mct_norms_;
207208
block->mct_numcomps = mct_numcomps_;
208209
block->k_msbs = (uint8_t)(band->maxBitPlanes_ - cblk->numbps());
210+
block->use16BitDwt = tilec->is16BitDwt();
209211
blocks.push_back(block);
210212
}
211213
}

src/lib/core/t1/BlockExec.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ struct CompressBlockExec : public BlockExec
153153
int32_t* unencodedData = nullptr;
154154
#endif
155155
uint16_t mct_numcomps = 0;
156+
bool use16BitDwt = false;
156157

157158
// Delete copy constructor and assignment operator
158159
CompressBlockExec(const CompressBlockExec&) = delete;

src/lib/core/t1/part1/Coder.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,25 @@ bool Coder::preCompress(CompressBlockExec* block, uint32_t& maximum)
7171
tile_index += tileLineAdvance;
7272
}
7373
}
74+
else if(block->use16BitDwt)
75+
{
76+
// 16-bit DWT produces int32_t values (int16 range) — read directly, not as float
77+
double quant = 1.0 / block->stepsize;
78+
for(auto j = 0U; j < h; ++j)
79+
{
80+
for(auto i = 0U; i < w; ++i)
81+
{
82+
int32_t temp = (int32_t)grk_lrintf((float)(((double)block->tiledp[tile_index++] * quant)) *
83+
(1 << T1_NMSEDEC_FRACBITS));
84+
int32_t mag = temp * ((temp > 0) - (temp < 0));
85+
if((uint32_t)mag > maximum)
86+
maximum = (uint32_t)mag;
87+
int32_t sgn = int32_t((uint32_t)(mag != temp) * 0x80000000);
88+
uncompressedData[cblk_index++] = sgn | mag;
89+
}
90+
tile_index += tileLineAdvance;
91+
}
92+
}
7493
else
7594
{
7695
const auto* const tiledp = (float*)block->tiledp;

src/lib/core/t1/part15/CoderOJPH.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,24 @@ bool T1OJPH::preCompress([[maybe_unused]] CompressBlockExec* block)
137137
tiledp += tileLineAdvance;
138138
}
139139
}
140+
else if(block->use16BitDwt)
141+
{
142+
// 16-bit DWT produces int32_t values (int16 range) — read directly, not as float
143+
auto tiledp = block->tiledp;
144+
for(auto j = 0U; j < h; ++j)
145+
{
146+
for(auto i = 0U; i < w; ++i)
147+
{
148+
int32_t t = (int32_t)((float)*tiledp++ * block->inv_step_ht * (float)(1 << shift));
149+
uint32_t val = t >= 0 ? (uint32_t)t : -(uint32_t)t;
150+
uint32_t sign = t >= 0 ? 0U : 0x80000000U;
151+
int32_t res = (int32_t)(sign | val);
152+
unencoded_data[cblk_index] = res;
153+
cblk_index++;
154+
}
155+
tiledp += tileLineAdvance;
156+
}
157+
}
140158
else
141159
{
142160
auto tiledp = (float*)block->tiledp;

src/lib/core/tile_processor/TileProcessorCompress.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,49 @@ bool TileProcessorCompress::preCompressTile([[maybe_unused]] size_t thread_id)
123123
return false;
124124
auto unreducedTileComp = tileComp;
125125
tileComp->createWindow(Rect32(unreducedTileComp));
126+
127+
// 16-bit forward DWT eligibility.
128+
//
129+
// Reversible 5/3 (ITU-T T.800 Annex F.3.4):
130+
// The 5/3 analysis lifting steps are:
131+
// D[n] -= floor((S[n] + S[n+1]) / 2) (prediction)
132+
// S[n] += floor((D[n-1] + D[n] + 2) / 4) (update)
133+
// BIBO (Bounded-Input Bounded-Output) gain analysis shows intermediate
134+
// values can grow by at most 2^3 (≤6 levels) or 2^4 (>6 levels), plus
135+
// 1 extra bit when the reversible colour transform (RCT, ITU-T T.800
136+
// Annex G.2) is applied. The update step uses an overflow-safe
137+
// averaging operator (see WaveletFwd.cpp) so only the prediction step's
138+
// pre-accumulation headroom limits the working precision:
139+
// prec + headroom ≤ 16
140+
// where headroom = 4 (no MCT) or 5 (MCT, RCT component), i.e. the worst-case (>6 levels) bound is applied regardless of the number of decomposition levels.
141+
//
142+
// Irreversible 9/7 (ITU-T T.800 Annex F.3.5):
143+
// The 9/7 analysis uses four lifting steps with coefficients
144+
// α=-1.586, β=-0.053, γ=0.883, δ=0.444 followed by K-scaling.
145+
// Because the lowpass BIBO gain per level ≈ 6× (dominated by the large
146+
// |α| coefficient), intermediate values compound across decomposition
147+
// levels. Fixed-point 16-bit processing is feasible only when
148+
// prec + 6 ≤ 16 → prec ≤ 10.
149+
// The implementation uses an odd-branch (high-pass) halving strategy
150+
// that stores D samples at half magnitude through the lifting chain,
151+
// with adjusted coefficients and a normalizing factor computed from
152+
// BIBO gains (see WaveletFwd.cpp).
153+
// MCT components are excluded because the irreversible colour transform
154+
// (ICT) operates on float buffers.
155+
auto tccp = tcp_->tccps_ + compno;
156+
if(tccp->qmfbid_ == 1)
157+
{
158+
bool isMctComp = needsMctDecompress(compno) && tcp_->mct_ == 1;
159+
uint32_t headroom = isMctComp ? 5 : 4;
160+
if(imageComp->prec + headroom <= 16)
161+
tileComp->setUse16BitDwt(true);
162+
}
163+
else if(tccp->qmfbid_ == 0)
164+
{
165+
bool isMctComp = needsMctDecompress(compno) && tcp_->mct_ == 1;
166+
if(!isMctComp && imageComp->prec + 6 <= 16)
167+
tileComp->setUse16BitDwt(true);
168+
}
126169
}
127170
uint32_t numTiles = (uint32_t)cp_->t_grid_height_ * cp_->t_grid_width_;
128171

src/lib/core/wavelet/WaveletCommon.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,19 @@ class dwt97
5050
uint32_t rows, float dcShift = 0.0f);
5151
};
5252

53+
class dwt97_16
54+
{
55+
public:
56+
// Adapts int32_t template interface to int16_t 9/7 DWT functions.
57+
// The template passes int32_t* scratch; we cast to int16_t* internally
58+
// (the scratch buffer is large enough since sizeof(int32_t) >= sizeof(int16_t)).
// Only declarations are visible here; the definitions live elsewhere.
// NOTE(review): presumably the vertical (column) analysis pass — confirm against the definition.
// dcShift is an int32_t here, whereas the float dwt97 declaration above takes `float dcShift`.
// intInput defaults to false; its meaning is not visible from this header — TODO document.
59+
void encode_v(int32_t* res, int32_t* scratch, uint32_t height, uint8_t parity, uint32_t stride,
60+
uint32_t cols, int32_t dcShift = 0, bool intInput = false);
61+
62+
// NOTE(review): presumably the horizontal (row) analysis pass — confirm against the definition.
// Unlike encode_v, this overload has no intInput parameter; verify the asymmetry is intentional.
void encode_h(int32_t* row, int32_t* scratch, uint32_t width, uint8_t parity, uint32_t stride,
63+
uint32_t rows, int32_t dcShift = 0);
64+
};
65+
5366
template<typename T, size_t N>
5467
struct vec
5568
{

0 commit comments

Comments
 (0)