Skip to content

Commit 8fbc321

Browse files
committed
Merge branch '2.21' into 2.x
2 parents 909fde0 + 33e5cf1 commit 8fbc321

5 files changed

Lines changed: 443 additions & 7 deletions

File tree

release-notes/CREDITS-2.x

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,3 +515,8 @@ Vitor Pamplona (@vitorpamplona)
515515
Lars Hagen (@larshagencognite)
516516
* Contributed #1470: Add method `copyCurrentStructureExact()` to `JsonGenerator`
517517
(2.21.0)
518+
519+
Mike Pedersen (@mpdncrwd)
520+
* Reported #1581: `NonBlockingByteBufferParser`: Unexpected Illegal surrogate
521+
character when parsing field names
522+
(2.21.3)

release-notes/VERSION-2.x

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@ a pure JSON library.
1414
=== Releases ===
1515
------------------------------------------------------------------------
1616

17+
2.21.3 (not yet released)
18+
19+
#1581: `NonBlockingByteBufferParser`: Unexpected Illegal surrogate
20+
character when parsing field names
21+
(reported by Mike P)
22+
(fix by @pjfanning)
23+
1724
2.21.2 (20-Mar-2026)
1825

1926
#1541: Unexpected Illegal surrogate character when parsing field names

src/main/java/com/fasterxml/jackson/core/json/async/NonBlockingJsonParserBase.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,14 @@ public abstract class NonBlockingJsonParserBase
162162

163163
protected int _quotedDigits;
164164

165+
/**
166+
* High surrogate code point awaiting matching low surrogate during
167+
* field name parsing, or 0 if none pending.
168+
*
169+
* @since 2.21.3
170+
*/
171+
protected int _pendingSurrogateInName;
172+
165173
/*
166174
/**********************************************************************
167175
/* Additional parsing state

src/main/java/com/fasterxml/jackson/core/json/async/NonBlockingUtf8JsonParserBase.java

Lines changed: 102 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2127,6 +2127,22 @@ private final JsonToken _parseEscapedName(int qlen, int currQuad, int currQuadBy
21272127
_pendingBytes = currQuadBytes;
21282128
return _updateTokenToNA();
21292129
}
2130+
// [jackson-core#1581]: Check if decoded value is a high surrogate
2131+
if (ch >= 0xD800 && ch <= 0xDBFF) {
2132+
_pendingSurrogateInName = ch;
2133+
_minorState = MINOR_FIELD_NAME_ESCAPE;
2134+
_minorStateAfterSplit = MINOR_FIELD_NAME;
2135+
_quadLength = qlen;
2136+
_pending32 = currQuad;
2137+
_pendingBytes = currQuadBytes;
2138+
_quoted32 = 0;
2139+
_quotedDigits = -2;
2140+
// Hand off to _finishFieldWithEscape() to immediately attempt reading the low surrogate escape
2141+
return _finishFieldWithEscape();
2142+
} else if (ch >= 0xDC00 && ch <= 0xDFFF) {
2143+
_reportError("Unexpected low surrogate character (0x"
2144+
+ Integer.toHexString(ch) + ") in field name");
2145+
}
21302146
}
21312147

21322148
// May need to UTF-8 (re-)encode it, if it's beyond
@@ -2318,6 +2334,22 @@ private JsonToken _finishAposName(int qlen, int currQuad, int currQuadBytes)
23182334
_pendingBytes = currQuadBytes;
23192335
return _updateTokenToNA();
23202336
}
2337+
// [jackson-core#1581]: Check if decoded value is a high surrogate
2338+
if (ch >= 0xD800 && ch <= 0xDBFF) {
2339+
_pendingSurrogateInName = ch;
2340+
_minorState = MINOR_FIELD_NAME_ESCAPE;
2341+
_minorStateAfterSplit = MINOR_FIELD_APOS_NAME;
2342+
_quadLength = qlen;
2343+
_pending32 = currQuad;
2344+
_pendingBytes = currQuadBytes;
2345+
_quoted32 = 0;
2346+
_quotedDigits = -2;
2347+
// Hand off to _finishFieldWithEscape() to immediately attempt reading the low surrogate escape
2348+
return _finishFieldWithEscape();
2349+
} else if (ch >= 0xDC00 && ch <= 0xDFFF) {
2350+
_reportError("Unexpected low surrogate character (0x"
2351+
+ Integer.toHexString(ch) + ") in field name");
2352+
}
23212353
}
23222354
if (ch > 127) {
23232355
// Ok, we'll need room for first byte right away
@@ -2383,12 +2415,60 @@ private JsonToken _finishAposName(int qlen, int currQuad, int currQuadBytes)
23832415

23842416
protected final JsonToken _finishFieldWithEscape() throws IOException
23852417
{
2386-
// First: try finishing what wasn't yet:
2387-
int ch = _decodeSplitEscaped(_quoted32, _quotedDigits);
2388-
if (ch < 0) { // ... if possible
2389-
_minorState = MINOR_FIELD_NAME_ESCAPE;
2390-
return JsonToken.NOT_AVAILABLE;
2418+
int ch;
2419+
2420+
// [jackson-core#1581]: Handle pending high surrogate saved earlier (either just now by the caller, or before input ran out in a previous chunk)
2421+
if (_pendingSurrogateInName != 0) {
2422+
if (_quotedDigits == -2) {
2423+
// Need to read the backslash that starts the low surrogate escape
2424+
if (_inputPtr >= _inputEnd) {
2425+
_minorState = MINOR_FIELD_NAME_ESCAPE;
2426+
return JsonToken.NOT_AVAILABLE;
2427+
}
2428+
int b = getNextUnsignedByteFromBuffer();
2429+
if (b != INT_BACKSLASH) {
2430+
_reportError("Broken surrogate pair in field name: expected '\\' to start low surrogate escape, got 0x"
2431+
+ Integer.toHexString(b));
2432+
}
2433+
_quotedDigits = -1;
2434+
_quoted32 = 0;
2435+
}
2436+
ch = _decodeSplitEscaped(_quoted32, _quotedDigits);
2437+
if (ch < 0) {
2438+
_minorState = MINOR_FIELD_NAME_ESCAPE;
2439+
return JsonToken.NOT_AVAILABLE;
2440+
}
2441+
// Combine high + low surrogate into supplementary code point
2442+
int highSurrogate = _pendingSurrogateInName;
2443+
_pendingSurrogateInName = 0;
2444+
if (ch < 0xDC00 || ch > 0xDFFF) {
2445+
_reportError("Broken surrogate pair in field name: high surrogate 0x"
2446+
+ Integer.toHexString(highSurrogate)
2447+
+ " not followed by valid low surrogate, got 0x"
2448+
+ Integer.toHexString(ch));
2449+
}
2450+
ch = 0x10000 + ((highSurrogate - 0xD800) << 10) + (ch - 0xDC00);
2451+
} else {
2452+
// First: try to finish decoding the escape sequence that was left incomplete:
2453+
ch = _decodeSplitEscaped(_quoted32, _quotedDigits);
2454+
if (ch < 0) { // ... if possible
2455+
_minorState = MINOR_FIELD_NAME_ESCAPE;
2456+
return JsonToken.NOT_AVAILABLE;
2457+
}
2458+
// [jackson-core#1581]: high surrogate - save and wait for low surrogate
2459+
if (ch >= 0xD800 && ch <= 0xDBFF) {
2460+
_pendingSurrogateInName = ch;
2461+
_quoted32 = 0;
2462+
_quotedDigits = -2;
2463+
_minorState = MINOR_FIELD_NAME_ESCAPE;
2464+
// Recurse to immediately attempt reading the low surrogate escape
2465+
return _finishFieldWithEscape();
2466+
} else if (ch >= 0xDC00 && ch <= 0xDFFF) {
2467+
_reportError("Unexpected low surrogate character (0x"
2468+
+ Integer.toHexString(ch) + ") in field name");
2469+
}
23912470
}
2471+
23922472
if (_quadLength >= _quadBuffer.length) {
23932473
_quadBuffer = _growNameDecodeBuffer(_quadBuffer, 32);
23942474
}
@@ -2405,7 +2485,7 @@ protected final JsonToken _finishFieldWithEscape() throws IOException
24052485
currQuad = (currQuad << 8) | (0xc0 | (ch >> 6));
24062486
++currQuadBytes;
24072487
// Second byte gets output below:
2408-
} else { // 3 bytes; no need to worry about surrogates here
2488+
} else if (ch < 0x10000) { // 3 bytes (BMP, non-surrogate)
24092489
currQuad = (currQuad << 8) | (0xe0 | (ch >> 12));
24102490
// need room for middle byte?
24112491
if (++currQuadBytes >= 4) {
@@ -2415,8 +2495,23 @@ protected final JsonToken _finishFieldWithEscape() throws IOException
24152495
}
24162496
currQuad = (currQuad << 8) | (0x80 | ((ch >> 6) & 0x3f));
24172497
++currQuadBytes;
2498+
} else { // 4 bytes (supplementary code point from surrogate pair)
2499+
currQuad = (currQuad << 8) | (0xf0 | (ch >> 18));
2500+
if (++currQuadBytes >= 4) {
2501+
_quadBuffer[_quadLength++] = currQuad;
2502+
currQuad = 0;
2503+
currQuadBytes = 0;
2504+
}
2505+
currQuad = (currQuad << 8) | (0x80 | ((ch >> 12) & 0x3f));
2506+
if (++currQuadBytes >= 4) {
2507+
_quadBuffer[_quadLength++] = currQuad;
2508+
currQuad = 0;
2509+
currQuadBytes = 0;
2510+
}
2511+
currQuad = (currQuad << 8) | (0x80 | ((ch >> 6) & 0x3f));
2512+
++currQuadBytes;
24182513
}
2419-
// And same last byte in both cases, gets output below:
2514+
// And same last byte in all cases, gets output below:
24202515
ch = 0x80 | (ch & 0x3f);
24212516
}
24222517
if (currQuadBytes < 4) {

0 commit comments

Comments
 (0)