@@ -2127,6 +2127,22 @@ private final JsonToken _parseEscapedName(int qlen, int currQuad, int currQuadBy
21272127 _pendingBytes = currQuadBytes ;
21282128 return _updateTokenToNA ();
21292129 }
2130+ // [jackson-core#1581]: Check if decoded value is a high surrogate
2131+ if (ch >= 0xD800 && ch <= 0xDBFF ) {
2132+ _pendingSurrogateInName = ch ;
2133+ _minorState = MINOR_FIELD_NAME_ESCAPE ;
2134+ _minorStateAfterSplit = MINOR_FIELD_NAME ;
2135+ _quadLength = qlen ;
2136+ _pending32 = currQuad ;
2137+ _pendingBytes = currQuadBytes ;
2138+ _quoted32 = 0 ;
2139+ _quotedDigits = -2 ;
2140+ // Recurse to immediately attempt reading the low surrogate escape
2141+ return _finishFieldWithEscape ();
2142+ } else if (ch >= 0xDC00 && ch <= 0xDFFF ) {
2143+ _reportError ("Unexpected low surrogate character (0x"
2144+ + Integer .toHexString (ch ) + ") in field name" );
2145+ }
21302146 }
21312147
21322148 // May need to UTF-8 (re-)encode it, if it's beyond
@@ -2318,6 +2334,22 @@ private JsonToken _finishAposName(int qlen, int currQuad, int currQuadBytes)
23182334 _pendingBytes = currQuadBytes ;
23192335 return _updateTokenToNA ();
23202336 }
2337+ // [jackson-core#1581]: Check if decoded value is a high surrogate
2338+ if (ch >= 0xD800 && ch <= 0xDBFF ) {
2339+ _pendingSurrogateInName = ch ;
2340+ _minorState = MINOR_FIELD_NAME_ESCAPE ;
2341+ _minorStateAfterSplit = MINOR_FIELD_APOS_NAME ;
2342+ _quadLength = qlen ;
2343+ _pending32 = currQuad ;
2344+ _pendingBytes = currQuadBytes ;
2345+ _quoted32 = 0 ;
2346+ _quotedDigits = -2 ;
2347+ // Recurse to immediately attempt reading the low surrogate escape
2348+ return _finishFieldWithEscape ();
2349+ } else if (ch >= 0xDC00 && ch <= 0xDFFF ) {
2350+ _reportError ("Unexpected low surrogate character (0x"
2351+ + Integer .toHexString (ch ) + ") in field name" );
2352+ }
23212353 }
23222354 if (ch > 127 ) {
23232355 // Ok, we'll need room for first byte right away
@@ -2383,12 +2415,60 @@ private JsonToken _finishAposName(int qlen, int currQuad, int currQuadBytes)
23832415
23842416 protected final JsonToken _finishFieldWithEscape () throws IOException
23852417 {
2386- // First: try finishing what wasn't yet:
2387- int ch = _decodeSplitEscaped (_quoted32 , _quotedDigits );
2388- if (ch < 0 ) { // ... if possible
2389- _minorState = MINOR_FIELD_NAME_ESCAPE ;
2390- return JsonToken .NOT_AVAILABLE ;
2418+ int ch ;
2419+
2420+ // [jackson-core#1581]: Handle pending high surrogate saved from previous chunk
2421+ if (_pendingSurrogateInName != 0 ) {
2422+ if (_quotedDigits == -2 ) {
2423+ // Need to read the backslash that starts the low surrogate escape
2424+ if (_inputPtr >= _inputEnd ) {
2425+ _minorState = MINOR_FIELD_NAME_ESCAPE ;
2426+ return JsonToken .NOT_AVAILABLE ;
2427+ }
2428+ int b = getNextUnsignedByteFromBuffer ();
2429+ if (b != INT_BACKSLASH ) {
2430+ _reportError ("Broken surrogate pair in field name: expected '\\ ' to start low surrogate escape, got 0x"
2431+ + Integer .toHexString (b ));
2432+ }
2433+ _quotedDigits = -1 ;
2434+ _quoted32 = 0 ;
2435+ }
2436+ ch = _decodeSplitEscaped (_quoted32 , _quotedDigits );
2437+ if (ch < 0 ) {
2438+ _minorState = MINOR_FIELD_NAME_ESCAPE ;
2439+ return JsonToken .NOT_AVAILABLE ;
2440+ }
2441+ // Combine high + low surrogate into supplementary code point
2442+ int highSurrogate = _pendingSurrogateInName ;
2443+ _pendingSurrogateInName = 0 ;
2444+ if (ch < 0xDC00 || ch > 0xDFFF ) {
2445+ _reportError ("Broken surrogate pair in field name: high surrogate 0x"
2446+ + Integer .toHexString (highSurrogate )
2447+ + " not followed by valid low surrogate, got 0x"
2448+ + Integer .toHexString (ch ));
2449+ }
2450+ ch = 0x10000 + ((highSurrogate - 0xD800 ) << 10 ) + (ch - 0xDC00 );
2451+ } else {
2452+ // First: try finishing what wasn't yet:
2453+ ch = _decodeSplitEscaped (_quoted32 , _quotedDigits );
2454+ if (ch < 0 ) { // ... if possible
2455+ _minorState = MINOR_FIELD_NAME_ESCAPE ;
2456+ return JsonToken .NOT_AVAILABLE ;
2457+ }
2458+ // [jackson-core#1581]: high surrogate - save and wait for low surrogate
2459+ if (ch >= 0xD800 && ch <= 0xDBFF ) {
2460+ _pendingSurrogateInName = ch ;
2461+ _quoted32 = 0 ;
2462+ _quotedDigits = -2 ;
2463+ _minorState = MINOR_FIELD_NAME_ESCAPE ;
2464+ // Recurse to immediately attempt reading the low surrogate escape
2465+ return _finishFieldWithEscape ();
2466+ } else if (ch >= 0xDC00 && ch <= 0xDFFF ) {
2467+ _reportError ("Unexpected low surrogate character (0x"
2468+ + Integer .toHexString (ch ) + ") in field name" );
2469+ }
23912470 }
2471+
23922472 if (_quadLength >= _quadBuffer .length ) {
23932473 _quadBuffer = _growNameDecodeBuffer (_quadBuffer , 32 );
23942474 }
@@ -2405,7 +2485,7 @@ protected final JsonToken _finishFieldWithEscape() throws IOException
24052485 currQuad = (currQuad << 8 ) | (0xc0 | (ch >> 6 ));
24062486 ++currQuadBytes ;
24072487 // Second byte gets output below:
2408- } else { // 3 bytes; no need to worry about surrogates here
2488+ } else if ( ch < 0x10000 ) { // 3 bytes (BMP, non-surrogate)
24092489 currQuad = (currQuad << 8 ) | (0xe0 | (ch >> 12 ));
24102490 // need room for middle byte?
24112491 if (++currQuadBytes >= 4 ) {
@@ -2415,8 +2495,23 @@ protected final JsonToken _finishFieldWithEscape() throws IOException
24152495 }
24162496 currQuad = (currQuad << 8 ) | (0x80 | ((ch >> 6 ) & 0x3f ));
24172497 ++currQuadBytes ;
2498+ } else { // 4 bytes (supplementary code point from surrogate pair)
2499+ currQuad = (currQuad << 8 ) | (0xf0 | (ch >> 18 ));
2500+ if (++currQuadBytes >= 4 ) {
2501+ _quadBuffer [_quadLength ++] = currQuad ;
2502+ currQuad = 0 ;
2503+ currQuadBytes = 0 ;
2504+ }
2505+ currQuad = (currQuad << 8 ) | (0x80 | ((ch >> 12 ) & 0x3f ));
2506+ if (++currQuadBytes >= 4 ) {
2507+ _quadBuffer [_quadLength ++] = currQuad ;
2508+ currQuad = 0 ;
2509+ currQuadBytes = 0 ;
2510+ }
2511+ currQuad = (currQuad << 8 ) | (0x80 | ((ch >> 6 ) & 0x3f ));
2512+ ++currQuadBytes ;
24182513 }
2419- // And same last byte in both cases, gets output below:
2514+ // And same last byte in all cases, gets output below:
24202515 ch = 0x80 | (ch & 0x3f );
24212516 }
24222517 if (currQuadBytes < 4 ) {
0 commit comments