Skip to content

Commit fff9333

Browse files
authored
Use JSON grammar constants as much as possible in the parser (#2245)
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
1 parent 65f8293 commit fff9333

File tree

2 files changed

+85
-70
lines changed

2 files changed

+85
-70
lines changed

src/core/json/grammar.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -72,14 +72,17 @@ static constexpr CharT token_object_delimiter{'\u002C'};
7272
// https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf
7373

7474
// Boolean
75+
template <typename CharT> static constexpr CharT token_true{'\u0074'};
7576
template <typename CharT, typename Traits>
7677
static constexpr std::basic_string_view<CharT, Traits> constant_true{
7778
"\u0074\u0072\u0075\u0065"};
79+
template <typename CharT> static constexpr CharT token_false{'\u0066'};
7880
template <typename CharT, typename Traits>
7981
static constexpr std::basic_string_view<CharT, Traits> constant_false{
8082
"\u0066\u0061\u006C\u0073\u0065"};
8183

8284
// Null
85+
template <typename CharT> static constexpr CharT token_null{'\u006E'};
8386
template <typename CharT, typename Traits>
8487
static constexpr std::basic_string_view<CharT, Traits> constant_null{
8588
"\u006E\u0075\u006C\u006C"};

src/core/json/parser.h

Lines changed: 82 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -229,9 +229,11 @@ inline auto scan_string_escape(const std::uint64_t line, std::uint64_t &column,
229229
template <bool TrackPositions>
230230
inline auto scan_string(const std::uint64_t line, std::uint64_t &column,
231231
const char *&cursor, const char *end) -> void {
232+
using CharT = typename JSON::Char;
232233
while (cursor < end) {
233234
const char *scan{cursor};
234-
while (scan < end && *scan != '"' && *scan != '\\' &&
235+
while (scan < end && *scan != internal::token_string_quote<CharT> &&
236+
*scan != internal::token_string_escape<CharT> &&
235237
static_cast<unsigned char>(*scan) >= 0x20) {
236238
scan++;
237239
}
@@ -276,8 +278,10 @@ template <bool TrackPositions>
276278
inline auto scan_digits(const std::uint64_t line, std::uint64_t &column,
277279
const char *&cursor, const char *end,
278280
const bool at_least_one) -> void {
281+
using CharT = typename JSON::Char;
279282
bool found{false};
280-
while (cursor < end && *cursor >= '0' && *cursor <= '9') {
283+
while (cursor < end && *cursor >= internal::token_number_zero<CharT> &&
284+
*cursor <= internal::token_number_nine<CharT>) {
281285
found = true;
282286
if constexpr (TrackPositions) {
283287
column += 1;
@@ -296,25 +300,29 @@ template <bool TrackPositions>
296300
inline auto scan_number(const std::uint64_t line, std::uint64_t &column,
297301
const char *&cursor, const char *end, const char first)
298302
-> void {
299-
if (first == '-') {
300-
if (cursor >= end || *cursor < '0' || *cursor > '9') {
303+
using CharT = typename JSON::Char;
304+
if (first == internal::token_number_minus<CharT>) {
305+
if (cursor >= end || *cursor < internal::token_number_zero<CharT> ||
306+
*cursor > internal::token_number_nine<CharT>) {
301307
if constexpr (TrackPositions) {
302308
column += 1;
303309
}
304310
throw JSONParseError(line, column);
305311
}
306312
}
307313

308-
const char int_start{first == '-' ? *cursor : first};
309-
if (first == '-') {
314+
const char int_start{first == internal::token_number_minus<CharT> ? *cursor
315+
: first};
316+
if (first == internal::token_number_minus<CharT>) {
310317
if constexpr (TrackPositions) {
311318
column += 1;
312319
}
313320
cursor++;
314321
}
315322

316-
if (int_start == '0') {
317-
if (cursor < end && *cursor >= '0' && *cursor <= '9') {
323+
if (int_start == internal::token_number_zero<CharT>) {
324+
if (cursor < end && *cursor >= internal::token_number_zero<CharT> &&
325+
*cursor <= internal::token_number_nine<CharT>) {
318326
if constexpr (TrackPositions) {
319327
column += 1;
320328
}
@@ -324,20 +332,23 @@ inline auto scan_number(const std::uint64_t line, std::uint64_t &column,
324332
scan_digits<TrackPositions>(line, column, cursor, end, false);
325333
}
326334

327-
if (cursor < end && *cursor == '.') {
335+
if (cursor < end && *cursor == internal::token_number_decimal_point<CharT>) {
328336
if constexpr (TrackPositions) {
329337
column += 1;
330338
}
331339
cursor++;
332340
scan_digits<TrackPositions>(line, column, cursor, end, true);
333341
}
334342

335-
if (cursor < end && (*cursor == 'e' || *cursor == 'E')) {
343+
if (cursor < end &&
344+
(*cursor == internal::token_number_exponent_lowercase<CharT> ||
345+
*cursor == internal::token_number_exponent_uppercase<CharT>)) {
336346
if constexpr (TrackPositions) {
337347
column += 1;
338348
}
339349
cursor++;
340-
if (cursor < end && (*cursor == '+' || *cursor == '-')) {
350+
if (cursor < end && (*cursor == internal::token_number_plus<CharT> ||
351+
*cursor == internal::token_number_minus<CharT>)) {
341352
if constexpr (TrackPositions) {
342353
column += 1;
343354
}
@@ -361,6 +372,7 @@ inline auto scan_json(const char *&cursor, const char *end,
361372
std::uint32_t child_count;
362373
};
363374

375+
using CharT = typename JSON::Char;
364376
char character = 0;
365377
std::vector<ContainerFrame> container_stack;
366378
container_stack.reserve(32);
@@ -381,19 +393,19 @@ inline auto scan_json(const char *&cursor, const char *end,
381393
const auto value_line{line};
382394
const auto value_column{column};
383395
switch (character) {
384-
case 't':
396+
case internal::token_true<CharT>:
385397
internal::scan_true<TrackPositions>(line, column, cursor, end);
386398
tape.push_back({TapeType::True, 0, 0, 0, value_line, value_column});
387399
return;
388-
case 'f':
400+
case internal::token_false<CharT>:
389401
internal::scan_false<TrackPositions>(line, column, cursor, end);
390402
tape.push_back({TapeType::False, 0, 0, 0, value_line, value_column});
391403
return;
392-
case 'n':
404+
case internal::token_null<CharT>:
393405
internal::scan_null<TrackPositions>(line, column, cursor, end);
394406
tape.push_back({TapeType::Null, 0, 0, 0, value_line, value_column});
395407
return;
396-
case '"': {
408+
case internal::token_string_quote<CharT>: {
397409
const auto string_start{
398410
static_cast<std::uint32_t>(cursor - buffer_start)};
399411
internal::scan_string<TrackPositions>(line, column, cursor, end);
@@ -403,21 +415,21 @@ inline auto scan_json(const char *&cursor, const char *end,
403415
value_line, value_column});
404416
return;
405417
}
406-
case '[':
418+
case internal::token_array_begin<CharT>:
407419
goto do_scan_array;
408-
case '{':
420+
case internal::token_object_begin<CharT>:
409421
goto do_scan_object;
410-
case '-':
411-
case '0':
412-
case '1':
413-
case '2':
414-
case '3':
415-
case '4':
416-
case '5':
417-
case '6':
418-
case '7':
419-
case '8':
420-
case '9': {
422+
case internal::token_number_minus<CharT>:
423+
case internal::token_number_zero<CharT>:
424+
case internal::token_number_one<CharT>:
425+
case internal::token_number_two<CharT>:
426+
case internal::token_number_three<CharT>:
427+
case internal::token_number_four<CharT>:
428+
case internal::token_number_five<CharT>:
429+
case internal::token_number_six<CharT>:
430+
case internal::token_number_seven<CharT>:
431+
case internal::token_number_eight<CharT>:
432+
case internal::token_number_nine<CharT>: {
421433
const auto number_start{
422434
static_cast<std::uint32_t>(cursor - buffer_start - 1)};
423435
internal::scan_number<TrackPositions>(line, column, cursor, end,
@@ -450,7 +462,7 @@ do_scan_array: {
450462
throw JSONParseError(line, column);
451463
}
452464

453-
if (*cursor == ']') {
465+
if (*cursor == internal::token_array_end<CharT>) {
454466
if constexpr (TrackPositions) {
455467
column += 1;
456468
}
@@ -484,23 +496,23 @@ do_scan_array: {
484496
const auto value_line{line};
485497
const auto value_column{column};
486498
switch (character) {
487-
case '[':
499+
case internal::token_array_begin<CharT>:
488500
goto do_scan_array;
489-
case '{':
501+
case internal::token_object_begin<CharT>:
490502
goto do_scan_object;
491-
case 't':
503+
case internal::token_true<CharT>:
492504
internal::scan_true<TrackPositions>(line, column, cursor, end);
493505
tape.push_back({TapeType::True, 0, 0, 0, value_line, value_column});
494506
goto do_scan_array_item_separator;
495-
case 'f':
507+
case internal::token_false<CharT>:
496508
internal::scan_false<TrackPositions>(line, column, cursor, end);
497509
tape.push_back({TapeType::False, 0, 0, 0, value_line, value_column});
498510
goto do_scan_array_item_separator;
499-
case 'n':
511+
case internal::token_null<CharT>:
500512
internal::scan_null<TrackPositions>(line, column, cursor, end);
501513
tape.push_back({TapeType::Null, 0, 0, 0, value_line, value_column});
502514
goto do_scan_array_item_separator;
503-
case '"': {
515+
case internal::token_string_quote<CharT>: {
504516
const auto string_start{
505517
static_cast<std::uint32_t>(cursor - buffer_start)};
506518
internal::scan_string<TrackPositions>(line, column, cursor, end);
@@ -510,17 +522,17 @@ do_scan_array: {
510522
value_line, value_column});
511523
goto do_scan_array_item_separator;
512524
}
513-
case '-':
514-
case '0':
515-
case '1':
516-
case '2':
517-
case '3':
518-
case '4':
519-
case '5':
520-
case '6':
521-
case '7':
522-
case '8':
523-
case '9': {
525+
case internal::token_number_minus<CharT>:
526+
case internal::token_number_zero<CharT>:
527+
case internal::token_number_one<CharT>:
528+
case internal::token_number_two<CharT>:
529+
case internal::token_number_three<CharT>:
530+
case internal::token_number_four<CharT>:
531+
case internal::token_number_five<CharT>:
532+
case internal::token_number_six<CharT>:
533+
case internal::token_number_seven<CharT>:
534+
case internal::token_number_eight<CharT>:
535+
case internal::token_number_nine<CharT>: {
524536
const auto number_start{
525537
static_cast<std::uint32_t>(cursor - buffer_start - 1)};
526538
internal::scan_number<TrackPositions>(line, column, cursor, end,
@@ -549,9 +561,9 @@ do_scan_array: {
549561
}
550562
character = *cursor++;
551563
switch (character) {
552-
case ',':
564+
case internal::token_array_delimiter<CharT>:
553565
goto do_scan_array_item;
554-
case ']': {
566+
case internal::token_array_end<CharT>: {
555567
assert(!container_stack.empty());
556568
auto &frame{container_stack.back()};
557569
tape[frame.tape_index].count = frame.child_count;
@@ -580,7 +592,7 @@ do_scan_object: {
580592
throw JSONParseError(line, column);
581593
}
582594

583-
if (*cursor == '}') {
595+
if (*cursor == internal::token_object_end<CharT>) {
584596
if constexpr (TrackPositions) {
585597
column += 1;
586598
}
@@ -610,7 +622,7 @@ do_scan_object: {
610622
}
611623
character = *cursor++;
612624
switch (character) {
613-
case '"': {
625+
case internal::token_string_quote<CharT>: {
614626
const auto key_start{static_cast<std::uint32_t>(cursor - buffer_start)};
615627
const auto key_line{line};
616628
const auto key_column{column};
@@ -638,7 +650,7 @@ do_scan_object: {
638650
}
639651
character = *cursor++;
640652
switch (character) {
641-
case ':':
653+
case internal::token_object_key_delimiter<CharT>:
642654
goto do_scan_object_value;
643655
default:
644656
throw JSONParseError(line, column);
@@ -661,23 +673,23 @@ do_scan_object: {
661673
const auto value_line{line};
662674
const auto value_column{column};
663675
switch (character) {
664-
case '[':
676+
case internal::token_array_begin<CharT>:
665677
goto do_scan_array;
666-
case '{':
678+
case internal::token_object_begin<CharT>:
667679
goto do_scan_object;
668-
case 't':
680+
case internal::token_true<CharT>:
669681
internal::scan_true<TrackPositions>(line, column, cursor, end);
670682
tape.push_back({TapeType::True, 0, 0, 0, value_line, value_column});
671683
goto do_scan_object_property_end;
672-
case 'f':
684+
case internal::token_false<CharT>:
673685
internal::scan_false<TrackPositions>(line, column, cursor, end);
674686
tape.push_back({TapeType::False, 0, 0, 0, value_line, value_column});
675687
goto do_scan_object_property_end;
676-
case 'n':
688+
case internal::token_null<CharT>:
677689
internal::scan_null<TrackPositions>(line, column, cursor, end);
678690
tape.push_back({TapeType::Null, 0, 0, 0, value_line, value_column});
679691
goto do_scan_object_property_end;
680-
case '"': {
692+
case internal::token_string_quote<CharT>: {
681693
const auto string_start{
682694
static_cast<std::uint32_t>(cursor - buffer_start)};
683695
internal::scan_string<TrackPositions>(line, column, cursor, end);
@@ -687,17 +699,17 @@ do_scan_object: {
687699
value_line, value_column});
688700
goto do_scan_object_property_end;
689701
}
690-
case '-':
691-
case '0':
692-
case '1':
693-
case '2':
694-
case '3':
695-
case '4':
696-
case '5':
697-
case '6':
698-
case '7':
699-
case '8':
700-
case '9': {
702+
case internal::token_number_minus<CharT>:
703+
case internal::token_number_zero<CharT>:
704+
case internal::token_number_one<CharT>:
705+
case internal::token_number_two<CharT>:
706+
case internal::token_number_three<CharT>:
707+
case internal::token_number_four<CharT>:
708+
case internal::token_number_five<CharT>:
709+
case internal::token_number_six<CharT>:
710+
case internal::token_number_seven<CharT>:
711+
case internal::token_number_eight<CharT>:
712+
case internal::token_number_nine<CharT>: {
701713
const auto number_start{
702714
static_cast<std::uint32_t>(cursor - buffer_start - 1)};
703715
internal::scan_number<TrackPositions>(line, column, cursor, end,
@@ -726,9 +738,9 @@ do_scan_object: {
726738
}
727739
character = *cursor++;
728740
switch (character) {
729-
case ',':
741+
case internal::token_object_delimiter<CharT>:
730742
goto do_scan_object_key;
731-
case '}': {
743+
case internal::token_object_end<CharT>: {
732744
assert(!container_stack.empty());
733745
auto &frame{container_stack.back()};
734746
tape[frame.tape_index].count = frame.child_count;

0 commit comments

Comments
 (0)