
Commit e05a344

Add support for filter selectors in bracketed segments. (#36)

1 parent 3c2a1d5 commit e05a344
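For a sense of what this commit enables, here is a minimal sketch using the library's top-level `findall` function. The sample data and query are illustrative assumptions, not taken from the commit:

```python
import jsonpath

data = {"users": [{"name": "Sue", "score": 100}, {"name": "John", "score": 86}]}

# A single bracketed segment combining an index selector and a filter
# selector, separated by a comma. Mixing filter selectors with other
# selectors in one segment is the case this commit adds support for.
matches = jsonpath.findall("$.users[0, ?@.score > 90]", data)
print(matches)  # each selector contributes its matches to the result
```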

12 files changed: +463 −260 lines changed

CHANGELOG.md

Lines changed: 5 additions & 3 deletions
```diff
@@ -4,9 +4,10 @@

 **Breaking Changes**

-- We now enforce JSONPath filter expression "well-typedness" by default. That is, filter expressions are checked at compile time according to the IETF JSONPath Draft function extension type system and rules regarding non-singular query usage. If an expression is deemed to not be well-typed, a `JSONPathTypeError` is raised. This can be disabled in Python JSONPath by setting the `well_typed` argument to `JSONPathEnvironment` to `False`, or using `--no-type-checks` on the command line.
-- The JSONPath lexer now yields distinct tokens for single and double quoted string literals. This is so the parser can do a better job of detecting invalid escape sequences.
-- Changed the canonical representation of a JSONPath string literal to use double quotes instead of single quotes.
+- We now enforce JSONPath filter expression "well-typedness" by default. That is, filter expressions are checked at compile time according to the [IETF JSONPath Draft function extension type system](https://datatracker.ietf.org/doc/html/draft-ietf-jsonpath-base-21#section-2.4.1) and rules regarding non-singular query usage. If an expression is deemed to not be well-typed, a `JSONPathTypeError` is raised. This can be disabled in Python JSONPath by setting the `well_typed` argument to `JSONPathEnvironment` to `False`, or using `--no-type-checks` on the command line.
+- The JSONPath lexer and parser have been refactored to accommodate [#30](https://github.com/jg-rp/python-jsonpath/issues/30). As a result, the tokens generated by the lexer and the AST built by the parser have changed significantly. In the unlikely event that anyone is customizing the lexer or parser through subclassing, please [open an issue](https://github.com/jg-rp/python-jsonpath/issues) and I'll provide more details.
+- Changed the normalized representation of JSONPath string literals to use double quotes instead of single quotes.
+- Changed the normalized representation of JSONPath filter expressions to not include parentheses unless the expression includes one or more logical operators.
 - The built-in implementation of the standard `length()` filter function is now a class and is renamed to `jsonpath.function_extensions.Length`.
 - The built-in implementation of the standard `value()` filter function is now a class and is renamed to `jsonpath.function_extensions.Value`.
@@ -16,6 +17,7 @@
 - Fixed parsing of JSONPath integer literals that use scientific notation. Previously we raised a `JSONPathSyntaxError` for literals such as `1e2`.
 - Fixed parsing of JSONPath comparison and logical expressions as filter function arguments. Previously we raised a `JSONPathSyntaxError` if a comparison or logical expression appeared as a filter function argument. Note that none of the built-in, standard filter functions accept arguments of `LogicalType`.
 - Fixed parsing of nested JSONPath filter functions, where a function is used as an argument to another.
+- Fixed JSONPath bracketed segments. We now handle an arbitrary number of filter selectors alongside name, index, slice and wildcard selectors, separated by commas. See [#30](https://github.com/jg-rp/python-jsonpath/issues/30).

 ## Version 0.9.0

```
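As a usage note on the first breaking change above, the following sketch shows the new compile-time type checking and the documented opt-out. It assumes `JSONPathEnvironment` and `JSONPathTypeError` are importable from the top-level `jsonpath` package, as the changelog's naming suggests:

```python
from jsonpath import JSONPathEnvironment
from jsonpath import JSONPathTypeError

strict = JSONPathEnvironment()  # well-typedness checks are on by default

try:
    # length() takes a single value, but @..* is a non-singular query,
    # so this filter expression is not well-typed.
    strict.compile("$[?length(@..*) > 2]")
except JSONPathTypeError as err:
    print(err)

# Disable compile-time type checks, per the changelog entry.
lax = JSONPathEnvironment(well_typed=False)
path = lax.compile("$[?length(@..*) > 2]")
```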

jsonpath/filter.py

Lines changed: 12 additions & 3 deletions
```diff
@@ -23,6 +23,7 @@
 from .function_extensions import FilterFunction
 from .match import NodeList
 from .selectors import Filter as FilterSelector
+from .selectors import ListSelector

 if TYPE_CHECKING:
     from .path import JSONPath
@@ -330,6 +331,8 @@ def __init__(
         super().__init__()

     def __str__(self) -> str:
+        if self.operator in ("&&", "||"):
+            return f"({self.left} {self.operator} {self.right})"
         return f"{self.left} {self.operator} {self.right}"

     def __eq__(self, other: object) -> bool:
@@ -470,9 +473,15 @@ def __eq__(self, other: object) -> bool:
         return isinstance(other, Path) and str(self) == str(other)

     def children(self) -> List[FilterExpression]:
-        return [
-            s.expression for s in self.path.selectors if isinstance(s, FilterSelector)
-        ]
+        _children: List[FilterExpression] = []
+        for segment in self.path.selectors:
+            if isinstance(segment, ListSelector):
+                _children.extend(
+                    selector.expression
+                    for selector in segment.items
+                    if isinstance(selector, FilterSelector)
+                )
+        return _children

     def set_children(self, children: List[FilterExpression]) -> None:  # noqa: ARG002
         # self.path has its own cache
```
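The `__str__` change above means logical sub-expressions keep their parentheses in the normalized form, while simple comparisons lose them. A rough illustration, assuming `jsonpath.compile` returns an object whose `str()` is the normalized path; the exact output format is an assumption and may differ:

```python
import jsonpath

# No logical operator, so the normalized filter is not parenthesized.
print(str(jsonpath.compile("$.things[?(@.a == 1)]")))
# something like: $.things[?@.a == 1]

# A logical operator is present, so the expression is parenthesized.
print(str(jsonpath.compile("$.things[?@.a == 1 && @.b > 2]")))
# something like: $.things[?(@.a == 1 && @.b > 2)]
```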

jsonpath/lex.py

Lines changed: 3 additions & 62 deletions
```diff
@@ -10,25 +10,21 @@
 from .exceptions import JSONPathSyntaxError
 from .token import TOKEN_AND
 from .token import TOKEN_BARE_PROPERTY
-from .token import TOKEN_BRACKET_PROPERTY
 from .token import TOKEN_COMMA
 from .token import TOKEN_CONTAINS
 from .token import TOKEN_DDOT
-from .token import TOKEN_DOT_INDEX
 from .token import TOKEN_DOT_PROPERTY
 from .token import TOKEN_DOUBLE_QUOTE_STRING
 from .token import TOKEN_EQ
 from .token import TOKEN_FALSE
+from .token import TOKEN_FILTER
 from .token import TOKEN_FILTER_CONTEXT
-from .token import TOKEN_FILTER_END
-from .token import TOKEN_FILTER_START
 from .token import TOKEN_FLOAT
 from .token import TOKEN_FUNCTION
 from .token import TOKEN_GE
 from .token import TOKEN_GT
 from .token import TOKEN_ILLEGAL
 from .token import TOKEN_IN
-from .token import TOKEN_INDEX
 from .token import TOKEN_INT
 from .token import TOKEN_INTERSECTION
 from .token import TOKEN_KEY
@@ -56,7 +52,6 @@
 from .token import TOKEN_SELF
 from .token import TOKEN_SINGLE_QUOTE_STRING
 from .token import TOKEN_SKIP
-from .token import TOKEN_SLICE
 from .token import TOKEN_SLICE_START
 from .token import TOKEN_SLICE_STEP
 from .token import TOKEN_SLICE_STOP
@@ -84,28 +79,12 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
         # .thing
         self.dot_property_pattern = rf"\.(?P<G_PROP>{self.key_pattern})"

-        # [thing]
-        self.bracketed_property_pattern = rf"\[\s*(?P<G_BPROP>{self.key_pattern})\s*]"
-
-        # [1] or [-1]
-        self.index_pattern = r"\[\s*(?P<G_INDEX>\-?\s*\d+)\s*]"
-
-        # [:] or [1:-1] or [1:] or [:1] or [-1:] or [:-1] or [::] or [-1:0:-1]
-        self.slice_pattern = (
-            r"\[\s*(?P<G_SLICE_START>\-?\d*)\s*"
-            r":\s*(?P<G_SLICE_STOP>\-?\d*)\s*"
-            r"(?::\s*(?P<G_SLICE_STEP>\-?\d*))?\s*]"
-        )
-
         self.slice_list_pattern = (
             r"(?P<G_LSLICE_START>\-?\d*)\s*"
             r":\s*(?P<G_LSLICE_STOP>\-?\d*)\s*"
             r"(?::\s*(?P<G_LSLICE_STEP>\-?\d*))?"
         )

-        # .* or [*] or .[*]
-        self.wild_pattern = r"\.?(?:\[\s*\*\s*]|\*)"
-
         # `not` or !
         self.logical_not_pattern = r"(?:not|!)"

@@ -129,14 +108,10 @@ def compile_rules(self) -> Pattern[str]:
             (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern),
             (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern),
             (TOKEN_RE_PATTERN, self.re_pattern),
-            (TOKEN_INDEX, self.index_pattern),
-            (TOKEN_SLICE, self.slice_pattern),
-            (TOKEN_WILD, self.wild_pattern),
+            (TOKEN_WILD, r"\*"),
             (TOKEN_LIST_SLICE, self.slice_list_pattern),
-            (TOKEN_FILTER_START, r"\[\s*\?\s*\(?"),
-            (TOKEN_FILTER_END, r"\)\s*]"),
+            (TOKEN_FILTER, r"\?"),
             (TOKEN_FUNCTION, self.function_pattern),
-            (TOKEN_BRACKET_PROPERTY, self.bracketed_property_pattern),
             (TOKEN_DOT_PROPERTY, self.dot_property_pattern),
             (TOKEN_FLOAT, r"-?\d+\.\d*(?:e[+-]?\d+)?"),
             (TOKEN_INT, r"-?\d+(?P<G_EXP>e[+\-]?\d+)?\b"),
@@ -197,12 +172,6 @@ def tokenize(self, path: str) -> Iterator[Token]:  # noqa PLR0912
                     value=match.group("G_PROP"),
                     index=match.start("G_PROP"),
                 )
-            elif kind == TOKEN_BRACKET_PROPERTY:
-                yield _token(
-                    kind=TOKEN_PROPERTY,
-                    value=match.group("G_BPROP"),
-                    index=match.start("G_BPROP"),
-                )
             elif kind == TOKEN_BARE_PROPERTY:
                 yield _token(
                     kind=TOKEN_BARE_PROPERTY,
@@ -225,34 +194,6 @@ def tokenize(self, path: str) -> Iterator[Token]:  # noqa PLR0912
                     value=match.group("G_LSLICE_STEP") or "",
                     index=match.start("G_LSLICE_STEP"),
                 )
-            elif kind == TOKEN_DOT_INDEX:
-                yield _token(
-                    kind=TOKEN_INDEX,
-                    value=match.group("G_DINDEX"),
-                    index=match.start("G_DINDEX"),
-                )
-            elif kind == TOKEN_INDEX:
-                yield _token(
-                    kind=TOKEN_INDEX,
-                    value=match.group("G_INDEX"),
-                    index=match.start("G_INDEX"),
-                )
-            elif kind == TOKEN_SLICE:
-                yield _token(
-                    kind=TOKEN_SLICE_START,
-                    value=match.group("G_SLICE_START"),
-                    index=match.start("G_SLICE_START"),
-                )
-                yield _token(
-                    kind=TOKEN_SLICE_STOP,
-                    value=match.group("G_SLICE_STOP"),
-                    index=match.start("G_SLICE_STOP"),
-                )
-                yield _token(
-                    kind=TOKEN_SLICE_STEP,
-                    value=match.group("G_SLICE_STEP") or "",
-                    index=match.start("G_SLICE_STEP"),
-                )
             elif kind == TOKEN_DOUBLE_QUOTE_STRING:
                 yield _token(
                     kind=TOKEN_DOUBLE_QUOTE_STRING,
```
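In short, the lexer no longer matches whole bracketed constructs such as `[?(`, `[1]` or `[*]`; it now emits `?`, integers and `*` as standalone tokens and leaves segment structure to the parser. A rough sketch of inspecting the token stream follows; the `Lexer` class name, its import path and the `kind`/`value` token attributes are assumptions based on this diff:

```python
from jsonpath import JSONPathEnvironment
from jsonpath.lex import Lexer  # assumed import path and class name

lexer = Lexer(env=JSONPathEnvironment())

# '?' now arrives as a single TOKEN_FILTER token inside the brackets,
# and '*' no longer consumes the surrounding '[' and ']'.
for token in lexer.tokenize("$[?@.a == 1, 2:4, *]"):
    print(token.kind, repr(token.value))
```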

jsonpath/parse.py

Lines changed: 37 additions & 38 deletions
```diff
@@ -54,15 +54,13 @@
 from .token import TOKEN_EOF
 from .token import TOKEN_EQ
 from .token import TOKEN_FALSE
+from .token import TOKEN_FILTER
 from .token import TOKEN_FILTER_CONTEXT
-from .token import TOKEN_FILTER_END
-from .token import TOKEN_FILTER_START
 from .token import TOKEN_FLOAT
 from .token import TOKEN_FUNCTION
 from .token import TOKEN_GE
 from .token import TOKEN_GT
 from .token import TOKEN_IN
-from .token import TOKEN_INDEX
 from .token import TOKEN_INT
 from .token import TOKEN_INTERSECTION
 from .token import TOKEN_KEY
@@ -293,20 +291,21 @@ def parse_path(
                     env=self.env,
                     token=stream.current,
                     name=stream.current.value,
+                    shorthand=True,
                 )
-            elif stream.current.kind == TOKEN_INDEX:
-                yield self.parse_index(stream)
             elif stream.current.kind == TOKEN_SLICE_START:
                 yield self.parse_slice(stream)
             elif stream.current.kind == TOKEN_WILD:
                 yield WildSelector(
                     env=self.env,
                     token=stream.current,
+                    shorthand=True,
                 )
             elif stream.current.kind == TOKEN_KEYS:
                 yield KeysSelector(
                     env=self.env,
                     token=stream.current,
+                    shorthand=True,
                 )
             elif stream.current.kind == TOKEN_DDOT:
                 yield RecursiveDescentSelector(
@@ -315,29 +314,13 @@
                 )
             elif stream.current.kind == TOKEN_LIST_START:
                 yield self.parse_selector_list(stream)
-            elif stream.current.kind == TOKEN_FILTER_START:
-                yield self.parse_filter(stream)
             else:
                 if in_filter:
                     stream.push(stream.current)
                 break

             stream.next_token()

-    def parse_index(self, stream: TokenStream) -> IndexSelector:
-        """Parse an index selector from a stream of tokens."""
-        if (
-            len(stream.current.value) > 1 and stream.current.value.startswith("0")
-        ) or stream.current.value.startswith("-0"):
-            raise JSONPathSyntaxError(
-                "leading zero in index selector", token=stream.current
-            )
-        return IndexSelector(
-            env=self.env,
-            token=stream.current,
-            index=int(stream.current.value),
-        )
-
     def parse_slice(self, stream: TokenStream) -> SliceSelector:
         """Parse a slice JSONPath expression from a stream of tokens."""
         start_token = stream.next_token()
@@ -379,11 +362,19 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector:  # noqa: PLR
                 PropertySelector,
                 SliceSelector,
                 WildSelector,
+                Filter,
             ]
         ] = []

         while stream.current.kind != TOKEN_RBRACKET:
             if stream.current.kind == TOKEN_INT:
+                if (
+                    len(stream.current.value) > 1
+                    and stream.current.value.startswith("0")
+                ) or stream.current.value.startswith("-0"):
+                    raise JSONPathSyntaxError(
+                        "leading zero in index selector", token=stream.current
+                    )
                 list_items.append(
                     IndexSelector(
                         env=self.env,
@@ -397,13 +388,15 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector:  # noqa: PLR
                         env=self.env,
                         token=stream.current,
                         name=stream.current.value,
+                        shorthand=False,
                     ),
                 )
             elif stream.current.kind == TOKEN_KEYS:
                 list_items.append(
                     KeysSelector(
                         env=self.env,
                         token=stream.current,
+                        shorthand=False,
                     )
                 )
             elif stream.current.kind in (
@@ -421,12 +414,30 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector:  # noqa: PLR
                         env=self.env,
                         token=stream.current,
                         name=self._decode_string_literal(stream.current),
+                        shorthand=False,
                     ),
                 )
             elif stream.current.kind == TOKEN_SLICE_START:
                 list_items.append(self.parse_slice(stream))
             elif stream.current.kind == TOKEN_WILD:
-                list_items.append(WildSelector(env=self.env, token=stream.current))
+                list_items.append(
+                    WildSelector(
+                        env=self.env,
+                        token=stream.current,
+                        shorthand=False,
+                    )
+                )
+            elif stream.current.kind == TOKEN_FILTER:
+                list_items.append(self.parse_filter(stream))
+            elif stream.current.kind == TOKEN_EOF:
+                raise JSONPathSyntaxError(
+                    "unexpected end of query", token=stream.current
+                )
+            else:
+                raise JSONPathSyntaxError(
+                    f"unexpected token in bracketed selection {stream.current.kind!r}",
+                    token=stream.current,
+                )

             if stream.peek.kind == TOKEN_EOF:
                 raise JSONPathSyntaxError(
@@ -441,7 +452,7 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector:  # noqa: PLR
             stream.next_token()

         if not list_items:
-            raise JSONPathSyntaxError("empty segment", token=tok)
+            raise JSONPathSyntaxError("empty bracketed segment", token=tok)

         return ListSelector(env=self.env, token=tok, items=list_items)

@@ -460,11 +471,6 @@ def parse_filter(self, stream: TokenStream) -> Filter:
                 f"result of {expr.name}() must be compared", token=tok
             )

-        if stream.peek.kind == TOKEN_RPAREN:
-            raise JSONPathSyntaxError("unbalanced ')'", token=stream.current)
-
-        stream.next_token()
-        stream.expect(TOKEN_FILTER_END, TOKEN_RBRACKET)
         return Filter(env=self.env, token=tok, expression=BooleanExpression(expr))

     def parse_boolean(self, stream: TokenStream) -> FilterExpression:
@@ -525,14 +531,9 @@ def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression:
                 raise JSONPathSyntaxError(
                     "unbalanced parentheses", token=stream.current
                 )
-            if stream.current.kind == TOKEN_FILTER_END:
-                # In some cases, an RPAREN followed by an RBRACKET can
-                # look like the long form "end of filter" token.
-                stream.push(stream.current)
-                break
             expr = self.parse_infix_expression(stream, expr)

-        stream.expect(TOKEN_RPAREN, TOKEN_FILTER_END)
+        stream.expect(TOKEN_RPAREN)
         return expr

     def parse_root_path(self, stream: TokenStream) -> FilterExpression:
@@ -611,8 +612,6 @@ def parse_function_extension(self, stream: TokenStream) -> FilterExpression:
             function_arguments.append(expr)

             if stream.peek.kind != TOKEN_RPAREN:
-                if stream.peek.kind == TOKEN_FILTER_END:
-                    break
                 stream.expect_peek(TOKEN_COMMA)
                 stream.next_token()

@@ -629,7 +628,7 @@ def parse_filter_selector(
         try:
             left = self.token_map[stream.current.kind](stream)
         except KeyError as err:
-            if stream.current.kind in (TOKEN_EOF, TOKEN_FILTER_END, TOKEN_RBRACKET):
+            if stream.current.kind in (TOKEN_EOF, TOKEN_RBRACKET):
                 msg = "end of expression"
             else:
                 msg = repr(stream.current.value)
@@ -640,7 +639,7 @@
         while True:
             peek_kind = stream.peek.kind
             if (
-                peek_kind in (TOKEN_EOF, TOKEN_FILTER_END, TOKEN_RBRACKET)
+                peek_kind in (TOKEN_EOF, TOKEN_RBRACKET)
                 or self.PRECEDENCES.get(peek_kind, self.PRECEDENCE_LOWEST) < precedence
             ):
                 break
```
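The validation moved into `parse_selector_list` turns malformed bracketed segments into compile-time errors with the messages seen in this diff. A sketch, assuming `jsonpath.compile` as the entry point and `JSONPathSyntaxError` importable from `jsonpath.exceptions` (its location per the import in this diff):

```python
import jsonpath
from jsonpath.exceptions import JSONPathSyntaxError

for query in (
    "$[]",    # "empty bracketed segment"
    "$[01]",  # "leading zero in index selector"
    "$[",     # "unexpected end of query"
):
    try:
        jsonpath.compile(query)
    except JSONPathSyntaxError as err:
        print(f"{query!r}: {err}")
```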
