
Commit e05a344

Add support for filter selectors in bracketed segments. (#36)

1 parent 3c2a1d5 commit e05a344
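For a sense of what this commit enables, here is a minimal sketch using the library's top-level `findall` function. The sample data and query are illustrative assumptions, not taken from the commit:

```python
import jsonpath

data = {"users": [{"name": "Sue", "score": 100}, {"name": "John", "score": 86}]}

# A single bracketed segment combining an index selector and a filter
# selector, separated by a comma. Mixing filter selectors with other
# selectors in one segment is the case this commit adds support for.
matches = jsonpath.findall("$.users[0, ?@.score > 90]", data)
print(matches)  # each selector contributes its matches to the result
```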

12 files changed: +463 −260 lines changed

CHANGELOG.md

Lines changed: 5 additions & 3 deletions
```diff
@@ -4,9 +4,10 @@

 **Breaking Changes**

-- We now enforce JSONPath filter expression "well-typedness" by default. That is, filter expressions are checked at compile time according to the IETF JSONPath Draft function extension type system and rules regarding non-singular query usage. If an expression is deemed to not be well-typed, a `JSONPathTypeError` is raised. This can be disabled in Python JSONPath by setting the `well_typed` argument to `JSONPathEnvironment` to `False`, or using `--no-type-checks` on the command line.
-- The JSONPath lexer now yields distinct tokens for single and double quoted string literals. This is so the parser can do a better job of detecting invalid escape sequences.
-- Changed the canonical representation of a JSONPath string literal to use double quotes instead of single quotes.
+- We now enforce JSONPath filter expression "well-typedness" by default. That is, filter expressions are checked at compile time according to the [IETF JSONPath Draft function extension type system](https://datatracker.ietf.org/doc/html/draft-ietf-jsonpath-base-21#section-2.4.1) and rules regarding non-singular query usage. If an expression is deemed to not be well-typed, a `JSONPathTypeError` is raised. This can be disabled in Python JSONPath by setting the `well_typed` argument to `JSONPathEnvironment` to `False`, or using `--no-type-checks` on the command line.
+- The JSONPath lexer and parser have been refactored to accommodate [#30](https://github.com/jg-rp/python-jsonpath/issues/30). As a result, the tokens generated by the lexer and the AST built by the parser have changed significantly. In the unlikely event that anyone is customizing the lexer or parser through subclassing, please [open an issue](https://github.com/jg-rp/python-jsonpath/issues) and I'll provide more details.
+- Changed the normalized representation of JSONPath string literals to use double quotes instead of single quotes.
+- Changed the normalized representation of JSONPath filter expressions to not include parentheses unless the expression includes one or more logical operators.
 - The built-in implementation of the standard `length()` filter function is now a class and is renamed to `jsonpath.function_extensions.Length`.
 - The built-in implementation of the standard `value()` filter function is now a class and is renamed to `jsonpath.function_extensions.Value`.
@@ -16,6 +17,7 @@
 - Fixed parsing of JSONPath integer literals that use scientific notation. Previously we raised a `JSONPathSyntaxError` for literals such as `1e2`.
 - Fixed parsing of JSONPath comparison and logical expressions as filter function arguments. Previously we raised a `JSONPathSyntaxError` if a comparison or logical expression appeared as a filter function argument. Note that none of the built-in, standard filter functions accept arguments of `LogicalType`.
 - Fixed parsing of nested JSONPath filter functions, where a function is used as an argument to another.
+- Fixed JSONPath bracketed segments. We now handle an arbitrary number of filter selectors alongside name, index, slice and wildcard selectors, separated by commas. See [#30](https://github.com/jg-rp/python-jsonpath/issues/30).

 ## Version 0.9.0

```
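As a usage note on the first breaking change above, the following sketch shows the new compile-time type checking and the documented opt-out. It assumes `JSONPathEnvironment` and `JSONPathTypeError` are importable from the top-level `jsonpath` package, as the changelog's naming suggests:

```python
from jsonpath import JSONPathEnvironment
from jsonpath import JSONPathTypeError

strict = JSONPathEnvironment()  # well-typedness checks are on by default

try:
    # length() takes a single value, but @..* is a non-singular query,
    # so this filter expression is not well-typed.
    strict.compile("$[?length(@..*) > 2]")
except JSONPathTypeError as err:
    print(err)

# Disable compile-time type checks, per the changelog entry.
lax = JSONPathEnvironment(well_typed=False)
path = lax.compile("$[?length(@..*) > 2]")
```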

jsonpath/filter.py

Lines changed: 12 additions & 3 deletions
```diff
@@ -23,6 +23,7 @@
 from .function_extensions import FilterFunction
 from .match import NodeList
 from .selectors import Filter as FilterSelector
+from .selectors import ListSelector

 if TYPE_CHECKING:
     from .path import JSONPath
@@ -330,6 +331,8 @@ def __init__(
         super().__init__()

     def __str__(self) -> str:
+        if self.operator in ("&&", "||"):
+            return f"({self.left} {self.operator} {self.right})"
         return f"{self.left} {self.operator} {self.right}"

     def __eq__(self, other: object) -> bool:
@@ -470,9 +473,15 @@ def __eq__(self, other: object) -> bool:
         return isinstance(other, Path) and str(self) == str(other)

     def children(self) -> List[FilterExpression]:
-        return [
-            s.expression for s in self.path.selectors if isinstance(s, FilterSelector)
-        ]
+        _children: List[FilterExpression] = []
+        for segment in self.path.selectors:
+            if isinstance(segment, ListSelector):
+                _children.extend(
+                    selector.expression
+                    for selector in segment.items
+                    if isinstance(selector, FilterSelector)
+                )
+        return _children

     def set_children(self, children: List[FilterExpression]) -> None:  # noqa: ARG002
         # self.path has its own cache
```
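The `__str__` change above means logical sub-expressions keep their parentheses in the normalized form, while simple comparisons lose them. A rough illustration, assuming `jsonpath.compile` returns an object whose `str()` is the normalized path; the exact output format is an assumption and may differ:

```python
import jsonpath

# No logical operator, so the normalized filter is not parenthesized.
print(str(jsonpath.compile("$.things[?(@.a == 1)]")))
# something like: $.things[?@.a == 1]

# A logical operator is present, so the expression is parenthesized.
print(str(jsonpath.compile("$.things[?@.a == 1 && @.b > 2]")))
# something like: $.things[?(@.a == 1 && @.b > 2)]
```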

jsonpath/lex.py

Lines changed: 3 additions & 62 deletions
```diff
@@ -10,25 +10,21 @@
 from .exceptions import JSONPathSyntaxError
 from .token import TOKEN_AND
 from .token import TOKEN_BARE_PROPERTY
-from .token import TOKEN_BRACKET_PROPERTY
 from .token import TOKEN_COMMA
 from .token import TOKEN_CONTAINS
 from .token import TOKEN_DDOT
-from .token import TOKEN_DOT_INDEX
 from .token import TOKEN_DOT_PROPERTY
 from .token import TOKEN_DOUBLE_QUOTE_STRING
 from .token import TOKEN_EQ
 from .token import TOKEN_FALSE
+from .token import TOKEN_FILTER
 from .token import TOKEN_FILTER_CONTEXT
-from .token import TOKEN_FILTER_END
-from .token import TOKEN_FILTER_START
 from .token import TOKEN_FLOAT
 from .token import TOKEN_FUNCTION
 from .token import TOKEN_GE
 from .token import TOKEN_GT
 from .token import TOKEN_ILLEGAL
 from .token import TOKEN_IN
-from .token import TOKEN_INDEX
 from .token import TOKEN_INT
 from .token import TOKEN_INTERSECTION
 from .token import TOKEN_KEY
@@ -56,7 +52,6 @@
 from .token import TOKEN_SELF
 from .token import TOKEN_SINGLE_QUOTE_STRING
 from .token import TOKEN_SKIP
-from .token import TOKEN_SLICE
 from .token import TOKEN_SLICE_START
 from .token import TOKEN_SLICE_STEP
 from .token import TOKEN_SLICE_STOP
@@ -84,28 +79,12 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
         # .thing
         self.dot_property_pattern = rf"\.(?P<G_PROP>{self.key_pattern})"

-        # [thing]
-        self.bracketed_property_pattern = rf"\[\s*(?P<G_BPROP>{self.key_pattern})\s*]"
-
-        # [1] or [-1]
-        self.index_pattern = r"\[\s*(?P<G_INDEX>\-?\s*\d+)\s*]"
-
-        # [:] or [1:-1] or [1:] or [:1] or [-1:] or [:-1] or [::] or [-1:0:-1]
-        self.slice_pattern = (
-            r"\[\s*(?P<G_SLICE_START>\-?\d*)\s*"
-            r":\s*(?P<G_SLICE_STOP>\-?\d*)\s*"
-            r"(?::\s*(?P<G_SLICE_STEP>\-?\d*))?\s*]"
-        )
-
         self.slice_list_pattern = (
             r"(?P<G_LSLICE_START>\-?\d*)\s*"
             r":\s*(?P<G_LSLICE_STOP>\-?\d*)\s*"
             r"(?::\s*(?P<G_LSLICE_STEP>\-?\d*))?"
         )

-        # .* or [*] or .[*]
-        self.wild_pattern = r"\.?(?:\[\s*\*\s*]|\*)"
-
         # `not` or !
         self.logical_not_pattern = r"(?:not|!)"

@@ -129,14 +108,10 @@ def compile_rules(self) -> Pattern[str]:
             (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern),
             (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern),
             (TOKEN_RE_PATTERN, self.re_pattern),
-            (TOKEN_INDEX, self.index_pattern),
-            (TOKEN_SLICE, self.slice_pattern),
-            (TOKEN_WILD, self.wild_pattern),
+            (TOKEN_WILD, r"\*"),
             (TOKEN_LIST_SLICE, self.slice_list_pattern),
-            (TOKEN_FILTER_START, r"\[\s*\?\s*\(?"),
-            (TOKEN_FILTER_END, r"\)\s*]"),
+            (TOKEN_FILTER, r"\?"),
             (TOKEN_FUNCTION, self.function_pattern),
-            (TOKEN_BRACKET_PROPERTY, self.bracketed_property_pattern),
             (TOKEN_DOT_PROPERTY, self.dot_property_pattern),
             (TOKEN_FLOAT, r"-?\d+\.\d*(?:e[+-]?\d+)?"),
             (TOKEN_INT, r"-?\d+(?P<G_EXP>e[+\-]?\d+)?\b"),
@@ -197,12 +172,6 @@ def tokenize(self, path: str) -> Iterator[Token]:  # noqa PLR0912
                     value=match.group("G_PROP"),
                     index=match.start("G_PROP"),
                 )
-            elif kind == TOKEN_BRACKET_PROPERTY:
-                yield _token(
-                    kind=TOKEN_PROPERTY,
-                    value=match.group("G_BPROP"),
-                    index=match.start("G_BPROP"),
-                )
             elif kind == TOKEN_BARE_PROPERTY:
                 yield _token(
                     kind=TOKEN_BARE_PROPERTY,
@@ -225,34 +194,6 @@ def tokenize(self, path: str) -> Iterator[Token]:  # noqa PLR0912
                     value=match.group("G_LSLICE_STEP") or "",
                     index=match.start("G_LSLICE_STEP"),
                 )
-            elif kind == TOKEN_DOT_INDEX:
-                yield _token(
-                    kind=TOKEN_INDEX,
-                    value=match.group("G_DINDEX"),
-                    index=match.start("G_DINDEX"),
-                )
-            elif kind == TOKEN_INDEX:
-                yield _token(
-                    kind=TOKEN_INDEX,
-                    value=match.group("G_INDEX"),
-                    index=match.start("G_INDEX"),
-                )
-            elif kind == TOKEN_SLICE:
-                yield _token(
-                    kind=TOKEN_SLICE_START,
-                    value=match.group("G_SLICE_START"),
-                    index=match.start("G_SLICE_START"),
-                )
-                yield _token(
-                    kind=TOKEN_SLICE_STOP,
-                    value=match.group("G_SLICE_STOP"),
-                    index=match.start("G_SLICE_STOP"),
-                )
-                yield _token(
-                    kind=TOKEN_SLICE_STEP,
-                    value=match.group("G_SLICE_STEP") or "",
-                    index=match.start("G_SLICE_STEP"),
-                )
             elif kind == TOKEN_DOUBLE_QUOTE_STRING:
                 yield _token(
                     kind=TOKEN_DOUBLE_QUOTE_STRING,
```
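In short, the lexer no longer matches whole bracketed constructs such as `[?(`, `[1]` or `[*]`; it now emits `?`, integers and `*` as standalone tokens and leaves segment structure to the parser. A rough sketch of inspecting the token stream follows; the `Lexer` class name, its import path and the `kind`/`value` token attributes are assumptions based on this diff:

```python
from jsonpath import JSONPathEnvironment
from jsonpath.lex import Lexer  # assumed import path and class name

lexer = Lexer(env=JSONPathEnvironment())

# '?' now arrives as a single TOKEN_FILTER token inside the brackets,
# and '*' no longer consumes the surrounding '[' and ']'.
for token in lexer.tokenize("$[?@.a == 1, 2:4, *]"):
    print(token.kind, repr(token.value))
```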

jsonpath/parse.py

Lines changed: 37 additions & 38 deletions
```diff
@@ -54,15 +54,13 @@
 from .token import TOKEN_EOF
 from .token import TOKEN_EQ
 from .token import TOKEN_FALSE
+from .token import TOKEN_FILTER
 from .token import TOKEN_FILTER_CONTEXT
-from .token import TOKEN_FILTER_END
-from .token import TOKEN_FILTER_START
 from .token import TOKEN_FLOAT
 from .token import TOKEN_FUNCTION
 from .token import TOKEN_GE
 from .token import TOKEN_GT
 from .token import TOKEN_IN
-from .token import TOKEN_INDEX
 from .token import TOKEN_INT
 from .token import TOKEN_INTERSECTION
 from .token import TOKEN_KEY
@@ -293,20 +291,21 @@ def parse_path(
                     env=self.env,
                     token=stream.current,
                     name=stream.current.value,
+                    shorthand=True,
                 )
-            elif stream.current.kind == TOKEN_INDEX:
-                yield self.parse_index(stream)
             elif stream.current.kind == TOKEN_SLICE_START:
                 yield self.parse_slice(stream)
             elif stream.current.kind == TOKEN_WILD:
                 yield WildSelector(
                     env=self.env,
                     token=stream.current,
+                    shorthand=True,
                 )
             elif stream.current.kind == TOKEN_KEYS:
                 yield KeysSelector(
                     env=self.env,
                     token=stream.current,
+                    shorthand=True,
                 )
             elif stream.current.kind == TOKEN_DDOT:
                 yield RecursiveDescentSelector(
@@ -315,29 +314,13 @@
                 )
             elif stream.current.kind == TOKEN_LIST_START:
                 yield self.parse_selector_list(stream)
-            elif stream.current.kind == TOKEN_FILTER_START:
-                yield self.parse_filter(stream)
             else:
                 if in_filter:
                     stream.push(stream.current)
                 break

             stream.next_token()

-    def parse_index(self, stream: TokenStream) -> IndexSelector:
-        """Parse an index selector from a stream of tokens."""
-        if (
-            len(stream.current.value) > 1 and stream.current.value.startswith("0")
-        ) or stream.current.value.startswith("-0"):
-            raise JSONPathSyntaxError(
-                "leading zero in index selector", token=stream.current
-            )
-        return IndexSelector(
-            env=self.env,
-            token=stream.current,
-            index=int(stream.current.value),
-        )
-
     def parse_slice(self, stream: TokenStream) -> SliceSelector:
         """Parse a slice JSONPath expression from a stream of tokens."""
         start_token = stream.next_token()
@@ -379,11 +362,19 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector:  # noqa: PLR
                 PropertySelector,
                 SliceSelector,
                 WildSelector,
+                Filter,
             ]
         ] = []

         while stream.current.kind != TOKEN_RBRACKET:
             if stream.current.kind == TOKEN_INT:
+                if (
+                    len(stream.current.value) > 1
+                    and stream.current.value.startswith("0")
+                ) or stream.current.value.startswith("-0"):
+                    raise JSONPathSyntaxError(
+                        "leading zero in index selector", token=stream.current
+                    )
                 list_items.append(
                     IndexSelector(
                         env=self.env,
@@ -397,13 +388,15 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector:  # noqa: PLR
                         env=self.env,
                         token=stream.current,
                         name=stream.current.value,
+                        shorthand=False,
                     ),
                 )
             elif stream.current.kind == TOKEN_KEYS:
                 list_items.append(
                     KeysSelector(
                         env=self.env,
                         token=stream.current,
+                        shorthand=False,
                     )
                 )
             elif stream.current.kind in (
@@ -421,12 +414,30 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector:  # noqa: PLR
                         env=self.env,
                         token=stream.current,
                         name=self._decode_string_literal(stream.current),
+                        shorthand=False,
                     ),
                 )
             elif stream.current.kind == TOKEN_SLICE_START:
                 list_items.append(self.parse_slice(stream))
             elif stream.current.kind == TOKEN_WILD:
-                list_items.append(WildSelector(env=self.env, token=stream.current))
+                list_items.append(
+                    WildSelector(
+                        env=self.env,
+                        token=stream.current,
+                        shorthand=False,
+                    )
+                )
+            elif stream.current.kind == TOKEN_FILTER:
+                list_items.append(self.parse_filter(stream))
+            elif stream.current.kind == TOKEN_EOF:
+                raise JSONPathSyntaxError(
+                    "unexpected end of query", token=stream.current
+                )
+            else:
+                raise JSONPathSyntaxError(
+                    f"unexpected token in bracketed selection {stream.current.kind!r}",
+                    token=stream.current,
+                )

             if stream.peek.kind == TOKEN_EOF:
                 raise JSONPathSyntaxError(
@@ -441,7 +452,7 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector:  # noqa: PLR
             stream.next_token()

         if not list_items:
-            raise JSONPathSyntaxError("empty segment", token=tok)
+            raise JSONPathSyntaxError("empty bracketed segment", token=tok)

         return ListSelector(env=self.env, token=tok, items=list_items)

@@ -460,11 +471,6 @@ def parse_filter(self, stream: TokenStream) -> Filter:
                 f"result of {expr.name}() must be compared", token=tok
             )

-        if stream.peek.kind == TOKEN_RPAREN:
-            raise JSONPathSyntaxError("unbalanced ')'", token=stream.current)
-
-        stream.next_token()
-        stream.expect(TOKEN_FILTER_END, TOKEN_RBRACKET)
         return Filter(env=self.env, token=tok, expression=BooleanExpression(expr))

     def parse_boolean(self, stream: TokenStream) -> FilterExpression:
@@ -525,14 +531,9 @@ def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression:
                 raise JSONPathSyntaxError(
                     "unbalanced parentheses", token=stream.current
                 )
-            if stream.current.kind == TOKEN_FILTER_END:
-                # In some cases, an RPAREN followed by an RBRACKET can
-                # look like the long form "end of filter" token.
-                stream.push(stream.current)
-                break
             expr = self.parse_infix_expression(stream, expr)

-        stream.expect(TOKEN_RPAREN, TOKEN_FILTER_END)
+        stream.expect(TOKEN_RPAREN)
         return expr

     def parse_root_path(self, stream: TokenStream) -> FilterExpression:
@@ -611,8 +612,6 @@ def parse_function_extension(self, stream: TokenStream) -> FilterExpression:
             function_arguments.append(expr)

             if stream.peek.kind != TOKEN_RPAREN:
-                if stream.peek.kind == TOKEN_FILTER_END:
-                    break
                 stream.expect_peek(TOKEN_COMMA)
                 stream.next_token()

@@ -629,7 +628,7 @@ def parse_filter_selector(
         try:
             left = self.token_map[stream.current.kind](stream)
         except KeyError as err:
-            if stream.current.kind in (TOKEN_EOF, TOKEN_FILTER_END, TOKEN_RBRACKET):
+            if stream.current.kind in (TOKEN_EOF, TOKEN_RBRACKET):
                 msg = "end of expression"
             else:
                 msg = repr(stream.current.value)
@@ -640,7 +639,7 @@
         while True:
             peek_kind = stream.peek.kind
             if (
-                peek_kind in (TOKEN_EOF, TOKEN_FILTER_END, TOKEN_RBRACKET)
+                peek_kind in (TOKEN_EOF, TOKEN_RBRACKET)
                 or self.PRECEDENCES.get(peek_kind, self.PRECEDENCE_LOWEST) < precedence
             ):
                 break
```
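The validation moved into `parse_selector_list` turns malformed bracketed segments into compile-time errors with the messages seen in this diff. A sketch, assuming `jsonpath.compile` as the entry point and `JSONPathSyntaxError` importable from `jsonpath.exceptions` (its location per the import in this diff):

```python
import jsonpath
from jsonpath.exceptions import JSONPathSyntaxError

for query in (
    "$[]",    # "empty bracketed segment"
    "$[01]",  # "leading zero in index selector"
    "$[",     # "unexpected end of query"
):
    try:
        jsonpath.compile(query)
    except JSONPathSyntaxError as err:
        print(f"{query!r}: {err}")
```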
