Skip to content

Commit 04d4ddd

Browse files
authored
Merge pull request #51 from jg-rp/fake-root
Add non-standard fake root identifier.
2 parents 8e72725 + fc44327 commit 04d4ddd

File tree

12 files changed

+158
-25
lines changed

12 files changed

+158
-25
lines changed

.github/workflows/tests.yaml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,12 @@ jobs:
99
fail-fast: false
1010
matrix:
1111
os: [ubuntu-latest, windows-latest, macos-latest]
12-
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12.0-rc.3"]
12+
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
13+
exclude:
14+
- os: macos-latest
15+
python-version: "3.7"
16+
- os: windows-latest
17+
python-version: "3.7"
1318
steps:
1419
- uses: actions/checkout@v3
1520
with:

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
# Python JSONPath Change Log
22

3+
## Version 0.11.0 (unreleased)
4+
5+
**Fixes**
6+
7+
- The lexer now sorts environment-controlled tokens by their length in descending order. This allows one custom token to be a prefix of another.
8+
9+
**Features**
10+
11+
- Added the non-standard "fake root" identifier, which defaults to `^` and can be customized with the `fake_root_token` attribute on a `JSONPathEnvironment` subclass. Using the fake root identifier is equivalent to the standard root identifier (`$`), but wraps the target JSON value in an array, so the root value can be conditionally selected using a filter.
12+
- Non-standard environment-controlled tokens can now be disabled by setting them to the empty string.
13+
314
## Version 0.10.3
415

516
**Changes**

docs/syntax.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,16 @@ Filter expressions can call predefined [function extensions](functions.md) too.
162162
$.categories[?count(@.products.*) >= 2]
163163
```
164164

165+
### Fake root (`^`)
166+
167+
**_New in version 0.11.0_**
168+
169+
This non-standard "fake root" identifier behaves like the standard root identifier (`$`), but wraps the target JSON document in a single-element array, so as to make it selectable with a filter selector.
170+
171+
```text
172+
^[?length(categories) > 0]
173+
```
174+
165175
### Union (`|`) and intersection (`&`)
166176

167177
Union (`|`) and intersection (`&`) are similar to Python's set operations, but we don't dedupe the matches (matches will often contain unhashable objects).
@@ -209,3 +219,4 @@ And this is a list of features that are uncommon or unique to Python JSONPath.
209219
- `#` is the current key/property or index identifier when filtering a mapping or sequence.
210220
- `_` is a filter context selector. With usage similar to `$` and `@`, `_` exposes arbitrary data from the `filter_context` argument to `findall()` and `finditer()`.
211221
- `~` is a "keys" or "properties" selector.
222+
- `^` is a "fake root" identifier. It is equivalent to `$`, but wraps the target JSON document in a single-element array, so the root value can be conditionally selected with a filter selector.

jsonpath/env.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from .path import JSONPath
3939
from .stream import TokenStream
4040
from .token import TOKEN_EOF
41+
from .token import TOKEN_FAKE_ROOT
4142
from .token import TOKEN_INTERSECTION
4243
from .token import TOKEN_UNION
4344
from .token import Token
@@ -89,6 +90,8 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
8990
**New in version 0.10.0**
9091
9192
Attributes:
93+
fake_root_token (str): The pattern used to select a "fake" root node, one level
94+
above the real root node.
9295
filter_context_token (str): The pattern used to select extra filter context
9396
data. Defaults to `"_"`.
9497
intersection_token (str): The pattern used as the intersection operator.
@@ -112,6 +115,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
112115

113116
# These should be unescaped strings. `re.escape` will be called
114117
# on them automatically when compiling lexer rules.
118+
fake_root_token = "^"
115119
filter_context_token = "_"
116120
intersection_token = "&"
117121
key_token = "#"
@@ -174,8 +178,9 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003
174178
"""
175179
tokens = self.lexer.tokenize(path)
176180
stream = TokenStream(tokens)
181+
fake_root = stream.current.kind == TOKEN_FAKE_ROOT
177182
_path: Union[JSONPath, CompoundJSONPath] = JSONPath(
178-
env=self, selectors=self.parser.parse(stream)
183+
env=self, selectors=self.parser.parse(stream), fake_root=fake_root
179184
)
180185

181186
if stream.current.kind != TOKEN_EOF:
@@ -190,18 +195,22 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003
190195

191196
if stream.current.kind == TOKEN_UNION:
192197
stream.next_token()
198+
fake_root = stream.current.kind == TOKEN_FAKE_ROOT
193199
_path = _path.union(
194200
JSONPath(
195201
env=self,
196202
selectors=self.parser.parse(stream),
203+
fake_root=fake_root,
197204
)
198205
)
199206
elif stream.current.kind == TOKEN_INTERSECTION:
200207
stream.next_token()
208+
fake_root = stream.current.kind == TOKEN_FAKE_ROOT
201209
_path = _path.intersection(
202210
JSONPath(
203211
env=self,
204212
selectors=self.parser.parse(stream),
213+
fake_root=fake_root,
205214
)
206215
)
207216
else: # pragma: no cover

jsonpath/lex.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from .token import TOKEN_DOT_PROPERTY
1717
from .token import TOKEN_DOUBLE_QUOTE_STRING
1818
from .token import TOKEN_EQ
19+
from .token import TOKEN_FAKE_ROOT
1920
from .token import TOKEN_FALSE
2021
from .token import TOKEN_FILTER
2122
from .token import TOKEN_FILTER_CONTEXT
@@ -119,6 +120,17 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
119120

120121
def compile_rules(self) -> Pattern[str]:
121122
"""Prepare regular expression rules."""
123+
env_tokens = [
124+
(TOKEN_ROOT, self.env.root_token),
125+
(TOKEN_FAKE_ROOT, self.env.fake_root_token),
126+
(TOKEN_SELF, self.env.self_token),
127+
(TOKEN_KEY, self.env.key_token),
128+
(TOKEN_UNION, self.env.union_token),
129+
(TOKEN_INTERSECTION, self.env.intersection_token),
130+
(TOKEN_FILTER_CONTEXT, self.env.filter_context_token),
131+
(TOKEN_KEYS, self.env.keys_selector_token),
132+
]
133+
122134
rules = [
123135
(TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern),
124136
(TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern),
@@ -131,13 +143,13 @@ def compile_rules(self) -> Pattern[str]:
131143
(TOKEN_DDOT, r"\.\."),
132144
(TOKEN_AND, self.logical_and_pattern),
133145
(TOKEN_OR, self.logical_or_pattern),
134-
(TOKEN_ROOT, re.escape(self.env.root_token)),
135-
(TOKEN_SELF, re.escape(self.env.self_token)),
136-
(TOKEN_KEY, re.escape(self.env.key_token)),
137-
(TOKEN_UNION, re.escape(self.env.union_token)),
138-
(TOKEN_INTERSECTION, re.escape(self.env.intersection_token)),
139-
(TOKEN_FILTER_CONTEXT, re.escape(self.env.filter_context_token)),
140-
(TOKEN_KEYS, re.escape(self.env.keys_selector_token)),
146+
*[
147+
(token, re.escape(pattern))
148+
for token, pattern in sorted(
149+
env_tokens, key=lambda x: len(x[1]), reverse=True
150+
)
151+
if pattern
152+
],
141153
(TOKEN_WILD, r"\*"),
142154
(TOKEN_FILTER, r"\?"),
143155
(TOKEN_IN, r"in"),

jsonpath/parse.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
from .token import TOKEN_DOUBLE_QUOTE_STRING
5454
from .token import TOKEN_EOF
5555
from .token import TOKEN_EQ
56+
from .token import TOKEN_FAKE_ROOT
5657
from .token import TOKEN_FALSE
5758
from .token import TOKEN_FILTER
5859
from .token import TOKEN_FILTER_CONTEXT
@@ -213,8 +214,12 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
213214
self.env = env
214215

215216
self.token_map: Dict[str, Callable[[TokenStream], FilterExpression]] = {
217+
TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal,
218+
TOKEN_FAKE_ROOT: self.parse_root_path,
216219
TOKEN_FALSE: self.parse_boolean,
220+
TOKEN_FILTER_CONTEXT: self.parse_filter_context_path,
217221
TOKEN_FLOAT: self.parse_float_literal,
222+
TOKEN_FUNCTION: self.parse_function_extension,
218223
TOKEN_INT: self.parse_integer_literal,
219224
TOKEN_KEY: self.parse_current_key,
220225
TOKEN_LIST_START: self.parse_list_literal,
@@ -227,12 +232,9 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
227232
TOKEN_RE_PATTERN: self.parse_regex,
228233
TOKEN_ROOT: self.parse_root_path,
229234
TOKEN_SELF: self.parse_self_path,
230-
TOKEN_FILTER_CONTEXT: self.parse_filter_context_path,
231-
TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal,
232235
TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal,
233236
TOKEN_TRUE: self.parse_boolean,
234237
TOKEN_UNDEFINED: self.parse_undefined,
235-
TOKEN_FUNCTION: self.parse_function_extension,
236238
}
237239

238240
self.list_item_map: Dict[str, Callable[[TokenStream], FilterExpression]] = {
@@ -250,25 +252,26 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
250252
self.function_argument_map: Dict[
251253
str, Callable[[TokenStream], FilterExpression]
252254
] = {
255+
TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal,
256+
TOKEN_FAKE_ROOT: self.parse_root_path,
253257
TOKEN_FALSE: self.parse_boolean,
258+
TOKEN_FILTER_CONTEXT: self.parse_filter_context_path,
254259
TOKEN_FLOAT: self.parse_float_literal,
260+
TOKEN_FUNCTION: self.parse_function_extension,
255261
TOKEN_INT: self.parse_integer_literal,
256262
TOKEN_KEY: self.parse_current_key,
257263
TOKEN_NIL: self.parse_nil,
258264
TOKEN_NONE: self.parse_nil,
259265
TOKEN_NULL: self.parse_nil,
260-
TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal,
261-
TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal,
262-
TOKEN_TRUE: self.parse_boolean,
263266
TOKEN_ROOT: self.parse_root_path,
264267
TOKEN_SELF: self.parse_self_path,
265-
TOKEN_FILTER_CONTEXT: self.parse_filter_context_path,
266-
TOKEN_FUNCTION: self.parse_function_extension,
268+
TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal,
269+
TOKEN_TRUE: self.parse_boolean,
267270
}
268271

269272
def parse(self, stream: TokenStream) -> Iterable[JSONPathSelector]:
270273
"""Parse a JSONPath from a stream of tokens."""
271-
if stream.current.kind == TOKEN_ROOT:
274+
if stream.current.kind in {TOKEN_ROOT, TOKEN_FAKE_ROOT}:
272275
stream.next_token()
273276
yield from self.parse_path(stream, in_filter=False)
274277

@@ -533,9 +536,14 @@ def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression:
533536
return expr
534537

535538
def parse_root_path(self, stream: TokenStream) -> FilterExpression:
536-
stream.next_token()
539+
root = stream.next_token()
540+
assert root.kind in {TOKEN_ROOT, TOKEN_FAKE_ROOT} # XXX:
537541
return RootPath(
538-
JSONPath(env=self.env, selectors=self.parse_path(stream, in_filter=True))
542+
JSONPath(
543+
env=self.env,
544+
selectors=self.parse_path(stream, in_filter=True),
545+
fake_root=root.kind == TOKEN_FAKE_ROOT,
546+
)
539547
)
540548

541549
def parse_self_path(self, stream: TokenStream) -> FilterExpression:

jsonpath/path.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,22 +35,27 @@ class JSONPath:
3535
env: The `JSONPathEnvironment` this path is bound to.
3636
selectors: An iterable of `JSONPathSelector` objects, as generated by
3737
a `Parser`.
38+
fake_root: Indicates if target JSON values should be wrapped in a single-
39+
element array, so as to make the target root value selectable.
40+
3841
3942
Attributes:
4043
env: The `JSONPathEnvironment` this path is bound to.
4144
selectors: The `JSONPathSelector` instances that make up this path.
4245
"""
4346

44-
__slots__ = ("env", "selectors")
47+
__slots__ = ("env", "fake_root", "selectors")
4548

4649
def __init__(
4750
self,
4851
*,
4952
env: JSONPathEnvironment,
5053
selectors: Iterable[JSONPathSelector],
54+
fake_root: bool = False,
5155
) -> None:
5256
self.env = env
5357
self.selectors = tuple(selectors)
58+
self.fake_root = fake_root
5459

5560
def __str__(self) -> str:
5661
return self.env.root_token + "".join(
@@ -122,7 +127,7 @@ def finditer(
122127
matches: Iterable[JSONPathMatch] = [
123128
JSONPathMatch(
124129
filter_context=filter_context or {},
125-
obj=_data,
130+
obj=[_data] if self.fake_root else _data,
126131
parent=None,
127132
path=self.env.root_token,
128133
parts=(),
@@ -161,7 +166,7 @@ async def finditer_async(
161166
async def root_iter() -> AsyncIterable[JSONPathMatch]:
162167
yield self.env.match_class(
163168
filter_context=filter_context or {},
164-
obj=_data,
169+
obj=[_data] if self.fake_root else _data,
165170
parent=None,
166171
path=self.env.root_token,
167172
parts=(),

jsonpath/token.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
TOKEN_DOT_INDEX = sys.intern("DINDEX")
1616
TOKEN_DOT_PROPERTY = sys.intern("DOT_PROPERTY")
1717
TOKEN_FILTER = sys.intern("FILTER")
18+
TOKEN_FAKE_ROOT = sys.intern("FAKE_ROOT")
1819
TOKEN_KEY = sys.intern("KEY")
1920
TOKEN_KEYS = sys.intern("KEYS")
2021
TOKEN_RBRACKET = sys.intern("RBRACKET")

tests/test_env.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pytest
66

77
from jsonpath import JSONPathEnvironment
8+
from jsonpath import JSONPathSyntaxError
89
from jsonpath import JSONPathTypeError
910

1011

@@ -173,6 +174,41 @@ class MyJSONPathEnvironment(JSONPathEnvironment):
173174
assert env.findall("$.foo.*", data) == [1, 2, 3]
174175

175176

177+
def test_custom_fake_root_identifier_token() -> None:
178+
"""Test that we can change the non-standard fake root identifier."""
179+
180+
class MyJSONPathEnvironment(JSONPathEnvironment):
181+
fake_root_token = "$$"
182+
183+
env = MyJSONPathEnvironment()
184+
data = {"foo": {"a": 1, "b": 2, "c": 3}}
185+
assert env.findall("$$[?@.foo.a == 1]", data) == [data]
186+
assert env.findall("$$[?@.foo.a == 7]", data) == []
187+
assert env.findall("$.*", data) == [{"a": 1, "b": 2, "c": 3}]
188+
189+
190+
def test_disable_fake_root_identifier() -> None:
191+
"""Test that we can disable the non-standard fake root identifier."""
192+
193+
class MyJSONPathEnvironment(JSONPathEnvironment):
194+
fake_root_token = ""
195+
196+
env = MyJSONPathEnvironment()
197+
with pytest.raises(JSONPathSyntaxError):
198+
env.compile("^[?@.a == 42]")
199+
200+
201+
def test_disable_keys_selector() -> None:
202+
"""Test that we can disable the non-standard keys selector."""
203+
204+
class MyJSONPathEnvironment(JSONPathEnvironment):
205+
keys_selector_token = ""
206+
207+
env = MyJSONPathEnvironment()
208+
with pytest.raises(JSONPathSyntaxError):
209+
env.compile("*..~")
210+
211+
176212
def test_disable_well_typed_checks() -> None:
177213
"""Test that we can disable checks for well-typedness."""
178214
env = JSONPathEnvironment(well_typed=True)

tests/test_find.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,24 @@ class Case:
5757
},
5858
want=[{"foo": 1}, {"foo": 2}],
5959
),
60+
Case(
61+
description="select root value using fake root",
62+
path="^[?@.some.thing > 7]",
63+
data={"some": {"thing": 42}},
64+
want=[{"some": {"thing": 42}}],
65+
),
66+
Case(
67+
description="fake root in a filter query",
68+
path="^[?@.some.thing > value(^.*.num)]",
69+
data={"some": {"thing": 42}, "num": 7},
70+
want=[{"some": {"thing": 42}, "num": 7}],
71+
),
72+
Case(
73+
description="recurse object keys",
74+
path="$..~",
75+
data={"some": {"thing": "else", "foo": {"bar": "baz"}}},
76+
want=["some", "thing", "foo", "bar"],
77+
),
6078
]
6179

6280

0 commit comments

Comments (0)