Skip to content

Commit 2666a6e

Browse files
authored
Merge pull request #67 from ddn0/add-re2
Use re2 instead of re for match expressions
2 parents 1ef8706 + 12a792f commit 2666a6e

File tree

10 files changed

+592
-300
lines changed

10 files changed

+592
-300
lines changed

README.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,21 @@ Installation
3737

3838
You now have the CEL run-time available to Python-based applications.
3939

40+
41+
re2
42+
---
43+
44+
CEL specifies that regular expressions use re2 syntax,
45+
https://github.com/google/re2/wiki/Syntax. To keep its dependencies minimal and
46+
this implementation easily embeddable, cel-python uses the Python standard
47+
library ``re`` syntax by default. If a ``re2`` package is installed or the
48+
``re2`` extra is provided, cel-python will use ``re2`` syntax instead.
49+
50+
::
51+
52+
pip install cel-python[re2]
53+
54+
4055
Command Line
4156
============
4257

docs/source/c7n_functions.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -515,13 +515,13 @@ It returns two values: the sentinel and, generally, a converted value that shoul
515515
- 'age' -- ``parse_date(value), datetime.datetime.now(tz=tzutc()) - timedelta(sentinel)``
516516
Note that these are reversed to make it easier to compare age against a given value.
517517
A global ``Now`` variable removes the need for an implicit age computation.
518-
The :func:`parse_date` is the :func:`dateutil.parser.parse` function.
518+
The :func:`parse_date` is the :func:`pendulum.parse` function.
519519

520520
- 'integer' -- ``sentinel, int(str(value).strip())``
521521

522522
- 'expiration' -- ``datetime.datetime.now(tz=tzutc()) + timedelta(sentinel), parse_date(value)``
523523
A global ``Now`` variable removes the need for an implicit expiration computation.
524-
The :func:`parse_date` is the :func:`dateutil.parser.parse` function.
524+
The :func:`parse_date` is the :func:`pendulum.parse` function.
525525

526526
- 'normalize' -- ``sentinel, value.strip().lower()``
527527

features/steps/c7n_integration.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@
3838
from unittest.mock import Mock
3939

4040
from behave import *
41-
from dateutil.parser import parse as parse_date
41+
# from dateutil.parser import parse as parse_date
42+
from pendulum import parse as parse_date
4243

4344
import celpy
4445
import celpy.c7nlib

poetry.lock

Lines changed: 488 additions & 267 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,20 @@ packages = [{ include = "celpy", from = "src"}]
2929
[tool.poetry.dependencies]
3030
python = "^3.9"
3131
lark = "^0.12"
32-
python-dateutil = "^2.9.0.post0"
33-
types-python-dateutil = "^2.9.0.20240316"
3432

3533
# specify as optional for pip install cel-python[c7n] or poetry install -e behavior
3634
jmespath = { version = "^1.0.1", optional = true }
37-
pyyaml = { version = "^6.0.1", optional = true }
38-
types-pyyaml = { version = "^6.0.12.20240311", optional = true}
3935

4036
# specify as a non default group for building automatically in ci without -e
37+
pendulum = "^3.1.0"
38+
pyyaml = "^6.0.2"
39+
types-pyyaml = "^6.0.12.20250516"
4140
[tool.poetry.group.c7n.dependencies]
4241
jmespath = "^1.0.1"
43-
pyyaml = "^6.0.1"
44-
types-pyyaml = "^6.0.12.20240311"
42+
google-re2 = { version = "^1.0", optional = true }
43+
44+
[tool.poetry.extras]
45+
re2 = ["google-re2"]
4546

4647
[tool.poetry.group.dev.dependencies]
4748
behave = "^1.2.6"
@@ -51,6 +52,8 @@ pytest = "^8.2.1"
5152
pytest-cov = "^5.0.0"
5253
sphinx = "^6.0"
5354
tox = "^4.15.0"
55+
pre-commit = "^3.5"
56+
google-re2-stubs = "^0.1.0"
5457

5558
[build-system]
5659
requires = ["poetry-core"]

src/celpy/c7nlib.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,8 @@ def process(self, resources):
288288
from typing import (Any, Callable, Dict, Iterator, List, Optional, Type, Union,
289289
cast)
290290

291-
import dateutil
291+
# import dateutil
292+
from pendulum import parse as parse_date
292293
import jmespath # type: ignore [import-untyped]
293294

294295
from celpy import InterpretedRunner, celtypes
@@ -677,7 +678,8 @@ def image(resource: celtypes.MapType) -> celtypes.Value:
677678
image_name = ""
678679

679680
return json_to_cel(
680-
{"CreationDate": dateutil.parser.isoparse(creation_date), "Name": image_name}
681+
# {"CreationDate": dateutil.parser.isoparse(creation_date), "Name": image_name}
682+
{"CreationDate": parse_date(creation_date), "Name": image_name}
681683
)
682684

683685

src/celpy/celtypes.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -172,13 +172,13 @@
172172
===============
173173
174174
An implementation may have additional timezone names that must be injected into
175-
th dateutil.gettz() processing.
175+
the ``pendulum`` processing. (Formerly ``dateutil.gettz()``.)
176176
177177
For example, there may be the following sequence:
178178
179-
1. A lowercase match for an alias or an existing dateutil timezone.
179+
1. A lowercase match for an alias or an existing timezone.
180180
181-
2. A titlecase match for an existing dateutil timezone.
181+
2. A titlecase match for an existing timezone.
182182
183183
3. The fallback, which is a +/-HH:MM string.
184184
@@ -194,8 +194,10 @@
194194
Optional, Sequence, Tuple, Type, TypeVar, Union, cast,
195195
overload)
196196

197-
import dateutil.parser
198-
import dateutil.tz
197+
import pendulum
198+
from pendulum import timezone
199+
import pendulum.tz.exceptions
200+
199201

200202
logger = logging.getLogger("celtypes")
201203

@@ -1035,12 +1037,12 @@ class TimestampType(datetime.datetime):
10351037
The Joda project (https://www.joda.org/joda-time/timezones.html)
10361038
says "Time zone data is provided by the public IANA time zone database."
10371039
1038-
The ``dateutil`` project (https://pypi.org/project/python-dateutil/)
1039-
is used for TZ handling and timestamp parsing.
1040+
TZ handling and timestamp parsing are done with
1041+
the ``pendulum`` (https://pendulum.eustace.io) project.
10401042
10411043
Additionally, there is a ``TZ_ALIASES`` mapping available in this class to permit additional
10421044
timezone names. By default, the mapping is empty, and the only names
1043-
available are those recognized by :mod:`dateutil.tz`.
1045+
available are those recognized by :func:`pendulum.timezone`.
10441046
"""
10451047

10461048
TZ_ALIASES: Dict[str, str] = {}
@@ -1076,7 +1078,7 @@ def __new__(
10761078

10771079
elif isinstance(source, str):
10781080
# Use pendulum to try a variety of text formats.
1079-
parsed_datetime = dateutil.parser.isoparse(source)
1081+
parsed_datetime = cast(datetime.datetime, pendulum.parse(source))
10801082
return super().__new__(
10811083
cls,
10821084
year=parsed_datetime.year,
@@ -1143,10 +1145,15 @@ def tz_name_lookup(cls, tz_name: str) -> Optional[datetime.tzinfo]:
11431145
Tweak ``celpy.celtypes.TimestampType.TZ_ALIASES``.
11441146
"""
11451147
tz_lookup = str(tz_name)
1148+
tz: Optional[datetime.tzinfo]
11461149
if tz_lookup in cls.TZ_ALIASES:
1147-
tz = dateutil.tz.gettz(cls.TZ_ALIASES[tz_lookup])
1150+
tz = timezone(cls.TZ_ALIASES[tz_lookup])
11481151
else:
1149-
tz = dateutil.tz.gettz(tz_lookup)
1152+
try:
1153+
tz = cast(datetime.tzinfo, timezone(tz_lookup))
1154+
except pendulum.tz.exceptions.InvalidTimezone:
1155+
# ±hh:mm format...
1156+
tz = cls.tz_offset_parse(tz_name)
11501157
return tz
11511158

11521159
@classmethod
@@ -1165,11 +1172,9 @@ def tz_offset_parse(cls, tz_name: str) -> Optional[datetime.tzinfo]:
11651172
def tz_parse(tz_name: Optional[str]) -> Optional[datetime.tzinfo]:
11661173
if tz_name:
11671174
tz = TimestampType.tz_name_lookup(tz_name)
1168-
if tz is None:
1169-
tz = TimestampType.tz_offset_parse(tz_name)
11701175
return tz
11711176
else:
1172-
return dateutil.tz.UTC
1177+
return timezone("UTC")
11731178

11741179
def getDate(self, tz_name: Optional[StringType] = None) -> IntType:
11751180
new_tz = self.tz_parse(tz_name)

src/celpy/evaluation.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,12 @@
5151
import celpy.celtypes
5252
from celpy.celparser import tree_dump
5353

54+
_USE_RE2 = True
55+
try:
56+
import re2
57+
except ImportError: # pragma: no cover
58+
_USE_RE2 = False
59+
5460
# A CEL type annotation. Used in an environment to describe objects as well as functions.
5561
# This is a list of types, plus Callable for conversion functions.
5662
Annotation = Union[
@@ -59,10 +65,8 @@
5965
Type[celpy.celtypes.FunctionType], # Concrete class for annotations
6066
]
6167

62-
6368
logger = logging.getLogger("evaluation")
6469

65-
6670
class CELSyntaxError(Exception):
6771
"""CEL Syntax error -- the AST did not have the expected structure."""
6872
def __init__(self, arg: Any, line: Optional[int] = None, column: Optional[int] = None) -> None:
@@ -293,6 +297,27 @@ def operator_in(item: Result, container: Result) -> Result:
293297
return result
294298

295299

300+
def _function_matches_re(text: str, pattern: str) -> Result:
301+
try:
302+
m = re.search(pattern, text)
303+
except re.error as ex:
304+
return CELEvalError("match error", ex.__class__, ex.args)
305+
306+
return celpy.celtypes.BoolType(m is not None)
307+
308+
309+
def _function_matches_re2(text: str, pattern: str) -> Result:
310+
try:
311+
m = re2.search(pattern, text)
312+
except re2.error as ex:
313+
return CELEvalError("match error", ex.__class__, ex.args)
314+
315+
return celpy.celtypes.BoolType(m is not None)
316+
317+
318+
function_matches = _function_matches_re2 if _USE_RE2 else _function_matches_re
319+
320+
296321
def function_size(container: Result) -> Result:
297322
"""
298323
The size() function applied to a Value. Delegate to Python's :py:func:`len`.
@@ -340,7 +365,7 @@ def function_size(container: Result) -> Result:
340365
# StringType methods
341366
"endsWith": lambda s, text: celpy.celtypes.BoolType(s.endswith(text)),
342367
"startsWith": lambda s, text: celpy.celtypes.BoolType(s.startswith(text)),
343-
"matches": lambda s, pattern: celpy.celtypes.BoolType(re.search(pattern, s) is not None),
368+
"matches": function_matches,
344369
"contains": lambda s, text: celpy.celtypes.BoolType(text in s),
345370
# TimestampType methods. Type details are redundant, but required because of the lambdas
346371
"getDate": lambda ts, tz_name=None: celpy.celtypes.IntType(ts.getDate(tz_name)),

tests/test_evaluation.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import celpy.evaluation # For monkeypatching
3838
from celpy import celparser, celtypes
3939
from celpy.evaluation import *
40+
from celpy.evaluation import _function_matches_re, _function_matches_re2
4041

4142

4243
def test_exception_syntax_error():
@@ -158,6 +159,25 @@ def test_operator_in():
158159
assert isinstance(operator_in(celtypes.IntType(-1), container_2), CELEvalError)
159160

160161

162+
def test_function_matches_re2():
163+
empty_string = celtypes.StringType("")
164+
# re2-specific patterns which behave differently than standard re
165+
assert _function_matches_re2(empty_string, "^\\z")
166+
assert isinstance(_function_matches_re2(empty_string, "^\\Z"), CELEvalError)
167+
168+
169+
def test_function_matches_re():
170+
empty_string = celtypes.StringType("")
171+
# re2-specific patterns which behave differently than standard re
172+
assert isinstance(_function_matches_re(empty_string, "^\\z"), CELEvalError)
173+
assert _function_matches_re(empty_string, "^\\Z")
174+
175+
176+
def test_function_matches():
177+
empty_string = celtypes.StringType("")
178+
assert function_matches(empty_string, "^$")
179+
180+
161181
def test_function_size():
162182
container_1 = celtypes.ListType([
163183
celtypes.IntType(42),

type_check/lineprecision.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
Name Lines Precise Imprecise Any Empty Unanalyzed
22
-------------------------------------------------------------------
33
celpy 293 76 0 4 213 0
4-
celpy.__main__ 465 164 15 42 244 0
4+
celpy.__main__ 465 172 7 42 244 0
55
celpy.adapter 137 35 3 9 85 5
6-
celpy.c7nlib 1582 340 15 154 1073 0
6+
celpy.c7nlib 1584 340 15 154 1075 0
77
celpy.celparser 402 208 2 23 169 0
8-
celpy.celtypes 1495 430 17 221 788 39
9-
celpy.evaluation 2446 827 33 173 1398 15
8+
celpy.celtypes 1500 435 14 221 791 39
9+
celpy.evaluation 2471 839 33 176 1408 15
1010
xlate 0 0 0 0 0 0
11-
xlate.c7n_to_cel 1730 384 105 145 1091 5
11+
xlate.c7n_to_cel 1730 387 102 145 1091 5

0 commit comments

Comments
 (0)