Skip to content

Commit f61565d

Browse files
authored
Merge pull request #48 from regisb/regisb/multiline-with-singlelinecomment
[BD-21] Multiline annotations with single-line comment prefix ("#")
2 parents 12dc1a7 + 097db99 commit f61565d

File tree

6 files changed

+95
-24
lines changed

6 files changed

+95
-24
lines changed

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ Change Log
1111

1212
.. There should always be an "Unreleased" section for changes pending release.
1313
14+
[0.6.0] - 2020-08-27
15+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
16+
17+
* Add support for multiline annotations for lines prefixed with single-line comment signs ("#")
18+
1419
[0.5.1] - 2020-08-25
1520
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1621

code_annotations/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
Extensible tools for parsing annotations in codebases.
33
"""
44

5-
__version__ = '0.5.1'
5+
__version__ = '0.6.0'

code_annotations/extensions/base.py

Lines changed: 57 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -42,22 +42,27 @@ class SimpleRegexAnnotationExtension(AnnotationExtension, metaclass=ABCMeta):
4242
# Javascript and Python extensions for examples.
4343
lang_comment_definition = None
4444

45-
r"""
46-
This format string/regex finds all comments in the file. The format tokens will be replaced with the
47-
language-specific comment definitions defined in the sub-classes.
48-
49-
{multi_start} - start of the language-specific multi-line comment (ex. /*)
50-
([\d\D]*?) - capture all of the characters...
51-
{multi_end} - until you find the end of the language-specific multi-line comment (ex. */)
52-
| - If you don't find any of those...
53-
{single} - start by finding the single-line comment token (ex. //)
54-
(.*) - and capture all characters until the end of the line
55-
56-
Returns a 2-tuple of:
57-
- ("Comment text", None) in the case of a multi-line comment OR
58-
- (None, "Comment text") in the case of a single-line comment
45+
# This format string/regex finds all comments in the file. The format tokens will be replaced with the
46+
# language-specific comment definitions defined in the sub-classes.
47+
#
48+
# Match groupdict will contain two named subgroups: 'comment' and 'prefixed_comment', of which at most
49+
# one will be non-None.
50+
comment_regex_fmt = r"""
51+
{multi_start} # start of the language-specific multi-line comment (ex. /*)
52+
(?P<comment> # Look for a multiline comment
53+
[\d\D]*? # capture all of the characters...
54+
)
55+
{multi_end} # until you find the end of the language-specific multi-line comment (ex. */)
56+
| # If you don't find any of those...
57+
(?P<prefixed_comment> # Look for a group of single-line comments
58+
(?: # Non-capturing group
59+
{single} # start by finding the single-line comment token (ex. //)
60+
.* # and capture all characters until the end of the line
61+
\n? # followed by an optional newline
62+
\ * # and some empty space
63+
)* # multiple times
64+
)
5965
"""
60-
comment_regex_fmt = r'{multi_start}([\d\D]*?){multi_end}|{single}(.*)'
6166

6267
def __init__(self, config, echo):
6368
"""
@@ -74,7 +79,12 @@ def __init__(self, config, echo):
7479

7580
# pylint: disable=not-a-mapping
7681
self.comment_regex = re.compile(
77-
self.comment_regex_fmt.format(**self.lang_comment_definition)
82+
self.comment_regex_fmt.format(**self.lang_comment_definition),
83+
flags=re.VERBOSE
84+
)
85+
self.prefixed_comment_regex = re.compile(
86+
r"^ *{single}".format(**self.lang_comment_definition),
87+
flags=re.MULTILINE
7888
)
7989

8090
# Parent class will allow this class to populate self.strings_to_search via
@@ -102,15 +112,15 @@ def search(self, file_handle):
102112
if any(anno in txt for anno in self.config.annotation_tokens):
103113
fname = clean_abs_path(file_handle.name, self.config.source_path)
104114

115+
# Iterate over all comments, both prefixed and non-prefixed.
105116
for match in self.comment_regex.finditer(txt):
106-
# Should only be one match
107-
comment_content = [item for item in match.groups() if item is not None][0]
108-
for inner_match in self.query.finditer(comment_content):
109-
# Get the line number by counting newlines + 1 (for the first line).
110-
# Note that this is the line number of the beginning of the comment, not the
111-
# annotation token itself.
112-
line = txt.count('\n', 0, match.start()) + 1
117+
# Get the line number by counting newlines + 1 (for the first line).
118+
# Note that this is the line number of the beginning of the comment, not the
119+
# annotation token itself.
120+
line = txt.count('\n', 0, match.start()) + 1
113121

122+
comment_content = self._find_comment_content(match)
123+
for inner_match in self.query.finditer(comment_content):
114124
try:
115125
annotation_token = inner_match.group('token')
116126
annotation_data = inner_match.group('data')
@@ -131,3 +141,27 @@ def search(self, file_handle):
131141
})
132142

133143
return found_annotations
144+
145+
def _find_comment_content(self, match):
146+
"""
147+
Return the comment content as text.
148+
149+
Args:
150+
match (re.Match): one of the matches of the self.comment_regex regular expression.
151+
"""
152+
comment_content = match.groupdict()["comment"]
153+
if comment_content:
154+
return comment_content
155+
156+
# Find single-line comments and strip comment tokens
157+
comment_content = match.groupdict()["prefixed_comment"]
158+
return self._strip_single_line_comment_tokens(comment_content)
159+
160+
def _strip_single_line_comment_tokens(self, content):
161+
"""
162+
Strip the leading single-line comment tokens from a comment text.
163+
164+
Args:
165+
content (str): token-prefixed multi-line comment string.
166+
"""
167+
return self.prefixed_comment_regex.sub("", content)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Docstring
2+
#.. pii: A long description that
3+
# spans multiple
4+
# lines
5+
# A comment that is not indented and not part of the above multi-line annotation
6+
#.. pii_types: id, name
7+
# Some comment that comes after the multiple-line annotation

tests/extensions/test_base_extensions.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,19 @@ def test_nothing_found():
2828
r = FakeExtension(config, VerboseEcho())
2929
with open('tests/extensions/base_test_files/empty.foo') as f:
3030
r.search(f)
31+
32+
33+
def test_strip_single_line_comment_tokens():
34+
config = FakeConfig()
35+
36+
extension = FakeExtension(config, VerboseEcho())
37+
text = """baz line1
38+
baz line2
39+
bazline3
40+
baz line4"""
41+
expected_result = """ line1
42+
line2
43+
line3
44+
line4"""
45+
# pylint: disable=protected-access
46+
assert expected_result == extension._strip_single_line_comment_tokens(text)

tests/extensions/test_extension_python.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,15 @@ def test_grouping_and_choice_failures(test_file, expected_exit_code, expected_me
7676
Multi-line and multi-paragraph.""")
7777
]
7878
),
79+
(
80+
'multiline_singlelinecomment.pyt',
81+
[
82+
('.. pii:', """A long description that
83+
spans multiple
84+
lines"""),
85+
('.. pii_types:', 'id, name'),
86+
]
87+
),
7988
])
8089
def test_multi_line_annotations(test_file, annotations):
8190
config = AnnotationConfig('tests/test_configurations/.annotations_test')

0 commit comments

Comments
 (0)