Skip to content

Commit dbdbd1c

Browse files
committed
reviewer comments.
1 parent 5ebd6af commit dbdbd1c

File tree

3 files changed

+19
-15
lines changed

3 files changed

+19
-15
lines changed

machine/corpora/scripture_ref_usfm_parser_handler.py

Lines changed: 15 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,8 @@ class ScriptureTextType(Enum):
1919
NOTE_TEXT = auto()
2020

2121

22-
EMBED_STARTING_CHARS = ("f", "x", "z")
22+
EMBED_PART_START_CHAR_STYLES = ("f", "x", "z")
23+
EMBED_STYLES = ("f", "fe", "fig", "fm", "x")
2324

2425

2526
class ScriptureRefUsfmParserHandler(UsfmParserHandler, ABC):
@@ -151,12 +152,12 @@ def opt_break(self, state: UsfmParserState) -> None:
151152
def start_char(
152153
self, state: UsfmParserState, marker: str, unknown: bool, attributes: Optional[Sequence[UsfmAttribute]]
153154
) -> None:
154-
if self._is_embed_part(marker) and self._in_note_text:
155+
if self._is_embed_part_style(marker) and self._in_note_text:
155156
self._in_nested_embed = True
156157
# if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse segment
157158
self._check_convert_verse_para_to_non_verse(state)
158159

159-
if self._is_embed_character(marker):
160+
if self._is_embed_character_style(marker):
160161
self._in_embed = True
161162
self._start_embed_wrapper(state, marker)
162163

@@ -166,12 +167,12 @@ def start_char(
166167
def end_char(
167168
self, state: UsfmParserState, marker: str, attributes: Optional[Sequence[UsfmAttribute]], closed: bool
168169
) -> None:
169-
if self._is_embed_part(marker):
170+
if self._is_embed_part_style(marker):
170171
if self._in_nested_embed:
171172
self._in_nested_embed = False
172173
else:
173174
self._end_note_text_wrapper(state)
174-
if self._is_embed_character(marker):
175+
if self._is_embed_character_style(marker):
175176
self._end_embed(state, marker, attributes, closed)
176177
self._in_embed = False
177178

@@ -236,7 +237,7 @@ def _end_parent_element(self) -> None:
236237
self._cur_elements_stack.pop()
237238

238239
def _end_embed_elements(self) -> None:
239-
if self._cur_elements_stack and self._is_embed_character(self._cur_elements_stack[-1].name):
240+
if self._cur_elements_stack and self._is_embed_character_style(self._cur_elements_stack[-1].name):
240241
self._cur_elements_stack.pop()
241242

242243
def _create_verse_refs(self) -> List[ScriptureRef]:
@@ -267,16 +268,18 @@ def _check_convert_verse_para_to_non_verse(self, state: UsfmParserState) -> None
267268
self._start_non_verse_text_wrapper(state)
268269

269270
def _is_in_embed(self, marker: Optional[str]) -> bool:
270-
return self._in_embed or self._is_embed_character(marker)
271+
return self._in_embed or self._is_embed_character_style(marker)
271272

272273
def _is_in_nested_embed(self, marker: Optional[str]) -> bool:
273-
return self._in_nested_embed or (marker is not None and marker[0] == "+" and marker[1] in EMBED_STARTING_CHARS)
274+
return self._in_nested_embed or (
275+
marker is not None and marker[0] == "+" and marker[1] in EMBED_PART_START_CHAR_STYLES
276+
)
274277

275278
def _is_note_text(self, marker: Optional[str]) -> bool:
276279
return marker == "ft"
277280

278-
def _is_embed_part(self, marker: Optional[str]) -> bool:
279-
return marker is not None and marker.startswith(EMBED_STARTING_CHARS)
281+
def _is_embed_part_style(self, marker: Optional[str]) -> bool:
282+
return marker is not None and marker.startswith(EMBED_PART_START_CHAR_STYLES)
280283

281-
def _is_embed_character(self, marker: Optional[str]) -> bool:
282-
return marker in ("f", "fe", "fig", "fm", "x")
284+
def _is_embed_character_style(self, marker: Optional[str]) -> bool:
285+
return marker in EMBED_STYLES

machine/corpora/update_usfm_parser_handler.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -295,7 +295,7 @@ def _replace_with_new_tokens(self, state: UsfmParserState, closed: bool = True)
295295
marker: Optional[str] = state.token if state.token is None else state.token.marker
296296
in_embed: bool = self._is_in_embed(marker)
297297
in_nested_embed: bool = self._is_in_nested_embed(marker)
298-
is_style_tag: bool = marker is not None and not self._is_embed_part(marker)
298+
is_style_tag: bool = marker is not None and not self._is_embed_part_style(marker)
299299

300300
existing_text = any(
301301
t.type == UsfmTokenType.TEXT and t.text

machine/corpora/usfm_text_base.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -202,11 +202,12 @@ def text(self, state: UsfmParserState, text: str) -> None:
202202
text = text.lstrip()
203203
row_text += text
204204
elif len(text) > 0 and (self._current_text_type != ScriptureTextType.VERSE or state.is_verse_text):
205-
if (
205+
is_embed_or_nested_dont_update = (
206206
state.token is not None
207207
and self._is_in_embed(state.token.marker)
208208
and (not self._is_in_note_text() or self._is_in_nested_embed(state.token.marker))
209-
):
209+
)
210+
if is_embed_or_nested_dont_update:
210211
return
211212

212213
if (

0 commit comments

Comments
 (0)