@@ -27,15 +27,18 @@ def __init__(
2727 rows : Optional [Sequence [Tuple [Sequence [ScriptureRef ], str ]]] = None ,
2828 id_text : Optional [str ] = None ,
2929 text_behavior : UpdateUsfmTextBehavior = UpdateUsfmTextBehavior .PREFER_EXISTING ,
30+ paragraph_behavior : UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior .PRESERVE ,
3031 embed_behavior : UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior .PRESERVE ,
3132 style_behavior : UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior .STRIP ,
3233 ) -> None :
3334 super ().__init__ ()
3435 self ._rows = rows or []
3536 self ._tokens : List [UsfmToken ] = []
3637 self ._new_tokens : List [UsfmToken ] = []
38+ self ._new_embed_tokens : List [UsfmToken ] = []
3739 self ._id_text = id_text
3840 self ._text_behavior = text_behavior
41+ self ._paragraph_behavior = paragraph_behavior
3942 self ._embed_behavior = embed_behavior
4043 self ._style_behavior = style_behavior
4144 self ._replace_stack : List [bool ] = []
@@ -74,7 +77,14 @@ def start_para(
7477 unknown : bool ,
7578 attributes : Optional [Sequence [UsfmAttribute ]],
7679 ) -> None :
77- self ._collect_tokens (state )
80+ if (
81+ state .verse_ref .verse_num != 0
82+ and (self ._has_new_text () or self ._text_behavior == UpdateUsfmTextBehavior .STRIP_EXISTING )
83+ and self ._paragraph_behavior == UpdateUsfmMarkerBehavior .STRIP
84+ ):
85+ self ._skip_tokens (state )
86+ else :
87+ self ._collect_tokens (state )
7888
7989 super ().start_para (state , marker , unknown , attributes )
8090
@@ -202,13 +212,13 @@ def ref(self, state: UsfmParserState, marker: str, display: str, target: str) ->
202212 super ().ref (state , marker , display , target )
203213
def text(self, state: UsfmParserState, text: str) -> None:
    """Handle a text token.

    The parent handler is invoked first; afterwards the pending tokens are
    either skipped or collected, depending on what
    ``_replace_with_new_tokens`` reports for the current parser state.
    """
    super().text(state, text)

    # Pick the token action once the replace decision has been made.
    handle_tokens = self._skip_tokens if self._replace_with_new_tokens(state) else self._collect_tokens
    handle_tokens(state)

212222 def opt_break (self , state : UsfmParserState ) -> None :
213223 if self ._replace_with_new_tokens (state ):
214224 self ._skip_tokens (state )
@@ -240,7 +250,7 @@ def _end_non_verse_text(self, state: UsfmParserState, scripture_ref: ScriptureRe
240250 self ._pop_new_tokens ()
241251
def _start_note_text(self, state: UsfmParserState) -> None:
    """Queue the collected embed row texts as new embed tokens.

    Each entry of ``_embed_row_texts`` becomes one TEXT token whose text is
    the entry followed by a single space. ``state`` is not used here.
    """
    note_tokens = []
    for row_text in self._embed_row_texts:
        note_tokens.append(UsfmToken(UsfmTokenType.TEXT, text=row_text + " "))
    self._push_new_embed_tokens(note_tokens)
244254
245255 def _end_note_text (self , state : UsfmParserState , scripture_ref : ScriptureRef ) -> None :
246256 self ._embed_row_texts .clear ()
@@ -287,13 +297,9 @@ def _skip_tokens(self, state: UsfmParserState) -> None:
287297 self ._token_index = state .index + 1 + state .special_token_count
288298
289299 def _replace_with_new_tokens (self , state : UsfmParserState , closed : bool = True ) -> bool :
290- if self ._text_behavior == UpdateUsfmTextBehavior .STRIP_EXISTING :
291- self ._add_new_tokens ()
292- return True
293-
294- new_text : bool = bool (self ._replace_stack ) and self ._replace_stack [- 1 ]
295300 marker : Optional [str ] = state .token if state .token is None else state .token .marker
296301 in_embed : bool = self ._is_in_embed (marker )
302+
297303 in_nested_embed : bool = self ._is_in_nested_embed (marker )
298304 is_style_tag : bool = marker is not None and not self ._is_embed_part_style (marker )
299305
@@ -303,8 +309,14 @@ def _replace_with_new_tokens(self, state: UsfmParserState, closed: bool = True)
303309 )
304310
305311 use_new_tokens = (
306- new_text
307- and (not existing_text or self ._text_behavior == UpdateUsfmTextBehavior .PREFER_NEW )
312+ (
313+ (self ._text_behavior == UpdateUsfmTextBehavior .STRIP_EXISTING )
314+ or (
315+ self ._has_new_text ()
316+ and (not existing_text or self ._text_behavior == UpdateUsfmTextBehavior .PREFER_NEW )
317+ )
318+ )
319+ and not self ._is_in_preserved_paragraph (marker )
308320 and (
309321 not in_embed
310322 or (
@@ -316,26 +328,37 @@ def _replace_with_new_tokens(self, state: UsfmParserState, closed: bool = True)
316328 )
317329
318330 if use_new_tokens :
319- self ._add_new_tokens ()
331+ if in_embed :
332+ self ._add_new_embed_tokens ()
333+ else :
334+ self ._add_new_tokens ()
320335
321336 if existing_text and self ._text_behavior == UpdateUsfmTextBehavior .PREFER_EXISTING :
322- self ._clear_new_tokens ()
337+ if in_embed :
338+ self ._clear_new_embed_tokens ()
339+ else :
340+ self ._clear_new_tokens ()
323341
324- embed_in_new_verse_text = any (self ._replace_stack ) and in_embed
342+ embed_in_new_verse_text = (
343+ any (self ._replace_stack ) or self ._text_behavior == UpdateUsfmTextBehavior .STRIP_EXISTING
344+ ) and in_embed
325345 if embed_in_new_verse_text or self ._embed_updated :
326346 if self ._embed_behavior == UpdateUsfmMarkerBehavior .STRIP :
327- self ._clear_new_tokens ()
347+ self ._clear_new_embed_tokens ()
328348 return True
329349 if not self ._is_in_note_text () or in_nested_embed :
330350 return False
331351
332352 skip_tokens = use_new_tokens and closed
333353
334- if new_text and is_style_tag :
354+ if use_new_tokens and is_style_tag :
335355 skip_tokens = self ._style_behavior == UpdateUsfmMarkerBehavior .STRIP
336356
337357 return skip_tokens
338358
359+ def _has_new_text (self ) -> bool :
360+ return bool (self ._replace_stack ) and self ._replace_stack [- 1 ]
361+
339362 def _push_new_tokens (self , tokens : List [UsfmToken ]) -> None :
340363 self ._replace_stack .append (any (tokens ))
341364 if tokens :
@@ -349,6 +372,19 @@ def _add_new_tokens(self) -> None:
349372 def _clear_new_tokens (self ) -> None :
350373 self ._new_tokens .clear ()
351374
def _push_new_embed_tokens(self, tokens: List[UsfmToken]) -> None:
    """Record a replace-stack entry for ``tokens`` and queue them as embed tokens.

    The value pushed onto the replace stack is ``any(tokens)``. The tokens
    themselves are appended to the pending embed-token list only when
    ``tokens`` is non-empty.
    """
    self._replace_stack.append(any(tokens))
    if not tokens:
        return
    self._new_embed_tokens.extend(tokens)
379+
380+ def _add_new_embed_tokens (self ) -> None :
381+ if self ._new_embed_tokens :
382+ self ._tokens .extend (self ._new_embed_tokens )
383+ self ._new_embed_tokens .clear ()
384+
385+ def _clear_new_embed_tokens (self ) -> None :
386+ self ._new_embed_tokens .clear ()
387+
352388 def _push_token_as_previous (self ) -> None :
353389 self ._replace_stack .append (self ._replace_stack [- 1 ])
354390
0 commit comments