11from enum import Enum , auto
22from typing import List , Optional , Sequence , Tuple , Union
33
4+ from ..scripture .verse_ref import VerseRef
45from .scripture_ref import ScriptureRef
56from .scripture_ref_usfm_parser_handler import ScriptureRefUsfmParserHandler
7+ from .scripture_update_block import ScriptureUpdateBlock
8+ from .scripture_update_block_handler_base import ScriptureUpdateBlockHandlerBase
9+ from .scripture_update_block_handler_first_elements_first import ScriptureUpdateBlockHandlerFirstElementsFirst
610from .usfm_parser_state import UsfmParserState
711from .usfm_stylesheet import UsfmStylesheet
812from .usfm_token import UsfmAttribute , UsfmToken , UsfmTokenType
@@ -31,13 +35,20 @@ def __init__(
3135 embed_behavior : UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior .PRESERVE ,
3236 style_behavior : UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior .STRIP ,
3337 preserve_paragraph_styles : Optional [Sequence [str ]] = None ,
38+ update_block_handlers : Optional [list [ScriptureUpdateBlockHandlerBase ]] = None ,
3439 ) -> None :
3540 super ().__init__ ()
3641 self ._rows = rows or []
3742 self ._tokens : List [UsfmToken ] = []
38- self ._new_tokens : List [UsfmToken ] = []
39- self ._new_embed_tokens : List [UsfmToken ] = []
43+ self ._updated_text : List [UsfmToken ] = []
44+ self ._updated_embed_text : List [UsfmToken ] = []
45+ self ._update_block : ScriptureUpdateBlock = ScriptureUpdateBlock ()
46+ self ._embed_update_block : ScriptureUpdateBlock = ScriptureUpdateBlock ()
4047 self ._id_text = id_text
48+ if update_block_handlers is None :
49+ self ._update_block_handlers = [ScriptureUpdateBlockHandlerFirstElementsFirst ()]
50+ else :
51+ self ._update_block_handlers = update_block_handlers
4152 if preserve_paragraph_styles is None :
4253 self ._preserve_paragraph_styles = set (["r" , "rem" ])
4354 elif isinstance (preserve_paragraph_styles , str ):
@@ -60,21 +71,20 @@ def tokens(self) -> List[UsfmToken]:
6071
def end_usfm(self, state: UsfmParserState) -> None:
    """Finish the document: gather remaining tokens and flush the update block.

    Any tokens not yet consumed are moved into the current update block, the
    block is run through the configured handlers and appended to the output,
    and then the base handler is notified.
    """
    self._collect_tokens(state)
    self._process_update_block()
    super().end_usfm(state)
6576
def start_book(self, state: UsfmParserState, marker: str, code: str) -> None:
    """Handle the start of a book, optionally adding replacement id text.

    Pending tokens are collected first. When an ``id_text`` was supplied to
    the handler, it is added (with a trailing space) to the current update
    block as a TEXT token before the base handler is called.
    """
    self._collect_tokens(state)
    start_book_tokens: List[UsfmToken] = []
    if self._id_text is not None:
        start_book_tokens.append(UsfmToken(UsfmTokenType.TEXT, text=self._id_text + " "))
    self._update_block.add_tokens(start_book_tokens)

    super().start_book(state, marker, code)
7485
def end_book(self, state: UsfmParserState, marker: str) -> None:
    """Flush the current update block, then delegate to the base handler."""
    self._process_update_block()
    super().end_book(state, marker)
7989
8090 def start_para (
@@ -99,6 +109,7 @@ def start_para(
99109 super ().start_para (state , marker , unknown , attributes )
100110
def end_para(self, state: UsfmParserState, marker: str) -> None:
    """Flush the current update block and leave any preserved paragraph.

    The block is processed before the base handler runs; afterwards the
    preserved-paragraph flag is reset.
    """
    self._process_update_block()
    super().end_para(state, marker)
    self._in_preserved_paragraph = False
104115
@@ -114,7 +125,7 @@ def start_cell(self, state: UsfmParserState, marker: str, align: str, colspan: i
114125
def end_cell(self, state: UsfmParserState, marker: str) -> None:
    """Collect pending tokens, flush the update block, then delegate to the base handler."""
    self._collect_tokens(state)
    self._process_update_block()
    super().end_cell(state, marker)
119130
120131 def start_sidebar (self , state : UsfmParserState , marker : str , category : str ) -> None :
@@ -125,6 +136,7 @@ def start_sidebar(self, state: UsfmParserState, marker: str, category: str) -> N
125136 def end_sidebar (self , state : UsfmParserState , marker : str , closed : bool ) -> None :
126137 if closed :
127138 self ._collect_tokens (state )
139+ self ._process_update_block ()
128140
129141 super ().end_sidebar (state , marker , closed )
130142
@@ -137,6 +149,7 @@ def chapter(
137149 pub_number : str ,
138150 ) -> None :
139151 self ._collect_tokens (state )
152+ self ._process_update_block ()
140153
141154 super ().chapter (state , number , marker , alt_number , pub_number )
142155
@@ -148,6 +161,7 @@ def milestone(
148161 attributes : Sequence [UsfmAttribute ],
149162 ) -> None :
150163 self ._collect_tokens (state )
164+ self ._process_update_block ()
151165
152166 super ().milestone (state , marker , start_milestone , attributes )
153167
@@ -160,6 +174,7 @@ def verse(
160174 pub_number : str ,
161175 ) -> None :
162176 self ._collect_tokens (state )
177+ self ._process_update_block ()
163178
164179 super ().verse (state , number , marker , alt_number , pub_number )
165180
@@ -196,6 +211,7 @@ def _start_embed(
196211 state : UsfmParserState ,
197212 scripture_ref : ScriptureRef ,
198213 ) -> None :
214+ self ._embed_update_block .update_ref (scripture_ref )
199215 self ._embed_row_texts = self ._advance_rows ([scripture_ref ])
200216 self ._embed_updated = any (self ._embed_row_texts )
201217
@@ -212,6 +228,7 @@ def _end_embed(
212228 else :
213229 self ._collect_tokens (state )
214230
231+ self ._process_embed_update_block ()
215232 self ._embed_row_texts .clear ()
216233 self ._embed_updated = False
217234
@@ -251,20 +268,20 @@ def unmatched(self, state: UsfmParserState, marker: str) -> None:
251268
def _start_verse_text(self, state: UsfmParserState, scripture_refs: Sequence[ScriptureRef]) -> None:
    """Queue the replacement text for the verse segment(s) that are starting.

    The row texts matching ``scripture_refs`` are fetched via ``_advance_rows``
    and pushed as pending TEXT tokens, each with a trailing space.
    """
    row_texts: List[str] = self._advance_rows(scripture_refs)
    self._push_updated_text([UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in row_texts])
255272
def _end_verse_text(self, state: UsfmParserState, scripture_refs: Sequence[ScriptureRef]) -> None:
    """Close the verse text segment by calling ``_pop_new_tokens``."""
    self._pop_new_tokens()
258275
def _start_non_verse_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None:
    """Queue the replacement text for the non-verse segment that is starting.

    The row texts matching ``scripture_ref`` are fetched via ``_advance_rows``
    and pushed as pending TEXT tokens, each with a trailing space.
    """
    row_texts = self._advance_rows([scripture_ref])
    self._push_updated_text([UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in row_texts])
262279
def _end_non_verse_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None:
    """Close the non-verse text segment by calling ``_pop_new_tokens``."""
    self._pop_new_tokens()
265282
def _start_note_text(self, state: UsfmParserState) -> None:
    """Queue the current embed row texts as pending embed TEXT tokens.

    Each entry of ``self._embed_row_texts`` becomes a TEXT token with a
    trailing space.
    """
    self._push_updated_embed_text([UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in self._embed_row_texts])
268285
269286 def _end_note_text (self , state : UsfmParserState , scripture_ref : ScriptureRef ) -> None :
270287 self ._embed_row_texts .clear ()
@@ -301,13 +318,15 @@ def _advance_rows(self, seg_scr_refs: Sequence[ScriptureRef]) -> List[str]:
301318 return row_texts
302319
def _collect_tokens(self, state: UsfmParserState) -> None:
    """Move pending text and all unconsumed source tokens into the update block.

    Pending updated text is added first, then every source token from the
    handler's current token index up to and including the parser's current
    position (``state.index + state.special_token_count``).
    """
    self._use_updated_text()
    while self._token_index <= state.index + state.special_token_count:
        self._update_block.add_token(state.tokens[self._token_index])
        self._token_index += 1
309325
def _skip_tokens(self, state: UsfmParserState) -> None:
    """Record unconsumed source tokens as removed and advance past them.

    Every source token from the handler's current token index up to the
    parser's current position is added to the update block with
    ``marked_for_removal=True``.
    """
    while self._token_index <= state.index + state.special_token_count:
        self._update_block.add_token(state.tokens[self._token_index], marked_for_removal=True)
        self._token_index += 1
    # Always leave the index just past the parser's current position, even
    # when the loop above did not run.
    self._token_index = state.index + 1 + state.special_token_count
312331
313332 def _replace_with_new_tokens (self , state : UsfmParserState , closed : bool = True ) -> bool :
@@ -343,24 +362,24 @@ def _replace_with_new_tokens(self, state: UsfmParserState, closed: bool = True)
343362
344363 if use_new_tokens :
345364 if in_embed :
346- self ._add_new_embed_tokens ()
365+ self ._use_updated_embed_text ()
347366 else :
348- self ._add_new_tokens ()
367+ self ._use_updated_text ()
349368
350369 if existing_text and (
351370 self ._text_behavior == UpdateUsfmTextBehavior .PREFER_EXISTING or self ._is_in_preserved_paragraph (marker )
352371 ):
353372 if in_embed :
354- self ._clear_new_embed_tokens ()
373+ self ._clear_updated_embed_text ()
355374 else :
356- self ._clear_new_tokens ()
375+ self ._clear_updated_text ()
357376
358377 embed_in_new_verse_text = (
359378 any (self ._replace_stack ) or self ._text_behavior == UpdateUsfmTextBehavior .STRIP_EXISTING
360379 ) and in_embed
361380 if embed_in_new_verse_text or self ._embed_updated :
362381 if self ._embed_behavior == UpdateUsfmMarkerBehavior .STRIP :
363- self ._clear_new_embed_tokens ()
382+ self ._clear_updated_embed_text ()
364383 return True
365384 if not self ._is_in_note_text () or in_nested_embed :
366385 return False
@@ -375,33 +394,56 @@ def _replace_with_new_tokens(self, state: UsfmParserState, closed: bool = True)
375394 def _has_new_text (self ) -> bool :
376395 return any (self ._replace_stack ) and self ._replace_stack [- 1 ]
377396
378- def _push_new_tokens (self , tokens : List [UsfmToken ]) -> None :
def _update_verse_ref(self, verse_ref: VerseRef, marker: str) -> None:
    """Update the verse reference and mirror it onto the current update block.

    The block receives a ``ScriptureRef`` built from a copy of ``verse_ref``.
    """
    super()._update_verse_ref(verse_ref, marker)
    self._update_block.update_ref(ScriptureRef(verse_ref.copy()))
400+
def _create_non_verse_ref(self) -> ScriptureRef:
    """Create a non-verse reference and record it on the current update block.

    Returns:
        The reference produced by the base implementation.
    """
    ref = super()._create_non_verse_ref()
    self._update_block.update_ref(ref)
    return ref
405+
def _process_update_block(self) -> None:
    """Run the current update block through the handlers and emit its tokens.

    Pending updated text is added to the block first. Each handler receives
    the block in order, and the block it returns replaces the current one.
    The resulting tokens are appended to the output token list and the
    block is cleared.
    """
    self._use_updated_text()
    for handler in self._update_block_handlers:
        self._update_block = handler.process_block(self._update_block)
    self._tokens.extend(self._update_block.get_tokens())
    self._update_block.clear()
412+
def _process_embed_update_block(self) -> None:
    """Run the embed update block through the handlers and merge it into the main block.

    Pending updated embed text is added to the embed block first. Each
    handler receives the block in order, and the block it returns replaces
    the current one. The resulting tokens are added to the main update block
    (not directly to the output) and the embed block is cleared.
    """
    self._use_updated_embed_text()
    for handler in self._update_block_handlers:
        self._embed_update_block = handler.process_block(self._embed_update_block)
    self._update_block.add_tokens(self._embed_update_block.get_tokens())
    self._embed_update_block.clear()
419+
420+ def _push_updated_text (self , tokens : List [UsfmToken ]) -> None :
379421 self ._replace_stack .append (any (tokens ))
380422 if tokens :
381- self ._new_tokens .extend (tokens )
423+ self ._updated_text .extend (tokens )
382424
383- def _add_new_tokens (self ) -> None :
384- if self ._new_tokens :
385- self ._tokens . extend (self ._new_tokens )
386- self ._new_tokens .clear ()
def _use_updated_text(self) -> None:
    """Move any pending updated text into the update block as inserted text."""
    if self._updated_text:
        self._update_block.add_inserted_text(self._updated_text)
    self._updated_text.clear()
387429
388- def _clear_new_tokens (self ) -> None :
389- self ._new_tokens .clear ()
430+ def _clear_updated_text (self ) -> None :
431+ self ._updated_text .clear ()
390432
391- def _push_new_embed_tokens (self , tokens : List [UsfmToken ]) -> None :
433+ def _push_updated_embed_text (self , tokens : List [UsfmToken ]) -> None :
392434 self ._replace_stack .append (any (tokens ))
393435 if tokens :
394- self ._new_embed_tokens .extend (tokens )
436+ self ._updated_embed_text .extend (tokens )
395437
396- def _add_new_embed_tokens (self ) -> None :
397- if self ._new_embed_tokens :
398- self ._tokens . extend (self ._new_embed_tokens )
399- self ._new_embed_tokens .clear ()
def _use_updated_embed_text(self) -> None:
    """Move any pending updated embed text into the embed update block as inserted text."""
    if self._updated_embed_text:
        self._embed_update_block.add_inserted_text(self._updated_embed_text)
    self._updated_embed_text.clear()
400442
401- def _clear_new_embed_tokens (self ) -> None :
402- self ._new_embed_tokens .clear ()
443+ def _clear_updated_embed_text (self ) -> None :
444+ self ._updated_embed_text .clear ()
403445
404- def _push_token_as_previous (self ) -> None :
446+ def _push_updated_text_as_previous (self ) -> None :
405447 self ._replace_stack .append (self ._replace_stack [- 1 ])
406448
407449 def _pop_new_tokens (self ) -> None :
0 commit comments