55from ..scripture .verse_ref import VerseRef , are_overlapping_verse_ranges
66from .corpora_utils import merge_verse_ranges
77from .scripture_element import ScriptureElement
8- from .scripture_embed import EMBED_PART_START_CHAR_STYLES , is_embed_part_style , is_embed_style , is_note_text
98from .scripture_ref import ScriptureRef
109from .usfm_parser_handler import UsfmParserHandler
1110from .usfm_parser_state import UsfmParserState
@@ -16,7 +15,14 @@ class ScriptureTextType(Enum):
1615 NONE = auto ()
1716 NONVERSE = auto ()
1817 VERSE = auto ()
19- NOTE_TEXT = auto ()
18+ EMBED = auto ()
19+
20+
# Marker names treated as embeds; markers are compared after removing "*" from both ends.
_EMBED_STYLES = {"f", "fe", "x", "fig"}


def _is_embed_style(marker: Optional[str]) -> bool:
    """Return True if ``marker`` names an embed style.

    A marker matches when, with leading/trailing ``*`` characters removed, it
    is a member of ``_EMBED_STYLES``, or when the marker starts with ``z``.
    ``None`` never matches.
    """
    if marker is None:
        return False
    return marker.strip("*") in _EMBED_STYLES or marker.startswith("z")
2026
2127
2228class ScriptureRefUsfmParserHandler (UsfmParserHandler , ABC ):
@@ -25,18 +31,11 @@ def __init__(self) -> None:
2531 self ._cur_elements_stack : List [ScriptureElement ] = []
2632 self ._cur_text_type_stack : List [ScriptureTextType ] = []
2733 self ._duplicate_verse : bool = False
28- self ._in_preserved_paragraph : bool = False
29- self ._in_embed : bool = False
30- self ._in_note_text : bool = False
31- self ._in_nested_embed : bool = False
3234
@property
def _current_text_type(self) -> ScriptureTextType:
    """Text type on top of ``_cur_text_type_stack``, or ``NONE`` when the stack is empty."""
    if not self._cur_text_type_stack:
        return ScriptureTextType.NONE
    return self._cur_text_type_stack[-1]
3638
37- def _is_in_note_text (self ) -> bool :
38- return self ._in_note_text
39-
def end_usfm(self, state: UsfmParserState) -> None:
    """Parser callback for the end of USFM input; delegates to ``_end_verse_text_wrapper``."""
    self._end_verse_text_wrapper(state)
4241
@@ -112,32 +111,6 @@ def start_sidebar(self, state: UsfmParserState, marker: str, category: str) -> N
def end_sidebar(self, state: UsfmParserState, marker: str, closed: bool) -> None:
    """Sidebar-end callback: calls ``_end_parent_element``; the arguments are not used here."""
    self._end_parent_element()
114113
115- def start_note (self , state : UsfmParserState , marker : str , caller : str , category : Optional [str ]) -> None :
116- self ._in_embed = True
117- self ._start_embed_wrapper (state , marker )
118-
119- def end_note (self , state : UsfmParserState , marker : str , closed : bool ) -> None :
120- self ._end_note_text_wrapper (state )
121- self ._end_embed (state , marker , None , closed )
122- self ._in_embed = False
123-
124- def _start_embed_wrapper (self , state : UsfmParserState , marker : str ) -> None :
125- if self ._cur_verse_ref .is_default :
126- self ._update_verse_ref (state .verse_ref , marker )
127-
128- if not self ._duplicate_verse :
129- self ._check_convert_verse_para_to_non_verse (state )
130- self ._next_element (marker )
131-
132- self ._start_embed (state , self ._create_non_verse_ref ())
133-
134- def _start_embed (self , state : UsfmParserState , scripture_ref : ScriptureRef ) -> None : ...
135-
136- def _end_embed (
137- self , state : UsfmParserState , marker : str , attributes : Optional [Sequence [UsfmAttribute ]], closed : bool
138- ) -> None :
139- pass
140-
141114 def text (self , state : UsfmParserState , text : str ) -> None :
142115 # if we hit text in a verse paragraph and we aren't in a verse, then start a non-verse segment
143116 if text .strip ():
@@ -149,29 +122,23 @@ def opt_break(self, state: UsfmParserState) -> None:
def start_char(
    self, state: UsfmParserState, marker: str, unknown: bool, attributes: Optional[Sequence[UsfmAttribute]]
) -> None:
    """Character-marker start callback.

    Runs the verse-paragraph-to-non-verse check first, then opens an embed
    text segment when ``marker`` is an embed style. ``unknown`` and
    ``attributes`` are not used here.
    """
    # if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse segment
    self._check_convert_verse_para_to_non_verse(state)

    if _is_embed_style(marker):
        self._start_embed_text_wrapper(state, marker)
def end_char(
    self, state: UsfmParserState, marker: str, attributes: Optional[Sequence[UsfmAttribute]], closed: bool
) -> None:
    """Character-marker end callback.

    Closes the embed text segment when ``marker`` is an embed style.
    ``attributes`` and ``closed`` are not used here.
    """
    if _is_embed_style(marker):
        self._end_embed_text_wrapper(state)
136+
def start_note(self, state: UsfmParserState, marker: str, caller: str, category: Optional[str]) -> None:
    """Note start callback: opens an embed text segment for ``marker``.

    ``caller`` and ``category`` are not used here.
    """
    self._start_embed_text_wrapper(state, marker)
139+
def end_note(self, state: UsfmParserState, marker: str, closed: bool) -> None:
    """Note end callback: closes the current embed text segment.

    ``marker`` and ``closed`` are not used here.
    """
    self._end_embed_text_wrapper(state)
175142
def _start_verse_text(self, state: UsfmParserState, scripture_refs: Optional[Sequence[ScriptureRef]]) -> None:
    """Hook with an empty body in this class.

    NOTE(review): presumably overridden by subclasses to react to the start of verse text — confirm.
    """
    ...
177144
@@ -181,20 +148,9 @@ def _start_non_verse_text(self, state: UsfmParserState, scripture_ref: Scripture
181148
def _end_non_verse_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None:
    """Hook with an empty body in this class.

    NOTE(review): presumably overridden by subclasses to react to the end of non-verse text — confirm.
    """
    ...
183150
184- def _start_note_text_wrapper (self , state : UsfmParserState ):
185- self ._in_note_text = True
186- self ._cur_text_type_stack .append (ScriptureTextType .NOTE_TEXT )
187- self ._start_note_text (state )
188-
189- def _start_note_text (self , state : UsfmParserState ) -> None : ...
190-
191- def _end_note_text_wrapper (self , state : UsfmParserState ):
192- if self ._cur_text_type_stack and self ._cur_text_type_stack [- 1 ] == ScriptureTextType .NOTE_TEXT :
193- self ._end_note_text (state , self ._create_non_verse_ref ())
194- self ._cur_text_type_stack .pop ()
195- self ._in_note_text = False
def _start_embed_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None:
    """Hook called by ``_start_embed_text_wrapper`` with a freshly created non-verse ref.

    The body is empty in this class.
    """
    ...
196152
197- def _end_note_text (self , state : UsfmParserState , scripture_ref : ScriptureRef ) -> None : ...
def _end_embed_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None:
    """Hook called by ``_end_embed_text_wrapper`` with a freshly created non-verse ref.

    The body is empty in this class.
    """
    ...
198154
199155 def _start_verse_text_wrapper (self , state : UsfmParserState ) -> None :
200156 self ._duplicate_verse = False
@@ -222,6 +178,25 @@ def _update_verse_ref(self, verse_ref: VerseRef, marker: str) -> None:
222178 self ._cur_elements_stack .append (ScriptureElement (0 , marker ))
223179 self ._cur_verse_ref = verse_ref .copy ()
224180
def _start_embed_text_wrapper(self, state: UsfmParserState, marker: str) -> None:
    """Open an embed text segment for ``marker`` and call ``_start_embed_text``.

    If the current verse ref is still the default, it is first set from
    ``state.verse_ref``. Nothing further happens while ``_duplicate_verse``
    is set.
    """
    if self._cur_verse_ref.is_default:
        self._update_verse_ref(state.verse_ref, marker)

    if self._duplicate_verse:
        return

    self._check_convert_verse_para_to_non_verse(state)
    self._next_element(marker)
    # Record EMBED on the stack so _end_embed_text_wrapper can match and pop it.
    self._cur_text_type_stack.append(ScriptureTextType.EMBED)
    self._start_embed_text(state, self._create_non_verse_ref())
190+
def _end_embed_text_wrapper(self, state: UsfmParserState) -> None:
    """Close the current embed text segment, if one is open.

    Acts only when ``_duplicate_verse`` is not set and the top of
    ``_cur_text_type_stack`` is ``EMBED``: calls ``_end_embed_text`` and then
    pops that stack entry.
    """
    if self._duplicate_verse or not self._cur_text_type_stack:
        return
    if self._cur_text_type_stack[-1] == ScriptureTextType.EMBED:
        self._end_embed_text(state, self._create_non_verse_ref())
        self._cur_text_type_stack.pop()
199+
225200 def _next_element (self , marker : str ) -> None :
226201 prev_elem : ScriptureElement = self ._cur_elements_stack .pop ()
227202 self ._cur_elements_stack .append (ScriptureElement (prev_elem .position + 1 , marker ))
@@ -234,7 +209,7 @@ def _end_parent_element(self) -> None:
234209 self ._cur_elements_stack .pop ()
235210
def _end_embed_elements(self) -> None:
    """Pop the top entry of ``_cur_elements_stack`` when its name is an embed style."""
    if self._cur_elements_stack and _is_embed_style(self._cur_elements_stack[-1].name):
        self._cur_elements_stack.pop()
239214
240215 def _create_verse_refs (self ) -> List [ScriptureRef ]:
@@ -263,11 +238,3 @@ def _check_convert_verse_para_to_non_verse(self, state: UsfmParserState) -> None
263238 ):
264239 self ._start_parent_element (para_tag .marker )
265240 self ._start_non_verse_text_wrapper (state )
266-
267- def _is_in_embed (self , marker : Optional [str ]) -> bool :
268- return self ._in_embed or is_embed_style (marker )
269-
270- def _is_in_nested_embed (self , marker : Optional [str ]) -> bool :
271- return self ._in_nested_embed or (
272- marker is not None and marker .startswith ("+" ) and marker [1 ] in EMBED_PART_START_CHAR_STYLES
273- )
0 commit comments