@@ -42,8 +42,12 @@ public UpdateUsfmRow(
4242 */
4343 public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase
4444 {
45- private readonly Dictionary < ScriptureRef , List < UpdateUsfmRow > > _rowMapIgnoreSegments ;
46- private readonly Dictionary < ScriptureRef , List < UpdateUsfmRow > > _rowMapCheckSegments ;
45+ private readonly IReadOnlyList < UpdateUsfmRow > _rows ;
46+ private int _rowIndex ;
47+ private VerseRef _verseRowsRef ;
48+ private readonly List < int > _verseRows ;
49+ private int _verseRowIndex ;
50+ private readonly Dictionary < VerseRef , List < RowInfo > > _verseRowsMap ;
4751 private readonly ScrVers _updateRowsVersification ;
4852 private readonly List < UsfmToken > _tokens ;
4953 private readonly List < UsfmToken > _updatedText ;
@@ -60,6 +64,7 @@ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase
6064 private readonly Stack < bool > _replace ;
6165 private int _tokenIndex ;
6266 private readonly Func < UsfmUpdateBlockHandlerException , bool > _errorHandler ;
67+ private readonly bool _compareSegments ;
6368
6469 public UpdateUsfmParserHandler (
6570 IReadOnlyList < UpdateUsfmRow > rows = null ,
@@ -71,17 +76,18 @@ public UpdateUsfmParserHandler(
7176 IEnumerable < string > preserveParagraphStyles = null ,
7277 IEnumerable < IUsfmUpdateBlockHandler > updateBlockHandlers = null ,
7378 IEnumerable < string > remarks = null ,
74- Func < UsfmUpdateBlockHandlerException , bool > errorHandler = null
79+ Func < UsfmUpdateBlockHandlerException , bool > errorHandler = null ,
80+ bool compareSegments = false
7581 )
7682 {
77- // We need two maps so that update rows can be specified per segment
78- // but be handled correctly whether or not the USFM has segments for that verse
79- ( _rowMapIgnoreSegments , _rowMapCheckSegments ) = GetRowMap ( rows ?? Array . Empty < UpdateUsfmRow > ( ) ) ;
83+ _rows = rows ?? Array . Empty < UpdateUsfmRow > ( ) ;
84+ _verseRows = new List < int > ( ) ;
85+ _verseRowsMap = new Dictionary < VerseRef , List < RowInfo > > (
86+ compareSegments ? VerseRefComparer . Default : VerseRefComparer . IgnoreSegments
87+ ) ;
8088 _updateRowsVersification = ScrVers . English ;
81- if ( rows != null && rows . Count > 0 )
82- {
83- _updateRowsVersification = rows . First ( r => r . Refs . Count > 0 ) . Refs [ 0 ] . Versification ;
84- }
89+ if ( _rows . Count > 0 )
90+ _updateRowsVersification = _rows . First ( r => r . Refs . Count > 0 ) . Refs [ 0 ] . Versification ;
8591 _tokens = new List < UsfmToken > ( ) ;
8692 _updatedText = new List < UsfmToken > ( ) ;
8793 _updateBlocks = new Stack < UsfmUpdateBlock > ( ) ;
@@ -104,6 +110,7 @@ public UpdateUsfmParserHandler(
104110 _errorHandler = errorHandler ;
105111 if ( _errorHandler == null )
106112 _errorHandler = ( error ) => false ;
113+ _compareSegments = compareSegments ;
107114 }
108115
109116 public IReadOnlyList < UsfmToken > Tokens => _tokens ;
@@ -116,6 +123,10 @@ public override void EndUsfm(UsfmParserState state)
116123
117124 public override void StartBook ( UsfmParserState state , string marker , string code )
118125 {
126+ _verseRowsRef = state . VerseRef ;
127+ UpdateVerseRowsMap ( ) ;
128+ UpdateVerseRows ( ) ;
129+
119130 CollectReadonlyTokens ( state ) ;
120131 _updateBlocks . Push ( new UsfmUpdateBlock ( ) ) ;
121132 var startBookTokens = new List < UsfmToken > ( ) ;
@@ -146,7 +157,7 @@ IReadOnlyList<UsfmAttribute> attributes
146157 if ( state . IsVerseText )
147158 {
148159 // Only strip paragraph markers in a verse
149- if ( _paragraphBehavior == UpdateUsfmMarkerBehavior . Preserve && ! _duplicateVerse )
160+ if ( _paragraphBehavior == UpdateUsfmMarkerBehavior . Preserve && ! DuplicateVerse )
150161 {
151162 CollectUpdatableTokens ( state ) ;
152163 }
@@ -202,6 +213,13 @@ string pubNumber
202213 {
203214 UseUpdatedText ( ) ;
204215
216+ if ( ! _verseRowsRef . Equals ( state . VerseRef ) )
217+ {
218+ _verseRowsRef = state . VerseRef ;
219+ UpdateVerseRowsMap ( ) ;
220+ UpdateVerseRows ( ) ;
221+ }
222+
205223 base . Chapter ( state , number , marker , altNumber , pubNumber ) ;
206224
207225 CollectReadonlyTokens ( state ) ;
@@ -239,9 +257,15 @@ string pubNumber
239257 }
240258 }
241259
260+ if ( ! _verseRowsRef . Equals ( state . VerseRef ) )
261+ {
262+ _verseRowsRef = state . VerseRef ;
263+ UpdateVerseRows ( ) ;
264+ }
265+
242266 base . Verse ( state , number , marker , altNumber , pubNumber ) ;
243267
244- if ( _duplicateVerse )
268+ if ( DuplicateVerse )
245269 {
246270 SkipUpdatableTokens ( state ) ;
247271 }
@@ -254,7 +278,7 @@ string pubNumber
254278 public override void StartNote ( UsfmParserState state , string marker , string caller , string category )
255279 {
256280 base . StartNote ( state , marker , caller , category ) ;
257- if ( ! _duplicateVerse )
281+ if ( ! DuplicateVerse )
258282 CollectUpdatableTokens ( state ) ;
259283 else
260284 SkipUpdatableTokens ( state ) ;
@@ -337,7 +361,7 @@ public override void Text(UsfmParserState state, string text)
337361 base . Text ( state , text ) ;
338362
339363 // strip out text in verses that are being replaced
340- if ( ReplaceWithNewTokens ( state ) || ( _duplicateVerse && CurrentTextType == ScriptureTextType . Verse ) )
364+ if ( ReplaceWithNewTokens ( state ) || ( DuplicateVerse && CurrentTextType == ScriptureTextType . Verse ) )
341365 SkipUpdatableTokens ( state ) ;
342366 else
343367 CollectUpdatableTokens ( state ) ;
@@ -425,63 +449,48 @@ public string GetUsfm(UsfmStylesheet stylesheet)
425449 return tokenizer . Detokenize ( tokens ) ;
426450 }
427451
428- private (
429- Dictionary < ScriptureRef , List < UpdateUsfmRow > > RowMapIgnoreSegments ,
430- Dictionary < ScriptureRef , List < UpdateUsfmRow > > RowMapCheckSegments
431- ) GetRowMap ( IEnumerable < UpdateUsfmRow > rows )
432- {
433- var rowMapIgnoreSegments = new Dictionary < ScriptureRef , List < UpdateUsfmRow > > (
434- comparer : ScriptureRefComparer . IgnoreSegments
435- ) ;
436- var rowMapCheckSegments = new Dictionary < ScriptureRef , List < UpdateUsfmRow > > (
437- comparer : ScriptureRefComparer . Default
438- ) ;
439- foreach ( UpdateUsfmRow row in rows )
440- {
441- ScriptureRef sr = row . Refs [ 0 ] ;
442- if ( ! rowMapIgnoreSegments . ContainsKey ( sr ) )
443- rowMapIgnoreSegments [ sr ] = new List < UpdateUsfmRow > ( ) ;
444- rowMapIgnoreSegments [ sr ] . Add ( row ) ;
445- if ( ! rowMapCheckSegments . ContainsKey ( sr ) )
446- rowMapCheckSegments [ sr ] = new List < UpdateUsfmRow > ( ) ;
447- rowMapCheckSegments [ sr ] . Add ( row ) ;
448- }
449- return ( rowMapIgnoreSegments , rowMapCheckSegments ) ;
450- }
451-
452- private List < UpdateUsfmRow > GetRowsForRef ( ScriptureRef sr )
453- {
454- var normalizedScriptureRef = sr . ChangeVersification ( _updateRowsVersification ) ;
455- if ( _rowMapCheckSegments . TryGetValue ( normalizedScriptureRef , out List < UpdateUsfmRow > rows ) )
456- {
457- return rows ;
458- }
459- else if ( _rowMapIgnoreSegments . TryGetValue ( normalizedScriptureRef , out rows ) )
460- {
461- return rows ;
462- }
463- return new List < UpdateUsfmRow > ( ) ;
464- }
465-
466- private ( IReadOnlyList < string > RowTexts , Dictionary < string , object > Metadata ) GetRows (
452+ private ( IReadOnlyList < string > RowTexts , Dictionary < string , object > Metadata ) AdvanceRows (
467453 IReadOnlyList < ScriptureRef > segScrRefs
468454 )
469455 {
470456 var rowTexts = new List < string > ( ) ;
471457 Dictionary < string , object > rowMetadata = null ;
472- foreach ( ScriptureRef sr in segScrRefs )
458+ int sourceIndex = 0 ;
459+ // search the sorted rows with updated text, starting from where we left off last.
460+ while ( _verseRowIndex < _verseRows . Count && sourceIndex < segScrRefs . Count )
473461 {
474- List < UpdateUsfmRow > rows = GetRowsForRef ( sr ) ;
475- foreach ( UpdateUsfmRow row in rows )
462+ // get the set of references for the current row
463+ int compare = 0 ;
464+ UpdateUsfmRow row = _rows [ _verseRows [ _verseRowIndex ] ] ;
465+ ( IReadOnlyList < ScriptureRef > rowScrRefs , string text , IReadOnlyDictionary < string , object > metadata ) = (
466+ row . Refs ,
467+ row . Text ,
468+ row . Metadata
469+ ) ;
470+ foreach ( ScriptureRef rowScrRef in rowScrRefs )
471+ {
472+ while ( sourceIndex < segScrRefs . Count )
473+ {
474+ compare = rowScrRef . CompareTo ( segScrRefs [ sourceIndex ] , compareSegments : _compareSegments ) ;
475+ if ( compare > 0 )
476+ // row is ahead of source, increment source
477+ sourceIndex ++ ;
478+ else
479+ break ;
480+ }
481+ if ( compare == 0 )
482+ {
483+ // source and row match
484+ // grab the text - both source and row will be incremented in due time...
485+ rowTexts . Add ( text ) ;
486+ rowMetadata = metadata . ToDictionary ( kvp => kvp . Key , kvp => kvp . Value ) ;
487+ break ;
488+ }
489+ }
490+ if ( compare <= 0 )
476491 {
477- (
478- IReadOnlyList < ScriptureRef > rowScrRefs ,
479- string text ,
480- IReadOnlyDictionary < string , object > metadata
481- ) = ( row . Refs , row . Text , row . Metadata ) ;
482-
483- rowTexts . Add ( text ) ;
484- rowMetadata = metadata . ToDictionary ( kvp => kvp . Key , kvp => kvp . Value ) ;
492+ // source is ahead of row, increment row
493+ _verseRowIndex ++ ;
485494 }
486495 }
487496 return ( rowTexts , rowMetadata ) ;
@@ -588,7 +597,7 @@ private bool HasNewText()
588597
589598 private void StartUpdateBlock ( IReadOnlyList < ScriptureRef > scriptureRefs )
590599 {
591- ( IReadOnlyList < string > rowTexts , Dictionary < string , object > metadata ) = GetRows ( scriptureRefs ) ;
600+ ( IReadOnlyList < string > rowTexts , Dictionary < string , object > metadata ) = AdvanceRows ( scriptureRefs ) ;
592601 _updateBlocks . Push (
593602 new UsfmUpdateBlock ( scriptureRefs , metadata : metadata ?? new Dictionary < string , object > ( ) )
594603 ) ;
@@ -679,5 +688,63 @@ private bool IsNonverseParagraph(UsfmParserState state, UsfmUpdateBlockElement e
679688 UsfmTag paraTag = state . Stylesheet . GetTag ( paraToken . Marker ) ;
680689 return paraTag . TextType != UsfmTextType . VerseText && paraTag . TextType != UsfmTextType . NotSpecified ;
681690 }
691+
// Rebuilds the verse-to-rows lookup for the chapter referenced by _verseRowsRef.
//
// The map is cleared and then filled from _rows, starting at _rowIndex and continuing for as long as the
// first reference of the current row has the same chapter number as _verseRowsRef. Each row that is read is
// registered under the VerseRef of every one of its references, and all registrations of one row share a
// single RowInfo instance. On return, _rowIndex points at the first row that was not read.
//
// Only the chapter number of the first reference is compared, and the scan stops at the first row whose
// chapter differs.
private void UpdateVerseRowsMap()
{
    _verseRowsMap.Clear();
    while (_rowIndex < _rows.Count)
    {
        UpdateUsfmRow row = _rows[_rowIndex];
        if (row.Refs.Count == 0)
        {
            // A row without references cannot be registered under any verse. Step over it rather than
            // failing on Refs[0] below.
            _rowIndex++;
            continue;
        }

        if (row.Refs[0].ChapterNum != _verseRowsRef.ChapterNum)
            break;

        // One shared RowInfo per row, so the consumed flag is common to all of its verse entries.
        var rowInfo = new RowInfo(_rowIndex);
        foreach (ScriptureRef scriptureRef in row.Refs)
        {
            if (!_verseRowsMap.TryGetValue(scriptureRef.VerseRef, out List<RowInfo> rowInfos))
            {
                rowInfos = new List<RowInfo>();
                _verseRowsMap[scriptureRef.VerseRef] = rowInfos;
            }
            rowInfos.Add(rowInfo);
        }
        _rowIndex++;
    }
}
711+
// Fills _verseRows with the indices of the rows registered in _verseRowsMap for the verse(s) of
// _verseRowsRef, and resets _verseRowIndex to the start of that list. Rows already flagged as consumed are
// left out, and every row that is added is flagged as consumed.
private void UpdateVerseRows()
{
    // _verseRowsMap is a dictionary and therefore looks keys up through an equality comparer, so the lookup
    // reference is first converted to the versification of the rows. (A SortedList would not need the
    // conversion, but would be less efficient.)
    // NOTE(review): this relies on VerseRef being a value type, so that only the local copy is converted
    // and _verseRowsRef itself stays untouched - confirm.
    VerseRef lookupRef = _verseRowsRef;
    lookupRef.ChangeVersification(_updateRowsVersification);

    _verseRowIndex = 0;
    _verseRows.Clear();

    foreach (VerseRef verse in lookupRef.AllVerses())
    {
        if (!_verseRowsMap.TryGetValue(verse, out List<RowInfo> candidates))
            continue;

        foreach (RowInfo candidate in candidates)
        {
            if (candidate.IsConsumed)
                continue;

            // Flag the row so that it is added only once, even if it is registered under several verses.
            candidate.IsConsumed = true;
            _verseRows.Add(candidate.RowIndex);
        }
    }
}
738+
// Bookkeeping entry for a single update row: the position of the row and a flag recording whether the row
// has been marked as consumed.
private class RowInfo
{
    // Index of the row, as passed to the constructor.
    public int RowIndex { get; set; }

    // False until it is explicitly set to true.
    public bool IsConsumed { get; set; }

    public RowInfo(int rowIndex) => RowIndex = rowIndex;
}
682749 }
683750}
0 commit comments