@@ -9,12 +9,8 @@ public class TextSegment : IEquatable<TextSegment>
99 {
1010 public string Text
1111 {
12- get => _text ;
13- private set
14- {
15- _codePointString = new CodePointString ( value ) ;
16- _text = value ;
17- }
12+ get => _codePointString . ToString ( ) ; // the text is read back from the CodePointString instead of a separate _text field
13+ private set { _codePointString = new CodePointString ( value ) ; } // wraps the assigned value in a fresh CodePointString
1814 }
1915 public UsfmMarkerType ImmediatePrecedingMarker { get ; private set ; }
2016 public HashSet < UsfmMarkerType > MarkersInPrecedingContext { get ; private set ; }
@@ -23,8 +19,6 @@ private set
2319 public int IndexInVerse { get ; set ; }
2420 public int NumSegmentsInVerse { get ; set ; }
2521 public UsfmToken UsfmToken { get ; private set ; }
26-
27- private string _text ;
2822 private CodePointString _codePointString ;
2923
3024 public TextSegment ( )
@@ -164,6 +158,9 @@ public TextSegment Build()
164158 }
165159 }
166160
161+ /// <summary>
162+ /// Class to handle indexing of strings by Unicode code point, treating surrogate pairs as single characters.
163+ /// </summary>
167164 public class CodePointString
168165 {
169166 public string String => _stringValue ;
@@ -180,8 +177,18 @@ public CodePointString(string stringValue)
180177 . Select ( ( c , i ) => ( c , i ) )
181178 . Where ( tup => ! char . IsLowSurrogate ( tup . c ) )
182179 . Select ( ( tup , i ) => ( tup . i , i ) ) ;
183- _codePointIndexByStringIndex = indexPairs . ToDictionary ( tup => tup . StringIndex , tup => tup . CodePointIndex ) ;
184- _stringIndexByCodePointIndex = indexPairs . ToDictionary ( tup => tup . CodePointIndex , tup => tup . StringIndex ) ;
180+ _codePointIndexByStringIndex = new Dictionary < int , int > ( ) ;
181+ _stringIndexByCodePointIndex = new Dictionary < int , int > ( ) ;
182+ foreach ( ( int stringIndex , int codePointIndex ) in indexPairs ) // each pair is ( string index , code point index ) , matching the order produced by the final Select
183+ {
184+ _codePointIndexByStringIndex [ stringIndex ] = codePointIndex ;
185+ _stringIndexByCodePointIndex [ codePointIndex ] = stringIndex ;
186+ }
187+ }
188+
189+ public override string ToString ( ) // returns the wrapped string value unchanged
190+ {
191+ return _stringValue ;
185192 }
186193
187194 public string this [ int codePointIndex ]
0 commit comments