@@ -9,8 +9,8 @@ public class TextSegment : IEquatable<TextSegment>
99 {
/// <summary>
/// The segment's text as an ordinary .NET string.
/// </summary>
/// <remarks>
/// The value is stored in a <see cref="SurrogatePairString"/>; reading this property returns that
/// wrapper's string form, and assigning it builds a new wrapper around the assigned value.
/// </remarks>
public string Text
{
    get
    {
        return _surrogatePairString.ToString();
    }
    private set
    {
        _surrogatePairString = new SurrogatePairString(value);
    }
}
1515 public UsfmMarkerType ImmediatePrecedingMarker { get ; private set ; }
1616 public HashSet < UsfmMarkerType > MarkersInPrecedingContext { get ; private set ; }
@@ -19,7 +19,7 @@ public string Text
1919 public int IndexInVerse { get ; set ; }
2020 public int NumSegmentsInVerse { get ; set ; }
2121 public UsfmToken UsfmToken { get ; private set ; }
22- private CodePointString _codePointString ;
22+ private SurrogatePairString _surrogatePairString ;
2323
2424 public TextSegment ( )
2525 {
@@ -76,11 +76,11 @@ public override int GetHashCode()
7676 return hashCode * 31 + ImmediatePrecedingMarker . GetHashCode ( ) ;
7777 }
7878
/// <summary>
/// Length of <see cref="Text"/> as reported by the backing <see cref="SurrogatePairString"/>,
/// which does not count low-surrogate chars.
/// </summary>
public int Length
{
    get { return _surrogatePairString.Length; }
}
8080
/// <summary>
/// Returns part of <see cref="Text"/>, delegating to the backing <see cref="SurrogatePairString"/>.
/// </summary>
/// <param name="startIndex">Start position, in the same units as <see cref="Length"/>.</param>
/// <param name="length">Number of characters to take, in the same units as <see cref="Length"/>.</param>
/// <returns>The requested portion of the text.</returns>
public string Substring(int startIndex, int length) =>
    _surrogatePairString.Substring(startIndex, length);
8585
8686 public string SubstringBefore ( int index )
@@ -161,28 +161,28 @@ public TextSegment Build()
161161 /// <summary>
162162 /// Class to handle indexing of strings by unicode code point, treating surrogate pairs as single characters.
163163 /// </summary>
164- public class CodePointString
164+ public class SurrogatePairString
165165 {
/// <summary>The wrapped string, exactly as passed to the constructor.</summary>
public string String
{
    get { return _stringValue; }
}

/// <summary>
/// Number of chars in the wrapped string that are not low surrogates, so a surrogate pair
/// contributes one to the count.
/// </summary>
public int Length
{
    get { return _stringIndexBySurrogatePairIndex.Count; }
}

private readonly string _stringValue;

// NOTE(review): as the constructor populates these two maps, their contents are the reverse of
// what the names say:
//   _surrogatePairIndexByStringIndex : key = surrogate-pair index, value = UTF-16 offset
//   _stringIndexBySurrogatePairIndex : key = UTF-16 offset,        value = surrogate-pair index
// Both always hold the same number of entries.
private readonly Dictionary<int, int> _surrogatePairIndexByStringIndex;
private readonly Dictionary<int, int> _stringIndexBySurrogatePairIndex;
172172
/// <summary>
/// Wraps <paramref name="stringValue"/> and precomputes two lookup tables that relate UTF-16
/// offsets in the string to positions counted with every low surrogate skipped.
/// </summary>
/// <param name="stringValue">The string to index.</param>
public SurrogatePairString(string stringValue)
{
    _stringValue = stringValue;

    // UTF-16 offsets of every char that is not a low surrogate, in ascending order.
    IEnumerable<int> leadingOffsets = _stringValue
        .Select((ch, offset) => (ch, offset))
        .Where(entry => !char.IsLowSurrogate(entry.ch))
        .Select(entry => entry.offset);

    _surrogatePairIndexByStringIndex = new Dictionary<int, int>();
    _stringIndexBySurrogatePairIndex = new Dictionary<int, int>();

    // NOTE(review): the assignments below intentionally reproduce the existing behavior, in which
    // _surrogatePairIndexByStringIndex ends up keyed by the surrogate-pair ordinal (value = UTF-16
    // offset) and _stringIndexBySurrogatePairIndex ends up keyed by the UTF-16 offset
    // (value = ordinal), the opposite of what the field names suggest.
    int ordinal = 0;
    foreach (int offset in leadingOffsets)
    {
        _surrogatePairIndexByStringIndex[ordinal] = offset;
        _stringIndexBySurrogatePairIndex[offset] = ordinal;
        ordinal++;
    }
}
188188
@@ -191,17 +191,17 @@ public override string ToString()
191191 return _stringValue ;
192192 }
193193
194- public string this [ int codePointIndex ]
194+ public string this [ int surrogatePairIndex ]
195195 {
196196 get
197197 {
198- if ( codePointIndex < 0 || codePointIndex > Length )
198+ if ( surrogatePairIndex < 0 || surrogatePairIndex > Length )
199199 {
200200 throw new IndexOutOfRangeException (
201- $ "Index { codePointIndex } is out of bounds for CodePointString with length { Length } ."
201+ $ "Index { surrogatePairIndex } is out of bounds for SurrogatePairString with length { Length } ."
202202 ) ;
203203 }
204- int stringIndex = _stringIndexByCodePointIndex [ codePointIndex ] ;
204+ int stringIndex = _stringIndexBySurrogatePairIndex [ surrogatePairIndex ] ;
205205 char characterAtStringIndex = _stringValue [ stringIndex ] ;
206206 if (
207207 stringIndex < _stringValue . Length
@@ -214,34 +214,34 @@ public string this[int codePointIndex]
214214 }
215215 }
216216
/// <summary>
/// Converts a UTF-16 offset in the wrapped string to the corresponding surrogate-pair index
/// (the position counted with low surrogates skipped).
/// </summary>
/// <param name="stringIndex">
/// A UTF-16 offset at which a char that is not a low surrogate begins, or the string's length to
/// denote the end position.
/// </param>
/// <returns>
/// The surrogate-pair index for <paramref name="stringIndex"/>; the total count when
/// <paramref name="stringIndex"/> equals the string's length.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="stringIndex"/> is not the offset of a char recorded by the constructor
/// (for example it points at a low surrogate or lies outside the string).
/// </exception>
public int GetSurrogatePairIndexForStringIndex(int stringIndex)
{
    if (stringIndex == _stringValue.Length)
    {
        return _surrogatePairIndexByStringIndex.Count;
    }

    // The constructor fills _stringIndexBySurrogatePairIndex with key = UTF-16 offset and
    // value = surrogate-pair index (the reverse of what its name suggests), so that is the map
    // to query here. Looking the offset up in _surrogatePairIndexByStringIndex, which is really
    // keyed by surrogate-pair index, gave wrong answers for any string containing a surrogate pair.
    if (!_stringIndexBySurrogatePairIndex.TryGetValue(stringIndex, out int surrogatePairIndex))
    {
        throw new ArgumentException($"No non-surrogate code point begins at index {stringIndex}");
    }
    return surrogatePairIndex;
}
229229
/// <summary>
/// Extracts a substring addressed in surrogate-pair indices rather than UTF-16 offsets.
/// </summary>
/// <param name="startSurrogatePairIndex">Surrogate-pair index of the first character to include.</param>
/// <param name="length">Number of surrogate-pair positions to include.</param>
/// <returns>The matching slice of the wrapped string.</returns>
public string Substring(int startSurrogatePairIndex, int length)
{
    // Translate both ends of the requested range to UTF-16 offsets, then slice the raw string.
    int firstOffset = GetStringIndexForSurrogatePairIndex(startSurrogatePairIndex);
    int offsetPastEnd = GetStringIndexForSurrogatePairIndex(startSurrogatePairIndex + length);
    int unitCount = offsetPastEnd - firstOffset;
    return _stringValue.Substring(firstOffset, unitCount);
}
237237
/// <summary>
/// Converts a surrogate-pair index to the UTF-16 offset at which that character starts in the
/// wrapped string.
/// </summary>
/// <param name="surrogatePairIndex">
/// A surrogate-pair index, or the total count to denote the end position.
/// </param>
/// <returns>
/// The UTF-16 offset for <paramref name="surrogatePairIndex"/>; the string's length when the
/// index equals the total count.
/// </returns>
public int GetStringIndexForSurrogatePairIndex(int surrogatePairIndex)
{
    bool isEndPosition = surrogatePairIndex == _surrogatePairIndexByStringIndex.Count;

    // As populated by the constructor, _surrogatePairIndexByStringIndex is keyed by surrogate-pair
    // index and stores UTF-16 offsets, so it is the right map for this direction despite its name.
    // The dictionary indexer throws for an index that was never recorded.
    return isEndPosition
        ? _stringValue.Length
        : _surrogatePairIndexByStringIndex[surrogatePairIndex];
}
246246 }
247247}
0 commit comments