Skip to content

Commit 46a3e2a

Browse files
committed
Address reviewer comments
1 parent bc5f656 commit 46a3e2a

File tree

1 file changed

+17
-10
lines changed

1 file changed

+17
-10
lines changed

src/SIL.Machine/PunctuationAnalysis/TextSegment.cs

Lines changed: 17 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,8 @@ public class TextSegment : IEquatable<TextSegment>
99
{
1010
public string Text
1111
{
12-
get => _text;
13-
private set
14-
{
15-
_codePointString = new CodePointString(value);
16-
_text = value;
17-
}
12+
get => _codePointString.ToString();
13+
private set { _codePointString = new CodePointString(value); }
1814
}
1915
public UsfmMarkerType ImmediatePrecedingMarker { get; private set; }
2016
public HashSet<UsfmMarkerType> MarkersInPrecedingContext { get; private set; }
@@ -23,8 +19,6 @@ private set
2319
public int IndexInVerse { get; set; }
2420
public int NumSegmentsInVerse { get; set; }
2521
public UsfmToken UsfmToken { get; private set; }
26-
27-
private string _text;
2822
private CodePointString _codePointString;
2923

3024
public TextSegment()
@@ -164,6 +158,9 @@ public TextSegment Build()
164158
}
165159
}
166160

161+
/// <summary>
162+
/// Provides indexing into a string by Unicode code point, treating each surrogate pair as a single character.
163+
/// </summary>
167164
public class CodePointString
168165
{
169166
public string String => _stringValue;
@@ -180,8 +177,18 @@ public CodePointString(string stringValue)
180177
.Select((c, i) => (c, i))
181178
.Where(tup => !char.IsLowSurrogate(tup.c))
182179
.Select((tup, i) => (tup.i, i));
183-
_codePointIndexByStringIndex = indexPairs.ToDictionary(tup => tup.StringIndex, tup => tup.CodePointIndex);
184-
_stringIndexByCodePointIndex = indexPairs.ToDictionary(tup => tup.CodePointIndex, tup => tup.StringIndex);
180+
_codePointIndexByStringIndex = new Dictionary<int, int>();
181+
_stringIndexByCodePointIndex = new Dictionary<int, int>();
182+
// Each element of indexPairs is (string index, code point index): the first item is the char's
// position in the UTF-16 string, the second is its ordinal among the chars that are not low
// surrogates. Tuple deconstruction is positional, so the variables must be declared in that
// same order; declaring them the other way round fills both lookup tables with swapped values,
// which only becomes visible once the string contains a surrogate pair.
foreach ((int stringIndex, int codePointIndex) in indexPairs)
{
    _codePointIndexByStringIndex[stringIndex] = codePointIndex;
    _stringIndexByCodePointIndex[codePointIndex] = stringIndex;
}
187+
}
188+
189+
/// <summary>
/// Returns the wrapped string value, the same value exposed by the <see cref="String"/> property.
/// </summary>
public override string ToString() => _stringValue;
186193

187194
public string this[int codePointIndex]

0 commit comments

Comments
 (0)