Skip to content

Commit 36ec6b6

Browse files
committed
Add tests; make finder more efficient
1 parent 444f222 commit 36ec6b6

File tree

3 files changed

+30
-10
lines changed

3 files changed

+30
-10
lines changed

src/SIL.Machine/PunctuationAnalysis/QuotationMarkFinder.cs

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
using System.Globalization;
33
using System.Linq;
44
using PCRE;
5-
using SIL.Extensions;
65

76
namespace SIL.Machine.PunctuationAnalysis
87
{
@@ -47,15 +46,18 @@ public List<QuotationMarkStringMatch> FindAllPotentialQuotationMarksInTextSegmen
4746
)
4847
.Select(m =>
4948
{
50-
int[] textElementBeginnings = StringInfo.ParseCombiningCharacters(textSegment.Text);
51-
int endIndex = textElementBeginnings.IndexOf(m.Groups[0].EndIndex);
52-
if (endIndex == -1)
53-
endIndex = textElementBeginnings.Length;
54-
return new QuotationMarkStringMatch(
55-
textSegment,
56-
textElementBeginnings.IndexOf(m.Groups[0].Index),
57-
endIndex
58-
);
49+
int[] textElementIndices = StringInfo.ParseCombiningCharacters(textSegment.Text);
50+
int startIndex = 0;
51+
int endIndex = textElementIndices.Length;
52+
for (int textElementIndex = 0; textElementIndex < textElementIndices.Length; textElementIndex++)
53+
{
54+
int stringIndex = textElementIndices[textElementIndex];
55+
if (stringIndex == m.Groups[0].Index)
56+
startIndex = textElementIndex;
57+
if (stringIndex == m.Groups[0].EndIndex)
58+
endIndex = textElementIndex;
59+
}
60+
return new QuotationMarkStringMatch(textSegment, startIndex, endIndex);
5961
})
6062
.ToList();
6163
}

tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationMarkFinderTests.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,22 @@ public void ThatAllPossibleQuotationMarksAreIdentified()
282282
]
283283
)
284284
);
285+
286+
Assert.That(
287+
quotationMarkFinder
288+
.FindAllPotentialQuotationMarksInTextSegment(
289+
new TextSegment.Builder().SetText("उत्पत्ति \"पुस्तकले").Build()
290+
)
291+
.SequenceEqual(
292+
[
293+
new QuotationMarkStringMatch(
294+
new TextSegment.Builder().SetText("उत्पत्ति \"पुस्तकले").Build(),
295+
6,
296+
7
297+
),
298+
]
299+
)
300+
);
285301
}
286302

287303
[Test]

tests/SIL.Machine.Tests/PunctuationAnalysis/TextSegmentTests.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,8 @@ public void Length()
188188

189189
textSegment = new TextSegment.Builder().SetText("new example text").Build();
190190
Assert.That(textSegment.Length, Is.EqualTo("new example text".Length));
191+
textSegment = new TextSegment.Builder().SetText("उत्पत्ति पुस्तकले").Build();
192+
Assert.That(textSegment.Length, Is.EqualTo(11));
191193
}
192194

193195
[Test]

0 commit comments

Comments
 (0)