Skip to content

Commit 444f222

Browse files
committed
Consistently use text elements for indexing and string length; add manual test for running quote convention analysis
1 parent 8e35c45 commit 444f222

File tree

4 files changed

+43
-6
lines changed

4 files changed

+43
-6
lines changed

src/SIL.Machine/PunctuationAnalysis/QuotationMarkFinder.cs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
using System.Collections.Generic;
2+
using System.Globalization;
23
using System.Linq;
34
using PCRE;
5+
using SIL.Extensions;
46

57
namespace SIL.Machine.PunctuationAnalysis
68
{
@@ -43,11 +45,18 @@ public List<QuotationMarkStringMatch> FindAllPotentialQuotationMarksInTextSegmen
4345
_quoteConventions.IsValidOpeningQuotationMark(match.Groups[0].Value)
4446
|| _quoteConventions.IsValidClosingQuotationMark(match.Groups[0].Value)
4547
)
46-
.Select(m => new QuotationMarkStringMatch(
47-
textSegment,
48-
m.Groups[0].Index,
49-
m.Groups[0].Index + m.Groups[0].Length
50-
))
48+
.Select(m =>
49+
{
50+
int[] textElementBeginnings = StringInfo.ParseCombiningCharacters(textSegment.Text);
51+
int endIndex = textElementBeginnings.IndexOf(m.Groups[0].EndIndex);
52+
if (endIndex == -1)
53+
endIndex = textElementBeginnings.Length;
54+
return new QuotationMarkStringMatch(
55+
textSegment,
56+
textElementBeginnings.IndexOf(m.Groups[0].Index),
57+
endIndex
58+
);
59+
})
5160
.ToList();
5261
}
5362
}

src/SIL.Machine/PunctuationAnalysis/TextSegment.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.Globalization;
34
using SIL.Machine.Corpora;
45

56
namespace SIL.Machine.PunctuationAnalysis
@@ -70,7 +71,7 @@ public override int GetHashCode()
7071
return hashCode * 31 + ImmediatePrecedingMarker.GetHashCode();
7172
}
7273

73-
public int Length => Text.Length;
74+
/// <summary>
/// Gets the length of <see cref="Text"/> counted in text elements rather than UTF-16 code units:
/// the number of entries returned by <see cref="StringInfo.ParseCombiningCharacters(string)"/>,
/// which yields one start index per text element.
/// </summary>
/// <remarks>
/// The text is re-parsed on every access; callers that read this repeatedly may want to keep the
/// value in a local.
/// </remarks>
public int Length => StringInfo.ParseCombiningCharacters(Text).Length;
7475

7576
public string SubstringBefore(int index)
7677
{

tests/SIL.Machine.Tests/Corpora/CorporaTestHelpers.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@ internal static class CorporaTestHelpers
1616
);
1717
public static readonly string UsfmTestProjectPath = Path.Combine(TestDataPath, "usfm", "Tes");
1818
public static readonly string UsfmTargetProjectPath = Path.Combine(TestDataPath, "usfm", "target");
19+
public static readonly string UsfmTargetProjectZipPath = Path.Combine(TestDataPath, "project", "target");
1920
public static readonly string UsfmTargetCustomVrsPath = Path.Combine(TestDataPath, "usfm", "target", "custom.vrs");
2021
public static readonly string UsfmSourceProjectPath = Path.Combine(TestDataPath, "usfm", "source");
22+
public static readonly string UsfmSourceProjectZipPath = Path.Combine(TestDataPath, "project", "source");
2123
public static readonly string UsxTestProjectPath = Path.Combine(TestDataPath, "usx", "Tes");
2224
public static readonly string TextTestProjectPath = Path.Combine(TestDataPath, "txt");
2325
public static readonly string DeuterocanonicalsSourcePath = Path.Combine(

tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
using System.IO.Compression;
22
using System.Text.Json;
33
using NUnit.Framework;
4+
using SIL.Machine.PunctuationAnalysis;
45

56
namespace SIL.Machine.Corpora;
67

@@ -170,4 +171,28 @@ async Task GetUsfmAsync(string projectPath)
170171
await GetUsfmAsync(ParatextProjectPath);
171172
}
172173
}
174+
175+
/// <summary>
/// Manual test that runs quote convention analysis over the source and target zipped projects
/// and checks that an analysis object is produced for each. Ignored by default; remove the
/// <c>[Ignore]</c> attribute to run it locally.
/// </summary>
[Test]
176+
[Ignore("This is for manual testing only. Remove this tag to run the test.")]
177+
public void AnalyzeCorporaQuoteConventions()
178+
{
179+
// Source project: the detector is handed to the zip-based project detector as its handler.
var sourceHandler = new QuoteConventionDetector();
180+
// NOTE(review): UsfmSourceProjectZipPath is built from "project"/"source" with no file
// extension -- confirm it resolves to a zip archive that ZipFile.OpenRead can open.
using ZipArchive zipArchive = ZipFile.OpenRead(CorporaTestHelpers.UsfmSourceProjectZipPath);
181+
var quoteConventionDetector = new ZipParatextProjectQuoteConventionDetector(zipArchive);
182+
// Any return value is discarded here; the result is read from sourceHandler further down
// (presumably this call feeds the project's text into the handler -- verify).
quoteConventionDetector.GetQuoteConventionAnalysis(sourceHandler);
183+
184+
// Target project: same steps with a separate handler and archive.
var targetHandler = new QuoteConventionDetector();
185+
using ZipArchive zipArchive2 = ZipFile.OpenRead(CorporaTestHelpers.UsfmTargetProjectZipPath);
186+
var quoteConventionDetector2 = new ZipParatextProjectQuoteConventionDetector(zipArchive2);
187+
quoteConventionDetector2.GetQuoteConventionAnalysis(targetHandler);
188+
189+
QuoteConventionAnalysis sourceAnalysis = sourceHandler.DetectQuotationConvention();
190+
QuoteConventionAnalysis targetAnalysis = targetHandler.DetectQuotationConvention();
191+
192+
// Only non-null results are asserted; which convention was detected is not checked.
Assert.Multiple(() =>
193+
{
194+
Assert.NotNull(sourceAnalysis);
195+
Assert.NotNull(targetAnalysis);
196+
});
197+
}
173198
}

0 commit comments

Comments
 (0)