Skip to content

Commit 528dc72

Browse files
authored
Usfm versification error detection improvements (#368)
* ExpectedVerse should always be empty even if verse ref fails to parse if type is ExtraVerse; filter books; add project name
* Add tests for expected/actual verse ref since there is so much logic in them; fix glaring verseref parsing error
* Add project name to error
1 parent ceef559 commit 528dc72

4 files changed

Lines changed: 61 additions & 14 deletions

File tree

src/SIL.Machine/Corpora/ParatextProjectSettings.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,13 +104,13 @@ public string GetBookFileName(string bookId)
104104
return FileNamePrefix + bookPart + FileNameSuffix;
105105
}
106106

107-
public IEnumerable<string> GetAllScriptureBookFileNames()
107+
public IEnumerable<string> GetAllScriptureBookIds()
108108
{
109109
BookSet scriptureBooks = Canon.ScriptureBooks;
110110
scriptureBooks.SelectAll();
111111
foreach (string bookId in scriptureBooks.SelectedBookIds)
112112
{
113-
yield return GetBookFileName(bookId);
113+
yield return bookId;
114114
}
115115
}
116116

src/SIL.Machine/Corpora/ParatextProjectVersificationErrorDetectorBase.cs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using System.Collections.Generic;
33
using System.IO;
44
using System.Text;
5+
using SIL.Scripture;
56

67
namespace SIL.Machine.Corpora
78
{
@@ -20,15 +21,21 @@ ParatextProjectSettings settings
2021
}
2122

2223
public IReadOnlyList<UsfmVersificationError> GetUsfmVersificationErrors(
23-
UsfmVersificationErrorDetector handler = null
24+
UsfmVersificationErrorDetector handler = null,
25+
HashSet<int> books = null
2426
)
2527
{
26-
handler = handler ?? new UsfmVersificationErrorDetector(_settings.Versification);
27-
foreach (string fileName in _settings.GetAllScriptureBookFileNames())
28+
handler = handler ?? new UsfmVersificationErrorDetector(_settings);
29+
foreach (string bookId in _settings.GetAllScriptureBookIds())
2830
{
31+
string fileName = _settings.GetBookFileName(bookId);
32+
2933
if (!_paratextProjectFileHandler.Exists(fileName))
3034
continue;
3135

36+
if (books != null && !books.Contains(Canon.BookIdToNumber(bookId)))
37+
continue;
38+
3239
string usfm;
3340
using (var reader = new StreamReader(_paratextProjectFileHandler.Open(fileName)))
3441
{

src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ public UsfmVersificationError(
3030
int expectedVerse,
3131
int actualChapter,
3232
int actualVerse,
33+
string projectName,
3334
VerseRef? verseRef = null
3435
)
3536
{
@@ -39,8 +40,11 @@ public UsfmVersificationError(
3940
_actualChapter = actualChapter;
4041
_actualVerse = actualVerse;
4142
_verseRef = verseRef;
43+
ProjectName = projectName;
4244
}
4345

46+
public string ProjectName { get; private set; }
47+
4448
public UsfmVersificationErrorType Type { get; private set; }
4549

4650
// Returns true if there is an error
@@ -100,14 +104,20 @@ public string ExpectedVerseRef
100104
{
101105
get
102106
{
107+
if (Type == UsfmVersificationErrorType.ExtraVerse)
108+
return "";
109+
103110
// We do not want to throw an exception here, and the VerseRef constructor can throw
104111
// an exception with certain invalid verse data; use TryParse instead.
105-
if (!VerseRef.TryParse($"{_bookNum} {_expectedChapter}:{_expectedVerse}", out VerseRef defaultVerseRef))
112+
if (
113+
!VerseRef.TryParse(
114+
$"{Canon.BookNumberToId(_bookNum)} {_expectedChapter}:{_expectedVerse}",
115+
out VerseRef defaultVerseRef
116+
)
117+
)
106118
{
107119
return DefaultVerse(_expectedChapter, _expectedVerse);
108120
}
109-
if (Type == UsfmVersificationErrorType.ExtraVerse)
110-
return "";
111121
if (
112122
Type == UsfmVersificationErrorType.MissingVerseSegment
113123
&& VerseRef.TryParse(
@@ -154,7 +164,12 @@ public string ActualVerseRef
154164
}
155165
else
156166
{
157-
if (VerseRef.TryParse($"{_bookNum} {_actualChapter}:{_actualVerse}", out VerseRef actualVerseRef))
167+
if (
168+
VerseRef.TryParse(
169+
$"{Canon.BookNumberToId(_bookNum)} {_actualChapter}:{_actualVerse}",
170+
out VerseRef actualVerseRef
171+
)
172+
)
158173
{
159174
return actualVerseRef.ToString();
160175
}
@@ -172,15 +187,17 @@ private string DefaultVerse(int chapter, int verse)
172187

173188
public class UsfmVersificationErrorDetector : UsfmParserHandlerBase
174189
{
190+
private readonly string _projectName;
175191
private readonly ScrVers _versification;
176192
private int _currentBook;
177193
private int _currentChapter;
178194
private VerseRef _currentVerse;
179195
private readonly List<UsfmVersificationError> _errors;
180196

181-
public UsfmVersificationErrorDetector(ScrVers versification)
197+
public UsfmVersificationErrorDetector(ParatextProjectSettings settings)
182198
{
183-
_versification = versification;
199+
_projectName = settings.Name;
200+
_versification = settings.Versification;
184201
_currentBook = 0;
185202
_currentChapter = 0;
186203
_currentVerse = new VerseRef();
@@ -198,7 +215,8 @@ public override void EndUsfm(UsfmParserState state)
198215
_versification.GetLastChapter(_currentBook),
199216
_versification.GetLastVerse(_currentBook, _versification.GetLastChapter(_currentBook)),
200217
_currentChapter,
201-
_currentVerse.AllVerses().Last().VerseNum
218+
_currentVerse.AllVerses().Last().VerseNum,
219+
_projectName
202220
);
203221
if (versificationError.CheckError())
204222
_errors.Add(versificationError);
@@ -227,7 +245,8 @@ string pubNumber
227245
_currentChapter,
228246
_versification.GetLastVerse(_currentBook, _currentChapter),
229247
_currentChapter,
230-
_currentVerse.AllVerses().Last().VerseNum
248+
_currentVerse.AllVerses().Last().VerseNum,
249+
_projectName
231250
);
232251
if (versificationError.CheckError())
233252
_errors.Add(versificationError);
@@ -254,6 +273,7 @@ string pubNumber
254273
_currentVerse.AllVerses().Last().VerseNum,
255274
_currentChapter,
256275
_currentVerse.AllVerses().Last().VerseNum,
276+
_projectName,
257277
_currentVerse
258278
);
259279
if (versificationError.CheckError())

tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
namespace SIL.Machine.Corpora;
77

88
[TestFixture]
9-
public class ParatextProjectQuoteConventionDetectorTests
9+
public class ParatextProjectVersificationErrorDetectorTests
1010
{
1111
[Test]
1212
public void GetUsfmVersificationErrors_NoErrors()
@@ -75,6 +75,8 @@ public void GetUsfmVersificationErrors_MissingVerse()
7575
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
7676
Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors));
7777
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse));
78+
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("3JN 1:15"));
79+
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:14"));
7880
}
7981

8082
[Test]
@@ -93,6 +95,8 @@ public void GetUsfmVersificationErrors_MissingChapter()
9395
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
9496
Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors));
9597
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingChapter));
98+
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("3JN 1:15"));
99+
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 0:0"));
96100
}
97101

98102
[Test]
@@ -128,6 +132,8 @@ public void GetUsfmVersificationErrors_ExtraVerse()
128132
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
129133
Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors));
130134
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerse));
135+
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo(""));
136+
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:16"));
131137
}
132138

133139
[Test]
@@ -161,6 +167,8 @@ public void GetUsfmVersificationErrors_InvalidVerse()
161167
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
162168
Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors));
163169
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.InvalidVerseRange));
170+
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("3JN 1:12-13"));
171+
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:13-12"));
164172
}
165173

166174
[Test]
@@ -196,6 +204,8 @@ public void GetUsfmVersificationErrors_ExtraVerseSegment()
196204
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
197205
Assert.That(errors, Has.Count.EqualTo(2), JsonSerializer.Serialize(errors));
198206
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerseSegment));
207+
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("3JN 1:14"));
208+
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:14a"));
199209
}
200210

201211
[Test]
@@ -233,6 +243,8 @@ public void GetUsfmVersificationErrors_MissingVerseSegment()
233243
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
234244
Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors));
235245
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerseSegment));
246+
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("3JN 1:13a"));
247+
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:13"));
236248
}
237249

238250
[Test]
@@ -289,6 +301,8 @@ public void GetUsfmVersificationErrors_ExtraVerse_ExcludedInCustomVrs()
289301
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
290302
Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors));
291303
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerse));
304+
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo(""));
305+
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:13"));
292306
}
293307

294308
[Test]
@@ -341,6 +355,8 @@ public void GetUsfmVersificationErrors_MultipleBooks()
341355
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
342356
Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors));
343357
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse));
358+
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("2JN 1:13"));
359+
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("2JN 1:12"));
344360
}
345361

346362
[Test]
@@ -375,6 +391,10 @@ public void GetUsfmVersificationErrors_MultipleChapters()
375391
Assert.That(errors, Has.Count.EqualTo(2), JsonSerializer.Serialize(errors));
376392
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse));
377393
Assert.That(errors[1].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerse));
394+
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("2JN 1:13"));
395+
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("2JN 1:12"));
396+
Assert.That(errors[1].ExpectedVerseRef, Is.EqualTo(""));
397+
Assert.That(errors[1].ActualVerseRef, Is.EqualTo("2JN 2:1"));
378398
}
379399

380400
private class TestEnvironment(ParatextProjectSettings? settings = null, Dictionary<string, string>? files = null)

0 commit comments

Comments
 (0)