Skip to content

Commit 196ca8e

Browse files
committed
Fix crashes while reading USFM
- properly handle verse 0
- ignore private-use markers
- fixes #329
1 parent 9638a92 commit 196ca8e

File tree

2 files changed

+86
-4
lines changed

2 files changed

+86
-4
lines changed

src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,12 @@ protected ScriptureRefUsfmParserHandlerBase()
3333

3434
// Returns true when marker is non-null and, with any '*' characters trimmed from its ends,
// is contained in EmbedStyles. The removed line also accepted any marker starting with "z";
// the added line drops that clause, so "z" markers are no longer treated as embed styles here.
private static bool IsEmbedStyle(string marker)
3535
{
36-
return marker != null && (EmbedStyles.Contains(marker.Trim('*')) || marker.StartsWith("z"));
36+
return marker != null && EmbedStyles.Contains(marker.Trim('*'));
37+
}
38+
39+
/// <summary>
/// Determines whether a marker is a private-use marker, i.e. one whose name begins with "z".
/// </summary>
/// <param name="marker">The marker name to test; may be null.</param>
/// <returns>
/// <c>true</c> if <paramref name="marker"/> is non-empty and its first character is 'z';
/// otherwise <c>false</c>.
/// </returns>
private static bool IsPrivateUseMarker(string marker)
{
    // Compare the first character directly rather than calling StartsWith(string), which
    // performs a culture-sensitive comparison. Marker names are identifiers, so the check
    // must be ordinal and independent of the current culture.
    return !string.IsNullOrEmpty(marker) && marker[0] == 'z';
}
3843

3944
public override void EndUsfm(UsfmParserState state)
@@ -63,9 +68,12 @@ string pubNumber
6368
{
6469
if (state.VerseRef.Equals(_curVerseRef) && !_duplicateVerse)
6570
{
66-
EndVerseText(state, CreateVerseRefs());
67-
// ignore duplicate verses
68-
_duplicateVerse = true;
71+
if (state.VerseRef.VerseNum > 0)
72+
{
73+
EndVerseText(state, CreateVerseRefs());
74+
// ignore duplicate verses
75+
_duplicateVerse = true;
76+
}
6977
}
7078
else if (VerseRef.AreOverlappingVersesRanges(verse1: number, verse2: _curVerseRef.Verse))
7179
{
@@ -92,6 +100,10 @@ public override void StartPara(
92100
IReadOnlyList<UsfmAttribute> attributes
93101
)
94102
{
103+
// ignore private-use markers
104+
if (IsPrivateUseMarker(marker))
105+
return;
106+
95107
if (_curVerseRef.IsDefault)
96108
UpdateVerseRef(state.VerseRef, marker);
97109

@@ -104,6 +116,10 @@ IReadOnlyList<UsfmAttribute> attributes
104116

105117
public override void EndPara(UsfmParserState state, string marker)
106118
{
119+
// ignore private-use markers
120+
if (IsPrivateUseMarker(marker))
121+
return;
122+
107123
if (CurrentTextType == ScriptureTextType.NonVerse)
108124
{
109125
EndParentElement();
@@ -185,6 +201,10 @@ public override void StartChar(
185201
IReadOnlyList<UsfmAttribute> attributes
186202
)
187203
{
204+
// ignore private-use markers
205+
if (IsPrivateUseMarker(markerWithoutPlus))
206+
return;
207+
188208
// if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse
189209
// segment
190210
CheckConvertVerseParaToNonVerse(state);
@@ -199,6 +219,10 @@ public override void EndChar(
199219
bool closed
200220
)
201221
{
222+
// ignore private-use markers
223+
if (IsPrivateUseMarker(marker))
224+
return;
225+
202226
if (IsEmbedStyle(marker))
203227
EndEmbedText(state);
204228
}
@@ -332,6 +356,7 @@ private void CheckConvertVerseParaToNonVerse(UsfmParserState state)
332356
&& paraTag.Marker != "tr"
333357
&& state.IsVersePara
334358
&& _curVerseRef.VerseNum == 0
359+
&& !IsPrivateUseMarker(paraTag.Marker)
335360
)
336361
{
337362
StartParentElement(paraTag.Marker);

tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,63 @@ public void GetRows_StyleStartingNonVerseParagraphAfterEmptyParagraph()
278278
});
279279
}
280280

281+
// Checks that USFM containing a "\v 0" marker in chapter 1, followed by an empty "\s" and a
// "\v 1" verse, yields exactly one row: reference MAT 1:1 with text "Verse one.".
[Test]
282+
public void GetRows_VerseZero()
283+
{
284+
TextRow[] rows = GetRows(
285+
@"\id MAT - Test
286+
\h
287+
\mt
288+
\c 1
289+
\p \v 0
290+
\s
291+
\p \v 1 Verse one.
292+
"
293+
);
294+
295+
Assert.Multiple(() =>
296+
{
297+
Assert.That(rows, Has.Length.EqualTo(1));
298+
299+
Assert.That(
300+
rows[0].Ref,
301+
Is.EqualTo(ScriptureRef.Parse("MAT 1:1")),
302+
string.Join(",", rows.ToList().Select(tr => tr.Ref.ToString()))
303+
);
304+
Assert.That(rows[0].Text, Is.EqualTo("Verse one."), string.Join(",", rows.ToList().Select(tr => tr.Text)));
305+
});
306+
}
307+
308+
// Checks that markers starting with "z" (the "\zmt" paragraph and the "\zimagecopyrights"
// marker) are ignored when reading with includeAllText: true. Three rows are expected, and
// the second row has reference FRT 1:0/2:pc with text "Copyright Statement".
[Test]
309+
public void GetRows_PrivateUseMarker()
310+
{
311+
TextRow[] rows = GetRows(
312+
@"\id FRT - Test English Apocrypha
313+
\zmt Ignore this paragraph
314+
\mt1 Test English Apocrypha
315+
\pc Copyright Statement \zimagecopyrights
316+
\pc Further copyright statements
317+
",
318+
includeAllText: true
319+
);
320+
321+
Assert.Multiple(() =>
322+
{
323+
Assert.That(rows, Has.Length.EqualTo(3));
324+
325+
Assert.That(
326+
rows[1].Ref,
327+
Is.EqualTo(ScriptureRef.Parse("FRT 1:0/2:pc")),
328+
string.Join(",", rows.ToList().Select(tr => tr.Ref.ToString()))
329+
);
330+
Assert.That(
331+
rows[1].Text,
332+
Is.EqualTo("Copyright Statement"),
333+
string.Join(",", rows.ToList().Select(tr => tr.Text))
334+
);
335+
});
336+
}
337+
281338
private static TextRow[] GetRows(string usfm, bool includeMarkers = false, bool includeAllText = false)
282339
{
283340
UsfmMemoryText text =

0 commit comments

Comments
 (0)