Skip to content

Commit 5139443

Browse files
committed
Port: Fix spacing around end markers
1 parent 90dcee6 commit 5139443

File tree

2 files changed

+22
-9
lines changed

2 files changed

+22
-9
lines changed

src/SIL.Machine/Corpora/PlaceMarkersUsfmUpdateBlockHandler.cs

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@ public UsfmUpdateBlock ProcessBlock(UsfmUpdateBlock block)
5050
|| !elements.Any(e =>
5151
e.Type.IsOneOf(UsfmUpdateBlockElementType.Paragraph, UsfmUpdateBlockElementType.Style)
5252
&& !e.MarkedForRemoval
53+
&& e.Tokens.Count == 1
5354
)
5455
)
5556
{
@@ -173,10 +174,24 @@ public UsfmUpdateBlock ProcessBlock(UsfmUpdateBlock block)
173174
sourceTokens,
174175
targetTokens
175176
);
176-
int targetStringIndex =
177-
adjacentTargetToken < targetTokenStarts.Count
178-
? targetTokenStarts[adjacentTargetToken]
179-
: targetSentence.Length;
177+
int targetStringIndex;
178+
if (
179+
adjacentSourceToken > 0
180+
&& element.Type == UsfmUpdateBlockElementType.Style
181+
&& element.Tokens[0].Marker.Last() == '*'
182+
)
183+
{
184+
targetStringIndex =
185+
targetTokenStarts[adjacentTargetToken - 1] + targetTokens[adjacentTargetToken - 1].Length;
186+
}
187+
else if (adjacentTargetToken < targetTokenStarts.Count)
188+
{
189+
targetStringIndex = targetTokenStarts[adjacentTargetToken];
190+
}
191+
else
192+
{
193+
targetStringIndex = targetSentence.Length;
194+
}
180195
toInsert.Add((targetStringIndex, element));
181196
}
182197
toInsert.Sort((p1, p2) => p1.Index.CompareTo(p2.Index));

tests/SIL.Machine.Tests/Corpora/PlaceMarkersUsfmUpdateBlockHandlerTests.cs

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -87,11 +87,9 @@ public void UpdateUsfm_StyleMarkers()
8787
string result =
8888
@"\id MAT
8989
\c 1
90-
\v 1 Esta es la \w primera \w*oración. Este texto está en \w inglés \w*y esta prueba es \w para \w*marcadores de estilo.
90+
\v 1 Esta es la \w primera\w* oración. Este texto está en \w inglés\w* y esta prueba es \w para\w* marcadores de estilo.
9191
";
9292

93-
// NOTE: the spacing before/after end markers is incorrect,
94-
// but this is an issue with how the is USFM is generated from the tokens
9593
AssertUsfmEquals(target, result);
9694

9795
target = UpdateUsfm(
@@ -307,7 +305,7 @@ public void UpdateUsfm_ConsecutiveMarkers()
307305
@"\id MAT
308306
\c 1
309307
\v 1 Old verse 1
310-
\p \qt \+w word \+w* \qt*
308+
\p \qt \+w word\+w*\qt*
311309
";
312310
IReadOnlyList<PlaceMarkersAlignmentInfo> alignInfo =
313311
[
@@ -331,7 +329,7 @@ public void UpdateUsfm_ConsecutiveMarkers()
331329
@"\id MAT
332330
\c 1
333331
\v 1 New verse 1
334-
\p \qt \+w WORD \+w*\qt*
332+
\p \qt \+w WORD\+w*\qt*
335333
";
336334

337335
AssertUsfmEquals(target, result);

0 commit comments

Comments
 (0)