Skip to content

Commit 9e915d7

Browse files
committed
I want to process the data in segments that correspond to individual translations. These updates make it happen.
1 parent 361a1f0 commit 9e915d7

File tree

3 files changed

+10
-0
lines changed

3 files changed

+10
-0
lines changed

.vscode/settings.json

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,9 @@
1313
"source.organizeImports": "explicit"
1414
},
1515
},
16+
"files.associations": {
17+
"*.SFM": "usfm",
18+
},
1619
"black-formatter.path": [
1720
"poetry",
1821
"run",

machine/corpora/usfm_parser_state.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,10 @@ def is_verse_para(self) -> bool:
108108

109109
@property
110110
def is_verse_text(self) -> bool:
111+
# anything before verse 1 is not verse text
112+
if self.verse_ref.verse_num == 0:
113+
return False
114+
111115
# Sidebars and notes are not verse text
112116
if any(e.type in {UsfmElementType.SIDEBAR, UsfmElementType.NOTE} for e in self._stack):
113117
return False

tests/corpora/test_update_usfm_parser_handler.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -202,6 +202,7 @@ def test_paragraph_in_verse():
202202
]
203203
usfm = r"""\id MAT - Test
204204
\c 1
205+
\p paragraph not in a verse
205206
\v 1 verse 1 \p inner verse paragraph
206207
\s1 Section Header
207208
\v 2 Verse 2 \p inner verse paragraph
@@ -211,6 +212,7 @@ def test_paragraph_in_verse():
211212

212213
result = r"""\id MAT - Test
213214
\c 1
215+
\p paragraph not in a verse
214216
\v 1 Update 1
215217
\s1 Section Header
216218
\v 2 Verse 2
@@ -228,6 +230,7 @@ def test_paragraph_in_verse():
228230

229231
result_strip = r"""\id MAT
230232
\c 1
233+
\p
231234
\v 1 Update 1
232235
\s1
233236
\v 2

0 commit comments

Comments
 (0)