Skip to content

Commit b9336fa

Browse files
committed
Added more test framework
1 parent 9e915d7 commit b9336fa

File tree

3 files changed

+125
-0
lines changed

3 files changed

+125
-0
lines changed

machine/corpora/paratext_project_text_updater_base.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from abc import ABC, abstractmethod
22
from typing import BinaryIO, Optional, Sequence, Tuple, Union
33

4+
from .scripture_update_block_handler_base import ScriptureUpdateBlockHandlerBase
5+
46
from ..utils.typeshed import StrPath
57
from .paratext_project_settings import ParatextProjectSettings
68
from .paratext_project_settings_parser_base import ParatextProjectSettingsParserBase
@@ -26,6 +28,7 @@ def update_usfm(
2628
embed_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE,
2729
style_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.STRIP,
2830
preserve_paragraph_styles: Optional[Sequence[str]] = None,
31+
update_block_handlers: Optional[list[ScriptureUpdateBlockHandlerBase]] = None,
2932
) -> Optional[str]:
3033
file_name: str = self._settings.get_book_file_name(book_id)
3134
if not self._exists(file_name):
@@ -40,6 +43,7 @@ def update_usfm(
4043
embed_behavior,
4144
style_behavior,
4245
preserve_paragraph_styles,
46+
update_block_handlers=update_block_handlers,
4347
)
4448
try:
4549
parse_usfm(usfm, handler, self._settings.stylesheet, self._settings.versification)

machine/corpora/update_usfm_parser_handler.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ def start_para(
109109
super().start_para(state, marker, unknown, attributes)
110110

111111
def end_para(self, state: UsfmParserState, marker: str) -> None:
    """Handle the end of a paragraph.

    If the parser state reports that it is not in verse text,
    ``self._process_update_block()`` is called first. The event is then
    forwarded to the base class implementation, and finally the
    ``_in_preserved_paragraph`` flag is cleared.

    Args:
        state: Current parser state; only ``is_verse_text`` is read here.
        marker: Paragraph marker, passed through unchanged to the base class.
    """
    if not state.is_verse_text:
        # Runs before the base class sees the end-of-paragraph event.
        self._process_update_block()
    super().end_para(state, marker)
    # NOTE(review): presumably this flag is set while a preserved paragraph is
    # open (e.g. in start_para) -- confirm against the rest of the class.
    self._in_preserved_paragraph = False
114116

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
from typing import List, Optional, Sequence, Tuple
2+
3+
from machine.corpora.scripture_update_block_handler_first_elements_first import (
4+
ScriptureUpdateBlockHandlerFirstElementsFirst,
5+
)
6+
7+
from machine.corpora.scripture_update_block_handler_base import ScriptureUpdateBlockHandlerBase
8+
from testutils.corpora_test_helpers import USFM_TEST_PROJECT_PATH
9+
10+
from machine.corpora import (
11+
FileParatextProjectTextUpdater,
12+
ScriptureRef,
13+
UpdateUsfmMarkerBehavior,
14+
UpdateUsfmParserHandler,
15+
UpdateUsfmTextBehavior,
16+
parse_usfm,
17+
)
18+
19+
20+
def test_preserve_paragraphs():
    """Check the updated USFM with and without a first-elements-first handler.

    The same rows and source USFM are passed to ``update_usfm`` twice: once
    with no update block handlers and once with a
    ``ScriptureUpdateBlockHandlerFirstElementsFirst`` instance. Each output
    is compared with its expected USFM through ``assess``.
    """
    # (reference, replacement text) pairs; every verse reference is followed
    # by a "/1:f" sub-reference for the same verse.
    replacements = (
        ("MAT 1:1", "U1"),
        ("MAT 1:1/1:f", "UF1"),
        ("MAT 1:2", "U2"),
        ("MAT 1:2/1:f", "UF2"),
        ("MAT 1:3", "U3"),
        ("MAT 1:3/1:f", "UF3"),
    )
    rows = [(scr_ref(reference), text) for reference, text in replacements]

    source_usfm = r"""\id MAT
\c 1
\v 1 \f \ft \fm ' \fm* hello world \f* it comes first
\v 2 it comes \f \ft hello \fm ' \fm* world \f* middling
\v 3 it comes last \f \ft hello world \fm ' \fm* \f*
"""

    expected_default = r"""\id MAT
\c 1
\v 1 U1 \f \ft UF1 \fm ' \fm*\f*
\v 2 U2 \f \ft UF2 \fm ' \fm*\f*
\v 3 U3 \f \ft UF3 \fm ' \fm*\f*
"""
    expected_first_element = r"""\id MAT
\c 1
\v 1 \f \ft \fm ' \fm* UF1 \f* U1
\v 2 U2 \f \ft UF2 \fm ' \fm*\f*
\v 3 U3 \f \ft UF3 \fm ' \fm*\f*
"""

    # Pass 1: no update block handlers.
    actual_default = update_usfm(rows, source_usfm)
    assess(actual_default, expected_default)

    # Pass 2: same input, with the first-elements-first handler installed.
    handlers = [ScriptureUpdateBlockHandlerFirstElementsFirst()]
    actual_first_element = update_usfm(rows, source_usfm, update_block_handlers=handlers)
    assess(actual_first_element, expected_first_element)
65+
66+
67+
def scr_ref(*refs: str) -> List[ScriptureRef]:
    """Parse each reference string with ``ScriptureRef.parse``.

    Args:
        *refs: Reference strings, e.g. ``"MAT 1:1"``.

    Returns:
        The parsed references, in the order the strings were given.
    """
    parsed = []
    for ref_text in refs:
        parsed.append(ScriptureRef.parse(ref_text))
    return parsed
69+
70+
71+
def update_usfm(
    rows: Optional[Sequence[Tuple[Sequence[ScriptureRef], str]]] = None,
    source: Optional[str] = None,
    id_text: Optional[str] = None,
    text_behavior: UpdateUsfmTextBehavior = UpdateUsfmTextBehavior.PREFER_NEW,
    paragraph_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE,
    embed_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE,
    style_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.STRIP,
    preserve_paragraph_styles: Optional[Sequence[str]] = None,
    update_block_handlers: Optional[list[ScriptureUpdateBlockHandlerBase]] = None,
) -> Optional[str]:
    """Run a USFM update from an in-memory string or from the test project.

    When ``source`` is given, it is stripped, CRLF pairs are replaced with
    LF, and a single CRLF is appended. The result is parsed with an
    ``UpdateUsfmParserHandler`` and the handler's ``get_usfm()`` output is
    returned.

    When ``source`` is ``None``, a ``FileParatextProjectTextUpdater`` over
    ``USFM_TEST_PROJECT_PATH`` updates book ``"MAT"`` and its result is
    returned.

    All remaining arguments are forwarded positionally, in declaration
    order, to whichever of the two is used.
    """
    # Both code paths take the same trailing arguments in the same order.
    shared_options = (
        id_text,
        text_behavior,
        paragraph_behavior,
        embed_behavior,
        style_behavior,
        preserve_paragraph_styles,
        update_block_handlers,
    )

    if source is not None:
        normalized_source = source.strip().replace("\r\n", "\n") + "\r\n"
        handler = UpdateUsfmParserHandler(rows, *shared_options)
        parse_usfm(normalized_source, handler)
        return handler.get_usfm()

    project_updater = FileParatextProjectTextUpdater(USFM_TEST_PROJECT_PATH)
    return project_updater.update_usfm("MAT", rows, *shared_options)
109+
110+
111+
def assess(target: Optional[str], truth: str) -> None:
    """Assert that ``target`` matches ``truth`` line by line.

    Both strings are split on the newline character and each pair of lines
    is compared after stripping surrounding whitespace, so indentation and a
    carriage return left over from CRLF endings are ignored.

    The shorter side is padded with empty lines. Plain ``zip`` would stop at
    the shorter sequence and let extra or missing lines pass unnoticed; with
    padding, an unmatched non-blank line fails the comparison, while a
    difference that is only in trailing blank lines is still accepted.

    Args:
        target: The produced text; must not be ``None``.
        truth: The expected text.

    Raises:
        AssertionError: If ``target`` is ``None`` or any pair of lines differs.
    """
    from itertools import zip_longest

    assert target is not None
    line_pairs = zip_longest(target.split("\n"), truth.split("\n"), fillvalue="")
    for target_line, truth_line in line_pairs:
        assert target_line.strip() == truth_line.strip()
115+
116+
117+
def read_usfm() -> str:
    """Return the full text of ``41MATTes.SFM`` from the USFM test project.

    The file is decoded with the ``utf-8-sig`` codec and opened with the
    newline argument set to CRLF.
    """
    sfm_path = USFM_TEST_PROJECT_PATH / "41MATTes.SFM"
    with sfm_path.open("r", encoding="utf-8-sig", newline="\r\n") as sfm_file:
        contents = sfm_file.read()
    return contents

0 commit comments

Comments
 (0)