|
| 1 | +from typing import List, Optional, Sequence, Tuple |
| 2 | + |
| 3 | +from machine.corpora.scripture_update_block_handler_first_elements_first import ( |
| 4 | + ScriptureUpdateBlockHandlerFirstElementsFirst, |
| 5 | +) |
| 6 | + |
| 7 | +from machine.corpora.scripture_update_block_handler_base import ScriptureUpdateBlockHandlerBase |
| 8 | +from testutils.corpora_test_helpers import USFM_TEST_PROJECT_PATH |
| 9 | + |
| 10 | +from machine.corpora import ( |
| 11 | + FileParatextProjectTextUpdater, |
| 12 | + ScriptureRef, |
| 13 | + UpdateUsfmMarkerBehavior, |
| 14 | + UpdateUsfmParserHandler, |
| 15 | + UpdateUsfmTextBehavior, |
| 16 | + parse_usfm, |
| 17 | +) |
| 18 | + |
| 19 | + |
def test_preserve_paragraphs():
    """Update three verses, each with a footnote, with and without a block handler.

    The source places the footnote before, inside, and after the verse text.
    The default update is expected to emit verse text followed by the footnote
    for every verse. With ``ScriptureUpdateBlockHandlerFirstElementsFirst``,
    verse 1 (whose footnote comes first in the source) is expected to keep the
    footnote ahead of the verse text.
    """
    # (reference, replacement text) pairs: one per verse and one per footnote.
    replacements = (
        ("MAT 1:1", "U1"),
        ("MAT 1:1/1:f", "UF1"),
        ("MAT 1:2", "U2"),
        ("MAT 1:2/1:f", "UF2"),
        ("MAT 1:3", "U3"),
        ("MAT 1:3/1:f", "UF3"),
    )
    rows = [(scr_ref(reference), text) for reference, text in replacements]

    usfm = r"""\id MAT
\c 1
\v 1 \f \ft \fm ' \fm* hello world \f* it comes first
\v 2 it comes \f \ft hello \fm ' \fm* world \f* middling
\v 3 it comes last \f \ft hello world \fm ' \fm* \f*
"""

    # Default behaviour: no update block handlers supplied.
    expected_default = r"""\id MAT
\c 1
\v 1 U1 \f \ft UF1 \fm ' \fm*\f*
\v 2 U2 \f \ft UF2 \fm ' \fm*\f*
\v 3 U3 \f \ft UF3 \fm ' \fm*\f*
"""
    actual_default = update_usfm(rows, usfm)
    assess(actual_default, expected_default)

    # Same input, routed through the first-elements-first handler.
    expected_first_element = r"""\id MAT
\c 1
\v 1 \f \ft \fm ' \fm* UF1 \f* U1
\v 2 U2 \f \ft UF2 \fm ' \fm*\f*
\v 3 U3 \f \ft UF3 \fm ' \fm*\f*
"""
    handlers = [ScriptureUpdateBlockHandlerFirstElementsFirst()]
    actual_first_element = update_usfm(rows, usfm, update_block_handlers=handlers)
    assess(actual_first_element, expected_first_element)
| 65 | + |
| 66 | + |
def scr_ref(*refs: str) -> List[ScriptureRef]:
    """Parse each reference string with ``ScriptureRef.parse``, preserving order."""
    return list(map(ScriptureRef.parse, refs))
| 69 | + |
| 70 | + |
def update_usfm(
    rows: Optional[Sequence[Tuple[Sequence[ScriptureRef], str]]] = None,
    source: Optional[str] = None,
    id_text: Optional[str] = None,
    text_behavior: UpdateUsfmTextBehavior = UpdateUsfmTextBehavior.PREFER_NEW,
    paragraph_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE,
    embed_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE,
    style_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.STRIP,
    preserve_paragraph_styles: Optional[Sequence[str]] = None,
    update_block_handlers: Optional[list[ScriptureUpdateBlockHandlerBase]] = None,
) -> Optional[str]:
    """Run a USFM update and return the resulting USFM text.

    When ``source`` is ``None`` the update is delegated to a
    ``FileParatextProjectTextUpdater`` over ``USFM_TEST_PROJECT_PATH`` for book
    ``"MAT"``. Otherwise ``source`` is normalised (stripped, ``\\r\\n`` replaced
    by ``\\n``, and a single ``\\r\\n`` appended) and parsed through an
    ``UpdateUsfmParserHandler``. Every other argument is forwarded positionally,
    in declaration order, to whichever updater is used.
    """
    # Both code paths take the same options in the same positional order.
    shared_args = (
        rows,
        id_text,
        text_behavior,
        paragraph_behavior,
        embed_behavior,
        style_behavior,
        preserve_paragraph_styles,
        update_block_handlers,
    )

    if source is None:
        project_updater = FileParatextProjectTextUpdater(USFM_TEST_PROJECT_PATH)
        return project_updater.update_usfm("MAT", *shared_args)

    normalized = source.strip().replace("\r\n", "\n")
    normalized += "\r\n"
    handler = UpdateUsfmParserHandler(*shared_args)
    parse_usfm(normalized, handler)
    return handler.get_usfm()
| 109 | + |
| 110 | + |
def assess(target: Optional[str], truth: str) -> None:
    """Assert that ``target`` matches ``truth`` line by line.

    Lines are split on ``"\\n"`` and compared after stripping surrounding
    whitespace, so ``\\r\\n`` versus ``\\n`` endings and indentation differences
    are ignored. Trailing blank lines are ignored as well.

    Unlike a bare ``zip`` comparison, which stops at the shorter input, this
    also fails when one side has lines the other lacks.

    Raises:
        AssertionError: if ``target`` is ``None``, any line pair differs, or
            the two texts have a different number of lines.
    """
    assert target is not None, "expected USFM output but got None"

    # rstrip() first so a differing number of trailing newlines is not an error.
    target_lines = [line.strip() for line in target.rstrip().split("\n")]
    truth_lines = [line.strip() for line in truth.rstrip().split("\n")]

    # Compare the shared prefix first so the first differing line is reported.
    for line_number, (target_line, truth_line) in enumerate(zip(target_lines, truth_lines), start=1):
        assert target_line == truth_line, f"line {line_number}: {target_line!r} != {truth_line!r}"

    # zip() truncates silently; check that neither side has leftover lines.
    assert len(target_lines) == len(truth_lines), (
        f"line count differs: got {len(target_lines)}, expected {len(truth_lines)}"
    )
| 115 | + |
| 116 | + |
def read_usfm() -> str:
    """Return the full text of ``41MATTes.SFM`` from the USFM test project.

    The file is decoded as ``utf-8-sig`` and opened with ``newline="\\r\\n"``.
    """
    sfm_path = USFM_TEST_PROJECT_PATH / "41MATTes.SFM"
    with sfm_path.open("r", encoding="utf-8-sig", newline="\r\n") as sfm_file:
        contents = sfm_file.read()
    return contents
0 commit comments