Skip to content

Commit 55619a8

Browse files
🐛 Handle exclamation marks at EOL
Closes #37
1 parent 2ea42e9 commit 55619a8

File tree

3 files changed

+7
-2
lines changed

3 files changed

+7
-2
lines changed

pysbd/lang/common/numbers.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ class Common(object):
77

88
# Added special case r"[。..!!?].*" to handle intermittent dots, exclamation marks, etc.
99
# TODO: the special-case group above can be extended as developers need
10-
SENTENCE_BOUNDARY_REGEX = r"((?:[^)])*)(?=\s?[A-Z])|「(?:[^」])*」(?=\s[A-Z])|\((?:[^\)]){2,}\)(?=\s[A-Z])|\'(?:[^\'])*[^,]\'(?=\s[A-Z])|\"(?:[^\"])*[^,]\"(?=\s[A-Z])|\“(?:[^\”])*[^,]\”(?=\s[A-Z])|[。..!!?].*|\S.*?[。..!!??ȸȹ☉☈☇☄]"
10+
SENTENCE_BOUNDARY_REGEX = r"((?:[^)])*)(?=\s?[A-Z])|「(?:[^」])*」(?=\s[A-Z])|\((?:[^\)]){2,}\)(?=\s[A-Z])|\'(?:[^\'])*[^,]\'(?=\s[A-Z])|\"(?:[^\"])*[^,]\"(?=\s[A-Z])|\“(?:[^\”])*[^,]\”(?=\s[A-Z])|[。..!!??].*|\S.*?[。..!!??ȸȹ☉☈☇☄]"
11+
1112
# # Rubular: http://rubular.com/r/NqCqv372Ix
1213
QUOTATION_AT_END_OF_SENTENCE_REGEX = r'[!?\.-][\"\'“”]\s{1}[A-Z]'
1314

pysbd/processor.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,8 @@ def sentence_boundary_punctuation(self, txt):
178178
if hasattr(self.language_module, 'ReplaceNonSentenceBoundaryCommaRule'):
179179
txt = Text(txt).apply(
180180
self.language_module.ReplaceNonSentenceBoundaryCommaRule)
181+
# Retain a trailing exclamation mark: if the text ends with the placeholder '&ᓴ&', turn it back into '!' before splitting
182+
txt = re.sub(r'&ᓴ&$', '!', txt)
181183
txt = re.findall(Common.SENTENCE_BOUNDARY_REGEX, txt)
182184
return txt
183185

tests/regression/test_issues.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020
('#34', '..', ['..']),
2121
('#34', '. . .', ['. . .']),
2222
('#34', '! ! !', ['! ! !']),
23-
('#36', '??', ['??'])
23+
('#36', '??', ['??']),
24+
('#37', "As an example of a different special-purpose mechanism, we have introduced a methodology for letting donors make their donations to charities conditional on donations by other donors (who, in turn, can make their donations conditional) [70]. We have used this mechanism to collect money for Indian Ocean Tsunami and Hurricane Katrina victims. We have also introduced a more general framework for negotiation when one agent's actions have a direct effect (externality) on the other agents' utilities [69]. Both the charities and externalities methodologies require the solution of NP-hard optimization problems in general, but there are some natural tractable cases as well as effective MIP formulations. Recently, Ghosh and Mahdian [86] at Yahoo! Research extended our charities work, and based on this a web-based system for charitable donations was built at Yahoo!",
25+
['As an example of a different special-purpose mechanism, we have introduced a methodology for letting donors make their donations to charities conditional on donations by other donors (who, in turn, can make their donations conditional) [70].', 'We have used this mechanism to collect money for Indian Ocean Tsunami and Hurricane Katrina victims.', "We have also introduced a more general framework for negotiation when one agent's actions have a direct effect (externality) on the other agents' utilities [69].", 'Both the charities and externalities methodologies require the solution of NP-hard optimization problems in general, but there are some natural tractable cases as well as effective MIP formulations.', 'Recently, Ghosh and Mahdian [86] at Yahoo! Research extended our charities work, and based on this a web-based system for charitable donations was built at Yahoo!'])
2426
]
2527

2628
@pytest.mark.parametrize('issue_no,text,expected_sents', TEST_ISSUE_DATA)

0 commit comments

Comments
 (0)