Skip to content

Commit 2ea42e9

Browse files
🐛 Handle text with only punctuation
Closes #36
1 parent 05edfdb commit 2ea42e9

File tree

3 files changed

+6
-2
lines changed

3 files changed

+6
-2
lines changed

pysbd/lang/standard.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class DoublePunctuationRules(object):
3939
SecondRule = Rule(r'!\?', '☈')
4040
ThirdRule = Rule(r'\?\?', '☇')
4141
ForthRule = Rule(r'!!', '☄')
42+
DoublePunctuation = r'\?!|!\?|\?\?|!!'
4243
All = [FirstRule, SecondRule, ThirdRule, ForthRule]
4344

4445

pysbd/processor.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,10 @@ def process_text(self, txt):
143143
txt += 'ȸ'
144144
txt = ExclamationWords.apply_rules(txt)
145145
txt = BetweenPunctuation(txt).replace()
146-
txt = Text(txt).apply(*DoublePunctuationRules.All,
147-
Standard.QuestionMarkInQuotationRule,
146+
# skip the double-punctuation rules when the text starts with a double punctuation mark (e.g. '??')
147+
if not re.match(DoublePunctuationRules.DoublePunctuation, txt):
148+
txt = Text(txt).apply(*DoublePunctuationRules.All)
149+
txt = Text(txt).apply(Standard.QuestionMarkInQuotationRule,
148150
*ExclamationPointRules.All)
149151
txt = ListItemReplacer(txt).replace_parens()
150152
txt = self.sentence_boundary_punctuation(txt)

tests/regression/test_issues.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
('#34', '..', ['..']),
2121
('#34', '. . .', ['. . .']),
2222
('#34', '! ! !', ['! ! !']),
23+
('#36', '??', ['??'])
2324
]
2425

2526
@pytest.mark.parametrize('issue_no,text,expected_sents', TEST_ISSUE_DATA)

0 commit comments

Comments
 (0)