|
"""Test seg_text.""" |
|
import pytest |
|
from gradiobee.seg_text import seg_text |
|
|
|
|
|
def test_seg_text1():
    """Check segment counts for two inputs differing only in one letter's case.

    The inputs are identical except that the word following ``"test 2. "``
    is ``"test 3"`` in the first and ``"Test 3"`` in the second; the expected
    number of segments is 2 and 3 respectively.
    """
    # Lowercase word after the full stop: two segments expected.
    lowercase_follow = " text 1\n\n test 2. test 3"
    segments = seg_text(lowercase_follow)
    assert len(segments) == 2

    # Capitalized word after the full stop: three segments expected.
    capitalized_follow = " text 1\n\n test 2. Test 3"
    segments = seg_text(capitalized_follow)
    assert len(segments) == 3
|
|
|
|
|
@pytest.mark.parametrize(
    ("test_input", "expected"),
    [
        ("", []),
        (" ", []),
        (" \n ", []),
    ],
)
def test_seg_text_blanks(test_input, expected):
    """Check that empty and whitespace-only inputs yield an empty list."""
    result = seg_text(test_input)
    assert result == expected
|
|
|
|
|
def test_seg_text_semicolon():
    """Check segment counts for text containing a semicolon, per language.

    The same sample is segmented with the default language argument and with
    ``'zh'``, ``'ja'``, ``'ko'`` and ``'en'`` passed explicitly.
    """
    text = """ “元宇宙”,英文為“Metaverse”。該詞出自1992年;的科幻小說《雪崩》。 """

    # Default language argument.
    assert len(seg_text(text)) == 2

    # Explicit language codes, checked in the same order as listed.
    expected_by_lang = (
        ("zh", 2),
        ("ja", 2),
        ("ko", 2),
        ("en", 1),
    )
    for lang, expected_count in expected_by_lang:
        assert len(seg_text(text, lang)) == expected_count
|
|
|
|
|
def test_seg_text_semicolon_extra():
    """Check segment counts when an ``extra`` pattern is passed to seg_text.

    Each expected count is written as a base count plus one, for the default
    language argument and for ``'zh'``, ``'ja'``, ``'ko'`` and ``'en'``.
    """
    extra = "[;;]"
    text = """ “元宇宙”,英文為“Metaverse”。該詞出自1992年;的科幻小說《雪崩》。 """

    # Default language argument.
    assert len(seg_text(text, extra=extra)) == 2 + 1

    # Explicit language codes paired with their base counts.
    base_by_lang = (
        ("zh", 2),
        ("ja", 2),
        ("ko", 2),
        ("en", 1),
    )
    for lang, base_count in base_by_lang:
        assert len(seg_text(text, lang, extra=extra)) == base_count + 1
|
|