From 9596a821dcc520cf129fdfbda014e6fc42d609b0 Mon Sep 17 00:00:00 2001
From: shhossain
Date: Sun, 27 Aug 2023 10:31:38 +0600
Subject: [PATCH] added bangla language.

---
 pysbd/lang/bangla.py | 12 ++++++++++++
 pysbd/languages.py   |  4 +++-
 2 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 pysbd/lang/bangla.py

diff --git a/pysbd/lang/bangla.py b/pysbd/lang/bangla.py
new file mode 100644
index 0000000..ee6a046
--- /dev/null
+++ b/pysbd/lang/bangla.py
@@ -0,0 +1,12 @@
+from pysbd.abbreviation_replacer import AbbreviationReplacer
+from pysbd.lang.common import Common, Standard
+
+class Bangla(Common, Standard):
+
+    iso_code = 'bn'
+
+    SENTENCE_BOUNDARY_REGEX = r'.*?[।\|!\?]|.*?$'
+    Punctuations = ['।', '|', '.', '!', '?']
+
+    class AbbreviationReplacer(AbbreviationReplacer):
+        SENTENCE_STARTERS = []
\ No newline at end of file
diff --git a/pysbd/languages.py b/pysbd/languages.py
index a7d764c..a42f5c9 100644
--- a/pysbd/languages.py
+++ b/pysbd/languages.py
@@ -22,6 +22,7 @@
 from pysbd.lang.deutsch import Deutsch
 from pysbd.lang.kazakh import Kazakh
 from pysbd.lang.slovak import Slovak
+from pysbd.lang.bangla import Bangla
 
 LANGUAGE_CODES = {
     'en': English,
@@ -46,7 +47,8 @@
     'ja': Japanese,
     'de': Deutsch,
     'kk': Kazakh,
-    'sk': Slovak
+    'sk': Slovak,
+    'bn': Bangla
 }
 
 