Skip to content

Commit d9967c5

Browse files
committed
Update budoux
1 parent ba1f929 commit d9967c5

File tree

1 file changed

+2
-45
lines changed

1 file changed

+2
-45
lines changed

pythainlp/tokenize/budoux.py

Lines changed: 2 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,6 @@
1010
`pythainlp.tokenize` will not fail if `budoux` is not installed. When
1111
used and `budoux` is missing, a clear ImportError is raised with an
1212
installation hint.
13-
14-
The BudouX API surface has changed across versions; this wrapper tries
15-
several common entry points (`LineBreaker`, `Budoux`, `parse`,
16-
`segment`) and normalizes the output into a list of strings.
1713
"""
1814
from typing import List
1915

@@ -51,45 +47,6 @@ def segment(text: str) -> List[str]:
5147

5248
parser = _parser
5349

54-
# Call the most-likely parse/segment method and normalize output.
55-
if hasattr(parser, "parse") and callable(getattr(parser, "parse")):
56-
result = parser.parse(text)
57-
elif hasattr(parser, "segment") and callable(getattr(parser, "segment")):
58-
result = parser.segment(text)
59-
elif hasattr(parser, "break_lines") and callable(
60-
getattr(parser, "break_lines")
61-
):
62-
result = parser.break_lines(text)
63-
else:
64-
# If parser is the module exposing top-level parse/segment
65-
if hasattr(parser, "parse") and callable(getattr(parser, "parse")):
66-
result = parser.parse(text)
67-
elif hasattr(parser, "segment") and callable(
68-
getattr(parser, "segment")
69-
):
70-
result = parser.segment(text)
71-
else:
72-
raise RuntimeError("Unable to call budoux parser method.")
73-
74-
# Normalize: accept list[str], list[dict], or a single str whose segments are separated by newlines
75-
if isinstance(result, str):
76-
# some implementations return a string with newlines
77-
return [s for s in result.splitlines() if s]
78-
79-
if isinstance(result, list):
80-
out: List[str] = []
81-
for item in result:
82-
if isinstance(item, str):
83-
out.append(item)
84-
elif isinstance(item, dict):
85-
# Some APIs may return dict-like segments
86-
if "text" in item:
87-
out.append(item["text"])
88-
else:
89-
out.append(str(item))
90-
else:
91-
out.append(str(item))
92-
return out
50+
result = parser.parse(text)
9351

94-
# Fallback: stringify whatever we got
95-
return [str(result)]
52+
return result

0 commit comments

Comments
 (0)