|
10 | 10 | `pythainlp.tokenize` will not fail if `budoux` is not installed. When |
11 | 11 | used and `budoux` is missing, a clear ImportError is raised with an |
12 | 12 | installation hint. |
13 | | -
|
14 | | -The BudouX API surface has changed across versions; this wrapper tries |
15 | | -several common entry points (`LineBreaker`, `Budoux`, `parse`, |
16 | | -`segment`) and normalizes the output into a list of strings. |
17 | 13 | """ |
18 | 14 | from typing import List |
19 | 15 |
|
@@ -51,45 +47,6 @@ def segment(text: str) -> List[str]: |
51 | 47 |
|
52 | 48 | parser = _parser |
53 | 49 |
|
54 | | - # Call the most-likely parse/segment method and normalize output. |
55 | | - if hasattr(parser, "parse") and callable(getattr(parser, "parse")): |
56 | | - result = parser.parse(text) |
57 | | - elif hasattr(parser, "segment") and callable(getattr(parser, "segment")): |
58 | | - result = parser.segment(text) |
59 | | - elif hasattr(parser, "break_lines") and callable( |
60 | | - getattr(parser, "break_lines") |
61 | | - ): |
62 | | - result = parser.break_lines(text) |
63 | | - else: |
64 | | - # If parser is the module exposing top-level parse/segment |
65 | | - if hasattr(parser, "parse") and callable(getattr(parser, "parse")): |
66 | | - result = parser.parse(text) |
67 | | - elif hasattr(parser, "segment") and callable( |
68 | | - getattr(parser, "segment") |
69 | | - ): |
70 | | - result = parser.segment(text) |
71 | | - else: |
72 | | - raise RuntimeError("Unable to call budoux parser method.") |
73 | | - |
74 | | - # Normalize: allow list[str], list[dict], str (joined with newline) |
75 | | - if isinstance(result, str): |
76 | | - # some implementations return a string with newlines |
77 | | - return [s for s in result.splitlines() if s] |
78 | | - |
79 | | - if isinstance(result, list): |
80 | | - out: List[str] = [] |
81 | | - for item in result: |
82 | | - if isinstance(item, str): |
83 | | - out.append(item) |
84 | | - elif isinstance(item, dict): |
85 | | - # Some APIs may return dict-like segments |
86 | | - if "text" in item: |
87 | | - out.append(item["text"]) |
88 | | - else: |
89 | | - out.append(str(item)) |
90 | | - else: |
91 | | - out.append(str(item)) |
92 | | - return out |
| 50 | + result = parser.parse(text) |
93 | 51 |
|
94 | | - # Fallback: stringify whatever we got |
95 | | - return [str(result)] |
| 52 | + return result |
0 commit comments