|
14 | 14 | from collections import OrderedDict
|
15 | 15 | from collections.abc import Iterable, Iterator
|
16 | 16 | from copy import copy
|
17 |
| -from difflib import get_close_matches |
| 17 | +from difflib import SequenceMatcher |
18 | 18 | from email import message_from_string
|
| 19 | +from heapq import nlargest |
19 | 20 | from typing import TYPE_CHECKING
|
20 | 21 |
|
21 | 22 | from babel import __version__ as VERSION
|
|
31 | 32 |
|
32 | 33 | __all__ = ['Message', 'Catalog', 'TranslationError']
|
33 | 34 |
|
def get_close_matches(
    word: str,
    possibilities: Iterable[str],
    n: int = 3,
    cutoff: float = 0.6,
) -> list[str]:
    """Return the best "good enough" matches for ``word``.

    This is a modified copy of ``difflib.get_close_matches``.  The only
    functional difference is that the ``SequenceMatcher`` is created with
    ``autojunk=False``, to work around
    https://github.com/python/cpython/issues/90825.

    :param word: the sequence for which close matches are desired
    :param possibilities: the candidate sequences to match ``word`` against
    :param n: the maximum number of close matches to return; must be > 0
    :param cutoff: a similarity ratio in ``[0.0, 1.0]``; candidates scoring
                   below it are ignored
    :return: up to ``n`` candidates, most similar first
    :raises ValueError: if ``n`` is not positive or ``cutoff`` lies outside
                        ``[0.0, 1.0]``
    """
    if not n > 0:
        raise ValueError(f"n must be > 0: {n!r}")
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff!r}")

    matcher = SequenceMatcher(autojunk=False)  # the deviation from difflib.py
    matcher.set_seq2(word)

    scored = []
    for candidate in possibilities:
        matcher.set_seq1(candidate)
        # Check the two cheap estimates first, in the same order as difflib,
        # so the full ratio is only computed for candidates that pass both.
        if matcher.real_quick_ratio() < cutoff or matcher.quick_ratio() < cutoff:
            continue
        score = matcher.ratio()  # computed once and reused for the result
        if score >= cutoff:
            scored.append((score, candidate))

    # Keep the n best (score, candidate) pairs, best first; ties on score
    # fall back to comparing the candidates themselves, as in difflib.
    best = nlargest(n, scored)
    # Strip the scores and return only the matched candidates.
    return [candidate for _score, candidate in best]
| 59 | + |
34 | 60 |
|
35 | 61 | PYTHON_FORMAT = re.compile(r'''
|
36 | 62 | \%
|
@@ -803,10 +829,13 @@ def update(
|
803 | 829 | # Prepare for fuzzy matching
|
804 | 830 | fuzzy_candidates = []
|
805 | 831 | if not no_fuzzy_matching:
|
806 |
| - fuzzy_candidates = { |
807 |
| - self._key_for(msgid): messages[msgid].context |
808 |
| - for msgid in messages if msgid and messages[msgid].string |
809 |
| - } |
| 832 | + fuzzy_candidates = {} |
| 833 | + for msgid in messages: |
| 834 | + if msgid and messages[msgid].string: |
| 835 | + key = self._key_for(msgid) |
| 836 | + ctxt = messages[msgid].context |
| 837 | + modified_key = key.lower().strip() |
| 838 | + fuzzy_candidates[modified_key] = (key, ctxt) |
810 | 839 | fuzzy_matches = set()
|
811 | 840 |
|
812 | 841 | def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, str] | str) -> None:
|
@@ -861,8 +890,8 @@ def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, s
|
861 | 890 | matches = get_close_matches(matchkey.lower().strip(),
|
862 | 891 | fuzzy_candidates.keys(), 1)
|
863 | 892 | if matches:
|
864 |
| - newkey = matches[0] |
865 |
| - newctxt = fuzzy_candidates[newkey] |
| 893 | + modified_key = matches[0] |
| 894 | + newkey, newctxt = fuzzy_candidates[modified_key] |
866 | 895 | if newctxt is not None:
|
867 | 896 | newkey = newkey, newctxt
|
868 | 897 | _merge(message, newkey, key)
|
|
0 commit comments