mashraf-222 · codeflash-ai · Oct 1, 2025
diff --git a/doctr/models/recognition/utils.py b/doctr/models/recognition/utils.py
@@ -28,45 +28,54 @@ def merge_strings(a: str, b: str, overlap_ratio: float) -> str:
         'abcdefgh'
     """
     seq_len = min(len(a), len(b))
-    if seq_len <= 1:  # One sequence is empty or will be after cropping in next step, return both to keep data
+    if seq_len <= 1:
         return a + b
 
-    a_crop, b_crop = a[:-1], b[1:]  # Remove last letter of "a" and first of "b", because they might be cut off
+    a_crop = a[:-1]
+    b_crop = b[1:]
     max_overlap = min(len(a_crop), len(b_crop))
 
-    # Compute Hamming distances for all possible overlaps
-    scores = [Hamming.distance(a_crop[-i:], b_crop[:i], processor=None) for i in range(1, max_overlap + 1)]
+    # Expected overlap length, adjusted by -3 for cropping and indexing (computed once, before the branches)
+    expected_overlap = round(len(b) * overlap_ratio) - 3
 
-    # Find zero-score matches
-    zero_matches = [i for i, score in enumerate(scores) if score == 0]
+    # Single loop: collect the Hamming distance for every overlap length and the 0-based indices of zero-distance matches
+    scores = []
+    zero_matches = []
+    for i in range(1, max_overlap + 1):
+        score = Hamming.distance(a_crop[-i:], b_crop[:i], processor=None)
+        scores.append(score)
+        if score == 0:
+            zero_matches.append(i - 1)
 
-    expected_overlap = round(len(b) * overlap_ratio) - 3  # adjust for cropping and index
-
-    # Case 1: One perfect match - exactly one zero score - just merge there
     if len(zero_matches) == 1:
         i = zero_matches[0]
         return a_crop + b_crop[i + 1 :]
 
-    # Case 2: Multiple perfect matches - likely due to repeated characters.
-    # Use the estimated overlap length to choose the match closest to the expected alignment.
     elif len(zero_matches) > 1:
+        # Several perfect matches (likely repeated characters): pick the one closest to the expected overlap
         best_i = min(zero_matches, key=lambda x: abs(x - expected_overlap))
         return a_crop + b_crop[best_i + 1 :]
 
-    # Case 3: Absence of zero scores indicates that the same character in the image was recognized differently OR that
-    # the overlap was too small and we just need to merge the crops fully
     if expected_overlap < -1:
         return a + b
     elif expected_overlap < 0:
         return a_crop + b_crop
 
-    # Find best overlap by minimizing Hamming distance + distance from expected overlap size
-    combined_scores = [score + abs(i - expected_overlap) for i, score in enumerate(scores)]
-    best_i = combined_scores.index(min(combined_scores))
-    return a_crop + b_crop[best_i + 1 :]
+    # Single pass: keep the first index that minimizes Hamming distance + distance from the expected overlap
+    min_score = None
+    min_idx = -1
+    for i, score in enumerate(scores):
+        combined = score + abs(i - expected_overlap)
+        if (min_score is None) or (combined < min_score):
+            min_score = combined
+            min_idx = i
+
+    return a_crop + b_crop[min_idx + 1 :]
 
 
-def merge_multi_strings(seq_list: list[str], overlap_ratio: float, last_overlap_ratio: float) -> str:
+def merge_multi_strings(
+    seq_list: list[str], overlap_ratio: float, last_overlap_ratio: float
+) -> str:
     """
     Merges consecutive string sequences with overlapping characters.