diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index a5548bcc61..02910a9041 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -74,7 +74,9 @@ def _detach(boxes: np.ndarray) -> tuple[np.ndarray, np.ndarray]: return list(loc_preds), list(obj_scores) -def resolve_enclosing_bbox(bboxes: list[BoundingBox] | np.ndarray) -> BoundingBox | np.ndarray: +def resolve_enclosing_bbox( + bboxes: list[BoundingBox] | np.ndarray, +) -> BoundingBox | np.ndarray: """Compute enclosing bbox either from: Args: @@ -96,7 +98,9 @@ def resolve_enclosing_bbox(bboxes: list[BoundingBox] | np.ndarray) -> BoundingBo return (min(x), min(y)), (max(x), max(y)) -def resolve_enclosing_rbbox(rbboxes: list[np.ndarray], intermed_size: int = 1024) -> np.ndarray: +def resolve_enclosing_rbbox( + rbboxes: list[np.ndarray], intermed_size: int = 1024 +) -> np.ndarray: """Compute enclosing rotated bbox either from: Args: @@ -130,7 +134,11 @@ def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray: """ angle_rad = angle * np.pi / 180.0 # compute radian angle for np functions rotation_mat = np.array( - [[np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)]], dtype=points.dtype + [ + [np.cos(angle_rad), -np.sin(angle_rad)], + [np.sin(angle_rad), np.cos(angle_rad)], + ], + dtype=points.dtype, ) return np.matmul(points, rotation_mat.T) @@ -145,10 +153,12 @@ def compute_expanded_shape(img_shape: tuple[int, int], angle: float) -> tuple[in Returns: the height and width of the rotated image """ - points: np.ndarray = np.array([ - [img_shape[1] / 2, img_shape[0] / 2], - [-img_shape[1] / 2, img_shape[0] / 2], - ]) + points: np.ndarray = np.array( + [ + [img_shape[1] / 2, img_shape[0] / 2], + [-img_shape[1] / 2, img_shape[0] / 2], + ] + ) rotated_points = rotate_abs_points(points, angle) @@ -176,7 +186,10 @@ def rotate_abs_geoms( """ # Switch to polygons polys = ( - np.stack([geoms[:, [0, 1]], geoms[:, [2, 1]], geoms[:, [2, 3]], geoms[:, [0, 3]]], axis=1) + np.stack( + [geoms[:, [0, 1]], geoms[:, [2, 1]], geoms[:, [2, 3]], geoms[:, [0, 3]]], + axis=1, + ) if geoms.ndim == 2 else geoms ) @@ -191,13 +204,19 @@ def rotate_abs_geoms( # Switch back to top-left corner as referential target_shape = compute_expanded_shape(img_shape, angle) if expand else img_shape # Clip coords to fit since there is no expansion - rotated_polys[..., 0] = (rotated_polys[..., 0] + target_shape[1] / 2).clip(0, target_shape[1]) - rotated_polys[..., 1] = (target_shape[0] / 2 - rotated_polys[..., 1]).clip(0, target_shape[0]) + rotated_polys[..., 0] = (rotated_polys[..., 0] + target_shape[1] / 2).clip( + 0, target_shape[1] + ) + rotated_polys[..., 1] = (target_shape[0] / 2 - rotated_polys[..., 1]).clip( + 0, target_shape[0] + ) return rotated_polys -def remap_boxes(loc_preds: np.ndarray, orig_shape: tuple[int, int], dest_shape: tuple[int, int]) -> np.ndarray: +def remap_boxes( + loc_preds: np.ndarray, orig_shape: tuple[int, int], dest_shape: tuple[int, int] +) -> np.ndarray: """Remaps a batch of rotated locpred (N, 4, 2) expressed for an origin_shape to a destination_shape. This does not impact the absolute shape of the boxes, but allow to calculate the new relative RotatedBbox coordinates after a resizing of the image. @@ -213,12 +232,18 @@ def remap_boxes(loc_preds: np.ndarray, orig_shape: tuple[int, int], dest_shape: if len(dest_shape) != 2: raise ValueError(f"Mask length should be 2, was found at: {len(dest_shape)}") if len(orig_shape) != 2: - raise ValueError(f"Image_shape length should be 2, was found at: {len(orig_shape)}") + raise ValueError( + f"Image_shape length should be 2, was found at: {len(orig_shape)}" + ) orig_height, orig_width = orig_shape dest_height, dest_width = dest_shape mboxes = loc_preds.copy() - mboxes[:, :, 0] = ((loc_preds[:, :, 0] * orig_width) + (dest_width - orig_width) / 2) / dest_width - mboxes[:, :, 1] = ((loc_preds[:, :, 1] * orig_height) + (dest_height - orig_height) / 2) / dest_height + mboxes[:, :, 0] = ( + (loc_preds[:, :, 0] * orig_width) + (dest_width - orig_width) / 2 + ) / dest_width + mboxes[:, :, 1] = ( + (loc_preds[:, :, 1] * orig_height) + (dest_height - orig_height) / 2 + ) / dest_height return mboxes @@ -263,19 +288,31 @@ def rotate_boxes( # Compute rotation matrix angle_rad = angle * np.pi / 180.0 # compute radian angle for np functions rotation_mat = np.array( - [[np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)]], dtype=_boxes.dtype + [ + [np.cos(angle_rad), -np.sin(angle_rad)], + [np.sin(angle_rad), np.cos(angle_rad)], + ], + dtype=_boxes.dtype, ) # Rotate absolute points - points: np.ndarray = np.stack((_boxes[:, :, 0] * orig_shape[1], _boxes[:, :, 1] * orig_shape[0]), axis=-1) + points: np.ndarray = np.stack( + (_boxes[:, :, 0] * orig_shape[1], _boxes[:, :, 1] * orig_shape[0]), axis=-1 + ) image_center = (orig_shape[1] / 2, orig_shape[0] / 2) rotated_points = image_center + np.matmul(points - image_center, rotation_mat) rotated_boxes: np.ndarray = np.stack( - (rotated_points[:, :, 0] / orig_shape[1], rotated_points[:, :, 1] / orig_shape[0]), axis=-1 + ( + rotated_points[:, :, 0] / orig_shape[1], + rotated_points[:, :, 1] / orig_shape[0], + ), + axis=-1, ) # Apply a mask if requested if target_shape is not None: - rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape) + rotated_boxes = remap_boxes( + rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape + ) return rotated_boxes @@ -305,7 +342,14 @@ def rotate_image( int(max(0, ceil(exp_shape[0] - image.shape[0]))), int(max(0, ceil(exp_shape[1] - image.shape[1]))), ) - exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) + exp_img = np.pad( + image, + ( + (h_pad // 2, h_pad - h_pad // 2), + (w_pad // 2, w_pad - w_pad // 2), + (0, 0), + ), + ) else: exp_img = image @@ -316,15 +360,38 @@ def rotate_image( # Pad to get the same aspect ratio if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]): # Pad width - if (rot_img.shape[0] / rot_img.shape[1]) > (image.shape[0] / image.shape[1]): - h_pad, w_pad = 0, int(rot_img.shape[0] * image.shape[1] / image.shape[0] - rot_img.shape[1]) + if (rot_img.shape[0] / rot_img.shape[1]) > ( + image.shape[0] / image.shape[1] + ): + h_pad, w_pad = ( + 0, + int( + rot_img.shape[0] * image.shape[1] / image.shape[0] + - rot_img.shape[1] + ), + ) # Pad height else: - h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0 - rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) + h_pad, w_pad = ( + int( + rot_img.shape[1] * image.shape[0] / image.shape[1] + - rot_img.shape[0] + ), + 0, + ) + rot_img = np.pad( + rot_img, + ( + (h_pad // 2, h_pad - h_pad // 2), + (w_pad // 2, w_pad - w_pad // 2), + (0, 0), + ), + ) if preserve_origin_shape: # rescale - rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR) + rot_img = cv2.resize( + rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR + ) return rot_img @@ -359,13 +426,17 @@ def estimate_page_angle(polys: np.ndarray) -> float: with np.errstate(divide="raise", invalid="raise"): try: return float( - np.median(np.arctan((yleft - yright) / (xright - xleft)) * 180 / np.pi) # Y axis from top to bottom! + np.median( + np.degrees(np.arctan((yleft - yright) / (xright - xleft))) + ) # Y axis from top to bottom! ) except FloatingPointError: return 0.0 -def convert_to_relative_coords(geoms: np.ndarray, img_shape: tuple[int, int]) -> np.ndarray: +def convert_to_relative_coords( + geoms: np.ndarray, img_shape: tuple[int, int] +) -> np.ndarray: """Convert a geometry to relative coordinates Args: @@ -404,7 +475,9 @@ def extract_crops(img: np.ndarray, boxes: np.ndarray) -> list[np.ndarray]: if boxes.shape[0] == 0: return [] if boxes.shape[1] != 4: - raise AssertionError("boxes are expected to be relative and in order (xmin, ymin, xmax, ymax)") + raise AssertionError( + "boxes are expected to be relative and in order (xmin, ymin, xmax, ymax)" + ) # Project relative coordinates _boxes = boxes.copy() @@ -420,7 +493,10 @@ def extract_crops(img: np.ndarray, boxes: np.ndarray) -> list[np.ndarray]: def extract_rcrops( - img: np.ndarray, polys: np.ndarray, dtype=np.float32, assume_horizontal: bool = False + img: np.ndarray, + polys: np.ndarray, + dtype=np.float32, + assume_horizontal: bool = False, ) -> list[np.ndarray]: """Created cropped images from list of rotated bounding boxes @@ -436,7 +512,9 @@ def extract_rcrops( if polys.shape[0] == 0: return [] if polys.shape[1:] != (4, 2): - raise AssertionError("polys are expected to be quadrilateral, of shape (N, 4, 2)") + raise AssertionError( + "polys are expected to be quadrilateral, of shape (N, 4, 2)" + ) # Project relative coordinates _boxes = polys.copy()