Skip to content

Commit

Permalink
Optimize quality computation (#8990)
Browse files Browse the repository at this point in the history
- Added lazy ellipse mask / RLE computation (OpenCV's ellipse() is quite slow with big masks)
- Removed simultaneous mask materialization for all masks on the image in segmentation matching
- Added batch size restriction in bulk saving of quality reports
  • Loading branch information
zhiltsov-max authored Jan 30, 2025
1 parent dcbe07e commit 3b5202e
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 17 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
### Fixed

- Improved performance and memory utilization for quality reports in tasks with ellipses and masks
(<https://github.com/cvat-ai/cvat/pull/8990>)
14 changes: 12 additions & 2 deletions cvat/apps/dataset_manager/formats/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,17 +104,27 @@ def rle(cls, arr: np.ndarray) -> list[int]:

class EllipsesToMasks:
@staticmethod
def convert_ellipse(ellipse, img_h, img_w):
def _convert(ellipse, img_h, img_w):
cx, cy, rightX, topY = ellipse.points
rx = rightX - cx
ry = cy - topY
center = (round(cx), round(cy))
axis = (round(rx), round(ry))
angle = ellipse.rotation
mat = np.zeros((img_h, img_w), dtype=np.uint8)

# TODO: cv2.ellipse() has bad performance for big masks, try to find a better solution
cv2.ellipse(mat, center, axis, angle, 0, 360, 255, thickness=-1)

rle = mask_utils.encode(np.asfortranarray(mat))
return dm.RleMask(rle=rle, label=ellipse.label, z_order=ellipse.z_order,
return rle

@staticmethod
def convert_ellipse(ellipse, img_h, img_w):
def _lazy_convert():
return EllipsesToMasks._convert(ellipse, img_h, img_w)

return dm.RleMask(rle=_lazy_convert, label=ellipse.label, z_order=ellipse.z_order,
attributes=ellipse.attributes, group=ellipse.group)


Expand Down
20 changes: 14 additions & 6 deletions cvat/apps/dataset_manager/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
import re
import tempfile
import zipfile
from collections.abc import Generator, Sequence
from collections.abc import Generator, Iterable, Sequence
from contextlib import contextmanager
from copy import deepcopy
from datetime import timedelta
from enum import Enum
from threading import Lock
from typing import Any
from typing import Any, TypeVar

import attrs
import django_rq
Expand All @@ -38,18 +38,26 @@ def make_zip_archive(src_path, dst_path):
archive.write(path, osp.relpath(path, src_path))


def bulk_create(db_model, objects, flt_param):
_ModelT = TypeVar("_ModelT", bound=models.Model)

def bulk_create(
db_model: type[_ModelT],
objects: Iterable[_ModelT],
*,
flt_param: dict[str, Any] | None = None,
batch_size: int | None = 10000
) -> list[_ModelT]:
if objects:
if flt_param:
if "postgresql" in settings.DATABASES["default"]["ENGINE"]:
return db_model.objects.bulk_create(objects)
return db_model.objects.bulk_create(objects, batch_size=batch_size)
else:
ids = list(db_model.objects.filter(**flt_param).values_list('id', flat=True))
db_model.objects.bulk_create(objects)
db_model.objects.bulk_create(objects, batch_size=batch_size)

return list(db_model.objects.exclude(id__in=ids).filter(**flt_param))
else:
return db_model.objects.bulk_create(objects)
return db_model.objects.bulk_create(objects, batch_size=batch_size)

return []

Expand Down
22 changes: 13 additions & 9 deletions cvat/apps/quality_control/quality_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -1147,14 +1147,22 @@ def _get_compiled_mask(

from pycocotools import mask as mask_utils

# Merge instance groups
object_rle_groups = [_to_rle(ann, img_h=img_h, img_w=img_w) for ann in anns]
object_rles = [mask_utils.merge(g) for g in object_rle_groups]
object_masks = mask_utils.decode(object_rles)

# Mask materialization can consume a lot of memory, so each mask is decoded
# lazily on request to avoid storing all the masks simultaneously
def _make_lazy_decode(i: int):
def _lazy_decode() -> dm.BinaryMaskImage:
return mask_utils.decode([object_rles[i]])[:, :, 0]

return _lazy_decode

return dm.CompiledMask.from_instance_masks(
# need to increment labels and instance ids by 1 to avoid confusion with background
instance_masks=(
dm.Mask(image=object_masks[:, :, i], z_order=ann.z_order, label=ann.label + 1)
dm.Mask(image=_make_lazy_decode(i), z_order=ann.z_order, label=ann.label + 1)
for i, ann in enumerate(anns)
),
instance_ids=(iid + 1 for iid in instance_ids),
Expand Down Expand Up @@ -2529,9 +2537,7 @@ def _save_reports(self, *, task_report: dict, job_reports: list[dict]) -> models
)
db_job_reports.append(db_job_report)

db_job_reports = bulk_create(
db_model=models.QualityReport, objects=db_job_reports, flt_param={}
)
db_job_reports = bulk_create(db_model=models.QualityReport, objects=db_job_reports)

db_conflicts = []
db_report_iter = itertools.chain([db_task_report], db_job_reports)
Expand All @@ -2546,9 +2552,7 @@ def _save_reports(self, *, task_report: dict, job_reports: list[dict]) -> models
)
db_conflicts.append(db_conflict)

db_conflicts = bulk_create(
db_model=models.AnnotationConflict, objects=db_conflicts, flt_param={}
)
db_conflicts = bulk_create(db_model=models.AnnotationConflict, objects=db_conflicts)

db_ann_ids = []
db_conflicts_iter = iter(db_conflicts)
Expand All @@ -2564,7 +2568,7 @@ def _save_reports(self, *, task_report: dict, job_reports: list[dict]) -> models
)
db_ann_ids.append(db_ann_id)

db_ann_ids = bulk_create(db_model=models.AnnotationId, objects=db_ann_ids, flt_param={})
db_ann_ids = bulk_create(db_model=models.AnnotationId, objects=db_ann_ids)

return db_task_report

Expand Down

0 comments on commit 3b5202e

Please sign in to comment.