From f8a776000a40e8f26034468ec813fd87963ec1a4 Mon Sep 17 00:00:00 2001 From: Nikos Livathinos Date: Tue, 15 Apr 2025 15:23:22 +0200 Subject: [PATCH 1/4] feat: Remove the "background" class from the layout model to work with RT-DETRv2 Signed-off-by: Nikos Livathinos --- .../layoutmodel/layout_predictor.py | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/docling_ibm_models/layoutmodel/layout_predictor.py b/docling_ibm_models/layoutmodel/layout_predictor.py index 60ab1a5..879b45d 100644 --- a/docling_ibm_models/layoutmodel/layout_predictor.py +++ b/docling_ibm_models/layoutmodel/layout_predictor.py @@ -44,24 +44,23 @@ def __init__( """ # Initialize classes map: self._classes_map = { - 0: "background", - 1: "Caption", - 2: "Footnote", - 3: "Formula", - 4: "List-item", - 5: "Page-footer", - 6: "Page-header", - 7: "Picture", - 8: "Section-header", - 9: "Table", - 10: "Text", - 11: "Title", - 12: "Document Index", - 13: "Code", - 14: "Checkbox-Selected", - 15: "Checkbox-Unselected", - 16: "Form", - 17: "Key-Value Region", + 0: "Caption", + 1: "Footnote", + 2: "Formula", + 3: "List-item", + 4: "Page-footer", + 5: "Page-header", + 6: "Picture", + 7: "Section-header", + 8: "Table", + 9: "Text", + 10: "Title", + 11: "Document Index", + 12: "Code", + 13: "Checkbox-Selected", + 14: "Checkbox-Unselected", + 15: "Form", + 16: "Key-Value Region", } # Blacklisted classes From 7ec0181b9b2a7a87960349030e437915f6870a2c Mon Sep 17 00:00:00 2001 From: Nikos Livathinos Date: Tue, 15 Apr 2025 23:26:45 +0200 Subject: [PATCH 2/4] fix: Fix the LayoutPredictor to use the RT-DETR v2 model Signed-off-by: Nikos Livathinos --- docling_ibm_models/layoutmodel/layout_predictor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docling_ibm_models/layoutmodel/layout_predictor.py b/docling_ibm_models/layoutmodel/layout_predictor.py index 879b45d..02ff815 100644 --- a/docling_ibm_models/layoutmodel/layout_predictor.py +++ b/docling_ibm_models/layoutmodel/layout_predictor.py @@ -11,7 +11,7 @@ import torch import torchvision.transforms as T from PIL import Image -from transformers import RTDetrForObjectDetection, RTDetrImageProcessor +from transformers import RTDetrImageProcessor, RTDetrV2ForObjectDetection _log = logging.getLogger(__name__) @@ -86,7 +86,7 @@ def __init__( processor_config = os.path.join(artifact_path, "preprocessor_config.json") model_config = os.path.join(artifact_path, "config.json") self._image_processor = RTDetrImageProcessor.from_json_file(processor_config) - self._model = RTDetrForObjectDetection.from_pretrained( + self._model = RTDetrV2ForObjectDetection.from_pretrained( artifact_path, config=model_config ).to(self._device) self._model.eval() From 7c0c6e080f629dcb7c849912985b70fb4b97a9a2 Mon Sep 17 00:00:00 2001 From: Nikos Livathinos Date: Tue, 15 Apr 2025 23:27:50 +0200 Subject: [PATCH 3/4] chore: Remove poetry v1 command from the devops --- .pre-commit-config.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6ce2add..69ca948 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,11 +14,11 @@ repos: pass_filenames: false language: system files: '\.py$' - - id: poetry - name: Poetry check - entry: poetry lock --check - pass_filenames: false - language: system + # - id: poetry + # name: Poetry check + # entry: poetry lock --check + # pass_filenames: false + # language: system - id: system name: MyPy entry: poetry run mypy docling_ibm_models From eef303ea0d2e6e48fb471aa9a5bd269b2d1f0f64 Mon Sep 17 00:00:00 2001 From: Nikos Livathinos Date: Wed, 16 Apr 2025 00:36:51 +0200 Subject: [PATCH 4/4] fix: Fix bug in labels of LayoutPredictor. Fix layout test and demo to use new weights Signed-off-by: Nikos Livathinos --- demo/demo_layout_predictor.py | 5 +---- docling_ibm_models/layoutmodel/layout_predictor.py | 3 +-- tests/test_layout_predictor.py | 6 +++--- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/demo/demo_layout_predictor.py b/demo/demo_layout_predictor.py index 56c030f..75bac2a 100644 --- a/demo/demo_layout_predictor.py +++ b/demo/demo_layout_predictor.py @@ -118,10 +118,7 @@ def main(args): Path(viz_dir).mkdir(parents=True, exist_ok=True) # Download models from HF - download_path = snapshot_download( - repo_id="ds4sd/docling-models", revision="v2.1.0" - ) - artifact_path = os.path.join(download_path, "model_artifacts/layout") + artifact_path = snapshot_download(repo_id="ds4sd/docling-layout-heron", revision="main") # Test the LayoutPredictor demo(logger, artifact_path, device, num_threads, img_dir, viz_dir) diff --git a/docling_ibm_models/layoutmodel/layout_predictor.py b/docling_ibm_models/layoutmodel/layout_predictor.py index 02ff815..4ae8648 100644 --- a/docling_ibm_models/layoutmodel/layout_predictor.py +++ b/docling_ibm_models/layoutmodel/layout_predictor.py @@ -153,8 +153,7 @@ def predict(self, orig_img: Union[Image.Image, np.ndarray]) -> Iterable[dict]: result["scores"], result["labels"], result["boxes"] ): score = float(score.item()) - - label_id = int(label_id.item()) + 1 # Advance the label_id + label_id = int(label_id.item()) label_str = self._classes_map[label_id] # Filter out blacklisted classes diff --git a/tests/test_layout_predictor.py b/tests/test_layout_predictor.py index 109ba42..9501826 100644 --- a/tests/test_layout_predictor.py +++ b/tests/test_layout_predictor.py @@ -31,12 +31,12 @@ def init() -> dict: "image_size": 640, "threshold": 0.6, }, - "pred_bboxes": 9, + # "pred_bboxes": 9, + "pred_bboxes": 12, } # Download models from HF - download_path = snapshot_download(repo_id="ds4sd/docling-models", revision="v2.1.0") - artifact_path = os.path.join(download_path, "model_artifacts/layout") + artifact_path = snapshot_download(repo_id="ds4sd/docling-layout-heron", revision="main") # Add the missing config keys init["artifact_path"] = artifact_path