diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e4e0183..21fb2e5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.1.1 + +* Raise `NotImplementedError` when indexing a single element of a `TextRegions`, since single-element access does not behave correctly at the moment. + ## 1.1.0 * Enhancement: Add `TextSource` to track where the text of an element came from diff --git a/test_unstructured_inference/test_elements.py b/test_unstructured_inference/test_elements.py index 7a8b937f..e82d5bc8 100644 --- a/test_unstructured_inference/test_elements.py +++ b/test_unstructured_inference/test_elements.py @@ -593,3 +593,31 @@ def test_textregions_from_coords_accepts_source(): assert region.source == Source.YOLOX assert region.is_extracted + + +@pytest.mark.skip(reason="Not implemented") +def test_textregions_allows_for_single_element_access_and_returns_textregion_with_correct_values(): + """Test that TextRegions allows for single element access and returns a TextRegion with the + correct values""" + + regions = [ + TextRegion.from_coords( + 0, 0, 10, 10, text="first", source=Source.YOLOX, is_extracted=IsExtracted.TRUE + ), + TextRegion.from_coords( + 0, + 0, + 20, + 20, + text="second", + source=Source.DETECTRON2_ONNX, + is_extracted=IsExtracted.PARTIAL, + ), + ] + text_regions = TextRegions.from_list(regions) + for i, region in enumerate(regions): + sliced = text_regions[i] + assert isinstance(sliced, TextRegion) + assert sliced.text == region.text + assert sliced.source == region.source + assert sliced.is_extracted is region.is_extracted diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py index 1bccd442..bedefe47 100644 --- a/unstructured_inference/__version__.py +++ b/unstructured_inference/__version__.py @@ -1 +1 @@ -__version__ = "1.1.0" # pragma: no cover +__version__ = "1.1.1" # pragma: no cover diff --git a/unstructured_inference/inference/elements.py b/unstructured_inference/inference/elements.py index 4e1791b5..b72a9cdc 
100644 --- a/unstructured_inference/inference/elements.py +++ b/unstructured_inference/inference/elements.py @@ -253,6 +253,9 @@ def slice(self, indices) -> TextRegions: # NOTE(alan): I would expect if I try to access a single element, it should return a # TextRegion, not a TextRegions. Currently, you get an error when trying to access a single # element. + if self.element_coords[indices].ndim == 1: + # We've indexed a single element. For now this isn't implemented. + raise NotImplementedError("Slicing a single element is not implemented") return TextRegions( element_coords=self.element_coords[indices], texts=self.texts[indices],