def element_to_md(element: Element, exclude_binary_image_data: bool = False) -> str:
    """Render a single element as a Markdown (or inline HTML) string.

    Rendering rules, checked in this order:

    - `Title` becomes a level-1 heading (`# <text>`).
    - `Table` yields `metadata.text_as_html` when it is set, otherwise its plain text.
    - `Image` becomes a Markdown image whose alt text is the element text. When
      `metadata.image_base64` is set and `exclude_binary_image_data` is false, the target is
      an inline `data:` URI (MIME type `image/*` when `metadata.image_mime_type` is None).
      Otherwise, when `metadata.image_url` is set, the target is that URL.
    - Anything else, including an `Image` with no usable source, falls back to `element.text`.

    Args:
        element: The element to render.
        exclude_binary_image_data: When true, base64 image payloads are never inlined.

    Returns:
        The Markdown/HTML representation of `element`.
    """
    if isinstance(element, Title):
        return f"# {element.text}"

    if isinstance(element, Table):
        html = element.metadata.text_as_html
        return element.text if html is None else html

    if isinstance(element, Image):
        meta = element.metadata
        alt = element.text

        # A single guard covers both inline variants; only the MIME type differs between them.
        if meta.image_base64 is not None and not exclude_binary_image_data:
            if meta.image_mime_type is None:
                return f"![{alt}](data:image/*;base64,{meta.image_base64})"
            return f"![{alt}](data:{meta.image_mime_type};base64,{meta.image_base64})"

        # Reached when there is no base64 payload or inlining was suppressed.
        if meta.image_url is not None:
            return f"![{alt}]({meta.image_url})"

    return element.text