From 53dde72473777d477282f3e3ef017e4802f67e7d Mon Sep 17 00:00:00 2001 From: Nathan <168383951+Nathan-GoSupply@users.noreply.github.com> Date: Wed, 2 Apr 2025 15:46:59 +1100 Subject: [PATCH 1/3] Update pdfminer_utils.py Fix for 'PSSyntaxError' import error. "cannot import name 'PSSyntaxError' from 'pdfminer.psparser'" Latest pdfminer-six doesn't import PSSyntaxError into `pdfminer.psparser` anymore. It must now be directly imported from its source (`pdfminer.psexceptions`) --- unstructured/partition/pdf_image/pdfminer_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured/partition/pdf_image/pdfminer_utils.py b/unstructured/partition/pdf_image/pdfminer_utils.py index ad6f981914..3993f41ae0 100644 --- a/unstructured/partition/pdf_image/pdfminer_utils.py +++ b/unstructured/partition/pdf_image/pdfminer_utils.py @@ -6,7 +6,7 @@ from pdfminer.layout import LAParams, LTContainer, LTImage, LTItem, LTTextLine from pdfminer.pdfinterp import PDFPageInterpreter, PDFResourceManager from pdfminer.pdfpage import PDFPage -from pdfminer.psparser import PSSyntaxError +from pdfminer.psexceptions import PSSyntaxError from pydantic import BaseModel from unstructured.logger import logger From 8ba0c0f2a162a9a1f092603441d253fadf629826 Mon Sep 17 00:00:00 2001 From: Nathan <168383951+Nathan-GoSupply@users.noreply.github.com> Date: Tue, 8 Apr 2025 12:37:39 +1000 Subject: [PATCH 2/3] Update CHANGELOG.md pdfminer bug fix --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad3afdfc3e..f0914c1148 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,30 @@ +## 0.17.7 + +### Enhancements + +### Features + +### Fixes +Fix for 'PSSyntaxError' import error: +"cannot import name 'PSSyntaxError' from 'pdfminer.pdfparser'" + +Latest pdfminer-six doesn't import PSSyntaxError into pdfminer.pdfparser anymore. 
It must now be directly imported from its source (pdfminer.psexceptions) + +This change will also work on the older version. In the older version, `pdfminer.pdfparser` imports PSSyntaxError from `pdfminer.psexceptions`. +However they have since removed the PSSyntaxError import from `pdfminer.pdfparser`. + +Therefore, for the new pdfminer version we must change to directly import from `pdfminer.psexceptions`. + +So instead of +`pdfminer_utils.py` -> `pdfminer.pdfparser` ->`pdfminer.psexceptions` + +We can do +`pdfminer_utils.py` -> `pdfminer.psexceptions` + +PSSyntaxError is defined in `pdfminer.psexceptions` in both the old and new versions of pdfminer, so we will still get backward compatibility. + +[Here is the commit](https://github.com/pdfminer/pdfminer.six/commit/b9b75ff85877b7cd373539c79014cbde39508969#diff-a0da8fc41f6e21cd3ab62f914df6ce72d433d3222168af458993d911a5dac37c) for the change on pdfminer. + ## 0.17.6-dev0 ### Enhancements From 2c50988c9acee48b81d361cd02512f22aa2ef645 Mon Sep 17 00:00:00 2001 From: Nathan <168383951+Nathan-GoSupply@users.noreply.github.com> Date: Tue, 8 Apr 2025 12:43:37 +1000 Subject: [PATCH 3/3] Update CHANGELOG.md --- CHANGELOG.md | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0914c1148..c1ecc62e52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,30 +1,3 @@ -## 0.17.7 - -### Enhancements - -### Features - -### Fixes -Fix for 'PSSyntaxError' import error: -"cannot import name 'PSSyntaxError' from 'pdfminer.pdfparser'" - -Latest pdfminer-six doesn't import PSSyntaxError into pdfminer.pdfparser anymore. It must now be directly imported from its source (pdfminer.psexceptions) - -This change will also work on the older version. In the older version, `pdfminer.pdfparser` imports PSSyntaxError from `pdfminer.psexceptions`. -However they have since removed the PSSyntaxError import from `pdfminer.pdfparser`. 
- -Therefore, for the new pdfminer version we must change to directly import from `pdfminer.psexceptions`. - -So instead of -`pdfminer_utils.py` -> `pdfminer.pdfparser` ->`pdfminer.psexceptions` - -We can do -`pdfminer_utils.py` -> `pdfminer.psexceptions` - -PSSyntaxError is defined in `pdfminer.psexceptions` in both the old and new versions of pdfminer, so we will still get backward compatibility. - -[Here is the commit](https://github.com/pdfminer/pdfminer.six/commit/b9b75ff85877b7cd373539c79014cbde39508969#diff-a0da8fc41f6e21cd3ab62f914df6ce72d433d3222168af458993d911a5dac37c) for the change on pdfminer. - ## 0.17.6-dev0 ### Enhancements @@ -42,6 +15,7 @@ PSSyntaxError is defined in `pdfminer.psexceptions` in both the old and new vers ### Fixes - **Removed out of date ubuntu Dockerfile.** The Dockerfile was out of date and non-functional. +- **Fix for 'PSSyntaxError' import error: "cannot import name 'PSSyntaxError' from 'pdfminer.psparser'"** PSSyntaxError needed to be imported from its source 'pdfminer.psexceptions'. ## 0.17.4