From 938fe34b5dfb0e9146afd677262784b918fa9e29 Mon Sep 17 00:00:00 2001 From: Michael Bryant Date: Fri, 28 Jun 2019 20:07:23 -0700 Subject: [PATCH 1/4] Use latest version of objects from object streams (#1) * Load object streams starting from latest, and don't clobber later versions of objects from object streams * Ignore pyenv's local file --- .gitignore | 6 ++++++ pdfrw/pdfreader.py | 42 ++++++++++++++++++++++++++++++++---------- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index b4c3391..6f64af3 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,9 @@ coverage.xml # Sphinx documentation docs/_build/ + +# Vim +*.sw[op] + +# pyenv +.python-version diff --git a/pdfrw/pdfreader.py b/pdfrw/pdfreader.py index c2ae030..01a4395 100644 --- a/pdfrw/pdfreader.py +++ b/pdfrw/pdfreader.py @@ -182,6 +182,12 @@ def loadindirect(self, key, PdfDict=PdfDict, result = self.indirect_objects.get(key) if not isinstance(result, PdfIndirect): return result + + # If the object was loaded from an object stream, return it + result = self.loaded_object_stream_objs.get(key) + if result is not None: + return result + source = self.source offset = int(self.source.obj_offsets.get(key, '0')) if not offset: @@ -314,14 +320,25 @@ def load_stream_objects(self, object_streams): sobj = func(objsource) key = (num, 0) - self.indirect_objects[key] = sobj - if key in self.deferred_objects: - self.deferred_objects.remove(key) # Mark the object as indirect, and # add it to the list of streams if it starts a stream sobj.indirect = key + # We call load_stream_objects on the most recent stream objects + # in the file first, so we don't want to clobber already-stored + # objects. + if key not in self.loaded_object_stream_objs: + self.loaded_object_stream_objs[key] = sobj + + if key in self.indirect_objects: + continue + + self.indirect_objects[key] = sobj + + if key in self.deferred_objects: + self.deferred_objects.remove(key) + def findxref(self, fdata): ''' Find the cross reference section at the end of a file ''' @@ -601,6 +618,7 @@ def __init__(self, fname=None, fdata=None, decompress=False, private = self.private private.indirect_objects = {} private.deferred_objects = set() + private.loaded_object_stream_objs = {} private.special = {'<<': self.readdict, '[': self.readarray, 'endobj': self.empty_obj, @@ -617,6 +635,7 @@ def __init__(self, fname=None, fdata=None, decompress=False, while 1: source.obj_offsets = {} trailer, is_stream = self.parsexref(source) + xref_list.append((source.obj_offsets, trailer, is_stream)) prev = trailer.Prev if prev is None: token = source.next() @@ -624,7 +643,6 @@ def __init__(self, fname=None, fdata=None, decompress=False, source.warning('Expected "startxref" ' 'at end of xref table') break - xref_list.append((source.obj_offsets, trailer, is_stream)) source.floc = int(prev) # Handle document encryption @@ -644,18 +662,22 @@ def __init__(self, fname=None, fdata=None, decompress=False, self._parse_encrypt_info(source, password, trailer) - if is_stream: - self.load_stream_objects(trailer.object_streams) - - while xref_list: - later_offsets, later_trailer, is_stream = xref_list.pop() + # Go through all trailers from earliest to latest and make sure the + # trailer object contains the latest information. + for later_offsets, later_trailer, is_stream in reversed(xref_list): source.obj_offsets.update(later_offsets) if is_stream: trailer.update(later_trailer) - self.load_stream_objects(later_trailer.object_streams) else: trailer = later_trailer + # Go through all trailers from latest to earliest and load their + # object streams. + while xref_list: + _, later_trailer, is_stream = xref_list.pop(0) + if is_stream: + self.load_stream_objects(later_trailer.object_streams) + trailer.Prev = None if (trailer.Version and From b1f336c3388502151afdf56d074723caa9e8c47c Mon Sep 17 00:00:00 2001 From: Michael Bryant Date: Tue, 2 Jul 2019 08:24:15 -0700 Subject: [PATCH 2/4] Fix trailer update in alternate object resolution order (#2) --- pdfrw/objects/pdfdict.py | 5 +++++ pdfrw/pdfreader.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pdfrw/objects/pdfdict.py b/pdfrw/objects/pdfdict.py index 888fc83..e87c458 100644 --- a/pdfrw/objects/pdfdict.py +++ b/pdfrw/objects/pdfdict.py @@ -186,6 +186,11 @@ def itervalues(self): for key, value in self.iteritems(): yield value + def update_indirect(self, other): + ''' Update a PdfDict without calling real_value() on the items. ''' + for key, value in iteritems(other): + self[key] = value + def values(self): return list((value for key, value in self.iteritems())) diff --git a/pdfrw/pdfreader.py b/pdfrw/pdfreader.py index 01a4395..a1254ce 100644 --- a/pdfrw/pdfreader.py +++ b/pdfrw/pdfreader.py @@ -667,7 +667,7 @@ def __init__(self, fname=None, fdata=None, decompress=False, for later_offsets, later_trailer, is_stream in reversed(xref_list): source.obj_offsets.update(later_offsets) if is_stream: - trailer.update(later_trailer) + trailer.update_indirect(later_trailer) else: trailer = later_trailer From e0b32cffd7e5c13e97fde988333b0b4f5cfcc08b Mon Sep 17 00:00:00 2001 From: Michael Bryant Date: Sun, 15 Sep 2019 13:26:56 -0600 Subject: [PATCH 3/4] SUB-1024 - Remove token cache (#3) --- pdfrw/tokens.py | 6 +--- tests/expected.txt | 70 +++++++++++++++++++++++----------------------- 2 files changed, 36 insertions(+), 40 deletions(-) diff --git a/pdfrw/tokens.py b/pdfrw/tokens.py index 2b69e02..2e25254 100644 --- a/pdfrw/tokens.py +++ b/pdfrw/tokens.py @@ -82,8 +82,6 @@ def _gettoks(self, startloc, intern=intern, ''' fdata = self.fdata current = self.current = [(startloc, startloc)] - cache = {} - get_cache = cache.get while 1: for match in findtok(fdata, current[0][1]): current[0] = tokspan = match.span() @@ -141,9 +139,7 @@ def _gettoks(self, startloc, intern=intern, self.exception(('Tokenizer logic incorrect -- ' 'should never get here')) - newtok = get_cache(token) - if newtok is None: - newtok = cache[token] = toktype(token) + newtok = toktype(token) yield newtok if current[0] is not tokspan: break diff --git a/tests/expected.txt b/tests/expected.txt index 1989623..de2bbde 100644 --- a/tests/expected.txt +++ b/tests/expected.txt @@ -23,7 +23,7 @@ examples/poster_881f4dc8dcf069e707bf61af95492d86 a34be06d22105b6c02394a9f examples/rl1/4up_b1c400de699af29ea3f1983bb26870ab e21dfdd9ae56ddb261dc3d02bf6da198 examples/rl1/booklet_b1c400de699af29ea3f1983bb26870ab 410063b7fbae1c6d5af33758e2b43450 examples/rl1/subset_b1c400de699af29ea3f1983bb26870ab_3_5 745f1ac31a18d86afb294a449b72cb98 -examples/rl1/platypus_pdf_template_b1c400de699af29ea3f1983bb26870ab 88bd087c4dc039ced05faea3920cbec5 +examples/rl1/platypus_pdf_template_b1c400de699af29ea3f1983bb26870ab bb2449c75d96ff7913d59af89f0fd8b7 # List things that need work here (typically cause exceptions) @@ -82,36 +82,36 @@ repaginate/06c86654f9a77e82f9adaa0086fc391c.pdf 848966fe40a1e3de842f82700dc6d67b repaginate/08f69084d72dabc5dfdcf5c1ff2a719f.pdf b8c60878b0e0ce81cb6e8777038166b1 repaginate/09715ec1a7b0f3a7ae02b3046f627b9f.pdf daf7cff9c0a15bbb347489f9fbda25f8 repaginate/0a61de50b5ee0ea4d5d69c95dab817a3.pdf c6cd38b1131c4b856f60ebfcf51da6f5 -repaginate/1975ef8db7355b1d691bc79d0749574b.pdf 43433398ccb1edaaee734f4949a5cc3c +repaginate/1975ef8db7355b1d691bc79d0749574b.pdf 7fb96ee722ce391eaed9520f94fa0e00 repaginate/1c2af1d2b0db6cac3c8e558a26efd38b.pdf 20dc3be2affe9082564c01b1146d7598 repaginate/1f5dd128c3757420a881a155f2f8ace3.pdf 7130f1568526247895856806b3879db4 repaginate/22628a7ed578b622520325673ab2a4f2.pdf e312c9c588a5ccdb1a11ac37149b178b repaginate/2ac7c68e26a8ef797aead15e4875cc6d.pdf e7344551183415d6257e2cab2aef4a61 repaginate/295d26e61a85635433f8e4b768953f60.pdf a89a9fa39812ecd9fa5d6b9e785f389d repaginate/2d31f356c37dadd04b83ecc4e9a739a0.pdf bc04b61b41cb51f6a1c1da79fb387795 -repaginate/2fac0d9a189ca5fcef8626153d050be8.pdf 95fe3d9258ace5bdccb95a55c2c8cb22 +repaginate/2fac0d9a189ca5fcef8626153d050be8.pdf c770e9e7574f5e7bb5fe8e2ac69cc4b4 repaginate/319c998910453bc44d40c7748cd2cb79.pdf c0da6bf6db273bdb1385f408dcf063d0 repaginate/35df0b8cff4afec0c08f08c6a5bc9857.pdf 3568e1c885a461b350c790ec5b729af3 repaginate/365b9c95574ee8944370fe286905d0e8.pdf 84e5fc0d4f30ff8db05780fd244d9cf0 repaginate/4805fdcd7e142e8df3c04c6ba06025af.pdf 3b5b8254dc99c2f0f62fe2afa42fad4e repaginate/49e31fd074eca6af981d78d42d0078ec.pdf 77fd3fa86c7c0166a373b66cfef357d2 -repaginate/536dfc6fbadd87c03eb59375d091eb53.pdf afc90878b1306483dbde37c3a50b6a45 +repaginate/536dfc6fbadd87c03eb59375d091eb53.pdf 8c3e3db34bef93b1ba18ef2698789b52 repaginate/569f8094597bbe5b58efc3a7c6e14e87.pdf 894bf526c0a73ab70ebfd9bf3d614315 repaginate/5f0cff36d0ad74536a6513a98a755016.pdf 3298a3a13439764102395a34d571ff69 repaginate/5f265db2736850782aeaba2571a3c749.pdf 2e3046813ce6e40a39bd759a3c8a3c8c repaginate/6a42c8c79b807bf164d31071749e07b0.pdf bf00d5e44869ae59eb859860d7d5373f -repaginate/6f3a4de5c68ba3b5093e9b54b7c4e9f4.pdf 612cdd84eeac797a1c42fc91756b6d9e -repaginate/7037a992b80b60f0294016037baa9292.pdf dd41b0104f185206b51e7ffe5b07d261 +repaginate/6f3a4de5c68ba3b5093e9b54b7c4e9f4.pdf 4a724deb0892dcb41ade978a5e6b0f76 +repaginate/7037a992b80b60f0294016037baa9292.pdf 732b38f99fe2cfb86f0192ca3abe8fda repaginate/707e3e2d17cbe9ec2273414b3b63f333.pdf df4d756e2230c333f0c58ad354b5b51c repaginate/71a751ce2d93a6a5d6ff21735b701fb7.pdf a825f06c934319b93474902fcf300cd2 repaginate/72eb207b8f882618899aa7a65d3cecda.pdf 0b64f19a8a39fadfa2a3eec3f1a01233 repaginate/97ba0a239cefa0dc727c2f1be050ec6c.pdf a94fe7183ce8979174b2ac16dcd9b1ea repaginate/9d8626d18b1d8807d271e6ffc409446a.pdf cdfcf8add1af9e612ba1a2ee06a6a273 -repaginate/9f98322c243fe67726d56ccfa8e0885b.pdf 69503ac140a1e4f1322f9350646e3dae +repaginate/9f98322c243fe67726d56ccfa8e0885b.pdf 73f1486228c38fbb2b52611fa453e284 repaginate/c55eb9a13859a7fbddd8af9c16eba3a7.pdf 8cddb0f9741f7515107b1bce5dc90c83 -repaginate/c5c895deecf7a7565393587e0d61be2b.pdf 59e350c6f7d7b89fab36a4019bb526fd -repaginate/d2f0b2086160d4f3d325c79a5dc1fb4d.pdf 3623b7f200818c63cb6838f9678a4840 -repaginate/d6fd9567078b48c86710e9c49173781f.pdf 874b532f61139261f71afb5987dd2a68 +repaginate/c5c895deecf7a7565393587e0d61be2b.pdf 2dcb361c33a14cfe629a56257a0a10f7 +repaginate/d2f0b2086160d4f3d325c79a5dc1fb4d.pdf 83c84c61fe564e9105501806e5b02455 +repaginate/d6fd9567078b48c86710e9c49173781f.pdf a8d733d84a396a26eac11dd5bb762354 repaginate/e9ab02aa769f4c040a6fa52f00d6e3f0.pdf 7d3c3ae13cc7d53e7fa6ef046e15dbaa repaginate/ec00d5825f47b9d0faa953b1709163c3.pdf 8e6a481476c2b3bdd64ce8e36f8fe273 repaginate/ed81787b83cc317c9f049643b853bea3.pdf 4636b68f294302417b81aaaadde1c73d @@ -121,36 +121,36 @@ simple/06c86654f9a77e82f9adaa0086fc391c.pdf 6e2a2e063de895d28dfea9aacb9fe469 simple/08f69084d72dabc5dfdcf5c1ff2a719f.pdf 5a41601f6033356539e623091a3f79ef simple/0a61de50b5ee0ea4d5d69c95dab817a3.pdf 182712dd5be8aebd29decb57cf530334 simple/09715ec1a7b0f3a7ae02b3046f627b9f.pdf c4e4b3b725bd5fc3b008f1ac6251ad1c -simple/1975ef8db7355b1d691bc79d0749574b.pdf 475c28c9588f3a7f6110d30f391758c4 +simple/1975ef8db7355b1d691bc79d0749574b.pdf 579671e38cfef46cb6817e1477762f8e simple/1c2af1d2b0db6cac3c8e558a26efd38b.pdf 3f17f19fd92adf01998bb13a0ee52b92 simple/1f5dd128c3757420a881a155f2f8ace3.pdf b0d01f9d6ac156326aeb14b940aa73e7 simple/22628a7ed578b622520325673ab2a4f2.pdf 1163cec415728899e997a29be465d02d simple/295d26e61a85635433f8e4b768953f60.pdf fe3b8960c7f877db05c7cd12c9c6e097 simple/2ac7c68e26a8ef797aead15e4875cc6d.pdf 2623eae06eada9587574f8ddd7fc80fa simple/2d31f356c37dadd04b83ecc4e9a739a0.pdf 9af4794d366fbd5840836e6612ceedd2 -simple/2fac0d9a189ca5fcef8626153d050be8.pdf 458501ecda909b00262b9654f0b09ebf +simple/2fac0d9a189ca5fcef8626153d050be8.pdf 03ed522f1ad5d634e945912704042cce simple/319c998910453bc44d40c7748cd2cb79.pdf 8c84e36ec1db8c1dbfaa312646e000b4 simple/35df0b8cff4afec0c08f08c6a5bc9857.pdf 0a2926c23ad916c449d5dadcfa9d38ef simple/365b9c95574ee8944370fe286905d0e8.pdf cf3bfac41f410bf5bd657e3f906dfbc6 simple/4805fdcd7e142e8df3c04c6ba06025af.pdf 3b5b8254dc99c2f0f62fe2afa42fad4e simple/49e31fd074eca6af981d78d42d0078ec.pdf 2c316537a5b0917634cbbdc5b91511df -simple/536dfc6fbadd87c03eb59375d091eb53.pdf 319851765c70ba103c4191f7ec2148db +simple/536dfc6fbadd87c03eb59375d091eb53.pdf eae0508f344fbea09436d59c69cc0da2 simple/569f8094597bbe5b58efc3a7c6e14e87.pdf 025f1bf95cc537c36b8c3a044758b86c simple/5f0cff36d0ad74536a6513a98a755016.pdf 8476fd75e75394fcbbe02816d0640e7d simple/5f265db2736850782aeaba2571a3c749.pdf d4d2e93ab22e866c86e32da84421f6f9 simple/6a42c8c79b807bf164d31071749e07b0.pdf 221fec351c925a43f5f409fe03d90013 -simple/6f3a4de5c68ba3b5093e9b54b7c4e9f4.pdf fe8dd16dd7fef40338140e0610d0cbbf -simple/7037a992b80b60f0294016037baa9292.pdf 6a2ef24e5f74dd74969ff8cefdfc6a05 +simple/6f3a4de5c68ba3b5093e9b54b7c4e9f4.pdf f661cabf7c914a5d40c7b8d9dfa18693 +simple/7037a992b80b60f0294016037baa9292.pdf aa19f0294dcf97770e4de013928c61e9 simple/707e3e2d17cbe9ec2273414b3b63f333.pdf fb6a8eb3cdc2fbef125babe8815f3b70 simple/71a751ce2d93a6a5d6ff21735b701fb7.pdf a825f06c934319b93474902fcf300cd2 simple/72eb207b8f882618899aa7a65d3cecda.pdf 4ce7ff29531cc417c26389af28dc1c5e simple/97ba0a239cefa0dc727c2f1be050ec6c.pdf c24873bab85b8ecc7c5433d8d802bceb simple/9d8626d18b1d8807d271e6ffc409446a.pdf 2358d654bf20d2b9d179ab009a615c4e -simple/9f98322c243fe67726d56ccfa8e0885b.pdf 9290b4c32f005e1e4c7f431955246c4c +simple/9f98322c243fe67726d56ccfa8e0885b.pdf b9ff8c7ba18650846bdf9ef528c558f0 simple/c55eb9a13859a7fbddd8af9c16eba3a7.pdf 6b406128e0ed1ac23dc5a0ee34d1f717 -simple/c5c895deecf7a7565393587e0d61be2b.pdf 2cc3c75e56d5dd562ca5b1f994bd9d5c -simple/d2f0b2086160d4f3d325c79a5dc1fb4d.pdf 2083f0e55cf06d88df02956a21bfef23 -simple/d6fd9567078b48c86710e9c49173781f.pdf 77464ec5cfdacb61a73b506bc4945631 +simple/c5c895deecf7a7565393587e0d61be2b.pdf 959c4a63a7643856fc353cfefaae020e +simple/d2f0b2086160d4f3d325c79a5dc1fb4d.pdf 808fc584dd8529cfdf7f5366d8c430e9 +simple/d6fd9567078b48c86710e9c49173781f.pdf 6a2ff228e8475c27a06302b8076fec7b simple/e9ab02aa769f4c040a6fa52f00d6e3f0.pdf 5bc96989bc4f4b6438da953443336124 simple/ec00d5825f47b9d0faa953b1709163c3.pdf 708f66049169c28ac39b0553908dc318 simple/ed81787b83cc317c9f049643b853bea3.pdf c227d627217dc6808c50e80063734d27 @@ -161,26 +161,26 @@ decompress/07b0ba4cff1c6ff73fd468b04b013457.pdf 499b9c1b1e1c76b7c5c0d5e3b62889e3 decompress/08f69084d72dabc5dfdcf5c1ff2a719f.pdf ccadb859eff77d525bf86f6d821ccf1b decompress/09715ec1a7b0f3a7ae02b3046f627b9f.pdf 2b9c8b26a92c7645cfefa1bfa8a8ab36 decompress/0a61de50b5ee0ea4d5d69c95dab817a3.pdf 182712dd5be8aebd29decb57cf530334 -decompress/1975ef8db7355b1d691bc79d0749574b.pdf a7d5eaf0a4259352898047f284e20b90 +decompress/1975ef8db7355b1d691bc79d0749574b.pdf ed67cc38641570140f25aec04e0deef2 decompress/1c2af1d2b0db6cac3c8e558a26efd38b.pdf 40d1cc7e26213510319b519032aff637 decompress/1f5dd128c3757420a881a155f2f8ace3.pdf b0d01f9d6ac156326aeb14b940aa73e7 decompress/22628a7ed578b622520325673ab2a4f2.pdf b68c7bf46ad4b70addc3369ba669dc7b decompress/295d26e61a85635433f8e4b768953f60.pdf 6f2ae8fb0ff853ed63537d8767ce13ad decompress/2ac7c68e26a8ef797aead15e4875cc6d.pdf d8d5589991ce15c834f35b340e7147a9 decompress/2d31f356c37dadd04b83ecc4e9a739a0.pdf 5a6b732690c42f07ae6a41c37cf28ff3 -decompress/2fac0d9a189ca5fcef8626153d050be8.pdf 998366ad30becd31bed711ba78c59a7f +decompress/2fac0d9a189ca5fcef8626153d050be8.pdf 8c5c8ce39199abc508fd1271c9bf86c5 decompress/319c998910453bc44d40c7748cd2cb79.pdf 7933a591caf3d49e45a42733bc48f99e decompress/35df0b8cff4afec0c08f08c6a5bc9857.pdf e339ae7747898d2faba270473171692a decompress/365b9c95574ee8944370fe286905d0e8.pdf 9da0100b5844c86e93093d0fbc78b3f6 decompress/4805fdcd7e142e8df3c04c6ba06025af.pdf 3b5b8254dc99c2f0f62fe2afa42fad4e decompress/49e31fd074eca6af981d78d42d0078ec.pdf 4e9bf31753ff7232de4c612a31bd21fc -decompress/536dfc6fbadd87c03eb59375d091eb53.pdf f755d2ef6052270121168d2341ad04b6 +decompress/536dfc6fbadd87c03eb59375d091eb53.pdf 256b7d2b91f00d66f56bc664d615456b decompress/569f8094597bbe5b58efc3a7c6e14e87.pdf aa782a7d553ec767ab61517996337f58 decompress/5f0cff36d0ad74536a6513a98a755016.pdf 9caae4e3a21eba9e4aa76620e7508d56 decompress/5f265db2736850782aeaba2571a3c749.pdf 836abcf6e6e1d39ad96481eb20e9b149 decompress/6a42c8c79b807bf164d31071749e07b0.pdf 221fec351c925a43f5f409fe03d90013 -decompress/6f3a4de5c68ba3b5093e9b54b7c4e9f4.pdf 226773cac79e1a5fed1379a0501a5df0 -decompress/7037a992b80b60f0294016037baa9292.pdf c9a3602b26d82ae145d9f5822125a158 +decompress/6f3a4de5c68ba3b5093e9b54b7c4e9f4.pdf 4cfa3b673c04d8014a210d3481d71a08 +decompress/7037a992b80b60f0294016037baa9292.pdf a31f20a15cce263e4873173aba11bb5a decompress/707e3e2d17cbe9ec2273414b3b63f333.pdf 3250a56e14a9855eccd67bb347808d24 decompress/71a751ce2d93a6a5d6ff21735b701fb7.pdf a825f06c934319b93474902fcf300cd2 decompress/72eb207b8f882618899aa7a65d3cecda.pdf a4366874fb6db1d9a0c998361ea32b8d @@ -190,9 +190,9 @@ decompress/9f98322c243fe67726d56ccfa8e0885b.pdf 0fa96e3669d14c64fff159d5aa457014 decompress/b107669d1dd69eabb89765fabb2cb321.pdf 56025c06ab8633575ddc6c6990d2fbf1 decompress/b1c400de699af29ea3f1983bb26870ab.pdf 08a5de62129a96d8d9a8f27052bfb227 decompress/c55eb9a13859a7fbddd8af9c16eba3a7.pdf 8e0eb14c12fc89e7cbb4001861d7198f -decompress/c5c895deecf7a7565393587e0d61be2b.pdf 2cc3c75e56d5dd562ca5b1f994bd9d5c -decompress/d2f0b2086160d4f3d325c79a5dc1fb4d.pdf aaed7215c60dbf19bb4fefe88602196a -decompress/d6fd9567078b48c86710e9c49173781f.pdf 1fd1b4bc184e64ea6260c30261adf9c4 +decompress/c5c895deecf7a7565393587e0d61be2b.pdf 959c4a63a7643856fc353cfefaae020e +decompress/d2f0b2086160d4f3d325c79a5dc1fb4d.pdf 8faeec3a8eeda920bd1747663b872a6a +decompress/d6fd9567078b48c86710e9c49173781f.pdf bdba616833cb8b5bb0d31f34d0034b20 decompress/e9ab02aa769f4c040a6fa52f00d6e3f0.pdf 62b87ec47f1b93d75c32d0c78b6c2380 decompress/ec00d5825f47b9d0faa953b1709163c3.pdf 708f66049169c28ac39b0553908dc318 decompress/ed81787b83cc317c9f049643b853bea3.pdf 5c0a3bc5b19d58d48767bff8f31daae0 @@ -202,26 +202,26 @@ compress/07b0ba4cff1c6ff73fd468b04b013457.pdf 499b9c1b1e1c76b7c5c0d5e3b62889e3 compress/08f69084d72dabc5dfdcf5c1ff2a719f.pdf 3e7e53a92f96d52bbffe3ffa03d7b11e compress/09715ec1a7b0f3a7ae02b3046f627b9f.pdf 563ffde527978517393d9166b02c17d3 compress/0a61de50b5ee0ea4d5d69c95dab817a3.pdf 182712dd5be8aebd29decb57cf530334 -compress/1975ef8db7355b1d691bc79d0749574b.pdf d505caa75f8becea1a1c810f4a143976 +compress/1975ef8db7355b1d691bc79d0749574b.pdf 4364a78283fbe1ad17f49e11a06fe3fa compress/1c2af1d2b0db6cac3c8e558a26efd38b.pdf b78f4e45aef4149a068a0225ea1be88c compress/1f5dd128c3757420a881a155f2f8ace3.pdf 22148c2a65129f936b8e8c67397e5bf6 compress/22628a7ed578b622520325673ab2a4f2.pdf 54ec1fa64e64bfd146f13001444346f4 compress/295d26e61a85635433f8e4b768953f60.pdf 2ed8eb04a8c66138883a43917cd9c0c5 compress/2ac7c68e26a8ef797aead15e4875cc6d.pdf efe942d1e5b9f2f139c7e1f2e46ced24 compress/2d31f356c37dadd04b83ecc4e9a739a0.pdf eedc938e6782e1d15755b5c54fffc17c -compress/2fac0d9a189ca5fcef8626153d050be8.pdf 2d1b8e82cdc82c82bec3969acf026d30 +compress/2fac0d9a189ca5fcef8626153d050be8.pdf 7ddde594713b20308b4bc3fdb287c3b4 compress/319c998910453bc44d40c7748cd2cb79.pdf 5b9ca8444a17db8cb6fa427da7a89e44 compress/35df0b8cff4afec0c08f08c6a5bc9857.pdf 07c064df0fc0fd0c80c4a196b4c38403 compress/365b9c95574ee8944370fe286905d0e8.pdf 1b98e92f74c2f5324cce5fc8fbe46c15 compress/4805fdcd7e142e8df3c04c6ba06025af.pdf 4aa2e922739ba865da30a9917ddffe8e compress/49e31fd074eca6af981d78d42d0078ec.pdf 7422b3d205650552ff81bc06c89c13ba -compress/536dfc6fbadd87c03eb59375d091eb53.pdf c18b0f0f8e633fe15b17772c701a76a9 +compress/536dfc6fbadd87c03eb59375d091eb53.pdf f89a68553aba1ae5a0384febbc23cd47 compress/569f8094597bbe5b58efc3a7c6e14e87.pdf 3ee711f7fc678787346dca5d06ee5192 compress/5f0cff36d0ad74536a6513a98a755016.pdf bd2a1edf6299d5dc2e1ad6b5fc8bcc20 compress/5f265db2736850782aeaba2571a3c749.pdf bb4898beac50171de7502f13925af80c compress/6a42c8c79b807bf164d31071749e07b0.pdf 221fec351c925a43f5f409fe03d90013 -compress/6f3a4de5c68ba3b5093e9b54b7c4e9f4.pdf 1c3fbae41e7cad7deca13fab93514bc7 -compress/7037a992b80b60f0294016037baa9292.pdf 9182a9765544e4a91404db65a6f951d7 +compress/6f3a4de5c68ba3b5093e9b54b7c4e9f4.pdf 58dc1924ef685c163ebbcc7ea0165b4f +compress/7037a992b80b60f0294016037baa9292.pdf 1ab3b5bc74cf27fb9bbfc6045282fb23 compress/707e3e2d17cbe9ec2273414b3b63f333.pdf 0e75dda73bf18d9968499277ab1a367e compress/71a751ce2d93a6a5d6ff21735b701fb7.pdf faa7eb31789a3789f65de30a4e58e594 compress/72eb207b8f882618899aa7a65d3cecda.pdf 0155549fc04357220cc6be541dda7bc1 @@ -231,9 +231,9 @@ compress/9f98322c243fe67726d56ccfa8e0885b.pdf f9d59774a75bb2dfc08ff7df65aa3048 compress/b107669d1dd69eabb89765fabb2cb321.pdf 56025c06ab8633575ddc6c6990d2fbf1 compress/b1c400de699af29ea3f1983bb26870ab.pdf 6eaeef32b0e28959e7681c8b02d8814f compress/c55eb9a13859a7fbddd8af9c16eba3a7.pdf 6ef82921011eb79a9d860214e213c868 -compress/c5c895deecf7a7565393587e0d61be2b.pdf 30d87ac6aa59d65169c389ee3badbca8 -compress/d2f0b2086160d4f3d325c79a5dc1fb4d.pdf e4c768be930e9980c970d51d5f447e24 -compress/d6fd9567078b48c86710e9c49173781f.pdf cbc8922b8bea08928463b287767ec229 +compress/c5c895deecf7a7565393587e0d61be2b.pdf e4263c00989ea47a6594dbca187e0aca +compress/d2f0b2086160d4f3d325c79a5dc1fb4d.pdf 540149ce02d44b2c0785f9e594dfb880 +compress/d6fd9567078b48c86710e9c49173781f.pdf b88b73e8bb64a6122c9b1f801adea9a8 compress/e9ab02aa769f4c040a6fa52f00d6e3f0.pdf e893e407b3c2366d4ca822ce80b45c2c compress/ec00d5825f47b9d0faa953b1709163c3.pdf 9ba3db0dedec74c3d2a6f033f1b22a81 compress/ed81787b83cc317c9f049643b853bea3.pdf 2ceda401f68a44a3fb1da4e0f9dfc578 From 491a361498da2a949a84c9ae4ba40f6cc7181224 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Thu, 26 Sep 2019 14:21:45 -0700 Subject: [PATCH 4/4] SP-3760 Guard against infinite loop page trees --- pdfrw/pdfreader.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/pdfrw/pdfreader.py b/pdfrw/pdfreader.py index a1254ce..d82e33a 100644 --- a/pdfrw/pdfreader.py +++ b/pdfrw/pdfreader.py @@ -22,6 +22,8 @@ from . import crypt from .py23_diffs import convert_load, convert_store, iteritems +_PAGE_TREE_MAX_DEPTH = 50000 + class PdfReader(PdfDict): @@ -490,18 +492,26 @@ def readpages(self, node): try: result = [] - stack = [node] + stack = [(node, 0)] append = result.append pop = stack.pop while stack: - node = pop() + node, depth = pop() + + # Guard against infinite loops in the page tree + if depth >= _PAGE_TREE_MAX_DEPTH: + log.error('Page tree exceeded max depth') + return [] + nodetype = node[typename] if nodetype == pagename: append(node) elif nodetype == pagesname: - stack.extend(reversed(node[kidname])) + stack.extend( + (n, depth + 1) for n in reversed(node[kidname]) + ) elif nodetype == catalogname: - stack.append(node[pagesname]) + stack.append((node[pagesname], depth + 1)) else: log.error('Expected /Page or /Pages dictionary, got %s' % repr(node))