Skip to content

Commit a89001d

Browse files
extract, diff, ...: use raise_missing=False
preloading: always use raise_missing=False, because the behaviour is defined at preloading time. fetch_many: use get_many with raise_missing=False. If get_many yields None instead of the expected chunk cdata bytes, create an all-zero replacement chunk of the correct size on the fly (if the size is known) and emit an error message about the missing chunk id / size. Note: for borg recreate with re-chunking this is not ideal, because it will transform a missing chunk into a range of zero bytes in the target file in the recreated archive. It will emit an error message at recreate time, but afterwards the recreated archive will not "know" about the problem any more and will just contain that zero-patched file. So borg recreate with re-chunking should better only be used on repos that are not missing any chunks.
1 parent 8c4890e commit a89001d

3 files changed

Lines changed: 34 additions & 4 deletions

File tree

src/borg/archive.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -325,8 +325,14 @@ def fetch_many(self, chunks, is_preloaded=False, ro_type=None):
325325
sizes = [None] * len(ids)
326326
else:
327327
raise TypeError(f"unsupported or mixed element types: {chunks}")
328-
for id, size, cdata in zip(ids, sizes, self.repository.get_many(ids, is_preloaded=is_preloaded)):
329-
_, data = self.repo_objs.parse(id, cdata, ro_type=ro_type)
328+
for id, size, cdata in zip(
329+
ids, sizes, self.repository.get_many(ids, is_preloaded=is_preloaded, raise_missing=False)
330+
):
331+
if cdata is None:
332+
logger.error(f"repository object {bin_to_hex(id)} missing, returning {size} zero bytes.")
333+
data = zeros[:size] if size is not None else None
334+
else:
335+
_, data = self.repo_objs.parse(id, cdata, ro_type=ro_type)
330336
assert size is None or len(data) == size
331337
yield data
332338

src/borg/remote.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -965,7 +965,9 @@ def handle_error(unpacked):
965965
self.to_send.push_back(msgpack.packb({MSGID: self.msgid, MSG: cmd, ARGS: args}))
966966
if not self.to_send and self.preload_ids:
967967
chunk_id = self.preload_ids.pop(0)
968-
args = {"id": chunk_id, "raise_missing": True}
968+
# for preloading chunks, the raise_missing behaviour is defined HERE,
969+
# not in the get_many / fetch_many call that later fetches the preloaded chunks.
970+
args = {"id": chunk_id, "raise_missing": False}
969971
self.msgid += 1
970972
self.chunkid_to_msgids.setdefault(chunk_id, []).append(self.msgid)
971973
self.to_send.push_back(msgpack.packb({MSGID: self.msgid, MSG: "get", ARGS: args}))

src/borg/testsuite/archiver/extract_cmd_test.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from ... import xattr
1010
from ...chunker import has_seek_hole
1111
from ...constants import * # NOQA
12-
from ...helpers import EXIT_WARNING, BackupPermissionError
12+
from ...helpers import EXIT_WARNING, BackupPermissionError, bin_to_hex
1313
from ...helpers import flags_noatime, flags_normal
1414
from .. import changedir, same_ts_ns
1515
from .. import are_symlinks_supported, are_hardlinks_supported, is_utime_fully_supported, is_birthtime_fully_supported
@@ -24,6 +24,9 @@
2424
_extract_hardlinks_setup,
2525
assert_creates_file,
2626
generate_archiver_tests,
27+
create_src_archive,
28+
open_archive,
29+
src_file,
2730
)
2831

2932
pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA
@@ -737,3 +740,22 @@ def test_dry_run_extraction_flags(archivers, request):
737740
print(output)
738741

739742
assert not os.listdir("output"), "Output directory should be empty after dry-run"
743+
744+
745+
def test_extract_file_with_missing_chunk(archivers, request):
    """Check that extracting a file with a missing chunk reports the missing object.

    The test creates a repository and an archive, deletes the last chunk of
    ``src_file`` directly from the repository, then runs ``extract`` and
    verifies that the output contains the error message naming the missing
    chunk id and the number of zero bytes returned in its place.
    """
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)
    create_src_archive(archiver, "archive")
    # Get rid of a chunk
    archive, repository = open_archive(archiver.repository_path, "archive")
    with repository:
        for item in archive.iter_items():
            if item.path.endswith(src_file):
                # Remove the file's last chunk so extraction has to cope with a missing object.
                chunk = item.chunks[-1]
                repository.delete(chunk.id)
                break
        else:
            # Fail explicitly (and with a diagnostic) instead of a bare ``assert False``,
            # which carries no message and is stripped when Python runs with -O.
            raise AssertionError(f"no archived item path ends with {src_file!r}")
    output = cmd(archiver, "extract", "archive")
    # TODO: this is a bit dirty still: no warning/error rc, no filename output for the damaged file.
    assert f"repository object {bin_to_hex(chunk.id)} missing, returning {chunk.size} zero bytes." in output

0 commit comments

Comments
 (0)