Skip to content

Commit b136572

Browse files
committed
update: Fix versions database issues
An attempt to merge two list-blobs calls during a refactoring completely broke collect_blobs:
* A redundant collect_blobs call caused values in the vers database to be replaced by a list in which all files for a version had the same blob id
* The vers database contained filenames instead of paths
* Blobs shared between versions were given new blob ids for each version
1 parent 8665b2f commit b136572

File tree

2 files changed

+651
-10
lines changed

2 files changed

+651
-10
lines changed

elixir/update.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os.path
12
import logging
23
from multiprocessing import cpu_count
34
from multiprocessing.pool import Pool
@@ -108,17 +109,20 @@ def collect_blobs(db: DB, tag: bytes) -> Dict[int, Tuple[bytes, str]]:
108109
idx = 0
109110

110111
# Get blob hashes and associated file names (without path)
111-
blobs = scriptLines('list-blobs', '-f', tag)
112+
blobs = scriptLines('list-blobs', '-p', tag)
112113
versionBuf = []
113114
idx_to_hash_and_filename = {}
114115

115116
# Collect new blobs, assign database ids to the blobs
116117
for blob in blobs:
117-
hash, filename = blob.split(b' ',maxsplit=1)
118-
blob_exist = db.blob.exists(hash)
119-
versionBuf.append((idx, filename))
120-
if not blob_exist:
121-
idx_to_hash_and_filename[idx] = (hash, filename.decode())
118+
hash, path = blob.split(b' ',maxsplit=1)
119+
filename = os.path.basename(path.decode())
120+
blob_idx = db.blob.get(hash)
121+
if blob_idx is not None:
122+
versionBuf.append((blob_idx, path))
123+
else:
124+
versionBuf.append((idx, path))
125+
idx_to_hash_and_filename[idx] = (hash, filename)
122126
db.blob.put(hash, idx)
123127
db.hash.put(idx, hash)
124128
db.file.put(idx, filename)
@@ -130,8 +134,8 @@ def collect_blobs(db: DB, tag: bytes) -> Dict[int, Tuple[bytes, str]]:
130134
# Add mapping blob id -> path to version database
131135
versionBuf.sort()
132136
obj = PathList()
133-
for idx, path in versionBuf:
134-
obj.append(idx, path)
137+
for i, path in versionBuf:
138+
obj.append(i, path)
135139
db.vers.put(tag, obj, sync=True)
136140

137141
return idx_to_hash_and_filename
@@ -266,7 +270,6 @@ def update_version(db: DB, tag: bytes, pool: Pool, dts_comp_support: bool):
266270
chunksize = int(len(idxes) / cpu_count())
267271
chunksize = min(max(1, chunksize), 100)
268272

269-
collect_blobs(db, tag)
270273
logger.info("collecting blobs done")
271274

272275
for result in pool.imap_unordered(get_defs, idxes, chunksize):
@@ -318,5 +321,4 @@ def update_version(db: DB, tag: bytes, pool: Pool, dts_comp_support: bool):
318321
update_version(db, tag, pool, dts_comp_support)
319322
db.close()
320323
db = None
321-
break
322324

0 commit comments

Comments
 (0)