Skip to content

Commit 3de8215

Browse files
committed
update: Fix versions database issues
An attempt to merge two list-blobs calls during a refactoring completely broke collect_blobs:
* A redundant collect_blobs call caused values in the vers database to be replaced by a list in which all files for a version had the same blob id.
* The vers database contained filenames instead of paths.
* Blobs shared between versions were assigned new blob ids for each version.
1 parent 265fcdb commit 3de8215

File tree

1 file changed

+12
-9
lines changed

1 file changed

+12
-9
lines changed

elixir/update.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os.path
12
import logging
23
from multiprocessing import cpu_count
34
from multiprocessing.pool import Pool
@@ -116,17 +117,20 @@ def collect_blobs(db: DB, tag: bytes) -> Dict[int, Tuple[bytes, str]]:
116117
idx = 0
117118

118119
# Get blob hashes and associated file names (without path)
119-
blobs = scriptLines('list-blobs', '-f', tag)
120+
blobs = scriptLines('list-blobs', '-p', tag)
120121
versionBuf = []
121122
idx_to_hash_and_filename = {}
122123

123124
# Collect new blobs, assign database ids to the blobs
124125
for blob in blobs:
125-
hash, filename = blob.split(b' ',maxsplit=1)
126-
blob_exist = db.blob.exists(hash)
127-
versionBuf.append((idx, filename))
128-
if not blob_exist:
129-
idx_to_hash_and_filename[idx] = (hash, filename.decode())
126+
hash, path = blob.split(b' ',maxsplit=1)
127+
filename = os.path.basename(path.decode())
128+
blob_idx = db.blob.get(hash)
129+
if blob_idx is not None:
130+
versionBuf.append((blob_idx, path))
131+
else:
132+
versionBuf.append((idx, path))
133+
idx_to_hash_and_filename[idx] = (hash, filename)
130134
db.blob.put(hash, idx)
131135
db.hash.put(idx, hash)
132136
db.file.put(idx, filename)
@@ -138,8 +142,8 @@ def collect_blobs(db: DB, tag: bytes) -> Dict[int, Tuple[bytes, str]]:
138142
# Add mapping blob id -> path to version database
139143
versionBuf.sort()
140144
obj = PathList()
141-
for idx, path in versionBuf:
142-
obj.append(idx, path)
145+
for i, path in versionBuf:
146+
obj.append(i, path)
143147
db.vers.put(tag, obj, sync=True)
144148

145149
return idx_to_hash_and_filename
@@ -274,7 +278,6 @@ def update_version(db: DB, tag: bytes, pool: Pool, dts_comp_support: bool):
274278
chunksize = int(len(idxes) / cpu_count())
275279
chunksize = min(max(1, chunksize), 100)
276280

277-
collect_blobs(db, tag)
278281
logger.info("collecting blobs done")
279282

280283
for result in pool.imap_unordered(get_defs, idxes, chunksize):

0 commit comments

Comments
 (0)