
Commit bd1bee0

HBASE-28839: Handle all types of exceptions during retrieval of bucket-cache from persistence. (#6250)
Signed-off-by: Wellington Chevreuil <[email protected]>
1 parent 46da224 commit bd1bee0

3 files changed, +74 -87 lines changed
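
The key behavioral change: recovery of the bucket cache from the persistence file now catches any Throwable (not only IOException) and falls back to a clean, empty cache that is then rebuilt. A minimal sketch of that pattern, assuming hypothetical field and method names that stand in for the real BucketCache internals; only the catch-all-and-reset shape mirrors the commit:

// Minimal sketch, not the actual BucketCache code.
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

class RecoverySketch {
  private final Map<String, Long> backingMap = new ConcurrentHashMap<>();
  private volatile boolean cacheEnabled = false;

  void startPersistenceRetriever(String persistencePath) {
    try {
      retrieveFromFile(persistencePath);
    } catch (Throwable ex) {
      // Previously only IOException was caught; now any failure during
      // retrieval clears the in-memory index so the cache is rebuilt from scratch.
      System.err.println("Can't restore from " + persistencePath + ", resetting: " + ex);
      backingMap.clear();
    } finally {
      // The cache always ends up enabled, whether recovery succeeded or not.
      cacheEnabled = true;
    }
  }

  void retrieveFromFile(String path) throws Exception {
    // Placeholder for reading the persisted backing map.
  }
}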

Diff for: hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java

+36 -57
@@ -390,17 +390,18 @@ private void startPersistenceRetriever(int[] bucketSizes, long capacity) {
       try {
         retrieveFromFile(bucketSizes);
         LOG.info("Persistent bucket cache recovery from {} is complete.", persistencePath);
-      } catch (IOException ioex) {
-        LOG.error("Can't restore from file[{}] because of ", persistencePath, ioex);
+      } catch (Throwable ex) {
+        LOG.warn("Can't restore from file[{}]. The bucket cache will be reset and rebuilt."
+          + " Exception seen: ", persistencePath, ex);
         backingMap.clear();
         fullyCachedFiles.clear();
         backingMapValidated.set(true);
+        regionCachedSize.clear();
         try {
           bucketAllocator = new BucketAllocator(capacity, bucketSizes);
-        } catch (BucketAllocatorException ex) {
-          LOG.error("Exception during Bucket Allocation", ex);
+        } catch (BucketAllocatorException allocatorException) {
+          LOG.error("Exception during Bucket Allocation", allocatorException);
         }
-        regionCachedSize.clear();
       } finally {
         this.cacheState = CacheState.ENABLED;
         startWriterThreads();
@@ -951,7 +952,8 @@ public void logStats() {
         : (StringUtils.formatPercent(cacheStats.getHitCachingRatio(), 2) + ", "))
       + "evictions=" + cacheStats.getEvictionCount() + ", " + "evicted="
       + cacheStats.getEvictedCount() + ", " + "evictedPerRun=" + cacheStats.evictedPerEviction()
-      + ", " + "allocationFailCount=" + cacheStats.getAllocationFailCount());
+      + ", " + "allocationFailCount=" + cacheStats.getAllocationFailCount() + ", blocksCount="
+      + backingMap.size());
     cacheStats.reset();

     bucketAllocator.logDebugStatistics();
@@ -1496,7 +1498,7 @@ private void retrieveFromFile(int[] bucketSizes) throws IOException {
     } else if (Arrays.equals(pbuf, BucketProtoUtils.PB_MAGIC_V2)) {
       // The new persistence format of chunked persistence.
       LOG.info("Reading new chunked format of persistence.");
-      retrieveChunkedBackingMap(in, bucketSizes);
+      retrieveChunkedBackingMap(in);
     } else {
       // In 3.0 we have enough flexibility to dump the old cache data.
       // TODO: In 2.x line, this might need to be filled in to support reading the old format
@@ -1590,17 +1592,7 @@ private void verifyFileIntegrity(BucketCacheProtos.BucketCacheEntry proto) {
     }
   }

-  private void parseFirstChunk(BucketCacheProtos.BucketCacheEntry firstChunk) throws IOException {
-    fullyCachedFiles.clear();
-    Pair<ConcurrentHashMap<BlockCacheKey, BucketEntry>, NavigableSet<BlockCacheKey>> pair =
-      BucketProtoUtils.fromPB(firstChunk.getDeserializersMap(), firstChunk.getBackingMap(),
-        this::createRecycler);
-    backingMap.putAll(pair.getFirst());
-    blocksByHFile.addAll(pair.getSecond());
-    fullyCachedFiles.putAll(BucketProtoUtils.fromPB(firstChunk.getCachedFilesMap()));
-  }
-
-  private void parseChunkPB(BucketCacheProtos.BackingMap chunk,
+  private void updateCacheIndex(BucketCacheProtos.BackingMap chunk,
     java.util.Map<java.lang.Integer, java.lang.String> deserializer) throws IOException {
     Pair<ConcurrentHashMap<BlockCacheKey, BucketEntry>, NavigableSet<BlockCacheKey>> pair2 =
       BucketProtoUtils.fromPB(deserializer, chunk, this::createRecycler);
@@ -1626,55 +1618,42 @@ private void parsePB(BucketCacheProtos.BucketCacheEntry proto) throws IOExceptio
   }

   private void persistChunkedBackingMap(FileOutputStream fos) throws IOException {
-    long numChunks = backingMap.size() / persistenceChunkSize;
-    if (backingMap.size() % persistenceChunkSize != 0) {
-      numChunks += 1;
-    }
-
     LOG.debug(
       "persistToFile: before persisting backing map size: {}, "
-        + "fullycachedFiles size: {}, chunkSize: {}, numberofChunks: {}",
-      backingMap.size(), fullyCachedFiles.size(), persistenceChunkSize, numChunks);
+        + "fullycachedFiles size: {}, chunkSize: {}",
+      backingMap.size(), fullyCachedFiles.size(), persistenceChunkSize);

-    BucketProtoUtils.serializeAsPB(this, fos, persistenceChunkSize, numChunks);
+    BucketProtoUtils.serializeAsPB(this, fos, persistenceChunkSize);

     LOG.debug(
-      "persistToFile: after persisting backing map size: {}, "
-        + "fullycachedFiles size: {}, numChunksPersisteed: {}",
-      backingMap.size(), fullyCachedFiles.size(), numChunks);
+      "persistToFile: after persisting backing map size: {}, " + "fullycachedFiles size: {}",
+      backingMap.size(), fullyCachedFiles.size());
   }

-  private void retrieveChunkedBackingMap(FileInputStream in, int[] bucketSizes) throws IOException {
-    byte[] bytes = new byte[Long.BYTES];
-    int readSize = in.read(bytes);
-    if (readSize != Long.BYTES) {
-      throw new IOException("Invalid size of chunk-size read from persistence: " + readSize);
-    }
-    long batchSize = Bytes.toLong(bytes, 0);
-
-    readSize = in.read(bytes);
-    if (readSize != Long.BYTES) {
-      throw new IOException("Invalid size for number of chunks read from persistence: " + readSize);
-    }
-    long numChunks = Bytes.toLong(bytes, 0);
-
-    LOG.info("Number of chunks: {}, chunk size: {}", numChunks, batchSize);
+  private void retrieveChunkedBackingMap(FileInputStream in) throws IOException {

     // Read the first chunk that has all the details.
-    BucketCacheProtos.BucketCacheEntry firstChunk =
+    BucketCacheProtos.BucketCacheEntry cacheEntry =
       BucketCacheProtos.BucketCacheEntry.parseDelimitedFrom(in);
-    parseFirstChunk(firstChunk);
-
-    // Subsequent chunks have the backingMap entries.
-    for (int i = 1; i < numChunks; i++) {
-      LOG.info("Reading chunk no: {}", i + 1);
-      parseChunkPB(BucketCacheProtos.BackingMap.parseDelimitedFrom(in),
-        firstChunk.getDeserializersMap());
-      LOG.info("Retrieved chunk: {}", i + 1);
-    }
-    verifyFileIntegrity(firstChunk);
-    verifyCapacityAndClasses(firstChunk.getCacheCapacity(), firstChunk.getIoClass(),
-      firstChunk.getMapClass());
+
+    fullyCachedFiles.clear();
+    fullyCachedFiles.putAll(BucketProtoUtils.fromPB(cacheEntry.getCachedFilesMap()));
+
+    backingMap.clear();
+    blocksByHFile.clear();
+
+    // Read the backing map entries in batches.
+    int numChunks = 0;
+    while (in.available() > 0) {
+      updateCacheIndex(BucketCacheProtos.BackingMap.parseDelimitedFrom(in),
+        cacheEntry.getDeserializersMap());
+      numChunks++;
+    }
+
+    LOG.info("Retrieved {} of chunks with blockCount = {}.", numChunks, backingMap.size());
+    verifyFileIntegrity(cacheEntry);
+    verifyCapacityAndClasses(cacheEntry.getCacheCapacity(), cacheEntry.getIoClass(),
+      cacheEntry.getMapClass());
     updateRegionSizeMapWhileRetrievingFromFile();
   }
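
For reference, the new retrieval loop relies on protobuf's length-delimited framing rather than an explicit chunk count persisted up front. A self-contained illustration of that read-until-EOF pattern, using the well-known Int64Value message as a stand-in for the HBase-generated BucketCacheProtos types (this is not HBase code, just the same framing technique):

// Illustration only: delimited protobuf chunks read back until the stream is drained.
import com.google.protobuf.Int64Value;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

public class DelimitedChunksSketch {
  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // Writer side: writeDelimitedTo puts a varint length prefix before every message,
    // so no separate chunk count needs to be stored.
    for (long chunk = 1; chunk <= 3; chunk++) {
      Int64Value.of(chunk).writeDelimitedTo(out);
    }

    // Reader side: keep parsing until the stream is exhausted, mirroring the
    // 'while (in.available() > 0)' loop in retrieveChunkedBackingMap.
    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
    int numChunks = 0;
    while (in.available() > 0) {
      Int64Value chunkMsg = Int64Value.parseDelimitedFrom(in);
      numChunks++;
      System.out.println("Read chunk payload: " + chunkMsg.getValue());
    }
    System.out.println("Retrieved " + numChunks + " chunks");
  }
}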

Diff for: hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java

+30 -29
@@ -33,7 +33,6 @@
 import org.apache.hadoop.hbase.io.hfile.BlockType;
 import org.apache.hadoop.hbase.io.hfile.CacheableDeserializerIdManager;
 import org.apache.hadoop.hbase.io.hfile.HFileBlock;
-import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.yetus.audience.InterfaceAudience;

@@ -51,51 +50,53 @@ private BucketProtoUtils() {
   }

   static BucketCacheProtos.BucketCacheEntry toPB(BucketCache cache,
-    BucketCacheProtos.BackingMap backingMap) {
+    BucketCacheProtos.BackingMap.Builder backingMapBuilder) {
     return BucketCacheProtos.BucketCacheEntry.newBuilder().setCacheCapacity(cache.getMaxSize())
       .setIoClass(cache.ioEngine.getClass().getName())
       .setMapClass(cache.backingMap.getClass().getName())
       .putAllDeserializers(CacheableDeserializerIdManager.save())
-      .putAllCachedFiles(toCachedPB(cache.fullyCachedFiles)).setBackingMap(backingMap)
+      .putAllCachedFiles(toCachedPB(cache.fullyCachedFiles))
+      .setBackingMap(backingMapBuilder.build())
       .setChecksum(ByteString
         .copyFrom(((PersistentIOEngine) cache.ioEngine).calculateChecksum(cache.getAlgorithm())))
       .build();
   }

-  public static void serializeAsPB(BucketCache cache, FileOutputStream fos, long chunkSize,
-    long numChunks) throws IOException {
-    int blockCount = 0;
-    int chunkCount = 0;
-    int backingMapSize = cache.backingMap.size();
-    BucketCacheProtos.BackingMap.Builder builder = BucketCacheProtos.BackingMap.newBuilder();
-
+  public static void serializeAsPB(BucketCache cache, FileOutputStream fos, long chunkSize)
+    throws IOException {
+    // Write the new version of magic number.
     fos.write(PB_MAGIC_V2);
-    fos.write(Bytes.toBytes(chunkSize));
-    fos.write(Bytes.toBytes(numChunks));

+    BucketCacheProtos.BackingMap.Builder builder = BucketCacheProtos.BackingMap.newBuilder();
     BucketCacheProtos.BackingMapEntry.Builder entryBuilder =
       BucketCacheProtos.BackingMapEntry.newBuilder();
+
+    // Persist the metadata first.
+    toPB(cache, builder).writeDelimitedTo(fos);
+
+    int blockCount = 0;
+    // Persist backing map entries in chunks of size 'chunkSize'.
     for (Map.Entry<BlockCacheKey, BucketEntry> entry : cache.backingMap.entrySet()) {
       blockCount++;
-      entryBuilder.clear();
-      entryBuilder.setKey(BucketProtoUtils.toPB(entry.getKey()));
-      entryBuilder.setValue(BucketProtoUtils.toPB(entry.getValue()));
-      builder.addEntry(entryBuilder.build());
-
-      if (blockCount % chunkSize == 0 || (blockCount == backingMapSize)) {
-        chunkCount++;
-        if (chunkCount == 1) {
-          // Persist all details along with the first chunk into BucketCacheEntry
-          BucketProtoUtils.toPB(cache, builder.build()).writeDelimitedTo(fos);
-        } else {
-          // Directly persist subsequent backing-map chunks.
-          builder.build().writeDelimitedTo(fos);
-        }
-        if (blockCount < backingMapSize) {
-          builder.clear();
-        }
+      addEntryToBuilder(entry, entryBuilder, builder);
+      if (blockCount % chunkSize == 0) {
+        builder.build().writeDelimitedTo(fos);
+        builder.clear();
       }
     }
+    // Persist the last chunk.
+    if (builder.getEntryList().size() > 0) {
+      builder.build().writeDelimitedTo(fos);
+    }
+  }
+
+  private static void addEntryToBuilder(Map.Entry<BlockCacheKey, BucketEntry> entry,
+    BucketCacheProtos.BackingMapEntry.Builder entryBuilder,
+    BucketCacheProtos.BackingMap.Builder builder) {
+    entryBuilder.clear();
+    entryBuilder.setKey(BucketProtoUtils.toPB(entry.getKey()));
+    entryBuilder.setValue(BucketProtoUtils.toPB(entry.getValue()));
+    builder.addEntry(entryBuilder.build());
   }

   private static BucketCacheProtos.BlockCacheKey toPB(BlockCacheKey key) {
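
Taken together with the BucketCache changes above, the persistence file written by this version (as reflected in the diff) is laid out as: the PB_MAGIC_V2 marker, then one length-delimited BucketCacheEntry carrying the metadata (deserializers, cached-files map, capacity/class info, and checksum), then zero or more length-delimited BackingMap chunks of at most chunkSize entries each. The explicit chunk-size and chunk-count longs of the previous format are no longer written, which is why the reader now simply parses chunks until the stream is exhausted.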

Diff for: hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestVerifyBucketCacheFile.java

+8 -1
@@ -366,10 +366,17 @@ public void testSingleChunk() throws Exception {
   }

   @Test
-  public void testMultipleChunks() throws Exception {
+  public void testCompletelyFilledChunks() throws Exception {
+    // Test where the all the chunks are complete with chunkSize entries
     testChunkedBackingMapRecovery(5, 10);
   }

+  @Test
+  public void testPartiallyFilledChunks() throws Exception {
+    // Test where the last chunk is not completely filled.
+    testChunkedBackingMapRecovery(5, 13);
+  }
+
   private void testChunkedBackingMapRecovery(int chunkSize, int numBlocks) throws Exception {
     HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
     Path testDir = TEST_UTIL.getDataTestDir();
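
Quick check on the chunk arithmetic these two tests cover: with chunkSize = 5, 10 blocks flush as two completely filled chunks, while 13 blocks flush as two full chunks plus a final partial chunk of 3 entries, exercising the trailing builder.getEntryList().size() > 0 branch in serializeAsPB.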
