Skip to content

Commit 27ecc23

Browse files
authored
HDFS-17332 DFSInputStream: avoid logging the stack trace until we really need to fail a read request with a MissingBlockException (apache#6446)
Print a warn log message for read retries and only print the full stack trace for a read request failure. Contributed by: Xing Lin
1 parent cc4c4be commit 27ecc23

File tree

4 files changed

+404
-30
lines changed

4 files changed

+404
-30
lines changed

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java

+81-27
Original file line numberDiff line numberDiff line change
@@ -771,7 +771,7 @@ public synchronized int read() throws IOException {
771771
* ChecksumFileSystem
772772
*/
773773
private synchronized int readBuffer(ReaderStrategy reader, int len,
774-
CorruptedBlocks corruptedBlocks)
774+
CorruptedBlocks corruptedBlocks, final Map<InetSocketAddress, List<IOException>> exceptionMap)
775775
throws IOException {
776776
IOException ioe;
777777

@@ -786,6 +786,7 @@ private synchronized int readBuffer(ReaderStrategy reader, int len,
786786
while (true) {
787787
// retry as many times as seekToNewSource allows.
788788
try {
789+
DFSClientFaultInjector.get().fetchFromDatanodeException();
789790
return reader.readFromBlock(blockReader, len);
790791
} catch (ChecksumException ce) {
791792
DFSClient.LOG.warn("Found Checksum error for "
@@ -796,11 +797,18 @@ private synchronized int readBuffer(ReaderStrategy reader, int len,
796797
// we want to remember which block replicas we have tried
797798
corruptedBlocks.addCorruptedBlock(getCurrentBlock(), currentNode);
798799
} catch (IOException e) {
799-
if (!retryCurrentNode) {
800-
DFSClient.LOG.warn("Exception while reading from "
801-
+ getCurrentBlock() + " of " + src + " from "
802-
+ currentNode, e);
800+
String msg = String.format("Failed to read block %s for file %s from datanode %s. "
801+
+ "Exception is %s. Retry with the current or next available datanode.",
802+
getCurrentBlock().getBlockName(), src, currentNode.getXferAddr(), e);
803+
DFSClient.LOG.warn(msg);
804+
805+
// Add the exception to exceptionMap for this datanode.
806+
InetSocketAddress datanode = currentNode.getResolvedAddress();
807+
if (!exceptionMap.containsKey(datanode)) {
808+
exceptionMap.put(datanode, new LinkedList<IOException>());
803809
}
810+
exceptionMap.get(datanode).add(e);
811+
804812
ioe = e;
805813
}
806814
boolean sourceFound;
@@ -822,6 +830,29 @@ private synchronized int readBuffer(ReaderStrategy reader, int len,
822830
}
823831
}
824832

833+
/**
834+
* Send IOExceptions happened at each individual datanode to DFSClient.LOG for a failed read
835+
* request. Used in both readWithStrategy() and pread(), to record the exceptions when a read
836+
* request failed to be served.
837+
* @param position offset in the file where we fail to read
838+
* @param exceptionMap a map which stores the list of IOExceptions for each datanode
839+
*/
840+
private void logDataNodeExceptionsOnReadError(long position, final Map<InetSocketAddress,
841+
List<IOException>> exceptionMap) {
842+
String msg = String.format("Failed to read from all available datanodes for file %s "
843+
+ "at position=%d after retrying.", src, position);
844+
DFSClient.LOG.error(msg);
845+
for (Map.Entry<InetSocketAddress, List<IOException>> dataNodeExceptions :
846+
exceptionMap.entrySet()) {
847+
List<IOException> exceptions = dataNodeExceptions.getValue();
848+
for (IOException ex : exceptions) {
849+
msg = String.format("Exception when fetching file %s at position=%d at datanode %s:", src,
850+
position, dataNodeExceptions.getKey());
851+
DFSClient.LOG.error(msg, ex);
852+
}
853+
}
854+
}
855+
825856
protected synchronized int readWithStrategy(ReaderStrategy strategy)
826857
throws IOException {
827858
dfsClient.checkOpen();
@@ -831,6 +862,9 @@ protected synchronized int readWithStrategy(ReaderStrategy strategy)
831862

832863
int len = strategy.getTargetLength();
833864
CorruptedBlocks corruptedBlocks = new CorruptedBlocks();
865+
// A map to record IOExceptions when fetching from each datanode. Key is the socketAddress of
866+
// a datanode.
867+
Map<InetSocketAddress, List<IOException>> exceptionMap = new HashMap<>();
834868
failures = 0;
835869

836870
maybeRegisterBlockRefresh();
@@ -852,7 +886,7 @@ protected synchronized int readWithStrategy(ReaderStrategy strategy)
852886
}
853887
}
854888
long beginReadMS = Time.monotonicNow();
855-
int result = readBuffer(strategy, realLen, corruptedBlocks);
889+
int result = readBuffer(strategy, realLen, corruptedBlocks, exceptionMap);
856890
long readTimeMS = Time.monotonicNow() - beginReadMS;
857891
if (result >= 0) {
858892
pos += result;
@@ -880,6 +914,8 @@ protected synchronized int readWithStrategy(ReaderStrategy strategy)
880914
dfsClient.addNodeToDeadNodeDetector(this, currentNode);
881915
}
882916
if (--retries == 0) {
917+
// Fail the request and log all exceptions
918+
logDataNodeExceptionsOnReadError(pos, exceptionMap);
883919
throw e;
884920
}
885921
} finally {
@@ -1122,16 +1158,16 @@ private static String getBestNodeDNAddrPairErrorString(
11221158
return errMsgr.toString();
11231159
}
11241160

1125-
protected void fetchBlockByteRange(LocatedBlock block, long start, long end,
1126-
ByteBuffer buf, CorruptedBlocks corruptedBlocks)
1161+
protected void fetchBlockByteRange(LocatedBlock block, long start, long end, ByteBuffer buf,
1162+
CorruptedBlocks corruptedBlocks, final Map<InetSocketAddress, List<IOException>> exceptionMap)
11271163
throws IOException {
11281164
while (true) {
11291165
DNAddrPair addressPair = chooseDataNode(block, null);
11301166
// Latest block, if refreshed internally
11311167
block = addressPair.block;
11321168
try {
11331169
actualGetFromOneDataNode(addressPair, start, end, buf,
1134-
corruptedBlocks);
1170+
corruptedBlocks, exceptionMap);
11351171
return;
11361172
} catch (IOException e) {
11371173
checkInterrupted(e); // check if the read has been interrupted
@@ -1142,15 +1178,15 @@ protected void fetchBlockByteRange(LocatedBlock block, long start, long end,
11421178
}
11431179

11441180
private Callable<ByteBuffer> getFromOneDataNode(final DNAddrPair datanode,
1145-
final LocatedBlock block, final long start, final long end,
1181+
final long start, final long end,
11461182
final ByteBuffer bb,
11471183
final CorruptedBlocks corruptedBlocks,
1148-
final int hedgedReadId) {
1184+
final Map<InetSocketAddress, List<IOException>> exceptionMap) {
11491185
return new Callable<ByteBuffer>() {
11501186
@Override
11511187
public ByteBuffer call() throws Exception {
11521188
DFSClientFaultInjector.get().sleepBeforeHedgedGet();
1153-
actualGetFromOneDataNode(datanode, start, end, bb, corruptedBlocks);
1189+
actualGetFromOneDataNode(datanode, start, end, bb, corruptedBlocks, exceptionMap);
11541190
return bb;
11551191
}
11561192
};
@@ -1167,7 +1203,8 @@ public ByteBuffer call() throws Exception {
11671203
* block replica
11681204
*/
11691205
void actualGetFromOneDataNode(final DNAddrPair datanode, final long startInBlk,
1170-
final long endInBlk, ByteBuffer buf, CorruptedBlocks corruptedBlocks)
1206+
final long endInBlk, ByteBuffer buf, CorruptedBlocks corruptedBlocks,
1207+
final Map<InetSocketAddress, List<IOException>> exceptionMap)
11711208
throws IOException {
11721209
DFSClientFaultInjector.get().startFetchFromDatanode();
11731210
int refetchToken = 1; // only need to get a new access token once
@@ -1236,9 +1273,16 @@ void actualGetFromOneDataNode(final DNAddrPair datanode, final long startInBlk,
12361273
// ignore IOE, since we can retry it later in a loop
12371274
}
12381275
} else {
1239-
String msg = "Failed to connect to " + datanode.addr + " for file "
1240-
+ src + " for block " + block.getBlock() + ":" + e;
1241-
DFSClient.LOG.warn("Connection failure: " + msg, e);
1276+
String msg = String.format("Failed to read block %s for file %s from datanode %s. "
1277+
+ "Exception is %s. Retry with the next available datanode.",
1278+
block.getBlock().getBlockName(), src, datanode.addr, e);
1279+
DFSClient.LOG.warn(msg);
1280+
1281+
// Add the exception to the exceptionMap
1282+
if (!exceptionMap.containsKey(datanode.addr)) {
1283+
exceptionMap.put(datanode.addr, new LinkedList<IOException>());
1284+
}
1285+
exceptionMap.get(datanode.addr).add(e);
12421286
addToLocalDeadNodes(datanode.info);
12431287
dfsClient.addNodeToDeadNodeDetector(this, datanode.info);
12441288
throw new IOException(msg);
@@ -1270,17 +1314,16 @@ protected LocatedBlock refreshLocatedBlock(LocatedBlock block)
12701314
* 'hedged' read if the first read is taking longer than configured amount of
12711315
* time. We then wait on which ever read returns first.
12721316
*/
1273-
private void hedgedFetchBlockByteRange(LocatedBlock block, long start,
1274-
long end, ByteBuffer buf, CorruptedBlocks corruptedBlocks)
1275-
throws IOException {
1317+
private void hedgedFetchBlockByteRange(LocatedBlock block, long start, long end, ByteBuffer buf,
1318+
CorruptedBlocks corruptedBlocks,
1319+
final Map<InetSocketAddress, List<IOException>> exceptionMap) throws IOException {
12761320
final DfsClientConf conf = dfsClient.getConf();
12771321
ArrayList<Future<ByteBuffer>> futures = new ArrayList<>();
12781322
CompletionService<ByteBuffer> hedgedService =
12791323
new ExecutorCompletionService<>(dfsClient.getHedgedReadsThreadPool());
12801324
ArrayList<DatanodeInfo> ignored = new ArrayList<>();
12811325
ByteBuffer bb;
12821326
int len = (int) (end - start + 1);
1283-
int hedgedReadId = 0;
12841327
while (true) {
12851328
// see HDFS-6591, this metric is used to verify/catch unnecessary loops
12861329
hedgedReadOpsLoopNumForTesting++;
@@ -1293,9 +1336,8 @@ private void hedgedFetchBlockByteRange(LocatedBlock block, long start,
12931336
// Latest block, if refreshed internally
12941337
block = chosenNode.block;
12951338
bb = ByteBuffer.allocate(len);
1296-
Callable<ByteBuffer> getFromDataNodeCallable = getFromOneDataNode(
1297-
chosenNode, block, start, end, bb,
1298-
corruptedBlocks, hedgedReadId++);
1339+
Callable<ByteBuffer> getFromDataNodeCallable =
1340+
getFromOneDataNode(chosenNode, start, end, bb, corruptedBlocks, exceptionMap);
12991341
Future<ByteBuffer> firstRequest = hedgedService
13001342
.submit(getFromDataNodeCallable);
13011343
futures.add(firstRequest);
@@ -1335,8 +1377,7 @@ private void hedgedFetchBlockByteRange(LocatedBlock block, long start,
13351377
block = chosenNode.block;
13361378
bb = ByteBuffer.allocate(len);
13371379
Callable<ByteBuffer> getFromDataNodeCallable =
1338-
getFromOneDataNode(chosenNode, block, start, end, bb,
1339-
corruptedBlocks, hedgedReadId++);
1380+
getFromOneDataNode(chosenNode, start, end, bb, corruptedBlocks, exceptionMap);
13401381
Future<ByteBuffer> oneMoreRequest =
13411382
hedgedService.submit(getFromDataNodeCallable);
13421383
futures.add(oneMoreRequest);
@@ -1486,6 +1527,11 @@ private int pread(long position, ByteBuffer buffer)
14861527
List<LocatedBlock> blockRange = getBlockRange(position, realLen);
14871528
int remaining = realLen;
14881529
CorruptedBlocks corruptedBlocks = new CorruptedBlocks();
1530+
// A map to record all IOExceptions happened at each datanode when fetching a block.
1531+
// In HDFS-17332, we worked on populating this map only for DFSInputStream, but not for
1532+
// DFSStripedInputStream. If you need the same function for DFSStripedInputStream, please
1533+
// work on it yourself (fetchBlockByteRange() in DFSStripedInputStream).
1534+
Map<InetSocketAddress, List<IOException>> exceptionMap = new HashMap<>();
14891535
for (LocatedBlock blk : blockRange) {
14901536
long targetStart = position - blk.getStartOffset();
14911537
int bytesToRead = (int) Math.min(remaining,
@@ -1494,11 +1540,17 @@ private int pread(long position, ByteBuffer buffer)
14941540
try {
14951541
if (dfsClient.isHedgedReadsEnabled() && !blk.isStriped()) {
14961542
hedgedFetchBlockByteRange(blk, targetStart,
1497-
targetEnd, buffer, corruptedBlocks);
1543+
targetEnd, buffer, corruptedBlocks, exceptionMap);
14981544
} else {
14991545
fetchBlockByteRange(blk, targetStart, targetEnd,
1500-
buffer, corruptedBlocks);
1546+
buffer, corruptedBlocks, exceptionMap);
15011547
}
1548+
} catch (IOException e) {
1549+
// When we reach here, it means we fail to fetch the current block from all available
1550+
// datanodes. Send IOExceptions in exceptionMap to the log and rethrow the exception to
1551+
// fail this request.
1552+
logDataNodeExceptionsOnReadError(position, exceptionMap);
1553+
throw e;
15021554
} finally {
15031555
// Check and report if any block replicas are corrupted.
15041556
// BlockMissingException may be caught if all block replicas are
@@ -1507,6 +1559,8 @@ private int pread(long position, ByteBuffer buffer)
15071559
false);
15081560
}
15091561

1562+
// Reset exceptionMap before fetching the next block.
1563+
exceptionMap.clear();
15101564
remaining -= bytesToRead;
15111565
position += bytesToRead;
15121566
}

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedInputStream.java

+7-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
*/
1818
package org.apache.hadoop.hdfs;
1919

20+
import java.net.InetSocketAddress;
21+
import java.util.Map;
2022
import org.apache.hadoop.classification.VisibleForTesting;
2123
import org.apache.hadoop.classification.InterfaceAudience;
2224
import org.apache.hadoop.fs.ReadOption;
@@ -479,10 +481,14 @@ private LocatedStripedBlock getBlockGroupAt(long offset) throws IOException {
479481

480482
/**
481483
* Real implementation of pread.
484+
* <p>
485+
* Note: exceptionMap is not populated with ioExceptions as what we added for DFSInputStream. If
486+
* you need this function, please implement it.
482487
*/
483488
@Override
484489
protected void fetchBlockByteRange(LocatedBlock block, long start,
485-
long end, ByteBuffer buf, CorruptedBlocks corruptedBlocks)
490+
long end, ByteBuffer buf, CorruptedBlocks corruptedBlocks,
491+
final Map<InetSocketAddress, List<IOException>> exceptionMap)
486492
throws IOException {
487493
// Refresh the striped block group
488494
LocatedStripedBlock blockGroup = getBlockGroupAt(block.getStartOffset());

0 commit comments

Comments
 (0)