apache · hgromer · May 6, 2025 · ndimiduk · May 20, 2025 · droudnitsky
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java
@@ -219,13 +219,14 @@ public void run() {
         } catch (IOException e) {
           // The service itself failed . It may be an error coming from the communication
           // layer, but, as well, a functional error raised by the server.
-          receiveGlobalFailure(multiAction, server, numAttempt, e, true);
+
+          receiveGlobalFailure(multiAction, server, numAttempt, e);
           return;
         } catch (Throwable t) {
           // This should not happen. Let's log & retry anyway.
           LOG.error("id=" + asyncProcess.id + ", caught throwable. Unexpected."
             + " Retrying. Server=" + server + ", tableName=" + tableName, t);
-          receiveGlobalFailure(multiAction, server, numAttempt, t, true);
+          receiveGlobalFailure(multiAction, server, numAttempt, t);
           return;
         }
         if (res.type() == AbstractResponse.ResponseType.MULTI) {
@@ -570,7 +571,6 @@ private RegionLocations findAllLocationsOrFail(Action action, boolean useCache)
    */
   void sendMultiAction(Map<ServerName, MultiAction> actionsByServer, int numAttempt,
     List<Action> actionsForReplicaThread, boolean reuseThread) {
-    boolean clearServerCache = true;
     // Run the last item on the same thread if we are already on a send thread.
     // We hope most of the time it will be the only item, so we can cut down on threads.
     int actionsRemaining = actionsByServer.size();
@@ -606,15 +606,14 @@ void sendMultiAction(Map<ServerName, MultiAction> actionsByServer, int numAttemp
               LOG.warn("id=" + asyncProcess.id + ", task rejected by pool. Unexpected." + " Server="
                 + server.getServerName(), t);
-              // Do not update cache if exception is from failing to submit action to thread pool
+              // No cache update for pool rejections; receiveGlobalFailure decides that from t
-              clearServerCache = false;
             } else {
               // see #HBASE-14359 for more details
               LOG.warn("Caught unexpected exception/error: ", t);
             }
             asyncProcess.decTaskCounters(multiAction.getRegions(), server);
             // We're likely to fail again, but this will increment the attempt counter,
             // so it will finish.
-            receiveGlobalFailure(multiAction, server, numAttempt, t, clearServerCache);
+            receiveGlobalFailure(multiAction, server, numAttempt, t);
           }
         }
       }
@@ -764,13 +763,18 @@ private void failAll(MultiAction actions, ServerName server, int numAttempt,
    * @param t          the throwable (if any) that caused the resubmit
    */
   private void receiveGlobalFailure(MultiAction rsActions, ServerName server, int numAttempt,
-    Throwable t, boolean clearServerCache) {
+    Throwable t) {
     errorsByServer.reportServerError(server);
     Retry canRetry = errorsByServer.canTryMore(numAttempt) ? Retry.YES : Retry.NO_RETRIES_EXHAUSTED;
+    boolean clearServerCache = ClientExceptionsUtil.isMetaClearingException(t);
 
-    // Do not update cache if exception is from failing to submit action to thread pool
+    // Only clear the cache for meta-clearing exceptions (thread pool rejections are excluded)
     if (clearServerCache) {
       cleanServerCache(server, t);
+
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("Cleared meta cache for server {} due to global failure {}", server, t);
+      }
     }
 
     int failed = 0;
@@ -779,12 +783,8 @@ private void receiveGlobalFailure(MultiAction rsActions, ServerName server, int
     for (Map.Entry<byte[], List<Action>> e : rsActions.actions.entrySet()) {
       byte[] regionName = e.getKey();
       byte[] row = e.getValue().get(0).getAction().getRow();
-      // Do not use the exception for updating cache because it might be coming from
-      // any of the regions in the MultiAction and do not update cache if exception is
-      // from failing to submit action to thread pool
       if (clearServerCache) {
-        updateCachedLocations(server, regionName, row,
-          ClientExceptionsUtil.isMetaClearingException(t) ? null : t);
+        updateCachedLocations(server, regionName, row, t);
       }
       for (Action action : e.getValue()) {
         Retry retry =

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/exceptions/ClientExceptionsUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/exceptions/ClientExceptionsUtil.java
@@ -26,6 +26,7 @@
 import java.net.SocketTimeoutException;
 import java.nio.channels.ClosedChannelException;
 import java.util.Set;
+import java.util.concurrent.RejectedExecutionException;
 import java.util.concurrent.TimeoutException;
 import org.apache.hadoop.hbase.CallDroppedException;
 import org.apache.hadoop.hbase.CallQueueTooBigException;
@@ -56,8 +57,8 @@ public static boolean isMetaClearingException(Throwable cur) {
     if (cur == null) {
       return true;
     }
-    return !isSpecialException(cur) || (cur instanceof RegionMovedException)
-      || cur instanceof NotServingRegionException;
+    return (!isExecutorException(cur) && !isSpecialException(cur))
+      || (cur instanceof RegionMovedException) || cur instanceof NotServingRegionException;
   }
 
   public static boolean isSpecialException(Throwable cur) {
@@ -177,4 +178,14 @@ public static Throwable translatePFFE(Throwable t) throws IOException {
     }
     return t;
   }
+
+  /**
+   * Checks whether the throwable is a {@link RejectedExecutionException}, i.e. an executor
+   * refused to accept a task.
+   * @param t the throwable to inspect, may be {@code null}
+   * @return {@code true} if {@code t} is a {@link RejectedExecutionException}
+   */
+  private static boolean isExecutorException(Throwable t) {
+    return t instanceof RejectedExecutionException;
+  }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java
@@ -27,6 +27,7 @@
 import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.ExecutionException;
+import java.util.concurrent.RejectedExecutionException;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
@@ -397,7 +398,7 @@ public static List<Throwable> metaCachePreservingExceptions() {
     return Arrays.asList(new RegionOpeningException(" "),
       new RegionTooBusyException("Some old message"), new RpcThrottlingException(" "),
       new MultiActionResultTooLarge(" "), new RetryImmediatelyException(" "),
-      new CallQueueTooBigException());
+      new CallQueueTooBigException(), new RejectedExecutionException(" "));
   }
 
   public static class RegionServerWithFakeRpcServices extends HRegionServer {