Skip to content

Commit

Permalink
IGNITE-22971 Reduce log pollution by RaftGroupServiceImpl (#4214)
Browse files Browse the repository at this point in the history
  • Loading branch information
sashapolo authored Aug 12, 2024
1 parent 513d54d commit 544786d
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 12 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.ignite.internal.raft;

/**
* Special type of exception used when a target peer is not present in the physical topology.
*
* <p>The stacktrace is omitted on purpose as to reduce log pollution (this exception is thrown and logged nearly immediately).
*/
public class PeerUnavailableException extends RuntimeException {
public PeerUnavailableException(String consistentId) {
super("Peer " + consistentId + " is unavailable", null, true, false);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import static java.util.concurrent.ThreadLocalRandom.current;
import static java.util.stream.Collectors.toList;
import static org.apache.ignite.internal.tostring.IgniteToStringBuilder.includeSensitive;
import static org.apache.ignite.internal.util.ExceptionUtils.unwrapCause;
import static org.apache.ignite.lang.ErrorGroups.Common.INTERNAL_ERR;
import static org.apache.ignite.raft.jraft.rpc.CliRequests.AddLearnersRequest;
import static org.apache.ignite.raft.jraft.rpc.CliRequests.AddPeerRequest;
Expand All @@ -41,13 +42,11 @@
import static org.apache.ignite.raft.jraft.rpc.CliRequests.TransferLeaderRequest;

import java.io.IOException;
import java.net.ConnectException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -598,6 +597,8 @@ private void handleThrowable(
long stopTime,
CompletableFuture<? extends NetworkMessage> fut
) {
err = unwrapCause(err);

if (recoverable(err)) {
Peer randomPeer = randomNode(peer);

Expand Down Expand Up @@ -707,17 +708,18 @@ private void scheduleRetry(Runnable runnable) {
}

/**
* Checks if an error is recoverable, for example, {@link java.net.ConnectException}.
* Checks if an error is recoverable.
*
* <p>An error is considered recoverable if it's an instance of {@link TimeoutException}, {@link IOException}
* or {@link PeerUnavailableException}.
*
* @param t The throwable.
* @return {@code True} if this is a recoverable exception.
*/
private static boolean recoverable(Throwable t) {
if (t instanceof ExecutionException || t instanceof CompletionException) {
t = t.getCause();
}
t = unwrapCause(t);

return t instanceof TimeoutException || t instanceof IOException;
return t instanceof TimeoutException || t instanceof IOException || t instanceof PeerUnavailableException;
}

private Peer randomNode() {
Expand Down Expand Up @@ -811,7 +813,7 @@ private CompletableFuture<ClusterNode> resolvePeer(Peer peer) {
ClusterNode node = cluster.topologyService().getByConsistentId(peer.consistentId());

if (node == null) {
return CompletableFuture.failedFuture(new ConnectException("Peer " + peer.consistentId() + " is unavailable"));
return CompletableFuture.failedFuture(new PeerUnavailableException(peer.consistentId()));
}

return CompletableFuture.completedFuture(node);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.util.concurrent.ExecutorService;
import org.apache.ignite.internal.logger.IgniteLogger;
import org.apache.ignite.internal.logger.Loggers;
import org.apache.ignite.internal.raft.PeerUnavailableException;
import org.apache.ignite.network.ClusterNode;
import org.apache.ignite.internal.network.TopologyEventHandler;
import org.apache.ignite.raft.jraft.Status;
Expand Down Expand Up @@ -59,7 +60,7 @@ public abstract class AbstractClientService implements ClientService, TopologyEv
/**
* The set of pinged consistent IDs.
*/
protected Set<String> readyConsistentIds = new ConcurrentHashSet<>();
private final Set<String> readyConsistentIds = new ConcurrentHashSet<>();

public RpcClient getRpcClient() {
return this.rpcClient;
Expand Down Expand Up @@ -224,7 +225,7 @@ public void complete(final Object result, final Throwable err) {
}
}
else {
if (ThrowUtil.hasCause(err, null, ConnectException.class))
if (ThrowUtil.hasCause(err, null, PeerUnavailableException.class, ConnectException.class))
readyConsistentIds.remove(peerId.getConsistentId()); // Force logical reconnect.

if (done != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.function.BiPredicate;
import org.apache.ignite.internal.logger.IgniteLogger;
import org.apache.ignite.internal.logger.Loggers;
import org.apache.ignite.internal.raft.PeerUnavailableException;
import org.apache.ignite.internal.tostring.S;
import org.apache.ignite.network.ClusterNode;
import org.apache.ignite.internal.network.ClusterService;
Expand Down Expand Up @@ -135,8 +136,8 @@ public void send(PeerId peerId, Object request, CompletableFuture<Message> fut,
ClusterNode targetNode = service.topologyService().getByConsistentId(peerId.getConsistentId());

if (targetNode == null) {
// ConnectException will force a retry by the enclosing components.
fut.completeExceptionally(new ConnectException());
// PeerUnavailableException will force a retry by the enclosing components.
fut.completeExceptionally(new PeerUnavailableException(peerId.getConsistentId()));

return;
}
Expand Down

0 comments on commit 544786d

Please sign in to comment.