Skip to content

Commit 45ecba6

Browse files
committed
PaxosCleanupLocalCoordinator wait for transaction timeout before repairing
Patch by Ariel Weisberg; Reviewed by Benedict Elliott Smith for CASSANDRA-20585
1 parent f1457e8 commit 45ecba6

File tree

5 files changed

+63
-4
lines changed

5 files changed

+63
-4
lines changed

src/java/org/apache/cassandra/config/Config.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1382,4 +1382,6 @@ public enum CQLStartTime
13821382
// 3.x Cassandra Driver has its "read" timeout set to 12 seconds, default matches this.
13831383
public DurationSpec.LongMillisecondsBound native_transport_timeout = new DurationSpec.LongMillisecondsBound("12s");
13841384
public boolean enforce_native_deadline_for_hints = false;
1385+
1386+
// When true (the default), PaxosCleanupLocalCoordinator waits for the transaction timeout to elapse on a
// recently-proposed ballot before repairing it. Exposed through DatabaseDescriptor.getPaxosRepairRaceWait().
public boolean paxos_repair_race_wait = true;
13851387
}

src/java/org/apache/cassandra/config/DatabaseDescriptor.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,8 @@
114114
import static org.apache.cassandra.config.CassandraRelevantProperties.ALLOCATE_TOKENS_FOR_KEYSPACE;
115115
import static org.apache.cassandra.config.CassandraRelevantProperties.ALLOW_UNLIMITED_CONCURRENT_VALIDATIONS;
116116
import static org.apache.cassandra.config.CassandraRelevantProperties.AUTO_BOOTSTRAP;
117-
import static org.apache.cassandra.config.CassandraRelevantProperties.CONFIG_LOADER;
118117
import static org.apache.cassandra.config.CassandraRelevantProperties.CHRONICLE_ANALYTICS_DISABLE;
118+
import static org.apache.cassandra.config.CassandraRelevantProperties.CONFIG_LOADER;
119119
import static org.apache.cassandra.config.CassandraRelevantProperties.DISABLE_STCS_IN_L0;
120120
import static org.apache.cassandra.config.CassandraRelevantProperties.INITIAL_TOKEN;
121121
import static org.apache.cassandra.config.CassandraRelevantProperties.IO_NETTY_TRANSPORT_ESTIMATE_SIZE_ON_SUBMIT;
@@ -5296,4 +5296,15 @@ public static void setRejectOutOfTokenRangeRequests(boolean enabled)
52965296
{
52975297
conf.reject_out_of_token_range_requests = enabled;
52985298
}
5299+
5300+
/**
 * @return the current value of {@code conf.paxos_repair_race_wait}; PaxosCleanupLocalCoordinator reads this
 *         to decide whether to wait on potentially in-progress ballots before repairing them
 */
public static boolean getPaxosRepairRaceWait()
5301+
{
5302+
return conf.paxos_repair_race_wait;
5303+
}
5304+
5305+
/**
 * Overwrites {@code conf.paxos_repair_race_wait}.
 *
 * NOTE(review): annotated for tests only, but StorageService.setPaxosRepairRaceWait (the JMX setter)
 * also calls this method - confirm whether the annotation should stay.
 *
 * @param paxosRepairRaceWait the new value of the flag
 */
@VisibleForTesting
5306+
public static void setPaxosRepairRaceWait(boolean paxosRepairRaceWait)
5307+
{
5308+
conf.paxos_repair_race_wait = paxosRepairRaceWait;
5309+
}
52995310
}

src/java/org/apache/cassandra/service/StorageService.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ private static int getSchemaDelay()
351351
@VisibleForTesting // this is used for dtests only, see CASSANDRA-18152
352352
public volatile boolean skipNotificationListeners = false;
353353

354-
private final java.util.function.Predicate<Keyspace> anyOutOfRangeOpsRecorded =
354+
private final java.util.function.Predicate<Keyspace> anyOutOfRangeOpsRecorded =
355355
keyspace -> keyspace.metric.outOfRangeTokenReads.getCount() > 0
356356
|| keyspace.metric.outOfRangeTokenWrites.getCount() > 0
357357
|| keyspace.metric.outOfRangeTokenPaxosRequests.getCount() > 0;
@@ -7668,4 +7668,16 @@ public void setPrioritizeSAIOverLegacyIndex(boolean value)
76687668
{
76697669
DatabaseDescriptor.setPrioritizeSAIOverLegacyIndex(value);
76707670
}
7671+
7672+
/**
 * Forwards the new flag value to {@link DatabaseDescriptor#setPaxosRepairRaceWait(boolean)}.
 *
 * @param paxosRepairRaceWait the new value of the flag
 */
@Override
7673+
public void setPaxosRepairRaceWait(boolean paxosRepairRaceWait)
7674+
{
7675+
DatabaseDescriptor.setPaxosRepairRaceWait(paxosRepairRaceWait);
7676+
}
7677+
7678+
/**
 * @return the flag value reported by {@link DatabaseDescriptor#getPaxosRepairRaceWait()}
 */
@Override
7679+
public boolean getPaxosRepairRaceWait()
7680+
{
7681+
return DatabaseDescriptor.getPaxosRepairRaceWait();
7682+
}
76717683
}

src/java/org/apache/cassandra/service/StorageServiceMBean.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1322,4 +1322,8 @@ public void enableAuditLog(String loggerName, String includedKeyspaces, String e
13221322

13231323
boolean getPrioritizeSAIOverLegacyIndex();
13241324
void setPrioritizeSAIOverLegacyIndex(boolean value);
1325+
1326+
/**
 * Sets the paxos repair race wait flag.
 *
 * @param paxosRepairCoordinatorWait the new value of the flag
 */
void setPaxosRepairRaceWait(boolean paxosRepairCoordinatorWait);
1327+
1328+
/**
 * @return the current value of the paxos repair race wait flag
 */
boolean getPaxosRepairRaceWait();
13251329
}

src/java/org/apache/cassandra/service/paxos/cleanup/PaxosCleanupLocalCoordinator.java

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import java.util.concurrent.ConcurrentHashMap;
2525

2626
import com.google.common.base.Preconditions;
27+
import com.google.common.util.concurrent.Uninterruptibles;
2728
import org.slf4j.Logger;
2829
import org.slf4j.LoggerFactory;
2930

@@ -40,9 +41,15 @@
4041
import org.apache.cassandra.service.paxos.PaxosRepair;
4142
import org.apache.cassandra.service.paxos.PaxosState;
4243
import org.apache.cassandra.service.paxos.uncommitted.UncommittedPaxosKey;
44+
import org.apache.cassandra.utils.Clock;
4345
import org.apache.cassandra.utils.CloseableIterator;
4446
import org.apache.cassandra.utils.concurrent.AsyncFuture;
4547

48+
import static java.util.concurrent.TimeUnit.MICROSECONDS;
49+
import static java.util.concurrent.TimeUnit.MILLISECONDS;
50+
import static java.util.concurrent.TimeUnit.SECONDS;
51+
import static org.apache.cassandra.config.DatabaseDescriptor.getCasContentionTimeout;
52+
import static org.apache.cassandra.config.DatabaseDescriptor.getWriteRpcTimeout;
4653
import static org.apache.cassandra.service.paxos.cleanup.PaxosCleanupSession.TIMEOUT_NANOS;
4754

4855
public class PaxosCleanupLocalCoordinator extends AsyncFuture<PaxosCleanupResponse>
@@ -126,16 +133,18 @@ private void scheduleKeyRepairsOrFinish()
126133
return;
127134
}
128135

136+
long txnTimeoutMicros = Math.max(getCasContentionTimeout(MICROSECONDS), getWriteRpcTimeout(MICROSECONDS));
137+
boolean waitForCoordinator = DatabaseDescriptor.getPaxosRepairRaceWait();
129138
while (inflight.size() < parallelism && uncommittedIter.hasNext())
130-
repairKey(uncommittedIter.next());
139+
repairKey(uncommittedIter.next(), txnTimeoutMicros, waitForCoordinator);
131140

132141
}
133142

134143
if (inflight.isEmpty())
135144
finish();
136145
}
137146

138-
private boolean repairKey(UncommittedPaxosKey uncommitted)
147+
private boolean repairKey(UncommittedPaxosKey uncommitted, long txnTimeoutMicros, boolean waitForCoordinator)
139148
{
140149
logger.trace("repairing {}", uncommitted);
141150
Preconditions.checkState(!inflight.containsKey(uncommitted.getKey()));
@@ -146,6 +155,9 @@ private boolean repairKey(UncommittedPaxosKey uncommitted)
146155
if (consistency == null)
147156
return false;
148157

158+
if (waitForCoordinator)
159+
maybeWaitForOriginalCoordinator(uncommitted, txnTimeoutMicros);
160+
149161
inflight.put(uncommitted.getKey(), tableRepairs.startOrGetOrQueue(uncommitted.getKey(), uncommitted.ballot(), uncommitted.getConsistencyLevel(), table, result -> {
150162
if (result.wasSuccessful())
151163
onKeyFinish(uncommitted.getKey());
@@ -155,6 +167,24 @@ private boolean repairKey(UncommittedPaxosKey uncommitted)
155167
return true;
156168
}
157169

170+
/**
171+
* Wait to repair things that are still potentially executing at the original coordinator to avoid
172+
* causing timeouts. This should only have to happen at most a few times when the repair starts
173+
*/
174+
private static void maybeWaitForOriginalCoordinator(UncommittedPaxosKey uncommitted, long txnTimeoutMicros)
175+
{
176+
long nowMicros = MILLISECONDS.toMicros(Clock.Global.currentTimeMillis());
177+
long ballotElapsedMicros = nowMicros - uncommitted.ballot().unixMicros();
178+
if (ballotElapsedMicros < 0 && Math.abs(ballotElapsedMicros) > SECONDS.toMicros(1))
179+
logger.warn("Encountered ballot that is more than 1 second in the future, is there a clock sync issue? {}", uncommitted.ballot());
180+
if (ballotElapsedMicros < txnTimeoutMicros)
181+
{
182+
long sleepMicros = txnTimeoutMicros - ballotElapsedMicros;
183+
logger.info("Paxos auto repair encountered a potentially in progress ballot, sleeping {}us to allow the in flight operation to finish", sleepMicros);
184+
Uninterruptibles.sleepUninterruptibly(sleepMicros, MICROSECONDS);
185+
}
186+
}
187+
158188
private synchronized void onKeyFinish(DecoratedKey key)
159189
{
160190
if (!inflight.containsKey(key))

0 commit comments

Comments
 (0)