Skip to content

Commit f304264

Browse files
committed
XDS client fallback
1 parent 32f4cf4 commit f304264

17 files changed

+1108
-227
lines changed

xds/src/main/java/io/grpc/xds/client/BootstrapperImpl.java

+13
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@
4141
@Internal
4242
public abstract class BootstrapperImpl extends Bootstrapper {
4343

44+
public static final String GRPC_EXPERIMENTAL_XDS_FALLBACK =
45+
"GRPC_EXPERIMENTAL_XDS_FALLBACK";
46+
4447
// Client features.
4548
@VisibleForTesting
4649
public static final String CLIENT_FEATURE_DISABLE_OVERPROVISIONING =
@@ -52,6 +55,9 @@ public abstract class BootstrapperImpl extends Bootstrapper {
5255
private static final String SERVER_FEATURE_IGNORE_RESOURCE_DELETION = "ignore_resource_deletion";
5356
private static final String SERVER_FEATURE_TRUSTED_XDS_SERVER = "trusted_xds_server";
5457

58+
@VisibleForTesting
59+
static boolean enableXdsFallback = GrpcUtil.getFlag(GRPC_EXPERIMENTAL_XDS_FALLBACK, false);
60+
5561
protected final XdsLogger logger;
5662

5763
protected FileReader reader = LocalFileReader.INSTANCE;
@@ -65,6 +71,7 @@ protected BootstrapperImpl() {
6571
protected abstract Object getImplSpecificConfig(Map<String, ?> serverConfig, String serverUri)
6672
throws XdsInitializationException;
6773

74+
6875
/**
6976
* Reads and parses bootstrap config. The config is expected to be in JSON format.
7077
*/
@@ -103,6 +110,9 @@ protected BootstrapInfo.Builder bootstrapBuilder(Map<String, ?> rawData)
103110
throw new XdsInitializationException("Invalid bootstrap: 'xds_servers' does not exist.");
104111
}
105112
List<ServerInfo> servers = parseServerInfos(rawServerConfigs, logger);
113+
if (servers.size() > 1 && !enableXdsFallback) {
114+
servers = ImmutableList.of(servers.get(0));
115+
}
106116
builder.servers(servers);
107117

108118
Node.Builder nodeBuilder = Node.newBuilder();
@@ -209,6 +219,9 @@ protected BootstrapInfo.Builder bootstrapBuilder(Map<String, ?> rawData)
209219
if (rawAuthorityServers == null || rawAuthorityServers.isEmpty()) {
210220
authorityServers = servers;
211221
} else {
222+
if (rawAuthorityServers.size() > 1 && !enableXdsFallback) {
223+
rawAuthorityServers = ImmutableList.of(rawAuthorityServers.get(0));
224+
}
212225
authorityServers = parseServerInfos(rawAuthorityServers, logger);
213226
}
214227
authorityInfoMapBuilder.put(

xds/src/main/java/io/grpc/xds/client/ControlPlaneClient.java

+93-27
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,8 @@
3636
import io.grpc.xds.client.Bootstrapper.ServerInfo;
3737
import io.grpc.xds.client.EnvoyProtoData.Node;
3838
import io.grpc.xds.client.XdsClient.ProcessingTracker;
39-
import io.grpc.xds.client.XdsClient.ResourceStore;
4039
import io.grpc.xds.client.XdsClient.XdsResponseHandler;
4140
import io.grpc.xds.client.XdsLogger.XdsLogLevel;
42-
import io.grpc.xds.client.XdsTransportFactory.EventHandler;
4341
import io.grpc.xds.client.XdsTransportFactory.StreamingCall;
4442
import io.grpc.xds.client.XdsTransportFactory.XdsTransport;
4543
import java.util.Collection;
@@ -65,40 +63,41 @@ final class ControlPlaneClient {
6563
private final ServerInfo serverInfo;
6664
private final XdsTransport xdsTransport;
6765
private final XdsResponseHandler xdsResponseHandler;
68-
private final ResourceStore resourceStore;
66+
private final XdsClient.ResourceStore resourceStore;
6967
private final ScheduledExecutorService timeService;
7068
private final BackoffPolicy.Provider backoffPolicyProvider;
7169
private final Stopwatch stopwatch;
7270
private final Node bootstrapNode;
73-
private final XdsClient xdsClient;
7471

7572
// Last successfully applied version_info for each resource type. Starts with empty string.
7673
// A version_info is used to update management server with client's most recent knowledge of
7774
// resources.
7875
private final Map<XdsResourceType<?>, String> versions = new HashMap<>();
7976

8077
private boolean shutdown;
78+
private boolean lastStateWasReady;
79+
private boolean inError;
80+
8181
@Nullable
8282
private AdsStream adsStream;
8383
@Nullable
8484
private BackoffPolicy retryBackoffPolicy;
8585
@Nullable
8686
private ScheduledHandle rpcRetryTimer;
87-
private MessagePrettyPrinter messagePrinter;
87+
private final MessagePrettyPrinter messagePrinter;
8888

8989
/** An entity that manages ADS RPCs over a single channel. */
9090
ControlPlaneClient(
9191
XdsTransport xdsTransport,
9292
ServerInfo serverInfo,
9393
Node bootstrapNode,
9494
XdsResponseHandler xdsResponseHandler,
95-
ResourceStore resourceStore,
95+
XdsClient.ResourceStore resourceStore,
9696
ScheduledExecutorService
9797
timeService,
9898
SynchronizationContext syncContext,
9999
BackoffPolicy.Provider backoffPolicyProvider,
100100
Supplier<Stopwatch> stopwatchSupplier,
101-
XdsClient xdsClient,
102101
MessagePrettyPrinter messagePrinter) {
103102
this.serverInfo = checkNotNull(serverInfo, "serverInfo");
104103
this.xdsTransport = checkNotNull(xdsTransport, "xdsTransport");
@@ -108,7 +107,6 @@ final class ControlPlaneClient {
108107
this.timeService = checkNotNull(timeService, "timeService");
109108
this.syncContext = checkNotNull(syncContext, "syncContext");
110109
this.backoffPolicyProvider = checkNotNull(backoffPolicyProvider, "backoffPolicyProvider");
111-
this.xdsClient = checkNotNull(xdsClient, "xdsClient");
112110
this.messagePrinter = checkNotNull(messagePrinter, "messagePrinter");
113111
stopwatch = checkNotNull(stopwatchSupplier, "stopwatchSupplier").get();
114112
logId = InternalLogId.allocate("xds-client", serverInfo.target());
@@ -138,6 +136,10 @@ public String toString() {
138136
return logId.toString();
139137
}
140138

139+
public ServerInfo getServerInfo() {
140+
return serverInfo;
141+
}
142+
141143
/**
142144
* Updates the resource subscription for the given resource type.
143145
*/
@@ -148,7 +150,15 @@ void adjustResourceSubscription(XdsResourceType<?> resourceType) {
148150
}
149151
if (adsStream == null) {
150152
startRpcStream();
153+
// when the stream becomes ready, it will send the discovery requests
154+
return;
155+
}
156+
157+
// We will do the rest of the method as part of the readyHandler when the stream is ready.
158+
if (!lastStateWasReady) {
159+
return;
151160
}
161+
152162
Collection<String> resources = resourceStore.getSubscribedResources(serverInfo, resourceType);
153163
if (resources == null) {
154164
resources = Collections.emptyList();
@@ -203,25 +213,45 @@ boolean isInBackoff() {
203213

204214
// Must be synchronized.
205215
boolean isReady() {
206-
return adsStream != null && adsStream.call != null && adsStream.call.isReady();
216+
return adsStream != null && adsStream.call != null
217+
&& adsStream.call.isReady() && !adsStream.closed;
218+
}
219+
220+
boolean isResponseReceived() {
221+
return adsStream != null && adsStream.responseReceived;
222+
}
223+
224+
boolean isConnected() {
225+
return lastStateWasReady;
226+
}
227+
228+
boolean isInError() {
229+
return inError;
207230
}
208231

232+
209233
/**
210234
* Starts a timer for each requested resource that hasn't been responded to and
211235
* has been waiting for the channel to get ready.
212236
*/
213237
// Must be synchronized.
214238
void readyHandler() {
215239
if (!isReady()) {
240+
logger.log(XdsLogLevel.DEBUG, "ADS stream ready handler called, but not ready {0}", logId);
216241
return;
217242
}
218243

219-
if (isInBackoff()) {
244+
logger.log(XdsLogLevel.DEBUG, "ADS stream ready {0}", logId);
245+
246+
if (rpcRetryTimer != null) {
220247
rpcRetryTimer.cancel();
221248
rpcRetryTimer = null;
222249
}
223250

224-
xdsClient.startSubscriberTimersIfNeeded(serverInfo);
251+
if (!lastStateWasReady) {
252+
lastStateWasReady = true;
253+
xdsResponseHandler.handleStreamRestarted(serverInfo);
254+
}
225255
}
226256

227257
/**
@@ -232,27 +262,50 @@ void readyHandler() {
232262
private void startRpcStream() {
233263
checkState(adsStream == null, "Previous adsStream has not been cleared yet");
234264
adsStream = new AdsStream();
265+
adsStream.start();
235266
logger.log(XdsLogLevel.INFO, "ADS stream started");
236267
stopwatch.reset().start();
237268
}
238269

270+
void sendDiscoveryRequests() {
271+
if (adsStream == null) {
272+
startRpcStream();
273+
// when the stream becomes ready, it will send the discovery requests
274+
return;
275+
}
276+
277+
if (isConnected()) {
278+
adjustAllResourceSubscriptions();
279+
}
280+
}
281+
282+
void adjustAllResourceSubscriptions() {
283+
if (isInBackoff()) {
284+
return;
285+
}
286+
287+
Set<XdsResourceType<?>> subscribedResourceTypes =
288+
new HashSet<>(resourceStore.getSubscribedResourceTypesWithTypeUrl().values());
289+
290+
for (XdsResourceType<?> type : subscribedResourceTypes) {
291+
adjustResourceSubscription(type);
292+
}
293+
}
294+
239295
@VisibleForTesting
240296
public final class RpcRetryTask implements Runnable {
241297
@Override
242298
public void run() {
243-
if (shutdown) {
299+
logger.log(XdsLogLevel.DEBUG, "Retry timeout. Restart ADS stream {0}", logId);
300+
if (shutdown || isReady()) {
244301
return;
245302
}
246-
startRpcStream();
247-
Set<XdsResourceType<?>> subscribedResourceTypes =
248-
new HashSet<>(resourceStore.getSubscribedResourceTypesWithTypeUrl().values());
249-
for (XdsResourceType<?> type : subscribedResourceTypes) {
250-
Collection<String> resources = resourceStore.getSubscribedResources(serverInfo, type);
251-
if (resources != null) {
252-
adsStream.sendDiscoveryRequest(type, resources);
253-
}
303+
304+
if (adsStream == null) {
305+
startRpcStream();
254306
}
255-
xdsResponseHandler.handleStreamRestarted(serverInfo);
307+
308+
// Handling of ControlPlaneClient (CPC) management is triggered in readyHandler.
256309
}
257310
}
258311

@@ -262,7 +315,7 @@ XdsResourceType<?> fromTypeUrl(String typeUrl) {
262315
return resourceStore.getSubscribedResourceTypesWithTypeUrl().get(typeUrl);
263316
}
264317

265-
private class AdsStream implements EventHandler<DiscoveryResponse> {
318+
private class AdsStream implements XdsTransportFactory.EventHandler<DiscoveryResponse> {
266319
private boolean responseReceived;
267320
private boolean closed;
268321
// Response nonce for the most recently received discovery responses of each resource type.
@@ -279,6 +332,9 @@ private class AdsStream implements EventHandler<DiscoveryResponse> {
279332
private AdsStream() {
280333
this.call = xdsTransport.createStreamingCall(methodDescriptor.getFullMethodName(),
281334
methodDescriptor.getRequestMarshaller(), methodDescriptor.getResponseMarshaller());
335+
}
336+
337+
void start() {
282338
call.start(this);
283339
}
284340

@@ -363,10 +419,13 @@ public void onStatusReceived(final Status status) {
363419
final void handleRpcResponse(XdsResourceType<?> type, String versionInfo, List<Any> resources,
364420
String nonce) {
365421
checkNotNull(type, "type");
422+
366423
if (closed) {
367424
return;
368425
}
426+
369427
responseReceived = true;
428+
inError = false;
370429
respNonces.put(type, nonce);
371430
ProcessingTracker processingTracker = new ProcessingTracker(
372431
() -> call.startRecvMessage(), syncContext);
@@ -376,6 +435,10 @@ final void handleRpcResponse(XdsResourceType<?> type, String versionInfo, List<A
376435
}
377436

378437
private void handleRpcStreamClosed(Status status) {
438+
if (this == adsStream || adsStream == null) {
439+
lastStateWasReady = false;
440+
}
441+
379442
if (closed) {
380443
return;
381444
}
@@ -385,13 +448,16 @@ private void handleRpcStreamClosed(Status status) {
385448
// has never been initialized.
386449
retryBackoffPolicy = backoffPolicyProvider.get();
387450
}
451+
388452
// FakeClock in tests isn't thread-safe. Schedule the retry timer before notifying callbacks
389453
// to avoid TSAN races, since tests may wait until callbacks are called but then would run
390454
// concurrently with the stopwatch and schedule.
455+
391456
long elapsed = stopwatch.elapsed(TimeUnit.NANOSECONDS);
392457
long delayNanos = Math.max(0, retryBackoffPolicy.nextBackoffNanos() - elapsed);
393-
rpcRetryTimer = syncContext.schedule(
394-
new RpcRetryTask(), delayNanos, TimeUnit.NANOSECONDS, timeService);
458+
459+
rpcRetryTimer =
460+
syncContext.schedule(new RpcRetryTask(), delayNanos, TimeUnit.NANOSECONDS, timeService);
395461

396462
Status newStatus = status;
397463
if (responseReceived) {
@@ -410,6 +476,7 @@ private void handleRpcStreamClosed(Status status) {
410476
} else {
411477
// If the ADS stream is closed without ever having received a response from the server, then
412478
// the XdsClient should consider that a connectivity error (see gRFC A57).
479+
inError = true;
413480
if (status.isOk()) {
414481
newStatus = Status.UNAVAILABLE.withDescription(
415482
"ADS stream closed with OK before receiving a response");
@@ -420,10 +487,8 @@ private void handleRpcStreamClosed(Status status) {
420487
}
421488

422489
closed = true;
423-
xdsResponseHandler.handleStreamClosed(newStatus);
490+
xdsResponseHandler.handleStreamClosed(newStatus, !responseReceived);
424491
cleanUp();
425-
426-
logger.log(XdsLogLevel.INFO, "Retry ADS stream in {0} ns", delayNanos);
427492
}
428493

429494
private void close(Exception error) {
@@ -441,4 +506,5 @@ private void cleanUp() {
441506
}
442507
}
443508
}
509+
444510
}

xds/src/main/java/io/grpc/xds/client/XdsClient.java

+16-15
Original file line numberDiff line numberDiff line change
@@ -298,14 +298,6 @@ public Object getSecurityConfig() {
298298
throw new UnsupportedOperationException();
299299
}
300300

301-
/**
302-
* For all subscriber's for the specified server, if the resource hasn't yet been
303-
* resolved then start a timer for it.
304-
*/
305-
protected void startSubscriberTimersIfNeeded(ServerInfo serverInfo) {
306-
throw new UnsupportedOperationException();
307-
}
308-
309301
/**
310302
* Returns a {@link ListenableFuture} to the snapshot of the subscribed resources as
311303
* they are at the moment of the call.
@@ -407,25 +399,34 @@ void handleResourceResponse(
407399

408400
/** Called when the ADS stream is closed passively. */
409401
// Must be synchronized.
410-
void handleStreamClosed(Status error);
402+
void handleStreamClosed(Status error, boolean shouldTryFallback);
411403

412-
/** Called when the ADS stream has been recreated. */
413-
// Must be synchronized.
404+
/** Called when the ADS stream has established communication with the xds server.
405+
* Is expected to manage the ControlPlaneClients and cache updates associated with
406+
* moving to or from a fallback server.
407+
*
408+
* <p>Must be synchronized.
409+
*/
414410
void handleStreamRestarted(ServerInfo serverInfo);
415411
}
416412

417413
public interface ResourceStore {
414+
418415
/**
419-
* Returns the collection of resources currently subscribing to or {@code null} if not
420-
* subscribing to any resources for the given type.
416+
* Returns the collection of resources currently subscribed to which have an authority matching
417+
* one of those for which the ControlPlaneClient associated with the specified ServerInfo is
418+
* the active one, or {@code null} if no such resources are currently subscribed to.
421419
*
422420
* <p>Note an empty collection indicates subscribing to resources of the given type with
423421
* wildcard mode.
422+
*
423+
* @param serverInfo the xds server to get the resources from
424+
* @param type the type of the resources that should be retrieved
424425
*/
425426
// Must be synchronized.
426427
@Nullable
427-
Collection<String> getSubscribedResources(ServerInfo serverInfo,
428-
XdsResourceType<? extends ResourceUpdate> type);
428+
Collection<String> getSubscribedResources(
429+
ServerInfo serverInfo, XdsResourceType<? extends ResourceUpdate> type);
429430

430431
Map<String, XdsResourceType<?>> getSubscribedResourceTypesWithTypeUrl();
431432
}

0 commit comments

Comments
 (0)