Skip to content

Commit 693ac5b

Browse files
DmitryLukyanov authored and rstam committed
CSHARP-3173: CSharp driver does not fallback to a healthy mongos after one is unavailable.
1 parent cfb20b2 commit 693ac5b

File tree

3 files changed

+375
-1
lines changed

3 files changed

+375
-1
lines changed

src/MongoDB.Driver.Core/Core/Servers/Server.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,13 @@ private void HandleBeforeHandshakeCompletesException(IConnection connection, Exc
331331
return; // stale generation number
332332
}
333333

334+
if (ex is MongoConnectionException mongoConnectionException &&
335+
mongoConnectionException.IsNetworkException &&
336+
!mongoConnectionException.ContainsSocketTimeoutException)
337+
{
338+
_monitor.CancelCurrentCheck();
339+
}
340+
334341
if (ex is MongoConnectionException connectionException &&
335342
(connectionException.IsNetworkException || connectionException.ContainsSocketTimeoutException))
336343
{

src/MongoDB.Driver.Core/Core/Servers/ServerMonitor.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,11 @@ private async Task HeartbeatAsync(CancellationToken cancellationToken)
378378

379379
newDescription = newDescription.With(reasonChanged: "Heartbeat", lastHeartbeatTimestamp: DateTime.UtcNow);
380380

381-
SetDescription(newDescription);
381+
lock (_lock)
382+
{
383+
cancellationToken.ThrowIfCancellationRequested();
384+
SetDescription(newDescription);
385+
}
382386

383387
processAnother =
384388
// serverSupportsStreaming
Lines changed: 363 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,363 @@
1+
/* Copyright 2020-present MongoDB Inc.
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
using System;
17+
using System.Linq;
18+
using System.Net;
19+
using System.Threading;
20+
using System.Threading.Tasks;
21+
using FluentAssertions;
22+
using MongoDB.Bson;
23+
using MongoDB.Bson.TestHelpers;
24+
using MongoDB.Bson.TestHelpers.XunitExtensions;
25+
using MongoDB.Driver.Core.Clusters;
26+
using MongoDB.Driver.Core.Clusters.ServerSelectors;
27+
using MongoDB.Driver.Core.Configuration;
28+
using MongoDB.Driver.Core.ConnectionPools;
29+
using MongoDB.Driver.Core.Connections;
30+
using MongoDB.Driver.Core.Events;
31+
using MongoDB.Driver.Core.Helpers;
32+
using MongoDB.Driver.Core.Servers;
33+
using MongoDB.Driver.Core.WireProtocol.Messages;
34+
using MongoDB.Driver.Core.WireProtocol.Messages.Encoders;
35+
using Moq;
36+
using Xunit;
37+
38+
namespace MongoDB.Driver.Core.Tests.Jira
39+
{
40+
public class CSharp3173Tests
41+
{
42+
// Shared, immutable fixture data. The declaration order matters: the server ids below are
// built from the cluster id and the endpoints, and static initializers run top to bottom.

// The cluster under test is created in sharded mode.
private static readonly ClusterConnectionMode __clusterConnectionMode = ClusterConnectionMode.Sharded;
// Cluster id that ForceClusterId injects into the cluster so event assertions can compare against it.
private static readonly ClusterId __clusterId = new ClusterId();
// Endpoint of the server that is set up as the failing one (see CreateAndSetupCluster).
private static readonly EndPoint __endPoint1 = new DnsEndPoint("localhost", 27017);
// Endpoint of the server that is set up as the healthy one (see CreateAndSetupCluster).
private static readonly EndPoint __endPoint2 = new DnsEndPoint("localhost", 27018);
// Heartbeat interval handed to the server monitor settings; the test sleeps slightly longer than this.
private static readonly TimeSpan __heartbeatInterval = TimeSpan.FromMilliseconds(200);
private static readonly ServerId __serverId1 = new ServerId(__clusterId, __endPoint1);
private static readonly ServerId __serverId2 = new ServerId(__clusterId, __endPoint2);
49+
50+
// Regression test for CSHARP-3173: a network error raised while opening a command connection
// (i.e. before the handshake completes) must leave the failing server marked as Unknown, even if
// an isMaster response that was already in flight arrives afterwards, so that the other server
// can be selected instead.
[Theory]
[ParameterAttributeData]
public void Ensure_command_network_error_before_hadnshake_is_correctly_handled([Values(false, true)] bool async, [Values(false, true)] bool streamable)
{
    // gate that holds back the failing server's isMaster response until the network error has happened
    var hasNetworkErrorBeenTriggered = new TaskCompletionSource<bool>();
    // gate that parks the failing monitor connection's last OpenAsync, so that no unexpected events
    // show up between the end of the test body and the cluster being disposed
    var hasClusterBeenDisposed = new TaskCompletionSource<bool>();
    var eventCapturer = new EventCapturer().Capture<ServerDescriptionChangedEvent>();

    EndPoint initialSelectedEndpoint = null;
    using (var cluster = CreateAndSetupCluster(hasNetworkErrorBeenTriggered, hasClusterBeenDisposed, eventCapturer, streamable))
    {
        ForceClusterId(cluster, __clusterId);

        // 0. Initial heartbeat via `connection.Open`.
        //    The next isMaster response will be delayed by the wait inside the mocked ReceiveMessageAsync.
        cluster.Initialize();

        var endPoint1Selector = CreateWritableServerAndEndPointSelector(__endPoint1);
        var failingServer = cluster.SelectServer(endPoint1Selector, CancellationToken.None);
        initialSelectedEndpoint = failingServer.EndPoint;
        initialSelectedEndpoint.Should().Be(__endPoint1);

        // give the monitor enough time to start the next isMaster check
        var nextCheckDelay = __heartbeatInterval + TimeSpan.FromMilliseconds(50);
        Thread.Sleep(nextCheckDelay);

        // 1. Trigger the command network error BEFORE the handshake. At this point the isMaster
        //    response is already delayed until `hasNetworkErrorBeenTriggered.SetResult` is called.
        Exception exception;
        if (!async)
        {
            exception = Record.Exception(() => failingServer.GetChannel(CancellationToken.None));
        }
        else
        {
            exception = Record.Exception(() => failingServer.GetChannelAsync(CancellationToken.None).GetAwaiter().GetResult());
        }

        var connectionException = exception.Should().BeOfType<MongoConnectionException>().Subject;
        connectionException.Message.Should().Be("DnsException");

        // 2. Let the in-progress isMaster response complete.
        hasNetworkErrorBeenTriggered.SetResult(true);

        // if the bug is present, the delayed isMaster check needs a moment to overwrite the description
        Thread.Sleep(100);
        var knownEndPoints = cluster.Description.Servers
            .Where(s => s.Type != ServerType.Unknown)
            .Select(s => s.EndPoint);
        if (knownEndPoints.Contains(initialSelectedEndpoint))
        {
            throw new Exception($"The type of failed server {initialSelectedEndpoint} has not been changed to Unknown.");
        }

        // a new server must be selectable, and it must be the other endpoint
        var fallbackServer = cluster.SelectServer(WritableServerSelector.Instance, CancellationToken.None);
        fallbackServer.EndPoint.Should().Be(__endPoint2);

        // the 4th event is the MongoConnectionException heartbeat, which triggers the next isMaster check immediately
        eventCapturer.WaitForOrThrowIfTimeout(events => events.Count() >= 4, TimeSpan.FromSeconds(5));
    }
    // Cut off unrelated events: stops the wait in the last mocked OpenAsync result.
    hasClusterBeenDisposed.SetCanceled();

    // Event assertions.
    // The two initial heartbeat events may arrive in either order, so they are sorted by port first.
    var firstInitialEvent = eventCapturer.Next().Should().BeOfType<ServerDescriptionChangedEvent>().Subject;
    var secondInitialEvent = eventCapturer.Next().Should().BeOfType<ServerDescriptionChangedEvent>().Subject;
    var initialHeartbeatEvents = new[] { firstInitialEvent, secondInitialEvent }
        .OrderBy(changed => ((DnsEndPoint)changed.NewDescription.EndPoint).Port)
        .ToList();
    AssertEvent(initialHeartbeatEvents[0], __endPoint1, ServerType.ShardRouter, "Heartbeat");
    AssertEvent(initialHeartbeatEvents[1], __endPoint2, ServerType.ShardRouter, "Heartbeat"); // the next 27018 events will be suppressed

    AssertNextEvent(eventCapturer, initialSelectedEndpoint, ServerType.Unknown, "InvalidatedBecause:ChannelException during handshake: MongoDB.Driver.MongoConnectionException: DnsException");
    AssertNextEvent(eventCapturer, initialSelectedEndpoint, ServerType.Unknown, "Heartbeat", typeof(MongoConnectionException));
    eventCapturer.Any().Should().BeFalse();
}
130+
131+
// private methods
132+
// Verifies a captured ServerDescriptionChangedEvent: cluster id, endpoint, server type, the state
// implied by that type (Unknown => Disconnected, anything else => Connected), the heartbeat
// exception (must be null unless exceptionType is given) and the beginning of the change reason.
private void AssertEvent(ServerDescriptionChangedEvent @event, EndPoint expectedEndPoint, ServerType expectedServerType, string expectedReasonStart, Type exceptionType = null)
{
    var expectedState = expectedServerType == ServerType.Unknown
        ? ServerState.Disconnected
        : ServerState.Connected;

    @event.ServerId.ClusterId.Should().Be(__clusterId);

    var description = @event.NewDescription;
    description.EndPoint.Should().Be(expectedEndPoint);
    description.Type.Should().Be(expectedServerType);
    description.State.Should().Be(expectedState);

    if (exceptionType == null)
    {
        description.HeartbeatException.Should().BeNull();
    }
    else
    {
        description.HeartbeatException.Should().BeOfType(exceptionType);
    }

    description.ReasonChanged.Should().StartWith(expectedReasonStart);
}
148+
149+
// Takes the next captured event, requires it to be a ServerDescriptionChangedEvent and
// checks it with AssertEvent.
private void AssertNextEvent(EventCapturer eventCapturer, EndPoint expectedEndPoint, ServerType expectedServerType, string expectedReasonStart, Type exceptionType = null)
{
    var capturedEvent = eventCapturer.Next();
    var descriptionChanged = capturedEvent.Should().BeOfType<ServerDescriptionChangedEvent>().Subject;

    AssertEvent(descriptionChanged, expectedEndPoint, expectedServerType, expectedReasonStart, exceptionType);
}
154+
155+
// Builds a mocked connection pool factory that returns one mocked pool per server.
// IsHealthy is not consulted here: every pool hands out a connection handle whose
// Open/OpenAsync throw the "DnsException" MongoConnectionException.
private IConnectionPoolFactory CreateAndSetupConnectionPoolFactory(params (ServerId ServerId, EndPoint Endpoint, bool IsHealthy)[] serverInfoCollection)
{
    var poolFactoryMock = new Mock<IConnectionPoolFactory>();

    foreach (var (serverId, endPoint, _) in serverInfoCollection)
    {
        // the factory returns this pool for exactly this server id / endpoint pair
        var poolMock = new Mock<IConnectionPool>();
        poolFactoryMock.Setup(f => f.CreateConnectionPool(serverId, endPoint)).Returns(poolMock.Object);

        // command connection: opening it fails with the dns exception (sync and async)
        var handleMock = new Mock<IConnectionHandle>();
        handleMock.SetupGet(h => h.ConnectionId).Returns(new ConnectionId(serverId));
        handleMock
            .Setup(h => h.Open(It.IsAny<CancellationToken>()))
            .Throws(CreateDnsException(handleMock.Object.ConnectionId));
        handleMock
            .Setup(h => h.OpenAsync(It.IsAny<CancellationToken>()))
            .Throws(CreateDnsException(handleMock.Object.ConnectionId));

        // the pool always hands out that same failing connection
        var handle = handleMock.Object;
        poolMock
            .Setup(p => p.AcquireConnection(It.IsAny<CancellationToken>()))
            .Returns(handle);
        poolMock
            .Setup(p => p.AcquireConnectionAsync(It.IsAny<CancellationToken>()))
            .Returns(Task.FromResult(handle));
    }

    return poolFactoryMock.Object;
}
198+
199+
// Builds a mocked connection factory for the server monitors: for each server it returns a
// mocked connection configured by SetupServerMonitorConnection according to the server's
// IsHealthy flag.
private IConnectionFactory CreateAndSetupServerMonitorConnectionFactory(
    TaskCompletionSource<bool> hasNetworkErrorBeenTriggered,
    TaskCompletionSource<bool> hasClusterBeenDisposed,
    bool streamable,
    params (ServerId ServerId, EndPoint Endpoint, bool IsHealthy)[] serverInfoCollection)
{
    var factoryMock = new Mock<IConnectionFactory>();

    foreach (var (serverId, endPoint, isHealthy) in serverInfoCollection)
    {
        var monitorConnectionMock = new Mock<IConnection>();
        SetupServerMonitorConnection(monitorConnectionMock, serverId, isHealthy, hasNetworkErrorBeenTriggered, hasClusterBeenDisposed, streamable);

        factoryMock
            .Setup(f => f.CreateConnection(serverId, endPoint))
            .Returns(monitorConnectionMock.Object);
    }

    return factoryMock.Object;
}
218+
219+
// Creates a MultiServerCluster over two mocked servers: __endPoint1 is configured as the
// failing one and __endPoint2 as the healthy one. All factories publish their events to the
// supplied eventCapturer.
private MultiServerCluster CreateAndSetupCluster(TaskCompletionSource<bool> hasNetworkErrorBeenTriggered, TaskCompletionSource<bool> hasClusterBeenDisposed, EventCapturer eventCapturer, bool streamable)
{
    (ServerId ServerId, EndPoint Endpoint, bool IsHealthy)[] serverInfoCollection = new[]
    {
        (__serverId1, __endPoint1, false),
        (__serverId2, __endPoint2, true),
    };
    var endPoints = serverInfoCollection.Select(info => info.Endpoint).ToArray();

    // settings
    var clusterSettings = new ClusterSettings(
        connectionMode: __clusterConnectionMode,
        serverSelectionTimeout: TimeSpan.FromSeconds(30),
        endPoints: endPoints);
    var serverMonitorSettings = new ServerMonitorSettings(
        connectTimeout: TimeSpan.FromMilliseconds(1),
        heartbeatInterval: __heartbeatInterval);
    var serverSettings = new ServerSettings(serverMonitorSettings.HeartbeatInterval);

    // mocked connection layers, wired into real monitor/server factories
    var connectionPoolFactory = CreateAndSetupConnectionPoolFactory(serverInfoCollection);
    var monitorConnectionFactory = CreateAndSetupServerMonitorConnectionFactory(hasNetworkErrorBeenTriggered, hasClusterBeenDisposed, streamable, serverInfoCollection);
    var serverMonitorFactory = new ServerMonitorFactory(serverMonitorSettings, monitorConnectionFactory, eventCapturer);
    var serverFactory = new ServerFactory(__clusterConnectionMode, serverSettings, connectionPoolFactory, serverMonitorFactory, eventCapturer);

    return new MultiServerCluster(clusterSettings, serverFactory, eventCapturer);
}
245+
246+
// Creates the MongoConnectionException (message "DnsException") that the mocked connections throw.
private Exception CreateDnsException(ConnectionId connectionId) =>
    new MongoConnectionException(connectionId, "DnsException");
250+
251+
// Composes the writable-server selector with a selector for the given endpoint, in that order.
private IServerSelector CreateWritableServerAndEndPointSelector(EndPoint endPoint)
{
    var selectors = new IServerSelector[]
    {
        WritableServerSelector.Instance,
        new EndPointServerSelector(endPoint)
    };

    return new CompositeServerSelector(selectors);
}
261+
262+
// Overwrites the cluster's private `_clusterId` and `_description` fields via reflection so that
// the cluster reports the given id, with a fresh initial description created for that id.
private void ForceClusterId(MultiServerCluster cluster, ClusterId clusterId)
{
    var initialDescription = ClusterDescription.CreateInitial(clusterId, __clusterConnectionMode);

    Reflector.SetFieldValue(cluster, "_clusterId", clusterId);
    Reflector.SetFieldValue(cluster, "_description", initialDescription);
}
267+
268+
// Configures a mocked server-monitor connection for one server.
//
//   mockConnection               - the mock to configure
//   serverId                     - server the connection belongs to; also supplies the endpoint
//   isHealthy                    - true: every open/receive succeeds immediately;
//                                  false: scripted failure sequence (see SetupFailedConnection)
//   hasNetworkErrorBeenTriggered - the failing connection's isMaster responses wait on this task
//   hasClusterBeenDisposed       - the failing connection's last OpenAsync waits on this task
//   streamable                   - selects the reported server version ("4.4" vs "2.6"), whether a
//                                  topologyVersion is included, and the response message format
private void SetupServerMonitorConnection(
    Mock<IConnection> mockConnection,
    ServerId serverId,
    bool isHealthy,
    TaskCompletionSource<bool> hasNetworkErrorBeenTriggered,
    TaskCompletionSource<bool> hasClusterBeenDisposed,
    bool streamable)
{
    var connectionId = new ConnectionId(serverId);
    var serverVersion = streamable ? "4.4" : "2.6";
    var isMasterDocument = new BsonDocument
    {
        { "ok", 1 },
        { "minWireVersion", 6 },
        { "maxWireVersion", 7 },
        { "msg", "isdbgrid" },
        { "version", serverVersion },
        // three-argument form: the element is conditional on `streamable`
        { "topologyVersion", new TopologyVersion(ObjectId.Empty, 1).ToBsonDocument(), streamable }
    };

    mockConnection.SetupGet(c => c.ConnectionId).Returns(connectionId);
    mockConnection.SetupGet(c => c.EndPoint).Returns(serverId.EndPoint);

    mockConnection
        .SetupGet(c => c.Description)
        .Returns(
            new ConnectionDescription(
                connectionId,
                new IsMasterResult(isMasterDocument),
                new BuildInfoResult(new BsonDocument("version", serverVersion))));

    // Builds a fresh isMaster response on every call; the message shape depends on `streamable`.
    Func<ResponseMessage> commandResponseAction;
    if (streamable)
    {
        commandResponseAction = () => MessageHelper.BuildCommandResponse(new RawBsonDocument(isMasterDocument.ToBson()), moreToCome: true);
    }
    else
    {
        commandResponseAction = () => MessageHelper.BuildReply(new RawBsonDocument(isMasterDocument.ToBson()));
    }

    if (isHealthy)
    {
        SetupHealthyConnection(mockConnection);
    }
    else
    {
        SetupFailedConnection(mockConnection);
    }

    void SetupFailedConnection(Mock<IConnection> mockFaultyConnection)
    {
        // sync path is not used in serverMonitor
        mockFaultyConnection
            .SetupSequence(c => c.OpenAsync(It.IsAny<CancellationToken>()))
            .Returns(Task.FromResult(true)) // the first isMaster configuration passes
            .Returns(Task.FromResult(true)) // RTT
            .Throws(CreateDnsException(connectionId)) // the dns exception. Should be triggered after Invalidate
            .Returns(async () =>
            {
                await WaitForTaskOrTimeout(hasClusterBeenDisposed.Task, TimeSpan.FromMinutes(1), "cluster dispose").ConfigureAwait(false);
            }); // ensure that there are no unrelated events

        mockFaultyConnection
            .Setup(c => c.ReceiveMessageAsync(It.IsAny<int>(), It.IsAny<IMessageEncoderSelector>(), It.IsAny<MessageEncoderSettings>(), It.IsAny<CancellationToken>()))
            .Returns(async () =>
            {
                // wait until the command network error has been triggered
                await WaitForTaskOrTimeout(hasNetworkErrorBeenTriggered.Task, TimeSpan.FromMinutes(1), "network error").ConfigureAwait(false);
                return commandResponseAction();
            });
    }

    void SetupHealthyConnection(Mock<IConnection> mockHealthyConnection)
    {
        mockHealthyConnection.Setup(c => c.Open(It.IsAny<CancellationToken>())); // no action is required
        mockHealthyConnection.Setup(c => c.OpenAsync(It.IsAny<CancellationToken>())).Returns(Task.FromResult(true)); // no action is required
        mockHealthyConnection
            .Setup(c => c.ReceiveMessage(It.IsAny<int>(), It.IsAny<IMessageEncoderSelector>(), It.IsAny<MessageEncoderSettings>(), It.IsAny<CancellationToken>()))
            .Returns(commandResponseAction);
        // configure the mock that was passed in, not the captured outer variable
        mockHealthyConnection
            .Setup(c => c.ReceiveMessageAsync(It.IsAny<int>(), It.IsAny<IMessageEncoderSelector>(), It.IsAny<MessageEncoderSettings>(), It.IsAny<CancellationToken>()))
            .ReturnsAsync(commandResponseAction);
    }
}
353+
354+
// Waits until `task` completes (in any final state: the task itself is not awaited, so a canceled
// or faulted task does not throw here) or until `timeout` elapses.
//
//   task       - the task to wait for
//   timeout    - maximum time to wait
//   testTarget - short description of what is being waited for; used in the timeout message
//
// Throws an Exception when the timeout elapses first.
private async Task WaitForTaskOrTimeout(Task task, TimeSpan timeout, string testTarget)
{
    using (var delayCancellationSource = new CancellationTokenSource())
    {
        var delayTask = Task.Delay(timeout, delayCancellationSource.Token);

        var resultedTask = await Task.WhenAny(task, delayTask).ConfigureAwait(false);
        if (resultedTask != task)
        {
            throw new Exception($"The waiting for {testTarget} is exceeded timeout {timeout}.");
        }

        // The awaited task won: cancel the delay so its timer does not stay pending
        // for the remainder of the timeout.
        delayCancellationSource.Cancel();
    }
}
362+
}
363+
}

0 commit comments

Comments
 (0)