Skip to content

Commit a732d07

Browse files
authored
Merge pull request #8828 from dannyzaken/danny-fixes
By default disable node disconnect on error and IO detention
2 parents 4e1eeeb + e5754e7 commit a732d07

File tree

2 files changed

+21
-11
lines changed

2 files changed

+21
-11
lines changed

config.js

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,13 @@ config.NODES_FREE_SPACE_RESERVE = 100 * (1024 ** 2);
9191
// don't use agents with less than reserve + 5 GB
9292
config.MINIMUM_AGENT_TOTAL_STORAGE = config.NODES_FREE_SPACE_RESERVE + (5 * (1024 ** 3));
9393

94-
config.NODE_IO_DETENTION_DISABLE = false;
94+
95+
// By default, do not disconnect nodes on error; doing so caused more issues than benefits.
96+
config.NODES_DISCONNECT_ON_ERROR = false;
97+
98+
// By default, do not detain nodes on I/O errors; detention caused more issues than benefits.
99+
config.NODE_IO_DETENTION_DISABLE = true;
100+
95101
config.NODE_IO_DETENTION_THRESHOLD = 60 * 1000;
96102
config.NODE_IO_DETENTION_RECENT_ISSUES = 5;
97103
// Picked two because minimum of nodes per pool is three

src/server/node_services/nodes_monitor.js

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ class NodesMonitor extends EventEmitter {
247247
return P.resolve()
248248
.then(() => this._run())
249249
.then(() => {
250-
// do nothing.
250+
// do nothing.
251251
});
252252
}
253253

@@ -1012,7 +1012,7 @@ class NodesMonitor extends EventEmitter {
10121012
})
10131013
.then(() => this._update_nodes_store('force'))
10141014
.then(() => {
1015-
// do nothing.
1015+
// do nothing.
10161016
});
10171017
}
10181018

@@ -1236,15 +1236,15 @@ class NodesMonitor extends EventEmitter {
12361236
if (item.node.deleted) return;
12371237
if (!item.connection) return;
12381238
if (!item.agent_info) return;
1239-
//The node should be set as enable if it is not decommissioned.
1239+
//The node should be set as enable if it is not decommissioned.
12401240
const should_enable = !item.node.decommissioned;
12411241
const item_pool = system_store.data.get_by_id(item.node.pool);
12421242
const location_info = {
12431243
node_id: String(item.node._id),
12441244
host_id: String(item.node.host_id),
12451245
pool_id: String(item.node.pool),
12461246
};
1247-
// We should only add region if it is defined.
1247+
// We should only add region if it is defined.
12481248
if (item_pool && !_.isUndefined(item_pool.region)) location_info.region = item_pool.region;
12491249
// We should change the service enable field if the field is not equal to the decommissioned decision.
12501250
const service_enabled_not_changed = (item.node.enabled === should_enable);
@@ -3373,12 +3373,16 @@ class NodesMonitor extends EventEmitter {
33733373
'node', item.node.name,
33743374
'issues_report', item.node.issues_report,
33753375
'block_report', block_report);
3376-
// disconnect from the node to force reconnect
3377-
// only disconnect if enough time passed since last disconnect to avoid amplification of errors in R\W flows
3378-
const DISCONNECT_GRACE_PERIOD = 2 * 60 * 1000; // 2 minutes grace before another disconnect
3379-
if (!item.disconnect_time || item.disconnect_time + DISCONNECT_GRACE_PERIOD < Date.now()) {
3380-
dbg.log0('disconnecting node to force reconnect. node:', item.node.name);
3381-
this._disconnect_node(item);
3376+
3377+
3378+
if (config.NODES_DISCONNECT_ON_ERROR) {
3379+
// disconnect from the node to force reconnect
3380+
// only disconnect if enough time passed since last disconnect to avoid amplification of errors in R\W flows
3381+
const DISCONNECT_GRACE_PERIOD = 2 * 60 * 1000; // 2 minutes grace before another disconnect
3382+
if (!item.disconnect_time || item.disconnect_time + DISCONNECT_GRACE_PERIOD < Date.now()) {
3383+
dbg.log0('disconnecting node to force reconnect. node:', item.node.name);
3384+
this._disconnect_node(item);
3385+
}
33823386
}
33833387
}
33843388
}

0 commit comments

Comments
 (0)