
Commit 8ca2886

Enhance galera to interact over multiple clusters
This change adds a new parameter, "remote_node_map", to the Galera resource agent, which allows it to consider galera node names that live in other pacemaker clusters as part of its Galera quorum. To achieve this, it launches pacemaker commands (crm_attribute, crm_mon, crm_master) over SSH on the remote clusters in order to view and modify remote state variables. WIP.
1 parent 9d9cd48 commit 8ca2886
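
For context, a hypothetical pcs invocation wiring in the new parameter (node and host names are invented here, not taken from the commit):

    pcs resource create db galera enable_creation=true \
        wsrep_cluster_address="gcomm://pcmk1,pcmk2,rmt1" \
        remote_node_map="rmt1:root@rmt1.remote.example.com" \
        meta master-max=3 --master

Here rmt1 would be a galera node managed by a different pacemaker cluster, reachable via the SSH expression on the right-hand side of the mapping.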

File tree

1 file changed: heartbeat/galera (92 additions, 13 deletions)
@@ -25,7 +25,7 @@
 
 ##
 # README.
-# 
+#
 # This agent only supports being configured as a multistate Master
 # resource.
 #

@@ -49,15 +49,15 @@
 # pcs resource create db galera enable_creation=true \
 #    wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta master-max=3 --master
 #
-# By setting the 'enable_creation' option, the database will be automatically 
+# By setting the 'enable_creation' option, the database will be automatically
 # generated at startup. The meta attribute 'master-max=3' means that all 3
 # nodes listed in the wsrep_cluster_address list will be allowed to connect
 # to the galera cluster and perform replication.
 #
 # NOTE: If you have more nodes in the pacemaker cluster than you wish
 # to have in the galera cluster, make sure to use location constraints to prevent
 # pacemaker from attempting to place a galera instance on a node that is
-# not in the 'wsrep_cluster_address' list. 
+# not in the 'wsrep_cluster_address' list.
 #
 ##

@@ -68,6 +68,9 @@
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 . ${OCF_FUNCTIONS_DIR}/mysql-common.sh
 
+
+SSH_CMD="${SSH} -oStrictHostKeyChecking=no"
+
 NODENAME=$(ocf_attribute_target)
 
 # It is common for some galera instances to store
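
SSH_CMD assumes ${SSH} resolves to an ssh client binary; -oStrictHostKeyChecking=no keeps the first contact with an unknown remote host from blocking on an interactive prompt. Non-interactive logins still require key-based authentication to be provisioned out of band, e.g. (a hypothetical one-time setup, not part of this commit):

    # on each local cluster node, as the user the agent runs as:
    ssh-keygen -t ed25519 -N '' -f ~/.ssh/id_ed25519
    ssh-copy-id root@rmt1.remote.example.com    # repeat per host in remote_node_map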
@@ -226,11 +229,26 @@ pcmk1:node.1.galera;pcmk2:node.2.galera;pcmk3:node.3.galera
 
 where the galera resource started on node pcmk1 would be named
 node.1.galera in the wsrep_cluster_address
+
 </longdesc>
 <shortdesc lang="en">Pacemaker to Galera name mapping</shortdesc>
 <content type="string" default=""/>
 </parameter>
 
+<parameter name="remote_node_map" unique="0" required="0">
+<longdesc lang="en">
+A mapping of pacemaker node names to remote host SSH expressions.
+
+Allows pacemaker nodes in remote pacemaker clusters to be part of this
+Galera cluster:
+
+
+
+</longdesc>
+<shortdesc lang="en">Pacemaker to remote cluster nodes</shortdesc>
+<content type="string" default=""/>
+</parameter>
+
 <parameter name="check_user" unique="0" required="0">
 <longdesc lang="en">
 Cluster check user.
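
The longdesc above leaves its example blank. Judging by how the agent parses the value (entries separated by ';', each split on the first ':' into a node name and an SSH destination; see master_exists and get_remote_node below), a plausible value would be:

    # inferred format, not spelled out in the commit:
    #   <pacemaker node name>:<SSH destination>[;<name>:<destination>...]
    remote_node_map="pcmk1:root@ctrl1.remote.example.com;pcmk2:root@ctrl2.remote.example.com"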
@@ -283,7 +301,7 @@ set_bootstrap_node()
 {
     local node=$(ocf_attribute_target $1)
 
-    ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -v "true"
+    remote_crm_attribute $node -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -v "true"
 }
 
 clear_bootstrap_node()
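
The practical effect of the swap: when the target node appears in remote_node_map, the same attribute update runs on the remote cluster over SSH instead of locally. Roughly, assuming the invented mapping above, INSTANCE_ATTR_NAME resolving to "galera", and HA_SBIN_DIR to /usr/sbin:

    # unmapped node: unchanged local behaviour
    /usr/sbin/crm_attribute -N pcmk9 -l reboot --name "galera-bootstrap" -v "true"

    # mapped node: the identical command, executed on the remote cluster
    ssh -oStrictHostKeyChecking=no root@ctrl1.remote.example.com \
        /usr/sbin/crm_attribute -N pcmk1 -l reboot --name "galera-bootstrap" -v "true"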
@@ -310,7 +328,7 @@ clear_no_grastate()
 is_no_grastate()
 {
     local node=$(ocf_attribute_target $1)
-    ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" --quiet 2>/dev/null
+    remote_crm_attribute $node -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" --quiet 2>/dev/null
 }
 
 clear_last_commit()

@@ -329,8 +347,8 @@ get_last_commit()
 
     if [ -z "$node" ]; then
         ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null
-    else
-        ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null
+    else
+        remote_crm_attribute $node -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null
     fi
 }

@@ -351,7 +369,7 @@ get_safe_to_bootstrap()
     if [ -z "$node" ]; then
         ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" --quiet 2>/dev/null
     else
-        ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" --quiet 2>/dev/null
+        remote_crm_attribute $node -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" --quiet 2>/dev/null
     fi
 }

@@ -410,16 +428,34 @@ master_exists()
     fi
     # determine if a master instance is already up and is healthy
     crm_mon --as-xml | grep "resource.*id=\"${INSTANCE_ATTR_NAME}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
-    return $?
+
+    local master_exists_local=$?
+
+    if [ $master_exists_local -eq 0 ]; then
+        ocf_log info "Detected that a master exists for the local cluster"
+    fi
+
+    # if not, and we have remote nodes, check those also
+    if [ $master_exists_local -ne 0 ] && [ -n "$OCF_RESKEY_remote_node_map" ]; then
+        for remote_ssh in $(echo "$OCF_RESKEY_remote_node_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '{print $2;}' | sort | uniq); do
+            $SSH_CMD $remote_ssh crm_mon --as-xml | grep "resource.*id=\"${INSTANCE_ATTR_NAME}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
+            if [ $? -eq 0 ]; then
+                ocf_log info "Detected that a master exists for the remote cluster $remote_ssh"
+                return 0
+            fi
+        done
+    fi
+
+    return $master_exists_local
 }
 
 clear_master_score()
 {
     local node=$(ocf_attribute_target $1)
     if [ -z "$node" ]; then
         $CRM_MASTER -D
-    else
-        $CRM_MASTER -D -N $node
+    else
+        remote_crm_master $node -D
     fi
 }

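The loop deduplicates SSH destinations, so each remote cluster is polled once even when several nodes map to the same host. The field-extraction pipeline can be checked in isolation (with the invented mapping from above):

    $ echo "pcmk1:root@ctrl1.remote;pcmk2:root@ctrl1.remote;pcmk3:root@ctrl2.remote" \
          | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '{print $2;}' | sort | uniq
    root@ctrl1.remote
    root@ctrl2.remote
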
@@ -429,8 +465,51 @@ set_master_score()
 
     if [ -z "$node" ]; then
         $CRM_MASTER -v 100
-    else
-        $CRM_MASTER -N $node -v 100
+    else
+        remote_crm_master $node -v 100
+    fi
+}
+
+get_remote_node()
+{
+    local node=$1
+    if [ -z "$OCF_RESKEY_remote_node_map" ]; then
+        return
+    else
+        local retval=$(echo "$OCF_RESKEY_remote_node_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '$1=="'"$node"'" {print $2;exit}')
+        if [ -z "$retval" ]; then
+            return
+        else
+            echo $retval
+        fi
+    fi
+}
+
+remote_crm_master()
+{
+    local node=$1
+    shift
+
+    local remote_ssh=$(get_remote_node $node)
+
+    if [ -z "$remote_ssh" ]; then
+        $CRM_MASTER -N $node "$@"
+    else
+        $SSH_CMD $remote_ssh $CRM_MASTER -r ${INSTANCE_ATTR_NAME} -N $node "$@"
+    fi
+}
+
+remote_crm_attribute()
+{
+    local node=$1
+    shift
+
+    local remote_ssh=$(get_remote_node $node)
+
+    if [ -z "$remote_ssh" ]; then
+        ${HA_SBIN_DIR}/crm_attribute -N $node "$@"
+    else
+        $SSH_CMD $remote_ssh ${HA_SBIN_DIR}/crm_attribute -N $node "$@"
     fi
 }

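get_remote_node is the lookup primitive both wrappers share: it prints the SSH expression mapped to a pacemaker node name, or nothing when the node is unmapped, in which case the wrappers fall back to plain local commands. A quick sanity check, assuming the agent is sourced with the invented mapping from earlier:

    # OCF_RESKEY_remote_node_map="pcmk1:root@ctrl1.remote;pcmk2:root@ctrl2.remote"
    $ get_remote_node pcmk2
    root@ctrl2.remote
    $ get_remote_node pcmk9     # unmapped: prints nothing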