Gitweb: http://git.fedorahosted.org/git/?p=fence-agents.git;a=commitdiff;h=9ca5b914…
Commit: 9ca5b9143900968bab55104a41b8074758a62181
Parent: 0000000000000000000000000000000000000000
Author: Marek 'marx' Grac <mgrac(a)redhat.com>
AuthorDate: 2012-07-27 06:50 +0000
Committer: Marek 'marx' Grac <mgrac(a)redhat.com>
CommitterDate: 2012-07-27 06:50 +0000
annotated tag: v3.1.9 has been created
at 9ca5b9143900968bab55104a41b8074758a62181 (tag)
tagging cd73f094f7c163aca57a1dc9d5138fb468846835 (commit)
replaces v3.1.8
v3.1.9 release
Fabio M. Di Nitto (2):
test commit
build: fix dist_man_* vs man_*
Lon Hohberger (1):
Add iPDU fencing agent for model 46M4002
Marek 'marx' Grac (8):
fence_brocade: support option action on STDIN, originally only operation was supported
fence agents: Fix unique attribute for agents which are not based on fencing library
fence agents: autodetect of EOL in fence agents
fence_hpblade: Fence agent for HP BladeSystem
fence agents: Some agents do not support action=metadata on STDIN
fence_ipdu: Minor fixes to fence agent
Add fence agents specific for HP iLO2, iLO3, IMM and iDrac
fence_ilo2 was not correctly cleaned by Makefile
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=4b893d2a9f46d…
Commit: 4b893d2a9f46d643859fa50856ac63e0cdbd4a02
Parent: 982f2aa377fd1c0471b714945c401fdfe86e9bc1
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Mon Jul 9 08:33:37 2012 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Fri Jul 27 08:47:10 2012 +0200
qdiskd: restrict master_wins to 2 node cluster
given enough mingling of cluster.conf it was possible to
break quorum rule #1: there is only one quorum in a cluster at
any given time.
this change restricts master_wins to 2 node cluster only
and provides extra feedback to the user (via logging) on why
the mode is disabled.
Resolves: rhbz#838047
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Reviewed-by: Lon Hohberger <lhh(a)redhat.com>
---
cman/man/qdisk.5 | 5 +++--
cman/qdisk/disk.h | 1 +
cman/qdisk/main.c | 22 +++++++++++++++-------
3 files changed, 19 insertions(+), 9 deletions(-)
diff --git a/cman/man/qdisk.5 b/cman/man/qdisk.5
index ca974fa..938ed69 100644
--- a/cman/man/qdisk.5
+++ b/cman/man/qdisk.5
@@ -297,8 +297,9 @@ and qdiskd's timeout (interval*tko) should be less than half of
Totem's token timeout. See section 3.3.1 for more information.
This option only takes effect if there are no heuristics
-configured. Usage of this option in configurations with more than
-two cluster nodes is undefined and should not be done.
+configured and it is valid only for 2 node cluster.
+This option is automatically disabled if heuristics are
+defined or cluster has more than 2 nodes configured.
In a two-node cluster with no heuristics and no defined vote
count (see above), this mode is turned by default. If enabled in
diff --git a/cman/qdisk/disk.h b/cman/qdisk/disk.h
index 6bed41d..1d8f7c8 100644
--- a/cman/qdisk/disk.h
+++ b/cman/qdisk/disk.h
@@ -252,6 +252,7 @@ typedef struct {
int qc_master; /* Master?! */
int qc_config;
int qc_token_timeout;
+ int qc_auto_votes;
disk_node_state_t qc_disk_status;
disk_node_state_t qc_status;
run_flag_t qc_flags;
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index 594e9e8..47ef5d2 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -1495,7 +1495,7 @@ auto_qdisk_votes(int desc)
logt_print(LOG_ERR, "Unable to determine qdiskd votes "
"automatically\n");
else
- logt_print(LOG_DEBUG, "Setting votes to %d\n", ret);
+ logt_print(LOG_DEBUG, "Setting autocalculated votes to %d\n", ret);
return (ret);
}
@@ -1657,6 +1657,8 @@ get_dynamic_config_data(qd_ctx *ctx, int ccsfd)
ctx->qc_flags &= ~RF_AUTO_VOTES;
}
+ ctx->qc_auto_votes = auto_qdisk_votes(ccsfd);
+
snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_votes = atoi(val);
@@ -1664,7 +1666,7 @@ get_dynamic_config_data(qd_ctx *ctx, int ccsfd)
if (ctx->qc_votes < 0)
ctx->qc_votes = 0;
} else {
- ctx->qc_votes = auto_qdisk_votes(ccsfd);
+ ctx->qc_votes = ctx->qc_auto_votes;
if (ctx->qc_votes < 0) {
if (ctx->qc_config) {
logt_print(LOG_WARNING, "Unable to determine "
@@ -1930,15 +1932,21 @@ get_config_data(qd_ctx *ctx, struct h_data *h, int maxh, int *cfh)
*cfh = configure_heuristics(ccsfd, h, maxh,
ctx->qc_interval * (ctx->qc_tko - 1));
- if (*cfh) {
- if (ctx->qc_flags & RF_MASTER_WINS) {
- logt_print(LOG_WARNING, "Master-wins mode disabled\n");
+ if (ctx->qc_flags & RF_MASTER_WINS) {
+ if (*cfh) {
+ logt_print(LOG_WARNING, "Master-wins mode disabled "
+ "(not compatible with heuristics)\n");
+ ctx->qc_flags &= ~RF_MASTER_WINS;
+ }
+ if (ctx->qc_auto_votes != 1) {
+ logt_print(LOG_WARNING, "Master-wins mode disabled "
+ "(not compatible with more than 2 nodes)\n");
ctx->qc_flags &= ~RF_MASTER_WINS;
}
} else {
if (ctx->qc_flags & RF_AUTO_VOTES &&
- !(ctx->qc_flags & RF_MASTER_WINS) &&
- ctx->qc_votes == 1) {
+ !*cfh &&
+ ctx->qc_auto_votes == 1) {
/* Two node cluster, no heuristics, 1 vote for
* quorum disk daemon. Safe to enable master-wins.
* In fact, qdiskd without master-wins in this config
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=ebac037f4fa11…
Commit: ebac037f4fa11de5f803c6025c5b182c962e84d1
Parent: c65a13a82189f38bd80d5c52924d09a7d98daa37
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Mon Jul 9 08:33:37 2012 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Fri Jul 27 08:45:37 2012 +0200
qdiskd: restrict master_wins to 2 node cluster
given enough mingling of cluster.conf it was possible to
break quorum rule #1: there is only one quorum in a cluster at
any given time.
this change restricts master_wins to 2 node cluster only
and provides extra feedback to the user (via logging) on why
the mode is disabled.
Resolves: rhbz#838047
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Reviewed-by: Lon Hohberger <lhh(a)redhat.com>
---
cman/man/qdisk.5 | 5 +++--
cman/qdisk/disk.h | 1 +
cman/qdisk/main.c | 22 +++++++++++++++-------
3 files changed, 19 insertions(+), 9 deletions(-)
diff --git a/cman/man/qdisk.5 b/cman/man/qdisk.5
index ca974fa..938ed69 100644
--- a/cman/man/qdisk.5
+++ b/cman/man/qdisk.5
@@ -297,8 +297,9 @@ and qdiskd's timeout (interval*tko) should be less than half of
Totem's token timeout. See section 3.3.1 for more information.
This option only takes effect if there are no heuristics
-configured. Usage of this option in configurations with more than
-two cluster nodes is undefined and should not be done.
+configured and it is valid only for 2 node cluster.
+This option is automatically disabled if heuristics are
+defined or cluster has more than 2 nodes configured.
In a two-node cluster with no heuristics and no defined vote
count (see above), this mode is turned by default. If enabled in
diff --git a/cman/qdisk/disk.h b/cman/qdisk/disk.h
index 6bed41d..1d8f7c8 100644
--- a/cman/qdisk/disk.h
+++ b/cman/qdisk/disk.h
@@ -252,6 +252,7 @@ typedef struct {
int qc_master; /* Master?! */
int qc_config;
int qc_token_timeout;
+ int qc_auto_votes;
disk_node_state_t qc_disk_status;
disk_node_state_t qc_status;
run_flag_t qc_flags;
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index 16c26e4..8eb9a3a 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -1505,7 +1505,7 @@ auto_qdisk_votes(int desc)
logt_print(LOG_ERR, "Unable to determine qdiskd votes "
"automatically\n");
else
- logt_print(LOG_DEBUG, "Setting votes to %d\n", ret);
+ logt_print(LOG_DEBUG, "Setting autocalculated votes to %d\n", ret);
return (ret);
}
@@ -1667,6 +1667,8 @@ get_dynamic_config_data(qd_ctx *ctx, int ccsfd)
ctx->qc_flags &= ~RF_AUTO_VOTES;
}
+ ctx->qc_auto_votes = auto_qdisk_votes(ccsfd);
+
snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_votes = atoi(val);
@@ -1674,7 +1676,7 @@ get_dynamic_config_data(qd_ctx *ctx, int ccsfd)
if (ctx->qc_votes < 0)
ctx->qc_votes = 0;
} else {
- ctx->qc_votes = auto_qdisk_votes(ccsfd);
+ ctx->qc_votes = ctx->qc_auto_votes;
if (ctx->qc_votes < 0) {
if (ctx->qc_config) {
logt_print(LOG_WARNING, "Unable to determine "
@@ -1940,15 +1942,21 @@ get_config_data(qd_ctx *ctx, struct h_data *h, int maxh, int *cfh)
*cfh = configure_heuristics(ccsfd, h, maxh,
ctx->qc_interval * (ctx->qc_tko - 1));
- if (*cfh) {
- if (ctx->qc_flags & RF_MASTER_WINS) {
- logt_print(LOG_WARNING, "Master-wins mode disabled\n");
+ if (ctx->qc_flags & RF_MASTER_WINS) {
+ if (*cfh) {
+ logt_print(LOG_WARNING, "Master-wins mode disabled "
+ "(not compatible with heuristics)\n");
+ ctx->qc_flags &= ~RF_MASTER_WINS;
+ }
+ if (ctx->qc_auto_votes != 1) {
+ logt_print(LOG_WARNING, "Master-wins mode disabled "
+ "(not compatible with more than 2 nodes)\n");
ctx->qc_flags &= ~RF_MASTER_WINS;
}
} else {
if (ctx->qc_flags & RF_AUTO_VOTES &&
- !(ctx->qc_flags & RF_MASTER_WINS) &&
- ctx->qc_votes == 1) {
+ !*cfh &&
+ ctx->qc_auto_votes == 1) {
/* Two node cluster, no heuristics, 1 vote for
* quorum disk daemon. Safe to enable master-wins.
* In fact, qdiskd without master-wins in this config
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=9eccf2611b6bf…
Commit: 9eccf2611b6bfd92e4b15196b4744f24991a7bb8
Parent: 332db7754d6210991c34be056789aa1b33b26a0a
Author: Ryan McCabe <rmccabe(a)redhat.com>
AuthorDate: Tue Jul 24 13:34:35 2012 -0400
Committer: Ryan McCabe <rmccabe(a)redhat.com>
CommitterDate: Tue Jul 24 18:13:09 2012 -0400
rgmanager: Add IP resource agent "prefer_interface" parameter
This patch adds a "prefer_interface" parameter for IP resources.
The interface must already be configured and active. This
parameter should be used only when at least two active interfaces
have IP addresses on the same subnet and it's necessary to specify
which particular interface should be used.
Acked-by: Lon Hohberger <lhh(a)redhat.com>
Signed-off-by: Ryan McCabe <rmccabe(a)redhat.com>
---
rgmanager/src/resources/ip.sh | 17 +++++++++++++++++
1 files changed, 17 insertions(+), 0 deletions(-)
diff --git a/rgmanager/src/resources/ip.sh b/rgmanager/src/resources/ip.sh
index 38d1ab9..bbd85f3 100755
--- a/rgmanager/src/resources/ip.sh
+++ b/rgmanager/src/resources/ip.sh
@@ -132,6 +132,15 @@ meta_data()
<content type="boolean"/>
</parameter>
+ <parameter name="prefer_interface">
+ <longdesc lang="en">
+ The network interface to which the IP address should be added. The interface must already be configured and active. This parameter should be used only when at least two active interfaces have IP addresses on the same subnet and it is desired to have the IP address added to a particular interface.
+ </longdesc>
+ <shortdesc lang="en">
+ Network interface
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
</parameters>
<actions>
@@ -587,6 +596,10 @@ ipv6()
fi
if [ "$1" = "add" ]; then
+ if [ -n "$OCF_RESKEY_prefer_interface" ] && \
+ [ "$OCF_RESKEY_prefer_interface" != $dev ]; then
+ continue
+ fi
ipv6_same_subnet $ifaddr_exp/$maskbits $addr_exp
if [ $? -ne 0 ]; then
continue
@@ -670,6 +683,10 @@ ipv4()
fi
if [ "$1" = "add" ]; then
+ if [ -n "$OCF_RESKEY_prefer_interface" ] && \
+ [ "$OCF_RESKEY_prefer_interface" != $dev ]; then
+ continue
+ fi
ipv4_same_subnet $ifaddr/$maskbits $addr
if [ $? -ne 0 ]; then
continue
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=88e4349b9b636…
Commit: 88e4349b9b636d12b3c51f9c0f734b0259b36954
Parent: 0eecc716d53b4fae15ad131c5d10c628348e0012
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Tue Jul 24 11:24:39 2012 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Tue Jul 24 11:31:39 2012 +0200
cman init: clarify better comments for FENCE_JOIN=no
(input from GSS)
Related: rhbz#821016
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Reviewed-by: Christine Caulfield <ccaulfie(a)redhat.com>
---
cman/init.d/cman.in | 7 ++++---
cman/init.d/cman.init.defaults.in | 7 ++++---
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/cman/init.d/cman.in b/cman/init.d/cman.in
index f39a9aa..9a0d726 100644
--- a/cman/init.d/cman.in
+++ b/cman/init.d/cman.in
@@ -126,9 +126,10 @@ fi
# set to "yes", then the script will attempt to join the fence domain.
# If FENCE_JOIN is set to any other value, the default behavior is
# to join the fence domain (equivalent to "yes").
-# When setting FENCE_JOIN to "no", it is important to check
-# DLM_CONTROLD_OPTS to reflect expected behavior regarding fencing
-# and quorum.
+# When setting FENCE_JOIN to "no", it is important to also set
+# DLM_CONTROLD_OPTS="-f0" (at least) for correct operation.
+# Please note that clusters without fencing are not
+# supported by Red Hat except for MRG installations.
[ -z "$FENCE_JOIN" ] && FENCE_JOIN="yes"
# FENCED_OPTS -- allow extra options to be passed to fence daemon.
diff --git a/cman/init.d/cman.init.defaults.in b/cman/init.d/cman.init.defaults.in
index 6e7f47f..1b7913e 100644
--- a/cman/init.d/cman.init.defaults.in
+++ b/cman/init.d/cman.init.defaults.in
@@ -50,9 +50,10 @@
# set to "yes", then the script will attempt to join the fence domain.
# If FENCE_JOIN is set to any other value, the default behavior is
# to join the fence domain (equivalent to "yes").
-# When setting FENCE_JOIN to "no", it is important to check
-# DLM_CONTROLD_OPTS to reflect expected behavior regarding fencing
-# and quorum.
+# When setting FENCE_JOIN to "no", it is important to also set
+# DLM_CONTROLD_OPTS="-f0" (at least) for correct operation.
+# Please note that clusters without fencing are not
+# supported by Red Hat except for MRG installations.
#FENCE_JOIN="yes"
# FENCED_OPTS -- allow extra options to be passed to fence daemon.
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=0eecc716d53b4…
Commit: 0eecc716d53b4fae15ad131c5d10c628348e0012
Parent: 69d2920745d3ac11a44b5d9ce3b07e8eb1d07b25
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Tue May 15 16:37:13 2012 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Tue Jul 24 11:29:59 2012 +0200
cman init: add extra documentation for FENCE_JOIN=""
Related: rhbz#821016
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Reviewed-by: Christine Caulfield <ccaulfie(a)redhat.com>
---
cman/init.d/cman.in | 3 +++
cman/init.d/cman.init.defaults.in | 3 +++
2 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/cman/init.d/cman.in b/cman/init.d/cman.in
index f345bcc..f39a9aa 100644
--- a/cman/init.d/cman.in
+++ b/cman/init.d/cman.in
@@ -126,6 +126,9 @@ fi
# set to "yes", then the script will attempt to join the fence domain.
# If FENCE_JOIN is set to any other value, the default behavior is
# to join the fence domain (equivalent to "yes").
+# When setting FENCE_JOIN to "no", it is important to check
+# DLM_CONTROLD_OPTS to reflect expected behavior regarding fencing
+# and quorum.
[ -z "$FENCE_JOIN" ] && FENCE_JOIN="yes"
# FENCED_OPTS -- allow extra options to be passed to fence daemon.
diff --git a/cman/init.d/cman.init.defaults.in b/cman/init.d/cman.init.defaults.in
index d6d010c..6e7f47f 100644
--- a/cman/init.d/cman.init.defaults.in
+++ b/cman/init.d/cman.init.defaults.in
@@ -50,6 +50,9 @@
# set to "yes", then the script will attempt to join the fence domain.
# If FENCE_JOIN is set to any other value, the default behavior is
# to join the fence domain (equivalent to "yes").
+# When setting FENCE_JOIN to "no", it is important to check
+# DLM_CONTROLD_OPTS to reflect expected behavior regarding fencing
+# and quorum.
#FENCE_JOIN="yes"
# FENCED_OPTS -- allow extra options to be passed to fence daemon.
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=69d2920745d3a…
Commit: 69d2920745d3ac11a44b5d9ce3b07e8eb1d07b25
Parent: 293c73fc93fb82cc427c37df0c088ff39cf25f4c
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Tue May 15 13:53:28 2012 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Tue Jul 24 11:28:58 2012 +0200
cman init: allow sysconfig/cman to pass options to dlm_controld
DLM_CONTROLD_OPTS="" can now be used to pass startup options to the
daemon.
Resolves: rhbz#821016
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Reviewed-by: Christine Caulfield <ccaulfie(a)redhat.com>
---
cman/init.d/cman.in | 5 ++++-
cman/init.d/cman.init.defaults.in | 3 +++
2 files changed, 7 insertions(+), 1 deletions(-)
diff --git a/cman/init.d/cman.in b/cman/init.d/cman.in
index 1268b59..f345bcc 100644
--- a/cman/init.d/cman.in
+++ b/cman/init.d/cman.in
@@ -107,6 +107,9 @@ fi
# no (default) | cman will not start sshd
[ -z "$CMAN_SSHD_START" ] && CMAN_SSHD_START=no
+# DLM_CONTROLD_OPTS -- allow extra options to be passed to dlm_controld daemon.
+[ -z "$DLM_CONTROLD_OPTS" ] && DLM_CONTROLD_OPTS=""
+
# FENCE_JOIN_TIMEOUT -- seconds to wait for fence domain join to
# complete. If the join hasn't completed in this time, fence_tool join
# exits with an error, and this script exits with an error. To wait
@@ -615,7 +618,7 @@ stop_fenced()
start_dlm_controld()
{
- start_daemon dlm_controld || return 1
+ start_daemon dlm_controld "$DLM_CONTROLD_OPTS" || return 1
if [ "$INITLOGLEVEL" = "full" ]; then
ok
diff --git a/cman/init.d/cman.init.defaults.in b/cman/init.d/cman.init.defaults.in
index 05c3bc7..d6d010c 100644
--- a/cman/init.d/cman.init.defaults.in
+++ b/cman/init.d/cman.init.defaults.in
@@ -31,6 +31,9 @@
# no (default) | cman will not start sshd
#CMAN_SSHD_START=no
+# DLM_CONTROLD_OPTS -- allow extra options to be passed to dlm_controld daemon.
+#DLM_CONTROLD_OPTS=""
+
# FENCE_JOIN_TIMEOUT -- seconds to wait for fence domain join to
# complete. If the join hasn't completed in this time, fence_tool join
# exits with an error, and this script exits with an error. To wait
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=224a841585ea3…
Commit: 224a841585ea300718f9e82d9fadd0f163233e8f
Parent: e8af462b7531f87b5cb20f7204eeb4b520591da9
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Tue Jul 24 11:24:39 2012 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Tue Jul 24 11:24:39 2012 +0200
cman init: clarify better comments for FENCE_JOIN=no
(input from GSS)
Related: rhbz#821016
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
cman/init.d/cman.in | 7 ++++---
cman/init.d/cman.init.defaults.in | 7 ++++---
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/cman/init.d/cman.in b/cman/init.d/cman.in
index 95323b4..c6e038e 100644
--- a/cman/init.d/cman.in
+++ b/cman/init.d/cman.in
@@ -135,9 +135,10 @@ fi
# set to "yes", then the script will attempt to join the fence domain.
# If FENCE_JOIN is set to any other value, the default behavior is
# to join the fence domain (equivalent to "yes").
-# When setting FENCE_JOIN to "no", it is important to check
-# DLM_CONTROLD_OPTS to reflect expected behavior regarding fencing
-# and quorum.
+# When setting FENCE_JOIN to "no", it is important to also set
+# DLM_CONTROLD_OPTS="-f0" (at least) for correct operation.
+# Please note that clusters without fencing are not
+# supported by Red Hat except for MRG installations.
[ -z "$FENCE_JOIN" ] && FENCE_JOIN="yes"
# FENCED_OPTS -- allow extra options to be passed to fence daemon.
diff --git a/cman/init.d/cman.init.defaults.in b/cman/init.d/cman.init.defaults.in
index b981bab..5be97ff 100644
--- a/cman/init.d/cman.init.defaults.in
+++ b/cman/init.d/cman.init.defaults.in
@@ -58,9 +58,10 @@
# set to "yes", then the script will attempt to join the fence domain.
# If FENCE_JOIN is set to any other value, the default behavior is
# to join the fence domain (equivalent to "yes").
-# When setting FENCE_JOIN to "no", it is important to check
-# DLM_CONTROLD_OPTS to reflect expected behavior regarding fencing
-# and quorum.
+# When setting FENCE_JOIN to "no", it is important to also set
+# DLM_CONTROLD_OPTS="-f0" (at least) for correct operation.
+# Please note that clusters without fencing are not
+# supported by Red Hat except for MRG installations.
#FENCE_JOIN="yes"
# FENCED_OPTS -- allow extra options to be passed to fence daemon.
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=fa9267535f7bb…
Commit: fa9267535f7bbb49dcc766f87f35502dfa623847
Parent: bdbed1f76496e2f3cfc86535314cbeffd4d20257
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Wed Jul 11 11:44:49 2012 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Tue Jul 24 11:00:04 2012 +0200
cman: fix data copy and memory leak when reloading config
cman.cluster_id,nodename,two_node information were not copied
from the old to the new config at reload time. This triggers
a problem when <cman is set in cluster.conf and we effectively
drop information from objdb (suboptimal).
Also fix a possible memory leak when we have reload issues.
Resolves: rhbz#839241
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Reviewed-by: Christine Caulfield <ccaulfie(a)redhat.com>
---
cman/daemon/cman-preconfig.c | 31 +++++++++++++++++++++++++++++--
1 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/cman/daemon/cman-preconfig.c b/cman/daemon/cman-preconfig.c
index c42052e..321d78f 100644
--- a/cman/daemon/cman-preconfig.c
+++ b/cman/daemon/cman-preconfig.c
@@ -1483,6 +1483,7 @@ static int cmanpre_reloadconfig(struct objdb_iface_ver0 *objdb, int flush, const
hdb_handle_t cluster_parent_handle_new;
unsigned int config_version = 0, config_version_new = 0;
char *config_value = NULL;
+ char str[255];
/* don't reload if we've been told to run configless */
if (getenv("CMAN_NOCONFIG")) {
@@ -1494,16 +1495,16 @@ static int cmanpre_reloadconfig(struct objdb_iface_ver0 *objdb, int flush, const
/* find both /cluster entries */
objdb->object_find_create(OBJECT_PARENT_HANDLE, "cluster", strlen("cluster"), &find_handle);
objdb->object_find_next(find_handle, &cluster_parent_handle);
+ objdb->object_find_next(find_handle, &cluster_parent_handle_new);
+ objdb->object_find_destroy(find_handle);
if (!cluster_parent_handle) {
sprintf (error_reason, "%s", "Cannot find old /cluster/ key in configuration\n");
goto err;
}
- objdb->object_find_next(find_handle, &cluster_parent_handle_new);
if (!cluster_parent_handle_new) {
sprintf (error_reason, "%s", "Cannot find new /cluster/ key in configuration\n");
goto err;
}
- objdb->object_find_destroy(find_handle);
if (!objdb->object_key_get(cluster_parent_handle, "config_version", strlen("config_version"), (void *)&config_value, NULL)) {
if (config_value) {
@@ -1536,6 +1537,32 @@ static int cmanpre_reloadconfig(struct objdb_iface_ver0 *objdb, int flush, const
/* destroy the old one */
objdb->object_destroy(cluster_parent_handle);
+ /*
+ * create cluster.cman in the new config if it doesn't exists
+ */
+ objdb->object_find_create(cluster_parent_handle_new, "cman", strlen("cman"), &find_handle);
+ if (objdb->object_find_next(find_handle, &object_handle)) {
+ objdb->object_create(cluster_parent_handle_new, &object_handle,
+ "cman", strlen("cman"));
+ }
+ objdb->object_find_destroy(find_handle);
+
+ /*
+ * readd cluster_id/two_node/nodename
+ */
+ snprintf(str, sizeof(str) - 1, "%d", cluster_id);
+ objdb->object_key_create_typed(object_handle, "cluster_id",
+ str, strlen(str) + 1, OBJDB_VALUETYPE_STRING);
+
+ if (two_node) {
+ snprintf(str, sizeof(str) - 1, "%d", 1);
+ objdb->object_key_create_typed(object_handle, "two_node",
+ str, strlen(str) + 1, OBJDB_VALUETYPE_STRING);
+ }
+
+ objdb->object_key_create_typed(object_handle, "nodename",
+ nodename, strlen(nodename)+1, OBJDB_VALUETYPE_STRING);
+
/* update the reference to the new config */
cluster_parent_handle = cluster_parent_handle_new;
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=bdbed1f76496e…
Commit: bdbed1f76496e2f3cfc86535314cbeffd4d20257
Parent: af86e47803e3626641c1671ac89437d4a82066a7
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Wed Jun 27 11:46:01 2012 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Tue Jul 24 10:47:40 2012 +0200
cman-preconfig: allow host aliases as valid cluster nodenames
Resolves: rhbz#786118
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Reviewed-by: Christine Caulfield <ccaulfie(a)redhat.com>
---
cman/daemon/cman-preconfig.c | 91 +++++++++++++++++++++++++++++++++++-------
1 files changed, 76 insertions(+), 15 deletions(-)
diff --git a/cman/daemon/cman-preconfig.c b/cman/daemon/cman-preconfig.c
index c8f69e5..c42052e 100644
--- a/cman/daemon/cman-preconfig.c
+++ b/cman/daemon/cman-preconfig.c
@@ -462,7 +462,7 @@ static int verify_nodename(struct objdb_iface_ver0 *objdb, char *node)
struct sockaddr *sa;
hdb_handle_t nodes_handle;
hdb_handle_t find_handle = 0;
- int error;
+ int found = 0;
/* nodename is either from commandline or from uname */
if (nodelist_byname(objdb, cluster_parent_handle, node))
@@ -508,12 +508,11 @@ static int verify_nodename(struct objdb_iface_ver0 *objdb, char *node)
} while (nodes_handle);
objdb->object_find_destroy(find_handle);
-
- /* The cluster.conf names may not be related to uname at all,
- they may match a hostname on some network interface.
- NOTE: This is IPv4 only */
- error = getifaddrs(&ifa_list);
- if (error)
+ /*
+ * The cluster.conf names may not be related to uname at all,
+ * they may match a hostname on some network interface.
+ */
+ if (getifaddrs(&ifa_list))
return -1;
for (ifa = ifa_list; ifa; ifa = ifa->ifa_next) {
@@ -532,12 +531,13 @@ static int verify_nodename(struct objdb_iface_ver0 *objdb, char *node)
if (sa->sa_family == AF_INET6)
salen = sizeof(struct sockaddr_in6);
- error = getnameinfo(sa, salen, nodename2,
- sizeof(nodename2), NULL, 0, 0);
- if (!error) {
+ if (getnameinfo(sa, salen,
+ nodename2, sizeof(nodename2),
+ NULL, 0, 0) == 0) {
if (nodelist_byname(objdb, cluster_parent_handle, nodename2)) {
strcpy(node, nodename2);
+ found = 1;
goto out;
}
@@ -548,27 +548,88 @@ static int verify_nodename(struct objdb_iface_ver0 *objdb, char *node)
if (nodelist_byname(objdb, cluster_parent_handle, nodename2)) {
strcpy(node, nodename2);
+ found = 1;
goto out;
}
}
}
/* See if it's the IP address that's in cluster.conf */
- error = getnameinfo(sa, sizeof(*sa), nodename2,
- sizeof(nodename2), NULL, 0, NI_NUMERICHOST);
- if (error)
+ if (getnameinfo(sa, sizeof(*sa),
+ nodename2, sizeof(nodename2),
+ NULL, 0, NI_NUMERICHOST))
continue;
if (nodelist_byname(objdb, cluster_parent_handle, nodename2)) {
strcpy(node, nodename2);
+ found = 1;
goto out;
}
}
- error = -1;
out:
+ if (found) {
+ freeifaddrs(ifa_list);
+ return 0;
+ }
+
+ /*
+ * This section covers the usecase where the nodename specified in cluster.conf
+ * is an alias specified in /etc/hosts. For example:
+ * <ipaddr> hostname alias1 alias2
+ * and <clusternode name="alias2">
+ * the above calls use uname and getnameinfo does not return aliases.
+ * here we take the name specified in cluster.conf, resolve it to an address
+ * and then compare against all known local ip addresses.
+ * if we have a match, we found our nodename. In theory this chunk of code
+ * could replace all the checks above, but let's avoid any possible regressions
+ * and use it as last.
+ */
+
+ nodes_handle = nodeslist_init(objdb, cluster_parent_handle, &find_handle);
+ while (nodes_handle) {
+ char *dbnodename = NULL;
+ struct addrinfo hints;
+ struct addrinfo *result = NULL, *rp = NULL;
+
+ if (objdb_get_string(objdb, nodes_handle, "name", &dbnodename)) {
+ goto next;
+ }
+
+ memset(&hints, 0, sizeof(struct addrinfo));
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_DGRAM;
+ hints.ai_flags = 0;
+ hints.ai_protocol = IPPROTO_UDP;
+
+ if (getaddrinfo(dbnodename, NULL, &hints, &result))
+ goto next;
+
+ for (rp = result; rp != NULL; rp = rp->ai_next) {
+ for (ifa = ifa_list; ifa; ifa = ifa->ifa_next) {
+ if (ipaddr_equal((struct sockaddr_storage *)rp->ai_addr,
+ (struct sockaddr_storage *)ifa->ifa_addr)) {
+ freeaddrinfo(result);
+ strncpy(node, dbnodename, sizeof(nodename) - 1);
+ found = 1;
+ goto out2;
+ }
+ }
+ }
+
+ freeaddrinfo(result);
+ next:
+ nodes_handle = nodeslist_next(objdb, find_handle);
+ }
+ out2:
+ objdb->object_find_destroy(find_handle);
freeifaddrs(ifa_list);
- return error;
+
+ if (found) {
+ return 0;
+ }
+
+ return -1;
}
/* Get any environment variable overrides */
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=5674e99cd0b14…
Commit: 5674e99cd0b141c34630ec411e0d682727a30dc9
Parent: e01a564837cc5f5310a046da3c71f995c301e649
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Tue Jul 24 10:27:57 2012 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Tue Jul 24 10:34:25 2012 +0200
qdiskd: allow master to failover quickly when using master_wins
in case of master_wins and we are shutting down the master qdiskd,
there is a small window in which the other node is not quorate
because qdiskd has not become master yet.
this patch allows the master qdiskd to communicate to the other
nodes that it is going away and gives them enough time to elect
a new master before dying.
the process itself is safe: in the worst case scenario the cluster
will behave as it does now (temporary loss of quorum), otherwise a fast
switch will take place.
Resolves: rhbz#814807
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Reviewed-by: Lon Hohberger <lhh(a)redhat.com>
---
cman/qdisk/disk.h | 5 +++-
cman/qdisk/main.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 75 insertions(+), 2 deletions(-)
diff --git a/cman/qdisk/disk.h b/cman/qdisk/disk.h
index fd80fa6..6bed41d 100644
--- a/cman/qdisk/disk.h
+++ b/cman/qdisk/disk.h
@@ -24,9 +24,12 @@ typedef enum {
S_INIT = 0x2, // Initializing. Hold your fire.
/* vvv Fencing will kill a node */
S_RUN = 0x5, // I think I'm running.
- S_MASTER= 0x6 // I know I'm running, and have advertised to
+ S_MASTER= 0x6, // I know I'm running, and have advertised to
// CMAN the availability of the disk vote for my
// partition.
+ S_EXIT = 0x7 // trigger master re-election before exit
+ // status is set only by master in master-win | auto-masterwin
+ // and next status _must_ be S_NONE
} disk_node_state_t;
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index fc714f6..594e9e8 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -194,7 +194,8 @@ read_node_blocks(qd_ctx *ctx, node_info_t *ni, int max)
continue;
/* Unchanged timestamp: miss */
- if (sb->ps_timestamp == ni[x].ni_last_seen) {
+ if ((sb->ps_timestamp == ni[x].ni_last_seen) &&
+ (ni[x].ni_state != S_EXIT)) {
/* XXX check for average + allow grace */
ni[x].ni_misses++;
if (ni[x].ni_misses > 1) {
@@ -229,6 +230,22 @@ check_transitions(qd_ctx *ctx, node_info_t *ni, int max, memb_mask_t mask)
for (x = 0; x < max; x++) {
/*
+ Case 0: check if master node is about to leave
+ */
+ if (ni[x].ni_state == S_EXIT) {
+ logt_print(LOG_NOTICE, "Node %d is about to leave\n", ni[x].ni_status.ps_nodeid);
+ ni[x].ni_evil_incarnation = 0;
+ ni[x].ni_incarnation = 0;
+ ni[x].ni_seen = 0;
+ ni[x].ni_misses = 0;
+ ni[x].ni_state = S_NONE;
+ if (mask)
+ clear_bit(mask, (ni[x].ni_status.ps_nodeid-1),
+ sizeof(memb_mask_t));
+ continue;
+ }
+
+ /*
Case 1: check to see if the node is still up
according to our internal state, but has been
evicted by the master or cleanly shut down
@@ -1259,6 +1276,50 @@ quorum_loop(qd_ctx *ctx, node_info_t *ni, int max)
/**
+ Tell the other nodes to elect a new master != me.
+ */
+static int
+quorum_reelect_master(qd_ctx *ctx, node_info_t *ni, int max)
+{
+ if (qd_write_status(ctx, ctx->qc_my_id, S_EXIT,
+ NULL, NULL, NULL) != 0) {
+ logt_print(LOG_WARNING,
+ "Error writing to quorum disk during reelect_master\n");
+ }
+
+ while (1) {
+ int master, x;
+ int found = 0;
+ int low_id, count;
+
+ read_node_blocks(ctx, ni, max);
+
+ for (x = 0; x < max; x++) {
+ if (ni[x].ni_state >= S_RUN) {
+ found = 1;
+ }
+ }
+
+ if (!found) {
+ logt_print(LOG_DEBUG, "No other nodes are active. Exiting\n");
+ break;
+ }
+
+ master = master_exists(ctx, ni, max, &low_id, &count);
+ if (master) {
+ logt_print(LOG_DEBUG, "New master elected: %d\n", master);
+ break;
+ }
+ /*
+ * give time for message to be read
+ */
+ sleep(1);
+ }
+
+ return 0;
+}
+
+/**
Tell the other nodes we're done (safely!).
*/
static int
@@ -2151,6 +2212,15 @@ main(int argc, char **argv)
io_nanny_start(ch_user, ctx.qc_tko * ctx.qc_interval);
if (quorum_loop(&ctx, ni, MAX_NODES_DISK) == 0) {
+ /*
+ * if we are master and we are in master-win mode,
+ * request other qdiskd to elect a new one
+ */
+ if ((ctx.qc_status == S_MASTER) &&
+ ((ctx.qc_flags & RF_MASTER_WINS) ||
+ (ctx.qc_flags & RF_AUTO_MASTER_WINS))) {
+ quorum_reelect_master(&ctx, ni, MAX_NODES_DISK);
+ }
/* Only clean up if we're exiting w/o error) */
logt_print(LOG_NOTICE, "Unregistering quorum device.\n");
cman_unregister_quorum_device(ctx.qc_cman_admin);
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=e8af462b7531f…
Commit: e8af462b7531f87b5cb20f7204eeb4b520591da9
Parent: 03e2215bd277fd79b8a6ee70a49de711e0f343ad
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Tue Jul 24 10:27:57 2012 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Tue Jul 24 10:27:57 2012 +0200
qdiskd: allow master to failover quickly when using master_wins
in case of master_wins and we are shutting down the master qdiskd,
there is a small window in which the other node is not quorate
because qdiskd has not become master yet.
this patch allows the master qdiskd to communicate to the other
nodes that it is going away and gives enough time to elect
a new master before dying.
the process itself is safe: in the worst case scenario the cluster
will behave as it does now (temporary loss of quorum), otherwise a fast
switch will take place.
Resolves: rhbz#814807
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Reviewed-by: Lon Hohberger <lhh(a)redhat.com>
---
cman/qdisk/disk.h | 5 +++-
cman/qdisk/main.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 75 insertions(+), 2 deletions(-)
diff --git a/cman/qdisk/disk.h b/cman/qdisk/disk.h
index fd80fa6..6bed41d 100644
--- a/cman/qdisk/disk.h
+++ b/cman/qdisk/disk.h
@@ -24,9 +24,12 @@ typedef enum {
S_INIT = 0x2, // Initializing. Hold your fire.
/* vvv Fencing will kill a node */
S_RUN = 0x5, // I think I'm running.
- S_MASTER= 0x6 // I know I'm running, and have advertised to
+ S_MASTER= 0x6, // I know I'm running, and have advertised to
// CMAN the availability of the disk vote for my
// partition.
+ S_EXIT = 0x7 // trigger master re-election before exit
+ // status is set only by master in master-win | auto-masterwin
+ // and next status _must_ be S_NONE
} disk_node_state_t;
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index 32677a2..16c26e4 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -196,7 +196,8 @@ read_node_blocks(qd_ctx *ctx, node_info_t *ni, int max)
continue;
/* Unchanged timestamp: miss */
- if (sb->ps_timestamp == ni[x].ni_last_seen) {
+ if ((sb->ps_timestamp == ni[x].ni_last_seen) &&
+ (ni[x].ni_state != S_EXIT)) {
/* XXX check for average + allow grace */
ni[x].ni_misses++;
if (ni[x].ni_misses > 1) {
@@ -231,6 +232,22 @@ check_transitions(qd_ctx *ctx, node_info_t *ni, int max, memb_mask_t mask)
for (x = 0; x < max; x++) {
/*
+ Case 0: check if master node is about to leave
+ */
+ if (ni[x].ni_state == S_EXIT) {
+ logt_print(LOG_NOTICE, "Node %d is about to leave\n", ni[x].ni_status.ps_nodeid);
+ ni[x].ni_evil_incarnation = 0;
+ ni[x].ni_incarnation = 0;
+ ni[x].ni_seen = 0;
+ ni[x].ni_misses = 0;
+ ni[x].ni_state = S_NONE;
+ if (mask)
+ clear_bit(mask, (ni[x].ni_status.ps_nodeid-1),
+ sizeof(memb_mask_t));
+ continue;
+ }
+
+ /*
Case 1: check to see if the node is still up
according to our internal state, but has been
evicted by the master or cleanly shut down
@@ -1269,6 +1286,50 @@ quorum_loop(qd_ctx *ctx, node_info_t *ni, int max)
/**
+ Tell the other nodes to elect a new master != me.
+ */
+static int
+quorum_reelect_master(qd_ctx *ctx, node_info_t *ni, int max)
+{
+ if (qd_write_status(ctx, ctx->qc_my_id, S_EXIT,
+ NULL, NULL, NULL) != 0) {
+ logt_print(LOG_WARNING,
+ "Error writing to quorum disk during reelect_master\n");
+ }
+
+ while (1) {
+ int master, x;
+ int found = 0;
+ int low_id, count;
+
+ read_node_blocks(ctx, ni, max);
+
+ for (x = 0; x < max; x++) {
+ if (ni[x].ni_state >= S_RUN) {
+ found = 1;
+ }
+ }
+
+ if (!found) {
+ logt_print(LOG_DEBUG, "No other nodes are active. Exiting\n");
+ break;
+ }
+
+ master = master_exists(ctx, ni, max, &low_id, &count);
+ if (master) {
+ logt_print(LOG_DEBUG, "New master elected: %d\n", master);
+ break;
+ }
+ /*
+ * give time for message to be read
+ */
+ sleep(1);
+ }
+
+ return 0;
+}
+
+/**
Tell the other nodes we're done (safely!).
*/
static int
@@ -2173,6 +2234,15 @@ main(int argc, char **argv)
io_nanny_start(ch_user, ctx.qc_tko * ctx.qc_interval);
if (quorum_loop(&ctx, ni, MAX_NODES_DISK) == 0) {
+ /*
+ * if we are master and we are in master-win mode,
+ * request other qdiskd to elect a new one
+ */
+ if ((ctx.qc_status == S_MASTER) &&
+ ((ctx.qc_flags & RF_MASTER_WINS) ||
+ (ctx.qc_flags & RF_AUTO_MASTER_WINS))) {
+ quorum_reelect_master(&ctx, ni, MAX_NODES_DISK);
+ }
/* Only clean up if we're exiting w/o error) */
logt_print(LOG_NOTICE, "Unregistering quorum device.\n");
cman_unregister_quorum_device(ctx.qc_cman_admin);
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=3290dff9b8899…
Commit: 3290dff9b88992e913b28dd57c6b64a3c2b75c8a
Parent: 13005d1ff5246a332bfc6795ad443b0148cd11b2
Author: Ryan McCabe <rmccabe(a)redhat.com>
AuthorDate: Mon Jul 16 11:21:43 2012 -0400
Committer: Ryan McCabe <rmccabe(a)redhat.com>
CommitterDate: Mon Jul 16 11:21:43 2012 -0400
When the filesystem /etc lives on is completely full, umount will exit with exit status 16 if the umount syscall succeeded but it was unable to write a new mtab file because the disk is full. umount won't exit with status 16 under any other circumstances.
This patch changes the fs.sh, clusterfs.sh, and netfs.sh resource agents
to treat both exit status 0 and exit status 16 as success.
Resolves: rhbz#819595
Acked-by: Lon Hohberger <lhh(a)redhat.com>
Signed-off-by: Ryan McCabe <rmccabe(a)redhat.com>
---
rgmanager/src/resources/clusterfs.sh | 5 ++++-
rgmanager/src/resources/fs.sh | 5 ++++-
rgmanager/src/resources/netfs.sh | 5 ++++-
3 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/rgmanager/src/resources/clusterfs.sh b/rgmanager/src/resources/clusterfs.sh
index 49eb724..ff28e3d 100755
--- a/rgmanager/src/resources/clusterfs.sh
+++ b/rgmanager/src/resources/clusterfs.sh
@@ -792,8 +792,11 @@ stop: Could not match $OCF_RESKEY_device with a real device"
sync; sync; sync
ocf_log info "unmounting $dev ($mp)"
+ # umount will exit with status 16 iff the umount(2)
+ # succeeded, but /etc/mtab could not be written.
umount $mp
- if [ $? -eq 0 ]; then
+ retval=$?
+ if [ $retval -eq 0 -o $retval -eq 16 ]; then
umount_failed=
done=$YES
continue
diff --git a/rgmanager/src/resources/fs.sh b/rgmanager/src/resources/fs.sh
index a98cddc..9dec8d1 100755
--- a/rgmanager/src/resources/fs.sh
+++ b/rgmanager/src/resources/fs.sh
@@ -1102,8 +1102,11 @@ stop: Could not match $OCF_RESKEY_device with a real device"
fi
ocf_log info "unmounting $mp"
+ # umount will exit with status 16 iff the umount(2)
+ # succeeded, but /etc/mtab could not be written.
umount $mp
- if [ $? -eq 0 ]; then
+ retval=$?
+ if [ $retval -eq 0 -o $retval -eq 16 ]; then
umount_failed=
done=$YES
continue
diff --git a/rgmanager/src/resources/netfs.sh b/rgmanager/src/resources/netfs.sh
index 837a4c4..6089a0d 100755
--- a/rgmanager/src/resources/netfs.sh
+++ b/rgmanager/src/resources/netfs.sh
@@ -559,8 +559,11 @@ stopNFSFilesystem() {
sync; sync; sync
ocf_log info "unmounting $mp"
+ # umount will exit with status 16 iff the umount(2)
+ # succeeded, but /etc/mtab could not be written.
umount $umount_flag $mp
- if [ $? -eq 0 ]; then
+ retval=$?
+ if [ $retval -eq 0 -o $retval -eq 16 ]; then
umount_failed=
done=$YES
continue
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=13005d1ff5246…
Commit: 13005d1ff5246a332bfc6795ad443b0148cd11b2
Parent: 467d015c53e5e2d00025708cda95b3df41ebda1f
Author: Ryan McCabe <rmccabe(a)redhat.com>
AuthorDate: Wed Jul 11 09:11:33 2012 -0400
Committer: Ryan McCabe <rmccabe(a)redhat.com>
CommitterDate: Mon Jul 16 11:09:20 2012 -0400
rgmanager: Fix return code when relocation fails and svc is running on original node
Return RG_ERELO (Relocation failure; service running on original node) when
relocation failed and the service was restarted successfully on the original
node.
This is the behavior that's currently in the STABLE32 and RHEL6 branches.
Acked-by: Lon Hohberger <lhh(a)redhat.com>
Signed-off-by: Ryan McCabe <rmccabe(a)redhat.com>
---
rgmanager/src/daemons/rg_state.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c
index 4357d21..9000f1b 100644
--- a/rgmanager/src/daemons/rg_state.c
+++ b/rgmanager/src/daemons/rg_state.c
@@ -2061,7 +2061,7 @@ exhausted:
svcName);
if (svc_start(svcName, RG_START_RECOVER) == 0) {
*new_owner = me;
- return 0;
+ return RG_ERELO;
}
}
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=467d015c53e5e…
Commit: 467d015c53e5e2d00025708cda95b3df41ebda1f
Parent: d88584bd640700c51692198d2f6aeda0e773165c
Author: Ryan McCabe <rmccabe(a)redhat.com>
AuthorDate: Mon Jul 16 10:57:28 2012 -0400
Committer: Ryan McCabe <rmccabe(a)redhat.com>
CommitterDate: Mon Jul 16 11:05:26 2012 -0400
rgmanager: Fix for services stuck in recovery
Patch from John Ruemker <jruemker(a)redhat.com>:
"When starting rgmanager throughout the cluster around the same
time, multiple nodes may end up acting as the "root" for a particular
service. If that service happens to fail on startup, you can end up
with each of those nodes sending remote-start requests around the
cluster. Eventually the service will get stuck in a recovering state,
and cannot be modified in any way with clusvcadm. The only remedy we've
found is to kill rgmanager and start it back up."
Acked-by: Lon Hohberger <lhh(a)redhat.com>
Signed-off-by: Ryan McCabe <rmccabe(a)redhat.com>
---
rgmanager/src/daemons/groups.c | 6 ++++--
1 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/rgmanager/src/daemons/groups.c b/rgmanager/src/daemons/groups.c
index 20ed2e1..bd406c8 100644
--- a/rgmanager/src/daemons/groups.c
+++ b/rgmanager/src/daemons/groups.c
@@ -747,7 +747,8 @@ eval_groups(int local, uint32_t nodeid, int nodeStatus)
(svcStatus.rs_state == RG_STATE_STARTED ||
svcStatus.rs_state == RG_STATE_RECOVER ||
svcStatus.rs_state == RG_STATE_STARTING ||
- svcStatus.rs_state == RG_STATE_STOPPING )) {
+ svcStatus.rs_state == RG_STATE_STOPPING ||
+ svcStatus.rs_state == RG_STATE_ERROR)) {
clulog(LOG_DEBUG,
"Marking %s on down member %d as stopped",
@@ -789,7 +790,8 @@ eval_groups(int local, uint32_t nodeid, int nodeStatus)
/* Disabled/failed/in recovery? Do nothing */
if ((svcStatus.rs_state == RG_STATE_DISABLED) ||
(svcStatus.rs_state == RG_STATE_FAILED) ||
- (svcStatus.rs_state == RG_STATE_RECOVER)) {
+ (svcStatus.rs_state == RG_STATE_RECOVER) ||
+ (svcStatus.rs_state == RG_STATE_ERROR)) {
continue;
}
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=03e2215bd277f…
Commit: 03e2215bd277fd79b8a6ee70a49de711e0f343ad
Parent: e080a6249f25a3bc18a6c9028c9da06bf4ef14c6
Author: Ryan McCabe <rmccabe(a)redhat.com>
AuthorDate: Mon Jul 16 10:57:28 2012 -0400
Committer: Ryan McCabe <rmccabe(a)redhat.com>
CommitterDate: Mon Jul 16 10:57:28 2012 -0400
rgmanager: Fix for services stuck in recovery
Patch from John Ruemker <jruemker(a)redhat.com>:
"When starting rgmanager throughout the cluster around the same
time, multiple nodes may end up acting as the "root" for a particular
service. If that service happens to fail on startup, you can end up
with each of those nodes sending remote-start requests around the
cluster. Eventually the service will get stuck in a recovering state,
and cannot be modified in any way with clusvcadm. The only remedy we've
found is to kill rgmanager and start it back up."
Acked-by: Lon Hohberger <lhh(a)redhat.com>
Signed-off-by: Ryan McCabe <rmccabe(a)redhat.com>
---
rgmanager/src/daemons/groups.c | 6 ++++--
1 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/rgmanager/src/daemons/groups.c b/rgmanager/src/daemons/groups.c
index 4a72cb7..4537cc8 100644
--- a/rgmanager/src/daemons/groups.c
+++ b/rgmanager/src/daemons/groups.c
@@ -731,7 +731,8 @@ eval_groups(int local, uint32_t nodeid, int nodeStatus)
(svcStatus.rs_state == RG_STATE_STARTED ||
svcStatus.rs_state == RG_STATE_RECOVER ||
svcStatus.rs_state == RG_STATE_STARTING ||
- svcStatus.rs_state == RG_STATE_STOPPING )) {
+ svcStatus.rs_state == RG_STATE_STOPPING ||
+ svcStatus.rs_state == RG_STATE_ERROR)) {
logt_print(LOG_DEBUG,
"Marking %s on down member %d as stopped",
@@ -773,7 +774,8 @@ eval_groups(int local, uint32_t nodeid, int nodeStatus)
/* Disabled/failed/in recovery? Do nothing */
if ((svcStatus.rs_state == RG_STATE_DISABLED) ||
(svcStatus.rs_state == RG_STATE_FAILED) ||
- (svcStatus.rs_state == RG_STATE_RECOVER)) {
+ (svcStatus.rs_state == RG_STATE_RECOVER) ||
+ (svcStatus.rs_state == RG_STATE_ERROR)) {
continue;
}
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=88ab6a7b1e4da…
Commit: 88ab6a7b1e4da3802f64addc3d6f234e5cf82760
Parent: 8975bd6341b2d94c1f89279b1b00d4360da1f5ff
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Wed Jul 11 11:44:49 2012 +0200
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Wed Jul 11 11:44:49 2012 +0200
cman: fix data copy and memory leak when reloading config
cman.cluster_id,nodename,two_node information were not copied
from the old to the new config at reload time. This triggers
a problem when <cman is set in cluster.conf and we effectively
drop information from objdb (suboptimal).
Also fix a possible memory leak when we have reload issues.
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
cman/daemon/cman-preconfig.c | 31 +++++++++++++++++++++++++++++--
1 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/cman/daemon/cman-preconfig.c b/cman/daemon/cman-preconfig.c
index 68fec22..22583fe 100644
--- a/cman/daemon/cman-preconfig.c
+++ b/cman/daemon/cman-preconfig.c
@@ -1478,6 +1478,7 @@ static int cmanpre_reloadconfig(struct objdb_iface_ver0 *objdb, int flush, const
hdb_handle_t cluster_parent_handle_new;
unsigned int config_version = 0, config_version_new = 0;
char *config_value = NULL;
+ char str[255];
/* don't reload if we've been told to run configless */
if (getenv("CMAN_NOCONFIG")) {
@@ -1489,16 +1490,16 @@ static int cmanpre_reloadconfig(struct objdb_iface_ver0 *objdb, int flush, const
/* find both /cluster entries */
objdb->object_find_create(OBJECT_PARENT_HANDLE, "cluster", strlen("cluster"), &find_handle);
objdb->object_find_next(find_handle, &cluster_parent_handle);
+ objdb->object_find_next(find_handle, &cluster_parent_handle_new);
+ objdb->object_find_destroy(find_handle);
if (!cluster_parent_handle) {
snprintf (error_reason, sizeof(error_reason) - 1, "Cannot find old /cluster/ key in configuration\n");
goto err;
}
- objdb->object_find_next(find_handle, &cluster_parent_handle_new);
if (!cluster_parent_handle_new) {
snprintf (error_reason, sizeof(error_reason) - 1, "Cannot find new /cluster/ key in configuration\n");
goto err;
}
- objdb->object_find_destroy(find_handle);
if (!objdb->object_key_get(cluster_parent_handle, "config_version", strlen("config_version"), (void *)&config_value, NULL)) {
if (config_value) {
@@ -1531,6 +1532,32 @@ static int cmanpre_reloadconfig(struct objdb_iface_ver0 *objdb, int flush, const
/* destroy the old one */
objdb->object_destroy(cluster_parent_handle);
+ /*
+ * create cluster.cman in the new config if it doesn't exists
+ */
+ objdb->object_find_create(cluster_parent_handle_new, "cman", strlen("cman"), &find_handle);
+ if (objdb->object_find_next(find_handle, &object_handle)) {
+ objdb->object_create(cluster_parent_handle_new, &object_handle,
+ "cman", strlen("cman"));
+ }
+ objdb->object_find_destroy(find_handle);
+
+ /*
+ * readd cluster_id/two_node/nodename
+ */
+ snprintf(str, sizeof(str) - 1, "%d", cluster_id);
+ objdb->object_key_create_typed(object_handle, "cluster_id",
+ str, strlen(str) + 1, OBJDB_VALUETYPE_STRING);
+
+ if (two_node) {
+ snprintf(str, sizeof(str) - 1, "%d", 1);
+ objdb->object_key_create_typed(object_handle, "two_node",
+ str, strlen(str) + 1, OBJDB_VALUETYPE_STRING);
+ }
+
+ objdb->object_key_create_typed(object_handle, "nodename",
+ nodename, strlen(nodename)+1, OBJDB_VALUETYPE_STRING);
+
/* update the reference to the new config */
cluster_parent_handle = cluster_parent_handle_new;
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=5a1d50a8308bb…
Commit: 5a1d50a8308bb5b40c6a8d990f628bdaa9c20a59
Parent: 55710722d15be8f2eafdae472086182f88b2a0d5
Author: Ryan McCabe <rmccabe(a)redhat.com>
AuthorDate: Thu Jun 28 15:24:48 2012 -0400
Committer: Ryan McCabe <rmccabe(a)redhat.com>
CommitterDate: Tue Jul 10 15:56:33 2012 -0400
rgmanager: Don't immediately stop services that have started successfully
This patch fixes a bug that caused services to be started, then
immediately stopped during recovery. This occurred when a service
had its recovery policy set to relocate and was in a restricted
failover domain. If starting the service failed on other nodes in the
failover domain, the service would be restarted on the original node,
then immediately stopped, even if it had started successfully. This
patch causes rgmanager to leave the service running if it restarted
successfully.
Resolves: rhbz#789366
Acked-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Signed-off-by: Ryan McCabe <rmccabe(a)redhat.com>
---
rgmanager/src/daemons/rg_state.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c
index a8b1e36..4357d21 100644
--- a/rgmanager/src/daemons/rg_state.c
+++ b/rgmanager/src/daemons/rg_state.c
@@ -2061,7 +2061,7 @@ exhausted:
svcName);
if (svc_start(svcName, RG_START_RECOVER) == 0) {
*new_owner = me;
- return RG_EFAIL;
+ return 0;
}
}
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=55710722d15be…
Commit: 55710722d15be8f2eafdae472086182f88b2a0d5
Parent: df45c13985a5219c04356eea98179935ed9efb14
Author: Ryan McCabe <rmccabe(a)redhat.com>
AuthorDate: Thu Jun 28 15:18:09 2012 -0400
Committer: Ryan McCabe <rmccabe(a)redhat.com>
CommitterDate: Tue Jul 10 15:55:29 2012 -0400
rgmanager: Don't exit uncleanly when cman asks us to shut down.
Original patch from Lon rediffed to apply to the current tree:
"Previous to this, rgmanager would uncleanly exit if you
issued a 'service cman stop'. This patch makes it uncleanly
exit if 'cman_tool leave force' or a corosync/openais crash
occurs, but in a simple cman_tool leave, rgmanager will no
longer exit uncleanly."
Without this patch, issuing 'service cman stop' when rgmanager
is running will make it impossible to stop the cman service because
rgmanager will have exited without releasing its dlm lockspace.
This patch causes rgmanager to refuse shutdown when requested
by cman. Users must stop the rgmanager service before stopping
the cman service.
Acked-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Signed-off-by: Ryan McCabe <rmccabe(a)redhat.com>
---
rgmanager/src/clulib/msg_cluster.c | 7 ++++++-
1 files changed, 6 insertions(+), 1 deletions(-)
diff --git a/rgmanager/src/clulib/msg_cluster.c b/rgmanager/src/clulib/msg_cluster.c
index 8dc22d0..e864853 100644
--- a/rgmanager/src/clulib/msg_cluster.c
+++ b/rgmanager/src/clulib/msg_cluster.c
@@ -211,7 +211,7 @@ poll_cluster_messages(int timeout)
if (cman_dispatch(ch, 0) < 0) {
process_cman_event(ch, NULL,
- CMAN_REASON_TRY_SHUTDOWN, 0);
+ CMAN_REASON_TRY_SHUTDOWN, 1);
}
ret = 0;
}
@@ -987,6 +987,11 @@ process_cman_event(cman_handle_t handle, void *private, int reason, int arg)
printf("EVENT: %p %p %d %d\n", handle, private, reason, arg);
#endif
+ if (reason == CMAN_REASON_TRY_SHUTDOWN && !arg) {
+ cman_replyto_shutdown(handle, 0);
+ return;
+ }
+
/* Allocate queue node */
while ((node = malloc(sizeof(*node))) == NULL) {
sleep(1);
Gitweb: http://git.fedorahosted.org/git/?p=gfs2-utils.git;a=commitdiff;h=69b7a79e05…
Commit: 69b7a79e053aecaf7218134b8536bc0d7332a17e
Parent: 46388807a62c24e4156584481f7df5eb7c8620cd
Author: Andrew Price <anprice(a)redhat.com>
AuthorDate: Tue Jul 3 18:54:27 2012 +0100
Committer: Andrew Price <anprice(a)redhat.com>
CommitterDate: Tue Jul 3 19:02:53 2012 +0100
gfs2-utils: Make building gfs_controld optional
As of Fedora 17 gfs_controld isn't required and also doesn't build. This
patch adds an --enable-gfs_controld option to allow it to be built on
older systems while defaulting to the future norm of not building it.
Signed-off-by: Andrew Price <anprice(a)redhat.com>
---
README.build | 6 ++++++
configure.ac | 27 ++++++++++++++++++---------
group/Makefile.am | 6 +++++-
3 files changed, 29 insertions(+), 10 deletions(-)
diff --git a/README.build b/README.build
index 007f2d7..27e022a 100644
--- a/README.build
+++ b/README.build
@@ -21,6 +21,12 @@ Plus the following libraries:
- libcfg
- openaislib
+By default gfs_controld is not built as it is not required in Fedora 17 and
+later and cannot build on it. To re-enable it for older systems, use
+--enable-gfs_controld when running the configure script (see below). The
+library dependencies for gfs_controld are only required when this option is
+given.
+
Run the following commands:
./autogen.sh
diff --git a/configure.ac b/configure.ac
index 570f826..d56cfac 100644
--- a/configure.ac
+++ b/configure.ac
@@ -95,6 +95,10 @@ AC_ARG_ENABLE([debug],
[ --enable-debug enable debug build. ],
[ default="no" ])
+AC_ARG_ENABLE([gfs_controld],
+ [ --enable-gfs_controld build gfs_controld. ],
+ [ default="no" ])
+
AC_ARG_WITH([syslogfacility],
[ --syslogfacility=FACILITY
cluster default syslog facility. ],
@@ -114,15 +118,20 @@ AC_ARG_WITH([kernel],
KERNEL_CPPFLAGS="-I$KERNEL_DIR/include"
-PKG_CHECK_MODULES([corosync],[corosync])
-PKG_CHECK_MODULES([cpg],[libcpg])
-PKG_CHECK_MODULES([sackpt],[libSaCkpt])
-PKG_CHECK_MODULES([logt],[liblogthread])
-PKG_CHECK_MODULES([ccs],[libccs])
-PKG_CHECK_MODULES([cfg],[libcfg])
-PKG_CHECK_MODULES([fenced],[libfenced])
-PKG_CHECK_MODULES([dlmcontrol],[libdlmcontrol])
-PKG_CHECK_MODULES([quorum],[libquorum])
+# gfs_controld isn't required in the latest versions of cluster
+AM_CONDITIONAL([BUILD_GFS_CONTROLD], [test "x$enable_gfs_controld" = "xyes"])
+AS_IF([test "x$enable_gfs_controld" = "xyes"], [
+ PKG_CHECK_MODULES([corosync],[corosync])
+ PKG_CHECK_MODULES([cpg],[libcpg])
+ PKG_CHECK_MODULES([sackpt],[libSaCkpt])
+ PKG_CHECK_MODULES([logt],[liblogthread])
+ PKG_CHECK_MODULES([ccs],[libccs])
+ PKG_CHECK_MODULES([cfg],[libcfg])
+ PKG_CHECK_MODULES([fenced],[libfenced])
+ PKG_CHECK_MODULES([dlmcontrol],[libdlmcontrol])
+ PKG_CHECK_MODULES([quorum],[libquorum])
+])
+
PKG_CHECK_MODULES([zlib],[zlib])
# old versions of ncurses don't ship pkg-config files
diff --git a/group/Makefile.am b/group/Makefile.am
index 5b7d0f8..2904ce1 100644
--- a/group/Makefile.am
+++ b/group/Makefile.am
@@ -1,3 +1,7 @@
MAINTAINERCLEANFILES = Makefile.in
-SUBDIRS = libgfscontrol gfs_control gfs_controld man include
+if BUILD_GFS_CONTROLD
+DIR_GFS_CONTROLD = gfs_controld
+endif
+
+SUBDIRS = libgfscontrol gfs_control $(DIR_GFS_CONTROLD) man include