Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=55…
Commit: 55dd07589f0ace485c31905a6b0fc8538a00c2eb
Parent: 0000000000000000000000000000000000000000
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: 2010-07-30 11:46 +0000
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: 2010-07-30 11:46 +0000
annotated tag: cluster-3.0.14 has been created
at 55dd07589f0ace485c31905a6b0fc8538a00c2eb (tag)
tagging db1a93f32b36da95a19716ed1b6832b04f4c68dd (commit)
replaces cluster-3.0.13
cluster-3.0.14 release
Abhijith Das (1):
gfs2 manual pages: gfs2_convert manpage and documentation updates
Bob Peterson (4):
gfs2_edit restoremeta should not return 0 on failure
fsck.gfs2: unaligned access on ia64
GFS2: libgfs2 bitfit algorithm using wrong shift point
Make gfs2_edit show bit-to-block translation when viewing bitmaps
Carlos Eduardo Maiolino (1):
resource-agents: Remove nfs service temp directories
Christine Caulfield (2):
cman: Recalculate expected_votes on a config reload.
cman: Mention cman_tool version -S in man page
David Teigland (2):
dlm_controld: fix plock checkpoint signatures
dlm_controld: fix plock owner in checkpoints
Fabio M. Di Nitto (17):
resource agents: Remove bashisms from resource scripts
cman init: more LSB compliance
rgmanager init: more LSB compliant bits
cman: add default config snippet for cman init script
gfs2 init: make the init script LSB compliant
cman config: copy all logging objects to the top level tree
fence: rename ibmblade to bladecenter_snmp
fence agents: add compatibility symlink for ibmblade / bladecenter_snmp
cman: allow init script to pass options to fenced
config: fix several issues with reload operation
cman-preconfig: better handle of logging reload operation
cman-preconfig: better handle of logging reload operation (part 2)
config: free new config if we cannot find the config_version
config: more cman_tool config reload cleanup
cman: simplify message broadcasting handling
config: better error report when autodetecting config version errors
cman: fix consensus calculation
Federico Simoncelli (1):
config: Add missing cman_label
Guido Günther (1):
fence_rsb: Raise exceptions not strings
Jan Friesse (1):
cman: check for new config only once per second
Lon Hohberger (37):
config: Add missing resource docs to cluster.rng
config: Clean up recursion and documentation
config: Add documentation for interface tag
Revert "config: Clean up recursion and documentation"
Revert "config: Add missing resource docs to cluster.rng"
resource-agents: Add missing resource docs
config: Add missing resource docs to cluster.rng
config: Clean up recursion and documentation
resource-agents: Clean up recursion and documentation
rgmanager: Pass timeouts to resource agents
resource-agents: Make vm.sh use stop/start timeouts
rgmanager: Use sysrq-b to reboot
Revert "resource-agents: Make vm.sh use stop/start timeouts"
resource-agents: Make vm.sh use stop/start timeouts
resource-agents: fix incorrect link resolution in fs-lib
rgmanager: Make clustat -f not query CCS/objdb
resource-agents: Add NFSv4 support
resource-agents: Add NFSv4 agent to installation
config: Update cluster schema
config: Update cluster LDIF schema
doc: Add auto-generated cluster.conf reference
doc: Install cluster.conf reference
rgmanager: Make clulog filter correctly
resource-agents: Add resource type to logging
rgmanager: Man page improvements
resource-agents: Allow other values for "yes"
config: Allow multiple logging_daemon tags
config: Add doc for cman_label attribute
config: Update LDIF schema
cman: Recalculate quorum on config change
config: Add tomcat-6 resource agent to schema
config: Add tomcat-6 to ldif schema
doc: Add tomcat-6 to cluster_conf.html
cman: Recalculate quorum on quorum device vote changes
cman: Deprecate specifying config version to cman_tool
config: Regenerate fencing definitions for XML schema
config: Exclude fence_scsi from generated section
Marek 'marx' Grac (8):
fence_wti: support non-default TCP ports
fence_apc: fence_apc fails for some port numbers
resource-agents: Use SIGQUIT if SIGTERM was not fast enough
resource-agents: new agent for tomcat 6
resource-agents: change build system to include tomcat6 RA
resource-agents: RA for psql does not work correctly with netmask
fence_ilo: will throw exception if user does not have power privileges
fencing: Not all parameters appear in metadata
Masahiro Matsuya (1):
resource-agents: Fix migration mapping behavior w/ virsh
Ryan O'Hara (1):
Fix open flags so that logfile won't be truncated each time we open it.
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=fa…
Commit: fa36367db8a3650a1cc761b7570ef2b4c0b3b88b
Parent: b2277bbc5295f52944a6901695cee71c7f0ee018
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Thu Jul 29 15:47:26 2010 -0400
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Thu Jul 29 15:47:26 2010 -0400
fence-agents: Update fence_ack_manual man page
Resolves: rhbz#578604
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
fence/man/fence_ack_manual.8 | 8 ++++++++
1 files changed, 8 insertions(+), 0 deletions(-)
diff --git a/fence/man/fence_ack_manual.8 b/fence/man/fence_ack_manual.8
index fa9c4ed..29f947f 100644
--- a/fence/man/fence_ack_manual.8
+++ b/fence/man/fence_ack_manual.8
@@ -23,6 +23,10 @@ then run fence_ack_manual. Running fence_ack_manual allows the cluster to
continue with recovery of the fenced machine. The victim may be disconnected
from storage rather than resetting it.
+fence_ack_manual may also be used to allow a cluster to resume operation
+after fencing has failed for a host. This is not related to fence_manual(8),
+requires no configuration, and may be used whenever fencing has failed.
+
.SH OPTIONS
.TP
\fB-h\fP
@@ -37,6 +41,10 @@ Name of node that has been reset or disconnected from storage.
\fB-s\fP \fIIPaddress\fP
IP address of the machine which has been reset or disconnected from storage. (Deprecated; use -n instead.)
.TP
+\fB-e\fP
+Emergency fencing override. This may be used when fencing a given host
+is failing in order to restore the cluster to operation.
+.TP
\fB-V\fP
Print out a version message, then exit.
.SH SEE ALSO
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=94…
Commit: 94085eace39e248040cf7069c7294178c6f944ce
Parent: 8dff6d4626831bf941a32ee75e9b802fc51a0e8f
Author: Masahiro Matsuya <mmatsuya(a)redhat.com>
AuthorDate: Fri Jul 9 14:40:51 2010 -0400
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Thu Jul 29 14:13:46 2010 -0400
resource-agents: Fix migration mapping behavior w/ virsh
Consider a two-node cluster. The hostnames of the nodes
are 'sk010001' and 'sk010002'. Each node has two bonded
network interfaces for public and private communications.
Each node's hostname matches the hostname of its IP address
on the public network.
Node1: sk010001
bond0 (for public network) : 172.22.51.1 sk010001
bond2 (for private network): 172.22.48.131 sk010001-hb
Node2: sk010002
bond0 (for public network) : 172.22.51.2 sk010002
bond2 (for private network): 172.22.48.132 sk010002-hb
In cluster.conf, a migration mapping is used to specify
that the private interfaces should be used for migration traffic.
Unfortunately, when doing a live migration, while the traffic
should use the -hb interfaces, bond0 is used.
This is because the vm.sh agent uses the following command
for live migration from sk010001 to sk010002:
virsh migrate --live su21k003 \
qemu+ssh://sk010002-hb/system
This is not enough to ensure the migration traffic goes over the
private interface. The --migrateuri option of 'virsh migrate' is
needed for that. The following command should be executed instead:
virsh migrate --live su21k003 \
qemu+ssh://sk010002-hb/system tcp:sk010002-hb
Resolves: rhbz#596016
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
rgmanager/src/resources/vm.sh | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/rgmanager/src/resources/vm.sh b/rgmanager/src/resources/vm.sh
index c8be516..61ced72 100755
--- a/rgmanager/src/resources/vm.sh
+++ b/rgmanager/src/resources/vm.sh
@@ -787,6 +787,7 @@ validate_all()
# Virsh makes it easier to do this. Really.
if [ "$OCF_RESKEY_hypervisor" = "qemu" ]; then
export OCF_RESKEY_migration_uri="qemu+ssh://%s/system"
+ export migrateuriopt="tcp:%s"
fi
# I just need to believe in it more.
@@ -822,7 +823,7 @@ virsh_migrate()
err=$($cmd 2>&1 | head -1; exit ${PIPESTATUS[0]})
rv=$?
elif [ "$OCF_RESKEY_hypervisor" = "qemu" ]; then
- cmd="virsh migrate $migrate_opt $OCF_RESKEY_name $(printf $OCF_RESKEY_migration_uri $target)"
+ cmd="virsh migrate $migrate_opt $OCF_RESKEY_name $(printf $OCF_RESKEY_migration_uri $target) $(printf $migrateuriopt $target)"
ocf_log debug "$cmd"
err=$($cmd 2>&1 | head -1; exit ${PIPESTATUS[0]})
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=8d…
Commit: 8dff6d4626831bf941a32ee75e9b802fc51a0e8f
Parent: fbc82625c84ca9dfd99f7d8e3e051c53a63bf523
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Wed Apr 14 17:29:54 2010 -0400
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Thu Jul 29 14:06:48 2010 -0400
rgmanager: Kill processes correctly w/ force_unmount
The killMountProcesses function was written about 10 years ago.
It was designed to work with lsof or fuser, and to log messages
for each process killed. This is not a bad idea. The problem
is that parsing the output of either is error-prone,
particularly when mountpoints are similar to other directories
on the system.
A far less error-prone method of cleaning up a mount point is to
use 'fuser -kvm' on it. Not only is this less error-prone, it's
a good bit faster at doing its job than iterating through output
in a shell script.
This patch makes force_unmount very reliable at killing the correct
processes, but we lose the logging functionality. It is a fair
trade-off because there have been several bugs in the
killMountProcesses function over the years which have caused several
problems.
Resolves: bz573705
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
rgmanager/src/resources/clusterfs.sh | 113 ++----------------------------
rgmanager/src/resources/fs.sh | 124 ++-------------------------------
rgmanager/src/resources/netfs.sh | 128 ++++------------------------------
3 files changed, 25 insertions(+), 340 deletions(-)
diff --git a/rgmanager/src/resources/clusterfs.sh b/rgmanager/src/resources/clusterfs.sh
index 250978e..6a837b1 100755
--- a/rgmanager/src/resources/clusterfs.sh
+++ b/rgmanager/src/resources/clusterfs.sh
@@ -504,113 +504,6 @@ isAlive()
#
-# killMountProcesses device mount_point
-#
-# Using lsof or fuser try to unmount the mount by killing of the processes
-# that might be keeping it busy.
-#
-killMountProcesses()
-{
- typeset -i ret=$SUCCESS
- typeset have_lsof=""
- typeset have_fuser=""
- typeset try
-
- if [ $# -ne 1 ]; then
- ocf_log err \
- "Usage: killMountProcesses mount_point"
- return $FAIL
- fi
-
- typeset mp=$1
-
- ocf_log notice "Forcefully unmounting $mp"
-
- #
- # Not all distributions have lsof. If not use fuser. If it
- # does, try both.
- #
- file=$(which lsof 2>/dev/null)
- if [ -f "$file" ]; then
- have_lsof=$YES
- fi
-
- file=$(which fuser 2>/dev/null)
- if [ -f "$file" ]; then
- have_fuser=$YES
- fi
-
- if [ -z "$have_lsof" -a -z "$have_fuser" ]; then
- ocf_log warn \
- "Cannot forcefully unmount $mp; cannot find lsof or fuser commands"
- return $FAIL
- fi
-
- for try in 1 2 3; do
- if [ -n "$have_lsof" ]; then
- #
- # Use lsof to free up mount point
- #
- while read command pid user
- do
- if [ -z "$pid" ]; then
- continue
- fi
-
- if [ $try -eq 1 ]; then
- ocf_log warn \
- "killing process $pid ($user $command $mp)"
- elif [ $try -eq 3 ]; then
- ocf_log crit \
- "Could not clean up mountpoint $mp"
- ret=$FAIL
- fi
-
- if [ $try -gt 1 ]; then
- kill -9 $pid
- else
- kill -TERM $pid
- fi
- done < <(lsof -b 2>/dev/null | \
- grep -E "$mp(/.*|)\$" | \
- awk '{print $1,$2,$3}' | \
- sort -u -k 1,3)
- elif [ -n "$have_fuser" ]; then
- #
- # Use fuser to free up mount point
- #
- while read command pid user
- do
- if [ -z "$pid" ]; then
- continue
- fi
-
- if [ $try -eq 1 ]; then
- ocf_log warn \
- "killing process $pid ($user $command $mp)"
- elif [ $try -eq 3 ]; then
- ocf_log crit \
- "Could not clean up mount point $mp"
- ret=$FAIL
- fi
-
- if [ $try -gt 1 ]; then
- kill -9 $pid
- else
- kill -TERM $pid
- fi
- done < <(fuser -vm $mp 2>&1 | \
- grep -v PID | \
- sed 's;^'$mp:';;' | \
- awk '{print $4,$2,$1}' | \
- sort -u -k 1,3)
- fi
- done
-
- return $ret
-}
-
-#
# startFilesystem
#
startFilesystem() {
@@ -891,7 +784,11 @@ stop: Could not match $OCF_RESKEY_device with a real device"
umount_failed=yes
if [ "$force_umount" ]; then
- killMountProcesses $mp
+ if [ $try -eq 1 ]; then
+ fuser -TERM -kvm "$mp"
+ else
+ fuser -kvm "$mp"
+ fi
fi
if [ $try -ge $max_tries ]; then
diff --git a/rgmanager/src/resources/fs.sh b/rgmanager/src/resources/fs.sh
index 900dca5..a2148f4 100755
--- a/rgmanager/src/resources/fs.sh
+++ b/rgmanager/src/resources/fs.sh
@@ -695,115 +695,8 @@ isAlive()
#
-# killMountProcesses mount_point
-#
-# Using lsof or fuser try to unmount the mount by killing of the processes
-# that might be keeping it busy.
-#
-killMountProcesses()
-{
- typeset -i ret=$SUCCESS
- typeset have_lsof=""
- typeset have_fuser=""
- typeset try
-
- if [ $# -ne 1 ]; then
- ocf_log err \
- "Usage: killMountProcesses mount_point"
- return $FAIL
- fi
-
- typeset mp=$1
-
- ocf_log notice "Forcefully unmounting $mp"
-
- #
- # Not all distributions have lsof. If not use fuser. If it
- # does, try both.
- #
- file=$(which lsof 2>/dev/null)
- if [ -f "$file" ]; then
- have_lsof=$YES
- fi
-
- file=$(which fuser 2>/dev/null)
- if [ -f "$file" ]; then
- have_fuser=$YES
- fi
-
- if [ -z "$have_lsof" -a -z "$have_fuser" ]; then
- ocf_log warn \
- "Cannot forcefully unmount $mp; cannot find lsof or fuser commands"
- return $FAIL
- fi
-
- for try in 1 2 3; do
- if [ -n "$have_lsof" ]; then
- #
- # Use lsof to free up mount point
- #
- while read command pid user
- do
- if [ -z "$pid" ]; then
- continue
- fi
-
- if [ $try -eq 1 ]; then
- ocf_log warn \
- "killing process $pid ($user $command $mp)"
- elif [ $try -eq 3 ]; then
- ocf_log crit \
- "Could not clean up mountpoint $mp"
- ret=$FAIL
- fi
-
- if [ $try -gt 1 ]; then
- kill -9 $pid
- else
- kill -TERM $pid
- fi
- done < <(lsof -bn 2>/dev/null | \
- grep -E " $mp(/| |$)" | \
- awk '{print $1,$2,$3}' | \
- sort -u -k 1,3)
- elif [ -n "$have_fuser" ]; then
- #
- # Use fuser to free up mount point
- #
- while read command pid user
- do
- if [ -z "$pid" ]; then
- continue
- fi
-
- if [ $try -eq 1 ]; then
- ocf_log warn \
- "killing process $pid ($user $command $mp)"
- elif [ $try -eq 3 ]; then
- ocf_log crit \
- "Could not clean up mount point $mp"
- ret=$FAIL
- fi
-
- if [ $try -gt 1 ]; then
- kill -9 $pid
- else
- kill -TERM $pid
- fi
- done < <(fuser -vm $mp 2>&1 | \
- grep -v PID | \
- sed 's;^'$mp:';;' | \
- awk '{print $4,$2,$1}' | \
- sort -u -k 1,3)
- fi
- done
-
- return $ret
-}
-
-
-#
-# Enable quotas on the mount point if the user requested them
+# Decide which quota options are enabled and return a string
+# which we can pass to quotaon
#
enable_fs_quotas()
{
@@ -1176,8 +1069,9 @@ stop: Could not match $OCF_RESKEY_device with a real device"
umount_failed=yes
if [ "$force_umount" ]; then
- killMountProcesses $mp
if [ $try -eq 1 ]; then
+ fuser -TERM -kvm "$mp"
+
if [ "$OCF_RESKEY_nfslock" = "yes" ] || \
[ "$OCF_RESKEY_nfslock" = "1" ]; then
ocf_log warning \
@@ -1189,15 +1083,11 @@ stop: Could not match $OCF_RESKEY_device with a real device"
notify_list_store $mp/.clumanager/statd
nfslock_reclaim=1
fi
+ else
+ fuser -kvm "$mp"
fi
fi
- if [ $try -ge $max_tries ]; then
- done=$YES
- else
- sleep $sleep_time
- let try=try+1
- fi
;;
*)
return $FAIL
@@ -1206,7 +1096,7 @@ stop: Could not match $OCF_RESKEY_device with a real device"
if [ $try -ge $max_tries ]; then
done=$YES
- elif [ "$done" -ne "$YES" ]; then
+ elif [ "$done" != "$YES" ]; then
sleep $sleep_time
let try=try+1
fi
diff --git a/rgmanager/src/resources/netfs.sh b/rgmanager/src/resources/netfs.sh
index bd391b6..2cc6863 100755
--- a/rgmanager/src/resources/netfs.sh
+++ b/rgmanager/src/resources/netfs.sh
@@ -359,113 +359,6 @@ isMounted () {
}
#
-# killMountProcesses mount_point
-#
-# Using lsof or fuser try to unmount the mount by killing of the processes
-# that might be keeping it busy.
-#
-killMountProcesses()
-{
- typeset -i ret=$SUCCESS
- typeset have_lsof=""
- typeset have_fuser=""
- typeset try
-
- if [ $# -ne 1 ]; then
- ocf_log err \
- "Usage: killMountProcesses mount_point"
- return $FAIL
- fi
-
- typeset mp=$1
-
- ocf_log notice "Forcefully unmounting $mp"
-
- #
- # Not all distributions have lsof. If not use fuser. If it
- # does, try both.
- #
- file=$(which lsof 2>/dev/null)
- if [ -f "$file" ]; then
- have_lsof=$YES
- fi
-
- file=$(which fuser 2>/dev/null)
- if [ -f "$file" ]; then
- have_fuser=$YES
- fi
-
- if [ -z "$have_lsof" -a -z "$have_fuser" ]; then
- ocf_log warn \
- "Cannot forcefully unmount $mp; cannot find lsof or fuser commands"
- return $FAIL
- fi
-
- for try in 1 2 3; do
- if [ -n "$have_lsof" ]; then
- #
- # Use lsof to free up mount point
- #
- while read command pid user
- do
- if [ -z "$pid" ]; then
- continue
- fi
-
- if [ $try -eq 1 ]; then
- ocf_log warn \
- "killing process $pid ($user $command $mp)"
- elif [ $try -eq 3 ]; then
- ocf_log crit \
- "Could not clean up mountpoint $mp"
- ret=$FAIL
- fi
-
- if [ $try -gt 1 ]; then
- kill -9 $pid
- else
- kill -TERM $pid
- fi
- done < <(lsof -w -bn 2>/dev/null | \
- grep -w -E "$mp(/.*|)\$" | \
- awk '{print $1,$2,$3}' | \
- sort -u -k 1,3)
- elif [ -n "$have_fuser" ]; then
- #
- # Use fuser to free up mount point
- #
- while read command pid user
- do
- if [ -z "$pid" ]; then
- continue
- fi
-
- if [ $try -eq 1 ]; then
- ocf_log warn \
- "killing process $pid ($user $command $mp)"
- elif [ $try -eq 3 ]; then
- ocf_log crit \
- "Could not clean up mount point $mp"
- ret=$FAIL
- fi
-
- if [ $try -gt 1 ]; then
- kill -9 $pid
- else
- kill -TERM $pid
- fi
- done < <(fuser -vm $mp 2>&1 | \
- grep -v PID | \
- sed 's;^'$mp:';;' | \
- awk '{print $4,$2,$1}' | \
- sort -u -k 1,3)
- fi
- done
-
- return $ret
-}
-
-#
# startNFSFilesystem
#
startNFSFilesystem() {
@@ -659,15 +552,20 @@ stopNFSFilesystem() {
umount_failed=yes
- if [ "$force_umount" ]; then
- killMountProcesses $mp
- fi
+ if [ "$force_umount" ]; then
+ if [ $try -eq 1 ]; then
+ fuser -TERM -kvm "$mp"
+ else
+ fuser -kvm "$mp"
+ fi
+ fi
- if [ $try -ge $max_tries ]; then
- done=$YES
- else
- sleep $sleep_time
- let try=try+1
+
+ if [ $try -ge $max_tries ]; then
+ done=$YES
+ else
+ sleep $sleep_time
+ let try=try+1
fi
;;
*)