Gitweb: http://git.fedorahosted.org/git/gfs2-utils.git?p=gfs2-utils.git;a=commitdiff;h=a93b053d08c2173ad3c834c8b73c9831aa8f514a
Commit: a93b053d08c2173ad3c834c8b73c9831aa8f514a
Parent: c44c141dcf45e6370cc5e50680d72ad842971942
Author: Bob Peterson <rpeterso(a)redhat.com>
AuthorDate: Wed Jan 19 13:59:41 2011 -0600
Committer: Bob Peterson <rpeterso(a)redhat.com>
CommitterDate: Wed Jan 19 13:59:41 2011 -0600
gfs2-utils: minor corrections to README.build
When I tried to build gfs2-utils as per these instructions, I discovered
I needed openaislib, so I added that dependency to the file. I also
fixed a typo in corosynclib.
---
README.build | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/README.build b/README.build
index 55efdf6..007f2d7 100644
--- a/README.build
+++ b/README.build
@@ -15,10 +15,11 @@ Plus the following libraries:
- liblogthread
- libfenced
- pthread (for gfs_controld)
- - corosynlib (for gfs_controld)
+ - corosynclib (for gfs_controld)
- libquorum
- libcpg
- libcfg
+ - openaislib
Run the following commands:
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=d3af317da0bccfe12814c06c0723c0dd1657df17
Commit: d3af317da0bccfe12814c06c0723c0dd1657df17
Parent: 1e9748b8e97b65454f4adbefbd684a44a2a13a6f
Author: Bob Peterson <rpeterso(a)redhat.com>
AuthorDate: Wed Jan 19 12:45:20 2011 -0600
Committer: Bob Peterson <rpeterso(a)redhat.com>
CommitterDate: Wed Jan 19 12:57:06 2011 -0600
GFS2: mkfs.gfs2 segfaults with 18.55TB and -b512
The problem was that mkfs.gfs2 attempted to keep the number of resource
groups as low as possible, but in so doing, it chose a rgrp size that
required an enormous number of bitmap blocks per rgrp. In fact, it
tried to use more than it could possibly address, given the small
block size. Therefore, an error was flagged and mkfs.gfs2 aborted.
This patch ensures that mkfs.gfs2 chooses a rgrp size that takes into
account the maximum number of bitmap blocks. If it hits the max
number of bitmap blocks, it backs off on its rgrp size and uses a
smaller size. The smaller size rgrp ensures that a valid number of
bitmap blocks will be needed to represent all the blocks in each rgrp.
rhbz#624535
---
gfs2/libgfs2/fs_geometry.c | 39 ++++++++++++++++++++++++++++++++-------
1 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/gfs2/libgfs2/fs_geometry.c b/gfs2/libgfs2/fs_geometry.c
index a06e8a2..a15c31f 100644
--- a/gfs2/libgfs2/fs_geometry.c
+++ b/gfs2/libgfs2/fs_geometry.c
@@ -25,21 +25,42 @@
static uint64_t how_many_rgrps(struct gfs2_sbd *sdp, struct device *dev, int rgsize_specified)
{
uint64_t nrgrp;
+ uint32_t rgblocks1, rgblocksn, bitblocks1, bitblocksn;
+ int bitmap_overflow = 0;
while (TRUE) {
nrgrp = DIV_RU(dev->length, (sdp->rgsize << 20) / sdp->bsize);
- if (rgsize_specified || /* If user specified an rg size or */
- nrgrp <= GFS2_EXCESSIVE_RGS || /* not an excessive # of rgs or */
- sdp->rgsize >= 2048) /* we've reached the max rg size */
+ /* check to see if the rg length overflows max # bitblks */
+ rgblocksn = dev->length / nrgrp;
+ rgblocks2bitblocks(sdp->bsize, &rgblocksn, &bitblocksn);
+ /* calculate size of the first rgrp */
+ rgblocks1 = dev->length - (nrgrp - 1) * (dev->length / nrgrp);
+ rgblocks2bitblocks(sdp->bsize, &rgblocks1, &bitblocks1);
+ if (bitblocks1 > 2149 || bitblocksn > 2149) {
+ bitmap_overflow = 1;
+ if (sdp->rgsize <= GFS2_DEFAULT_RGSIZE) {
+ fprintf(stderr, "error: It is not possible "
+ "to use the entire device with "
+ "block size %u bytes.\n",
+ sdp->bsize);
+ exit(-1);
+ }
+ sdp->rgsize -= GFS2_DEFAULT_RGSIZE; /* smaller rgs */
+ continue;
+ }
+ if (bitmap_overflow ||
+ rgsize_specified || /* If user specified an rg size or */
+ nrgrp <= GFS2_EXCESSIVE_RGS || /* not an excessive # or */
+ sdp->rgsize >= 2048) /* we reached the max rg size */
break;
- sdp->rgsize += GFS2_DEFAULT_RGSIZE; /* Try again w/bigger rgs */
+ sdp->rgsize += GFS2_DEFAULT_RGSIZE; /* bigger rgs */
}
if (sdp->debug)
- printf(" rg sz = %"PRIu32"\n nrgrp = %"PRIu64"\n", sdp->rgsize,
- nrgrp);
+ printf(" rg sz = %"PRIu32"\n nrgrp = %"PRIu64"\n",
+ sdp->rgsize, nrgrp);
return nrgrp;
}
@@ -210,7 +231,11 @@ void build_rgrps(struct gfs2_sbd *sdp, int do_write)
rl->rg.rg_header.mh_format = GFS2_FORMAT_RG;
rl->rg.rg_free = rgblocks;
- gfs2_compute_bitstructs(sdp, rl);
+ if (gfs2_compute_bitstructs(sdp, rl)) {
+ fprintf(stderr, "%s: Unable to build resource groups "
+ "with these characteristics.\n", __FUNCTION__);
+ exit(-1);
+ }
if (do_write) {
for (x = 0; x < bitblocks; x++) {
Gitweb: http://git.fedorahosted.org/git/gfs2-utils.git?p=gfs2-utils.git;a=commitdiff;h=c44c141dcf45e6370cc5e50680d72ad842971942
Commit: c44c141dcf45e6370cc5e50680d72ad842971942
Parent: 84f68cec946fa50148815062512109c687958af5
Author: Bob Peterson <rpeterso(a)redhat.com>
AuthorDate: Wed Jan 19 12:45:20 2011 -0600
Committer: Bob Peterson <rpeterso(a)redhat.com>
CommitterDate: Wed Jan 19 12:45:20 2011 -0600
GFS2: mkfs.gfs2 segfaults with 18.55TB and -b512
The problem was that mkfs.gfs2 attempted to keep the number of resource
groups as low as possible, but in so doing, it chose a rgrp size that
required an enormous number of bitmap blocks per rgrp. In fact, it
tried to use more than it could possibly address, given the small
block size. Therefore, an error was flagged and mkfs.gfs2 aborted.
This patch ensures that mkfs.gfs2 chooses a rgrp size that takes into
account the maximum number of bitmap blocks. If it hits the max
number of bitmap blocks, it backs off on its rgrp size and uses a
smaller size. The smaller size rgrp ensures that a valid number of
bitmap blocks will be needed to represent all the blocks in each rgrp.
rhbz#624535
---
gfs2/libgfs2/fs_geometry.c | 39 ++++++++++++++++++++++++++++++++-------
1 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/gfs2/libgfs2/fs_geometry.c b/gfs2/libgfs2/fs_geometry.c
index 82b4ff4..9c69661 100644
--- a/gfs2/libgfs2/fs_geometry.c
+++ b/gfs2/libgfs2/fs_geometry.c
@@ -27,21 +27,42 @@
static uint64_t how_many_rgrps(struct gfs2_sbd *sdp, struct device *dev, int rgsize_specified)
{
uint64_t nrgrp;
+ uint32_t rgblocks1, rgblocksn, bitblocks1, bitblocksn;
+ int bitmap_overflow = 0;
while (TRUE) {
nrgrp = DIV_RU(dev->length, (sdp->rgsize << 20) / sdp->bsize);
- if (rgsize_specified || /* If user specified an rg size or */
- nrgrp <= GFS2_EXCESSIVE_RGS || /* not an excessive # of rgs or */
- sdp->rgsize >= 2048) /* we've reached the max rg size */
+ /* check to see if the rg length overflows max # bitblks */
+ rgblocksn = dev->length / nrgrp;
+ rgblocks2bitblocks(sdp->bsize, &rgblocksn, &bitblocksn);
+ /* calculate size of the first rgrp */
+ rgblocks1 = dev->length - (nrgrp - 1) * (dev->length / nrgrp);
+ rgblocks2bitblocks(sdp->bsize, &rgblocks1, &bitblocks1);
+ if (bitblocks1 > 2149 || bitblocksn > 2149) {
+ bitmap_overflow = 1;
+ if (sdp->rgsize <= GFS2_DEFAULT_RGSIZE) {
+ fprintf(stderr, "error: It is not possible "
+ "to use the entire device with "
+ "block size %u bytes.\n",
+ sdp->bsize);
+ exit(-1);
+ }
+ sdp->rgsize -= GFS2_DEFAULT_RGSIZE; /* smaller rgs */
+ continue;
+ }
+ if (bitmap_overflow ||
+ rgsize_specified || /* If user specified an rg size or */
+ nrgrp <= GFS2_EXCESSIVE_RGS || /* not an excessive # or */
+ sdp->rgsize >= 2048) /* we reached the max rg size */
break;
- sdp->rgsize += GFS2_DEFAULT_RGSIZE; /* Try again w/bigger rgs */
+ sdp->rgsize += GFS2_DEFAULT_RGSIZE; /* bigger rgs */
}
if (sdp->debug)
- printf(" rg sz = %"PRIu32"\n nrgrp = %"PRIu64"\n", sdp->rgsize,
- nrgrp);
+ printf(" rg sz = %"PRIu32"\n nrgrp = %"PRIu64"\n",
+ sdp->rgsize, nrgrp);
return nrgrp;
}
@@ -212,7 +233,11 @@ void build_rgrps(struct gfs2_sbd *sdp, int do_write)
rl->rg.rg_header.mh_format = GFS2_FORMAT_RG;
rl->rg.rg_free = rgblocks;
- gfs2_compute_bitstructs(sdp, rl);
+ if (gfs2_compute_bitstructs(sdp, rl)) {
+ fprintf(stderr, "%s: Unable to build resource groups "
+ "with these characteristics.\n", __FUNCTION__);
+ exit(-1);
+ }
if (do_write) {
for (x = 0; x < bitblocks; x++) {
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=1e9748b8e97b65454f4adbefbd684a44a2a13a6f
Commit: 1e9748b8e97b65454f4adbefbd684a44a2a13a6f
Parent: cb9335662ca7fac0cbaae5e463605502e189c6f0
Author: Ryan O'Hara <rohara(a)redhat.com>
AuthorDate: Tue Jan 18 14:21:02 2011 -0600
Committer: Ryan O'Hara <rohara(a)redhat.com>
CommitterDate: Wed Jan 19 10:40:40 2011 -0600
fence_scsi: identify dm-multipath devices correctly
Previously, fence_scsi would only identify /dev/dm* devices as being
multipath devices. This prevented the use of "friendly" dm-multipath
names (eg. /dev/mapper/mpath1). Since fence_scsi can now be passed
devices to operate on, it should be able to handle "friendly" names,
too. This patch fixes the problem by getting the absolute path of the
device at registration time.
Resolves: rhbz#644389
Signed-off-by: Ryan O'Hara <rohara(a)redhat.com>
Reviewed-by: Lon Hohberger <lhh(a)redhat.com>
---
fence/agents/scsi/fence_scsi.pl | 5 +++++
1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/fence/agents/scsi/fence_scsi.pl b/fence/agents/scsi/fence_scsi.pl
index 9ee8917..8ddde4e 100644
--- a/fence/agents/scsi/fence_scsi.pl
+++ b/fence/agents/scsi/fence_scsi.pl
@@ -1,5 +1,6 @@
#!/usr/bin/perl
+use Cwd 'realpath';
use File::Basename;
use Getopt::Std;
use POSIX;
@@ -112,6 +113,8 @@ sub do_register ($$$)
my $self = (caller(0))[3];
my ($host_key, $node_key, $dev) = @_;
+ $dev = realpath ($dev);
+
if (substr ($dev, 5) =~ /^dm/) {
my @slaves = get_mpath_slaves ($dev);
foreach (@slaves) {
@@ -139,6 +142,8 @@ sub do_register_ignore ($$)
my $self = (caller(0))[3];
my ($node_key, $dev) = @_;
+ $dev = realpath ($dev);
+
if (substr ($dev, 5) =~ /^dm/) {
my @slaves = get_mpath_slaves ($dev);
foreach (@slaves) {
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=cb9335662ca7fac0cbaae5e463605502e189c6f0
Commit: cb9335662ca7fac0cbaae5e463605502e189c6f0
Parent: d65bbd39b052b56a1277ae1b3d7472b17140a9e5
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Wed Jan 19 11:09:57 2011 +0100
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Wed Jan 19 11:09:57 2011 +0100
ccs_tool: deprecate editing capabilities
ccs_tool/edit is now deprecated/unsupported in favour of the ccs CLI tool
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Reviewed-by: Lon Hohberger <lhh(a)redhat.com>
---
config/tools/man/ccs_tool.8 | 132 +++----------------------------------------
1 files changed, 9 insertions(+), 123 deletions(-)
diff --git a/config/tools/man/ccs_tool.8 b/config/tools/man/ccs_tool.8
index 0f52f18..056dcb3 100644
--- a/config/tools/man/ccs_tool.8
+++ b/config/tools/man/ccs_tool.8
@@ -9,8 +9,13 @@ ccs_tool \- The tool used to make online queries to the cluster configuration.
.SH "DESCRIPTION"
\fBccs_tool\fP is part of the Cluster Configuration System (CCS). It used
-to peform different kind of queries to the cluster configuration and has support
-for some cluster.conf editing functions.
+to peform different kind of queries to the cluster configuration.
+
+.SH "WARNING"
+
+All \fBccs_tool\fP editing capabilities are now obsoleted and unsupported.
+Please see also \fBccs\fP package and documentation for a more complete
+implementation of cluster.conf CLI editor.
.SH "OPTIONS"
.TP
@@ -27,125 +32,6 @@ sub\-commands have their own options, see below for more detail
\fBquery\fP \fI<xpath query>\fP
Perform an xpath query on running cluster configuration.
-.TP
-\fBaddnode\fP [options] \fI<node> [<fenceoption=value>]...\fP
-Adds a new node to the cluster configuration file. Fencing device options
-are specified as key=value pairs (as many as required) and are entered into the
-configuration file as is. See the documentation for your fencing agent for more
-details (eg a powerswitch fence device may need to know which port the node is
-connected to).
-.br
-\fIOptions:\fP
-.br
-\-v <votes> Number of votes for this node (mandatory)
-.br
-\-n <nodeid> Node id for this node (optional)
-.br
-\-i <interface> Network interface to use for this node. Mandatory if the cluster
-is using multicast as transport. Forbidden if not.
-.br
-\-m <multicast> Multicast address for cluster. Only allowed on the first node to
-be added to the file. Subsequent nodes will use either multicast or broadcast
-depending on the properties of the first node.
-.br
-\-f <fencedevice> Name of fence device to use for this node. The fence device
-section must already have been added to the file, probably using the addfence command.
-.br
-\-c <file> Config file to use. Defaults to /etc/cluster/cluster.conf
-.br
-\-o <file> Output file. Defaults to the same as -c
-
-
-
-.TP
-\fBdelnode\fP [options] \fI<node>\fP
-Delete a node from the cluster configuration file. Note: there is no
-"edit" command so to change the properties of a node you must delete it
-and add it back in with the new properties.
-.br
-\fIOptions:\fP
-.br
-\-c <file> Config file to use. Defaults to /etc/cluster/cluster.conf
-.br
-\-o <file> Output file. Defaults to the same as -c
-
-
-
-.TP
-\fBaddfence\fP [options] \fI<name> <agent> [<option>=<value>]...\fP
-Adds a new fence device section to the cluster configuration file. <agent> is the
-name of the fence agent that controls the device. the options following are entered
-as key-value pairs. See the fence agent documentation for details about these. eg:
-you may need to enter the IP address and username/password for a powerswitch fencing
-device.
-.br
-\fIOptions:\fP
-.br
-\-c <file> Config file to use. Defaults to /etc/cluster/cluster.conf
-.br
-\-o <file> Output file. Defaults to the same as -c
-
-.TP
-\fBdelfence\fP [options] \fI<node>\fP
-Deletes a fencing device from the cluster configuration file.
-delfence will allow you to remove a fence device that is in use by nodes.
-This is to allow changes to be made, but be aware that it may produce an
-invalid configuration file if you don't add it back in again.
-.br
-\fIOptions:\fP
-.br
-\-c <file> Config file to use. Defaults to /etc/cluster/cluster.conf
-.br
-\-o <file> Output file. Defaults to the same as -c
-
-
-.TP
-\fBlsnode [options] \fP
-List the nodes in the configuration file. This is (hopefully obviously) not
-necessarily the same as the nodes currently in the cluster, but it should
-be a superset.
-.br
-\fIOptions:\fP
-.br
-\-v Verbose. Lists all the properties of the node, and the
-node-specific properties of the fence device too.
-.br
-\-c <file> Config file to use. Defaults to /etc/cluster/cluster.conf
-
-
-.TP
-\fBlsfence [options] \fP
-List all the fence devices in the cluster configuration file.
-.br
-\fIOptions:\fP
-.br
-\-v Verbose. Lists all the properties of the fence device rather
-than just the names and agents.
-.br
-\-c <file> Config file to use. Defaults to /etc/cluster/cluster.conf
-
-
-.TP
-\fBcreate [options] \fP \fI<clustername>\fP
-Create a new, skeleton, configuration file. Note that "create" on its own will
-not create a valid configuration file. Fence agents and nodes will need to be
-added to it before handing it over to ccsd. The new configuration file will
-have a version number of 1. Subsequent addnode/delnode/addfence/delfence operations
-will increment the version number by 1 each time.
-.br
-\fIOptions:\fP
-.br
-.br
-\-c <file> Config file to create. Defaults to /etc/cluster/cluster.conf
-
-.TP
-\fBaddnodeids\fP
-Adds node ID numbers to all the nodes in cluster.conf. In RHEL4, node IDs were optional
-and assigned by cman when a node joined the cluster. In RHEL5 they must be pre-assigned
-in cluster.conf. This command will not change any node IDs that are already set in
-cluster.conf, it will simply add unique node ID numbers to nodes that do not already
-have them.
-
-
.SH "SEE ALSO"
-cluster.conf(5)
+.BR cluster.conf (5),
+.BR ccs (8)
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=d65bbd39b052b56a1277ae1b3d7472b17140a9e5
Commit: d65bbd39b052b56a1277ae1b3d7472b17140a9e5
Parent: da0d0e0e4fee1bac432304f9a792de8bd89c36d2
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Fri Jan 14 10:03:15 2011 +0100
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Fri Jan 14 10:03:15 2011 +0100
cman init: do not include wrong default file
A recent change in glibc and shadow-utils (also in RHEL6) added /etc/default
(as on Debian-based distros), introducing a possible bug in detecting the
correct default settings for the init script.
The window is very small as we don't document the usage of /etc/default/cman,
but I don't see a reason to take a chance. The fix is a one-liner to backport.
Resolves: rhbz#669340
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
cman/init.d/cman.in | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/cman/init.d/cman.in b/cman/init.d/cman.in
index ea8293c..972a814 100644
--- a/cman/init.d/cman.in
+++ b/cman/init.d/cman.in
@@ -64,7 +64,7 @@ if [ -d /etc/sysconfig ]; then
fi
# deb based distros
-if [ -d /etc/default ]; then
+if [ ! -d /etc/sysconfig ]; then
[ -f /etc/default/cluster ] && . /etc/default/cluster
[ -f /etc/default/cman ] && . /etc/default/cman
[ -z "$LOCK_FILE" ] && LOCK_FILE="/var/lock/cman"
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=da0d0e0e4fee1bac432304f9a792de8bd89c36d2
Commit: da0d0e0e4fee1bac432304f9a792de8bd89c36d2
Parent: 68f0ebc9a95c5f68bb3ace7df0c963b0b1c1fca7
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Tue Sep 21 13:45:20 2010 -0400
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Thu Jan 13 10:19:20 2011 +0100
cman: Make qdiskd heuristics time out
Qdiskd heuristics were previously expected to enforce
their own timeouts. This patch makes qdiskd count
any heuristic which has taken longer than (interval*(tko-1))
as failed, since such a heuristic is not reliable.
A side effect is that now qdiskd will also automatically
calculate interval and tko counts for all heuristics,
obviating the need for administrators to do this manually.
Resolves: rhbz#636243
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
Reviewed-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
cman/man/qdisk.5 | 8 ++--
cman/qdisk/main.c | 6 +++-
cman/qdisk/score.c | 92 ++++++++++++++++++++++++++++++++++++++++++++--------
cman/qdisk/score.h | 7 +++-
4 files changed, 92 insertions(+), 21 deletions(-)
diff --git a/cman/man/qdisk.5 b/cman/man/qdisk.5
index efa3638..4070f48 100644
--- a/cman/man/qdisk.5
+++ b/cman/man/qdisk.5
@@ -189,7 +189,7 @@ master will only grant a node membership if:
.in 12
(a) CMAN believes the node to be online, and
-.br
+.bi
(b) that node has made enough consecutive, timely writes
.in 16
to the quorum disk, and
@@ -448,15 +448,15 @@ for heuristics. The default score for each heuristic is 1.
\fIinterval\fP\fB="\fP2\fB"\fP
.in 12
This is the frequency (in seconds) at which we poll the heuristic. The
-default interval for every heuristic is 2 seconds.
+default interval is determined by the qdiskd timeout.
.in 0
.in 9
\fItko\fP\fB="\fP1\fB"\fP
.in 12
After this many failed attempts to run the heuristic, it is considered DOWN,
-and its score is removed. The default tko for each heuristic is 1, which
-may be inadequate for things such as 'ping'.
+and its score is removed. The default tko for each heuristic is determined
+by the qdiskd timeout.
.in 8
\fB/>\fP
.in 0
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index 8ca99f7..617a705 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -1844,7 +1844,11 @@ get_config_data(qd_ctx *ctx, struct h_data *h, int maxh, int *cfh)
goto out;
}
- *cfh = configure_heuristics(ccsfd, h, maxh);
+ /* Heuristics need to report in 1 cycle before we need to
+ * report in so we can get their score.
+ */
+ *cfh = configure_heuristics(ccsfd, h, maxh,
+ ctx->qc_interval * (ctx->qc_tko - 1));
if (*cfh) {
if (ctx->qc_flags & RF_MASTER_WINS) {
diff --git a/cman/qdisk/score.c b/cman/qdisk/score.c
index 81ff700..572464d 100644
--- a/cman/qdisk/score.c
+++ b/cman/qdisk/score.c
@@ -75,22 +75,25 @@ restore_signals(void)
Spin off a user-defined heuristic
*/
static int
-fork_heuristic(struct h_data *h)
+fork_heuristic(struct h_data *h, struct timespec *now)
{
int pid;
char *argv[4];
- time_t now;
if (h->childpid) {
errno = EINPROGRESS;
return -1;
}
- now = time(NULL);
- if (now < h->nextrun)
+ if (now->tv_sec < h->nextrun.tv_sec ||
+ now->tv_nsec < h->nextrun.tv_nsec)
return 0;
- h->nextrun = now + h->interval;
+ h->nextrun.tv_sec = now->tv_sec + h->interval;
+ h->nextrun.tv_nsec = now->tv_nsec;
+
+ h->failtime.tv_sec = now->tv_sec + h->maxtime;
+ h->failtime.tv_nsec = now->tv_nsec;
pid = fork();
if (pid < 0)
@@ -162,7 +165,7 @@ total_score(struct h_data *h, int max, int *score, int *maxscore)
Check for response from a user-defined heuristic / script
*/
static int
-check_heuristic(struct h_data *h, int block)
+check_heuristic(struct h_data *h, int block, struct timespec *now)
{
int ret;
int status;
@@ -172,14 +175,40 @@ check_heuristic(struct h_data *h, int block)
return 0;
ret = waitpid(h->childpid, &status, block?0:WNOHANG);
- if (!block && ret == 0)
+ if (!block && ret == 0) {
/* No children exited */
+
+ /* no timeout */
+ if (!h->maxtime)
+ return 0;
+
+ /* If we overran our timeout, the heuristic is dead */
+ if (now->tv_sec > h->failtime.tv_sec ||
+ (now->tv_sec == h->failtime.tv_sec &&
+ now->tv_nsec > h->failtime.tv_nsec)) {
+ h->misses = h->tko;
+ h->failed = ETIMEDOUT;
+ if (h->available) {
+ logt_print(LOG_INFO, "Heuristic: '%s' DOWN - "
+ "Exceeded timeout of %d seconds\n",
+ h->program, h->maxtime);
+ h->available = 0;
+ }
+ }
+
return 0;
+ }
h->childpid = 0;
if (ret < 0 && errno == ECHILD)
/* wrong child? */
goto miss;
+
+ /* Timed out previously; this run must be ignored. */
+ if (h->failed) {
+ h->failed = 0;
+ goto miss;
+ }
if (!WIFEXITED(status)) {
ret = 0;
goto miss;
@@ -188,7 +217,7 @@ check_heuristic(struct h_data *h, int block)
ret = 0;
goto miss;
}
-
+
/* Returned 0 and was not killed */
if (!h->available) {
h->available = 1;
@@ -222,10 +251,12 @@ miss:
static int
fork_heuristics(struct h_data *h, int max)
{
+ struct timespec now;
int x;
+ clock_gettime(CLOCK_MONOTONIC, &now);
for (x = 0; x < max; x++)
- fork_heuristic(&h[x]);
+ fork_heuristic(&h[x], &now);
return 0;
}
@@ -236,19 +267,49 @@ fork_heuristics(struct h_data *h, int max)
static int
check_heuristics(struct h_data *h, int max, int block)
{
+ struct timespec now;
int x;
+ clock_gettime(CLOCK_MONOTONIC, &now);
for (x = 0; x < max; x++)
- check_heuristic(&h[x], block);
+ check_heuristic(&h[x], block, &now);
return 0;
}
+/*
+ * absmax should be qdiskd (interval * (tko-1))
+ */
+static void
+auto_heuristic_timing(int *interval, int *tko, int absmax)
+{
+ if (!interval || ! tko)
+ return;
+
+ if (absmax < 3)
+ return;
+
+ if (absmax <= 4) {
+ *interval = 1;
+ } else if (absmax <= 22) {
+ *interval = 2;
+ } else if (absmax <= 39) {
+ *interval = 3;
+ } else if (absmax <= 50) {
+ *interval = 4;
+ } else {
+ *interval = 5;
+ }
+
+ *tko = absmax / (*interval);
+}
+
+
/**
Read configuration data from CCS into the array provided
*/
int
-configure_heuristics(int ccsfd, struct h_data *h, int max)
+configure_heuristics(int ccsfd, struct h_data *h, int max, int maxtime)
{
int x = 0;
char *val;
@@ -261,11 +322,14 @@ configure_heuristics(int ccsfd, struct h_data *h, int max)
h[x].program = NULL;
h[x].available = 0;
h[x].misses = 0;
- h[x].interval = 2;
- h[x].tko = 1;
+ auto_heuristic_timing(&h[x].interval, &h[x].tko, maxtime);
+ h[x].maxtime = maxtime;
h[x].score = 1;
h[x].childpid = 0;
- h[x].nextrun = 0;
+ h[x].nextrun.tv_sec = 0;
+ h[x].nextrun.tv_nsec = 0;
+ h[x].failtime.tv_sec = 0;
+ h[x].failtime.tv_nsec = 0;
/* Get program */
snprintf(query, sizeof(query),
diff --git a/cman/qdisk/score.h b/cman/qdisk/score.h
index 77e155b..beff31b 100644
--- a/cman/qdisk/score.h
+++ b/cman/qdisk/score.h
@@ -10,19 +10,22 @@
struct h_data {
char * program;
+ struct timespec nextrun;
+ struct timespec failtime;
int score;
int available;
int tko;
int interval;
+ int maxtime;
int misses;
+ int failed;
pid_t childpid;
- time_t nextrun;
};
/*
Grab score data from CCSD
*/
-int configure_heuristics(int ccsfd, struct h_data *hp, int max);
+int configure_heuristics(int ccsfd, struct h_data *hp, int max, int maxtime);
/*
Start the thread which runs the scoring applets
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=68f0ebc9a95c5f68bb3ace7df0c963b0b1c1fca7
Commit: 68f0ebc9a95c5f68bb3ace7df0c963b0b1c1fca7
Parent: f83663ae3b0a9ac2e0c6a5fd7e52e7af88ec0ab2
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Wed Jan 12 12:21:31 2011 -0500
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Wed Jan 12 12:21:31 2011 -0500
Revert "Revert "config: Fix broken fence_egenera options""
This was an accidental revert.
This reverts commit a82b5cb2bf62913e52dec15c977bdf03b34abc01.
---
config/tools/xml/cluster.rng.in | 22 +++++++++++++++++++++-
1 files changed, 21 insertions(+), 1 deletions(-)
diff --git a/config/tools/xml/cluster.rng.in b/config/tools/xml/cluster.rng.in
index c7f3d43..505e07a 100644
--- a/config/tools/xml/cluster.rng.in
+++ b/config/tools/xml/cluster.rng.in
@@ -2272,10 +2272,30 @@ To validate your cluster.conf against this schema, run:
an optional ESH path. Presumably those should be attributes in
the schema. We need more invormation on this. -->
<group>
- <attribute name="cserver" rha:description="The hostname (and
+ <optional>
+ <attribute name="cserver" rha:description="The hostname (and
optionally the username in the form of username@hostname)
assigned to the device. Refer to the fence_egenera(8) man
page for more information." rha:sample=""/>
+ </optional>
+ <optional>
+ <attribute name="pserver" rha:description="The pserver to operate on. fence_egenera(8)" />
+ </optional>
+ <optional>
+ <attribute name="lpan" rha:description="The lpan to operate on. fence_egenera(8)" />
+ </optional>
+ <optional>
+ <attribute name="action" rha:description="The action to perform (reboot, off, on, or status). fence_egenera(8)" />
+ </optional>
+ <optional>
+ <attribute name="esh" rha:description="Path to the esh command on the cserver. fence_egenera(8)" />
+ </optional>
+ <optional>
+ <attribute name="user" rha:description="See fence_egenera(8)" />
+ </optional>
+ <optional>
+ <attribute name="delay" rha:description="Wait this many seconds before fencing is started. fence_egenera(8)" />
+ </optional>
</group>
<!-- FIXME: It appears that xCat is no longer supported. Found no
fence agents for x Cat in RHEL 5.3. -->