Gitweb: http://git.fedorahosted.org/git/?p=gfs2-utils.git;a=commitdiff;h=fbfb14664cb... Commit: fbfb14664cbaaafab2bcf33707728191d382d52a Parent: 8ed638b27a21dfa1776f55709592a453974c5d90 Author: Andrew Price anprice@redhat.com AuthorDate: Thu May 30 14:27:02 2013 +0100 Committer: Andrew Price anprice@redhat.com CommitterDate: Thu May 30 14:27:02 2013 +0100
gfs2-utils: Retire gfs_controld
Since Linux v3.3, gfs_controld has not been required, and it has been disabled by default in gfs2-utils for some time. This patch removes the group/ directory containing gfs_control* and related files, and removes them from the build system and .gitignore accordingly.
tunegfs2 also #includes linux_endian.h, which was in group/include/, so it has been moved into gfs2/include/ for the time being.
This patch also fixes some other build system issues that caused 'make distcheck' to fail.
Signed-off-by: Andrew Price anprice@redhat.com --- .gitignore | 2 - Makefile.am | 18 +- configure.ac | 58 - gfs2/include/Makefile.am | 2 +- gfs2/include/linux_endian.h | 68 + gfs2/init.d/gfs2-cluster | 94 - gfs2/system/gfs2-cluster.service | 12 - group/Makefile.am | 3 - group/gfs_control/Makefile.am | 9 - group/gfs_control/main.c | 465 ----- group/gfs_control/target.mk | 3 - group/gfs_controld/Makefile.am | 24 - group/gfs_controld/config.c | 157 -- group/gfs_controld/config.h | 14 - group/gfs_controld/cpg-new.c | 3600 ----------------------------------- group/gfs_controld/crc.c | 72 - group/gfs_controld/gfs_controld.h | 37 - group/gfs_controld/gfs_daemon.h | 241 --- group/gfs_controld/logging.c | 65 - group/gfs_controld/main.c | 1496 --------------- group/gfs_controld/member_cman.c | 207 -- group/gfs_controld/target.mk | 3 - group/gfs_controld/util.c | 266 --- group/include/Makefile.am | 3 - group/include/linux_endian.h | 68 - group/include/list.h | 336 ---- group/libgfscontrol/Makefile.am | 9 - group/libgfscontrol/libgfscontrol.h | 122 -- group/libgfscontrol/main.c | 436 ----- group/libgfscontrol/target.mk | 3 - group/man/Makefile.am | 3 - group/man/gfs_controld.8 | 122 -- group/man/target.mk | 3 - tests/Makefile.am | 1 + 34 files changed, 72 insertions(+), 7950 deletions(-)
diff --git a/.gitignore b/.gitignore index f8f8a82..3fa77a0 100644 --- a/.gitignore +++ b/.gitignore @@ -44,8 +44,6 @@ gfs2/tune/tunegfs2 tests/check_libgfs2 tests/testvol tests/tests.log -group/gfs_control/gfs_control -group/gfs_controld/gfs_controld ABOUT-NLS po/Makevars.template po/POTFILES diff --git a/Makefile.am b/Makefile.am index 1eea44c..963c060 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,4 +1,4 @@ -EXTRA_DIST = autogen.sh +EXTRA_DIST = autogen.sh README.build
AUTOMAKE_OPTIONS = foreign
@@ -12,21 +12,7 @@ noinst_HEADERS = make/copyright.cf
ACLOCAL_AMFLAGS = -I m4
-if BUILD_GFS_CONTROLD -DIR_GROUP = group -endif - -SUBDIRS = po $(DIR_GROUP) gfs2 doc tests - -install-exec-local: - $(INSTALL) -d $(DESTDIR)/$(LOGDIR) - $(INSTALL) -d $(DESTDIR)/$(CLUSTERVARRUN) - $(INSTALL) -d $(DESTDIR)/$(CLUSTERVARLIB) - -uninstall-local: - rmdir $(DESTDIR)/$(LOGDIR) || :; - rmdir $(DESTDIR)/$(CLUSTERVARRUN) || :; - rmdir $(DESTDIR)/$(CLUSTERVARLIB) || :; +SUBDIRS = po gfs2 doc tests
maintainer-clean-local: rm -rf m4 diff --git a/configure.ac b/configure.ac index c8e52a3..51c42b1 100644 --- a/configure.ac +++ b/configure.ac @@ -9,7 +9,6 @@ LT_PREREQ([2.2.6]) LT_INIT
AC_CONFIG_MACRO_DIR([m4]) -AC_CONFIG_SRCDIR([group/gfs_controld/config.c]) AC_CONFIG_HEADERS([make/clusterautoconfig.h])
AC_CANONICAL_HOST @@ -89,36 +88,6 @@ AC_ARG_ENABLE([debug], [ --enable-debug enable debug build. ], [ default="no" ])
-AC_ARG_ENABLE([gfs_controld], - [ --enable-gfs_controld build gfs_controld. ], - [ default="no" ]) - -AC_ARG_WITH([syslogfacility], - [ --syslogfacility=FACILITY - cluster default syslog facility. ], - [ SYSLOGFACILITY="$withval" ], - [ SYSLOGFACILITY="LOG_LOCAL4" ]) - -AC_ARG_WITH([sysloglevel], - [ --sysloglevel=LEVEL - cluster default syslog level. ], - [ SYSLOGLEVEL="$withval" ], - [ SYSLOGLEVEL="LOG_INFO" ]) - -# gfs_controld isn't required in the latest versions of cluster -AM_CONDITIONAL([BUILD_GFS_CONTROLD], [test "x$enable_gfs_controld" = "xyes"]) -AS_IF([test "x$enable_gfs_controld" = "xyes"], [ - PKG_CHECK_MODULES([corosync],[corosync]) - PKG_CHECK_MODULES([cpg],[libcpg]) - PKG_CHECK_MODULES([sackpt],[libSaCkpt]) - PKG_CHECK_MODULES([logt],[liblogthread]) - PKG_CHECK_MODULES([ccs],[libccs]) - PKG_CHECK_MODULES([cfg],[libcfg]) - PKG_CHECK_MODULES([fenced],[libfenced]) - PKG_CHECK_MODULES([dlmcontrol],[libdlmcontrol]) - PKG_CHECK_MODULES([quorum],[libquorum]) -]) - # We use the Check framework for unit tests PKG_CHECK_MODULES([check], [check >= 0.9.8], [have_check=yes], @@ -176,27 +145,6 @@ AC_FUNC_MALLOC AC_FUNC_REALLOC AC_CHECK_FUNCS([ftruncate gettimeofday memset realpath rmdir select setlocale socket strcasecmp strchr strdup strerror strstr])
-## random vars - -LOGDIR=${localstatedir}/log/cluster -CLUSTERVARRUN=${localstatedir}/run/cluster - -## do subst - -AC_SUBST([LOGDIR]) -AC_DEFINE_UNQUOTED([LOGDIR], "$(eval echo ${LOGDIR})", - [Default logging directory]) - -AC_SUBST([CLUSTERVARRUN]) -AC_DEFINE_UNQUOTED([CLUSTERVARRUN], "$(eval echo ${CLUSTERVARRUN})", - [Default cluster var/run directory]) - -AC_DEFINE_UNQUOTED([SYSLOGFACILITY], $(eval echo ${SYSLOGFACILITY}), - [Default syslog facility]) - -AC_DEFINE_UNQUOTED([SYSLOGLEVEL], $(eval echo ${SYSLOGLEVEL}), - [Default syslog level]) - ## *FLAGS handling
ENV_CFLAGS="$CFLAGS" @@ -255,12 +203,6 @@ CPPFLAGS="-I$(top_builddir)/make -I$(top_srcdir)/make \ LDFLAGS="$ENV_LDFLAGS"
AC_CONFIG_FILES([Makefile - group/Makefile - group/libgfscontrol/Makefile - group/gfs_control/Makefile - group/gfs_controld/Makefile - group/man/Makefile - group/include/Makefile gfs2/Makefile gfs2/include/Makefile gfs2/libgfs2/Makefile diff --git a/gfs2/include/Makefile.am b/gfs2/include/Makefile.am index fff2953..a017cb3 100644 --- a/gfs2/include/Makefile.am +++ b/gfs2/include/Makefile.am @@ -1,3 +1,3 @@ MAINTAINERCLEANFILES = Makefile.in
-noinst_HEADERS = osi_list.h +noinst_HEADERS = osi_list.h osi_tree.h linux_endian.h diff --git a/gfs2/include/linux_endian.h b/gfs2/include/linux_endian.h new file mode 100644 index 0000000..43089d2 --- /dev/null +++ b/gfs2/include/linux_endian.h @@ -0,0 +1,68 @@ +#ifndef __LINUX_ENDIAN_DOT_H__ +#define __LINUX_ENDIAN_DOT_H__ + + +#include <endian.h> +#include <byteswap.h> + + +/* I'm not sure which versions of alpha glibc/gcc are broken, + so fix all of them. */ +#ifdef __alpha__ +#undef bswap_64 +static __inline__ unsigned long bswap_64(unsigned long x) +{ + unsigned int h = x >> 32; + unsigned int l = x; + + h = bswap_32(h); + l = bswap_32(l); + + return ((unsigned long)l << 32) | h; +} +#endif /* __alpha__ */ + + +#if __BYTE_ORDER == __BIG_ENDIAN + +#define be16_to_cpu(x) (x) +#define be32_to_cpu(x) (x) +#define be64_to_cpu(x) (x) + +#define cpu_to_be16(x) (x) +#define cpu_to_be32(x) (x) +#define cpu_to_be64(x) (x) + +#define le16_to_cpu(x) (bswap_16((x))) +#define le32_to_cpu(x) (bswap_32((x))) +#define le64_to_cpu(x) (bswap_64((x))) + +#define cpu_to_le16(x) (bswap_16((x))) +#define cpu_to_le32(x) (bswap_32((x))) +#define cpu_to_le64(x) (bswap_64((x))) + +#endif /* __BYTE_ORDER == __BIG_ENDIAN */ + + +#if __BYTE_ORDER == __LITTLE_ENDIAN + +#define be16_to_cpu(x) (bswap_16((x))) +#define be32_to_cpu(x) (bswap_32((x))) +#define be64_to_cpu(x) (bswap_64((x))) + +#define cpu_to_be16(x) (bswap_16((x))) +#define cpu_to_be32(x) (bswap_32((x))) +#define cpu_to_be64(x) (bswap_64((x))) + +#define le16_to_cpu(x) (x) +#define le32_to_cpu(x) (x) +#define le64_to_cpu(x) (x) + +#define cpu_to_le16(x) (x) +#define cpu_to_le32(x) (x) +#define cpu_to_le64(x) (x) + +#endif /* __BYTE_ORDER == __LITTLE_ENDIAN */ + + +#endif /* __LINUX_ENDIAN_DOT_H__ */ diff --git a/gfs2/init.d/gfs2-cluster b/gfs2/init.d/gfs2-cluster deleted file mode 100644 index d0f1c0e..0000000 --- a/gfs2/init.d/gfs2-cluster +++ /dev/null @@ -1,94 +0,0 @@ -#!/bin/sh -# -# gfs_controld - GFS2 cluster filesystem 
daemon -# -# chkconfig: - 23 76 -# description: Starts and stops gfs_controld for dlm based GFS2 filesystems -# - - -### BEGIN INIT INFO -# Provides: gfs_controld -# Required-Start: $network cman -# Required-Stop: $network cman -# Should-Start: -# Should-Stop: -# Default-Start: -# Default-Stop: -# Short-Description: Starts and stops gfs_controld -# Description: Starts and stops gfs_controld for dlm based GFS2 filesystems -### END INIT INFO - -. /etc/rc.d/init.d/functions - -prog="gfs_controld" -lockfile="/var/lock/subsys/$prog" -exec="/usr/sbin/$prog" - -start() { - [ -x $exec ] || exit 5 - echo -n $"Starting $prog: " - daemon $prog - retval=$? - echo - [ $retval -eq 0 ] && touch $lockfile - return $retval -} - -stop() { - echo -n $"Stopping $prog: " - killproc $prog - retval=$? - echo - [ $retval -eq 0 ] && rm -f $lockfile -} - -restart() { - stop - start -} - -reload() { - restart -} - -rh_status() { - status $prog -} - -rh_status_q() { - rh_status >/dev/null 2>&1 -} - -case "$1" in - start) - rh_status_q && exit 0 - $1 - ;; - stop) - rh_status_q || exit 0 - $1 - ;; - restart) - $1 - ;; - reload) - rh_status_q || exit 7 - $1 - ;; - force-reload) - force_reload - ;; - status) - rh_status - ;; - condrestart|try-restart) - rh_status_q || exit 0 - restart - ;; - *) - echo $"Usage $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload}" - exit 2 -esac -exit $? 
- diff --git a/gfs2/system/gfs2-cluster.service b/gfs2/system/gfs2-cluster.service deleted file mode 100644 index d6bc0ca..0000000 --- a/gfs2/system/gfs2-cluster.service +++ /dev/null @@ -1,12 +0,0 @@ -[Unit] -Description=GFS2 Control Daemon -After=syslog.target network.target cman.service -Before=remote-fs-pre.target -Wants=remote-fs-pre.target - -[Service] -Type=forking -ExecStart=/usr/sbin/gfs_controld $OPTIONS - -[Install] -WantedBy=multi-user.target diff --git a/group/Makefile.am b/group/Makefile.am deleted file mode 100644 index 5b7d0f8..0000000 --- a/group/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -MAINTAINERCLEANFILES = Makefile.in - -SUBDIRS = libgfscontrol gfs_control gfs_controld man include diff --git a/group/gfs_control/Makefile.am b/group/gfs_control/Makefile.am deleted file mode 100644 index dd75f6f..0000000 --- a/group/gfs_control/Makefile.am +++ /dev/null @@ -1,9 +0,0 @@ -MAINTAINERCLEANFILES = Makefile.in - -sbin_PROGRAMS = gfs_control - -gfs_control_SOURCES = main.c - -gfs_control_CPPFLAGS = -I$(top_srcdir)/group/libgfscontrol - -gfs_control_LDADD = $(top_builddir)/group/libgfscontrol/libgfscontrol.la diff --git a/group/gfs_control/main.c b/group/gfs_control/main.c deleted file mode 100644 index dff32e3..0000000 --- a/group/gfs_control/main.c +++ /dev/null @@ -1,465 +0,0 @@ -#include "clusterautoconfig.h" - -#include <sys/types.h> -#include <sys/un.h> -#include <inttypes.h> -#include <unistd.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdint.h> -#include <stddef.h> -#include <fcntl.h> -#include <string.h> -#include <errno.h> -#include <limits.h> -#include <netinet/in.h> - -#include "libgfscontrol.h" - -#define OPTION_STRING "nhV" - -#define OP_LIST 1 -#define OP_DUMP 2 -#define OP_PLOCKS 3 -#define OP_JOIN 4 -#define OP_LEAVE 5 -#define OP_JOINLEAVE 6 - -static char *fsname; -static int operation; -static int opt_ind; -static int ls_all_nodes; - -#define MAX_MG 128 -#define MAX_NODES 128 - -struct gfsc_mountgroup mgs[MAX_MG]; -struct 
gfsc_node nodes[MAX_NODES]; - - -static void print_usage(const char *prog_name) -{ - printf("Usage:\n"); - printf("\n"); - printf("%s [options] [ls|dump|plocks]\n", prog_name); - printf("\n"); - printf("Options:\n"); - printf(" -n Show all node information in ls\n"); - printf(" -h Print this help, then exit\n"); - printf(" -V Print program version information, then exit\n"); - printf("\n"); -} - -static void decode_arguments(int argc, char **argv) -{ - int cont = 1; - int optchar; - int need_fsname; - - while (cont) { - optchar = getopt(argc, argv, OPTION_STRING); - - switch (optchar) { - case 'n': - ls_all_nodes = 1; - break; - - case 'h': - print_usage(argv[0]); - exit(EXIT_SUCCESS); - break; - - case 'V': - printf("%s %s (built %s %s)\n", - argv[0], VERSION, __DATE__, __TIME__); - /* printf("%s\n", REDHAT_COPYRIGHT); */ - exit(EXIT_SUCCESS); - break; - - case ':': - case '?': - fprintf(stderr, "Please use '-h' for usage.\n"); - exit(EXIT_FAILURE); - break; - - case EOF: - cont = 0; - break; - - default: - fprintf(stderr, "unknown option: %c\n", optchar); - exit(EXIT_FAILURE); - break; - }; - } - - need_fsname = 1; - - while (optind < argc) { - - if (!strncmp(argv[optind], "leave", 5) && - (strlen(argv[optind]) == 5)) { - operation = OP_LEAVE; - opt_ind = optind + 1; - break; - } else if (!strncmp(argv[optind], "ls", 2) && - (strlen(argv[optind]) == 2)) { - operation = OP_LIST; - opt_ind = optind + 1; - need_fsname = 0; - break; - } else if (!strncmp(argv[optind], "dump", 4) && - (strlen(argv[optind]) == 4)) { - operation = OP_DUMP; - opt_ind = optind + 1; - need_fsname = 0; - break; - } else if (!strncmp(argv[optind], "plocks", 6) && - (strlen(argv[optind]) == 6)) { - operation = OP_PLOCKS; - opt_ind = optind + 1; - break; - } - - optind++; - } - - if (!operation || !opt_ind) { - print_usage(argv[0]); - exit(EXIT_FAILURE); - } - - if (optind < argc - 1) - fsname = argv[opt_ind]; - else if (need_fsname) { - fprintf(stderr, "fs name required\n"); - 
exit(EXIT_FAILURE); - } -} - -static int do_write(int fd, void *buf, size_t count) -{ - int rv, off = 0; - - retry: - rv = write(fd, (char *)buf + off, count); - if (rv == -1 && errno == EINTR) - goto retry; - if (rv < 0) - return rv; - - if (rv != count) { - count -= rv; - off += rv; - goto retry; - } - return 0; -} - -static void do_leave(char *table) -{ - struct gfsc_mount_args ma; - int rv; - - memset(&ma, 0, sizeof(ma)); - - strncpy(ma.table, table, sizeof(ma.table)); - - rv = gfsc_fs_leave(&ma, 0); - if (rv < 0) - fprintf(stderr, "gfs_controld leave error %d\n", rv); -} - -static char *mg_flags_str(uint32_t flags) -{ - static char str[128]; - int i = 0; - - memset(str, 0, sizeof(str)); - - if (flags & GFSC_MF_LOCAL_RECOVERY_BUSY) { - i++; - strcat(str, "recover"); - } - if (flags & GFSC_MF_FIRST_RECOVERY_NEEDED) { - strcat(str, i++ ? "," : ""); - strcat(str, "need_first"); - } - if (flags & GFSC_MF_KERNEL_MOUNT_ERROR) { - strcat(str, i++ ? "," : ""); - strcat(str, "error"); - } - if (flags & GFSC_MF_KERNEL_MOUNT_DONE) { - strcat(str, i++ ? "," : ""); - strcat(str, "mounted"); - } - if (flags & GFSC_MF_KERNEL_STOPPED) { - strcat(str, i++ ? "," : ""); - strcat(str, "blocked"); - } - if (flags & GFSC_MF_LEAVING) { - strcat(str, i++ ? "," : ""); - strcat(str, "leave"); - } - if (flags & GFSC_MF_JOINING) { - strcat(str, i++ ? "," : ""); - strcat(str, "join"); - } - - /* leave this one out, it will often be set and we don't need - to see it */ - /* - if (flags & GFSC_MF_FIRST_RECOVERY_MSG) - strcat(str, "first_recovery_msg "); - */ - return str; -} - -static char *node_mount_str(uint32_t flags) -{ - static char str[128]; - int i = 0; - - memset(str, 0, sizeof(str)); - - if (flags & GFSC_NF_KERNEL_MOUNT_DONE) { - i++; - strcat(str, "done"); - } - if (flags & GFSC_NF_KERNEL_MOUNT_ERROR) { - strcat(str, i++ ? "," : ""); - strcat(str, "error"); - } - if (flags & GFSC_NF_READONLY) { - strcat(str, i++ ? 
"," : ""); - strcat(str, "ro"); - } - if (flags & GFSC_NF_SPECTATOR) { - strcat(str, i++ ? "," : ""); - strcat(str, "spect"); - } - - if (!i) - strcat(str, "none"); - - /* ignoring CHECK_DLM */ - - return str; -} - -static int member_int(struct gfsc_node *n) -{ - if (n->flags & GFSC_NF_DISALLOWED) - return -1; - if (n->flags & GFSC_NF_MEMBER) - return 1; - return 0; -} - -static const char *condition_str(int cond) -{ - switch (cond) { - case 0: - return ""; - case 1: - return "kernel_mount_done"; - case 2: - return "notify_nodeid"; - case 3: - return "poll_dlm"; - case 4: - return "pending"; - default: - return "unknown"; - } -} - -static void show_nodeids(int count, struct gfsc_node *nodes_in) -{ - struct gfsc_node *n = nodes_in; - int i; - - for (i = 0; i < count; i++) { - printf("%d ", n->nodeid); - n++; - } - printf("\n"); -} - -static int node_compare(const void *va, const void *vb) -{ - const struct gfsc_node *a = va; - const struct gfsc_node *b = vb; - - return a->nodeid - b->nodeid; -} - -static void show_mg(struct gfsc_mountgroup *mg) -{ - int rv, node_count; - - printf("name %s\n", mg->name); - printf("id 0x%08x\n", mg->global_id); - printf("flags 0x%08x %s\n", - mg->flags, mg_flags_str(mg->flags)); - printf("change member %d joined %d remove %d failed %d seq %d,%d\n", - mg->cg_prev.member_count, mg->cg_prev.joined_count, - mg->cg_prev.remove_count, mg->cg_prev.failed_count, - mg->cg_prev.combined_seq, mg->cg_prev.seq); - - node_count = 0; - memset(&nodes, 0, sizeof(nodes)); - rv = gfsc_mountgroup_nodes(mg->name, GFSC_NODES_MEMBERS, - MAX_NODES, &node_count, nodes); - if (rv < 0) { - printf("members error\n"); - goto next; - } - qsort(nodes, node_count, sizeof(struct gfsc_node), node_compare); - - printf("members "); - show_nodeids(node_count, nodes); - - next: - if (!mg->cg_next.seq) - return; - - printf("new change member %d joined %d remove %d failed %d seq %d,%d\n", - mg->cg_next.member_count, mg->cg_next.joined_count, - mg->cg_next.remove_count, 
mg->cg_next.failed_count, - mg->cg_next.combined_seq, mg->cg_next.seq); - - printf("new status wait_messages %d wait_condition %d %s\n", - mg->cg_next.wait_messages, mg->cg_next.wait_condition, - condition_str(mg->cg_next.wait_condition)); - - node_count = 0; - memset(&nodes, 0, sizeof(nodes)); - rv = gfsc_mountgroup_nodes(mg->name, GFSC_NODES_NEXT, - MAX_NODES, &node_count, nodes); - if (rv < 0) { - printf("new members error\n"); - return; - } - qsort(nodes, node_count, sizeof(struct gfsc_node), node_compare); - - printf("new members "); - show_nodeids(node_count, nodes); -} - -static void show_all_nodes(int count, struct gfsc_node *nodes_in) -{ - struct gfsc_node *n = nodes_in; - int i; - - for (i = 0; i < count; i++) { - printf("nodeid %d jid %d member %d failed %d start %d seq_add %u seq_rem %u mount %s\n", - n->nodeid, - n->jid, - member_int(n), - n->failed_reason, - (n->flags & GFSC_NF_START) ? 1 : 0, - n->added_seq, - n->removed_seq, - node_mount_str(n->flags)); - n++; - } -} - -static void do_list(char *name) -{ - struct gfsc_mountgroup *mg; - int node_count; - int mg_count; - int rv; - int i; - - memset(mgs, 0, sizeof(mgs)); - - if (name) { - mg_count = 1; - rv = gfsc_mountgroup_info(name, mgs); - } else { - rv = gfsc_mountgroups(MAX_MG, &mg_count, mgs); - } - - if (rv < 0) - exit(EXIT_FAILURE); /* gfs_controld probably not running */ - - if (mg_count) - printf("gfs mountgroups\n"); - - for (i = 0; i < mg_count; i++) { - mg = &mgs[i]; - - show_mg(mg); - - if (!ls_all_nodes) - goto next; - - node_count = 0; - memset(&nodes, 0, sizeof(nodes)); - - rv = gfsc_mountgroup_nodes(mg->name, GFSC_NODES_ALL, - MAX_NODES, &node_count, nodes); - if (rv < 0) { - printf("all nodes error %d %d\n", rv, errno); - goto next; - } - - qsort(nodes, node_count, sizeof(struct gfsc_node),node_compare); - - printf("all nodes\n"); - show_all_nodes(node_count, nodes); - next: - printf("\n"); - } -} - -static void do_plocks(char *name) -{ - char buf[GFSC_DUMP_SIZE]; - - memset(buf, 0, 
sizeof(buf)); - - gfsc_dump_plocks(name, buf); - - do_write(STDOUT_FILENO, buf, strlen(buf)); -} - -static void do_dump(void) -{ - char buf[GFSC_DUMP_SIZE]; - - memset(buf, 0, sizeof(buf)); - - gfsc_dump_debug(buf); - - do_write(STDOUT_FILENO, buf, strlen(buf)); -} - -int main(int argc, char **argv) -{ - decode_arguments(argc, argv); - - switch (operation) { - - case OP_LEAVE: - do_leave(fsname); - break; - - case OP_LIST: - do_list(fsname); - break; - - case OP_DUMP: - do_dump(); - break; - - case OP_PLOCKS: - do_plocks(fsname); - break; - } - return 0; -} - diff --git a/group/gfs_control/target.mk b/group/gfs_control/target.mk deleted file mode 100644 index ff65253..0000000 --- a/group/gfs_control/target.mk +++ /dev/null @@ -1,3 +0,0 @@ - -$(eval $(call make-binary,/usr/sbin/gfs_control,group/libgfscontrol/libgfscontrol.a)) - diff --git a/group/gfs_controld/Makefile.am b/group/gfs_controld/Makefile.am deleted file mode 100644 index f8d646d..0000000 --- a/group/gfs_controld/Makefile.am +++ /dev/null @@ -1,24 +0,0 @@ -MAINTAINERCLEANFILES = Makefile.in - -sbin_PROGRAMS = gfs_controld - -noinst_HEADERS = config.h gfs_controld.h gfs_daemon.h - -gfs_controld_SOURCES = config.c \ - cpg-new.c \ - crc.c \ - logging.c \ - main.c \ - member_cman.c \ - util.c - -gfs_controld_CPPFLAGS = -I$(top_srcdir)/group/libgfscontrol \ - -I$(top_srcdir)/group/include - -gfs_controld_CFLAGS = $(ccs_CFLAGS) $(logt_CFLAGS) $(dlmcontrol_CFLAGS) \ - $(fenced_CFLAGS) $(sackpt_CFLAGS) $(cpg_CFLAGS) \ - $(cfg_CFLAGS) $(quorum_CFLAGS) - -gfs_controld_LDFLAGS = $(ccs_LIBS) $(logt_LIBS) $(dlmcontrol_LIBS) \ - $(cpg_LIBS) $(cfg_LIBS) $(sackpt_LIBS) \ - $(fenced_LIBS) $(quorum_LIBS) -lpthread diff --git a/group/gfs_controld/config.c b/group/gfs_controld/config.c deleted file mode 100644 index 3131189..0000000 --- a/group/gfs_controld/config.c +++ /dev/null @@ -1,157 +0,0 @@ -#include "gfs_daemon.h" - -#include <sys/types.h> -#include <asm/types.h> -#include <sys/uio.h> -#include <netinet/in.h> 
-#include <sys/socket.h> -#include <sys/ioctl.h> -#include <sys/stat.h> -#include <sys/utsname.h> -#include <netinet/in.h> -#include <arpa/inet.h> -#include <net/if.h> -#include <stdio.h> -#include <errno.h> -#include <string.h> -#include <stdlib.h> -#include <stddef.h> -#include <stdint.h> -#include <fcntl.h> -#include <netdb.h> -#include <limits.h> -#include <unistd.h> -#include <dirent.h> - -#include "config.h" -#include "ccs.h" - -int ccs_handle; - -/* was a config value set on command line?, 0 or 1. */ - -int optd_debug_logfile; -int optd_enable_withdraw; - -/* actual config value from command line, cluster.conf, or default. */ - -int cfgd_debug_logfile = DEFAULT_DEBUG_LOGFILE; -int cfgd_enable_withdraw = DEFAULT_ENABLE_WITHDRAW; - -void read_ccs_name(const char *path, char *name) -{ - char *str; - int error; - - error = ccs_get(ccs_handle, path, &str); - if (error || !str) - return; - - strcpy(name, str); - - free(str); -} - -void read_ccs_yesno(const char *path, int *yes, int *no) -{ - char *str; - int error; - - *yes = 0; - *no = 0; - - error = ccs_get(ccs_handle, path, &str); - if (error || !str) - return; - - if (!strcmp(str, "yes")) - *yes = 1; - - else if (!strcmp(str, "no")) - *no = 1; - - free(str); -} - -int read_ccs_int(const char *path, int *config_val) -{ - char *str; - int val; - int error; - - error = ccs_get(ccs_handle, path, &str); - if (error || !str) - return -1; - - val = atoi(str); - - if (val < 0) { - log_error("ignore invalid value %d for %s", val, path); - return -1; - } - - *config_val = val; - log_debug("%s is %u", path, val); - free(str); - return 0; -} - -#define LOCKSPACE_NODIR "/cluster/dlm/lockspace[@name="%s"]/@nodir" - -void read_ccs_nodir(struct mountgroup *mg, char *buf) -{ - char path[PATH_MAX]; - char *str; - int val; - int error; - - memset(path, 0, PATH_MAX); - sprintf(path, LOCKSPACE_NODIR, mg->name); - - error = ccs_get(ccs_handle, path, &str); - if (error || !str) - return; - - val = atoi(str); - - if (val < 0) { - 
log_error("ignore invalid value %d for %s", val, path); - return; - } - - snprintf(buf, 32, ":nodir=%d", val); - - log_debug("%s is %u", path, val); - free(str); -} - -#define ENABLE_WITHDRAW_PATH "/cluster/gfs_controld/@enable_withdraw" - -int setup_ccs(void) -{ - int cd; - - if (ccs_handle) - return 0; - - cd = ccs_connect(); - if (cd < 0) { - log_error("ccs_connect error %d %d", cd, errno); - return -1; - } - ccs_handle = cd; - - if (!optd_enable_withdraw) - read_ccs_int(ENABLE_WITHDRAW_PATH, &cfgd_enable_withdraw); - - read_ccs_name("/cluster/@name", clustername); - log_debug("cluster name "%s"", clustername); - - return 0; -} - -void close_ccs(void) -{ - ccs_disconnect(ccs_handle); -} - diff --git a/group/gfs_controld/config.h b/group/gfs_controld/config.h deleted file mode 100644 index bc4788d..0000000 --- a/group/gfs_controld/config.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef __CONFIG_DOT_H__ -#define __CONFIG_DOT_H__ - -#define DEFAULT_DEBUG_LOGFILE 0 -#define DEFAULT_ENABLE_WITHDRAW 1 - -extern int optd_debug_logfile; -extern int optd_enable_withdraw; - -extern int cfgd_debug_logfile; -extern int cfgd_enable_withdraw; - -#endif - diff --git a/group/gfs_controld/cpg-new.c b/group/gfs_controld/cpg-new.c deleted file mode 100644 index 8cf2b00..0000000 --- a/group/gfs_controld/cpg-new.c +++ /dev/null @@ -1,3600 +0,0 @@ -#include "gfs_daemon.h" -#include "config.h" -#include "libdlmcontrol.h" - -#define MAX_JOURNALS 256 - -/* gfs_header types */ -enum { - GFS_MSG_PROTOCOL = 1, - GFS_MSG_START = 2, - GFS_MSG_MOUNT_DONE = 3, - GFS_MSG_FIRST_RECOVERY_DONE = 4, - GFS_MSG_RECOVERY_RESULT = 5, - GFS_MSG_REMOUNT = 6, - GFS_MSG_WITHDRAW = 7, - GFS_MSG_WITHDRAW_ACK = 8, -}; - -/* gfs_header flags */ -#define GFS_MFLG_JOINING 1 /* accompanies start, we are joining */ - -struct gfs_header { - uint16_t version[3]; /* daemon_run protocol */ - uint16_t type; /* GFS_MSG_ */ - uint32_t nodeid; /* sender */ - uint32_t to_nodeid; /* recipient, 0 for all */ - uint32_t global_id; /* 
global unique id for this lockspace */ - uint32_t flags; /* GFS_MFLG_ */ - uint32_t msgdata; /* in-header payload depends on MSG type */ - uint32_t pad1; - uint64_t pad2; -}; - -struct protocol_version { - uint16_t major; - uint16_t minor; - uint16_t patch; - uint16_t flags; -}; - -struct protocol { - union { - struct protocol_version dm_ver; - uint16_t daemon_max[4]; - }; - union { - struct protocol_version km_ver; - uint16_t kernel_max[4]; - }; - union { - struct protocol_version dr_ver; - uint16_t daemon_run[4]; - }; - union { - struct protocol_version kr_ver; - uint16_t kernel_run[4]; - }; -}; - -/* mg_info and id_info: for syncing state in start message */ - -struct mg_info { - uint32_t mg_info_size; - uint32_t id_info_size; - uint32_t id_info_count; - - uint32_t started_count; - - int member_count; - int joined_count; - int remove_count; - int failed_count; - - int first_recovery_needed; - int first_recovery_master; -}; - -#define IDI_NODEID_IS_MEMBER 0x00000001 -#define IDI_JID_NEEDS_RECOVERY 0x00000002 -#define IDI_MOUNT_DONE 0x00000008 -#define IDI_MOUNT_ERROR 0x00000010 -#define IDI_MOUNT_RO 0x00000020 -#define IDI_MOUNT_SPECTATOR 0x00000040 - -struct id_info { - int nodeid; - int jid; - uint32_t flags; -}; - -#define JID_NONE -1 - -struct journal { - struct list_head list; - int jid; - int nodeid; - int failed_nodeid; - int needs_recovery; - - int local_recovery_busy; - int local_recovery_done; - int local_recovery_result; - int failed_recovery_count; -}; - -struct node { - struct list_head list; - int nodeid; - int jid; - int ro; - int spectator; - int kernel_mount_done; - int kernel_mount_error; - - int check_dlm; - int dlm_notify_callback; - int dlm_notify_result; - - int failed_reason; - uint32_t added_seq; - uint32_t removed_seq; - uint64_t add_time; - - int withdraw; - int send_withdraw_ack; - - uint64_t cluster_add_time; - uint64_t cluster_remove_time; - - struct protocol proto; -}; - -struct member { - struct list_head list; - int nodeid; - int 
start; /* 1 if we received a start message for this change */ - int added; /* 1 if added by this change */ - int failed; /* 1 if failed in this change */ - int disallowed; - char *start_msg; /* full copy of the start message from this node */ - struct mg_info *mg_info; /* shortcut into started_msg */ -}; - -/* One of these change structs is created for every confchg a cpg gets. */ - -#define CGST_WAIT_CONDITIONS 1 -#define CGST_WAIT_MESSAGES 2 - -struct change { - struct list_head list; - struct list_head members; - struct list_head removed; /* nodes removed by this change */ - struct list_head saved_messages; /* saved messages */ - int member_count; - int joined_count; - int remove_count; - int failed_count; - int state; - int we_joined; - uint32_t seq; /* used as a reference for debugging, and for queries */ - uint32_t combined_seq; /* for queries */ - uint64_t create_time; -}; - -struct save_msg { - struct list_head list; - int len; - char buf[0]; -}; - -static int dlmcontrol_fd; -static int cpg_fd_daemon; -static struct protocol our_protocol; -static struct list_head daemon_nodes; -static struct cpg_address daemon_member[MAX_NODES]; -static int daemon_member_count; - -/* - cpg confchg's arrive telling us that mountgroup members have - joined/left/failed. A "change" struct is created for each confchg, - and added to the mg->changes list. 
- - apply_changes() - --------------- - - <a new node won't know whether first_recovery_needed or not, but it also - won't have any conditions to wait for, so a new node will go directly to - sending out start message regardless> - - if first_recovery_needed, - (or new, where new is not having completed a start barrier yet) - all nodes: skip wait conditions - all nodes: send start message - - else !first_recovery_needed, - all nodes: if failures in changes, wait for conditions: - local mount to complete if in progress, stop_kernel, dlm_notified - all nodes: send start message - - <new changes that arrive result in going back to beginning; start messages - from this aborted start cycle will be ignored> - - all nodes: wait for all start messages - - <once all start messages are received, new changes will be handled in a - new batch after all current changes are cleared at end of sync_state> - - if start cycle / start barrier completes (start messages received from - all nodes without being interrupted by a change), go on to sync_state - which puts all members (as defined by the most recent change) in sync. 
- - "old nodes" are nodes that have completed a start cycle before (have - a non-zero started_count), and "new nodes" are nodes that have not - completed a start cycle before (they are being added by one of the - changes in this start cycle) - - sync_state() - ------------ - - if old nodes have first_recovery_needed, or all nodes are new - all nodes: mg->first_recovery_needed = 1 - all nodes: mg->first_recovery_master = prev or new low nodeid - new nodes: instantiate existing state to match old nodes - old nodes: update state per the changes in the completed start cycle - all nodes: assign jids to new members - all nodes: clear all change structs - - else !first_recovery_needed, - new nodes: instantiate existing state to match old nodes - old nodes: update state per the changes in the completed start cycle - all nodes: assign jids to new members - all nodes: clear all change structs - - <new changes that arrive from here on result in going back to the top> - - apply_recovery() - ---------------- - - if first_recovery_needed, - master: tells mount to run with first=1 (if not already) - all nodes: wait for first_recovery_done message - master: sends first_recovery_done message when mount is done - all nodes: mg->first_recovery_needed = 0 - all nodes: start kernel / tell mount.gfs to mount(2) (master already did) - all nodes: send message with result of kernel mount - - else !first_recovery_needed, - all nodes: if there are no journals to recover, goto start kernel - old nodes: tell kernel to recover jids, send message with each result - all nodes: wait for all recoveries to be done - all nodes: start kernel - new nodes: tell mount.gfs to mount(2) - new nodes: send message with result of kernel mount - - [If no one can recover some journal(s), all will be left waiting, unstarted. 
- A new change from a new mount will result in things going back to the top, - and hopefully the new node will be successful at doing the journal - recoveries when it comes through the apply_recovery() section, which - would let everyone start again.] -*/ - -static void apply_changes_recovery(struct mountgroup *mg); -static void send_withdraw_acks(struct mountgroup *mg); - -static void log_config(const struct cpg_name *group_name, - const struct cpg_address *member_list, - size_t member_list_entries, - const struct cpg_address *left_list, - size_t left_list_entries, - const struct cpg_address *joined_list, - size_t joined_list_entries) -{ - char m_buf[128]; - char j_buf[32]; - char l_buf[32]; - size_t i, len, pos; - int ret; - - memset(m_buf, 0, sizeof(m_buf)); - memset(j_buf, 0, sizeof(j_buf)); - memset(l_buf, 0, sizeof(l_buf)); - - len = sizeof(m_buf); - pos = 0; - for (i = 0; i < member_list_entries; i++) { - ret = snprintf(m_buf + pos, len - pos, " %d", - member_list[i].nodeid); - if (ret >= len - pos) - break; - pos += ret; - } - - len = sizeof(j_buf); - pos = 0; - for (i = 0; i < joined_list_entries; i++) { - ret = snprintf(j_buf + pos, len - pos, " %d", - joined_list[i].nodeid); - if (ret >= len - pos) - break; - pos += ret; - } - - len = sizeof(l_buf); - pos = 0; - for (i = 0; i < left_list_entries; i++) { - ret = snprintf(l_buf + pos, len - pos, " %d", - left_list[i].nodeid); - if (ret >= len - pos) - break; - pos += ret; - } - - log_debug("%s conf %zu %zu %zu memb%s join%s left%s", group_name->value, - member_list_entries, joined_list_entries, left_list_entries, - m_buf, j_buf, l_buf); -} - -static const char *msg_name(int type) -{ - switch (type) { - case GFS_MSG_PROTOCOL: - return "protocol"; - case GFS_MSG_START: - return "start"; - case GFS_MSG_MOUNT_DONE: - return "mount_done"; - case GFS_MSG_FIRST_RECOVERY_DONE: - return "first_recovery_done"; - case GFS_MSG_RECOVERY_RESULT: - return "recovery_result"; - case GFS_MSG_REMOUNT: - return "remount"; - 
case GFS_MSG_WITHDRAW: - return "withdraw"; - case GFS_MSG_WITHDRAW_ACK: - return "withdraw_ack"; - default: - return "unknown"; - } -} - -static int _send_message(cpg_handle_t h, void *buf, int len, int type) -{ - struct iovec iov; - cpg_error_t error; - int retries = 0; - - iov.iov_base = buf; - iov.iov_len = len; - - retry: - error = cpg_mcast_joined(h, CPG_TYPE_AGREED, &iov, 1); - if (error == CPG_ERR_TRY_AGAIN) { - retries++; - usleep(1000); - if (!(retries % 100)) - log_error("cpg_mcast_joined retry %d %s", - retries, msg_name(type)); - goto retry; - } - if (error != CPG_OK) { - log_error("cpg_mcast_joined error %d handle %llx %s", - error, (unsigned long long)h, msg_name(type)); - return -1; - } - - if (retries) - log_debug("cpg_mcast_joined retried %d %s", - retries, msg_name(type)); - - return 0; -} - -/* header fields caller needs to set: type, to_nodeid, flags, msgdata */ - -static void gfs_send_message(struct mountgroup *mg, char *buf, int len) -{ - struct gfs_header *hd = (struct gfs_header *) buf; - int type = hd->type; - - hd->version[0] = cpu_to_le16(our_protocol.daemon_run[0]); - hd->version[1] = cpu_to_le16(our_protocol.daemon_run[1]); - hd->version[2] = cpu_to_le16(our_protocol.daemon_run[2]); - hd->type = cpu_to_le16(hd->type); - hd->nodeid = cpu_to_le32(our_nodeid); - hd->to_nodeid = cpu_to_le32(hd->to_nodeid); - hd->global_id = cpu_to_le32(mg->id); - hd->flags = cpu_to_le32(hd->flags); - hd->msgdata = cpu_to_le32(hd->msgdata); - - _send_message(mg->cpg_handle, buf, len, type); -} - -static struct member *find_memb(struct change *cg, int nodeid) -{ - struct member *memb; - - list_for_each_entry(memb, &cg->members, list) { - if (memb->nodeid == nodeid) - return memb; - } - return NULL; -} - -static struct mountgroup *find_mg_handle(cpg_handle_t h) -{ - struct mountgroup *mg; - - list_for_each_entry(mg, &mountgroups, list) { - if (mg->cpg_handle == h) - return mg; - } - return NULL; -} - -static struct mountgroup *find_mg_ci(int ci) -{ - struct 
mountgroup *mg; - - list_for_each_entry(mg, &mountgroups, list) { - if (mg->cpg_client == ci) - return mg; - } - return NULL; -} - -static struct journal *find_journal(struct mountgroup *mg, int jid) -{ - struct journal *j; - - list_for_each_entry(j, &mg->journals, list) { - if (j->jid == jid) - return j; - } - return NULL; -} - -static struct journal *find_journal_by_nodeid(struct mountgroup *mg, int nodeid) -{ - struct journal *j; - - list_for_each_entry(j, &mg->journals, list) { - if (j->nodeid == nodeid) - return j; - } - return NULL; -} - -static void free_cg(struct change *cg) -{ - struct member *memb, *safe; - struct save_msg *sm, *sm2; - - list_for_each_entry_safe(memb, safe, &cg->members, list) { - list_del(&memb->list); - if (memb->start_msg) - free(memb->start_msg); - free(memb); - } - list_for_each_entry_safe(memb, safe, &cg->removed, list) { - list_del(&memb->list); - if (memb->start_msg) - free(memb->start_msg); - free(memb); - } - list_for_each_entry_safe(sm, sm2, &cg->saved_messages, list) { - list_del(&sm->list); - free(sm); - } - free(cg); -} - -void free_mg(struct mountgroup *mg) -{ - struct change *cg, *cg_safe; - struct node *node, *node_safe; - - list_for_each_entry_safe(cg, cg_safe, &mg->changes, list) { - list_del(&cg->list); - free_cg(cg); - } - - if (mg->started_change) - free_cg(mg->started_change); - - list_for_each_entry_safe(node, node_safe, &mg->node_history, list) { - list_del(&node->list); - free(node); - } - - free(mg); -} - -static struct node *get_node_history(struct mountgroup *mg, int nodeid) -{ - struct node *node; - - list_for_each_entry(node, &mg->node_history, list) { - if (node->nodeid == nodeid) - return node; - } - return NULL; -} - -static void node_history_init(struct mountgroup *mg, int nodeid, - struct change *cg) -{ - struct node *node; - - node = get_node_history(mg, nodeid); - if (node) { - list_del(&node->list); - goto out; - } - - node = malloc(sizeof(struct node)); - if (!node) { - log_error("node_history_init 
no mem"); - return; - } - out: - memset(node, 0, sizeof(struct node)); - - node->nodeid = nodeid; - node->add_time = 0; - list_add_tail(&node->list, &mg->node_history); - - if (cg) - node->added_seq = cg->seq; /* for queries */ -} - -void node_history_cluster_add(int nodeid) -{ - struct mountgroup *mg; - struct node *node; - - list_for_each_entry(mg, &mountgroups, list) { - node_history_init(mg, nodeid, NULL); - - node = get_node_history(mg, nodeid); - if (!node) { - log_error("node_history_cluster_add no nodeid %d", - nodeid); - return; - } - - node->cluster_add_time = time(NULL); - } -} - -void node_history_cluster_remove(int nodeid) -{ - struct mountgroup *mg; - struct node *node; - - list_for_each_entry(mg, &mountgroups, list) { - node = get_node_history(mg, nodeid); - if (!node) { - log_error("node_history_cluster_remove no nodeid %d", - nodeid); - return; - } - - node->cluster_remove_time = time(NULL); - } -} - -static void node_history_start(struct mountgroup *mg, int nodeid) -{ - struct node *node; - - node = get_node_history(mg, nodeid); - if (!node) { - log_error("node_history_start no nodeid %d", nodeid); - return; - } - - node->add_time = time(NULL); -} - -static void node_history_left(struct mountgroup *mg, int nodeid, - struct change *cg) -{ - struct node *node; - - node = get_node_history(mg, nodeid); - if (!node) { - log_error("node_history_left no nodeid %d", nodeid); - return; - } - - node->add_time = 0; - node->removed_seq = cg->seq; /* for queries */ -} - -static void node_history_fail(struct mountgroup *mg, int nodeid, - struct change *cg, int reason) -{ - struct node *node; - - node = get_node_history(mg, nodeid); - if (!node) { - log_error("node_history_fail no nodeid %d", nodeid); - return; - } - - node->check_dlm = 1; - - node->removed_seq = cg->seq; /* for queries */ - node->failed_reason = reason; /* for queries */ -} - -static int is_added(struct mountgroup *mg, int nodeid) -{ - struct change *cg; - struct member *memb; - - 
list_for_each_entry(cg, &mg->changes, list) { - memb = find_memb(cg, nodeid); - if (memb && memb->added) - return 1; - } - return 0; -} - -static int is_withdraw(struct mountgroup *mg, int nodeid) -{ - struct node *node; - - node = get_node_history(mg, nodeid); - if (!node) { - log_error("is_withdraw no nodeid %d", nodeid); - return 0; - } - return node->withdraw; -} - -static int journals_need_recovery(struct mountgroup *mg) -{ - struct change *cg; - struct journal *j; - struct member *memb; - int count = 0; - - list_for_each_entry(j, &mg->journals, list) - if (j->needs_recovery) - count++; - - list_for_each_entry(cg, &mg->changes, list) { - list_for_each_entry(memb, &cg->removed, list) { - if (!memb->failed && !is_withdraw(mg, memb->nodeid)) - continue; - /* check whether this node had a journal assigned? */ - count++; - } - } - - return count; -} - -/* find a start message from an old node to use; it doesn't matter which old - node we take the start message from, they should all be the same */ - -static int get_id_list(struct mountgroup *mg, struct id_info **ids, - int *count, int *size) -{ - struct change *cg; - struct member *memb; - - cg = list_first_entry(&mg->changes, struct change, list); - - list_for_each_entry(memb, &cg->members, list) { - if (!memb->mg_info->started_count) - continue; - - *count = memb->mg_info->id_info_count; - *size = memb->mg_info->id_info_size; - *ids = (struct id_info *)(memb->start_msg + - sizeof(struct gfs_header) + - memb->mg_info->mg_info_size); - return 0; - } - return -1; -} - -static struct id_info *get_id_struct(struct id_info *ids, int count, int size, - int nodeid) -{ - struct id_info *id = ids; - int i; - - for (i = 0; i < count; i++) { - if (id->nodeid == nodeid) - return id; - id = (struct id_info *)((char *)id + size); - } - return NULL; -} - -static void start_kernel(struct mountgroup *mg) -{ - struct change *cg = mg->started_change; - - if (!mg->kernel_stopped) { - log_error("start_kernel cg %u not stopped", 
cg->seq); - return; - } - - log_group(mg, "start_kernel cg %u member_count %d", - cg->seq, cg->member_count); - - set_sysfs(mg, "block", 0); - mg->kernel_stopped = 0; - - if (mg->joining) { - client_reply_join_full(mg, 0); - mg->joining = 0; - mg->mount_client_notified = 1; - } -} - -static void stop_kernel(struct mountgroup *mg) -{ - if (!mg->kernel_stopped) { - log_group(mg, "stop_kernel"); - set_sysfs(mg, "block", 1); - mg->kernel_stopped = 1; - } -} - -void process_dlmcontrol(int ci) -{ - struct mountgroup *mg; - struct node *node; - char name[GFS_MOUNTGROUP_LEN+1]; - int rv, type, nodeid, result; - - memset(name, 0, sizeof(name)); - - rv = dlmc_fs_result(dlmcontrol_fd, name, &type, &nodeid, &result); - if (rv) { - log_error("process_dlmcontrol dlmc_fs_result %d", rv); - return; - } - - mg = find_mg(name); - if (!mg) { - log_error("process_dlmcontrol no mg %s", name); - return; - } - - if (type == DLMC_RESULT_NOTIFIED) { - log_group(mg, "process_dlmcontrol notified nodeid %d result %d", - nodeid, result); - - node = get_node_history(mg, nodeid); - if (!node) { - /* shouldn't happen */ - log_error("process_dlmcontrol no nodeid %d", nodeid); - return; - } - - if (mg->dlm_notify_nodeid != nodeid) { - /* shouldn't happen */ - log_error("process_dlmcontrol node %d expected %d", - nodeid, mg->dlm_notify_nodeid); - return; - } - - mg->dlm_notify_nodeid = 0; - node->dlm_notify_callback = 1; - node->dlm_notify_result = result; - - } else if (type == DLMC_RESULT_REGISTER) { - if (result) { - /* shouldn't happen */ - log_error("process_dlmcontrol register %d", result); - client_reply_join_full(mg, result); - mg->mount_client_notified = 1; - } else { - log_group(mg, "process_dlmcontrol register %d", result); - mg->dlm_registered = 1; - } - } else { - log_group(mg, "process_dlmcontrol unknown type %d", type); - } - - poll_dlm = 0; - - apply_changes_recovery(mg); -} - -static int check_dlm_notify_done(struct mountgroup *mg) -{ - struct node *node; - int rv; - - /* we're 
waiting for a notify result from the dlm (could we fire off - all dlmc_fs_notified() calls at once instead of serially?) */ - - if (mg->dlm_notify_nodeid) - return 0; - - list_for_each_entry(node, &mg->node_history, list) { - - /* check_dlm is set when we see a node fail, and is cleared - below when we find that the dlm has also seen it fail */ - - if (!node->check_dlm) - continue; - - /* we're in sync with the dlm for this nodeid, i.e. we've - both seen this node fail */ - - if (node->dlm_notify_callback && !node->dlm_notify_result) { - node->dlm_notify_callback = 0; - node->check_dlm = 0; - continue; - } - - /* we're not in sync with the dlm for this nodeid, i.e. - the dlm hasn't seen this node fail yet; try calling - dlmc_fs_notified() again in a bit */ - - if (node->dlm_notify_callback && node->dlm_notify_result) { - log_group(mg, "check_dlm_notify result %d will retry nodeid %d", - node->dlm_notify_result, node->nodeid); - node->dlm_notify_callback = 0; - poll_dlm = 1; - return 0; - } - - /* check if the dlm has seen this nodeid fail, we get the - answer asynchronously in process_dlmcontrol */ - - log_group(mg, "check_dlm_notify nodeid %d begin", node->nodeid); - - rv = dlmc_fs_notified(dlmcontrol_fd, mg->name, node->nodeid); - if (rv) { - log_error("dlmc_fs_notified error %d", rv); - return 0; - } - - mg->dlm_notify_nodeid = node->nodeid; - return 0; - } - - log_group(mg, "check_dlm_notify done"); - return 1; -} - -static int wait_conditions_done(struct mountgroup *mg) -{ - if (!mg->dlm_registered) { - log_group(mg, "wait_conditions need dlm register"); - return 0; - } - - if (mg->first_recovery_needed) { - log_group(mg, "wait_conditions skip for first_recovery_needed"); - return 1; - } - - if (!mg->started_count) { - log_group(mg, "wait_conditions skip for zero started_count"); - return 1; - } - - if (!journals_need_recovery(mg)) { - log_group(mg, "wait_conditions skip for zero " - "journals_need_recovery"); - return 1; - } - - if 
(!mg->mount_client_notified) { - log_group(mg, "wait_conditions skip mount client not notified"); - return 1; - } - - if (mg->kernel_mount_done && mg->kernel_mount_error) { - log_group(mg, "wait_conditions skip for kernel_mount_error"); - return 1; - } - - if (!mg->kernel_mount_done) { - log_group(mg, "wait_conditions need mount_done"); - return 0; - } - - stop_kernel(mg); - - if (!check_dlm_notify_done(mg)) - return 0; - - return 1; -} - -static int wait_messages_done(struct mountgroup *mg) -{ - struct change *cg = list_first_entry(&mg->changes, struct change, list); - struct member *memb; - int need = 0, total = 0; - - list_for_each_entry(memb, &cg->members, list) { - if (!memb->start) - need++; - total++; - } - - if (need) { - log_group(mg, "wait_messages cg %u need %d of %d", - cg->seq, need, total); - return 0; - } - - log_group(mg, "wait_messages cg %u got all %d", cg->seq, total); - return 1; -} - -static void cleanup_changes(struct mountgroup *mg) -{ - struct change *cg = list_first_entry(&mg->changes, struct change, list); - struct change *safe; - - list_del(&cg->list); - if (mg->started_change) - free_cg(mg->started_change); - mg->started_change = cg; - - /* zero started_count means "never started" */ - - mg->started_count++; - if (!mg->started_count) - mg->started_count++; - - cg->combined_seq = cg->seq; /* for queries */ - - list_for_each_entry_safe(cg, safe, &mg->changes, list) { - mg->started_change->combined_seq = cg->seq; /* for queries */ - list_del(&cg->list); - free_cg(cg); - } -} - -/* do the change details in the message match the details of the given change */ - -static int match_change(struct mountgroup *mg, struct change *cg, - struct gfs_header *hd, struct mg_info *mi, - struct id_info *ids) -{ - struct id_info *id; - struct member *memb; - struct node *node; - uint32_t seq = hd->msgdata; - int i, members_mismatch; - - /* We can ignore messages if we're not in the list of members. 
- The one known time this will happen is after we've joined - the cpg, we can get messages for changes prior to the change - in which we're added. */ - - id = get_id_struct(ids, mi->id_info_count, mi->id_info_size,our_nodeid); - - if (!id || !(id->flags & IDI_NODEID_IS_MEMBER)) { - log_group(mg, "match_change %d:%u skip cg %u we are not in members", - hd->nodeid, seq, cg->seq); - return 0; - } - - memb = find_memb(cg, hd->nodeid); - if (!memb) { - log_group(mg, "match_change %d:%u skip cg %u sender not member", - hd->nodeid, seq, cg->seq); - return 0; - } - - if (memb->start && hd->type == GFS_MSG_START) { - log_group(mg, "match_change %d:%u skip %u already start", - hd->nodeid, seq, cg->seq); - return 0; - } - - /* a node's start can't match a change if the node joined the cluster - more recently than the change was created */ - - node = get_node_history(mg, hd->nodeid); - if (!node) { - log_group(mg, "match_change %d:%u skip cg %u no node history", - hd->nodeid, seq, cg->seq); - return 0; - } - - if (node->cluster_add_time > cg->create_time) { - log_group(mg, "match_change %d:%u skip cg %u created %llu " - "cluster add %llu", hd->nodeid, seq, cg->seq, - (unsigned long long)cg->create_time, - (unsigned long long)node->cluster_add_time); - return 0; - } - - /* verify this is the right change by matching the counts - and the nodeids of the current members */ - - if (mi->member_count != cg->member_count || - mi->joined_count != cg->joined_count || - mi->remove_count != cg->remove_count || - mi->failed_count != cg->failed_count) { - log_group(mg, "match_change %d:%u skip cg %u expect counts " - "%d %d %d %d", hd->nodeid, seq, cg->seq, - cg->member_count, cg->joined_count, - cg->remove_count, cg->failed_count); - return 0; - } - - members_mismatch = 0; - id = ids; - - for (i = 0; i < mi->id_info_count; i++) { - if (id->flags & IDI_NODEID_IS_MEMBER) { - memb = find_memb(cg, id->nodeid); - if (!memb) { - log_group(mg, "match_change %d:%u skip cg %u " - "no memb %d", 
hd->nodeid, seq, - cg->seq, id->nodeid); - members_mismatch = 1; - break; - } - } - id = (struct id_info *)((char *)id + mi->id_info_size); - } - - if (members_mismatch) - return 0; - - log_group(mg, "match_change %d:%u matches cg %u", hd->nodeid, seq, - cg->seq); - return 1; -} - -/* Unfortunately, there's no really simple way to match a message with the - specific change that it was sent for. We hope that by passing all the - details of the change in the message, we will be able to uniquely match - it to the correct change. */ - -/* A start message will usually be for the first (current) change on our list. - In some cases it will be for a non-current change, and we can ignore it: - - 1. A,B,C get confchg1 adding C - 2. C sends start for confchg1 - 3. A,B,C get confchg2 adding D - 4. A,B,C,D recv start from C for confchg1 - ignored - 5. C,D send start for confchg2 - 6. A,B send start for confchg2 - 7. A,B,C,D recv all start messages for confchg2; start barrier/cycle done - - In step 4, how do the nodes know whether the start message from C is - for confchg1 or confchg2? Hopefully by comparing the counts and members. 
*/ - -static struct change *find_change(struct mountgroup *mg, struct gfs_header *hd, - struct mg_info *mi, struct id_info *ids) -{ - struct change *cg; - - list_for_each_entry_reverse(cg, &mg->changes, list) { - if (!match_change(mg, cg, hd, mi, ids)) - continue; - return cg; - } - - log_group(mg, "find_change %d:%u no match", hd->nodeid, hd->msgdata); - return NULL; -} - -static void mg_info_in(struct mg_info *mi) -{ - mi->mg_info_size = le32_to_cpu(mi->mg_info_size); - mi->id_info_size = le32_to_cpu(mi->id_info_size); - mi->id_info_count = le32_to_cpu(mi->id_info_count); - mi->started_count = le32_to_cpu(mi->started_count); - mi->member_count = le32_to_cpu(mi->member_count); - mi->joined_count = le32_to_cpu(mi->joined_count); - mi->remove_count = le32_to_cpu(mi->remove_count); - mi->failed_count = le32_to_cpu(mi->failed_count); - mi->first_recovery_needed = le32_to_cpu(mi->first_recovery_needed); - mi->first_recovery_master = le32_to_cpu(mi->first_recovery_master); -} - -static void id_info_in(struct id_info *id) -{ - id->nodeid = le32_to_cpu(id->nodeid); - id->jid = le32_to_cpu(id->jid); - id->flags = le32_to_cpu(id->flags); -} - -static void ids_in(struct mg_info *mi, struct id_info *ids) -{ - struct id_info *id; - int i; - - id = ids; - for (i = 0; i < mi->id_info_count; i++) { - id_info_in(id); - id = (struct id_info *)((char *)id + mi->id_info_size); - } -} - -static void receive_start(struct mountgroup *mg, struct gfs_header *hd, int len) -{ - struct change *cg; - struct member *memb; - struct mg_info *mi; - struct id_info *ids; - uint32_t seq = hd->msgdata; - int added; - - log_group(mg, "receive_start %d:%u len %d", hd->nodeid, seq, len); - - mi = (struct mg_info *)((char *)hd + sizeof(struct gfs_header)); - ids = (struct id_info *)((char *)mi + sizeof(struct mg_info)); - - mg_info_in(mi); - ids_in(mi, ids); - - cg = find_change(mg, hd, mi, ids); - if (!cg) - return; - - memb = find_memb(cg, hd->nodeid); - if (!memb) { - /* this should never happen since 
match_change checks it */ - log_error("receive_start no member %d", hd->nodeid); - return; - } - - added = is_added(mg, hd->nodeid); - - if (added && mi->started_count && mg->started_count) { - log_error("receive_start %d:%u add node with started_count %u", - hd->nodeid, seq, mi->started_count); - - /* see comment in fence/fenced/cpg.c */ - memb->disallowed = 1; - return; - } - - node_history_start(mg, hd->nodeid); - memb->start = 1; - - if (memb->start_msg) { - /* shouldn't happen */ - log_error("receive_start %d:%u dup start msg", hd->nodeid, seq); - return; - } - - /* save a copy of each start message */ - memb->start_msg = malloc(len); - if (!memb->start_msg) { - log_error("receive_start len %d no mem", len); - return; - } - memcpy(memb->start_msg, hd, len); - - /* a shortcut to the saved mg_info */ - memb->mg_info = (struct mg_info *)(memb->start_msg + - sizeof(struct gfs_header)); -} - -/* start messages are associated with a specific change and use the - find_change/match_change routines to make sure all start messages - are matched with the same change on all nodes. The current set of - changes are cleared after a completed start cycle. Other messages - happen outside the context of changes. An "incomplete" start cycle - is when a confchg arrives (adding a new change struct) before all - start messages have been received for the current change. In this - case, all members send a new start message for the latest change, - and any start messages received for the previous change(s) are ignored. - - To sync state with start messages, we need to include: - - the state before applying any of the current set of queued changes - (new nodes will initialize with this) - - the essential info from changes in the set that's being started, - so nodes added by one of the queued changes can apply the same changes - to the init state that the existing nodes do. 
*/ - -/* recovery_result and mount_done messages may arrive between the time - that an old node sends start and the time a new node receives it. - two old nodes may also send start before/after a recovery_result or - mount_done message, creating inconsistent data in their start messages. - - Soln: a new node saves recovery_result/mount_done messages between - last confchg and final start. the new node knows that a start message - from an old node may or may not include the effects from rr/md messages - since the last confchg, but *will* include all effects from prior to - the last confchg. The saved rr/md messages can be applied on top of - the state from an old node's start message; applying them a second time - should not change anything, producing the same result. */ - -static int count_ids(struct mountgroup *mg) -{ - struct change *cg; - struct member *memb; - struct journal *j; - int count = 0; - - cg = list_first_entry(&mg->changes, struct change, list); - - list_for_each_entry(memb, &cg->members, list) - count++; - - list_for_each_entry(j, &mg->journals, list) - if (j->needs_recovery) - count++; - - list_for_each_entry(cg, &mg->changes, list) { - list_for_each_entry(memb, &cg->removed, list) { - if (!memb->failed && !is_withdraw(mg, memb->nodeid)) - continue; - j = find_journal_by_nodeid(mg, memb->nodeid); - if (j) - count++; - } - } - - return count; -} - -/* old member: current member that has completed a start cycle - new member: current member that has not yet completed a start cycle */ - -static void send_start(struct mountgroup *mg) -{ - struct change *cg, *c; - struct gfs_header *hd; - struct mg_info *mi; - struct id_info *id; - struct member *memb; - struct node *node; - struct journal *j; - char *buf; - uint32_t flags; - int len, id_count, jid; - int old_memb = 0, new_memb = 0, old_journal = 0, new_journal = 0; - - cg = list_first_entry(&mg->changes, struct change, list); - - id_count = count_ids(mg); - - /* sanity check */ - - if 
(!mg->started_count && id_count != cg->member_count) { - log_error("send_start bad counts id_count %d member_count %d", - cg->member_count, id_count); - return; - } - - len = sizeof(struct gfs_header) + sizeof(struct mg_info) + - id_count * sizeof(struct id_info); - - buf = malloc(len); - if (!buf) { - log_error("send_start len %d no mem", len); - return; - } - memset(buf, 0, len); - - hd = (struct gfs_header *)buf; - mi = (struct mg_info *)(buf + sizeof(*hd)); - id = (struct id_info *)(buf + sizeof(*hd) + sizeof(*mi)); - - /* fill in header (gfs_send_message handles part of header) */ - - hd->type = GFS_MSG_START; - hd->msgdata = cg->seq; - hd->flags |= mg->joining ? GFS_MFLG_JOINING : 0; - - /* fill in mg_info */ - - mi->mg_info_size = cpu_to_le32(sizeof(struct mg_info)); - mi->id_info_size = cpu_to_le32(sizeof(struct id_info)); - mi->id_info_count = cpu_to_le32(id_count); - mi->started_count = cpu_to_le32(mg->started_count); - mi->member_count = cpu_to_le32(cg->member_count); - mi->joined_count = cpu_to_le32(cg->joined_count); - mi->remove_count = cpu_to_le32(cg->remove_count); - mi->failed_count = cpu_to_le32(cg->failed_count); - mi->first_recovery_needed = cpu_to_le32(mg->first_recovery_needed); - mi->first_recovery_master = cpu_to_le32(mg->first_recovery_master); - - /* fill in id_info entries */ - - /* New members send info about themselves, and empty id_info slots for - all other members. Old members send full info about all old - members, and empty id_info slots about new members. The union of - start messages from a single old node and all new nodes give a - complete picture of state for all members. In sync_state, all nodes - (old and new) make this union, and then assign jid's to new nodes. 
*/ - - list_for_each_entry(memb, &cg->members, list) { - - if (!mg->started_count || is_added(mg, memb->nodeid)) { - /* send empty slot for new member */ - jid = JID_NONE; - flags = IDI_NODEID_IS_MEMBER; - - /* include our own info which no one knows yet */ - if (!mg->started_count && memb->nodeid == our_nodeid) { - flags |= mg->ro ? IDI_MOUNT_RO : 0; - flags |= mg->spectator ? IDI_MOUNT_SPECTATOR : 0; - } - new_memb++; - - } else { - /* send full info for old member */ - node = get_node_history(mg, memb->nodeid); - if (!node) { - log_error("send_start no nodeid %d", memb->nodeid); - continue; - } - - jid = node->jid; - flags = IDI_NODEID_IS_MEMBER; - flags |= node->ro ? IDI_MOUNT_RO : 0; - flags |= node->spectator ? IDI_MOUNT_SPECTATOR : 0; - flags |= node->kernel_mount_done ? IDI_MOUNT_DONE : 0; - flags |= node->kernel_mount_error ? IDI_MOUNT_ERROR : 0; - old_memb++; - } - - id->nodeid = cpu_to_le32(memb->nodeid); - id->jid = cpu_to_le32(jid); - id->flags = cpu_to_le32(flags); - id++; - } - - /* journals needing recovery from previous start cycles */ - - list_for_each_entry(j, &mg->journals, list) { - if (j->needs_recovery) { - flags = IDI_JID_NEEDS_RECOVERY; - id->jid = cpu_to_le32(j->jid); - id->flags = cpu_to_le32(flags); - id++; - old_journal++; - } - } - - /* journals needing recovery from the current start cycle */ - - list_for_each_entry(c, &mg->changes, list) { - list_for_each_entry(memb, &c->removed, list) { - if (!memb->failed && !is_withdraw(mg, memb->nodeid)) - continue; - j = find_journal_by_nodeid(mg, memb->nodeid); - if (j) { - flags = IDI_JID_NEEDS_RECOVERY; - id->jid = cpu_to_le32(j->jid); - id->flags = cpu_to_le32(flags); - id++; - new_journal++; - } - } - } - - /* sanity check */ - - if (!mg->started_count && (old_memb || old_journal || new_journal)) { - log_error("send_start cg %u bad counts om %d nm %d oj %d nj %d", - cg->seq, old_memb, new_memb, old_journal, new_journal); - return; - } - - log_group(mg, "send_start cg %u id_count %d om %d nm 
%d oj %d nj %d", - cg->seq, id_count, old_memb, new_memb, old_journal, - new_journal); - - gfs_send_message(mg, buf, len); - - free(buf); -} - -static void send_mount_done(struct mountgroup *mg, int result) -{ - struct gfs_header h; - - memset(&h, 0, sizeof(h)); - - h.type = GFS_MSG_MOUNT_DONE; - h.msgdata = result; - - gfs_send_message(mg, (char *)&h, sizeof(h)); -} - -static void send_first_recovery_done(struct mountgroup *mg) -{ - struct gfs_header h; - - memset(&h, 0, sizeof(h)); - - h.type = GFS_MSG_FIRST_RECOVERY_DONE; - - gfs_send_message(mg, (char *)&h, sizeof(h)); -} - -static void send_recovery_result(struct mountgroup *mg, int jid, int result) -{ - struct gfs_header *hd; - char *buf; - int len, *p; - - len = sizeof(struct gfs_header) + 2 * sizeof(int); - - buf = malloc(len); - if (!buf) { - log_error("send_recovery_result no mem %d", len); - return; - } - memset(buf, 0, len); - - hd = (struct gfs_header *)buf; - hd->type = GFS_MSG_RECOVERY_RESULT; - - p = (int *)(buf + sizeof(struct gfs_header)); - - p[0] = cpu_to_le32(jid); - p[1] = cpu_to_le32(result); - - gfs_send_message(mg, buf, len); - - free(buf); -} - -void send_remount(struct mountgroup *mg, int ro) -{ - struct gfs_header h; - - memset(&h, 0, sizeof(h)); - - h.type = GFS_MSG_REMOUNT; - h.msgdata = ro; - - gfs_send_message(mg, (char *)&h, sizeof(h)); -} - -void send_withdraw(struct mountgroup *mg) -{ - struct gfs_header h; - - memset(&h, 0, sizeof(h)); - - h.type = GFS_MSG_WITHDRAW; - - gfs_send_message(mg, (char *)&h, sizeof(h)); -} - -static void save_message(struct mountgroup *mg, struct gfs_header *hd, int len) -{ - struct change *cg; - struct save_msg *sm; - - cg = list_first_entry(&mg->changes, struct change, list); - - sm = malloc(sizeof(struct save_msg) + len); - if (!sm) { - log_error("save_message len %d no mem", len); - return; - } - - sm->len = len; - memcpy(sm->buf, hd, len); - - list_add_tail(&sm->list, &cg->saved_messages); -} - -void gfs_mount_done(struct mountgroup *mg) -{ - 
send_mount_done(mg, mg->kernel_mount_error); -} - -static void receive_mount_done(struct mountgroup *mg, struct gfs_header *hd, - int len) -{ - struct node *node; - - log_group(mg, "receive_mount_done from %d result %d", - hd->nodeid, hd->msgdata); - - node = get_node_history(mg, hd->nodeid); - if (!node) { - log_error("receive_mount_done no nodeid %d", hd->nodeid); - return; - } - - node->kernel_mount_done = 1; - node->kernel_mount_error = hd->msgdata; -} - -static void receive_recovery_result(struct mountgroup *mg, - struct gfs_header *hd, int len) -{ - struct journal *j; - int jid, result, *p; - - p = (int *)((char *)hd + sizeof(struct gfs_header)); - jid = le32_to_cpu(p[0]); - result = le32_to_cpu(p[1]); - - log_group(mg, "receive_recovery_result from %d jid %d result %d", - hd->nodeid, jid, result); - - j = find_journal(mg, jid); - if (!j) { - log_error("receive_recovery_result from %d no jid %d", - hd->nodeid, jid); - return; - } - - if (!j->needs_recovery) - return; - - if (result == LM_RD_SUCCESS) - j->needs_recovery = 0; - else { - j->failed_recovery_count++; - log_group(mg, "jid %d failed_recovery_count %d", jid, - j->failed_recovery_count); - } -} - -static void receive_first_recovery_done(struct mountgroup *mg, - struct gfs_header *hd, int len) -{ - int master = mg->first_recovery_master; - - log_group(mg, "receive_first_recovery_done from %d master %d " - "mount_client_notified %d", - hd->nodeid, master, mg->mount_client_notified); - - if (list_empty(&mg->changes)) { - /* everything is idle, no changes in progress */ - - mg->first_recovery_needed = 0; - mg->first_recovery_master = 0; - mg->first_recovery_msg = 1; - - if (master != our_nodeid) - start_kernel(mg); - } else { - /* Everyone will receive this message in the same sequence - wrt other start messages and confchgs: - - - If a new confchg arrives after this message (and before - the final start message in the current start cycle), - a new start cycle will begin. 
All nodes before the - confchg will have frn=0 due to receiving this message, - and nodes added by the confchg will see frn=0 in all - start messages (in any_nodes_first_recovery() which - returns 0). - - - If the final start message arrives after this message, - the start cycle will complete, running sync_state(), on - all current nodes with all having seen this message. - Old and new nodes in the current start cycle will see - this msg and use it (first_recovery_msg) instead of the - first_recovery_needed/master data in the start messages - (which may be inconsistent due to members sending their - start messages either before or after receiving this - message). */ - - /* exclude new nodes from this sanity check since they've - never set a master value to compare against */ - if (mg->started_count && (master != hd->nodeid)) - log_error("receive_first_recovery_done from %d " - "master %d", hd->nodeid, master); - - mg->first_recovery_needed = 0; - mg->first_recovery_master = 0; - mg->first_recovery_msg = 1; - } -} - -static void receive_remount(struct mountgroup *mg, struct gfs_header *hd, - int len) -{ - struct node *node; - - log_group(mg, "receive_remount from %d ro %d", hd->nodeid, hd->msgdata); - - node = get_node_history(mg, hd->nodeid); - if (!node) { - log_error("receive_remount no nodeid %d", hd->nodeid); - return; - } - - node->ro = hd->msgdata; - - if (hd->nodeid == our_nodeid) - mg->ro = node->ro; -} - -/* The node with the withdraw wants to leave the mountgroup, but have - the other nodes do recovery for it when it leaves. They wouldn't usually - do recovery for a node that leaves "normally", i.e. without failing at the - cluster membership level. So, we send a withdraw message to tell the - others that our succeeding leave-removal should be followed by recovery - like a failure-removal would be. - - The withdrawing node can't release dlm locks for the fs before other - nodes have stopped the fs. 
The same reason as for any gfs journal - recovery; the locks on the failed/withdrawn fs "protect" the parts of - the fs that need to be recovered, and until the fs on all mounters has - been stopped/blocked, our existing dlm locks need to remain to prevent - other nodes from touching these parts of the fs. - - So, the node doing withdraw needs to know that other nodes in the mountgroup - have blocked the fs before it sets /sys/fs/gfs/foo/withdraw to 1, which - tells gfs-kernel to continue and release dlm locks. - - Until the node doing withdraw has released the dlm locks on the withdrawn - fs, the other nodes' attempts to recover the given journal will fail (they - fail to acquire the journal lock.) So, these nodes need to either wait until - the dlm locks have been released before attempting to recover the journal, - or retry failed attempts at recovering the journal. - - How it works - . nodes A,B,C in mountgroup for fs foo - . foo is withrawn on node C - . C sends withdraw to all - . all set C->withraw = 1 - . C leaves mountgroup - . A,B,C get confchg removing C - . A,B stop kernel foo - . A,B send out-of-band message to C indicating foo is stopped - . C gets OOB message and set /sys/fs/gfs/foo/withdraw to 1 - . dlm locks for foo are released on C - . A,B will now be able to acquire C's journal lock for foo - . A,B will complete recovery of foo - - An "in-band" message would be through cpg foo, but since C has left cpg - foo, we can't use that cpg, and have to go through an external channel. 
-*/ - -static void receive_withdraw(struct mountgroup *mg, struct gfs_header *hd, - int len) -{ - struct node *node; - - log_group(mg, "receive_withdraw from %d", hd->nodeid); - - node = get_node_history(mg, hd->nodeid); - if (!node) { - log_error("receive_withdraw no nodeid %d", hd->nodeid); - return; - } - node->withdraw = 1; - - if (hd->nodeid == our_nodeid) - gfs_leave_mountgroup(mg, 0); -} - -/* start message from all nodes shows zero started_count */ - -static int all_nodes_new(struct mountgroup *mg) -{ - struct change *cg; - struct member *memb; - - cg = list_first_entry(&mg->changes, struct change, list); - - list_for_each_entry(memb, &cg->members, list) { - if (memb->mg_info->started_count) - return 0; - } - return 1; -} - -/* does start message from any node with non-zero started_count have - first_recovery_needed set? (verify that all started nodes agree on - first_recovery_needed) */ - -static int any_nodes_first_recovery(struct mountgroup *mg) -{ - struct change *cg; - struct member *memb; - int yes = 0, no = 0, master = 0; - - cg = list_first_entry(&mg->changes, struct change, list); - - list_for_each_entry(memb, &cg->members, list) { - if (!memb->mg_info->started_count) - continue; - if (memb->mg_info->first_recovery_needed) - yes++; - else - no++; - } - - if (no && yes) { - /* disagreement on first_recovery_needed, shouldn't happen */ - log_error("any_nodes_first_recovery no %d yes %d", no, yes); - return 1; - } - - if (no) - return 0; - - /* sanity check: verify agreement on the master */ - - list_for_each_entry(memb, &cg->members, list) { - if (!memb->mg_info->started_count) - continue; - if (!master) { - master = memb->mg_info->first_recovery_master; - continue; - } - if (master == memb->mg_info->first_recovery_master) - continue; - - /* disagreement on master, shouldn't happen */ - log_error("any_nodes_first_recovery master %d vs %d", - master, memb->mg_info->first_recovery_master); - } - - return 1; -} - -/* If all nodes new, there's no 
previous master, pick low nodeid; - if not all nodes new, there will be a previous master, use that one unless - it's no longer a member; if master is no longer a member pick low nodeid. - The current master will already be set in mg->first_recovery_master for old - nodes, but new nodes will need to look in the start messages to find it. */ - -static int pick_first_recovery_master(struct mountgroup *mg, int all_new) -{ - struct change *cg; - struct member *memb; - int old = 0, low = 0; - - cg = list_first_entry(&mg->changes, struct change, list); - - list_for_each_entry(memb, &cg->members, list) { - if (memb->mg_info->started_count) - old = memb->mg_info->first_recovery_master; - - if (!low) - low = memb->nodeid; - else if (memb->nodeid < low) - low = memb->nodeid; - } - - memb = find_memb(cg, old); - - if (!memb || all_new) { - log_group(mg, "pick_first_recovery_master low %d old %d", - low, old); - return low; - } - - log_group(mg, "pick_first_recovery_master old %d", old); - return old; -} - -/* use a start message from an old node to create node info for each old node */ - -static void create_old_nodes(struct mountgroup *mg) -{ - struct change *cg; - struct member *memb; - struct node *node; - struct journal *j; - struct id_info *ids, *id; - int id_count, id_size, rv; - - /* get ids from a start message of an old node */ - - rv = get_id_list(mg, &ids, &id_count, &id_size); - if (rv) { - /* all new nodes, no old nodes */ - log_group(mg, "create_old_nodes all new"); - return; - } - - /* use id list to set info for all old nodes */ - - cg = list_first_entry(&mg->changes, struct change, list); - - list_for_each_entry(memb, &cg->members, list) { - if (!memb->mg_info->started_count) - continue; - - node = get_node_history(mg, memb->nodeid); - id = get_id_struct(ids, id_count, id_size, memb->nodeid); - - if (!node || !id) { - /* shouldn't happen */ - log_error("create_old_nodes %d node %d id %d", - memb->nodeid, !!node, !!id); - return; - } - - if (!(id->flags & 
IDI_NODEID_IS_MEMBER) || - (id->flags & IDI_JID_NEEDS_RECOVERY)) { - /* shouldn't happen */ - log_error("create_old_nodes %d bad flags %x", - memb->nodeid, id->flags); - return; - } - - node->jid = id->jid; - node->kernel_mount_done = !!(id->flags & IDI_MOUNT_DONE); - node->kernel_mount_error = !!(id->flags & IDI_MOUNT_ERROR); - node->ro = !!(id->flags & IDI_MOUNT_RO); - node->spectator = !!(id->flags & IDI_MOUNT_SPECTATOR); - - j = malloc(sizeof(struct journal)); - if (!j) { - log_error("create_old_nodes no mem"); - return; - } - memset(j, 0, sizeof(struct journal)); - - j->nodeid = node->nodeid; - j->jid = node->jid; - list_add(&j->list, &mg->journals); - - log_group(mg, "create_old_nodes %d jid %d ro %d spect %d " - "kernel_mount_done %d error %d", - node->nodeid, node->jid, node->ro, node->spectator, - node->kernel_mount_done, node->kernel_mount_error); - } -} - -/* use start messages from new nodes to create node info for each new node */ - -static void create_new_nodes(struct mountgroup *mg) -{ - struct change *cg; - struct member *memb; - struct id_info *ids, *id; - struct node *node; - - cg = list_first_entry(&mg->changes, struct change, list); - - list_for_each_entry(memb, &cg->members, list) { - if (memb->mg_info->started_count) - continue; - - node = get_node_history(mg, memb->nodeid); - if (!node) { - /* shouldn't happen */ - log_error("create_new_nodes %d no node", memb->nodeid); - return; - } - - ids = (struct id_info *)(memb->start_msg + - sizeof(struct gfs_header) + - memb->mg_info->mg_info_size); - - id = get_id_struct(ids, memb->mg_info->id_info_count, - memb->mg_info->id_info_size, memb->nodeid); - - if (!(id->flags & IDI_NODEID_IS_MEMBER) || - (id->flags & IDI_JID_NEEDS_RECOVERY)) { - /* shouldn't happen */ - log_error("create_new_nodes %d bad flags %x", - memb->nodeid, id->flags); - return; - } - - node->jid = JID_NONE; - node->ro = !!(id->flags & IDI_MOUNT_RO); - node->spectator = !!(id->flags & IDI_MOUNT_SPECTATOR); - - log_group(mg, 
"create_new_nodes %d ro %d spect %d", - node->nodeid, node->ro, node->spectator); - } -} - -static void print_id_list(struct mountgroup *mg, struct id_info *ids, - int id_count, int id_size) -{ - struct id_info *id = ids; - int i; - - for (i = 0; i < id_count; i++) { - log_group(mg, "id nodeid %d jid %d flags %08x", - id->nodeid, id->jid, id->flags); - id = (struct id_info *)((char *)id + id_size); - } -} - -static void create_failed_journals(struct mountgroup *mg) -{ - struct journal *j; - struct id_info *ids, *id; - int id_count, id_size; - int rv, i; - - rv = get_id_list(mg, &ids, &id_count, &id_size); - if (rv) { - /* all new nodes, no old nodes */ - log_group(mg, "create_failed_journals all new"); - return; - } - print_id_list(mg, ids, id_count, id_size); - - id = ids; - - for (i = 0; i < id_count; i++) { - if (!(id->flags & IDI_JID_NEEDS_RECOVERY)) - goto next; - - j = malloc(sizeof(struct journal)); - if (!j) { - log_error("create_failed_journals no mem"); - return; - } - memset(j, 0, sizeof(struct journal)); - - j->jid = id->jid; - j->needs_recovery = 1; - list_add(&j->list, &mg->journals); - log_group(mg, "create_failed_journals jid %d", j->jid); - next: - id = (struct id_info *)((char *)id + id_size); - } -} - -/* This pattern (for each failed memb in removed list of each change) is - repeated and needs to match in four places: here, count_ids(), - send_start(), and journals_need_recovery(). 
*/ - -static void set_failed_journals(struct mountgroup *mg) -{ - struct change *cg; - struct member *memb; - struct journal *j; - - cg = list_first_entry(&mg->changes, struct change, list); - - list_for_each_entry(cg, &mg->changes, list) { - list_for_each_entry(memb, &cg->removed, list) { - if (!memb->failed && !is_withdraw(mg, memb->nodeid)) - continue; - j = find_journal_by_nodeid(mg, memb->nodeid); - if (j) { - j->needs_recovery = 1; - j->failed_nodeid = j->nodeid; - j->nodeid = 0; - log_group(mg, "set_failed_journals jid %d " - "nodeid %d", j->jid, memb->nodeid); - } else { - log_group(mg, "set_failed_journals no journal " - "for nodeid %d ", memb->nodeid); - } - } - } -} - -/* returns nodeid of new member with the next highest nodeid */ - -static int next_new_nodeid(struct mountgroup *mg, int prev) -{ - struct change *cg; - struct member *memb; - int low = 0; - - cg = list_first_entry(&mg->changes, struct change, list); - - list_for_each_entry(memb, &cg->members, list) { - if (memb->mg_info->started_count) - continue; - if (memb->nodeid <= prev) - continue; - if (!low) - low = memb->nodeid; - else if (memb->nodeid < low) - low = memb->nodeid; - } - - return low; -} - -/* returns lowest unused jid */ - -static int next_free_jid(struct mountgroup *mg) -{ - int i; - - for (i = 0; i < MAX_JOURNALS; i++) { - if (!find_journal(mg, i)) - return i; - } - return -1; -} - -static void create_new_journals(struct mountgroup *mg) -{ - struct journal *j, *safe; - struct change *cg; - struct node *node; - int nodeid = 0; - - cg = list_first_entry(&mg->changes, struct change, list); - - /* first get rid of journal structs that are no longer used - or dirty, i.e. 
from nodes that have unmounted/left, or - journals that have been recovered */ - - list_for_each_entry_safe(j, safe, &mg->journals, list) { - if (j->needs_recovery) - continue; - - if (find_memb(cg, j->nodeid)) - continue; - - list_del(&j->list); - free(j); - } - - while (1) { - nodeid = next_new_nodeid(mg, nodeid); - if (!nodeid) - break; - - node = get_node_history(mg, nodeid); - if (!node) { - /* shouldn't happen */ - log_error("create_new_journals no nodeid %d", nodeid); - continue; - } - - if (node->spectator) - node->jid = JID_NONE; - else - node->jid = next_free_jid(mg); - - if (node->nodeid == our_nodeid) - mg->our_jid = node->jid; - - log_group(mg, "create_new_journals %d gets jid %d", - node->nodeid, node->jid); - - if (node->jid == JID_NONE) - continue; - - j = malloc(sizeof(struct journal)); - if (!j) { - log_error("create_new_journals no mem"); - continue; - } - memset(j, 0, sizeof(struct journal)); - - j->nodeid = nodeid; - j->jid = node->jid; - list_add(&j->list, &mg->journals); - } -} - -/* recovery_result and mount_done messages are saved by new members until - they've completed the start cycle and have member state to apply them to. - The start messages from old nodes may not reflect the rr/md updates. 
*/ - -static void apply_saved_messages(struct mountgroup *mg) -{ - struct change *cg; - struct save_msg *sm, *safe; - struct gfs_header *hd; - - cg = list_first_entry(&mg->changes, struct change, list); - - list_for_each_entry_safe(sm, safe, &cg->saved_messages, list) { - hd = (struct gfs_header *)sm->buf; - - switch (hd->type) { - case GFS_MSG_MOUNT_DONE: - receive_mount_done(mg, hd, sm->len); - break; - case GFS_MSG_RECOVERY_RESULT: - receive_recovery_result(mg, hd, sm->len); - break; - } - - list_del(&sm->list); - free(sm); - } -} - -/* this is run immediately after receiving the final start message in a start - cycle, so all nodes will run this in the same sequence wrt other messages - and confchgs */ - -static void sync_state(struct mountgroup *mg) -{ - /* This is needed for the case where the first_recovery_done message - arrives while a change/start cycle is in progress. The - first_recovery data in the start messages (used by new nodes in this - cycle to determine the first_recovery state) may be inconsistent in - different start messages (because nodes sent their start messages at - different times wrt the first_recovery_done message.) But, in the - case where the new nodes received the first_recovery_done message, - they can just use that and don't need the (possibly inconsistent) - first recovery data in the start messages. */ - - if (mg->first_recovery_msg) { - if (mg->first_recovery_needed || mg->first_recovery_master) { - /* shouldn't happen */ - log_error("sync_state first_recovery_msg needed %d " - "master %d", mg->first_recovery_needed, - mg->first_recovery_master); - } - - log_group(mg, "sync_state first_recovery_msg"); - goto out; - } - - /* This is the path the initial start cycle for the group always - follows. It's the case where one or more nodes are all starting up - for the first time. No one has completed a start cycle yet because - everyone is joining, and one node needs to do first recovery. 
*/ - - if (all_nodes_new(mg)) { - if (mg->first_recovery_needed || mg->first_recovery_master) { - /* shouldn't happen */ - log_error("sync_state all_nodes_new first_recovery " - "needed %d master %d", - mg->first_recovery_needed, - mg->first_recovery_master); - } - mg->first_recovery_needed = 1; - mg->first_recovery_master = pick_first_recovery_master(mg, 1); - - log_group(mg, "sync_state all_nodes_new first_recovery_needed " - "master %d", mg->first_recovery_master); - goto out; - } - - /* This is for the case where new nodes are added to existing members - that have first_recovery_needed set. */ - - if (any_nodes_first_recovery(mg)) { - mg->first_recovery_needed = 1; - mg->first_recovery_master = pick_first_recovery_master(mg, 0); - - log_group(mg, "sync_state first_recovery_needed master %d", - mg->first_recovery_master); - goto out; - } - - /* Normal case where nodes join an established group that completed - first recovery sometime in the past. Existing nodes that weren't - around during first recovery come through here, and new nodes - being added in this cycle come through here. 
*/ - - if (mg->first_recovery_needed) { - /* shouldn't happen */ - log_error("sync_state frn should not be set"); - goto out; - } - - log_group(mg, "sync_state"); - out: - send_withdraw_acks(mg); - - if (!mg->started_count) { - create_old_nodes(mg); - create_new_nodes(mg); - create_failed_journals(mg); - apply_saved_messages(mg); - create_new_journals(mg); - } else { - create_new_nodes(mg); - set_failed_journals(mg); - create_new_journals(mg); - } -} - -static void apply_changes(struct mountgroup *mg) -{ - struct change *cg; - - cg = list_first_entry(&mg->changes, struct change, list); - - switch (cg->state) { - - case CGST_WAIT_CONDITIONS: - if (wait_conditions_done(mg)) { - send_start(mg); - cg->state = CGST_WAIT_MESSAGES; - } - break; - - case CGST_WAIT_MESSAGES: - if (wait_messages_done(mg)) { - sync_state(mg); - cleanup_changes(mg); - } - break; - - default: - log_error("apply_changes invalid state %d", cg->state); - } -} - -void process_first_mount(struct mountgroup *mg) -{ - /* - * Assumption here is that only the first mounter will get - * uevents when first_recovery_needed is set. - */ - - /* make a local record of jid and recover_status; we may want - to check below that we've seen uevents for all jids - during first recovery before sending first_recovery_done. */ - - log_group(mg, "recovery_uevent mg %s first recovery done", mg->name); - - /* ignore extraneous uevent from others_may_mount */ - if (mg->first_done_uevent) - return; - - log_group(mg, "recovery_uevent first_done"); - mg->first_done_uevent = 1; - send_first_recovery_done(mg); - - apply_changes_recovery(mg); -} - -/* We send messages with the info from kernel uevents or mount.gfs ipc, - and then process the uevent/ipc upon receiving the message for it, so - that it can be processed in the same order by all nodes. 
*/ - -void process_recovery_uevent(struct mountgroup *mg, int jid, int recover_status) -{ - struct journal *j; - - if (mg->first_recovery_needed) - return; - - if (!mg->local_recovery_busy) { - /* This will happen in two known situations: - - we get a recovery_done uevent for our own journal - when we mount (jid == mg->our_jid) - - the first mounter can read first_done and clear - first_recovery_needed before seeing the change - uevent from others_may_mount */ - log_group(mg, "recovery_uevent jid %d ignore", jid); - return; - } - - mg->local_recovery_busy = 0; - - if (mg->local_recovery_jid != jid) { - log_error("recovery_uevent jid %d expected %d", jid, - mg->local_recovery_jid); - return; - } - - j = find_journal(mg, jid); - if (!j) { - log_error("recovery_uevent no journal %d", jid); - return; - } - - log_group(mg, "recovery_uevent jid %d status %d " - "local_recovery_done %d needs_recovery %d", - jid, recover_status, j->local_recovery_done, - j->needs_recovery); - - j->local_recovery_done = 1; - j->local_recovery_result = recover_status; - - /* j->needs_recovery will be cleared when we receive this - recovery_result message. 
if it's already set, then - someone else has completed the recovery and there's - no need to send our result */ - - if (j->needs_recovery) - send_recovery_result(mg, jid, recover_status); - - apply_changes_recovery(mg); -} - -static void start_journal_recovery(struct mountgroup *mg, int jid) -{ - int rv; - - log_group(mg, "start_journal_recovery jid %d", jid); - - rv = set_sysfs(mg, "recover", jid); - if (rv < 0) { - log_error("start_journal_recovery %d error %d", jid, rv); - return; - } - - mg->local_recovery_busy = 1; - mg->local_recovery_jid = jid; -} - -static int wait_recoveries_done(struct mountgroup *mg) -{ - struct journal *j; - int wait_count = 0; - - list_for_each_entry(j, &mg->journals, list) { - if (j->needs_recovery) { - log_group(mg, "wait_recoveries jid %d nodeid %d " - "unrecovered", j->jid, j->failed_nodeid); - wait_count++; - } - } - - if (wait_count) - return 0; - - log_group(mg, "wait_recoveries done"); - return 1; -} - -/* pick a jid that has not been successfully recovered by someone else - (received recovery_result success message) and hasn't been recovered - by us (local record); if nothing to recover, return 0 */ - -static int pick_journal_to_recover(struct mountgroup *mg, int *jid) -{ - struct journal *j; - - list_for_each_entry(j, &mg->journals, list) { - if (j->needs_recovery && !j->local_recovery_done) { - *jid = j->jid; - return 1; - } - } - -#if 0 - /* FIXME: do something so this doesn't happen so regularly; maybe - * retry only after all nodes have failed. This code doesn't work - * but shows an idea of roughly how to fix the issue. - */ - - /* Retry recoveries that failed the first time. This is necessary - at times for withrawn journals when all nodes fail the recovery - (fail to get journal lock) before the withdrawing node has had a - chance to clear its dlm locks for the withdrawn journal. - 32 max retries is random, and includes attempts by all nodes. 
*/ - - list_for_each_entry(j, &mg->journals, list) { - if (j->needs_recovery && j->local_recovery_done && - (j->local_recovery_result == LM_RD_GAVEUP) && - (j->failed_recovery_count > 1) && - (j->failed_recovery_count < 32)) { - log_group(mg, "retrying jid %d recovery", j->jid); - *jid = j->jid; - sleep(1); /* might this cause problems? */ - return 1; - } - } -#endif - - return 0; -} - -/* processing that happens after all changes have been dealt with */ - -static void apply_recovery(struct mountgroup *mg) -{ - int jid; - - if (mg->first_recovery_needed) { - if (mg->first_recovery_master == our_nodeid && - !mg->mount_client_notified) { - log_group(mg, "apply_recovery first start_kernel"); - mg->first_mounter = 1; /* adds first=1 to hostdata */ - start_kernel(mg); /* includes reply to mount.gfs */ - } - return; - } - - /* The normal non-first-recovery mode. When a recovery_done message - is received, check whether any more journals need recovery. If - so, start recovery on the next one, if not, start the kernel. 
*/ - - if (!wait_recoveries_done(mg)) { - if (!mg->kernel_mount_done || mg->kernel_mount_error) - return; - if (mg->spectator) - return; - if (mg->local_recovery_busy) - return; - if (pick_journal_to_recover(mg, &jid)) - start_journal_recovery(mg, jid); - } else { - if (!mg->kernel_stopped) - return; - log_group(mg, "apply_recovery start_kernel"); - start_kernel(mg); - } -} - -static void apply_changes_recovery(struct mountgroup *mg) -{ - if (!list_empty(&mg->changes)) - apply_changes(mg); - - if (mg->started_change && list_empty(&mg->changes)) - apply_recovery(mg); -} - -void process_mountgroups(void) -{ - struct mountgroup *mg, *safe; - - list_for_each_entry_safe(mg, safe, &mountgroups, list) - apply_changes_recovery(mg); -} - -static int add_change(struct mountgroup *mg, - const struct cpg_address *member_list, - size_t member_list_entries, - const struct cpg_address *left_list, - size_t left_list_entries, - const struct cpg_address *joined_list, - size_t joined_list_entries, - struct change **cg_out) -{ - struct change *cg; - struct member *memb; - int i, error; - - cg = malloc(sizeof(struct change)); - if (!cg) - goto fail_nomem; - memset(cg, 0, sizeof(struct change)); - INIT_LIST_HEAD(&cg->members); - INIT_LIST_HEAD(&cg->removed); - INIT_LIST_HEAD(&cg->saved_messages); - cg->state = CGST_WAIT_CONDITIONS; - cg->create_time = time(NULL); - cg->seq = ++mg->change_seq; - if (!cg->seq) - cg->seq = ++mg->change_seq; - - cg->member_count = member_list_entries; - cg->joined_count = joined_list_entries; - cg->remove_count = left_list_entries; - - for (i = 0; i < member_list_entries; i++) { - memb = malloc(sizeof(struct member)); - if (!memb) - goto fail_nomem; - memset(memb, 0, sizeof(struct member)); - memb->nodeid = member_list[i].nodeid; - list_add_tail(&memb->list, &cg->members); - } - - for (i = 0; i < left_list_entries; i++) { - memb = malloc(sizeof(struct member)); - if (!memb) - goto fail_nomem; - memset(memb, 0, sizeof(struct member)); - memb->nodeid = 
left_list[i].nodeid; - if (left_list[i].reason == CPG_REASON_NODEDOWN || - left_list[i].reason == CPG_REASON_PROCDOWN) { - memb->failed = 1; - cg->failed_count++; - } - list_add_tail(&memb->list, &cg->removed); - - if (memb->failed) - node_history_fail(mg, memb->nodeid, cg, - left_list[i].reason); - else - node_history_left(mg, memb->nodeid, cg); - - log_group(mg, "add_change cg %u remove nodeid %d reason %d", - cg->seq, memb->nodeid, left_list[i].reason); - - if (left_list[i].reason == CPG_REASON_PROCDOWN) - kick_node_from_cluster(memb->nodeid); - } - - for (i = 0; i < joined_list_entries; i++) { - memb = find_memb(cg, joined_list[i].nodeid); - if (!memb) { - log_error("no member %d", joined_list[i].nodeid); - error = -ENOENT; - goto fail; - } - memb->added = 1; - - if (memb->nodeid == our_nodeid) - cg->we_joined = 1; - else - node_history_init(mg, memb->nodeid, cg); - - log_group(mg, "add_change cg %u joined nodeid %d", cg->seq, - memb->nodeid); - } - - if (cg->we_joined) { - log_group(mg, "add_change cg %u we joined", cg->seq); - list_for_each_entry(memb, &cg->members, list) - node_history_init(mg, memb->nodeid, cg); - } - - log_group(mg, "add_change cg %u counts member %d joined %d remove %d " - "failed %d", cg->seq, cg->member_count, cg->joined_count, - cg->remove_count, cg->failed_count); - - list_add(&cg->list, &mg->changes); - *cg_out = cg; - return 0; - - fail_nomem: - log_error("no memory"); - error = -ENOMEM; - fail: - free_cg(cg); - return error; -} - -static int we_left(const struct cpg_address *left_list, - size_t left_list_entries) -{ - int i; - - for (i = 0; i < left_list_entries; i++) { - if (left_list[i].nodeid == our_nodeid) - return 1; - } - return 0; -} - -static void confchg_cb(cpg_handle_t handle, - const struct cpg_name *group_name, - const struct cpg_address *member_list, - size_t member_list_entries, - const struct cpg_address *left_list, - size_t left_list_entries, - const struct cpg_address *joined_list, - size_t joined_list_entries) -{ 
- struct mountgroup *mg; - struct change *cg; - int rv; - - log_config(group_name, member_list, member_list_entries, - left_list, left_list_entries, - joined_list, joined_list_entries); - - mg = find_mg_handle(handle); - if (!mg) { - log_error("confchg_cb no mountgroup for cpg %s", - group_name->value); - return; - } - - if (mg->leaving && we_left(left_list, left_list_entries)) { - /* we called cpg_leave(), and this should be the final - cpg callback we receive */ - log_group(mg, "confchg for our leave"); - dlmc_fs_unregister(dlmcontrol_fd, mg->name); - cpg_finalize(mg->cpg_handle); - client_dead(mg->cpg_client); - list_del(&mg->list); - if (!mg->withdraw_uevent) { - free_mg(mg); - } else { - if (!member_list_entries) { - /* no one remaining to send us an ack */ - set_sysfs(mg, "withdraw", 1); - free_mg(mg); - } else { - /* set the sysfs withdraw file and free the mg - when the ack arrives */ - list_add(&mg->list, &withdrawn_mounts); - } - } - return; - } - - rv = add_change(mg, member_list, member_list_entries, - left_list, left_list_entries, - joined_list, joined_list_entries, &cg); - if (rv) - return; - - apply_changes_recovery(mg); -} - -static void gfs_header_in(struct gfs_header *hd) -{ - hd->version[0] = le16_to_cpu(hd->version[0]); - hd->version[1] = le16_to_cpu(hd->version[1]); - hd->version[2] = le16_to_cpu(hd->version[2]); - hd->type = le16_to_cpu(hd->type); - hd->nodeid = le32_to_cpu(hd->nodeid); - hd->to_nodeid = le32_to_cpu(hd->to_nodeid); - hd->global_id = le32_to_cpu(hd->global_id); - hd->flags = le32_to_cpu(hd->flags); - hd->msgdata = le32_to_cpu(hd->msgdata); -} - -static int gfs_header_check(struct gfs_header *hd, int nodeid) -{ - if (hd->version[0] != our_protocol.daemon_run[0] || - hd->version[1] != our_protocol.daemon_run[1]) { - log_error("reject message from %d version %u.%u.%u vs %u.%u.%u", - nodeid, hd->version[0], hd->version[1], - hd->version[2], our_protocol.daemon_run[0], - our_protocol.daemon_run[1], - our_protocol.daemon_run[2]); - 
return -1; - } - - if (hd->nodeid != nodeid) { - log_error("bad message nodeid %d %d", hd->nodeid, nodeid); - return -1; - } - - return 0; -} - -static void deliver_cb(cpg_handle_t handle, - const struct cpg_name *group_name, - uint32_t nodeid, uint32_t pid, - void *data, size_t len) -{ - struct mountgroup *mg; - struct gfs_header *hd; - - mg = find_mg_handle(handle); - if (!mg) { - log_error("deliver_cb no mg for cpg %s", group_name->value); - return; - } - - if (len < sizeof(*hd)) { - log_error("deliver_cb short message %zd", len); - return; - } - - hd = (struct gfs_header *)data; - gfs_header_in(hd); - - if (gfs_header_check(hd, nodeid) < 0) - return; - - switch (hd->type) { - case GFS_MSG_START: - receive_start(mg, hd, len); - break; - case GFS_MSG_MOUNT_DONE: - if (!mg->started_count) - save_message(mg, hd, len); - else - receive_mount_done(mg, hd, len); - break; - case GFS_MSG_FIRST_RECOVERY_DONE: - receive_first_recovery_done(mg, hd, len); - break; - case GFS_MSG_RECOVERY_RESULT: - if (!mg->started_count) - save_message(mg, hd, len); - else - receive_recovery_result(mg, hd, len); - break; - case GFS_MSG_REMOUNT: - receive_remount(mg, hd, len); - break; - case GFS_MSG_WITHDRAW: - receive_withdraw(mg, hd, len); - break; - default: - log_error("unknown msg type %d", hd->type); - } - - apply_changes_recovery(mg); -} - -static cpg_callbacks_t cpg_callbacks = { - .cpg_deliver_fn = deliver_cb, - .cpg_confchg_fn = confchg_cb, -}; - -static void process_cpg_mountgroup(int ci) -{ - struct mountgroup *mg; - cpg_error_t error; - - mg = find_mg_ci(ci); - if (!mg) { - log_error("process_mountgroup_cpg no mountgroup for ci %d", ci); - return; - } - - error = cpg_dispatch(mg->cpg_handle, CPG_DISPATCH_ALL); - if (error != CPG_OK) { - log_error("cpg_dispatch error %d", error); - return; - } - - update_flow_control_status(); -} - -int gfs_join_mountgroup(struct mountgroup *mg) -{ - cpg_error_t error; - cpg_handle_t h; - struct cpg_name name; - int i = 0, fd, ci, rv; - - /* I 
think this registration with dlm_controld could be done - just about anywhere before we do the mount(2). */ - rv = dlmc_fs_register(dlmcontrol_fd, mg->name); - if (rv) { - log_error("dlmc_fs_register failed %d", rv); - return rv; - } - - error = cpg_initialize(&h, &cpg_callbacks); - if (error != CPG_OK) { - log_error("cpg_initialize error %d", error); - goto fail; - } - - cpg_fd_get(h, &fd); - - ci = client_add(fd, process_cpg_mountgroup, NULL); - - mg->cpg_handle = h; - mg->cpg_client = ci; - mg->cpg_fd = fd; - mg->kernel_stopped = 1; - mg->joining = 1; - - memset(&name, 0, sizeof(name)); - sprintf(name.value, "gfs:mount:%s", mg->name); - name.length = strlen(name.value) + 1; - - /* TODO: allow global_id to be set in cluster.conf? */ - mg->id = cpgname_to_crc(name.value, name.length); - - retry: - error = cpg_join(h, &name); - if (error == CPG_ERR_TRY_AGAIN) { - sleep(1); - if (!(++i % 10)) - log_error("cpg_join error retrying"); - goto retry; - } - if (error != CPG_OK) { - log_error("cpg_join error %d", error); - cpg_finalize(h); - goto fail_client; - } - - return 0; - - fail_client: - client_dead(ci); - cpg_finalize(h); - fail: - dlmc_fs_unregister(dlmcontrol_fd, mg->name); - return -ENOTCONN; -} - -/* If mount(2) fails, we'll often get two leaves, one from seeing the remove - uevent, and the other from mount.gfs. I suspect they could arrive in either - order. We can just ignore the second. The second would either not find - the mg here, or would see mg->leaving of 1 from the first. 
*/ - -void gfs_leave_mountgroup(struct mountgroup *mg, int mnterr) -{ - cpg_error_t error; - struct cpg_name name; - int i = 0; - - if (mg->leaving) { - log_group(mg, "leave: already leaving"); - return; - } - mg->leaving = 1; - - memset(&name, 0, sizeof(name)); - sprintf(name.value, "gfs:mount:%s", mg->name); - name.length = strlen(name.value) + 1; - - retry: - error = cpg_leave(mg->cpg_handle, &name); - if (error == CPG_ERR_TRY_AGAIN) { - sleep(1); - if (!(++i % 10)) - log_error("cpg_leave error retrying"); - goto retry; - } - if (error != CPG_OK) - log_error("cpg_leave error %d", error); -} - -void do_leave(struct mountgroup *mg, int mnterr) -{ - log_debug("do_leave %s mnterr %d", mg->name, mnterr); - - if (mg->withdraw_uevent) { - log_group(mg, "do_leave: ignored during withdraw"); - return; - } - - gfs_leave_mountgroup(mg, mnterr); -} - -static void receive_withdraw_ack(struct gfs_header *hd, int len) -{ - struct mountgroup *mg; - - if (hd->to_nodeid != our_nodeid) - return; - - log_debug("receive_withdraw_ack from %d global_id %x", - hd->nodeid, hd->global_id); - - list_for_each_entry(mg, &withdrawn_mounts, list) { - if (mg->id != hd->global_id) - continue; - set_sysfs(mg, "withdraw", 1); - list_del(&mg->list); - free_mg(mg); - break; - } -} - -static void send_withdraw_ack(struct mountgroup *mg, int nodeid) -{ - struct gfs_header h; - - memset(&h, 0, sizeof(h)); - - h.version[0] = cpu_to_le16(our_protocol.daemon_run[0]); - h.version[1] = cpu_to_le16(our_protocol.daemon_run[1]); - h.version[2] = cpu_to_le16(our_protocol.daemon_run[2]); - h.type = cpu_to_le16(GFS_MSG_WITHDRAW_ACK); - h.nodeid = cpu_to_le32(our_nodeid); - h.to_nodeid = cpu_to_le32(nodeid); - h.global_id = cpu_to_le32(mg->id); - - _send_message(cpg_handle_daemon, (char *)&h, sizeof(h), - GFS_MSG_WITHDRAW_ACK); -} - -/* Everyone remaining in the group will send an ack for the withdrawn fs; - all but the first will be ignored. 
*/ - -static void send_withdraw_acks(struct mountgroup *mg) -{ - struct node *node; - - list_for_each_entry(node, &mg->node_history, list) { - if (node->withdraw && !node->send_withdraw_ack) { - send_withdraw_ack(mg, node->nodeid); - node->send_withdraw_ack = 1; - } - } -} - -static struct node *get_node_daemon(int nodeid) -{ - struct node *node; - - list_for_each_entry(node, &daemon_nodes, list) { - if (node->nodeid == nodeid) - return node; - } - return NULL; -} - -static void add_node_daemon(int nodeid) -{ - struct node *node; - - if (get_node_daemon(nodeid)) - return; - - node = malloc(sizeof(struct node)); - if (!node) { - log_error("add_node_daemon no mem"); - return; - } - memset(node, 0, sizeof(struct node)); - node->nodeid = nodeid; - list_add_tail(&node->list, &daemon_nodes); -} - -static void pv_in(struct protocol_version *pv) -{ - pv->major = le16_to_cpu(pv->major); - pv->minor = le16_to_cpu(pv->minor); - pv->patch = le16_to_cpu(pv->patch); - pv->flags = le16_to_cpu(pv->flags); -} - -static void pv_out(struct protocol_version *pv) -{ - pv->major = cpu_to_le16(pv->major); - pv->minor = cpu_to_le16(pv->minor); - pv->patch = cpu_to_le16(pv->patch); - pv->flags = cpu_to_le16(pv->flags); -} - -static void protocol_in(struct protocol *proto) -{ - pv_in(&proto->dm_ver); - pv_in(&proto->km_ver); - pv_in(&proto->dr_ver); - pv_in(&proto->kr_ver); -} - -static void protocol_out(struct protocol *proto) -{ - pv_out(&proto->dm_ver); - pv_out(&proto->km_ver); - pv_out(&proto->dr_ver); - pv_out(&proto->kr_ver); -} - -/* go through member list saved in last confchg, see if we have received a - proto message from each */ - -static int all_protocol_messages(void) -{ - struct node *node; - int i; - - if (!daemon_member_count) - return 0; - - for (i = 0; i < daemon_member_count; i++) { - node = get_node_daemon(daemon_member[i].nodeid); - if (!node) { - log_error("all_protocol_messages no node %d", - daemon_member[i].nodeid); - return 0; - } - - if 
(!node->proto.daemon_max[0]) - return 0; - } - return 1; -} - -static int pick_min_protocol(struct protocol *proto) -{ - uint16_t mind[4]; - uint16_t mink[4]; - struct node *node; - int i; - - memset(&mind, 0, sizeof(mind)); - memset(&mink, 0, sizeof(mink)); - - /* first choose the minimum major */ - - for (i = 0; i < daemon_member_count; i++) { - node = get_node_daemon(daemon_member[i].nodeid); - if (!node) { - log_error("pick_min_protocol no node %d", - daemon_member[i].nodeid); - return -1; - } - - if (!mind[0] || node->proto.daemon_max[0] < mind[0]) - mind[0] = node->proto.daemon_max[0]; - - if (!mink[0] || node->proto.kernel_max[0] < mink[0]) - mink[0] = node->proto.kernel_max[0]; - } - - if (!mind[0] || !mink[0]) { - log_error("pick_min_protocol zero major number"); - return -1; - } - - /* second pick the minimum minor with the chosen major */ - - for (i = 0; i < daemon_member_count; i++) { - node = get_node_daemon(daemon_member[i].nodeid); - if (!node) - continue; - - if (mind[0] == node->proto.daemon_max[0]) { - if (!mind[1] || node->proto.daemon_max[1] < mind[1]) - mind[1] = node->proto.daemon_max[1]; - } - - if (mink[0] == node->proto.kernel_max[0]) { - if (!mink[1] || node->proto.kernel_max[1] < mink[1]) - mink[1] = node->proto.kernel_max[1]; - } - } - - if (!mind[1] || !mink[1]) { - log_error("pick_min_protocol zero minor number"); - return -1; - } - - /* third pick the minimum patch with the chosen major.minor */ - - for (i = 0; i < daemon_member_count; i++) { - node = get_node_daemon(daemon_member[i].nodeid); - if (!node) - continue; - - if (mind[0] == node->proto.daemon_max[0] && - mind[1] == node->proto.daemon_max[1]) { - if (!mind[2] || node->proto.daemon_max[2] < mind[2]) - mind[2] = node->proto.daemon_max[2]; - } - - if (mink[0] == node->proto.kernel_max[0] && - mink[1] == node->proto.kernel_max[1]) { - if (!mink[2] || node->proto.kernel_max[2] < mink[2]) - mink[2] = node->proto.kernel_max[2]; - } - } - - if (!mind[2] || !mink[2]) { - 
log_error("pick_min_protocol zero patch number"); - return -1; - } - - memcpy(&proto->daemon_run, &mind, sizeof(mind)); - memcpy(&proto->kernel_run, &mink, sizeof(mink)); - return 0; -} - -static void receive_protocol(struct gfs_header *hd, int len) -{ - struct protocol *p; - struct node *node; - - p = (struct protocol *)((char *)hd + sizeof(struct gfs_header)); - protocol_in(p); - - if (len < sizeof(struct gfs_header) + sizeof(struct protocol)) { - log_error("receive_protocol invalid len %d from %d", - len, hd->nodeid); - return; - } - - /* zero is an invalid version value */ - - if (!p->daemon_max[0] || !p->daemon_max[1] || !p->daemon_max[2] || - !p->kernel_max[0] || !p->kernel_max[1] || !p->kernel_max[2]) { - log_error("receive_protocol invalid max value from %d " - "daemon %u.%u.%u kernel %u.%u.%u", hd->nodeid, - p->daemon_max[0], p->daemon_max[1], p->daemon_max[2], - p->kernel_max[0], p->kernel_max[1], p->kernel_max[2]); - return; - } - - /* the run values will be zero until a version is set, after - which none of the run values can be zero */ - - if (p->daemon_run[0] && (!p->daemon_run[1] || !p->daemon_run[2] || - !p->kernel_run[0] || !p->kernel_run[1] || !p->kernel_run[2])) { - log_error("receive_protocol invalid run value from %d " - "daemon %u.%u.%u kernel %u.%u.%u", hd->nodeid, - p->daemon_run[0], p->daemon_run[1], p->daemon_run[2], - p->kernel_run[0], p->kernel_run[1], p->kernel_run[2]); - return; - } - - /* if we have zero run values, and this msg has non-zero run values, - then adopt them as ours; otherwise save this proto message */ - - if (our_protocol.daemon_run[0]) - return; - - if (p->daemon_run[0]) { - memcpy(&our_protocol.daemon_run, &p->daemon_run, - sizeof(struct protocol_version)); - memcpy(&our_protocol.kernel_run, &p->kernel_run, - sizeof(struct protocol_version)); - log_debug("run protocol from nodeid %d", hd->nodeid); - return; - } - - /* save this node's proto so we can tell when we've got all, and - use it to select a minimum protocol 
from all */ - - node = get_node_daemon(hd->nodeid); - if (!node) { - log_error("receive_protocol no node %d", hd->nodeid); - return; - } - memcpy(&node->proto, p, sizeof(struct protocol)); -} - -static void send_protocol(struct protocol *proto) -{ - struct gfs_header *hd; - struct protocol *pr; - char *buf; - int len; - - len = sizeof(struct gfs_header) + sizeof(struct protocol); - buf = malloc(len); - if (!buf) { - log_error("send_protocol no mem %d", len); - return; - } - memset(buf, 0, len); - - hd = (struct gfs_header *)buf; - pr = (struct protocol *)(buf + sizeof(*hd)); - - hd->type = cpu_to_le16(GFS_MSG_PROTOCOL); - hd->nodeid = cpu_to_le32(our_nodeid); - - memcpy(pr, proto, sizeof(struct protocol)); - protocol_out(pr); - - _send_message(cpg_handle_daemon, buf, len, GFS_MSG_PROTOCOL); -} - -int set_protocol(void) -{ - struct protocol proto; - struct pollfd pollfd; - int sent_proposal = 0; - int rv; - - memset(&pollfd, 0, sizeof(pollfd)); - pollfd.fd = cpg_fd_daemon; - pollfd.events = POLLIN; - - while (1) { - if (our_protocol.daemon_run[0]) - break; - - if (!sent_proposal && all_protocol_messages()) { - /* propose a protocol; look through info from all - nodes and pick the min for both daemon and kernel, - and propose that */ - - sent_proposal = 1; - - /* copy our max values */ - memcpy(&proto, &our_protocol, sizeof(struct protocol)); - - rv = pick_min_protocol(&proto); - if (rv < 0) - return rv; - - log_debug("set_protocol member_count %d propose " - "daemon %u.%u.%u kernel %u.%u.%u", - daemon_member_count, - proto.daemon_run[0], proto.daemon_run[1], - proto.daemon_run[2], proto.kernel_run[0], - proto.kernel_run[1], proto.kernel_run[2]); - - send_protocol(&proto); - } - - /* only process messages/events from daemon cpg until protocol - is established */ - - rv = poll(&pollfd, 1, -1); - if (rv == -1 && errno == EINTR) { - if (daemon_quit) - return -1; - continue; - } - if (rv < 0) { - log_error("set_protocol poll errno %d", errno); - return -1; - } - - if 
(pollfd.revents & POLLIN) - process_cpg_daemon(0); - if (pollfd.revents & (POLLERR | POLLHUP | POLLNVAL)) { - log_error("set_protocol poll revents %u", - pollfd.revents); - return -1; - } - } - - if (our_protocol.daemon_run[0] != our_protocol.daemon_max[0] || - our_protocol.daemon_run[1] > our_protocol.daemon_max[1]) { - log_error("incompatible daemon protocol run %u.%u.%u max %u.%u.%u", - our_protocol.daemon_run[0], - our_protocol.daemon_run[1], - our_protocol.daemon_run[2], - our_protocol.daemon_max[0], - our_protocol.daemon_max[1], - our_protocol.daemon_max[2]); - return -1; - } - - if (our_protocol.kernel_run[0] != our_protocol.kernel_max[0] || - our_protocol.kernel_run[1] > our_protocol.kernel_max[1]) { - log_error("incompatible kernel protocol run %u.%u.%u max %u.%u.%u", - our_protocol.kernel_run[0], - our_protocol.kernel_run[1], - our_protocol.kernel_run[2], - our_protocol.kernel_max[0], - our_protocol.kernel_max[1], - our_protocol.kernel_max[2]); - return -1; - } - - log_debug("daemon run %u.%u.%u max %u.%u.%u " - "kernel run %u.%u.%u max %u.%u.%u", - our_protocol.daemon_run[0], - our_protocol.daemon_run[1], - our_protocol.daemon_run[2], - our_protocol.daemon_max[0], - our_protocol.daemon_max[1], - our_protocol.daemon_max[2], - our_protocol.kernel_run[0], - our_protocol.kernel_run[1], - our_protocol.kernel_run[2], - our_protocol.kernel_max[0], - our_protocol.kernel_max[1], - our_protocol.kernel_max[2]); - - send_protocol(&our_protocol); - return 0; -} - -static void deliver_cb_daemon(cpg_handle_t handle, - const struct cpg_name *group_name, - uint32_t nodeid, uint32_t pid, - void *data, size_t len) -{ - struct gfs_header *hd; - - if (len < sizeof(*hd)) { - log_error("deliver_cb short message %zd", len); - return; - } - - hd = (struct gfs_header *)data; - gfs_header_in(hd); - - switch (hd->type) { - case GFS_MSG_PROTOCOL: - receive_protocol(hd, len); - break; - case GFS_MSG_WITHDRAW_ACK: - if (gfs_header_check(hd, nodeid) < 0) - return; - 
receive_withdraw_ack(hd, len); - break; - default: - log_error("deliver_cb_daemon unknown msg type %d", hd->type); - } -} - -static void confchg_cb_daemon(cpg_handle_t handle, - const struct cpg_name *group_name, - const struct cpg_address *member_list, - size_t member_list_entries, - const struct cpg_address *left_list, - size_t left_list_entries, - const struct cpg_address *joined_list, - size_t joined_list_entries) -{ - int i; - - log_config(group_name, member_list, member_list_entries, - left_list, left_list_entries, - joined_list, joined_list_entries); - - if (joined_list_entries) - send_protocol(&our_protocol); - - memset(&daemon_member, 0, sizeof(daemon_member)); - daemon_member_count = member_list_entries; - - for (i = 0; i < member_list_entries; i++) { - daemon_member[i] = member_list[i]; - add_node_daemon(member_list[i].nodeid); - } -} - -static cpg_callbacks_t cpg_callbacks_daemon = { - .cpg_deliver_fn = deliver_cb_daemon, - .cpg_confchg_fn = confchg_cb_daemon, -}; - -void process_cpg_daemon(int ci) -{ - cpg_error_t error; - - error = cpg_dispatch(cpg_handle_daemon, CPG_DISPATCH_ALL); - if (error != CPG_OK) - log_error("daemon cpg_dispatch error %d", error); -} - -int setup_cpg_daemon(void) -{ - cpg_error_t error; - cpg_handle_t h; - struct cpg_name name; - int i = 0; - - INIT_LIST_HEAD(&daemon_nodes); - - memset(&our_protocol, 0, sizeof(our_protocol)); - our_protocol.daemon_max[0] = 1; - our_protocol.daemon_max[1] = 1; - our_protocol.daemon_max[2] = 1; - our_protocol.kernel_max[0] = 1; - our_protocol.kernel_max[1] = 1; - our_protocol.kernel_max[2] = 1; - - error = cpg_initialize(&h, &cpg_callbacks_daemon); - if (error != CPG_OK) { - log_error("daemon cpg_initialize error %d", error); - return -1; - } - - cpg_fd_get(h, &cpg_fd_daemon); - - cpg_handle_daemon = h; - - memset(&name, 0, sizeof(name)); - sprintf(name.value, "gfs:controld"); - name.length = strlen(name.value) + 1; - - retry: - error = cpg_join(h, &name); - if (error == CPG_ERR_TRY_AGAIN) { - 
sleep(1); - if (!(++i % 10)) - log_error("daemon cpg_join error retrying"); - goto retry; - } - if (error != CPG_OK) { - log_error("daemon cpg_join error %d", error); - goto fail; - } - - log_debug("setup_cpg_daemon %d", cpg_fd_daemon); - return cpg_fd_daemon; - - fail: - cpg_finalize(h); - return -1; -} - -void close_cpg_daemon(void) -{ - struct mountgroup *mg; - cpg_error_t error; - struct cpg_name name; - int i = 0; - - if (!cpg_handle_daemon) - return; - if (cluster_down) - goto fin; - - memset(&name, 0, sizeof(name)); - sprintf(name.value, "gfs:controld"); - name.length = strlen(name.value) + 1; - - retry: - error = cpg_leave(cpg_handle_daemon, &name); - if (error == CPG_ERR_TRY_AGAIN) { - sleep(1); - if (!(++i % 10)) - log_error("daemon cpg_leave error retrying"); - goto retry; - } - if (error != CPG_OK) - log_error("daemon cpg_leave error %d", error); - fin: - list_for_each_entry(mg, &mountgroups, list) { - if (mg->cpg_handle) - cpg_finalize(mg->cpg_handle); - } - cpg_finalize(cpg_handle_daemon); -} - -int setup_dlmcontrol(void) -{ - int fd; - - fd = dlmc_fs_connect(); - if (fd < 0) - log_error("cannot connect to dlm_controld %d", fd); - else - dlmcontrol_fd = fd; - - return fd; -} - -int set_mountgroup_info(struct mountgroup *mg, struct gfsc_mountgroup *out) -{ - struct change *cg, *last = NULL; - - strncpy(out->name, mg->name, GFS_MOUNTGROUP_LEN); - out->global_id = mg->id; - - if (mg->joining) - out->flags |= GFSC_MF_JOINING; - if (mg->leaving) - out->flags |= GFSC_MF_LEAVING; - if (mg->kernel_stopped) - out->flags |= GFSC_MF_KERNEL_STOPPED; - if (mg->kernel_mount_done) - out->flags |= GFSC_MF_KERNEL_MOUNT_DONE; - if (mg->kernel_mount_error) - out->flags |= GFSC_MF_KERNEL_MOUNT_ERROR; - if (mg->first_recovery_needed) - out->flags |= GFSC_MF_FIRST_RECOVERY_NEEDED; - if (mg->first_recovery_msg) - out->flags |= GFSC_MF_FIRST_RECOVERY_MSG; - if (mg->local_recovery_busy) - out->flags |= GFSC_MF_LOCAL_RECOVERY_BUSY; - - if (!mg->started_change) - goto next; - - 
cg = mg->started_change; - - out->cg_prev.member_count = cg->member_count; - out->cg_prev.joined_count = cg->joined_count; - out->cg_prev.remove_count = cg->remove_count; - out->cg_prev.failed_count = cg->failed_count; - out->cg_prev.combined_seq = cg->combined_seq; - out->cg_prev.seq = cg->seq; - - next: - if (list_empty(&mg->changes)) - goto out; - - list_for_each_entry(cg, &mg->changes, list) - last = cg; - - cg = list_first_entry(&mg->changes, struct change, list); - - out->cg_next.member_count = cg->member_count; - out->cg_next.joined_count = cg->joined_count; - out->cg_next.remove_count = cg->remove_count; - out->cg_next.failed_count = cg->failed_count; - out->cg_next.combined_seq = last->seq; - out->cg_next.seq = cg->seq; - - /* FIXME: use real definitions for these conditions - (also in dlm_controld) */ - - if (cg->state == CGST_WAIT_CONDITIONS) - out->cg_next.wait_condition = 4; - if (!mg->kernel_mount_done) - out->cg_next.wait_condition = 1; - if (mg->dlm_notify_nodeid) - out->cg_next.wait_condition = 2; - if (poll_dlm) - out->cg_next.wait_condition = 3; - - if (cg->state == CGST_WAIT_MESSAGES) - out->cg_next.wait_messages = 1; - out: - return 0; -} - -static int _set_node_info(struct mountgroup *mg, struct change *cg, int nodeid, - struct gfsc_node *node) -{ - struct member *m = NULL; - struct node *n; - - node->nodeid = nodeid; - - if (cg) - m = find_memb(cg, nodeid); - if (!m) - goto history; - - node->flags |= GFSC_NF_MEMBER; - - if (m->start) - node->flags |= GFSC_NF_START; - if (m->disallowed) - node->flags |= GFSC_NF_DISALLOWED; - - history: - n = get_node_history(mg, nodeid); - if (!n) - goto out; - - node->jid = n->jid; - - if (n->kernel_mount_done) - node->flags |= GFSC_NF_KERNEL_MOUNT_DONE; - if (n->kernel_mount_error) - node->flags |= GFSC_NF_KERNEL_MOUNT_ERROR; - if (n->check_dlm) - node->flags |= GFSC_NF_CHECK_DLM; - if (n->ro) - node->flags |= GFSC_NF_READONLY; - if (n->spectator) - node->flags |= GFSC_NF_SPECTATOR; - - node->added_seq = 
n->added_seq; - node->removed_seq = n->removed_seq; - node->failed_reason = n->failed_reason; - out: - return 0; -} - -int set_node_info(struct mountgroup *mg, int nodeid, struct gfsc_node *node) -{ - struct change *cg; - - if (!list_empty(&mg->changes)) { - cg = list_first_entry(&mg->changes, struct change, list); - return _set_node_info(mg, cg, nodeid, node); - } - - return _set_node_info(mg, mg->started_change, nodeid, node); -} - -int set_mountgroups(int *count, struct gfsc_mountgroup **mgs_out) -{ - struct mountgroup *mg; - struct gfsc_mountgroup *mgs, *mgp; - int mg_count = 0; - - list_for_each_entry(mg, &mountgroups, list) - mg_count++; - - mgs = malloc(mg_count * sizeof(struct gfsc_mountgroup)); - if (!mgs) - return -ENOMEM; - memset(mgs, 0, mg_count * sizeof(struct gfsc_mountgroup)); - - mgp = mgs; - list_for_each_entry(mg, &mountgroups, list) { - set_mountgroup_info(mg, mgp++); - } - - *count = mg_count; - *mgs_out = mgs; - return 0; -} - -int set_mountgroup_nodes(struct mountgroup *mg, int option, int *node_count, - struct gfsc_node **nodes_out) -{ - struct change *cg; - struct node *n; - struct gfsc_node *nodes = NULL, *nodep; - struct member *memb; - int count = 0; - - if (option == GFSC_NODES_ALL) { - if (!list_empty(&mg->changes)) - cg = list_first_entry(&mg->changes, struct change,list); - else - cg = mg->started_change; - - list_for_each_entry(n, &mg->node_history, list) - count++; - - } else if (option == GFSC_NODES_MEMBERS) { - if (!mg->started_change) - goto out; - cg = mg->started_change; - count = cg->member_count; - - } else if (option == GFSC_NODES_NEXT) { - if (list_empty(&mg->changes)) - goto out; - cg = list_first_entry(&mg->changes, struct change, list); - count = cg->member_count; - } else - goto out; - - nodes = malloc(count * sizeof(struct gfsc_node)); - if (!nodes) - return -ENOMEM; - memset(nodes, 0, count * sizeof(struct gfsc_node)); - nodep = nodes; - - if (option == GFSC_NODES_ALL) { - list_for_each_entry(n, &mg->node_history, 
list) - _set_node_info(mg, cg, n->nodeid, nodep++); - } else { - list_for_each_entry(memb, &cg->members, list) - _set_node_info(mg, cg, memb->nodeid, nodep++); - } - out: - *node_count = count; - *nodes_out = nodes; - return 0; -} - diff --git a/group/gfs_controld/crc.c b/group/gfs_controld/crc.c deleted file mode 100644 index 29bc096..0000000 --- a/group/gfs_controld/crc.c +++ /dev/null @@ -1,72 +0,0 @@ -#include "gfs_daemon.h" - -static const uint32_t crc_32_tab[] = { - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, - 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, - 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, - 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, - 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, - 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, - 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, - 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, - 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, - 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, - 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, - 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, - 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, - 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, - 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 
0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, - 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, - 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, - 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, - 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, - 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, - 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, - 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, - 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, - 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, - 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, - 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, - 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, - 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, - 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, - 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d -}; - -/** - * - * Copied from: - * - * gfs2_disk_hash - hash an array of data - * @data: the data to be hashed - * @len: the length of data to be hashed - * - * This function must produce the same results as the one in the kernel: - * crc32_le(0xFFFFFFFF, data, len) ^ 0xFFFFFFFF - * - * Take some data and convert it to a 32-bit hash. 
- * - * The hash function is a 32-bit CRC of the data. The algorithm uses - * the crc_32_tab table above. - * - * This may not be the fastest hash function, but it does a fair bit better - * at providing uniform results than the others I've looked at. That's - * really important for efficient directories. - * - * Returns: the hash - */ - -uint32_t cpgname_to_crc(const char *data, int len) -{ - uint32_t hash = 0xFFFFFFFF; - - for (; len--; data++) - hash = crc_32_tab[(hash ^ *data) & 0xFF] ^ (hash >> 8); - - hash = ~hash; - - return hash; -} - diff --git a/group/gfs_controld/gfs_controld.h b/group/gfs_controld/gfs_controld.h deleted file mode 100644 index 571dffb..0000000 --- a/group/gfs_controld/gfs_controld.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef __GFS_CONTROLD_DOT_H__ -#define __GFS_CONTROLD_DOT_H__ - -/* This defines the interface between gfs_controld and libgfscontrol, and - should only be used by libgfscontrol. */ - -#define GFSC_SOCK_PATH "gfsc_sock" -#define GFSC_QUERY_SOCK_PATH "gfsc_query_sock" - -#define GFSC_MAGIC 0x6F5C6F5C -#define GFSC_VERSION 0x00010001 - -#define GFSC_CMD_DUMP_DEBUG 1 -#define GFSC_CMD_DUMP_PLOCKS 2 -#define GFSC_CMD_MOUNTGROUP_INFO 3 -#define GFSC_CMD_NODE_INFO 4 -#define GFSC_CMD_MOUNTGROUPS 5 -#define GFSC_CMD_MOUNTGROUP_NODES 6 -#define GFSC_CMD_FS_JOIN 7 -#define GFSC_CMD_FS_REMOUNT 8 -#define GFSC_CMD_FS_MOUNT_DONE 9 -#define GFSC_CMD_FS_LEAVE 10 - -struct gfsc_header { - unsigned int magic; - unsigned int version; - unsigned int command; - unsigned int option; - unsigned int len; - int data; /* embedded command-specific data, for convenience */ - int unused1; - int unsued2; - char name[GFS_MOUNTGROUP_LEN]; /* no terminating null space */ -}; - -#endif - diff --git a/group/gfs_controld/gfs_daemon.h b/group/gfs_controld/gfs_daemon.h deleted file mode 100644 index d5ea341..0000000 --- a/group/gfs_controld/gfs_daemon.h +++ /dev/null @@ -1,241 +0,0 @@ -#ifndef __GFS_DAEMON_DOT_H__ -#define __GFS_DAEMON_DOT_H__ - -#include 
"clusterautoconfig.h" - -#include <sys/types.h> -#include <asm/types.h> -#include <sys/uio.h> -#include <netinet/in.h> -#include <sys/socket.h> -#include <sys/un.h> -#include <sys/ioctl.h> -#include <sys/stat.h> -#include <sys/utsname.h> -#include <sys/poll.h> -#include <sys/wait.h> -#include <netinet/in.h> -#include <arpa/inet.h> -#include <net/if.h> -#include <stdio.h> -#include <errno.h> -#include <string.h> -#include <stdlib.h> -#include <stddef.h> -#include <stdint.h> -#include <fcntl.h> -#include <netdb.h> -#include <limits.h> -#include <unistd.h> -#include <time.h> -#include <syslog.h> -#include <sched.h> -#include <signal.h> -#include <sys/time.h> -#include <dirent.h> -#include <openais/saAis.h> -#include <openais/saCkpt.h> -#include <corosync/cpg.h> -#include <liblogthread.h> - -#include <linux/dlmconstants.h> -#include "libgfscontrol.h" -#include "gfs_controld.h" -#include "list.h" -#include "linux_endian.h" - -/* TODO: warn if - DLM_LOCKSPACE_LEN (from dlmconstants.h) != - GFS_MOUNTGROUP_LEN (from libgfscontrol.h) -*/ - -/* Maximum members of a mountgroup, should match CPG_MEMBERS_MAX in - corosync/cpg.h. There are no max defines in gfs-kernel for - mountgroup members. (FIXME verify gfs-kernel/lock_dlm) */ - -#define MAX_NODES 128 - -/* Max string length printed on a line, for debugging/dump output. */ - -#define MAXLINE 256 - -extern int daemon_debug_opt; -extern int daemon_quit; -extern int cluster_down; -extern int poll_dlm; -extern struct list_head mountgroups; -extern int our_nodeid; -extern char clustername[1024]; /* actual limit is sure to be smaller */ -extern char daemon_debug_buf[256]; -extern char dump_buf[GFSC_DUMP_SIZE]; -extern int dump_point; -extern int dump_wrap; -extern int dmsetup_wait; -extern cpg_handle_t cpg_handle_daemon; -extern int libcpg_flow_control_on; -extern struct list_head withdrawn_mounts; - -void daemon_dump_save(void); - -#define log_level(lvl, fmt, args...) 
\ -do { \ - snprintf(daemon_debug_buf, 255, "%ld " fmt "\n", time(NULL), ##args); \ - daemon_dump_save(); \ - logt_print(lvl, fmt "\n", ##args); \ - if (daemon_debug_opt) \ - fprintf(stderr, "%s", daemon_debug_buf); \ -} while (0) - -#define log_debug(fmt, args...) log_level(LOG_DEBUG, fmt, ##args) -#define log_error(fmt, args...) log_level(LOG_ERR, fmt, ##args) - -#define log_group(g, fmt, args...) \ -do { \ - snprintf(daemon_debug_buf, 255, "%ld %s " fmt "\n", time(NULL), \ - (g)->name, ##args); \ - daemon_dump_save(); \ - logt_print(LOG_DEBUG, "%s " fmt "\n", (g)->name, ##args); \ - if (daemon_debug_opt) \ - fprintf(stderr, "%s", daemon_debug_buf); \ -} while (0) - -#define log_plock(g, fmt, args...) \ -do { \ - snprintf(daemon_debug_buf, 255, "%ld %s " fmt "\n", time(NULL), \ - (g)->name, ##args); \ - if (daemon_debug_opt && cfgd_plock_debug) \ - fprintf(stderr, "%s", daemon_debug_buf); \ -} while (0) - -struct mountgroup { - struct list_head list; - uint32_t id; - struct gfsc_mount_args mount_args; - char name[GFS_MOUNTGROUP_LEN+1]; - - int mount_client; - int mount_client_result; - int mount_client_notified; - int mount_client_delay; - int remount_client; - - int withdraw_uevent; - int withdraw_suspend; - int dmsetup_wait; - pid_t dmsetup_pid; - int our_jid; - int spectator; - int ro; - int joining; - int leaving; - int kernel_mount_error; - int kernel_mount_done; - int first_mounter; - int no_mount_helper; - - /* cpg-new stuff */ - - cpg_handle_t cpg_handle; - int cpg_client; - int cpg_fd; - int kernel_stopped; - uint32_t change_seq; - uint32_t started_count; - struct change *started_change; - struct list_head changes; - struct list_head node_history; - struct list_head journals; - int dlm_registered; - int dlm_notify_nodeid; - int first_done_uevent; - int first_recovery_needed; - int first_recovery_master; - int first_recovery_msg; - int local_recovery_jid; - int local_recovery_busy; -}; - -/* these need to match the kernel defines of the same name in 
lm_interface.h */ - -#define LM_RD_GAVEUP 308 -#define LM_RD_SUCCESS 309 - -/* config.c */ -int setup_ccs(void); -void close_ccs(void); -void read_ccs_name(const char *path, char *name); -void read_ccs_yesno(const char *path, int *yes, int *no); -int read_ccs_int(const char *path, int *config_val); -void read_ccs_nodir(struct mountgroup *mg, char *buf); - -/* cpg-new.c */ -int setup_cpg_daemon(void); -void close_cpg_daemon(void); -void process_cpg_daemon(int ci); -int setup_dlmcontrol(void); -void process_dlmcontrol(int ci); -int set_protocol(void); -void process_recovery_uevent(struct mountgroup *mg, int jid, int status); -void process_first_mount(struct mountgroup *mg); -void process_mountgroups(void); -int gfs_join_mountgroup(struct mountgroup *mg); -void do_leave(struct mountgroup *mg, int mnterr); -void gfs_mount_done(struct mountgroup *mg); -void send_remount(struct mountgroup *mg, int ro); -void send_withdraw(struct mountgroup *mg); -int set_mountgroup_info(struct mountgroup *mg, struct gfsc_mountgroup *out); -int set_node_info(struct mountgroup *mg, int nodeid, struct gfsc_node *node); -int set_mountgroups(int *count, struct gfsc_mountgroup **mgs_out); -int set_mountgroup_nodes(struct mountgroup *mg, int option, int *node_count, - struct gfsc_node **nodes_out); -void free_mg(struct mountgroup *mg); -void node_history_cluster_add(int nodeid); -void node_history_cluster_remove(int nodeid); -void gfs_leave_mountgroup(struct mountgroup *mg, int mnterr); - -/* main.c */ -int do_read(int fd, void *buf, size_t count); -int do_write(int fd, void *buf, size_t count); -void client_dead(int ci); -int client_add(int fd, void (*workfn)(int ci), void (*deadfn)(int ci)); -int client_fd(int ci); -void client_ignore(int ci, int fd); -void client_back(int ci, int fd); -struct mountgroup *create_mg(const char *name); -struct mountgroup *find_mg(char *name); -struct mountgroup *find_mg_id(uint32_t id); -void client_reply_remount(struct mountgroup *mg, int ci, int result); 
-void client_reply_join_full(struct mountgroup *mg, int result); -void query_lock(void); -void query_unlock(void); -void process_connection(int ci); -void cluster_dead(int ci); - -/* member_cman.c */ -int setup_cluster(void); -void close_cluster(void); -void process_cluster(int ci); -void update_cluster(void); -int setup_cluster_cfg(void); -void close_cluster_cfg(void); -void process_cluster_cfg(int ci); -void kick_node_from_cluster(int nodeid); - -/* util.c */ -int we_are_in_fence_domain(void); -int set_sysfs(struct mountgroup *mg, const char *field, int val); -int run_dmsetup_suspend(struct mountgroup *mg, char *dev); -void update_dmsetup_wait(void); -void update_flow_control_status(void); -int check_uncontrolled_filesystems(void); - -/* logging.c */ - -void init_logging(void); -void setup_logging(void); -void close_logging(void); - -/* crc.c */ -uint32_t cpgname_to_crc(const char *data, int len); - -#endif diff --git a/group/gfs_controld/logging.c b/group/gfs_controld/logging.c deleted file mode 100644 index 6c6d024..0000000 --- a/group/gfs_controld/logging.c +++ /dev/null @@ -1,65 +0,0 @@ -#include <syslog.h> -#include "gfs_daemon.h" -#include "config.h" -#include "ccs.h" - -extern int ccs_handle; - -#define DAEMON_NAME "gfs_controld" -#define DEFAULT_LOG_MODE LOG_MODE_OUTPUT_FILE|LOG_MODE_OUTPUT_SYSLOG -#define DEFAULT_SYSLOG_FACILITY LOG_LOCAL4 /* Is this really right? */ -#define DEFAULT_SYSLOG_PRIORITY LOG_INFO -#define DEFAULT_LOGFILE_PRIORITY LOG_INFO /* ? 
*/ -#ifndef LOGDIR -#define LOGDIR "/var/log/cluster" -#endif /* LOGDIR */ -#define DEFAULT_LOGFILE LOGDIR "/" DAEMON_NAME ".log" - -static int log_mode; -static int syslog_facility; -static int syslog_priority; -static int logfile_priority; -static char logfile[PATH_MAX]; - -void init_logging(void) -{ - log_mode = DEFAULT_LOG_MODE; - syslog_facility = DEFAULT_SYSLOG_FACILITY; - syslog_priority = DEFAULT_SYSLOG_PRIORITY; - logfile_priority = DEFAULT_LOGFILE_PRIORITY; - strcpy(logfile, DEFAULT_LOGFILE); - - /* logfile_priority is the only one of these options that - can be controlled from command line or environment variable */ - - if (cfgd_debug_logfile) - logfile_priority = LOG_DEBUG; - - log_debug("logging mode %d syslog f %d p %d logfile p %d %s", - log_mode, syslog_facility, syslog_priority, - logfile_priority, logfile); - - logt_init(DAEMON_NAME, log_mode, syslog_facility, syslog_priority, - logfile_priority, logfile); -} - -void setup_logging(void) -{ - ccs_read_logging(ccs_handle, DAEMON_NAME, - &cfgd_debug_logfile, &log_mode, - &syslog_facility, &syslog_priority, - &logfile_priority, logfile); - - log_debug("logging mode %d syslog f %d p %d logfile p %d %s", - log_mode, syslog_facility, syslog_priority, - logfile_priority, logfile); - - logt_conf(DAEMON_NAME, log_mode, syslog_facility, syslog_priority, - logfile_priority, logfile); -} - -void close_logging(void) -{ - logt_exit(); -} - diff --git a/group/gfs_controld/main.c b/group/gfs_controld/main.c deleted file mode 100644 index aa38839..0000000 --- a/group/gfs_controld/main.c +++ /dev/null @@ -1,1496 +0,0 @@ -#include "gfs_daemon.h" -#include "config.h" -#include <pthread.h> -#include "copyright.cf" - -#include <linux/netlink.h> - -#ifndef CLUSTERVARRUN -#define CLUSTERVARRUN "/var/run/cluster" -#endif /* CLUSTERVARRUN */ -#define LOCKFILE_NAME CLUSTERVARRUN "/gfs_controld.pid" -#define CLIENT_NALLOC 32 -#define UEVENT_BUF_SIZE 4096 - -static int client_maxi; -static int client_size; -static struct 
client *client; -static struct pollfd *pollfd; -static pthread_t query_thread; -static pthread_mutex_t query_mutex; - -struct client { - int fd; - void *workfn; - void *deadfn; - struct mountgroup *mg; -}; - -static void do_withdraw(struct mountgroup *mg); - -int do_read(int fd, void *buf, size_t count) -{ - int rv, off = 0; - - while (off < count) { - rv = read(fd, (char *)buf + off, count - off); - if (rv == 0) - return -1; - if (rv == -1 && errno == EINTR) - continue; - if (rv == -1) - return -1; - off += rv; - } - return 0; -} - -int do_write(int fd, void *buf, size_t count) -{ - int rv, off = 0; - - retry: - rv = write(fd, (char *)buf + off, count); - if (rv == -1 && errno == EINTR) - goto retry; - if (rv < 0) { - log_error("write errno %d", errno); - return rv; - } - - if (rv != count) { - count -= rv; - off += rv; - goto retry; - } - return 0; -} - -static void client_alloc(void) -{ - int i; - - if (!client) { - client = malloc(CLIENT_NALLOC * sizeof(struct client)); - pollfd = malloc(CLIENT_NALLOC * sizeof(struct pollfd)); - } else { - client = realloc(client, (client_size + CLIENT_NALLOC) * - sizeof(struct client)); - pollfd = realloc(pollfd, (client_size + CLIENT_NALLOC) * - sizeof(struct pollfd)); - if (!pollfd) - log_error("can't alloc for pollfd"); - } - if (!client || !pollfd) - log_error("can't alloc for client array"); - - for (i = client_size; i < client_size + CLIENT_NALLOC; i++) { - client[i].workfn = NULL; - client[i].deadfn = NULL; - client[i].fd = -1; - pollfd[i].fd = -1; - pollfd[i].revents = 0; - } - client_size += CLIENT_NALLOC; -} - -void client_dead(int ci) -{ - close(client[ci].fd); - client[ci].workfn = NULL; - client[ci].fd = -1; - pollfd[ci].fd = -1; -} - -int client_add(int fd, void (*workfn)(int ci), void (*deadfn)(int ci)) -{ - int i; - - if (!client) - client_alloc(); - again: - for (i = 0; i < client_size; i++) { - if (client[i].fd == -1) { - client[i].workfn = workfn; - if (deadfn) - client[i].deadfn = deadfn; - else - 
client[i].deadfn = client_dead; - client[i].fd = fd; - pollfd[i].fd = fd; - pollfd[i].events = POLLIN; - if (i > client_maxi) - client_maxi = i; - return i; - } - } - - client_alloc(); - goto again; -} - -int client_fd(int ci) -{ - return client[ci].fd; -} - -void client_ignore(int ci, int fd) -{ - pollfd[ci].fd = -1; - pollfd[ci].events = 0; -} - -void client_back(int ci, int fd) -{ - pollfd[ci].fd = fd; - pollfd[ci].events = POLLIN; -} - -static void sigterm_handler(int sig) -{ - daemon_quit = 1; -} - -struct mountgroup *create_mg(const char *name) -{ - struct mountgroup *mg; - - if (strlen(name) > GFS_MOUNTGROUP_LEN) { - log_error("create_mg: name %s too long", name); - return NULL; - } - - mg = malloc(sizeof(struct mountgroup)); - if (!mg) - return NULL; - memset(mg, 0, sizeof(struct mountgroup)); - - INIT_LIST_HEAD(&mg->changes); - INIT_LIST_HEAD(&mg->journals); - INIT_LIST_HEAD(&mg->node_history); - - strncpy(mg->name, name, GFS_MOUNTGROUP_LEN); - - return mg; -} - -struct mountgroup *find_mg(char *name) -{ - struct mountgroup *mg; - - if (strlen(name) > GFS_MOUNTGROUP_LEN) { - log_error("find_mg: name %s too long", name); - return NULL; - } - - list_for_each_entry(mg, &mountgroups, list) { - if ((strlen(mg->name) == strlen(name)) && - !strncmp(mg->name, name, strlen(name))) - return mg; - } - return NULL; -} - -struct mountgroup *find_mg_id(uint32_t id) -{ - struct mountgroup *mg; - - list_for_each_entry(mg, &mountgroups, list) { - if (mg->id == id) - return mg; - } - return NULL; -} - -enum { - Env_ACTION = 0, - Env_SUBSYSTEM, - Env_LOCKPROTO, - Env_LOCKTABLE, - Env_DEVPATH, - Env_RECOVERY, - Env_FIRSTMOUNT, - Env_JID, - Env_SPECTATOR, - Env_RDONLY, - Env_Last, /* Flag for end of vars */ -}; - -static const char *uevent_vars[] = { - [Env_ACTION] = "ACTION=", - [Env_SUBSYSTEM] = "SUBSYSTEM=", - [Env_LOCKPROTO] = "LOCKPROTO=", - [Env_LOCKTABLE] = "LOCKTABLE=", - [Env_DEVPATH] = "DEVPATH=", - [Env_RECOVERY] = "RECOVERY=", - [Env_FIRSTMOUNT] = "FIRSTMOUNT=", - 
[Env_JID] = "JID=", - [Env_SPECTATOR] = "SPECTATOR=", - [Env_RDONLY] = "RDONLY=", -}; - -/* - * Parses a uevent message for the interesting bits. It requires a list - * of variables to look for, and an equally long list of pointers into - * which to write the results. - */ -static void decode_uevent(const char *buf, unsigned len, const char *vars[], - unsigned nvars, const char *vals[]) -{ - const char *ptr; - unsigned int i; - int slen, vlen; - - memset(vals, 0, sizeof(const char *) * nvars); - - while (len > 0) { - ptr = buf; - slen = strlen(ptr); - buf += slen; - len -= slen; - buf++; - len--; - - for (i = 0; i < nvars; i++) { - vlen = strlen(vars[i]); - if (vlen > slen) - continue; - if (memcmp(vars[i], ptr, vlen) != 0) - continue; - vals[i] = ptr + vlen; - break; - } - } -} - -static char *uevent_fsname(const char *vals[]) -{ - char *name = NULL; - - if (vals[Env_LOCKTABLE]) { - name = strchr(vals[Env_LOCKTABLE], ':'); - if (name && *name) - name++; - } - return name; -} - -/* - * This is called only if mount.gfs2 has not already set up the - * mount group. In that case we know that the mount helper doesn't - * exist and thus the no_mount_helper flag is set, to indicate that - * this mount will be administrated entirely via the uevent/sysfs - * interface. 
- */ - -static void do_new_mount(const char *name, struct mountgroup *mg, - const char *uevent_vals[]) -{ - int rv; - - if (!uevent_vars[Env_LOCKPROTO] || - !uevent_vars[Env_LOCKTABLE]) - return; - - /* We only care about lock_dlm mounts */ - if (strcmp(uevent_vals[Env_LOCKPROTO], "lock_dlm") != 0) - return; - - if (mg) { - /* Might have already been set up by mount.gfs2 */ - if (mg->no_mount_helper == 0) - return; - log_error("do_new_mount: duplicate mount %s", - uevent_vals[Env_LOCKTABLE]); - return; - } - - mg = create_mg(name); - if (mg == NULL) - return; - - mg->no_mount_helper = 1; - - strncpy(mg->mount_args.type, uevent_vals[Env_SUBSYSTEM], PATH_MAX); - strncpy(mg->mount_args.proto, uevent_vals[Env_LOCKPROTO], PATH_MAX); - strncpy(mg->mount_args.table, uevent_vals[Env_LOCKTABLE], PATH_MAX); - - if (uevent_vals[Env_SPECTATOR] && - strcmp(uevent_vals[Env_SPECTATOR], "1") == 0) - mg->spectator = 1; - - if (uevent_vals[Env_RDONLY] && - strcmp(uevent_vals[Env_RDONLY], "1") == 0) - mg->ro = 1; - - list_add(&mg->list, &mountgroups); - rv = gfs_join_mountgroup(mg); - if (rv) { - log_error("join: group join error %d", rv); - goto fail; - } - log_group(mg, "do_new_mount ci %d result %d first=%d:jid=%d", - mg->mount_client, rv, mg->first_mounter, mg->our_jid); - return; - -fail: - list_del(&mg->list); - free(mg); - return; -} - -/* - * This is called upon successful mount and also upon a successful - * remount operation. Unless the no_mount_helper flag is set on the - * mount group, this is a no-op. 
- */ -static void do_online(struct mountgroup *mg, const char *uevent_vals[]) -{ - int ro = 0; - - /* If using mount helper, ignore the message here */ - if (mg->no_mount_helper == 0) - return; - - /* Catch successful original mount */ - if (!mg->kernel_mount_done) { - mg->mount_client = 0; - mg->kernel_mount_done = 1; - mg->kernel_mount_error = 0; - gfs_mount_done(mg); - return; - } - - /* From here on, its remounts only */ - - if (uevent_vals[Env_RDONLY] && - strcmp(uevent_vals[Env_RDONLY], "1") == 0) - ro = 1; - - send_remount(mg, ro); -} - -static void process_uevent(int ci) -{ - struct mountgroup *mg; - char buf[UEVENT_BUF_SIZE]; - const char *uevent_vals[Env_Last]; - char *fsname; - int rv; - - retry_recv: - rv = recv(client[ci].fd, &buf, sizeof(buf), 0); - if (rv < 0) { - if (errno == EINTR) - goto retry_recv; - if (errno != EAGAIN) - log_error("uevent recv error %d errno %d", rv, errno); - return; - } - buf[rv] = 0; - - decode_uevent(buf, rv, uevent_vars, Env_Last, uevent_vals); - - if (!uevent_vals[Env_DEVPATH] || - !uevent_vals[Env_ACTION] || - !uevent_vals[Env_SUBSYSTEM]) - return; - - if (strncmp(uevent_vals[Env_DEVPATH], "/fs/gfs", 7) != 0) - return; - - log_debug("uevent %s %s %s", - uevent_vals[Env_ACTION], - uevent_vals[Env_SUBSYSTEM], - uevent_vals[Env_DEVPATH]); - - fsname = uevent_fsname(uevent_vals); - if (!fsname) { - log_error("no fsname uevent %s %s %s", - uevent_vals[Env_ACTION], - uevent_vals[Env_SUBSYSTEM], - uevent_vals[Env_DEVPATH]); - return; - } - - mg = find_mg(fsname); - - if (!strcmp(uevent_vals[Env_ACTION], "add")) { - do_new_mount(fsname, mg, uevent_vals); - return; - } - - if (!mg) { - log_error("mount group %s not found", fsname); - return; - } - - if (!strcmp(uevent_vals[Env_ACTION], "remove")) { - /* We want to trigger the leave at the very end of the kernel's - unmount process, i.e. at the end of put_super(), so we do the - leave when the second uevent (from the gfs kobj) arrives. 
*/ - - if (strcmp(uevent_vals[Env_SUBSYSTEM], "lock_dlm") == 0) - return; - - /* Catch original mount failure */ - if (mg->no_mount_helper && !mg->kernel_mount_done) { - mg->mount_client = 0; - mg->kernel_mount_done = 1; - mg->kernel_mount_error = -1; - gfs_mount_done(mg); - return; - } - - do_leave(mg, 0); - return; - } - - if (!strcmp(uevent_vals[Env_ACTION], "change")) { - int jid, status = -1; - - - if (uevent_vals[Env_RECOVERY]) { - if (!uevent_vals[Env_JID] || - (sscanf(uevent_vals[Env_JID], "%d", &jid) != 1)) - return; - if (strcmp(uevent_vals[Env_RECOVERY], "Done") == 0) - status = LM_RD_SUCCESS; - if (strcmp(uevent_vals[Env_RECOVERY], "Failed") == 0) - status = LM_RD_GAVEUP; - if (status < 0) - return; - process_recovery_uevent(mg, jid, status); - return; - } - - if (uevent_vals[Env_FIRSTMOUNT] && - (strcmp(uevent_vals[Env_FIRSTMOUNT], "Done") == 0)) { - process_first_mount(mg); - } - return; - } - - if (!strcmp(uevent_vals[Env_ACTION], "online")) - do_online(mg, uevent_vals); - - if (!strcmp(uevent_vals[Env_ACTION], "offline")) - do_withdraw(mg); -} - -static int setup_uevent(void) -{ - struct sockaddr_nl snl; - int s, rv; - - s = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT); - if (s < 0) { - log_error("uevent netlink socket"); - return s; - } - - memset(&snl, 0, sizeof(snl)); - snl.nl_family = AF_NETLINK; - snl.nl_pid = getpid(); - snl.nl_groups = 1; - - rv = bind(s, (struct sockaddr *) &snl, sizeof(snl)); - if (rv < 0) { - log_error("uevent bind error %d errno %d", rv, errno); - close(s); - return rv; - } - - return s; -} - -static void init_header(struct gfsc_header *h, int cmd, char *name, int result, - int extra_len) -{ - memset(h, 0, sizeof(struct gfsc_header)); - - h->magic = GFSC_MAGIC; - h->version = GFSC_VERSION; - h->len = sizeof(struct gfsc_header) + extra_len; - h->command = cmd; - h->data = result; - - if (name) - strncpy(h->name, name, GFS_MOUNTGROUP_LEN); -} - -static void query_dump_debug(int fd) -{ - struct gfsc_header h; - int 
extra_len; - int len; - - /* in the case of dump_wrap, extra_len will go in two writes, - first the log tail, then the log head */ - if (dump_wrap) - extra_len = GFSC_DUMP_SIZE; - else - extra_len = dump_point; - - init_header(&h, GFSC_CMD_DUMP_DEBUG, NULL, 0, extra_len); - do_write(fd, &h, sizeof(h)); - - if (dump_wrap) { - len = GFSC_DUMP_SIZE - dump_point; - do_write(fd, dump_buf + dump_point, len); - len = dump_point; - } else - len = dump_point; - - /* NUL terminate the debug string */ - dump_buf[dump_point] = '\0'; - - do_write(fd, dump_buf, len); -} - -/* combines a header and the data and sends it back to the client in - a single do_write() call */ - -static void do_reply(int fd, int cmd, char *name, int result, void *buf, - int buflen) -{ - char *reply; - int reply_len; - - reply_len = sizeof(struct gfsc_header) + buflen; - reply = malloc(reply_len); - if (!reply) - return; - memset(reply, 0, reply_len); - - init_header((struct gfsc_header *)reply, cmd, name, result, buflen); - - if (buf && buflen) - memcpy(reply + sizeof(struct gfsc_header), buf, buflen); - - do_write(fd, reply, reply_len); - - free(reply); -} - -static void query_mountgroup_info(int fd, char *name) -{ - struct mountgroup *mg; - struct gfsc_mountgroup mountgroup; - int rv; - - mg = find_mg(name); - if (!mg) { - rv = -ENOENT; - goto out; - } - - memset(&mountgroup, 0, sizeof(mountgroup)); - - rv = set_mountgroup_info(mg, &mountgroup); - out: - do_reply(fd, GFSC_CMD_MOUNTGROUP_INFO, name, rv, - (char *)&mountgroup, sizeof(mountgroup)); -} - -static void query_node_info(int fd, char *name, int nodeid) -{ - struct mountgroup *mg; - struct gfsc_node node; - int rv; - - mg = find_mg(name); - if (!mg) { - rv = -ENOENT; - goto out; - } - - rv = set_node_info(mg, nodeid, &node); - out: - do_reply(fd, GFSC_CMD_NODE_INFO, name, rv, - (char *)&node, sizeof(node)); -} - -static void query_mountgroups(int fd, int max) -{ - int mg_count = 0; - struct gfsc_mountgroup *mgs = NULL; - int rv, result; - - rv 
= set_mountgroups(&mg_count, &mgs); - if (rv < 0) { - result = rv; - mg_count = 0; - goto out; - } - - if (mg_count > max) { - result = -E2BIG; - mg_count = max; - } else { - result = mg_count; - } - out: - do_reply(fd, GFSC_CMD_MOUNTGROUPS, NULL, result, - (char *)mgs, mg_count * sizeof(struct gfsc_mountgroup)); - - if (mgs) - free(mgs); -} - -static void query_mountgroup_nodes(int fd, char *name, int option, int max) -{ - struct mountgroup *mg; - int node_count = 0; - struct gfsc_node *nodes = NULL; - int rv, result; - - mg = find_mg(name); - if (!mg) { - result = -ENOENT; - node_count = 0; - goto out; - } - - rv = set_mountgroup_nodes(mg, option, &node_count, &nodes); - if (rv < 0) { - result = rv; - node_count = 0; - goto out; - } - - /* node_count is the number of structs copied/returned; the caller's - max may be less than that, in which case we copy as many as they - asked for and return -E2BIG */ - - if (node_count > max) { - result = -E2BIG; - node_count = max; - } else { - result = node_count; - } - out: - do_reply(fd, GFSC_CMD_MOUNTGROUP_NODES, name, result, - (char *)nodes, node_count * sizeof(struct gfsc_node)); - - if (nodes) - free(nodes); -} - -static void client_reply_join(int ci, struct gfsc_mount_args *ma, int result) -{ - char *name = strstr(ma->table, ":") + 1; - - log_debug("client_reply_join %s ci %d result %d", name, ci, result); - - do_reply(client[ci].fd, GFSC_CMD_FS_JOIN, - name, result, ma, sizeof(struct gfsc_mount_args)); -} - -static void client_sysfs_join(struct mountgroup *mg, int result) -{ - int rv; - - if (result) { - rv = set_sysfs(mg, "jid", result); - if (rv) { - log_error("join: error %d returning result %d", rv, result); - } - return; - } - - if (mg->spectator) { - rv = set_sysfs(mg, "jid", 0); - if (rv) { - log_error("join: error setting jid %d", rv); - } - return; - } - - rv = set_sysfs(mg, "first", mg->first_mounter); - if (rv) { - log_error("join: error setting first %d", rv); - } - rv = set_sysfs(mg, "jid", mg->our_jid); 
- if (rv) { - log_error("join: error setting jid %d", rv); - } -} - -void client_reply_join_full(struct mountgroup *mg, int result) -{ - char nodir_str[32]; - - if (result) - goto out; - - if (mg->our_jid < 0) { - snprintf(mg->mount_args.hostdata, PATH_MAX, - "hostdata=id=%u:first=%d", - mg->id, mg->first_mounter); - } else { - snprintf(mg->mount_args.hostdata, PATH_MAX, - "hostdata=jid=%d:id=%u:first=%d", - mg->our_jid, mg->id, mg->first_mounter); - } - - memset(nodir_str, 0, sizeof(nodir_str)); - - read_ccs_nodir(mg, nodir_str); - if (nodir_str[0]) - strcat(mg->mount_args.hostdata, nodir_str); - out: - log_group(mg, "client_reply_join_full ci %d result %d %s", - mg->mount_client, result, mg->mount_args.hostdata); - - if (mg->no_mount_helper) - client_sysfs_join(mg, result); - else - client_reply_join(mg->mount_client, &mg->mount_args, result); -} - -static void do_join(int ci, struct gfsc_mount_args *ma) -{ - struct mountgroup *mg = NULL; - char table2[PATH_MAX]; - char *cluster = NULL, *name = NULL; - int rv; - - log_debug("join: %s %s %s %s %s %s", ma->dir, ma->type, ma->proto, - ma->table, ma->options, ma->dev); - - if (strcmp(ma->proto, "lock_dlm")) { - log_error("join: lockproto %s not supported", ma->proto); - rv = -EPROTONOSUPPORT; - goto fail; - } - - if (strstr(ma->options, "jid=") || - strstr(ma->options, "first=") || - strstr(ma->options, "id=")) { - log_error("join: jid, first and id are reserved options"); - rv = -EOPNOTSUPP; - goto fail; - } - - /* table is <cluster>:<name> */ - - memset(table2, 0, sizeof(table2)); - strncpy(table2, ma->table, sizeof(table2)); - - name = strstr(table2, ":"); - if (!name) { - rv = -EBADFD; - goto fail; - } - - *name = '\0'; - name++; - cluster = table2; - - mg = find_mg(name); - if (mg) { - if (strcmp(mg->mount_args.dev, ma->dev)) { - log_error("different fs dev %s with same name", - mg->mount_args.dev); - rv = -EADDRINUSE; - } else if (mg->leaving) { - /* we're leaving the group */ - log_error("join: reject mount 
due to unmount"); - rv = -ESTALE; - } else if (mg->mount_client || !mg->kernel_mount_done) { - log_error("join: other mount in progress %d %d", - mg->mount_client, mg->kernel_mount_done); - rv = -EBUSY; - } else { - log_group(mg, "join: already mounted"); - rv = -EALREADY; - } - goto fail; - } - - mg = create_mg(name); - if (!mg) { - rv = -ENOMEM; - goto fail; - } - mg->mount_client = ci; - memcpy(&mg->mount_args, ma, sizeof(struct gfsc_mount_args)); - - if (strlen(cluster) != strlen(clustername) || - strlen(cluster) == 0 || strcmp(cluster, clustername)) { - log_error("join: fs requires cluster="%s" current="%s"", - cluster, clustername); - rv = -EBADR; - goto fail_free; - } - log_group(mg, "join: cluster name matches: %s", clustername); - - if (strstr(ma->options, "spectator")) { - log_group(mg, "join: spectator mount"); - mg->spectator = 1; - } else { - if (!we_are_in_fence_domain()) { - log_error("join: not in default fence domain"); - rv = -ENOANO; - goto fail_free; - } - } - - if (strstr(ma->options, "ro")) { - if (mg->spectator) { - log_error("join: readonly invalid with spectator"); - rv = -EROFS; - goto fail_free; - } - mg->ro = 1; - } - - list_add(&mg->list, &mountgroups); - - rv = gfs_join_mountgroup(mg); - if (rv) { - log_error("join: group join error %d", rv); - list_del(&mg->list); - goto fail_free; - } - return; - - fail_free: - free(mg); - fail: - client_reply_join(ci, ma, rv); -} - -/* The basic rule of withdraw is that we don't want to tell the kernel to drop - all locks until we know gfs has been stopped/blocked on all nodes. - A withdrawing node is very much like a readonly node, differences are - that others recover its journal when they remove it from the group, - and when it's been removed from the group, it tells the locally withdrawing - gfs to clear out locks. 
*/ - -static void do_withdraw(struct mountgroup *mg) -{ - int rv; - - log_debug("withdraw: %s", mg->name); - - if (!cfgd_enable_withdraw) { - log_error("withdraw feature not enabled"); - return; - } - - mg->withdraw_uevent = 1; - - rv = run_dmsetup_suspend(mg, mg->mount_args.dev); - if (rv) { - log_error("do_withdraw %s: dmsetup %s error %d", mg->name, - mg->mount_args.dev, rv); - return; - } - - dmsetup_wait = 1; -} - -static void do_mount_done(char *table, int result) -{ - struct mountgroup *mg; - char *name = strstr(table, ":") + 1; - - log_debug("mount_done: %s result %d", name, result); - - mg = find_mg(name); - if (!mg) { - log_error("mount_done: %s not found", name); - return; - } - - mg->mount_client = 0; - mg->kernel_mount_done = 1; - mg->kernel_mount_error = result; - - gfs_mount_done(mg); -} - -void client_reply_remount(struct mountgroup *mg, int ci, int result) -{ - do_reply(client[ci].fd, GFSC_CMD_FS_REMOUNT, mg->name, result, - &mg->mount_args, sizeof(struct gfsc_mount_args)); -} - -/* mount.gfs creates a special ma->options string with only "ro" or "rw" */ - -static void do_remount(int ci, struct gfsc_mount_args *ma) -{ - struct mountgroup *mg; - char *name = strstr(ma->table, ":") + 1; - int ro = 0, result = 0; - - log_debug("remount: %s ci %d options %s", name, ci, ma->options); - - mg = find_mg(name); - if (!mg) { - log_error("remount: %s not found", name); - result = -1; - goto out; - } - - /* FIXME: Should allow remounts */ - if (mg->spectator) { - log_error("remount of spectator not allowed"); - result = -1; - goto out; - } - - if (!strcmp(ma->options, "ro")) - ro = 1; - - send_remount(mg, ro); -out: - client_reply_remount(mg, ci, result); -} - -void process_connection(int ci) -{ - struct gfsc_header h; - struct gfsc_mount_args empty; - struct gfsc_mount_args *ma; - struct mountgroup *mg; - char *extra = NULL; - int rv, extra_len; - - rv = do_read(client[ci].fd, &h, sizeof(h)); - if (rv < 0) { - log_debug("connection %d read error %d", ci, rv); 
- goto out; - } - - if (h.magic != GFSC_MAGIC) { - log_debug("connection %d magic error %x", ci, h.magic); - goto out; - } - - if ((h.version & 0xFFFF0000) != (GFSC_VERSION & 0xFFFF0000)) { - log_debug("connection %d version error %x", ci, h.version); - goto out; - } - - if (h.len > sizeof(h)) { - extra_len = h.len - sizeof(h); - extra = malloc(extra_len); - if (!extra) { - log_error("process_connection no mem %d", extra_len); - goto out; - } - memset(extra, 0, extra_len); - - rv = do_read(client[ci].fd, extra, extra_len); - if (rv < 0) { - log_debug("connection %d extra read error %d", ci, rv); - goto out; - } - } - - ma = (struct gfsc_mount_args *)extra; - - if (!ma) { - memset(&empty, 0, sizeof(empty)); - - if (h.command == GFSC_CMD_FS_JOIN || - h.command == GFSC_CMD_FS_REMOUNT) { - do_reply(client[ci].fd, h.command, h.name, -EINVAL, - &empty, sizeof(empty)); - } - log_debug("connection %d cmd %d no data", ci, h.command); - goto out; - } - - switch (h.command) { - - case GFSC_CMD_FS_JOIN: - do_join(ci, ma); - break; - - case GFSC_CMD_FS_LEAVE: - mg = find_mg(ma->table); - if (!mg) { - log_error("do_leave: %s not found", ma->table); - break; - } - do_leave(mg, h.data); - break; - - case GFSC_CMD_FS_MOUNT_DONE: - do_mount_done(ma->table, h.data); - break; - - case GFSC_CMD_FS_REMOUNT: - do_remount(ci, ma); - break; - - default: - log_error("process_connection %d unknown command %d", - ci, h.command); - } - out: - if (extra) - free(extra); - - /* no client_dead(ci) here, since the connection for - join/remount is reused */ -} - -static void process_listener(int ci) -{ - int fd, i; - - fd = accept(client[ci].fd, NULL, NULL); - if (fd < 0) { - log_error("process_listener: accept error %d %d", fd, errno); - return; - } - - i = client_add(fd, process_connection, NULL); - - log_debug("client connection %d fd %d", i, fd); -} - -static int setup_listener(const char *sock_path) -{ - struct sockaddr_un addr; - socklen_t addrlen; - int rv, s; - - /* we listen for new client 
connections on socket s */ - - s = socket(AF_LOCAL, SOCK_STREAM, 0); - if (s < 0) { - log_error("socket error %d %d", s, errno); - return s; - } - - memset(&addr, 0, sizeof(addr)); - addr.sun_family = AF_LOCAL; - strcpy(&addr.sun_path[1], sock_path); - addrlen = sizeof(sa_family_t) + strlen(addr.sun_path+1) + 1; - - rv = bind(s, (struct sockaddr *) &addr, addrlen); - if (rv < 0) { - log_error("bind error %d %d", rv, errno); - close(s); - return rv; - } - - rv = listen(s, 5); - if (rv < 0) { - log_error("listen error %d %d", rv, errno); - close(s); - return rv; - } - return s; -} - -void query_lock(void) -{ - pthread_mutex_lock(&query_mutex); -} - -void query_unlock(void) -{ - pthread_mutex_unlock(&query_mutex); -} - -/* This is a thread, so we have to be careful, don't call log_ functions. - We need a thread to process queries because the main thread may block - for long periods. */ - -static void *process_queries(void *arg) -{ - struct gfsc_header h; - int f, rv, s; - - rv = setup_listener(GFSC_QUERY_SOCK_PATH); - if (rv < 0) - return NULL; - - s = rv; - - for (;;) { - f = accept(s, NULL, NULL); - if (f < 0) - return NULL; - - rv = do_read(f, &h, sizeof(h)); - if (rv < 0) { - goto out; - } - - if (h.magic != GFSC_MAGIC) { - goto out; - } - - if ((h.version & 0xFFFF0000) != (GFSC_VERSION & 0xFFFF0000)) { - goto out; - } - - query_lock(); - - switch (h.command) { - case GFSC_CMD_DUMP_DEBUG: - query_dump_debug(f); - break; - case GFSC_CMD_MOUNTGROUP_INFO: - query_mountgroup_info(f, h.name); - break; - case GFSC_CMD_NODE_INFO: - query_node_info(f, h.name, h.data); - break; - case GFSC_CMD_MOUNTGROUPS: - query_mountgroups(f, h.data); - break; - case GFSC_CMD_MOUNTGROUP_NODES: - query_mountgroup_nodes(f, h.name, h.option, h.data); - break; - default: - break; - } - query_unlock(); - - out: - close(f); - } -} - -static int setup_queries(void) -{ - int rv; - - pthread_mutex_init(&query_mutex, NULL); - - rv = pthread_create(&query_thread, NULL, process_queries, NULL); - if 
(rv < 0) { - log_error("can't create query thread"); - return rv; - } - return 0; -} - -void cluster_dead(int ci) -{ - if (!cluster_down) - log_error("cluster is down, exiting"); - daemon_quit = 1; - cluster_down = 1; -} - -static void loop(void) -{ - int poll_timeout = -1; - int rv, i; - void (*workfn) (int ci); - void (*deadfn) (int ci); - - rv = setup_queries(); - if (rv < 0) - goto out; - - rv = setup_listener(GFSC_SOCK_PATH); - if (rv < 0) - goto out; - client_add(rv, process_listener, NULL); - - rv = setup_cluster_cfg(); - if (rv < 0) - goto out; - client_add(rv, process_cluster_cfg, cluster_dead); - - rv = setup_cluster(); - if (rv < 0) - goto out; - client_add(rv, process_cluster, cluster_dead); - - update_cluster(); - - rv = setup_ccs(); - if (rv < 0) - goto out; - - setup_logging(); - - rv = check_uncontrolled_filesystems(); - if (rv < 0) - goto out; - - rv = setup_uevent(); - if (rv < 0) - goto out; - client_add(rv, process_uevent, NULL); - - rv = setup_cpg_daemon(); - if (rv < 0) - goto out; - client_add(rv, process_cpg_daemon, cluster_dead); - - rv = set_protocol(); - if (rv < 0) - goto out; - - rv = setup_dlmcontrol(); - if (rv < 0) - goto out; - client_add(rv, process_dlmcontrol, cluster_dead); - - for (;;) { - rv = poll(pollfd, client_maxi + 1, poll_timeout); - if (rv == -1 && errno == EINTR) { - if (daemon_quit && list_empty(&mountgroups)) - goto out; - daemon_quit = 0; - continue; - } - if (rv < 0) { - log_error("poll errno %d", errno); - goto out; - } - - query_lock(); - - for (i = 0; i <= client_maxi; i++) { - if (client[i].fd < 0) - continue; - if (pollfd[i].revents & POLLIN) { - workfn = client[i].workfn; - workfn(i); - } - if (pollfd[i].revents & (POLLERR | POLLHUP | POLLNVAL)) { - deadfn = client[i].deadfn; - deadfn(i); - } - } - query_unlock(); - - if (daemon_quit) - break; - - query_lock(); - - poll_timeout = -1; - - if (poll_dlm) { - process_mountgroups(); - poll_timeout = 500; - } - - if (dmsetup_wait) { - update_dmsetup_wait(); - if 
(dmsetup_wait) { - if (poll_timeout == -1) - poll_timeout = 1000; - } - } - query_unlock(); - } - out: - close_cpg_daemon(); - close_logging(); - close_ccs(); - close_cluster(); - close_cluster_cfg(); - - if (!list_empty(&mountgroups)) - log_error("mountgroups abandoned"); -} - -static void lockfile(void) -{ - int fd, error; - struct flock lock; - char buf[33]; - - memset(buf, 0, 33); - - fd = open(LOCKFILE_NAME, O_CREAT|O_WRONLY, - S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); - if (fd < 0) { - fprintf(stderr, "cannot open/create lock file %s\n", - LOCKFILE_NAME); - exit(EXIT_FAILURE); - } - - lock.l_type = F_WRLCK; - lock.l_start = 0; - lock.l_whence = SEEK_SET; - lock.l_len = 0; - - error = fcntl(fd, F_SETLK, &lock); - if (error) { - fprintf(stderr, "gfs_controld is already running\n"); - exit(EXIT_FAILURE); - } - - error = ftruncate(fd, 0); - if (error) { - fprintf(stderr, "cannot clear lock file %s\n", LOCKFILE_NAME); - exit(EXIT_FAILURE); - } - - sprintf(buf, "%d\n", getpid()); - - error = write(fd, buf, strlen(buf)); - if (error <= 0) { - fprintf(stderr, "cannot write lock file %s\n", LOCKFILE_NAME); - exit(EXIT_FAILURE); - } -} - -static void print_usage(void) -{ - printf("Usage:\n"); - printf("\n"); - printf("gfs_controld [options]\n"); - printf("\n"); - printf("Options:\n"); - printf("\n"); - printf(" -D Enable debugging to stderr and don't fork\n"); - printf(" -L Enable debugging to log file\n"); - printf(" -w <num> Enable (1) or disable (0) withdraw\n"); - printf(" Default is %d\n", DEFAULT_ENABLE_WITHDRAW); - printf(" -h Print this help, then exit\n"); - printf(" -V Print program version information, then exit\n"); -} - -#define OPTION_STRING "LDw:hV" - -static void read_arguments(int argc, char **argv) -{ - int cont = 1; - int optchar; - - while (cont) { - optchar = getopt(argc, argv, OPTION_STRING); - - switch (optchar) { - - case 'D': - daemon_debug_opt = 1; - break; - - case 'L': - optd_debug_logfile = 1; - cfgd_debug_logfile = 1; - break; - - case 'w': - 
optd_enable_withdraw = 1; - cfgd_enable_withdraw = atoi(optarg); - break; - - case 'h': - print_usage(); - exit(EXIT_SUCCESS); - break; - - case 'V': - printf("gfs_controld %s (built %s %s)\n", - VERSION, __DATE__, __TIME__); - printf("%s\n", REDHAT_COPYRIGHT); - exit(EXIT_SUCCESS); - break; - - case ':': - case '?': - fprintf(stderr, "Please use '-h' for usage.\n"); - exit(EXIT_FAILURE); - break; - - case EOF: - cont = 0; - break; - - default: - fprintf(stderr, "unknown option: %c\n", optchar); - exit(EXIT_FAILURE); - break; - }; - } - - if (getenv("GFS_CONTROLD_DEBUG")) { - optd_debug_logfile = 1; - cfgd_debug_logfile = 1; - } -} - -static void set_scheduler(void) -{ - struct sched_param sched_param; - int rv; - - rv = sched_get_priority_max(SCHED_RR); - if (rv != -1) { - sched_param.sched_priority = rv; - rv = sched_setscheduler(0, SCHED_RR, &sched_param); - if (rv == -1) - log_error("could not set SCHED_RR priority %d err %d", - sched_param.sched_priority, errno); - } else { - log_error("could not get maximum scheduler priority err %d", - errno); - } -} - -int main(int argc, char **argv) -{ - INIT_LIST_HEAD(&mountgroups); - INIT_LIST_HEAD(&withdrawn_mounts); - - read_arguments(argc, argv); - - if (!daemon_debug_opt) { - if (daemon(0, 0) < 0) { - perror("daemon error"); - exit(EXIT_FAILURE); - } - } - lockfile(); - init_logging(); - log_level(LOG_INFO, "gfs_controld %s started", VERSION); - signal(SIGTERM, sigterm_handler); - set_scheduler(); - - loop(); - - return 0; -} - -void daemon_dump_save(void) -{ - int len, i; - - len = strlen(daemon_debug_buf); - - for (i = 0; i < len; i++) { - dump_buf[dump_point++] = daemon_debug_buf[i]; - - if (dump_point == GFSC_DUMP_SIZE) { - dump_point = 0; - dump_wrap = 1; - } - } -} - -int daemon_debug_opt; -int daemon_quit; -int cluster_down; -int poll_dlm; -struct list_head mountgroups; -int our_nodeid; -char clustername[1024]; -char daemon_debug_buf[256]; -char dump_buf[GFSC_DUMP_SIZE]; -int dump_point; -int dump_wrap; -int 
dmsetup_wait; -cpg_handle_t cpg_handle_daemon; -int libcpg_flow_control_on; -struct list_head withdrawn_mounts; - diff --git a/group/gfs_controld/member_cman.c b/group/gfs_controld/member_cman.c deleted file mode 100644 index 07fb982..0000000 --- a/group/gfs_controld/member_cman.c +++ /dev/null @@ -1,207 +0,0 @@ -#include "gfs_daemon.h" -#include "config.h" -#include <corosync/corotypes.h> -#include <corosync/cfg.h> -#include <corosync/quorum.h> - -static corosync_cfg_handle_t ch; -static quorum_handle_t qh; -static uint32_t old_nodes[MAX_NODES]; -static int old_node_count; -static uint32_t quorum_nodes[MAX_NODES]; -static int quorum_node_count; - -static int is_member(uint32_t *node_list, int count, uint32_t nodeid) -{ - int i; - - for (i = 0; i < count; i++) { - if (node_list[i] == nodeid) - return 1; - } - return 0; -} - -static int is_old_member(uint32_t nodeid) -{ - return is_member(old_nodes, old_node_count, nodeid); -} - -static int is_cluster_member(uint32_t nodeid) -{ - return is_member(quorum_nodes, quorum_node_count, nodeid); -} - -static void quorum_callback(quorum_handle_t h, uint32_t quorate, - uint64_t ring_seq, uint32_t node_list_entries, - uint32_t *node_list) -{ - int i; - - old_node_count = quorum_node_count; - memcpy(&old_nodes, &quorum_nodes, sizeof(old_nodes)); - - quorum_node_count = 0; - memset(&quorum_nodes, 0, sizeof(quorum_nodes)); - - for (i = 0; i < node_list_entries; i++) - quorum_nodes[quorum_node_count++] = node_list[i]; - - for (i = 0; i < old_node_count; i++) { - if (!is_cluster_member(old_nodes[i])) { - log_debug("cluster node %u removed", old_nodes[i]); - node_history_cluster_remove(old_nodes[i]); - } - } - - for (i = 0; i < quorum_node_count; i++) { - if (!is_old_member(quorum_nodes[i])) { - log_debug("cluster node %u added", quorum_nodes[i]); - node_history_cluster_add(quorum_nodes[i]); - } - } -} - -static quorum_callbacks_t quorum_callbacks = -{ - .quorum_notify_fn = quorum_callback, -}; - -void process_cluster(int ci) -{ - 
cs_error_t err; - - err = quorum_dispatch(qh, CS_DISPATCH_ALL); - if (err != CS_OK) - cluster_dead(0); -} - -/* Force re-read of quorum nodes */ -void update_cluster(void) -{ - cs_error_t err; - - err = quorum_dispatch(qh, CS_DISPATCH_ONE); - if (err != CS_OK) - cluster_dead(0); -} - -int setup_cluster(void) -{ - cs_error_t err; - int fd; - - err = quorum_initialize(&qh, &quorum_callbacks); - if (err != CS_OK) { - log_error("quorum init error %d", err); - return -1; - } - - err = quorum_fd_get(qh, &fd); - if (err != CS_OK) { - log_error("quorum fd_get error %d", err); - goto fail; - } - - err = quorum_trackstart(qh, CS_TRACK_CHANGES); - if (err != CS_OK) { - log_error("quorum trackstart error %d", err); - goto fail; - } - - old_node_count = 0; - memset(&old_nodes, 0, sizeof(old_nodes)); - quorum_node_count = 0; - memset(&quorum_nodes, 0, sizeof(quorum_nodes)); - - return fd; - fail: - quorum_finalize(qh); - return -1; -} - -void close_cluster(void) -{ - quorum_trackstop(qh); - quorum_finalize(qh); -} - -void kick_node_from_cluster(int nodeid) -{ - if (!nodeid) { - log_error("telling corosync to shut down cluster locally"); - corosync_cfg_try_shutdown(ch, - COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE); - } else { - log_error("telling corosync to remove nodeid %d from cluster", - nodeid); - corosync_cfg_kill_node(ch, nodeid, "gfs_controld"); - } -} - -static void shutdown_callback(corosync_cfg_handle_t h, - corosync_cfg_shutdown_flags_t flags) -{ - if (flags == COROSYNC_CFG_SHUTDOWN_FLAG_REQUEST) { - if (list_empty(&mountgroups)) - corosync_cfg_replyto_shutdown(ch, - COROSYNC_CFG_SHUTDOWN_FLAG_YES); - else { - log_debug("no to corosync shutdown"); - corosync_cfg_replyto_shutdown(ch, - COROSYNC_CFG_SHUTDOWN_FLAG_NO); - } - } -} - -static corosync_cfg_callbacks_t cfg_callbacks = -{ - .corosync_cfg_shutdown_callback = shutdown_callback, - .corosync_cfg_state_track_callback = NULL, -}; - -void process_cluster_cfg(int ci) -{ - cs_error_t err; - - err = corosync_cfg_dispatch(ch, 
CS_DISPATCH_ALL); - if (err != CS_OK) - cluster_dead(0); -} - -int setup_cluster_cfg(void) -{ - cs_error_t err; - unsigned int nodeid; - int fd; - - err = corosync_cfg_initialize(&ch, &cfg_callbacks); - if (err != CS_OK) { - log_error("corosync cfg init error %d", err); - return -1; - } - - err = corosync_cfg_fd_get(ch, &fd); - if (err != CS_OK) { - log_error("corosync cfg fd_get error %d", err); - corosync_cfg_finalize(ch); - return -1; - } - - err = corosync_cfg_local_get(ch, &nodeid); - if (err != CS_OK) { - log_error("corosync cfg local_get error %d", err); - corosync_cfg_finalize(ch); - return -1; - } - our_nodeid = nodeid; - log_debug("our_nodeid %d", our_nodeid); - - return fd; -} - -void close_cluster_cfg(void) -{ - corosync_cfg_finalize(ch); -} - diff --git a/group/gfs_controld/target.mk b/group/gfs_controld/target.mk deleted file mode 100644 index 6d04792..0000000 --- a/group/gfs_controld/target.mk +++ /dev/null @@ -1,3 +0,0 @@ - -$(eval $(call make-binary,/usr/sbin/gfs_controld,group/libgfscontrol/libgfscontrol.a -llogthread -lcpg -lpthread -lccs -lfenced -lcfg -ldlmcontrol -lcman -lquorum)) - diff --git a/group/gfs_controld/util.c b/group/gfs_controld/util.c deleted file mode 100644 index f77f85b..0000000 --- a/group/gfs_controld/util.c +++ /dev/null @@ -1,266 +0,0 @@ -#include "gfs_daemon.h" -#include "config.h" -#include "libfenced.h" - -void update_flow_control_status(void) -{ - cpg_flow_control_state_t flow_control_state; - cpg_error_t error; - - error = cpg_flow_control_state_get(cpg_handle_daemon, &flow_control_state); - if (error != CPG_OK) { - log_error("cpg_flow_control_state_get %d", error); - return; - } - - if (flow_control_state == CPG_FLOW_CONTROL_ENABLED) { - if (libcpg_flow_control_on == 0) { - log_debug("flow control on"); - } - libcpg_flow_control_on = 1; - } else { - if (libcpg_flow_control_on) { - log_debug("flow control off"); - } - libcpg_flow_control_on = 0; - } -} - -int we_are_in_fence_domain(void) -{ - struct fenced_node 
nodeinfo; - int rv; - - memset(&nodeinfo, 0, sizeof(nodeinfo)); - - rv = fenced_node_info(our_nodeid, &nodeinfo); - if (rv < 0) { - log_debug("fenced_node_info error %d", rv); - return 0; - } - - if (nodeinfo.member) - return 1; - return 0; -} - -#define SYSFS_DIR "/sys/fs" -#define SYSFS_BUFLEN 64 - -int set_sysfs(struct mountgroup *mg, const char *field, int val) -{ - char fname[PATH_MAX]; - char out[SYSFS_BUFLEN]; - int rv, fd; - - snprintf(fname, PATH_MAX, "%s/%s/%s/lock_module/%s", - SYSFS_DIR, mg->mount_args.type, mg->mount_args.table, field); - - log_group(mg, "set %s to %d", fname, val); - - fd = open(fname, O_RDWR); - if (fd < 0) { - log_group(mg, "set open %s error %d %d", fname, fd, errno); - return -1; - } - - memset(out, 0, sizeof(out)); - sprintf(out, "%d", val); - - rv = write(fd, out, strlen(out)); - if (rv < 0) - log_group(mg, "set write %s error %d", fname, errno); - close(fd); - - return 0; -} - -int run_dmsetup_suspend(struct mountgroup *mg, char *dev) -{ - struct sched_param sched_param; - char fname[PATH_MAX]; - char smajor[16]; - char sminor[16]; - pid_t pid; - int i; - int major, minor; - FILE *fp; - - snprintf(fname, PATH_MAX, "%s/%s/%s/device/dev", - SYSFS_DIR, mg->mount_args.type, mg->mount_args.table); - - fp = fopen(fname, "r"); - if (fp == NULL) { - log_group(mg, "set open %s error %d", fname, errno); - return -1; - } - - if (fscanf(fp, "%d:%d", &major, &minor) != 2) { - log_group(mg, "cannot read device numbers %d", errno); - return -1; - } - - fclose(fp); - - log_group(mg, "run_dmsetup_suspend %d:%d", major, minor); - - snprintf(smajor, 16, "%d", major); - snprintf(sminor, 16, "%d", minor); - - pid = fork(); - if (pid < 0) - return -1; - - if (pid) { - mg->dmsetup_wait = 1; - mg->dmsetup_pid = pid; - return 0; - } else { - sched_param.sched_priority = 0; - sched_setscheduler(0, SCHED_OTHER, &sched_param); - - for (i = 0; i < 50; i++) - close(i); - - execlp("dmsetup", "dmsetup", "suspend", "--nolockfs", - "--noflush", "-j", smajor, 
"-m", sminor, NULL); - exit(EXIT_FAILURE); - } - return -1; -} - -static void dmsetup_suspend_done(struct mountgroup *mg, int rv) -{ - log_group(mg, "dmsetup_suspend_done result %d", rv); - mg->dmsetup_wait = 0; - mg->dmsetup_pid = 0; - - if (!rv) { - mg->withdraw_suspend = 1; - send_withdraw(mg); - } -} - -void update_dmsetup_wait(void) -{ - struct mountgroup *mg; - int status; - int waiting = 0; - pid_t pid; - - list_for_each_entry(mg, &mountgroups, list) { - if (mg->dmsetup_wait) { - pid = waitpid(mg->dmsetup_pid, &status, WNOHANG); - - /* process not exited yet */ - if (!pid) { - waiting++; - continue; - } - - if (pid < 0) { - log_error("update_dmsetup_wait %s: waitpid %d " - "error %d", mg->name, - mg->dmsetup_pid, errno); - dmsetup_suspend_done(mg, -2); - continue; - } - - /* process exited */ - - if (!WIFEXITED(status) || WEXITSTATUS(status)) - dmsetup_suspend_done(mg, -1); - else - dmsetup_suspend_done(mg, 0); - } - } - - if (!waiting) { - dmsetup_wait = 0; - log_debug("dmsetup_wait off"); - } -} - -static int ignore_nolock(const char *sysfs_dir, char *table) -{ - char path[PATH_MAX]; - char buf[32]; - int fd, rv; - - memset(path, 0, PATH_MAX); - - snprintf(path, PATH_MAX, "%s/%s/lock_module/proto_name", - sysfs_dir, table); - - /* lock_nolock doesn't create the "lock_module" dir at all, - so we'll fail to open this */ - - fd = open(path, O_RDONLY); - if (fd < 0) - return 1; - - memset(buf, 0, sizeof(buf)); - - rv = read(fd, buf, sizeof(buf)); - close(fd); - if (rv < 0) - return 1; - - if (!strncmp(buf, "lock_nolock", 11)) - return 1; - - return 0; -} - -/* This is for the case where gfs_controld exits/fails, abandoning gfs - filesystems in the kernel, and then gfs_controld is restarted. When - gfs_controld exits and abandons lockspaces, that node needs to be - rebooted to clear the uncontrolled filesystems from the kernel. 
*/ - -int check_uncontrolled_filesystems(void) -{ - DIR *d; - struct dirent *de; - int count = 0; - - d = opendir("/sys/fs/gfs/"); - if (!d) - goto gfs2; - - while ((de = readdir(d))) { - if (de->d_name[0] == '.') - continue; - - if (ignore_nolock("/sys/fs/gfs/", de->d_name)) - continue; - - log_error("found uncontrolled gfs fs %s", de->d_name); - count++; - } - closedir(d); - - gfs2: - d = opendir("/sys/fs/gfs2/"); - if (!d) - goto out; - - while ((de = readdir(d))) { - if (de->d_name[0] == '.') - continue; - - if (ignore_nolock("/sys/fs/gfs2/", de->d_name)) - continue; - - log_error("found uncontrolled gfs2 fs %s", de->d_name); - count++; - } - closedir(d); - - out: - if (count) { - kick_node_from_cluster(our_nodeid); - return -1; - } - return 0; -} - diff --git a/group/include/Makefile.am b/group/include/Makefile.am deleted file mode 100644 index b2c3ce5..0000000 --- a/group/include/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -MAINTAINERCLEANFILES = Makefile.in - -noinst_HEADERS = linux_endian.h list.h diff --git a/group/include/linux_endian.h b/group/include/linux_endian.h deleted file mode 100644 index 43089d2..0000000 --- a/group/include/linux_endian.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef __LINUX_ENDIAN_DOT_H__ -#define __LINUX_ENDIAN_DOT_H__ - - -#include <endian.h> -#include <byteswap.h> - - -/* I'm not sure which versions of alpha glibc/gcc are broken, - so fix all of them. 
*/ -#ifdef __alpha__ -#undef bswap_64 -static __inline__ unsigned long bswap_64(unsigned long x) -{ - unsigned int h = x >> 32; - unsigned int l = x; - - h = bswap_32(h); - l = bswap_32(l); - - return ((unsigned long)l << 32) | h; -} -#endif /* __alpha__ */ - - -#if __BYTE_ORDER == __BIG_ENDIAN - -#define be16_to_cpu(x) (x) -#define be32_to_cpu(x) (x) -#define be64_to_cpu(x) (x) - -#define cpu_to_be16(x) (x) -#define cpu_to_be32(x) (x) -#define cpu_to_be64(x) (x) - -#define le16_to_cpu(x) (bswap_16((x))) -#define le32_to_cpu(x) (bswap_32((x))) -#define le64_to_cpu(x) (bswap_64((x))) - -#define cpu_to_le16(x) (bswap_16((x))) -#define cpu_to_le32(x) (bswap_32((x))) -#define cpu_to_le64(x) (bswap_64((x))) - -#endif /* __BYTE_ORDER == __BIG_ENDIAN */ - - -#if __BYTE_ORDER == __LITTLE_ENDIAN - -#define be16_to_cpu(x) (bswap_16((x))) -#define be32_to_cpu(x) (bswap_32((x))) -#define be64_to_cpu(x) (bswap_64((x))) - -#define cpu_to_be16(x) (bswap_16((x))) -#define cpu_to_be32(x) (bswap_32((x))) -#define cpu_to_be64(x) (bswap_64((x))) - -#define le16_to_cpu(x) (x) -#define le32_to_cpu(x) (x) -#define le64_to_cpu(x) (x) - -#define cpu_to_le16(x) (x) -#define cpu_to_le32(x) (x) -#define cpu_to_le64(x) (x) - -#endif /* __BYTE_ORDER == __LITTLE_ENDIAN */ - - -#endif /* __LINUX_ENDIAN_DOT_H__ */ diff --git a/group/include/list.h b/group/include/list.h deleted file mode 100644 index 8100cbc..0000000 --- a/group/include/list.h +++ /dev/null @@ -1,336 +0,0 @@ -/* Copied from include/linux/list.h */ - -#ifndef _LINUX_LIST_H -#define _LINUX_LIST_H - -/** - * container_of - cast a member of a structure out to the containing structure - * - * @ptr: the pointer to the member. - * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. 
- * - */ -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - offsetof(type,member) );}) - - -/* - * These are non-NULL pointers that will result in page faults - * under normal circumstances, used to verify that nobody uses - * non-initialized list entries. - */ -#define LIST_POISON1 ((void *) 0x00100100) -#define LIST_POISON2 ((void *) 0x00200200) - -/* - * Simple doubly linked list implementation. - * - * Some of the internal functions ("__xxx") are useful when - * manipulating whole lists rather than single entries, as - * sometimes we already know the next/prev entries and we can - * generate better code by using them directly rather than - * using the generic single-entry routines. - */ - -struct list_head { - struct list_head *next, *prev; -}; - -#define LIST_HEAD_INIT(name) { &(name), &(name) } - -#define LIST_HEAD(name) \ - struct list_head name = LIST_HEAD_INIT(name) - -#define INIT_LIST_HEAD(ptr) do { \ - (ptr)->next = (ptr); (ptr)->prev = (ptr); \ -} while (0) - -/* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_add(struct list_head *new, - struct list_head *prev, - struct list_head *next) -{ - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; -} - -/** - * list_add - add a new entry - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - */ -static inline void list_add(struct list_head *new, struct list_head *head) -{ - __list_add(new, head, head->next); -} - -/** - * list_add_tail - add a new entry - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. 
- */ -static inline void list_add_tail(struct list_head *new, struct list_head *head) -{ - __list_add(new, head->prev, head); -} - -/* - * Delete a list entry by making the prev/next entries - * point to each other. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_del(struct list_head * prev, struct list_head * next) -{ - next->prev = prev; - prev->next = next; -} - -/** - * list_del - deletes entry from list. - * @entry: the element to delete from the list. - * Note: list_empty on entry does not return true after this, the entry is - * in an undefined state. - */ -static inline void list_del(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - entry->next = LIST_POISON1; - entry->prev = LIST_POISON2; -} - -/** - * list_del_init - deletes entry from list and reinitialize it. - * @entry: the element to delete from the list. - */ -static inline void list_del_init(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - INIT_LIST_HEAD(entry); -} - -/** - * list_move - delete from one list and add as another's head - * @list: the entry to move - * @head: the head that will precede our entry - */ -static inline void list_move(struct list_head *list, struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add(list, head); -} - -/** - * list_move_tail - delete from one list and add as another's tail - * @list: the entry to move - * @head: the head that will follow our entry - */ -static inline void list_move_tail(struct list_head *list, - struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add_tail(list, head); -} - -/** - * list_empty - tests whether a list is empty - * @head: the list to test. 
- */ -static inline int list_empty(const struct list_head *head) -{ - return head->next == head; -} - -/** - * list_empty_careful - tests whether a list is - * empty _and_ checks that no other CPU might be - * in the process of still modifying either member - * - * NOTE: using list_empty_careful() without synchronization - * can only be safe if the only activity that can happen - * to the list entry is list_del_init(). Eg. it cannot be used - * if another CPU could re-list_add() it. - * - * @head: the list to test. - */ -static inline int list_empty_careful(const struct list_head *head) -{ - struct list_head *next = head->next; - return (next == head) && (next == head->prev); -} - -static inline void __list_splice(struct list_head *list, - struct list_head *head) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - first->prev = head; - head->next = first; - - last->next = at; - at->prev = last; -} - -/** - * list_splice - join two lists - * @list: the new list to add. - * @head: the place to add it in the first list. - */ -static inline void list_splice(struct list_head *list, struct list_head *head) -{ - if (!list_empty(list)) - __list_splice(list, head); -} - -/** - * list_splice_init - join two lists and reinitialise the emptied list. - * @list: the new list to add. - * @head: the place to add it in the first list. - * - * The list at @list is reinitialised - */ -static inline void list_splice_init(struct list_head *list, - struct list_head *head) -{ - if (!list_empty(list)) { - __list_splice(list, head); - INIT_LIST_HEAD(list); - } -} - -/** - * list_entry - get the struct for this entry - * @ptr: the &struct list_head pointer. - * @type: the type of the struct this is embedded in. - * @member: the name of the list_struct within the struct. 
- */ -#define list_entry(ptr, type, member) \ - container_of(ptr, type, member) - -/** - * list_first_entry - get the first element from a list - * @ptr: the list head to take the element from. - * @type: the type of the struct this is embedded in. - * @member: the name of the list_struct within the struct. - * - * Note, that list is expected to be not empty. - */ -#define list_first_entry(ptr, type, member) \ - list_entry((ptr)->next, type, member) - -/** - * list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop counter. - * @head: the head for your list. - */ -#define list_for_each(pos, head) \ - for (pos = (head)->next; pos != (head); pos = pos->next) - -/** - * __list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop counter. - * @head: the head for your list. - * - * This variant differs from list_for_each() in that it's the - * simplest possible list iteration code, no prefetching is done. - * Use this for code that knows the list to be very short (empty - * or 1 entry) most of the time. - */ -#define __list_for_each(pos, head) \ - for (pos = (head)->next; pos != (head); pos = pos->next) - -/** - * list_for_each_prev - iterate over a list backwards - * @pos: the &struct list_head to use as a loop counter. - * @head: the head for your list. - */ -#define list_for_each_prev(pos, head) \ - for (pos = (head)->prev; pos != (head); pos = pos->prev) - -/** - * list_for_each_safe - iterate over a list safe against removal of list entry - * @pos: the &struct list_head to use as a loop counter. - * @n: another &struct list_head to use as temporary storage - * @head: the head for your list. - */ -#define list_for_each_safe(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = n, n = pos->next) - -/** - * list_for_each_entry - iterate over list of given type - * @pos: the type * to use as a loop counter. - * @head: the head for your list. 
- * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_reverse - iterate backwards over list of given type. - * @pos: the type * to use as a loop counter. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_reverse(pos, head, member) \ - for (pos = list_entry((head)->prev, typeof(*pos), member); \ - &pos->member != (head); \ - pos = list_entry(pos->member.prev, typeof(*pos), member)) - -/** - * list_prepare_entry - prepare a pos entry for use as a start point in - * list_for_each_entry_continue - * @pos: the type * to use as a start point - * @head: the head of the list - * @member: the name of the list_struct within the struct. - */ -#define list_prepare_entry(pos, head, member) \ - ((pos) ? : list_entry(head, typeof(*pos), member)) - -/** - * list_for_each_entry_continue - iterate over list of given type - * continuing after existing point - * @pos: the type * to use as a loop counter. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_continue(pos, head, member) \ - for (pos = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @pos: the type * to use as a loop counter. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. 
- */ -#define list_for_each_entry_safe(pos, n, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - - -#endif diff --git a/group/libgfscontrol/Makefile.am b/group/libgfscontrol/Makefile.am deleted file mode 100644 index 9e6eb0d..0000000 --- a/group/libgfscontrol/Makefile.am +++ /dev/null @@ -1,9 +0,0 @@ -MAINTAINERCLEANFILES = Makefile.in - -noinst_HEADERS = libgfscontrol.h - -noinst_LTLIBRARIES = libgfscontrol.la - -libgfscontrol_la_SOURCES = main.c - -libgfscontrol_la_CPPFLAGS = -I$(top_srcdir)/group/gfs_controld diff --git a/group/libgfscontrol/libgfscontrol.h b/group/libgfscontrol/libgfscontrol.h deleted file mode 100644 index e5bb969..0000000 --- a/group/libgfscontrol/libgfscontrol.h +++ /dev/null @@ -1,122 +0,0 @@ -#ifndef _LIBGFSCONTROL_H_ -#define _LIBGFSCONTROL_H_ - -/* Maximum mountgroup name length, should match DLM_LOCKSPACE_LEN from - linux/dlmconstants.h. The libcpg limit is larger at - CPG_MAX_NAME_LENGTH 128. Our cpg name includes a "gfs:" prefix before - the mountgroup name. 
*/ - -#define GFS_MOUNTGROUP_LEN 64 - -#define GFSC_DUMP_SIZE (1024 * 1024) - -#define GFSC_NF_MEMBER 0x00000001 /* node is member in cg */ -#define GFSC_NF_START 0x00000002 /* start message recvd */ -#define GFSC_NF_DISALLOWED 0x00000004 /* node disallowed in cg */ -#define GFSC_NF_KERNEL_MOUNT_DONE 0x00000008 -#define GFSC_NF_KERNEL_MOUNT_ERROR 0x00000010 -#define GFSC_NF_READONLY 0x00000020 -#define GFSC_NF_SPECTATOR 0x00000040 -#define GFSC_NF_CHECK_DLM 0x00000080 - -struct gfsc_node { - int nodeid; - int jid; - uint32_t flags; - uint32_t added_seq; - uint32_t removed_seq; - int failed_reason; -}; - -struct gfsc_change { - int member_count; - int joined_count; - int remove_count; - int failed_count; - int wait_condition; /* 0 no, 1 fencing, 2 quorum, 3 fs */ - int wait_messages; /* 0 no, 1 yes */ - uint32_t seq; - uint32_t combined_seq; -}; - -#define GFSC_MF_JOINING 0x00000001 -#define GFSC_MF_LEAVING 0x00000002 -#define GFSC_MF_KERNEL_STOPPED 0x00000004 -#define GFSC_MF_KERNEL_MOUNT_DONE 0x00000008 -#define GFSC_MF_KERNEL_MOUNT_ERROR 0x00000010 -#define GFSC_MF_FIRST_RECOVERY_NEEDED 0x00000020 -#define GFSC_MF_FIRST_RECOVERY_MSG 0x00000040 -#define GFSC_MF_LOCAL_RECOVERY_BUSY 0x00000080 - -struct gfsc_mountgroup { - int group_mode; - struct gfsc_change cg_prev; /* completed change (started_change) */ - struct gfsc_change cg_next; /* in-progress change (changes list) */ - int journals_need_recovery; /* count of jounals need_recovery */ - uint32_t flags; - uint32_t global_id; - char name[GFS_MOUNTGROUP_LEN+1]; -}; - -/* gfsc_mountgroup_nodes() types - - MEMBERS: members in completed (prev) change, - zero if there's no completed (prev) change - NEXT: members in in-progress (next) change, - zero if there's no in-progress (next) change - ALL: NEXT + nonmembers if there's an in-progress (next) change, - MEMBERS + nonmembers if there's no in-progress (next) change, but - there is a completed (prev) change - nonmembers if there's no in-progress (next) or completed 
(prev) - change (possible?) - - gfsc_node_info() returns info for in-progress (next) change, if one exists, - otherwise it returns info for completed (prev) change. -*/ - -#define GFSC_NODES_ALL 1 -#define GFSC_NODES_MEMBERS 2 -#define GFSC_NODES_NEXT 3 - -int gfsc_dump_debug(char *buf); -int gfsc_dump_plocks(char *name, char *buf); -int gfsc_mountgroup_info(char *mgname, struct gfsc_mountgroup *mg); -int gfsc_node_info(char *mgname, int nodeid, struct gfsc_node *node); -int gfsc_mountgroups(int max, int *count, struct gfsc_mountgroup *mgs); -int gfsc_mountgroup_nodes(char *mgname, int type, int max, int *count, - struct gfsc_node *nodes); - -struct gfsc_mount_args { - char dir[PATH_MAX]; - char type[PATH_MAX]; - char proto[PATH_MAX]; - char table[PATH_MAX]; - char options[PATH_MAX]; - char dev[PATH_MAX]; - char hostdata[PATH_MAX]; -}; - -/* - * mount.gfs connects to gfs_controld, - * mount.gfs tells gfs_controld to do a join or remount, - * mount.gfs reads the result of the join or remount from gfs_controld, - * mount.gfs tells gfs_controld the result of the mount(2), - * mount.gfs disconnects from gfs_controld - */ - -int gfsc_fs_connect(void); -int gfsc_fs_join(int fd, struct gfsc_mount_args *ma); -int gfsc_fs_remount(int fd, struct gfsc_mount_args *ma); -int gfsc_fs_result(int fd, int *result, struct gfsc_mount_args *ma); -int gfsc_fs_mount_done(int fd, struct gfsc_mount_args *ma, int result); -void gfsc_fs_disconnect(int fd); - -/* - * mount.gfs tells gfs_controld to do a leave (due to a mount failure) - * for unmount, gfs_controld leaves due to a message from the kernel - */ - -int gfsc_fs_leave(struct gfsc_mount_args *ma, int reason); - -#endif - diff --git a/group/libgfscontrol/main.c b/group/libgfscontrol/main.c deleted file mode 100644 index 96a8e03..0000000 --- a/group/libgfscontrol/main.c +++ /dev/null @@ -1,436 +0,0 @@ -#include "clusterautoconfig.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <stdint.h> -#include 
<errno.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/un.h> -#include <limits.h> - -#include "libgfscontrol.h" -#include "gfs_controld.h" - -static int do_read(int fd, void *buf, size_t count) -{ - int rv, off = 0; - - while (off < count) { - rv = read(fd, (char *)buf + off, count - off); - if (rv == 0) - return -1; - if (rv == -1 && errno == EINTR) - continue; - if (rv == -1) - return -1; - off += rv; - } - return 0; -} - -static int do_write(int fd, void *buf, size_t count) -{ - int rv, off = 0; - - retry: - rv = write(fd, (char *)buf + off, count); - if (rv == -1 && errno == EINTR) - goto retry; - if (rv < 0) { - return rv; - } - - if (rv != count) { - count -= rv; - off += rv; - goto retry; - } - return 0; -} - -static int do_connect(const char *sock_path) -{ - struct sockaddr_un sun; - socklen_t addrlen; - int rv, fd; - - fd = socket(PF_UNIX, SOCK_STREAM, 0); - if (fd < 0) - goto out; - - memset(&sun, 0, sizeof(sun)); - sun.sun_family = AF_UNIX; - strcpy(&sun.sun_path[1], sock_path); - addrlen = sizeof(sa_family_t) + strlen(sun.sun_path+1) + 1; - - rv = connect(fd, (struct sockaddr *) &sun, addrlen); - if (rv < 0) { - close(fd); - fd = rv; - } - out: - return fd; -} - -static void init_header(struct gfsc_header *h, int cmd, char *name, - int extra_len) -{ - memset(h, 0, sizeof(struct gfsc_header)); - - h->magic = GFSC_MAGIC; - h->version = GFSC_VERSION; - h->len = sizeof(struct gfsc_header) + extra_len; - h->command = cmd; - - if (name) - strncpy(h->name, name, GFS_MOUNTGROUP_LEN); -} - -static int do_dump(int cmd, char *name, char *buf) -{ - struct gfsc_header h, *rh; - char *reply; - int reply_len; - int fd, rv; - - init_header(&h, cmd, name, 0); - - reply_len = sizeof(struct gfsc_header) + GFSC_DUMP_SIZE; - reply = malloc(reply_len); - if (!reply) { - rv = -1; - goto out; - } - memset(reply, 0, reply_len); - - fd = do_connect(GFSC_QUERY_SOCK_PATH); - if (fd < 0) { - rv = fd; - goto out_free; - } - - rv = do_write(fd, &h, sizeof(h)); - if 
(rv < 0) - goto out_close; - - /* won't always get back the full reply_len */ - do_read(fd, reply, reply_len); - - rh = (struct gfsc_header *)reply; - rv = rh->data; - if (rv < 0) - goto out_close; - - memcpy(buf, (char *)reply + sizeof(struct gfsc_header), - GFSC_DUMP_SIZE); - out_close: - close(fd); - out_free: - free(reply); - out: - return rv; -} - -int gfsc_dump_debug(char *buf) -{ - return do_dump(GFSC_CMD_DUMP_DEBUG, NULL, buf); -} - -int gfsc_dump_plocks(char *name, char *buf) -{ - return do_dump(GFSC_CMD_DUMP_PLOCKS, name, buf); -} - -int gfsc_node_info(char *name, int nodeid, struct gfsc_node *node) -{ - struct gfsc_header h, *rh; - char reply[sizeof(struct gfsc_header) + sizeof(struct gfsc_node)]; - int fd, rv; - - init_header(&h, GFSC_CMD_NODE_INFO, name, 0); - h.data = nodeid; - - memset(reply, 0, sizeof(reply)); - - fd = do_connect(GFSC_QUERY_SOCK_PATH); - if (fd < 0) { - rv = fd; - goto out; - } - - rv = do_write(fd, &h, sizeof(h)); - if (rv < 0) - goto out_close; - - rv = do_read(fd, reply, sizeof(reply)); - if (rv < 0) - goto out_close; - - rh = (struct gfsc_header *)reply; - rv = rh->data; - if (rv < 0) - goto out_close; - - memcpy(node, (char *)reply + sizeof(struct gfsc_header), - sizeof(struct gfsc_node)); - out_close: - close(fd); - out: - return rv; -} - -int gfsc_mountgroup_info(char *name, struct gfsc_mountgroup *mountgroup) -{ - struct gfsc_header h, *rh; - char reply[sizeof(struct gfsc_header) + sizeof(struct gfsc_mountgroup)]; - int fd, rv; - - init_header(&h, GFSC_CMD_MOUNTGROUP_INFO, name, 0); - - memset(reply, 0, sizeof(reply)); - - fd = do_connect(GFSC_QUERY_SOCK_PATH); - if (fd < 0) { - rv = fd; - goto out; - } - - rv = do_write(fd, &h, sizeof(h)); - if (rv < 0) - goto out_close; - - rv = do_read(fd, reply, sizeof(reply)); - if (rv < 0) - goto out_close; - - rh = (struct gfsc_header *)reply; - rv = rh->data; - if (rv < 0) - goto out_close; - - memcpy(mountgroup, (char *)reply + sizeof(struct gfsc_header), - sizeof(struct 
gfsc_mountgroup)); - out_close: - close(fd); - out: - return rv; -} - -int gfsc_mountgroups(int max, int *count, struct gfsc_mountgroup *mgs) -{ - struct gfsc_header h, *rh; - char *reply; - int reply_len; - int fd, rv, result, mg_count; - - init_header(&h, GFSC_CMD_MOUNTGROUPS, NULL, 0); - h.data = max; - - reply_len = sizeof(struct gfsc_header) + - (max * sizeof(struct gfsc_mountgroup)); - reply = malloc(reply_len); - if (!reply) { - rv = -1; - goto out; - } - memset(reply, 0, reply_len); - - fd = do_connect(GFSC_QUERY_SOCK_PATH); - if (fd < 0) { - rv = fd; - goto out_free; - } - - rv = do_write(fd, &h, sizeof(h)); - if (rv < 0) - goto out_close; - - /* won't usually get back the full reply_len */ - do_read(fd, reply, reply_len); - - rh = (struct gfsc_header *)reply; - result = rh->data; - if (result < 0 && result != -E2BIG) { - rv = result; - goto out_close; - } - - if (result == -E2BIG) { - *count = -E2BIG; - mg_count = max; - } else { - *count = result; - mg_count = result; - } - rv = 0; - - memcpy(mgs, (char *)reply + sizeof(struct gfsc_header), - mg_count * sizeof(struct gfsc_mountgroup)); - out_close: - close(fd); - out_free: - free(reply); - out: - return rv; -} - -int gfsc_mountgroup_nodes(char *name, int type, int max, int *count, - struct gfsc_node *nodes) -{ - struct gfsc_header h, *rh; - char *reply; - int reply_len; - int fd, rv, result, node_count; - - init_header(&h, GFSC_CMD_MOUNTGROUP_NODES, name, 0); - h.option = type; - h.data = max; - - reply_len = sizeof(struct gfsc_header) + - (max * sizeof(struct gfsc_node)); - reply = malloc(reply_len); - if (!reply) { - rv = -1; - goto out; - } - memset(reply, 0, reply_len); - - fd = do_connect(GFSC_QUERY_SOCK_PATH); - if (fd < 0) { - rv = fd; - goto out_free; - } - - rv = do_write(fd, &h, sizeof(h)); - if (rv < 0) - goto out_close; - - /* won't usually get back the full reply_len */ - do_read(fd, reply, reply_len); - - rh = (struct gfsc_header *)reply; - result = rh->data; - if (result < 0 && result != 
-E2BIG) { - rv = result; - goto out_close; - } - - if (result == -E2BIG) { - *count = -E2BIG; - node_count = max; - } else { - *count = result; - node_count = result; - } - rv = 0; - - memcpy(nodes, (char *)reply + sizeof(struct gfsc_header), - node_count * sizeof(struct gfsc_node)); - out_close: - close(fd); - out_free: - free(reply); - out: - return rv; -} - -int gfsc_fs_connect(void) -{ - return do_connect(GFSC_SOCK_PATH); -} - -void gfsc_fs_disconnect(int fd) -{ - close(fd); -} - -int gfsc_fs_join(int fd, struct gfsc_mount_args *ma) -{ - char msg[sizeof(struct gfsc_header) + sizeof(struct gfsc_mount_args)]; - struct gfsc_header *h = (struct gfsc_header *)msg; - char *name = strstr(ma->table, ":") + 1; - - init_header(h, GFSC_CMD_FS_JOIN, name, sizeof(struct gfsc_mount_args)); - - memcpy(msg + sizeof(struct gfsc_header), ma, - sizeof(struct gfsc_mount_args)); - - return do_write(fd, msg, sizeof(msg)); -} - -int gfsc_fs_remount(int fd, struct gfsc_mount_args *ma) -{ - char msg[sizeof(struct gfsc_header) + sizeof(struct gfsc_mount_args)]; - struct gfsc_header *h = (struct gfsc_header *)msg; - char *name = strstr(ma->table, ":") + 1; - - init_header(h, GFSC_CMD_FS_REMOUNT, name, - sizeof(struct gfsc_mount_args)); - - memcpy(msg + sizeof(struct gfsc_header), ma, - sizeof(struct gfsc_mount_args)); - - return do_write(fd, msg, sizeof(msg)); -} - -int gfsc_fs_result(int fd, int *result, struct gfsc_mount_args *ma) -{ - char reply[sizeof(struct gfsc_header) + sizeof(struct gfsc_mount_args)]; - struct gfsc_header *h = (struct gfsc_header *)reply; - int rv; - - rv = do_read(fd, reply, sizeof(reply)); - if (rv < 0) - goto out; - - *result = h->data; - - memcpy(ma, reply + sizeof(struct gfsc_header), - sizeof(struct gfsc_mount_args)); - out: - return rv; -} - -int gfsc_fs_mount_done(int fd, struct gfsc_mount_args *ma, int result) -{ - char msg[sizeof(struct gfsc_header) + sizeof(struct gfsc_mount_args)]; - struct gfsc_header *h = (struct gfsc_header *)msg; - char *name = 
strstr(ma->table, ":") + 1; - - init_header(h, GFSC_CMD_FS_MOUNT_DONE, name, - sizeof(struct gfsc_mount_args)); - - h->data = result; - - memcpy(msg + sizeof(struct gfsc_header), ma, - sizeof(struct gfsc_mount_args)); - - return do_write(fd, msg, sizeof(msg)); -} - -int gfsc_fs_leave(struct gfsc_mount_args *ma, int reason) -{ - char msg[sizeof(struct gfsc_header) + sizeof(struct gfsc_mount_args)]; - struct gfsc_header *h = (struct gfsc_header *)msg; - char *name = strstr(ma->table, ":") + 1; - int fd, err; - - init_header(h, GFSC_CMD_FS_LEAVE, name, - sizeof(struct gfsc_mount_args)); - - h->data = reason; - - memcpy(msg + sizeof(struct gfsc_header), ma, - sizeof(struct gfsc_mount_args)); - - fd = do_connect(GFSC_SOCK_PATH); - if (fd < 0) - return fd; - - err = do_write(fd, msg, sizeof(msg)); - close(fd); - return err; -} - diff --git a/group/libgfscontrol/target.mk b/group/libgfscontrol/target.mk deleted file mode 100644 index 23bd3ac..0000000 --- a/group/libgfscontrol/target.mk +++ /dev/null @@ -1,3 +0,0 @@ - -$(eval $(call make-library,libgfscontrol.a)) - diff --git a/group/man/Makefile.am b/group/man/Makefile.am deleted file mode 100644 index 87d81e5..0000000 --- a/group/man/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -MAINTAINERCLEANFILES = Makefile.in - -dist_man_MANS = gfs_controld.8 diff --git a/group/man/gfs_controld.8 b/group/man/gfs_controld.8 deleted file mode 100644 index 3d053ee..0000000 --- a/group/man/gfs_controld.8 +++ /dev/null @@ -1,122 +0,0 @@ -.TH gfs_controld 8 - -.SH NAME -gfs_controld - daemon that manages mounting, unmounting, recovery and -posix locks - -.SH SYNOPSIS -.B -gfs_controld -[\fIOPTION\fR]... - -.SH DESCRIPTION -GFS lives in the kernel, and the cluster infrastructure (cluster -membership and group management) lives in user space. GFS in the kernel -needs to adjust/recover for certain cluster events. It's the job of -gfs_controld to receive these events and reconfigure gfs as needed. 
-gfs_controld controls and configures gfs through sysfs files that are -considered gfs-internal interfaces; not a general API/ABI. - -Mounting, unmounting and node failure are the main cluster events that -gfs_controld controls. It also manages the assignment of journals to -different nodes. The mount.gfs and umount.gfs programs communicate with -gfs_controld to join/leave the mount group and receive the necessary -options for the kernel mount. - -GFS also sends all posix lock operations to gfs_controld for processing. -gfs_controld manages cluster-wide posix locks for gfs and passes results -back to gfs in the kernel. - -.SH CONFIGURATION FILE - -Optional cluster.conf settings are placed in the <gfs_controld> section. - -.SS Posix locks - -Heavy use of plocks can result in high network load. The rate at which -plocks are processed are limited by the -.I plock_rate_limit -setting, which limits the maximum plock performance, and limits potentially -excessive network load. This value is the maximum number of plock operations -a single node will process every second. To achieve maximum posix locking -performance, the rate limiting should be disabled by setting it to 0. The -default value is 100. - - <gfs_controld plock_rate_limit="100"/> - -To optimize performance for repeated locking of the same locks by -processes on a single node, -.I plock_ownership -can be set to 1. The default is 0. If this is enabled, gfs_controld -cannot interoperate with older versions that did not support this option. - - <gfs_controld plock_ownership="1"/> - -Three options can be used to tune the behavior of the plock_ownership -optimization. All three relate to the caching of lock ownership state. -Specifically, they define how aggressively cached ownership state is dropped. -More caching of ownership state can result in better performance, at the -expense of more memory usage. - -.I drop_resources_time -is the frequency of drop attempts in milliseconds. Default 10000 (10 sec). 
- -.I drop_resources_count -is the maximum number of items to drop from the cache each time. Default 10. - -.I drop_resources_age -is the time in milliseconds a cached item should be unused before being -considered for dropping. Default 10000 (10 sec). - - <gfs_controld drop_resources_time="10000" drop_resources_count="10" - drop_resources_age="10000"/> - - -.SH OPTIONS -.TP -\fB-D\fP -Run the daemon in the foreground and print debug statements to stdout. -.TP -\fB-P\fP -Enable posix lock debugging messages. -.TP -\fB-w\fP -Disable the "withdraw" feature. -.TP -\fB-p\fP -Disable posix lock handling. -.TP -\fB-l\fP <num> -Limit the rate at which posix lock messages are sent to <num> messages per -second. 0 disables the limit and results in the maximum performance of -posix locks. Default 100. -.TP -\fB-o\fP <num> -Enable (1) or disable (0) plock ownership optimization. Default 0. All -nodes must run with the same value. -.TP -\fB-t\fP <ms> -Ownership cache tuning, drop resources time (milliseconds). Default 10000. -.TP -\fB-c\fP <ms> -Ownership cache tuning, drop resources count. Default 10. -.TP -\fB-a\fP <ms> -Ownership cache tuning, drop resources age (milliseconds). Default 10000. -.TP -\fB-h\fP -Print out a help message describing available options, then exit. -.TP -\fB-V\fP -Print the version information and exit. - -.SH DEBUGGING -The gfs_controld daemon keeps a circular buffer of debug messages that can -be dumped with the 'group_tool dump gfs' command. - -The state of all gfs posix locks can also be dumped from gfs_controld with -the 'group_tool dump plocks <fsname>' command. 
- -.SH SEE ALSO -groupd(8), group_tool(8) - diff --git a/group/man/target.mk b/group/man/target.mk deleted file mode 100644 index 2567e81..0000000 --- a/group/man/target.mk +++ /dev/null @@ -1,3 +0,0 @@ - -manpages += *.[0-9] - diff --git a/tests/Makefile.am b/tests/Makefile.am index a97b8c5..af01c49 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,5 +1,6 @@ TESTS_ENVIRONMENT = TOPBUILDDIR=$(top_builddir) EXTRA_DIST = tool_tests.sh +CLEANFILES = tests.log
if BUILD_TESTS check_PROGRAMS = check_libgfs2
cluster-commits@lists.stg.fedorahosted.org