Thread View
j
: Next unread message
k
: Previous unread message
j a
: Jump to all threads
j l
: Jump to MailingList overview
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=3bc9b4611a837632c6bc89586c0fdce2bdb5ba4d
Commit: 3bc9b4611a837632c6bc89586c0fdce2bdb5ba4d
Parent: b440452a8a553e1b3df2a92144077ba4459688fc
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Fri Sep 30 17:03:21 2011 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Fri Sep 30 17:03:21 2011 -0500
Makefiles: fix include paths
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
dlm_controld/Makefile | 2 +-
dlm_tool/Makefile | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/dlm_controld/Makefile b/dlm_controld/Makefile
index f643ec3..722d8c2 100644
--- a/dlm_controld/Makefile
+++ b/dlm_controld/Makefile
@@ -45,7 +45,7 @@ BIN_CFLAGS += -D_GNU_SOURCE -g \
BIN_CFLAGS += -fPIE -DPIE
BIN_CFLAGS += `xml2-config --cflags`
-BIN_CFLAGS += -I../dlm/libdlm
+BIN_CFLAGS += -I../libdlm
BIN_LDFLAGS += -Wl,-z,now -Wl,-z,relro -pie
BIN_LDFLAGS += `xml2-config --libs`
diff --git a/dlm_tool/Makefile b/dlm_tool/Makefile
index 9f24a24..f2cd4b6 100644
--- a/dlm_tool/Makefile
+++ b/dlm_tool/Makefile
@@ -25,7 +25,7 @@ BIN_CFLAGS += -D_GNU_SOURCE -g \
-fdiagnostics-show-option \
BIN_CFLAGS += -fPIE -DPIE
-BIN_CFLAGS += -I../include -I../dlm/libdlm -I../dlm_controld
+BIN_CFLAGS += -I../include -I../libdlm -I../dlm_controld
BIN_LDFLAGS += -Wl,-z,now -Wl,-z,relro -pie
BIN_LDFLAGS += -lpthread -ldlm -ldlmcontrol
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=b440452a8a553e1b3df2a92144077ba4459688fc
Commit: b440452a8a553e1b3df2a92144077ba4459688fc
Parent: 8d03f2e225d3a2109def78ddf9c7bed13575ea2e
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Fri Sep 30 16:48:57 2011 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Fri Sep 30 16:54:07 2011 -0500
libdlm: move files
and new Makefile
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
dlm/libdlm/51-dlm.rules | 5 -
dlm/libdlm/libdlm.c | 1487 ------------------------------------
dlm/libdlm/libdlm.h | 275 -------
dlm/libdlm/libdlm_internal.h | 9 -
dlm/man/dlm_cleanup.3 | 1 -
dlm/man/dlm_close_lockspace.3 | 1 -
dlm/man/dlm_create_lockspace.3 | 94 ---
dlm/man/dlm_dispatch.3 | 1 -
dlm/man/dlm_get_fd.3 | 1 -
dlm/man/dlm_lock.3 | 239 ------
dlm/man/dlm_lock_wait.3 | 1 -
dlm/man/dlm_ls_lock.3 | 1 -
dlm/man/dlm_ls_lock_wait.3 | 1 -
dlm/man/dlm_ls_lockx.3 | 1 -
dlm/man/dlm_ls_pthread_init.3 | 1 -
dlm/man/dlm_ls_unlock.3 | 1 -
dlm/man/dlm_ls_unlock_wait.3 | 1 -
dlm/man/dlm_new_lockspace.3 | 1 -
dlm/man/dlm_open_lockspace.3 | 1 -
dlm/man/dlm_pthread_init.3 | 1 -
dlm/man/dlm_release_lockspace.3 | 1 -
dlm/man/dlm_unlock.3 | 94 ---
dlm/man/dlm_unlock_wait.3 | 1 -
dlm/man/libdlm.3 | 105 ---
libdlm/51-dlm.rules | 5 +
libdlm/Makefile | 117 +++
libdlm/libdlm.c | 1485 +++++++++++++++++++++++++++++++++++
libdlm/libdlm.h | 275 +++++++
libdlm/libdlm_internal.h | 9 +
libdlm/man/dlm_cleanup.3 | 1 +
libdlm/man/dlm_close_lockspace.3 | 1 +
libdlm/man/dlm_create_lockspace.3 | 94 +++
libdlm/man/dlm_dispatch.3 | 1 +
libdlm/man/dlm_get_fd.3 | 1 +
libdlm/man/dlm_lock.3 | 239 ++++++
libdlm/man/dlm_lock_wait.3 | 1 +
libdlm/man/dlm_ls_lock.3 | 1 +
libdlm/man/dlm_ls_lock_wait.3 | 1 +
libdlm/man/dlm_ls_lockx.3 | 1 +
libdlm/man/dlm_ls_pthread_init.3 | 1 +
libdlm/man/dlm_ls_unlock.3 | 1 +
libdlm/man/dlm_ls_unlock_wait.3 | 1 +
libdlm/man/dlm_new_lockspace.3 | 1 +
libdlm/man/dlm_open_lockspace.3 | 1 +
libdlm/man/dlm_pthread_init.3 | 1 +
libdlm/man/dlm_release_lockspace.3 | 1 +
libdlm/man/dlm_unlock.3 | 94 +++
libdlm/man/dlm_unlock_wait.3 | 1 +
libdlm/man/libdlm.3 | 105 +++
49 files changed, 2439 insertions(+), 2324 deletions(-)
diff --git a/dlm/libdlm/51-dlm.rules b/dlm/libdlm/51-dlm.rules
deleted file mode 100644
index f71e79d..0000000
--- a/dlm/libdlm/51-dlm.rules
+++ /dev/null
@@ -1,5 +0,0 @@
-KERNEL=="dlm-control", NAME="misc/dlm-control", MODE="0666"
-KERNEL=="dlm-monitor", NAME="misc/dlm-monitor", MODE="0666"
-KERNEL=="dlm_default", NAME="misc/dlm_default", MODE="0666"
-KERNEL=="dlm_*", NAME="misc/%k", MODE="0660"
-
diff --git a/dlm/libdlm/libdlm.c b/dlm/libdlm/libdlm.c
deleted file mode 100644
index 823bb84..0000000
--- a/dlm/libdlm/libdlm.c
+++ /dev/null
@@ -1,1487 +0,0 @@
-#include "clusterautoconfig.h"
-
-#ifdef _REENTRANT
-#include <pthread.h>
-#endif
-#include <sys/types.h>
-#include <sys/ioctl.h>
-#include <sys/param.h>
-#include <sys/stat.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <inttypes.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <string.h>
-#include <stdio.h>
-#include <dirent.h>
-#include <linux/major.h>
-#ifdef HAVE_SELINUX
-#include <selinux/selinux.h>
-#endif
-#include <linux/types.h>
-#include <linux/dlm.h>
-#define BUILDING_LIBDLM
-#include "libdlm.h"
-#include <linux/dlm_device.h>
-
-#define MISC_PREFIX "/dev/misc/"
-#define DLM_PREFIX "dlm_"
-#define DLM_MISC_PREFIX MISC_PREFIX DLM_PREFIX
-#define DLM_CONTROL_NAME "dlm-control"
-#define DLM_CONTROL_PATH MISC_PREFIX DLM_CONTROL_NAME
-#define DEFAULT_LOCKSPACE "default"
-
-/*
- * V5 of the dlm_device.h kernel/user interface structs
- */
-
-struct dlm_lock_params_v5 {
- __u8 mode;
- __u8 namelen;
- __u16 flags;
- __u32 lkid;
- __u32 parent;
- void *castparam;
- void *castaddr;
- void *bastparam;
- void *bastaddr;
- struct dlm_lksb *lksb;
- char lvb[DLM_USER_LVB_LEN];
- char name[0];
-};
-
-struct dlm_write_request_v5 {
- __u32 version[3];
- __u8 cmd;
- __u8 is64bit;
- __u8 unused[2];
-
- union {
- struct dlm_lock_params_v5 lock;
- struct dlm_lspace_params lspace;
- } i;
-};
-
-struct dlm_lock_result_v5 {
- __u32 length;
- void *user_astaddr;
- void *user_astparam;
- struct dlm_lksb *user_lksb;
- struct dlm_lksb lksb;
- __u8 bast_mode;
- __u8 unused[3];
- /* Offsets may be zero if no data is present */
- __u32 lvb_offset;
-};
-
-
-/*
- * One of these per lockspace in use by the application
- */
-
-struct dlm_ls_info {
- int fd;
-#ifdef _REENTRANT
- pthread_t tid;
-#else
- int tid;
-#endif
-};
-
-/*
- * The default lockspace.
- * I've resisted putting locking around this as the user should be
- * "sensible" and only do lockspace operations either in the
- * main thread or ... carefully...
- */
-
-static struct dlm_ls_info *default_ls = NULL;
-static int control_fd = -1;
-static struct dlm_device_version kernel_version;
-static int kernel_version_detected = 0;
-
-
-static int release_lockspace(uint32_t minor, uint32_t flags);
-
-
-static void ls_dev_name(const char *lsname, char *devname, int devlen)
-{
- snprintf(devname, devlen, DLM_MISC_PREFIX "%s", lsname);
-}
-
-static void dummy_ast_routine(void *arg)
-{
-}
-
-#ifdef _REENTRANT
-/* Used for the synchronous and "simplified, synchronous" API routines */
-struct lock_wait
-{
- pthread_cond_t cond;
- pthread_mutex_t mutex;
- struct dlm_lksb lksb;
-};
-
-static void sync_ast_routine(void *arg)
-{
- struct lock_wait *lwait = arg;
-
- pthread_mutex_lock(&lwait->mutex);
- pthread_cond_signal(&lwait->cond);
- pthread_mutex_unlock(&lwait->mutex);
-}
-
-/* lock_resource & unlock_resource
- * are the simplified, synchronous API.
- * Aways uses the default lockspace.
- */
-int lock_resource(const char *resource, int mode, int flags, int *lockid)
-{
- int status;
- struct lock_wait lwait;
-
- if (default_ls == NULL)
- {
- if (dlm_pthread_init())
- {
- return -1;
- }
- }
-
- if (!lockid)
- {
- errno = EINVAL;
- return -1;
- }
-
- /* Conversions need the lockid in the LKSB */
- if (flags & LKF_CONVERT)
- lwait.lksb.sb_lkid = *lockid;
-
- pthread_cond_init(&lwait.cond, NULL);
- pthread_mutex_init(&lwait.mutex, NULL);
- pthread_mutex_lock(&lwait.mutex);
-
- status = dlm_lock(mode,
- &lwait.lksb,
- flags,
- resource,
- strlen(resource),
- 0,
- sync_ast_routine,
- &lwait,
- NULL,
- NULL);
- if (status)
- return status;
-
- /* Wait for it to complete */
- pthread_cond_wait(&lwait.cond, &lwait.mutex);
- pthread_mutex_unlock(&lwait.mutex);
-
- *lockid = lwait.lksb.sb_lkid;
-
- errno = lwait.lksb.sb_status;
- if (lwait.lksb.sb_status)
- return -1;
- else
- return 0;
-}
-
-
-int unlock_resource(int lockid)
-{
- int status;
- struct lock_wait lwait;
-
- if (default_ls == NULL)
- {
- errno = -ENOTCONN;
- return -1;
- }
-
- pthread_cond_init(&lwait.cond, NULL);
- pthread_mutex_init(&lwait.mutex, NULL);
- pthread_mutex_lock(&lwait.mutex);
-
- status = dlm_unlock(lockid, 0, &lwait.lksb, &lwait);
-
- if (status)
- return status;
-
- /* Wait for it to complete */
- pthread_cond_wait(&lwait.cond, &lwait.mutex);
- pthread_mutex_unlock(&lwait.mutex);
-
- errno = lwait.lksb.sb_status;
- if (lwait.lksb.sb_status != DLM_EUNLOCK)
- return -1;
- else
- return 0;
-}
-
-/* Tidy up threads after a lockspace is closed */
-static int ls_pthread_cleanup(struct dlm_ls_info *lsinfo)
-{
- int status = 0;
- int fd;
-
- /* Must close the fd after the thread has finished */
- fd = lsinfo->fd;
- if (lsinfo->tid)
- {
- status = pthread_cancel(lsinfo->tid);
- if (!status)
- pthread_join(lsinfo->tid, NULL);
- }
- if (!status)
- {
- free(lsinfo);
- close(fd);
- }
-
- return status;
-}
-
-/* Cleanup default lockspace */
-int dlm_pthread_cleanup(void)
-{
- struct dlm_ls_info *lsinfo = default_ls;
-
- /* Protect users from their own stupidity */
- if (!lsinfo)
- return 0;
-
- default_ls = NULL;
-
- return ls_pthread_cleanup(lsinfo);
-}
-#else
-
-/* Non-pthread version of cleanup */
-static int ls_pthread_cleanup(struct dlm_ls_info *lsinfo)
-{
- close(lsinfo->fd);
- free(lsinfo);
- return 0;
-}
-#endif
-
-
-static void set_version_v5(struct dlm_write_request_v5 *req)
-{
- req->version[0] = kernel_version.version[0];
- req->version[1] = kernel_version.version[1];
- req->version[2] = kernel_version.version[2];
- if (sizeof(long) == sizeof(long long))
- req->is64bit = 1;
- else
- req->is64bit = 0;
-}
-
-static void set_version_v6(struct dlm_write_request *req)
-{
- req->version[0] = kernel_version.version[0];
- req->version[1] = kernel_version.version[1];
- req->version[2] = kernel_version.version[2];
- if (sizeof(long) == sizeof(long long))
- req->is64bit = 1;
- else
- req->is64bit = 0;
-}
-
-static int open_default_lockspace(void)
-{
- if (!default_ls) {
- dlm_lshandle_t ls;
-
- /* This isn't the race it looks, create_lockspace will
- * do the right thing if the lockspace has already been
- * created.
- */
-
- ls = dlm_open_lockspace(DEFAULT_LOCKSPACE);
- if (!ls)
- ls = dlm_create_lockspace(DEFAULT_LOCKSPACE, 0600);
- if (!ls)
- return -1;
-
- default_ls = (struct dlm_ls_info *)ls;
- }
- return 0;
-}
-
-static void detect_kernel_version(void)
-{
- struct dlm_device_version v;
- int rv;
-
- rv = read(control_fd, &v, sizeof(struct dlm_device_version));
- if (rv < 0) {
- kernel_version.version[0] = 5;
- kernel_version.version[1] = 0;
- kernel_version.version[2] = 0;
- } else {
- kernel_version.version[0] = v.version[0];
- kernel_version.version[1] = v.version[1];
- kernel_version.version[2] = v.version[2];
- }
-
- kernel_version_detected = 1;
-}
-
-static int find_control_minor(int *minor)
-{
- FILE *f;
- char name[256];
- int found = 0, m = 0;
-
- f = fopen("/proc/misc", "r");
- if (!f)
- return -1;
-
- while (!feof(f)) {
- if (fscanf(f, "%d %s", &m, name) != 2)
- continue;
- if (strcmp(name, DLM_CONTROL_NAME))
- continue;
- found = 1;
- break;
- }
- fclose(f);
-
- if (found) {
- *minor = m;
- return 0;
- }
- return -1;
-}
-
-static int open_control_device(void)
-{
- struct stat st;
- int i, rv, minor, found = 0;
-
- if (control_fd > -1)
- goto out;
-
- rv = find_control_minor(&minor);
- if (rv < 0)
- return -1;
-
- /* wait for udev to create the device */
-
- for (i = 0; i < 10; i++) {
- if (stat(DLM_CONTROL_PATH, &st) == 0 &&
- minor(st.st_rdev) == minor) {
- found = 1;
- break;
- }
- sleep(1);
- continue;
- }
-
- if (!found)
- return -1;
-
- control_fd = open(DLM_CONTROL_PATH, O_RDWR);
- if (control_fd == -1)
- return -1;
-
- out:
- fcntl(control_fd, F_SETFD, 1);
-
- if (!kernel_version_detected)
- detect_kernel_version();
- return 0;
-}
-
-/* the max number of characters in a sysfs device name, not including \0 */
-#define MAX_SYSFS_NAME 19
-
-static int find_udev_device(const char *lockspace, int minor, char *udev_path)
-{
- char basename[PATH_MAX];
- char tmp_path[PATH_MAX];
- DIR *d;
- struct dirent *de;
- struct stat st;
- size_t basename_len;
- int i;
-
- ls_dev_name(lockspace, udev_path, PATH_MAX);
- snprintf(basename, PATH_MAX, DLM_PREFIX "%s", lockspace);
- basename_len = strlen(basename);
-
- for (i = 0; i < 10; i++) {
-
- /* look for a device with the full name */
-
- if (stat(udev_path, &st) == 0 && minor(st.st_rdev) == minor)
- return 0;
-
- if (basename_len < MAX_SYSFS_NAME) {
- sleep(1);
- continue;
- }
-
- /* look for a device with a truncated name */
-
- d = opendir(MISC_PREFIX);
- while ((de = readdir(d))) {
- if (de->d_name[0] == '.')
- continue;
- if (strlen(de->d_name) < MAX_SYSFS_NAME)
- continue;
- if (strncmp(de->d_name, basename, MAX_SYSFS_NAME))
- continue;
- snprintf(tmp_path, PATH_MAX, MISC_PREFIX "%s",
- de->d_name);
- if (stat(tmp_path, &st))
- continue;
- if (minor(st.st_rdev) != minor)
- continue;
-
- /* truncated name */
- strncpy(udev_path, tmp_path, PATH_MAX);
- closedir(d);
- return 0;
- }
- closedir(d);
- sleep(1);
- }
-
- return -1;
-}
-
-/*
- * do_dlm_dispatch()
- * Read an ast from the kernel.
- */
-
-static int do_dlm_dispatch_v5(int fd)
-{
- char resultbuf[sizeof(struct dlm_lock_result_v5) + DLM_USER_LVB_LEN];
- struct dlm_lock_result_v5 *result = (struct dlm_lock_result_v5 *)resultbuf;
- char *fullresult = NULL;
- int status;
- void (*astaddr)(void *astarg);
-
- status = read(fd, result, sizeof(resultbuf));
- if (status <= 0)
- return -1;
-
- /* This shouldn't happen any more, can probably be removed */
-
- if (result->length != status) {
- int newstat;
-
- fullresult = malloc(result->length);
- if (!fullresult)
- return -1;
-
- newstat = read(fd, (struct dlm_lock_result_v5 *)fullresult,
- result->length);
-
- /* If it read OK then use the new data. otherwise we can
- still deliver the AST, it just might not have all the
- info in it...hmmm */
-
- if (newstat == result->length)
- result = (struct dlm_lock_result_v5 *)fullresult;
- } else {
- fullresult = resultbuf;
- }
-
-
- /* Copy lksb to user's buffer - except the LVB ptr */
- memcpy(result->user_lksb, &result->lksb,
- sizeof(struct dlm_lksb) - sizeof(char*));
-
- /* Flip the status. Kernel space likes negative return codes,
- userspace positive ones */
- result->user_lksb->sb_status = -result->user_lksb->sb_status;
-
- /* Copy optional items */
- if (result->lvb_offset)
- memcpy(result->user_lksb->sb_lvbptr,
- fullresult + result->lvb_offset, DLM_LVB_LEN);
-
- /* Call AST */
- if (result->user_astaddr) {
- astaddr = result->user_astaddr;
- astaddr(result->user_astparam);
- }
-
- if (fullresult != resultbuf)
- free(fullresult);
-
- return 0;
-}
-
-static int do_dlm_dispatch_v6(int fd)
-{
- char resultbuf[sizeof(struct dlm_lock_result) + DLM_USER_LVB_LEN];
- struct dlm_lock_result *result = (struct dlm_lock_result *)resultbuf;
- int status;
- void (*astaddr)(void *astarg);
-
- status = read(fd, result, sizeof(resultbuf));
- if (status <= 0)
- return -1;
-
- /* Copy lksb to user's buffer - except the LVB ptr */
- memcpy(result->user_lksb, &result->lksb,
- sizeof(struct dlm_lksb) - sizeof(char*));
-
- /* Copy lvb to user's buffer */
- if (result->lvb_offset)
- memcpy(result->user_lksb->sb_lvbptr,
- (char *)result + result->lvb_offset, DLM_LVB_LEN);
-
- result->user_lksb->sb_status = -result->user_lksb->sb_status;
-
- if (result->user_astaddr) {
- astaddr = result->user_astaddr;
- astaddr(result->user_astparam);
- }
-
- return 0;
-}
-
-static int do_dlm_dispatch(int fd)
-{
- if (kernel_version.version[0] == 5)
- return do_dlm_dispatch_v5(fd);
- else
- return do_dlm_dispatch_v6(fd);
-}
-
-
-/*
- * sync_write()
- * Helper routine which supports the synchronous DLM calls. This
- * writes a parameter block down to the DLM and waits for the
- * operation to complete. This hides the different completion mechanism
- * used when called from the main thread or the DLM 'AST' thread.
- */
-
-#ifdef _REENTRANT
-
-static int sync_write_v5(struct dlm_ls_info *lsinfo,
- struct dlm_write_request_v5 *req, int len)
-{
- struct lock_wait lwait;
- int status;
-
- if (pthread_self() == lsinfo->tid) {
- /* This is the DLM worker thread, don't use lwait to sync */
- req->i.lock.castaddr = dummy_ast_routine;
- req->i.lock.castparam = NULL;
-
- status = write(lsinfo->fd, req, len);
- if (status < 0)
- return -1;
-
- while (req->i.lock.lksb->sb_status == EINPROG) {
- do_dlm_dispatch_v5(lsinfo->fd);
- }
- } else {
- pthread_cond_init(&lwait.cond, NULL);
- pthread_mutex_init(&lwait.mutex, NULL);
- pthread_mutex_lock(&lwait.mutex);
-
- req->i.lock.castaddr = sync_ast_routine;
- req->i.lock.castparam = &lwait;
-
- status = write(lsinfo->fd, req, len);
- if (status < 0)
- return -1;
-
- pthread_cond_wait(&lwait.cond, &lwait.mutex);
- pthread_mutex_unlock(&lwait.mutex);
- }
-
- return status; /* lock status is in the lksb */
-}
-
-static int sync_write_v6(struct dlm_ls_info *lsinfo,
- struct dlm_write_request *req, int len)
-{
- struct lock_wait lwait;
- int status;
-
- if (pthread_self() == lsinfo->tid) {
- /* This is the DLM worker thread, don't use lwait to sync */
- req->i.lock.castaddr = dummy_ast_routine;
- req->i.lock.castparam = NULL;
-
- status = write(lsinfo->fd, req, len);
- if (status < 0)
- return -1;
-
- while (req->i.lock.lksb->sb_status == EINPROG) {
- do_dlm_dispatch_v6(lsinfo->fd);
- }
- } else {
- pthread_cond_init(&lwait.cond, NULL);
- pthread_mutex_init(&lwait.mutex, NULL);
- pthread_mutex_lock(&lwait.mutex);
-
- req->i.lock.castaddr = sync_ast_routine;
- req->i.lock.castparam = &lwait;
-
- status = write(lsinfo->fd, req, len);
- if (status < 0)
- return -1;
-
- pthread_cond_wait(&lwait.cond, &lwait.mutex);
- pthread_mutex_unlock(&lwait.mutex);
- }
-
- return status; /* lock status is in the lksb */
-}
-
-#else /* _REENTRANT */
-
-static int sync_write_v5(struct dlm_ls_info *lsinfo,
- struct dlm_write_request_v5 *req, int len)
-{
- int status;
-
- req->i.lock.castaddr = dummy_ast_routine;
- req->i.lock.castparam = NULL;
-
- status = write(lsinfo->fd, req, len);
- if (status < 0)
- return -1;
-
- while (req->i.lock.lksb->sb_status == EINPROG) {
- do_dlm_dispatch_v5(lsinfo->fd);
- }
-
- errno = req->i.lock.lksb->sb_status;
- if (errno && errno != EUNLOCK)
- return -1;
- return 0;
-}
-
-static int sync_write_v6(struct dlm_ls_info *lsinfo,
- struct dlm_write_request *req, int len)
-{
- int status;
-
- req->i.lock.castaddr = dummy_ast_routine;
- req->i.lock.castparam = NULL;
-
- status = write(lsinfo->fd, req, len);
- if (status < 0)
- return -1;
-
- while (req->i.lock.lksb->sb_status == EINPROG) {
- do_dlm_dispatch_v6(lsinfo->fd);
- }
-
- errno = req->i.lock.lksb->sb_status;
- if (errno && errno != EUNLOCK)
- return -1;
- return 0;
-}
-
-#endif /* _REENTRANT */
-
-
-/*
- * Lock
- * All the ways to request/convert a lock
- */
-
-static int ls_lock_v5(dlm_lshandle_t ls,
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent,
- void (*astaddr) (void *astarg),
- void *astarg,
- void (*bastaddr) (void *astarg))
-{
- char parambuf[sizeof(struct dlm_write_request_v5) + DLM_RESNAME_MAXLEN];
- struct dlm_write_request_v5 *req = (struct dlm_write_request_v5 *)parambuf;
- struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)ls;
- int status;
- int len;
-
- memset(req, 0, sizeof(*req));
- set_version_v5(req);
-
- req->cmd = DLM_USER_LOCK;
- req->i.lock.mode = mode;
- req->i.lock.flags = (flags & ~LKF_WAIT);
- req->i.lock.lkid = lksb->sb_lkid;
- req->i.lock.parent = parent;
- req->i.lock.lksb = lksb;
- req->i.lock.castaddr = astaddr;
- req->i.lock.bastaddr = bastaddr;
- req->i.lock.castparam = astarg; /* same comp and blocking ast arg */
- req->i.lock.bastparam = astarg;
-
- if (flags & LKF_CONVERT) {
- req->i.lock.namelen = 0;
- } else {
- if (namelen > DLM_RESNAME_MAXLEN) {
- errno = EINVAL;
- return -1;
- }
- req->i.lock.namelen = namelen;
- memcpy(req->i.lock.name, name, namelen);
- }
-
- if (flags & LKF_VALBLK) {
- memcpy(req->i.lock.lvb, lksb->sb_lvbptr, DLM_LVB_LEN);
- }
-
- len = sizeof(struct dlm_write_request_v5) + namelen;
- lksb->sb_status = EINPROG;
-
- if (flags & LKF_WAIT)
- status = sync_write_v5(lsinfo, req, len);
- else
- status = write(lsinfo->fd, req, len);
-
- if (status < 0)
- return -1;
-
- /*
- * the lock id is the return value from the write on the device
- */
-
- if (status > 0)
- lksb->sb_lkid = status;
- return 0;
-}
-
-static int ls_lock_v6(dlm_lshandle_t ls,
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent,
- void (*astaddr) (void *astarg),
- void *astarg,
- void (*bastaddr) (void *astarg),
- uint64_t *xid,
- uint64_t *timeout)
-{
- char parambuf[sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN];
- struct dlm_write_request *req = (struct dlm_write_request *)parambuf;
- struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)ls;
- int status;
- int len;
-
- memset(req, 0, sizeof(*req));
- set_version_v6(req);
-
- req->cmd = DLM_USER_LOCK;
- req->i.lock.mode = mode;
- req->i.lock.flags = (flags & ~LKF_WAIT);
- req->i.lock.lkid = lksb->sb_lkid;
- req->i.lock.parent = parent;
- req->i.lock.lksb = lksb;
- req->i.lock.castaddr = astaddr;
- req->i.lock.bastaddr = bastaddr;
- req->i.lock.castparam = astarg; /* same comp and blocking ast arg */
- req->i.lock.bastparam = astarg;
-
- if (xid)
- req->i.lock.xid = *xid;
- if (timeout)
- req->i.lock.timeout = *timeout;
-
- if (flags & LKF_CONVERT) {
- req->i.lock.namelen = 0;
- } else {
- if (namelen > DLM_RESNAME_MAXLEN) {
- errno = EINVAL;
- return -1;
- }
- req->i.lock.namelen = namelen;
- memcpy(req->i.lock.name, name, namelen);
- }
-
- if (flags & LKF_VALBLK) {
- memcpy(req->i.lock.lvb, lksb->sb_lvbptr, DLM_LVB_LEN);
- }
-
- len = sizeof(struct dlm_write_request) + namelen;
- lksb->sb_status = EINPROG;
-
- if (flags & LKF_WAIT)
- status = sync_write_v6(lsinfo, req, len);
- else
- status = write(lsinfo->fd, req, len);
-
- if (status < 0)
- return -1;
-
- /*
- * the lock id is the return value from the write on the device
- */
-
- if (status > 0)
- lksb->sb_lkid = status;
- return 0;
-}
-
-static int ls_lock(dlm_lshandle_t ls,
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent,
- void (*astaddr) (void *astarg),
- void *astarg,
- void (*bastaddr) (void *astarg),
- void *range)
-{
- /* no support for range locks */
- if (range) {
- errno = ENOSYS;
- return -1;
- }
-
- if (flags & LKF_VALBLK && !lksb->sb_lvbptr) {
- errno = EINVAL;
- return -1;
- }
-
- if (kernel_version.version[0] == 5)
- return ls_lock_v5(ls, mode, lksb, flags, name, namelen, parent,
- astaddr, astarg, bastaddr);
- else
- return ls_lock_v6(ls, mode, lksb, flags, name, namelen, parent,
- astaddr, astarg, bastaddr, NULL, NULL);
-}
-
-/*
- * Extended async locking in own lockspace
- */
-int dlm_ls_lockx(dlm_lshandle_t ls,
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent,
- void (*astaddr) (void *astarg),
- void *astarg,
- void (*bastaddr) (void *astarg),
- uint64_t *xid,
- uint64_t *timeout)
-{
- if (kernel_version.version[0] < 6) {
- errno = ENOSYS;
- return -1;
- }
-
- return ls_lock_v6(ls, mode, lksb, flags, name, namelen, parent,
- astaddr, astarg, bastaddr, xid, timeout);
-}
-
-/*
- * Async locking in own lockspace
- */
-int dlm_ls_lock(dlm_lshandle_t ls,
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent,
- void (*astaddr) (void *astarg),
- void *astarg,
- void (*bastaddr) (void *astarg),
- void *range)
-{
- return ls_lock(ls, mode, lksb, flags, name, namelen, parent,
- astaddr, astarg, bastaddr, range);
-}
-
-/*
- * Sync locking in own lockspace
- */
-int dlm_ls_lock_wait(dlm_lshandle_t ls,
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent,
- void *bastarg,
- void (*bastaddr) (void *bastarg),
- void *range)
-{
- return ls_lock(ls, mode, lksb, flags | LKF_WAIT, name, namelen, parent,
- NULL, bastarg, bastaddr, range);
-}
-
-/*
- * Async locking in the default lockspace
- */
-int dlm_lock(uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent,
- void (*astaddr) (void *astarg),
- void *astarg,
- void (*bastaddr) (void *astarg),
- void *range)
-{
- if (open_default_lockspace())
- return -1;
-
- return ls_lock(default_ls, mode, lksb, flags, name, namelen, parent,
- astaddr, astarg, bastaddr, range);
-}
-
-/*
- * Sync locking in the default lockspace
- */
-int dlm_lock_wait(uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent,
- void *bastarg,
- void (*bastaddr) (void *bastarg),
- void *range)
-{
- if (open_default_lockspace())
- return -1;
-
- return ls_lock(default_ls, mode, lksb, flags | LKF_WAIT, name, namelen,
- parent, NULL, bastarg, bastaddr, range);
-}
-
-
-/*
- * Unlock
- * All the ways to unlock/cancel a lock
- */
-
-static int ls_unlock_v5(struct dlm_ls_info *lsinfo, uint32_t lkid,
- uint32_t flags, struct dlm_lksb *lksb, void *astarg)
-{
- struct dlm_write_request_v5 req;
-
- set_version_v5(&req);
- req.cmd = DLM_USER_UNLOCK;
- req.i.lock.lkid = lkid;
- req.i.lock.flags = (flags & ~LKF_WAIT);
- req.i.lock.lksb = lksb;
- req.i.lock.castparam = astarg;
- /* DLM_USER_UNLOCK will default to existing completion AST */
- req.i.lock.castaddr = 0;
- lksb->sb_status = EINPROG;
-
- if (flags & LKF_WAIT)
- return sync_write_v5(lsinfo, &req, sizeof(req));
- else
- return write(lsinfo->fd, &req, sizeof(req));
-}
-
-static int ls_unlock_v6(struct dlm_ls_info *lsinfo, uint32_t lkid,
- uint32_t flags, struct dlm_lksb *lksb, void *astarg)
-{
- struct dlm_write_request req;
-
- set_version_v6(&req);
- req.cmd = DLM_USER_UNLOCK;
- req.i.lock.lkid = lkid;
- req.i.lock.flags = (flags & ~LKF_WAIT);
- req.i.lock.lksb = lksb;
- req.i.lock.namelen = 0;
- req.i.lock.castparam = astarg;
- /* DLM_USER_UNLOCK will default to existing completion AST */
- req.i.lock.castaddr = 0;
- lksb->sb_status = EINPROG;
-
- if (flags & LKF_WAIT)
- return sync_write_v6(lsinfo, &req, sizeof(req));
- else
- return write(lsinfo->fd, &req, sizeof(req));
-}
-
-int dlm_ls_unlock(dlm_lshandle_t ls, uint32_t lkid, uint32_t flags,
- struct dlm_lksb *lksb, void *astarg)
-{
- struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)ls;
- int status;
-
- if (ls == NULL) {
- errno = ENOTCONN;
- return -1;
- }
-
- if (!lkid) {
- errno = EINVAL;
- return -1;
- }
-
- if (kernel_version.version[0] == 5)
- status = ls_unlock_v5(lsinfo, lkid, flags, lksb, astarg);
- else
- status = ls_unlock_v6(lsinfo, lkid, flags, lksb, astarg);
-
- if (status < 0)
- return -1;
- return 0;
-}
-
-int dlm_ls_unlock_wait(dlm_lshandle_t ls, uint32_t lkid, uint32_t flags,
- struct dlm_lksb *lksb)
-{
- return dlm_ls_unlock(ls, lkid, flags | LKF_WAIT, lksb, NULL);
-}
-
-int dlm_unlock_wait(uint32_t lkid, uint32_t flags, struct dlm_lksb *lksb)
-{
- return dlm_ls_unlock_wait(default_ls, lkid, flags | LKF_WAIT, lksb);
-}
-
-int dlm_unlock(uint32_t lkid, uint32_t flags, struct dlm_lksb *lksb,
- void *astarg)
-{
- return dlm_ls_unlock(default_ls, lkid, flags, lksb, astarg);
-}
-
-int dlm_ls_deadlock_cancel(dlm_lshandle_t ls, uint32_t lkid, uint32_t flags)
-{
- struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)ls;
- struct dlm_write_request req;
-
- if (kernel_version.version[0] < 6) {
- errno = ENOSYS;
- return -1;
- }
-
- if (ls == NULL) {
- errno = ENOTCONN;
- return -1;
- }
-
- if (!lkid) {
- errno = EINVAL;
- return -1;
- }
-
- set_version_v6(&req);
- req.cmd = DLM_USER_DEADLOCK;
- req.i.lock.lkid = lkid;
- req.i.lock.flags = flags;
-
- return write(lsinfo->fd, &req, sizeof(req));
-}
-
-
-/*
- * Purge
- * Clear away orphan locks
- */
-
-int dlm_ls_purge(dlm_lshandle_t ls, int nodeid, int pid)
-{
- struct dlm_write_request req;
- struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)ls;
- int status;
-
- if (kernel_version.version[0] < 6) {
- errno = ENOSYS;
- return -1;
- }
-
- if (ls == NULL) {
- errno = ENOTCONN;
- return -1;
- }
-
- set_version_v6(&req);
- req.cmd = DLM_USER_PURGE;
- req.i.purge.nodeid = nodeid;
- req.i.purge.pid = pid;
-
- status = write(lsinfo->fd, &req, sizeof(req));
-
- if (status < 0)
- return -1;
- return 0;
-}
-
-
-/* These two routines for for users that want to
- * do their own fd handling.
- * This allows a non-threaded app to use the DLM.
- */
-int dlm_get_fd(void)
-{
- if (default_ls)
- {
- return default_ls->fd;
- }
- else
- {
- if (open_default_lockspace())
- return -1;
- else
- return default_ls->fd;
- }
-}
-
-int dlm_dispatch(int fd)
-{
- int status;
- int fdflags;
-
- fdflags = fcntl(fd, F_GETFL, 0);
- fcntl(fd, F_SETFL, fdflags | O_NONBLOCK);
- do
- {
- status = do_dlm_dispatch(fd);
- } while (status == 0);
-
- /* EAGAIN is not an error */
- if (status < 0 && errno == EAGAIN)
- status = 0;
-
- fcntl(fd, F_SETFL, fdflags);
- return status;
-}
-
-/* Converts a lockspace handle into a file descriptor */
-int dlm_ls_get_fd(dlm_lshandle_t lockspace)
-{
- struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)lockspace;
-
- return lsinfo->fd;
-}
-
-#ifdef _REENTRANT
-static void *dlm_recv_thread(void *lsinfo)
-{
- struct dlm_ls_info *lsi = lsinfo;
-
- for (;;)
- do_dlm_dispatch(lsi->fd);
-
- return NULL;
-}
-
-/* Multi-threaded callers normally use this */
-int dlm_pthread_init(void)
-{
- if (open_default_lockspace())
- return -1;
-
- if (default_ls->tid)
- {
- errno = EEXIST;
- return -1;
- }
-
- if (pthread_create(&default_ls->tid, NULL, dlm_recv_thread, default_ls))
- {
- int saved_errno = errno;
- close(default_ls->fd);
- free(default_ls);
- default_ls = NULL;
- errno = saved_errno;
- return -1;
- }
- return 0;
-}
-
-/* And same, for those with their own lockspace */
-int dlm_ls_pthread_init(dlm_lshandle_t ls)
-{
- struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)ls;
-
- if (lsinfo->tid)
- {
- errno = EEXIST;
- return -1;
- }
-
- return pthread_create(&lsinfo->tid, NULL, dlm_recv_thread, (void *)ls);
-}
-#endif
-
-/*
- * Lockspace manipulation functions
- * Privileged users (checked by the kernel) can create/release lockspaces
- */
-
-static int create_lockspace_v5(const char *name, uint32_t flags)
-{
- char reqbuf[sizeof(struct dlm_write_request_v5) + DLM_LOCKSPACE_LEN];
- struct dlm_write_request_v5 *req = (struct dlm_write_request_v5 *)reqbuf;
- int namelen = strlen(name);
- int minor;
-
- memset(reqbuf, 0, sizeof(reqbuf));
- set_version_v5(req);
-
- req->cmd = DLM_USER_CREATE_LOCKSPACE;
- req->i.lspace.flags = flags;
-
- if (namelen > DLM_LOCKSPACE_LEN) {
- errno = EINVAL;
- return -1;
- }
- memcpy(req->i.lspace.name, name, namelen);
-
- minor = write(control_fd, req, sizeof(*req) + namelen);
-
- return minor;
-}
-
-static int create_lockspace_v6(const char *name, uint32_t flags)
-{
- char reqbuf[sizeof(struct dlm_write_request) + DLM_LOCKSPACE_LEN];
- struct dlm_write_request *req = (struct dlm_write_request *)reqbuf;
- int namelen = strlen(name);
- int minor;
-
- memset(reqbuf, 0, sizeof(reqbuf));
- set_version_v6(req);
-
- req->cmd = DLM_USER_CREATE_LOCKSPACE;
- req->i.lspace.flags = flags;
-
- if (namelen > DLM_LOCKSPACE_LEN) {
- errno = EINVAL;
- return -1;
- }
- memcpy(req->i.lspace.name, name, namelen);
-
- minor = write(control_fd, req, sizeof(*req) + namelen);
-
- return minor;
-}
-
-static dlm_lshandle_t create_lockspace(const char *name, mode_t mode,
- uint32_t flags)
-{
- char dev_path[PATH_MAX];
- char udev_path[PATH_MAX];
- struct dlm_ls_info *newls;
- int error, saved_errno, minor;
-
- /* We use the control device for creating lockspaces. */
- if (open_control_device())
- return NULL;
-
- newls = malloc(sizeof(struct dlm_ls_info));
- if (!newls)
- return NULL;
-
- ls_dev_name(name, dev_path, sizeof(dev_path));
-
- if (kernel_version.version[0] == 5)
- minor = create_lockspace_v5(name, flags);
- else
- minor = create_lockspace_v6(name, flags);
-
- if (minor < 0)
- goto fail;
-
- /* Wait for udev to create the device; the device it creates may
- have a truncated name due to the sysfs device name limit. */
-
- error = find_udev_device(name, minor, udev_path);
- if (error)
- goto fail;
-
- /* If the symlink already exists, find_udev_device() will return
- it and we'll skip this. */
-
- if (strcmp(dev_path, udev_path)) {
- error = symlink(udev_path, dev_path);
- if (error)
- goto fail;
- }
-
- /* Open it and return the struct as a handle */
-
- newls->fd = open(dev_path, O_RDWR);
- if (newls->fd == -1)
- goto fail;
- if (mode)
- fchmod(newls->fd, mode);
- newls->tid = 0;
- fcntl(newls->fd, F_SETFD, 1);
- return (dlm_lshandle_t)newls;
-
- fail:
- saved_errno = errno;
- free(newls);
- errno = saved_errno;
- return NULL;
-}
-
-dlm_lshandle_t dlm_new_lockspace(const char *name, mode_t mode, uint32_t flags)
-{
- return create_lockspace(name, mode, flags);
-}
-
-dlm_lshandle_t dlm_create_lockspace(const char *name, mode_t mode)
-{
- return create_lockspace(name, mode, 0);
-}
-
-static int release_lockspace_v5(uint32_t minor, uint32_t flags)
-{
- struct dlm_write_request_v5 req;
-
- set_version_v5(&req);
- req.cmd = DLM_USER_REMOVE_LOCKSPACE;
- req.i.lspace.minor = minor;
- req.i.lspace.flags = flags;
-
- return write(control_fd, &req, sizeof(req));
-}
-
-static int release_lockspace_v6(uint32_t minor, uint32_t flags)
-{
- struct dlm_write_request req;
-
- set_version_v6(&req);
- req.cmd = DLM_USER_REMOVE_LOCKSPACE;
- req.i.lspace.minor = minor;
- req.i.lspace.flags = flags;
-
- return write(control_fd, &req, sizeof(req));
-}
-
-static int release_lockspace(uint32_t minor, uint32_t flags)
-{
- if (kernel_version.version[0] == 5)
- return release_lockspace_v5(minor, flags);
- else
- return release_lockspace_v6(minor, flags);
-}
-
-int dlm_release_lockspace(const char *name, dlm_lshandle_t ls, int force)
-{
- char dev_path[PATH_MAX];
- struct stat st;
- struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)ls;
- uint32_t flags = 0;
- int fd, is_symlink = 0;
-
- ls_dev_name(name, dev_path, sizeof(dev_path));
- if (!lstat(dev_path, &st) && S_ISLNK(st.st_mode))
- is_symlink = 1;
-
- /* We need the minor number */
- if (fstat(lsinfo->fd, &st))
- return -1;
-
- /* Close the lockspace first if it's in use */
- ls_pthread_cleanup(lsinfo);
-
- if (open_control_device())
- return -1;
-
- if (force)
- flags = DLM_USER_LSFLG_FORCEFREE;
-
- release_lockspace(minor(st.st_rdev), flags);
-
- if (!is_symlink)
- return 0;
-
- /* The following open is used to detect if our release was the last.
- It will fail if our release was the last, because either:
- . udev has already removed the truncated sysfs device name (ENOENT)
- . the misc device has been deregistered in the kernel (ENODEV)
- (the deregister completes before release returns)
-
- So, if the open fails, we know that our release was the last,
- udev will be removing the device with the truncated name (if it
- hasn't already), and we should remove the symlink. */
-
- fd = open(dev_path, O_RDWR);
- if (fd < 0)
- unlink(dev_path);
- else
- close(fd); /* our release was not the last */
-
- return 0;
-}
-
-/*
- * Normal users just open/close lockspaces
- */
-
-dlm_lshandle_t dlm_open_lockspace(const char *name)
-{
- char dev_name[PATH_MAX];
- struct dlm_ls_info *newls;
- int saved_errno;
-
- /* Need to detect kernel version */
- if (open_control_device())
- return NULL;
-
- newls = malloc(sizeof(struct dlm_ls_info));
- if (!newls)
- return NULL;
-
- newls->tid = 0;
- ls_dev_name(name, dev_name, sizeof(dev_name));
-
- newls->fd = open(dev_name, O_RDWR);
- saved_errno = errno;
-
- if (newls->fd == -1) {
- free(newls);
- errno = saved_errno;
- return NULL;
- }
- fcntl(newls->fd, F_SETFD, 1);
- return (dlm_lshandle_t)newls;
-}
-
-int dlm_close_lockspace(dlm_lshandle_t ls)
-{
- struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)ls;
-
- ls_pthread_cleanup(lsinfo);
- return 0;
-}
-
-int dlm_kernel_version(uint32_t *major, uint32_t *minor, uint32_t *patch)
-{
- if (open_control_device())
- return -1;
- *major = kernel_version.version[0];
- *minor = kernel_version.version[1];
- *patch = kernel_version.version[2];
- return 0;
-}
-
-void dlm_library_version(uint32_t *major, uint32_t *minor, uint32_t *patch)
-{
- *major = DLM_DEVICE_VERSION_MAJOR;
- *minor = DLM_DEVICE_VERSION_MINOR;
- *patch = DLM_DEVICE_VERSION_PATCH;
-}
-
diff --git a/dlm/libdlm/libdlm.h b/dlm/libdlm/libdlm.h
deleted file mode 100644
index 17a552c..0000000
--- a/dlm/libdlm/libdlm.h
+++ /dev/null
@@ -1,275 +0,0 @@
-#ifndef __LIBDLM_H
-#define __LIBDLM_H
-
-/*
- * Typedefs for things that are compatible with the kernel but replicated here
- * so that users only need the libdlm include file. libdlm itself needs the
- * full kernel file so shouldn't use these.
- */
-
-#define DLM_LVB_LEN 32
-
-#ifndef BUILDING_LIBDLM
-
-/*
- * These two lengths are copied from linux/dlmconstants.h
- * They are the max length of a lockspace name and the max length of a
- * resource name.
- */
-
-#define DLM_LOCKSPACE_LEN 64
-#define DLM_RESNAME_MAXLEN 64
-
-struct dlm_lksb {
- int sb_status;
- uint32_t sb_lkid;
- char sb_flags;
- char *sb_lvbptr;
-};
-
-/* lksb flags */
-#define DLM_SBF_DEMOTED 0x01
-#define DLM_SBF_VALNOTVALID 0x02
-#define DLM_SBF_ALTMODE 0x04
-
-/* dlm_new_lockspace flags */
-#define DLM_LSFL_NODIR 0x00000001
-#define DLM_LSFL_TIMEWARN 0x00000002
-#define DLM_LSFL_FS 0x00000004
-#define DLM_LSFL_NEWEXCL 0x00000008
-
-#endif
-
-
-#if 0
-/* Dummy definition to keep linkages */
-struct dlm_queryinfo;
-#endif
-
-extern int dlm_kernel_version(uint32_t *maj, uint32_t *min, uint32_t *patch);
-extern void dlm_library_version(uint32_t *maj, uint32_t *min, uint32_t *patch);
-
-
-/*
- * Using the default lockspace
- *
- * lock_resource() - simple sync request or convert (requires pthreads)
- * unlock_resource() - simple sync unlock (requires pthreads)
- * dlm_lock() - async request or convert
- * dlm_unlock() - async unlock or cancel
- * dlm_lock_wait() - sync request or convert
- * dlm_unlock_wait() - sync unlock or cancel
- */
-
-#ifdef _REENTRANT
-extern int lock_resource(const char *resource, int mode, int flags, int *lockid);
-extern int unlock_resource(int lockid);
-#endif
-
-extern int dlm_lock(uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent, /* unused */
- void (*astaddr) (void *astarg),
- void *astarg,
- void (*bastaddr) (void *astarg),
- void *range); /* unused */
-
-extern int dlm_unlock(uint32_t lkid,
- uint32_t flags,
- struct dlm_lksb *lksb,
- void *astarg);
-
-extern int dlm_lock_wait(uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent, /* unused */
- void *bastarg,
- void (*bastaddr) (void *bastarg),
- void *range); /* unused */
-
-extern int dlm_unlock_wait(uint32_t lkid,
- uint32_t flags,
- struct dlm_lksb *lksb);
-
-
-/*
- * These two are for users that want to do their own FD handling
- *
- * dlm_get_fd() - returns fd for the default lockspace for polling and dispatch
- * dlm_dispatch() - dispatches pending asts and basts
- */
-
-extern int dlm_get_fd(void);
-extern int dlm_dispatch(int fd);
-
-
-/*
- * Creating your own lockspace
- *
- * dlm_create_lockspace() - create and open a lockspace and return a handle
- * to it. Privileges are required to create/release.
- * dlm_new_lockspace() - same as create but allows flags
- * dlm_open_lockspace() - simply returns a handle for an existing lockspace and
- * may be called by ordinary users.
- * dlm_release_lockspace()
- * dlm_close_lockspace()
- * dlm_ls_get_fd()
- *
- * NOTE: that if you dlm_create_lockspace() then dlm_open_lockspace() you will
- * have two open files on the same device. Hardly a major problem but I thought
- * it worth pointing out.
- */
-
-typedef void *dlm_lshandle_t;
-
-extern dlm_lshandle_t dlm_create_lockspace(const char *name, mode_t mode);
-extern int dlm_release_lockspace(const char *name, dlm_lshandle_t ls,
- int force);
-extern dlm_lshandle_t dlm_open_lockspace(const char *name);
-extern int dlm_close_lockspace(dlm_lshandle_t ls);
-extern int dlm_ls_get_fd(dlm_lshandle_t ls);
-extern dlm_lshandle_t dlm_new_lockspace(const char *name, mode_t mode,
- uint32_t flags);
-
-
-/*
- * Using your own lockspace
- *
- * dlm_ls_lock()
- * dlm_ls_lockx()
- * dlm_ls_unlock()
- * dlm_ls_lock_wait()
- * dlm_ls_unlock_wait()
- * dlm_ls_deadlock_cancel()
- * dlm_ls_purge()
- */
-
-extern int dlm_ls_lock(dlm_lshandle_t lockspace,
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent, /* unused */
- void (*astaddr) (void *astarg),
- void *astarg,
- void (*bastaddr) (void *astarg),
- void *range); /* unused */
-
-extern int dlm_ls_lockx(dlm_lshandle_t lockspace,
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent, /* unused */
- void (*astaddr) (void *astarg),
- void *astarg,
- void (*bastaddr) (void *astarg),
- uint64_t *xid,
- uint64_t *timeout);
-
-extern int dlm_ls_unlock(dlm_lshandle_t lockspace,
- uint32_t lkid,
- uint32_t flags,
- struct dlm_lksb *lksb,
- void *astarg);
-
-extern int dlm_ls_lock_wait(dlm_lshandle_t lockspace,
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent, /* unused */
- void *bastarg,
- void (*bastaddr) (void *bastarg),
- void *range); /* unused */
-
-extern int dlm_ls_unlock_wait(dlm_lshandle_t lockspace,
- uint32_t lkid,
- uint32_t flags,
- struct dlm_lksb *lksb);
-
-extern int dlm_ls_deadlock_cancel(dlm_lshandle_t ls,
- uint32_t lkid,
- uint32_t flags);
-
-extern int dlm_ls_purge(dlm_lshandle_t lockspace,
- int nodeid,
- int pid);
-
-
-/*
- * For threaded applications
- *
- * dlm_pthread_init()
- * dlm_ls_pthread_init() - call this before any locking operations and the ASTs
- * will be delivered in their own thread.
- * dlm_pthread_cleanup() - call the cleanup routine at application exit
- * (optional) or, if the locking functions are in a
- * shared library that is to be unloaded.
- *
- * dlm_close/release_lockspace() will tidy the threads for a non-default
- * lockspace
- */
-
-#ifdef _REENTRANT
-extern int dlm_pthread_init(void);
-extern int dlm_ls_pthread_init(dlm_lshandle_t lockspace);
-extern int dlm_pthread_cleanup(void);
-#endif
-
-
-/*
- * Lock modes
- */
-
-#define LKM_NLMODE 0 /* null lock */
-#define LKM_CRMODE 1 /* concurrent read */
-#define LKM_CWMODE 2 /* concurrent write */
-#define LKM_PRMODE 3 /* protected read */
-#define LKM_PWMODE 4 /* protected write */
-#define LKM_EXMODE 5 /* exclusive */
-
-
-/*
- * Locking flags - these match the ones in dlm.h
- */
-
-#define LKF_NOQUEUE 0x00000001
-#define LKF_CANCEL 0x00000002
-#define LKF_CONVERT 0x00000004
-#define LKF_VALBLK 0x00000008
-#define LKF_QUECVT 0x00000010
-#define LKF_IVVALBLK 0x00000020
-#define LKF_CONVDEADLK 0x00000040
-#define LKF_PERSISTENT 0x00000080
-#define LKF_NODLCKWT 0x00000100
-#define LKF_NODLCKBLK 0x00000200
-#define LKF_EXPEDITE 0x00000400
-#define LKF_NOQUEUEBAST 0x00000800
-#define LKF_HEADQUE 0x00001000
-#define LKF_NOORDER 0x00002000
-#define LKF_ORPHAN 0x00004000
-#define LKF_ALTPR 0x00008000
-#define LKF_ALTCW 0x00010000
-#define LKF_FORCEUNLOCK 0x00020000
-#define LKF_TIMEOUT 0x00040000
-#define LKF_WAIT 0x80000000 /* Userspace only, for sync API calls */
-
-/*
- * Extra return codes used by the DLM
- */
-
-#define ECANCEL 0x10001
-#define EUNLOCK 0x10002
-#define EINPROG 0x10003 /* lock operation is in progress */
-
-#endif
-
diff --git a/dlm/libdlm/libdlm_internal.h b/dlm/libdlm/libdlm_internal.h
deleted file mode 100644
index c8b270e..0000000
--- a/dlm/libdlm/libdlm_internal.h
+++ /dev/null
@@ -1,9 +0,0 @@
-
-/* Needed before we include the kernel libdlm header */
-#define __user
-typedef uint8_t __u8;
-typedef uint16_t __u16;
-typedef uint32_t __u32;
-#define BUILDING_LIBDLM
-
-
diff --git a/dlm/man/dlm_cleanup.3 b/dlm/man/dlm_cleanup.3
deleted file mode 100644
index db4a9cf..0000000
--- a/dlm/man/dlm_cleanup.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/libdlm.3
diff --git a/dlm/man/dlm_close_lockspace.3 b/dlm/man/dlm_close_lockspace.3
deleted file mode 100644
index e5db408..0000000
--- a/dlm/man/dlm_close_lockspace.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/dlm_create_lockspace.3
diff --git a/dlm/man/dlm_create_lockspace.3 b/dlm/man/dlm_create_lockspace.3
deleted file mode 100644
index 3879e85..0000000
--- a/dlm/man/dlm_create_lockspace.3
+++ /dev/null
@@ -1,94 +0,0 @@
-.TH DLM_CREATE_LOCKSPACE 3 "July 5, 2007" "libdlm functions"
-.SH NAME
-dlm_create_lockspace, dlm_open_lockspace, dlm_close_lockspace, dlm_release_lockspace \- manipulate DLM lockspaces
-.SH SYNOPSIS
-.nf
- #include <libdlm.h>
-
-dlm_lshandle_t dlm_create_lockspace(const char *name, mode_t mode);
-dlm_lshandle_t dlm_new_lockspace(const char *name, mode_t mode,
- uint32_t flags);
-dlm_lshandle_t dlm_open_lockspace(const char *name);
-int dlm_close_lockspace(dlm_lshandle_t ls);
-int dlm_release_lockspace(const char *name, dlm_lshandle_t ls,
- int force);
-
-.fi
-.SH DESCRIPTION
-The DLM allows locks to be partitioned into "lockspaces", and these can be manipulated by userspace calls. It is possible (though not recommended) for an application to have multiple lockspaces open at one time.
-
-Many of the DLM calls work on the "default" lockspace, which should be fine for most users. The calls with _ls_ in them allow you to isolate your application from all others running in the cluster. Remember, lockspaces are a cluster-wide resource, so if you create a lockspace called "myls" it will share locks with a lockspace called "myls" on all nodes. These calls allow privileged users to create & remove lockspaces, and ordinary users to connect to an existing lockspace to store their locks there.
-.PP
-.SS
-dlm_lshandle_t dlm_create_lockspace(const char *name, mode_t mode);
-.br
-This creates a lockspace called <name> and the mode of the file used to access it will be <mode> (subject to umask as usual). The lockspace must not already exist on this node; if it does, NULL will be returned and errno will be set to EEXIST. If you really want to use this lockspace you can then use dlm_open_lockspace() below. The name is the name of a misc device that will be created in /dev/misc.
-.br
-On success a handle to the lockspace is returned, which can be used to pass into subsequent dlm_ls_lock/unlock calls. Make no assumptions as to the content of this handle as its content may change in future.
-.br
-The caller must have CAP_SYSADMIN privileges to do this operation.
-.PP
-Return codes:
-A lockspace handle is returned if the call completed successfully. If not, NULL is returned and errno is set to one of the following:
-.nf
-EINVAL An invalid parameter was passed to the call
-ENOMEM A (kernel) memory allocation failed
-EEXIST The lockspace already exists
-EPERM Process does not have capability to create lockspaces
-ENOSYS A fatal error occurred initializing the DLM
-Any error returned by the open() system call
-.fi
-.SS
-dlm_lshandle_t dlm_new_lockspace(const char *name, mode_t mode, uint32_t flags)
-.PP
-Performs the same function as
-.B dlm_create_lockspace()
-above, but passes some creation flags to the call that affect the lockspace being created. Currently supported flags are:
-.nf
-DLM_LSFL_NODIR the lockspace should not use a resource directory
-DLM_LSFL_TIMEWARN the dlm should emit warnings over netlink when locks
- have been waiting too long; required for deadlock
- detection
-.fi
-.SS
-int dlm_release_lockspace(const char *name, dlm_lshandle_t ls, int force)
-.PP
-Deletes a lockspace. If the lockspace still has active locks then -1 will be returned and errno set to EBUSY. Both the lockspace handle /and/ the name must be specified. This call also closes the lockspace and stops the thread associated with the lockspace, if any.
-.br
-Note that other nodes in the cluster may still have locks open on this lockspace. This call only removes the lockspace from the current node. If the force flag is set then the lockspace will be removed even if another user on this node has active locks in it. Existing users will NOT be notified if you do this, so be careful.
-.br
-The caller must have CAP_SYSADMIN privileges to do this operation.
-.PP
-Return codes:
-0 is returned if the call completed successfully. If not, -1 is returned and errno is set to one of the following:
-.nf
-EINVAL An invalid parameter was passed to the call
-EPERM Process does not have capability to release lockspaces
-EBUSY The lockspace could not be freed because it still
- contains locks and force was not set.
-.fi
-
-.SS
-dlm_lshandle_t dlm_open_lockspace(const char *name)
-.PP
-Opens an already existing lockspace and returns a handle to it.
-.PP
-Return codes:
-A lockspace handle is returned if the call completed successfully. If not, NULL is returned and errno is set to an error returned by the open() system call
-.SS
-int dlm_close_lockspace(dlm_lshandle_t ls)
-.br
-Close the lockspace. Any locks held by this process will be freed. If a thread is associated with this lockspace then it will be stopped.
-.PP
-Return codes:
-0 is returned if the call completed successfully. If not, -1 is returned and errno is set to one of the following:
-.nf
-EINVAL lockspace was not a valid lockspace handle
-.fi
-
-
-.SH SEE ALSO
-
-.BR libdlm (3),
-.BR dlm_unlock (3),
-.BR dlm_lock (3),
diff --git a/dlm/man/dlm_dispatch.3 b/dlm/man/dlm_dispatch.3
deleted file mode 100644
index db4a9cf..0000000
--- a/dlm/man/dlm_dispatch.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/libdlm.3
diff --git a/dlm/man/dlm_get_fd.3 b/dlm/man/dlm_get_fd.3
deleted file mode 100644
index db4a9cf..0000000
--- a/dlm/man/dlm_get_fd.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/libdlm.3
diff --git a/dlm/man/dlm_lock.3 b/dlm/man/dlm_lock.3
deleted file mode 100644
index 3c5f8b5..0000000
--- a/dlm/man/dlm_lock.3
+++ /dev/null
@@ -1,239 +0,0 @@
-.TH DLM_LOCK 3 "July 5, 2007" "libdlm functions"
-.SH NAME
-dlm_lock \- acquire or convert a DLM lock
-.SH SYNOPSIS
-.nf
- #include <libdlm.h>
-
-int dlm_lock(uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent, /* unused */
- void (*astaddr) (void *astarg),
- void *astarg,
- void (*bastaddr) (void *astarg),
- void *range); /* unused */
-
-int dlm_lock_wait(uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent, /* unused */
- void *bastarg,
- void (*bastaddr) (void *bastarg),
- void *range); /* unused */
-
-int dlm_ls_lock(dlm_lshandle_t lockspace,
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent, /* unused */
- void (*astaddr) (void *astarg),
- void *astarg,
- void (*bastaddr) (void *astarg),
- void *range); /* unused */
-
-int dlm_ls_lock_wait(dlm_lshandle_t lockspace,
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent, /* unused */
- void *bastarg,
- void (*bastaddr) (void *bastarg),
- void *range); /* unused */
-
-int dlm_ls_lockx(dlm_lshandle_t lockspace,
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent, /* unused */
- void (*astaddr) (void *astarg),
- void *astarg,
- void (*bastaddr) (void *astarg),
- uint64_t *xid,
- uint64_t *timeout);
-
-
-
-.fi
-.SH DESCRIPTION
-dlm_lock and its variants acquire and convert locks in the DLM.
-.PP
-dlm_lock() operations are asynchronous. If the call to dlm_lock returns an error then the operation has failed and the AST routine will not be called. If dlm_lock returns 0 it is still possible that the lock operation will fail. The AST routine will be called when the locking is complete or has failed and the status is returned in the lksb.
-.B dlm_lock_wait()
-will wait until the lock operation has completed and returns the final completion status.
-.B dlm_ls_lock()
-is the same as
-.B dlm_lock()
-but takes a lockspace argument. This lockspace must have been previously opened by
-.B dlm_open_lockspace() or
-.B dlm_create_lockspace().
-.PP
-For conversion operations the name and namelen are ignored and the lock ID in the LKSB is used to identify the lock to be converted.
-.PP
-If a lock value block is specified then in general, a grant or a conversion to an equal-level or higher-level lock mode reads the lock value from the resource into the caller's lock value block. When a lock conversion from EX or PW to an equal-level or lower-level lock mode occurs, the contents of the caller's lock value block are written into the resource. If the LVB is invalidated the lksb.sb_flags member will be set to DLM_SBF_VALNOTVALID. Lock value blocks are always 32 bytes long.
-.PP
-If the AST routines or parameter are passed to a conversion operation then they will overwrite those values that were passed to a previous dlm_lock call.
-.PP
-.B mode
-Lock mode to acquire or convert to.
-.nf
- LKM_NLMODE NULL Lock
- LKM_CRMODE Concurrent read
- LKM_CWMODE Concurrent write
- LKM_PRMODE Protected read
- LKM_PWMODE Protected write
- LKM_EXMODE Exclusive
-.fi
-.PP
-.B flags
-Affect the operation of the lock call:
-.nf
- LKF_NOQUEUE Don't queue the lock. If it cannot be granted return
- -EAGAIN
- LKF_CONVERT Convert an existing lock
- LKF_VALBLK Lock has a value block
- LKF_QUECVT Put conversion to the back of the queue
- LKF_EXPEDITE Grant a NL lock immediately regardless of other locks
- on the conversion queue
- LKF_PERSISTENT Specifies a lock that will not be unlocked when the
- process exits; it will become an orphan lock.
- LKF_CONVDEADLK Enable internal conversion deadlock resolution where
- the lock's granted mode may be set to NL and
- DLM_SBF_DEMOTED is returned in lksb.sb_flags.
- LKF_NODLCKWT Do not consider this lock when trying to detect
- deadlock conditions.
- LKF_NODLCKBLK Not implemented
- LKF_NOQUEUEBAST Send blocking ASTs even for NOQUEUE operations
- LKF_HEADQUE Add locks to the head of the convert or waiting queue
- LKF_NOORDER Avoid the VMS rules on grant order
- LKF_ALTPR If the requested mode can't be granted (generally CW),
- try to grant in PR and return DLM_SBF_ALTMODE.
- LKF_ALTCW If the requested mode can't be granted (generally PR),
- try to grant in CW and return DLM_SBF_ALTMODE.
- LKF_TIMEOUT The lock will time out per the timeout arg.
-
-.fi
-.PP
-.B lksb
-Lock Status block
-.br
-This structure contains the returned lock ID, the actual
-status of the lock operation (all lock ops are asynchronous)
-and the value block if LKF_VALBLK is set.
-.PP
-.B name
-.br
-Name of the lock. Can be binary, max 64 bytes. Ignored for lock
-conversions. (Should be a string to work with debugging tools.)
-.PP
-.B namelen
-.br
-Length of the above name. Ignored for lock conversions.
-.PP
-.B parent
-.br
-ID of parent lock or NULL if this is a top-level lock. This is currently unused.
-.PP
-.B ast
-.br
-Address of AST routine to be called when the lock operation
-completes. The final completion status of the lock will be
-in the lksb. The AST routine must not be NULL.
-.PP
-.B astargs
-.br
-Argument to pass to the AST routine (most people pass the lksb
-in here but it can be anything you like.)
-.PP
-.B bast
-.br
-Blocking AST routine. Address of a function to call if this
-lock is blocking another. The function will be called with
-astargs.
-.PP
-.B range
-.br
-This is unused.
-.PP
-.B xid
-.br
-Optional transaction ID for deadlock detection.
-.PP
-.B timeout
-.br
-Timeout in centiseconds. If it takes longer than this to acquire the lock
-(usually because it is already blocked by another lock), then the AST
-will trigger with ETIMEDOUT as the status. If the lock operation is a conversion
-then the lock will remain at its current status. If this is a new lock then
-the lock will not exist and any LKB in the lksb will be invalid. This is
-ignored without the LKF_TIMEOUT flag.
-.PP
-.SS Return values
-0 is returned if the call completed successfully. If not, -1 is returned and errno is set to one of the following:
-.PP
-.nf
-EINVAL An invalid parameter was passed to the call (eg bad lock
- mode or flag)
-ENOMEM A (kernel) memory allocation failed
-EAGAIN LKF_NOQUEUE was requested and the lock could not be
- granted
-EBUSY The lock is currently being locked or converted
-EFAULT The userland buffer could not be read/written by the
- kernel (this indicates a library problem)
-EDEADLOCK The lock operation is causing a deadlock and has been
- cancelled. If this was a conversion then the lock is
- reverted to its previously granted state. If it was a
- new lock then it has not been granted. (NB Only
- conversion deadlocks are currently detected)
-.PP
-If an error is returned in the AST, then lksb.sb_status is set to one of the above values instead of zero.
-.SS Structures
-.nf
-struct dlm_lksb {
- int sb_status; /* Final status of lock operation */
- uint32_t sb_lkid; /* ID of lock. Returned from dlm_lock()
- on first use. Used as input to
- dlm_lock() for a conversion operation */
- char sb_flags; /* Completion flags, see above */
- char *sb_lvbptr; /* Optional pointer to lock value block */
-};
-
-.fi
-.SH EXAMPLE
-.nf
-int status;
-struct dlm_lksb lksb;
-
-status = dlm_lock_wait(LKM_EXMODE,
- &lksb,
- LKF_NOQUEUE,
- "MyLock",
- strlen("MyLock"),
- 0, // Parent,
- NULL, // bast arg
- NULL, // bast routine,
- NULL); // Range
-
-if (status == 0)
- dlm_unlock_wait(lksb.sb_lkid, 0, &lksb);
-
-.fi
-
-.SH SEE ALSO
-
-.BR libdlm (3),
-.BR dlm_unlock (3),
-.BR dlm_open_lockspace (3),
-.BR dlm_create_lockspace (3),
-.BR dlm_close_lockspace (3),
-.BR dlm_release_lockspace (3)
diff --git a/dlm/man/dlm_lock_wait.3 b/dlm/man/dlm_lock_wait.3
deleted file mode 100644
index a99225c..0000000
--- a/dlm/man/dlm_lock_wait.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/dlm_lock.3
diff --git a/dlm/man/dlm_ls_lock.3 b/dlm/man/dlm_ls_lock.3
deleted file mode 100644
index a99225c..0000000
--- a/dlm/man/dlm_ls_lock.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/dlm_lock.3
diff --git a/dlm/man/dlm_ls_lock_wait.3 b/dlm/man/dlm_ls_lock_wait.3
deleted file mode 100644
index a99225c..0000000
--- a/dlm/man/dlm_ls_lock_wait.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/dlm_lock.3
diff --git a/dlm/man/dlm_ls_lockx.3 b/dlm/man/dlm_ls_lockx.3
deleted file mode 100644
index a99225c..0000000
--- a/dlm/man/dlm_ls_lockx.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/dlm_lock.3
diff --git a/dlm/man/dlm_ls_pthread_init.3 b/dlm/man/dlm_ls_pthread_init.3
deleted file mode 100644
index db4a9cf..0000000
--- a/dlm/man/dlm_ls_pthread_init.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/libdlm.3
diff --git a/dlm/man/dlm_ls_unlock.3 b/dlm/man/dlm_ls_unlock.3
deleted file mode 100644
index 91babd2..0000000
--- a/dlm/man/dlm_ls_unlock.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/dlm_unlock.3
diff --git a/dlm/man/dlm_ls_unlock_wait.3 b/dlm/man/dlm_ls_unlock_wait.3
deleted file mode 100644
index 91babd2..0000000
--- a/dlm/man/dlm_ls_unlock_wait.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/dlm_unlock.3
diff --git a/dlm/man/dlm_new_lockspace.3 b/dlm/man/dlm_new_lockspace.3
deleted file mode 100644
index e5db408..0000000
--- a/dlm/man/dlm_new_lockspace.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/dlm_create_lockspace.3
diff --git a/dlm/man/dlm_open_lockspace.3 b/dlm/man/dlm_open_lockspace.3
deleted file mode 100644
index e5db408..0000000
--- a/dlm/man/dlm_open_lockspace.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/dlm_create_lockspace.3
diff --git a/dlm/man/dlm_pthread_init.3 b/dlm/man/dlm_pthread_init.3
deleted file mode 100644
index db4a9cf..0000000
--- a/dlm/man/dlm_pthread_init.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/libdlm.3
diff --git a/dlm/man/dlm_release_lockspace.3 b/dlm/man/dlm_release_lockspace.3
deleted file mode 100644
index e5db408..0000000
--- a/dlm/man/dlm_release_lockspace.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/dlm_create_lockspace.3
diff --git a/dlm/man/dlm_unlock.3 b/dlm/man/dlm_unlock.3
deleted file mode 100644
index 9023139..0000000
--- a/dlm/man/dlm_unlock.3
+++ /dev/null
@@ -1,94 +0,0 @@
-.TH DLM_UNLOCK 3 "July 5, 2007" "libdlm functions"
-.SH NAME
-dlm_unlock \- unlock a DLM lock
-.SH SYNOPSIS
-.nf
-#include <libdlm.h>
-
-int dlm_unlock(uint32_t lkid,
- uint32_t flags, struct dlm_lksb *lksb, void *astarg);
-
-int dlm_unlock_wait(uint32_t lkid,
- uint32_t flags, struct dlm_lksb *lksb);
-
-.fi
-.SH DESCRIPTION
-.B dlm_unlock()
-unlocks a lock previously acquired by dlm_lock and its variants.
-.PP
-Unless
-.B dlm_unlock_wait()
-is used unlocks are also asynchronous. The AST routine is called when the resource is successfully unlocked (see below).
-.PP
-.B lkid
-Lock ID as returned in the lksb
-.PP
-.B flags
-flags affecting the unlock operation:
-.nf
- LKF_CANCEL Cancel a pending lock or conversion.
- This returns the lock to its previously
- granted mode (in case of a conversion) or
- unlocks it (in case of a waiting lock).
- LKF_IVVALBLK Invalidate value block
- LKF_FORCEUNLOCK Unlock the lock even if it's waiting.
-.fi
-.PP
-.B lksb
-LKSB to return status and value block information.
-.PP
-.B astarg
-New parameter to be passed to the completion AST.
-The completion AST routine is the
-last completion AST routine specified in a dlm_lock call.
-If dlm_lock_wait() was the last routine to issue a lock,
-dlm_unlock_wait() must be used to release the lock. If dlm_lock()
-was the last routine to issue a lock then either dlm_unlock()
-or dlm_unlock_wait() may be called.
-.PP
-
-.SS Return values
-0 is returned if the call completed successfully. If not, -1 is returned and errno is set to one of the following:
-.PP
-.nf
-EINVAL An invalid parameter was passed to the call (eg bad
- lock mode or flag)
-EINPROGRESS The lock is already being unlocked
-EBUSY The lock is currently being locked or converted
-ENOTEMPTY An attempt was made to unlock a parent lock that still has
- child locks.
-ECANCEL A lock conversion was successfully cancelled
-EUNLOCK An unlock operation completed successfully
- (sb_status only)
-EFAULT The userland buffer could not be read/written by the
- kernel
-.fi
-If an error is returned in the AST, then lksb.sb_status is set to one of the above numbers instead of zero.
-.SH EXAMPLE
-.nf
-int status;
-struct dlm_lksb lksb;
-
-status = dlm_lock_wait(LKM_EXMODE,
- &lksb,
- LKF_NOQUEUE,
- "MyLock",
- strlen("MyLock"),
- 0, // Parent,
- NULL, // bast arg
- NULL, // bast routine,
- NULL); // Range
-
-if (status == 0)
- dlm_unlock_wait(lksb.sb_lkid, 0, &lksb);
-
-.fi
-
-.SH SEE ALSO
-
-.BR libdlm (3),
-.BR dlm_lock (3),
-.BR dlm_open_lockspace (3),
-.BR dlm_create_lockspace (3),
-.BR dlm_close_lockspace (3),
-.BR dlm_release_lockspace (3)
diff --git a/dlm/man/dlm_unlock_wait.3 b/dlm/man/dlm_unlock_wait.3
deleted file mode 100644
index 91babd2..0000000
--- a/dlm/man/dlm_unlock_wait.3
+++ /dev/null
@@ -1 +0,0 @@
-.so man3/dlm_unlock.3
diff --git a/dlm/man/libdlm.3 b/dlm/man/libdlm.3
deleted file mode 100644
index a020560..0000000
--- a/dlm/man/libdlm.3
+++ /dev/null
@@ -1,105 +0,0 @@
-.TH LIBDLM 3 "July 5, 2007" "libdlm functions"
-.SH NAME
-libdlm \- dlm_get_fd, dlm_dispatch, dlm_pthread_init, dlm_ls_pthread_init, dlm_cleanup
-.SH SYNOPSIS
-.nf
-#include <libdlm.h>
-.nf
-int dlm_pthread_init();
-int dlm_ls_pthread_init(dlm_lshandle_t lockspace);
-int dlm_pthread_cleanup();
-int dlm_get_fd(void);
-int dlm_dispatch(int fd);
-
-link with -ldlm
-.fi
-.SH DESCRIPTION
-libdlm provides the programmatic userspace interface to the Distributed Lock manager. It provides all the calls you need to manipulate locks & lockspaces
-.br
-libdlm can be used in pthread or non-pthread applications. For pthread applications simply call the following function before doing any lock operations. If you're using pthreads, remember to define _REENTRANT at the top of the program or using -D_REENTRANT on the compile line.
-.br
-pthreads is the normal way of using the DLM. This way you simply initialize the DLM's thread and all the AST routines will be delivered in that thread. You just call the dlm_lock() etc routines in the main line of your program.
-.br
-If you don't want to use pthreads or you want to handle the dlm callback ASTs yourself then you can get an FD handle to the DLM device and call
-.B dlm_dispatch()
-on it whenever it becomes active. That was ASTs will be delivered in the context of the thread/process that called
-.B dlm_dispatch().
-
-
-.SS int dlm_pthread_init()
-.br
-Creates a thread to receive all lock ASTs. The AST callback function for lock operations will be called in the context of this thread. If there is a potential for local resource access conflicts you must provide your own pthread-based locking in the AST routine.
-.PP
-.SS int dlm_ls_pthread_init(dlm_lshandle_t lockspace)
-.br
-As dlm_pthread_init but initializes a thread for the specified lockspace.
-.PP
-.SS int dlm_pthread_cleanup()
-.br
-Cleans up the default lockspace threads after use. Normally you don't need to call this, but if the locking code is in a dynamically loadable shared library this will probably be necessary.
-.br
-For non-pthread based applications the DLM provides a file descriptor that the program can feed into poll/select. If activity is detected on that FD then a dispatch function should be called:
-.PP
-.SS int dlm_get_fd()
-Returns a file-descriptor for the DLM suitable for passing in to poll() or select().
-.PP
-.SS int dlm_dispatch(int fd)
-.br
-Reads from the DLM and calls any AST routines that may be needed. This routine runs in the context of the caller so no extra locking is needed to protect local resources.
-.PP
-
-
-.SH libdlm_lt
-There also exists a "light" version of the libdlm library called libdlm_lt. This is provided for those applications that do not want to use pthread functions. If you use this library it is important that your application is NOT compiled with -D_REENTRANT or linked with libpthread.
-
-.SH EXAMPLES
-
-Create a lockspace and start a thread to deliver its callbacks:
-.nf
-dlm_lshandle_t ls;
-
-ls = dlm_create_lockspace("myLS", 0660);
-dlm_ls_pthread_init(ls);
-
- ...
-
-status = dlm_ls_lock(ls,
- ... );
-
-
-.fi
-.PP
- Using poll(2) to wait for and dispatch ASTs
-.nf
-
-
-static int poll_for_ast(dlm_lshandle_t ls)
-{
- struct pollfd pfd;
-
- pfd.fd = dlm_ls_get_fd(ls);
- pfd.events = POLLIN;
- while (!ast_called)
- {
- if (poll(&pfd, 1, 0) < 0)
- {
- perror("poll");
- return -1;
- }
- dlm_dispatch(dlm_ls_get_fd(ls));
- }
- ast_called = 0;
- return 0;
-}
-.fi
-
-
-.SH SEE ALSO
-
-.BR libdlm (3),
-.BR dlm_lock (3),
-.BR dlm_unlock (3),
-.BR dlm_open_lockspace (3),
-.BR dlm_create_lockspace (3),
-.BR dlm_close_lockspace (3),
-.BR dlm_release_lockspace (3)
diff --git a/libdlm/51-dlm.rules b/libdlm/51-dlm.rules
new file mode 100644
index 0000000..f71e79d
--- /dev/null
+++ b/libdlm/51-dlm.rules
@@ -0,0 +1,5 @@
+KERNEL=="dlm-control", NAME="misc/dlm-control", MODE="0666"
+KERNEL=="dlm-monitor", NAME="misc/dlm-monitor", MODE="0666"
+KERNEL=="dlm_default", NAME="misc/dlm_default", MODE="0666"
+KERNEL=="dlm_*", NAME="misc/%k", MODE="0660"
+
diff --git a/libdlm/Makefile b/libdlm/Makefile
new file mode 100644
index 0000000..2b8b6d3
--- /dev/null
+++ b/libdlm/Makefile
@@ -0,0 +1,117 @@
+LIB_NAME = libdlm
+LIB_MAJOR = 3
+LIB_MINOR = 0
+LIB_O = $(LIB_NAME).o
+LIB_SO = $(LIB_NAME).so
+LIB_SMAJOR = $(LIB_SO).$(LIB_MAJOR)
+LIB_TARGET = $(LIB_SO).$(LIB_MAJOR).$(LIB_MINOR)
+
+LLT_NAME = libdlm_lt
+LLT_MAJOR = 3
+LLT_MINOR = 0
+LLT_O = $(LLT_NAME).o
+LLT_SO = $(LLT_NAME).so
+LLT_SMAJOR = $(LLT_SO).$(LLT_MAJOR)
+LLT_TARGET = $(LLT_SO).$(LLT_MAJOR).$(LLT_MINOR)
+
+HDR_TARGET = libdlm.h
+
+MAN_TARGET = \
+ man/dlm_cleanup.3 \
+ man/dlm_close_lockspace.3 \
+ man/dlm_create_lockspace.3 \
+ man/dlm_dispatch.3 \
+ man/dlm_get_fd.3 \
+ man/dlm_lock.3 \
+ man/dlm_lock_wait.3 \
+ man/dlm_ls_lock.3 \
+ man/dlm_ls_lock_wait.3 \
+ man/dlm_ls_lockx.3 \
+ man/dlm_ls_pthread_init.3 \
+ man/dlm_ls_unlock.3 \
+ man/dlm_ls_unlock_wait.3 \
+ man/dlm_new_lockspace.3 \
+ man/dlm_open_lockspace.3 \
+ man/dlm_pthread_init.3 \
+ man/dlm_release_lockspace.3 \
+ man/dlm_unlock.3 \
+ man/dlm_unlock_wait.3 \
+ man/libdlm.3
+
+UDEV_TARGET = 51-dlm.rules
+
+SOURCE = libdlm.c
+
+CFLAGS += -D_GNU_SOURCE -g \
+ -Wall \
+ -Wformat \
+ -Wformat-security \
+ -Wmissing-prototypes \
+ -Wnested-externs \
+ -Wpointer-arith \
+ -Wextra -Wshadow \
+ -Wcast-align \
+ -Wwrite-strings \
+ -Waggregate-return \
+ -Wstrict-prototypes \
+ -Winline \
+ -Wredundant-decls \
+ -Wno-sign-compare \
+ -Wno-unused-parameter \
+ -Wp,-D_FORTIFY_SOURCE=2 \
+ -fexceptions \
+ -fasynchronous-unwind-tables \
+ -fdiagnostics-show-option \
+ -fPIC \
+
+LIB_CFLAGS += $(CFLAGS) -D_REENTRANT
+LLT_CFLAGS += $(CFLAGS)
+
+LDFLAGS += -Wl
+
+LIB_LDFLAGS += $(LDFLAGS) -lpthread
+LLT_LDFLAGS += $(LDFLAGS)
+
+all: $(LIB_TARGET) $(LLT_TARGET)
+
+$(LIB_O): $(SOURCE)
+ $(CC) $(LIB_CFLAGS) -c -o $@ $<
+
+$(LLT_O): $(SOURCE)
+ $(CC) $(LLT_CFLAGS) -c -o $@ $<
+
+$(LIB_TARGET): $(LIB_O)
+ $(CC) $(LIB_LDFLAGS) -shared -o $@ -Wl,-soname=$(LIB_SMAJOR) $^
+ ln -sf $(LIB_TARGET) $(LIB_SO)
+ ln -sf $(LIB_TARGET) $(LIB_SMAJOR)
+
+$(LLT_TARGET): $(LLT_O)
+ $(CC) $(LLT_LDFLAGS) -shared -o $@ -Wl,-soname=$(LLT_SMAJOR) $^
+ ln -sf $(LLT_TARGET) $(LLT_SO)
+ ln -sf $(LLT_TARGET) $(LLT_SMAJOR)
+
+clean:
+ rm -f *.o *.so *.so.* *.a
+
+INSTALL=$(shell which install)
+
+DESTDIR=
+LIBDIR=/usr/lib64
+HDRDIR=/usr/include
+MANDIR=/usr/share/man
+UDEVDIR=/etc/udev/rules.d
+
+.PHONY: install
+install: all
+ $(INSTALL) -d $(DESTDIR)/$(LIBDIR)
+ $(INSTALL) -d $(DESTDIR)/$(HDRDIR)
+ $(INSTALL) -d $(DESTDIR)/$(MANDIR)/man3
+ $(INSTALL) -d $(DESTDIR)/$(UDEVDIR)
+ $(INSTALL) -c -m 755 $(LIB_TARGET) $(DESTDIR)/$(LIBDIR)
+ $(INSTALL) -c -m 755 $(LLT_TARGET) $(DESTDIR)/$(LIBDIR)
+ cp -a $(LIB_SO) $(DESTDIR)/$(LIBDIR)
+ cp -a $(LLT_SO) $(DESTDIR)/$(LIBDIR)
+ $(INSTALL) -c -m 644 $(HDR_TARGET) $(DESTDIR)/$(HDRDIR)
+ $(INSTALL) -m 644 $(MAN_TARGET) $(DESTDIR)/$(MANDIR)/man3/
+ $(INSTALL) -m 644 $(UDEV_TARGET) $(DESTDIR)/$(UDEVDIR)
+
diff --git a/libdlm/libdlm.c b/libdlm/libdlm.c
new file mode 100644
index 0000000..a5157e2
--- /dev/null
+++ b/libdlm/libdlm.c
@@ -0,0 +1,1485 @@
+#ifdef _REENTRANT
+#include <pthread.h>
+#endif
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <dirent.h>
+#include <linux/major.h>
+#ifdef HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
+#include <linux/types.h>
+#include <linux/dlm.h>
+#define BUILDING_LIBDLM
+#include "libdlm.h"
+#include <linux/dlm_device.h>
+
+#define MISC_PREFIX "/dev/misc/"
+#define DLM_PREFIX "dlm_"
+#define DLM_MISC_PREFIX MISC_PREFIX DLM_PREFIX
+#define DLM_CONTROL_NAME "dlm-control"
+#define DLM_CONTROL_PATH MISC_PREFIX DLM_CONTROL_NAME
+#define DEFAULT_LOCKSPACE "default"
+
+/*
+ * V5 of the dlm_device.h kernel/user interface structs
+ */
+
+struct dlm_lock_params_v5 {
+ __u8 mode;
+ __u8 namelen;
+ __u16 flags;
+ __u32 lkid;
+ __u32 parent;
+ void *castparam;
+ void *castaddr;
+ void *bastparam;
+ void *bastaddr;
+ struct dlm_lksb *lksb;
+ char lvb[DLM_USER_LVB_LEN];
+ char name[0];
+};
+
+struct dlm_write_request_v5 {
+ __u32 version[3];
+ __u8 cmd;
+ __u8 is64bit;
+ __u8 unused[2];
+
+ union {
+ struct dlm_lock_params_v5 lock;
+ struct dlm_lspace_params lspace;
+ } i;
+};
+
+struct dlm_lock_result_v5 {
+ __u32 length;
+ void *user_astaddr;
+ void *user_astparam;
+ struct dlm_lksb *user_lksb;
+ struct dlm_lksb lksb;
+ __u8 bast_mode;
+ __u8 unused[3];
+ /* Offsets may be zero if no data is present */
+ __u32 lvb_offset;
+};
+
+
+/*
+ * One of these per lockspace in use by the application
+ */
+
+struct dlm_ls_info {
+ int fd;
+#ifdef _REENTRANT
+ pthread_t tid;
+#else
+ int tid;
+#endif
+};
+
+/*
+ * The default lockspace.
+ * I've resisted putting locking around this as the user should be
+ * "sensible" and only do lockspace operations either in the
+ * main thread or ... carefully...
+ */
+
+static struct dlm_ls_info *default_ls = NULL;
+static int control_fd = -1;
+static struct dlm_device_version kernel_version;
+static int kernel_version_detected = 0;
+
+
+static int release_lockspace(uint32_t minor, uint32_t flags);
+
+
+static void ls_dev_name(const char *lsname, char *devname, int devlen)
+{
+ snprintf(devname, devlen, DLM_MISC_PREFIX "%s", lsname);
+}
+
+static void dummy_ast_routine(void *arg)
+{
+}
+
+#ifdef _REENTRANT
+/* Used for the synchronous and "simplified, synchronous" API routines */
+struct lock_wait
+{
+ pthread_cond_t cond;
+ pthread_mutex_t mutex;
+ struct dlm_lksb lksb;
+};
+
+static void sync_ast_routine(void *arg)
+{
+ struct lock_wait *lwait = arg;
+
+ pthread_mutex_lock(&lwait->mutex);
+ pthread_cond_signal(&lwait->cond);
+ pthread_mutex_unlock(&lwait->mutex);
+}
+
+/* lock_resource & unlock_resource
+ * are the simplified, synchronous API.
+ * Always uses the default lockspace.
+ */
+int lock_resource(const char *resource, int mode, int flags, int *lockid)
+{
+ int status;
+ struct lock_wait lwait;
+
+ if (default_ls == NULL)
+ {
+ if (dlm_pthread_init())
+ {
+ return -1;
+ }
+ }
+
+ if (!lockid)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* Conversions need the lockid in the LKSB */
+ if (flags & LKF_CONVERT)
+ lwait.lksb.sb_lkid = *lockid;
+
+ pthread_cond_init(&lwait.cond, NULL);
+ pthread_mutex_init(&lwait.mutex, NULL);
+ pthread_mutex_lock(&lwait.mutex);
+
+ status = dlm_lock(mode,
+ &lwait.lksb,
+ flags,
+ resource,
+ strlen(resource),
+ 0,
+ sync_ast_routine,
+ &lwait,
+ NULL,
+ NULL);
+ if (status)
+ return status;
+
+ /* Wait for it to complete */
+ pthread_cond_wait(&lwait.cond, &lwait.mutex);
+ pthread_mutex_unlock(&lwait.mutex);
+
+ *lockid = lwait.lksb.sb_lkid;
+
+ errno = lwait.lksb.sb_status;
+ if (lwait.lksb.sb_status)
+ return -1;
+ else
+ return 0;
+}
+
+
+int unlock_resource(int lockid)
+{
+ int status;
+ struct lock_wait lwait;
+
+ if (default_ls == NULL)
+ {
+ errno = -ENOTCONN;
+ return -1;
+ }
+
+ pthread_cond_init(&lwait.cond, NULL);
+ pthread_mutex_init(&lwait.mutex, NULL);
+ pthread_mutex_lock(&lwait.mutex);
+
+ status = dlm_unlock(lockid, 0, &lwait.lksb, &lwait);
+
+ if (status)
+ return status;
+
+ /* Wait for it to complete */
+ pthread_cond_wait(&lwait.cond, &lwait.mutex);
+ pthread_mutex_unlock(&lwait.mutex);
+
+ errno = lwait.lksb.sb_status;
+ if (lwait.lksb.sb_status != DLM_EUNLOCK)
+ return -1;
+ else
+ return 0;
+}
+
+/* Tidy up threads after a lockspace is closed */
+static int ls_pthread_cleanup(struct dlm_ls_info *lsinfo)
+{
+ int status = 0;
+ int fd;
+
+ /* Must close the fd after the thread has finished */
+ fd = lsinfo->fd;
+ if (lsinfo->tid)
+ {
+ status = pthread_cancel(lsinfo->tid);
+ if (!status)
+ pthread_join(lsinfo->tid, NULL);
+ }
+ if (!status)
+ {
+ free(lsinfo);
+ close(fd);
+ }
+
+ return status;
+}
+
+/* Cleanup default lockspace */
+int dlm_pthread_cleanup(void)
+{
+ struct dlm_ls_info *lsinfo = default_ls;
+
+ /* Protect users from their own stupidity */
+ if (!lsinfo)
+ return 0;
+
+ default_ls = NULL;
+
+ return ls_pthread_cleanup(lsinfo);
+}
+#else
+
+/* Non-pthread version of cleanup */
+static int ls_pthread_cleanup(struct dlm_ls_info *lsinfo)
+{
+ close(lsinfo->fd);
+ free(lsinfo);
+ return 0;
+}
+#endif
+
+
+static void set_version_v5(struct dlm_write_request_v5 *req)
+{
+ req->version[0] = kernel_version.version[0];
+ req->version[1] = kernel_version.version[1];
+ req->version[2] = kernel_version.version[2];
+ if (sizeof(long) == sizeof(long long))
+ req->is64bit = 1;
+ else
+ req->is64bit = 0;
+}
+
+static void set_version_v6(struct dlm_write_request *req)
+{
+ req->version[0] = kernel_version.version[0];
+ req->version[1] = kernel_version.version[1];
+ req->version[2] = kernel_version.version[2];
+ if (sizeof(long) == sizeof(long long))
+ req->is64bit = 1;
+ else
+ req->is64bit = 0;
+}
+
+static int open_default_lockspace(void)
+{
+ if (!default_ls) {
+ dlm_lshandle_t ls;
+
+ /* This isn't the race it looks, create_lockspace will
+ * do the right thing if the lockspace has already been
+ * created.
+ */
+
+ ls = dlm_open_lockspace(DEFAULT_LOCKSPACE);
+ if (!ls)
+ ls = dlm_create_lockspace(DEFAULT_LOCKSPACE, 0600);
+ if (!ls)
+ return -1;
+
+ default_ls = (struct dlm_ls_info *)ls;
+ }
+ return 0;
+}
+
+static void detect_kernel_version(void)
+{
+ struct dlm_device_version v;
+ int rv;
+
+ rv = read(control_fd, &v, sizeof(struct dlm_device_version));
+ if (rv < 0) {
+ kernel_version.version[0] = 5;
+ kernel_version.version[1] = 0;
+ kernel_version.version[2] = 0;
+ } else {
+ kernel_version.version[0] = v.version[0];
+ kernel_version.version[1] = v.version[1];
+ kernel_version.version[2] = v.version[2];
+ }
+
+ kernel_version_detected = 1;
+}
+
+static int find_control_minor(int *minor)
+{
+ FILE *f;
+ char name[256];
+ int found = 0, m = 0;
+
+ f = fopen("/proc/misc", "r");
+ if (!f)
+ return -1;
+
+ while (!feof(f)) {
+ if (fscanf(f, "%d %s", &m, name) != 2)
+ continue;
+ if (strcmp(name, DLM_CONTROL_NAME))
+ continue;
+ found = 1;
+ break;
+ }
+ fclose(f);
+
+ if (found) {
+ *minor = m;
+ return 0;
+ }
+ return -1;
+}
+
+static int open_control_device(void)
+{
+ struct stat st;
+ int i, rv, minor, found = 0;
+
+ if (control_fd > -1)
+ goto out;
+
+ rv = find_control_minor(&minor);
+ if (rv < 0)
+ return -1;
+
+ /* wait for udev to create the device */
+
+ for (i = 0; i < 10; i++) {
+ if (stat(DLM_CONTROL_PATH, &st) == 0 &&
+ minor(st.st_rdev) == minor) {
+ found = 1;
+ break;
+ }
+ sleep(1);
+ continue;
+ }
+
+ if (!found)
+ return -1;
+
+ control_fd = open(DLM_CONTROL_PATH, O_RDWR);
+ if (control_fd == -1)
+ return -1;
+
+ out:
+ fcntl(control_fd, F_SETFD, 1);
+
+ if (!kernel_version_detected)
+ detect_kernel_version();
+ return 0;
+}
+
+/* the max number of characters in a sysfs device name, not including \0 */
+#define MAX_SYSFS_NAME 19
+
+static int find_udev_device(const char *lockspace, int minor, char *udev_path)
+{
+ char bname[PATH_MAX];
+ char tmp_path[PATH_MAX];
+ DIR *d;
+ struct dirent *de;
+ struct stat st;
+ size_t basename_len;
+ int i;
+
+ ls_dev_name(lockspace, udev_path, PATH_MAX);
+ snprintf(bname, PATH_MAX, DLM_PREFIX "%s", lockspace);
+ basename_len = strlen(bname);
+
+ for (i = 0; i < 10; i++) {
+
+ /* look for a device with the full name */
+
+ if (stat(udev_path, &st) == 0 && minor(st.st_rdev) == minor)
+ return 0;
+
+ if (basename_len < MAX_SYSFS_NAME) {
+ sleep(1);
+ continue;
+ }
+
+ /* look for a device with a truncated name */
+
+ d = opendir(MISC_PREFIX);
+ while ((de = readdir(d))) {
+ if (de->d_name[0] == '.')
+ continue;
+ if (strlen(de->d_name) < MAX_SYSFS_NAME)
+ continue;
+ if (strncmp(de->d_name, bname, MAX_SYSFS_NAME))
+ continue;
+ snprintf(tmp_path, PATH_MAX, MISC_PREFIX "%s",
+ de->d_name);
+ if (stat(tmp_path, &st))
+ continue;
+ if (minor(st.st_rdev) != minor)
+ continue;
+
+ /* truncated name */
+ strncpy(udev_path, tmp_path, PATH_MAX);
+ closedir(d);
+ return 0;
+ }
+ closedir(d);
+ sleep(1);
+ }
+
+ return -1;
+}
+
+/*
+ * do_dlm_dispatch()
+ * Read an ast from the kernel.
+ */
+
+static int do_dlm_dispatch_v5(int fd)
+{
+ char resultbuf[sizeof(struct dlm_lock_result_v5) + DLM_USER_LVB_LEN];
+ struct dlm_lock_result_v5 *result = (struct dlm_lock_result_v5 *)resultbuf;
+ char *fullresult = NULL;
+ int status;
+ void (*astaddr)(void *astarg);
+
+ status = read(fd, result, sizeof(resultbuf));
+ if (status <= 0)
+ return -1;
+
+ /* This shouldn't happen any more, can probably be removed */
+
+ if (result->length != status) {
+ int newstat;
+
+ fullresult = malloc(result->length);
+ if (!fullresult)
+ return -1;
+
+ newstat = read(fd, (struct dlm_lock_result_v5 *)fullresult,
+ result->length);
+
+ /* If it read OK then use the new data. otherwise we can
+ still deliver the AST, it just might not have all the
+ info in it...hmmm */
+
+ if (newstat == result->length)
+ result = (struct dlm_lock_result_v5 *)fullresult;
+ } else {
+ fullresult = resultbuf;
+ }
+
+
+ /* Copy lksb to user's buffer - except the LVB ptr */
+ memcpy(result->user_lksb, &result->lksb,
+ sizeof(struct dlm_lksb) - sizeof(char*));
+
+ /* Flip the status. Kernel space likes negative return codes,
+ userspace positive ones */
+ result->user_lksb->sb_status = -result->user_lksb->sb_status;
+
+ /* Copy optional items */
+ if (result->lvb_offset)
+ memcpy(result->user_lksb->sb_lvbptr,
+ fullresult + result->lvb_offset, DLM_LVB_LEN);
+
+ /* Call AST */
+ if (result->user_astaddr) {
+ astaddr = result->user_astaddr;
+ astaddr(result->user_astparam);
+ }
+
+ if (fullresult != resultbuf)
+ free(fullresult);
+
+ return 0;
+}
+
+static int do_dlm_dispatch_v6(int fd)
+{
+ char resultbuf[sizeof(struct dlm_lock_result) + DLM_USER_LVB_LEN];
+ struct dlm_lock_result *result = (struct dlm_lock_result *)resultbuf;
+ int status;
+ void (*astaddr)(void *astarg);
+
+ status = read(fd, result, sizeof(resultbuf));
+ if (status <= 0)
+ return -1;
+
+ /* Copy lksb to user's buffer - except the LVB ptr */
+ memcpy(result->user_lksb, &result->lksb,
+ sizeof(struct dlm_lksb) - sizeof(char*));
+
+ /* Copy lvb to user's buffer */
+ if (result->lvb_offset)
+ memcpy(result->user_lksb->sb_lvbptr,
+ (char *)result + result->lvb_offset, DLM_LVB_LEN);
+
+ result->user_lksb->sb_status = -result->user_lksb->sb_status;
+
+ if (result->user_astaddr) {
+ astaddr = result->user_astaddr;
+ astaddr(result->user_astparam);
+ }
+
+ return 0;
+}
+
+static int do_dlm_dispatch(int fd)
+{
+ if (kernel_version.version[0] == 5)
+ return do_dlm_dispatch_v5(fd);
+ else
+ return do_dlm_dispatch_v6(fd);
+}
+
+
+/*
+ * sync_write()
+ * Helper routine which supports the synchronous DLM calls. This
+ * writes a parameter block down to the DLM and waits for the
+ * operation to complete. This hides the different completion mechanism
+ * used when called from the main thread or the DLM 'AST' thread.
+ */
+
+#ifdef _REENTRANT
+
+static int sync_write_v5(struct dlm_ls_info *lsinfo,
+ struct dlm_write_request_v5 *req, int len)
+{
+ struct lock_wait lwait;
+ int status;
+
+ if (pthread_self() == lsinfo->tid) {
+ /* This is the DLM worker thread, don't use lwait to sync */
+ req->i.lock.castaddr = dummy_ast_routine;
+ req->i.lock.castparam = NULL;
+
+ status = write(lsinfo->fd, req, len);
+ if (status < 0)
+ return -1;
+
+ while (req->i.lock.lksb->sb_status == EINPROG) {
+ do_dlm_dispatch_v5(lsinfo->fd);
+ }
+ } else {
+ pthread_cond_init(&lwait.cond, NULL);
+ pthread_mutex_init(&lwait.mutex, NULL);
+ pthread_mutex_lock(&lwait.mutex);
+
+ req->i.lock.castaddr = sync_ast_routine;
+ req->i.lock.castparam = &lwait;
+
+ status = write(lsinfo->fd, req, len);
+ if (status < 0)
+ return -1;
+
+ pthread_cond_wait(&lwait.cond, &lwait.mutex);
+ pthread_mutex_unlock(&lwait.mutex);
+ }
+
+ return status; /* lock status is in the lksb */
+}
+
+static int sync_write_v6(struct dlm_ls_info *lsinfo,
+ struct dlm_write_request *req, int len)
+{
+ struct lock_wait lwait;
+ int status;
+
+ if (pthread_self() == lsinfo->tid) {
+ /* This is the DLM worker thread, don't use lwait to sync */
+ req->i.lock.castaddr = dummy_ast_routine;
+ req->i.lock.castparam = NULL;
+
+ status = write(lsinfo->fd, req, len);
+ if (status < 0)
+ return -1;
+
+ while (req->i.lock.lksb->sb_status == EINPROG) {
+ do_dlm_dispatch_v6(lsinfo->fd);
+ }
+ } else {
+ pthread_cond_init(&lwait.cond, NULL);
+ pthread_mutex_init(&lwait.mutex, NULL);
+ pthread_mutex_lock(&lwait.mutex);
+
+ req->i.lock.castaddr = sync_ast_routine;
+ req->i.lock.castparam = &lwait;
+
+ status = write(lsinfo->fd, req, len);
+ if (status < 0)
+ return -1;
+
+ pthread_cond_wait(&lwait.cond, &lwait.mutex);
+ pthread_mutex_unlock(&lwait.mutex);
+ }
+
+ return status; /* lock status is in the lksb */
+}
+
+#else /* _REENTRANT */
+
+static int sync_write_v5(struct dlm_ls_info *lsinfo,
+ struct dlm_write_request_v5 *req, int len)
+{
+ int status;
+
+ req->i.lock.castaddr = dummy_ast_routine;
+ req->i.lock.castparam = NULL;
+
+ status = write(lsinfo->fd, req, len);
+ if (status < 0)
+ return -1;
+
+ while (req->i.lock.lksb->sb_status == EINPROG) {
+ do_dlm_dispatch_v5(lsinfo->fd);
+ }
+
+ errno = req->i.lock.lksb->sb_status;
+ if (errno && errno != EUNLOCK)
+ return -1;
+ return 0;
+}
+
+static int sync_write_v6(struct dlm_ls_info *lsinfo,
+ struct dlm_write_request *req, int len)
+{
+ int status;
+
+ req->i.lock.castaddr = dummy_ast_routine;
+ req->i.lock.castparam = NULL;
+
+ status = write(lsinfo->fd, req, len);
+ if (status < 0)
+ return -1;
+
+ while (req->i.lock.lksb->sb_status == EINPROG) {
+ do_dlm_dispatch_v6(lsinfo->fd);
+ }
+
+ errno = req->i.lock.lksb->sb_status;
+ if (errno && errno != EUNLOCK)
+ return -1;
+ return 0;
+}
+
+#endif /* _REENTRANT */
+
+
+/*
+ * Lock
+ * All the ways to request/convert a lock
+ */
+
+static int ls_lock_v5(dlm_lshandle_t ls,
+ uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent,
+ void (*astaddr) (void *astarg),
+ void *astarg,
+ void (*bastaddr) (void *astarg))
+{
+ char parambuf[sizeof(struct dlm_write_request_v5) + DLM_RESNAME_MAXLEN];
+ struct dlm_write_request_v5 *req = (struct dlm_write_request_v5 *)parambuf;
+ struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)ls;
+ int status;
+ int len;
+
+ memset(req, 0, sizeof(*req));
+ set_version_v5(req);
+
+ req->cmd = DLM_USER_LOCK;
+ req->i.lock.mode = mode;
+ req->i.lock.flags = (flags & ~LKF_WAIT);
+ req->i.lock.lkid = lksb->sb_lkid;
+ req->i.lock.parent = parent;
+ req->i.lock.lksb = lksb;
+ req->i.lock.castaddr = astaddr;
+ req->i.lock.bastaddr = bastaddr;
+ req->i.lock.castparam = astarg; /* same comp and blocking ast arg */
+ req->i.lock.bastparam = astarg;
+
+ if (flags & LKF_CONVERT) {
+ req->i.lock.namelen = 0;
+ } else {
+ if (namelen > DLM_RESNAME_MAXLEN) {
+ errno = EINVAL;
+ return -1;
+ }
+ req->i.lock.namelen = namelen;
+ memcpy(req->i.lock.name, name, namelen);
+ }
+
+ if (flags & LKF_VALBLK) {
+ memcpy(req->i.lock.lvb, lksb->sb_lvbptr, DLM_LVB_LEN);
+ }
+
+ len = sizeof(struct dlm_write_request_v5) + namelen;
+ lksb->sb_status = EINPROG;
+
+ if (flags & LKF_WAIT)
+ status = sync_write_v5(lsinfo, req, len);
+ else
+ status = write(lsinfo->fd, req, len);
+
+ if (status < 0)
+ return -1;
+
+ /*
+ * the lock id is the return value from the write on the device
+ */
+
+ if (status > 0)
+ lksb->sb_lkid = status;
+ return 0;
+}
+
+static int ls_lock_v6(dlm_lshandle_t ls,
+ uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent,
+ void (*astaddr) (void *astarg),
+ void *astarg,
+ void (*bastaddr) (void *astarg),
+ uint64_t *xid,
+ uint64_t *timeout)
+{
+ char parambuf[sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN];
+ struct dlm_write_request *req = (struct dlm_write_request *)parambuf;
+ struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)ls;
+ int status;
+ int len;
+
+ memset(req, 0, sizeof(*req));
+ set_version_v6(req);
+
+ req->cmd = DLM_USER_LOCK;
+ req->i.lock.mode = mode;
+ req->i.lock.flags = (flags & ~LKF_WAIT);
+ req->i.lock.lkid = lksb->sb_lkid;
+ req->i.lock.parent = parent;
+ req->i.lock.lksb = lksb;
+ req->i.lock.castaddr = astaddr;
+ req->i.lock.bastaddr = bastaddr;
+ req->i.lock.castparam = astarg; /* same comp and blocking ast arg */
+ req->i.lock.bastparam = astarg;
+
+ if (xid)
+ req->i.lock.xid = *xid;
+ if (timeout)
+ req->i.lock.timeout = *timeout;
+
+ if (flags & LKF_CONVERT) {
+ req->i.lock.namelen = 0;
+ } else {
+ if (namelen > DLM_RESNAME_MAXLEN) {
+ errno = EINVAL;
+ return -1;
+ }
+ req->i.lock.namelen = namelen;
+ memcpy(req->i.lock.name, name, namelen);
+ }
+
+ if (flags & LKF_VALBLK) {
+ memcpy(req->i.lock.lvb, lksb->sb_lvbptr, DLM_LVB_LEN);
+ }
+
+ len = sizeof(struct dlm_write_request) + namelen;
+ lksb->sb_status = EINPROG;
+
+ if (flags & LKF_WAIT)
+ status = sync_write_v6(lsinfo, req, len);
+ else
+ status = write(lsinfo->fd, req, len);
+
+ if (status < 0)
+ return -1;
+
+ /*
+ * the lock id is the return value from the write on the device
+ */
+
+ if (status > 0)
+ lksb->sb_lkid = status;
+ return 0;
+}
+
+static int ls_lock(dlm_lshandle_t ls,
+ uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent,
+ void (*astaddr) (void *astarg),
+ void *astarg,
+ void (*bastaddr) (void *astarg),
+ void *range)
+{
+ /* no support for range locks */
+ if (range) {
+ errno = ENOSYS;
+ return -1;
+ }
+
+ if (flags & LKF_VALBLK && !lksb->sb_lvbptr) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (kernel_version.version[0] == 5)
+ return ls_lock_v5(ls, mode, lksb, flags, name, namelen, parent,
+ astaddr, astarg, bastaddr);
+ else
+ return ls_lock_v6(ls, mode, lksb, flags, name, namelen, parent,
+ astaddr, astarg, bastaddr, NULL, NULL);
+}
+
+/*
+ * Extended async locking in own lockspace
+ */
+int dlm_ls_lockx(dlm_lshandle_t ls,
+ uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent,
+ void (*astaddr) (void *astarg),
+ void *astarg,
+ void (*bastaddr) (void *astarg),
+ uint64_t *xid,
+ uint64_t *timeout)
+{
+ if (kernel_version.version[0] < 6) {
+ errno = ENOSYS;
+ return -1;
+ }
+
+ return ls_lock_v6(ls, mode, lksb, flags, name, namelen, parent,
+ astaddr, astarg, bastaddr, xid, timeout);
+}
+
+/*
+ * Async locking in own lockspace
+ */
+int dlm_ls_lock(dlm_lshandle_t ls,
+ uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent,
+ void (*astaddr) (void *astarg),
+ void *astarg,
+ void (*bastaddr) (void *astarg),
+ void *range)
+{
+ return ls_lock(ls, mode, lksb, flags, name, namelen, parent,
+ astaddr, astarg, bastaddr, range);
+}
+
+/*
+ * Sync locking in own lockspace
+ */
+int dlm_ls_lock_wait(dlm_lshandle_t ls,
+ uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent,
+ void *bastarg,
+ void (*bastaddr) (void *bastarg),
+ void *range)
+{
+ return ls_lock(ls, mode, lksb, flags | LKF_WAIT, name, namelen, parent,
+ NULL, bastarg, bastaddr, range);
+}
+
+/*
+ * Async locking in the default lockspace
+ */
+int dlm_lock(uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent,
+ void (*astaddr) (void *astarg),
+ void *astarg,
+ void (*bastaddr) (void *astarg),
+ void *range)
+{
+ if (open_default_lockspace())
+ return -1;
+
+ return ls_lock(default_ls, mode, lksb, flags, name, namelen, parent,
+ astaddr, astarg, bastaddr, range);
+}
+
+/*
+ * Sync locking in the default lockspace
+ */
+int dlm_lock_wait(uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent,
+ void *bastarg,
+ void (*bastaddr) (void *bastarg),
+ void *range)
+{
+ if (open_default_lockspace())
+ return -1;
+
+ return ls_lock(default_ls, mode, lksb, flags | LKF_WAIT, name, namelen,
+ parent, NULL, bastarg, bastaddr, range);
+}
+
+
+/*
+ * Unlock
+ * All the ways to unlock/cancel a lock
+ */
+
+static int ls_unlock_v5(struct dlm_ls_info *lsinfo, uint32_t lkid,
+ uint32_t flags, struct dlm_lksb *lksb, void *astarg)
+{
+ struct dlm_write_request_v5 req;
+
+ set_version_v5(&req);
+ req.cmd = DLM_USER_UNLOCK;
+ req.i.lock.lkid = lkid;
+ req.i.lock.flags = (flags & ~LKF_WAIT);
+ req.i.lock.lksb = lksb;
+ req.i.lock.castparam = astarg;
+ /* DLM_USER_UNLOCK will default to existing completion AST */
+ req.i.lock.castaddr = 0;
+ lksb->sb_status = EINPROG;
+
+ if (flags & LKF_WAIT)
+ return sync_write_v5(lsinfo, &req, sizeof(req));
+ else
+ return write(lsinfo->fd, &req, sizeof(req));
+}
+
+static int ls_unlock_v6(struct dlm_ls_info *lsinfo, uint32_t lkid,
+ uint32_t flags, struct dlm_lksb *lksb, void *astarg)
+{
+ struct dlm_write_request req;
+
+ set_version_v6(&req);
+ req.cmd = DLM_USER_UNLOCK;
+ req.i.lock.lkid = lkid;
+ req.i.lock.flags = (flags & ~LKF_WAIT);
+ req.i.lock.lksb = lksb;
+ req.i.lock.namelen = 0;
+ req.i.lock.castparam = astarg;
+ /* DLM_USER_UNLOCK will default to existing completion AST */
+ req.i.lock.castaddr = 0;
+ lksb->sb_status = EINPROG;
+
+ if (flags & LKF_WAIT)
+ return sync_write_v6(lsinfo, &req, sizeof(req));
+ else
+ return write(lsinfo->fd, &req, sizeof(req));
+}
+
+int dlm_ls_unlock(dlm_lshandle_t ls, uint32_t lkid, uint32_t flags,
+ struct dlm_lksb *lksb, void *astarg)
+{
+ struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)ls;
+ int status;
+
+ if (ls == NULL) {
+ errno = ENOTCONN;
+ return -1;
+ }
+
+ if (!lkid) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (kernel_version.version[0] == 5)
+ status = ls_unlock_v5(lsinfo, lkid, flags, lksb, astarg);
+ else
+ status = ls_unlock_v6(lsinfo, lkid, flags, lksb, astarg);
+
+ if (status < 0)
+ return -1;
+ return 0;
+}
+
+int dlm_ls_unlock_wait(dlm_lshandle_t ls, uint32_t lkid, uint32_t flags,
+ struct dlm_lksb *lksb)
+{
+ return dlm_ls_unlock(ls, lkid, flags | LKF_WAIT, lksb, NULL);
+}
+
+int dlm_unlock_wait(uint32_t lkid, uint32_t flags, struct dlm_lksb *lksb)
+{
+ return dlm_ls_unlock_wait(default_ls, lkid, flags | LKF_WAIT, lksb);
+}
+
+int dlm_unlock(uint32_t lkid, uint32_t flags, struct dlm_lksb *lksb,
+ void *astarg)
+{
+ return dlm_ls_unlock(default_ls, lkid, flags, lksb, astarg);
+}
+
+int dlm_ls_deadlock_cancel(dlm_lshandle_t ls, uint32_t lkid, uint32_t flags)
+{
+ struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)ls;
+ struct dlm_write_request req;
+
+ if (kernel_version.version[0] < 6) {
+ errno = ENOSYS;
+ return -1;
+ }
+
+ if (ls == NULL) {
+ errno = ENOTCONN;
+ return -1;
+ }
+
+ if (!lkid) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ set_version_v6(&req);
+ req.cmd = DLM_USER_DEADLOCK;
+ req.i.lock.lkid = lkid;
+ req.i.lock.flags = flags;
+
+ return write(lsinfo->fd, &req, sizeof(req));
+}
+
+
+/*
+ * Purge
+ * Clear away orphan locks
+ */
+
+int dlm_ls_purge(dlm_lshandle_t ls, int nodeid, int pid)
+{
+ struct dlm_write_request req;
+ struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)ls;
+ int status;
+
+ if (kernel_version.version[0] < 6) {
+ errno = ENOSYS;
+ return -1;
+ }
+
+ if (ls == NULL) {
+ errno = ENOTCONN;
+ return -1;
+ }
+
+ set_version_v6(&req);
+ req.cmd = DLM_USER_PURGE;
+ req.i.purge.nodeid = nodeid;
+ req.i.purge.pid = pid;
+
+ status = write(lsinfo->fd, &req, sizeof(req));
+
+ if (status < 0)
+ return -1;
+ return 0;
+}
+
+
+/* These two routines are for users that want to
+ * do their own fd handling.
+ * This allows a non-threaded app to use the DLM.
+ */
+int dlm_get_fd(void)
+{
+ if (default_ls)
+ {
+ return default_ls->fd;
+ }
+ else
+ {
+ if (open_default_lockspace())
+ return -1;
+ else
+ return default_ls->fd;
+ }
+}
+
+/*
+ * Run do_dlm_dispatch() on <fd> repeatedly, with the descriptor temporarily
+ * switched to non-blocking mode, until it returns non-zero.
+ *
+ * A negative result with errno == EAGAIN is reported as 0.  The original
+ * file status flags are restored before returning, and errno is preserved
+ * across that restore so the caller sees the dispatch error.
+ * Returns -1 (errno set by fcntl()) if the descriptor flags cannot be read
+ * or changed.
+ */
+int dlm_dispatch(int fd)
+{
+	int status;
+	int fdflags;
+	int saved_errno;
+
+	fdflags = fcntl(fd, F_GETFL, 0);
+	if (fdflags < 0)
+		return -1;
+
+	/* Without O_NONBLOCK the loop below could block, so give up here. */
+	if (fcntl(fd, F_SETFL, fdflags | O_NONBLOCK) < 0)
+		return -1;
+
+	do {
+		status = do_dlm_dispatch(fd);
+	} while (status == 0);
+
+	/* EAGAIN is not an error */
+	if (status < 0 && errno == EAGAIN)
+		status = 0;
+
+	/* Restoring the flags must not overwrite the dispatch errno. */
+	saved_errno = errno;
+	fcntl(fd, F_SETFL, fdflags);
+	errno = saved_errno;
+
+	return status;
+}
+
+/*
+ * Converts a lockspace handle into a file descriptor.
+ * Returns -1 with errno set to ENOTCONN when the handle is NULL, matching
+ * the other dlm_ls_* entry points that validate their handle.
+ */
+int dlm_ls_get_fd(dlm_lshandle_t lockspace)
+{
+	struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)lockspace;
+
+	if (lsinfo == NULL) {
+		errno = ENOTCONN;
+		return -1;
+	}
+
+	return lsinfo->fd;
+}
+
+#ifdef _REENTRANT
+/*
+ * Thread entry point: calls do_dlm_dispatch() on the lockspace's fd in an
+ * endless loop.  <lsinfo> is the struct dlm_ls_info of the lockspace.
+ */
+static void *dlm_recv_thread(void *lsinfo)
+{
+	struct dlm_ls_info *lsi = lsinfo;
+
+	while (1)
+		do_dlm_dispatch(lsi->fd);
+
+	return NULL;
+}
+
+/*
+ * Multi-threaded callers normally use this.
+ *
+ * Opens the default lockspace (via open_default_lockspace()) and starts
+ * dlm_recv_thread() for it.  Returns 0 on success.  Returns -1 when the
+ * lockspace cannot be opened, with errno = EEXIST when a thread already
+ * exists, or with errno set to the error number reported by
+ * pthread_create(); in that last case the default lockspace is closed and
+ * freed again.
+ */
+int dlm_pthread_init(void)
+{
+	int rv;
+
+	if (open_default_lockspace())
+		return -1;
+
+	if (default_ls->tid) {
+		errno = EEXIST;
+		return -1;
+	}
+
+	/* pthread_create() reports failure through its return value and does
+	   not set errno, so translate it to the -1/errno convention here. */
+	rv = pthread_create(&default_ls->tid, NULL, dlm_recv_thread, default_ls);
+	if (rv) {
+		close(default_ls->fd);
+		free(default_ls);
+		default_ls = NULL;
+		errno = rv;
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * And same, for those with their own lockspace.
+ *
+ * Starts dlm_recv_thread() for <ls>.  Returns 0 on success.  On failure
+ * returns -1 with errno set to ENOTCONN (ls is NULL), EEXIST (the lockspace
+ * already has a thread) or the error number reported by pthread_create().
+ */
+int dlm_ls_pthread_init(dlm_lshandle_t ls)
+{
+	struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)ls;
+	int rv;
+
+	if (lsinfo == NULL) {
+		errno = ENOTCONN;
+		return -1;
+	}
+
+	if (lsinfo->tid) {
+		errno = EEXIST;
+		return -1;
+	}
+
+	/* pthread_create() returns an error number instead of setting errno;
+	   convert it so every failure path here is -1 plus errno. */
+	rv = pthread_create(&lsinfo->tid, NULL, dlm_recv_thread, (void *)ls);
+	if (rv) {
+		errno = rv;
+		return -1;
+	}
+	return 0;
+}
+#endif
+
+/*
+ * Lockspace manipulation functions
+ * Privileged users (checked by the kernel) can create/release lockspaces
+ */
+
+/*
+ * Build a version-5 DLM_USER_CREATE_LOCKSPACE request for <name> and write
+ * it to the control device.  Returns the result of write(), which
+ * create_lockspace() uses as the minor number, or -1 with errno = EINVAL
+ * when the name is longer than DLM_LOCKSPACE_LEN.
+ */
+static int create_lockspace_v5(const char *name, uint32_t flags)
+{
+	char buf[sizeof(struct dlm_write_request_v5) + DLM_LOCKSPACE_LEN];
+	struct dlm_write_request_v5 *rq = (struct dlm_write_request_v5 *)buf;
+	int len = strlen(name);
+
+	memset(buf, 0, sizeof(buf));
+	set_version_v5(rq);
+
+	rq->cmd = DLM_USER_CREATE_LOCKSPACE;
+	rq->i.lspace.flags = flags;
+
+	if (len > DLM_LOCKSPACE_LEN) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	/* The name is sent without a terminator; the length is implied by
+	   the size passed to write(). */
+	memcpy(rq->i.lspace.name, name, len);
+
+	return write(control_fd, rq, sizeof(*rq) + len);
+}
+
+/*
+ * Build a version-6 DLM_USER_CREATE_LOCKSPACE request for <name> and write
+ * it to the control device.  Returns the result of write(), which
+ * create_lockspace() uses as the minor number, or -1 with errno = EINVAL
+ * when the name is longer than DLM_LOCKSPACE_LEN.
+ */
+static int create_lockspace_v6(const char *name, uint32_t flags)
+{
+	char buf[sizeof(struct dlm_write_request) + DLM_LOCKSPACE_LEN];
+	struct dlm_write_request *rq = (struct dlm_write_request *)buf;
+	int len = strlen(name);
+
+	memset(buf, 0, sizeof(buf));
+	set_version_v6(rq);
+
+	rq->cmd = DLM_USER_CREATE_LOCKSPACE;
+	rq->i.lspace.flags = flags;
+
+	if (len > DLM_LOCKSPACE_LEN) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	/* The name is sent without a terminator; the length is implied by
+	   the size passed to write(). */
+	memcpy(rq->i.lspace.name, name, len);
+
+	return write(control_fd, rq, sizeof(*rq) + len);
+}
+
+/*
+ * Common implementation of dlm_create_lockspace() and dlm_new_lockspace().
+ *
+ * Creates the lockspace through the control device, locates the device node
+ * with find_udev_device(), adds a symlink when that node's path differs from
+ * the expected one, then opens the device.  Returns a newly allocated
+ * struct dlm_ls_info as the handle, or NULL on failure (errno is preserved
+ * across the cleanup).
+ */
+static dlm_lshandle_t create_lockspace(const char *name, mode_t mode,
+				       uint32_t flags)
+{
+	char dev_path[PATH_MAX];
+	char udev_path[PATH_MAX];
+	struct dlm_ls_info *newls;
+	int minor, saved_errno;
+
+	/* We use the control device for creating lockspaces. */
+	if (open_control_device())
+		return NULL;
+
+	newls = malloc(sizeof(struct dlm_ls_info));
+	if (newls == NULL)
+		return NULL;
+
+	ls_dev_name(name, dev_path, sizeof(dev_path));
+
+	minor = (kernel_version.version[0] == 5) ?
+		create_lockspace_v5(name, flags) :
+		create_lockspace_v6(name, flags);
+	if (minor < 0)
+		goto fail;
+
+	/* Wait for udev to create the device; the device it creates may
+	   have a truncated name due to the sysfs device name limit. */
+	if (find_udev_device(name, minor, udev_path))
+		goto fail;
+
+	/* If the symlink already exists, find_udev_device() will return
+	   it and the paths compare equal, so no symlink is made. */
+	if (strcmp(dev_path, udev_path) != 0 && symlink(udev_path, dev_path))
+		goto fail;
+
+	/* Open it and return the struct as a handle */
+	newls->fd = open(dev_path, O_RDWR);
+	if (newls->fd == -1)
+		goto fail;
+
+	if (mode)
+		fchmod(newls->fd, mode);
+	newls->tid = 0;
+	fcntl(newls->fd, F_SETFD, 1);
+	return (dlm_lshandle_t)newls;
+
+fail:
+	saved_errno = errno;
+	free(newls);
+	errno = saved_errno;
+	return NULL;
+}
+
+/*
+ * Create and open lockspace <name> with caller-supplied creation flags.
+ * Returns the handle from create_lockspace(), or NULL on failure.
+ */
+dlm_lshandle_t dlm_new_lockspace(const char *name, mode_t mode, uint32_t flags)
+{
+	dlm_lshandle_t handle = create_lockspace(name, mode, flags);
+
+	return handle;
+}
+
+/*
+ * Create and open lockspace <name> with no creation flags.
+ * Returns the handle from create_lockspace(), or NULL on failure.
+ */
+dlm_lshandle_t dlm_create_lockspace(const char *name, mode_t mode)
+{
+	const uint32_t no_flags = 0;
+
+	return create_lockspace(name, mode, no_flags);
+}
+
+/*
+ * Write a version-5 DLM_USER_REMOVE_LOCKSPACE request for device <minor> to
+ * the control device.  Returns the result of write().
+ */
+static int release_lockspace_v5(uint32_t minor, uint32_t flags)
+{
+	struct dlm_write_request_v5 req;
+
+	/* Only a few fields are filled in below; clear the whole request so
+	   no uninitialised stack bytes are written to the kernel. */
+	memset(&req, 0, sizeof(req));
+	set_version_v5(&req);
+	req.cmd = DLM_USER_REMOVE_LOCKSPACE;
+	req.i.lspace.minor = minor;
+	req.i.lspace.flags = flags;
+
+	return write(control_fd, &req, sizeof(req));
+}
+
+/*
+ * Write a version-6 DLM_USER_REMOVE_LOCKSPACE request for device <minor> to
+ * the control device.  Returns the result of write().
+ */
+static int release_lockspace_v6(uint32_t minor, uint32_t flags)
+{
+	struct dlm_write_request req;
+
+	/* Only a few fields are filled in below; clear the whole request so
+	   no uninitialised stack bytes are written to the kernel. */
+	memset(&req, 0, sizeof(req));
+	set_version_v6(&req);
+	req.cmd = DLM_USER_REMOVE_LOCKSPACE;
+	req.i.lspace.minor = minor;
+	req.i.lspace.flags = flags;
+
+	return write(control_fd, &req, sizeof(req));
+}
+
+/*
+ * Send the remove-lockspace request in the format matching the kernel
+ * interface: the v5 layout when the major version is 5, else the v6 layout.
+ */
+static int release_lockspace(uint32_t minor, uint32_t flags)
+{
+	return (kernel_version.version[0] == 5) ?
+		release_lockspace_v5(minor, flags) :
+		release_lockspace_v6(minor, flags);
+}
+
+/*
+ * Close lockspace handle <ls> and ask the kernel to remove lockspace <name>.
+ *
+ * name:  lockspace name, used to locate the device path
+ * ls:    handle of the open lockspace; NULL yields -1 with errno = ENOTCONN
+ * force: non-zero adds DLM_USER_LSFLG_FORCEFREE to the request
+ *
+ * Returns 0 once the release request has been issued, or -1 when the handle
+ * is NULL, fstat() on the lockspace fd fails, or the control device cannot
+ * be opened.  If the device path is a symlink, the symlink is removed when
+ * the device can no longer be opened afterwards.
+ */
+int dlm_release_lockspace(const char *name, dlm_lshandle_t ls, int force)
+{
+	char dev_path[PATH_MAX];
+	struct stat st;
+	struct dlm_ls_info *lsinfo = (struct dlm_ls_info *)ls;
+	uint32_t flags = 0;
+	int fd, is_symlink = 0;
+
+	/* Reject a NULL handle up front instead of crashing in fstat(). */
+	if (ls == NULL) {
+		errno = ENOTCONN;
+		return -1;
+	}
+
+	ls_dev_name(name, dev_path, sizeof(dev_path));
+	if (!lstat(dev_path, &st) && S_ISLNK(st.st_mode))
+		is_symlink = 1;
+
+	/* We need the minor number */
+	if (fstat(lsinfo->fd, &st))
+		return -1;
+
+	/* Close the lockspace first if it's in use */
+	ls_pthread_cleanup(lsinfo);
+
+	if (open_control_device())
+		return -1;
+
+	if (force)
+		flags = DLM_USER_LSFLG_FORCEFREE;
+
+	/* NOTE(review): the result of the release request is ignored, so a
+	   kernel-side failure is not reported to the caller - confirm that
+	   this is intended. */
+	release_lockspace(minor(st.st_rdev), flags);
+
+	if (!is_symlink)
+		return 0;
+
+	/* The following open is used to detect if our release was the last.
+	   It will fail if our release was the last, because either:
+	   . udev has already removed the truncated sysfs device name (ENOENT)
+	   . the misc device has been deregistered in the kernel (ENODEV)
+	     (the deregister completes before release returns)
+
+	   So, if the open fails, we know that our release was the last,
+	   udev will be removing the device with the truncated name (if it
+	   hasn't already), and we should remove the symlink. */
+
+	fd = open(dev_path, O_RDWR);
+	if (fd < 0)
+		unlink(dev_path);
+	else
+		close(fd); /* our release was not the last */
+
+	return 0;
+}
+
+/*
+ * Normal users just open/close lockspaces
+ */
+
+/*
+ * Open the device of the existing lockspace <name> and return a newly
+ * allocated handle for it.  Returns NULL when the control device cannot be
+ * opened, the allocation fails, or open() fails (errno from open() is
+ * preserved across the cleanup).
+ */
+dlm_lshandle_t dlm_open_lockspace(const char *name)
+{
+	char dev_name[PATH_MAX];
+	struct dlm_ls_info *handle;
+
+	/* Need to detect kernel version */
+	if (open_control_device())
+		return NULL;
+
+	handle = malloc(sizeof(struct dlm_ls_info));
+	if (handle == NULL)
+		return NULL;
+
+	handle->tid = 0;
+	ls_dev_name(name, dev_name, sizeof(dev_name));
+
+	handle->fd = open(dev_name, O_RDWR);
+	if (handle->fd == -1) {
+		int open_errno = errno;
+
+		free(handle);
+		errno = open_errno;
+		return NULL;
+	}
+
+	fcntl(handle->fd, F_SETFD, 1);
+	return (dlm_lshandle_t)handle;
+}
+
+/*
+ * Close a lockspace handle by handing it to ls_pthread_cleanup().
+ * Always returns 0.
+ */
+int dlm_close_lockspace(dlm_lshandle_t ls)
+{
+	ls_pthread_cleanup((struct dlm_ls_info *)ls);
+
+	return 0;
+}
+
+/*
+ * Report the kernel interface version recorded in kernel_version.
+ * The control device is opened first; returns -1 if that fails, otherwise
+ * stores the three version components and returns 0.
+ */
+int dlm_kernel_version(uint32_t *major, uint32_t *minor, uint32_t *patch)
+{
+	if (open_control_device() != 0)
+		return -1;
+
+	*major = kernel_version.version[0];
+	*minor = kernel_version.version[1];
+	*patch = kernel_version.version[2];
+
+	return 0;
+}
+
+/*
+ * Report the device interface version this library was built against
+ * (the DLM_DEVICE_VERSION_* constants).
+ */
+void dlm_library_version(uint32_t *major, uint32_t *minor, uint32_t *patch)
+{
+	*patch = DLM_DEVICE_VERSION_PATCH;
+	*minor = DLM_DEVICE_VERSION_MINOR;
+	*major = DLM_DEVICE_VERSION_MAJOR;
+}
+
diff --git a/libdlm/libdlm.h b/libdlm/libdlm.h
new file mode 100644
index 0000000..17a552c
--- /dev/null
+++ b/libdlm/libdlm.h
@@ -0,0 +1,275 @@
+#ifndef __LIBDLM_H
+#define __LIBDLM_H
+
+/*
+ * Typedefs for things that are compatible with the kernel but replicated here
+ * so that users only need the libdlm include file. libdlm itself needs the
+ * full kernel file so shouldn't use these.
+ */
+
+#define DLM_LVB_LEN 32
+
+#ifndef BUILDING_LIBDLM
+
+/*
+ * These two lengths are copied from linux/dlmconstants.h
+ * They are the max length of a lockspace name and the max length of a
+ * resource name.
+ */
+
+#define DLM_LOCKSPACE_LEN 64
+#define DLM_RESNAME_MAXLEN 64
+
+struct dlm_lksb {
+ int sb_status; /* final status of the lock operation */
+ uint32_t sb_lkid; /* lock ID; returned on first use, input for conversions */
+ char sb_flags; /* completion flags (DLM_SBF_*) */
+ char *sb_lvbptr; /* optional pointer to the lock value block */
+};
+
+/* lksb flags */
+#define DLM_SBF_DEMOTED 0x01
+#define DLM_SBF_VALNOTVALID 0x02
+#define DLM_SBF_ALTMODE 0x04
+
+/* dlm_new_lockspace flags */
+#define DLM_LSFL_NODIR 0x00000001
+#define DLM_LSFL_TIMEWARN 0x00000002
+#define DLM_LSFL_FS 0x00000004
+#define DLM_LSFL_NEWEXCL 0x00000008
+
+#endif
+
+
+#if 0
+/* Dummy definition to keep linkages */
+struct dlm_queryinfo;
+#endif
+
+extern int dlm_kernel_version(uint32_t *maj, uint32_t *min, uint32_t *patch);
+extern void dlm_library_version(uint32_t *maj, uint32_t *min, uint32_t *patch);
+
+
+/*
+ * Using the default lockspace
+ *
+ * lock_resource() - simple sync request or convert (requires pthreads)
+ * unlock_resource() - simple sync unlock (requires pthreads)
+ * dlm_lock() - async request or convert
+ * dlm_unlock() - async unlock or cancel
+ * dlm_lock_wait() - sync request or convert
+ * dlm_unlock_wait() - sync unlock or cancel
+ */
+
+#ifdef _REENTRANT
+extern int lock_resource(const char *resource, int mode, int flags, int *lockid);
+extern int unlock_resource(int lockid);
+#endif
+
+extern int dlm_lock(uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent, /* unused */
+ void (*astaddr) (void *astarg),
+ void *astarg,
+ void (*bastaddr) (void *astarg),
+ void *range); /* unused */
+
+extern int dlm_unlock(uint32_t lkid,
+ uint32_t flags,
+ struct dlm_lksb *lksb,
+ void *astarg);
+
+extern int dlm_lock_wait(uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent, /* unused */
+ void *bastarg,
+ void (*bastaddr) (void *bastarg),
+ void *range); /* unused */
+
+extern int dlm_unlock_wait(uint32_t lkid,
+ uint32_t flags,
+ struct dlm_lksb *lksb);
+
+
+/*
+ * These two are for users that want to do their own FD handling
+ *
+ * dlm_get_fd() - returns fd for the default lockspace for polling and dispatch
+ * dlm_dispatch() - dispatches pending asts and basts
+ */
+
+extern int dlm_get_fd(void);
+extern int dlm_dispatch(int fd);
+
+
+/*
+ * Creating your own lockspace
+ *
+ * dlm_create_lockspace() - create and open a lockspace and return a handle
+ * to it. Privileges are required to create/release.
+ * dlm_new_lockspace() - same as create but allows flags
+ * dlm_open_lockspace() - simply returns a handle for an existing lockspace and
+ * may be called by ordinary users.
+ * dlm_release_lockspace()
+ * dlm_close_lockspace()
+ * dlm_ls_get_fd()
+ *
+ * NOTE: that if you dlm_create_lockspace() then dlm_open_lockspace() you will
+ * have two open files on the same device. Hardly a major problem but I thought
+ * it worth pointing out.
+ */
+
+typedef void *dlm_lshandle_t;
+
+extern dlm_lshandle_t dlm_create_lockspace(const char *name, mode_t mode);
+extern int dlm_release_lockspace(const char *name, dlm_lshandle_t ls,
+ int force);
+extern dlm_lshandle_t dlm_open_lockspace(const char *name);
+extern int dlm_close_lockspace(dlm_lshandle_t ls);
+extern int dlm_ls_get_fd(dlm_lshandle_t ls);
+extern dlm_lshandle_t dlm_new_lockspace(const char *name, mode_t mode,
+ uint32_t flags);
+
+
+/*
+ * Using your own lockspace
+ *
+ * dlm_ls_lock()
+ * dlm_ls_lockx()
+ * dlm_ls_unlock()
+ * dlm_ls_lock_wait()
+ * dlm_ls_unlock_wait()
+ * dlm_ls_deadlock_cancel()
+ * dlm_ls_purge()
+ */
+
+extern int dlm_ls_lock(dlm_lshandle_t lockspace,
+ uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent, /* unused */
+ void (*astaddr) (void *astarg),
+ void *astarg,
+ void (*bastaddr) (void *astarg),
+ void *range); /* unused */
+
+extern int dlm_ls_lockx(dlm_lshandle_t lockspace,
+ uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent, /* unused */
+ void (*astaddr) (void *astarg),
+ void *astarg,
+ void (*bastaddr) (void *astarg),
+ uint64_t *xid,
+ uint64_t *timeout);
+
+extern int dlm_ls_unlock(dlm_lshandle_t lockspace,
+ uint32_t lkid,
+ uint32_t flags,
+ struct dlm_lksb *lksb,
+ void *astarg);
+
+extern int dlm_ls_lock_wait(dlm_lshandle_t lockspace,
+ uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent, /* unused */
+ void *bastarg,
+ void (*bastaddr) (void *bastarg),
+ void *range); /* unused */
+
+extern int dlm_ls_unlock_wait(dlm_lshandle_t lockspace,
+ uint32_t lkid,
+ uint32_t flags,
+ struct dlm_lksb *lksb);
+
+extern int dlm_ls_deadlock_cancel(dlm_lshandle_t ls,
+ uint32_t lkid,
+ uint32_t flags);
+
+extern int dlm_ls_purge(dlm_lshandle_t lockspace,
+ int nodeid,
+ int pid);
+
+
+/*
+ * For threaded applications
+ *
+ * dlm_pthread_init()
+ * dlm_ls_pthread_init() - call this before any locking operations and the ASTs
+ * will be delivered in their own thread.
+ * dlm_pthread_cleanup() - call the cleanup routine at application exit
+ * (optional) or, if the locking functions are in a
+ * shared library that is to be unloaded.
+ *
+ * dlm_close/release_lockspace() will tidy the threads for a non-default
+ * lockspace
+ */
+
+#ifdef _REENTRANT
+extern int dlm_pthread_init(void);
+extern int dlm_ls_pthread_init(dlm_lshandle_t lockspace);
+extern int dlm_pthread_cleanup(void);
+#endif
+
+
+/*
+ * Lock modes
+ */
+
+#define LKM_NLMODE 0 /* null lock */
+#define LKM_CRMODE 1 /* concurrent read */
+#define LKM_CWMODE 2 /* concurrent write */
+#define LKM_PRMODE 3 /* protected read */
+#define LKM_PWMODE 4 /* protected write */
+#define LKM_EXMODE 5 /* exclusive */
+
+
+/*
+ * Locking flags - these match the ones in dlm.h
+ */
+
+#define LKF_NOQUEUE 0x00000001
+#define LKF_CANCEL 0x00000002
+#define LKF_CONVERT 0x00000004
+#define LKF_VALBLK 0x00000008
+#define LKF_QUECVT 0x00000010
+#define LKF_IVVALBLK 0x00000020
+#define LKF_CONVDEADLK 0x00000040
+#define LKF_PERSISTENT 0x00000080
+#define LKF_NODLCKWT 0x00000100
+#define LKF_NODLCKBLK 0x00000200
+#define LKF_EXPEDITE 0x00000400
+#define LKF_NOQUEUEBAST 0x00000800
+#define LKF_HEADQUE 0x00001000
+#define LKF_NOORDER 0x00002000
+#define LKF_ORPHAN 0x00004000
+#define LKF_ALTPR 0x00008000
+#define LKF_ALTCW 0x00010000
+#define LKF_FORCEUNLOCK 0x00020000
+#define LKF_TIMEOUT 0x00040000
+#define LKF_WAIT 0x80000000 /* Userspace only, for sync API calls */
+
+/*
+ * Extra return codes used by the DLM
+ */
+
+#define ECANCEL 0x10001
+#define EUNLOCK 0x10002
+#define EINPROG 0x10003 /* lock operation is in progress */
+
+#endif
+
diff --git a/libdlm/libdlm_internal.h b/libdlm/libdlm_internal.h
new file mode 100644
index 0000000..c8b270e
--- /dev/null
+++ b/libdlm/libdlm_internal.h
@@ -0,0 +1,9 @@
+
+/* Needed before we include the kernel libdlm header */
+#define __user
+typedef uint8_t __u8;
+typedef uint16_t __u16;
+typedef uint32_t __u32;
+#define BUILDING_LIBDLM
+
+
diff --git a/libdlm/man/dlm_cleanup.3 b/libdlm/man/dlm_cleanup.3
new file mode 100644
index 0000000..db4a9cf
--- /dev/null
+++ b/libdlm/man/dlm_cleanup.3
@@ -0,0 +1 @@
+.so man3/libdlm.3
diff --git a/libdlm/man/dlm_close_lockspace.3 b/libdlm/man/dlm_close_lockspace.3
new file mode 100644
index 0000000..e5db408
--- /dev/null
+++ b/libdlm/man/dlm_close_lockspace.3
@@ -0,0 +1 @@
+.so man3/dlm_create_lockspace.3
diff --git a/libdlm/man/dlm_create_lockspace.3 b/libdlm/man/dlm_create_lockspace.3
new file mode 100644
index 0000000..3879e85
--- /dev/null
+++ b/libdlm/man/dlm_create_lockspace.3
@@ -0,0 +1,94 @@
+.TH DLM_CREATE_LOCKSPACE 3 "July 5, 2007" "libdlm functions"
+.SH NAME
+dlm_create_lockspace, dlm_new_lockspace, dlm_open_lockspace, dlm_close_lockspace, dlm_release_lockspace \- manipulate DLM lockspaces
+.SH SYNOPSIS
+.nf
+ #include <libdlm.h>
+
+dlm_lshandle_t dlm_create_lockspace(const char *name, mode_t mode);
+dlm_lshandle_t dlm_new_lockspace(const char *name, mode_t mode,
+ uint32_t flags);
+dlm_lshandle_t dlm_open_lockspace(const char *name);
+int dlm_close_lockspace(dlm_lshandle_t ls);
+int dlm_release_lockspace(const char *name, dlm_lshandle_t ls,
+ int force);
+
+.fi
+.SH DESCRIPTION
+The DLM allows locks to be partitioned into "lockspaces", and these can be manipulated by userspace calls. It is possible (though not recommended) for an application to have multiple lockspaces open at one time.
+
+Many of the DLM calls work on the "default" lockspace, which should be fine for most users. The calls with _ls_ in them allow you to isolate your application from all others running in the cluster. Remember, lockspaces are a cluster-wide resource, so if you create a lockspace called "myls" it will share locks with a lockspace called "myls" on all nodes. These calls allow privileged users to create and remove lockspaces, and ordinary users to connect to an existing lockspace to store their locks there.
+.PP
+.SS
+dlm_lshandle_t dlm_create_lockspace(const char *name, mode_t mode);
+.br
+This creates a lockspace called <name> and the mode of the file used to access it will be <mode> (subject to umask as usual). The lockspace must not already exist on this node; if it does, NULL will be returned and errno will be set to EEXIST. If you really want to use this lockspace you can then use dlm_open_lockspace() below. The name is the name of a misc device that will be created in /dev/misc.
+.br
+On success a handle to the lockspace is returned, which can be used to pass into subsequent dlm_ls_lock/unlock calls. Make no assumptions as to the content of this handle as its content may change in future.
+.br
+The caller must have CAP_SYSADMIN privileges to do this operation.
+.PP
+Return codes:
+A lockspace handle is returned if the call completed successfully. If not, NULL is returned and errno is set to one of the following:
+.nf
+EINVAL An invalid parameter was passed to the call
+ENOMEM A (kernel) memory allocation failed
+EEXIST The lockspace already exists
+EPERM Process does not have capability to create lockspaces
+ENOSYS A fatal error occurred initializing the DLM
+Any error returned by the open() system call
+.fi
+.SS
+dlm_lshandle_t dlm_new_lockspace(const char *name, mode_t mode, uint32_t flags)
+.PP
+Performs the same function as
+.B dlm_create_lockspace()
+above, but passes some creation flags to the call that affect the lockspace being created. Currently supported flags are:
+.nf
+DLM_LSFL_NODIR the lockspace should not use a resource directory
+DLM_LSFL_TIMEWARN the dlm should emit warnings over netlink when locks
+ have been waiting too long; required for deadlock
+ detection
+.fi
+.SS
+int dlm_release_lockspace(const char *name, dlm_lshandle_t ls, int force)
+.PP
+Deletes a lockspace. If the lockspace still has active locks then -1 will be returned and errno set to EBUSY. Both the lockspace handle /and/ the name must be specified. This call also closes the lockspace and stops the thread associated with the lockspace, if any.
+.br
+Note that other nodes in the cluster may still have locks open on this lockspace. This call only removes the lockspace from the current node. If the force flag is set then the lockspace will be removed even if another user on this node has active locks in it. Existing users will NOT be notified if you do this, so be careful.
+.br
+The caller must have CAP_SYSADMIN privileges to do this operation.
+.PP
+Return codes:
+0 is returned if the call completed successfully. If not, -1 is returned and errno is set to one of the following:
+.nf
+EINVAL An invalid parameter was passed to the call
+EPERM Process does not have capability to release lockspaces
+EBUSY The lockspace could not be freed because it still
+ contains locks and force was not set.
+.fi
+
+.SS
+dlm_lshandle_t dlm_open_lockspace(const char *name)
+.PP
+Opens an already existing lockspace and returns a handle to it.
+.PP
+Return codes:
+A lockspace handle is returned if the call completed successfully. If not, NULL is returned and errno is set to an error returned by the open() system call
+.SS
+int dlm_close_lockspace(dlm_lshandle_t ls)
+.br
+Close the lockspace. Any locks held by this process will be freed. If a thread is associated with this lockspace then it will be stopped.
+.PP
+Return codes:
+0 is returned if the call completed successfully. If not, -1 is returned and errno is set to one of the following:
+.nf
+EINVAL lockspace was not a valid lockspace handle
+.fi
+
+
+.SH SEE ALSO
+
+.BR libdlm (3),
+.BR dlm_unlock (3),
+.BR dlm_lock (3),
diff --git a/libdlm/man/dlm_dispatch.3 b/libdlm/man/dlm_dispatch.3
new file mode 100644
index 0000000..db4a9cf
--- /dev/null
+++ b/libdlm/man/dlm_dispatch.3
@@ -0,0 +1 @@
+.so man3/libdlm.3
diff --git a/libdlm/man/dlm_get_fd.3 b/libdlm/man/dlm_get_fd.3
new file mode 100644
index 0000000..db4a9cf
--- /dev/null
+++ b/libdlm/man/dlm_get_fd.3
@@ -0,0 +1 @@
+.so man3/libdlm.3
diff --git a/libdlm/man/dlm_lock.3 b/libdlm/man/dlm_lock.3
new file mode 100644
index 0000000..3c5f8b5
--- /dev/null
+++ b/libdlm/man/dlm_lock.3
@@ -0,0 +1,239 @@
+.TH DLM_LOCK 3 "July 5, 2007" "libdlm functions"
+.SH NAME
+dlm_lock \- acquire or convert a DLM lock
+.SH SYNOPSIS
+.nf
+ #include <libdlm.h>
+
+int dlm_lock(uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent, /* unused */
+ void (*astaddr) (void *astarg),
+ void *astarg,
+ void (*bastaddr) (void *astarg),
+ void *range); /* unused */
+
+int dlm_lock_wait(uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent, /* unused */
+ void *bastarg,
+ void (*bastaddr) (void *bastarg),
+ void *range); /* unused */
+
+int dlm_ls_lock(dlm_lshandle_t lockspace,
+ uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent, /* unused */
+ void (*astaddr) (void *astarg),
+ void *astarg,
+ void (*bastaddr) (void *astarg),
+ void *range); /* unused */
+
+int dlm_ls_lock_wait(dlm_lshandle_t lockspace,
+ uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent, /* unused */
+ void *bastarg,
+ void (*bastaddr) (void *bastarg),
+ void *range); /* unused */
+
+int dlm_ls_lockx(dlm_lshandle_t lockspace,
+ uint32_t mode,
+ struct dlm_lksb *lksb,
+ uint32_t flags,
+ const void *name,
+ unsigned int namelen,
+ uint32_t parent, /* unused */
+ void (*astaddr) (void *astarg),
+ void *astarg,
+ void (*bastaddr) (void *astarg),
+ uint64_t *xid,
+ uint64_t *timeout);
+
+
+
+.fi
+.SH DESCRIPTION
+dlm_lock and its variants acquire and convert locks in the DLM.
+.PP
+dlm_lock() operations are asynchronous. If the call to dlm_lock returns an error then the operation has failed and the AST routine will not be called. If dlm_lock returns 0 it is still possible that the lock operation will fail. The AST routine will be called when the locking is complete or has failed and the status is returned in the lksb.
+.B dlm_lock_wait()
+will wait until the lock operation has completed and returns the final completion status.
+.B dlm_ls_lock()
+is the same as
+.B dlm_lock()
+but takes a lockspace argument. This lockspace must have been previously opened by
+.B dlm_open_lockspace()
+or
+.B dlm_create_lockspace().
+.PP
+For conversion operations the name and namelen are ignored and the lock ID in the LKSB is used to identify the lock to be converted.
+.PP
+If a lock value block is specified then in general, a grant or a conversion to an equal-level or higher-level lock mode reads the lock value from the resource into the caller's lock value block. When a lock conversion from EX or PW to an equal-level or lower-level lock mode occurs, the contents of the caller's lock value block are written into the resource. If the LVB is invalidated the lksb.sb_flags member will be set to DLM_SBF_VALNOTVALID. Lock value blocks are always 32 bytes long.
+.PP
+If the AST routines or parameter are passed to a conversion operation then they will overwrite those values that were passed to a previous dlm_lock call.
+.PP
+.B mode
+Lock mode to acquire or convert to.
+.nf
+ LKM_NLMODE NULL Lock
+ LKM_CRMODE Concurrent read
+ LKM_CWMODE Concurrent write
+ LKM_PRMODE Protected read
+ LKM_PWMODE Protected write
+ LKM_EXMODE Exclusive
+.fi
+.PP
+.B flags
+Affect the operation of the lock call:
+.nf
+ LKF_NOQUEUE Don't queue the lock. If it cannot be granted return
+ -EAGAIN
+ LKF_CONVERT Convert an existing lock
+ LKF_VALBLK Lock has a value block
+ LKF_QUECVT Put conversion to the back of the queue
+ LKF_EXPEDITE Grant a NL lock immediately regardless of other locks
+ on the conversion queue
+ LKF_PERSISTENT Specifies a lock that will not be unlocked when the
+ process exits; it will become an orphan lock.
+ LKF_CONVDEADLK Enable internal conversion deadlock resolution where
+ the lock's granted mode may be set to NL and
+ DLM_SBF_DEMOTED is returned in lksb.sb_flags.
+ LKF_NODLCKWT Do not consider this lock when trying to detect
+ deadlock conditions.
+ LKF_NODLCKBLK Not implemented
+ LKF_NOQUEUEBAST Send blocking ASTs even for NOQUEUE operations
+ LKF_HEADQUE Add locks to the head of the convert or waiting queue
+ LKF_NOORDER Avoid the VMS rules on grant order
+ LKF_ALTPR If the requested mode can't be granted (generally CW),
+ try to grant in PR and return DLM_SBF_ALTMODE.
+ LKF_ALTCW If the requested mode can't be granted (generally PR),
+ try to grant in CW and return DLM_SBF_ALTMODE.
+ LKF_TIMEOUT The lock will time out per the timeout arg.
+
+.fi
+.PP
+.B lksb
+Lock Status block
+.br
+This structure contains the returned lock ID, the actual
+status of the lock operation (all lock ops are asynchronous)
+and the value block if LKF_VALBLK is set.
+.PP
+.B name
+.br
+Name of the lock. Can be binary, max 64 bytes. Ignored for lock
+conversions. (Should be a string to work with debugging tools.)
+.PP
+.B namelen
+.br
+Length of the above name. Ignored for lock conversions.
+.PP
+.B parent
+.br
+ID of parent lock or NULL if this is a top-level lock. This is currently unused.
+.PP
+.B ast
+.br
+Address of AST routine to be called when the lock operation
+completes. The final completion status of the lock will be
+in the lksb. The AST routine must not be NULL.
+.PP
+.B astargs
+.br
+Argument to pass to the AST routine (most people pass the lksb
+in here but it can be anything you like.)
+.PP
+.B bast
+.br
+Blocking AST routine. Address of a function to call if this
+lock is blocking another. The function will be called with
+astargs.
+.PP
+.B range
+.br
+This is unused.
+.PP
+.B xid
+.br
+Optional transaction ID for deadlock detection.
+.PP
+.B timeout
+.br
+Timeout in centiseconds. If it takes longer than this to acquire the lock
+(usually because it is already blocked by another lock), then the AST
+will trigger with ETIMEDOUT as the status. If the lock operation is a conversion
+then the lock will remain at its current status. If this is a new lock then
+the lock will not exist and any LKB in the lksb will be invalid. This is
+ignored without the LKF_TIMEOUT flag.
+.PP
+.SS Return values
+0 is returned if the call completed successfully. If not, -1 is returned and errno is set to one of the following:
+.PP
+.nf
+EINVAL An invalid parameter was passed to the call (eg bad lock
+ mode or flag)
+ENOMEM A (kernel) memory allocation failed
+EAGAIN LKF_NOQUEUE was requested and the lock could not be
+ granted
+EBUSY The lock is currently being locked or converted
+EFAULT The userland buffer could not be read/written by the
+ kernel (this indicates a library problem)
+EDEADLOCK The lock operation is causing a deadlock and has been
+ cancelled. If this was a conversion then the lock is
+ reverted to its previously granted state. If it was a
+ new lock then it has not been granted. (NB Only
+ conversion deadlocks are currently detected)
+.fi
+.PP
+If an error is returned in the AST, then lksb.sb_status is set to one of the above values instead of zero.
+.SS Structures
+.nf
+struct dlm_lksb {
+ int sb_status; /* Final status of lock operation */
+ uint32_t sb_lkid; /* ID of lock. Returned from dlm_lock()
+ on first use. Used as input to
+ dlm_lock() for a conversion operation */
+ char sb_flags; /* Completion flags, see above */
+ char *sb_lvbptr; /* Optional pointer to lock value block */
+};
+
+.fi
+.SH EXAMPLE
+.nf
+int status;
+struct dlm_lksb lksb;
+
+status = dlm_lock_wait(LKM_EXMODE,
+ &lksb,
+ LKF_NOQUEUE,
+ "MyLock",
+ strlen("MyLock"),
+ 0, // Parent,
+ NULL, // bast arg
+ NULL, // bast routine,
+ NULL); // Range
+
+if (status == 0)
+ dlm_unlock_wait(lksb.sb_lkid, 0, &lksb);
+
+.fi
+
+.SH SEE ALSO
+
+.BR libdlm (3),
+.BR dlm_unlock (3),
+.BR dlm_open_lockspace (3),
+.BR dlm_create_lockspace (3),
+.BR dlm_close_lockspace (3),
+.BR dlm_release_lockspace (3)
diff --git a/libdlm/man/dlm_lock_wait.3 b/libdlm/man/dlm_lock_wait.3
new file mode 100644
index 0000000..a99225c
--- /dev/null
+++ b/libdlm/man/dlm_lock_wait.3
@@ -0,0 +1 @@
+.so man3/dlm_lock.3
diff --git a/libdlm/man/dlm_ls_lock.3 b/libdlm/man/dlm_ls_lock.3
new file mode 100644
index 0000000..a99225c
--- /dev/null
+++ b/libdlm/man/dlm_ls_lock.3
@@ -0,0 +1 @@
+.so man3/dlm_lock.3
diff --git a/libdlm/man/dlm_ls_lock_wait.3 b/libdlm/man/dlm_ls_lock_wait.3
new file mode 100644
index 0000000..a99225c
--- /dev/null
+++ b/libdlm/man/dlm_ls_lock_wait.3
@@ -0,0 +1 @@
+.so man3/dlm_lock.3
diff --git a/libdlm/man/dlm_ls_lockx.3 b/libdlm/man/dlm_ls_lockx.3
new file mode 100644
index 0000000..a99225c
--- /dev/null
+++ b/libdlm/man/dlm_ls_lockx.3
@@ -0,0 +1 @@
+.so man3/dlm_lock.3
diff --git a/libdlm/man/dlm_ls_pthread_init.3 b/libdlm/man/dlm_ls_pthread_init.3
new file mode 100644
index 0000000..db4a9cf
--- /dev/null
+++ b/libdlm/man/dlm_ls_pthread_init.3
@@ -0,0 +1 @@
+.so man3/libdlm.3
diff --git a/libdlm/man/dlm_ls_unlock.3 b/libdlm/man/dlm_ls_unlock.3
new file mode 100644
index 0000000..91babd2
--- /dev/null
+++ b/libdlm/man/dlm_ls_unlock.3
@@ -0,0 +1 @@
+.so man3/dlm_unlock.3
diff --git a/libdlm/man/dlm_ls_unlock_wait.3 b/libdlm/man/dlm_ls_unlock_wait.3
new file mode 100644
index 0000000..91babd2
--- /dev/null
+++ b/libdlm/man/dlm_ls_unlock_wait.3
@@ -0,0 +1 @@
+.so man3/dlm_unlock.3
diff --git a/libdlm/man/dlm_new_lockspace.3 b/libdlm/man/dlm_new_lockspace.3
new file mode 100644
index 0000000..e5db408
--- /dev/null
+++ b/libdlm/man/dlm_new_lockspace.3
@@ -0,0 +1 @@
+.so man3/dlm_create_lockspace.3
diff --git a/libdlm/man/dlm_open_lockspace.3 b/libdlm/man/dlm_open_lockspace.3
new file mode 100644
index 0000000..e5db408
--- /dev/null
+++ b/libdlm/man/dlm_open_lockspace.3
@@ -0,0 +1 @@
+.so man3/dlm_create_lockspace.3
diff --git a/libdlm/man/dlm_pthread_init.3 b/libdlm/man/dlm_pthread_init.3
new file mode 100644
index 0000000..db4a9cf
--- /dev/null
+++ b/libdlm/man/dlm_pthread_init.3
@@ -0,0 +1 @@
+.so man3/libdlm.3
diff --git a/libdlm/man/dlm_release_lockspace.3 b/libdlm/man/dlm_release_lockspace.3
new file mode 100644
index 0000000..e5db408
--- /dev/null
+++ b/libdlm/man/dlm_release_lockspace.3
@@ -0,0 +1 @@
+.so man3/dlm_create_lockspace.3
diff --git a/libdlm/man/dlm_unlock.3 b/libdlm/man/dlm_unlock.3
new file mode 100644
index 0000000..9023139
--- /dev/null
+++ b/libdlm/man/dlm_unlock.3
@@ -0,0 +1,94 @@
+.TH DLM_UNLOCK 3 "July 5, 2007" "libdlm functions"
+.SH NAME
+dlm_unlock \- unlock a DLM lock
+.SH SYNOPSIS
+.nf
+#include <libdlm.h>
+
+int dlm_unlock(uint32_t lkid,
+ uint32_t flags, struct dlm_lksb *lksb, void *astarg);
+
+int dlm_unlock_wait(uint32_t lkid,
+ uint32_t flags, struct dlm_lksb *lksb);
+
+.fi
+.SH DESCRIPTION
+.B dlm_unlock()
+unlocks a lock previously acquired by dlm_lock and its variants.
+.PP
+Unless
+.B dlm_unlock_wait()
+is used, unlocks are also asynchronous. The AST routine is called when the resource is successfully unlocked (see below).
+.PP
+.B lkid
+Lock ID as returned in the lksb
+.PP
+.B flags
+flags affecting the unlock operation:
+.nf
+ LKF_CANCEL Cancel a pending lock or conversion.
+ This returns the lock to its previously
+ granted mode (in case of a conversion) or
+ unlocks it (in case of a waiting lock).
+ LKF_IVVALBLK Invalidate value block
+ LKF_FORCEUNLOCK Unlock the lock even if it's waiting.
+.fi
+.PP
+.B lksb
+LKSB to return status and value block information.
+.PP
+.B astarg
+New parameter to be passed to the completion AST.
+The completion AST routine is the
+last completion AST routine specified in a dlm_lock call.
+If dlm_lock_wait() was the last routine to issue a lock,
+dlm_unlock_wait() must be used to release the lock. If dlm_lock()
+was the last routine to issue a lock then either dlm_unlock()
+or dlm_unlock_wait() may be called.
+.PP
+
+.SS Return values
+0 is returned if the call completed successfully. If not, -1 is returned and errno is set to one of the following:
+.PP
+.nf
+EINVAL An invalid parameter was passed to the call (eg bad
+ lock mode or flag)
+EINPROGRESS The lock is already being unlocked
+EBUSY The lock is currently being locked or converted
+ENOTEMPTY An attempt was made to unlock a parent lock that still has
+ child locks.
+ECANCEL A lock conversion was successfully cancelled
+EUNLOCK An unlock operation completed successfully
+ (sb_status only)
+EFAULT The userland buffer could not be read/written by the
+ kernel
+.fi
+If an error is returned in the AST, then lksb.sb_status is set to one of the above values instead of zero.
+.SH EXAMPLE
+.nf
+int status;
+struct dlm_lksb lksb;
+
+status = dlm_lock_wait(LKM_EXMODE,
+ &lksb,
+ LKF_NOQUEUE,
+ "MyLock",
+ strlen("MyLock"),
+ 0, // Parent,
+ NULL, // bast arg
+ NULL, // bast routine,
+ NULL); // Range
+
+if (status == 0)
+ dlm_unlock_wait(lksb.sb_lkid, 0, &lksb);
+
+.fi
+
+.SH SEE ALSO
+
+.BR libdlm (3),
+.BR dlm_lock (3),
+.BR dlm_open_lockspace (3),
+.BR dlm_create_lockspace (3),
+.BR dlm_close_lockspace (3),
+.BR dlm_release_lockspace (3)
diff --git a/libdlm/man/dlm_unlock_wait.3 b/libdlm/man/dlm_unlock_wait.3
new file mode 100644
index 0000000..91babd2
--- /dev/null
+++ b/libdlm/man/dlm_unlock_wait.3
@@ -0,0 +1 @@
+.so man3/dlm_unlock.3
diff --git a/libdlm/man/libdlm.3 b/libdlm/man/libdlm.3
new file mode 100644
index 0000000..a020560
--- /dev/null
+++ b/libdlm/man/libdlm.3
@@ -0,0 +1,105 @@
+.TH LIBDLM 3 "July 5, 2007" "libdlm functions"
+.SH NAME
+libdlm \- dlm_get_fd, dlm_dispatch, dlm_pthread_init, dlm_ls_pthread_init, dlm_pthread_cleanup
+.SH SYNOPSIS
+.nf
+#include <libdlm.h>
+.nf
+int dlm_pthread_init();
+int dlm_ls_pthread_init(dlm_lshandle_t lockspace);
+int dlm_pthread_cleanup();
+int dlm_get_fd(void);
+int dlm_dispatch(int fd);
+
+link with -ldlm
+.fi
+.SH DESCRIPTION
+libdlm provides the programmatic userspace interface to the Distributed Lock manager. It provides all the calls you need to manipulate locks and lockspaces.
+.br
+libdlm can be used in pthread or non-pthread applications. For pthread applications simply call the following function before doing any lock operations. If you're using pthreads, remember to define _REENTRANT at the top of the program or using -D_REENTRANT on the compile line.
+.br
+pthreads is the normal way of using the DLM. This way you simply initialize the DLM's thread and all the AST routines will be delivered in that thread. You just call the dlm_lock() etc routines in the main line of your program.
+.br
+If you don't want to use pthreads or you want to handle the dlm callback ASTs yourself then you can get an FD handle to the DLM device and call
+.B dlm_dispatch()
+on it whenever it becomes active. That way ASTs will be delivered in the context of the thread/process that called
+.B dlm_dispatch().
+
+
+.SS int dlm_pthread_init()
+.br
+Creates a thread to receive all lock ASTs. The AST callback function for lock operations will be called in the context of this thread. If there is a potential for local resource access conflicts you must provide your own pthread-based locking in the AST routine.
+.PP
+.SS int dlm_ls_pthread_init(dlm_lshandle_t lockspace)
+.br
+As dlm_pthread_init but initializes a thread for the specified lockspace.
+.PP
+.SS int dlm_pthread_cleanup()
+.br
+Cleans up the default lockspace threads after use. Normally you don't need to call this, but if the locking code is in a dynamically loadable shared library this will probably be necessary.
+.br
+For non-pthread based applications the DLM provides a file descriptor that the program can feed into poll/select. If activity is detected on that FD then a dispatch function should be called:
+.PP
+.SS int dlm_get_fd()
+Returns a file-descriptor for the DLM suitable for passing in to poll() or select().
+.PP
+.SS int dlm_dispatch(int fd)
+.br
+Reads from the DLM and calls any AST routines that may be needed. This routine runs in the context of the caller so no extra locking is needed to protect local resources.
+.PP
+
+
+.SH libdlm_lt
+There also exists a "light" version of the libdlm library called libdlm_lt. This is provided for those applications that do not want to use pthread functions. If you use this library it is important that your application is NOT compiled with -D_REENTRANT or linked with libpthread.
+
+.SH EXAMPLES
+
+Create a lockspace and start a thread to deliver its callbacks:
+.nf
+dlm_lshandle_t ls;
+
+ls = dlm_create_lockspace("myLS", 0660);
+dlm_ls_pthread_init(ls);
+
+ ...
+
+status = dlm_ls_lock(ls,
+ ... );
+
+
+.fi
+.PP
+ Using poll(2) to wait for and dispatch ASTs
+.nf
+
+
+static int poll_for_ast(dlm_lshandle_t ls)
+{
+ struct pollfd pfd;
+
+ pfd.fd = dlm_ls_get_fd(ls);
+ pfd.events = POLLIN;
+ while (!ast_called)
+ {
+ if (poll(&pfd, 1, 0) < 0)
+ {
+ perror("poll");
+ return -1;
+ }
+ dlm_dispatch(dlm_ls_get_fd(ls));
+ }
+ ast_called = 0;
+ return 0;
+}
+.fi
+
+
+.SH SEE ALSO
+
+.BR libdlm (3),
+.BR dlm_lock (3),
+.BR dlm_unlock (3),
+.BR dlm_open_lockspace (3),
+.BR dlm_create_lockspace (3),
+.BR dlm_close_lockspace (3),
+.BR dlm_release_lockspace (3)
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=8d03f2e225…
Commit: 8d03f2e225d3a2109def78ddf9c7bed13575ea2e
Parent: 2a1ff7ea5b9c86e3e6ade2ea8da1bd22e2b61749
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Fri Sep 30 14:56:23 2011 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Fri Sep 30 14:56:23 2011 -0500
dlm_tool: move files
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
dlm/man/dlm_tool.8 | 98 ----
dlm/tool/main.c | 1334 -------------------------------------------
dlm_controld/dlm_daemon.h | 2 +-
dlm_controld/endian.h | 68 ---
dlm_controld/linux_endian.h | 68 +++
dlm_tool/Makefile | 54 ++
dlm_tool/dlm_tool.8 | 98 ++++
dlm_tool/main.c | 1334 +++++++++++++++++++++++++++++++++++++++++++
8 files changed, 1555 insertions(+), 1501 deletions(-)
diff --git a/dlm/man/dlm_tool.8 b/dlm/man/dlm_tool.8
deleted file mode 100644
index df9aa64..0000000
--- a/dlm/man/dlm_tool.8
+++ /dev/null
@@ -1,98 +0,0 @@
-.TH DLM_TOOL 8 2009-01-20 cluster cluster
-
-.SH NAME
-dlm_tool \- a utility for the dlm and dlm_controld daemon
-
-.SH SYNOPSIS
-.B dlm_tool
-[COMMAND] [OPTIONS]
-[
-.I name
-]
-
-.SH DESCRIPTION
-.TP
-.B ls
-Display internal dlm_controld state about lockspaces.
-
-.TP
-.B dump
-Dump dlm_controld debug buffer.
-
-.TP
-.B log_plock
-Dump dlm_controld plock debug buffer.
-
-.TP
-.BI plocks " name"
-Dump posix locks from dlm_controld for the lockspace.
-
-.TP
-.BI lockdump " name"
-Minimal display of locks from the lockspace.
-
-.TP
-.BI lockdebug " name"
-Extended display of locks from the lockspace.
-
-.TP
-.BI join " name"
-Join a lockspace.
-
-.TP
-.BI leave " name"
-Leave a lockspace.
-
-.TP
-.BI deadlock_check " name"
-Start a deadlock detection cycle for the lockspace.
-
-.SH OPTIONS
-.TP
-.B \-n
-Show all node information in ls.
-
-.TP
-.BI \-d " num"
-Resource directory enabled (1) or disabled (0) during join. Default 0.
-
-.TP
-.BI \-e " num"
-Exclusive create off/on (0/1) in join. Default 0.
-
-.TP
-.BI \-f " num"
-FS memory allocation off/on (0/1) in join. Default 0.
-
-.TP
-.BI \-m " mode"
-The permission mode (in octal) of the lockspace device created by join.
-Default 0600.
-
-.TP
-.B \-M
-Dump MSTCPY locks in addition to locks held by local processes.
-
-.TP
-.B \-s
-Summary following lockdebug output (experimental, format may change).
-
-.TP
-.B \-v
-Verbose lockdebug output.
-
-.TP
-.B \-w
-Wide lockdebug output.
-
-.TP
-.B \-h
-Print a help message describing available options, then exit.
-
-.TP
-.B \-V
-Print program version information, then exit.
-
-.SH SEE ALSO
-.BR dlm_controld (8)
-
diff --git a/dlm/tool/main.c b/dlm/tool/main.c
deleted file mode 100644
index 236463f..0000000
--- a/dlm/tool/main.c
+++ /dev/null
@@ -1,1334 +0,0 @@
-#include "clusterautoconfig.h"
-
-#include <sys/types.h>
-#include <sys/un.h>
-#include <inttypes.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <fcntl.h>
-#include <string.h>
-#include <errno.h>
-#include <limits.h>
-#include <netinet/in.h>
-
-#include <linux/dlmconstants.h>
-#include "libdlm.h"
-#include "libdlmcontrol.h"
-#include "copyright.cf"
-
-#define LKM_IVMODE -1
-
-#define OP_JOIN 1
-#define OP_LEAVE 2
-#define OP_JOINLEAVE 3
-#define OP_LIST 4
-#define OP_DEADLOCK_CHECK 5
-#define OP_DUMP 6
-#define OP_PLOCKS 7
-#define OP_LOCKDUMP 8
-#define OP_LOCKDEBUG 9
-#define OP_LOG_PLOCK 10
-
-static char *prog_name;
-static char *lsname;
-static int operation;
-static int opt_ind;
-static int ls_all_nodes = 0;
-static int opt_dir = 0;
-static int opt_excl = 0;
-static int opt_fs = 0;
-static int dump_mstcpy = 0;
-static mode_t create_mode = 0600;
-static int verbose;
-static int wide;
-static int summarize;
-
-#define MAX_LS 128
-#define MAX_NODES 128
-
-/* from linux/fs/dlm/dlm_internal.h */
-#define DLM_LKSTS_WAITING 1
-#define DLM_LKSTS_GRANTED 2
-#define DLM_LKSTS_CONVERT 3
-
-#define DLM_MSG_REQUEST 1
-#define DLM_MSG_CONVERT 2
-#define DLM_MSG_UNLOCK 3
-#define DLM_MSG_CANCEL 4
-#define DLM_MSG_REQUEST_REPLY 5
-#define DLM_MSG_CONVERT_REPLY 6
-#define DLM_MSG_UNLOCK_REPLY 7
-#define DLM_MSG_CANCEL_REPLY 8
-#define DLM_MSG_GRANT 9
-#define DLM_MSG_BAST 10
-#define DLM_MSG_LOOKUP 11
-#define DLM_MSG_REMOVE 12
-#define DLM_MSG_LOOKUP_REPLY 13
-#define DLM_MSG_PURGE 14
-
-
-struct dlmc_lockspace lss[MAX_LS];
-struct dlmc_node nodes[MAX_NODES];
-
-struct rinfo {
- int print_granted;
- int print_convert;
- int print_waiting;
- int print_lookup;
- int namelen;
- int nodeid;
- int lvb;
- unsigned int lkb_count;
- unsigned int lkb_granted;
- unsigned int lkb_convert;
- unsigned int lkb_waiting;
- unsigned int lkb_lookup;
- unsigned int lkb_wait_msg;
- unsigned int lkb_master_copy;
- unsigned int lkb_local_copy;
- unsigned int lkb_process_copy;
-};
-
-struct summary {
- unsigned int rsb_total;
- unsigned int rsb_with_lvb;
- unsigned int rsb_no_locks;
- unsigned int rsb_lookup;
- unsigned int rsb_master;
- unsigned int rsb_local;
- unsigned int rsb_nodeid_error;
- unsigned int lkb_count;
- unsigned int lkb_granted;
- unsigned int lkb_convert;
- unsigned int lkb_waiting;
- unsigned int lkb_lookup;
- unsigned int lkb_wait_msg;
- unsigned int lkb_master_copy;
- unsigned int lkb_local_copy;
- unsigned int lkb_process_copy;
- unsigned int expect_replies;
-};
-
-static const char *mode_str(int mode)
-{
- switch (mode) {
- case -1:
- return "IV";
- case LKM_NLMODE:
- return "NL";
- case LKM_CRMODE:
- return "CR";
- case LKM_CWMODE:
- return "CW";
- case LKM_PRMODE:
- return "PR";
- case LKM_PWMODE:
- return "PW";
- case LKM_EXMODE:
- return "EX";
- }
- return "??";
-}
-
-static const char *msg_str(int type)
-{
- switch (type) {
- case DLM_MSG_REQUEST:
- return "request";
- case DLM_MSG_CONVERT:
- return "convert";
- case DLM_MSG_UNLOCK:
- return "unlock ";
- case DLM_MSG_CANCEL:
- return "cancel ";
- case DLM_MSG_REQUEST_REPLY:
- return "r_reply";
- case DLM_MSG_CONVERT_REPLY:
- return "c_reply";
- case DLM_MSG_UNLOCK_REPLY:
- return "u_reply";
- case DLM_MSG_CANCEL_REPLY:
- return "c_reply";
- case DLM_MSG_GRANT:
- return "grant ";
- case DLM_MSG_BAST:
- return "bast ";
- case DLM_MSG_LOOKUP:
- return "lookup ";
- case DLM_MSG_REMOVE:
- return "remove ";
- case DLM_MSG_LOOKUP_REPLY:
- return "l_reply";
- case DLM_MSG_PURGE:
- return "purge ";
- default:
- return "unknown";
- }
-}
-
-static void print_usage(void)
-{
- printf("Usage:\n");
- printf("\n");
- printf("dlm_tool [options] [join | leave | lockdump | lockdebug |\n"
- " ls | dump | log_plock | plocks |\n"
- " deadlock_check]\n");
- printf("\n");
- printf("Options:\n");
- printf(" -n Show all node information in ls\n");
- printf(" -d <n> Resource directory off/on (0/1) in join, default 0\n");
- printf(" -e <n> Exclusive create off/on (0/1) in join, default 0\n");
- printf(" -f <n> FS memory allocation off/on (0/1) in join, default 0\n");
- printf(" -m <mode> Permission mode for lockspace device (octal), default 0600\n");
- printf(" -M Print MSTCPY locks in lockdump\n"
- " (remote locks that are locally mastered)\n");
- printf(" -s Summary following lockdebug output\n");
- printf(" (experimental, format not fixed)\n");
- printf(" -v Verbose lockdebug output\n");
- printf(" -w Wide lockdebug output\n");
- printf(" -h Print this help, then exit\n");
- printf(" -V Print program version information, then exit\n");
- printf("\n");
-}
-
-#define OPTION_STRING "MhVnd:m:e:f:vws"
-
-static void decode_arguments(int argc, char **argv)
-{
- int cont = 1;
- int optchar;
- int need_lsname;
- char modebuf[8];
-
- while (cont) {
- optchar = getopt(argc, argv, OPTION_STRING);
-
- switch (optchar) {
- case 'd':
- opt_dir = atoi(optarg);
- break;
-
- case 'e':
- opt_excl = atoi(optarg);
- break;
-
- case 'f':
- opt_fs = atoi(optarg);
- break;
-
- case 'm':
- memset(modebuf, 0, sizeof(modebuf));
- snprintf(modebuf, 8, "%s", optarg);
- sscanf(modebuf, "%o", &create_mode);
- break;
-
- case 'M':
- dump_mstcpy = 1;
- break;
-
- case 'n':
- ls_all_nodes = 1;
- break;
-
- case 's':
- summarize = 1;
- break;
-
- case 'v':
- verbose = 1;
- break;
-
- case 'w':
- wide = 1;
- break;
-
- case 'h':
- print_usage();
- exit(EXIT_SUCCESS);
- break;
-
- case 'V':
- printf("%s %s (built %s %s)\n",
- prog_name, VERSION, __DATE__, __TIME__);
- printf("%s\n", REDHAT_COPYRIGHT);
- exit(EXIT_SUCCESS);
- break;
-
- case ':':
- case '?':
- fprintf(stderr, "Please use '-h' for usage.\n");
- exit(EXIT_FAILURE);
- break;
-
- case EOF:
- cont = 0;
- break;
-
- default:
- fprintf(stderr, "unknown option: %c\n", optchar);
- exit(EXIT_FAILURE);
- break;
- };
- }
-
- need_lsname = 1;
-
- while (optind < argc) {
-
- /*
- * libdlm
- */
-
- if (!strncmp(argv[optind], "join", 4) &&
- (strlen(argv[optind]) == 4)) {
- operation = OP_JOIN;
- opt_ind = optind + 1;
- break;
- } else if (!strncmp(argv[optind], "leave", 5) &&
- (strlen(argv[optind]) == 5)) {
- operation = OP_LEAVE;
- opt_ind = optind + 1;
- break;
- } else if (!strncmp(argv[optind], "joinleave", 9) &&
- (strlen(argv[optind]) == 9)) {
- operation = OP_JOINLEAVE;
- opt_ind = optind + 1;
- break;
- }
-
- /*
- * libdlmcontrol
- */
-
- else if (!strncmp(argv[optind], "ls", 2) &&
- (strlen(argv[optind]) == 2)) {
- operation = OP_LIST;
- opt_ind = optind + 1;
- need_lsname = 0;
- break;
- } else if (!strncmp(argv[optind], "deadlock_check", 14) &&
- (strlen(argv[optind]) == 14)) {
- operation = OP_DEADLOCK_CHECK;
- opt_ind = optind + 1;
- break;
- } else if (!strncmp(argv[optind], "dump", 4) &&
- (strlen(argv[optind]) == 4)) {
- operation = OP_DUMP;
- opt_ind = optind + 1;
- need_lsname = 0;
- break;
- } else if (!strncmp(argv[optind], "plocks", 6) &&
- (strlen(argv[optind]) == 6)) {
- operation = OP_PLOCKS;
- opt_ind = optind + 1;
- break;
- } else if (!strncmp(argv[optind], "log_plock", 9) &&
- (strlen(argv[optind]) == 9)) {
- operation = OP_LOG_PLOCK;
- opt_ind = optind + 1;
- need_lsname = 0;
- break;
- }
-
- /*
- * debugfs
- */
-
- else if (!strncmp(argv[optind], "lockdump", 8) &&
- (strlen(argv[optind]) == 8)) {
- operation = OP_LOCKDUMP;
- opt_ind = optind + 1;
- break;
- } else if (!strncmp(argv[optind], "lockdebug", 9) &&
- (strlen(argv[optind]) == 9)) {
- operation = OP_LOCKDEBUG;
- opt_ind = optind + 1;
- break;
- }
- optind++;
- }
-
- if (!operation || !opt_ind) {
- print_usage();
- exit(EXIT_FAILURE);
- }
-
- if (optind < argc - 1)
- lsname = argv[opt_ind];
- else if (need_lsname) {
- fprintf(stderr, "lockspace name required\n");
- exit(EXIT_FAILURE);
- }
-}
-
-static int do_write(int fd, void *buf, size_t count)
-{
- int rv, off = 0;
-
- retry:
- rv = write(fd, (char *)buf + off, count);
- if (rv == -1 && errno == EINTR)
- goto retry;
- if (rv < 0)
- return rv;
-
- if (rv != count) {
- count -= rv;
- off += rv;
- goto retry;
- }
- return 0;
-}
-
-static char *flag_str(uint32_t flags)
-{
- static char join_flags[128];
-
- memset(join_flags, 0, sizeof(join_flags));
-
- strcat(join_flags, "flags ");
-
- if (flags & DLM_LSFL_NODIR)
- strcat(join_flags, "NODIR ");
-
- if (flags & DLM_LSFL_NEWEXCL)
- strcat(join_flags, "NEWEXCL ");
-
- if (flags & DLM_LSFL_FS)
- strcat(join_flags, "FS ");
-
- return join_flags;
-}
-
-static void do_join(char *name)
-{
- dlm_lshandle_t *dh;
- uint32_t flags = 0;
-
- if (!opt_dir)
- flags |= DLM_LSFL_NODIR;
-
- if (opt_excl)
- flags |= DLM_LSFL_NEWEXCL;
-
- if (opt_fs)
- flags |= DLM_LSFL_FS;
-
- printf("Joining lockspace \"%s\" permission %o %s\n",
- name, create_mode, flags ? flag_str(flags) : "");
- fflush(stdout);
-
- dh = dlm_new_lockspace(name, create_mode, flags);
- if (!dh) {
- fprintf(stderr, "dlm_new_lockspace %s error %d\n",
- name, errno);
- exit(-1);
- }
-
- dlm_close_lockspace(dh);
- /* there's no autofree so the ls should stay around */
- printf("done\n");
-}
-
-static void do_leave(char *name)
-{
- dlm_lshandle_t *dh;
-
- printf("Leaving lockspace \"%s\"\n", name);
- fflush(stdout);
-
- dh = dlm_open_lockspace(name);
- if (!dh) {
- fprintf(stderr, "dlm_open_lockspace %s error %p %d\n",
- name, dh, errno);
- exit(-1);
- }
-
- dlm_release_lockspace(name, dh, 1);
- printf("done\n");
-}
-
-static char *pr_master(int nodeid)
-{
- static char buf[64];
-
- memset(buf, 0, sizeof(buf));
-
- if (nodeid > 0)
- sprintf(buf, "Local %d", nodeid);
- else if (!nodeid)
- sprintf(buf, "Master");
- else if (nodeid == -1)
- sprintf(buf, "Lookup");
-
- return buf;
-}
-
-static char *pr_extra(uint32_t flags, int root_list, int recover_list,
- int recover_locks_count, char *first_lkid)
-{
- static char buf[128];
- int first = 0;
-
- memset(buf, 0, sizeof(buf));
-
- if (strcmp(first_lkid, "0"))
- first = 1;
-
- if (flags || first || root_list || recover_list || recover_locks_count)
- sprintf(buf,
- "flags %08x first_lkid %s root %d recover %d locks %d",
- flags, first_lkid, root_list, recover_list, recover_locks_count);
-
- return buf;
-}
-
-static void print_rsb(char *line, struct rinfo *ri)
-{
- char type[4], namefmt[4], *p;
- char addr[64];
- char first_lkid[64];
- int rv, nodeid, root_list, recover_list, recover_locks_count, namelen;
- uint32_t flags;
-
- rv = sscanf(line, "%s %s %d %s %u %d %d %u %u %s",
- type,
- addr,
- &nodeid,
- first_lkid,
- &flags,
- &root_list,
- &recover_list,
- &recover_locks_count,
- &namelen,
- namefmt);
-
- if (rv != 10)
- goto fail;
-
- /* used for lkb prints */
- ri->nodeid = nodeid;
-
- ri->namelen = namelen;
-
- p = strchr(line, '\n');
- if (!p)
- goto fail;
- *p = '\0';
-
- p = strstr(line, namefmt);
- if (!p)
- goto fail;
- p += 4;
-
- strcat(addr, " ");
-
- if (!strncmp(namefmt, "str", 3))
- printf("Resource len %2d \"%s\"\n", namelen, p);
- else if (!strncmp(namefmt, "hex", 3))
- printf("Resource len %2d hex %s\n", namelen, p);
- else
- goto fail;
-
- printf("%-16s %s\n",
- pr_master(nodeid),
- pr_extra(flags, root_list, recover_list, recover_locks_count, first_lkid));
- return;
-
- fail:
- fprintf(stderr, "print_rsb error rv %d line \"%s\"\n", rv, line);
-}
-
-static void print_lvb(char *line)
-{
- char lvb[1024];
- char type[4];
- int i, c, rv, lvblen;
- uint32_t lvbseq;
-
- memset(lvb, 0, 1024);
-
- rv = sscanf(line, "%s %u %d %[0-9A-Fa-f ]", type, &lvbseq, &lvblen, lvb);
-
- if (rv != 4) {
- fprintf(stderr, "print_lvb error rv %d line \"%s\"\n", rv, line);
- return;
- }
-
- printf("LVB len %d seq %u\n", lvblen, lvbseq);
-
- for (c = 0, i = 0; ; i++) {
- printf("%c", lvb[i]);
- if (lvb[i] != ' ')
- c++;
- if (!wide && lvb[i] == ' ' && !(c % 32))
- printf("\n");
- if (c == (lvblen * 2))
- break;
- }
- printf("\n");
-}
-
-struct lkb {
- uint64_t xid, timestamp, time_bast;
- uint32_t id, remid, exflags, flags, lvbseq;
- int nodeid, ownpid, status, grmode, rqmode, highbast, rsb_lookup, wait_type;
-};
-
-static const char *pr_grmode(struct lkb *lkb)
-{
- if (lkb->status == DLM_LKSTS_GRANTED || lkb->status == DLM_LKSTS_CONVERT)
- return mode_str(lkb->grmode);
- else if (lkb->status == DLM_LKSTS_WAITING || lkb->rsb_lookup)
- return "--";
- else
- return "XX";
-}
-
-static const char *pr_rqmode(struct lkb *lkb)
-{
- static char buf[5];
-
- memset(buf, 0, sizeof(buf));
-
- if (lkb->status == DLM_LKSTS_GRANTED) {
- return " ";
- } else if (lkb->status == DLM_LKSTS_CONVERT ||
- lkb->status == DLM_LKSTS_WAITING ||
- lkb->rsb_lookup) {
- sprintf(buf, "(%s)", mode_str(lkb->rqmode));
- return buf;
- } else {
- return "(XX)";
- }
-}
-
-static const char *pr_remote(struct lkb *lkb, struct rinfo *ri)
-{
- static char buf[64];
-
- memset(buf, 0, sizeof(buf));
-
- if (!lkb->nodeid) {
- return " ";
- } else if (lkb->nodeid != ri->nodeid) {
- sprintf(buf, "Remote: %3d %08x", lkb->nodeid, lkb->remid);
- return buf;
- } else {
- sprintf(buf, "Master: %3d %08x", lkb->nodeid, lkb->remid);
- return buf;
- }
-}
-
-static const char *pr_wait(struct lkb *lkb)
-{
- static char buf[16];
-
- memset(buf, 0, sizeof(buf));
-
- if (!lkb->wait_type) {
- return " ";
- } else {
- sprintf(buf, " wait %02d", lkb->wait_type);
- return buf;
- }
-}
-
-static char *pr_verbose(struct lkb *lkb)
-{
- static char buf[128];
-
- memset(buf, 0, sizeof(buf));
-
- sprintf(buf, "time %016llu flags %08x %08x bast %d %llu",
- (unsigned long long)lkb->timestamp,
- lkb->exflags, lkb->flags, lkb->highbast,
- (unsigned long long)lkb->time_bast);
-
- return buf;
-}
-
-static void print_lkb(char *line, struct rinfo *ri)
-{
- struct lkb lkb;
- char type[4];
- int rv;
-
- rv = sscanf(line, "%s %x %d %x %u %"PRIu64" %x %x %d %d %d %d %d %d %u %"PRIu64" %"PRIu64,
- type,
- &lkb.id,
- &lkb.nodeid,
- &lkb.remid,
- &lkb.ownpid,
- &lkb.xid,
- &lkb.exflags,
- &lkb.flags,
- &lkb.status,
- &lkb.grmode,
- &lkb.rqmode,
- &lkb.highbast,
- &lkb.rsb_lookup,
- &lkb.wait_type,
- &lkb.lvbseq,
- &lkb.timestamp,
- &lkb.time_bast);
-
- ri->lkb_count++;
-
- if (lkb.status == DLM_LKSTS_GRANTED) {
- if (!ri->print_granted++)
- printf("Granted\n");
- ri->lkb_granted++;
- }
- if (lkb.status == DLM_LKSTS_CONVERT) {
- if (!ri->print_convert++)
- printf("Convert\n");
- ri->lkb_convert++;
- }
- if (lkb.status == DLM_LKSTS_WAITING) {
- if (!ri->print_waiting++)
- printf("Waiting\n");
- ri->lkb_waiting++;
- }
- if (lkb.rsb_lookup) {
- if (!ri->print_lookup++)
- printf("Lookup\n");
- ri->lkb_lookup++;
- }
-
- if (lkb.wait_type)
- ri->lkb_wait_msg++;
-
- if (!ri->nodeid) {
- if (lkb.nodeid)
- ri->lkb_master_copy++;
- else
- ri->lkb_local_copy++;
- } else {
- ri->lkb_process_copy++;
- }
-
- printf("%08x %s %s %s %s %s\n",
- lkb.id, pr_grmode(&lkb), pr_rqmode(&lkb),
- pr_remote(&lkb, ri), pr_wait(&lkb),
- (verbose && wide) ? pr_verbose(&lkb) : "");
-
- if (verbose && !wide)
- printf("%s\n", pr_verbose(&lkb));
-}
-
-static void clear_rinfo(struct rinfo *ri)
-{
- memset(ri, 0, sizeof(struct rinfo));
- ri->nodeid = -9;
-}
-
-static void count_rinfo(struct summary *s, struct rinfo *ri)
-{
- /* the first time called */
- if (!ri->namelen)
- return;
-
- s->rsb_total++;
-
- if (ri->lvb)
- s->rsb_with_lvb++;
-
- if (!ri->lkb_count) {
- s->rsb_no_locks++;
- printf("no locks\n");
- }
-
- if (!ri->nodeid)
- s->rsb_master++;
- else if (ri->nodeid == -1)
- s->rsb_lookup++;
- else if (ri->nodeid > 0)
- s->rsb_local++;
- else
- s->rsb_nodeid_error++;
-
- s->lkb_count += ri->lkb_count;
- s->lkb_granted += ri->lkb_granted;
- s->lkb_convert += ri->lkb_convert;
- s->lkb_waiting += ri->lkb_waiting;
- s->lkb_lookup += ri->lkb_lookup;
- s->lkb_wait_msg += ri->lkb_wait_msg;
- s->lkb_master_copy += ri->lkb_master_copy;
- s->lkb_local_copy += ri->lkb_local_copy;
- s->lkb_process_copy += ri->lkb_process_copy;
-}
-
-static void print_summary(struct summary *s)
-{
- printf("rsb\n");
- printf(" total %u\n", s->rsb_total);
- printf(" master %u\n", s->rsb_master);
- printf(" remote master %u\n", s->rsb_local);
- printf(" lookup master %u\n", s->rsb_lookup);
- printf(" with lvb %u\n", s->rsb_with_lvb);
- printf(" with no locks %u\n", s->rsb_no_locks);
- printf(" nodeid error %u\n", s->rsb_nodeid_error);
- printf("\n");
-
- printf("lkb\n");
- printf(" total %u\n", s->lkb_count);
- printf(" granted %u\n", s->lkb_granted);
- printf(" convert %u\n", s->lkb_convert);
- printf(" waiting %u\n", s->lkb_waiting);
- printf(" local copy %u\n", s->lkb_local_copy);
- printf(" master copy %u\n", s->lkb_master_copy);
- printf(" process copy %u\n", s->lkb_process_copy);
- printf(" rsb lookup %u\n", s->lkb_lookup);
- printf(" wait message %u\n", s->lkb_wait_msg);
- printf(" expect reply %u\n", s->expect_replies);
-}
-
-#define LOCK_LINE_MAX 1024
-
-static void do_waiters(char *name, struct summary *sum)
-{
- FILE *file;
- char path[PATH_MAX];
- char line[LOCK_LINE_MAX];
- char rname[65];
- int header = 0;
- int i, j, spaces;
- int rv, nodeid, wait_type;
- uint32_t id;
-
- snprintf(path, PATH_MAX, "/sys/kernel/debug/dlm/%s_waiters", name);
-
- file = fopen(path, "r");
- if (!file)
- return;
-
- while (fgets(line, LOCK_LINE_MAX, file)) {
- if (!header) {
- printf("\n");
- printf("Expecting reply\n");
- header = 1;
- }
-
- rv = sscanf(line, "%x %d %d",
- &id, &wait_type, &nodeid);
-
- if (rv != 3) {
- printf("waiters: %s", line);
- continue;
- }
-
- /* parse the resource name from the remainder of the line */
- j = 0;
- spaces = 0;
-
- for (i = 0; i < LOCK_LINE_MAX; i++) {
- if (line[i] == '\n')
- break;
- if (spaces == 3) {
- rname[j++] = line[i];
- if (j == (sizeof(rname) - 1))
- break;
- } else if (line[i] == ' ') {
- spaces++;
- }
- }
-
- printf("nodeid %2d msg %s lkid %08x resource \"%s\"\n",
- nodeid, msg_str(wait_type), id, rname);
-
- sum->expect_replies++;
- }
- fclose(file);
-}
-
-static void do_lockdebug(char *name)
-{
- struct summary summary;
- struct rinfo info;
- FILE *file;
- char path[PATH_MAX];
- char line[LOCK_LINE_MAX];
- int old = 0;
-
- snprintf(path, PATH_MAX, "/sys/kernel/debug/dlm/%s_all", name);
-
- file = fopen(path, "r");
- if (!file) {
- snprintf(path, PATH_MAX, "/sys/kernel/debug/dlm/%s", name);
- file = fopen(path, "r");
- if (!file) {
- fprintf(stderr, "can't open %s: %s\n", path, strerror(errno));
- return;
- }
- old = 1;
- }
-
- memset(&summary, 0, sizeof(struct summary));
- memset(&info, 0, sizeof(struct rinfo));
-
- while (fgets(line, LOCK_LINE_MAX, file)) {
-
- if (old)
- goto raw;
-
- if (!strncmp(line, "version", 7))
- continue;
-
- if (!strncmp(line, "rsb", 3)) {
- count_rinfo(&summary, &info);
- clear_rinfo(&info);
- printf("\n");
- print_rsb(line, &info);
- continue;
- }
-
- if (!strncmp(line, "lvb", 3)) {
- print_lvb(line);
- info.lvb = 1;
- continue;
- }
-
- if (!strncmp(line, "lkb", 3)) {
- print_lkb(line, &info);
- continue;
- }
- raw:
- printf("%s", line);
- }
- fclose(file);
-
- do_waiters(name, &summary);
-
- if (summarize) {
- printf("\n");
- print_summary(&summary);
- }
-}
-
-static void parse_r_name(char *line, char *name)
-{
- char *p;
- int i = 0;
- int begin = 0;
-
- for (p = line; ; p++) {
- if (*p == '"') {
- if (begin)
- break;
- begin = 1;
- continue;
- }
- if (begin)
- name[i++] = *p;
- }
-}
-
-static void do_lockdump(char *name)
-{
- FILE *file;
- char path[PATH_MAX];
- char line[LOCK_LINE_MAX];
- char r_name[65];
- int r_nodeid;
- int r_len;
- int rv;
- unsigned int time;
- unsigned long long xid;
- uint32_t id;
- int nodeid;
- uint32_t remid;
- int ownpid;
- uint32_t exflags;
- uint32_t flags;
- int8_t status;
- int8_t grmode;
- int8_t rqmode;
-
- snprintf(path, PATH_MAX, "/sys/kernel/debug/dlm/%s_locks", name);
-
- file = fopen(path, "r");
- if (!file) {
- fprintf(stderr, "can't open %s: %s\n", path, strerror(errno));
- return;
- }
-
- /* skip the header on the first line */
- if (!fgets(line, LOCK_LINE_MAX, file))
- return;
-
- while (fgets(line, LOCK_LINE_MAX, file)) {
- rv = sscanf(line, "%x %d %x %u %llu %x %x %hhd %hhd %hhd %u %d %d",
- &id,
- &nodeid,
- &remid,
- &ownpid,
- &xid,
- &exflags,
- &flags,
- &status,
- &grmode,
- &rqmode,
- &time,
- &r_nodeid,
- &r_len);
-
- if (rv != 13) {
- fprintf(stderr, "invalid debugfs line %d: %s\n",
- rv, line);
- return;
- }
-
- memset(r_name, 0, sizeof(r_name));
- parse_r_name(line, r_name);
-
- /* don't print MSTCPY locks without -M */
- if (!r_nodeid && nodeid) {
- if (!dump_mstcpy)
- continue;
- printf("id %08x gr %s rq %s pid %u MSTCPY %d \"%s\"\n",
- id, mode_str(grmode), mode_str(rqmode),
- ownpid, nodeid, r_name);
- continue;
- }
-
- /* A hack because dlm-kernel doesn't set rqmode back to IV when
- a NOQUEUE convert fails, which means in a lockdump it looks
- like a granted lock is still converting since rqmode is not
- IV. (does it make sense to include status in the output,
- e.g. G,C,W?) */
-
- if (status == DLM_LKSTS_GRANTED)
- rqmode = LKM_IVMODE;
-
- printf("id %08x gr %s rq %s pid %u master %d \"%s\"\n",
- id, mode_str(grmode), mode_str(rqmode),
- ownpid, nodeid, r_name);
- }
-
- fclose(file);
-}
-
-static char *dlmc_lf_str(uint32_t flags)
-{
- static char str[128];
- int i = 0;
-
- memset(str, 0, sizeof(str));
-
- if (flags & DLMC_LF_SAVE_PLOCKS) {
- i++;
- strcat(str, "save_plock");
- }
- if (flags & DLMC_LF_NEED_PLOCKS) {
- strcat(str, i++ ? "," : "");
- strcat(str, "need_plock");
- }
- if (flags & DLMC_LF_FS_REGISTERED) {
- strcat(str, i++ ? "," : "");
- strcat(str, "fs_reg");
- }
- if (flags & DLMC_LF_KERNEL_STOPPED) {
- strcat(str, i++ ? "," : "");
- strcat(str, "kern_stop");
- }
- if (flags & DLMC_LF_LEAVING) {
- strcat(str, i++ ? "," : "");
- strcat(str, "leave");
- }
- if (flags & DLMC_LF_JOINING) {
- strcat(str, i++ ? "," : "");
- strcat(str, "join");
- }
-
- return str;
-}
-
-static const char *nf_check_str(uint32_t flags)
-{
- if (flags & DLMC_NF_CHECK_FENCING)
- return "fence";
-
- if (flags & DLMC_NF_CHECK_QUORUM)
- return "quorum";
-
- if (flags & DLMC_NF_CHECK_FS)
- return "fs";
-
- return "none";
-}
-
-static const char *condition_str(int cond)
-{
- switch (cond) {
- case 0:
- return "";
- case 1:
- return "fencing";
- case 2:
- return "quorum";
- case 3:
- return "fs";
- case 4:
- return "pending";
- default:
- return "unknown";
- }
-}
-
-static int node_compare(const void *va, const void *vb)
-{
- const struct dlmc_node *a = va;
- const struct dlmc_node *b = vb;
-
- return a->nodeid - b->nodeid;
-}
-
-static void show_nodeids(int count, struct dlmc_node *nodes_in)
-{
- struct dlmc_node *n = nodes_in;
- int i;
-
- for (i = 0; i < count; i++) {
- printf("%d ", n->nodeid);
- n++;
- }
- printf("\n");
-}
-
-static void show_ls(struct dlmc_lockspace *ls)
-{
- int rv, node_count;
-
- printf("name %s\n", ls->name);
- printf("id 0x%08x\n", ls->global_id);
- printf("flags 0x%08x %s\n",
- ls->flags, dlmc_lf_str(ls->flags));
- printf("change member %d joined %d remove %d failed %d seq %d,%d\n",
- ls->cg_prev.member_count, ls->cg_prev.joined_count,
- ls->cg_prev.remove_count, ls->cg_prev.failed_count,
- ls->cg_prev.combined_seq, ls->cg_prev.seq);
-
- node_count = 0;
- memset(&nodes, 0, sizeof(nodes));
- rv = dlmc_lockspace_nodes(ls->name, DLMC_NODES_MEMBERS,
- MAX_NODES, &node_count, nodes);
- if (rv < 0) {
- printf("members error\n");
- goto next;
- }
- qsort(nodes, node_count, sizeof(struct dlmc_node), node_compare);
-
- printf("members ");
- show_nodeids(node_count, nodes);
-
- next:
- if (!ls->cg_next.seq)
- return;
-
- printf("new change member %d joined %d remove %d failed %d seq %d,%d\n",
- ls->cg_next.member_count, ls->cg_next.joined_count,
- ls->cg_next.remove_count, ls->cg_next.failed_count,
- ls->cg_next.combined_seq, ls->cg_next.seq);
-
- printf("new status wait_messages %d wait_condition %d %s\n",
- ls->cg_next.wait_messages, ls->cg_next.wait_condition,
- condition_str(ls->cg_next.wait_condition));
-
- node_count = 0;
- memset(&nodes, 0, sizeof(nodes));
- rv = dlmc_lockspace_nodes(ls->name, DLMC_NODES_NEXT,
- MAX_NODES, &node_count, nodes);
- if (rv < 0) {
- printf("new members error\n");
- return;
- }
- qsort(nodes, node_count, sizeof(struct dlmc_node), node_compare);
-
- printf("new members ");
- show_nodeids(node_count, nodes);
-}
-
-static int member_int(struct dlmc_node *n)
-{
- if (n->flags & DLMC_NF_DISALLOWED)
- return -1;
- if (n->flags & DLMC_NF_MEMBER)
- return 1;
- return 0;
-}
-
-static void show_all_nodes(int count, struct dlmc_node *nodes_in)
-{
- struct dlmc_node *n = nodes_in;
- int i;
-
- for (i = 0; i < count; i++) {
- printf("nodeid %d member %d failed %d start %d seq_add %u seq_rem %u check %s\n",
- n->nodeid,
- member_int(n),
- n->failed_reason,
- (n->flags & DLMC_NF_START) ? 1 : 0,
- n->added_seq,
- n->removed_seq,
- nf_check_str(n->flags));
- n++;
- }
-}
-
-static void do_list(char *name)
-{
- struct dlmc_lockspace *ls;
- int node_count;
- int ls_count;
- int rv;
- int i;
-
- memset(lss, 0, sizeof(lss));
-
- if (name) {
- ls_count = 1;
- rv = dlmc_lockspace_info(name, lss);
- } else {
- rv = dlmc_lockspaces(MAX_LS, &ls_count, lss);
- }
-
- if (rv < 0)
- exit(EXIT_FAILURE); /* dlm_controld probably not running */
-
- if (ls_count)
- printf("dlm lockspaces\n");
-
- for (i = 0; i < ls_count; i++) {
- ls = &lss[i];
-
- show_ls(ls);
-
- if (!ls_all_nodes)
- goto next;
-
- node_count = 0;
- memset(&nodes, 0, sizeof(nodes));
-
- rv = dlmc_lockspace_nodes(ls->name, DLMC_NODES_ALL,
- MAX_NODES, &node_count, nodes);
- if (rv < 0) {
- printf("all nodes error %d %d\n", rv, errno);
- goto next;
- }
-
- qsort(nodes, node_count, sizeof(struct dlmc_node),node_compare);
-
- printf("all nodes\n");
- show_all_nodes(node_count, nodes);
- next:
- printf("\n");
- }
-}
-
-static void do_deadlock_check(char *name)
-{
- dlmc_deadlock_check(name);
-}
-
-static void do_plocks(char *name)
-{
- char buf[DLMC_DUMP_SIZE];
-
- memset(buf, 0, sizeof(buf));
-
- dlmc_dump_plocks(name, buf);
-
- buf[DLMC_DUMP_SIZE-1] = '\0';
-
- do_write(STDOUT_FILENO, buf, strlen(buf));
-}
-
-static void do_dump(void)
-{
- char buf[DLMC_DUMP_SIZE];
-
- memset(buf, 0, sizeof(buf));
-
- dlmc_dump_debug(buf);
-
- buf[DLMC_DUMP_SIZE-1] = '\0';
-
- do_write(STDOUT_FILENO, buf, strlen(buf));
- printf("\n");
-}
-
-static void do_log_plock(void)
-{
- char buf[DLMC_DUMP_SIZE];
-
- memset(buf, 0, sizeof(buf));
-
- dlmc_dump_log_plock(buf);
-
- buf[DLMC_DUMP_SIZE-1] = '\0';
-
- do_write(STDOUT_FILENO, buf, strlen(buf));
- printf("\n");
-}
-
-int main(int argc, char **argv)
-{
- prog_name = argv[0];
- decode_arguments(argc, argv);
-
- switch (operation) {
-
- /* calls to libdlm; pass a command to dlm-kernel */
-
- case OP_JOIN:
- do_join(lsname);
- break;
-
- case OP_LEAVE:
- do_leave(lsname);
- break;
-
- case OP_JOINLEAVE:
- do_join(lsname);
- do_leave(lsname);
- break;
-
- /* calls to libdlmcontrol; pass a command/query to dlm_controld */
-
- case OP_LIST:
- do_list(lsname);
- break;
-
- case OP_DUMP:
- do_dump();
- break;
-
- case OP_LOG_PLOCK:
- do_log_plock();
- break;
-
- case OP_PLOCKS:
- do_plocks(lsname);
- break;
-
- case OP_DEADLOCK_CHECK:
- do_deadlock_check(lsname);
- break;
-
- /* calls to read debugfs; query info from dlm-kernel */
-
- case OP_LOCKDUMP:
- do_lockdump(lsname);
- break;
-
- case OP_LOCKDEBUG:
- do_lockdebug(lsname);
- break;
- }
- return 0;
-}
-
diff --git a/dlm_controld/dlm_daemon.h b/dlm_controld/dlm_daemon.h
index 70ad2d8..0c89f67 100644
--- a/dlm_controld/dlm_daemon.h
+++ b/dlm_controld/dlm_daemon.h
@@ -41,7 +41,7 @@
#include "config.h"
#include "list.h"
#include "rbtree.h"
-#include "endian.h"
+#include "linux_endian.h"
/* TODO: cleanup */
#define CLUSTERVARLIB "/var/lib/cluster"
diff --git a/dlm_controld/endian.h b/dlm_controld/endian.h
deleted file mode 100644
index 43089d2..0000000
--- a/dlm_controld/endian.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef __LINUX_ENDIAN_DOT_H__
-#define __LINUX_ENDIAN_DOT_H__
-
-
-#include <endian.h>
-#include <byteswap.h>
-
-
-/* I'm not sure which versions of alpha glibc/gcc are broken,
- so fix all of them. */
-#ifdef __alpha__
-#undef bswap_64
-static __inline__ unsigned long bswap_64(unsigned long x)
-{
- unsigned int h = x >> 32;
- unsigned int l = x;
-
- h = bswap_32(h);
- l = bswap_32(l);
-
- return ((unsigned long)l << 32) | h;
-}
-#endif /* __alpha__ */
-
-
-#if __BYTE_ORDER == __BIG_ENDIAN
-
-#define be16_to_cpu(x) (x)
-#define be32_to_cpu(x) (x)
-#define be64_to_cpu(x) (x)
-
-#define cpu_to_be16(x) (x)
-#define cpu_to_be32(x) (x)
-#define cpu_to_be64(x) (x)
-
-#define le16_to_cpu(x) (bswap_16((x)))
-#define le32_to_cpu(x) (bswap_32((x)))
-#define le64_to_cpu(x) (bswap_64((x)))
-
-#define cpu_to_le16(x) (bswap_16((x)))
-#define cpu_to_le32(x) (bswap_32((x)))
-#define cpu_to_le64(x) (bswap_64((x)))
-
-#endif /* __BYTE_ORDER == __BIG_ENDIAN */
-
-
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-
-#define be16_to_cpu(x) (bswap_16((x)))
-#define be32_to_cpu(x) (bswap_32((x)))
-#define be64_to_cpu(x) (bswap_64((x)))
-
-#define cpu_to_be16(x) (bswap_16((x)))
-#define cpu_to_be32(x) (bswap_32((x)))
-#define cpu_to_be64(x) (bswap_64((x)))
-
-#define le16_to_cpu(x) (x)
-#define le32_to_cpu(x) (x)
-#define le64_to_cpu(x) (x)
-
-#define cpu_to_le16(x) (x)
-#define cpu_to_le32(x) (x)
-#define cpu_to_le64(x) (x)
-
-#endif /* __BYTE_ORDER == __LITTLE_ENDIAN */
-
-
-#endif /* __LINUX_ENDIAN_DOT_H__ */
diff --git a/dlm_controld/linux_endian.h b/dlm_controld/linux_endian.h
new file mode 100644
index 0000000..43089d2
--- /dev/null
+++ b/dlm_controld/linux_endian.h
@@ -0,0 +1,68 @@
+#ifndef __LINUX_ENDIAN_DOT_H__
+#define __LINUX_ENDIAN_DOT_H__
+
+
+#include <endian.h>
+#include <byteswap.h>
+
+
+/* I'm not sure which versions of alpha glibc/gcc are broken,
+ so fix all of them. */
+#ifdef __alpha__
+#undef bswap_64
+static __inline__ unsigned long bswap_64(unsigned long x)
+{
+ unsigned int h = x >> 32;
+ unsigned int l = x;
+
+ h = bswap_32(h);
+ l = bswap_32(l);
+
+ return ((unsigned long)l << 32) | h;
+}
+#endif /* __alpha__ */
+
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+
+#define be16_to_cpu(x) (x)
+#define be32_to_cpu(x) (x)
+#define be64_to_cpu(x) (x)
+
+#define cpu_to_be16(x) (x)
+#define cpu_to_be32(x) (x)
+#define cpu_to_be64(x) (x)
+
+#define le16_to_cpu(x) (bswap_16((x)))
+#define le32_to_cpu(x) (bswap_32((x)))
+#define le64_to_cpu(x) (bswap_64((x)))
+
+#define cpu_to_le16(x) (bswap_16((x)))
+#define cpu_to_le32(x) (bswap_32((x)))
+#define cpu_to_le64(x) (bswap_64((x)))
+
+#endif /* __BYTE_ORDER == __BIG_ENDIAN */
+
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+
+#define be16_to_cpu(x) (bswap_16((x)))
+#define be32_to_cpu(x) (bswap_32((x)))
+#define be64_to_cpu(x) (bswap_64((x)))
+
+#define cpu_to_be16(x) (bswap_16((x)))
+#define cpu_to_be32(x) (bswap_32((x)))
+#define cpu_to_be64(x) (bswap_64((x)))
+
+#define le16_to_cpu(x) (x)
+#define le32_to_cpu(x) (x)
+#define le64_to_cpu(x) (x)
+
+#define cpu_to_le16(x) (x)
+#define cpu_to_le32(x) (x)
+#define cpu_to_le64(x) (x)
+
+#endif /* __BYTE_ORDER == __LITTLE_ENDIAN */
+
+
+#endif /* __LINUX_ENDIAN_DOT_H__ */
diff --git a/dlm_tool/Makefile b/dlm_tool/Makefile
new file mode 100644
index 0000000..9f24a24
--- /dev/null
+++ b/dlm_tool/Makefile
@@ -0,0 +1,54 @@
+BIN_TARGET = dlm_tool
+MAN_TARGET = dlm_tool.8
+
+BIN_SOURCE = main.c
+
+BIN_CFLAGS += -D_GNU_SOURCE -g \
+ -Wall \
+ -Wformat \
+ -Wformat-security \
+ -Wmissing-prototypes \
+ -Wnested-externs \
+ -Wpointer-arith \
+ -Wextra -Wshadow \
+ -Wcast-align \
+ -Wwrite-strings \
+ -Waggregate-return \
+ -Wstrict-prototypes \
+ -Winline \
+ -Wredundant-decls \
+ -Wno-sign-compare \
+ -Wno-unused-parameter \
+ -Wp,-D_FORTIFY_SOURCE=2 \
+ -fexceptions \
+ -fasynchronous-unwind-tables \
+ -fdiagnostics-show-option \
+
+BIN_CFLAGS += -fPIE -DPIE
+BIN_CFLAGS += -I../include -I../dlm/libdlm -I../dlm_controld
+
+BIN_LDFLAGS += -Wl,-z,now -Wl,-z,relro -pie
+BIN_LDFLAGS += -lpthread -ldlm -ldlmcontrol
+
+all: $(BIN_TARGET)
+
+$(BIN_TARGET): $(BIN_SOURCE)
+ $(CC) $(BIN_CFLAGS) $(BIN_LDFLAGS) $(BIN_SOURCE) -o $@
+
+clean:
+ rm -f *.o *.so *.so.* $(BIN_TARGET)
+
+
+INSTALL=$(shell which install)
+
+DESTDIR=
+BINDIR=/usr/sbin
+MANDIR=/usr/share/man
+
+.PHONY: install
+install: all
+ $(INSTALL) -d $(DESTDIR)/$(BINDIR)
+ $(INSTALL) -d $(DESTDIR)/$(MANDIR)/man8
+ $(INSTALL) -c -m 755 $(BIN_TARGET) $(DESTDIR)/$(BINDIR)
+ $(INSTALL) -m 644 $(MAN_TARGET) $(DESTDIR)/$(MANDIR)/man8/
+
diff --git a/dlm_tool/dlm_tool.8 b/dlm_tool/dlm_tool.8
new file mode 100644
index 0000000..df9aa64
--- /dev/null
+++ b/dlm_tool/dlm_tool.8
@@ -0,0 +1,98 @@
+.TH DLM_TOOL 8 2009-01-20 cluster cluster
+
+.SH NAME
+dlm_tool \- a utility for the dlm and dlm_controld daemon
+
+.SH SYNOPSIS
+.B dlm_tool
+[COMMAND] [OPTIONS]
+[
+.I name
+]
+
+.SH DESCRIPTION
+.TP
+.B ls
+Display internal dlm_controld state about lockspaces.
+
+.TP
+.B dump
+Dump dlm_controld debug buffer.
+
+.TP
+.B log_plock
+Dump dlm_controld plock debug buffer.
+
+.TP
+.BI plocks " name"
+Dump posix locks from dlm_controld for the lockspace.
+
+.TP
+.BI lockdump " name"
+Minimal display of locks from the lockspace.
+
+.TP
+.BI lockdebug " name"
+Extended display of locks from the lockspace.
+
+.TP
+.BI join " name"
+Join a lockspace.
+
+.TP
+.BI leave " name"
+Leave a lockspace.
+
+.TP
+.BI deadlock_check " name"
+Start a deadlock detection cycle for the lockspace.
+
+.SH OPTIONS
+.TP
+.B \-n
+Show all node information in ls.
+
+.TP
+.BI \-d " num"
+Resource directory enabled (1) or disabled (0) during join. Default 0.
+
+.TP
+.BI \-e " num"
+Exclusive create off/on (0/1) in join. Default 0.
+
+.TP
+.BI \-f " num"
+FS memory allocation off/on (0/1) in join. Default 0.
+
+.TP
+.BI \-m " mode"
+The permission mode (in octal) of the lockspace device created by join.
+Default 0600.
+
+.TP
+.B \-M
+Dump MSTCPY locks in addition to locks held by local processes.
+
+.TP
+.B \-s
+Summary following lockdebug output (experimental, format may change).
+
+.TP
+.B \-v
+Verbose lockdebug output.
+
+.TP
+.B \-w
+Wide lockdebug output.
+
+.TP
+.B \-h
+Print a help message describing available options, then exit.
+
+.TP
+.B \-V
+Print program version information, then exit.
+
+.SH SEE ALSO
+.BR dlm_controld (8)
+
diff --git a/dlm_tool/main.c b/dlm_tool/main.c
new file mode 100644
index 0000000..4df930a
--- /dev/null
+++ b/dlm_tool/main.c
@@ -0,0 +1,1334 @@
+#include <unistd.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <inttypes.h>
+#include <netinet/in.h>
+
+#include <linux/dlmconstants.h>
+#include "libdlm.h"
+#include "libdlmcontrol.h"
+#include "copyright.cf"
+
+#define LKM_IVMODE -1
+
+#define OP_JOIN 1
+#define OP_LEAVE 2
+#define OP_JOINLEAVE 3
+#define OP_LIST 4
+#define OP_DEADLOCK_CHECK 5
+#define OP_DUMP 6
+#define OP_PLOCKS 7
+#define OP_LOCKDUMP 8
+#define OP_LOCKDEBUG 9
+#define OP_LOG_PLOCK 10
+
+static char *prog_name;
+static char *lsname;
+static int operation;
+static int opt_ind;
+static int ls_all_nodes = 0;
+static int opt_dir = 0;
+static int opt_excl = 0;
+static int opt_fs = 0;
+static int dump_mstcpy = 0;
+static mode_t create_mode = 0600;
+static int verbose;
+static int wide;
+static int summarize;
+
+#define VERSION "master"
+
+#define MAX_LS 128
+#define MAX_NODES 128
+
+/* from linux/fs/dlm/dlm_internal.h */
+#define DLM_LKSTS_WAITING 1
+#define DLM_LKSTS_GRANTED 2
+#define DLM_LKSTS_CONVERT 3
+
+#define DLM_MSG_REQUEST 1
+#define DLM_MSG_CONVERT 2
+#define DLM_MSG_UNLOCK 3
+#define DLM_MSG_CANCEL 4
+#define DLM_MSG_REQUEST_REPLY 5
+#define DLM_MSG_CONVERT_REPLY 6
+#define DLM_MSG_UNLOCK_REPLY 7
+#define DLM_MSG_CANCEL_REPLY 8
+#define DLM_MSG_GRANT 9
+#define DLM_MSG_BAST 10
+#define DLM_MSG_LOOKUP 11
+#define DLM_MSG_REMOVE 12
+#define DLM_MSG_LOOKUP_REPLY 13
+#define DLM_MSG_PURGE 14
+
+
+struct dlmc_lockspace lss[MAX_LS];
+struct dlmc_node nodes[MAX_NODES];
+
+struct rinfo {
+ int print_granted;
+ int print_convert;
+ int print_waiting;
+ int print_lookup;
+ int namelen;
+ int nodeid;
+ int lvb;
+ unsigned int lkb_count;
+ unsigned int lkb_granted;
+ unsigned int lkb_convert;
+ unsigned int lkb_waiting;
+ unsigned int lkb_lookup;
+ unsigned int lkb_wait_msg;
+ unsigned int lkb_master_copy;
+ unsigned int lkb_local_copy;
+ unsigned int lkb_process_copy;
+};
+
+struct summary {
+ unsigned int rsb_total;
+ unsigned int rsb_with_lvb;
+ unsigned int rsb_no_locks;
+ unsigned int rsb_lookup;
+ unsigned int rsb_master;
+ unsigned int rsb_local;
+ unsigned int rsb_nodeid_error;
+ unsigned int lkb_count;
+ unsigned int lkb_granted;
+ unsigned int lkb_convert;
+ unsigned int lkb_waiting;
+ unsigned int lkb_lookup;
+ unsigned int lkb_wait_msg;
+ unsigned int lkb_master_copy;
+ unsigned int lkb_local_copy;
+ unsigned int lkb_process_copy;
+ unsigned int expect_replies;
+};
+
+static const char *mode_str(int mode)
+{
+ switch (mode) {
+ case -1:
+ return "IV";
+ case LKM_NLMODE:
+ return "NL";
+ case LKM_CRMODE:
+ return "CR";
+ case LKM_CWMODE:
+ return "CW";
+ case LKM_PRMODE:
+ return "PR";
+ case LKM_PWMODE:
+ return "PW";
+ case LKM_EXMODE:
+ return "EX";
+ }
+ return "??";
+}
+
+static const char *msg_str(int type)
+{
+ switch (type) {
+ case DLM_MSG_REQUEST:
+ return "request";
+ case DLM_MSG_CONVERT:
+ return "convert";
+ case DLM_MSG_UNLOCK:
+ return "unlock ";
+ case DLM_MSG_CANCEL:
+ return "cancel ";
+ case DLM_MSG_REQUEST_REPLY:
+ return "r_reply";
+ case DLM_MSG_CONVERT_REPLY:
+ return "c_reply";
+ case DLM_MSG_UNLOCK_REPLY:
+ return "u_reply";
+ case DLM_MSG_CANCEL_REPLY:
+ return "c_reply";
+ case DLM_MSG_GRANT:
+ return "grant ";
+ case DLM_MSG_BAST:
+ return "bast ";
+ case DLM_MSG_LOOKUP:
+ return "lookup ";
+ case DLM_MSG_REMOVE:
+ return "remove ";
+ case DLM_MSG_LOOKUP_REPLY:
+ return "l_reply";
+ case DLM_MSG_PURGE:
+ return "purge ";
+ default:
+ return "unknown";
+ }
+}
+
+static void print_usage(void)
+{
+ printf("Usage:\n");
+ printf("\n");
+ printf("dlm_tool [options] [join | leave | lockdump | lockdebug |\n"
+ " ls | dump | log_plock | plocks |\n"
+ " deadlock_check]\n");
+ printf("\n");
+ printf("Options:\n");
+ printf(" -n Show all node information in ls\n");
+ printf(" -d <n> Resource directory off/on (0/1) in join, default 0\n");
+ printf(" -e <n> Exclusive create off/on (0/1) in join, default 0\n");
+ printf(" -f <n> FS memory allocation off/on (0/1) in join, default 0\n");
+ printf(" -m <mode> Permission mode for lockspace device (octal), default 0600\n");
+ printf(" -M Print MSTCPY locks in lockdump\n"
+ " (remote locks that are locally mastered)\n");
+ printf(" -s Summary following lockdebug output\n");
+ printf(" (experimental, format not fixed)\n");
+ printf(" -v Verbose lockdebug output\n");
+ printf(" -w Wide lockdebug output\n");
+ printf(" -h Print this help, then exit\n");
+ printf(" -V Print program version information, then exit\n");
+ printf("\n");
+}
+
+#define OPTION_STRING "MhVnd:m:e:f:vws"
+
+static void decode_arguments(int argc, char **argv)
+{
+ int cont = 1;
+ int optchar;
+ int need_lsname;
+ char modebuf[8];
+
+ while (cont) {
+ optchar = getopt(argc, argv, OPTION_STRING);
+
+ switch (optchar) {
+ case 'd':
+ opt_dir = atoi(optarg);
+ break;
+
+ case 'e':
+ opt_excl = atoi(optarg);
+ break;
+
+ case 'f':
+ opt_fs = atoi(optarg);
+ break;
+
+ case 'm':
+ memset(modebuf, 0, sizeof(modebuf));
+ snprintf(modebuf, 8, "%s", optarg);
+ sscanf(modebuf, "%o", &create_mode);
+ break;
+
+ case 'M':
+ dump_mstcpy = 1;
+ break;
+
+ case 'n':
+ ls_all_nodes = 1;
+ break;
+
+ case 's':
+ summarize = 1;
+ break;
+
+ case 'v':
+ verbose = 1;
+ break;
+
+ case 'w':
+ wide = 1;
+ break;
+
+ case 'h':
+ print_usage();
+ exit(EXIT_SUCCESS);
+ break;
+
+ case 'V':
+ printf("%s %s (built %s %s)\n",
+ prog_name, VERSION, __DATE__, __TIME__);
+ printf("%s\n", REDHAT_COPYRIGHT);
+ exit(EXIT_SUCCESS);
+ break;
+
+ case ':':
+ case '?':
+ fprintf(stderr, "Please use '-h' for usage.\n");
+ exit(EXIT_FAILURE);
+ break;
+
+ case EOF:
+ cont = 0;
+ break;
+
+ default:
+ fprintf(stderr, "unknown option: %c\n", optchar);
+ exit(EXIT_FAILURE);
+ break;
+ };
+ }
+
+ need_lsname = 1;
+
+ while (optind < argc) {
+
+ /*
+ * libdlm
+ */
+
+ if (!strncmp(argv[optind], "join", 4) &&
+ (strlen(argv[optind]) == 4)) {
+ operation = OP_JOIN;
+ opt_ind = optind + 1;
+ break;
+ } else if (!strncmp(argv[optind], "leave", 5) &&
+ (strlen(argv[optind]) == 5)) {
+ operation = OP_LEAVE;
+ opt_ind = optind + 1;
+ break;
+ } else if (!strncmp(argv[optind], "joinleave", 9) &&
+ (strlen(argv[optind]) == 9)) {
+ operation = OP_JOINLEAVE;
+ opt_ind = optind + 1;
+ break;
+ }
+
+ /*
+ * libdlmcontrol
+ */
+
+ else if (!strncmp(argv[optind], "ls", 2) &&
+ (strlen(argv[optind]) == 2)) {
+ operation = OP_LIST;
+ opt_ind = optind + 1;
+ need_lsname = 0;
+ break;
+ } else if (!strncmp(argv[optind], "deadlock_check", 14) &&
+ (strlen(argv[optind]) == 14)) {
+ operation = OP_DEADLOCK_CHECK;
+ opt_ind = optind + 1;
+ break;
+ } else if (!strncmp(argv[optind], "dump", 4) &&
+ (strlen(argv[optind]) == 4)) {
+ operation = OP_DUMP;
+ opt_ind = optind + 1;
+ need_lsname = 0;
+ break;
+ } else if (!strncmp(argv[optind], "plocks", 6) &&
+ (strlen(argv[optind]) == 6)) {
+ operation = OP_PLOCKS;
+ opt_ind = optind + 1;
+ break;
+ } else if (!strncmp(argv[optind], "log_plock", 9) &&
+ (strlen(argv[optind]) == 9)) {
+ operation = OP_LOG_PLOCK;
+ opt_ind = optind + 1;
+ need_lsname = 0;
+ break;
+ }
+
+ /*
+ * debugfs
+ */
+
+ else if (!strncmp(argv[optind], "lockdump", 8) &&
+ (strlen(argv[optind]) == 8)) {
+ operation = OP_LOCKDUMP;
+ opt_ind = optind + 1;
+ break;
+ } else if (!strncmp(argv[optind], "lockdebug", 9) &&
+ (strlen(argv[optind]) == 9)) {
+ operation = OP_LOCKDEBUG;
+ opt_ind = optind + 1;
+ break;
+ }
+ optind++;
+ }
+
+ if (!operation || !opt_ind) {
+ print_usage();
+ exit(EXIT_FAILURE);
+ }
+
+ if (optind < argc - 1)
+ lsname = argv[opt_ind];
+ else if (need_lsname) {
+ fprintf(stderr, "lockspace name required\n");
+ exit(EXIT_FAILURE);
+ }
+}
+
+static int do_write(int fd, void *buf, size_t count)
+{
+ int rv, off = 0;
+
+ retry:
+ rv = write(fd, (char *)buf + off, count);
+ if (rv == -1 && errno == EINTR)
+ goto retry;
+ if (rv < 0)
+ return rv;
+
+ if (rv != count) {
+ count -= rv;
+ off += rv;
+ goto retry;
+ }
+ return 0;
+}
+
+static char *flag_str(uint32_t flags)
+{
+ static char join_flags[128];
+
+ memset(join_flags, 0, sizeof(join_flags));
+
+ strcat(join_flags, "flags ");
+
+ if (flags & DLM_LSFL_NODIR)
+ strcat(join_flags, "NODIR ");
+
+ if (flags & DLM_LSFL_NEWEXCL)
+ strcat(join_flags, "NEWEXCL ");
+
+ if (flags & DLM_LSFL_FS)
+ strcat(join_flags, "FS ");
+
+ return join_flags;
+}
+
+static void do_join(char *name)
+{
+ dlm_lshandle_t *dh;
+ uint32_t flags = 0;
+
+ if (!opt_dir)
+ flags |= DLM_LSFL_NODIR;
+
+ if (opt_excl)
+ flags |= DLM_LSFL_NEWEXCL;
+
+ if (opt_fs)
+ flags |= DLM_LSFL_FS;
+
+ printf("Joining lockspace \"%s\" permission %o %s\n",
+ name, create_mode, flags ? flag_str(flags) : "");
+ fflush(stdout);
+
+ dh = dlm_new_lockspace(name, create_mode, flags);
+ if (!dh) {
+ fprintf(stderr, "dlm_new_lockspace %s error %d\n",
+ name, errno);
+ exit(-1);
+ }
+
+ dlm_close_lockspace(dh);
+ /* there's no autofree so the ls should stay around */
+ printf("done\n");
+}
+
+static void do_leave(char *name)
+{
+ dlm_lshandle_t *dh;
+
+ printf("Leaving lockspace \"%s\"\n", name);
+ fflush(stdout);
+
+ dh = dlm_open_lockspace(name);
+ if (!dh) {
+ fprintf(stderr, "dlm_open_lockspace %s error %p %d\n",
+ name, dh, errno);
+ exit(-1);
+ }
+
+ dlm_release_lockspace(name, dh, 1);
+ printf("done\n");
+}
+
+static char *pr_master(int nodeid)
+{
+ static char buf[64];
+
+ memset(buf, 0, sizeof(buf));
+
+ if (nodeid > 0)
+ sprintf(buf, "Local %d", nodeid);
+ else if (!nodeid)
+ sprintf(buf, "Master");
+ else if (nodeid == -1)
+ sprintf(buf, "Lookup");
+
+ return buf;
+}
+
+static char *pr_extra(uint32_t flags, int root_list, int recover_list,
+ int recover_locks_count, char *first_lkid)
+{
+ static char buf[128];
+ int first = 0;
+
+ memset(buf, 0, sizeof(buf));
+
+ if (strcmp(first_lkid, "0"))
+ first = 1;
+
+ if (flags || first || root_list || recover_list || recover_locks_count)
+ sprintf(buf,
+ "flags %08x first_lkid %s root %d recover %d locks %d",
+ flags, first_lkid, root_list, recover_list, recover_locks_count);
+
+ return buf;
+}
+
+static void print_rsb(char *line, struct rinfo *ri)
+{
+ char type[4], namefmt[4], *p;
+ char addr[64];
+ char first_lkid[64];
+ int rv, nodeid, root_list, recover_list, recover_locks_count, namelen;
+ uint32_t flags;
+
+ rv = sscanf(line, "%s %s %d %s %u %d %d %u %u %s",
+ type,
+ addr,
+ &nodeid,
+ first_lkid,
+ &flags,
+ &root_list,
+ &recover_list,
+ &recover_locks_count,
+ &namelen,
+ namefmt);
+
+ if (rv != 10)
+ goto fail;
+
+ /* used for lkb prints */
+ ri->nodeid = nodeid;
+
+ ri->namelen = namelen;
+
+ p = strchr(line, '\n');
+ if (!p)
+ goto fail;
+ *p = '\0';
+
+ p = strstr(line, namefmt);
+ if (!p)
+ goto fail;
+ p += 4;
+
+ strcat(addr, " ");
+
+ if (!strncmp(namefmt, "str", 3))
+ printf("Resource len %2d \"%s\"\n", namelen, p);
+ else if (!strncmp(namefmt, "hex", 3))
+ printf("Resource len %2d hex %s\n", namelen, p);
+ else
+ goto fail;
+
+ printf("%-16s %s\n",
+ pr_master(nodeid),
+ pr_extra(flags, root_list, recover_list, recover_locks_count, first_lkid));
+ return;
+
+ fail:
+ fprintf(stderr, "print_rsb error rv %d line \"%s\"\n", rv, line);
+}
+
+static void print_lvb(char *line)
+{
+ char lvb[1024];
+ char type[4];
+ int i, c, rv, lvblen;
+ uint32_t lvbseq;
+
+ memset(lvb, 0, 1024);
+
+ rv = sscanf(line, "%s %u %d %[0-9A-Fa-f ]", type, &lvbseq, &lvblen, lvb);
+
+ if (rv != 4) {
+ fprintf(stderr, "print_lvb error rv %d line \"%s\"\n", rv, line);
+ return;
+ }
+
+ printf("LVB len %d seq %u\n", lvblen, lvbseq);
+
+ for (c = 0, i = 0; ; i++) {
+ printf("%c", lvb[i]);
+ if (lvb[i] != ' ')
+ c++;
+ if (!wide && lvb[i] == ' ' && !(c % 32))
+ printf("\n");
+ if (c == (lvblen * 2))
+ break;
+ }
+ printf("\n");
+}
+
+struct lkb {
+ uint64_t xid, timestamp, time_bast;
+ uint32_t id, remid, exflags, flags, lvbseq;
+ int nodeid, ownpid, status, grmode, rqmode, highbast, rsb_lookup, wait_type;
+};
+
+static const char *pr_grmode(struct lkb *lkb)
+{
+ if (lkb->status == DLM_LKSTS_GRANTED || lkb->status == DLM_LKSTS_CONVERT)
+ return mode_str(lkb->grmode);
+ else if (lkb->status == DLM_LKSTS_WAITING || lkb->rsb_lookup)
+ return "--";
+ else
+ return "XX";
+}
+
+static const char *pr_rqmode(struct lkb *lkb)
+{
+ static char buf[5];
+
+ memset(buf, 0, sizeof(buf));
+
+ if (lkb->status == DLM_LKSTS_GRANTED) {
+ return " ";
+ } else if (lkb->status == DLM_LKSTS_CONVERT ||
+ lkb->status == DLM_LKSTS_WAITING ||
+ lkb->rsb_lookup) {
+ sprintf(buf, "(%s)", mode_str(lkb->rqmode));
+ return buf;
+ } else {
+ return "(XX)";
+ }
+}
+
+static const char *pr_remote(struct lkb *lkb, struct rinfo *ri)
+{
+ static char buf[64];
+
+ memset(buf, 0, sizeof(buf));
+
+ if (!lkb->nodeid) {
+ return " ";
+ } else if (lkb->nodeid != ri->nodeid) {
+ sprintf(buf, "Remote: %3d %08x", lkb->nodeid, lkb->remid);
+ return buf;
+ } else {
+ sprintf(buf, "Master: %3d %08x", lkb->nodeid, lkb->remid);
+ return buf;
+ }
+}
+
+static const char *pr_wait(struct lkb *lkb)
+{
+ static char buf[16];
+
+ memset(buf, 0, sizeof(buf));
+
+ if (!lkb->wait_type) {
+ return " ";
+ } else {
+ sprintf(buf, " wait %02d", lkb->wait_type);
+ return buf;
+ }
+}
+
+static char *pr_verbose(struct lkb *lkb)
+{
+ static char buf[128];
+
+ memset(buf, 0, sizeof(buf));
+
+ sprintf(buf, "time %016llu flags %08x %08x bast %d %llu",
+ (unsigned long long)lkb->timestamp,
+ lkb->exflags, lkb->flags, lkb->highbast,
+ (unsigned long long)lkb->time_bast);
+
+ return buf;
+}
+
+static void print_lkb(char *line, struct rinfo *ri)
+{
+ struct lkb lkb;
+ char type[4];
+ int rv;
+
+ rv = sscanf(line, "%s %x %d %x %u %llu %x %x %d %d %d %d %d %d %u %llu %llu",
+ type,
+ &lkb.id,
+ &lkb.nodeid,
+ &lkb.remid,
+ &lkb.ownpid,
+ (unsigned long long *)&lkb.xid,
+ &lkb.exflags,
+ &lkb.flags,
+ &lkb.status,
+ &lkb.grmode,
+ &lkb.rqmode,
+ &lkb.highbast,
+ &lkb.rsb_lookup,
+ &lkb.wait_type,
+ &lkb.lvbseq,
+ (unsigned long long *)&lkb.timestamp,
+ (unsigned long long *)&lkb.time_bast);
+
+ ri->lkb_count++;
+
+ if (lkb.status == DLM_LKSTS_GRANTED) {
+ if (!ri->print_granted++)
+ printf("Granted\n");
+ ri->lkb_granted++;
+ }
+ if (lkb.status == DLM_LKSTS_CONVERT) {
+ if (!ri->print_convert++)
+ printf("Convert\n");
+ ri->lkb_convert++;
+ }
+ if (lkb.status == DLM_LKSTS_WAITING) {
+ if (!ri->print_waiting++)
+ printf("Waiting\n");
+ ri->lkb_waiting++;
+ }
+ if (lkb.rsb_lookup) {
+ if (!ri->print_lookup++)
+ printf("Lookup\n");
+ ri->lkb_lookup++;
+ }
+
+ if (lkb.wait_type)
+ ri->lkb_wait_msg++;
+
+ if (!ri->nodeid) {
+ if (lkb.nodeid)
+ ri->lkb_master_copy++;
+ else
+ ri->lkb_local_copy++;
+ } else {
+ ri->lkb_process_copy++;
+ }
+
+ printf("%08x %s %s %s %s %s\n",
+ lkb.id, pr_grmode(&lkb), pr_rqmode(&lkb),
+ pr_remote(&lkb, ri), pr_wait(&lkb),
+ (verbose && wide) ? pr_verbose(&lkb) : "");
+
+ if (verbose && !wide)
+ printf("%s\n", pr_verbose(&lkb));
+}
+
+static void clear_rinfo(struct rinfo *ri)
+{
+ memset(ri, 0, sizeof(struct rinfo));
+ ri->nodeid = -9;
+}
+
+static void count_rinfo(struct summary *s, struct rinfo *ri)
+{
+ /* the first time called */
+ if (!ri->namelen)
+ return;
+
+ s->rsb_total++;
+
+ if (ri->lvb)
+ s->rsb_with_lvb++;
+
+ if (!ri->lkb_count) {
+ s->rsb_no_locks++;
+ printf("no locks\n");
+ }
+
+ if (!ri->nodeid)
+ s->rsb_master++;
+ else if (ri->nodeid == -1)
+ s->rsb_lookup++;
+ else if (ri->nodeid > 0)
+ s->rsb_local++;
+ else
+ s->rsb_nodeid_error++;
+
+ s->lkb_count += ri->lkb_count;
+ s->lkb_granted += ri->lkb_granted;
+ s->lkb_convert += ri->lkb_convert;
+ s->lkb_waiting += ri->lkb_waiting;
+ s->lkb_lookup += ri->lkb_lookup;
+ s->lkb_wait_msg += ri->lkb_wait_msg;
+ s->lkb_master_copy += ri->lkb_master_copy;
+ s->lkb_local_copy += ri->lkb_local_copy;
+ s->lkb_process_copy += ri->lkb_process_copy;
+}
+
+static void print_summary(struct summary *s)
+{
+ printf("rsb\n");
+ printf(" total %u\n", s->rsb_total);
+ printf(" master %u\n", s->rsb_master);
+ printf(" remote master %u\n", s->rsb_local);
+ printf(" lookup master %u\n", s->rsb_lookup);
+ printf(" with lvb %u\n", s->rsb_with_lvb);
+ printf(" with no locks %u\n", s->rsb_no_locks);
+ printf(" nodeid error %u\n", s->rsb_nodeid_error);
+ printf("\n");
+
+ printf("lkb\n");
+ printf(" total %u\n", s->lkb_count);
+ printf(" granted %u\n", s->lkb_granted);
+ printf(" convert %u\n", s->lkb_convert);
+ printf(" waiting %u\n", s->lkb_waiting);
+ printf(" local copy %u\n", s->lkb_local_copy);
+ printf(" master copy %u\n", s->lkb_master_copy);
+ printf(" process copy %u\n", s->lkb_process_copy);
+ printf(" rsb lookup %u\n", s->lkb_lookup);
+ printf(" wait message %u\n", s->lkb_wait_msg);
+ printf(" expect reply %u\n", s->expect_replies);
+}
+
+#define LOCK_LINE_MAX 1024
+
+static void do_waiters(char *name, struct summary *sum)
+{
+ FILE *file;
+ char path[PATH_MAX];
+ char line[LOCK_LINE_MAX];
+ char rname[65];
+ int header = 0;
+ int i, j, spaces;
+ int rv, nodeid, wait_type;
+ uint32_t id;
+
+ snprintf(path, PATH_MAX, "/sys/kernel/debug/dlm/%s_waiters", name);
+
+ file = fopen(path, "r");
+ if (!file)
+ return;
+
+ while (fgets(line, LOCK_LINE_MAX, file)) {
+ if (!header) {
+ printf("\n");
+ printf("Expecting reply\n");
+ header = 1;
+ }
+
+ rv = sscanf(line, "%x %d %d",
+ &id, &wait_type, &nodeid);
+
+ if (rv != 3) {
+ printf("waiters: %s", line);
+ continue;
+ }
+
+ /* parse the resource name from the remainder of the line */
+ j = 0;
+ spaces = 0;
+
+ for (i = 0; i < LOCK_LINE_MAX; i++) {
+ if (line[i] == '\n')
+ break;
+ if (spaces == 3) {
+ rname[j++] = line[i];
+ if (j == (sizeof(rname) - 1))
+ break;
+ } else if (line[i] == ' ') {
+ spaces++;
+ }
+ }
+
+ printf("nodeid %2d msg %s lkid %08x resource \"%s\"\n",
+ nodeid, msg_str(wait_type), id, rname);
+
+ sum->expect_replies++;
+ }
+ fclose(file);
+}
+
+static void do_lockdebug(char *name)
+{
+ struct summary summary;
+ struct rinfo info;
+ FILE *file;
+ char path[PATH_MAX];
+ char line[LOCK_LINE_MAX];
+ int old = 0;
+
+ snprintf(path, PATH_MAX, "/sys/kernel/debug/dlm/%s_all", name);
+
+ file = fopen(path, "r");
+ if (!file) {
+ snprintf(path, PATH_MAX, "/sys/kernel/debug/dlm/%s", name);
+ file = fopen(path, "r");
+ if (!file) {
+ fprintf(stderr, "can't open %s: %s\n", path, strerror(errno));
+ return;
+ }
+ old = 1;
+ }
+
+ memset(&summary, 0, sizeof(struct summary));
+ memset(&info, 0, sizeof(struct rinfo));
+
+ while (fgets(line, LOCK_LINE_MAX, file)) {
+
+ if (old)
+ goto raw;
+
+ if (!strncmp(line, "version", 7))
+ continue;
+
+ if (!strncmp(line, "rsb", 3)) {
+ count_rinfo(&summary, &info);
+ clear_rinfo(&info);
+ printf("\n");
+ print_rsb(line, &info);
+ continue;
+ }
+
+ if (!strncmp(line, "lvb", 3)) {
+ print_lvb(line);
+ info.lvb = 1;
+ continue;
+ }
+
+ if (!strncmp(line, "lkb", 3)) {
+ print_lkb(line, &info);
+ continue;
+ }
+ raw:
+ printf("%s", line);
+ }
+ fclose(file);
+
+ do_waiters(name, &summary);
+
+ if (summarize) {
+ printf("\n");
+ print_summary(&summary);
+ }
+}
+
+static void parse_r_name(char *line, char *name)
+{
+ char *p;
+ int i = 0;
+ int begin = 0;
+
+ for (p = line; ; p++) {
+ if (*p == '"') {
+ if (begin)
+ break;
+ begin = 1;
+ continue;
+ }
+ if (begin)
+ name[i++] = *p;
+ }
+}
+
+static void do_lockdump(char *name)
+{
+ FILE *file;
+ char path[PATH_MAX];
+ char line[LOCK_LINE_MAX];
+ char r_name[65];
+ int r_nodeid;
+ int r_len;
+ int rv;
+ unsigned int tm;
+ unsigned long long xid;
+ uint32_t id;
+ int nodeid;
+ uint32_t remid;
+ int ownpid;
+ uint32_t exflags;
+ uint32_t flags;
+ int8_t status;
+ int8_t grmode;
+ int8_t rqmode;
+
+ snprintf(path, PATH_MAX, "/sys/kernel/debug/dlm/%s_locks", name);
+
+ file = fopen(path, "r");
+ if (!file) {
+ fprintf(stderr, "can't open %s: %s\n", path, strerror(errno));
+ return;
+ }
+
+ /* skip the header on the first line */
+ if (!fgets(line, LOCK_LINE_MAX, file))
+ return;
+
+ while (fgets(line, LOCK_LINE_MAX, file)) {
+ rv = sscanf(line, "%x %d %x %u %llu %x %x %hhd %hhd %hhd %u %d %d",
+ &id,
+ &nodeid,
+ &remid,
+ &ownpid,
+ &xid,
+ &exflags,
+ &flags,
+ &status,
+ &grmode,
+ &rqmode,
+ &tm,
+ &r_nodeid,
+ &r_len);
+
+ if (rv != 13) {
+ fprintf(stderr, "invalid debugfs line %d: %s\n",
+ rv, line);
+ return;
+ }
+
+ memset(r_name, 0, sizeof(r_name));
+ parse_r_name(line, r_name);
+
+ /* don't print MSTCPY locks without -M */
+ if (!r_nodeid && nodeid) {
+ if (!dump_mstcpy)
+ continue;
+ printf("id %08x gr %s rq %s pid %u MSTCPY %d \"%s\"\n",
+ id, mode_str(grmode), mode_str(rqmode),
+ ownpid, nodeid, r_name);
+ continue;
+ }
+
+ /* A hack because dlm-kernel doesn't set rqmode back to IV when
+ a NOQUEUE convert fails, which means in a lockdump it looks
+ like a granted lock is still converting since rqmode is not
+ IV. (does it make sense to include status in the output,
+ e.g. G,C,W?) */
+
+ if (status == DLM_LKSTS_GRANTED)
+ rqmode = LKM_IVMODE;
+
+ printf("id %08x gr %s rq %s pid %u master %d \"%s\"\n",
+ id, mode_str(grmode), mode_str(rqmode),
+ ownpid, nodeid, r_name);
+ }
+
+ fclose(file);
+}
+
+static char *dlmc_lf_str(uint32_t flags)
+{
+ static char str[128];
+ int i = 0;
+
+ memset(str, 0, sizeof(str));
+
+ if (flags & DLMC_LF_SAVE_PLOCKS) {
+ i++;
+ strcat(str, "save_plock");
+ }
+ if (flags & DLMC_LF_NEED_PLOCKS) {
+ strcat(str, i++ ? "," : "");
+ strcat(str, "need_plock");
+ }
+ if (flags & DLMC_LF_FS_REGISTERED) {
+ strcat(str, i++ ? "," : "");
+ strcat(str, "fs_reg");
+ }
+ if (flags & DLMC_LF_KERNEL_STOPPED) {
+ strcat(str, i++ ? "," : "");
+ strcat(str, "kern_stop");
+ }
+ if (flags & DLMC_LF_LEAVING) {
+ strcat(str, i++ ? "," : "");
+ strcat(str, "leave");
+ }
+ if (flags & DLMC_LF_JOINING) {
+ strcat(str, i++ ? "," : "");
+ strcat(str, "join");
+ }
+
+ return str;
+}
+
+static const char *nf_check_str(uint32_t flags)
+{
+ if (flags & DLMC_NF_CHECK_FENCING)
+ return "fence";
+
+ if (flags & DLMC_NF_CHECK_QUORUM)
+ return "quorum";
+
+ if (flags & DLMC_NF_CHECK_FS)
+ return "fs";
+
+ return "none";
+}
+
+static const char *condition_str(int cond)
+{
+ switch (cond) {
+ case 0:
+ return "";
+ case 1:
+ return "fencing";
+ case 2:
+ return "quorum";
+ case 3:
+ return "fs";
+ case 4:
+ return "pending";
+ default:
+ return "unknown";
+ }
+}
+
+static int node_compare(const void *va, const void *vb)
+{
+ const struct dlmc_node *a = va;
+ const struct dlmc_node *b = vb;
+
+ return a->nodeid - b->nodeid;
+}
+
+static void show_nodeids(int count, struct dlmc_node *nodes_in)
+{
+ struct dlmc_node *n = nodes_in;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ printf("%d ", n->nodeid);
+ n++;
+ }
+ printf("\n");
+}
+
+static void show_ls(struct dlmc_lockspace *ls)
+{
+ int rv, node_count;
+
+ printf("name %s\n", ls->name);
+ printf("id 0x%08x\n", ls->global_id);
+ printf("flags 0x%08x %s\n",
+ ls->flags, dlmc_lf_str(ls->flags));
+ printf("change member %d joined %d remove %d failed %d seq %d,%d\n",
+ ls->cg_prev.member_count, ls->cg_prev.joined_count,
+ ls->cg_prev.remove_count, ls->cg_prev.failed_count,
+ ls->cg_prev.combined_seq, ls->cg_prev.seq);
+
+ node_count = 0;
+ memset(&nodes, 0, sizeof(nodes));
+ rv = dlmc_lockspace_nodes(ls->name, DLMC_NODES_MEMBERS,
+ MAX_NODES, &node_count, nodes);
+ if (rv < 0) {
+ printf("members error\n");
+ goto next;
+ }
+ qsort(nodes, node_count, sizeof(struct dlmc_node), node_compare);
+
+ printf("members ");
+ show_nodeids(node_count, nodes);
+
+ next:
+ if (!ls->cg_next.seq)
+ return;
+
+ printf("new change member %d joined %d remove %d failed %d seq %d,%d\n",
+ ls->cg_next.member_count, ls->cg_next.joined_count,
+ ls->cg_next.remove_count, ls->cg_next.failed_count,
+ ls->cg_next.combined_seq, ls->cg_next.seq);
+
+ printf("new status wait_messages %d wait_condition %d %s\n",
+ ls->cg_next.wait_messages, ls->cg_next.wait_condition,
+ condition_str(ls->cg_next.wait_condition));
+
+ node_count = 0;
+ memset(&nodes, 0, sizeof(nodes));
+ rv = dlmc_lockspace_nodes(ls->name, DLMC_NODES_NEXT,
+ MAX_NODES, &node_count, nodes);
+ if (rv < 0) {
+ printf("new members error\n");
+ return;
+ }
+ qsort(nodes, node_count, sizeof(struct dlmc_node), node_compare);
+
+ printf("new members ");
+ show_nodeids(node_count, nodes);
+}
+
+static int member_int(struct dlmc_node *n)
+{
+ if (n->flags & DLMC_NF_DISALLOWED)
+ return -1;
+ if (n->flags & DLMC_NF_MEMBER)
+ return 1;
+ return 0;
+}
+
+static void show_all_nodes(int count, struct dlmc_node *nodes_in)
+{
+ struct dlmc_node *n = nodes_in;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ printf("nodeid %d member %d failed %d start %d seq_add %u seq_rem %u check %s\n",
+ n->nodeid,
+ member_int(n),
+ n->failed_reason,
+ (n->flags & DLMC_NF_START) ? 1 : 0,
+ n->added_seq,
+ n->removed_seq,
+ nf_check_str(n->flags));
+ n++;
+ }
+}
+
+static void do_list(char *name)
+{
+ struct dlmc_lockspace *ls;
+ int node_count;
+ int ls_count;
+ int rv;
+ int i;
+
+ memset(lss, 0, sizeof(lss));
+
+ if (name) {
+ ls_count = 1;
+ rv = dlmc_lockspace_info(name, lss);
+ } else {
+ rv = dlmc_lockspaces(MAX_LS, &ls_count, lss);
+ }
+
+ if (rv < 0)
+ exit(EXIT_FAILURE); /* dlm_controld probably not running */
+
+ if (ls_count)
+ printf("dlm lockspaces\n");
+
+ for (i = 0; i < ls_count; i++) {
+ ls = &lss[i];
+
+ show_ls(ls);
+
+ if (!ls_all_nodes)
+ goto next;
+
+ node_count = 0;
+ memset(&nodes, 0, sizeof(nodes));
+
+ rv = dlmc_lockspace_nodes(ls->name, DLMC_NODES_ALL,
+ MAX_NODES, &node_count, nodes);
+ if (rv < 0) {
+ printf("all nodes error %d %d\n", rv, errno);
+ goto next;
+ }
+
+ qsort(nodes, node_count, sizeof(struct dlmc_node),node_compare);
+
+ printf("all nodes\n");
+ show_all_nodes(node_count, nodes);
+ next:
+ printf("\n");
+ }
+}
+
+static void do_deadlock_check(char *name)
+{
+ dlmc_deadlock_check(name);
+}
+
+static void do_plocks(char *name)
+{
+ char buf[DLMC_DUMP_SIZE];
+
+ memset(buf, 0, sizeof(buf));
+
+ dlmc_dump_plocks(name, buf);
+
+ buf[DLMC_DUMP_SIZE-1] = '\0';
+
+ do_write(STDOUT_FILENO, buf, strlen(buf));
+}
+
+static void do_dump(void)
+{
+ char buf[DLMC_DUMP_SIZE];
+
+ memset(buf, 0, sizeof(buf));
+
+ dlmc_dump_debug(buf);
+
+ buf[DLMC_DUMP_SIZE-1] = '\0';
+
+ do_write(STDOUT_FILENO, buf, strlen(buf));
+ printf("\n");
+}
+
+static void do_log_plock(void)
+{
+ char buf[DLMC_DUMP_SIZE];
+
+ memset(buf, 0, sizeof(buf));
+
+ dlmc_dump_log_plock(buf);
+
+ buf[DLMC_DUMP_SIZE-1] = '\0';
+
+ do_write(STDOUT_FILENO, buf, strlen(buf));
+ printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+ prog_name = argv[0];
+ decode_arguments(argc, argv);
+
+ switch (operation) {
+
+ /* calls to libdlm; pass a command to dlm-kernel */
+
+ case OP_JOIN:
+ do_join(lsname);
+ break;
+
+ case OP_LEAVE:
+ do_leave(lsname);
+ break;
+
+ case OP_JOINLEAVE:
+ do_join(lsname);
+ do_leave(lsname);
+ break;
+
+ /* calls to libdlmcontrol; pass a command/query to dlm_controld */
+
+ case OP_LIST:
+ do_list(lsname);
+ break;
+
+ case OP_DUMP:
+ do_dump();
+ break;
+
+ case OP_LOG_PLOCK:
+ do_log_plock();
+ break;
+
+ case OP_PLOCKS:
+ do_plocks(lsname);
+ break;
+
+ case OP_DEADLOCK_CHECK:
+ do_deadlock_check(lsname);
+ break;
+
+ /* calls to read debugfs; query info from dlm-kernel */
+
+ case OP_LOCKDUMP:
+ do_lockdump(lsname);
+ break;
+
+ case OP_LOCKDEBUG:
+ do_lockdebug(lsname);
+ break;
+ }
+ return 0;
+}
+
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=2a1ff7ea5b9c86e3e6ade2ea8da1bd22e2b61749
Commit: 2a1ff7ea5b9c86e3e6ade2ea8da1bd22e2b61749
Parent: 7c18aed3909b817e0f18840dc4ea8737a4f11ac6
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Fri Sep 30 14:15:57 2011 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Fri Sep 30 14:15:57 2011 -0500
dlm_controld: build libdlmcontrol
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
dlm/libdlmcontrol/libdlmcontrol.h | 90 --------
dlm/libdlmcontrol/main.c | 421 -------------------------------------
dlm_controld/Makefile | 30 +++-
dlm_controld/lib.c | 419 ++++++++++++++++++++++++++++++++++++
dlm_controld/libdlmcontrol.h | 90 ++++++++
5 files changed, 537 insertions(+), 513 deletions(-)
diff --git a/dlm/libdlmcontrol/libdlmcontrol.h b/dlm/libdlmcontrol/libdlmcontrol.h
deleted file mode 100644
index 64a3814..0000000
--- a/dlm/libdlmcontrol/libdlmcontrol.h
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef _LIBDLMCONTROL_H_
-#define _LIBDLMCONTROL_H_
-
-#define DLMC_DUMP_SIZE (1024 * 1024)
-
-#define DLMC_NF_MEMBER 0x00000001 /* node is member in cg */
-#define DLMC_NF_START 0x00000002 /* start message recvd for cg */
-#define DLMC_NF_DISALLOWED 0x00000004 /* node disallowed in cg */
-#define DLMC_NF_CHECK_FENCING 0x00000008
-#define DLMC_NF_CHECK_QUORUM 0x00000010
-#define DLMC_NF_CHECK_FS 0x00000020
-
-struct dlmc_node {
- int nodeid;
- uint32_t flags;
- uint32_t added_seq;
- uint32_t removed_seq;
- int failed_reason;
-};
-
-struct dlmc_change {
- int member_count;
- int joined_count;
- int remove_count;
- int failed_count;
- int wait_condition; /* 0 no, 1 fencing, 2 quorum, 3 fs */
- int wait_messages; /* 0 no, 1 yes */
- uint32_t seq;
- uint32_t combined_seq;
-};
-
-#define DLMC_LF_JOINING 0x00000001
-#define DLMC_LF_LEAVING 0x00000002
-#define DLMC_LF_KERNEL_STOPPED 0x00000004
-#define DLMC_LF_FS_REGISTERED 0x00000008
-#define DLMC_LF_NEED_PLOCKS 0x00000010
-#define DLMC_LF_SAVE_PLOCKS 0x00000020
-
-struct dlmc_lockspace {
- int group_mode;
- struct dlmc_change cg_prev; /* completed change (started_change) */
- struct dlmc_change cg_next; /* in-progress change (changes list) */
- uint32_t flags;
- uint32_t global_id;
- char name[DLM_LOCKSPACE_LEN+1];
-};
-
-/* dlmc_lockspace_nodes() types
-
- MEMBERS: members in completed (prev) change,
- zero if there's no completed (prev) change
- NEXT: members in in-progress (next) change,
- zero if there's no in-progress (next) change
- ALL: NEXT + nonmembers if there's an in-progress (next) change,
- MEMBERS + nonmembers if there's no in-progress (next) change, but
- there is a completed (prev) change
- nonmembers if there's no in-progress (next) or completed (prev)
- change (possible?)
-
- dlmc_node_info() returns info for in-progress (next) change, if one exists,
- otherwise it returns info for completed (prev) change.
-*/
-
-#define DLMC_NODES_ALL 1
-#define DLMC_NODES_MEMBERS 2
-#define DLMC_NODES_NEXT 3
-
-int dlmc_dump_debug(char *buf);
-int dlmc_dump_log_plock(char *buf);
-int dlmc_dump_plocks(char *name, char *buf);
-int dlmc_lockspace_info(char *lsname, struct dlmc_lockspace *ls);
-int dlmc_node_info(char *lsname, int nodeid, struct dlmc_node *node);
-int dlmc_lockspaces(int max, int *count, struct dlmc_lockspace *lss);
-int dlmc_lockspace_nodes(char *lsname, int type, int max, int *count,
- struct dlmc_node *nodes);
-
-#define DLMC_RESULT_REGISTER 1
-#define DLMC_RESULT_NOTIFIED 2
-
-int dlmc_fs_connect(void);
-void dlmc_fs_disconnect(int fd);
-int dlmc_fs_register(int fd, char *name);
-int dlmc_fs_unregister(int fd, char *name);
-int dlmc_fs_notified(int fd, char *name, int nodeid);
-int dlmc_fs_result(int fd, char *name, int *type, int *nodeid, int *result);
-
-int dlmc_deadlock_check(char *name);
-
-#endif
-
diff --git a/dlm/libdlmcontrol/main.c b/dlm/libdlmcontrol/main.c
deleted file mode 100644
index a4cf500..0000000
--- a/dlm/libdlmcontrol/main.c
+++ /dev/null
@@ -1,421 +0,0 @@
-#include "clusterautoconfig.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <stdint.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-
-#include <linux/dlmconstants.h>
-#include "dlm_controld.h"
-#include "libdlmcontrol.h"
-
-static int do_read(int fd, void *buf, size_t count)
-{
- int rv, off = 0;
-
- while (off < count) {
- rv = read(fd, (char *)buf + off, count - off);
- if (rv == 0)
- return -1;
- if (rv == -1 && errno == EINTR)
- continue;
- if (rv == -1)
- return -1;
- off += rv;
- }
- return 0;
-}
-
-static int do_write(int fd, void *buf, size_t count)
-{
- int rv, off = 0;
-
- retry:
- rv = write(fd, (char *)buf + off, count);
- if (rv == -1 && errno == EINTR)
- goto retry;
- if (rv < 0) {
- return rv;
- }
-
- if (rv != count) {
- count -= rv;
- off += rv;
- goto retry;
- }
- return 0;
-}
-
-static int do_connect(const char *sock_path)
-{
- struct sockaddr_un sun;
- socklen_t addrlen;
- int rv, fd;
-
- fd = socket(PF_UNIX, SOCK_STREAM, 0);
- if (fd < 0)
- goto out;
-
- memset(&sun, 0, sizeof(sun));
- sun.sun_family = AF_UNIX;
- strcpy(&sun.sun_path[1], sock_path);
- addrlen = sizeof(sa_family_t) + strlen(sun.sun_path+1) + 1;
-
- rv = connect(fd, (struct sockaddr *) &sun, addrlen);
- if (rv < 0) {
- close(fd);
- fd = rv;
- }
- out:
- return fd;
-}
-
-static void init_header(struct dlmc_header *h, int cmd, char *name,
- int extra_len)
-{
- memset(h, 0, sizeof(struct dlmc_header));
-
- h->magic = DLMC_MAGIC;
- h->version = DLMC_VERSION;
- h->len = sizeof(struct dlmc_header) + extra_len;
- h->command = cmd;
-
- if (name)
- strncpy(h->name, name, DLM_LOCKSPACE_LEN);
-}
-
-static char copy_buf[DLMC_DUMP_SIZE];
-
-static int do_dump(int cmd, char *name, char *buf)
-{
- struct dlmc_header h;
- int fd, rv, len;
-
- memset(copy_buf, 0, DLMC_DUMP_SIZE);
-
- init_header(&h, cmd, name, 0);
-
- fd = do_connect(DLMC_QUERY_SOCK_PATH);
- if (fd < 0) {
- rv = fd;
- goto out;
- }
-
- rv = do_write(fd, &h, sizeof(h));
- if (rv < 0)
- goto out_close;
-
- memset(&h, 0, sizeof(h));
-
- rv = do_read(fd, &h, sizeof(h));
- if (rv < 0)
- goto out_close;
-
- len = h.len - sizeof(h);
-
- if (len <= 0 || len > DLMC_DUMP_SIZE)
- goto out_close;
-
- rv = do_read(fd, copy_buf, len);
- if (rv < 0)
- goto out_close;
-
- memcpy(buf, copy_buf, len);
- out_close:
- close(fd);
- out:
- return rv;
-}
-
-int dlmc_dump_debug(char *buf)
-{
- return do_dump(DLMC_CMD_DUMP_DEBUG, NULL, buf);
-}
-
-int dlmc_dump_log_plock(char *buf)
-{
- return do_dump(DLMC_CMD_DUMP_LOG_PLOCK, NULL, buf);
-}
-
-int dlmc_dump_plocks(char *name, char *buf)
-{
- return do_dump(DLMC_CMD_DUMP_PLOCKS, name, buf);
-}
-
-int dlmc_node_info(char *name, int nodeid, struct dlmc_node *node)
-{
- struct dlmc_header h, *rh;
- char reply[sizeof(struct dlmc_header) + sizeof(struct dlmc_node)];
- int fd, rv;
-
- init_header(&h, DLMC_CMD_NODE_INFO, name, 0);
- h.data = nodeid;
-
- memset(reply, 0, sizeof(reply));
-
- fd = do_connect(DLMC_QUERY_SOCK_PATH);
- if (fd < 0) {
- rv = fd;
- goto out;
- }
-
- rv = do_write(fd, &h, sizeof(h));
- if (rv < 0)
- goto out_close;
-
- rv = do_read(fd, reply, sizeof(reply));
- if (rv < 0)
- goto out_close;
-
- rh = (struct dlmc_header *)reply;
- rv = rh->data;
- if (rv < 0)
- goto out_close;
-
- memcpy(node, (char *)reply + sizeof(struct dlmc_header),
- sizeof(struct dlmc_node));
- out_close:
- close(fd);
- out:
- return rv;
-}
-
-int dlmc_lockspace_info(char *name, struct dlmc_lockspace *lockspace)
-{
- struct dlmc_header h, *rh;
- char reply[sizeof(struct dlmc_header) + sizeof(struct dlmc_lockspace)];
- int fd, rv;
-
- init_header(&h, DLMC_CMD_LOCKSPACE_INFO, name, 0);
-
- memset(reply, 0, sizeof(reply));
-
- fd = do_connect(DLMC_QUERY_SOCK_PATH);
- if (fd < 0) {
- rv = fd;
- goto out;
- }
-
- rv = do_write(fd, &h, sizeof(h));
- if (rv < 0)
- goto out_close;
-
- rv = do_read(fd, reply, sizeof(reply));
- if (rv < 0)
- goto out_close;
-
- rh = (struct dlmc_header *)reply;
- rv = rh->data;
- if (rv < 0)
- goto out_close;
-
- memcpy(lockspace, (char *)reply + sizeof(struct dlmc_header),
- sizeof(struct dlmc_lockspace));
- out_close:
- close(fd);
- out:
- return rv;
-}
-
-int dlmc_lockspaces(int max, int *count, struct dlmc_lockspace *lss)
-{
- struct dlmc_header h, *rh;
- char *reply;
- int reply_len;
- int fd, rv, result, ls_count;
-
- init_header(&h, DLMC_CMD_LOCKSPACES, NULL, 0);
- h.data = max;
-
- reply_len = sizeof(struct dlmc_header) +
- (max * sizeof(struct dlmc_lockspace));
- reply = malloc(reply_len);
- if (!reply) {
- rv = -1;
- goto out;
- }
- memset(reply, 0, reply_len);
-
- fd = do_connect(DLMC_QUERY_SOCK_PATH);
- if (fd < 0) {
- rv = fd;
- goto out;
- }
-
- rv = do_write(fd, &h, sizeof(h));
- if (rv < 0)
- goto out_close;
-
- /* won't usually get back the full reply_len */
- do_read(fd, reply, reply_len);
-
- rh = (struct dlmc_header *)reply;
- result = rh->data;
- if (result < 0 && result != -E2BIG) {
- rv = result;
- goto out_close;
- }
-
- if (result == -E2BIG) {
- *count = -E2BIG;
- ls_count = max;
- } else {
- *count = result;
- ls_count = result;
- }
- rv = 0;
-
- memcpy(lss, (char *)reply + sizeof(struct dlmc_header),
- ls_count * sizeof(struct dlmc_lockspace));
- out_close:
- close(fd);
- out:
- return rv;
-}
-
-int dlmc_lockspace_nodes(char *name, int type, int max, int *count,
- struct dlmc_node *nodes)
-{
- struct dlmc_header h, *rh;
- char *reply;
- int reply_len;
- int fd, rv, result, node_count;
-
- init_header(&h, DLMC_CMD_LOCKSPACE_NODES, name, 0);
- h.option = type;
- h.data = max;
-
- reply_len = sizeof(struct dlmc_header) +
- (max * sizeof(struct dlmc_node));
- reply = malloc(reply_len);
- if (!reply) {
- rv = -1;
- goto out;
- }
- memset(reply, 0, reply_len);
-
- fd = do_connect(DLMC_QUERY_SOCK_PATH);
- if (fd < 0) {
- rv = fd;
- goto out;
- }
-
- rv = do_write(fd, &h, sizeof(h));
- if (rv < 0)
- goto out_close;
-
- /* won't usually get back the full reply_len */
- do_read(fd, reply, reply_len);
-
- rh = (struct dlmc_header *)reply;
- result = rh->data;
- if (result < 0 && result != -E2BIG) {
- rv = result;
- goto out_close;
- }
-
- if (result == -E2BIG) {
- *count = -E2BIG;
- node_count = max;
- } else {
- *count = result;
- node_count = result;
- }
- rv = 0;
-
- memcpy(nodes, (char *)reply + sizeof(struct dlmc_header),
- node_count * sizeof(struct dlmc_node));
- out_close:
- close(fd);
- out:
- return rv;
-}
-
-int dlmc_fs_connect(void)
-{
- return do_connect(DLMC_SOCK_PATH);
-}
-
-void dlmc_fs_disconnect(int fd)
-{
- close(fd);
-}
-
-int dlmc_fs_register(int fd, char *name)
-{
- struct dlmc_header h;
-
- init_header(&h, DLMC_CMD_FS_REGISTER, name, 0);
-
- return do_write(fd, &h, sizeof(h));
-}
-
-int dlmc_fs_unregister(int fd, char *name)
-{
- struct dlmc_header h;
-
- init_header(&h, DLMC_CMD_FS_UNREGISTER, name, 0);
-
- return do_write(fd, &h, sizeof(h));
-}
-
-int dlmc_fs_notified(int fd, char *name, int nodeid)
-{
- struct dlmc_header h;
-
- init_header(&h, DLMC_CMD_FS_NOTIFIED, name, 0);
- h.data = nodeid;
-
- return do_write(fd, &h, sizeof(h));
-}
-
-int dlmc_fs_result(int fd, char *name, int *type, int *nodeid, int *result)
-{
- struct dlmc_header h;
- int rv;
-
- rv = do_read(fd, &h, sizeof(h));
- if (rv < 0)
- goto out;
-
- strncpy(name, h.name, DLM_LOCKSPACE_LEN);
- *nodeid = h.option;
- *result = h.data;
-
- switch (h.command) {
- case DLMC_CMD_FS_REGISTER:
- *type = DLMC_RESULT_REGISTER;
- break;
- case DLMC_CMD_FS_NOTIFIED:
- *type = DLMC_RESULT_NOTIFIED;
- break;
- default:
- *type = 0;
- }
- out:
- return rv;
-}
-
-int dlmc_deadlock_check(char *name)
-{
- struct dlmc_header h;
- int fd, rv;
-
- init_header(&h, DLMC_CMD_DEADLOCK_CHECK, name, 0);
-
- fd = do_connect(DLMC_SOCK_PATH);
- if (fd < 0) {
- rv = fd;
- goto out;
- }
-
- rv = do_write(fd, &h, sizeof(h));
- close(fd);
- out:
- return rv;
-}
-
diff --git a/dlm_controld/Makefile b/dlm_controld/Makefile
index d47f71b..f643ec3 100644
--- a/dlm_controld/Makefile
+++ b/dlm_controld/Makefile
@@ -1,6 +1,16 @@
BIN_TARGET = dlm_controld
+
MAN_TARGET = dlm_controld.8
+HDR_TARGET = libdlmcontrol.h
+
+LIB_NAME = libdlmcontrol
+LIB_MAJOR = 3
+LIB_MINOR = 1
+LIB_SO = $(LIB_NAME).so
+LIB_SMAJOR = $(LIB_SO).$(LIB_MAJOR)
+LIB_TARGET = $(LIB_SO).$(LIB_MAJOR).$(LIB_MINOR)
+
BIN_SOURCE = \
action.c \
cpg.c \
@@ -35,17 +45,26 @@ BIN_CFLAGS += -D_GNU_SOURCE -g \
BIN_CFLAGS += -fPIE -DPIE
BIN_CFLAGS += `xml2-config --cflags`
-BIN_CFLAGS += -I../dlm/libdlm -I../dlm/libdlmcontrol
+BIN_CFLAGS += -I../dlm/libdlm
BIN_LDFLAGS += -Wl,-z,now -Wl,-z,relro -pie
BIN_LDFLAGS += `xml2-config --libs`
BIN_LDFLAGS += -lpthread -llogthread -lcpg -lconfdb -lcfg -lquorum -lfenced
-all: $(BIN_TARGET)
+LIB_SOURCE = lib.c
+LIB_CFLAGS += $(BIN_CFLAGS)
+LIB_LDFLAGS += -Wl,-z,relro -pie
+
+all: $(LIB_TARGET) $(BIN_TARGET)
$(BIN_TARGET): $(BIN_SOURCE)
$(CC) $(BIN_CFLAGS) $(BIN_LDFLAGS) $(BIN_SOURCE) -o $@ -L.
+$(LIB_TARGET): $(LIB_SOURCE)
+ $(CC) $(LIB_CFLAGS) $(LIB_LDFLAGS) -shared -fPIC -o $@ -Wl,-soname=$(LIB_SMAJOR) $^
+ ln -sf $(LIB_TARGET) $(LIB_SO)
+ ln -sf $(LIB_TARGET) $(LIB_SMAJOR)
+
clean:
rm -f *.o *.so *.so.* $(BIN_TARGET)
@@ -54,12 +73,19 @@ INSTALL=$(shell which install)
DESTDIR=
BINDIR=/usr/sbin
+LIBDIR=/usr/lib64
+HDRDIR=/usr/include
MANDIR=/usr/share/man
.PHONY: install
install: all
$(INSTALL) -d $(DESTDIR)/$(BINDIR)
+ $(INSTALL) -d $(DESTDIR)/$(LIBDIR)
+ $(INSTALL) -d $(DESTDIR)/$(HDRDIR)
$(INSTALL) -d $(DESTDIR)/$(MANDIR)/man8
$(INSTALL) -c -m 755 $(BIN_TARGET) $(DESTDIR)/$(BINDIR)
+ $(INSTALL) -c -m 755 $(LIB_TARGET) $(DESTDIR)/$(LIBDIR)
+ cp -a $(LIB_SO) $(DESTDIR)/$(LIBDIR)
+ $(INSTALL) -c -m 644 $(HDR_TARGET) $(DESTDIR)/$(HDRDIR)
$(INSTALL) -m 644 $(MAN_TARGET) $(DESTDIR)/$(MANDIR)/man8/
diff --git a/dlm_controld/lib.c b/dlm_controld/lib.c
new file mode 100644
index 0000000..e714ff9
--- /dev/null
+++ b/dlm_controld/lib.c
@@ -0,0 +1,419 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include <linux/dlmconstants.h>
+#include "dlm_controld.h"
+#include "libdlmcontrol.h"
+
+static int do_read(int fd, void *buf, size_t count)
+{
+ int rv, off = 0;
+
+ while (off < count) {
+ rv = read(fd, (char *)buf + off, count - off);
+ if (rv == 0)
+ return -1;
+ if (rv == -1 && errno == EINTR)
+ continue;
+ if (rv == -1)
+ return -1;
+ off += rv;
+ }
+ return 0;
+}
+
+static int do_write(int fd, void *buf, size_t count)
+{
+ int rv, off = 0;
+
+ retry:
+ rv = write(fd, (char *)buf + off, count);
+ if (rv == -1 && errno == EINTR)
+ goto retry;
+ if (rv < 0) {
+ return rv;
+ }
+
+ if (rv != count) {
+ count -= rv;
+ off += rv;
+ goto retry;
+ }
+ return 0;
+}
+
+static int do_connect(const char *sock_path)
+{
+ struct sockaddr_un sun;
+ socklen_t addrlen;
+ int rv, fd;
+
+ fd = socket(PF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0)
+ goto out;
+
+ memset(&sun, 0, sizeof(sun));
+ sun.sun_family = AF_UNIX;
+ strcpy(&sun.sun_path[1], sock_path);
+ addrlen = sizeof(sa_family_t) + strlen(sun.sun_path+1) + 1;
+
+ rv = connect(fd, (struct sockaddr *) &sun, addrlen);
+ if (rv < 0) {
+ close(fd);
+ fd = rv;
+ }
+ out:
+ return fd;
+}
+
+static void init_header(struct dlmc_header *h, int cmd, char *name,
+ int extra_len)
+{
+ memset(h, 0, sizeof(struct dlmc_header));
+
+ h->magic = DLMC_MAGIC;
+ h->version = DLMC_VERSION;
+ h->len = sizeof(struct dlmc_header) + extra_len;
+ h->command = cmd;
+
+ if (name)
+ strncpy(h->name, name, DLM_LOCKSPACE_LEN);
+}
+
+static char copy_buf[DLMC_DUMP_SIZE];
+
+static int do_dump(int cmd, char *name, char *buf)
+{
+ struct dlmc_header h;
+ int fd, rv, len;
+
+ memset(copy_buf, 0, DLMC_DUMP_SIZE);
+
+ init_header(&h, cmd, name, 0);
+
+ fd = do_connect(DLMC_QUERY_SOCK_PATH);
+ if (fd < 0) {
+ rv = fd;
+ goto out;
+ }
+
+ rv = do_write(fd, &h, sizeof(h));
+ if (rv < 0)
+ goto out_close;
+
+ memset(&h, 0, sizeof(h));
+
+ rv = do_read(fd, &h, sizeof(h));
+ if (rv < 0)
+ goto out_close;
+
+ len = h.len - sizeof(h);
+
+ if (len <= 0 || len > DLMC_DUMP_SIZE)
+ goto out_close;
+
+ rv = do_read(fd, copy_buf, len);
+ if (rv < 0)
+ goto out_close;
+
+ memcpy(buf, copy_buf, len);
+ out_close:
+ close(fd);
+ out:
+ return rv;
+}
+
+int dlmc_dump_debug(char *buf)
+{
+ return do_dump(DLMC_CMD_DUMP_DEBUG, NULL, buf);
+}
+
+int dlmc_dump_log_plock(char *buf)
+{
+ return do_dump(DLMC_CMD_DUMP_LOG_PLOCK, NULL, buf);
+}
+
+int dlmc_dump_plocks(char *name, char *buf)
+{
+ return do_dump(DLMC_CMD_DUMP_PLOCKS, name, buf);
+}
+
+int dlmc_node_info(char *name, int nodeid, struct dlmc_node *node)
+{
+ struct dlmc_header h, *rh;
+ char reply[sizeof(struct dlmc_header) + sizeof(struct dlmc_node)];
+ int fd, rv;
+
+ init_header(&h, DLMC_CMD_NODE_INFO, name, 0);
+ h.data = nodeid;
+
+ memset(reply, 0, sizeof(reply));
+
+ fd = do_connect(DLMC_QUERY_SOCK_PATH);
+ if (fd < 0) {
+ rv = fd;
+ goto out;
+ }
+
+ rv = do_write(fd, &h, sizeof(h));
+ if (rv < 0)
+ goto out_close;
+
+ rv = do_read(fd, reply, sizeof(reply));
+ if (rv < 0)
+ goto out_close;
+
+ rh = (struct dlmc_header *)reply;
+ rv = rh->data;
+ if (rv < 0)
+ goto out_close;
+
+ memcpy(node, (char *)reply + sizeof(struct dlmc_header),
+ sizeof(struct dlmc_node));
+ out_close:
+ close(fd);
+ out:
+ return rv;
+}
+
+int dlmc_lockspace_info(char *name, struct dlmc_lockspace *lockspace)
+{
+ struct dlmc_header h, *rh;
+ char reply[sizeof(struct dlmc_header) + sizeof(struct dlmc_lockspace)];
+ int fd, rv;
+
+ init_header(&h, DLMC_CMD_LOCKSPACE_INFO, name, 0);
+
+ memset(reply, 0, sizeof(reply));
+
+ fd = do_connect(DLMC_QUERY_SOCK_PATH);
+ if (fd < 0) {
+ rv = fd;
+ goto out;
+ }
+
+ rv = do_write(fd, &h, sizeof(h));
+ if (rv < 0)
+ goto out_close;
+
+ rv = do_read(fd, reply, sizeof(reply));
+ if (rv < 0)
+ goto out_close;
+
+ rh = (struct dlmc_header *)reply;
+ rv = rh->data;
+ if (rv < 0)
+ goto out_close;
+
+ memcpy(lockspace, (char *)reply + sizeof(struct dlmc_header),
+ sizeof(struct dlmc_lockspace));
+ out_close:
+ close(fd);
+ out:
+ return rv;
+}
+
+int dlmc_lockspaces(int max, int *count, struct dlmc_lockspace *lss)
+{
+ struct dlmc_header h, *rh;
+ char *reply;
+ int reply_len;
+ int fd, rv, result, ls_count;
+
+ init_header(&h, DLMC_CMD_LOCKSPACES, NULL, 0);
+ h.data = max;
+
+ reply_len = sizeof(struct dlmc_header) +
+ (max * sizeof(struct dlmc_lockspace));
+ reply = malloc(reply_len);
+ if (!reply) {
+ rv = -1;
+ goto out;
+ }
+ memset(reply, 0, reply_len);
+
+ fd = do_connect(DLMC_QUERY_SOCK_PATH);
+ if (fd < 0) {
+ rv = fd;
+ goto out;
+ }
+
+ rv = do_write(fd, &h, sizeof(h));
+ if (rv < 0)
+ goto out_close;
+
+ /* won't usually get back the full reply_len */
+ do_read(fd, reply, reply_len);
+
+ rh = (struct dlmc_header *)reply;
+ result = rh->data;
+ if (result < 0 && result != -E2BIG) {
+ rv = result;
+ goto out_close;
+ }
+
+ if (result == -E2BIG) {
+ *count = -E2BIG;
+ ls_count = max;
+ } else {
+ *count = result;
+ ls_count = result;
+ }
+ rv = 0;
+
+ memcpy(lss, (char *)reply + sizeof(struct dlmc_header),
+ ls_count * sizeof(struct dlmc_lockspace));
+ out_close:
+ close(fd);
+ out:
+ return rv;
+}
+
+int dlmc_lockspace_nodes(char *name, int type, int max, int *count,
+ struct dlmc_node *nodes)
+{
+ struct dlmc_header h, *rh;
+ char *reply;
+ int reply_len;
+ int fd, rv, result, node_count;
+
+ init_header(&h, DLMC_CMD_LOCKSPACE_NODES, name, 0);
+ h.option = type;
+ h.data = max;
+
+ reply_len = sizeof(struct dlmc_header) +
+ (max * sizeof(struct dlmc_node));
+ reply = malloc(reply_len);
+ if (!reply) {
+ rv = -1;
+ goto out;
+ }
+ memset(reply, 0, reply_len);
+
+ fd = do_connect(DLMC_QUERY_SOCK_PATH);
+ if (fd < 0) {
+ rv = fd;
+ goto out;
+ }
+
+ rv = do_write(fd, &h, sizeof(h));
+ if (rv < 0)
+ goto out_close;
+
+ /* won't usually get back the full reply_len */
+ do_read(fd, reply, reply_len);
+
+ rh = (struct dlmc_header *)reply;
+ result = rh->data;
+ if (result < 0 && result != -E2BIG) {
+ rv = result;
+ goto out_close;
+ }
+
+ if (result == -E2BIG) {
+ *count = -E2BIG;
+ node_count = max;
+ } else {
+ *count = result;
+ node_count = result;
+ }
+ rv = 0;
+
+ memcpy(nodes, (char *)reply + sizeof(struct dlmc_header),
+ node_count * sizeof(struct dlmc_node));
+ out_close:
+ close(fd);
+ out:
+ return rv;
+}
+
+int dlmc_fs_connect(void)
+{
+ return do_connect(DLMC_SOCK_PATH);
+}
+
+void dlmc_fs_disconnect(int fd)
+{
+ close(fd);
+}
+
+int dlmc_fs_register(int fd, char *name)
+{
+ struct dlmc_header h;
+
+ init_header(&h, DLMC_CMD_FS_REGISTER, name, 0);
+
+ return do_write(fd, &h, sizeof(h));
+}
+
+int dlmc_fs_unregister(int fd, char *name)
+{
+ struct dlmc_header h;
+
+ init_header(&h, DLMC_CMD_FS_UNREGISTER, name, 0);
+
+ return do_write(fd, &h, sizeof(h));
+}
+
+int dlmc_fs_notified(int fd, char *name, int nodeid)
+{
+ struct dlmc_header h;
+
+ init_header(&h, DLMC_CMD_FS_NOTIFIED, name, 0);
+ h.data = nodeid;
+
+ return do_write(fd, &h, sizeof(h));
+}
+
+int dlmc_fs_result(int fd, char *name, int *type, int *nodeid, int *result)
+{
+ struct dlmc_header h;
+ int rv;
+
+ rv = do_read(fd, &h, sizeof(h));
+ if (rv < 0)
+ goto out;
+
+ strncpy(name, h.name, DLM_LOCKSPACE_LEN);
+ *nodeid = h.option;
+ *result = h.data;
+
+ switch (h.command) {
+ case DLMC_CMD_FS_REGISTER:
+ *type = DLMC_RESULT_REGISTER;
+ break;
+ case DLMC_CMD_FS_NOTIFIED:
+ *type = DLMC_RESULT_NOTIFIED;
+ break;
+ default:
+ *type = 0;
+ }
+ out:
+ return rv;
+}
+
+int dlmc_deadlock_check(char *name)
+{
+ struct dlmc_header h;
+ int fd, rv;
+
+ init_header(&h, DLMC_CMD_DEADLOCK_CHECK, name, 0);
+
+ fd = do_connect(DLMC_SOCK_PATH);
+ if (fd < 0) {
+ rv = fd;
+ goto out;
+ }
+
+ rv = do_write(fd, &h, sizeof(h));
+ close(fd);
+ out:
+ return rv;
+}
+
diff --git a/dlm_controld/libdlmcontrol.h b/dlm_controld/libdlmcontrol.h
new file mode 100644
index 0000000..64a3814
--- /dev/null
+++ b/dlm_controld/libdlmcontrol.h
@@ -0,0 +1,90 @@
+#ifndef _LIBDLMCONTROL_H_
+#define _LIBDLMCONTROL_H_
+
+#define DLMC_DUMP_SIZE (1024 * 1024)
+
+#define DLMC_NF_MEMBER 0x00000001 /* node is member in cg */
+#define DLMC_NF_START 0x00000002 /* start message recvd for cg */
+#define DLMC_NF_DISALLOWED 0x00000004 /* node disallowed in cg */
+#define DLMC_NF_CHECK_FENCING 0x00000008
+#define DLMC_NF_CHECK_QUORUM 0x00000010
+#define DLMC_NF_CHECK_FS 0x00000020
+
+struct dlmc_node {
+ int nodeid;
+ uint32_t flags;
+ uint32_t added_seq;
+ uint32_t removed_seq;
+ int failed_reason;
+};
+
+struct dlmc_change {
+ int member_count;
+ int joined_count;
+ int remove_count;
+ int failed_count;
+ int wait_condition; /* 0 no, 1 fencing, 2 quorum, 3 fs */
+ int wait_messages; /* 0 no, 1 yes */
+ uint32_t seq;
+ uint32_t combined_seq;
+};
+
+#define DLMC_LF_JOINING 0x00000001
+#define DLMC_LF_LEAVING 0x00000002
+#define DLMC_LF_KERNEL_STOPPED 0x00000004
+#define DLMC_LF_FS_REGISTERED 0x00000008
+#define DLMC_LF_NEED_PLOCKS 0x00000010
+#define DLMC_LF_SAVE_PLOCKS 0x00000020
+
+struct dlmc_lockspace {
+ int group_mode;
+ struct dlmc_change cg_prev; /* completed change (started_change) */
+ struct dlmc_change cg_next; /* in-progress change (changes list) */
+ uint32_t flags;
+ uint32_t global_id;
+ char name[DLM_LOCKSPACE_LEN+1];
+};
+
+/* dlmc_lockspace_nodes() types
+
+ MEMBERS: members in completed (prev) change,
+ zero if there's no completed (prev) change
+ NEXT: members in in-progress (next) change,
+ zero if there's no in-progress (next) change
+ ALL: NEXT + nonmembers if there's an in-progress (next) change,
+ MEMBERS + nonmembers if there's no in-progress (next) change, but
+ there is a completed (prev) change
+ nonmembers if there's no in-progress (next) or completed (prev)
+ change (possible?)
+
+ dlmc_node_info() returns info for in-progress (next) change, if one exists,
+ otherwise it returns info for completed (prev) change.
+*/
+
+#define DLMC_NODES_ALL 1
+#define DLMC_NODES_MEMBERS 2
+#define DLMC_NODES_NEXT 3
+
+int dlmc_dump_debug(char *buf);
+int dlmc_dump_log_plock(char *buf);
+int dlmc_dump_plocks(char *name, char *buf);
+int dlmc_lockspace_info(char *lsname, struct dlmc_lockspace *ls);
+int dlmc_node_info(char *lsname, int nodeid, struct dlmc_node *node);
+int dlmc_lockspaces(int max, int *count, struct dlmc_lockspace *lss);
+int dlmc_lockspace_nodes(char *lsname, int type, int max, int *count,
+ struct dlmc_node *nodes);
+
+#define DLMC_RESULT_REGISTER 1
+#define DLMC_RESULT_NOTIFIED 2
+
+int dlmc_fs_connect(void);
+void dlmc_fs_disconnect(int fd);
+int dlmc_fs_register(int fd, char *name);
+int dlmc_fs_unregister(int fd, char *name);
+int dlmc_fs_notified(int fd, char *name, int nodeid);
+int dlmc_fs_result(int fd, char *name, int *type, int *nodeid, int *result);
+
+int dlmc_deadlock_check(char *name);
+
+#endif
+
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=7c18aed3909b817e0f18840dc4ea8737a4f11ac6
Commit: 7c18aed3909b817e0f18840dc4ea8737a4f11ac6
Parent: 0801783702c5d100bd1f3d1150552873676d7c0d
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Fri Sep 30 13:36:14 2011 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Fri Sep 30 13:36:14 2011 -0500
dlm: use top level include dir
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
include/copyright.cf | 6 ++++++
make/copyright.cf | 6 ------
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/include/copyright.cf b/include/copyright.cf
new file mode 100644
index 0000000..3801aa9
--- /dev/null
+++ b/include/copyright.cf
@@ -0,0 +1,6 @@
+#ifndef __COPYRIGHT_DOT_CF__
+#define __COPYRIGHT_DOT_CF__
+
+#define REDHAT_COPYRIGHT "Copyright (C) Red Hat, Inc. 2004-2010 All rights reserved."
+
+#endif /* __COPYRIGHT_DOT_CF__ */
diff --git a/make/copyright.cf b/make/copyright.cf
deleted file mode 100644
index 3801aa9..0000000
--- a/make/copyright.cf
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __COPYRIGHT_DOT_CF__
-#define __COPYRIGHT_DOT_CF__
-
-#define REDHAT_COPYRIGHT "Copyright (C) Red Hat, Inc. 2004-2010 All rights reserved."
-
-#endif /* __COPYRIGHT_DOT_CF__ */
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=0801783702…
Commit: 0801783702c5d100bd1f3d1150552873676d7c0d
Parent: a5500be36aeaf39eaebe96a4db901335b88e3131
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Fri Sep 30 13:34:48 2011 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Fri Sep 30 13:34:48 2011 -0500
dlm_controld: makefile adjustments
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
dlm_controld/Makefile | 41 ++++++++++++++++++++++++++++-------------
1 files changed, 28 insertions(+), 13 deletions(-)
diff --git a/dlm_controld/Makefile b/dlm_controld/Makefile
index a89c7b3..d47f71b 100644
--- a/dlm_controld/Makefile
+++ b/dlm_controld/Makefile
@@ -1,6 +1,7 @@
-TARGET = dlm_controld
+BIN_TARGET = dlm_controld
+MAN_TARGET = dlm_controld.8
-SOURCE = \
+BIN_SOURCE = \
action.c \
cpg.c \
crc.c \
@@ -11,7 +12,7 @@ SOURCE = \
logging.c \
rbtree.c
-CFLAGS += -D_GNU_SOURCE -g \
+BIN_CFLAGS += -D_GNU_SOURCE -g \
-Wall \
-Wformat \
-Wformat-security \
@@ -32,19 +33,33 @@ CFLAGS += -D_GNU_SOURCE -g \
-fasynchronous-unwind-tables \
-fdiagnostics-show-option \
-CFLAGS += -fPIE -DPIE
-CFLAGS += `xml2-config --cflags`
-CFLAGS += -I../dlm/libdlm -I../dlm/libdlmcontrol
+BIN_CFLAGS += -fPIE -DPIE
+BIN_CFLAGS += `xml2-config --cflags`
+BIN_CFLAGS += -I../dlm/libdlm -I../dlm/libdlmcontrol
-LDFLAGS += -Wl,-z,now -Wl,-z,relro -pie
-LDFLAGS += `xml2-config --libs`
-LDFLAGS += -lpthread -llogthread -lcpg -lconfdb -lcfg -lquorum -lfenced
+BIN_LDFLAGS += -Wl,-z,now -Wl,-z,relro -pie
+BIN_LDFLAGS += `xml2-config --libs`
+BIN_LDFLAGS += -lpthread -llogthread -lcpg -lconfdb -lcfg -lquorum -lfenced
-all: $(TARGET)
+all: $(BIN_TARGET)
-$(TARGET): $(SOURCE)
- $(CC) $(CFLAGS) $(LDFLAGS) $(SOURCE) -o $@ -L.
+$(BIN_TARGET): $(BIN_SOURCE)
+ $(CC) $(BIN_CFLAGS) $(BIN_LDFLAGS) $(BIN_SOURCE) -o $@ -L.
clean:
- rm -f *.o *.so *.so.* $(TARGET)
+ rm -f *.o *.so *.so.* $(BIN_TARGET)
+
+
+INSTALL=$(shell which install)
+
+DESTDIR=
+BINDIR=/usr/sbin
+MANDIR=/usr/share/man
+
+.PHONY: install
+install: all
+ $(INSTALL) -d $(DESTDIR)/$(BINDIR)
+ $(INSTALL) -d $(DESTDIR)/$(MANDIR)/man8
+ $(INSTALL) -c -m 755 $(BIN_TARGET) $(DESTDIR)/$(BINDIR)
+ $(INSTALL) -m 644 $(MAN_TARGET) $(DESTDIR)/$(MANDIR)/man8/
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=a5500be36a…
Commit: a5500be36aeaf39eaebe96a4db901335b88e3131
Parent: 42a8ff3ed7d88a7e7a81d12bb905512f5e9c4a27
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Fri Sep 30 13:26:50 2011 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Fri Sep 30 13:26:50 2011 -0500
dlm_controld: adjust for moved files
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
dlm_controld/Makefile | 2 +-
dlm_controld/dlm_daemon.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/dlm_controld/Makefile b/dlm_controld/Makefile
index 63dcb9b..a89c7b3 100644
--- a/dlm_controld/Makefile
+++ b/dlm_controld/Makefile
@@ -34,7 +34,7 @@ CFLAGS += -D_GNU_SOURCE -g \
CFLAGS += -fPIE -DPIE
CFLAGS += `xml2-config --cflags`
-CFLAGS += -I../include -I../../dlm/libdlm -I../../dlm/libdlmcontrol
+CFLAGS += -I../dlm/libdlm -I../dlm/libdlmcontrol
LDFLAGS += -Wl,-z,now -Wl,-z,relro -pie
LDFLAGS += `xml2-config --libs`
diff --git a/dlm_controld/dlm_daemon.h b/dlm_controld/dlm_daemon.h
index 0c89f67..70ad2d8 100644
--- a/dlm_controld/dlm_daemon.h
+++ b/dlm_controld/dlm_daemon.h
@@ -41,7 +41,7 @@
#include "config.h"
#include "list.h"
#include "rbtree.h"
-#include "linux_endian.h"
+#include "endian.h"
/* TODO: cleanup */
#define CLUSTERVARLIB "/var/lib/cluster"
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=42a8ff3ed7…
Commit: 42a8ff3ed7d88a7e7a81d12bb905512f5e9c4a27
Parent: 2b5e4f87ca394113fd54395d5b5b3ab1e410dbb2
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Fri Sep 30 13:24:54 2011 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Fri Sep 30 13:24:54 2011 -0500
dlm_controld: move source files
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
dlm_controld/Makefile | 50 +
dlm_controld/action.c | 1084 +++++++++++++++
dlm_controld/config.c | 93 ++
dlm_controld/config.h | 47 +
dlm_controld/cpg.c | 2606 +++++++++++++++++++++++++++++++++++++
dlm_controld/crc.c | 72 +
dlm_controld/deadlock.c | 1550 ++++++++++++++++++++++
dlm_controld/dlm_controld.8 | 313 +++++
dlm_controld/dlm_controld.h | 38 +
dlm_controld/dlm_daemon.h | 326 +++++
dlm_controld/endian.h | 68 +
dlm_controld/list.h | 336 +++++
dlm_controld/logging.c | 173 +++
dlm_controld/main.c | 1274 ++++++++++++++++++
dlm_controld/member_cman.c | 267 ++++
dlm_controld/netlink.c | 225 ++++
dlm_controld/plock.c | 2197 +++++++++++++++++++++++++++++++
dlm_controld/rbtree.c | 383 ++++++
dlm_controld/rbtree.h | 160 +++
group/dlm_controld/Makefile | 50 -
group/dlm_controld/action.c | 1084 ---------------
group/dlm_controld/config.c | 93 --
group/dlm_controld/config.h | 47 -
group/dlm_controld/cpg.c | 2606 -------------------------------------
group/dlm_controld/crc.c | 72 -
group/dlm_controld/deadlock.c | 1550 ----------------------
group/dlm_controld/dlm_controld.h | 38 -
group/dlm_controld/dlm_daemon.h | 326 -----
group/dlm_controld/logging.c | 173 ---
group/dlm_controld/main.c | 1274 ------------------
group/dlm_controld/member_cman.c | 267 ----
group/dlm_controld/netlink.c | 225 ----
group/dlm_controld/plock.c | 2197 -------------------------------
group/dlm_controld/rbtree.c | 383 ------
group/include/linux_endian.h | 68 -
group/include/list.h | 336 -----
group/include/rbtree.h | 160 ---
group/man/dlm_controld.8 | 313 -----
38 files changed, 11262 insertions(+), 11262 deletions(-)
diff --git a/dlm_controld/Makefile b/dlm_controld/Makefile
new file mode 100644
index 0000000..63dcb9b
--- /dev/null
+++ b/dlm_controld/Makefile
@@ -0,0 +1,50 @@
+TARGET = dlm_controld
+
+SOURCE = \
+ action.c \
+ cpg.c \
+ crc.c \
+ main.c \
+ plock.c \
+ config.c \
+ member_cman.c \
+ logging.c \
+ rbtree.c
+
+CFLAGS += -D_GNU_SOURCE -g \
+ -Wall \
+ -Wformat \
+ -Wformat-security \
+ -Wmissing-prototypes \
+ -Wnested-externs \
+ -Wpointer-arith \
+ -Wextra -Wshadow \
+ -Wcast-align \
+ -Wwrite-strings \
+ -Waggregate-return \
+ -Wstrict-prototypes \
+ -Winline \
+ -Wredundant-decls \
+ -Wno-sign-compare \
+ -Wno-unused-parameter \
+ -Wp,-D_FORTIFY_SOURCE=2 \
+ -fexceptions \
+ -fasynchronous-unwind-tables \
+ -fdiagnostics-show-option \
+
+CFLAGS += -fPIE -DPIE
+CFLAGS += `xml2-config --cflags`
+CFLAGS += -I../include -I../../dlm/libdlm -I../../dlm/libdlmcontrol
+
+LDFLAGS += -Wl,-z,now -Wl,-z,relro -pie
+LDFLAGS += `xml2-config --libs`
+LDFLAGS += -lpthread -llogthread -lcpg -lconfdb -lcfg -lquorum -lfenced
+
+all: $(TARGET)
+
+$(TARGET): $(SOURCE)
+ $(CC) $(CFLAGS) $(LDFLAGS) $(SOURCE) -o $@ -L.
+
+clean:
+ rm -f *.o *.so *.so.* $(TARGET)
+
diff --git a/dlm_controld/action.c b/dlm_controld/action.c
new file mode 100644
index 0000000..e9148a1
--- /dev/null
+++ b/dlm_controld/action.c
@@ -0,0 +1,1084 @@
+#include "dlm_daemon.h"
+
+#include <corosync/corotypes.h>
+#include <corosync/confdb.h>
+
+static int dir_members[MAX_NODES];
+static int dir_members_count;
+static int comms_nodes[MAX_NODES];
+static int comms_nodes_count;
+static char mg_name[DLM_LOCKSPACE_LEN+1];
+
+#define DLM_SYSFS_DIR "/sys/kernel/dlm"
+#define CLUSTER_DIR "/sys/kernel/config/dlm/cluster"
+#define SPACES_DIR "/sys/kernel/config/dlm/cluster/spaces"
+#define COMMS_DIR "/sys/kernel/config/dlm/cluster/comms"
+
+/* Query corosync's confdb for the totem "rrp_mode" key and derive the dlm
+   protocol from it: "none" selects PROTO_TCP, anything else PROTO_SCTP.
+   Returns the protocol, or -1 if any confdb call fails. */
+
+static int detect_protocol(void)
+{
+	confdb_handle_t handle;
+	hdb_handle_t totem_handle;
+	char key_value[256];
+	size_t value_len = 0;
+	int rv, proto = -1;
+	confdb_callbacks_t callbacks = {
+		.confdb_key_change_notify_fn = NULL,
+		.confdb_object_create_change_notify_fn = NULL,
+		.confdb_object_delete_change_notify_fn = NULL
+	};
+
+	rv = confdb_initialize(&handle, &callbacks);
+	if (rv != CS_OK) {
+		log_error("confdb_initialize error %d", rv);
+		return -1;
+	}
+
+	rv = confdb_object_find_start(handle, OBJECT_PARENT_HANDLE);
+	if (rv != CS_OK) {
+		log_error("confdb_object_find_start error %d", rv);
+		goto out;
+	}
+
+	rv = confdb_object_find(handle, OBJECT_PARENT_HANDLE,
+				"totem", strlen("totem"), &totem_handle);
+	if (rv != CS_OK) {
+		log_error("confdb_object_find error %d", rv);
+		goto out;
+	}
+
+	rv = confdb_key_get(handle, totem_handle,
+			    "rrp_mode", strlen("rrp_mode"),
+			    key_value, &value_len);
+	if (rv != CS_OK) {
+		log_error("confdb_key_get error %d", rv);
+		goto out;
+	}
+
+	/* value_len is reported by confdb; clamp it so the terminator
+	   always lands inside key_value */
+	if (value_len >= sizeof(key_value))
+		value_len = sizeof(key_value) - 1;
+	key_value[value_len] = '\0';
+	log_debug("totem/rrp_mode = '%s'", key_value);
+
+	if (!strcmp(key_value, "none"))
+		proto = PROTO_TCP;
+	else
+		proto = PROTO_SCTP;
+ out:
+	confdb_finalize(handle);
+	return proto;
+}
+
+/* look for an id that matches in e.g. /sys/fs/gfs/bull\:x/lock_module/id
+ and then extract the "x" as the name */
+
+/* On success copies the fs name into the file-scope mg_name and returns 0;
+   returns -1 when no entry under /sys/fs/gfs or /sys/fs/gfs2 has an id
+   equal to mg_id. */
+
+static int get_mountgroup_name(uint32_t mg_id)
+{
+	char path[PATH_MAX];
+	char *fsname;
+	const char *fsdir;
+	DIR *d;
+	FILE *file;
+	struct dirent *de;
+	uint32_t id;
+	int retry_gfs2 = 1;
+	int rv, error;
+
+	/* look under gfs first, then retry once under gfs2 */
+	fsdir = "/sys/fs/gfs";
+ retry:
+	rv = -1;
+
+	d = opendir(fsdir);
+	if (!d) {
+		/* path has not been filled in yet here, so report fsdir */
+		log_debug("%s: opendir failed: %d", fsdir, errno);
+		goto out;
+	}
+
+	while ((de = readdir(d))) {
+		if (de->d_name[0] == '.')
+			continue;
+
+		id = 0;
+		memset(path, 0, PATH_MAX);
+		snprintf(path, PATH_MAX, "%s/%s/lock_module/id",
+			 fsdir, de->d_name);
+
+		file = fopen(path, "r");
+		if (!file) {
+			log_error("can't open %s %d", path, errno);
+			continue;
+		}
+
+		error = fscanf(file, "%u", &id);
+		fclose(file);
+
+		if (error != 1) {
+			log_error("bad read %s %d", path, errno);
+			continue;
+		}
+		if (id != mg_id) {
+			log_debug("get_mountgroup_name skip %x %s",
+				  id, de->d_name);
+			continue;
+		}
+
+		/* take the fsname out of clustername:fsname */
+		fsname = strstr(de->d_name, ":");
+		if (!fsname) {
+			log_debug("get_mountgroup_name skip2 %x %s",
+				  id, de->d_name);
+			continue;
+		}
+		fsname++;
+
+		log_debug("get_mountgroup_name found %x %s %s",
+			  id, de->d_name, fsname);
+
+		/* strncpy does not terminate when the source fills the
+		   buffer, so copy one byte less and terminate explicitly */
+		strncpy(mg_name, fsname, sizeof(mg_name) - 1);
+		mg_name[sizeof(mg_name) - 1] = '\0';
+		rv = 0;
+		break;
+	}
+
+	closedir(d);
+
+ out:
+	if (rv && retry_gfs2) {
+		retry_gfs2 = 0;
+		fsdir = "/sys/fs/gfs2";
+		goto retry;
+	}
+
+	return rv;
+}
+
+/* This is for the case where dlm_controld exits/fails, abandoning dlm
+ lockspaces in the kernel, and then dlm_controld is restarted. When
+ dlm_controld exits and abandons lockspaces, that node needs to be
+ rebooted to clear the uncontrolled lockspaces from the kernel. */
+
+int check_uncontrolled_lockspaces(void)
+{
+ DIR *d;
+ struct dirent *de;
+ int count = 0;
+
+ d = opendir(DLM_SYSFS_DIR);
+ if (!d)
+ return 0;
+
+ while ((de = readdir(d))) {
+ if (de->d_name[0] == '.')
+ continue;
+
+ log_error("found uncontrolled lockspace %s", de->d_name);
+ count++;
+ }
+ closedir(d);
+
+ if (count) {
+ kick_node_from_cluster(our_nodeid);
+ return -1;
+ }
+ return 0;
+}
+
+/* find the mountgroup with "mg_id" in sysfs, get its name, then look for
+   the ls with the same name in the lockspaces list and record mg_id as
+   that lockspace's associated_mg_id */
+
+void set_associated_id(uint32_t mg_id)
+{
+ struct lockspace *ls;
+ int rv;
+
+ log_debug("set_associated_id mg_id %x %d", mg_id, mg_id);
+
+ memset(&mg_name, 0, sizeof(mg_name));
+
+ rv = get_mountgroup_name(mg_id);
+ if (rv) {
+ log_error("no mountgroup found with id %x", mg_id);
+ return;
+ }
+
+ ls = find_ls(mg_name);
+ if (!ls) {
+ log_error("no lockspace found with name %s for mg_id %x",
+ mg_name, mg_id);
+ return;
+ }
+
+ log_debug("set_associated_id mg %x is ls %x", mg_id, ls->global_id);
+
+ ls->associated_mg_id = mg_id;
+}
+
+/* Write the string "val" to the file DLM_SYSFS_DIR/<name>/<file>.
+   Returns the result of do_write(), or -1 if the path does not fit in
+   the local buffer or the file cannot be opened for writing. */
+
+static int do_sysfs(const char *name, const char *file, char *val)
+{
+	char fname[512];
+	int rv, fd, n;
+
+	/* bounded formatting: name comes from the caller and must not be
+	   allowed to run past fname */
+	n = snprintf(fname, sizeof(fname), "%s/%s/%s",
+		     DLM_SYSFS_DIR, name, file);
+	if (n < 0 || (size_t)n >= sizeof(fname)) {
+		log_error("sysfs path too long for \"%s\" \"%s\"", name, file);
+		return -1;
+	}
+
+	fd = open(fname, O_WRONLY);
+	if (fd < 0) {
+		log_error("open \"%s\" error %d %d", fname, fd, errno);
+		return -1;
+	}
+
+	log_debug("write \"%s\" to \"%s\"", val, fname);
+
+	/* the terminating NUL is written along with the string */
+	rv = do_write(fd, val, strlen(val) + 1);
+	close(fd);
+	return rv;
+}
+
+int set_sysfs_control(char *name, int val)
+{
+ char buf[32];
+
+ memset(buf, 0, sizeof(buf));
+ snprintf(buf, 32, "%d", val);
+
+ return do_sysfs(name, "control", buf);
+}
+
+int set_sysfs_event_done(char *name, int val)
+{
+ char buf[32];
+
+ memset(buf, 0, sizeof(buf));
+ snprintf(buf, 32, "%d", val);
+
+ return do_sysfs(name, "event_done", buf);
+}
+
+int set_sysfs_id(char *name, uint32_t id)
+{
+ char buf[32];
+
+ memset(buf, 0, sizeof(buf));
+ snprintf(buf, 32, "%u", id);
+
+ return do_sysfs(name, "id", buf);
+}
+
+/* Refresh the file-scope dir_members[] / dir_members_count from the entry
+   names found under SPACES_DIR/<name>/nodes (each name is converted with
+   atoi).  Returns 0, or -1 if the directory cannot be opened. */
+
+static int update_dir_members(char *name)
+{
+	char path[PATH_MAX];
+	DIR *d;
+	struct dirent *de;
+	int i = 0;
+
+	memset(path, 0, PATH_MAX);
+	snprintf(path, PATH_MAX, "%s/%s/nodes", SPACES_DIR, name);
+
+	d = opendir(path);
+	if (!d) {
+		log_debug("%s: opendir failed: %d", path, errno);
+		return -1;
+	}
+
+	memset(dir_members, 0, sizeof(dir_members));
+	dir_members_count = 0;
+
+	/* FIXME: we should probably read the nodeid in each dir instead */
+
+	while ((de = readdir(d))) {
+		if (de->d_name[0] == '.')
+			continue;
+		/* dir_members[] holds MAX_NODES entries; never write past it */
+		if (i >= MAX_NODES) {
+			log_error("%s: more than %d entries, ignoring the rest",
+				  path, MAX_NODES);
+			break;
+		}
+		dir_members[i++] = atoi(de->d_name);
+		log_debug("dir_member %d", dir_members[i-1]);
+	}
+	closedir(d);
+
+	dir_members_count = i;
+	return 0;
+}
+
+static int id_exists(int id, int count, int *array)
+{
+ int i;
+ for (i = 0; i < count; i++) {
+ if (array[i] == id)
+ return 1;
+ }
+ return 0;
+}
+
+static int create_path(const char *path)
+{
+ mode_t old_umask;
+ int rv;
+
+ old_umask = umask(0022);
+ rv = mkdir(path, 0777);
+ if (rv < 0 && errno == EEXIST)
+ rv = 0;
+ if (rv < 0)
+ log_error("%s: mkdir failed: %d", path, errno);
+ umask(old_umask);
+ return rv;
+}
+
+int path_exists(const char *path)
+{
+ struct stat buf;
+
+ if (stat(path, &buf) < 0) {
+ if (errno != ENOENT)
+ log_error("%s: stat failed: %d", path, errno);
+ return 0;
+ }
+ return 1;
+}
+
+/* The "renew" nodes are those that have left and rejoined since the last
+ call to set_members(). We rmdir/mkdir for these nodes so dlm-kernel
+ can notice they've left and rejoined. */
+
+int set_configfs_members(char *name, int new_count, int *new_members,
+ int renew_count, int *renew_members)
+{
+ char path[PATH_MAX];
+ char buf[32];
+ int i, w, fd, rv, id, old_count, *old_members;
+ int do_renew;
+
+ /*
+ * create lockspace dir if it doesn't exist yet
+ */
+
+ memset(path, 0, PATH_MAX);
+ snprintf(path, PATH_MAX, "%s/%s", SPACES_DIR, name);
+
+ if (!path_exists(path)) {
+ if (create_path(path))
+ return -1;
+ }
+
+ /*
+ * remove/add lockspace members
+ */
+
+ rv = update_dir_members(name);
+ if (rv)
+ return rv;
+
+ old_members = dir_members;
+ old_count = dir_members_count;
+
+ for (i = 0; i < old_count; i++) {
+ id = old_members[i];
+ if (id_exists(id, new_count, new_members))
+ continue;
+
+ memset(path, 0, PATH_MAX);
+ snprintf(path, PATH_MAX, "%s/%s/nodes/%d",
+ SPACES_DIR, name, id);
+
+ log_debug("set_members rmdir \"%s\"", path);
+
+ rv = rmdir(path);
+ if (rv) {
+ log_error("%s: rmdir failed: %d", path, errno);
+ goto out;
+ }
+ }
+
+ /*
+ * remove lockspace dir after we've removed all the nodes
+ * (when we're shutting down and adding no new nodes)
+ */
+
+ if (!new_count) {
+ memset(path, 0, PATH_MAX);
+ snprintf(path, PATH_MAX, "%s/%s", SPACES_DIR, name);
+
+ log_debug("set_members lockspace rmdir \"%s\"", path);
+
+ rv = rmdir(path);
+ if (rv)
+ log_error("%s: rmdir failed: %d", path, errno);
+ }
+
+ for (i = 0; i < new_count; i++) {
+ id = new_members[i];
+
+ do_renew = 0;
+
+ if (id_exists(id, renew_count, renew_members))
+ do_renew = 1;
+ else if (id_exists(id, old_count, old_members))
+ continue;
+
+ if (!is_cluster_member(id))
+ update_cluster();
+ /*
+ * create node's dir
+ */
+
+ memset(path, 0, PATH_MAX);
+ snprintf(path, PATH_MAX, "%s/%s/nodes/%d",
+ SPACES_DIR, name, id);
+
+ if (do_renew) {
+ log_debug("set_members renew rmdir \"%s\"", path);
+ rv = rmdir(path);
+ if (rv) {
+ log_error("%s: renew rmdir failed: %d",
+ path, errno);
+ goto out;
+ }
+ }
+
+ log_debug("set_members mkdir \"%s\"", path);
+
+ rv = create_path(path);
+ if (rv)
+ goto out;
+
+ /*
+ * set node's nodeid
+ */
+
+ memset(path, 0, PATH_MAX);
+ snprintf(path, PATH_MAX, "%s/%s/nodes/%d/nodeid",
+ SPACES_DIR, name, id);
+
+ rv = fd = open(path, O_WRONLY);
+ if (rv < 0) {
+ log_error("%s: open failed: %d", path, errno);
+ goto out;
+ }
+
+ memset(buf, 0, 32);
+ snprintf(buf, 32, "%d", id);
+
+ rv = do_write(fd, buf, strlen(buf));
+ if (rv < 0) {
+ log_error("%s: write failed: %d, %s", path, errno, buf);
+ close(fd);
+ goto out;
+ }
+ close(fd);
+
+ /*
+ * set node's weight
+ */
+
+ w = get_weight(id, name);
+
+ memset(path, 0, PATH_MAX);
+ snprintf(path, PATH_MAX, "%s/%s/nodes/%d/weight",
+ SPACES_DIR, name, id);
+
+ rv = fd = open(path, O_WRONLY);
+ if (rv < 0) {
+ log_error("%s: open failed: %d", path, errno);
+ goto out;
+ }
+
+ memset(buf, 0, 32);
+ snprintf(buf, 32, "%d", w);
+
+ rv = do_write(fd, buf, strlen(buf));
+ if (rv < 0) {
+ log_error("%s: write failed: %d, %s", path, errno, buf);
+ close(fd);
+ goto out;
+ }
+ close(fd);
+ }
+
+ rv = 0;
+ out:
+ return rv;
+}
+
+#if 0
+char *str_ip(char *addr)
+{
+ static char ip[256];
+ struct sockaddr_in *sin = (struct sockaddr_in *) addr;
+ memset(ip, 0, sizeof(ip));
+ inet_ntop(AF_INET, &sin->sin_addr, ip, 256);
+ return ip;
+}
+#endif
+
+static char *str_ip(char *addr)
+{
+ static char str_ip_buf[INET6_ADDRSTRLEN];
+ struct sockaddr_storage *ss = (struct sockaddr_storage *)addr;
+ struct sockaddr_in *sin = (struct sockaddr_in *)addr;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
+ void *saddr;
+
+ if (ss->ss_family == AF_INET6)
+ saddr = &sin6->sin6_addr;
+ else
+ saddr = &sin->sin_addr;
+
+ inet_ntop(ss->ss_family, saddr, str_ip_buf, sizeof(str_ip_buf));
+ return str_ip_buf;
+}
+
+/* record the nodeids that are currently listed under
+ config/dlm/cluster/comms/ so that we can remove all of them */
+
+/* Refresh the file-scope comms_nodes[] / comms_nodes_count from the entry
+   names found under COMMS_DIR (each name is converted with atoi).
+   Returns 0, or -1 if the directory cannot be opened. */
+
+static int update_comms_nodes(void)
+{
+	char path[PATH_MAX];
+	DIR *d;
+	struct dirent *de;
+	int i = 0;
+
+	memset(path, 0, PATH_MAX);
+	/* pass the directory as an argument rather than as the format */
+	snprintf(path, PATH_MAX, "%s", COMMS_DIR);
+
+	d = opendir(path);
+	if (!d) {
+		log_debug("%s: opendir failed: %d", path, errno);
+		return -1;
+	}
+
+	memset(comms_nodes, 0, sizeof(comms_nodes));
+	comms_nodes_count = 0;
+
+	while ((de = readdir(d))) {
+		if (de->d_name[0] == '.')
+			continue;
+		/* comms_nodes[] holds MAX_NODES entries; never write past it */
+		if (i >= MAX_NODES) {
+			log_error("%s: more than %d entries, ignoring the rest",
+				  path, MAX_NODES);
+			break;
+		}
+		comms_nodes[i++] = atoi(de->d_name);
+	}
+	closedir(d);
+
+	comms_nodes_count = i;
+	return 0;
+}
+
+/* clear out everything under config/dlm/cluster/comms/ */
+
+static void clear_configfs_comms(void)
+{
+ char path[PATH_MAX];
+ int i, rv;
+
+ rv = update_comms_nodes();
+ if (rv < 0)
+ return;
+
+ for (i = 0; i < comms_nodes_count; i++) {
+ memset(path, 0, PATH_MAX);
+ snprintf(path, PATH_MAX, "%s/%d", COMMS_DIR, comms_nodes[i]);
+
+ log_debug("clear_configfs_nodes rmdir \"%s\"", path);
+
+ rv = rmdir(path);
+ if (rv)
+ log_error("%s: rmdir failed: %d", path, errno);
+ }
+}
+
+static void clear_configfs_space_nodes(char *name)
+{
+ char path[PATH_MAX];
+ int i, rv;
+
+ rv = update_dir_members(name);
+ if (rv < 0)
+ return;
+
+ for (i = 0; i < dir_members_count; i++) {
+ memset(path, 0, PATH_MAX);
+ snprintf(path, PATH_MAX, "%s/%s/nodes/%d",
+ SPACES_DIR, name, dir_members[i]);
+
+ log_debug("clear_configfs_space_nodes rmdir \"%s\"", path);
+
+ rv = rmdir(path);
+ if (rv)
+ log_error("%s: rmdir failed: %d", path, errno);
+ }
+}
+
+/* clear out everything under config/dlm/cluster/spaces/ */
+
+static void clear_configfs_spaces(void)
+{
+ char path[PATH_MAX];
+ DIR *d;
+ struct dirent *de;
+ int rv;
+
+ memset(path, 0, PATH_MAX);
+ snprintf(path, PATH_MAX, "%s", SPACES_DIR);
+
+ d = opendir(path);
+ if (!d) {
+ log_debug("%s: opendir failed: %d", path, errno);
+ return;
+ }
+
+ while ((de = readdir(d))) {
+ if (de->d_name[0] == '.')
+ continue;
+
+ clear_configfs_space_nodes(de->d_name);
+
+ memset(path, 0, PATH_MAX);
+ snprintf(path, PATH_MAX, "%s/%s", SPACES_DIR, de->d_name);
+
+ log_debug("clear_configfs_spaces rmdir \"%s\"", path);
+
+ rv = rmdir(path);
+ if (rv)
+ log_error("%s: rmdir failed: %d", path, errno);
+ }
+ closedir(d);
+}
+
+static int add_configfs_base(void)
+{
+ int rv = 0;
+
+ if (!path_exists("/sys/kernel/config")) {
+ log_error("No /sys/kernel/config, is configfs loaded?");
+ return -1;
+ }
+
+ if (!path_exists("/sys/kernel/config/dlm")) {
+ log_error("No /sys/kernel/config/dlm, is the dlm loaded?");
+ return -1;
+ }
+
+ if (!path_exists("/sys/kernel/config/dlm/cluster"))
+ rv = create_path("/sys/kernel/config/dlm/cluster");
+
+ return rv;
+}
+
+/* Create COMMS_DIR/<nodeid> and fill in its "nodeid" and "addr" files, plus
+   "local" when the local argument is non-zero.  addr is written padded
+   with zeros to sizeof(struct sockaddr_storage).
+   Returns 0 on success, -1 on any failure (including an addrlen that does
+   not fit in a sockaddr_storage). */
+
+int add_configfs_node(int nodeid, char *addr, int addrlen, int local)
+{
+	char path[PATH_MAX];
+	char padded_addr[sizeof(struct sockaddr_storage)];
+	char buf[32];
+	int rv, fd;
+
+	/* reject lengths that would overflow padded_addr in the memcpy
+	   below, before anything is created in configfs */
+	if (addrlen < 0 || (size_t)addrlen > sizeof(padded_addr)) {
+		log_error("add_configfs_node %d invalid addrlen %d",
+			  nodeid, addrlen);
+		return -1;
+	}
+
+	log_debug("set_configfs_node %d %s local %d",
+		  nodeid, str_ip(addr), local);
+
+	/*
+	 * create comm dir for this node
+	 */
+
+	memset(path, 0, PATH_MAX);
+	snprintf(path, PATH_MAX, "%s/%d", COMMS_DIR, nodeid);
+
+	rv = create_path(path);
+	if (rv)
+		return -1;
+
+	/*
+	 * set the nodeid
+	 */
+
+	memset(path, 0, PATH_MAX);
+	snprintf(path, PATH_MAX, "%s/%d/nodeid", COMMS_DIR, nodeid);
+
+	fd = open(path, O_WRONLY);
+	if (fd < 0) {
+		log_error("%s: open failed: %d", path, errno);
+		return -1;
+	}
+
+	memset(buf, 0, sizeof(buf));
+	snprintf(buf, sizeof(buf), "%d", nodeid);
+
+	rv = do_write(fd, buf, strlen(buf));
+	if (rv < 0) {
+		log_error("%s: write failed: %d, %s", path, errno, buf);
+		close(fd);
+		return -1;
+	}
+	close(fd);
+
+	/*
+	 * set the address
+	 */
+
+	memset(padded_addr, 0, sizeof(padded_addr));
+	memcpy(padded_addr, addr, addrlen);
+
+	memset(path, 0, PATH_MAX);
+	snprintf(path, PATH_MAX, "%s/%d/addr", COMMS_DIR, nodeid);
+
+	fd = open(path, O_WRONLY);
+	if (fd < 0) {
+		log_error("%s: open failed: %d", path, errno);
+		return -1;
+	}
+
+	rv = do_write(fd, padded_addr, sizeof(struct sockaddr_storage));
+	if (rv < 0) {
+		log_error("%s: write failed: %d %d", path, errno, rv);
+		close(fd);
+		return -1;
+	}
+	close(fd);
+
+	/*
+	 * set local
+	 */
+
+	if (!local)
+		goto out;
+
+	memset(path, 0, PATH_MAX);
+	snprintf(path, PATH_MAX, "%s/%d/local", COMMS_DIR, nodeid);
+
+	fd = open(path, O_WRONLY);
+	if (fd < 0) {
+		log_error("%s: open failed: %d", path, errno);
+		return -1;
+	}
+
+	rv = do_write(fd, (void *)"1", strlen("1"));
+	if (rv < 0) {
+		log_error("%s: write failed: %d", path, errno);
+		close(fd);
+		return -1;
+	}
+	close(fd);
+ out:
+	return 0;
+}
+
+void del_configfs_node(int nodeid)
+{
+ char path[PATH_MAX];
+ int rv;
+
+ memset(path, 0, PATH_MAX);
+ snprintf(path, PATH_MAX, "%s/%d", COMMS_DIR, nodeid);
+
+ log_debug("del_configfs_node rmdir \"%s\"", path);
+
+ rv = rmdir(path);
+ if (rv)
+ log_error("%s: rmdir failed: %d", path, errno);
+}
+
+/* Write proto as a decimal string to CLUSTER_DIR/protocol.
+   Returns 0 on success, a negative value if open or write fails. */
+
+static int set_configfs_protocol(int proto)
+{
+	char path[PATH_MAX];
+	char buf[32];
+	int fd, rv;
+
+	memset(path, 0, PATH_MAX);
+	snprintf(path, PATH_MAX, "%s/protocol", CLUSTER_DIR);
+
+	fd = open(path, O_WRONLY);
+	if (fd < 0) {
+		log_error("%s: open failed: %d", path, errno);
+		return fd;
+	}
+
+	memset(buf, 0, sizeof(buf));
+	snprintf(buf, sizeof(buf), "%d", proto);
+
+	rv = do_write(fd, buf, strlen(buf));
+	if (rv < 0) {
+		log_error("%s: write failed: %d", path, errno);
+		/* do not leak the descriptor on the error path */
+		close(fd);
+		return rv;
+	}
+	close(fd);
+	log_debug("set protocol %d", proto);
+	return 0;
+}
+
+/* Write cs as a decimal string to CLUSTER_DIR/timewarn_cs.
+   Returns 0 on success, a negative value if open or write fails. */
+
+static int set_configfs_timewarn(int cs)
+{
+	char path[PATH_MAX];
+	char buf[32];
+	int fd, rv;
+
+	memset(path, 0, PATH_MAX);
+	snprintf(path, PATH_MAX, "%s/timewarn_cs", CLUSTER_DIR);
+
+	fd = open(path, O_WRONLY);
+	if (fd < 0) {
+		log_error("%s: open failed: %d", path, errno);
+		return fd;
+	}
+
+	memset(buf, 0, sizeof(buf));
+	snprintf(buf, sizeof(buf), "%d", cs);
+
+	rv = do_write(fd, buf, strlen(buf));
+	if (rv < 0) {
+		log_error("%s: write failed: %d", path, errno);
+		/* do not leak the descriptor on the error path */
+		close(fd);
+		return rv;
+	}
+	close(fd);
+	log_debug("set timewarn_cs %d", cs);
+	return 0;
+}
+
+/* Write val as a decimal string to CLUSTER_DIR/log_debug.
+   Returns 0 on success, a negative value if open or write fails. */
+
+static int set_configfs_debug(int val)
+{
+	char path[PATH_MAX];
+	char buf[32];
+	int fd, rv;
+
+	memset(path, 0, PATH_MAX);
+	snprintf(path, PATH_MAX, "%s/log_debug", CLUSTER_DIR);
+
+	fd = open(path, O_WRONLY);
+	if (fd < 0) {
+		log_error("%s: open failed: %d", path, errno);
+		return fd;
+	}
+
+	memset(buf, 0, sizeof(buf));
+	snprintf(buf, sizeof(buf), "%d", val);
+
+	rv = do_write(fd, buf, strlen(buf));
+	if (rv < 0) {
+		log_error("%s: write failed: %d", path, errno);
+		/* do not leak the descriptor on the error path */
+		close(fd);
+		return rv;
+	}
+	close(fd);
+	log_debug("set log_debug %d", val);
+	return 0;
+}
+
+#define NET_RMEM_DEFAULT 4194304
+#define NET_RMEM_MAX 4194304
+
+/* Make sure the integer stored in the file at "path" is at least min_val,
+   rewriting it when the current value is lower.  Returns 0 on success or
+   when no change is needed, a negative value if open/read/write fails. */
+
+static int raise_proc_value(const char *path, int min_val)
+{
+	char buf[32];
+	int fd, rv;
+
+	fd = open(path, O_RDWR);
+	if (fd < 0) {
+		log_error("%s: open failed: %d", path, errno);
+		return fd;
+	}
+
+	memset(buf, 0, sizeof(buf));
+
+	/* leave the last byte zero so atoi() always sees a terminator */
+	rv = read(fd, buf, sizeof(buf) - 1);
+	if (rv < 0) {
+		log_error("%s: read failed: %d", path, errno);
+		close(fd);
+		return rv;
+	}
+
+	if (atoi(buf) >= min_val) {
+		close(fd);
+		return 0;
+	}
+
+	memset(buf, 0, sizeof(buf));
+	snprintf(buf, sizeof(buf), "%d", min_val);
+
+	rv = do_write(fd, buf, strlen(buf));
+	if (rv < 0) {
+		log_error("%s: write failed: %d", path, errno);
+		close(fd);
+		return rv;
+	}
+
+	close(fd);
+	log_debug("set %s %s", path, buf);
+	return 0;
+}
+
+/* Raise /proc/sys/net/core/rmem_default and rmem_max to NET_RMEM_DEFAULT
+   and NET_RMEM_MAX when they are currently lower.  Stops at the first
+   failure and returns its negative value; returns 0 otherwise. */
+
+static int set_proc_rmem(void)
+{
+	int rv;
+
+	rv = raise_proc_value("/proc/sys/net/core/rmem_default",
+			      NET_RMEM_DEFAULT);
+	if (rv < 0)
+		return rv;
+
+	rv = raise_proc_value("/proc/sys/net/core/rmem_max", NET_RMEM_MAX);
+	if (rv < 0)
+		return rv;
+
+	return 0;
+}
+
+void clear_configfs(void)
+{
+ clear_configfs_comms();
+ clear_configfs_spaces();
+ rmdir("/sys/kernel/config/dlm/cluster");
+}
+
+int setup_configfs(void)
+{
+ int rv;
+
+ clear_configfs();
+
+ rv = add_configfs_base();
+ if (rv < 0)
+ return rv;
+
+ /* add configfs entries for existing nodes */
+ update_cluster();
+
+ /* the kernel has its own defaults for these values which we
+ don't want to change unless these have been set; -1 means
+ they have not been set on command line or config file */
+
+ if (cfgk_debug != -1)
+ set_configfs_debug(cfgk_debug);
+ if (cfgk_timewarn != -1)
+ set_configfs_timewarn(cfgk_timewarn);
+
+ if (cfgk_protocol == PROTO_DETECT) {
+ rv = detect_protocol();
+ if (rv == PROTO_TCP || rv == PROTO_SCTP)
+ cfgk_protocol = rv;
+ }
+
+ if (cfgk_protocol == PROTO_TCP || cfgk_protocol == PROTO_SCTP)
+ set_configfs_protocol(cfgk_protocol);
+
+ if (cfgk_protocol == PROTO_SCTP)
+ set_proc_rmem();
+
+ return 0;
+}
+
+/* Scan /proc/misc for the dlm misc devices and store their minor numbers
+   in control_minor, monitor_minor, plock_minor and old_plock_minor.
+   All four are reset to 0 first, so 0 means "not found". */
+
+static void find_minors(void)
+{
+	FILE *fl;
+	char name[256];
+	uint32_t number;
+	int found = 0;
+	int c;
+
+	control_minor = 0;
+	monitor_minor = 0;
+	plock_minor = 0;
+	old_plock_minor = 0;
+
+	if (!(fl = fopen("/proc/misc", "r"))) {
+		log_error("/proc/misc fopen failed: %s", strerror(errno));
+		return;
+	}
+
+	while (!feof(fl)) {
+		/* number is unsigned, so use the unsigned conversion, as the
+		   other uint32_t fscanf in this file does */
+		if (fscanf(fl, "%u %255s\n", &number, &name[0]) == 2) {
+
+			if (!strcmp(name, "dlm-control")) {
+				control_minor = number;
+				found++;
+			} else if (!strcmp(name, "dlm-monitor")) {
+				monitor_minor = number;
+				found++;
+			} else if (!strcmp(name, "dlm_plock")) {
+				plock_minor = number;
+				found++;
+			} else if (!strcmp(name, "lock_dlm_plock")) {
+				old_plock_minor = number;
+				found++;
+			}
+
+		} else do {
+			/* line did not parse: discard the rest of it */
+			c = fgetc(fl);
+		} while (c != EOF && c != '\n');
+
+		/* NOTE(review): four names are matched but the scan stops
+		   after three hits; presumably dlm_plock and lock_dlm_plock
+		   are not expected together -- confirm */
+		if (found == 3)
+			break;
+	}
+	fclose(fl);
+
+	if (!found)
+		log_error("Is dlm missing from kernel? No misc devices found.");
+}
+
+static int find_udev_device(const char *path, uint32_t minor)
+{
+ struct stat st;
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ if (stat(path, &st) == 0 && minor(st.st_rdev) == minor)
+ return 0;
+ sleep(1);
+ }
+
+ log_error("cannot find device %s with minor %d", path, minor);
+ return -1;
+}
+
+int setup_misc_devices(void)
+{
+ int rv;
+
+ find_minors();
+
+ if (control_minor) {
+ rv = find_udev_device("/dev/misc/dlm-control", control_minor);
+ if (rv < 0)
+ return rv;
+ log_debug("found /dev/misc/dlm-control minor %u",
+ control_minor);
+ }
+
+ if (monitor_minor) {
+ rv = find_udev_device("/dev/misc/dlm-monitor", monitor_minor);
+ if (rv < 0)
+ return rv;
+ log_debug("found /dev/misc/dlm-monitor minor %u",
+ monitor_minor);
+ }
+
+ if (plock_minor) {
+ rv = find_udev_device("/dev/misc/dlm_plock", plock_minor);
+ if (rv < 0)
+ return rv;
+ log_debug("found /dev/misc/dlm_plock minor %u",
+ plock_minor);
+ }
+
+ if (!plock_minor && old_plock_minor) {
+ rv = find_udev_device("/dev/misc/lock_dlm_plock",
+ old_plock_minor);
+ if (rv < 0)
+ return rv;
+ log_debug("found /dev/misc/lock_dlm_plock minor %u",
+ old_plock_minor);
+ }
+
+ return 0;
+}
+
diff --git a/dlm_controld/config.c b/dlm_controld/config.c
new file mode 100644
index 0000000..40b0c87
--- /dev/null
+++ b/dlm_controld/config.c
@@ -0,0 +1,93 @@
+#include "dlm_daemon.h"
+#include <libxml/tree.h>
+
+/* TODO:
+ <dlm>
+ <lockspace name="foo" nodir="1">
+ <master nodeid="1" weight="2"/>
+ <master nodeid="2" weight="3"/>
+ </lockspace>
+ </dlm>
+*/
+
+int get_weight(int nodeid, char *lockspace)
+{
+ /* default weight is 1 */
+ return 1;
+}
+
+static void proto_val(char *str, int *val)
+{
+ if (!strncasecmp(str, "tcp", 3))
+ *val = PROTO_TCP;
+ else if (!strncasecmp(str, "sctp", 4))
+ *val = PROTO_SCTP;
+ else if (!strncasecmp(str, "detect", 6))
+ *val = PROTO_DETECT;
+ else {
+ log_error("invalid protocol value %s", str);
+ }
+}
+
+/* Read the integer attribute "name" from the root element into *val,
+   unless *opt is non-zero (the callers pass the matching opt* flag, so a
+   value already given on the command line is kept). */
+
+static void set_val(xmlNode *root, const char *name, int *opt, int *val)
+{
+	xmlChar *str;
+
+	str = xmlGetProp(root, BAD_CAST name);
+	if (!str)
+		return;
+
+	if (!(*opt)) {
+		*val = atoi((char *)str);
+		log_debug("config %s = %d", name, *val);
+	}
+
+	/* xmlGetProp returns a copy that the caller must release */
+	xmlFree(str);
+}
+
+/* Load settings from DLM_CONFIG_FILE, if it exists, into the cfg*
+   variables.  With update == 0 every setting is read; with update != 0
+   only the settings that can be changed while running are re-read.
+   Parse failures are logged and leave the current values untouched. */
+
+void setup_config(int update)
+{
+	xmlDoc *doc;
+	xmlNode *root;
+	xmlChar *str;
+
+	if (!path_exists(DLM_CONFIG_FILE))
+		return;
+
+	doc = xmlParseFile(DLM_CONFIG_FILE);
+	if (!doc) {
+		log_error("xml parse error %d %s", errno, DLM_CONFIG_FILE);
+		return;
+	}
+
+	root = xmlDocGetRootElement(doc);
+	if (!root) {
+		log_error("xml root error %d %s", errno, DLM_CONFIG_FILE);
+		xmlFreeDoc(doc);
+		return;
+	}
+
+	if (update)
+		goto do_update;
+
+	/* These config values are set from dlm.conf only if they haven't
+	   already been set on the command line. */
+
+	str = xmlGetProp(root, BAD_CAST "protocol");
+	if (str && !optk_protocol) {
+		proto_val((char *)str, &cfgk_protocol);
+		log_debug("config protocol = %d", cfgk_protocol);
+	}
+	/* xmlGetProp returns a copy that the caller must release */
+	if (str)
+		xmlFree(str);
+
+	set_val(root, "log_debug", &optk_debug, &cfgk_debug);
+	set_val(root, "timewarn", &optk_timewarn, &cfgk_timewarn);
+	set_val(root, "enable_fencing", &optd_enable_fencing, &cfgd_enable_fencing);
+	set_val(root, "enable_quorum", &optd_enable_quorum, &cfgd_enable_quorum);
+	set_val(root, "enable_plock", &optd_enable_plock, &cfgd_enable_plock);
+	set_val(root, "plock_ownership", &optd_plock_ownership, &cfgd_plock_ownership);
+ do_update:
+	/* The following can be changed while running */
+	set_val(root, "plock_debug", &optd_plock_debug, &cfgd_plock_debug);
+	set_val(root, "plock_rate_limit", &optd_plock_rate_limit, &cfgd_plock_rate_limit);
+	set_val(root, "drop_resources_time", &optd_drop_resources_time, &cfgd_drop_resources_time);
+	set_val(root, "drop_resources_count", &optd_drop_resources_count, &cfgd_drop_resources_count);
+	set_val(root, "drop_resources_age", &optd_drop_resources_age, &cfgd_drop_resources_age);
+
+	xmlFreeDoc(doc);
+}
+
diff --git a/dlm_controld/config.h b/dlm_controld/config.h
new file mode 100644
index 0000000..ab37eed
--- /dev/null
+++ b/dlm_controld/config.h
@@ -0,0 +1,47 @@
+#ifndef __CONFIG_DOT_H__
+#define __CONFIG_DOT_H__
+
+/* the kernel has default values for debug, timewarn and protocol;
+ we only change them if new values are given on command line or in
+ the config file (dlm.conf) */
+
+/* Default values for the daemon settings. */
+
+#define DEFAULT_DEBUG_LOGFILE 0
+#define DEFAULT_ENABLE_FENCING 1
+#define DEFAULT_ENABLE_QUORUM 0
+#define DEFAULT_ENABLE_PLOCK 1
+#define DEFAULT_PLOCK_DEBUG 0
+#define DEFAULT_PLOCK_RATE_LIMIT 0
+#define DEFAULT_PLOCK_OWNERSHIP 0
+#define DEFAULT_DROP_RESOURCES_TIME 10000 /* 10 sec */
+#define DEFAULT_DROP_RESOURCES_COUNT 10
+#define DEFAULT_DROP_RESOURCES_AGE 10000 /* 10 sec */
+
+/* opt*: setup_config() takes a value from the config file only when the
+ matching opt* variable is zero, so non-zero presumably means "given on the
+ command line" -- confirm against the option parser.
+ NOTE(review): the k/d infix appears to separate kernel settings from
+ daemon settings -- confirm. */
+
+extern int optk_debug;
+extern int optk_timewarn;
+extern int optk_protocol;
+extern int optd_debug_logfile;
+extern int optd_enable_fencing;
+extern int optd_enable_quorum;
+extern int optd_enable_plock;
+extern int optd_plock_debug;
+extern int optd_plock_rate_limit;
+extern int optd_plock_ownership;
+extern int optd_drop_resources_time;
+extern int optd_drop_resources_count;
+extern int optd_drop_resources_age;
+
+/* cfg*: the setting values themselves; setup_config() stores values read
+ from the config file here. */
+
+extern int cfgk_debug;
+extern int cfgk_timewarn;
+extern int cfgk_protocol;
+extern int cfgd_debug_logfile;
+extern int cfgd_enable_fencing;
+extern int cfgd_enable_quorum;
+extern int cfgd_enable_plock;
+extern int cfgd_plock_debug;
+extern int cfgd_plock_rate_limit;
+extern int cfgd_plock_ownership;
+extern int cfgd_drop_resources_time;
+extern int cfgd_drop_resources_count;
+extern int cfgd_drop_resources_age;
+
+#endif
+
diff --git a/dlm_controld/cpg.c b/dlm_controld/cpg.c
new file mode 100644
index 0000000..b3c1454
--- /dev/null
+++ b/dlm_controld/cpg.c
@@ -0,0 +1,2606 @@
+#include "dlm_daemon.h"
+
+/* A single version: major, minor and patch numbers plus a flags word,
+ each 16 bits wide. */
+struct protocol_version {
+ uint16_t major;
+ uint16_t minor;
+ uint16_t patch;
+ uint16_t flags;
+};
+
+/* Four versions, each reachable either as a struct protocol_version or as
+ an array of four uint16_t overlaying the same storage.
+ NOTE(review): the names suggest daemon/kernel crossed with
+ maximum-supported/currently-running -- confirm where they are filled in. */
+struct protocol {
+ union {
+ struct protocol_version dm_ver;
+ uint16_t daemon_max[4];
+ };
+ union {
+ struct protocol_version km_ver;
+ uint16_t kernel_max[4];
+ };
+ union {
+ struct protocol_version dr_ver;
+ uint16_t daemon_run[4]; /* [0..2] are copied into outgoing message headers */
+ };
+ union {
+ struct protocol_version kr_ver;
+ uint16_t kernel_run[4];
+ };
+};
+
+/* One node in a change; entries are linked on either change.members or
+ change.removed. */
+struct member {
+ struct list_head list;
+ int nodeid;
+ int start; /* 1 if we received a start message for this change */
+ int added; /* 1 if added by this change */
+ int failed; /* 1 if failed in this change */
+ int disallowed; /* set by receive_start for an added node that reports a non-zero started_count */
+ uint32_t start_flags; /* flags from the header of the node's start message */
+};
+
+/* Per-lockspace history of one node, linked on ls->node_history. */
+struct node {
+ struct list_head list;
+ int nodeid;
+ int check_fencing; /* waiting for fencing of this node to be confirmed */
+ int check_quorum; /* waiting for node to stop being a cluster member */
+ int check_fs; /* waiting for fs_notified */
+ int fs_notified;
+ uint64_t add_time; /* time of last start message; 0 after leave or fencing */
+ uint64_t fail_time; /* time the failure was recorded */
+ uint64_t fence_time; /* for debug */
+ uint64_t cluster_add_time;
+ uint64_t cluster_remove_time;
+ uint32_t fence_queries; /* for debug */
+ uint32_t added_seq; /* for queries */
+ uint32_t removed_seq; /* for queries */
+ int failed_reason; /* for queries */
+
+ struct protocol proto;
+};
+
+/* One of these change structs is created for every confchg a cpg gets. */
+
+/* values for change.state */
+#define CGST_WAIT_CONDITIONS 1
+#define CGST_WAIT_MESSAGES 2
+
+struct change {
+ struct list_head list; /* entry in ls->changes */
+ struct list_head members; /* struct member entries */
+ struct list_head removed; /* nodes removed by this change */
+ int member_count;
+ int joined_count;
+ int remove_count;
+ int failed_count;
+ int state; /* CGST_ */
+ int we_joined;
+ uint32_t seq; /* used as a reference for debugging, and for queries */
+ uint32_t combined_seq; /* for queries */
+ uint64_t create_time;
+};
+
+/* Message payload that follows struct dlm_header in the messages built by
+ send_info(). All fields are little-endian in the message; ls_info_in()
+ converts them in place on receipt. */
+struct ls_info {
+ uint32_t ls_info_size; /* sizeof(struct ls_info) on the sender */
+ uint32_t id_info_size; /* sizeof(struct id_info) on the sender */
+ uint32_t id_info_count; /* number of id_info entries that follow */
+
+ uint32_t started_count; /* sender's ls->started_count */
+
+ /* counts copied from the change the message describes */
+ int member_count;
+ int joined_count;
+ int remove_count;
+ int failed_count;
+};
+
+/* One entry per member, following struct ls_info in a message; receivers
+ step between entries using ls_info.id_info_size. */
+struct id_info {
+ int nodeid;
+};
+
+/* File-scope state. NOTE(review): apart from our_protocol (its daemon_run
+ version is copied into every outgoing header) the users of these
+ variables are outside this part of the file -- the roles are inferred
+ from the names only. */
+int message_flow_control_on;
+static cpg_handle_t cpg_handle_daemon;
+static int cpg_fd_daemon;
+static struct protocol our_protocol;
+static struct list_head daemon_nodes;
+static struct cpg_address daemon_member[MAX_NODES];
+static int daemon_member_count;
+
+/* Clear buf and fill it with " <nodeid>" for each entry of list, stopping
+   at the first entry that does not fit completely. */
+
+static void log_config_ids(char *buf, size_t buflen,
+			   const struct cpg_address *list, size_t entries)
+{
+	size_t used = 0;
+	size_t n;
+	int rv;
+
+	memset(buf, 0, buflen);
+
+	for (n = 0; n < entries; n++) {
+		rv = snprintf(buf + used, buflen - used, " %d",
+			      list[n].nodeid);
+		if (rv >= buflen - used)
+			break;
+		used += rv;
+	}
+}
+
+/* Write one debug line describing a cpg configuration change: the group
+   name, the three list sizes, and the nodeids in the member, joined and
+   left lists (each list is cut short if it outgrows its buffer). */
+
+static void log_config(const struct cpg_name *group_name,
+		       const struct cpg_address *member_list,
+		       size_t member_list_entries,
+		       const struct cpg_address *left_list,
+		       size_t left_list_entries,
+		       const struct cpg_address *joined_list,
+		       size_t joined_list_entries)
+{
+	char m_buf[128];
+	char j_buf[32];
+	char l_buf[32];
+
+	log_config_ids(m_buf, sizeof(m_buf), member_list, member_list_entries);
+	log_config_ids(j_buf, sizeof(j_buf), joined_list, joined_list_entries);
+	log_config_ids(l_buf, sizeof(l_buf), left_list, left_list_entries);
+
+	log_debug("%s conf %zu %zu %zu memb%s join%s left%s", group_name->value,
+		  member_list_entries, joined_list_entries, left_list_entries,
+		  m_buf, j_buf, l_buf);
+}
+
+/* Convert a received ls_info from little-endian to host byte order,
+   in place. */
+
+static void ls_info_in(struct ls_info *li)
+{
+	/* sizes and entry count describing the layout of the payload */
+	li->ls_info_size = le32_to_cpu(li->ls_info_size);
+	li->id_info_size = le32_to_cpu(li->id_info_size);
+	li->id_info_count = le32_to_cpu(li->id_info_count);
+
+	li->started_count = le32_to_cpu(li->started_count);
+
+	/* counts describing the change */
+	li->member_count = le32_to_cpu(li->member_count);
+	li->joined_count = le32_to_cpu(li->joined_count);
+	li->remove_count = le32_to_cpu(li->remove_count);
+	li->failed_count = le32_to_cpu(li->failed_count);
+}
+
+/* Convert one received id_info from little-endian to host byte order,
+   in place. */
+
+static void id_info_in(struct id_info *id)
+{
+	int wire_nodeid = id->nodeid;
+
+	id->nodeid = le32_to_cpu(wire_nodeid);
+}
+
+/* Convert every id_info entry of a received message to host byte order.
+   li must already have been converted: its id_info_count gives the number
+   of entries and its id_info_size the distance between them. */
+
+static void ids_in(struct ls_info *li, struct id_info *ids)
+{
+	char *pos = (char *)ids;
+	int n;
+
+	for (n = 0; n < li->id_info_count; n++, pos += li->id_info_size)
+		id_info_in((struct id_info *)pos);
+}
+
+/* Return a printable name for a DLM_MSG_ message type, or "unknown" for
+   any other value.  The returned string is a constant. */
+
+const char *msg_name(int type)
+{
+	const char *name = "unknown";
+
+	switch (type) {
+	case DLM_MSG_PROTOCOL:
+		name = "protocol";
+		break;
+	case DLM_MSG_START:
+		name = "start";
+		break;
+	case DLM_MSG_PLOCK:
+		name = "plock";
+		break;
+	case DLM_MSG_PLOCK_OWN:
+		name = "plock_own";
+		break;
+	case DLM_MSG_PLOCK_DROP:
+		name = "plock_drop";
+		break;
+	case DLM_MSG_PLOCK_SYNC_LOCK:
+		name = "plock_sync_lock";
+		break;
+	case DLM_MSG_PLOCK_SYNC_WAITER:
+		name = "plock_sync_waiter";
+		break;
+	case DLM_MSG_PLOCKS_DATA:
+		name = "plocks_data";
+		break;
+	case DLM_MSG_PLOCKS_DONE:
+		name = "plocks_done";
+		break;
+	case DLM_MSG_DEADLK_CYCLE_START:
+		name = "deadlk_cycle_start";
+		break;
+	case DLM_MSG_DEADLK_CYCLE_END:
+		name = "deadlk_cycle_end";
+		break;
+	case DLM_MSG_DEADLK_CHECKPOINT_READY:
+		name = "deadlk_checkpoint_ready";
+		break;
+	case DLM_MSG_DEADLK_CANCEL_LOCK:
+		name = "deadlk_cancel_lock";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
+
+/* Multicast len bytes at buf to the cpg identified by h.
+
+   While cpg_mcast_joined() returns CPG_ERR_TRY_AGAIN the send is repeated
+   after a 1000 usec sleep, with no upper limit; every 100th retry is
+   logged as an error.  type is used only to name the message in log
+   output.
+
+   Returns 0 on success, -1 on any other cpg error. */
+
+static int _send_message(cpg_handle_t h, void *buf, int len, int type)
+{
+	struct iovec iov;
+	cpg_error_t error;
+	int retries = 0;
+
+	iov.iov_base = buf;
+	iov.iov_len = len;
+
+	for (;;) {
+		error = cpg_mcast_joined(h, CPG_TYPE_AGREED, &iov, 1);
+		if (error != CPG_ERR_TRY_AGAIN)
+			break;
+
+		retries++;
+		usleep(1000);
+		if (!(retries % 100))
+			log_error("cpg_mcast_joined retry %d %s",
+				  retries, msg_name(type));
+	}
+
+	if (error != CPG_OK) {
+		log_error("cpg_mcast_joined error %d handle %llx %s",
+			  error, (unsigned long long)h, msg_name(type));
+		return -1;
+	}
+
+	if (retries)
+		log_debug("cpg_mcast_joined retried %d %s",
+			  retries, msg_name(type));
+
+	return 0;
+}
+
+/* Complete the dlm_header at the start of buf and multicast the message
+   to the lockspace's cpg.
+
+   header fields caller needs to set: type, to_nodeid, flags, msgdata
+   (msgdata2 is also converted and sent as given).
+
+   The function fills in version, nodeid and global_id itself and converts
+   every header field to little-endian in place, so the header in buf is
+   no longer in host order on return.  The result of the send is not
+   reported to the caller. */
+
+void dlm_send_message(struct lockspace *ls, char *buf, int len)
+{
+	struct dlm_header *hd = (struct dlm_header *) buf;
+	int host_type = hd->type;	/* kept in host order for logging */
+	int v;
+
+	for (v = 0; v < 3; v++)
+		hd->version[v] = cpu_to_le16(our_protocol.daemon_run[v]);
+
+	/* fields owned by this function */
+	hd->nodeid = cpu_to_le32(our_nodeid);
+	hd->global_id = cpu_to_le32(ls->global_id);
+
+	/* fields supplied by the caller */
+	hd->type = cpu_to_le16(hd->type);
+	hd->to_nodeid = cpu_to_le32(hd->to_nodeid);
+	hd->flags = cpu_to_le32(hd->flags);
+	hd->msgdata = cpu_to_le32(hd->msgdata);
+	hd->msgdata2 = cpu_to_le32(hd->msgdata2);
+
+	_send_message(ls->cpg_handle, buf, len, host_type);
+}
+
+/* Return the entry for nodeid on cg's members list, or NULL if the node
+   is not a member of that change. */
+
+static struct member *find_memb(struct change *cg, int nodeid)
+{
+	struct member *cur;
+	struct member *found = NULL;
+
+	list_for_each_entry(cur, &cg->members, list) {
+		if (cur->nodeid != nodeid)
+			continue;
+		found = cur;
+		break;
+	}
+
+	return found;
+}
+
+/* Return the lockspace whose cpg_handle equals h, or NULL if there is
+   none on the lockspaces list. */
+
+static struct lockspace *find_ls_handle(cpg_handle_t h)
+{
+	struct lockspace *cur;
+	struct lockspace *found = NULL;
+
+	list_for_each_entry(cur, &lockspaces, list) {
+		if (cur->cpg_handle != h)
+			continue;
+		found = cur;
+		break;
+	}
+
+	return found;
+}
+
+/* Return the lockspace whose cpg_client equals ci, or NULL if there is
+   none on the lockspaces list. */
+
+static struct lockspace *find_ls_ci(int ci)
+{
+	struct lockspace *cur;
+	struct lockspace *found = NULL;
+
+	list_for_each_entry(cur, &lockspaces, list) {
+		if (cur->cpg_client != ci)
+			continue;
+		found = cur;
+		break;
+	}
+
+	return found;
+}
+
+/* Unlink and free every struct member on the given list. */
+
+static void free_cg_members(struct list_head *head)
+{
+	struct member *cur, *next;
+
+	list_for_each_entry_safe(cur, next, head, list) {
+		list_del(&cur->list);
+		free(cur);
+	}
+}
+
+/* Free a change together with the entries on its members and removed
+   lists.  The change must already be off any list it was linked on. */
+
+static void free_cg(struct change *cg)
+{
+	free_cg_members(&cg->members);
+	free_cg_members(&cg->removed);
+	free(cg);
+}
+
+/* Free a lockspace and what it owns: all pending changes, the started
+   change (if any) and the node history. */
+
+static void free_ls(struct lockspace *ls)
+{
+	struct change *pending, *pending_next;
+	struct node *hist, *hist_next;
+
+	list_for_each_entry_safe(pending, pending_next, &ls->changes, list) {
+		list_del(&pending->list);
+		free_cg(pending);
+	}
+
+	/* started_change is kept separately from ls->changes */
+	if (ls->started_change)
+		free_cg(ls->started_change);
+
+	list_for_each_entry_safe(hist, hist_next, &ls->node_history, list) {
+		list_del(&hist->list);
+		free(hist);
+	}
+
+	free(ls);
+}
+
+
+/* Problem scenario:
+ nodes A,B,C are in fence domain
+ node C has gfs foo mounted
+ node C fails
+ nodes A,B begin fencing C (slow, not completed)
+ node B mounts gfs foo
+
+ We may end up having gfs foo mounted and being used on B before
+ C has been fenced. C could then wake up and corrupt the fs.
+
+ So, we need to prevent any new gfs mounts while there are any
+ outstanding, incomplete fencing operations.
+
+ We also need to check that the specific failed nodes we know about have
+ been fenced (since fenced may not even have been notified that the node
+ has failed yet).
+
+ So, check that:
+ 1. has the fenced daemon fenced the node since we saw it fail?
+ 2. fenced has no outstanding fencing ops
+
+ For 1:
+ - node X fails
+ - we see node X fail and X has non-zero add_time,
+ set check_fencing and record the fail time
+ - wait for X to be removed from all dlm cpg's (probably not necessary)
+ - check that the fencing time is later than the recorded time above
+
+ Tracking fencing state when there are spurious partitions/merges...
+
+ from a spurious leave/join of node X, a lockspace will see:
+ - node X is a lockspace member
+ - node X fails, may be waiting for all cpgs to see failure or for fencing to
+ complete
+ - node X joins the lockspace - we want to process the change as usual, but
+ don't want to disrupt the code waiting for the fencing, and we want to
+ continue running properly once the remerged node is properly reset
+
+ ls->node_history
+ when we see a node not in this list, add entry for it with zero add_time
+ record the time we get a good start message from the node, add_time
+ clear add_time if the node leaves
+ if node fails with non-zero add_time, set check_fencing
+ when a node is fenced, clear add_time and clear check_fencing
+ if a node remerges after this, no good start message, no new add_time set
+ if a node fails with zero add_time, it doesn't need fencing
+ if a node remerges before it's been fenced, no good start message, no new
+ add_time set
+*/
+
+/* Return the history entry for nodeid in this lockspace, or NULL if the
+   node has no entry on ls->node_history. */
+
+static struct node *get_node_history(struct lockspace *ls, int nodeid)
+{
+	struct node *cur;
+	struct node *found = NULL;
+
+	list_for_each_entry(cur, &ls->node_history, list) {
+		if (cur->nodeid != nodeid)
+			continue;
+		found = cur;
+		break;
+	}
+
+	return found;
+}
+
+/* Make sure the lockspace has a history entry for nodeid, creating a
+   zeroed one (add_time 0) at the tail of ls->node_history if needed.
+   If cg is given, its seq is recorded as the node's added_seq.
+   Returns silently, without creating an entry, if memory cannot be
+   allocated. */
+
+static void node_history_init(struct lockspace *ls, int nodeid,
+			      struct change *cg)
+{
+	struct node *node = get_node_history(ls, nodeid);
+
+	if (!node) {
+		node = malloc(sizeof(struct node));
+		if (!node)
+			return;
+
+		/* zeroing also leaves add_time at 0 */
+		memset(node, 0, sizeof(struct node));
+		node->nodeid = nodeid;
+		list_add_tail(&node->list, &ls->node_history);
+	}
+
+	if (cg)
+		node->added_seq = cg->seq; /* for queries */
+}
+
+/* Record the current time as cluster_add_time for nodeid in every
+   lockspace, creating the history entry where it does not exist yet.
+   Stops at the first lockspace for which no entry could be obtained. */
+
+void node_history_cluster_add(int nodeid)
+{
+	struct lockspace *ls;
+	struct node *node;
+
+	list_for_each_entry(ls, &lockspaces, list) {
+		node_history_init(ls, nodeid, NULL);
+
+		node = get_node_history(ls, nodeid);
+		if (node) {
+			node->cluster_add_time = time(NULL);
+			continue;
+		}
+
+		log_error("node_history_cluster_add no nodeid %d",
+			  nodeid);
+		return;
+	}
+}
+
+/* Record the current time as cluster_remove_time for nodeid in every
+   lockspace that has a history entry for the node.
+
+   A lockspace without an entry is logged and skipped; it must not stop
+   the remaining lockspaces from being updated. */
+
+void node_history_cluster_remove(int nodeid)
+{
+	struct lockspace *ls;
+	struct node *node;
+
+	list_for_each_entry(ls, &lockspaces, list) {
+		node = get_node_history(ls, nodeid);
+		if (!node) {
+			log_error("node_history_cluster_remove no nodeid %d",
+				  nodeid);
+			continue;
+		}
+
+		node->cluster_remove_time = time(NULL);
+	}
+}
+
+/* Set the node's add_time to the current time; called when a start
+   message from the node has been accepted.  Logs an error if the node
+   has no history entry. */
+
+static void node_history_start(struct lockspace *ls, int nodeid)
+{
+	struct node *node = get_node_history(ls, nodeid);
+
+	if (node) {
+		node->add_time = time(NULL);
+		return;
+	}
+
+	log_error("node_history_start no nodeid %d", nodeid);
+}
+
+/* Record that the node left the lockspace in change cg: add_time is
+   cleared and cg's seq becomes the node's removed_seq.  Logs an error if
+   the node has no history entry. */
+
+static void node_history_left(struct lockspace *ls, int nodeid,
+			      struct change *cg)
+{
+	struct node *node = get_node_history(ls, nodeid);
+
+	if (node) {
+		node->add_time = 0;
+		node->removed_seq = cg->seq; /* for queries */
+		return;
+	}
+
+	log_error("node_history_left no nodeid %d", nodeid);
+}
+
+/* Record that the node failed in change cg and flag which conditions
+   must be satisfied before the lockspace can be started again:
+
+   check_fencing - fencing is enabled and the node had a non-zero add_time
+   check_quorum  - quorum checking is enabled and fencing is disabled
+   check_fs      - a fs is registered for this lockspace
+
+   Logs an error if the node has no history entry. */
+
+static void node_history_fail(struct lockspace *ls, int nodeid,
+			      struct change *cg, int reason)
+{
+	struct node *node = get_node_history(ls, nodeid);
+	int need_fencing;
+
+	if (!node) {
+		log_error("node_history_fail no nodeid %d", nodeid);
+		return;
+	}
+
+	need_fencing = cfgd_enable_fencing && node->add_time;
+	if (need_fencing) {
+		node->fail_time = time(NULL);
+		node->fence_time = 0;
+		node->fence_queries = 0;
+		node->check_fencing = 1;
+	}
+
+	/* fenced will take care of making sure the quorum value
+	   is adjusted for all the failures */
+
+	if (!cfgd_enable_fencing && cfgd_enable_quorum)
+		node->check_quorum = 1;
+
+	if (ls->fs_registered) {
+		log_group(ls, "check_fs nodeid %d set", nodeid);
+		node->check_fs = 1;
+	}
+
+	node->failed_reason = reason; /* for queries */
+	node->removed_seq = cg->seq; /* for queries */
+}
+
+/* Decide whether fencing no longer blocks this lockspace.
+
+   Returns 1 when fencing is disabled, or when every node flagged
+   check_fencing has been fenced at or after its fail_time and fenced
+   reports no fencing operation in progress.  Returns 0 while anything is
+   still outstanding or when fenced cannot be queried.
+
+   For each node found to be fenced, check_fencing and add_time are
+   cleared. */
+
+static int check_fencing_done(struct lockspace *ls)
+{
+	struct node *node;
+	uint64_t last_fenced_time;
+	int in_progress, wait_count = 0;
+	int rv;
+
+	if (!cfgd_enable_fencing) {
+		log_group(ls, "check_fencing disabled");
+		return 1;
+	}
+
+	list_for_each_entry(node, &ls->node_history, list) {
+		if (!node->check_fencing)
+			continue;
+
+		/* check with fenced to see if the node has been
+		   fenced since node->add_time */
+
+		/* Start from 0 so that a failed query is treated as
+		   "not fenced yet" instead of comparing an uninitialized
+		   value against fail_time. */
+		last_fenced_time = 0;
+
+		rv = fence_node_time(node->nodeid, &last_fenced_time);
+		if (rv < 0)
+			log_error("fenced_node_info error %d", rv);
+
+		/* need >= not just > because in at least one case
+		   we've seen fenced_time within the same second as
+		   fail_time: with external fencing, e.g. fence_node */
+
+		if (last_fenced_time >= node->fail_time) {
+			log_group(ls, "check_fencing %d done "
+				  "add %llu fail %llu last %llu",
+				  node->nodeid,
+				  (unsigned long long)node->add_time,
+				  (unsigned long long)node->fail_time,
+				  (unsigned long long)last_fenced_time);
+			node->check_fencing = 0;
+			node->add_time = 0;
+			node->fence_time = last_fenced_time;
+		} else {
+			/* log only on the first query and whenever the
+			   reported time changes, to limit repeats */
+			if (!node->fence_queries ||
+			    node->fence_time != last_fenced_time) {
+				log_group(ls, "check_fencing %d wait "
+					  "add %llu fail %llu last %llu",
+					  node->nodeid,
+					  (unsigned long long)node->add_time,
+					  (unsigned long long)node->fail_time,
+					  (unsigned long long)last_fenced_time);
+				node->fence_queries++;
+				node->fence_time = last_fenced_time;
+			}
+			wait_count++;
+		}
+	}
+
+	if (wait_count)
+		return 0;
+
+	/* now check if there are any outstanding fencing ops (for nodes
+	   we may not have seen in any lockspace), and return 0 if there
+	   are any */
+
+	rv = fence_in_progress(&in_progress);
+	if (rv < 0) {
+		log_error("fenced_domain_info error %d", rv);
+		return 0;
+	}
+
+	if (in_progress)
+		return 0;
+
+	log_group(ls, "check_fencing done");
+	return 1;
+}
+
+/* Decide whether quorum no longer blocks this lockspace.
+
+   Returns 1 when quorum checking is disabled, or when every node flagged
+   check_quorum has stopped being a cluster member and the cluster is
+   quorate; returns 0 otherwise.  check_quorum is cleared on each node
+   that is no longer a cluster member. */
+
+static int check_quorum_done(struct lockspace *ls)
+{
+	struct node *node;
+	int pending = 0;
+
+	if (!cfgd_enable_quorum) {
+		log_group(ls, "check_quorum disabled");
+		return 1;
+	}
+
+	/* wait for quorum system (cman) to see all the same nodes failed, so
+	   we know that cluster_quorate is adjusted for the same failures we've
+	   seen (see comment in fenced about the assumption here) */
+
+	list_for_each_entry(node, &ls->node_history, list) {
+		if (!node->check_quorum)
+			continue;
+
+		if (is_cluster_member(node->nodeid)) {
+			log_group(ls, "check_quorum nodeid %d is_cluster_member",
+				  node->nodeid);
+			pending++;
+			continue;
+		}
+
+		node->check_quorum = 0;
+	}
+
+	if (pending)
+		return 0;
+
+	if (cluster_quorate) {
+		log_group(ls, "check_quorum done");
+		return 1;
+	}
+
+	log_group(ls, "check_quorum not quorate");
+	return 0;
+}
+
+/* wait for local fs_controld to ack each failed node
+
+   Returns 1 when no fs is registered for the lockspace or when every
+   node flagged check_fs has fs_notified set; both flags are cleared on
+   such nodes.  Returns 0 while any flagged node still lacks the
+   notification. */
+
+static int check_fs_done(struct lockspace *ls)
+{
+	struct node *node;
+	int pending = 0;
+
+	/* no corresponding fs for this lockspace */
+	if (!ls->fs_registered) {
+		log_group(ls, "check_fs none registered");
+		return 1;
+	}
+
+	list_for_each_entry(node, &ls->node_history, list) {
+		if (!node->check_fs)
+			continue;
+
+		if (!node->fs_notified) {
+			log_group(ls, "check_fs nodeid %d needs fs notify",
+				  node->nodeid);
+			pending++;
+			continue;
+		}
+
+		log_group(ls, "check_fs nodeid %d clear", node->nodeid);
+		node->check_fs = 0;
+		node->fs_notified = 0;
+	}
+
+	if (pending)
+		return 0;
+
+	log_group(ls, "check_fs done");
+	return 1;
+}
+
+/* Scratch arrays handed to set_configfs_members() by start_kernel();
+ refilled by format_member_ids() and format_renew_ids() on every call. */
+static int member_ids[MAX_NODES];
+static int member_count;
+static int renew_ids[MAX_NODES];
+static int renew_count;
+
+/* Fill member_ids/member_count with the nodeids of the members of the
+   first change on ls->changes.  ls->changes must not be empty.
+
+   At most MAX_NODES ids are stored; further members are dropped and an
+   error is logged rather than writing past the end of member_ids. */
+
+static void format_member_ids(struct lockspace *ls)
+{
+	struct change *cg = list_first_entry(&ls->changes, struct change, list);
+	struct member *memb;
+
+	memset(member_ids, 0, sizeof(member_ids));
+	member_count = 0;
+
+	list_for_each_entry(memb, &cg->members, list) {
+		if (member_count >= MAX_NODES) {
+			log_error("format_member_ids too many members, max %d",
+				  MAX_NODES);
+			break;
+		}
+		member_ids[member_count++] = memb->nodeid;
+	}
+}
+
+/* Return 1 if nodeid is on the removed list of any change on ls->changes
+   other than startcg, else 0. */
+
+static int removed_in_other_change(struct lockspace *ls,
+				   struct change *startcg, int nodeid)
+{
+	struct change *cg;
+	struct member *leftmemb;
+
+	list_for_each_entry(cg, &ls->changes, list) {
+		if (cg == startcg)
+			continue;
+		list_for_each_entry(leftmemb, &cg->removed, list) {
+			if (leftmemb->nodeid == nodeid)
+				return 1;
+		}
+	}
+	return 0;
+}
+
+/* list of nodeids that have left and rejoined since last start_kernel;
+   is any member of startcg in the left list of any other cg's?
+   (if it is, then it presumably must be flagged added in another)
+
+   Fills renew_ids/renew_count.  Each nodeid is recorded once, even if the
+   node was removed in several changes, and at most MAX_NODES ids are
+   stored so renew_ids cannot be overrun. */
+
+static void format_renew_ids(struct lockspace *ls)
+{
+	struct change *startcg;
+	struct member *memb;
+
+	startcg = list_first_entry(&ls->changes, struct change, list);
+
+	memset(renew_ids, 0, sizeof(renew_ids));
+	renew_count = 0;
+
+	list_for_each_entry(memb, &startcg->members, list) {
+		if (!removed_in_other_change(ls, startcg, memb->nodeid))
+			continue;
+
+		if (renew_count >= MAX_NODES) {
+			log_error("format_renew_ids too many nodes, max %d",
+				  MAX_NODES);
+			break;
+		}
+		renew_ids[renew_count++] = memb->nodeid;
+	}
+}
+
+/* Give the kernel the membership of the first change on ls->changes and
+ start the lockspace again. Does nothing except log an error if the
+ lockspace is not currently marked kernel_stopped. When ls->joining is
+ set, the sysfs id is written first and event_done is signalled last,
+ after which joining is cleared. */
+static void start_kernel(struct lockspace *ls)
+{
+ struct change *cg = list_first_entry(&ls->changes, struct change, list);
+
+ if (!ls->kernel_stopped) {
+ log_error("start_kernel cg %u not stopped", cg->seq);
+ return;
+ }
+
+ log_group(ls, "start_kernel cg %u member_count %d",
+ cg->seq, cg->member_count);
+
+ /* needs to happen before setting control which starts recovery */
+ if (ls->joining)
+ set_sysfs_id(ls->name, ls->global_id);
+
+ /* both helpers fill file-scope arrays that are passed on below */
+ format_member_ids(ls);
+ format_renew_ids(ls);
+ set_configfs_members(ls->name, member_count, member_ids,
+ renew_count, renew_ids);
+ set_sysfs_control(ls->name, 1);
+ ls->kernel_stopped = 0;
+
+ if (ls->joining) {
+ set_sysfs_event_done(ls->name, 0);
+ ls->joining = 0;
+ }
+}
+
+/* Stop the lockspace in the kernel unless it is already marked stopped.
+   seq identifies the change that caused the stop and is used only in the
+   log message. */
+
+static void stop_kernel(struct lockspace *ls, uint32_t seq)
+{
+	if (ls->kernel_stopped)
+		return;
+
+	log_group(ls, "stop_kernel cg %u", seq);
+	set_sysfs_control(ls->name, 0);
+	ls->kernel_stopped = 1;
+}
+
+/* the first condition is that the local lockspace is stopped which we
+   don't need to check for because stop_kernel(), which is synchronous,
+   was done when the change was created
+
+   Returns 1 once the fencing, quorum and fs conditions are all met.
+   Otherwise returns 0 after incrementing the poll counter of the first
+   condition that is not met; later conditions are not evaluated. */
+
+static int wait_conditions_done(struct lockspace *ls)
+{
+	/* the fencing/quorum/fs conditions need to account for all the changes
+	   that have occurred since the last change applied to dlm-kernel, not
+	   just the latest change */
+
+	int fencing_ok = check_fencing_done(ls);
+
+	if (!fencing_ok) {
+		poll_fencing++;
+		return 0;
+	}
+
+	/* fencing waits for quorum, so we don't need to check quorum for any
+	   reasons related to safety or protection, so enable_quorum defaults
+	   to 0.  This does mean that lockspaces (and cluster fs's) can be
+	   started/enabled in an inquorate cluster if there are no outstanding
+	   fencing operations.  Some users or apps may want lockspaces/fs's to
+	   only be enabled in a quorate cluster; enable_quorum can be set to 1
+	   to get that behavior.  The main advantage of not waiting for quorum
+	   here is to allow lockspaces to be shut down (and cluster fs's
+	   unmounted) in an inquorate cluster. */
+
+	if (!check_quorum_done(ls)) {
+		poll_quorum++;
+		return 0;
+	}
+
+	if (check_fs_done(ls))
+		return 1;
+
+	poll_fs++;
+	return 0;
+}
+
+/* Return 1 if a start message has been received from every member of the
+   first change on ls->changes, else 0.  Either way the counts are
+   logged. */
+
+static int wait_messages_done(struct lockspace *ls)
+{
+	struct change *cg = list_first_entry(&ls->changes, struct change, list);
+	struct member *memb;
+	int need = 0, total = 0;
+
+	list_for_each_entry(memb, &cg->members, list) {
+		total++;
+		if (memb->start)
+			continue;
+		need++;
+	}
+
+	if (!need) {
+		log_group(ls, "wait_messages cg %u got all %d", cg->seq, total);
+		return 1;
+	}
+
+	log_group(ls, "wait_messages cg %u need %d of %d",
+		  cg->seq, need, total);
+	return 0;
+}
+
+/* Promote the first change on ls->changes to ls->started_change (freeing
+   the previous started change) and free every other change still on the
+   list.  ls->changes must not be empty.
+
+   combined_seq of the new started change ends up as the seq of the last
+   change freed here, or as its own seq if no other change was queued. */
+
+static void cleanup_changes(struct lockspace *ls)
+{
+	struct change *started = list_first_entry(&ls->changes, struct change, list);
+	struct change *old, *next;
+
+	list_del(&started->list);
+	if (ls->started_change)
+		free_cg(ls->started_change);
+	ls->started_change = started;
+
+	/* zero is skipped on wraparound; receive_start() tests
+	   started_count for being non-zero */
+	if (!++ls->started_count)
+		ls->started_count = 1;
+
+	started->combined_seq = started->seq; /* for queries */
+
+	list_for_each_entry_safe(old, next, &ls->changes, list) {
+		started->combined_seq = old->seq; /* for queries */
+		list_del(&old->list);
+		free_cg(old);
+	}
+}
+
+/* There's a stream of confchg and messages. At one of these
+ messages, the low node needs to store plocks and new nodes
+ need to begin saving plock messages. A second message is
+ needed to say that the plocks are ready to be read.
+
+ When the last start message is recvd for a change, the low node
+ stores plocks and the new nodes begin saving messages. When the
+ store is done, low node sends plocks_stored message. When
+ new nodes recv this, they read the plocks and their saved messages.
+ plocks_stored message should identify a specific change, like start
+ messages do; if it doesn't match ls->started_change, then it's ignored.
+
+ If a confchg adding a new node arrives after plocks are stored but
+ before plocks_stored msg recvd, then the message is ignored. The low
+ node will send another plocks_stored message for the latest change
+ (although it may be able to reuse the ckpt if no plock state has changed).
+*/
+
+/* Set ls->plock_data_node to the lowest nodeid among the members of the
+   first change on ls->changes whose start message carried
+   DLM_MFLG_HAVEPLOCK, or to 0 if no member carried that flag. */
+
+static void set_plock_data_node(struct lockspace *ls)
+{
+	struct change *cg = list_first_entry(&ls->changes, struct change, list);
+	struct member *memb;
+	int lowest = 0;
+
+	list_for_each_entry(memb, &cg->members, list) {
+		if ((memb->start_flags & DLM_MFLG_HAVEPLOCK) &&
+		    (!lowest || memb->nodeid < lowest))
+			lowest = memb->nodeid;
+	}
+
+	log_dlock(ls, "set_plock_data_node from %d to %d",
+		  ls->plock_data_node, lowest);
+
+	ls->plock_data_node = lowest;
+}
+
+/* Search count id_info entries, spaced size bytes apart starting at ids,
+   for the one with the given nodeid.  Returns the entry or NULL. */
+
+static struct id_info *get_id_struct(struct id_info *ids, int count, int size,
+				     int nodeid)
+{
+	char *pos = (char *)ids;
+	int n;
+
+	for (n = 0; n < count; n++, pos += size) {
+		struct id_info *entry = (struct id_info *)pos;
+
+		if (entry->nodeid == nodeid)
+			return entry;
+	}
+	return NULL;
+}
+
+/* do the change details in the message match the details of the given change
+
+ hd, li and ids are the header, ls_info and id_info entries of a received
+ message, already converted to host byte order by the callers in this
+ file. Returns 1 on a match and 0 otherwise; every rejection is logged
+ with its reason. The checks run in the order written below and the
+ first failing one decides the log message. */
+
+static int match_change(struct lockspace *ls, struct change *cg,
+ struct dlm_header *hd, struct ls_info *li,
+ struct id_info *ids)
+{
+ struct id_info *id;
+ struct member *memb;
+ struct node *node;
+ uint32_t seq = hd->msgdata; /* sender's seq for the change; used only for logging here */
+ int i, members_mismatch;
+
+ /* We can ignore messages if we're not in the list of members.
+ The one known time this will happen is after we've joined
+ the cpg, we can get messages for changes prior to the change
+ in which we're added. */
+
+ id = get_id_struct(ids, li->id_info_count, li->id_info_size,our_nodeid);
+
+ if (!id) {
+ log_group(ls, "match_change %d:%u skip %u we are not in members",
+ hd->nodeid, seq, cg->seq);
+ return 0;
+ }
+
+ /* the sender must be a member of the change */
+ memb = find_memb(cg, hd->nodeid);
+ if (!memb) {
+ log_group(ls, "match_change %d:%u skip %u sender not member",
+ hd->nodeid, seq, cg->seq);
+ return 0;
+ }
+
+ /* the sender has nacked this change with an earlier start message */
+ if (memb->start_flags & DLM_MFLG_NACK) {
+ log_group(ls, "match_change %d:%u skip %u is nacked",
+ hd->nodeid, seq, cg->seq);
+ return 0;
+ }
+
+ /* a second start message from the same node cannot be for this change */
+ if (memb->start && hd->type == DLM_MSG_START) {
+ log_group(ls, "match_change %d:%u skip %u already start",
+ hd->nodeid, seq, cg->seq);
+ return 0;
+ }
+
+ /* a node's start can't match a change if the node joined the cluster
+ more recently than the change was created */
+
+ node = get_node_history(ls, hd->nodeid);
+ if (!node) {
+ log_group(ls, "match_change %d:%u skip cg %u no node history",
+ hd->nodeid, seq, cg->seq);
+ return 0;
+ }
+
+ if (node->cluster_add_time > cg->create_time) {
+ log_group(ls, "match_change %d:%u skip cg %u created %llu "
+ "cluster add %llu", hd->nodeid, seq, cg->seq,
+ (unsigned long long)cg->create_time,
+ (unsigned long long)node->cluster_add_time);
+ return 0;
+ }
+
+ /* verify this is the right change by matching the counts
+ and the nodeids of the current members */
+
+ if (li->member_count != cg->member_count ||
+ li->joined_count != cg->joined_count ||
+ li->remove_count != cg->remove_count ||
+ li->failed_count != cg->failed_count) {
+ log_group(ls, "match_change %d:%u skip %u expect counts "
+ "%d %d %d %d", hd->nodeid, seq, cg->seq,
+ cg->member_count, cg->joined_count,
+ cg->remove_count, cg->failed_count);
+ return 0;
+ }
+
+ members_mismatch = 0;
+ id = ids;
+
+ /* every nodeid listed in the message must be a member of the change */
+ for (i = 0; i < li->id_info_count; i++) {
+ memb = find_memb(cg, id->nodeid);
+ if (!memb) {
+ log_group(ls, "match_change %d:%u skip %u no memb %d",
+ hd->nodeid, seq, cg->seq, id->nodeid);
+ members_mismatch = 1;
+ break;
+ }
+ id = (struct id_info *)((char *)id + li->id_info_size);
+ }
+
+ if (members_mismatch)
+ return 0;
+
+ log_group(ls, "match_change %d:%u matches cg %u", hd->nodeid, seq,
+ cg->seq);
+ return 1;
+}
+
+/* Unfortunately, there's no really simple way to match a message with the
+ specific change that it was sent for. We hope that by passing all the
+ details of the change in the message, we will be able to uniquely match
+ it to the correct change. */
+
+/* A start message will usually be for the first (current) change on our list.
+ In some cases it will be for a non-current change, and we can ignore it:
+
+ 1. A,B,C get confchg1 adding C
+ 2. C sends start for confchg1
+ 3. A,B,C get confchg2 adding D
+ 4. A,B,C,D recv start from C for confchg1 - ignored
+ 5. C,D send start for confchg2
+ 6. A,B send start for confchg2
+ 7. A,B,C,D recv all start messages for confchg2, and start kernel
+
+ In step 4, how do the nodes know whether the start message from C is
+ for confchg1 or confchg2? Hopefully by comparing the counts and members. */
+
+/* Walk ls->changes from the tail towards the head and return the first
+   change that match_change() accepts for this message, or NULL (after
+   logging) if none does. */
+
+static struct change *find_change(struct lockspace *ls, struct dlm_header *hd,
+				  struct ls_info *li, struct id_info *ids)
+{
+	struct change *cg;
+
+	list_for_each_entry_reverse(cg, &ls->changes, list) {
+		if (match_change(ls, cg, hd, li, ids))
+			return cg;
+	}
+
+	log_group(ls, "find_change %d:%u no match", hd->nodeid, hd->msgdata);
+	return NULL;
+}
+
+/* Return 1 if any change on ls->changes has nodeid as a member flagged
+   added, else 0. */
+
+static int is_added(struct lockspace *ls, int nodeid)
+{
+	struct change *cg;
+	struct member *memb;
+
+	list_for_each_entry(cg, &ls->changes, list) {
+		memb = find_memb(cg, nodeid);
+		if (!memb)
+			continue;
+		if (memb->added)
+			return 1;
+	}
+	return 0;
+}
+
+/* Handle a start message.  The ls_info and id_info payload after the
+   header is converted to host byte order in place, the message is matched
+   to one of our pending changes, and the sender is marked as started in
+   that change unless the message is a NACK or the sender is disallowed.
+
+   NOTE(review): len is only logged; the payload is read using the counts
+   and sizes found inside the message without checking them against len.
+   Confirm whether the caller validates the message length. */
+
+static void receive_start(struct lockspace *ls, struct dlm_header *hd, int len)
+{
+	struct ls_info *li = (struct ls_info *)(hd + 1);
+	struct id_info *ids = (struct id_info *)(li + 1);
+	struct change *cg;
+	struct member *sender;
+	uint32_t seq = hd->msgdata;
+
+	log_group(ls, "receive_start %d:%u len %d", hd->nodeid, seq, len);
+
+	ls_info_in(li);
+	ids_in(li, ids);
+
+	cg = find_change(ls, hd, li, ids);
+	if (!cg)
+		return;
+
+	sender = find_memb(cg, hd->nodeid);
+	if (!sender) {
+		/* this should never happen since match_change checks it */
+		log_error("receive_start no member %d", hd->nodeid);
+		return;
+	}
+
+	sender->start_flags = hd->flags;
+
+	/* a node being added that says it has already started, while we
+	   have started too, is not allowed in */
+	if (is_added(ls, hd->nodeid) && li->started_count &&
+	    ls->started_count) {
+		log_error("receive_start %d:%u add node with started_count %u",
+			  hd->nodeid, seq, li->started_count);
+
+		/* see comment in fence/fenced/cpg.c */
+		sender->disallowed = 1;
+		return;
+	}
+
+	if (sender->start_flags & DLM_MFLG_NACK) {
+		log_group(ls, "receive_start %d:%u is NACK", hd->nodeid, seq);
+		return;
+	}
+
+	node_history_start(ls, hd->nodeid);
+	sender->start = 1;
+}
+
+/* Handle a plocks_done message.  It is acted on only while this node both
+   needs plocks and is saving plock messages, and only if its change
+   details match ls->started_change.  On a match the saved plock messages
+   are processed and need_plocks/save_plocks are cleared; on a mismatch
+   the plock data collected so far is discarded. */
+
+static void receive_plocks_done(struct lockspace *ls, struct dlm_header *hd,
+				int len)
+{
+	struct ls_info *li;
+	struct id_info *ids;
+
+	log_dlock(ls, "receive_plocks_done %d:%u flags %x plocks_data %u need %d save %d",
+		  hd->nodeid, hd->msgdata, hd->flags, hd->msgdata2,
+		  ls->need_plocks, ls->save_plocks);
+
+	if (!ls->need_plocks || !ls->save_plocks)
+		return;
+
+	if (!ls->started_change) {
+		/* don't think this should happen */
+		log_elock(ls, "receive_plocks_done %d:%u no started_change",
+			  hd->nodeid, hd->msgdata);
+		return;
+	}
+
+	/* payload follows the header: ls_info, then the id_info entries */
+	li = (struct ls_info *)(hd + 1);
+	ids = (struct id_info *)(li + 1);
+	ls_info_in(li);
+	ids_in(li, ids);
+
+	if (!match_change(ls, ls->started_change, hd, li, ids)) {
+		/* don't think this should happen */
+		log_elock(ls, "receive_plocks_done %d:%u no match_change",
+			  hd->nodeid, hd->msgdata);
+
+		/* remove/free anything we've saved from
+		   receive_plocks_data messages that weren't for us */
+		clear_plocks_data(ls);
+		return;
+	}
+
+	/* a count mismatch is reported but does not stop processing */
+	if (hd->msgdata2 != ls->recv_plocks_data_count) {
+		log_elock(ls, "receive_plocks_done plocks_data %u recv %u",
+			  hd->msgdata2, ls->recv_plocks_data_count);
+	}
+
+	process_saved_plocks(ls);
+	ls->need_plocks = 0;
+	ls->save_plocks = 0;
+
+	log_dlock(ls, "receive_plocks_done %d:%u plocks_data_count %u",
+		  hd->nodeid, hd->msgdata, ls->recv_plocks_data_count);
+}
+
+/* Build and send a message of the given type describing change cg: a
+   dlm_header, an ls_info with the lockspace's started_count and the
+   change's counts, and one id_info per member of the change.
+
+   flags is sent with DLM_MFLG_JOINING added while the lockspace is
+   joining and DLM_MFLG_HAVEPLOCK added when it does not need plocks.
+   msgdata2 is passed through in the header.  Nothing is sent if the
+   buffer cannot be allocated. */
+
+static void send_info(struct lockspace *ls, struct change *cg, int type,
+		      uint32_t flags, uint32_t msgdata2)
+{
+	struct dlm_header *hd;
+	struct ls_info *li;
+	struct id_info *id;
+	struct member *memb;
+	char *buf;
+	int id_count = cg->member_count;
+	int len;
+
+	len = sizeof(struct dlm_header) + sizeof(struct ls_info) +
+	      id_count * sizeof(struct id_info);
+
+	buf = calloc(1, len);
+	if (!buf) {
+		log_error("send_info len %d no mem", len);
+		return;
+	}
+
+	/* the three parts are laid out back to back */
+	hd = (struct dlm_header *)buf;
+	li = (struct ls_info *)(hd + 1);
+	id = (struct id_info *)(li + 1);
+
+	/* fill in header (dlm_send_message handles part of header) */
+
+	if (ls->joining)
+		flags |= DLM_MFLG_JOINING;
+	if (!ls->need_plocks)
+		flags |= DLM_MFLG_HAVEPLOCK;
+
+	hd->type = type;
+	hd->flags = flags;
+	hd->msgdata = cg->seq;
+	hd->msgdata2 = msgdata2;
+
+	/* fill in ls_info */
+
+	li->ls_info_size = cpu_to_le32(sizeof(struct ls_info));
+	li->id_info_size = cpu_to_le32(sizeof(struct id_info));
+	li->id_info_count = cpu_to_le32(id_count);
+	li->started_count = cpu_to_le32(ls->started_count);
+	li->member_count = cpu_to_le32(cg->member_count);
+	li->joined_count = cpu_to_le32(cg->joined_count);
+	li->remove_count = cpu_to_le32(cg->remove_count);
+	li->failed_count = cpu_to_le32(cg->failed_count);
+
+	/* fill in id_info entries */
+
+	list_for_each_entry(memb, &cg->members, list)
+		(id++)->nodeid = cpu_to_le32(memb->nodeid);
+
+	dlm_send_message(ls, buf, len);
+
+	free(buf);
+}
+
+/* Log the counts of change cg, then send our start message for it. */
+static void send_start(struct lockspace *ls, struct change *cg)
+{
+	log_group(ls, "send_start %d:%u counts %u %d %d %d %d",
+		  our_nodeid,
+		  cg->seq,
+		  ls->started_count,
+		  cg->member_count,
+		  cg->joined_count,
+		  cg->remove_count,
+		  cg->failed_count);
+
+	/* a plain start passes no extra flags and a zero msgdata2 */
+	send_info(ls, cg, DLM_MSG_START, 0, 0);
+}
+
+/*
+ * Log and send a plocks_done message for change cg.  plocks_data travels
+ * in the header's msgdata2 field.
+ */
+static void send_plocks_done(struct lockspace *ls, struct change *cg, uint32_t plocks_data)
+{
+	log_dlock(ls, "send_plocks_done %d:%u counts %u %d %d %d %d plocks_data %u",
+		  our_nodeid,
+		  cg->seq,
+		  ls->started_count,
+		  cg->member_count,
+		  cg->joined_count,
+		  cg->remove_count,
+		  cg->failed_count,
+		  plocks_data);
+
+	send_info(ls, cg, DLM_MSG_PLOCKS_DONE, 0, plocks_data);
+}
+
+/*
+ * Return 1 when every member of cg1 is also found in cg2, otherwise 0.
+ * Only this one direction is checked here.
+ */
+static int same_members(struct change *cg1, struct change *cg2)
+{
+	struct member *m;
+	int all_found = 1;
+
+	list_for_each_entry(m, &cg1->members, list) {
+		if (find_memb(cg2, m->nodeid))
+			continue;
+		all_found = 0;
+		break;
+	}
+	return all_found;
+}
+
+/*
+ * Send a nack (a start message flagged DLM_MFLG_NACK) for every older
+ * change on ls->changes whose counts and members match startcg.
+ */
+static void send_nacks(struct lockspace *ls, struct change *startcg)
+{
+	struct change *old;
+
+	list_for_each_entry(old, &ls->changes, list) {
+		/* only changes older than the one being started */
+		if (old->seq >= startcg->seq)
+			continue;
+
+		/* all four counts must match before members are compared */
+		if (old->member_count != startcg->member_count ||
+		    old->joined_count != startcg->joined_count ||
+		    old->remove_count != startcg->remove_count ||
+		    old->failed_count != startcg->failed_count)
+			continue;
+
+		if (!same_members(old, startcg))
+			continue;
+
+		log_group(ls, "send nack old cg %u new cg %u",
+			  old->seq, startcg->seq);
+		send_info(ls, old, DLM_MSG_START, DLM_MFLG_NACK, 0);
+	}
+}
+
+/* Return 1 if any change queued on ls->changes has a non-zero joined_count. */
+static int nodes_added(struct lockspace *ls)
+{
+	struct change *pending;
+	int added = 0;
+
+	list_for_each_entry(pending, &ls->changes, list) {
+		if (!pending->joined_count)
+			continue;
+		added = 1;
+		break;
+	}
+	return added;
+}
+
+/*
+ * Decide this node's role in plock syncing for the first change on
+ * ls->changes, once all start messages for it have arrived.
+ *
+ * Does nothing when plocks are disabled globally or for this lockspace.
+ * Otherwise it either marks us as not needing plock state, starts saving
+ * plock messages until the data node's plocks_done arrives, or (when we
+ * are the data node) sends plock data and the plocks_done message.
+ */
+static void prepare_plocks(struct lockspace *ls)
+{
+	struct change *cg = list_first_entry(&ls->changes, struct change, list);
+	struct member *memb;
+	/* must start at 0: send_all_plocks_data() is skipped when no nodes
+	   were added, and the value is still sent in plocks_done */
+	uint32_t plocks_data = 0;
+
+	if (!cfgd_enable_plock || ls->disable_plock)
+		return;
+
+	log_dlock(ls, "prepare_plocks");
+
+	/* if we're the only node in the lockspace, then we are the data_node
+	   and we don't need plocks */
+
+	if (cg->member_count == 1) {
+		list_for_each_entry(memb, &cg->members, list) {
+			if (memb->nodeid != our_nodeid) {
+				log_elock(ls, "prepare_plocks other member %d",
+					  memb->nodeid);
+			}
+		}
+		ls->plock_data_node = our_nodeid;
+		ls->need_plocks = 0;
+		return;
+	}
+
+	/* the low node that indicated it had plock state in its last
+	   start message is the data_node */
+
+	set_plock_data_node(ls);
+
+	/* there is no node with plock state, so there's no syncing to do */
+
+	if (!ls->plock_data_node) {
+		ls->need_plocks = 0;
+		ls->save_plocks = 0;
+		return;
+	}
+
+	/* We save all plock messages received after our own confchg and
+	   apply them after we receive the plocks_done message from the
+	   data_node. */
+
+	if (ls->need_plocks) {
+		log_dlock(ls, "save_plocks start");
+		ls->save_plocks = 1;
+		return;
+	}
+
+	/* only the data node sends plock state and plocks_done */
+	if (ls->plock_data_node != our_nodeid)
+		return;
+
+	if (nodes_added(ls))
+		send_all_plocks_data(ls, cg->seq, &plocks_data);
+
+	send_plocks_done(ls, cg, plocks_data);
+}
+
+/*
+ * Advance the first change on ls->changes through its two states:
+ * once the wait conditions are done, send nacks and our start and move
+ * to waiting for messages; once the messages are done, start the kernel,
+ * prepare plocks and clean up the change list.
+ */
+static void apply_changes(struct lockspace *ls)
+{
+	struct change *first;
+
+	if (list_empty(&ls->changes))
+		return;
+	first = list_first_entry(&ls->changes, struct change, list);
+
+	if (first->state == CGST_WAIT_CONDITIONS) {
+		if (!wait_conditions_done(ls))
+			return;
+		send_nacks(ls, first);
+		send_start(ls, first);
+		first->state = CGST_WAIT_MESSAGES;
+	} else if (first->state == CGST_WAIT_MESSAGES) {
+		if (!wait_messages_done(ls))
+			return;
+		start_kernel(ls);
+		prepare_plocks(ls);
+		cleanup_changes(ls);
+	} else {
+		log_error("apply_changes invalid state %d", first->state);
+	}
+}
+
+/*
+ * Clear the three poll_* flags, then run apply_changes() on every
+ * lockspace that has queued changes.
+ */
+void process_lockspace_changes(void)
+{
+	struct lockspace *cur, *next;
+
+	poll_fencing = 0;
+	poll_quorum = 0;
+	poll_fs = 0;
+
+	list_for_each_entry_safe(cur, next, &lockspaces, list) {
+		if (list_empty(&cur->changes))
+			continue;
+		apply_changes(cur);
+	}
+}
+
+/*
+ * Allocate a change struct describing one cpg configuration change and
+ * add it to the head of ls->changes.
+ *
+ * The new change records the member, left and joined lists; node history
+ * is updated for nodes that left, failed or joined, and a node that left
+ * with CPG_REASON_PROCDOWN is kicked from the cluster.
+ *
+ * Returns 0 and stores the change in *cg_out on success.  Returns -ENOMEM
+ * when an allocation fails, or -ENOENT when a joined node is not in the
+ * member list; *cg_out is not written on failure.
+ */
+static int add_change(struct lockspace *ls,
+		      const struct cpg_address *member_list,
+		      size_t member_list_entries,
+		      const struct cpg_address *left_list,
+		      size_t left_list_entries,
+		      const struct cpg_address *joined_list,
+		      size_t joined_list_entries,
+		      struct change **cg_out)
+{
+	struct change *cg;
+	struct member *memb;
+	size_t i;
+	int error;
+
+	cg = malloc(sizeof(struct change));
+	if (!cg) {
+		/* nothing has been built yet, so do not hand a NULL change
+		   to free_cg() through the shared failure path */
+		log_error("no memory");
+		return -ENOMEM;
+	}
+	memset(cg, 0, sizeof(struct change));
+	INIT_LIST_HEAD(&cg->members);
+	INIT_LIST_HEAD(&cg->removed);
+	cg->state = CGST_WAIT_CONDITIONS;
+	cg->create_time = time(NULL);
+	/* a seq of zero is skipped, including after the counter wraps */
+	cg->seq = ++ls->change_seq;
+	if (!cg->seq)
+		cg->seq = ++ls->change_seq;
+
+	cg->member_count = member_list_entries;
+	cg->joined_count = joined_list_entries;
+	cg->remove_count = left_list_entries;
+
+	for (i = 0; i < member_list_entries; i++) {
+		memb = malloc(sizeof(struct member));
+		if (!memb)
+			goto fail_nomem;
+		memset(memb, 0, sizeof(struct member));
+		memb->nodeid = member_list[i].nodeid;
+		list_add_tail(&memb->list, &cg->members);
+	}
+
+	for (i = 0; i < left_list_entries; i++) {
+		memb = malloc(sizeof(struct member));
+		if (!memb)
+			goto fail_nomem;
+		memset(memb, 0, sizeof(struct member));
+		memb->nodeid = left_list[i].nodeid;
+		/* node or process going down counts as a failure */
+		if (left_list[i].reason == CPG_REASON_NODEDOWN ||
+		    left_list[i].reason == CPG_REASON_PROCDOWN) {
+			memb->failed = 1;
+			cg->failed_count++;
+		}
+		list_add_tail(&memb->list, &cg->removed);
+
+		if (memb->failed)
+			node_history_fail(ls, memb->nodeid, cg,
+					  left_list[i].reason);
+		else
+			node_history_left(ls, memb->nodeid, cg);
+
+		log_group(ls, "add_change cg %u remove nodeid %d reason %d",
+			  cg->seq, memb->nodeid, left_list[i].reason);
+
+		if (left_list[i].reason == CPG_REASON_PROCDOWN)
+			kick_node_from_cluster(memb->nodeid);
+	}
+
+	for (i = 0; i < joined_list_entries; i++) {
+		/* every joined node must already be on the member list */
+		memb = find_memb(cg, joined_list[i].nodeid);
+		if (!memb) {
+			log_error("no member %d", joined_list[i].nodeid);
+			error = -ENOENT;
+			goto fail;
+		}
+		memb->added = 1;
+
+		if (memb->nodeid == our_nodeid)
+			cg->we_joined = 1;
+		else
+			node_history_init(ls, memb->nodeid, cg);
+
+		log_group(ls, "add_change cg %u joined nodeid %d", cg->seq,
+			  memb->nodeid);
+	}
+
+	/* when we are the joiner, start history for every current member */
+	if (cg->we_joined) {
+		log_group(ls, "add_change cg %u we joined", cg->seq);
+		list_for_each_entry(memb, &cg->members, list)
+			node_history_init(ls, memb->nodeid, cg);
+	}
+
+	log_group(ls, "add_change cg %u counts member %d joined %d remove %d "
+		  "failed %d", cg->seq, cg->member_count, cg->joined_count,
+		  cg->remove_count, cg->failed_count);
+
+	list_add(&cg->list, &ls->changes);
+	*cg_out = cg;
+	return 0;
+
+ fail_nomem:
+	log_error("no memory");
+	error = -ENOMEM;
+ fail:
+	free_cg(cg);
+	return error;
+}
+
+/* Return 1 if our_nodeid appears among the left_list entries, else 0. */
+static int we_left(const struct cpg_address *left_list,
+		   size_t left_list_entries)
+{
+	const struct cpg_address *addr = left_list;
+	const struct cpg_address *end = left_list + left_list_entries;
+
+	while (addr != end) {
+		if (addr->nodeid == our_nodeid)
+			return 1;
+		addr++;
+	}
+	return 0;
+}
+/*
+ * cpg configuration-change callback for a lockspace group.
+ *
+ * Logs the change and looks up the lockspace by cpg handle.  If we are
+ * leaving and appear in left_list, the lockspace is torn down and freed
+ * here.  Otherwise the change is queued with add_change(), the kernel is
+ * stopped for the new change seq, plocks of removed nodes are purged, and
+ * apply_changes() is run.
+ */
+static void confchg_cb(cpg_handle_t handle,
+ const struct cpg_name *group_name,
+ const struct cpg_address *member_list,
+ size_t member_list_entries,
+ const struct cpg_address *left_list,
+ size_t left_list_entries,
+ const struct cpg_address *joined_list,
+ size_t joined_list_entries)
+{
+ struct lockspace *ls;
+ struct change *cg;
+ struct member *memb;
+ int rv;
+
+ log_config(group_name, member_list, member_list_entries,
+ left_list, left_list_entries,
+ joined_list, joined_list_entries);
+
+ ls = find_ls_handle(handle);
+ if (!ls) {
+ log_error("confchg_cb no lockspace for cpg %s",
+ group_name->value);
+ return;
+ }
+
+ if (ls->leaving && we_left(left_list, left_list_entries)) {
+ /* we called cpg_leave(), and this should be the final
+ cpg callback we receive */
+ log_group(ls, "confchg for our leave");
+ stop_kernel(ls, 0);
+ set_configfs_members(ls->name, 0, NULL, 0, NULL);
+ set_sysfs_event_done(ls->name, 0);
+ cpg_finalize(ls->cpg_handle);
+ client_dead(ls->cpg_client);
+ purge_plocks(ls, our_nodeid, 1);
+ list_del(&ls->list);
+ free_ls(ls); /* ls must not be used after this point */
+ return;
+ }
+
+ rv = add_change(ls, member_list, member_list_entries,
+ left_list, left_list_entries,
+ joined_list, joined_list_entries, &cg);
+ if (rv) /* add_change has already logged the error; cg is not set */
+ return;
+
+ stop_kernel(ls, cg->seq);
+
+ list_for_each_entry(memb, &cg->removed, list)
+ purge_plocks(ls, memb->nodeid, 0);
+
+ apply_changes(ls);
+
+#if 0
+ deadlk_confchg(ls, member_list, member_list_entries,
+ left_list, left_list_entries,
+ joined_list, joined_list_entries);
+#endif
+}
+
+/*
+ * Convert a received dlm_header from little-endian to host byte order,
+ * in place: the three 16-bit version words and type, then the 32-bit
+ * nodeid, to_nodeid, global_id, flags, msgdata and msgdata2 fields.
+ */
+static void dlm_header_in(struct dlm_header *hd)
+{
+	int v;
+
+	/* 16-bit fields */
+	for (v = 0; v < 3; v++)
+		hd->version[v] = le16_to_cpu(hd->version[v]);
+	hd->type = le16_to_cpu(hd->type);
+
+	/* 32-bit fields */
+	hd->nodeid = le32_to_cpu(hd->nodeid);
+	hd->to_nodeid = le32_to_cpu(hd->to_nodeid);
+	hd->global_id = le32_to_cpu(hd->global_id);
+	hd->flags = le32_to_cpu(hd->flags);
+	hd->msgdata = le32_to_cpu(hd->msgdata);
+	hd->msgdata2 = le32_to_cpu(hd->msgdata2);
+}
+
+/* Per-lockspace cpg message delivery callback.
+
+ The message is dropped, with an error logged, when no lockspace matches
+ the cpg handle, when it is shorter than a dlm_header, when its
+ major.minor version differs from our_protocol.daemon_run, or when the
+ nodeid in the header differs from the sender nodeid passed in by cpg.
+ Otherwise the header is converted to host order in place, the message is
+ dispatched on hd->type, and apply_changes() is run afterwards.
+
+ after our join confchg, we want to ignore plock messages (see need_plocks
+ checks below) until the point in time where the ckpt_node saves plock
+ state (final start message received); at this time we want to shift from
+ ignoring plock messages to saving plock messages to apply on top of the
+ plock state that we read. */
+
+static void deliver_cb(cpg_handle_t handle,
+ const struct cpg_name *group_name,
+ uint32_t nodeid, uint32_t pid,
+ void *data, size_t len)
+{
+ struct lockspace *ls;
+ struct dlm_header *hd;
+ int ignore_plock;
+
+ ls = find_ls_handle(handle);
+ if (!ls) {
+ log_error("deliver_cb no ls for cpg %s", group_name->value);
+ return;
+ }
+
+ if (len < sizeof(*hd)) {
+ log_error("deliver_cb short message %zd", len);
+ return;
+ }
+
+ hd = (struct dlm_header *)data;
+ dlm_header_in(hd);
+
+ if (hd->version[0] != our_protocol.daemon_run[0] ||
+ hd->version[1] != our_protocol.daemon_run[1]) { /* version[2] is logged but not compared */
+ log_error("reject message from %d version %u.%u.%u vs %u.%u.%u",
+ nodeid, hd->version[0], hd->version[1],
+ hd->version[2], our_protocol.daemon_run[0],
+ our_protocol.daemon_run[1],
+ our_protocol.daemon_run[2]);
+ return;
+ }
+
+ if (hd->nodeid != nodeid) {
+ log_error("bad msg nodeid %d %d", hd->nodeid, nodeid);
+ return;
+ }
+
+ ignore_plock = 0;
+
+ switch (hd->type) {
+ case DLM_MSG_START:
+ receive_start(ls, hd, len);
+ break;
+
+ case DLM_MSG_PLOCK:
+ if (ls->disable_plock)
+ break;
+ if (ls->need_plocks && !ls->save_plocks) { /* joined but not yet saving: skip */
+ ignore_plock = 1;
+ break;
+ }
+ if (cfgd_enable_plock)
+ receive_plock(ls, hd, len);
+ else
+ log_error("msg %d nodeid %d enable_plock %d",
+ hd->type, nodeid, cfgd_enable_plock);
+ break;
+
+ case DLM_MSG_PLOCK_OWN:
+ if (ls->disable_plock)
+ break;
+ if (ls->need_plocks && !ls->save_plocks) {
+ ignore_plock = 1;
+ break;
+ }
+ if (cfgd_enable_plock && cfgd_plock_ownership)
+ receive_own(ls, hd, len);
+ else
+ log_error("msg %d nodeid %d enable_plock %d owner %d",
+ hd->type, nodeid, cfgd_enable_plock,
+ cfgd_plock_ownership);
+ break;
+
+ case DLM_MSG_PLOCK_DROP:
+ if (ls->disable_plock)
+ break;
+ if (ls->need_plocks && !ls->save_plocks) {
+ ignore_plock = 1;
+ break;
+ }
+ if (cfgd_enable_plock && cfgd_plock_ownership)
+ receive_drop(ls, hd, len);
+ else
+ log_error("msg %d nodeid %d enable_plock %d owner %d",
+ hd->type, nodeid, cfgd_enable_plock,
+ cfgd_plock_ownership);
+ break;
+
+ case DLM_MSG_PLOCK_SYNC_LOCK:
+ case DLM_MSG_PLOCK_SYNC_WAITER:
+ if (ls->disable_plock)
+ break;
+ if (ls->need_plocks && !ls->save_plocks) {
+ ignore_plock = 1;
+ break;
+ }
+ if (cfgd_enable_plock && cfgd_plock_ownership)
+ receive_sync(ls, hd, len);
+ else
+ log_error("msg %d nodeid %d enable_plock %d owner %d",
+ hd->type, nodeid, cfgd_enable_plock,
+ cfgd_plock_ownership);
+ break;
+
+ case DLM_MSG_PLOCKS_DATA: /* not subject to the need_plocks ignore check */
+ if (ls->disable_plock)
+ break;
+ if (cfgd_enable_plock)
+ receive_plocks_data(ls, hd, len);
+ else
+ log_error("msg %d nodeid %d enable_plock %d",
+ hd->type, nodeid, cfgd_enable_plock);
+ break;
+
+ case DLM_MSG_PLOCKS_DONE: /* not subject to the need_plocks ignore check */
+ if (ls->disable_plock)
+ break;
+ if (cfgd_enable_plock)
+ receive_plocks_done(ls, hd, len);
+ else
+ log_error("msg %d nodeid %d enable_plock %d",
+ hd->type, nodeid, cfgd_enable_plock);
+ break;
+
+#if 0
+ case DLM_MSG_DEADLK_CYCLE_START:
+ if (cfgd_enable_deadlk)
+ receive_cycle_start(ls, hd, len);
+ else
+ log_error("msg %d nodeid %d enable_deadlk %d",
+ hd->type, nodeid, cfgd_enable_deadlk);
+ break;
+
+ case DLM_MSG_DEADLK_CYCLE_END:
+ if (cfgd_enable_deadlk)
+ receive_cycle_end(ls, hd, len);
+ else
+ log_error("msg %d nodeid %d enable_deadlk %d",
+ hd->type, nodeid, cfgd_enable_deadlk);
+ break;
+
+ case DLM_MSG_DEADLK_CHECKPOINT_READY:
+ if (cfgd_enable_deadlk)
+ receive_checkpoint_ready(ls, hd, len);
+ else
+ log_error("msg %d nodeid %d enable_deadlk %d",
+ hd->type, nodeid, cfgd_enable_deadlk);
+ break;
+
+ case DLM_MSG_DEADLK_CANCEL_LOCK:
+ if (cfgd_enable_deadlk)
+ receive_cancel_lock(ls, hd, len);
+ else
+ log_error("msg %d nodeid %d enable_deadlk %d",
+ hd->type, nodeid, cfgd_enable_deadlk);
+ break;
+#endif
+
+ default:
+ log_error("unknown msg type %d", hd->type);
+ }
+
+ if (ignore_plock)
+ log_plock(ls, "msg %s nodeid %d need_plock ignore",
+ msg_name(hd->type), nodeid);
+
+ apply_changes(ls); /* runs after every message that passed the header checks */
+}
+
+/* Callbacks passed to cpg_initialize() for each lockspace cpg handle. */
+static cpg_callbacks_t cpg_callbacks = {
+	.cpg_confchg_fn = confchg_cb,
+	.cpg_deliver_fn = deliver_cb,
+};
+
+/*
+ * Query the daemon cpg handle for its flow control state and mirror it
+ * in message_flow_control_on (1 or 0), logging each on/off transition.
+ * The flag is left unchanged when the query fails.
+ */
+void update_flow_control_status(void)
+{
+	cpg_flow_control_state_t state;
+	cpg_error_t error;
+	int enabled;
+
+	error = cpg_flow_control_state_get(cpg_handle_daemon, &state);
+	if (error != CPG_OK) {
+		log_error("cpg_flow_control_state_get %d", error);
+		return;
+	}
+
+	enabled = (state == CPG_FLOW_CONTROL_ENABLED) ? 1 : 0;
+
+	/* only log when the state actually flips */
+	if (enabled && message_flow_control_on == 0) {
+		log_debug("flow control on");
+	} else if (!enabled && message_flow_control_on) {
+		log_debug("flow control off");
+	}
+
+	message_flow_control_on = enabled;
+}
+
+/*
+ * Client callback for a lockspace cpg fd: dispatch all pending cpg events
+ * for the lockspace registered under client index ci, then refresh the
+ * flow control status.
+ */
+static void process_cpg_lockspace(int ci)
+{
+	struct lockspace *ls = find_ls_ci(ci);
+	cpg_error_t error;
+
+	if (!ls) {
+		log_error("process_lockspace_cpg no lockspace for ci %d", ci);
+		return;
+	}
+
+	error = cpg_dispatch(ls->cpg_handle, CPG_DISPATCH_ALL);
+	if (error == CPG_OK) {
+		update_flow_control_status();
+		return;
+	}
+
+	log_error("cpg_dispatch error %d", error);
+}
+
+/* received an "online" uevent from dlm-kernel */
+
+/*
+ * Join the cpg for lockspace ls ("dlm:ls:<name>").
+ *
+ * Initializes a cpg handle, registers its fd as a client, adds ls to the
+ * lockspaces list, and calls cpg_join(), retrying once a second while
+ * cpg reports CPG_ERR_TRY_AGAIN.
+ *
+ * Returns 0 on success.  On failure returns -1 after reporting the result
+ * through set_sysfs_event_done() and freeing ls with free_ls(); the
+ * caller must not use ls after a failed call.
+ */
+int dlm_join_lockspace(struct lockspace *ls)
+{
+	cpg_error_t error;
+	cpg_handle_t h;
+	struct cpg_name name;
+	int i = 0, fd, ci, rv;
+	int unused;
+
+	rv = fence_in_progress(&unused);
+	if (cfgd_enable_fencing && rv < 0) {
+		log_error("dlm_join_lockspace no fence domain");
+		rv = -1;
+		goto fail_free;
+	}
+
+	error = cpg_initialize(&h, &cpg_callbacks);
+	if (error != CPG_OK) {
+		log_error("cpg_initialize error %d", error);
+		rv = -1;
+		goto fail_free;
+	}
+
+	cpg_fd_get(h, &fd);
+
+	ci = client_add(fd, process_cpg_lockspace, NULL);
+
+	list_add(&ls->list, &lockspaces);
+
+	ls->cpg_handle = h;
+	ls->cpg_client = ci;
+	ls->cpg_fd = fd;
+	ls->kernel_stopped = 1;
+	ls->need_plocks = 1;
+	ls->joining = 1;
+
+	memset(&name, 0, sizeof(name));
+	/* bounded so an over-long lockspace name cannot overrun name.value */
+	snprintf(name.value, sizeof(name.value), "dlm:ls:%s", ls->name);
+	name.length = strlen(name.value) + 1;
+
+	/* TODO: allow global_id to be set in cluster.conf? */
+	ls->global_id = cpgname_to_crc(name.value, name.length);
+
+ retry:
+	error = cpg_join(h, &name);
+	if (error == CPG_ERR_TRY_AGAIN) {
+		sleep(1);
+		if (!(++i % 10))
+			log_error("cpg_join error retrying");
+		goto retry;
+	}
+	if (error != CPG_OK) {
+		log_error("cpg_join error %d", error);
+		/* the handle is finalized once, in the fail path below */
+		rv = -1;
+		goto fail;
+	}
+
+	return 0;
+
+ fail:
+	list_del(&ls->list);
+	client_dead(ci);
+	cpg_finalize(h);
+ fail_free:
+	set_sysfs_event_done(ls->name, rv);
+	free_ls(ls);
+	return rv;
+}
+
+/* received an "offline" uevent from dlm-kernel */
+
+/*
+ * Mark ls as leaving and call cpg_leave() on its cpg, retrying once a
+ * second while cpg reports CPG_ERR_TRY_AGAIN.  Any other error is logged
+ * only; the function always returns 0.
+ */
+int dlm_leave_lockspace(struct lockspace *ls)
+{
+	cpg_error_t error;
+	struct cpg_name name;
+	int tries = 0;
+
+	ls->leaving = 1;
+
+	memset(&name, 0, sizeof(name));
+	sprintf(name.value, "dlm:ls:%s", ls->name);
+	name.length = strlen(name.value) + 1;
+
+	for (;;) {
+		error = cpg_leave(ls->cpg_handle, &name);
+		if (error != CPG_ERR_TRY_AGAIN)
+			break;
+
+		sleep(1);
+		/* log every tenth retry */
+		if (!(++tries % 10))
+			log_error("cpg_leave error retrying");
+	}
+
+	if (error != CPG_OK)
+		log_error("cpg_leave error %d", error);
+
+	return 0;
+}
+
+/* Look up nodeid on the daemon_nodes list; return its node or NULL. */
+static struct node *get_node_daemon(int nodeid)
+{
+	struct node *cur;
+	struct node *found = NULL;
+
+	list_for_each_entry(cur, &daemon_nodes, list) {
+		if (cur->nodeid != nodeid)
+			continue;
+		found = cur;
+		break;
+	}
+	return found;
+}
+
+/*
+ * Add a zeroed node entry for nodeid to the tail of daemon_nodes, unless
+ * one already exists.  An allocation failure is logged and otherwise
+ * ignored.
+ */
+static void add_node_daemon(int nodeid)
+{
+	struct node *fresh;
+
+	if (get_node_daemon(nodeid))
+		return;
+
+	/* calloc yields the same zero-filled struct as malloc + memset */
+	fresh = calloc(1, sizeof(*fresh));
+	if (!fresh) {
+		log_error("add_node_daemon no mem");
+		return;
+	}
+
+	fresh->nodeid = nodeid;
+	list_add_tail(&fresh->list, &daemon_nodes);
+}
+
+/*
+ * Convert the four 16-bit fields of a received protocol_version from
+ * little-endian to host byte order, in place.
+ */
+static void pv_in(struct protocol_version *pv)
+{
+	pv->flags = le16_to_cpu(pv->flags);
+	pv->patch = le16_to_cpu(pv->patch);
+	pv->minor = le16_to_cpu(pv->minor);
+	pv->major = le16_to_cpu(pv->major);
+}
+
+/*
+ * Convert the four 16-bit fields of a protocol_version from host to
+ * little-endian byte order, in place, before sending.
+ */
+static void pv_out(struct protocol_version *pv)
+{
+	pv->flags = cpu_to_le16(pv->flags);
+	pv->patch = cpu_to_le16(pv->patch);
+	pv->minor = cpu_to_le16(pv->minor);
+	pv->major = cpu_to_le16(pv->major);
+}
+
+/*
+ * Byte-swap a received protocol struct to host order, in place: the
+ * dm_ver, km_ver, dr_ver and kr_ver versions each go through pv_in().
+ */
+static void protocol_in(struct protocol *proto)
+{
+	struct protocol_version *vers[] = {
+		&proto->dm_ver,
+		&proto->km_ver,
+		&proto->dr_ver,
+		&proto->kr_ver,
+	};
+	unsigned int k;
+
+	for (k = 0; k < sizeof(vers) / sizeof(vers[0]); k++)
+		pv_in(vers[k]);
+}
+
+/*
+ * Byte-swap a protocol struct to little-endian, in place, before it is
+ * sent: the dm_ver, km_ver, dr_ver and kr_ver versions each go through
+ * pv_out().
+ */
+static void protocol_out(struct protocol *proto)
+{
+	struct protocol_version *vers[] = {
+		&proto->dm_ver,
+		&proto->km_ver,
+		&proto->dr_ver,
+		&proto->kr_ver,
+	};
+	unsigned int k;
+
+	for (k = 0; k < sizeof(vers) / sizeof(vers[0]); k++)
+		pv_out(vers[k]);
+}
+
+/* go through member list saved in last confchg, see if we have received a
+ proto message from each */
+
+/*
+ * Return 1 when every node in daemon_member has a saved protocol message
+ * (a non-zero daemon_max major), else 0.  Also returns 0 when there are
+ * no daemon members, or when a member has no entry in daemon_nodes.
+ */
+static int all_protocol_messages(void)
+{
+	struct node *node;
+	int idx = 0;
+
+	if (!daemon_member_count)
+		return 0;
+
+	while (idx < daemon_member_count) {
+		node = get_node_daemon(daemon_member[idx].nodeid);
+		if (!node) {
+			log_error("all_protocol_messages no node %d",
+				  daemon_member[idx].nodeid);
+			return 0;
+		}
+
+		/* a zero major means no proto message saved from this node */
+		if (!node->proto.daemon_max[0])
+			return 0;
+
+		idx++;
+	}
+	return 1;
+}
+/*
+ * Choose the protocol version to propose, from the max versions saved for
+ * every node in daemon_member.
+ *
+ * Works in three passes, separately for daemon and kernel versions: the
+ * lowest major; then the lowest minor among nodes with that major; then
+ * the lowest patch among nodes with that major.minor.  The results are
+ * copied into proto->daemon_run and proto->kernel_run.
+ *
+ * Returns 0 on success, or -1 when a member has no daemon_nodes entry or
+ * when any chosen component is zero.
+ */
+static int pick_min_protocol(struct protocol *proto)
+{
+ uint16_t mind[4];
+ uint16_t mink[4];
+ struct node *node;
+ int i;
+
+ memset(&mind, 0, sizeof(mind)); /* zero means "not chosen yet" in the passes below */
+ memset(&mink, 0, sizeof(mink));
+
+ /* first choose the minimum major */
+
+ for (i = 0; i < daemon_member_count; i++) {
+ node = get_node_daemon(daemon_member[i].nodeid);
+ if (!node) {
+ log_error("pick_min_protocol no node %d",
+ daemon_member[i].nodeid);
+ return -1;
+ }
+
+ if (!mind[0] || node->proto.daemon_max[0] < mind[0])
+ mind[0] = node->proto.daemon_max[0];
+
+ if (!mink[0] || node->proto.kernel_max[0] < mink[0])
+ mink[0] = node->proto.kernel_max[0];
+ }
+
+ if (!mind[0] || !mink[0]) {
+ log_error("pick_min_protocol zero major number");
+ return -1;
+ }
+
+ /* second pick the minimum minor with the chosen major */
+
+ for (i = 0; i < daemon_member_count; i++) {
+ node = get_node_daemon(daemon_member[i].nodeid);
+ if (!node)
+ continue; /* a missing node already caused -1 in the first pass */
+
+ if (mind[0] == node->proto.daemon_max[0]) {
+ if (!mind[1] || node->proto.daemon_max[1] < mind[1])
+ mind[1] = node->proto.daemon_max[1];
+ }
+
+ if (mink[0] == node->proto.kernel_max[0]) {
+ if (!mink[1] || node->proto.kernel_max[1] < mink[1])
+ mink[1] = node->proto.kernel_max[1];
+ }
+ }
+
+ if (!mind[1] || !mink[1]) {
+ log_error("pick_min_protocol zero minor number");
+ return -1;
+ }
+
+ /* third pick the minimum patch with the chosen major.minor */
+
+ for (i = 0; i < daemon_member_count; i++) {
+ node = get_node_daemon(daemon_member[i].nodeid);
+ if (!node)
+ continue;
+
+ if (mind[0] == node->proto.daemon_max[0] &&
+ mind[1] == node->proto.daemon_max[1]) {
+ if (!mind[2] || node->proto.daemon_max[2] < mind[2])
+ mind[2] = node->proto.daemon_max[2];
+ }
+
+ if (mink[0] == node->proto.kernel_max[0] &&
+ mink[1] == node->proto.kernel_max[1]) {
+ if (!mink[2] || node->proto.kernel_max[2] < mink[2])
+ mink[2] = node->proto.kernel_max[2];
+ }
+ }
+
+ if (!mind[2] || !mink[2]) {
+ log_error("pick_min_protocol zero patch number");
+ return -1;
+ }
+
+ memcpy(&proto->daemon_run, &mind, sizeof(mind)); /* copies all four words; the fourth is still zero */
+ memcpy(&proto->kernel_run, &mink, sizeof(mink));
+ return 0;
+}
+
+/*
+ * Handle a DLM_MSG_PROTOCOL message from the daemon cpg.
+ *
+ * hd points at the received message (header already in host order) and
+ * len is its total length.  The protocol payload following the header is
+ * byte-swapped in place and validated.  If we have no run version yet and
+ * the message carries one, it is adopted as ours; if neither side has a
+ * run version, the sender's protocol is saved on its daemon node entry so
+ * that a minimum can be picked later.
+ */
+static void receive_protocol(struct dlm_header *hd, int len)
+{
+	struct protocol *p;
+	struct node *node;
+
+	/* check the length before touching the payload, so a short message
+	   is never read or byte-swapped past its end */
+	if (len < 0 ||
+	    (size_t)len < sizeof(struct dlm_header) + sizeof(struct protocol)) {
+		log_error("receive_protocol invalid len %d from %d",
+			  len, hd->nodeid);
+		return;
+	}
+
+	p = (struct protocol *)((char *)hd + sizeof(struct dlm_header));
+	protocol_in(p);
+
+	/* zero is an invalid version value */
+
+	if (!p->daemon_max[0] || !p->daemon_max[1] || !p->daemon_max[2] ||
+	    !p->kernel_max[0] || !p->kernel_max[1] || !p->kernel_max[2]) {
+		log_error("receive_protocol invalid max value from %d "
+			  "daemon %u.%u.%u kernel %u.%u.%u", hd->nodeid,
+			  p->daemon_max[0], p->daemon_max[1], p->daemon_max[2],
+			  p->kernel_max[0], p->kernel_max[1], p->kernel_max[2]);
+		return;
+	}
+
+	/* the run values will be zero until a version is set, after
+	   which none of the run values can be zero */
+
+	if (p->daemon_run[0] && (!p->daemon_run[1] || !p->daemon_run[2] ||
+	    !p->kernel_run[0] || !p->kernel_run[1] || !p->kernel_run[2])) {
+		log_error("receive_protocol invalid run value from %d "
+			  "daemon %u.%u.%u kernel %u.%u.%u", hd->nodeid,
+			  p->daemon_run[0], p->daemon_run[1], p->daemon_run[2],
+			  p->kernel_run[0], p->kernel_run[1], p->kernel_run[2]);
+		return;
+	}
+
+	/* if we have zero run values, and this msg has non-zero run values,
+	   then adopt them as ours; otherwise save this proto message */
+
+	if (our_protocol.daemon_run[0])
+		return;
+
+	if (p->daemon_run[0]) {
+		memcpy(&our_protocol.daemon_run, &p->daemon_run,
+		       sizeof(struct protocol_version));
+		memcpy(&our_protocol.kernel_run, &p->kernel_run,
+		       sizeof(struct protocol_version));
+		log_debug("run protocol from nodeid %d", hd->nodeid);
+		return;
+	}
+
+	/* save this node's proto so we can tell when we've got all, and
+	   use it to select a minimum protocol from all */
+
+	node = get_node_daemon(hd->nodeid);
+	if (!node) {
+		log_error("receive_protocol no node %d", hd->nodeid);
+		return;
+	}
+	memcpy(&node->proto, p, sizeof(struct protocol));
+}
+
+/*
+ * Send a DLM_MSG_PROTOCOL message carrying a copy of *proto on the daemon
+ * cpg.  The copy is converted to little-endian; *proto itself is left
+ * untouched.  An allocation failure is logged and the message is not
+ * sent.
+ */
+static void send_protocol(struct protocol *proto)
+{
+	struct dlm_header *hd;
+	struct protocol *pr;
+	char *buf;
+	int len;
+
+	len = sizeof(struct dlm_header) + sizeof(struct protocol);
+	buf = malloc(len);
+	if (!buf) {
+		log_error("send_protocol no mem %d", len);
+		return;
+	}
+	memset(buf, 0, len);
+
+	hd = (struct dlm_header *)buf;
+	pr = (struct protocol *)(buf + sizeof(*hd));
+
+	hd->type = cpu_to_le16(DLM_MSG_PROTOCOL);
+	hd->nodeid = cpu_to_le32(our_nodeid);
+
+	memcpy(pr, proto, sizeof(struct protocol));
+	protocol_out(pr);
+
+	_send_message(cpg_handle_daemon, buf, len, DLM_MSG_PROTOCOL);
+
+	/* the buffer is ours to release, as send_info does after sending */
+	free(buf);
+}
+/*
+ * Establish the protocol version this daemon will run.
+ *
+ * Loops, processing only daemon cpg events, until our_protocol.daemon_run
+ * has a non-zero major.  Once a protocol message has been saved from every
+ * member, and at most once per call, a minimum protocol is picked and
+ * proposed with send_protocol().  After the loop the run version is
+ * checked against our max version, logged, and sent to the group.
+ *
+ * Returns 0 on success.  Returns a negative value when pick_min_protocol
+ * fails, when poll fails or reports an error event, when interrupted
+ * while daemon_quit is set, or when the run version is incompatible with
+ * our max version.
+ */
+int set_protocol(void)
+{
+ struct protocol proto;
+ struct pollfd pollfd;
+ int sent_proposal = 0;
+ int rv;
+
+ memset(&pollfd, 0, sizeof(pollfd));
+ pollfd.fd = cpg_fd_daemon;
+ pollfd.events = POLLIN;
+
+ while (1) {
+ if (our_protocol.daemon_run[0])
+ break;
+
+ if (!sent_proposal && all_protocol_messages()) {
+ /* propose a protocol; look through info from all
+ nodes and pick the min for both daemon and kernel,
+ and propose that */
+
+ sent_proposal = 1;
+
+ /* copy our max values */
+ memcpy(&proto, &our_protocol, sizeof(struct protocol));
+
+ rv = pick_min_protocol(&proto);
+ if (rv < 0)
+ return rv;
+
+ log_debug("set_protocol member_count %d propose "
+ "daemon %u.%u.%u kernel %u.%u.%u",
+ daemon_member_count,
+ proto.daemon_run[0], proto.daemon_run[1],
+ proto.daemon_run[2], proto.kernel_run[0],
+ proto.kernel_run[1], proto.kernel_run[2]);
+
+ send_protocol(&proto);
+ }
+
+ /* only process messages/events from daemon cpg until protocol
+ is established */
+
+ rv = poll(&pollfd, 1, -1); /* -1: block with no timeout */
+ if (rv == -1 && errno == EINTR) {
+ if (daemon_quit)
+ return -1;
+ continue;
+ }
+ if (rv < 0) {
+ log_error("set_protocol poll errno %d", errno);
+ return -1;
+ }
+
+ if (pollfd.revents & POLLIN)
+ process_cpg_daemon(0);
+ if (pollfd.revents & (POLLERR | POLLHUP | POLLNVAL)) {
+ log_error("set_protocol poll revents %u",
+ pollfd.revents);
+ return -1;
+ }
+ }
+
+ /* compatible means: same major, and run minor not above our max minor */
+ if (our_protocol.daemon_run[0] != our_protocol.daemon_max[0] ||
+ our_protocol.daemon_run[1] > our_protocol.daemon_max[1]) {
+ log_error("incompatible daemon protocol run %u.%u.%u max %u.%u.%u",
+ our_protocol.daemon_run[0],
+ our_protocol.daemon_run[1],
+ our_protocol.daemon_run[2],
+ our_protocol.daemon_max[0],
+ our_protocol.daemon_max[1],
+ our_protocol.daemon_max[2]);
+ return -1;
+ }
+
+ if (our_protocol.kernel_run[0] != our_protocol.kernel_max[0] ||
+ our_protocol.kernel_run[1] > our_protocol.kernel_max[1]) {
+ log_error("incompatible kernel protocol run %u.%u.%u max %u.%u.%u",
+ our_protocol.kernel_run[0],
+ our_protocol.kernel_run[1],
+ our_protocol.kernel_run[2],
+ our_protocol.kernel_max[0],
+ our_protocol.kernel_max[1],
+ our_protocol.kernel_max[2]);
+ return -1;
+ }
+
+ log_debug("daemon run %u.%u.%u max %u.%u.%u "
+ "kernel run %u.%u.%u max %u.%u.%u",
+ our_protocol.daemon_run[0],
+ our_protocol.daemon_run[1],
+ our_protocol.daemon_run[2],
+ our_protocol.daemon_max[0],
+ our_protocol.daemon_max[1],
+ our_protocol.daemon_max[2],
+ our_protocol.kernel_run[0],
+ our_protocol.kernel_run[1],
+ our_protocol.kernel_run[2],
+ our_protocol.kernel_max[0],
+ our_protocol.kernel_max[1],
+ our_protocol.kernel_max[2]);
+
+ send_protocol(&our_protocol); /* now carries the non-zero run values */
+ return 0;
+}
+
+/*
+ * Message delivery callback for the daemon cpg.  Messages shorter than a
+ * dlm_header are rejected; otherwise the header is converted to host
+ * order and protocol messages are passed to receive_protocol().  Any
+ * other type is logged as unknown.
+ */
+static void deliver_cb_daemon(cpg_handle_t handle,
+			      const struct cpg_name *group_name,
+			      uint32_t nodeid, uint32_t pid,
+			      void *data, size_t len)
+{
+	struct dlm_header *hd = (struct dlm_header *)data;
+
+	if (len < sizeof(*hd)) {
+		log_error("deliver_cb short message %zd", len);
+		return;
+	}
+
+	dlm_header_in(hd);
+
+	if (hd->type == DLM_MSG_PROTOCOL) {
+		receive_protocol(hd, len);
+		return;
+	}
+
+	log_error("deliver_cb_daemon unknown msg type %d", hd->type);
+}
+
+/*
+ * Configuration-change callback for the daemon cpg.
+ *
+ * Logs the change, re-sends our protocol whenever any node joined, and
+ * replaces the saved member list (daemon_member / daemon_member_count)
+ * with the new one.  Every member also gets an entry on daemon_nodes.
+ * If cpg reports more members than daemon_member can hold, an error is
+ * logged and the saved list is truncated to the array's capacity.
+ */
+static void confchg_cb_daemon(cpg_handle_t handle,
+			      const struct cpg_name *group_name,
+			      const struct cpg_address *member_list,
+			      size_t member_list_entries,
+			      const struct cpg_address *left_list,
+			      size_t left_list_entries,
+			      const struct cpg_address *joined_list,
+			      size_t joined_list_entries)
+{
+	const size_t max_members = sizeof(daemon_member) / sizeof(daemon_member[0]);
+	size_t stored = member_list_entries;
+	size_t i;
+
+	log_config(group_name, member_list, member_list_entries,
+		   left_list, left_list_entries,
+		   joined_list, joined_list_entries);
+
+	if (joined_list_entries)
+		send_protocol(&our_protocol);
+
+	/* never write past the end of the fixed-size daemon_member array */
+	if (stored > max_members) {
+		log_error("confchg_cb_daemon %zu members exceeds max %zu",
+			  member_list_entries, max_members);
+		stored = max_members;
+	}
+
+	memset(&daemon_member, 0, sizeof(daemon_member));
+	daemon_member_count = stored;
+
+	for (i = 0; i < member_list_entries; i++) {
+		if (i < stored)
+			daemon_member[i] = member_list[i];
+		add_node_daemon(member_list[i].nodeid);
+	}
+}
+
+/* Callbacks passed to cpg_initialize() for the daemon cpg handle. */
+static cpg_callbacks_t cpg_callbacks_daemon = {
+	.cpg_confchg_fn = confchg_cb_daemon,
+	.cpg_deliver_fn = deliver_cb_daemon,
+};
+
+/*
+ * Dispatch all pending events on the daemon cpg handle, logging any
+ * dispatch error.  The ci argument is not used.
+ */
+void process_cpg_daemon(int ci)
+{
+	cpg_error_t error = cpg_dispatch(cpg_handle_daemon, CPG_DISPATCH_ALL);
+
+	if (error == CPG_OK)
+		return;
+
+	log_error("daemon cpg_dispatch error %d", error);
+}
+
+/*
+ * Initialize the daemon cpg: reset the daemon node list, set our maximum
+ * protocol versions (daemon 2.1.1, kernel 1.1.1), initialize the cpg
+ * handle and join the "dlm:controld" group, retrying once a second while
+ * cpg reports CPG_ERR_TRY_AGAIN.
+ *
+ * Returns the daemon cpg fd on success, or -1 on failure.
+ */
+int setup_cpg_daemon(void)
+{
+	cpg_error_t error;
+	struct cpg_name name;
+	int tries = 0;
+
+	INIT_LIST_HEAD(&daemon_nodes);
+
+	memset(&our_protocol, 0, sizeof(our_protocol));
+	our_protocol.daemon_max[0] = 2;
+	our_protocol.daemon_max[1] = 1;
+	our_protocol.daemon_max[2] = 1;
+	our_protocol.kernel_max[0] = 1;
+	our_protocol.kernel_max[1] = 1;
+	our_protocol.kernel_max[2] = 1;
+
+	error = cpg_initialize(&cpg_handle_daemon, &cpg_callbacks_daemon);
+	if (error != CPG_OK) {
+		log_error("daemon cpg_initialize error %d", error);
+		return -1;
+	}
+
+	cpg_fd_get(cpg_handle_daemon, &cpg_fd_daemon);
+
+	memset(&name, 0, sizeof(name));
+	sprintf(name.value, "dlm:controld");
+	name.length = strlen(name.value) + 1;
+
+	for (;;) {
+		error = cpg_join(cpg_handle_daemon, &name);
+		if (error != CPG_ERR_TRY_AGAIN)
+			break;
+
+		sleep(1);
+		/* log every tenth retry */
+		if (!(++tries % 10))
+			log_error("daemon cpg_join error retrying");
+	}
+
+	if (error != CPG_OK) {
+		log_error("daemon cpg_join error %d", error);
+		cpg_finalize(cpg_handle_daemon);
+		return -1;
+	}
+
+	log_debug("setup_cpg_daemon %d", cpg_fd_daemon);
+	return cpg_fd_daemon;
+}
+
+/*
+ * Shut down cpg use: unless the cluster is already down, leave the
+ * "dlm:controld" group (retrying once a second while cpg reports
+ * CPG_ERR_TRY_AGAIN), then finalize every lockspace cpg handle and the
+ * daemon handle.  Does nothing if the daemon handle was never set.
+ */
+void close_cpg_daemon(void)
+{
+	struct lockspace *ls;
+	cpg_error_t error;
+	struct cpg_name name;
+	int tries = 0;
+
+	if (!cpg_handle_daemon)
+		return;
+
+	/* leaving is skipped when the cluster is down */
+	if (!cluster_down) {
+		memset(&name, 0, sizeof(name));
+		sprintf(name.value, "dlm:controld");
+		name.length = strlen(name.value) + 1;
+
+		for (;;) {
+			error = cpg_leave(cpg_handle_daemon, &name);
+			if (error != CPG_ERR_TRY_AGAIN)
+				break;
+
+			sleep(1);
+			if (!(++tries % 10))
+				log_error("daemon cpg_leave error retrying");
+		}
+
+		if (error != CPG_OK)
+			log_error("daemon cpg_leave error %d", error);
+	}
+
+	list_for_each_entry(ls, &lockspaces, list) {
+		if (!ls->cpg_handle)
+			continue;
+		cpg_finalize(ls->cpg_handle);
+	}
+	cpg_finalize(cpg_handle_daemon);
+}
+
+/* fs_controld has seen nodedown for nodeid; it's now ok for dlm to do
+ recovery for the failed node */
+
+/*
+ * Record that the fs daemon has been notified about nodeid.
+ *
+ * Returns -ESRCH when nodeid has no node history entry, 0 (without
+ * setting anything) when nodeid is not a member of the started change,
+ * -EAGAIN when the node's check_fs flag is not yet set, and 0 after
+ * setting fs_notified otherwise.
+ */
+int set_fs_notified(struct lockspace *ls, int nodeid)
+{
+	struct node *hist = get_node_history(ls, nodeid);
+
+	/* this shouldn't happen */
+	if (!hist) {
+		log_error("set_fs_notified no nodeid %d", nodeid);
+		return -ESRCH;
+	}
+
+	if (!find_memb(ls->started_change, nodeid)) {
+		log_group(ls, "set_fs_notified %d not in ls", nodeid);
+		return 0;
+	}
+
+	/* this can happen, we haven't seen a nodedown for this node yet,
+	   but we should soon */
+	if (!hist->check_fs) {
+		log_group(ls, "set_fs_notified %d zero check_fs", nodeid);
+		return -EAGAIN;
+	}
+
+	log_group(ls, "set_fs_notified nodeid %d", nodeid);
+	hist->fs_notified = 1;
+	return 0;
+}
+/*
+ * Fill *lockspace with a snapshot of ls for reporting: its name and
+ * global_id, state flags, the counts of the started change (cg_prev), and
+ * the counts and wait state of the first queued change (cg_next).
+ * Flags are OR-ed into lockspace->flags and nothing is cleared here, so
+ * the caller is expected to pass a zeroed struct (set_lockspaces does).
+ * Always returns 0.
+ *
+ * NOTE(review): strncpy does not terminate the name when ls->name is
+ * DLM_LOCKSPACE_LEN bytes or longer; presumably the destination is larger
+ * and zeroed by the caller - confirm against struct dlmc_lockspace.
+ */
+int set_lockspace_info(struct lockspace *ls, struct dlmc_lockspace *lockspace)
+{
+ struct change *cg, *last = NULL;
+
+ strncpy(lockspace->name, ls->name, DLM_LOCKSPACE_LEN);
+ lockspace->global_id = ls->global_id;
+
+ if (ls->joining)
+ lockspace->flags |= DLMC_LF_JOINING;
+ if (ls->leaving)
+ lockspace->flags |= DLMC_LF_LEAVING;
+ if (ls->kernel_stopped)
+ lockspace->flags |= DLMC_LF_KERNEL_STOPPED;
+ if (ls->fs_registered)
+ lockspace->flags |= DLMC_LF_FS_REGISTERED;
+ if (ls->need_plocks)
+ lockspace->flags |= DLMC_LF_NEED_PLOCKS;
+ if (ls->save_plocks)
+ lockspace->flags |= DLMC_LF_SAVE_PLOCKS;
+
+ if (!ls->started_change) /* no started change: cg_prev is left untouched */
+ goto next;
+
+ cg = ls->started_change;
+
+ lockspace->cg_prev.member_count = cg->member_count;
+ lockspace->cg_prev.joined_count = cg->joined_count;
+ lockspace->cg_prev.remove_count = cg->remove_count;
+ lockspace->cg_prev.failed_count = cg->failed_count;
+ lockspace->cg_prev.combined_seq = cg->combined_seq;
+ lockspace->cg_prev.seq = cg->seq;
+
+ next:
+ if (list_empty(&ls->changes)) /* no queued change: cg_next is left untouched */
+ goto out;
+
+ list_for_each_entry(cg, &ls->changes, list)
+ last = cg; /* ends up pointing at the final list entry */
+
+ cg = list_first_entry(&ls->changes, struct change, list);
+
+ lockspace->cg_next.member_count = cg->member_count;
+ lockspace->cg_next.joined_count = cg->joined_count;
+ lockspace->cg_next.remove_count = cg->remove_count;
+ lockspace->cg_next.failed_count = cg->failed_count;
+ lockspace->cg_next.combined_seq = last->seq;
+ lockspace->cg_next.seq = cg->seq;
+
+ if (cg->state == CGST_WAIT_CONDITIONS)
+ lockspace->cg_next.wait_condition = 4;
+ if (poll_fencing) /* the poll_* flags override the value 4 set above, regardless of cg->state */
+ lockspace->cg_next.wait_condition = 1;
+ else if (poll_quorum)
+ lockspace->cg_next.wait_condition = 2;
+ else if (poll_fs)
+ lockspace->cg_next.wait_condition = 3;
+
+ if (cg->state == CGST_WAIT_MESSAGES)
+ lockspace->cg_next.wait_messages = 1;
+ out:
+ return 0;
+}
+
+/*
+ * Fill *node with what is known about nodeid: membership flags from
+ * change cg (which may be NULL) and the check flags, sequence numbers and
+ * failure reason from the node history, when those entries exist.
+ * Flags are OR-ed into node->flags.  Always returns 0.
+ */
+static int _set_node_info(struct lockspace *ls, struct change *cg, int nodeid,
+			  struct dlmc_node *node)
+{
+	struct member *memb = NULL;
+	struct node *hist;
+
+	node->nodeid = nodeid;
+
+	/* membership details, only when the node is in the given change */
+	if (cg)
+		memb = find_memb(cg, nodeid);
+	if (memb) {
+		node->flags |= DLMC_NF_MEMBER;
+
+		if (memb->start)
+			node->flags |= DLMC_NF_START;
+		if (memb->disallowed)
+			node->flags |= DLMC_NF_DISALLOWED;
+	}
+
+	/* history details, only when the node has a history entry */
+	hist = get_node_history(ls, nodeid);
+	if (hist) {
+		if (hist->check_fencing)
+			node->flags |= DLMC_NF_CHECK_FENCING;
+		if (hist->check_quorum)
+			node->flags |= DLMC_NF_CHECK_QUORUM;
+		if (hist->check_fs)
+			node->flags |= DLMC_NF_CHECK_FS;
+
+		node->added_seq = hist->added_seq;
+		node->removed_seq = hist->removed_seq;
+		node->failed_reason = hist->failed_reason;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill *node for nodeid, using the first queued change when there is
+ * one and the started change otherwise.  Returns what _set_node_info()
+ * returns.
+ */
+int set_node_info(struct lockspace *ls, int nodeid, struct dlmc_node *node)
+{
+	struct change *cg = ls->started_change;
+
+	if (!list_empty(&ls->changes))
+		cg = list_first_entry(&ls->changes, struct change, list);
+
+	return _set_node_info(ls, cg, nodeid, node);
+}
+
+/*
+ * Allocate an array with one zeroed dlmc_lockspace per entry on the
+ * lockspaces list and fill each with set_lockspace_info().
+ *
+ * On success stores the entry count in *count and the array in *lss_out
+ * and returns 0.  Returns -ENOMEM, leaving both outputs untouched, when
+ * the allocation fails.
+ */
+int set_lockspaces(int *count, struct dlmc_lockspace **lss_out)
+{
+	struct lockspace *ls;
+	struct dlmc_lockspace *lss;
+	size_t bytes;
+	int ls_count = 0;
+	int idx = 0;
+
+	list_for_each_entry(ls, &lockspaces, list)
+		ls_count++;
+
+	bytes = ls_count * sizeof(struct dlmc_lockspace);
+	lss = malloc(bytes);
+	if (!lss)
+		return -ENOMEM;
+	memset(lss, 0, bytes);
+
+	/* fill the slots in list order */
+	list_for_each_entry(ls, &lockspaces, list)
+		set_lockspace_info(ls, &lss[idx++]);
+
+	*count = ls_count;
+	*lss_out = lss;
+	return 0;
+}
+/*
+ * Build an array of dlmc_node entries for lockspace ls, selected by
+ * option:
+ *   DLMC_NODES_ALL     - every node on ls->node_history
+ *   DLMC_NODES_MEMBERS - members of the started change
+ *   DLMC_NODES_NEXT    - members of the first queued change
+ *
+ * On success stores the entry count in *node_count and the allocated
+ * array in *nodes_out and returns 0.  When the option is unknown, or the
+ * change it needs does not exist, the outputs are 0 and NULL and the
+ * return is still 0.  Returns -ENOMEM, leaving the outputs untouched,
+ * when the allocation fails.
+ */
+int set_lockspace_nodes(struct lockspace *ls, int option, int *node_count,
+ struct dlmc_node **nodes_out)
+{
+ struct change *cg;
+ struct node *n;
+ struct dlmc_node *nodes = NULL, *nodep;
+ struct member *memb;
+ int count = 0;
+
+ if (option == DLMC_NODES_ALL) {
+ if (!list_empty(&ls->changes))
+ cg = list_first_entry(&ls->changes, struct change,list);
+ else
+ cg = ls->started_change; /* may be NULL; _set_node_info checks cg before using it */
+
+ list_for_each_entry(n, &ls->node_history, list)
+ count++;
+
+ } else if (option == DLMC_NODES_MEMBERS) {
+ if (!ls->started_change)
+ goto out;
+ cg = ls->started_change;
+ count = cg->member_count;
+
+ } else if (option == DLMC_NODES_NEXT) {
+ if (list_empty(&ls->changes))
+ goto out;
+ cg = list_first_entry(&ls->changes, struct change, list);
+ count = cg->member_count;
+ } else
+ goto out;
+
+ nodes = malloc(count * sizeof(struct dlmc_node)); /* NOTE(review): count can be 0 for DLMC_NODES_ALL with an empty history */
+ if (!nodes)
+ return -ENOMEM;
+ memset(nodes, 0, count * sizeof(struct dlmc_node));
+ nodep = nodes;
+
+ if (option == DLMC_NODES_ALL) {
+ list_for_each_entry(n, &ls->node_history, list)
+ _set_node_info(ls, cg, n->nodeid, nodep++);
+ } else {
+ list_for_each_entry(memb, &cg->members, list)
+ _set_node_info(ls, cg, memb->nodeid, nodep++);
+ }
+ out:
+ *node_count = count;
+ *nodes_out = nodes;
+ return 0;
+}
+
diff --git a/dlm_controld/crc.c b/dlm_controld/crc.c
new file mode 100644
index 0000000..ff8c1d3
--- /dev/null
+++ b/dlm_controld/crc.c
@@ -0,0 +1,72 @@
+#include "dlm_daemon.h"
+
+static const uint32_t crc_32_tab[] = {
+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
+ 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
+ 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+ 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
+ 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+ 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
+ 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
+ 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+ 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
+ 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+ 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
+ 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
+ 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
+ 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
+ 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+ 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
+ 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+ 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
+ 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
+ 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+ 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
+ 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+ 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
+ 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
+ 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
+ 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
+};
+
+/**
+ *
+ * Copied from:
+ *
+ * gfs2_disk_hash - hash an array of data
+ * @data: the data to be hashed
+ * @len: the length of data to be hashed
+ *
+ * This function must produce the same results as the one in the kernel:
+ * crc32_le(0xFFFFFFFF, data, len) ^ 0xFFFFFFFF
+ *
+ * Take some data and convert it to a 32-bit hash.
+ *
+ * The hash function is a 32-bit CRC of the data. The algorithm uses
+ * the crc_32_tab table above.
+ *
+ * This may not be the fastest hash function, but it does a fair bit better
+ * at providing uniform results than the others I've looked at. That's
+ * really important for efficient directories.
+ *
+ * Returns: the hash
+ */
+
+uint32_t cpgname_to_crc(const char *data, int len)
+{
+	uint32_t crc = 0xFFFFFFFF;
+
+	/* one table lookup per input byte, low byte of the running value
+	   selects the table entry */
+	while (len--) {
+		uint32_t idx = (crc ^ *data++) & 0xFF;
+
+		crc = (crc >> 8) ^ crc_32_tab[idx];
+	}
+
+	/* final inversion, equivalent to the "^ 0xFFFFFFFF" in the kernel
+	   expression quoted above */
+	return ~crc;
+}
+
diff --git a/dlm_controld/deadlock.c b/dlm_controld/deadlock.c
new file mode 100644
index 0000000..bd1d68c
--- /dev/null
+++ b/dlm_controld/deadlock.c
@@ -0,0 +1,1550 @@
+#include "dlm_daemon.h"
+#include "config.h"
+#include "libdlm.h"
+
+static SaCkptHandleT global_ckpt_h;
+static SaCkptCallbacksT callbacks = { 0, 0 };
+static SaVersionT version = { 'B', 1, 1 };
+static char section_buf[10 * 1024 * 1024]; /* 10MB of pack_lock's enough? */
+static uint32_t section_len;
+static uint32_t section_max;
+
+struct node {
+ struct list_head list;
+ int nodeid;
+ int checkpoint_ready; /* we've read its ckpt */
+ int in_cycle; /* participating in cycle */
+};
+
+enum {
+ LOCAL_COPY = 1,
+ MASTER_COPY = 2,
+};
+
+/* from linux/fs/dlm/dlm_internal.h */
+#define DLM_LKSTS_WAITING 1
+#define DLM_LKSTS_GRANTED 2
+#define DLM_LKSTS_CONVERT 3
+
+struct pack_lock {
+ uint64_t xid;
+ uint32_t id;
+ int nodeid;
+ uint32_t remid;
+ int ownpid;
+ uint32_t exflags;
+ uint32_t flags;
+ int8_t status;
+ int8_t grmode;
+ int8_t rqmode;
+ int8_t copy;
+};
+
+struct dlm_rsb {
+ struct list_head list;
+ struct list_head locks;
+ char name[DLM_RESNAME_MAXLEN];
+ int len;
+};
+
+/* information is saved in the lkb, and lkb->lock, from the perspective of the
+ local or master copy, not the process copy */
+
+struct dlm_lkb {
+ struct list_head list; /* r->locks */
+ struct pack_lock lock; /* data from debugfs/checkpoint */
+ int home; /* node where the lock owner lives*/
+ struct dlm_rsb *rsb; /* lock is on resource */
+ struct trans *trans; /* lock owned by this transaction */
+ struct list_head trans_list; /* tr->locks */
+ struct trans *waitfor_trans; /* the trans that's holding the
+ lock that's blocking us */
+};
+
+/* waitfor pointers are alloc'ed 4 at a time */
+#define TR_NALLOC 4
+
+struct trans {
+ struct list_head list;
+ struct list_head locks;
+ uint64_t xid;
+ int others_waiting_on_us; /* count of trans's
+ pointing to us in
+ waitfor */
+ int waitfor_alloc;
+ int waitfor_count; /* count of in-use
+ waitfor slots and
+ num of trans's we're
+ waiting on */
+ struct trans **waitfor; /* waitfor_alloc trans
+ pointers */
+};
+
+static const int __dlm_compat_matrix[8][8] = {
+ /* UN NL CR CW PR PW EX PD */
+ {1, 1, 1, 1, 1, 1, 1, 0}, /* UN */
+ {1, 1, 1, 1, 1, 1, 1, 0}, /* NL */
+ {1, 1, 1, 1, 1, 1, 0, 0}, /* CR */
+ {1, 1, 1, 1, 0, 0, 0, 0}, /* CW */
+ {1, 1, 1, 0, 1, 0, 0, 0}, /* PR */
+ {1, 1, 1, 0, 0, 0, 0, 0}, /* PW */
+ {1, 1, 0, 0, 0, 0, 0, 0}, /* EX */
+ {0, 0, 0, 0, 0, 0, 0, 0} /* PD */
+};
+
+/*
+ * Look up whether two lock modes are compatible (1) or conflict (0).
+ * Both modes are shifted by one so that mode value -1 lands on row/column 0
+ * of __dlm_compat_matrix (the one labelled UN).  No range check is done.
+ */
+static inline int dlm_modes_compat(int mode1, int mode2)
+{
+	const int *row = __dlm_compat_matrix[mode1 + 1];
+
+	return row[mode2 + 1];
+}
+
+/*
+ * Map a DLM_LKSTS_* lock status to the one-letter tag used in log output;
+ * any other value maps to "?".
+ */
+static const char *status_str(int lksts)
+{
+	if (lksts == DLM_LKSTS_WAITING)
+		return "W";
+	if (lksts == DLM_LKSTS_GRANTED)
+		return "G";
+	if (lksts == DLM_LKSTS_CONVERT)
+		return "C";
+	return "?";
+}
+
+/*
+ * Release every resource on ls->resources together with all locks hanging
+ * off it.  A lock that is still linked on a transaction's lock list is
+ * unlinked from that list before it is freed.
+ */
+static void free_resources(struct lockspace *ls)
+{
+	struct dlm_rsb *rsb, *rsb_next;
+	struct dlm_lkb *lock, *lock_next;
+
+	list_for_each_entry_safe(rsb, rsb_next, &ls->resources, list) {
+		list_for_each_entry_safe(lock, lock_next, &rsb->locks, list) {
+			if (!list_empty(&lock->trans_list))
+				list_del(&lock->trans_list);
+			list_del(&lock->list);
+			free(lock);
+		}
+		list_del(&rsb->list);
+		free(rsb);
+	}
+}
+
+/*
+ * Release every transaction on ls->transactions, including its waitfor
+ * pointer array.
+ */
+static void free_transactions(struct lockspace *ls)
+{
+	struct trans *cur, *next;
+
+	list_for_each_entry_safe(cur, next, &ls->transactions, list) {
+		list_del(&cur->list);
+		/* free(NULL) is a no-op, so an unallocated waitfor is fine */
+		free(cur->waitfor);
+		free(cur);
+	}
+}
+
+/*
+ * Called from the out-of-memory paths in this file.  For now it only logs;
+ * as the message says, actually turning detection off is still to be done.
+ */
+static void disable_deadlock(void)
+{
+	log_error("FIXME: deadlock detection disabled");
+}
+
+/*
+ * Initialise the checkpoint service handle used by deadlock detection.
+ * Does nothing unless cfgd_enable_deadlk is set.  An initialisation failure
+ * is logged but not reported to the caller.
+ */
+void setup_deadlock(void)
+{
+	SaAisErrorT rv;
+
+	if (cfgd_enable_deadlk) {
+		rv = saCkptInitialize(&global_ckpt_h, &callbacks, &version);
+		if (rv != SA_AIS_OK)
+			log_error("ckpt init error %d", rv);
+	}
+}
+
+/*
+ * Find the resource called name/len on ls->resources, creating and linking
+ * a new one when none exists.
+ *
+ * Returns the resource, or NULL when len is out of range or memory is
+ * exhausted (the latter also calls disable_deadlock()).
+ */
+static struct dlm_rsb *get_resource(struct lockspace *ls, char *name, int len)
+{
+	struct dlm_rsb *r;
+
+	/* len originates from a debugfs line or a checkpoint section id;
+	   r->name holds at most DLM_RESNAME_MAXLEN bytes, so reject anything
+	   the memcpy below would overrun (a negative len would be converted
+	   to a huge size) */
+	if (len < 0 || len > DLM_RESNAME_MAXLEN) {
+		log_error("get_resource: invalid name len %d", len);
+		return NULL;
+	}
+
+	list_for_each_entry(r, &ls->resources, list) {
+		if (r->len == len && !strncmp(r->name, name, len))
+			return r;
+	}
+
+	r = malloc(sizeof(struct dlm_rsb));
+	if (!r) {
+		log_error("get_resource: no memory");
+		disable_deadlock();
+		return NULL;
+	}
+	memset(r, 0, sizeof(struct dlm_rsb));
+	memcpy(r->name, name, len);
+	r->len = len;
+	INIT_LIST_HEAD(&r->locks);
+	list_add(&r->list, &ls->resources);
+	return r;
+}
+
+/*
+ * Allocate a zeroed lkb whose two list heads are initialised (empty), i.e.
+ * not yet linked to any resource or transaction.  Returns NULL when out of
+ * memory, after logging and calling disable_deadlock().
+ */
+static struct dlm_lkb *create_lkb(void)
+{
+	struct dlm_lkb *new_lkb = malloc(sizeof(struct dlm_lkb));
+
+	if (new_lkb == NULL) {
+		log_error("create_lkb: no memory");
+		disable_deadlock();
+		return NULL;
+	}
+
+	memset(new_lkb, 0, sizeof(struct dlm_lkb));
+	INIT_LIST_HEAD(&new_lkb->list);
+	INIT_LIST_HEAD(&new_lkb->trans_list);
+	return new_lkb;
+}
+
+/* Link lkb onto resource r's lock list and record r as its resource. */
+static void add_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb)
+{
+	lkb->rsb = r;
+	list_add(&lkb->list, &r->locks);
+}
+
+/* from linux/fs/dlm/dlm_internal.h */
+#define IFL_MSTCPY 0x00010000
+
+/* called on a lock that's just been read from debugfs */
+
+/*
+ * Classify a lock just read from debugfs by setting lock->copy:
+ *   - nodeid 0             -> LOCAL_COPY
+ *   - IFL_MSTCPY flag set  -> MASTER_COPY, otherwise untouched
+ *   - anything else is a process copy; it becomes a partial master copy
+ *     (to be combined with the real master copy later) by swapping
+ *     id/remid and setting nodeid to our own nodeid
+ */
+static void set_copy(struct pack_lock *lock)
+{
+	uint32_t tmp;
+
+	if (!lock->nodeid) {
+		lock->copy = LOCAL_COPY;
+		return;
+	}
+
+	lock->copy = MASTER_COPY;
+	if (lock->flags & IFL_MSTCPY)
+		return;
+
+	/* process copy: present it from the master's point of view */
+	tmp = lock->id;
+	lock->id = lock->remid;
+	lock->remid = tmp;
+	lock->nodeid = our_nodeid;
+}
+
+/* xid is always zero in the real master copy, xid should always be non-zero
+ in the partial master copy (what was a process copy) */
+/* TODO: confirm or enforce that the partial will always have non-zero xid */
+
+/* Returns 1 when the lock carries a non-zero xid, 0 otherwise. */
+static int partial_master_copy(struct pack_lock *lock)
+{
+	return lock->xid ? 1 : 0;
+}
+
+/*
+ * Return the lkb that add_lock() should fill in for lock.
+ *
+ * For a MASTER_COPY lock an existing lkb on r with the same nodeid and id is
+ * reused, so that partial and real master copies end up in one lkb.  In
+ * every other case a fresh, unlinked lkb is created (NULL on allocation
+ * failure).
+ */
+static struct dlm_lkb *get_lkb(struct dlm_rsb *r, struct pack_lock *lock)
+{
+	struct dlm_lkb *cur;
+
+	if (lock->copy == MASTER_COPY) {
+		list_for_each_entry(cur, &r->locks, list) {
+			if (cur->lock.id == lock->id &&
+			    cur->lock.nodeid == lock->nodeid)
+				return cur;
+		}
+	}
+
+	return create_lkb();
+}
+
+/*
+ * Record one packed lock on resource r.
+ *
+ * The target lkb comes from get_lkb(), so a master copy may be merged into
+ * an lkb that already exists.  For a LOCAL_COPY every field is copied and
+ * the lock's home is from_nodeid.  For a MASTER_COPY the identity fields are
+ * always copied; the xid is taken only from a partial master copy and the
+ * remaining fields only from the real master copy, and the home is the
+ * lock's own nodeid.  A lock with any other copy value is linked without
+ * any of its fields being copied.
+ *
+ * Returns the lkb, or NULL if one could not be allocated.
+ */
+static struct dlm_lkb *add_lock(struct lockspace *ls, struct dlm_rsb *r,
+				int from_nodeid, struct pack_lock *lock)
+{
+	struct dlm_lkb *lkb = get_lkb(r, lock);
+
+	if (!lkb)
+		return NULL;
+
+	if (lock->copy == LOCAL_COPY) {
+		lkb->lock.xid = lock->xid;
+		lkb->lock.nodeid = lock->nodeid;
+		lkb->lock.id = lock->id;
+		lkb->lock.remid = lock->remid;
+		lkb->lock.ownpid = lock->ownpid;
+		lkb->lock.exflags = lock->exflags;
+		lkb->lock.flags = lock->flags;
+		lkb->lock.status = lock->status;
+		lkb->lock.grmode = lock->grmode;
+		lkb->lock.rqmode = lock->rqmode;
+		lkb->lock.copy = LOCAL_COPY;
+		lkb->home = from_nodeid;
+
+		log_group(ls, "add %s local nodeid %d id %x remid %x xid %llx",
+			  r->name, lock->nodeid, lock->id, lock->remid,
+			  (unsigned long long)lock->xid);
+
+	} else if (lock->copy == MASTER_COPY) {
+		/* identity fields come from either flavour of master copy */
+		lkb->lock.nodeid = lock->nodeid;
+		lkb->lock.id = lock->id;
+		lkb->lock.remid = lock->remid;
+		lkb->lock.copy = MASTER_COPY;
+
+		if (partial_master_copy(lock)) {
+			/* the xid is only known to the partial master copy */
+			lkb->lock.xid = lock->xid;
+		} else {
+			/* everything else comes from the real master copy */
+			lkb->lock.ownpid = lock->ownpid;
+			lkb->lock.exflags = lock->exflags;
+			lkb->lock.flags = lock->flags;
+			lkb->lock.status = lock->status;
+			lkb->lock.grmode = lock->grmode;
+			lkb->lock.rqmode = lock->rqmode;
+		}
+		lkb->home = lock->nodeid;
+
+		log_group(ls, "add %s master nodeid %d id %x remid %x xid %llx",
+			  r->name, lock->nodeid, lock->id, lock->remid,
+			  (unsigned long long)lock->xid);
+	}
+
+	/* a freshly created lkb is not on any resource yet */
+	if (list_empty(&lkb->list))
+		add_lkb(r, lkb);
+	return lkb;
+}
+
+/*
+ * Copy the text between the first pair of double quotes in line into name.
+ *
+ * name is not NUL-terminated here; the caller passes a zeroed buffer.
+ * Copying stops at the closing quote, at the end of line, or after 64
+ * bytes, whichever comes first.  The 64-byte cap matches the caller's
+ * char r_name[65]; without the end-of-line and length checks a line with
+ * no closing quote would be read past its terminator and would overflow
+ * name.
+ */
+static void parse_r_name(char *line, char *name)
+{
+	enum { name_max = 64 };
+	char *p;
+	int i = 0;
+	int begin = 0;
+
+	for (p = line; *p != '\0'; p++) {
+		if (*p == '"') {
+			if (begin)
+				break;
+			begin = 1;
+			continue;
+		}
+		if (!begin)
+			continue;
+		if (i >= name_max)
+			break;
+		name[i++] = *p;
+	}
+}
+
+#define LOCK_LINE_MAX 1024
+
+/*
+ * Read this node's locks for lockspace ls from the kernel's debugfs file
+ * /sys/kernel/debug/dlm/<name>_locks and add each one to ls->resources.
+ *
+ * The first line of the file is a header and is skipped.  Parsing stops at
+ * the first malformed line.
+ *
+ * Returns -1 only if the file cannot be opened; once it is open the result
+ * is 0 even when parsing stopped early.
+ */
+static int read_debugfs_locks(struct lockspace *ls)
+{
+	FILE *file;
+	char path[PATH_MAX];
+	char line[LOCK_LINE_MAX];
+	struct dlm_rsb *r;
+	struct pack_lock lock;
+	char r_name[65];
+	unsigned long long xid;
+	unsigned int ownpid;
+	unsigned int waiting;
+	int r_nodeid;
+	int r_len;
+	int rv;
+
+	snprintf(path, PATH_MAX, "/sys/kernel/debug/dlm/%s_locks", ls->name);
+
+	file = fopen(path, "r");
+	if (!file)
+		return -1;
+
+	/* skip the header on the first line */
+	if (!fgets(line, LOCK_LINE_MAX, file)) {
+		log_error("Unable to read %s: %d", path, errno);
+		goto out;
+	}
+
+	while (fgets(line, LOCK_LINE_MAX, file)) {
+		memset(&lock, 0, sizeof(struct pack_lock));
+
+		/* xid and ownpid are scanned into temporaries whose types
+		   match the conversion specifiers, then copied below */
+		rv = sscanf(line, "%x %d %x %u %llu %x %x %hhd %hhd %hhd %u %d %d",
+			    &lock.id,
+			    &lock.nodeid,
+			    &lock.remid,
+			    &ownpid,
+			    &xid,
+			    &lock.exflags,
+			    &lock.flags,
+			    &lock.status,
+			    &lock.grmode,
+			    &lock.rqmode,
+			    &waiting,
+			    &r_nodeid,
+			    &r_len);
+
+		if (rv != 13) {
+			log_error("invalid debugfs line %d: %s", rv, line);
+			goto out;
+		}
+
+		/* only read the temporaries once all 13 fields are known to
+		   have been converted */
+		lock.xid = xid;
+		lock.ownpid = ownpid;
+
+		/* r_len is used as a byte count on r_name below */
+		if (r_len < 0 || r_len >= (int)sizeof(r_name)) {
+			log_error("invalid debugfs resource name len %d: %s",
+				  r_len, line);
+			goto out;
+		}
+
+		memset(r_name, 0, sizeof(r_name));
+		parse_r_name(line, r_name);
+
+		r = get_resource(ls, r_name, r_len);
+		if (!r)
+			break;
+
+		set_copy(&lock);
+		add_lock(ls, r, our_nodeid, &lock);
+	}
+ out:
+	fclose(file);
+	return 0;
+}
+
+/*
+ * Add the locks currently held in section_buf (section_len bytes of packed,
+ * little-endian struct pack_lock) to the resource named by numbuf.
+ *
+ * numbuf/buflen are a checkpoint section id and its length; the resource
+ * name length passed on is buflen - 1.  The multi-byte fields are converted
+ * to host byte order in place before each lock is added.
+ *
+ * Returns 0 on success, -1 if the id length is invalid or the resource
+ * cannot be obtained.
+ */
+static int read_checkpoint_locks(struct lockspace *ls, int from_nodeid,
+				 char *numbuf, int buflen)
+{
+	struct dlm_rsb *r;
+	struct pack_lock *lock;
+	int count = section_len / sizeof(struct pack_lock);
+	int i;
+
+	/* buflen - 1 must not go negative */
+	if (buflen < 1) {
+		log_error("read_checkpoint_locks: bad section id len %d",
+			  buflen);
+		return -1;
+	}
+
+	r = get_resource(ls, numbuf, buflen - 1);
+	if (!r)
+		return -1;
+
+	lock = (struct pack_lock *) &section_buf;
+
+	for (i = 0; i < count; i++) {
+		lock->xid = le64_to_cpu(lock->xid);
+		lock->id = le32_to_cpu(lock->id);
+		lock->nodeid = le32_to_cpu(lock->nodeid);
+		lock->remid = le32_to_cpu(lock->remid);
+		lock->ownpid = le32_to_cpu(lock->ownpid);
+		lock->exflags = le32_to_cpu(lock->exflags);
+		lock->flags = le32_to_cpu(lock->flags);
+
+		add_lock(ls, r, from_nodeid, lock);
+		lock++;
+	}
+	return 0;
+}
+
+/*
+ * Serialise every lkb on list q into the pack_lock array starting at
+ * *lockp, converting multi-byte fields to little-endian.
+ *
+ * At most section_max entries are written; if the list is longer an error
+ * is logged and the rest is dropped.  *lockp itself is not advanced.
+ *
+ * Returns the number of entries written.
+ */
+static int pack_lkb_list(struct list_head *q, struct pack_lock **lockp)
+{
+	struct pack_lock *out = *lockp;
+	struct dlm_lkb *lkb;
+	int count = 0;
+
+	list_for_each_entry(lkb, q, list) {
+		struct pack_lock *dst;
+
+		if (count + 1 > section_max) {
+			log_error("too many locks %d for ckpt buf", count);
+			break;
+		}
+
+		dst = &out[count];
+
+		dst->xid = cpu_to_le64(lkb->lock.xid);
+		dst->id = cpu_to_le32(lkb->lock.id);
+		dst->nodeid = cpu_to_le32(lkb->lock.nodeid);
+		dst->remid = cpu_to_le32(lkb->lock.remid);
+		dst->ownpid = cpu_to_le32(lkb->lock.ownpid);
+		dst->exflags = cpu_to_le32(lkb->lock.exflags);
+		dst->flags = cpu_to_le32(lkb->lock.flags);
+
+		/* single-byte fields need no conversion */
+		dst->status = lkb->lock.status;
+		dst->grmode = lkb->lock.grmode;
+		dst->rqmode = lkb->lock.rqmode;
+		dst->copy = lkb->lock.copy;
+
+		count++;
+	}
+	return count;
+}
+
+/*
+ * Fill section_buf with the packed locks of resource r and set section_len
+ * to the number of bytes used.  section_max is set to the number of
+ * pack_lock entries that fit in section_buf, which bounds pack_lkb_list().
+ */
+static void pack_section_buf(struct lockspace *ls, struct dlm_rsb *r)
+{
+	struct pack_lock *lock;
+	int count;
+
+	memset(&section_buf, 0, sizeof(section_buf));
+	section_max = sizeof(section_buf) / sizeof(struct pack_lock);
+
+	lock = (struct pack_lock *) &section_buf;
+
+	count = pack_lkb_list(&r->locks, &lock);
+
+	section_len = count * sizeof(struct pack_lock);
+}
+
+/*
+ * Unlink the checkpoint called name and close the handle saved in
+ * ls->deadlk_ckpt_handle, which is cleared on every path.
+ *
+ * Each checkpoint call that reports SA_AIS_ERR_TRY_AGAIN is retried with a
+ * one second sleep in between; the retry count is capped.  If the unlink
+ * fails while a handle is held, the checkpoint's status is logged to help
+ * diagnosis before the handle is closed.
+ *
+ * Returns 0, or -1 when the unlink failed while a handle was held.
+ */
+static int _unlink_checkpoint(struct lockspace *ls, SaNameT *name)
+{
+	SaCkptCheckpointHandleT h;
+	SaCkptCheckpointDescriptorT s;
+	SaAisErrorT rv;
+	int ret = 0;
+	int retries;
+
+	h = (SaCkptCheckpointHandleT) ls->deadlk_ckpt_handle;
+	log_group(ls, "unlink ckpt %llx", (unsigned long long)h);
+
+	retries = 0;
+ unlink_retry:
+	rv = saCkptCheckpointUnlink(global_ckpt_h, name);
+	if (rv == SA_AIS_ERR_TRY_AGAIN) {
+		log_group(ls, "unlink ckpt retry");
+		sleep(1);
+		if (retries++ < 10)
+			goto unlink_retry;
+	}
+	if (rv == SA_AIS_OK)
+		goto out_close;
+	if (!h)
+		goto out;
+
+	log_error("unlink ckpt error %d %s", rv, ls->name);
+	ret = -1;
+
+	retries = 0;
+ status_retry:
+	rv = saCkptCheckpointStatusGet(h, &s);
+	if (rv == SA_AIS_ERR_TRY_AGAIN) {
+		log_group(ls, "unlink ckpt status retry");
+		sleep(1);
+		if (retries++ < 10)
+			goto status_retry;
+	}
+	if (rv != SA_AIS_OK) {
+		log_error("unlink ckpt status error %d %s", rv, ls->name);
+		goto out_close;
+	}
+
+	log_group(ls, "unlink ckpt status: size %llu, max sections %u, "
+		      "max section size %llu, section count %u, mem %u",
+		  (unsigned long long)s.checkpointCreationAttributes.checkpointSize,
+		  s.checkpointCreationAttributes.maxSections,
+		  (unsigned long long)s.checkpointCreationAttributes.maxSectionSize,
+		  s.numberOfSections, s.memoryUsed);
+
+ out_close:
+	/* the unlink can succeed while we hold no handle of our own; there
+	   is nothing to close then, and closing handle 0 would only log a
+	   misleading error */
+	if (!h)
+		goto out;
+
+	retries = 0;
+ close_retry:
+	rv = saCkptCheckpointClose(h);
+	if (rv == SA_AIS_ERR_TRY_AGAIN) {
+		log_group(ls, "unlink ckpt close retry");
+		sleep(1);
+		if (retries++ < 10)
+			goto close_retry;
+	}
+	if (rv != SA_AIS_OK) {
+		log_error("unlink ckpt %llx close err %d %s",
+			  (unsigned long long)h, rv, ls->name);
+	}
+ out:
+	ls->deadlk_ckpt_handle = 0;
+	return ret;
+}
+
+/*
+ * Unlink this node's own deadlock checkpoint for lockspace ls, whose name
+ * is "dlmdeadlk.<lockspace>.<our nodeid>".  Returns the result of
+ * _unlink_checkpoint().
+ */
+static int unlink_checkpoint(struct lockspace *ls)
+{
+	SaNameT ckpt_name;
+
+	ckpt_name.length = snprintf((char *)ckpt_name.value,
+				    SA_MAX_NAME_LENGTH, "dlmdeadlk.%s.%d",
+				    ls->name, our_nodeid);
+
+	return _unlink_checkpoint(ls, &ckpt_name);
+}
+
+/*
+ * Read the deadlock checkpoint written by node nodeid (named
+ * "dlmdeadlk.<lockspace>.<nodeid>") and add every lock found in it to
+ * ls->resources.  Our own checkpoint is never read.
+ *
+ * Each section is one resource: the section id is the resource name and the
+ * section data is an array of struct pack_lock, read into section_buf.
+ * Empty sections, sections larger than section_buf, and sections whose size
+ * is not a whole number of pack_locks are skipped.
+ *
+ * Checkpoint calls reporting SA_AIS_ERR_TRY_AGAIN are retried a bounded
+ * number of times with a one second sleep.  Errors are logged; nothing is
+ * returned to the caller.
+ */
+static void read_checkpoint(struct lockspace *ls, int nodeid)
+{
+	SaCkptCheckpointHandleT h;
+	SaCkptSectionIterationHandleT itr;
+	SaCkptSectionDescriptorT desc;
+	SaCkptIOVectorElementT iov;
+	SaNameT name;
+	SaAisErrorT rv;
+	char buf[DLM_RESNAME_MAXLEN];
+	int len;
+	int retries;
+
+	if (nodeid == our_nodeid)
+		return;
+
+	log_group(ls, "read_checkpoint %d", nodeid);
+
+	len = snprintf((char *)name.value, SA_MAX_NAME_LENGTH, "dlmdeadlk.%s.%d",
+		       ls->name, nodeid);
+	name.length = len;
+
+	retries = 0;
+ open_retry:
+	rv = saCkptCheckpointOpen(global_ckpt_h, &name, NULL,
+				  SA_CKPT_CHECKPOINT_READ, 0, &h);
+	if (rv == SA_AIS_ERR_TRY_AGAIN) {
+		log_group(ls, "read_checkpoint: %d ckpt open retry", nodeid);
+		sleep(1);
+		if (retries++ < 10)
+			goto open_retry;
+	}
+	if (rv != SA_AIS_OK) {
+		log_error("read_checkpoint: %d ckpt open error %d", nodeid, rv);
+		return;
+	}
+
+	retries = 0;
+ init_retry:
+	rv = saCkptSectionIterationInitialize(h, SA_CKPT_SECTIONS_ANY, 0, &itr);
+	if (rv == SA_AIS_ERR_TRY_AGAIN) {
+		log_group(ls, "read_checkpoint: ckpt iterinit retry");
+		sleep(1);
+		if (retries++ < 10)
+			goto init_retry;
+	}
+	if (rv != SA_AIS_OK) {
+		log_error("read_checkpoint: %d ckpt iterinit error %d", nodeid, rv);
+		goto out;
+	}
+
+	while (1) {
+		retries = 0;
+ next_retry:
+		rv = saCkptSectionIterationNext(itr, &desc);
+		if (rv == SA_AIS_ERR_NO_SECTIONS)
+			break;
+		if (rv == SA_AIS_ERR_TRY_AGAIN) {
+			log_group(ls, "read_checkpoint: ckpt iternext retry");
+			sleep(1);
+			if (retries++ < 10)
+				goto next_retry;
+		}
+		if (rv != SA_AIS_OK) {
+			log_error("read_checkpoint: %d ckpt iternext error %d",
+				  nodeid, rv);
+			goto out_it;
+		}
+
+		if (!desc.sectionSize)
+			continue;
+
+		/* the size is supplied by the remote writer; never ask for
+		   more than section_buf can hold */
+		if (desc.sectionSize > sizeof(section_buf)) {
+			log_error("read_checkpoint: %d section size %llu too big",
+				  nodeid,
+				  (unsigned long long)desc.sectionSize);
+			continue;
+		}
+
+		iov.sectionId = desc.sectionId;
+		iov.dataBuffer = &section_buf;
+		iov.dataSize = desc.sectionSize;
+		iov.dataOffset = 0;
+
+		/* printable copy of the section id, for logging only */
+		memset(&buf, 0, sizeof(buf));
+		snprintf(buf, sizeof(buf), "%s", desc.sectionId.id);
+
+		log_group(ls, "read_checkpoint: section size %llu id %u \"%s\"",
+			  (unsigned long long)iov.dataSize,
+			  iov.sectionId.idLen, buf);
+
+		retries = 0;
+ read_retry:
+		rv = saCkptCheckpointRead(h, &iov, 1, NULL);
+		if (rv == SA_AIS_ERR_TRY_AGAIN) {
+			log_group(ls, "read_checkpoint: ckpt read retry");
+			sleep(1);
+			if (retries++ < 10)
+				goto read_retry;
+		}
+		if (rv != SA_AIS_OK) {
+			log_error("read_checkpoint: %d ckpt read error %d",
+				  nodeid, rv);
+			goto out_it;
+		}
+
+		section_len = iov.readSize;
+
+		if (!section_len)
+			continue;
+
+		if (section_len % sizeof(struct pack_lock)) {
+			log_error("read_checkpoint: %d bad section len %d",
+				  nodeid, section_len);
+			continue;
+		}
+
+		read_checkpoint_locks(ls, nodeid, (char *)desc.sectionId.id,
+				      desc.sectionId.idLen);
+	}
+
+ out_it:
+	saCkptSectionIterationFinalize(itr);
+ out:
+	/* start the close with a fresh retry budget on every path */
+	retries = 0;
+ close_retry:
+	rv = saCkptCheckpointClose(h);
+	if (rv == SA_AIS_ERR_TRY_AGAIN) {
+		log_group(ls, "read_checkpoint: unlink ckpt close retry");
+		sleep(1);
+		if (retries++ < 10)
+			goto close_retry;
+	}
+	if (rv != SA_AIS_OK)
+		log_error("read_checkpoint: %d close error %d", nodeid, rv);
+}
+
+/*
+ * Publish this node's locks for lockspace ls in a checkpoint named
+ * "dlmdeadlk.<lockspace>.<our nodeid>".
+ *
+ * A checkpoint left over from an earlier cycle is unlinked first.  The
+ * resources are walked once to size the checkpoint, then one section is
+ * created per resource: the section id is the resource name (including the
+ * terminating NUL) and the data is the packed lock array produced by
+ * pack_section_buf().  The open handle is saved in ls->deadlk_ckpt_handle
+ * so that it can be closed when the checkpoint is unlinked.
+ *
+ * Errors are logged; nothing is returned to the caller.
+ */
+static void write_checkpoint(struct lockspace *ls)
+{
+	SaCkptCheckpointCreationAttributesT attr;
+	SaCkptCheckpointHandleT h;
+	SaCkptSectionIdT section_id;
+	SaCkptSectionCreationAttributesT section_attr;
+	SaCkptCheckpointOpenFlagsT flags;
+	SaNameT name;
+	SaAisErrorT rv;
+	char buf[DLM_RESNAME_MAXLEN];
+	struct dlm_rsb *r;
+	struct dlm_lkb *lkb;
+	int r_count, lock_count, total_size, section_size, max_section_size;
+	int len;
+
+	len = snprintf((char *)name.value, SA_MAX_NAME_LENGTH, "dlmdeadlk.%s.%d",
+		       ls->name, our_nodeid);
+	name.length = len;
+
+	/* unlink an old checkpoint before we create a new one */
+	if (ls->deadlk_ckpt_handle) {
+		log_error("write_checkpoint: old ckpt");
+		if (_unlink_checkpoint(ls, &name))
+			return;
+	}
+
+	/* loop through all locks to figure out sizes to set in
+	   the attr fields */
+
+	r_count = 0;
+	lock_count = 0;
+	total_size = 0;
+	max_section_size = 0;
+
+	list_for_each_entry(r, &ls->resources, list) {
+		r_count++;
+		section_size = 0;
+		list_for_each_entry(lkb, &r->locks, list) {
+			section_size += sizeof(struct pack_lock);
+			lock_count++;
+		}
+		total_size += section_size;
+		if (section_size > max_section_size)
+			max_section_size = section_size;
+	}
+
+	log_group(ls, "write_checkpoint: r_count %d, lock_count %d",
+		  r_count, lock_count);
+
+	log_group(ls, "write_checkpoint: total %d bytes, max_section %d bytes",
+		  total_size, max_section_size);
+
+	attr.creationFlags = SA_CKPT_WR_ALL_REPLICAS;
+	attr.checkpointSize = total_size;
+	attr.retentionDuration = SA_TIME_MAX;
+	attr.maxSections = r_count + 1; /* don't know why we need +1 */
+	attr.maxSectionSize = max_section_size;
+	attr.maxSectionIdSize = DLM_RESNAME_MAXLEN;
+
+	flags = SA_CKPT_CHECKPOINT_READ |
+		SA_CKPT_CHECKPOINT_WRITE |
+		SA_CKPT_CHECKPOINT_CREATE;
+
+ open_retry:
+	rv = saCkptCheckpointOpen(global_ckpt_h, &name, &attr, flags, 0, &h);
+	if (rv == SA_AIS_ERR_TRY_AGAIN) {
+		log_group(ls, "write_checkpoint: ckpt open retry");
+		sleep(1);
+		goto open_retry;
+	}
+	if (rv == SA_AIS_ERR_EXIST) {
+		log_group(ls, "write_checkpoint: ckpt already exists");
+		return;
+	}
+	if (rv != SA_AIS_OK) {
+		log_group(ls, "write_checkpoint: ckpt open error %d", rv);
+		return;
+	}
+
+	log_group(ls, "write_checkpoint: open ckpt handle %llx",
+		  (unsigned long long)h);
+	ls->deadlk_ckpt_handle = (uint64_t) h;
+
+	list_for_each_entry(r, &ls->resources, list) {
+		memset(buf, 0, sizeof(buf));
+		len = snprintf(buf, sizeof(buf), "%s", r->name);
+
+		/* snprintf returns the untruncated length; the id length
+		   must describe what is actually in buf */
+		if (len >= (int)sizeof(buf))
+			len = sizeof(buf) - 1;
+
+		section_id.id = (void *)buf;
+		section_id.idLen = len + 1;
+		section_attr.sectionId = &section_id;
+		section_attr.expirationTime = SA_TIME_END;
+
+		pack_section_buf(ls, r);
+
+		log_group(ls, "write_checkpoint: section size %u id %u \"%s\"",
+			  section_len, section_id.idLen, buf);
+
+ create_retry:
+		rv = saCkptSectionCreate(h, &section_attr, &section_buf,
+					 section_len);
+		if (rv == SA_AIS_ERR_TRY_AGAIN) {
+			log_group(ls, "write_checkpoint: ckpt create retry");
+			sleep(1);
+			goto create_retry;
+		}
+		if (rv == SA_AIS_ERR_EXIST) {
+			/* this shouldn't happen in general */
+			/* NOTE(review): _unlink_checkpoint() also closes
+			   ls->deadlk_ckpt_handle, which is h at this point,
+			   so h appears to be closed twice here - confirm */
+			log_error("write_checkpoint: clearing old ckpt");
+			saCkptCheckpointClose(h);
+			_unlink_checkpoint(ls, &name);
+			goto open_retry;
+		}
+		if (rv != SA_AIS_OK) {
+			log_error("write_checkpoint: section create %d", rv);
+			break;
+		}
+	}
+}
+
+/*
+ * Send a header-only message of the given type on lockspace ls.
+ * to_nodeid and msgdata are stored in the header as given; all other header
+ * fields are left zero here.  An allocation failure is logged and the
+ * message is dropped.
+ */
+static void send_message(struct lockspace *ls, int type,
+			 uint32_t to_nodeid, uint32_t msgdata)
+{
+	int len = sizeof(struct dlm_header);
+	struct dlm_header *hd;
+	char *buf;
+
+	buf = calloc(1, len);
+	if (buf == NULL) {
+		log_error("send_message: no memory");
+		disable_deadlock();
+		return;
+	}
+
+	hd = (struct dlm_header *)buf;
+	hd->type = type;
+	hd->to_nodeid = to_nodeid;
+	hd->msgdata = msgdata;
+
+	dlm_send_message(ls, buf, len);
+
+	free(buf);
+}
+
+/* Send DLM_MSG_DEADLK_CHECKPOINT_READY with to_nodeid 0 and no data. */
+static void send_checkpoint_ready(struct lockspace *ls)
+{
+	log_group(ls, "send_checkpoint_ready");
+	send_message(ls, DLM_MSG_DEADLK_CHECKPOINT_READY, 0, 0);
+}
+
+/* Send DLM_MSG_DEADLK_CYCLE_START with to_nodeid 0 and no data. */
+void send_cycle_start(struct lockspace *ls)
+{
+	log_group(ls, "send_cycle_start");
+	send_message(ls, DLM_MSG_DEADLK_CYCLE_START, 0, 0);
+}
+
+/* Send DLM_MSG_DEADLK_CYCLE_END with to_nodeid 0 and no data. */
+static void send_cycle_end(struct lockspace *ls)
+{
+	log_group(ls, "send_cycle_end");
+	send_message(ls, DLM_MSG_DEADLK_CYCLE_END, 0, 0);
+}
+
+/*
+ * Send DLM_MSG_DEADLK_CANCEL_LOCK for lkb, addressed to the lock's home
+ * node.  The lock id sent is lock.id when lock.nodeid is 0 and lock.remid
+ * otherwise.  tr is not used.
+ */
+static void send_cancel_lock(struct lockspace *ls, struct trans *tr,
+			     struct dlm_lkb *lkb)
+{
+	uint32_t lkid = lkb->lock.nodeid ? lkb->lock.remid : lkb->lock.id;
+	int to_nodeid = lkb->home;
+
+	log_group(ls, "send_cancel_lock to nodeid %d rsb %s id %x xid %llx",
+		  to_nodeid, lkb->rsb->name, lkid,
+		  (unsigned long long)lkb->lock.xid);
+
+	send_message(ls, DLM_MSG_DEADLK_CANCEL_LOCK, to_nodeid, lkid);
+}
+
+/*
+ * Log every resource of ls, followed by one line per lock on it showing
+ * status, ids, granted/requested modes, owner pid and xid.
+ */
+static void dump_resources(struct lockspace *ls)
+{
+	struct dlm_rsb *r;
+	struct dlm_lkb *lkb;
+
+	log_group(ls, "Resource dump:");
+
+	list_for_each_entry(r, &ls->resources, list) {
+		log_group(ls, "\"%s\" len %d", r->name, r->len);
+
+		list_for_each_entry(lkb, &r->locks, list) {
+			struct pack_lock *pl = &lkb->lock;
+
+			log_group(ls, " %s: nodeid %d id %08x remid %08x gr %s rq %s pid %u xid %llx",
+				  status_str(pl->status),
+				  pl->nodeid,
+				  pl->id,
+				  pl->remid,
+				  dlm_mode_str(pl->grmode),
+				  dlm_mode_str(pl->rqmode),
+				  pl->ownpid,
+				  (unsigned long long)pl->xid);
+		}
+	}
+}
+
+static void find_deadlock(struct lockspace *ls);
+
+/*
+ * Start deadlock resolution once every node taking part in the cycle has
+ * its checkpoint ready.
+ *
+ * If any in-cycle node is not ready yet this only logs the per-node state.
+ * Otherwise all_checkpoints_ready is set, the lowest in-cycle nodeid is
+ * recorded in ls->deadlk_low_nodeid, and find_deadlock() is run if that
+ * node is us; every other node defers to it.
+ */
+static void run_deadlock(struct lockspace *ls)
+{
+	struct node *node;
+	int pending = 0;
+	int lowest = -1;
+
+	if (ls->all_checkpoints_ready)
+		log_group(ls, "WARNING: run_deadlock all_checkpoints_ready");
+
+	list_for_each_entry(node, &ls->deadlk_nodes, list) {
+		if (node->in_cycle) {
+			if (!node->checkpoint_ready)
+				pending++;
+
+			log_group(ls, "nodeid %d checkpoint_ready = %d",
+				  node->nodeid, node->checkpoint_ready);
+		}
+	}
+	if (pending)
+		return;
+
+	ls->all_checkpoints_ready = 1;
+
+	list_for_each_entry(node, &ls->deadlk_nodes, list) {
+		if (!node->in_cycle)
+			continue;
+		if (lowest == -1 || node->nodeid < lowest)
+			lowest = node->nodeid;
+	}
+	ls->deadlk_low_nodeid = lowest;
+
+	if (lowest != our_nodeid) {
+		log_group(ls, "defer resolution to low nodeid %d", lowest);
+		return;
+	}
+
+	find_deadlock(ls);
+}
+
+/*
+ * Handle a checkpoint-ready message: read the sender's checkpoint, mark the
+ * sender as ready in ls->deadlk_nodes, then see whether resolution can
+ * start.  len is not used.
+ */
+void receive_checkpoint_ready(struct lockspace *ls, struct dlm_header *hd,
+			      int len)
+{
+	struct node *cur;
+	int sender = hd->nodeid;
+
+	log_group(ls, "receive_checkpoint_ready from %d", sender);
+
+	read_checkpoint(ls, sender);
+
+	list_for_each_entry(cur, &ls->deadlk_nodes, list) {
+		if (cur->nodeid != sender)
+			continue;
+		cur->checkpoint_ready = 1;
+		break;
+	}
+
+	run_deadlock(ls);
+}
+
+/*
+ * Handle a cycle-start message.  Ignored while a cycle is already running.
+ * Otherwise the cycle is marked running and its start time recorded, every
+ * known node is flagged as taking part, our locks are read from debugfs,
+ * written to our checkpoint, and checkpoint-ready is sent.  If debugfs
+ * cannot be read the function returns after logging, leaving the cycle
+ * marked as running.  len is not used.
+ */
+void receive_cycle_start(struct lockspace *ls, struct dlm_header *hd, int len)
+{
+	struct node *cur;
+	int sender = hd->nodeid;
+
+	log_group(ls, "receive_cycle_start from %d", sender);
+
+	if (ls->cycle_running) {
+		log_group(ls, "cycle already running");
+		return;
+	}
+	ls->cycle_running = 1;
+	gettimeofday(&ls->cycle_start_time, NULL);
+
+	list_for_each_entry(cur, &ls->deadlk_nodes, list)
+		cur->in_cycle = 1;
+
+	if (read_debugfs_locks(ls) < 0) {
+		log_error("can't read dlm debugfs file: %s", strerror(errno));
+		return;
+	}
+
+	write_checkpoint(ls);
+	send_checkpoint_ready(ls);
+}
+
+/* Microseconds elapsed from *start to *stop. */
+static uint64_t dt_usec(struct timeval *start, struct timeval *stop)
+{
+	uint64_t usec = stop->tv_sec - start->tv_sec;
+
+	usec = usec * 1000000;
+	return usec + (stop->tv_usec - start->tv_usec);
+}
+
+/* TODO: nodes added during a cycle - what will they do with messages
+ they recv from other nodes running the cycle? */
+
+/*
+ * Handle a cycle-end message: log how long the cycle took, reset the cycle
+ * state and every node's checkpoint_ready flag, free the resources and
+ * transactions collected for the cycle, and unlink our checkpoint.
+ * Logged as an error and ignored when no cycle is running.  len is not
+ * used.
+ */
+void receive_cycle_end(struct lockspace *ls, struct dlm_header *hd, int len)
+{
+	struct node *cur;
+	int sender = hd->nodeid;
+	uint64_t elapsed;
+
+	if (!ls->cycle_running) {
+		log_error("receive_cycle_end %s from %d: no cycle running",
+			  ls->name, sender);
+		return;
+	}
+
+	gettimeofday(&ls->cycle_end_time, NULL);
+	elapsed = dt_usec(&ls->cycle_start_time, &ls->cycle_end_time);
+	log_group(ls, "receive_cycle_end: from %d cycle time %.2f s",
+		  sender, elapsed * 1.e-6);
+
+	ls->all_checkpoints_ready = 0;
+	ls->cycle_running = 0;
+
+	list_for_each_entry(cur, &ls->deadlk_nodes, list)
+		cur->checkpoint_ready = 0;
+
+	free_resources(ls);
+	free_transactions(ls);
+	unlink_checkpoint(ls);
+}
+
+/*
+ * Handle a cancel-lock message by asking libdlm to cancel lock id
+ * hd->msgdata in lockspace ls.  Failures are logged.  len is not used.
+ *
+ * NOTE(review): the message is acted on only when hd->nodeid equals our
+ * nodeid, and hd->nodeid is treated as the sender by the other receive
+ * functions in this file, while send_cancel_lock() puts the target in
+ * to_nodeid - confirm that filtering on nodeid is what is intended.
+ */
+void receive_cancel_lock(struct lockspace *ls, struct dlm_header *hd, int len)
+{
+	dlm_lshandle_t lsh;
+	uint32_t lkid = hd->msgdata;
+	int nodeid = hd->nodeid;
+
+	if (nodeid != our_nodeid)
+		return;
+
+	lsh = dlm_open_lockspace(ls->name);
+	if (!lsh) {
+		log_error("deadlock cancel %x from %d can't open lockspace %s",
+			  lkid, nodeid, ls->name);
+		return;
+	}
+
+	log_group(ls, "receive_cancel_lock %x from %d", lkid, nodeid);
+
+	if (dlm_ls_deadlock_cancel(lsh, lkid, 0) < 0) {
+		log_error("deadlock cancel %x from %x lib cancel errno %d",
+			  lkid, nodeid, errno);
+	}
+
+	dlm_close_lockspace(lsh);
+}
+
+/*
+ * Append a zero-initialised entry for nodeid to ls->deadlk_nodes.  An
+ * allocation failure is logged and the node is not added.
+ */
+static void node_joined(struct lockspace *ls, int nodeid)
+{
+	struct node *entry = calloc(1, sizeof(struct node));
+
+	if (entry == NULL) {
+		log_error("node_joined: no memory");
+		disable_deadlock();
+		return;
+	}
+
+	entry->nodeid = nodeid;
+	list_add_tail(&entry->list, &ls->deadlk_nodes);
+	log_group(ls, "node %d joined deadlock cpg", nodeid);
+}
+
+/*
+ * Remove and free every entry for nodeid on ls->deadlk_nodes.  reason is
+ * not used.
+ */
+static void node_left(struct lockspace *ls, int nodeid, int reason)
+{
+	struct node *cur, *next;
+
+	list_for_each_entry_safe(cur, next, &ls->deadlk_nodes, list) {
+		if (cur->nodeid == nodeid) {
+			list_del(&cur->list);
+			free(cur);
+			log_group(ls, "node %d left deadlock cpg", nodeid);
+		}
+	}
+}
+
+static void purge_locks(struct lockspace *ls, int nodeid);
+
+/*
+ * Apply a cpg configuration change to the deadlock node list of ls.
+ * Does nothing unless cfgd_enable_deadlk is set.
+ *
+ * The first change seen for a lockspace seeds the node list from the full
+ * member list.  Later changes add the joined nodes and remove the departed
+ * ones.  If nodes left while a cycle is running:
+ *   - before all checkpoints were ready, run_deadlock() is re-evaluated,
+ *     since the departed node may have been the one being waited for;
+ *   - afterwards, the departed nodes' locks are purged and, if the low node
+ *     was among them, run_deadlock() is called to pick a new low node.
+ */
+void deadlk_confchg(struct lockspace *ls,
+		    const struct cpg_address *member_list,
+		    size_t member_list_entries,
+		    const struct cpg_address *left_list,
+		    size_t left_list_entries,
+		    const struct cpg_address *joined_list,
+		    size_t joined_list_entries)
+{
+	int idx;
+
+	if (!cfgd_enable_deadlk)
+		return;
+
+	if (!ls->deadlk_confchg_init) {
+		ls->deadlk_confchg_init = 1;
+		for (idx = 0; idx < member_list_entries; idx++)
+			node_joined(ls, member_list[idx].nodeid);
+		return;
+	}
+
+	/* nodes added during a cycle won't have node->in_cycle set so they
+	   won't be included in any of the cycle processing */
+
+	for (idx = 0; idx < joined_list_entries; idx++)
+		node_joined(ls, joined_list[idx].nodeid);
+
+	for (idx = 0; idx < left_list_entries; idx++)
+		node_left(ls, left_list[idx].nodeid, left_list[idx].reason);
+
+	/* only departures during a running cycle need more work */
+	if (!ls->cycle_running || !left_list_entries)
+		return;
+
+	if (!ls->all_checkpoints_ready) {
+		run_deadlock(ls);
+		return;
+	}
+
+	for (idx = 0; idx < left_list_entries; idx++)
+		purge_locks(ls, left_list[idx].nodeid);
+
+	for (idx = 0; idx < left_list_entries; idx++) {
+		if (left_list[idx].nodeid == ls->deadlk_low_nodeid) {
+			/* this will set a new low node which will call
+			   find_deadlock */
+			run_deadlock(ls);
+			break;
+		}
+	}
+}
+
+/* would we ever call this after we've created the transaction lists?
+ I don't think so; I think it can only be called between reading
+ checkpoints */
+
+/*
+ * Drop every lock whose home is nodeid from all resources of ls.  A lock
+ * that is still linked on a transaction's lock list is unlinked from its
+ * resource and logged, but not freed.
+ */
+static void purge_locks(struct lockspace *ls, int nodeid)
+{
+	struct dlm_rsb *r;
+	struct dlm_lkb *cur, *next;
+
+	list_for_each_entry(r, &ls->resources, list) {
+		list_for_each_entry_safe(cur, next, &r->locks, list) {
+			if (cur->home != nodeid)
+				continue;
+
+			list_del(&cur->list);
+
+			if (!list_empty(&cur->trans_list)) {
+				log_group(ls, "purge %d %x on trans",
+					  nodeid, cur->lock.id);
+				continue;
+			}
+			free(cur);
+		}
+	}
+}
+
+/* Link lkb onto transaction tr's lock list and record tr as its owner. */
+static void add_lkb_trans(struct trans *tr, struct dlm_lkb *lkb)
+{
+	lkb->trans = tr;
+	list_add(&lkb->trans_list, &tr->locks);
+}
+
+/*
+ * Find the transaction with the given xid on ls->transactions, creating and
+ * linking an empty one (no locks, no waitfor slots) when none exists.
+ * Returns NULL when out of memory, after logging and calling
+ * disable_deadlock().
+ */
+static struct trans *get_trans(struct lockspace *ls, uint64_t xid)
+{
+	struct trans *tr;
+
+	list_for_each_entry(tr, &ls->transactions, list) {
+		if (tr->xid == xid)
+			return tr;
+	}
+
+	tr = calloc(1, sizeof(struct trans));
+	if (tr == NULL) {
+		log_error("get_trans: no memory");
+		disable_deadlock();
+		return NULL;
+	}
+
+	tr->xid = xid;
+	tr->waitfor = NULL;
+	tr->waitfor_alloc = 0;
+	tr->waitfor_count = 0;
+	INIT_LIST_HEAD(&tr->locks);
+	list_add(&tr->list, &ls->transactions);
+	return tr;
+}
+
+/* Walk every lock of every resource in the lockspace, look up (or create)
+   the transaction named by the lock's xid, and put the lock on that
+   transaction's list.  Stops early if a transaction cannot be obtained;
+   the counts seen so far are logged either way. */
+
+static void create_trans_list(struct lockspace *ls)
+{
+	struct dlm_rsb *rsb;
+	struct dlm_lkb *lock;
+	struct trans *owner;
+	int nr_rsb = 0;
+	int nr_lkb = 0;
+
+	list_for_each_entry(rsb, &ls->resources, list) {
+		nr_rsb++;
+		list_for_each_entry(lock, &rsb->locks, list) {
+			nr_lkb++;
+			owner = get_trans(ls, lock->lock.xid);
+			if (owner == NULL)
+				goto done;
+			add_lkb_trans(owner, lock);
+		}
+	}
+ done:
+	log_group(ls, "create_trans_list: r_count %d lkb_count %d",
+		  nr_rsb, nr_lkb);
+}
+
+/* Decide whether waiting_lkb can coexist with granted_lkb.  Returns 0 when
+   both are the same lock or belong to transactions with the same xid (each
+   case is logged); otherwise returns the result of dlm_modes_compat() on
+   the granted lock's grmode and the waiting lock's rqmode. */
+
+static int locks_compat(struct dlm_lkb *waiting_lkb,
+			struct dlm_lkb *granted_lkb)
+{
+	struct trans *wtr, *gtr;
+
+	if (waiting_lkb == granted_lkb) {
+		log_debug("waiting and granted same lock");
+		return 0;
+	}
+
+	wtr = waiting_lkb->trans;
+	gtr = granted_lkb->trans;
+
+	if (wtr->xid == gtr->xid) {
+		log_debug("waiting and granted same trans %llx",
+			  (unsigned long long)wtr->xid);
+		return 0;
+	}
+
+	return dlm_modes_compat(granted_lkb->lock.grmode,
+				waiting_lkb->lock.rqmode);
+}
+
+/* Return 1 if add_tr occupies any slot of tr's waitfor array, else 0.
+   All waitfor_alloc slots are scanned; empty (NULL) slots never match. */
+
+static int in_waitfor(struct trans *tr, struct trans *add_tr)
+{
+	int slot;
+
+	for (slot = 0; slot < tr->waitfor_alloc; slot++) {
+		if (tr->waitfor[slot] && tr->waitfor[slot] == add_tr)
+			return 1;
+	}
+	return 0;
+}
+
+/* Record that the transaction owning waiting_lkb waits for the transaction
+   owning granted_lkb.
+
+   Nothing is recorded when the two locks are compatible, when both locks
+   belong to the same transaction, or when the dependency is already in the
+   waitfor array.  Otherwise the granted lock's transaction is appended to
+   tr->waitfor (growing the array by TR_NALLOC slots when it is full), its
+   others_waiting_on_us count is bumped, and waiting_lkb->waitfor_trans is
+   pointed at it.
+
+   If the array cannot be grown, the error is logged and the dependency is
+   dropped; tr->waitfor, waitfor_alloc and waitfor_count are left exactly as
+   they were, so the existing entries stay valid. */
+
+static void add_waitfor(struct lockspace *ls, struct dlm_lkb *waiting_lkb,
+			struct dlm_lkb *granted_lkb)
+{
+	struct trans *tr = waiting_lkb->trans;
+	int i;
+
+	if (locks_compat(waiting_lkb, granted_lkb))
+		return;
+
+	/* this shouldn't happen AFAIK */
+	if (tr == granted_lkb->trans) {
+		log_group(ls, "trans %llx waiting on self",
+			  (unsigned long long)tr->xid);
+		return;
+	}
+
+	/* don't add the same trans to the waitfor list multiple times */
+	if (tr->waitfor_count && in_waitfor(tr, granted_lkb->trans)) {
+		log_group(ls, "trans %llx already waiting for trans %llx, "
+			  "waiting %x %s, granted %x %s",
+			  (unsigned long long)waiting_lkb->trans->xid,
+			  (unsigned long long)granted_lkb->trans->xid,
+			  waiting_lkb->lock.id, waiting_lkb->rsb->name,
+			  granted_lkb->lock.id, granted_lkb->rsb->name);
+		return;
+	}
+
+	if (tr->waitfor_count == tr->waitfor_alloc) {
+		struct trans **grown;
+
+		/* Allocate the larger, zeroed array first and commit it only
+		   on success.  Updating tr->waitfor/waitfor_alloc before the
+		   allocation is known to have worked would leak the old array
+		   and leave a NULL array behind a non-zero count. */
+		grown = calloc(tr->waitfor_alloc + TR_NALLOC, sizeof(*grown));
+		if (!grown) {
+			log_error("add_waitfor no mem %u",
+				  tr->waitfor_alloc + TR_NALLOC);
+			return;
+		}
+
+		/* copy then free old set of pointers */
+		for (i = 0; i < tr->waitfor_count; i++)
+			grown[i] = tr->waitfor[i];
+		free(tr->waitfor);
+
+		tr->waitfor = grown;
+		tr->waitfor_alloc += TR_NALLOC;
+	}
+
+	/* Appending at index waitfor_count relies on the used slots being
+	   contiguous, i.e. on no slot having been cleared by remove_waitfor()
+	   yet -- NOTE(review): confirm this is only called while the graph is
+	   being built. */
+	tr->waitfor[tr->waitfor_count++] = granted_lkb->trans;
+	granted_lkb->trans->others_waiting_on_us++;
+	waiting_lkb->waitfor_trans = granted_lkb->trans;
+}
+
+/* Build the waits-for graph: for each transaction, for each of its locks
+   that is not granted, visit the lock's resource and hand every lock there
+   that is not merely waiting to add_waitfor(), which records the owning
+   transaction as a dependency where appropriate.  depend_count counts the
+   add_waitfor() calls made and is logged at the end. */
+
+static void create_waitfor_graph(struct lockspace *ls)
+{
+	struct dlm_lkb *waiter, *holder;
+	struct dlm_rsb *rsb;
+	struct trans *tr;
+	int depend_count = 0;
+
+	list_for_each_entry(tr, &ls->transactions, list) {
+		list_for_each_entry(waiter, &tr->locks, trans_list) {
+			/* only locks with status CONVERT or WAITING wait */
+			if (waiter->lock.status == DLM_LKSTS_GRANTED)
+				continue;
+
+			rsb = waiter->rsb;
+
+			list_for_each_entry(holder, &rsb->locks, list) {
+				/* holder status must be GRANTED or CONVERT */
+				if (holder->lock.status != DLM_LKSTS_WAITING) {
+					add_waitfor(ls, waiter, holder);
+					depend_count++;
+				}
+			}
+		}
+	}
+
+	log_group(ls, "create_waitfor_graph: depend_count %d", depend_count);
+}
+
+/* Graph reduction, in outline: assume a transaction that's not waiting on
+   any locks will complete, release all the locks it currently holds, and
+   exit.  Other transactions that were blocked waiting on the removed
+   transaction's now-released locks may now be unblocked, complete, release
+   all held locks and exit.  Repeat this until no more transactions can be
+   removed.  If there are transactions remaining, then they are
+   deadlocked. */
+
+/* Clear every slot of tr's waitfor array that points at remove_tr.  Each
+   cleared slot decrements tr->waitfor_count and
+   remove_tr->others_waiting_on_us.  The scan stops as soon as tr is no
+   longer waiting for anything. */
+
+static void remove_waitfor(struct trans *tr, struct trans *remove_tr)
+{
+	int slot;
+
+	for (slot = 0; slot < tr->waitfor_alloc && tr->waitfor_count; slot++) {
+		if (tr->waitfor[slot] && tr->waitfor[slot] == remove_tr) {
+			tr->waitfor[slot] = NULL;
+			tr->waitfor_count--;
+			remove_tr->others_waiting_on_us--;
+		}
+	}
+}
+
+/* remove_tr is not waiting for anything: assume it completes and goes away,
+   so strip it from the waitfor array of every other transaction.  The walk
+   ends early once nobody is left waiting on remove_tr; a leftover non-zero
+   count afterwards is logged. */
+
+static void remove_trans(struct lockspace *ls, struct trans *remove_tr)
+{
+	struct trans *other;
+
+	list_for_each_entry(other, &ls->transactions, list) {
+		if (other == remove_tr)
+			continue;
+		if (remove_tr->others_waiting_on_us == 0)
+			break;
+		remove_waitfor(other, remove_tr);
+	}
+
+	if (remove_tr->others_waiting_on_us != 0) {
+		log_group(ls, "trans %llx removed others waiting %d",
+			  (unsigned long long)remove_tr->xid,
+			  remove_tr->others_waiting_on_us);
+	}
+}
+
+/* One reduction pass over ls->transactions.  Every transaction with a zero
+   waitfor_count is removed from the other transactions' waitfor arrays,
+   unlinked and freed; the rest are counted as blocked.  Both counts are
+   logged.  Returns the number of transactions removed in this pass. */
+
+static int reduce_waitfor_graph(struct lockspace *ls)
+{
+	struct trans *cur, *next;
+	int blocked = 0;
+	int removed = 0;
+
+	/* _safe variant: entries are freed while walking */
+	list_for_each_entry_safe(cur, next, &ls->transactions, list) {
+		if (!cur->waitfor_count) {
+			remove_trans(ls, cur);
+			list_del(&cur->list);
+			free(cur->waitfor);	/* free(NULL) is a no-op */
+			free(cur);
+			removed++;
+		} else {
+			blocked++;
+		}
+	}
+
+	log_group(ls, "reduce_waitfor_graph: %d blocked, %d removed",
+		  blocked, removed);
+	return removed;
+}
+
+/* Repeat reduction passes until a pass removes no transaction. */
+
+static void reduce_waitfor_graph_loop(struct lockspace *ls)
+{
+	int removed;
+
+	do {
+		removed = reduce_waitfor_graph(ls);
+	} while (removed);
+}
+
+/* Return the first transaction on ls->transactions that at least one other
+   transaction is waiting on, or NULL if there is none. */
+
+static struct trans *find_trans_to_cancel(struct lockspace *ls)
+{
+	struct trans *candidate;
+
+	list_for_each_entry(candidate, &ls->transactions, list) {
+		if (candidate->others_waiting_on_us)
+			return candidate;
+	}
+	return NULL;
+}
+
+/* Pick a transaction with find_trans_to_cancel(), send a cancel for each of
+   its locks that is not granted, drop the matching waitfor entries, and run
+   one reduction pass, which is expected to remove the canceled transaction.
+   Unexpected leftovers are only logged. */
+
+static void cancel_trans(struct lockspace *ls)
+{
+	struct trans *victim;
+	struct dlm_lkb *lkb;
+
+	victim = find_trans_to_cancel(ls);
+	if (!victim) {
+		log_group(ls, "cancel_trans: no trans found");
+		return;
+	}
+
+	list_for_each_entry(lkb, &victim->locks, trans_list) {
+		if (lkb->lock.status != DLM_LKSTS_GRANTED) {
+			send_cancel_lock(ls, victim, lkb);
+
+			/* When this canceled trans has multiple locks all
+			   blocked by locks held by one other trans, that other
+			   trans is only added to the waitfor array once, and
+			   only one of these waiting locks will have
+			   waitfor_trans set.  So, the lkb with non-null
+			   waitfor_trans was the first one responsible for
+			   adding waitfor_trans to the waitfor array.
+
+			   We could potentially forget about keeping track of
+			   lkb->waitfor_trans, forget about calling
+			   remove_waitfor() here and just set waitfor_count = 0
+			   after this loop.  The loss would be that
+			   waitfor_trans->others_waiting_on_us would not get
+			   decremented. */
+
+			if (lkb->waitfor_trans)
+				remove_waitfor(victim, lkb->waitfor_trans);
+		}
+	}
+
+	/* this shouldn't happen, if it does something's not working right */
+	if (victim->waitfor_count)
+		log_group(ls, "cancel_trans: %llx non-zero waitfor_count %d",
+			  (unsigned long long)victim->xid,
+			  victim->waitfor_count);
+
+	/* this should now remove the canceled trans since it now has a zero
+	   waitfor_count */
+	if (!reduce_waitfor_graph(ls))
+		log_group(ls, "canceled trans not removed from graph");
+
+	/* the caller now runs reduce_waitfor_graph() in another loop and it
+	   should completely reduce */
+}
+
+/* Log one transaction: its xid and counters, one line per lock on its lock
+   list, and, if it is waiting for anything, the xid of every transaction
+   in its waitfor array. */
+
+static void dump_trans(struct lockspace *ls, struct trans *tr)
+{
+	struct dlm_lkb *lock;
+	struct trans *dep;
+	int slot;
+
+	log_group(ls, "trans xid %llx waitfor_count %d others_waiting_on_us %d",
+		  (unsigned long long)tr->xid, tr->waitfor_count,
+		  tr->others_waiting_on_us);
+
+	log_group(ls, "locks:");
+
+	list_for_each_entry(lock, &tr->locks, trans_list) {
+		log_group(ls, " %s: id %08x gr %s rq %s pid %u:%u \"%s\"",
+			  status_str(lock->lock.status),
+			  lock->lock.id,
+			  dlm_mode_str(lock->lock.grmode),
+			  dlm_mode_str(lock->lock.rqmode),
+			  lock->home,
+			  lock->lock.ownpid,
+			  lock->rsb->name);
+	}
+
+	if (tr->waitfor_count) {
+		log_group(ls, "waitfor:");
+
+		/* cleared slots are NULL and are skipped */
+		for (slot = 0; slot < tr->waitfor_alloc; slot++) {
+			dep = tr->waitfor[slot];
+			if (dep)
+				log_group(ls, " xid %llx",
+					  (unsigned long long)dep->xid);
+		}
+	}
+}
+
+/* Log a heading followed by a dump of every transaction in the lockspace. */
+
+static void dump_all_trans(struct lockspace *ls)
+{
+	struct trans *cur;
+
+	log_group(ls, "Transaction dump:");
+
+	list_for_each_entry(cur, &ls->transactions, list) {
+		dump_trans(ls, cur);
+	}
+}
+
+/* Run one deadlock check on the lockspace: build the transaction lists and
+   the waits-for graph, reduce the graph, and if transactions remain treat
+   that as a deadlock, cancel one transaction and reduce again.  Every path
+   finishes by calling send_cycle_end(). */
+
+static void find_deadlock(struct lockspace *ls)
+{
+	if (list_empty(&ls->resources)) {
+		log_group(ls, "no deadlock: no resources");
+	} else if (!list_empty(&ls->transactions)) {
+		log_group(ls, "transactions list should be empty");
+	} else {
+		dump_resources(ls);
+		create_trans_list(ls);
+		create_waitfor_graph(ls);
+		dump_all_trans(ls);
+		reduce_waitfor_graph_loop(ls);
+
+		if (list_empty(&ls->transactions)) {
+			log_group(ls, "no deadlock: all transactions reduced");
+		} else {
+			log_group(ls, "found deadlock");
+			dump_all_trans(ls);
+
+			cancel_trans(ls);
+			reduce_waitfor_graph_loop(ls);
+
+			if (list_empty(&ls->transactions)) {
+				log_group(ls, "resolved deadlock with cancel");
+			} else {
+				log_error("deadlock resolution failed");
+				dump_all_trans(ls);
+			}
+		}
+	}
+
+	send_cycle_end(ls);
+}
+
diff --git a/dlm_controld/dlm_controld.8 b/dlm_controld/dlm_controld.8
new file mode 100644
index 0000000..7100f0e
--- /dev/null
+++ b/dlm_controld/dlm_controld.8
@@ -0,0 +1,313 @@
+.TH DLM_CONTROLD 8 2009-01-18 cluster cluster
+
+.SH NAME
+dlm_controld \- daemon that configures dlm according to cluster events
+
+.SH SYNOPSIS
+.B dlm_controld
+[OPTIONS]
+
+.SH DESCRIPTION
+The dlm lives in the kernel, and the cluster infrastructure (corosync
+membership and group management) lives in user space. The dlm in the
+kernel needs to adjust/recover for certain cluster events. It's the job
+of dlm_controld to receive these events and reconfigure the kernel dlm as
+needed. dlm_controld controls and configures the dlm through sysfs and
+configfs files that are considered dlm-internal interfaces.
+
+The cman init script usually starts the dlm_controld daemon.
+
+.SH OPTIONS
+Command line options override a corresponding setting in cluster.conf.
+
+.TP
+.B \-D
+Enable debugging to stderr and don't fork.
+.br
+See also
+.B dlm_tool dump
+in
+.BR dlm_tool (8).
+
+.TP
+.B \-L
+Enable debugging to log file.
+.br
+See also
+.B logging
+in
+.BR cluster.conf (5).
+
+.TP
+.B \-K
+Enable kernel dlm debugging messages.
+.br
+See also
+.B log_debug
+below.
+
+.TP
+.BI \-r " num"
+dlm kernel lowcomms protocol, 0 tcp, 1 sctp, 2 detect.
+2 selects tcp if corosync rrp_mode is "none", otherwise sctp.
+.br
+Default 2.
+
+.TP
+.BI \-g " num"
+groupd compatibility mode, 0 off, 1 on.
+.br
+Default 0.
+
+.TP
+.BI \-f " num"
+Enable (1) or disable (0) fencing recovery dependency.
+.br
+Default 1.
+
+.TP
+.BI \-q " num"
+Enable (1) or disable (0) quorum recovery dependency.
+.br
+Default 0.
+
+.TP
+.BI \-d " num"
+Enable (1) or disable (0) deadlock detection code.
+.br
+Default 0.
+
+.TP
+.BI \-p " num"
+Enable (1) or disable (0) plock code for cluster fs.
+.br
+Default 1.
+
+.TP
+.BI \-l " num"
+Limit the rate of plock operations, 0 for no limit.
+.br
+Default 0.
+
+.TP
+.BI \-o " num"
+Enable (1) or disable (0) plock ownership.
+.br
+Default 1.
+
+.TP
+.BI \-t " ms"
+Plock ownership drop resources time (milliseconds).
+.br
+Default 10000.
+
+.TP
+.BI \-c " num"
+Plock ownership drop resources count.
+.br
+Default 10.
+
+.TP
+.BI \-a " ms"
+Plock ownership drop resources age (milliseconds).
+.br
+Default 10000.
+
+.TP
+.B \-P
+Enable plock debugging messages (can produce excessive output).
+
+.TP
+.B \-h
+Print a help message describing available options, then exit.
+
+.TP
+.B \-V
+Print program version information, then exit.
+
+
+.SH FILES
+.BR cluster.conf (5)
+is usually located at /etc/cluster/cluster.conf. It is not read directly.
+Other cluster components load the contents into memory, and the values are
+accessed through the libccs library.
+
+Configuration options for dlm (kernel) and dlm_controld are added to the
+<dlm /> section of cluster.conf, within the top level <cluster> section.
+
+.SS Kernel options
+
+.TP
+.B protocol
+The network
+.B protocol
+can be set to tcp, sctp or detect which selects tcp or sctp based on
+the corosync rrp_mode configuration (redundant ring protocol).
+The rrp_mode "none" results in tcp. Default detect.
+
+<dlm protocol="detect"/>
+
+.TP
+.B timewarn
+After waiting
+.B timewarn
+centiseconds, the dlm will emit a warning via netlink. This only applies
+to lockspaces created with the DLM_LSFL_TIMEWARN flag, and is used for
+deadlock detection. Default 500 (5 seconds).
+
+<dlm timewarn="500"/>
+
+.TP
+.B log_debug
+DLM kernel debug messages can be enabled by setting
+.B log_debug
+to 1. Default 0.
+
+<dlm log_debug="0"/>
+
+.TP
+.B clusternode/weight
+The lock directory
+.B weight
+can be specified on the clusternode lines. Weights would usually be
+used in the lock server configurations shown below instead.
+
+<clusternode name="node01" nodeid="1" weight="1"/>
+
+.SS Daemon options
+
+.TP
+.B enable_fencing
+See command line description.
+
+<dlm enable_fencing="1"/>
+
+.TP
+.B enable_quorum
+See command line description.
+
+<dlm enable_quorum="0"/>
+
+.TP
+.B enable_deadlk
+See command line description.
+
+<dlm enable_deadlk="0"/>
+
+.TP
+.B enable_plock
+See command line description.
+
+<dlm enable_plock="1"/>
+
+.TP
+.B plock_rate_limit
+See command line description.
+
+<dlm plock_rate_limit="0"/>
+
+.TP
+.B plock_ownership
+See command line description.
+
+<dlm plock_ownership="1"/>
+
+.TP
+.B drop_resources_time
+See command line description.
+
+<dlm drop_resources_time="10000"/>
+
+.TP
+.B drop_resources_count
+See command line description.
+
+<dlm drop_resources_count="10"/>
+
+.TP
+.B drop_resources_age
+See command line description.
+
+<dlm drop_resources_age="10000"/>
+
+.TP
+.B plock_debug
+Enable (1) or disable (0) plock debugging messages (can produce excessive
+output). Default 0.
+
+<dlm plock_debug="0"/>
+
+
+.SS Disabling resource directory
+
+Lockspaces usually use a resource directory to keep track of which node is
+the master of each resource. The dlm can operate without the resource
+directory, though, by statically assigning the master of a resource using
+a hash of the resource name. To enable, set the per-lockspace
+.B nodir
+option to 1.
+
+.nf
+<dlm>
+ <lockspace name="foo" nodir="1"/>
+</dlm>
+.fi
+
+.SS Lock-server configuration
+
+The nodir setting can be combined with node weights to create a
+configuration where select node(s) are the master of all resources/locks.
+These
+.B master
+nodes can be viewed as "lock servers" for the other nodes.
+
+.nf
+<dlm>
+ <lockspace name="foo" nodir="1">
+ <master name="node01"/>
+ </lockspace>
+</dlm>
+
+or,
+
+<dlm>
+ <lockspace name="foo" nodir="1">
+ <master name="node01"/>
+ <master name="node02"/>
+ </lockspace>
+</dlm>
+.fi
+
+Lock management will be partitioned among the available masters. There
+can be any number of masters defined. The designated master nodes will
+master all resources/locks (according to the resource name hash). When no
+masters are members of the lockspace, then the nodes revert to the common
+fully-distributed configuration. Recovery is faster, with little
+disruption, when a non-master node joins/leaves.
+
+There is no special mode in the dlm for this lock server configuration,
+it's just a natural consequence of combining the "nodir" option with node
+weights. When a lockspace has master nodes defined, the master has a
+default weight of 1 and all non-master nodes have weight of 0. An explicit
+non-zero
+.B weight
+can also be assigned to master nodes, e.g.
+
+.nf
+<dlm>
+ <lockspace name="foo" nodir="1">
+ <master name="node01" weight="2"/>
+ <master name="node02" weight="1"/>
+ </lockspace>
+</dlm>
+.fi
+
+In which case node01 will master 2/3 of the total resources and node02 will
+master the other 1/3.
+
+.SH SEE ALSO
+.BR dlm_tool (8),
+.BR fenced (8),
+.BR cman (5),
+.BR cluster.conf (5)
+
diff --git a/dlm_controld/dlm_controld.h b/dlm_controld/dlm_controld.h
new file mode 100644
index 0000000..73e4ecc
--- /dev/null
+++ b/dlm_controld/dlm_controld.h
@@ -0,0 +1,38 @@
+#ifndef __DLM_CONTROLD_DOT_H__
+#define __DLM_CONTROLD_DOT_H__
+
+/* This defines the interface between dlm_controld and libdlmcontrol, and
+ should only be used by libdlmcontrol. */
+
+#define DLMC_SOCK_PATH "dlmc_sock"
+#define DLMC_QUERY_SOCK_PATH "dlmc_query_sock"
+
+#define DLMC_MAGIC 0xD13CD13C
+#define DLMC_VERSION 0x00010001
+
+#define DLMC_CMD_DUMP_DEBUG 1
+#define DLMC_CMD_DUMP_PLOCKS 2
+#define DLMC_CMD_LOCKSPACE_INFO 3
+#define DLMC_CMD_NODE_INFO 4
+#define DLMC_CMD_LOCKSPACES 5
+#define DLMC_CMD_LOCKSPACE_NODES 6
+#define DLMC_CMD_FS_REGISTER 7
+#define DLMC_CMD_FS_UNREGISTER 8
+#define DLMC_CMD_FS_NOTIFIED 9
+#define DLMC_CMD_DEADLOCK_CHECK 10
+#define DLMC_CMD_DUMP_LOG_PLOCK 11
+
+/* Header that starts every message between dlm_controld and libdlmcontrol.
+   NOTE(review): magic, version and command presumably carry DLMC_MAGIC,
+   DLMC_VERSION and one of the DLMC_CMD_ values -- confirm against the
+   senders.  Field order and sizes are part of the interface between the
+   two sides. */
+struct dlmc_header {
+ unsigned int magic;
+ unsigned int version;
+ unsigned int command;
+ unsigned int option;
+ unsigned int len;
+ int data; /* embedded command-specific data, for convenience */
+ int unused1;
+ int unsued2; /* sic: misspelling of "unused2" */
+ char name[DLM_LOCKSPACE_LEN]; /* no terminating null space */
+};
+
+#endif
+
diff --git a/dlm_controld/dlm_daemon.h b/dlm_controld/dlm_daemon.h
new file mode 100644
index 0000000..0c89f67
--- /dev/null
+++ b/dlm_controld/dlm_daemon.h
@@ -0,0 +1,326 @@
+#ifndef __DLM_DAEMON_DOT_H__
+#define __DLM_DAEMON_DOT_H__
+
+#include <sys/types.h>
+#include <asm/types.h>
+#include <sys/uio.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#include <sys/poll.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <fcntl.h>
+#include <netdb.h>
+#include <limits.h>
+#include <unistd.h>
+#include <time.h>
+#include <syslog.h>
+#include <sched.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <dirent.h>
+
+#include <corosync/cpg.h>
+#include <liblogthread.h>
+
+#include <linux/dlmconstants.h>
+#include "libdlmcontrol.h"
+#include "dlm_controld.h"
+#include "config.h"
+#include "list.h"
+#include "rbtree.h"
+#include "linux_endian.h"
+
+/* TODO: cleanup */
+#define CLUSTERVARLIB "/var/lib/cluster"
+#define CLUSTERVARRUN "/var/run/cluster"
+#define LOGDIR "/var/log/cluster"
+#define VERSION "master"
+#define SYSLOGFACILITY LOG_LOCAL4
+#define SYSLOGLEVEL LOG_INFO
+#define REDHAT_COPYRIGHT "Copyright (C) Red Hat, Inc. 2004-2011 All rights reserved."
+#define LOCKFILE_NAME CLUSTERVARRUN "/dlm_controld.pid"
+#define DAEMON_NAME "dlm_controld"
+/* Parenthesized so the OR of the two flags stays one operand wherever the
+   macro is expanded (e.g. next to & or ==, which bind tighter than |). */
+#define DEFAULT_LOG_MODE (LOG_MODE_OUTPUT_FILE|LOG_MODE_OUTPUT_SYSLOG)
+#define DEFAULT_SYSLOG_FACILITY SYSLOGFACILITY
+#define DEFAULT_SYSLOG_PRIORITY SYSLOGLEVEL
+#define DEFAULT_LOGFILE_PRIORITY LOG_INFO /* ? */
+#define DEFAULT_LOGFILE LOGDIR "/" DAEMON_NAME ".log"
+#define DLM_CONFIG_FILE "/etc/dlm.conf"
+
+
+/* DLM_LOCKSPACE_LEN: maximum lockspace name length, from linux/dlmconstants.h.
+ Copied in libdlm.h so apps don't need to include the kernel header.
+ The libcpg limit is larger at CPG_MAX_NAME_LENGTH 128. Our cpg name includes
+ a "dlm:" prefix before the lockspace name. */
+
+/* Maximum members of a ls, should match CPG_MEMBERS_MAX in corosync/cpg.h.
+ There are no max defines in dlm-kernel for lockspace members. */
+
+#define MAX_NODES 128
+
+/* Maximum number of IP addresses per node, when using SCTP and multi-ring in
+ corosync. In dlm-kernel this is DLM_MAX_ADDR_COUNT, currently 3. */
+
+#define MAX_NODE_ADDRESSES 4
+
+/* Max string length printed on a line, for debugging/dump output. */
+
+#define MAXLINE 256
+
+/* cfgk_protocol */
+
+#define PROTO_TCP 0
+#define PROTO_SCTP 1
+#define PROTO_DETECT 2
+
+extern int daemon_debug_opt;
+extern int daemon_quit;
+extern int cluster_down;
+extern int poll_fencing;
+extern int poll_quorum;
+extern int poll_fs;
+extern int poll_ignore_plock;
+extern int poll_drop_plock;
+extern int plock_fd;
+extern int plock_ci;
+extern struct list_head lockspaces;
+extern int cluster_quorate;
+extern uint32_t cluster_ringid_seq;
+extern int our_nodeid;
+extern uint32_t control_minor;
+extern uint32_t monitor_minor;
+extern uint32_t plock_minor;
+extern uint32_t old_plock_minor;
+
+#define LOG_DUMP_SIZE DLMC_DUMP_SIZE
+
+#define LOG_PLOCK 0x00010000
+
+void log_level(char *name_in, uint32_t level_in, const char *fmt, ...);
+
+#define log_error(fmt, args...) log_level(NULL, LOG_ERR, fmt, ##args)
+#define log_debug(fmt, args...) log_level(NULL, LOG_DEBUG, fmt, ##args)
+#define log_group(ls, fmt, args...) log_level((ls)->name, LOG_DEBUG, fmt, ##args)
+
+#define log_plock(ls, fmt, args...) log_level((ls)->name, LOG_PLOCK, fmt, ##args)
+#define log_dlock(ls, fmt, args...) log_level((ls)->name, LOG_PLOCK|LOG_DEBUG, fmt, ##args)
+#define log_elock(ls, fmt, args...) log_level((ls)->name, LOG_PLOCK|LOG_ERR, fmt, ##args)
+
+/* dlm_header types */
+/* Values for the type field of struct dlm_header.  Numbering starts at 1
+   and each following enumerator is one higher, so inserting or reordering
+   entries changes the value of every later type. */
+enum {
+ DLM_MSG_PROTOCOL = 1,
+ DLM_MSG_START,
+ DLM_MSG_PLOCK,
+ DLM_MSG_PLOCK_OWN,
+ DLM_MSG_PLOCK_DROP,
+ DLM_MSG_PLOCK_SYNC_LOCK,
+ DLM_MSG_PLOCK_SYNC_WAITER,
+ DLM_MSG_PLOCKS_DONE,
+ DLM_MSG_PLOCKS_DATA,
+ DLM_MSG_DEADLK_CYCLE_START,
+ DLM_MSG_DEADLK_CYCLE_END,
+ DLM_MSG_DEADLK_CHECKPOINT_READY,
+ DLM_MSG_DEADLK_CANCEL_LOCK
+};
+
+/* dlm_header flags */
+#define DLM_MFLG_JOINING 1 /* accompanies start, we are joining */
+#define DLM_MFLG_HAVEPLOCK 2 /* accompanies start, we have plock state */
+#define DLM_MFLG_NACK 4 /* accompanies start, prevent wrong match when
+ two outstanding changes are the same */
+#define DLM_MFLG_PLOCK_SIG 8 /* msgdata2 is a plock signature */
+
+/* Header carried by every DLM_MSG_ message.  All members are fixed-width
+   integers.  NOTE(review): presumably multi-byte members are converted
+   with the cpu_to_le/le_to_cpu helpers before sending -- confirm in the
+   send/receive code. */
+struct dlm_header {
+ uint16_t version[3];
+ uint16_t type; /* DLM_MSG_ */
+ uint32_t nodeid; /* sender */
+ uint32_t to_nodeid; /* recipient, 0 for all */
+ uint32_t global_id; /* global unique id for this lockspace */
+ uint32_t flags; /* DLM_MFLG_ */
+ uint32_t msgdata; /* in-header payload depends on MSG type; lkid
+ for deadlock, seq for lockspace membership */
+ uint32_t msgdata2; /* second MSG-specific data */
+ uint64_t pad;
+};
+
+/* Per-lockspace daemon state, linked through 'list'.  Members are grouped
+   by the subsystem that uses them: membership, plocks, and (currently
+   compiled out) deadlock detection. */
+struct lockspace {
+ struct list_head list;
+ char name[DLM_LOCKSPACE_LEN+1]; /* one extra byte beyond the max name length */
+ uint32_t global_id;
+
+ /* lockspace membership stuff */
+
+ cpg_handle_t cpg_handle;
+ int cpg_client;
+ int cpg_fd;
+ int joining;
+ int leaving;
+ int kernel_stopped;
+ int fs_registered;
+ uint32_t change_seq;
+ uint32_t started_count;
+ struct change *started_change;
+ struct list_head changes;
+ struct list_head node_history;
+
+ /* plock stuff */
+
+ int plock_data_node;
+ int need_plocks;
+ int save_plocks;
+ int disable_plock;
+ uint32_t recv_plocks_data_count;
+ uint32_t associated_mg_id;
+ struct list_head saved_messages;
+ struct list_head plock_resources;
+ struct rb_root plock_resources_root;
+ time_t last_checkpoint_time;
+ time_t last_plock_time;
+ struct timeval drop_resources_last;
+
+ /* The deadlock members below are excluded from the build by #if 0.
+    NOTE(review): the deadlock code reads several of them (for example
+    ls->resources, ls->transactions, ls->cycle_running), so it can only
+    build while it is compiled out as well -- confirm. */
+#if 0
+ /* deadlock stuff */
+
+ int deadlk_low_nodeid;
+ struct list_head deadlk_nodes;
+ uint64_t deadlk_ckpt_handle;
+ int deadlk_confchg_init;
+ struct list_head transactions;
+ struct list_head resources;
+ struct timeval cycle_start_time;
+ struct timeval cycle_end_time;
+ struct timeval last_send_cycle_start;
+ int cycle_running;
+ int all_checkpoints_ready;
+#endif
+};
+
+/* action.c */
+void set_associated_id(uint32_t mg_id);
+int set_sysfs_control(char *name, int val);
+int set_sysfs_event_done(char *name, int val);
+int set_sysfs_id(char *name, uint32_t id);
+int set_configfs_members(char *name, int new_count, int *new_members,
+ int renew_count, int *renew_members);
+int add_configfs_node(int nodeid, char *addr, int addrlen, int local);
+void del_configfs_node(int nodeid);
+void clear_configfs(void);
+int setup_configfs(void);
+int check_uncontrolled_lockspaces(void);
+int setup_misc_devices(void);
+int path_exists(const char *path);
+
+/* config.c */
+void setup_config(int update);
+int get_weight(int nodeid, char *lockspace);
+
+/* cpg.c */
+int setup_cpg_daemon(void);
+void close_cpg_daemon(void);
+void process_cpg_daemon(int ci);
+int set_protocol(void);
+void process_lockspace_changes(void);
+void dlm_send_message(struct lockspace *ls, char *buf, int len);
+int dlm_join_lockspace(struct lockspace *ls);
+int dlm_leave_lockspace(struct lockspace *ls);
+const char *msg_name(int type);
+void update_flow_control_status(void);
+void node_history_cluster_add(int nodeid);
+void node_history_cluster_remove(int nodeid);
+int set_node_info(struct lockspace *ls, int nodeid, struct dlmc_node *node);
+int set_lockspace_info(struct lockspace *ls, struct dlmc_lockspace *lockspace);
+int set_lockspaces(int *count, struct dlmc_lockspace **lss_out);
+int set_lockspace_nodes(struct lockspace *ls, int option, int *node_count,
+ struct dlmc_node **nodes_out);
+int set_fs_notified(struct lockspace *ls, int nodeid);
+
+/* deadlock.c */
+void setup_deadlock(void);
+void send_cycle_start(struct lockspace *ls);
+void receive_checkpoint_ready(struct lockspace *ls, struct dlm_header *hd,
+ int len);
+void receive_cycle_start(struct lockspace *ls, struct dlm_header *hd, int len);
+void receive_cycle_end(struct lockspace *ls, struct dlm_header *hd, int len);
+void receive_cancel_lock(struct lockspace *ls, struct dlm_header *hd, int len);
+void deadlk_confchg(struct lockspace *ls,
+ const struct cpg_address *member_list,
+ size_t member_list_entries,
+ const struct cpg_address *left_list,
+ size_t left_list_entries,
+ const struct cpg_address *joined_list,
+ size_t joined_list_entries);
+
+/* main.c */
+int do_read(int fd, void *buf, size_t count);
+int do_write(int fd, void *buf, size_t count);
+void client_dead(int ci);
+int client_add(int fd, void (*workfn)(int ci), void (*deadfn)(int ci));
+int client_fd(int ci);
+void client_ignore(int ci, int fd);
+void client_back(int ci, int fd);
+struct lockspace *find_ls(char *name);
+struct lockspace *find_ls_id(uint32_t id);
+const char *dlm_mode_str(int mode);
+void cluster_dead(int ci);
+
+/* member_cman.c */
+int setup_cluster(void);
+void close_cluster(void);
+void process_cluster(int ci);
+void update_cluster(void);
+int is_cluster_member(uint32_t nodeid);
+int setup_cluster_cfg(void);
+void close_cluster_cfg(void);
+void process_cluster_cfg(int ci);
+void kick_node_from_cluster(int nodeid);
+int fence_node_time(int nodeid, uint64_t *last_fenced_time);
+int fence_in_progress(int *count);
+
+/* netlink.c */
+int setup_netlink(void);
+void process_netlink(int ci);
+
+/* plock.c */
+int setup_plocks(void);
+void close_plocks(void);
+void process_plocks(int ci);
+void drop_resources_all(void);
+int limit_plocks(void);
+void receive_plock(struct lockspace *ls, struct dlm_header *hd, int len);
+void receive_own(struct lockspace *ls, struct dlm_header *hd, int len);
+void receive_sync(struct lockspace *ls, struct dlm_header *hd, int len);
+void receive_drop(struct lockspace *ls, struct dlm_header *hd, int len);
+void process_saved_plocks(struct lockspace *ls);
+void purge_plocks(struct lockspace *ls, int nodeid, int unmount);
+int copy_plock_state(struct lockspace *ls, char *buf, int *len_out);
+
+void send_all_plocks_data(struct lockspace *ls, uint32_t seq, uint32_t *plocks_data);
+void receive_plocks_data(struct lockspace *ls, struct dlm_header *hd, int len);
+void clear_plocks_data(struct lockspace *ls);
+
+/* logging.c */
+
+void init_logging(void);
+void setup_logging(void);
+void close_logging(void);
+void copy_log_dump(char *buf, int *len);
+void copy_log_dump_plock(char *buf, int *len);
+
+/* crc.c */
+uint32_t cpgname_to_crc(const char *data, int len);
+
+#endif
+
diff --git a/dlm_controld/endian.h b/dlm_controld/endian.h
new file mode 100644
index 0000000..43089d2
--- /dev/null
+++ b/dlm_controld/endian.h
@@ -0,0 +1,68 @@
+#ifndef __LINUX_ENDIAN_DOT_H__
+#define __LINUX_ENDIAN_DOT_H__
+
+
+#include <endian.h>
+#include <byteswap.h>
+
+
+/* I'm not sure which versions of alpha glibc/gcc are broken,
+ so fix all of them. */
+#ifdef __alpha__
+#undef bswap_64
+/* Replacement 64-bit byte swap for alpha: swap the bytes of each 32-bit
+   half with bswap_32() and exchange the two halves. */
+static __inline__ unsigned long bswap_64(unsigned long x)
+{
+	unsigned int lo = bswap_32((unsigned int)x);
+	unsigned int hi = bswap_32((unsigned int)(x >> 32));
+
+	/* the swapped low half becomes the new high half */
+	return ((unsigned long)lo << 32) | hi;
+}
+#endif /* __alpha__ */
+
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+
+#define be16_to_cpu(x) (x)
+#define be32_to_cpu(x) (x)
+#define be64_to_cpu(x) (x)
+
+#define cpu_to_be16(x) (x)
+#define cpu_to_be32(x) (x)
+#define cpu_to_be64(x) (x)
+
+#define le16_to_cpu(x) (bswap_16((x)))
+#define le32_to_cpu(x) (bswap_32((x)))
+#define le64_to_cpu(x) (bswap_64((x)))
+
+#define cpu_to_le16(x) (bswap_16((x)))
+#define cpu_to_le32(x) (bswap_32((x)))
+#define cpu_to_le64(x) (bswap_64((x)))
+
+#endif /* __BYTE_ORDER == __BIG_ENDIAN */
+
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+
+#define be16_to_cpu(x) (bswap_16((x)))
+#define be32_to_cpu(x) (bswap_32((x)))
+#define be64_to_cpu(x) (bswap_64((x)))
+
+#define cpu_to_be16(x) (bswap_16((x)))
+#define cpu_to_be32(x) (bswap_32((x)))
+#define cpu_to_be64(x) (bswap_64((x)))
+
+#define le16_to_cpu(x) (x)
+#define le32_to_cpu(x) (x)
+#define le64_to_cpu(x) (x)
+
+#define cpu_to_le16(x) (x)
+#define cpu_to_le32(x) (x)
+#define cpu_to_le64(x) (x)
+
+#endif /* __BYTE_ORDER == __LITTLE_ENDIAN */
+
+
+#endif /* __LINUX_ENDIAN_DOT_H__ */
diff --git a/dlm_controld/list.h b/dlm_controld/list.h
new file mode 100644
index 0000000..8100cbc
--- /dev/null
+++ b/dlm_controld/list.h
@@ -0,0 +1,336 @@
+/* Copied from include/linux/list.h */
+
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ *
+ * @ptr: the pointer to the member.
+ * @type: the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+
+/*
+ * These are non-NULL pointers that will result in page faults
+ * under normal circumstances, used to verify that nobody uses
+ * non-initialized list entries.
+ */
+#define LIST_POISON1 ((void *) 0x00100100)
+#define LIST_POISON2 ((void *) 0x00200200)
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) do { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_add(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ /* link new in front of next ... */
+ next->prev = new;
+ new->next = next;
+ /* ... and behind prev */
+ new->prev = prev;
+ prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+ /* insert between head and the entry currently following it */
+ __list_add(new, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+ /* insert between the entry currently preceding head and head itself */
+ __list_add(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+ /* prev and next now reference each other; the entry that sat between
+ them is not touched here */
+ next->prev = prev;
+ prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+static inline void list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ /* overwrite the stale links with the LIST_POISON values so that any
+ later use of this entry through them is conspicuous */
+ entry->next = LIST_POISON1;
+ entry->prev = LIST_POISON2;
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+	struct list_head *before = entry->prev;
+	struct list_head *after = entry->next;
+
+	/* unlink: the two neighbours now point at each other */
+	after->prev = before;
+	before->next = after;
+
+	/* leave the removed entry as an empty, self-referencing list */
+	entry->next = entry;
+	entry->prev = entry;
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+	struct list_head *before = list->prev;
+	struct list_head *after = list->next;
+
+	/* take the entry out of whatever list it is on */
+	__list_del(before, after);
+
+	/* head->next is read only after the unlink, in case head was a
+	   neighbour of the entry */
+	__list_add(list, head, head->next);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+				  struct list_head *head)
+{
+	struct list_head *before = list->prev;
+	struct list_head *after = list->next;
+
+	/* take the entry out of whatever list it is on */
+	__list_del(before, after);
+
+	/* head->prev is read only after the unlink, in case head was a
+	   neighbour of the entry */
+	__list_add(list, head->prev, head);
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+	const struct list_head *first = head->next;
+
+	/* an empty list head points back at itself */
+	return first == head;
+}
+
+/**
+ * list_empty_careful - tests whether a list is
+ * empty _and_ checks that no other CPU might be
+ * in the process of still modifying either member
+ *
+ * NOTE: using list_empty_careful() without synchronization
+ * can only be safe if the only activity that can happen
+ * to the list entry is list_del_init(). Eg. it cannot be used
+ * if another CPU could re-list_add() it.
+ *
+ * @head: the list to test.
+ */
+static inline int list_empty_careful(const struct list_head *head)
+{
+	const struct list_head *next = head->next;
+
+	if (next != head)
+		return 0;
+
+	/* the forward link says empty; require the backward link to agree */
+	return next == head->prev;
+}
+
+/* Inserts the entries of @list (which must not be empty, see the callers)
+ between @head and the entry that followed @head. @list itself is left
+ with its old next/prev pointers. */
+static inline void __list_splice(struct list_head *list,
+ struct list_head *head)
+{
+ struct list_head *first = list->next;
+ struct list_head *last = list->prev;
+ struct list_head *at = head->next;
+
+ /* head is followed by the first spliced entry */
+ first->prev = head;
+ head->next = first;
+
+ /* the last spliced entry is followed by head's old successor */
+ last->next = at;
+ at->prev = last;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+	/* nothing to join when the source list has no entries */
+	if (list_empty(list))
+		return;
+
+	__list_splice(list, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_init(struct list_head *list,
+				    struct list_head *head)
+{
+	/* nothing to join when the source list has no entries */
+	if (list_empty(list))
+		return;
+
+	__list_splice(list, head);
+
+	/* the source head still holds its old links; reset it to empty */
+	INIT_LIST_HEAD(list);
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+/**
+ * list_first_entry - get the first element from a list
+ * @ptr: the list head to take the element from.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ */
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * __list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ *
+ * This variant differs from list_for_each() in that it's the
+ * simplest possible list iteration code, no prefetching is done.
+ * Use this for code that knows the list to be very short (empty
+ * or 1 entry) most of the time.
+ */
+#define __list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_prev - iterate over a list backwards
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+ for (pos = (head)->prev; pos != (head); pos = pos->prev)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop counter.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_reverse(pos, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_prepare_entry - prepare a pos entry for use as a start point in
+ * list_for_each_entry_continue
+ * @pos: the type * to use as a start point
+ * @head: the head of the list
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_prepare_entry(pos, head, member) \
+ ((pos) ? : list_entry(head, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_continue - iterate over list of given type
+ * continuing after existing point
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_continue(pos, head, member) \
+ for (pos = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop counter.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+
+#endif
diff --git a/dlm_controld/logging.c b/dlm_controld/logging.c
new file mode 100644
index 0000000..e6e82b7
--- /dev/null
+++ b/dlm_controld/logging.c
@@ -0,0 +1,173 @@
+#include "dlm_daemon.h"
+
+static int log_mode;
+static int syslog_facility;
+static int syslog_priority;
+static int logfile_priority;
+static char logfile[PATH_MAX];
+
+/* Loads the compiled-in logging defaults into the file-scope settings and
+   hands them to logt_init(). */
+void init_logging(void)
+{
+	log_mode = DEFAULT_LOG_MODE;
+	syslog_facility = DEFAULT_SYSLOG_FACILITY;
+	syslog_priority = DEFAULT_SYSLOG_PRIORITY;
+	strcpy(logfile, DEFAULT_LOGFILE);
+
+	/* logfile_priority is the only one of these options that
+	   can be controlled from command line or environment variable */
+	logfile_priority = cfgd_debug_logfile ? LOG_DEBUG
+					      : DEFAULT_LOGFILE_PRIORITY;
+
+	log_debug("logging mode %d syslog f %d p %d logfile p %d %s",
+		  log_mode, syslog_facility, syslog_priority,
+		  logfile_priority, logfile);
+
+	logt_init(DAEMON_NAME, log_mode, syslog_facility, syslog_priority,
+		  logfile_priority, logfile);
+}
+
+/* Re-applies the current file-scope logging settings through logt_conf().
+ Reading the settings from configuration is still disabled (see TODO),
+ so the values passed are whatever init_logging() left in place. */
+void setup_logging(void)
+{
+ /* TODO */
+ /*
+ ccs_read_logging(ccs_handle, DAEMON_NAME,
+ &cfgd_debug_logfile, &log_mode,
+ &syslog_facility, &syslog_priority,
+ &logfile_priority, logfile);
+ */
+
+ log_debug("logging mode %d syslog f %d p %d logfile p %d %s",
+ log_mode, syslog_facility, syslog_priority,
+ logfile_priority, logfile);
+
+ logt_conf(DAEMON_NAME, log_mode, syslog_facility, syslog_priority,
+ logfile_priority, logfile);
+}
+
+/* Shuts down the logt layer that init_logging() started. */
+void close_logging(void)
+{
+ logt_exit();
+}
+
+#define NAME_ID_SIZE 32
+#define LOG_STR_LEN 512
+static char log_str[LOG_STR_LEN];
+
+static char log_dump[LOG_DUMP_SIZE];
+static unsigned int log_point;
+static unsigned int log_wrap;
+
+static char log_dump_plock[LOG_DUMP_SIZE];
+static unsigned int log_point_plock;
+static unsigned int log_wrap_plock;
+
+/* Copies the contents of the circular buffer log_buf into buf and stores
+   the number of bytes copied in *len.  *point is the current write
+   offset in log_buf and *wrap is non-zero once the buffer has wrapped.
+   buf must have room for LOG_DUMP_SIZE bytes. */
+static void log_copy(char *buf, int *len, char *log_buf,
+		     unsigned int *point, unsigned int *wrap)
+{
+	unsigned int pos = *point;
+	int tail_len;
+
+	if (*wrap) {
+		/* wrapped: copy from the write offset to the end first,
+		   then the part before the write offset */
+		tail_len = LOG_DUMP_SIZE - pos;
+		memcpy(buf, log_buf + pos, tail_len);
+		if (pos)
+			memcpy(buf + tail_len, log_buf, pos);
+		*len = LOG_DUMP_SIZE;
+		return;
+	}
+
+	if (!pos) {
+		/* nothing has been written yet */
+		*len = 0;
+		return;
+	}
+
+	/* not wrapped: everything before the write offset, minus the
+	   final byte */
+	memcpy(buf, log_buf, pos - 1);
+	*len = pos - 1;
+}
+
+/* Copies the general debug log buffer into buf and sets *len to the number
+ of bytes copied (at most LOG_DUMP_SIZE, see log_copy()). */
+void copy_log_dump(char *buf, int *len)
+{
+ log_copy(buf, len, log_dump, &log_point, &log_wrap);
+}
+
+/* Copies the plock log buffer into buf and sets *len to the number of
+ bytes copied (at most LOG_DUMP_SIZE, see log_copy()). */
+void copy_log_dump_plock(char *buf, int *len)
+{
+ log_copy(buf, len, log_dump_plock, &log_point_plock, &log_wrap_plock);
+}
+
+/* Appends the first len bytes of the file-scope log_str to the circular
+   buffer log_buf.  *point is the write offset and is advanced; *wrap is
+   set to 1 whenever the offset reaches LOG_DUMP_SIZE and restarts at 0.
+   The level argument is not used here. */
+static void log_save_str(int level, int len, char *log_buf,
+			 unsigned int *point, unsigned int *wrap)
+{
+	unsigned int pos = *point;
+	unsigned int wrapped = *wrap;
+	int n;
+
+	if (len < LOG_DUMP_SIZE - pos) {
+		/* the whole string fits before the end of the buffer */
+		memcpy(log_buf + pos, log_str, len);
+		pos += len;
+
+		if (pos == LOG_DUMP_SIZE) {
+			pos = 0;
+			wrapped = 1;
+		}
+	} else {
+		/* copy byte by byte, restarting at offset 0 on reaching
+		   the end of the buffer */
+		for (n = 0; n < len; n++) {
+			log_buf[pos++] = log_str[n];
+
+			if (pos == LOG_DUMP_SIZE) {
+				pos = 0;
+				wrapped = 1;
+			}
+		}
+	}
+
+	*point = pos;
+	*wrap = wrapped;
+}
+
+/* Formats one log line as "<time> <name> <message>\n" into the file-scope
+ log_str and distributes it.
+ name_in: optional prefix (may be NULL).
+ level_in: low 16 bits are the level handed to logt_print(); the high
+ 16 bits are flags, of which LOG_PLOCK is tested here.
+ fmt, ...: printf-style message.
+ log_str is a single static buffer, so this function is not safe to call
+ from more than one thread at a time. */
+void log_level(char *name_in, uint32_t level_in, const char *fmt, ...)
+{
+ va_list ap;
+ char name[NAME_ID_SIZE + 1];
+ uint32_t level = level_in & 0x0000FFFF;
+ uint32_t extra = level_in & 0xFFFF0000;
+ int ret, pos = 0;
+ /* keep two bytes of log_str in reserve for the '\n' and '\0' below */
+ int len = LOG_STR_LEN - 2;
+ int plock = extra & LOG_PLOCK;
+
+ memset(name, 0, sizeof(name));
+
+ if (name_in)
+ snprintf(name, NAME_ID_SIZE, "%s ", name_in);
+
+ ret = snprintf(log_str + pos, len - pos, "%llu %s",
+ (unsigned long long)time(NULL), name);
+
+ pos += ret;
+
+ va_start(ap, fmt);
+ ret = vsnprintf(log_str + pos, len - pos, fmt, ap);
+ va_end(ap);
+
+ /* on truncation the newline goes where vsnprintf put its terminator */
+ if (ret >= len - pos)
+ pos = len - 1;
+ else
+ pos += ret;
+
+ log_str[pos++] = '\n';
+ log_str[pos++] = '\0';
+
+ /* pos - 1 is the line length including '\n' but not the '\0' */
+ if (level)
+ log_save_str(level, pos - 1, log_dump, &log_point, &log_wrap);
+ if (plock)
+ log_save_str(level, pos - 1, log_dump_plock, &log_point_plock, &log_wrap_plock);
+ if (level)
+ logt_print(level, "%s", log_str);
+
+ if (!daemon_debug_opt)
+ return;
+
+ /* with the debug option set, also echo the line to stderr */
+ if (level || (plock && cfgd_plock_debug))
+ fprintf(stderr, "%s", log_str);
+}
+
diff --git a/dlm_controld/main.c b/dlm_controld/main.c
new file mode 100644
index 0000000..b0d9b03
--- /dev/null
+++ b/dlm_controld/main.c
@@ -0,0 +1,1274 @@
+#include "dlm_daemon.h"
+#include <pthread.h>
+#include <linux/netlink.h>
+#include <linux/genetlink.h>
+#include <linux/dlm_netlink.h>
+
+#define CLIENT_NALLOC 32
+static int client_maxi;
+static int client_size = 0;
+static struct client *client = NULL;
+static struct pollfd *pollfd = NULL;
+static pthread_t query_thread;
+static pthread_mutex_t query_mutex;
+static struct list_head fs_register_list;
+static int kernel_monitor_fd;
+
+/* One slot of the client array; the slot with the same index in the
+ pollfd array is the one handed to poll(). */
+struct client {
+ int fd; /* -1 marks a free slot */
+ void *workfn; /* called by loop() as void (*)(int ci) on POLLIN */
+ void *deadfn; /* called by loop() as void (*)(int ci) on POLLERR/POLLHUP/POLLNVAL */
+ struct lockspace *ls;
+};
+
+/* Reads exactly count bytes from fd into buf, retrying after EINTR and
+   after short reads.  Returns 0 once count bytes have been read, or -1
+   on end-of-file or any other read error. */
+int do_read(int fd, void *buf, size_t count)
+{
+	char *dst = buf;
+	int off = 0;
+
+	while (off < count) {
+		int rv = read(fd, dst + off, count - off);
+
+		if (rv > 0) {
+			off += rv;
+			continue;
+		}
+
+		/* an interrupted call is simply repeated */
+		if (rv == -1 && errno == EINTR)
+			continue;
+
+		/* end-of-file (0) or a real error */
+		return -1;
+	}
+	return 0;
+}
+
+/* Writes count bytes from buf to fd, retrying after EINTR and continuing
+   after short writes.  Returns 0 when everything has been written, or
+   the negative write() result after logging errno. */
+int do_write(int fd, void *buf, size_t count)
+{
+	char *src = buf;
+	int off = 0;
+
+	for (;;) {
+		int rv = write(fd, src + off, count);
+
+		if (rv == -1 && errno == EINTR)
+			continue;
+
+		if (rv < 0) {
+			log_error("write errno %d", errno);
+			return rv;
+		}
+
+		if (rv == count)
+			return 0;
+
+		/* short write: go on with the remainder */
+		count -= rv;
+		off += rv;
+	}
+}
+
+/* Grows the parallel client and pollfd arrays by CLIENT_NALLOC slots and
+   marks the new slots free (fd == -1).
+
+   The previous code logged an allocation failure and then went on to
+   initialise the new slots through the NULL pointer.  Nothing that calls
+   this function can continue without the arrays (client_add() retries
+   until a free slot exists), so a failure now ends the daemon explicitly
+   instead of by a NULL dereference. */
+static void client_alloc(void)
+{
+	int new_size = client_size + CLIENT_NALLOC;
+	struct client *new_client;
+	struct pollfd *new_pollfd;
+	int i;
+
+	/* realloc(NULL, n) acts as malloc(n), so the first allocation and
+	   later growth share one path; results go through temporaries so
+	   the old arrays are not lost if realloc fails */
+	new_client = realloc(client, new_size * sizeof(struct client));
+	if (new_client)
+		client = new_client;
+
+	new_pollfd = realloc(pollfd, new_size * sizeof(struct pollfd));
+	if (new_pollfd)
+		pollfd = new_pollfd;
+	else
+		log_error("can't alloc for pollfd");
+
+	if (!new_client || !new_pollfd) {
+		log_error("can't alloc for client array");
+		exit(EXIT_FAILURE);
+	}
+
+	for (i = client_size; i < new_size; i++) {
+		client[i].workfn = NULL;
+		client[i].deadfn = NULL;
+		client[i].fd = -1;
+		pollfd[i].fd = -1;
+		/* events was previously left as uninitialised heap memory */
+		pollfd[i].events = 0;
+		pollfd[i].revents = 0;
+	}
+	client_size = new_size;
+}
+
+/* Default dead-connection handler: closes the fd of client slot ci and
+ marks the slot free in both the client and pollfd arrays. deadfn is
+ left as it was; client_add() sets it again when the slot is reused. */
+void client_dead(int ci)
+{
+ close(client[ci].fd);
+ client[ci].workfn = NULL;
+ client[ci].fd = -1;
+ pollfd[ci].fd = -1;
+}
+
+/* Registers fd in the first free client slot and returns the slot index.
+ workfn is called by loop() when the fd is readable; deadfn is called on
+ POLLERR/POLLHUP/POLLNVAL and defaults to client_dead() when NULL.
+ If no slot is free the arrays are grown and the search is repeated. */
+int client_add(int fd, void (*workfn)(int ci), void (*deadfn)(int ci))
+{
+ int i;
+
+ if (!client)
+ client_alloc();
+ again:
+ for (i = 0; i < client_size; i++) {
+ if (client[i].fd == -1) {
+ client[i].workfn = workfn;
+ if (deadfn)
+ client[i].deadfn = deadfn;
+ else
+ client[i].deadfn = client_dead;
+ client[i].fd = fd;
+ pollfd[i].fd = fd;
+ pollfd[i].events = POLLIN;
+ /* client_maxi bounds the range of slots that loop() polls */
+ if (i > client_maxi)
+ client_maxi = i;
+ return i;
+ }
+ }
+
+ /* every slot is in use: grow the arrays and search again */
+ client_alloc();
+ goto again;
+}
+
+/* Returns the fd stored in client slot ci (-1 for a free slot). */
+int client_fd(int ci)
+{
+ return client[ci].fd;
+}
+
+/* Stops polling slot ci without releasing it: only the pollfd entry is
+ cleared, client[ci] keeps its fd. The fd argument is not used. */
+void client_ignore(int ci, int fd)
+{
+ pollfd[ci].fd = -1;
+ pollfd[ci].events = 0;
+}
+
+/* Resumes polling slot ci for input on fd; counterpart of
+ client_ignore(). */
+void client_back(int ci, int fd)
+{
+ pollfd[ci].fd = fd;
+ pollfd[ci].events = POLLIN;
+}
+
+/* Signal handler: only sets the daemon_quit flag, which loop() examines
+ after poll() returns. */
+static void sigterm_handler(int sig)
+{
+ daemon_quit = 1;
+}
+
+/* Allocates a zeroed lockspace named after the first DLM_LOCKSPACE_LEN
+   bytes of name, with its list heads and plock tree root initialised.
+   Returns NULL when the allocation fails. */
+static struct lockspace *create_ls(char *name)
+{
+	struct lockspace *ls = malloc(sizeof(*ls));
+
+	if (!ls)
+		return NULL;
+
+	memset(ls, 0, sizeof(*ls));
+	strncpy(ls->name, name, DLM_LOCKSPACE_LEN);
+
+	INIT_LIST_HEAD(&ls->changes);
+	INIT_LIST_HEAD(&ls->node_history);
+	INIT_LIST_HEAD(&ls->saved_messages);
+	INIT_LIST_HEAD(&ls->plock_resources);
+	ls->plock_resources_root = RB_ROOT;
+#if 0
+	INIT_LIST_HEAD(&ls->deadlk_nodes);
+	INIT_LIST_HEAD(&ls->transactions);
+	INIT_LIST_HEAD(&ls->resources);
+#endif
+	return ls;
+}
+
+/* Returns the lockspace on the global lockspaces list whose name is
+   exactly equal to name, or NULL if there is none. */
+struct lockspace *find_ls(char *name)
+{
+	struct lockspace *cur;
+
+	list_for_each_entry(cur, &lockspaces, list) {
+		/* equal length plus equal prefix of that length is plain
+		   string equality */
+		if (strcmp(cur->name, name) == 0)
+			return cur;
+	}
+	return NULL;
+}
+
+/* Returns the lockspace on the global lockspaces list whose global_id
+   equals id, or NULL if there is none. */
+struct lockspace *find_ls_id(uint32_t id)
+{
+	struct lockspace *cur;
+
+	list_for_each_entry(cur, &lockspaces, list) {
+		if (cur->global_id != id)
+			continue;
+		return cur;
+	}
+	return NULL;
+}
+
+struct fs_reg {
+ struct list_head list;
+ char name[DLM_LOCKSPACE_LEN+1];
+};
+
+/* Returns 1 if name is on fs_register_list, 0 otherwise. */
+static int fs_register_check(char *name)
+{
+	struct fs_reg *reg;
+
+	list_for_each_entry(reg, &fs_register_list, list) {
+		if (strcmp(name, reg->name) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/* Records name on fs_register_list.
+   Returns 0 on success, -EALREADY if the name is already registered, or
+   -ENOMEM if the entry cannot be allocated.  At most DLM_LOCKSPACE_LEN
+   bytes of name are stored. */
+static int fs_register_add(char *name)
+{
+	struct fs_reg *fs;
+
+	if (fs_register_check(name))
+		return -EALREADY;
+
+	fs = malloc(sizeof(struct fs_reg));
+	if (!fs)
+		return -ENOMEM;
+
+	/* strncpy() writes only DLM_LOCKSPACE_LEN bytes, so the final byte
+	   of name[] must already be zero for the stored string to be
+	   terminated; malloc() alone left it uninitialised */
+	memset(fs, 0, sizeof(struct fs_reg));
+	strncpy(fs->name, name, DLM_LOCKSPACE_LEN);
+	list_add(&fs->list, &fs_register_list);
+	return 0;
+}
+
+/* Removes and frees the first fs_register_list entry whose name equals
+   name; does nothing if there is no such entry. */
+static void fs_register_del(char *name)
+{
+	struct fs_reg *reg;
+
+	list_for_each_entry(reg, &fs_register_list, list) {
+		if (strcmp(name, reg->name) != 0)
+			continue;
+
+		/* iteration stops here, so freeing the cursor is safe */
+		list_del(&reg->list);
+		free(reg);
+		return;
+	}
+}
+
+#define MAXARGS 8
+
+/* Splits buf in place at each occurrence of sep, storing the start of
+   each piece in argv[] (argv[0] is buf itself) and the piece count in
+   *argc.  At most MAXARGS pieces are produced.  When the want'th
+   separator is found, splitting stops there and the text following it
+   is returned without being stored in argv[].  Otherwise the return
+   value points one byte past the NUL that ends the last piece. */
+static char *get_args(char *buf, int *argc, char **argv, char sep, int want)
+{
+	char *cur = buf;
+	char *hit;
+	int n;
+
+	argv[0] = buf;
+
+	for (n = 1; n < MAXARGS; n++) {
+		hit = strchr(cur, sep);
+		if (!hit)
+			break;
+		*hit = '\0';
+
+		if (n == want) {
+			/* the caller wants the remainder unsplit */
+			*argc = n;
+			return hit + 1;
+		}
+
+		argv[n] = hit + 1;
+		cur = hit + 1;
+	}
+	*argc = n;
+
+	/* we ended by hitting \0, return the point following that */
+	return strchr(cur, '\0') + 1;
+}
+
+/* Maps a DLM_LOCK_* mode to its two-letter name; any other value yields
+   "??". */
+const char *dlm_mode_str(int mode)
+{
+	const char *str = "??";
+
+	switch (mode) {
+	case DLM_LOCK_IV:
+		str = "IV";
+		break;
+	case DLM_LOCK_NL:
+		str = "NL";
+		break;
+	case DLM_LOCK_CR:
+		str = "CR";
+		break;
+	case DLM_LOCK_CW:
+		str = "CW";
+		break;
+	case DLM_LOCK_PR:
+		str = "PR";
+		break;
+	case DLM_LOCK_PW:
+		str = "PW";
+		break;
+	case DLM_LOCK_EX:
+		str = "EX";
+		break;
+	}
+	return str;
+}
+
+/* recv "online" (join) and "offline" (leave) messages from dlm via uevents */
+
+/* Work function for the uevent netlink socket in client slot ci.
+   Receives one message, ignores it unless it has the form
+   "<action>@/<x>/dlm/<name>", and then joins the lockspace <name> for
+   "online@" or leaves it for "offline@".  Failures are logged; nothing
+   is returned. */
+static void process_uevent(int ci)
+{
+	struct lockspace *ls;
+	char buf[MAXLINE];
+	char *argv[MAXARGS], *act, *sys;
+	int rv, argc = 0;
+
+	memset(buf, 0, sizeof(buf));
+	memset(argv, 0, sizeof(char *) * MAXARGS);
+
+ retry_recv:
+	/* receive at most sizeof(buf) - 1 bytes so that the zeroed final
+	   byte always terminates buf for the string functions below */
+	rv = recv(client[ci].fd, &buf, sizeof(buf) - 1, 0);
+	if (rv < 0) {
+		if (errno == EINTR)
+			goto retry_recv;
+		if (errno != EAGAIN)
+			log_error("uevent recv error %d errno %d", rv, errno);
+		return;
+	}
+
+	if (!strstr(buf, "dlm"))
+		return;
+
+	log_debug("uevent: %s", buf);
+
+	get_args(buf, &argc, argv, '/', 4);
+	if (argc != 4) {
+		/* with fewer than four pieces argv[2] and/or argv[3] are
+		   still NULL and must not be dereferenced */
+		log_error("uevent message has %d args", argc);
+		return;
+	}
+	act = argv[0];
+	sys = argv[2];
+
+	if (strcmp(sys, "dlm"))
+		return;
+
+	log_debug("kernel: %s %s", act, argv[3]);
+
+	rv = 0;
+
+	if (!strcmp(act, "online@")) {
+		ls = find_ls(argv[3]);
+		if (ls) {
+			rv = -EEXIST;
+			goto out;
+		}
+
+		ls = create_ls(argv[3]);
+		if (!ls) {
+			rv = -ENOMEM;
+			goto out;
+		}
+
+		if (fs_register_check(ls->name))
+			ls->fs_registered = 1;
+
+		rv = dlm_join_lockspace(ls);
+		if (rv) {
+			/* ls already freed */
+			goto out;
+		}
+
+	} else if (!strcmp(act, "offline@")) {
+		ls = find_ls(argv[3]);
+		if (!ls) {
+			rv = -ENOENT;
+			goto out;
+		}
+
+		dlm_leave_lockspace(ls);
+	}
+ out:
+	if (rv < 0)
+		log_error("process_uevent %s error %d errno %d",
+			  act, rv, errno);
+}
+
+/* Opens a NETLINK_KOBJECT_UEVENT datagram socket bound to this process
+   id and multicast group mask 1.  Returns the socket, or the negative
+   result of the failing socket()/bind() call. */
+static int setup_uevent(void)
+{
+	struct sockaddr_nl snl;
+	int sock, rv;
+
+	sock = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
+	if (sock < 0) {
+		log_error("uevent netlink socket");
+		return sock;
+	}
+
+	memset(&snl, 0, sizeof(snl));
+	snl.nl_family = AF_NETLINK;
+	snl.nl_pid = getpid();
+	snl.nl_groups = 1;
+
+	rv = bind(sock, (struct sockaddr *) &snl, sizeof(snl));
+	if (rv >= 0)
+		return sock;
+
+	log_error("uevent bind error %d errno %d", rv, errno);
+	close(sock);
+	return rv;
+}
+
+/* Fills in a reply header: zeroes *h, then sets magic, version, command
+   and the result (in the data field).  len is the header size plus
+   extra_len.  Up to DLM_LOCKSPACE_LEN bytes of name are copied when name
+   is not NULL. */
+static void init_header(struct dlmc_header *h, int cmd, char *name, int result,
+			int extra_len)
+{
+	memset(h, 0, sizeof(*h));
+
+	if (name)
+		strncpy(h->name, name, DLM_LOCKSPACE_LEN);
+
+	h->magic = DLMC_MAGIC;
+	h->version = DLMC_VERSION;
+	h->command = cmd;
+	h->data = result;
+	h->len = sizeof(struct dlmc_header) + extra_len;
+}
+
+static char copy_buf[LOG_DUMP_SIZE];
+
+/* Sends the debug log to fd: a DLMC_CMD_DUMP_DEBUG header whose len covers
+ the log bytes, followed by the bytes themselves when there are any.
+ Uses the shared static copy_buf. The results of send() are not
+ checked. */
+static void query_dump_debug(int fd)
+{
+ struct dlmc_header h;
+ int len = 0;
+
+ copy_log_dump(copy_buf, &len);
+
+ init_header(&h, DLMC_CMD_DUMP_DEBUG, NULL, 0, len);
+ send(fd, &h, sizeof(h), MSG_NOSIGNAL);
+
+ if (len)
+ send(fd, copy_buf, len, MSG_NOSIGNAL);
+}
+
+/* Sends the plock log to fd: a header whose len covers the log bytes,
+ followed by the bytes themselves when there are any. Uses the shared
+ static copy_buf. The results of send() are not checked. */
+static void query_dump_log_plock(int fd)
+{
+ struct dlmc_header h;
+ int len = 0;
+
+ copy_log_dump_plock(copy_buf, &len);
+
+ /* NOTE(review): the reply is tagged DLMC_CMD_DUMP_DEBUG although the
+ request handled here is DLMC_CMD_DUMP_LOG_PLOCK - confirm whether
+ clients depend on this before changing it */
+ init_header(&h, DLMC_CMD_DUMP_DEBUG, NULL, 0, len);
+ send(fd, &h, sizeof(h), MSG_NOSIGNAL);
+
+ if (len)
+ send(fd, copy_buf, len, MSG_NOSIGNAL);
+}
+
+/* Sends the plock state of lockspace name to fd. The header carries the
+ result of copy_plock_state(), or -ENOENT when the lockspace is not
+ found (in which case len stays 0 and only the header is sent). Uses
+ the shared static copy_buf. The results of send() are not checked. */
+static void query_dump_plocks(int fd, char *name)
+{
+ struct lockspace *ls;
+ struct dlmc_header h;
+ int len = 0;
+ int rv;
+
+ ls = find_ls(name);
+ if (!ls) {
+ rv = -ENOENT;
+ goto out;
+ }
+
+ rv = copy_plock_state(ls, copy_buf, &len);
+ out:
+ init_header(&h, DLMC_CMD_DUMP_PLOCKS, name, rv, len);
+ send(fd, &h, sizeof(h), MSG_NOSIGNAL);
+
+ if (len)
+ send(fd, copy_buf, len, MSG_NOSIGNAL);
+}
+
+/* combines a header and the data and sends it back to the client in
+ a single do_write() call */
+
+/* fd: destination; cmd/name/result: passed to init_header();
+ option: stored in the header's option field;
+ buf/buflen: optional payload appended after the header.
+ If the reply buffer cannot be allocated nothing is sent. The result of
+ do_write() is not checked. */
+static void do_reply(int fd, int cmd, char *name, int result, int option,
+ char *buf, int buflen)
+{
+ struct dlmc_header *h;
+ char *reply;
+ int reply_len;
+
+ reply_len = sizeof(struct dlmc_header) + buflen;
+ reply = malloc(reply_len);
+ if (!reply)
+ return;
+ memset(reply, 0, reply_len);
+ h = (struct dlmc_header *)reply;
+
+ init_header(h, cmd, name, result, buflen);
+ h->option = option;
+
+ /* the payload is optional; without it only the header goes out */
+ if (buf && buflen)
+ memcpy(reply + sizeof(struct dlmc_header), buf, buflen);
+
+ do_write(fd, reply, reply_len);
+
+ free(reply);
+}
+
+/* Replies on fd with a struct dlmc_lockspace describing lockspace name.
+   The reply's result is the value of set_lockspace_info(), or -ENOENT
+   when the lockspace does not exist. */
+static void query_lockspace_info(int fd, char *name)
+{
+	struct lockspace *ls;
+	struct dlmc_lockspace lockspace;
+	int rv;
+
+	/* the struct is sent even on the error path, so clear it before
+	   anything can jump to out; otherwise uninitialised stack bytes
+	   would be copied to the client */
+	memset(&lockspace, 0, sizeof(lockspace));
+
+	ls = find_ls(name);
+	if (!ls) {
+		rv = -ENOENT;
+		goto out;
+	}
+
+	rv = set_lockspace_info(ls, &lockspace);
+ out:
+	do_reply(fd, DLMC_CMD_LOCKSPACE_INFO, name, rv, 0,
+		 (char *)&lockspace, sizeof(lockspace));
+}
+
+/* Replies on fd with a struct dlmc_node describing node nodeid of
+   lockspace name.  The reply's result is the value of set_node_info(),
+   or -ENOENT when the lockspace does not exist. */
+static void query_node_info(int fd, char *name, int nodeid)
+{
+	struct lockspace *ls;
+	struct dlmc_node node;
+	int rv;
+
+	/* the struct is sent even on the error path, so clear it before
+	   anything can jump to out; otherwise uninitialised stack bytes
+	   would be copied to the client */
+	memset(&node, 0, sizeof(node));
+
+	ls = find_ls(name);
+	if (!ls) {
+		rv = -ENOENT;
+		goto out;
+	}
+
+	rv = set_node_info(ls, nodeid, &node);
+ out:
+	do_reply(fd, DLMC_CMD_NODE_INFO, name, rv, 0,
+		 (char *)&node, sizeof(node));
+}
+
+/* Replies on fd with an array of struct dlmc_lockspace obtained from
+   set_lockspaces().  max is the number of structs the client can accept;
+   when more exist only max are sent and the result is -E2BIG, otherwise
+   the result is the number sent.  A failure of set_lockspaces() is
+   passed back as the result with no payload. */
+static void query_lockspaces(int fd, int max)
+{
+	int ls_count = 0;
+	struct dlmc_lockspace *lss = NULL;
+	int rv, result;
+
+	/* max comes straight from the client's header; a negative value
+	   would otherwise become a negative payload length in do_reply() */
+	if (max < 0)
+		max = 0;
+
+	rv = set_lockspaces(&ls_count, &lss);
+	if (rv < 0) {
+		result = rv;
+		ls_count = 0;
+		goto out;
+	}
+
+	if (ls_count > max) {
+		result = -E2BIG;
+		ls_count = max;
+	} else {
+		result = ls_count;
+	}
+ out:
+	do_reply(fd, DLMC_CMD_LOCKSPACES, NULL, result, 0,
+		 (char *)lss, ls_count * sizeof(struct dlmc_lockspace));
+
+	free(lss);
+}
+
+/* Replies on fd with an array of struct dlmc_node for lockspace name,
+   obtained from set_lockspace_nodes() with the given option.  max is the
+   number of structs the client can accept.  The result is -ENOENT when
+   the lockspace does not exist, the error from set_lockspace_nodes(),
+   -E2BIG when more than max nodes exist, or the number of nodes sent. */
+static void query_lockspace_nodes(int fd, char *name, int option, int max)
+{
+	struct lockspace *ls;
+	int node_count = 0;
+	struct dlmc_node *nodes = NULL;
+	int rv, result;
+
+	/* max comes straight from the client's header; a negative value
+	   would otherwise become a negative payload length in do_reply() */
+	if (max < 0)
+		max = 0;
+
+	ls = find_ls(name);
+	if (!ls) {
+		result = -ENOENT;
+		node_count = 0;
+		goto out;
+	}
+
+	rv = set_lockspace_nodes(ls, option, &node_count, &nodes);
+	if (rv < 0) {
+		result = rv;
+		node_count = 0;
+		goto out;
+	}
+
+	/* node_count is the number of structs copied/returned; the caller's
+	   max may be less than that, in which case we copy as many as they
+	   asked for and return -E2BIG */
+
+	if (node_count > max) {
+		result = -E2BIG;
+		node_count = max;
+	} else {
+		result = node_count;
+	}
+ out:
+	do_reply(fd, DLMC_CMD_LOCKSPACE_NODES, name, result, 0,
+		 (char *)nodes, node_count * sizeof(struct dlmc_node));
+
+	free(nodes);
+}
+
+/* Work function for an accepted client connection in slot ci. Reads one
+ dlmc_header (plus any trailing data announced by h.len), validates the
+ magic and the upper 16 bits of the version, and dispatches on
+ h.command. On any read or validation failure the request is dropped
+ without a reply; the connection itself is not closed here. */
+static void process_connection(int ci)
+{
+ struct dlmc_header h;
+ char *extra = NULL;
+ int rv, extra_len;
+ struct lockspace *ls;
+
+ rv = do_read(client[ci].fd, &h, sizeof(h));
+ if (rv < 0) {
+ log_debug("connection %d read error %d", ci, rv);
+ goto out;
+ }
+
+ if (h.magic != DLMC_MAGIC) {
+ log_debug("connection %d magic error %x", ci, h.magic);
+ goto out;
+ }
+
+ if ((h.version & 0xFFFF0000) != (DLMC_VERSION & 0xFFFF0000)) {
+ log_debug("connection %d version error %x", ci, h.version);
+ goto out;
+ }
+
+ /* h.len counts the header too; anything beyond it is extra payload.
+ The payload is read to keep the stream in step, but none of the
+ commands handled below looks at it. */
+ if (h.len > sizeof(h)) {
+ extra_len = h.len - sizeof(h);
+ extra = malloc(extra_len);
+ if (!extra) {
+ log_error("process_connection no mem %d", extra_len);
+ goto out;
+ }
+ memset(extra, 0, extra_len);
+
+ rv = do_read(client[ci].fd, extra, extra_len);
+ if (rv < 0) {
+ log_debug("connection %d extra read error %d", ci, rv);
+ goto out;
+ }
+ }
+
+ switch (h.command) {
+ case DLMC_CMD_FS_REGISTER:
+ rv = fs_register_add(h.name);
+ ls = find_ls(h.name);
+ if (ls)
+ ls->fs_registered = 1;
+ do_reply(client[ci].fd, DLMC_CMD_FS_REGISTER, h.name, rv, 0,
+ NULL, 0);
+ break;
+
+ case DLMC_CMD_FS_UNREGISTER:
+ /* no reply is sent for this command */
+ fs_register_del(h.name);
+ ls = find_ls(h.name);
+ if (ls)
+ ls->fs_registered = 0;
+ break;
+
+ case DLMC_CMD_FS_NOTIFIED:
+ ls = find_ls(h.name);
+ if (ls)
+ rv = set_fs_notified(ls, h.data);
+ else
+ rv = -ENOENT;
+ /* pass back the nodeid provided by caller in option field */
+ do_reply(client[ci].fd, DLMC_CMD_FS_NOTIFIED, h.name, rv,
+ h.data, NULL, 0);
+ break;
+
+#if 0
+ case DLMC_CMD_DEADLOCK_CHECK:
+ ls = find_ls(h.name);
+ if (ls)
+ send_cycle_start(ls);
+ client_dead(ci);
+ break;
+#endif
+ default:
+ log_error("process_connection %d unknown command %d",
+ ci, h.command);
+ }
+ out:
+ if (extra)
+ free(extra);
+}
+
+/* Work function for the listening socket in slot ci: accepts one
+ connection and registers it with process_connection() as its work
+ function and the default dead handler. */
+static void process_listener(int ci)
+{
+ int fd, i;
+
+ fd = accept(client[ci].fd, NULL, NULL);
+ if (fd < 0) {
+ log_error("process_listener: accept error %d %d", fd, errno);
+ return;
+ }
+
+ i = client_add(fd, process_connection, NULL);
+
+ log_debug("client connection %d fd %d", i, fd);
+}
+
+/* Creates a listening AF_LOCAL stream socket for sock_path and returns
+ it, or the negative result of the failing socket()/bind()/listen()
+ call. sock_path is copied with strcpy() and so must fit in sun_path
+ after its first byte. */
+static int setup_listener(const char *sock_path)
+{
+ struct sockaddr_un addr;
+ socklen_t addrlen;
+ int rv, s;
+
+ /* we listen for new client connections on socket s */
+
+ s = socket(AF_LOCAL, SOCK_STREAM, 0);
+ if (s < 0) {
+ log_error("socket error %d %d", s, errno);
+ return s;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_LOCAL;
+ /* the name starts at sun_path[1]; sun_path[0] stays the zero byte
+ from the memset above (abstract socket address form) */
+ strcpy(&addr.sun_path[1], sock_path);
+ /* family + the leading zero byte + the name, without its terminator */
+ addrlen = sizeof(sa_family_t) + strlen(addr.sun_path+1) + 1;
+
+ rv = bind(s, (struct sockaddr *) &addr, addrlen);
+ if (rv < 0) {
+ log_error("bind error %d %d", rv, errno);
+ close(s);
+ return rv;
+ }
+
+ rv = listen(s, 5);
+ if (rv < 0) {
+ log_error("listen error %d %d", rv, errno);
+ close(s);
+ return rv;
+ }
+ return s;
+}
+
+/* Takes query_mutex, which loop() and the query thread both hold while
+ they work on daemon state. */
+static void query_lock(void)
+{
+ pthread_mutex_lock(&query_mutex);
+}
+
+/* Releases query_mutex taken by query_lock(). */
+static void query_unlock(void)
+{
+ pthread_mutex_unlock(&query_mutex);
+}
+
+/* This is a thread, so we have to be careful, don't call log_ functions.
+ We need a thread to process queries because the main thread may block
+ for long periods when writing to sysfs to stop dlm-kernel (and maybe
+ other places). */
+
+/* Thread body: listens on DLMC_QUERY_SOCK_PATH and serves one query per
+ accepted connection, closing the connection afterwards. Requests with
+ a bad header are dropped silently. The thread ends (returning NULL) if
+ the listener cannot be set up or if accept() fails for any reason; in
+ the latter case the listening socket is left open. */
+static void *process_queries(void *arg)
+{
+ struct dlmc_header h;
+ int s, f, rv;
+
+ rv = setup_listener(DLMC_QUERY_SOCK_PATH);
+ if (rv < 0)
+ return NULL;
+
+ s = rv;
+
+ for (;;) {
+ f = accept(s, NULL, NULL);
+ if (f < 0)
+ return NULL;
+
+ rv = do_read(f, &h, sizeof(h));
+ if (rv < 0) {
+ goto out;
+ }
+
+ if (h.magic != DLMC_MAGIC) {
+ goto out;
+ }
+
+ if ((h.version & 0xFFFF0000) != (DLMC_VERSION & 0xFFFF0000)) {
+ goto out;
+ }
+
+ /* the main loop holds the same mutex while it changes state */
+ query_lock();
+
+ switch (h.command) {
+ case DLMC_CMD_DUMP_DEBUG:
+ query_dump_debug(f);
+ break;
+ case DLMC_CMD_DUMP_LOG_PLOCK:
+ query_dump_log_plock(f);
+ break;
+ case DLMC_CMD_DUMP_PLOCKS:
+ query_dump_plocks(f, h.name);
+ break;
+ case DLMC_CMD_LOCKSPACE_INFO:
+ query_lockspace_info(f, h.name);
+ break;
+ case DLMC_CMD_NODE_INFO:
+ query_node_info(f, h.name, h.data);
+ break;
+ case DLMC_CMD_LOCKSPACES:
+ /* h.data is the maximum number of entries to return */
+ query_lockspaces(f, h.data);
+ break;
+ case DLMC_CMD_LOCKSPACE_NODES:
+ query_lockspace_nodes(f, h.name, h.option, h.data);
+ break;
+ default:
+ break;
+ }
+ query_unlock();
+
+ out:
+ close(f);
+ }
+}
+
+/* Initialises query_mutex and starts the query thread running
+   process_queries().  Returns 0 on success or a negative error number
+   if the thread cannot be created. */
+static int setup_queries(void)
+{
+	int rv;
+
+	pthread_mutex_init(&query_mutex, NULL);
+
+	/* pthread_create() reports failure with a positive error number
+	   (it does not return -1 or set errno), so test for non-zero and
+	   negate it for the caller, which checks for rv < 0 */
+	rv = pthread_create(&query_thread, NULL, process_queries, NULL);
+	if (rv) {
+		log_error("can't create query thread");
+		return -rv;
+	}
+	return 0;
+}
+
+/* The dlm in kernels before 2.6.28 does not have the monitor device. We
+ keep this fd open as long as we're running. If we exit/terminate while
+ lockspaces exist in the kernel, the kernel will detect a close on this
+ fd and stop the lockspaces. */
+
+/* Opens /dev/misc/dlm-monitor and keeps the fd in kernel_monitor_fd; does
+ nothing when monitor_minor is zero. The result of open() is only
+ logged, not checked. */
+static void setup_monitor(void)
+{
+ if (!monitor_minor)
+ return;
+
+ kernel_monitor_fd = open("/dev/misc/dlm-monitor", O_RDONLY);
+ log_debug("/dev/misc/dlm-monitor fd %d", kernel_monitor_fd);
+}
+
+/* Dead handler for the cluster connections: asks the main loop to quit
+ and records that the cluster is down. The error is logged only the
+ first time, i.e. while cluster_down is still 0. */
+void cluster_dead(int ci)
+{
+ if (!cluster_down)
+ log_error("cluster is down, exiting");
+ daemon_quit = 1;
+ cluster_down = 1;
+}
+
+/* Main body of the daemon: performs the setup steps in order, registering
+ each resulting fd with client_add(), then polls those fds and calls
+ their work/dead functions until daemon_quit is set or poll() fails.
+ Any failed setup step jumps straight to the shutdown sequence at out,
+ which runs every close function regardless of how far setup got. */
+static void loop(void)
+{
+ struct lockspace *ls;
+ int poll_timeout = -1;
+ int rv, i;
+ void (*workfn) (int ci);
+ void (*deadfn) (int ci);
+
+ rv = setup_queries();
+ if (rv < 0)
+ goto out;
+
+ rv = setup_listener(DLMC_SOCK_PATH);
+ if (rv < 0)
+ goto out;
+ client_add(rv, process_listener, NULL);
+
+ rv = setup_cluster_cfg();
+ if (rv < 0)
+ goto out;
+ /* a return of 0 means there is no fd to poll for this step */
+ if (rv > 0)
+ client_add(rv, process_cluster_cfg, cluster_dead);
+
+ rv = setup_cluster();
+ if (rv < 0)
+ goto out;
+ client_add(rv, process_cluster, cluster_dead);
+
+ setup_config(0);
+
+ setup_logging();
+
+ rv = check_uncontrolled_lockspaces();
+ if (rv < 0)
+ goto out;
+
+ rv = setup_misc_devices();
+ if (rv < 0)
+ goto out;
+
+ setup_monitor();
+
+ rv = setup_configfs(); /* calls update_cluster() */
+ if (rv < 0)
+ goto out;
+
+ rv = setup_uevent();
+ if (rv < 0)
+ goto out;
+ client_add(rv, process_uevent, NULL);
+
+ rv = setup_cpg_daemon();
+ if (rv < 0)
+ goto out;
+ client_add(rv, process_cpg_daemon, cluster_dead);
+
+ rv = set_protocol();
+ if (rv < 0)
+ goto out;
+
+#if 0
+ if (cfgd_enable_deadlk) {
+ rv = setup_netlink();
+ if (rv < 0)
+ goto out;
+ client_add(rv, process_netlink, NULL);
+
+ setup_deadlock();
+ }
+#endif
+
+ rv = setup_plocks();
+ if (rv < 0)
+ goto out;
+ plock_fd = rv;
+ plock_ci = client_add(rv, process_plocks, NULL);
+
+ for (;;) {
+ rv = poll(pollfd, client_maxi + 1, poll_timeout);
+ /* an interrupted poll is where a quit request is noticed; it is
+ honoured only when no lockspaces remain, otherwise the flag is
+ cleared again */
+ if (rv == -1 && errno == EINTR) {
+ if (daemon_quit && list_empty(&lockspaces))
+ goto out;
+ log_error("shutdown ignored, active lockspaces");
+ daemon_quit = 0;
+ continue;
+ }
+ if (rv < 0) {
+ log_error("poll errno %d", errno);
+ goto out;
+ }
+
+ /* keep the query thread out while the handlers run */
+ query_lock();
+
+ for (i = 0; i <= client_maxi; i++) {
+ if (client[i].fd < 0)
+ continue;
+ if (pollfd[i].revents & POLLIN) {
+ workfn = client[i].workfn;
+ workfn(i);
+ }
+ if (pollfd[i].revents & (POLLERR | POLLHUP | POLLNVAL)) {
+ deadfn = client[i].deadfn;
+ deadfn(i);
+ }
+ }
+ query_unlock();
+
+ /* set by a handler such as cluster_dead(); unlike the EINTR case
+ above this exit does not look at the lockspaces list */
+ if (daemon_quit)
+ break;
+
+ query_lock();
+
+ /* block indefinitely unless one of the conditions below asks to
+ be re-examined, in which case wake up after 1000 ms */
+ poll_timeout = -1;
+
+ if (poll_fencing || poll_quorum || poll_fs) {
+ process_lockspace_changes();
+ poll_timeout = 1000;
+ }
+
+ if (poll_ignore_plock) {
+ if (!limit_plocks()) {
+ poll_ignore_plock = 0;
+ client_back(plock_ci, plock_fd);
+ }
+ poll_timeout = 1000;
+ }
+
+ if (poll_drop_plock) {
+ drop_resources_all();
+ /* tested again after the call above */
+ if (poll_drop_plock)
+ poll_timeout = 1000;
+ }
+
+ query_unlock();
+ }
+ out:
+ log_debug("shutdown");
+ close_plocks();
+ close_cpg_daemon();
+ clear_configfs();
+ close_logging();
+ close_cluster();
+ close_cluster_cfg();
+
+ /* note that close_logging() has already been called at this point */
+ list_for_each_entry(ls, &lockspaces, list)
+ log_error("abandoned lockspace %s", ls->name);
+}
+
+/* Creates and write-locks LOCKFILE_NAME so that a second dlm_controld
+   cannot start, then stores our pid in it.  Any failure prints a message
+   to stderr and exits.  The fd is deliberately left open: closing it
+   would release the fcntl lock. */
+
+static void lockfile(void)
+{
+	int fd, error, len;
+	struct flock lock;
+	char buf[33];
+
+	fd = open(LOCKFILE_NAME, O_CREAT|O_WRONLY,
+		  S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
+	if (fd < 0) {
+		fprintf(stderr, "cannot open/create lock file %s\n",
+			LOCKFILE_NAME);
+		exit(EXIT_FAILURE);
+	}
+
+	/* zero first so no field of the flock is passed uninitialised;
+	   l_start 0 / l_len 0 covers the whole file */
+	memset(&lock, 0, sizeof(lock));
+	lock.l_type = F_WRLCK;
+	lock.l_start = 0;
+	lock.l_whence = SEEK_SET;
+	lock.l_len = 0;
+
+	error = fcntl(fd, F_SETLK, &lock);
+	if (error) {
+		fprintf(stderr, "dlm_controld is already running\n");
+		exit(EXIT_FAILURE);
+	}
+
+	error = ftruncate(fd, 0);
+	if (error) {
+		fprintf(stderr, "cannot clear lock file %s\n", LOCKFILE_NAME);
+		exit(EXIT_FAILURE);
+	}
+
+	/* bounded by the buffer size rather than by a magic number */
+	len = snprintf(buf, sizeof(buf), "%d\n", (int) getpid());
+	if (len < 0 || len >= (int) sizeof(buf)) {
+		fprintf(stderr, "cannot write lock file %s\n", LOCKFILE_NAME);
+		exit(EXIT_FAILURE);
+	}
+
+	/* a short write would leave a truncated pid, so treat it as an error */
+	error = write(fd, buf, len);
+	if (error != len) {
+		fprintf(stderr, "cannot write lock file %s\n", LOCKFILE_NAME);
+		exit(EXIT_FAILURE);
+	}
+}
+
+/* Writes the command line help to stdout.  Fixed lines go through
+   fputs(); lines that show a compiled-in default go through printf(). */
+
+static void print_usage(void)
+{
+	fputs("Usage:\n", stdout);
+	fputs("\n", stdout);
+	fputs("dlm_controld [options]\n", stdout);
+	fputs("\n", stdout);
+	fputs("Options:\n", stdout);
+	fputs("\n", stdout);
+	fputs(" -D Enable debugging to stderr and don't fork\n", stdout);
+	fputs(" -L Enable debugging to log file\n", stdout);
+	fputs(" -K Enable kernel dlm debugging messages\n", stdout);
+	fputs(" -r <num> dlm kernel lowcomms protocol, 0 tcp, 1 sctp, 2 detect\n", stdout);
+	fputs(" 2 selects tcp if corosync rrp_mode is \"none\", otherwise sctp\n", stdout);
+	fputs(" Default is 2\n", stdout);
+	fputs(" -f <num> Enable (1) or disable (0) fencing recovery dependency\n", stdout);
+	printf(" Default is %d\n", DEFAULT_ENABLE_FENCING);
+	fputs(" -q <num> Enable (1) or disable (0) quorum recovery dependency\n", stdout);
+	printf(" Default is %d\n", DEFAULT_ENABLE_QUORUM);
+#if 0
+	fputs(" -d <num> Enable (1) or disable (0) deadlock detection code\n", stdout);
+	printf(" Default is %d\n", DEFAULT_ENABLE_DEADLK);
+#endif
+	fputs(" -p <num> Enable (1) or disable (0) plock code for cluster fs\n", stdout);
+	printf(" Default is %d\n", DEFAULT_ENABLE_PLOCK);
+	fputs(" -P Enable plock debugging\n", stdout);
+	fputs(" -l <limit> Limit the rate of plock operations\n", stdout);
+	printf(" Default is %d, set to 0 for no limit\n", DEFAULT_PLOCK_RATE_LIMIT);
+	fputs(" -o <n> Enable (1) or disable (0) plock ownership\n", stdout);
+	printf(" Default is %d\n", DEFAULT_PLOCK_OWNERSHIP);
+	fputs(" -t <ms> plock ownership drop resources time (milliseconds)\n", stdout);
+	printf(" Default is %u\n", DEFAULT_DROP_RESOURCES_TIME);
+	fputs(" -c <num> plock ownership drop resources count\n", stdout);
+	printf(" Default is %u\n", DEFAULT_DROP_RESOURCES_COUNT);
+	fputs(" -a <ms> plock ownership drop resources age (milliseconds)\n", stdout);
+	printf(" Default is %u\n", DEFAULT_DROP_RESOURCES_AGE);
+	fputs(" -h Print this help, then exit\n", stdout);
+	fputs(" -V Print program version information, then exit\n", stdout);
+}
+
+#define OPTION_STRING "LDKf:q:p:Pl:o:t:c:a:hVr:"
+
+/* Parses the command line.  For each tunable, the opt*_ variable records
+   that the value was given on the command line and the cfg*_ variable
+   receives the value.  -h and -V print and exit successfully; an
+   unrecognised option exits with failure.  Setting DLM_CONTROLD_DEBUG in
+   the environment has the same effect as -L.
+
+   Numeric arguments go through atoi(), so non-numeric text is taken as 0
+   without complaint (behaviour kept as is). */
+
+static void read_arguments(int argc, char **argv)
+{
+	int optchar;
+
+	/* getopt() is specified to return -1 when the options are
+	   exhausted; compare against that instead of EOF */
+	while ((optchar = getopt(argc, argv, OPTION_STRING)) != -1) {
+
+		switch (optchar) {
+		case 'D':
+			daemon_debug_opt = 1;
+			break;
+
+		case 'L':
+			optd_debug_logfile = 1;
+			cfgd_debug_logfile = 1;
+			break;
+
+		case 'K':
+			optk_debug = 1;
+			cfgk_debug = 1;
+			break;
+
+		case 'r':
+			optk_protocol = 1;
+			cfgk_protocol = atoi(optarg);
+			break;
+
+		case 'f':
+			optd_enable_fencing = 1;
+			cfgd_enable_fencing = atoi(optarg);
+			break;
+
+		case 'q':
+			optd_enable_quorum = 1;
+			cfgd_enable_quorum = atoi(optarg);
+			break;
+
+		case 'p':
+			optd_enable_plock = 1;
+			cfgd_enable_plock = atoi(optarg);
+			break;
+
+		case 'P':
+			optd_plock_debug = 1;
+			cfgd_plock_debug = 1;
+			break;
+
+		case 'l':
+			optd_plock_rate_limit = 1;
+			cfgd_plock_rate_limit = atoi(optarg);
+			break;
+
+		case 'o':
+			optd_plock_ownership = 1;
+			cfgd_plock_ownership = atoi(optarg);
+			break;
+
+		case 't':
+			optd_drop_resources_time = 1;
+			cfgd_drop_resources_time = atoi(optarg);
+			break;
+
+		case 'c':
+			optd_drop_resources_count = 1;
+			cfgd_drop_resources_count = atoi(optarg);
+			break;
+
+		case 'a':
+			optd_drop_resources_age = 1;
+			cfgd_drop_resources_age = atoi(optarg);
+			break;
+
+		case 'h':
+			print_usage();
+			exit(EXIT_SUCCESS);
+			break;
+
+		case 'V':
+			printf("dlm_controld %s (built %s %s)\n",
+			       VERSION, __DATE__, __TIME__);
+			printf("%s\n", REDHAT_COPYRIGHT);
+			exit(EXIT_SUCCESS);
+			break;
+
+		case ':':
+		case '?':
+			fprintf(stderr, "Please use '-h' for usage.\n");
+			exit(EXIT_FAILURE);
+			break;
+
+		default:
+			fprintf(stderr, "unknown option: %c\n", optchar);
+			exit(EXIT_FAILURE);
+			break;
+		}
+	}
+
+	if (getenv("DLM_CONTROLD_DEBUG")) {
+		optd_debug_logfile = 1;
+		cfgd_debug_logfile = 1;
+	}
+}
+
+/* Tries to switch the daemon to SCHED_RR at the highest priority that
+   policy allows.  Failures are logged and otherwise ignored. */
+
+static void set_scheduler(void)
+{
+	struct sched_param sched_param;
+	int max_prio;
+
+	max_prio = sched_get_priority_max(SCHED_RR);
+	if (max_prio == -1) {
+		log_error("could not get maximum scheduler priority err %d",
+			  errno);
+		return;
+	}
+
+	sched_param.sched_priority = max_prio;
+	if (sched_setscheduler(0, SCHED_RR, &sched_param) == -1)
+		log_error("could not set SCHED_RR priority %d err %d",
+			  sched_param.sched_priority, errno);
+}
+
+/* Entry point: parse options, daemonize unless daemon_debug_opt is set,
+   take the lock file, start logging, install the SIGTERM handler, raise
+   the scheduling priority and run loop() until it returns. */
+
+int main(int argc, char **argv)
+{
+	INIT_LIST_HEAD(&lockspaces);
+	INIT_LIST_HEAD(&fs_register_list);
+
+	read_arguments(argc, argv);
+
+	if (!daemon_debug_opt && daemon(0, 0) < 0) {
+		perror("daemon error");
+		exit(EXIT_FAILURE);
+	}
+
+	/* taken after daemon() so the recorded pid is the final one */
+	lockfile();
+
+	init_logging();
+	log_level(NULL, LOG_INFO, "dlm_controld %s started", VERSION);
+
+	signal(SIGTERM, sigterm_handler);
+	set_scheduler();
+
+	loop();
+
+	return 0;
+}
+
+/* Definitions of the daemon-wide variables.  Those without an initializer
+   start at zero; lockspaces is initialised as a list head in main(). */
+
+int daemon_debug_opt;
+int daemon_quit;
+int cluster_down;
+int poll_fencing;
+int poll_quorum;
+int poll_fs;
+int poll_ignore_plock;
+int poll_drop_plock;
+int plock_fd;
+int plock_ci;
+struct list_head lockspaces;
+int cluster_quorate;
+int our_nodeid;
+uint32_t cluster_ringid_seq;
+uint32_t control_minor;
+uint32_t monitor_minor;
+uint32_t plock_minor;
+uint32_t old_plock_minor;
+
+/* was a config value set on command line?, 0 or 1.
+ optk is a kernel option, optd is a daemon option */
+
+int optk_debug;
+int optk_timewarn;
+int optk_protocol;
+int optd_debug_logfile;
+int optd_enable_fencing;
+int optd_enable_quorum;
+int optd_enable_plock;
+int optd_plock_debug;
+int optd_plock_rate_limit;
+int optd_plock_ownership;
+int optd_drop_resources_time;
+int optd_drop_resources_count;
+int optd_drop_resources_age;
+
+/* actual config value from command line, cluster.conf, or default.
+ cfgk is a kernel config value, cfgd is a daemon config value */
+
+int cfgk_debug = -1;
+int cfgk_timewarn = -1;
+int cfgk_protocol = PROTO_DETECT;
+int cfgd_debug_logfile = DEFAULT_DEBUG_LOGFILE;
+int cfgd_enable_fencing = DEFAULT_ENABLE_FENCING;
+int cfgd_enable_quorum = DEFAULT_ENABLE_QUORUM;
+int cfgd_enable_plock = DEFAULT_ENABLE_PLOCK;
+int cfgd_plock_debug = DEFAULT_PLOCK_DEBUG;
+int cfgd_plock_rate_limit = DEFAULT_PLOCK_RATE_LIMIT;
+int cfgd_plock_ownership = DEFAULT_PLOCK_OWNERSHIP;
+int cfgd_drop_resources_time = DEFAULT_DROP_RESOURCES_TIME;
+int cfgd_drop_resources_count = DEFAULT_DROP_RESOURCES_COUNT;
+int cfgd_drop_resources_age = DEFAULT_DROP_RESOURCES_AGE;
+
diff --git a/dlm_controld/member_cman.c b/dlm_controld/member_cman.c
new file mode 100644
index 0000000..909c249
--- /dev/null
+++ b/dlm_controld/member_cman.c
@@ -0,0 +1,267 @@
+#include "dlm_daemon.h"
+#include <corosync/corotypes.h>
+#include <corosync/cfg.h>
+#include <corosync/quorum.h>
+#include "libfenced.h"
+
+/* corosync cfg handle (initialised in setup_cluster_cfg) and quorum
+   handle (initialised in setup_cluster) */
+static corosync_cfg_handle_t ch;
+static quorum_handle_t qh;
+/* member list as it was before the most recent quorum callback */
+static uint32_t old_nodes[MAX_NODES];
+static int old_node_count;
+/* member list delivered by the most recent quorum callback */
+static uint32_t quorum_nodes[MAX_NODES];
+static int quorum_node_count;
+
+/* Returns 1 if nodeid occurs among the first count entries of node_list,
+   0 otherwise. */
+
+static int is_member(uint32_t *node_list, int count, uint32_t nodeid)
+{
+	uint32_t *entry = node_list;
+	int remaining = count;
+
+	while (remaining-- > 0) {
+		if (*entry++ == nodeid)
+			return 1;
+	}
+	return 0;
+}
+
+/* Returns 1 if nodeid is in old_nodes[], the list saved from before the
+   latest quorum callback, else 0. */
+static int is_old_member(uint32_t nodeid)
+{
+ return is_member(old_nodes, old_node_count, nodeid);
+}
+
+/* Returns 1 if nodeid is in quorum_nodes[], the list delivered by the
+   latest quorum callback, else 0. */
+int is_cluster_member(uint32_t nodeid)
+{
+ return is_member(quorum_nodes, quorum_node_count, nodeid);
+}
+
+/* quorum_notify_fn callback.  Records the new quorate flag, ring sequence
+   and member list, then reconciles the list against the previous one:
+   nodes that disappeared are removed from the node history and from
+   configfs; nodes that appeared are added to the node history and get one
+   configfs entry per address returned by corosync.
+
+   node_list_entries is capped at MAX_NODES, the size of quorum_nodes[];
+   copying an unchecked count would write past the end of that array. */
+
+static void quorum_callback(quorum_handle_t h, uint32_t quorate,
+			    uint64_t ring_seq, uint32_t node_list_entries,
+			    uint32_t *node_list)
+{
+	corosync_cfg_node_address_t addrs[MAX_NODE_ADDRESSES];
+	cs_error_t err;
+	uint32_t n;
+	int i, j, num_addrs;
+
+	cluster_quorate = quorate;
+	/* only the low 32 bits of the ring sequence are kept */
+	cluster_ringid_seq = (uint32_t)ring_seq;
+
+	log_debug("cluster quorum %u seq %u nodes %u",
+		  cluster_quorate, cluster_ringid_seq, node_list_entries);
+
+	if (node_list_entries > MAX_NODES) {
+		log_error("cluster nodes %u exceeds max %d, ignoring extra",
+			  node_list_entries, MAX_NODES);
+		node_list_entries = MAX_NODES;
+	}
+
+	/* current list becomes the old list */
+	old_node_count = quorum_node_count;
+	memcpy(&old_nodes, &quorum_nodes, sizeof(old_nodes));
+
+	quorum_node_count = 0;
+	memset(&quorum_nodes, 0, sizeof(quorum_nodes));
+
+	for (n = 0; n < node_list_entries; n++)
+		quorum_nodes[quorum_node_count++] = node_list[n];
+
+	for (i = 0; i < old_node_count; i++) {
+		if (!is_cluster_member(old_nodes[i])) {
+			log_debug("cluster node %u removed seq %u",
+				  old_nodes[i], cluster_ringid_seq);
+			node_history_cluster_remove(old_nodes[i]);
+			del_configfs_node(old_nodes[i]);
+		}
+	}
+
+	for (i = 0; i < quorum_node_count; i++) {
+		if (!is_old_member(quorum_nodes[i])) {
+			log_debug("cluster node %u added seq %u",
+				  quorum_nodes[i], cluster_ringid_seq);
+			node_history_cluster_add(quorum_nodes[i]);
+
+			err = corosync_cfg_get_node_addrs(ch, quorum_nodes[i],
+							  MAX_NODE_ADDRESSES,
+							  &num_addrs, addrs);
+			if (err != CS_OK) {
+				log_error("corosync_cfg_get_node_addrs failed "
+					  "nodeid %u", quorum_nodes[i]);
+				continue;
+			}
+
+			/* last argument flags the entry for the local node */
+			for (j = 0; j < num_addrs; j++) {
+				add_configfs_node(quorum_nodes[i],
+						  addrs[j].address,
+						  addrs[j].address_length,
+						  (quorum_nodes[i] ==
+						   our_nodeid));
+			}
+		}
+	}
+}
+
+/* callback table passed to quorum_initialize() in setup_cluster() */
+static quorum_callbacks_t quorum_callbacks =
+{
+ .quorum_notify_fn = quorum_callback,
+};
+
+/* Dispatches all pending quorum events; a dispatch error is treated as
+   loss of the cluster.  The client index is not used. */
+
+void process_cluster(int ci)
+{
+	if (quorum_dispatch(qh, CS_DISPATCH_ALL) != CS_OK)
+		cluster_dead(0);
+}
+
+/* Force re-read of quorum nodes by dispatching a single quorum event; a
+   dispatch error is treated as loss of the cluster. */
+void update_cluster(void)
+{
+	if (quorum_dispatch(qh, CS_DISPATCH_ONE) != CS_OK)
+		cluster_dead(0);
+}
+
+/* Connects to the corosync quorum service, starts change tracking and
+   clears both member lists.  Returns the quorum fd, or -1 on error (the
+   quorum handle is finalized again if it had been initialised). */
+
+int setup_cluster(void)
+{
+	cs_error_t err;
+	int fd;
+
+	err = quorum_initialize(&qh, &quorum_callbacks);
+	if (err != CS_OK) {
+		log_error("quorum init error %d", err);
+		return -1;
+	}
+
+	err = quorum_fd_get(qh, &fd);
+	if (err != CS_OK) {
+		log_error("quorum fd_get error %d", err);
+	} else {
+		err = quorum_trackstart(qh, CS_TRACK_CHANGES);
+		if (err != CS_OK)
+			log_error("quorum trackstart error %d", err);
+	}
+
+	if (err != CS_OK) {
+		quorum_finalize(qh);
+		return -1;
+	}
+
+	old_node_count = 0;
+	memset(&old_nodes, 0, sizeof(old_nodes));
+	quorum_node_count = 0;
+	memset(&quorum_nodes, 0, sizeof(quorum_nodes));
+
+	return fd;
+}
+
+/* Stops quorum tracking and releases the quorum handle.  Return values of
+   both calls are ignored. */
+void close_cluster(void)
+{
+ quorum_trackstop(qh);
+ quorum_finalize(qh);
+}
+
+/* Asks corosync to remove a node from the cluster.  A nodeid of 0 means
+   the local node: corosync is asked for an immediate local shutdown. */
+
+void kick_node_from_cluster(int nodeid)
+{
+	if (nodeid) {
+		log_error("telling corosync to remove nodeid %d from cluster",
+			  nodeid);
+		corosync_cfg_kill_node(ch, nodeid, "dlm_controld");
+		return;
+	}
+
+	log_error("telling corosync to shut down cluster locally");
+	corosync_cfg_try_shutdown(ch, COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
+}
+
+/* corosync shutdown callback.  Only shutdown requests are answered: yes
+   when no lockspaces exist, no otherwise. */
+
+static void shutdown_callback(corosync_cfg_handle_t h,
+			      corosync_cfg_shutdown_flags_t flags)
+{
+	if (!(flags & COROSYNC_CFG_SHUTDOWN_FLAG_REQUEST))
+		return;
+
+	if (!list_empty(&lockspaces)) {
+		log_debug("no to corosync shutdown");
+		corosync_cfg_replyto_shutdown(ch,
+					      COROSYNC_CFG_SHUTDOWN_FLAG_NO);
+		return;
+	}
+
+	corosync_cfg_replyto_shutdown(ch, COROSYNC_CFG_SHUTDOWN_FLAG_YES);
+}
+
+/* callback table passed to corosync_cfg_initialize() in
+   setup_cluster_cfg(); only the shutdown callback is provided */
+static corosync_cfg_callbacks_t cfg_callbacks =
+{
+ .corosync_cfg_shutdown_callback = shutdown_callback,
+ .corosync_cfg_state_track_callback = NULL,
+};
+
+/* Dispatches all pending corosync cfg events; a dispatch error is treated
+   as loss of the cluster.  The client index is not used. */
+
+void process_cluster_cfg(int ci)
+{
+	if (corosync_cfg_dispatch(ch, CS_DISPATCH_ALL) != CS_OK)
+		cluster_dead(0);
+}
+
+/* Connects to the corosync cfg service and stores the local node id in
+   our_nodeid.  Returns the cfg fd, or -1 on error (the cfg handle is
+   finalized again if it had been initialised). */
+
+int setup_cluster_cfg(void)
+{
+	cs_error_t err;
+	unsigned int nodeid;
+	int fd;
+
+	err = corosync_cfg_initialize(&ch, &cfg_callbacks);
+	if (err != CS_OK) {
+		log_error("corosync cfg init error %d", err);
+		return -1;
+	}
+
+	err = corosync_cfg_fd_get(ch, &fd);
+	if (err != CS_OK) {
+		log_error("corosync cfg fd_get error %d", err);
+		goto fail;
+	}
+
+	err = corosync_cfg_local_get(ch, &nodeid);
+	if (err != CS_OK) {
+		log_error("corosync cfg local_get error %d", err);
+		goto fail;
+	}
+
+	our_nodeid = nodeid;
+	log_debug("our_nodeid %d", our_nodeid);
+
+	return fd;
+
+ fail:
+	corosync_cfg_finalize(ch);
+	return -1;
+}
+
+/* Releases the corosync cfg handle; the return value is ignored. */
+void close_cluster_cfg(void)
+{
+ corosync_cfg_finalize(ch);
+}
+
+/* Queries fenced for nodeid and stores the node's last_fenced_time in
+   *last_fenced_time.  Returns 0 on success, or the negative value from
+   fenced_node_info() with *last_fenced_time untouched. */
+
+int fence_node_time(int nodeid, uint64_t *last_fenced_time)
+{
+	struct fenced_node nodeinfo;
+	int rv;
+
+	memset(&nodeinfo, 0, sizeof(nodeinfo));
+
+	rv = fenced_node_info(nodeid, &nodeinfo);
+	if (rv >= 0) {
+		*last_fenced_time = nodeinfo.last_fenced_time;
+		rv = 0;
+	}
+	return rv;
+}
+
+/* Queries fenced for the fence domain and stores its victim_count in
+   *count.  Returns 0 on success, or the negative value from
+   fenced_domain_info() with *count untouched. */
+
+int fence_in_progress(int *count)
+{
+	struct fenced_domain domain;
+	int rv;
+
+	memset(&domain, 0, sizeof(domain));
+
+	rv = fenced_domain_info(&domain);
+	if (rv >= 0) {
+		*count = domain.victim_count;
+		rv = 0;
+	}
+	return rv;
+}
+
diff --git a/dlm_controld/netlink.c b/dlm_controld/netlink.c
new file mode 100644
index 0000000..63122f7
--- /dev/null
+++ b/dlm_controld/netlink.c
@@ -0,0 +1,225 @@
+#include "dlm_daemon.h"
+#include "config.h"
+#include <linux/dlm.h>
+#include <linux/netlink.h>
+#include <linux/genetlink.h>
+#include <linux/dlm_netlink.h>
+
+/* minimum spacing, in seconds, between deadlock cycle starts */
+#define DEADLOCK_CHECK_SECS 10
+
+/* FIXME: look into using libnl/libnetlink */
+
+/* Accessors for generic netlink messages and attributes.  Macro
+   arguments are parenthesised so that expression arguments expand
+   correctly. */
+#define GENLMSG_DATA(glh) ((void *)((char *)NLMSG_DATA(glh) + GENL_HDRLEN))
+#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
+#define NLA_DATA(na) ((void *)((char*)(na) + NLA_HDRLEN))
+#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN)
+
+/* Maximum size of response requested or message sent */
+#define MAX_MSG_SIZE 1024
+
+/* Buffer for one generic netlink message: netlink header, generic
+   netlink header, then up to MAX_MSG_SIZE bytes of attributes. */
+struct msgtemplate {
+ struct nlmsghdr n;
+ struct genlmsghdr g;
+ char buf[MAX_MSG_SIZE];
+};
+
+/* Builds a generic netlink request carrying one attribute and sends it
+   on sd.
+
+   nlmsg_type/nlmsg_pid fill the netlink header, genl_cmd the generic
+   netlink header, nla_type the attribute header.  nla_data may be NULL,
+   in which case the attribute payload is all zero bytes.
+
+   Returns 0 once the whole message has been sent, -1 on error.  An
+   attribute that does not fit in the message buffer fails with errno
+   set to EMSGSIZE. */
+
+static int send_genetlink_cmd(int sd, uint16_t nlmsg_type, uint32_t nlmsg_pid,
+			      uint8_t genl_cmd, uint16_t nla_type,
+			      void *nla_data, int nla_len)
+{
+	struct nlattr *na;
+	struct sockaddr_nl nladdr;
+	struct msgtemplate msg;
+	int r, buflen;
+	char *buf;
+
+	/* attribute header + data + 1 must fit in msg.buf, otherwise the
+	   memcpy below would overrun it */
+	if (nla_len < 0 || nla_len + 1 + NLA_HDRLEN > MAX_MSG_SIZE) {
+		errno = EMSGSIZE;
+		return -1;
+	}
+
+	/* zero everything so reserved fields, padding and an absent
+	   payload are not sent as stack garbage */
+	memset(&msg, 0, sizeof(msg));
+
+	msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+	msg.n.nlmsg_type = nlmsg_type;
+	msg.n.nlmsg_flags = NLM_F_REQUEST;
+	msg.n.nlmsg_seq = 0;
+	msg.n.nlmsg_pid = nlmsg_pid;
+	msg.g.cmd = genl_cmd;
+	msg.g.version = 0x1;
+	na = (struct nlattr *) GENLMSG_DATA(&msg);
+	na->nla_type = nla_type;
+	na->nla_len = nla_len + 1 + NLA_HDRLEN;
+	if (nla_data)
+		memcpy(NLA_DATA(na), nla_data, nla_len);
+	msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
+
+	buf = (char *) &msg;
+	buflen = msg.n.nlmsg_len;
+	memset(&nladdr, 0, sizeof(nladdr));
+	nladdr.nl_family = AF_NETLINK;
+
+	/* keep sending until everything is out; EAGAIN is retried */
+	while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
+			   sizeof(nladdr))) < buflen) {
+		if (r > 0) {
+			buf += r;
+			buflen -= r;
+		} else if (errno != EAGAIN)
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * Probe the controller in genetlink to find the family id
+ * for the DLM family.
+ *
+ * Returns the family id, or 0 if the request could not be sent, the
+ * reply was an error or malformed, or the second attribute of the reply
+ * is not CTRL_ATTR_FAMILY_ID.
+ */
+static int get_family_id(int sd)
+{
+	char genl_name[100];
+	struct {
+		struct nlmsghdr n;
+		struct genlmsghdr g;
+		char buf[256];
+	} ans;
+	struct nlattr *na;
+	uint16_t family = 0;
+	size_t off;
+	int rc, rep_len;
+
+	strcpy(genl_name, DLM_GENL_NAME);
+	rc = send_genetlink_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
+				CTRL_ATTR_FAMILY_NAME, (void *)genl_name,
+				strlen(DLM_GENL_NAME)+1);
+	if (rc < 0)
+		return 0;	/* nothing was sent, so no reply will come */
+
+	memset(&ans, 0, sizeof(ans));
+	rep_len = recv(sd, &ans, sizeof(ans), 0);
+
+	/* validate the length before looking at any header field */
+	if (rep_len < 0 || !NLMSG_OK((&ans.n), rep_len))
+		return 0;
+	if (ans.n.nlmsg_type == NLMSG_ERROR)
+		return 0;
+
+	/* first attribute header must lie inside the received bytes */
+	off = NLMSG_LENGTH(GENL_HDRLEN);
+	if (off + NLA_HDRLEN > (size_t) rep_len)
+		return 0;
+	na = (struct nlattr *) ((char *) &ans + off);
+
+	/* step to the second attribute and make sure it, plus a 16 bit
+	   payload, is still inside the received bytes */
+	off += NLA_ALIGN(na->nla_len);
+	if (off + NLA_HDRLEN + sizeof(family) > (size_t) rep_len)
+		return 0;
+	na = (struct nlattr *) ((char *) &ans + off);
+
+	if (na->nla_type == CTRL_ATTR_FAMILY_ID)
+		memcpy(&family, NLA_DATA(na), sizeof(family));
+
+	return family;
+}
+
+/* genetlink messages are timewarnings used as part of deadlock detection */
+
+/* Opens and binds a generic netlink socket, looks up the DLM family id
+   and sends DLM_CMD_HELLO.  Returns the socket on success; on failure
+   returns the negative result of socket()/bind(), or -1 for the later
+   steps, with the socket closed. */
+
+int setup_netlink(void)
+{
+	struct sockaddr_nl snl;
+	uint16_t id;
+	int s, rv;
+
+	s = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+	if (s < 0) {
+		log_error("generic netlink socket");
+		return s;
+	}
+
+	memset(&snl, 0, sizeof(snl));
+	snl.nl_family = AF_NETLINK;
+
+	rv = bind(s, (struct sockaddr *) &snl, sizeof(snl));
+	if (rv < 0) {
+		log_error("gen netlink bind error %d errno %d", rv, errno);
+		goto fail;
+	}
+
+	id = get_family_id(s);
+	if (!id) {
+		log_error("Error getting family id, errno %d", errno);
+		rv = -1;
+		goto fail;
+	}
+
+	if (send_genetlink_cmd(s, id, getpid(), DLM_CMD_HELLO, 0, NULL, 0) < 0) {
+		log_error("error sending hello cmd, errno %d", errno);
+		rv = -1;
+		goto fail;
+	}
+
+	return s;
+
+ fail:
+	close(s);
+	return rv;
+}
+
+/* Handles one timewarn from the kernel: logs it and, unless a cycle
+   start was sent or a cycle ended less than DEADLOCK_CHECK_SECS ago,
+   records the time and starts a new deadlock cycle (when deadlock
+   detection is enabled).  Warnings for unknown lockspaces are ignored. */
+
+static void process_timewarn(struct dlm_lock_data *data)
+{
+	struct lockspace *ls;
+	struct timeval now;
+	unsigned int sec;
+	int namelen;
+
+	ls = find_ls_id(data->lockspace_id);
+	if (!ls)
+		return;
+
+	/* resource_namelen comes from the message.  resource_name is a
+	   fixed-size array in struct dlm_lock_data, so a length equal to
+	   (or beyond) the array size would put the terminator outside
+	   it; clamp so the write stays inside. */
+	namelen = data->resource_namelen;
+	if (namelen < 0)
+		namelen = 0;
+	if (namelen > (int) sizeof(data->resource_name) - 1)
+		namelen = (int) sizeof(data->resource_name) - 1;
+	data->resource_name[namelen] = '\0';
+
+	log_group(ls, "timewarn: lkid %x pid %d name %s",
+		  data->id, data->ownpid, data->resource_name);
+
+	/* Problem: we don't want to get a timewarn, assume it's resolved
+	   by the current cycle, but in fact it's from a deadlock that
+	   formed after the checkpoints for the current cycle.  Then we'd
+	   have to hope for another warning (that may not come) to trigger
+	   a new cycle to catch the deadlock.  If our last cycle ckpt
+	   was say N (~5?) sec before we receive the timewarn, then we
+	   can be confident that the cycle included the lock in question.
+	   Otherwise, we're not sure if the warning is for a new deadlock
+	   that's formed since our last cycle ckpt (unless it's a long
+	   enough time since the last cycle that we're confident it *is*
+	   a new deadlock).  When there is a deadlock, I suspect it will
+	   be common to receive warnings before, during, and possibly
+	   after the cycle that resolves it.  Wonder if we should record
+	   timewarns and match them with deadlock cycles so we can tell
+	   which timewarns are addressed by a given cycle and which aren't. */
+
+	gettimeofday(&now, NULL);
+
+	/* don't send a new start until at least SECS after the last
+	   we sent, and at least SECS after the last completed cycle */
+
+	sec = now.tv_sec - ls->last_send_cycle_start.tv_sec;
+
+	if (sec < DEADLOCK_CHECK_SECS) {
+		/* sec is unsigned, so print it with %u */
+		log_group(ls, "skip send: recent send cycle %u sec", sec);
+		return;
+	}
+
+	sec = now.tv_sec - ls->cycle_end_time.tv_sec;
+
+	if (sec < DEADLOCK_CHECK_SECS) {
+		log_group(ls, "skip send: recent cycle end %u sec", sec);
+		return;
+	}
+
+	gettimeofday(&ls->last_send_cycle_start, NULL);
+
+	if (cfgd_enable_deadlk)
+		send_cycle_start(ls);
+}
+
+/* Reads one generic netlink message from the fd of client ci and passes
+   its attribute payload to process_timewarn().  Receive errors, netlink
+   error messages and malformed or short messages are logged and dropped. */
+
+void process_netlink(int ci)
+{
+	struct msgtemplate msg;
+	struct nlattr *na;
+	int len;
+	int fd;
+
+	fd = client_fd(ci);
+
+	len = recv(fd, &msg, sizeof(msg), 0);
+
+	if (len < 0) {
+		log_error("nonfatal netlink error: errno %d", errno);
+		return;
+	}
+
+	/* check the framing before trusting any header field */
+	if (!NLMSG_OK((&msg.n), len)) {
+		log_error("fatal netlink error: bad message len %d", len);
+		return;
+	}
+
+	if (msg.n.nlmsg_type == NLMSG_ERROR) {
+		struct nlmsgerr *err = NLMSG_DATA(&msg);
+		log_error("fatal netlink error: errno %d", err->error);
+		return;
+	}
+
+	/* the attribute must hold a whole dlm_lock_data before its
+	   payload is interpreted as one */
+	if (msg.n.nlmsg_len < NLMSG_LENGTH(GENL_HDRLEN) + NLA_HDRLEN +
+			      sizeof(struct dlm_lock_data)) {
+		log_error("fatal netlink error: short message len %u",
+			  (unsigned int) msg.n.nlmsg_len);
+		return;
+	}
+
+	na = (struct nlattr *) GENLMSG_DATA(&msg);
+
+	process_timewarn((struct dlm_lock_data *) NLA_DATA(na));
+}
+
diff --git a/dlm_controld/plock.c b/dlm_controld/plock.c
new file mode 100644
index 0000000..c38b998
--- /dev/null
+++ b/dlm_controld/plock.c
@@ -0,0 +1,2197 @@
+#include "dlm_daemon.h"
+#include <linux/dlm_plock.h>
+
+/* FIXME: remove this once everyone is using the version of
+ * dlm_plock.h which defines it */
+
+/* Fallback for a <linux/dlm_plock.h> that lacks DLM_PLOCK_FL_CLOSE:
+   defines the flag locally, emits a build warning, and sets
+   DLM_PLOCK_BUILD_WORKAROUND (NOTE(review): the code testing that macro
+   is outside this excerpt -- confirm how it is used). */
+#ifndef DLM_PLOCK_FL_CLOSE
+#warning DLM_PLOCK_FL_CLOSE undefined. Enabling build workaround.
+#define DLM_PLOCK_FL_CLOSE 1
+#define DLM_PLOCK_BUILD_WORKAROUND 1
+#endif
+
+/* plock counters and timestamps; all are reset in setup_plocks() */
+static uint32_t plock_read_count;
+static uint32_t plock_recv_count;
+static uint32_t plock_rate_delays;
+static struct timeval plock_read_time;
+static struct timeval plock_recv_time;
+static struct timeval plock_rate_last;
+
+/* fd of the open plock misc device, -1 while not open */
+static int plock_device_fd = -1;
+/* set when the old lock_dlm plock device is in use, where the fsid read
+   from the kernel is a mountgroup id rather than a lockspace id */
+static int need_fsid_translation = 0;
+
+/* defined in another file of the daemon */
+extern int message_flow_control_on;
+
+/* NOTE(review): presumably a bit for resource_data.flags -- its use is
+   outside this excerpt, confirm */
+#define RD_CONTINUE 0x00000001
+
+/* Fixed-layout description of one resource: number, owner, a lock count
+   and flags, with explicit padding.  NOTE(review): looks like a
+   serialized counterpart of struct resource -- confirm against its
+   users. */
+struct resource_data {
+ uint64_t number;
+ int owner;
+ uint32_t lock_count;
+ uint32_t flags;
+ uint32_t pad;
+};
+
+/* Fixed-layout description of one lock range: the same range, owner, pid,
+   nodeid and ex fields as struct posix_lock, plus a waiter flag and
+   explicit padding.  NOTE(review): presumably the serialized form of a
+   posix_lock or lock_waiter -- confirm against its users. */
+struct plock_data {
+ uint64_t start;
+ uint64_t end;
+ uint64_t owner;
+ uint32_t pid;
+ uint32_t nodeid;
+ uint8_t ex;
+ uint8_t waiter;
+ uint16_t pad1;
+ uint32_t pad;
+};
+
+/* bits for resource.flags */
+#define R_GOT_UNOWN 0x00000001 /* have received owner=0 message */
+#define R_SEND_UNOWN 0x00000002 /* have sent owner=0 message */
+#define R_SEND_OWN 0x00000004 /* have sent owner=our_nodeid message */
+#define R_PURGE_UNOWN 0x00000008 /* set owner=0 in purge */
+#define R_SEND_DROP 0x00000010
+
+/* One plock resource, identified by number within a lockspace.  Each
+   resource is linked both on the lockspace's plock_resources list and in
+   its plock_resources_root rbtree (keyed by number). */
+struct resource {
+ struct list_head list; /* list of resources */
+ uint64_t number;
+ int owner; /* nodeid or 0 for unowned */
+ uint32_t flags;
+ struct timeval last_access;
+ struct list_head locks; /* one lock for each range */
+ struct list_head waiters;
+ struct list_head pending; /* discovering r owner */
+ struct rb_node rb_node;
+};
+
+#define P_SYNCING 0x00000001 /* plock has been sent as part of sync but not
+ yet received */
+
+/* One held lock range on a resource.  start and end are both inclusive
+   (see ranges_overlap); ex is non-zero for an exclusive (write) lock.
+   The holder is identified by the nodeid/owner pair. */
+struct posix_lock {
+ struct list_head list; /* resource locks or waiters list */
+ uint32_t pid;
+ uint64_t owner;
+ uint64_t start;
+ uint64_t end;
+ int ex;
+ int nodeid;
+ uint32_t flags;
+};
+
+/* A queued plock request together with the dlm_plock_info that describes
+   it.  NOTE(review): presumably linked on resource.waiters -- confirm. */
+struct lock_waiter {
+ struct list_head list;
+ uint32_t flags;
+ struct dlm_plock_info info;
+};
+
+/* A saved message: list linkage, the sending nodeid, the message type
+   and len bytes of message data stored directly after the header.  buf
+   is a C99 flexible array member, so an instance is allocated as
+   sizeof(struct save_msg) + len. */
+struct save_msg {
+	struct list_head list;
+	int nodeid;
+	int len;
+	int type;
+	char buf[];
+};
+
+
+static void send_own(struct lockspace *ls, struct resource *r, int owner);
+static void save_pending_plock(struct lockspace *ls, struct resource *r,
+ struct dlm_plock_info *in);
+
+
+/* Returns 1 if R_GOT_UNOWN is set in r->flags, else 0. */
+static int got_unown(struct resource *r)
+{
+	return (r->flags & R_GOT_UNOWN) ? 1 : 0;
+}
+
+/* Converts the fields of *i listed below from cpu to little-endian byte
+   order, in place. */
+static void info_bswap_out(struct dlm_plock_info *i)
+{
+ i->version[0] = cpu_to_le32(i->version[0]);
+ i->version[1] = cpu_to_le32(i->version[1]);
+ i->version[2] = cpu_to_le32(i->version[2]);
+ i->pid = cpu_to_le32(i->pid);
+ i->nodeid = cpu_to_le32(i->nodeid);
+ i->rv = cpu_to_le32(i->rv);
+ i->fsid = cpu_to_le32(i->fsid);
+ i->number = cpu_to_le64(i->number);
+ i->start = cpu_to_le64(i->start);
+ i->end = cpu_to_le64(i->end);
+ i->owner = cpu_to_le64(i->owner);
+}
+
+/* Inverse of info_bswap_out: converts the same fields of *i from
+   little-endian to cpu byte order, in place. */
+static void info_bswap_in(struct dlm_plock_info *i)
+{
+ i->version[0] = le32_to_cpu(i->version[0]);
+ i->version[1] = le32_to_cpu(i->version[1]);
+ i->version[2] = le32_to_cpu(i->version[2]);
+ i->pid = le32_to_cpu(i->pid);
+ i->nodeid = le32_to_cpu(i->nodeid);
+ i->rv = le32_to_cpu(i->rv);
+ i->fsid = le32_to_cpu(i->fsid);
+ i->number = le64_to_cpu(i->number);
+ i->start = le64_to_cpu(i->start);
+ i->end = le64_to_cpu(i->end);
+ i->owner = le64_to_cpu(i->owner);
+}
+
+/* Short printable name for a plock operation type; "??" if unknown. */
+static const char *op_str(int optype)
+{
+	if (optype == DLM_PLOCK_OP_LOCK)
+		return "LK";
+	if (optype == DLM_PLOCK_OP_UNLOCK)
+		return "UN";
+	if (optype == DLM_PLOCK_OP_GET)
+		return "GET";
+	return "??";
+}
+
+/* Printable lock mode: "-" for unlock and get operations, otherwise "WR"
+   when ex is non-zero and "RD" when it is zero. */
+static const char *ex_str(int optype, int ex)
+{
+	switch (optype) {
+	case DLM_PLOCK_OP_UNLOCK:
+	case DLM_PLOCK_OP_GET:
+		return "-";
+	default:
+		return ex ? "WR" : "RD";
+	}
+}
+
+/*
+ * In kernels before 2.6.26, plocks came from gfs2's lock_dlm module.
+ * Reading plocks from there as well should allow us to use cluster3
+ * on old (RHEL5) kernels.  In this case, the fsid we read in plock_info
+ * structs is the mountgroup id, which we need to translate to the ls id.
+ */
+
+/* Resets the plock counters and timestamps and opens the plock misc
+   device: the dlm one if plock_minor is set, else the old lock_dlm one
+   if old_plock_minor is set (which also enables fsid translation).
+   Returns the device fd, or -1 if neither device is known or the open
+   fails. */
+
+int setup_plocks(void)
+{
+	const char *path;
+
+	plock_read_count = 0;
+	plock_recv_count = 0;
+	plock_rate_delays = 0;
+	gettimeofday(&plock_read_time, NULL);
+	gettimeofday(&plock_recv_time, NULL);
+	gettimeofday(&plock_rate_last, NULL);
+
+	if (plock_minor) {
+		path = "/dev/misc/dlm_plock";
+	} else if (old_plock_minor) {
+		log_debug("setup_plocks using old lock_dlm interface");
+		need_fsid_translation = 1;
+		path = "/dev/misc/lock_dlm_plock";
+	} else {
+		/* no open() is attempted here, so errno would be stale;
+		   report the real reason instead */
+		log_error("Failure to open plock device: no plock device found");
+		return -1;
+	}
+
+	plock_device_fd = open(path, O_RDWR);
+	if (plock_device_fd < 0) {
+		log_error("Failure to open plock device: %s", strerror(errno));
+		return -1;
+	}
+
+	log_debug("plocks %d", plock_device_fd);
+	log_debug("plock cpg message size: %u bytes",
+		  (unsigned int) (sizeof(struct dlm_header) +
+				  sizeof(struct dlm_plock_info)));
+
+	return plock_device_fd;
+}
+
+/* Closes the plock device if it is open.  0 is a valid descriptor, so
+   "open" means >= 0; the fd is reset to -1 afterwards so a repeated call
+   does nothing. */
+void close_plocks(void)
+{
+	if (plock_device_fd < 0)
+		return;
+
+	close(plock_device_fd);
+	plock_device_fd = -1;
+}
+
+/* Translates a mountgroup id into the global id of the lockspace whose
+   associated_mg_id matches it.  If the first scan finds no match,
+   set_associated_id(fsid) is called and the scan is repeated once.
+   Returns fsid unchanged if there is still no match. */
+static uint32_t mg_to_ls_id(uint32_t fsid)
+{
+	struct lockspace *ls;
+	int pass;
+
+	for (pass = 0; pass < 2; pass++) {
+		list_for_each_entry(ls, &lockspaces, list) {
+			if (ls->associated_mg_id == fsid)
+				return ls->global_id;
+		}
+
+		if (pass == 0)
+			set_associated_id(fsid);
+	}
+
+	return fsid;
+}
+
+/* FIXME: unify these two */
+
+/* Milliseconds from *begin to *end.  The microsecond difference is
+   normalised with a borrow from the seconds before converting, and the
+   sub-millisecond part is truncated. */
+static unsigned long time_diff_ms(struct timeval *begin, struct timeval *end)
+{
+	long secs = end->tv_sec - begin->tv_sec;
+	long usecs = end->tv_usec - begin->tv_usec;
+
+	if (usecs < 0) {
+		secs--;
+		usecs += 1000000;
+	}
+	return (secs * 1000) + (usecs / 1000);
+}
+
+/* Microseconds from *start to *stop, computed in unsigned 64-bit
+   arithmetic. */
+static uint64_t dt_usec(struct timeval *start, struct timeval *stop)
+{
+	uint64_t secs = stop->tv_sec - start->tv_sec;
+	uint64_t usecs = stop->tv_usec - start->tv_usec;
+
+	return secs * 1000000 + usecs;
+}
+
+/* Looks number up in the lockspace's rbtree of plock resources.  Returns
+   the matching resource or NULL. */
+static struct resource * rb_search_plock_resource(struct lockspace *ls, uint64_t number)
+{
+	struct rb_node *node = ls->plock_resources_root.rb_node;
+
+	while (node) {
+		struct resource *cur = rb_entry(node, struct resource, rb_node);
+
+		if (number == cur->number)
+			return cur;
+
+		node = (number < cur->number) ? node->rb_left : node->rb_right;
+	}
+	return NULL;
+}
+
+/* Inserts r into the lockspace's rbtree of plock resources, keyed by
+   r->number.  If a resource with the same number is already in the tree
+   nothing is inserted. */
+static void rb_insert_plock_resource(struct lockspace *ls, struct resource *r)
+{
+	struct rb_node **link = &ls->plock_resources_root.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link) {
+		struct resource *cur;
+
+		parent = *link;
+		cur = rb_entry(parent, struct resource, rb_node);
+
+		if (r->number == cur->number)
+			return;
+
+		link = (r->number < cur->number) ? &parent->rb_left
+						 : &parent->rb_right;
+	}
+
+	rb_link_node(&r->rb_node, parent, link);
+	rb_insert_color(&r->rb_node, &ls->plock_resources_root);
+}
+
+/* Removes r from the lockspace's rbtree of plock resources and marks its
+   node as cleared.  Does nothing if the node is already marked empty. */
+static void rb_del_plock_resource(struct lockspace *ls, struct resource *r)
+{
+	if (RB_EMPTY_NODE(&r->rb_node))
+		return;
+
+	rb_erase(&r->rb_node, &ls->plock_resources_root);
+	RB_CLEAR_NODE(&r->rb_node);
+}
+
+/* Linear search of the lockspace's plock_resources list for number.
+   Returns the first matching resource or NULL. */
+static struct resource *search_resource(struct lockspace *ls, uint64_t number)
+{
+	struct resource *r;
+	struct resource *found = NULL;
+
+	list_for_each_entry(r, &ls->plock_resources, list) {
+		if (r->number == number) {
+			found = r;
+			break;
+		}
+	}
+	return found;
+}
+
+/* Finds the resource with the given number in ls, optionally creating it.
+
+   On success returns 0 with *r_out set to the resource, whose last_access
+   time is refreshed.  If the resource does not exist and create is 0,
+   returns -ENOENT; if allocation fails, returns -ENOMEM.  In both error
+   cases *r_out is set to NULL.
+
+   A new resource starts with owner -1 when plock ownership is enabled
+   and 0 otherwise, and is added to both the resource list and the
+   rbtree. */
+static int find_resource(struct lockspace *ls, uint64_t number, int create,
+			 struct resource **r_out)
+{
+	struct resource *r;
+	int rv = 0;
+
+	r = rb_search_plock_resource(ls, number);
+
+	if (!r && !create) {
+		rv = -ENOENT;
+	} else if (!r) {
+		r = malloc(sizeof(struct resource));
+		if (!r) {
+			log_elock(ls, "find_resource no memory %d", errno);
+			rv = -ENOMEM;
+		} else {
+			memset(r, 0, sizeof(struct resource));
+			r->number = number;
+			INIT_LIST_HEAD(&r->locks);
+			INIT_LIST_HEAD(&r->waiters);
+			INIT_LIST_HEAD(&r->pending);
+			r->owner = cfgd_plock_ownership ? -1 : 0;
+
+			list_add_tail(&r->list, &ls->plock_resources);
+			rb_insert_plock_resource(ls, r);
+		}
+	}
+
+	if (r)
+		gettimeofday(&r->last_access, NULL);
+	*r_out = r;
+	return rv;
+}
+
+/* Frees r once it has neither locks nor waiters, removing it from the
+   rbtree and the resource list first.  Does nothing when plock ownership
+   is enabled: with ownership, resources are only freed via drop
+   messages. */
+static void put_resource(struct lockspace *ls, struct resource *r)
+{
+	if (cfgd_plock_ownership)
+		return;
+
+	if (!list_empty(&r->locks) || !list_empty(&r->waiters))
+		return;
+
+	rb_del_plock_resource(ls, r);
+	list_del(&r->list);
+	free(r);
+}
+
+/* Returns 1 if the inclusive ranges start1..end1 and start2..end2 share
+   at least one value, else 0. */
+static inline int ranges_overlap(uint64_t start1, uint64_t end1,
+				 uint64_t start2, uint64_t end2)
+{
+	return (end1 >= start2 && start1 <= end2) ? 1 : 0;
+}
+
+/**
+ * overlap_type - returns a value based on the type of overlap
+ * @s1 - start of new lock range (r1)
+ * @e1 - end of new lock range
+ * @s2 - start of existing lock range (r2)
+ * @e2 - end of existing lock range
+ *
+ * Returns:
+ *   0  r1 and r2 are identical
+ *   1  r1 lies inside r2 and shares exactly one boundary with it
+ *   2  r1 lies strictly inside r2 (r2 extends past r1 on both sides)
+ *   3  r1 covers all of r2 and is larger on at least one side
+ *   4  r1 and r2 overlap partially (each extends past the other)
+ */
+
+static int overlap_type(uint64_t s1, uint64_t e1, uint64_t s2, uint64_t e2)
+{
+	if (s1 == s2) {
+		/* same start: identical, r1 shorter, or r1 longer */
+		if (e1 == e2)
+			return 0;
+		return (e1 < e2) ? 1 : 3;
+	}
+
+	if (s1 > s2) {
+		/* r1 starts inside r2: ends on r2's end, before it, or
+		   after it */
+		if (e1 == e2)
+			return 1;
+		return (e1 < e2) ? 2 : 4;
+	}
+
+	/* r1 starts before r2: either it reaches r2's end or it stops
+	   short of it */
+	return (e1 >= e2) ? 3 : 4;
+}
+
+/* shrink the range start2:end2 by the partially overlapping start:end.
+   If start2:end2 begins before start, its end is cut back to start - 1;
+   otherwise, if it ends after end, its start is moved up to end + 1.
+   Returns 0 when the range was shrunk, -1 when neither applies (the
+   range is left untouched). */
+
+static int shrink_range2(uint64_t *start2, uint64_t *end2,
+			 uint64_t start, uint64_t end)
+{
+	if (*start2 < start) {
+		*end2 = start - 1;
+		return 0;
+	}
+
+	if (*end2 > end) {
+		*start2 = end + 1;
+		return 0;
+	}
+
+	return -1;
+}
+
+/* Shrinks the range of lock po by the partially overlapping start:end;
+   see shrink_range2 for the return value. */
+static int shrink_range(struct posix_lock *po, uint64_t start, uint64_t end)
+{
+ return shrink_range2(&po->start, &po->end, start, end);
+}
+
+/* Returns 1 if a lock held on r by a different holder (nodeid/owner
+   pair) overlaps the range in *in and at least one of the two is
+   exclusive; returns 0 otherwise.  When get is non-zero, the ex, pid,
+   start and end of the first conflicting lock are copied into *in. */
+static int is_conflict(struct resource *r, struct dlm_plock_info *in, int get)
+{
+	struct posix_lock *po;
+
+	list_for_each_entry(po, &r->locks, list) {
+		int same_holder = (po->nodeid == in->nodeid &&
+				   po->owner == in->owner);
+
+		if (same_holder)
+			continue;
+		if (!ranges_overlap(po->start, po->end, in->start, in->end))
+			continue;
+		/* two shared locks never conflict */
+		if (!in->ex && !po->ex)
+			continue;
+
+		if (get) {
+			in->ex = po->ex;
+			in->pid = po->pid;
+			in->start = po->start;
+			in->end = po->end;
+		}
+		return 1;
+	}
+	return 0;
+}
+
+/* Allocates a posix_lock with the given holder, mode and range and
+   appends it to r->locks.  Returns 0, or -ENOMEM if allocation fails. */
+static int add_lock(struct resource *r, uint32_t nodeid, uint64_t owner,
+		    uint32_t pid, int ex, uint64_t start, uint64_t end)
+{
+	struct posix_lock *po = calloc(1, sizeof(struct posix_lock));
+
+	if (!po)
+		return -ENOMEM;
+
+	po->nodeid = nodeid;
+	po->owner = owner;
+	po->pid = pid;
+	po->ex = ex;
+	po->start = start;
+	po->end = end;
+
+	list_add_tail(&po->list, &r->locks);
+	return 0;
+}
+
+/* RN within RE (and starts or ends on RE boundary)
+   1. add new lock for non-overlap area of RE, orig mode
+   2. convert RE to RN range and mode
+
+   The original mode is passed as !in->ex: the caller only gets here when
+   the existing and new modes differ.  Returns 0, or the error from
+   shrink_range2/add_lock. */
+
+static int lock_case1(struct posix_lock *po, struct resource *r,
+		      struct dlm_plock_info *in)
+{
+	/* part of RE that RN does not cover */
+	uint64_t rest_start = po->start;
+	uint64_t rest_end = po->end;
+	int rv;
+
+	rv = shrink_range2(&rest_start, &rest_end, in->start, in->end);
+	if (rv)
+		return rv;
+
+	po->start = in->start;
+	po->end = in->end;
+	po->ex = in->ex;
+
+	return add_lock(r, in->nodeid, in->owner, in->pid, !in->ex,
+			rest_start, rest_end);
+}
+
+/* RN within RE (RE overlaps RN on both sides)
+   1. add new lock for front fragment, orig mode
+   2. add new lock for back fragment, orig mode
+   3. convert RE to RN range and mode
+
+   The original mode is passed as !in->ex: the caller only gets here when
+   the existing and new modes differ.  RE itself is changed only after
+   both fragments were added.  Returns 0 or the error from add_lock. */
+
+static int lock_case2(struct posix_lock *po, struct resource *r,
+		      struct dlm_plock_info *in)
+
+{
+	int orig_ex = !in->ex;
+	int rv;
+
+	/* front fragment: RE start up to just before RN */
+	rv = add_lock(r, in->nodeid, in->owner, in->pid,
+		      orig_ex, po->start, in->start - 1);
+	if (rv)
+		return rv;
+
+	/* back fragment: just after RN up to RE end */
+	rv = add_lock(r, in->nodeid, in->owner, in->pid,
+		      orig_ex, in->end + 1, po->end);
+	if (rv)
+		return rv;
+
+	po->start = in->start;
+	po->end = in->end;
+	po->ex = in->ex;
+	return 0;
+}
+
+/* Record the lock described by 'in' (RN) on r, first reconciling it with
+   every overlapping lock (RE) already held by the same nodeid/owner.
+   Returns 0 on success, the error from lock_case1()/lock_case2()/add_lock(),
+   or -1 if overlap_type() returns a value outside 0-4.
+   The ls argument is not used in this function. */
+static int lock_internal(struct lockspace *ls, struct resource *r,
+ struct dlm_plock_info *in)
+{
+ struct posix_lock *po, *safe;
+ int rv = 0;
+
+ /* the _safe iterator is needed because case 3 frees po */
+ list_for_each_entry_safe(po, safe, &r->locks, list) {
+ if (po->nodeid != in->nodeid || po->owner != in->owner)
+ continue;
+ if (!ranges_overlap(po->start, po->end, in->start, in->end))
+ continue;
+
+ /* existing range (RE) overlaps new range (RN) */
+
+ /* cases 0-2 finish the whole operation (goto out skips the
+ add_lock at the bottom); cases 3 and 4 only trim or remove
+ RE and keep scanning, so RN is added after the loop */
+ switch(overlap_type(in->start, in->end, po->start, po->end)) {
+
+ case 0:
+ if (po->ex == in->ex)
+ goto out;
+
+ /* ranges the same - just update the existing lock */
+ po->ex = in->ex;
+ goto out;
+
+ case 1:
+ /* same mode: RE already covers RN, nothing to do */
+ if (po->ex == in->ex)
+ goto out;
+
+ rv = lock_case1(po, r, in);
+ goto out;
+
+ case 2:
+ /* same mode: RE already covers RN, nothing to do */
+ if (po->ex == in->ex)
+ goto out;
+
+ rv = lock_case2(po, r, in);
+ goto out;
+
+ case 3:
+ /* RE is dropped; RN will be added after the loop */
+ list_del(&po->list);
+ free(po);
+ break;
+
+ case 4:
+ /* cut the part of RE that RN covers off its end or front */
+ if (po->start < in->start)
+ po->end = in->start - 1;
+ else
+ po->start = in->end + 1;
+ break;
+
+ default:
+ rv = -1;
+ goto out;
+ }
+ }
+
+ rv = add_lock(r, in->nodeid, in->owner, in->pid,
+ in->ex, in->start, in->end);
+ out:
+ return rv;
+
+}
+
+/* Remove the range described by 'in' (RN) from the locks that the same
+   nodeid/owner holds on r, splitting, shrinking or deleting each
+   overlapping lock (RE) as needed.
+   Returns 0, the error from shrink_range()/add_lock(), or -1 if
+   overlap_type() returns a value outside 0-4.
+   The ls argument is not used in this function. */
+static int unlock_internal(struct lockspace *ls, struct resource *r,
+ struct dlm_plock_info *in)
+{
+ struct posix_lock *po, *safe;
+ int rv = 0;
+
+ /* the _safe iterator is needed because cases 0 and 3 free po */
+ list_for_each_entry_safe(po, safe, &r->locks, list) {
+ if (po->nodeid != in->nodeid || po->owner != in->owner)
+ continue;
+ if (!ranges_overlap(po->start, po->end, in->start, in->end))
+ continue;
+
+ /* existing range (RE) overlaps new range (RN) */
+
+ switch (overlap_type(in->start, in->end, po->start, po->end)) {
+
+ case 0:
+ /* ranges the same - just remove the existing lock */
+
+ list_del(&po->list);
+ free(po);
+ goto out;
+
+ case 1:
+ /* RN within RE and starts or ends on RE boundary -
+ * shrink and update RE */
+
+ rv = shrink_range(po, in->start, in->end);
+ goto out;
+
+ case 2:
+ /* RN within RE - shrink and update RE to be front
+ * fragment, and add a new lock for back fragment */
+
+ /* NOTE(review): RE is shrunk even when add_lock fails, so
+ on -ENOMEM the back fragment is lost - confirm intended */
+ rv = add_lock(r, in->nodeid, in->owner, in->pid,
+ po->ex, in->end + 1, po->end);
+ po->end = in->start - 1;
+ goto out;
+
+ case 3:
+ /* RE within RN - remove RE, then continue checking
+ * because RN could cover other locks */
+
+ list_del(&po->list);
+ free(po);
+ continue;
+
+ case 4:
+ /* front of RE in RN, or end of RE in RN - shrink and
+ * update RE, then continue because RN could cover
+ * other locks */
+
+ /* rv can be overwritten by a later iteration, so only the
+ last result reaches the caller */
+ rv = shrink_range(po, in->start, in->end);
+ continue;
+
+ default:
+ rv = -1;
+ goto out;
+ }
+ }
+ out:
+ return rv;
+}
+
+/* Remove and free every entry on r->waiters whose nodeid and owner match
+   those of 'in', logging one line per removed waiter (the log line prints
+   the fields of 'in', not of the waiter). */
+static void clear_waiters(struct lockspace *ls, struct resource *r,
+			  struct dlm_plock_info *in)
+{
+	struct lock_waiter *cur, *next;
+
+	list_for_each_entry_safe(cur, next, &r->waiters, list) {
+		if (cur->info.nodeid == in->nodeid &&
+		    cur->info.owner == in->owner) {
+			list_del(&cur->list);
+
+			log_elock(ls, "clear waiter %llx %llx-%llx %d/%u/%llx",
+				  (unsigned long long)in->number,
+				  (unsigned long long)in->start,
+				  (unsigned long long)in->end,
+				  in->nodeid, in->pid,
+				  (unsigned long long)in->owner);
+			free(cur);
+		}
+	}
+}
+
+/* Queue a copy of the request 'in' at the tail of r->waiters.
+   Returns 0 on success or -ENOMEM if the waiter cannot be allocated.
+   The ls argument is not used in this function. */
+static int add_waiter(struct lockspace *ls, struct resource *r,
+		      struct dlm_plock_info *in)
+
+{
+	struct lock_waiter *w;
+
+	w = malloc(sizeof(struct lock_waiter));
+	if (!w)
+		return -ENOMEM;
+
+	/* Start from a zeroed waiter: w->flags is tested for P_SYNCING by
+	   pack_send_buf() and clear_syncing_flag() and OR-ed in send_syncs(),
+	   so it must not contain leftover heap contents. */
+	memset(w, 0, sizeof(struct lock_waiter));
+	memcpy(&w->info, in, sizeof(struct dlm_plock_info));
+	list_add_tail(&w->list, &r->waiters);
+	return 0;
+}
+
+/* Report the result of a plock op through plock_device_fd: stores rv in
+   in->rv (and, when need_fsid_translation is set, replaces in->fsid with
+   ls->associated_mg_id), then writes the whole struct.  A failed or short
+   write is logged; the function still returns nothing. */
+static void write_result(struct lockspace *ls, struct dlm_plock_info *in,
+			 int rv)
+{
+	ssize_t wrote;
+
+	if (need_fsid_translation)
+		in->fsid = ls->associated_mg_id;
+
+	in->rv = rv;
+	wrote = write(plock_device_fd, in, sizeof(struct dlm_plock_info));
+	if (wrote != (ssize_t)sizeof(struct dlm_plock_info)) {
+		/* capture errno before the logging code can disturb it */
+		int err = errno;
+
+		log_elock(ls, "write_result write error %d errno %d",
+			  (int)wrote, err);
+	}
+}
+
+/* Walk r->waiters in order and grant every request that no longer
+   conflicts: the waiter is unlinked, applied with lock_internal(), its
+   result written back if the request came from our node, and then freed.
+   Waiters that still conflict are left on the list. */
+static void do_waiters(struct lockspace *ls, struct resource *r)
+{
+	struct lock_waiter *cur, *next;
+
+	list_for_each_entry_safe(cur, next, &r->waiters, list) {
+		struct dlm_plock_info *req = &cur->info;
+		int rv;
+
+		if (is_conflict(r, req, 0))
+			continue;
+
+		list_del(&cur->list);
+
+		rv = lock_internal(ls, r, req);
+
+		if (req->nodeid == our_nodeid)
+			write_result(ls, req, rv);
+
+		free(cur);
+	}
+}
+
+/* Apply a LOCK op to r.  Without a conflict the lock is taken right away;
+   with a conflict the result is -EAGAIN for a non-waiting request, or the
+   request is queued as a waiter (-EINPROGRESS, or the add_waiter() error).
+   A result other than -EINPROGRESS is written back when the request is
+   from our node.  Finally do_waiters() and put_resource() are called. */
+static void do_lock(struct lockspace *ls, struct dlm_plock_info *in,
+		    struct resource *r)
+{
+	int rv;
+
+	if (!is_conflict(r, in, 0)) {
+		rv = lock_internal(ls, r, in);
+	} else if (!in->wait) {
+		rv = -EAGAIN;
+	} else {
+		rv = add_waiter(ls, r, in);
+		if (!rv)
+			rv = -EINPROGRESS;
+	}
+
+	if (in->nodeid == our_nodeid && rv != -EINPROGRESS)
+		write_result(ls, in, rv);
+
+	do_waiters(ls, r);
+	put_resource(ls, r);
+}
+
+/* Apply an UNLOCK op to r.  For an unlock carrying DLM_PLOCK_FL_CLOSE the
+   requester's waiters are cleared and no reply is written; otherwise the
+   unlock result is written back when the request is from our node.
+   Finally do_waiters() and put_resource() are called. */
+static void do_unlock(struct lockspace *ls, struct dlm_plock_info *in,
+		      struct resource *r)
+{
+	int is_close;
+	int rv;
+
+	rv = unlock_internal(ls, r, in);
+
+	/* the close flag lives in 'pad' or 'flags' depending on the build */
+#ifdef DLM_PLOCK_BUILD_WORKAROUND
+	is_close = !!(in->pad & DLM_PLOCK_FL_CLOSE);
+#else
+	is_close = !!(in->flags & DLM_PLOCK_FL_CLOSE);
+#endif
+
+	if (is_close) {
+		/* no replies for unlock-close ops */
+		clear_waiters(ls, r, in);
+	} else if (in->nodeid == our_nodeid) {
+		write_result(ls, in, rv);
+	}
+
+	do_waiters(ls, r);
+	put_resource(ls, r);
+}
+
+/* we don't even get to this function if the getlk isn't from us */
+
+/* Answer a GET op: the result written back is 1 if is_conflict() finds a
+   conflicting lock (it also copies that lock's details into 'in'), else 0.
+   put_resource() is called on r afterwards. */
+static void do_get(struct lockspace *ls, struct dlm_plock_info *in,
+		   struct resource *r)
+{
+	int conflict = is_conflict(r, in, 1) ? 1 : 0;
+
+	write_result(ls, in, conflict);
+	put_resource(ls, r);
+}
+
+/* Keep a copy of a received message (len bytes starting at hd) on
+   ls->saved_messages, tagged with its type and sender.
+   If the copy cannot be allocated the message is dropped; this is logged
+   so the loss is visible, as save_pending_plock() does for its own
+   allocation failure. */
+static void save_message(struct lockspace *ls, struct dlm_header *hd, int len,
+			 int from, int type)
+{
+	struct save_msg *sm;
+
+	sm = malloc(sizeof(struct save_msg) + len);
+	if (!sm) {
+		log_elock(ls, "save_message no mem %s from %d len %d",
+			  msg_name(type), from, len);
+		return;
+	}
+	memset(sm, 0, sizeof(struct save_msg) + len);
+
+	memcpy(&sm->buf, hd, len);
+	sm->type = type;
+	sm->len = len;
+	sm->nodeid = from;
+
+	log_plock(ls, "save %s from %d len %d", msg_name(type), from, len);
+
+	list_add_tail(&sm->list, &ls->saved_messages);
+}
+
+/* Dispatch one plock op on r according to in->optype.  LOCK and UNLOCK
+   also refresh ls->last_plock_time.  An unknown optype is logged and, when
+   the op came from our node, answered with -EINVAL. */
+static void __receive_plock(struct lockspace *ls, struct dlm_plock_info *in,
+			    int from, struct resource *r)
+{
+	if (in->optype == DLM_PLOCK_OP_LOCK) {
+		ls->last_plock_time = time(NULL);
+		do_lock(ls, in, r);
+	} else if (in->optype == DLM_PLOCK_OP_UNLOCK) {
+		ls->last_plock_time = time(NULL);
+		do_unlock(ls, in, r);
+	} else if (in->optype == DLM_PLOCK_OP_GET) {
+		do_get(ls, in, r);
+	} else {
+		log_elock(ls, "receive_plock error from %d optype %d",
+			  from, in->optype);
+		if (from == our_nodeid)
+			write_result(ls, in, -EINVAL);
+	}
+}
+
+/* When ls members receive our options message (for our mount), one of them
+ saves all plock state received to that point in a checkpoint and then sends
+ us our journals message. We know to retrieve the plock state from the
+ checkpoint when we receive our journals message. Any plocks messages that
+ arrive between seeing our options message and our journals message needs to
+ be saved and processed after we synchronize our plock state from the
+ checkpoint. Any plock message received while we're mounting but before we
+ set save_plocks (when we see our options message) can be ignored because it
+ should be reflected in the checkpointed state. */
+
+/* Handle one DLM_MSG_PLOCK message: unpack the dlm_plock_info that follows
+   the header, look up the resource (creating it only when plock ownership
+   is disabled), then either apply the op with __receive_plock() or park it
+   with save_pending_plock(), depending on r->owner and the sender.
+   NOTE(review): len is not checked against sizeof(struct dlm_header) +
+   sizeof(info) before the memcpy below - confirm callers guarantee it. */
+static void _receive_plock(struct lockspace *ls, struct dlm_header *hd, int len)
+{
+ struct dlm_plock_info info;
+ struct resource *r = NULL;
+ struct timeval now;
+ uint64_t usec;
+ int from = hd->nodeid;
+ int rv, create;
+
+ memcpy(&info, (char *)hd + sizeof(struct dlm_header), sizeof(info));
+ info_bswap_in(&info);
+
+ log_plock(ls, "receive plock %llx %s %s %llx-%llx %d/%u/%llx w %d",
+ (unsigned long long)info.number,
+ op_str(info.optype),
+ ex_str(info.optype, info.ex),
+ (unsigned long long)info.start, (unsigned long long)info.end,
+ info.nodeid, info.pid, (unsigned long long)info.owner,
+ info.wait);
+
+ /* every 1000 received ops, log how long that batch took */
+ plock_recv_count++;
+ if (!(plock_recv_count % 1000)) {
+ gettimeofday(&now, NULL);
+ usec = dt_usec(&plock_recv_time, &now);
+ log_plock(ls, "plock_recv_count %u time %.3f s",
+ plock_recv_count, usec * 1.e-6);
+ plock_recv_time = now;
+ }
+
+ /* GET ops from other nodes are ignored */
+ if (info.optype == DLM_PLOCK_OP_GET && from != our_nodeid)
+ return;
+
+ /* 'from' was copied from hd->nodeid above, so only the comparison
+ with info.nodeid can actually fail here */
+ if (from != hd->nodeid || from != info.nodeid) {
+ log_elock(ls, "receive_plock error from %d header %d info %d",
+ from, hd->nodeid, info.nodeid);
+ return;
+ }
+
+ create = !cfgd_plock_ownership;
+
+ rv = find_resource(ls, info.number, create, &r);
+
+ if (rv && cfgd_plock_ownership) {
+ /* There must have been a race with a drop, so we need to
+ ignore this plock op which will be resent. If we're the one
+ who sent the plock, we need to send_own() and put it on the
+ pending list to resend once the owner is established. */
+
+ log_plock(ls, "receive_plock from %d no r %llx", from,
+ (unsigned long long)info.number);
+
+ if (from != our_nodeid)
+ return;
+
+ rv = find_resource(ls, info.number, 1, &r);
+ if (rv)
+ return;
+ send_own(ls, r, our_nodeid);
+ save_pending_plock(ls, r, &info);
+ return;
+ }
+ if (rv) {
+ /* r not found, rv is -ENOENT, this shouldn't happen because
+ process_plocks() creates a resource for every op */
+
+ log_elock(ls, "receive_plock error from %d no r %llx %d",
+ from, (unsigned long long)info.number, rv);
+ return;
+ }
+
+ /* The owner should almost always be 0 here, but other owners may
+ be possible given odd combinations of races with drop. Odd races to
+ worry about (some seem pretty improbable):
+
+ - A sends drop, B sends plock, receive drop, receive plock.
+ This is addressed above.
+
+ - A sends drop, B sends plock, receive drop, B reads plock
+ and sends own, receive plock, on B we find owner of -1.
+
+ - A sends drop, B sends two plocks, receive drop, receive plocks.
+ Receiving the first plock is the previous case, receiving the
+ second plock will find r with owner of -1.
+
+ - A sends drop, B sends two plocks, receive drop, C sends own,
+ receive plock, B sends own, receive own (C), receive plock,
+ receive own (B).
+
+ Haven't tried to cook up a scenario that would lead to the
+ last case below; receiving a plock from ourself and finding
+ we're the owner of r. */
+
+ if (!r->owner) {
+ __receive_plock(ls, &info, from, r);
+
+ } else if (r->owner == -1) {
+ log_plock(ls, "receive_plock from %d r %llx owner %d", from,
+ (unsigned long long)info.number, r->owner);
+
+ /* owner not established yet: keep our own op for later */
+ if (from == our_nodeid)
+ save_pending_plock(ls, r, &info);
+
+ } else if (r->owner != our_nodeid) {
+ log_plock(ls, "receive_plock from %d r %llx owner %d", from,
+ (unsigned long long)info.number, r->owner);
+
+ /* owned by another node: keep our own op for later */
+ if (from == our_nodeid)
+ save_pending_plock(ls, r, &info);
+
+ } else if (r->owner == our_nodeid) {
+ log_plock(ls, "receive_plock from %d r %llx owner %d", from,
+ (unsigned long long)info.number, r->owner);
+
+ /* we own r: only our own ops are applied, others are dropped */
+ if (from == our_nodeid)
+ __receive_plock(ls, &info, from, r);
+ }
+}
+
+/* Entry point for a DLM_MSG_PLOCK message: processed immediately unless
+   ls->save_plocks is set, in which case it is queued with save_message(). */
+void receive_plock(struct lockspace *ls, struct dlm_header *hd, int len)
+{
+	if (!ls->save_plocks) {
+		_receive_plock(ls, hd, len);
+		return;
+	}
+
+	save_message(ls, hd, len, hd->nodeid, DLM_MSG_PLOCK);
+}
+
+/* Build a message made of a zeroed dlm_header (type = msg_type) followed
+   by the dlm_plock_info 'in', and hand it to dlm_send_message().
+   Note that info_bswap_out() is applied to 'in' itself, so the caller's
+   struct is modified.  Returns 0, or -ENOMEM (after logging) when the
+   message buffer cannot be allocated. */
+static int send_struct_info(struct lockspace *ls, struct dlm_plock_info *in,
+			    int msg_type)
+{
+	struct dlm_header *hd;
+	char *msg;
+	int len;
+
+	len = sizeof(struct dlm_header) + sizeof(struct dlm_plock_info);
+
+	msg = malloc(len);
+	if (!msg) {
+		log_elock(ls, "send_struct_info error %d", -ENOMEM);
+		return -ENOMEM;
+	}
+	memset(msg, 0, len);
+
+	info_bswap_out(in);
+
+	hd = (struct dlm_header *)msg;
+	hd->type = msg_type;
+
+	memcpy(msg + sizeof(struct dlm_header), in, sizeof(*in));
+
+	dlm_send_message(ls, msg, len);
+
+	free(msg);
+	return 0;
+}
+
+/* Send the plock op 'in' as a DLM_MSG_PLOCK message.  The r argument is
+   not used, and the return value of send_struct_info() is discarded. */
+static void send_plock(struct lockspace *ls, struct resource *r,
+ struct dlm_plock_info *in)
+{
+ send_struct_info(ls, in, DLM_MSG_PLOCK);
+}
+
+/* Send a DLM_MSG_PLOCK_OWN message proposing 'owner' as the owner of r
+   and note it in r->flags (R_SEND_UNOWN for owner 0, R_SEND_OWN
+   otherwise).  Nothing is sent while r->pending is non-empty. */
+static void send_own(struct lockspace *ls, struct resource *r, int owner)
+{
+	struct dlm_plock_info info;
+
+	/* if we've already sent an own message for this resource,
+	   (pending list is not empty), then we shouldn't send another */
+	if (!list_empty(&r->pending)) {
+		log_plock(ls, "send_own %llx already pending",
+			  (unsigned long long)r->number);
+		return;
+	}
+
+	r->flags |= owner ? R_SEND_OWN : R_SEND_UNOWN;
+
+	memset(&info, 0, sizeof(info));
+	info.nodeid = owner;
+	info.number = r->number;
+
+	send_struct_info(ls, &info, DLM_MSG_PLOCK_OWN);
+}
+
+/* Send one DLM_MSG_PLOCK_SYNC_LOCK message per lock on r, then one
+   DLM_MSG_PLOCK_SYNC_WAITER message per waiter, marking each entry
+   P_SYNCING once its message has been sent.  The first send failure stops
+   the whole pass; entries not yet reached stay unmarked. */
+static void send_syncs(struct lockspace *ls, struct resource *r)
+{
+	struct dlm_plock_info info;
+	struct posix_lock *lk;
+	struct lock_waiter *wt;
+
+	list_for_each_entry(lk, &r->locks, list) {
+		memset(&info, 0, sizeof(info));
+		info.number = r->number;
+		info.nodeid = lk->nodeid;
+		info.owner = lk->owner;
+		info.pid = lk->pid;
+		info.ex = lk->ex;
+		info.start = lk->start;
+		info.end = lk->end;
+
+		if (send_struct_info(ls, &info, DLM_MSG_PLOCK_SYNC_LOCK))
+			return;
+
+		lk->flags |= P_SYNCING;
+	}
+
+	list_for_each_entry(wt, &r->waiters, list) {
+		/* send a copy: send_struct_info() byte-swaps its argument */
+		memcpy(&info, &wt->info, sizeof(info));
+
+		if (send_struct_info(ls, &info, DLM_MSG_PLOCK_SYNC_WAITER))
+			return;
+
+		wt->flags |= P_SYNCING;
+	}
+}
+
+/* Mark r with R_SEND_DROP and send a DLM_MSG_PLOCK_DROP message that
+   carries only the resource number. */
+static void send_drop(struct lockspace *ls, struct resource *r)
+{
+	struct dlm_plock_info drop;
+
+	r->flags |= R_SEND_DROP;
+
+	memset(&drop, 0, sizeof(drop));
+	drop.number = r->number;
+
+	send_struct_info(ls, &drop, DLM_MSG_PLOCK_DROP);
+}
+
+/* A plock op cannot be handled until the owner value of the resource is
+   known, so a copy of the op is parked at the tail of r->pending until
+   then.  An allocation failure is logged and the op is not saved. */
+
+static void save_pending_plock(struct lockspace *ls, struct resource *r,
+			       struct dlm_plock_info *in)
+{
+	struct lock_waiter *pend = malloc(sizeof(struct lock_waiter));
+
+	if (pend) {
+		memcpy(&pend->info, in, sizeof(struct dlm_plock_info));
+		list_add_tail(&pend->list, &r->pending);
+		return;
+	}
+
+	log_elock(ls, "save_pending_plock no mem");
+}
+
+/* The ops on r->pending were waiting for ownership to be settled.  We are
+   the owner now, so each one is applied locally through __receive_plock()
+   and then unlinked and freed. */
+
+static void add_pending_plocks(struct lockspace *ls, struct resource *r)
+{
+	struct lock_waiter *pend, *next;
+
+	list_for_each_entry_safe(pend, next, &r->pending, list) {
+		struct dlm_plock_info *op = &pend->info;
+
+		__receive_plock(ls, op, our_nodeid, r);
+		list_del(&pend->list);
+		free(pend);
+	}
+}
+
+/* The ops on r->pending were waiting for ownership to be settled.  The
+   owner has become 0, so each one is sent out with send_plock() and then
+   unlinked and freed. */
+
+static void send_pending_plocks(struct lockspace *ls, struct resource *r)
+{
+	struct lock_waiter *pend, *next;
+
+	list_for_each_entry_safe(pend, next, &r->pending, list) {
+		struct dlm_plock_info *op = &pend->info;
+
+		send_plock(ls, r, op);
+		list_del(&pend->list);
+		free(pend);
+	}
+}
+
+/* Handle one DLM_MSG_PLOCK_OWN message, in which node 'from' proposes
+   info.nodeid as the owner of resource info.number (created here if it
+   does not exist).  Depending on who sent it, the proposed owner and the
+   current r->owner, this updates r->owner / r->flags and applies or sends
+   the ops parked on r->pending.  Combinations that are not expected set
+   should_not_happen and are reported with log_elock() at the end.
+   NOTE(review): len is not validated before info is copied out of hd. */
+static void _receive_own(struct lockspace *ls, struct dlm_header *hd, int len)
+{
+ struct dlm_plock_info info;
+ struct resource *r;
+ int should_not_happen = 0;
+ int from = hd->nodeid;
+ int rv;
+
+ ls->last_plock_time = time(NULL);
+
+ memcpy(&info, (char *)hd + sizeof(struct dlm_header), sizeof(info));
+ info_bswap_in(&info);
+
+ log_plock(ls, "receive_own %llx from %u owner %u",
+ (unsigned long long)info.number, hd->nodeid, info.nodeid);
+
+ rv = find_resource(ls, info.number, 1, &r);
+ if (rv)
+ return;
+
+ if (from == our_nodeid) {
+ /*
+ * received our own own message
+ */
+
+ if (info.nodeid == 0) {
+ /* we are setting owner to 0 */
+
+ if (r->owner == our_nodeid) {
+ /* we set owner to 0 when we relinquish
+ ownership */
+ should_not_happen = 1;
+ } else if (r->owner == 0) {
+ /* this happens when we relinquish ownership */
+ r->flags |= R_GOT_UNOWN;
+ } else {
+ should_not_happen = 1;
+ }
+
+ } else if (info.nodeid == our_nodeid) {
+ /* we are setting owner to ourself */
+
+ if (r->owner == -1) {
+ /* we have gained ownership */
+ r->owner = our_nodeid;
+ add_pending_plocks(ls, r);
+ } else if (r->owner == our_nodeid) {
+ should_not_happen = 1;
+ } else if (r->owner == 0) {
+ /* r is unowned: our parked ops go out as plock messages */
+ send_pending_plocks(ls, r);
+ } else {
+ /* resource is owned by other node;
+ they should set owner to 0 shortly */
+ }
+
+ } else {
+ /* we should only ever set owner to 0 or ourself */
+ should_not_happen = 1;
+ }
+ } else {
+ /*
+ * received own message from another node
+ */
+
+ if (info.nodeid == 0) {
+ /* other node is setting owner to 0 */
+
+ if (r->owner == -1) {
+ /* we should have a record of the owner before
+ it relinquishes */
+ should_not_happen = 1;
+ } else if (r->owner == our_nodeid) {
+ /* only the owner should relinquish */
+ should_not_happen = 1;
+ } else if (r->owner == 0) {
+ should_not_happen = 1;
+ } else {
+ r->owner = 0;
+ r->flags |= R_GOT_UNOWN;
+ send_pending_plocks(ls, r);
+ }
+
+ } else if (info.nodeid == from) {
+ /* other node is setting owner to itself */
+
+ if (r->owner == -1) {
+ /* normal path for a node becoming owner */
+ r->owner = from;
+ } else if (r->owner == our_nodeid) {
+ /* we relinquish our ownership: sync our local
+ plocks to everyone, then set owner to 0 */
+ send_syncs(ls, r);
+ send_own(ls, r, 0);
+ /* we need to set owner to 0 here because
+ local ops may arrive before we receive
+ our send_own message and can't be added
+ locally */
+ r->owner = 0;
+ } else if (r->owner == 0) {
+ /* can happen because we set owner to 0 before
+ we receive our send_own sent just above */
+ } else {
+ /* do nothing, current owner should be
+ relinquishing its ownership */
+ }
+
+ } else if (info.nodeid == our_nodeid) {
+ /* no one else should try to set the owner to us */
+ should_not_happen = 1;
+ } else {
+ /* a node should only ever set owner to 0 or itself */
+ should_not_happen = 1;
+ }
+ }
+
+ if (should_not_happen) {
+ log_elock(ls, "receive_own error from %u %llx "
+ "info nodeid %d r owner %d",
+ from, (unsigned long long)r->number,
+ info.nodeid, r->owner);
+ }
+}
+
+/* Entry point for a DLM_MSG_PLOCK_OWN message: processed immediately
+   unless ls->save_plocks is set, in which case it is queued with
+   save_message(). */
+void receive_own(struct lockspace *ls, struct dlm_header *hd, int len)
+{
+	if (!ls->save_plocks) {
+		_receive_own(ls, hd, len);
+		return;
+	}
+
+	save_message(ls, hd, len, hd->nodeid, DLM_MSG_PLOCK_OWN);
+}
+
+/* Find the first lock on r - or, failing that, the first waiter - that is
+   marked P_SYNCING and matches 'in' on start, end, nodeid, owner, pid and
+   ex, and clear its P_SYNCING flag.  If nothing matches, an error is
+   logged. */
+static void clear_syncing_flag(struct lockspace *ls, struct resource *r,
+			       struct dlm_plock_info *in)
+{
+	struct posix_lock *lk;
+	struct lock_waiter *wt;
+
+	list_for_each_entry(lk, &r->locks, list) {
+		if (!(lk->flags & P_SYNCING))
+			continue;
+		if (in->start != lk->start || in->end != lk->end)
+			continue;
+		if (in->nodeid != lk->nodeid || in->owner != lk->owner)
+			continue;
+		if (in->pid != lk->pid || in->ex != lk->ex)
+			continue;
+
+		lk->flags &= ~P_SYNCING;
+		return;
+	}
+
+	list_for_each_entry(wt, &r->waiters, list) {
+		if (!(wt->flags & P_SYNCING))
+			continue;
+		if (in->start != wt->info.start || in->end != wt->info.end)
+			continue;
+		if (in->nodeid != wt->info.nodeid ||
+		    in->owner != wt->info.owner)
+			continue;
+		if (in->pid != wt->info.pid || in->ex != wt->info.ex)
+			continue;
+
+		wt->flags &= ~P_SYNCING;
+		return;
+	}
+
+	log_elock(ls, "clear_syncing error %llx no match %s %llx-%llx %d/%u/%llx",
+		  (unsigned long long)r->number,
+		  in->ex ? "WR" : "RD",
+		  (unsigned long long)in->start,
+		  (unsigned long long)in->end,
+		  in->nodeid, in->pid,
+		  (unsigned long long)in->owner);
+}
+
+/* Handle a DLM_MSG_PLOCK_SYNC_LOCK or DLM_MSG_PLOCK_SYNC_WAITER message.
+   The resource must already exist.  Our own sync message coming back means
+   the entry is now in sync everywhere, so its P_SYNCING flag is cleared;
+   a sync message from another node adds the described lock or waiter to r.
+   Failures to add the entry are logged. */
+static void _receive_sync(struct lockspace *ls, struct dlm_header *hd, int len)
+{
+	struct dlm_plock_info info;
+	struct resource *r;
+	int from = hd->nodeid;
+	int rv;
+
+	ls->last_plock_time = time(NULL);
+
+	memcpy(&info, (char *)hd + sizeof(struct dlm_header), sizeof(info));
+	info_bswap_in(&info);
+
+	log_plock(ls, "receive sync %llx from %u %s %llx-%llx %d/%u/%llx",
+		  (unsigned long long)info.number, from, info.ex ? "WR" : "RD",
+		  (unsigned long long)info.start, (unsigned long long)info.end,
+		  info.nodeid, info.pid, (unsigned long long)info.owner);
+
+	rv = find_resource(ls, info.number, 0, &r);
+	if (rv) {
+		/* cast so the argument matches %llx, as in every other log
+		   call in this file */
+		log_elock(ls, "receive_sync error no r %llx from %d",
+			  (unsigned long long)info.number, from);
+		return;
+	}
+
+	if (from == our_nodeid) {
+		/* this plock now in sync on all nodes */
+		clear_syncing_flag(ls, r, &info);
+		return;
+	}
+
+	/* rv is 0 here, so it stays 0 for an unexpected message type */
+	if (hd->type == DLM_MSG_PLOCK_SYNC_LOCK)
+		rv = add_lock(r, info.nodeid, info.owner, info.pid, info.ex,
+			      info.start, info.end);
+	else if (hd->type == DLM_MSG_PLOCK_SYNC_WAITER)
+		rv = add_waiter(ls, r, &info);
+
+	if (rv)
+		log_elock(ls, "receive_sync error %d adding %llx from %d",
+			  rv, (unsigned long long)info.number, from);
+}
+
+/* Entry point for the two PLOCK_SYNC message types: processed immediately
+   unless ls->save_plocks is set, in which case the message is queued with
+   save_message() under its own header type. */
+void receive_sync(struct lockspace *ls, struct dlm_header *hd, int len)
+{
+	if (!ls->save_plocks) {
+		_receive_sync(ls, hd, len);
+		return;
+	}
+
+	save_message(ls, hd, len, hd->nodeid, hd->type);
+}
+
+/* Handle one DLM_MSG_PLOCK_DROP message: free the resource info.number if
+   it exists, is unowned (owner 0), has nothing pending and holds no locks
+   or waiters.  Every other situation is only logged. */
+static void _receive_drop(struct lockspace *ls, struct dlm_header *hd, int len)
+{
+	struct dlm_plock_info info;
+	struct resource *r;
+	int from = hd->nodeid;
+
+	ls->last_plock_time = time(NULL);
+
+	memcpy(&info, (char *)hd + sizeof(struct dlm_header), sizeof(info));
+	info_bswap_in(&info);
+
+	log_plock(ls, "receive_drop %llx from %u",
+		  (unsigned long long)info.number, from);
+
+	if (find_resource(ls, info.number, 0, &r)) {
+		/* we'll find no r if two nodes sent drop at once */
+		log_plock(ls, "receive_drop from %d no r %llx", from,
+			  (unsigned long long)info.number);
+		return;
+	}
+
+	if (r->owner != 0) {
+		/* - A sent drop, B sent drop, receive drop A, C sent own,
+		     receive drop B (this warning on C, owner -1)
+		   - A sent drop, B sent drop, receive drop A, A sent own,
+		     receive own A, receive drop B (this warning on all,
+		     owner A) */
+		log_plock(ls, "receive_drop from %d r %llx owner %d", from,
+			  (unsigned long long)r->number, r->owner);
+		return;
+	}
+
+	if (!list_empty(&r->pending)) {
+		/* shouldn't happen */
+		log_elock(ls, "receive_drop error from %d r %llx pending op",
+			  from, (unsigned long long)r->number);
+		return;
+	}
+
+	/* the decision to drop or not must be based on things that are
+	   guaranteed to be the same on all nodes */
+
+	if (!list_empty(&r->locks) || !list_empty(&r->waiters)) {
+		/* A sent drop, B sent a plock, receive plock, receive drop */
+		log_plock(ls, "receive_drop from %d r %llx in use", from,
+			  (unsigned long long)r->number);
+		return;
+	}
+
+	rb_del_plock_resource(ls, r);
+	list_del(&r->list);
+	free(r);
+}
+
+/* Entry point for a DLM_MSG_PLOCK_DROP message: processed immediately
+   unless ls->save_plocks is set, in which case it is queued with
+   save_message(). */
+void receive_drop(struct lockspace *ls, struct dlm_header *hd, int len)
+{
+	if (!ls->save_plocks) {
+		_receive_drop(ls, hd, len);
+		return;
+	}
+
+	save_message(ls, hd, len, hd->nodeid, DLM_MSG_PLOCK_DROP);
+}
+
+/* We only drop resources from the unowned state to simplify things.
+ If we want to drop a resource we own, we unown/relinquish it first. */
+
+/* FIXME: in the transition from owner = us, to owner = 0, to drop;
+ we want the second period to be shorter than the first */
+
+/* Scan ls->plock_resources from the tail for idle resources and start
+   dropping them: a resource we own is relinquished (send_own with owner 0),
+   an unowned one for which got_unown() is true gets a drop message.
+   Returns 0 when plock ownership is disabled or the lockspace has no
+   resources, otherwise 1 - both when the scan ran and when it was skipped
+   because cfgd_drop_resources_time has not yet elapsed since the last one. */
+static int drop_resources(struct lockspace *ls)
+{
+ struct resource *r;
+ struct timeval now;
+ int count = 0;
+
+ if (!cfgd_plock_ownership)
+ return 0;
+
+ if (list_empty(&ls->plock_resources))
+ return 0;
+
+ gettimeofday(&now, NULL);
+
+ if (time_diff_ms(&ls->drop_resources_last, &now) <
+ cfgd_drop_resources_time)
+ return 1;
+
+ ls->drop_resources_last = now;
+
+ /* try to drop the oldest, unused resources */
+
+ list_for_each_entry_reverse(r, &ls->plock_resources, list) {
+ if (count >= cfgd_drop_resources_count)
+ break;
+ /* skip resources owned by another node or with owner -1 */
+ if (r->owner && r->owner != our_nodeid)
+ continue;
+ if (time_diff_ms(&r->last_access, &now) <
+ cfgd_drop_resources_age)
+ continue;
+
+ if (list_empty(&r->locks) && list_empty(&r->waiters)) {
+ if (r->owner == our_nodeid) {
+ send_own(ls, r, 0);
+ r->owner = 0;
+ } else if (r->owner == 0 && got_unown(r)) {
+ send_drop(ls, r);
+ }
+
+ /* counted even when neither message above was sent */
+ count++;
+ }
+ }
+
+ return 1;
+}
+
+/* Run drop_resources() on every lockspace.  poll_drop_plock is cleared
+   first and set again if any lockspace returns non-zero. */
+void drop_resources_all(void)
+{
+	struct lockspace *ls;
+
+	poll_drop_plock = 0;
+
+	list_for_each_entry(ls, &lockspaces, list) {
+		if (drop_resources(ls))
+			poll_drop_plock = 1;
+	}
+}
+
+/* Decide whether reading plock ops should be held back for now.
+   Returns 1 while message flow control is on, 2 when the rate limit
+   asks for a delay, and 0 when reading may proceed. */
+int limit_plocks(void)
+{
+ struct timeval now;
+
+ /* Don't send more messages while the cpg message queue is backed up */
+
+ if (message_flow_control_on) {
+ update_flow_control_status();
+ if (message_flow_control_on)
+ return 1;
+ }
+
+ /* no rate limit configured, or nothing read yet */
+ if (!cfgd_plock_rate_limit || !plock_read_count)
+ return 0;
+
+ gettimeofday(&now, NULL);
+
+ /* Every time a plock op is read from the kernel, we increment
+ plock_read_count. After every cfgd_plock_rate_limit (N) reads,
+ we check the time it's taken to do those N; if the time is less than
+ a second, then we delay reading any more until a second is up.
+ This way we read a max of N ops from the kernel every second. */
+
+ if (!(plock_read_count % cfgd_plock_rate_limit)) {
+ if (time_diff_ms(&plock_rate_last, &now) < 1000) {
+ plock_rate_delays++;
+ return 2;
+ }
+ plock_rate_last = now;
+ /* NOTE(review): presumably bumps the counter so the next call
+ does not land on the same multiple again before another op
+ has been read - confirm */
+ plock_read_count++;
+ }
+ return 0;
+}
+
+/* Read one plock op from plock_device_fd and route it:
+   - owner 0: the op is sent to all nodes with send_plock()
+   - we own the resource: the op is applied locally
+   - otherwise: we ask for ownership and park the op on r->pending
+   When reading is currently limited (see limit_plocks) the plock fd is
+   ignored and nothing is read.  On failure the error is written back to
+   the device, except for ops carrying DLM_PLOCK_FL_CLOSE.
+   The ci argument is not used in this function. */
+void process_plocks(int ci)
+{
+	struct lockspace *ls;
+	struct resource *r;
+	struct dlm_plock_info info;
+	struct timeval now;
+	uint64_t usec;
+	int create, rv;
+
+	if (limit_plocks()) {
+		poll_ignore_plock = 1;
+		client_ignore(plock_ci, plock_fd);
+		return;
+	}
+
+	gettimeofday(&now, NULL);
+
+	memset(&info, 0, sizeof(info));
+
+	rv = do_read(plock_device_fd, &info, sizeof(info));
+	if (rv < 0) {
+		log_debug("process_plocks: read error %d fd %d\n",
+			  errno, plock_device_fd);
+		return;
+	}
+
+	/* kernel doesn't set the nodeid field */
+	info.nodeid = our_nodeid;
+
+	if (!cfgd_enable_plock) {
+		rv = -ENOSYS;
+		goto fail;
+	}
+
+	if (need_fsid_translation)
+		info.fsid = mg_to_ls_id(info.fsid);
+
+	ls = find_ls_id(info.fsid);
+	if (!ls) {
+		/* ls is NULL here, so it must not be handed to the
+		   per-lockspace log_plock(); use the lockspace-free
+		   log_debug() instead */
+		log_debug("process_plocks: no ls id %x", info.fsid);
+		rv = -EEXIST;
+		goto fail;
+	}
+
+	if (ls->disable_plock) {
+		rv = -ENOSYS;
+		goto fail;
+	}
+
+	log_plock(ls, "read plock %llx %s %s %llx-%llx %d/%u/%llx w %d",
+		  (unsigned long long)info.number,
+		  op_str(info.optype),
+		  ex_str(info.optype, info.ex),
+		  (unsigned long long)info.start, (unsigned long long)info.end,
+		  info.nodeid, info.pid, (unsigned long long)info.owner,
+		  info.wait);
+
+	/* report plock rate and any delays since the last report */
+	plock_read_count++;
+	if (!(plock_read_count % 1000)) {
+		usec = dt_usec(&plock_read_time, &now) ;
+		log_plock(ls, "plock_read_count %u time %.3f s delays %u",
+			  plock_read_count, usec * 1.e-6, plock_rate_delays);
+		plock_read_time = now;
+		plock_rate_delays = 0;
+	}
+
+	/* an unlock never creates a resource */
+	create = (info.optype == DLM_PLOCK_OP_UNLOCK) ? 0 : 1;
+
+	rv = find_resource(ls, info.number, create, &r);
+	if (rv)
+		goto fail;
+
+	if (r->owner == 0) {
+		/* plock state replicated on all nodes */
+		send_plock(ls, r, &info);
+
+	} else if (r->owner == our_nodeid) {
+		/* we are the owner of r, so our plocks are local */
+		__receive_plock(ls, &info, our_nodeid, r);
+
+	} else {
+		/* r owner is -1: r is new, try to become the owner;
+		   r owner > 0: tell other owner to give up ownership;
+		   both done with a message trying to set owner to ourself */
+		send_own(ls, r, our_nodeid);
+		save_pending_plock(ls, r, &info);
+	}
+
+	if (cfgd_plock_ownership && !list_empty(&ls->plock_resources))
+		poll_drop_plock = 1;
+	return;
+
+ fail:
+	/* the close flag lives in 'pad' or 'flags' depending on the build;
+	   ops carrying it get no reply */
+#ifdef DLM_PLOCK_BUILD_WORKAROUND
+	if (!(info.pad & DLM_PLOCK_FL_CLOSE)) {
+#else
+	if (!(info.flags & DLM_PLOCK_FL_CLOSE)) {
+#endif
+		info.rv = rv;
+		rv = write(plock_device_fd, &info, sizeof(info));
+	}
+}
+
+/* Replay the messages queued on ls->saved_messages in order, handing each
+   to the _receive_* handler for its saved type, then unlinking and freeing
+   it.  Messages of any other type are left on the list and not counted.
+   The number of replayed messages is logged at the end. */
+void process_saved_plocks(struct lockspace *ls)
+{
+	struct save_msg *cur, *next;
+	struct dlm_header *hd;
+	int replayed = 0;
+
+	log_dlock(ls, "process_saved_plocks begin");
+
+	/* an empty list simply makes the loop body never run */
+	list_for_each_entry_safe(cur, next, &ls->saved_messages, list) {
+		hd = (struct dlm_header *)cur->buf;
+
+		if (cur->type == DLM_MSG_PLOCK)
+			_receive_plock(ls, hd, cur->len);
+		else if (cur->type == DLM_MSG_PLOCK_OWN)
+			_receive_own(ls, hd, cur->len);
+		else if (cur->type == DLM_MSG_PLOCK_DROP)
+			_receive_drop(ls, hd, cur->len);
+		else if (cur->type == DLM_MSG_PLOCK_SYNC_LOCK ||
+			 cur->type == DLM_MSG_PLOCK_SYNC_WAITER)
+			_receive_sync(ls, hd, cur->len);
+		else
+			continue;
+
+		list_del(&cur->list);
+		free(cur);
+		replayed++;
+	}
+
+	log_dlock(ls, "process_saved_plocks %d done", replayed);
+}
+
+/* locks still marked SYNCING should not go into the ckpt; the new node
+ will get those locks by receiving PLOCK_SYNC messages */
+
+/* Size in bytes of the staging buffer below. */
+#define MAX_SEND_SIZE 1024 /* 1024 holds 24 plock_data */
+
+/* Staging buffer for DLM_MSG_PLOCKS_DATA messages: filled by
+   pack_send_buf(), cleared and sent by send_all_plocks_data(). */
+static char send_buf[MAX_SEND_SIZE];
+
+/* Pack resource r into send_buf behind the dlm_header: one resource_data
+   followed by as many plock_data entries (locks first, then waiters) as
+   fit.  All multi-byte fields are stored little-endian.
+   owner:     owner value to advertise (not always equal to r->owner)
+   full:      non-zero when the previous call for r ran out of space;
+              RD_CONTINUE is then set and packing resumes at *last
+   count_out: receives the number of plock_data entries packed
+   last:      resume cursor; on return it points at the first entry that
+              did not fit, or is NULL when everything was packed
+   Entries marked P_SYNCING are skipped.  No entries are packed for a
+   resource we own when plock ownership is enabled.
+   Returns 1 if send_buf filled up before all entries were packed, else 0. */
+static int pack_send_buf(struct lockspace *ls, struct resource *r, int owner,
+			 int full, int *count_out, void **last)
+{
+	struct resource_data *rd;
+	struct plock_data *pp;
+	struct posix_lock *po;
+	struct lock_waiter *w;
+	int count = 0;
+	int find = 0;
+	int len;
+
+	/* N.B. owner not always equal to r->owner */
+	rd = (struct resource_data *)(send_buf + sizeof(struct dlm_header));
+	rd->number = cpu_to_le64(r->number);
+	rd->owner = cpu_to_le32(owner);
+
+	if (full) {
+		/* stored little-endian like the other fields; the receiver
+		   decodes it with le32_to_cpu() */
+		rd->flags = cpu_to_le32(RD_CONTINUE);
+		find = 1;
+	}
+
+	/* plocks not replicated for owned resources */
+	if (cfgd_plock_ownership && (owner == our_nodeid))
+		goto done;
+
+	len = sizeof(struct dlm_header) + sizeof(struct resource_data);
+
+	pp = (struct plock_data *)(send_buf + sizeof(struct dlm_header) + sizeof(struct resource_data));
+
+	list_for_each_entry(po, &r->locks, list) {
+		/* when resuming, skip forward to the saved cursor */
+		if (find && *last != po)
+			continue;
+		find = 0;
+
+		if (po->flags & P_SYNCING)
+			continue;
+
+		if (len + sizeof(struct plock_data) > sizeof(send_buf)) {
+			*last = po;
+			goto full;
+		}
+		len += sizeof(struct plock_data);
+
+		pp->start = cpu_to_le64(po->start);
+		pp->end = cpu_to_le64(po->end);
+		pp->owner = cpu_to_le64(po->owner);
+		pp->pid = cpu_to_le32(po->pid);
+		pp->nodeid = cpu_to_le32(po->nodeid);
+		pp->ex = po->ex;
+		pp->waiter = 0;
+		pp++;
+		count++;
+	}
+
+	list_for_each_entry(w, &r->waiters, list) {
+		/* the cursor may also point at a waiter */
+		if (find && *last != w)
+			continue;
+		find = 0;
+
+		if (w->flags & P_SYNCING)
+			continue;
+
+		if (len + sizeof(struct plock_data) > sizeof(send_buf)) {
+			*last = w;
+			goto full;
+		}
+		len += sizeof(struct plock_data);
+
+		pp->start = cpu_to_le64(w->info.start);
+		pp->end = cpu_to_le64(w->info.end);
+		pp->owner = cpu_to_le64(w->info.owner);
+		pp->pid = cpu_to_le32(w->info.pid);
+		pp->nodeid = cpu_to_le32(w->info.nodeid);
+		pp->ex = w->info.ex;
+		pp->waiter = 1;
+		pp++;
+		count++;
+	}
+ done:
+	rd->lock_count = cpu_to_le32(count);
+	*count_out = count;
+	*last = NULL;
+	return 0;
+
+ full:
+	rd->lock_count = cpu_to_le32(count);
+	*count_out = count;
+	return 1;
+}
+
+/* Copy all plock state into a checkpoint so new node can retrieve it. The
+ node creating the ckpt for the mounter needs to be the same node that's
+ sending the mounter its journals message (i.e. the low nodeid). The new
+ mounter knows the ckpt is ready to read only after it gets its journals
+ message.
+
+ If the mounter is becoming the new low nodeid in the group, the node doing
+ the store closes the ckpt and the new node unlinks the ckpt after reading
+ it. The ckpt should then disappear and the new node can create a new ckpt
+ for the next mounter. */
+
+/* Stamp the dlm_header at the start of buf with type DLM_MSG_PLOCKS_DATA
+   and msgdata = seq, then pass the first len bytes of buf to
+   dlm_send_message().  Always returns 0. */
+static int send_plocks_data(struct lockspace *ls, uint32_t seq, char *buf, int len)
+{
+	struct dlm_header *hd = (struct dlm_header *)buf;
+
+	hd->msgdata = seq;
+	hd->type = DLM_MSG_PLOCKS_DATA;
+
+	dlm_send_message(ls, buf, len);
+	return 0;
+}
+
+/* Send the plock state of every resource in ls as DLM_MSG_PLOCKS_DATA
+   messages tagged with seq.  A resource whose entries do not fit in one
+   send_buf is sent as several messages.  The number of messages sent is
+   stored in *plocks_data; it is left untouched when plocks are disabled
+   (the early return below). */
+void send_all_plocks_data(struct lockspace *ls, uint32_t seq, uint32_t *plocks_data)
+{
+ struct resource *r;
+ void *last;
+ int owner, count, len, full;
+ uint32_t send_count = 0;
+
+ if (!cfgd_enable_plock || ls->disable_plock)
+ return;
+
+ log_dlock(ls, "send_all_plocks_data %d:%u", our_nodeid, seq);
+
+ /* - If r owner is -1, ckpt nothing.
+ - If r owner is us, ckpt owner of us and no plocks.
+ - If r owner is other, ckpt that owner and any plocks we have on r
+ (they've just been synced but owner=0 msg not recved yet).
+ - If r owner is 0 and !got_unown, then we've just unowned r;
+ ckpt owner of us and any plocks that don't have SYNCING set
+ (plocks with SYNCING will be handled by our sync messages).
+ - If r owner is 0 and got_unown, then ckpt owner 0 and all plocks;
+ (there should be no SYNCING plocks) */
+
+ list_for_each_entry(r, &ls->plock_resources, list) {
+ if (!cfgd_plock_ownership)
+ owner = 0;
+ else if (r->owner == -1)
+ continue;
+ else if (r->owner == our_nodeid)
+ owner = our_nodeid;
+ else if (r->owner)
+ owner = r->owner;
+ else if (!r->owner && !got_unown(r))
+ owner = our_nodeid;
+ else if (!r->owner)
+ owner = 0;
+ else {
+ /* not reachable: r->owner is 0 once the tests above fall
+ through, so the previous branch always matches */
+ log_elock(ls, "send_all_plocks_data error owner %d r %llx",
+ r->owner, (unsigned long long)r->number);
+ continue;
+ }
+
+ memset(&send_buf, 0, sizeof(send_buf));
+ count = 0;
+ full = 0;
+ last = NULL;
+
+ /* one message per pass; pack_send_buf() returns non-zero while
+ entries of r are still left to send */
+ do {
+ full = pack_send_buf(ls, r, owner, full, &count, &last);
+
+ len = sizeof(struct dlm_header) +
+ sizeof(struct resource_data) +
+ sizeof(struct plock_data) * count;
+
+ /* logs r->owner, which can differ from the packed 'owner' */
+ log_plock(ls, "send_plocks_data %d:%u n %llu o %d locks %d len %d",
+ our_nodeid, seq, (unsigned long long)r->number, r->owner,
+ count, len);
+
+ send_plocks_data(ls, seq, send_buf, len);
+
+ send_count++;
+
+ } while (full);
+ }
+
+ *plocks_data = send_count;
+
+ log_dlock(ls, "send_all_plocks_data %d:%u %u done",
+ our_nodeid, seq, send_count);
+}
+
+/* Unlink and free every posix_lock on r->locks, then every lock_waiter on
+   r->waiters.  The resource itself and r->pending are left alone. */
+static void free_r_lists(struct resource *r)
+{
+	struct lock_waiter *waiter, *waiter_next;
+	struct posix_lock *lock, *lock_next;
+
+	list_for_each_entry_safe(lock, lock_next, &r->locks, list) {
+		list_del(&lock->list);
+		free(lock);
+	}
+	list_for_each_entry_safe(waiter, waiter_next, &r->waiters, list) {
+		list_del(&waiter->list);
+		free(waiter);
+	}
+}
+
+/* Handle one PLOCKS_DATA message: rebuild a plock resource, with its granted
+   locks and waiters, from the resource_data record and the plock_data array
+   that follow the dlm_header.
+
+   ls:  lockspace the message belongs to
+   hd:  start of the received message
+   len: total length of the message in bytes, header included
+
+   A message with RD_CONTINUE set adds locks to a resource that an earlier
+   message created; otherwise a new resource is allocated and, once its locks
+   are unpacked, added to ls->plock_resources and the rb tree.  The message is
+   ignored unless plocks are enabled and both need_plocks and save_plocks are
+   set.  A malformed message is logged and dropped.  If a lock cannot be
+   allocated the message is abandoned, and a resource created for this
+   message is freed together with the locks already attached to it. */
+void receive_plocks_data(struct lockspace *ls, struct dlm_header *hd, int len)
+{
+	const size_t min_len = sizeof(struct dlm_header) +
+			       sizeof(struct resource_data);
+	struct resource_data *rd;
+	struct plock_data *pp;
+	struct posix_lock *po;
+	struct lock_waiter *w;
+	struct resource *r;
+	uint64_t num;
+	uint32_t count;
+	uint32_t flags;
+	uint32_t i;
+	int owner;
+
+	if (!cfgd_enable_plock || ls->disable_plock)
+		return;
+
+	if (!ls->need_plocks)
+		return;
+
+	if (!ls->save_plocks)
+		return;
+
+	ls->recv_plocks_data_count++;
+
+	/* Reject a negative len explicitly: in the unsigned comparison it
+	   would look like a huge length and pass. */
+	if (len < 0 || (size_t)len < min_len) {
+		log_elock(ls, "recv_plocks_data %d:%u bad len %d",
+			  hd->nodeid, hd->msgdata, len);
+		return;
+	}
+
+	rd = (struct resource_data *)((char *)hd + sizeof(struct dlm_header));
+	num = le64_to_cpu(rd->number);
+	owner = le32_to_cpu(rd->owner);
+	count = le32_to_cpu(rd->lock_count);
+	flags = le32_to_cpu(rd->flags);
+
+	if (flags & RD_CONTINUE) {
+		r = search_resource(ls, num);
+		if (!r) {
+			log_elock(ls, "recv_plocks_data %d:%u n %llu not found",
+				  hd->nodeid, hd->msgdata, (unsigned long long)num);
+			return;
+		}
+		log_plock(ls, "recv_plocks_data %d:%u n %llu continue",
+			  hd->nodeid, hd->msgdata, (unsigned long long)num);
+		goto unpack;
+	}
+
+	r = calloc(1, sizeof(*r));
+	if (!r) {
+		log_elock(ls, "recv_plocks_data %d:%u n %llu no mem",
+			  hd->nodeid, hd->msgdata, (unsigned long long)num);
+		return;
+	}
+	INIT_LIST_HEAD(&r->locks);
+	INIT_LIST_HEAD(&r->waiters);
+	INIT_LIST_HEAD(&r->pending);
+
+	if (!cfgd_plock_ownership) {
+		if (owner) {
+			log_elock(ls, "recv_plocks_data %d:%u n %llu bad owner %d",
+				  hd->nodeid, hd->msgdata, (unsigned long long)num,
+				  owner);
+			goto fail_free;
+		}
+	} else {
+		if (!owner)
+			r->flags |= R_GOT_UNOWN;
+
+		/* no locks should be included for owned resources */
+
+		if (owner && count) {
+			/* the format starts with %d:%u, so nodeid and msgdata
+			   must lead the argument list */
+			log_elock(ls, "recv_plocks_data %d:%u n %llu o %d bad count %u",
+				  hd->nodeid, hd->msgdata,
+				  (unsigned long long)num, owner, count);
+			goto fail_free;
+		}
+	}
+
+	r->number = num;
+	r->owner = owner;
+
+ unpack:
+	/* Same bound as min_len + sizeof(struct plock_data) * count, but
+	   dividing instead of multiplying cannot wrap for a large count. */
+	if (count > ((size_t)len - min_len) / sizeof(struct plock_data)) {
+		log_elock(ls, "recv_plocks_data %d:%u count %u bad len %d",
+			  hd->nodeid, hd->msgdata, count, len);
+		goto fail_free;
+	}
+
+	pp = (struct plock_data *)((char *)rd + sizeof(struct resource_data));
+
+	for (i = 0; i < count; i++) {
+		/* calloc rather than malloc: any member not assigned below
+		   starts out zero instead of indeterminate */
+		if (!pp->waiter) {
+			po = calloc(1, sizeof(*po));
+			if (!po)
+				goto fail_free;
+			po->start = le64_to_cpu(pp->start);
+			po->end = le64_to_cpu(pp->end);
+			po->owner = le64_to_cpu(pp->owner);
+			po->pid = le32_to_cpu(pp->pid);
+			po->nodeid = le32_to_cpu(pp->nodeid);
+			po->ex = pp->ex;
+			list_add_tail(&po->list, &r->locks);
+		} else {
+			w = calloc(1, sizeof(*w));
+			if (!w)
+				goto fail_free;
+			w->info.start = le64_to_cpu(pp->start);
+			w->info.end = le64_to_cpu(pp->end);
+			w->info.owner = le64_to_cpu(pp->owner);
+			w->info.pid = le32_to_cpu(pp->pid);
+			w->info.nodeid = le32_to_cpu(pp->nodeid);
+			w->info.ex = pp->ex;
+			list_add_tail(&w->list, &r->waiters);
+		}
+		pp++;
+	}
+
+	log_plock(ls, "recv_plocks_data %d:%u n %llu o %d locks %d len %d",
+		  hd->nodeid, hd->msgdata, (unsigned long long)r->number,
+		  r->owner, count, len);
+
+	if (!(flags & RD_CONTINUE)) {
+		list_add_tail(&r->list, &ls->plock_resources);
+		rb_insert_plock_resource(ls, r);
+	}
+	return;
+
+ fail_free:
+	/* a continued resource is already on the lockspace lists and stays
+	   there; only a resource created for this message is released */
+	if (!(flags & RD_CONTINUE)) {
+		free_r_lists(r);
+		free(r);
+	}
+	return;
+}
+
+/* Drop every plock resource held by ls: free its lock and waiter lists, take
+   it out of the rb tree and the resource list, and free it.  Logs how many
+   resources were dropped, then resets ls->recv_plocks_data_count.  Does
+   nothing when plocks are disabled. */
+void clear_plocks_data(struct lockspace *ls)
+{
+	struct resource *res, *res_next;
+	uint32_t freed = 0;
+
+	if (!cfgd_enable_plock || ls->disable_plock)
+		return;
+
+	list_for_each_entry_safe(res, res_next, &ls->plock_resources, list) {
+		free_r_lists(res);
+		rb_del_plock_resource(ls, res);
+		list_del(&res->list);
+		free(res);
+		freed++;
+	}
+
+	log_dlock(ls, "clear_plocks_data done %u recv_plocks_data_count %u",
+		  freed, ls->recv_plocks_data_count);
+
+	ls->recv_plocks_data_count = 0;
+}
+
+/* Called when a node has failed, or we're unmounting. For a node failure, we
+ need to call this when the cpg confchg arrives so that we're guaranteed all
+ nodes do this in the same sequence wrt other messages. */
+
+void purge_plocks(struct lockspace *ls, int nodeid, int unmount)
+{ /* Drop the locks and waiters that belong to nodeid -- or all of them when unmount is nonzero -- from every plock resource in ls. */
+ struct posix_lock *po, *po2;
+ struct lock_waiter *w, *w2;
+ struct resource *r, *r2;
+ int purged = 0; /* locks plus waiters removed; reported in the log line below */
+
+ if (!cfgd_enable_plock || ls->disable_plock)
+ return;
+
+ list_for_each_entry_safe(r, r2, &ls->plock_resources, list) { /* _safe variant: r may be freed at the bottom of the loop body */
+ list_for_each_entry_safe(po, po2, &r->locks, list) {
+ if (po->nodeid == nodeid || unmount) {
+ list_del(&po->list);
+ free(po);
+ purged++;
+ }
+ }
+
+ list_for_each_entry_safe(w, w2, &r->waiters, list) {
+ if (w->info.nodeid == nodeid || unmount) {
+ list_del(&w->list);
+ free(w);
+ purged++;
+ }
+ }
+
+ /* TODO: haven't thought carefully about how this transition
+ to owner 0 might interact with other owner messages in
+ progress. */
+
+ if (r->owner == nodeid) { /* the purged node owned r: r becomes unowned */
+ r->owner = 0;
+ r->flags |= R_GOT_UNOWN;
+ r->flags |= R_PURGE_UNOWN;
+ send_pending_plocks(ls, r);
+ }
+
+ if (!list_empty(&r->waiters)) /* waiters that survived the purge get processed by do_waiters() */
+ do_waiters(ls, r);
+
+ if (!cfgd_plock_ownership &&
+ list_empty(&r->locks) && list_empty(&r->waiters)) { /* r is freed only with ownership mode off; r->pending is not checked here */
+ rb_del_plock_resource(ls, r);
+ list_del(&r->list);
+ free(r);
+ }
+ }
+
+ if (purged) /* the timestamp changes only if something was actually removed */
+ ls->last_plock_time = time(NULL);
+
+ log_dlock(ls, "purged %d plocks for %d", purged, nodeid);
+}
+
+int copy_plock_state(struct lockspace *ls, char *buf, int *len_out)
+{ /* Write a text dump of every plock resource in ls into buf, one line per lock, waiter or pending entry; returns 0, or -ENOSPC once an entry no longer fits. *len_out receives the number of bytes of complete entries. */
+ struct posix_lock *po;
+ struct lock_waiter *w;
+ struct resource *r;
+ struct timeval now;
+ int rv = 0;
+ int len = DLMC_DUMP_SIZE, pos = 0, ret; /* assumes buf holds at least DLMC_DUMP_SIZE bytes -- confirm against callers */
+
+ gettimeofday(&now, NULL);
+
+ list_for_each_entry(r, &ls->plock_resources, list) {
+
+ if (list_empty(&r->locks) &&
+ list_empty(&r->waiters) &&
+ list_empty(&r->pending)) { /* resource with no entries: one summary line with its idle time */
+ ret = snprintf(buf + pos, len - pos,
+ "%llu rown %d unused_ms %llu\n",
+ (unsigned long long)r->number, r->owner,
+ (unsigned long long)time_diff_ms(&r->last_access,
+ &now));
+ if (ret >= len - pos) { /* line was truncated; pos is not advanced, so *len_out excludes it */
+ rv = -ENOSPC;
+ goto out;
+ }
+ pos += ret; /* NOTE(review): a negative snprintf return is not checked here or below */
+ continue;
+ }
+
+ list_for_each_entry(po, &r->locks, list) { /* granted locks */
+ ret = snprintf(buf + pos, len - pos,
+ "%llu %s %llu-%llu nodeid %d pid %u owner %llx rown %d\n",
+ (unsigned long long)r->number,
+ po->ex ? "WR" : "RD",
+ (unsigned long long)po->start,
+ (unsigned long long)po->end,
+ po->nodeid, po->pid,
+ (unsigned long long)po->owner, r->owner);
+
+ if (ret >= len - pos) {
+ rv = -ENOSPC;
+ goto out;
+ }
+ pos += ret;
+ }
+
+ list_for_each_entry(w, &r->waiters, list) { /* same line format, suffixed WAITING */
+ ret = snprintf(buf + pos, len - pos,
+ "%llu %s %llu-%llu nodeid %d pid %u owner %llx rown %d WAITING\n",
+ (unsigned long long)r->number,
+ w->info.ex ? "WR" : "RD",
+ (unsigned long long)w->info.start,
+ (unsigned long long)w->info.end,
+ w->info.nodeid, w->info.pid,
+ (unsigned long long)w->info.owner, r->owner);
+
+ if (ret >= len - pos) {
+ rv = -ENOSPC;
+ goto out;
+ }
+ pos += ret;
+ }
+
+ list_for_each_entry(w, &r->pending, list) { /* same line format, suffixed PENDING */
+ ret = snprintf(buf + pos, len - pos,
+ "%llu %s %llu-%llu nodeid %d pid %u owner %llx rown %d PENDING\n",
+ (unsigned long long)r->number,
+ w->info.ex ? "WR" : "RD",
+ (unsigned long long)w->info.start,
+ (unsigned long long)w->info.end,
+ w->info.nodeid, w->info.pid,
+ (unsigned long long)w->info.owner, r->owner);
+
+ if (ret >= len - pos) {
+ rv = -ENOSPC;
+ goto out;
+ }
+ pos += ret;
+ }
+ }
+ out:
+ *len_out = pos; /* written on both the success and the -ENOSPC path */
+ return rv;
+}
+
diff --git a/dlm_controld/rbtree.c b/dlm_controld/rbtree.c
new file mode 100644
index 0000000..430ccc1
--- /dev/null
+++ b/dlm_controld/rbtree.c
@@ -0,0 +1,383 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea(a)suse.de>
+ (C) 2002 David Woodhouse <dwmw2(a)infradead.org>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/lib/rbtree.c
+*/
+
+#include "rbtree.h"
+
+static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
+{ /* Left rotation: node's right child takes node's place in the tree and node becomes that child's left child. */
+ struct rb_node *right = node->rb_right;
+ struct rb_node *parent = rb_parent(node);
+
+ if ((node->rb_right = right->rb_left)) /* right's left subtree, if there is one, moves under node */
+ rb_set_parent(right->rb_left, node);
+ right->rb_left = node;
+
+ rb_set_parent(right, parent);
+
+ if (parent)
+ {
+ if (node == parent->rb_left)
+ parent->rb_left = right;
+ else
+ parent->rb_right = right;
+ }
+ else
+ root->rb_node = right; /* node had no parent, so right becomes the root */
+ rb_set_parent(node, right);
+}
+
+static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
+{ /* Right rotation: node's left child takes node's place in the tree and node becomes that child's right child. */
+ struct rb_node *left = node->rb_left;
+ struct rb_node *parent = rb_parent(node);
+
+ if ((node->rb_left = left->rb_right)) /* left's right subtree, if there is one, moves under node */
+ rb_set_parent(left->rb_right, node);
+ left->rb_right = node;
+
+ rb_set_parent(left, parent);
+
+ if (parent)
+ {
+ if (node == parent->rb_right)
+ parent->rb_right = left;
+ else
+ parent->rb_left = left;
+ }
+ else
+ root->rb_node = left; /* node had no parent, so left becomes the root */
+ rb_set_parent(node, left);
+}
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{ /* Rebalance and recolour the tree after node has been linked in (see rb_link_node()). */
+ struct rb_node *parent, *gparent;
+
+ while ((parent = rb_parent(node)) && rb_is_red(parent)) /* runs for as long as node has a red parent */
+ {
+ gparent = rb_parent(parent);
+
+ if (parent == gparent->rb_left)
+ {
+ {
+ register struct rb_node *uncle = gparent->rb_right;
+ if (uncle && rb_is_red(uncle)) /* red uncle: recolour and carry on from the grandparent */
+ {
+ rb_set_black(uncle);
+ rb_set_black(parent);
+ rb_set_red(gparent);
+ node = gparent;
+ continue;
+ }
+ }
+
+ if (parent->rb_right == node) /* node is the inner child: rotate it to the outside, then swap the roles of node and parent */
+ {
+ register struct rb_node *tmp;
+ __rb_rotate_left(parent, root);
+ tmp = parent;
+ parent = node;
+ node = tmp;
+ }
+
+ rb_set_black(parent);
+ rb_set_red(gparent);
+ __rb_rotate_right(gparent, root);
+ } else { /* mirror image of the branch above, with left and right exchanged */
+ {
+ register struct rb_node *uncle = gparent->rb_left;
+ if (uncle && rb_is_red(uncle))
+ {
+ rb_set_black(uncle);
+ rb_set_black(parent);
+ rb_set_red(gparent);
+ node = gparent;
+ continue;
+ }
+ }
+
+ if (parent->rb_left == node)
+ {
+ register struct rb_node *tmp;
+ __rb_rotate_right(parent, root);
+ tmp = parent;
+ parent = node;
+ node = tmp;
+ }
+
+ rb_set_black(parent);
+ rb_set_red(gparent);
+ __rb_rotate_left(gparent, root);
+ }
+ }
+
+ rb_set_black(root->rb_node); /* the root is always left black */
+}
+
+static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
+ struct rb_root *root)
+{ /* Rebalance after rb_erase() removed a black node; node (possibly NULL) is the child that took the removed position and parent is that child's parent. */
+ struct rb_node *other; /* node's sibling under parent */
+
+ while ((!node || rb_is_black(node)) && node != root->rb_node)
+ {
+ if (parent->rb_left == node)
+ {
+ other = parent->rb_right;
+ if (rb_is_red(other)) /* red sibling: recolour and rotate, then re-read the sibling */
+ {
+ rb_set_black(other);
+ rb_set_red(parent);
+ __rb_rotate_left(parent, root);
+ other = parent->rb_right;
+ }
+ if ((!other->rb_left || rb_is_black(other->rb_left)) &&
+ (!other->rb_right || rb_is_black(other->rb_right)))
+ { /* sibling has no red child: colour it red and continue one level up */
+ rb_set_red(other);
+ node = parent;
+ parent = rb_parent(node);
+ }
+ else
+ {
+ if (!other->rb_right || rb_is_black(other->rb_right))
+ { /* only the sibling's left child is red: rotate so the red child ends up on the right */
+ rb_set_black(other->rb_left);
+ rb_set_red(other);
+ __rb_rotate_right(other, root);
+ other = parent->rb_right;
+ }
+ rb_set_color(other, rb_color(parent));
+ rb_set_black(parent);
+ rb_set_black(other->rb_right);
+ __rb_rotate_left(parent, root);
+ node = root->rb_node;
+ break; /* done: leave the loop with node set to the root */
+ }
+ }
+ else
+ { /* mirror image of the branch above, with left and right exchanged */
+ other = parent->rb_left;
+ if (rb_is_red(other))
+ {
+ rb_set_black(other);
+ rb_set_red(parent);
+ __rb_rotate_right(parent, root);
+ other = parent->rb_left;
+ }
+ if ((!other->rb_left || rb_is_black(other->rb_left)) &&
+ (!other->rb_right || rb_is_black(other->rb_right)))
+ {
+ rb_set_red(other);
+ node = parent;
+ parent = rb_parent(node);
+ }
+ else
+ {
+ if (!other->rb_left || rb_is_black(other->rb_left))
+ {
+ rb_set_black(other->rb_right);
+ rb_set_red(other);
+ __rb_rotate_left(other, root);
+ other = parent->rb_left;
+ }
+ rb_set_color(other, rb_color(parent));
+ rb_set_black(parent);
+ rb_set_black(other->rb_left);
+ __rb_rotate_right(parent, root);
+ node = root->rb_node;
+ break;
+ }
+ }
+ }
+ if (node)
+ rb_set_black(node); /* the node the loop stopped on ends up black */
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{ /* Unlink node from the tree under root; rebalances through __rb_erase_color() when the position that was removed held a black node. */
+ struct rb_node *child, *parent;
+ int color;
+
+ if (!node->rb_left)
+ child = node->rb_right; /* at most one child (may be NULL) */
+ else if (!node->rb_right)
+ child = node->rb_left;
+ else
+ { /* two children: the leftmost node of the right subtree is moved into node's position */
+ struct rb_node *old = node, *left;
+
+ node = node->rb_right;
+ while ((left = node->rb_left) != NULL)
+ node = left;
+
+ if (rb_parent(old)) {
+ if (rb_parent(old)->rb_left == old)
+ rb_parent(old)->rb_left = node;
+ else
+ rb_parent(old)->rb_right = node;
+ } else
+ root->rb_node = node;
+
+ child = node->rb_right;
+ parent = rb_parent(node);
+ color = rb_color(node); /* colour of the replacement before it is overwritten below */
+
+ if (parent == old) { /* the replacement is old's own right child */
+ parent = node;
+ } else {
+ if (child)
+ rb_set_parent(child, parent);
+ parent->rb_left = child;
+
+ node->rb_right = old->rb_right;
+ rb_set_parent(old->rb_right, node);
+ }
+
+ node->rb_parent_color = old->rb_parent_color; /* replacement takes over old's parent pointer and colour */
+ node->rb_left = old->rb_left;
+ rb_set_parent(old->rb_left, node);
+
+ goto color;
+ }
+
+ parent = rb_parent(node);
+ color = rb_color(node);
+
+ if (child)
+ rb_set_parent(child, parent);
+ if (parent)
+ {
+ if (parent->rb_left == node)
+ parent->rb_left = child;
+ else
+ parent->rb_right = child;
+ }
+ else
+ root->rb_node = child; /* node was the root */
+
+ color:
+ if (color == RB_BLACK)
+ __rb_erase_color(child, parent, root);
+}
+
+/*
+ * Return the leftmost node of the tree, i.e. the first one in sort order,
+ * or NULL if the tree is empty.
+ */
+struct rb_node *rb_first(const struct rb_root *root)
+{
+	struct rb_node *cur = root->rb_node;
+
+	if (cur) {
+		while (cur->rb_left)
+			cur = cur->rb_left;
+	}
+	return cur;
+}
+
+struct rb_node *rb_last(const struct rb_root *root)
+{
+	/* Follow right links down from the root; NULL for an empty tree. */
+	struct rb_node *cur = root->rb_node;
+
+	if (cur) {
+		while (cur->rb_right)
+			cur = cur->rb_right;
+	}
+	return cur;
+}
+
+/* Return the node that follows node in sort order, or NULL if there is
+   none.  A node that is its own parent (see RB_EMPTY_NODE) has no
+   successor. */
+struct rb_node *rb_next(const struct rb_node *node)
+{
+	struct rb_node *up;
+
+	if (rb_parent(node) == node)
+		return NULL;
+
+	/* With a right subtree, the successor is that subtree's leftmost
+	   node. */
+	if (node->rb_right) {
+		const struct rb_node *cur = node->rb_right;
+
+		while (cur->rb_left)
+			cur = cur->rb_left;
+		return (struct rb_node *)cur;
+	}
+
+	/* Otherwise climb while we arrive from a right child; the first
+	   ancestor reached from its left child is the successor.  Running
+	   off the top of the tree yields NULL. */
+	for (up = rb_parent(node); up && node == up->rb_right; up = rb_parent(node))
+		node = up;
+
+	return up;
+}
+
+/* Return the node that precedes node in sort order, or NULL if there is
+   none.  A node that is its own parent has no predecessor. */
+struct rb_node *rb_prev(const struct rb_node *node)
+{
+	struct rb_node *up;
+
+	if (rb_parent(node) == node)
+		return NULL;
+
+	/* With a left subtree, the predecessor is its rightmost node. */
+	if (node->rb_left) {
+		const struct rb_node *cur = node->rb_left;
+
+		while (cur->rb_right)
+			cur = cur->rb_right;
+		return (struct rb_node *)cur;
+	}
+
+	/* Otherwise climb while we arrive from a left child. */
+	for (up = rb_parent(node); up && node == up->rb_left; up = rb_parent(node))
+		node = up;
+	return up;
+}
+
+void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+		     struct rb_root *root)
+{
+	struct rb_node *up = rb_parent(victim);
+
+	/* Redirect whichever link led to victim so that it leads to new. */
+	if (!up)
+		root->rb_node = new;
+	else if (victim == up->rb_left)
+		up->rb_left = new;
+	else
+		up->rb_right = new;
+
+	/* Victim's children now point back at new. */
+	if (victim->rb_left)
+		rb_set_parent(victim->rb_left, new);
+	if (victim->rb_right)
+		rb_set_parent(victim->rb_right, new);
+
+	/* new takes over victim's parent pointer, colour and child links. */
+	*new = *victim;
+}
diff --git a/dlm_controld/rbtree.h b/dlm_controld/rbtree.h
new file mode 100644
index 0000000..e2b2409
--- /dev/null
+++ b/dlm_controld/rbtree.h
@@ -0,0 +1,160 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea(a)suse.de>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/include/linux/rbtree.h
+
+ To use rbtrees you'll have to implement your own insert and search cores.
+ This avoids callbacks, which would reduce performance dramatically.
+ I know it's not the cleanest way, but in C (unlike C++) it is how you get
+ both performance and genericity.
+
+ An example of insert and search follows. The search is a plain
+ search over an ordered tree. The insert, by contrast, must be implemented
+ in two steps: first the code inserts the element in order as a red
+ leaf of the tree, then it calls the support library function
+ rb_insert_color(), which does the non-trivial work of rebalancing
+ the rbtree if necessary.
+
+-----------------------------------------------------------------------
+static inline struct page * rb_search_page_cache(struct inode * inode,
+ unsigned long offset)
+{
+ struct rb_node * n = inode->i_rb_page_cache.rb_node;
+ struct page * page;
+
+ while (n)
+ {
+ page = rb_entry(n, struct page, rb_page_cache);
+
+ if (offset < page->offset)
+ n = n->rb_left;
+ else if (offset > page->offset)
+ n = n->rb_right;
+ else
+ return page;
+ }
+ return NULL;
+}
+
+static inline struct page * __rb_insert_page_cache(struct inode * inode,
+ unsigned long offset,
+ struct rb_node * node)
+{
+ struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
+ struct rb_node * parent = NULL;
+ struct page * page;
+
+ while (*p)
+ {
+ parent = *p;
+ page = rb_entry(parent, struct page, rb_page_cache);
+
+ if (offset < page->offset)
+ p = &(*p)->rb_left;
+ else if (offset > page->offset)
+ p = &(*p)->rb_right;
+ else
+ return page;
+ }
+
+ rb_link_node(node, parent, p);
+
+ return NULL;
+}
+
+static inline struct page * rb_insert_page_cache(struct inode * inode,
+ unsigned long offset,
+ struct rb_node * node)
+{
+ struct page * ret;
+ if ((ret = __rb_insert_page_cache(inode, offset, node)))
+ goto out;
+ rb_insert_color(node, &inode->i_rb_page_cache);
+ out:
+ return ret;
+}
+-----------------------------------------------------------------------
+*/
+
+#ifndef _LINUX_RBTREE_H
+#define _LINUX_RBTREE_H
+
+#include <linux/stddef.h>
+
+struct rb_node
+{
+ unsigned long rb_parent_color; /* parent pointer with the colour stored in bit 0; read it through rb_parent() and rb_color() */
+#define RB_RED 0
+#define RB_BLACK 1
+ struct rb_node *rb_right;
+ struct rb_node *rb_left;
+} __attribute__((aligned(sizeof(long))));
+ /* The alignment might seem pointless, but allegedly CRIS needs it; rb_parent() also masks off the two low bits of the stored address, so those bits must be zero in any node address */
+
+struct rb_root
+{
+ struct rb_node *rb_node; /* topmost node of the tree; NULL when the tree is empty (see RB_EMPTY_ROOT) */
+};
+
+
+#define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3)) /* parent pointer: rb_parent_color with its two low bits masked off */
+#define rb_color(r) ((r)->rb_parent_color & 1) /* bit 0: RB_RED (0) or RB_BLACK (1) */
+#define rb_is_red(r) (!rb_color(r))
+#define rb_is_black(r) rb_color(r)
+#define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0) /* clears bit 0 */
+#define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0) /* sets bit 0 */
+
+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
+{
+	/* Keep the two low bits of the old word and store p in the rest. */
+	unsigned long low_bits = rb->rb_parent_color & 3;
+
+	rb->rb_parent_color = low_bits | (unsigned long)p;
+}
+static inline void rb_set_color(struct rb_node *rb, int color)
+{
+	/* Clear bit 0 of the old word, then OR in the new colour. */
+	unsigned long parent_bits = rb->rb_parent_color & ~1;
+
+	rb->rb_parent_color = parent_bits | color;
+}
+
+#define RB_ROOT (struct rb_root) { NULL, } /* compound literal for an empty tree */
+#define rb_entry(ptr, type, member) container_of(ptr, type, member) /* container_of is not defined in this header */
+
+#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
+#define RB_EMPTY_NODE(node) (rb_parent(node) == node) /* a node whose parent pointer is itself counts as empty */
+#define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) /* marks node empty by making it its own parent */
+
+extern void rb_insert_color(struct rb_node *, struct rb_root *);
+extern void rb_erase(struct rb_node *, struct rb_root *);
+
+/* Find logical next and previous nodes in a tree */
+extern struct rb_node *rb_next(const struct rb_node *);
+extern struct rb_node *rb_prev(const struct rb_node *);
+extern struct rb_node *rb_first(const struct rb_root *);
+extern struct rb_node *rb_last(const struct rb_root *);
+
+/* Fast replacement of a single node without remove/rebalance/add/rebalance */
+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+ struct rb_root *root);
+
+/* Attach node as a new leaf: it gets no children, its parent is set to
+   parent with the colour bit clear (RB_RED), and *rb_link is pointed at it.
+   Per the usage notes at the top of this header, rb_insert_color() must be
+   called afterwards. */
+static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
+				struct rb_node ** rb_link)
+{
+	node->rb_left = NULL;
+	node->rb_right = NULL;
+	node->rb_parent_color = (unsigned long)parent;
+	*rb_link = node;
+}
+
+#endif /* _LINUX_RBTREE_H */
diff --git a/group/dlm_controld/Makefile b/group/dlm_controld/Makefile
deleted file mode 100644
index 63dcb9b..0000000
--- a/group/dlm_controld/Makefile
+++ /dev/null
@@ -1,50 +0,0 @@
-TARGET = dlm_controld
-
-SOURCE = \
- action.c \
- cpg.c \
- crc.c \
- main.c \
- plock.c \
- config.c \
- member_cman.c \
- logging.c \
- rbtree.c
-
-CFLAGS += -D_GNU_SOURCE -g \
- -Wall \
- -Wformat \
- -Wformat-security \
- -Wmissing-prototypes \
- -Wnested-externs \
- -Wpointer-arith \
- -Wextra -Wshadow \
- -Wcast-align \
- -Wwrite-strings \
- -Waggregate-return \
- -Wstrict-prototypes \
- -Winline \
- -Wredundant-decls \
- -Wno-sign-compare \
- -Wno-unused-parameter \
- -Wp,-D_FORTIFY_SOURCE=2 \
- -fexceptions \
- -fasynchronous-unwind-tables \
- -fdiagnostics-show-option \
-
-CFLAGS += -fPIE -DPIE
-CFLAGS += `xml2-config --cflags`
-CFLAGS += -I../include -I../../dlm/libdlm -I../../dlm/libdlmcontrol
-
-LDFLAGS += -Wl,-z,now -Wl,-z,relro -pie
-LDFLAGS += `xml2-config --libs`
-LDFLAGS += -lpthread -llogthread -lcpg -lconfdb -lcfg -lquorum -lfenced
-
-all: $(TARGET)
-
-$(TARGET): $(SOURCE)
- $(CC) $(CFLAGS) $(LDFLAGS) $(SOURCE) -o $@ -L.
-
-clean:
- rm -f *.o *.so *.so.* $(TARGET)
-
diff --git a/group/dlm_controld/action.c b/group/dlm_controld/action.c
deleted file mode 100644
index e9148a1..0000000
--- a/group/dlm_controld/action.c
+++ /dev/null
@@ -1,1084 +0,0 @@
-#include "dlm_daemon.h"
-
-#include <corosync/corotypes.h>
-#include <corosync/confdb.h>
-
-static int dir_members[MAX_NODES];
-static int dir_members_count;
-static int comms_nodes[MAX_NODES];
-static int comms_nodes_count;
-static char mg_name[DLM_LOCKSPACE_LEN+1];
-
-#define DLM_SYSFS_DIR "/sys/kernel/dlm"
-#define CLUSTER_DIR "/sys/kernel/config/dlm/cluster"
-#define SPACES_DIR "/sys/kernel/config/dlm/cluster/spaces"
-#define COMMS_DIR "/sys/kernel/config/dlm/cluster/comms"
-
-static int detect_protocol(void)
-{
- confdb_handle_t handle;
- hdb_handle_t totem_handle;
- char key_value[256];
- size_t value_len;
- int rv, proto = -1;
- confdb_callbacks_t callbacks = {
- .confdb_key_change_notify_fn = NULL,
- .confdb_object_create_change_notify_fn = NULL,
- .confdb_object_delete_change_notify_fn = NULL
- };
-
- rv = confdb_initialize(&handle, &callbacks);
- if (rv != CS_OK) {
- log_error("confdb_initialize error %d", rv);
- return -1;
- }
-
- rv = confdb_object_find_start(handle, OBJECT_PARENT_HANDLE);
- if (rv != CS_OK) {
- log_error("confdb_object_find_start error %d", rv);
- goto out;
- }
-
- rv = confdb_object_find(handle, OBJECT_PARENT_HANDLE,
- "totem", strlen("totem"), &totem_handle);
- if (rv != CS_OK) {
- log_error("confdb_object_find error %d", rv);
- goto out;
- }
-
- rv = confdb_key_get(handle, totem_handle,
- "rrp_mode", strlen("rrp_mode"),
- key_value, &value_len);
- if (rv != CS_OK) {
- log_error("confdb_key_get error %d", rv);
- goto out;
- }
-
- key_value[value_len] = '\0';
- log_debug("totem/rrp_mode = '%s'", key_value);
-
- if (!strcmp(key_value, "none"))
- proto = PROTO_TCP;
- else
- proto = PROTO_SCTP;
- out:
- confdb_finalize(handle);
- return proto;
-}
-
-/* look for an id that matches in e.g. /sys/fs/gfs/bull\:x/lock_module/id
- and then extract the "x" as the name */
-
-static int get_mountgroup_name(uint32_t mg_id)
-{
- char path[PATH_MAX];
- char *fsname;
- const char *fsdir;
- DIR *d;
- FILE *file;
- struct dirent *de;
- uint32_t id;
- int retry_gfs2 = 1;
- int rv, error;
-
- fsdir = "/sys/fs/gfs";
- retry:
- rv = -1;
-
- d = opendir(fsdir);
- if (!d) {
- log_debug("%s: opendir failed: %d", path, errno);
- goto out;
- }
-
- while ((de = readdir(d))) {
- if (de->d_name[0] == '.')
- continue;
-
- id = 0;
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%s/lock_module/id",
- fsdir, de->d_name);
-
- file = fopen(path, "r");
- if (!file) {
- log_error("can't open %s %d", path, errno);
- continue;
- }
-
- error = fscanf(file, "%u", &id);
- fclose(file);
-
- if (error != 1) {
- log_error("bad read %s %d", path, errno);
- continue;
- }
- if (id != mg_id) {
- log_debug("get_mountgroup_name skip %x %s",
- id, de->d_name);
- continue;
- }
-
- /* take the fsname out of clustername:fsname */
- fsname = strstr(de->d_name, ":");
- if (!fsname) {
- log_debug("get_mountgroup_name skip2 %x %s",
- id, de->d_name);
- continue;
- }
- fsname++;
-
- log_debug("get_mountgroup_name found %x %s %s",
- id, de->d_name, fsname);
- strncpy(mg_name, fsname, sizeof(mg_name));
- rv = 0;
- break;
- }
-
- closedir(d);
-
- out:
- if (rv && retry_gfs2) {
- retry_gfs2 = 0;
- fsdir = "/sys/fs/gfs2";
- goto retry;
- }
-
- return rv;
-}
-
-/* This is for the case where dlm_controld exits/fails, abandoning dlm
- lockspaces in the kernel, and then dlm_controld is restarted. When
- dlm_controld exits and abandons lockspaces, that node needs to be
- rebooted to clear the uncontrolled lockspaces from the kernel. */
-
-int check_uncontrolled_lockspaces(void)
-{
- DIR *d;
- struct dirent *de;
- int count = 0;
-
- d = opendir(DLM_SYSFS_DIR);
- if (!d)
- return 0;
-
- while ((de = readdir(d))) {
- if (de->d_name[0] == '.')
- continue;
-
- log_error("found uncontrolled lockspace %s", de->d_name);
- count++;
- }
- closedir(d);
-
- if (count) {
- kick_node_from_cluster(our_nodeid);
- return -1;
- }
- return 0;
-}
-
-/* find the mountgroup with "mg_id" in sysfs, get it's name, then look for
- the ls with with the same name in lockspaces list, return its id */
-
-void set_associated_id(uint32_t mg_id)
-{
- struct lockspace *ls;
- int rv;
-
- log_debug("set_associated_id mg_id %x %d", mg_id, mg_id);
-
- memset(&mg_name, 0, sizeof(mg_name));
-
- rv = get_mountgroup_name(mg_id);
- if (rv) {
- log_error("no mountgroup found with id %x", mg_id);
- return;
- }
-
- ls = find_ls(mg_name);
- if (!ls) {
- log_error("no lockspace found with name %s for mg_id %x",
- mg_name, mg_id);
- return;
- }
-
- log_debug("set_associated_id mg %x is ls %x", mg_id, ls->global_id);
-
- ls->associated_mg_id = mg_id;
-}
-
-static int do_sysfs(const char *name, const char *file, char *val)
-{
- char fname[512];
- int rv, fd;
-
- sprintf(fname, "%s/%s/%s", DLM_SYSFS_DIR, name, file);
-
- fd = open(fname, O_WRONLY);
- if (fd < 0) {
- log_error("open \"%s\" error %d %d", fname, fd, errno);
- return -1;
- }
-
- log_debug("write \"%s\" to \"%s\"", val, fname);
-
- rv = do_write(fd, val, strlen(val) + 1);
- close(fd);
- return rv;
-}
-
-int set_sysfs_control(char *name, int val)
-{
- char buf[32];
-
- memset(buf, 0, sizeof(buf));
- snprintf(buf, 32, "%d", val);
-
- return do_sysfs(name, "control", buf);
-}
-
-int set_sysfs_event_done(char *name, int val)
-{
- char buf[32];
-
- memset(buf, 0, sizeof(buf));
- snprintf(buf, 32, "%d", val);
-
- return do_sysfs(name, "event_done", buf);
-}
-
-int set_sysfs_id(char *name, uint32_t id)
-{
- char buf[32];
-
- memset(buf, 0, sizeof(buf));
- snprintf(buf, 32, "%u", id);
-
- return do_sysfs(name, "id", buf);
-}
-
-static int update_dir_members(char *name)
-{
- char path[PATH_MAX];
- DIR *d;
- struct dirent *de;
- int i = 0;
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%s/nodes", SPACES_DIR, name);
-
- d = opendir(path);
- if (!d) {
- log_debug("%s: opendir failed: %d", path, errno);
- return -1;
- }
-
- memset(dir_members, 0, sizeof(dir_members));
- dir_members_count = 0;
-
- /* FIXME: we should probably read the nodeid in each dir instead */
-
- while ((de = readdir(d))) {
- if (de->d_name[0] == '.')
- continue;
- dir_members[i++] = atoi(de->d_name);
- log_debug("dir_member %d", dir_members[i-1]);
- }
- closedir(d);
-
- dir_members_count = i;
- return 0;
-}
-
-static int id_exists(int id, int count, int *array)
-{
- int i;
- for (i = 0; i < count; i++) {
- if (array[i] == id)
- return 1;
- }
- return 0;
-}
-
-static int create_path(const char *path)
-{
- mode_t old_umask;
- int rv;
-
- old_umask = umask(0022);
- rv = mkdir(path, 0777);
- if (rv < 0 && errno == EEXIST)
- rv = 0;
- if (rv < 0)
- log_error("%s: mkdir failed: %d", path, errno);
- umask(old_umask);
- return rv;
-}
-
-int path_exists(const char *path)
-{
- struct stat buf;
-
- if (stat(path, &buf) < 0) {
- if (errno != ENOENT)
- log_error("%s: stat failed: %d", path, errno);
- return 0;
- }
- return 1;
-}
-
-/* The "renew" nodes are those that have left and rejoined since the last
- call to set_members(). We rmdir/mkdir for these nodes so dlm-kernel
- can notice they've left and rejoined. */
-
-int set_configfs_members(char *name, int new_count, int *new_members,
- int renew_count, int *renew_members)
-{
- char path[PATH_MAX];
- char buf[32];
- int i, w, fd, rv, id, old_count, *old_members;
- int do_renew;
-
- /*
- * create lockspace dir if it doesn't exist yet
- */
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%s", SPACES_DIR, name);
-
- if (!path_exists(path)) {
- if (create_path(path))
- return -1;
- }
-
- /*
- * remove/add lockspace members
- */
-
- rv = update_dir_members(name);
- if (rv)
- return rv;
-
- old_members = dir_members;
- old_count = dir_members_count;
-
- for (i = 0; i < old_count; i++) {
- id = old_members[i];
- if (id_exists(id, new_count, new_members))
- continue;
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%s/nodes/%d",
- SPACES_DIR, name, id);
-
- log_debug("set_members rmdir \"%s\"", path);
-
- rv = rmdir(path);
- if (rv) {
- log_error("%s: rmdir failed: %d", path, errno);
- goto out;
- }
- }
-
- /*
- * remove lockspace dir after we've removed all the nodes
- * (when we're shutting down and adding no new nodes)
- */
-
- if (!new_count) {
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%s", SPACES_DIR, name);
-
- log_debug("set_members lockspace rmdir \"%s\"", path);
-
- rv = rmdir(path);
- if (rv)
- log_error("%s: rmdir failed: %d", path, errno);
- }
-
- for (i = 0; i < new_count; i++) {
- id = new_members[i];
-
- do_renew = 0;
-
- if (id_exists(id, renew_count, renew_members))
- do_renew = 1;
- else if (id_exists(id, old_count, old_members))
- continue;
-
- if (!is_cluster_member(id))
- update_cluster();
- /*
- * create node's dir
- */
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%s/nodes/%d",
- SPACES_DIR, name, id);
-
- if (do_renew) {
- log_debug("set_members renew rmdir \"%s\"", path);
- rv = rmdir(path);
- if (rv) {
- log_error("%s: renew rmdir failed: %d",
- path, errno);
- goto out;
- }
- }
-
- log_debug("set_members mkdir \"%s\"", path);
-
- rv = create_path(path);
- if (rv)
- goto out;
-
- /*
- * set node's nodeid
- */
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%s/nodes/%d/nodeid",
- SPACES_DIR, name, id);
-
- rv = fd = open(path, O_WRONLY);
- if (rv < 0) {
- log_error("%s: open failed: %d", path, errno);
- goto out;
- }
-
- memset(buf, 0, 32);
- snprintf(buf, 32, "%d", id);
-
- rv = do_write(fd, buf, strlen(buf));
- if (rv < 0) {
- log_error("%s: write failed: %d, %s", path, errno, buf);
- close(fd);
- goto out;
- }
- close(fd);
-
- /*
- * set node's weight
- */
-
- w = get_weight(id, name);
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%s/nodes/%d/weight",
- SPACES_DIR, name, id);
-
- rv = fd = open(path, O_WRONLY);
- if (rv < 0) {
- log_error("%s: open failed: %d", path, errno);
- goto out;
- }
-
- memset(buf, 0, 32);
- snprintf(buf, 32, "%d", w);
-
- rv = do_write(fd, buf, strlen(buf));
- if (rv < 0) {
- log_error("%s: write failed: %d, %s", path, errno, buf);
- close(fd);
- goto out;
- }
- close(fd);
- }
-
- rv = 0;
- out:
- return rv;
-}
-
-#if 0
-char *str_ip(char *addr)
-{
- static char ip[256];
- struct sockaddr_in *sin = (struct sockaddr_in *) addr;
- memset(ip, 0, sizeof(ip));
- inet_ntop(AF_INET, &sin->sin_addr, ip, 256);
- return ip;
-}
-#endif
-
-static char *str_ip(char *addr)
-{
- static char str_ip_buf[INET6_ADDRSTRLEN];
- struct sockaddr_storage *ss = (struct sockaddr_storage *)addr;
- struct sockaddr_in *sin = (struct sockaddr_in *)addr;
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
- void *saddr;
-
- if (ss->ss_family == AF_INET6)
- saddr = &sin6->sin6_addr;
- else
- saddr = &sin->sin_addr;
-
- inet_ntop(ss->ss_family, saddr, str_ip_buf, sizeof(str_ip_buf));
- return str_ip_buf;
-}
-
-/* record the nodeids that are currently listed under
- config/dlm/cluster/comms/ so that we can remove all of them */
-
-static int update_comms_nodes(void)
-{
- char path[PATH_MAX];
- DIR *d;
- struct dirent *de;
- int i = 0;
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, COMMS_DIR);
-
- d = opendir(path);
- if (!d) {
- log_debug("%s: opendir failed: %d", path, errno);
- return -1;
- }
-
- memset(comms_nodes, 0, sizeof(comms_nodes));
- comms_nodes_count = 0;
-
- while ((de = readdir(d))) {
- if (de->d_name[0] == '.')
- continue;
- comms_nodes[i++] = atoi(de->d_name);
- }
- closedir(d);
-
- comms_nodes_count = i;
- return 0;
-}
-
-/* clear out everything under config/dlm/cluster/comms/ */
-
-static void clear_configfs_comms(void)
-{
- char path[PATH_MAX];
- int i, rv;
-
- rv = update_comms_nodes();
- if (rv < 0)
- return;
-
- for (i = 0; i < comms_nodes_count; i++) {
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%d", COMMS_DIR, comms_nodes[i]);
-
- log_debug("clear_configfs_nodes rmdir \"%s\"", path);
-
- rv = rmdir(path);
- if (rv)
- log_error("%s: rmdir failed: %d", path, errno);
- }
-}
-
-static void clear_configfs_space_nodes(char *name)
-{
- char path[PATH_MAX];
- int i, rv;
-
- rv = update_dir_members(name);
- if (rv < 0)
- return;
-
- for (i = 0; i < dir_members_count; i++) {
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%s/nodes/%d",
- SPACES_DIR, name, dir_members[i]);
-
- log_debug("clear_configfs_space_nodes rmdir \"%s\"", path);
-
- rv = rmdir(path);
- if (rv)
- log_error("%s: rmdir failed: %d", path, errno);
- }
-}
-
-/* clear out everything under config/dlm/cluster/spaces/ */
-
-static void clear_configfs_spaces(void)
-{
- char path[PATH_MAX];
- DIR *d;
- struct dirent *de;
- int rv;
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s", SPACES_DIR);
-
- d = opendir(path);
- if (!d) {
- log_debug("%s: opendir failed: %d", path, errno);
- return;
- }
-
- while ((de = readdir(d))) {
- if (de->d_name[0] == '.')
- continue;
-
- clear_configfs_space_nodes(de->d_name);
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%s", SPACES_DIR, de->d_name);
-
- log_debug("clear_configfs_spaces rmdir \"%s\"", path);
-
- rv = rmdir(path);
- if (rv)
- log_error("%s: rmdir failed: %d", path, errno);
- }
- closedir(d);
-}
-
-static int add_configfs_base(void)
-{
- int rv = 0;
-
- if (!path_exists("/sys/kernel/config")) {
- log_error("No /sys/kernel/config, is configfs loaded?");
- return -1;
- }
-
- if (!path_exists("/sys/kernel/config/dlm")) {
- log_error("No /sys/kernel/config/dlm, is the dlm loaded?");
- return -1;
- }
-
- if (!path_exists("/sys/kernel/config/dlm/cluster"))
- rv = create_path("/sys/kernel/config/dlm/cluster");
-
- return rv;
-}
-
-int add_configfs_node(int nodeid, char *addr, int addrlen, int local)
-{
- char path[PATH_MAX];
- char padded_addr[sizeof(struct sockaddr_storage)];
- char buf[32];
- int rv, fd;
-
- log_debug("set_configfs_node %d %s local %d",
- nodeid, str_ip(addr), local);
-
- /*
- * create comm dir for this node
- */
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%d", COMMS_DIR, nodeid);
-
- rv = create_path(path);
- if (rv)
- return -1;
-
- /*
- * set the nodeid
- */
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%d/nodeid", COMMS_DIR, nodeid);
-
- fd = open(path, O_WRONLY);
- if (fd < 0) {
- log_error("%s: open failed: %d", path, errno);
- return -1;
- }
-
- memset(buf, 0, sizeof(buf));
- snprintf(buf, 32, "%d", nodeid);
-
- rv = do_write(fd, buf, strlen(buf));
- if (rv < 0) {
- log_error("%s: write failed: %d, %s", path, errno, buf);
- close(fd);
- return -1;
- }
- close(fd);
-
- /*
- * set the address
- */
-
- memset(padded_addr, 0, sizeof(padded_addr));
- memcpy(padded_addr, addr, addrlen);
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%d/addr", COMMS_DIR, nodeid);
-
- fd = open(path, O_WRONLY);
- if (fd < 0) {
- log_error("%s: open failed: %d", path, errno);
- return -1;
- }
-
- rv = do_write(fd, padded_addr, sizeof(struct sockaddr_storage));
- if (rv < 0) {
- log_error("%s: write failed: %d %d", path, errno, rv);
- close(fd);
- return -1;
- }
- close(fd);
-
- /*
- * set local
- */
-
- if (!local)
- goto out;
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%d/local", COMMS_DIR, nodeid);
-
- fd = open(path, O_WRONLY);
- if (fd < 0) {
- log_error("%s: open failed: %d", path, errno);
- return -1;
- }
-
- rv = do_write(fd, (void *)"1", strlen("1"));
- if (rv < 0) {
- log_error("%s: write failed: %d", path, errno);
- close(fd);
- return -1;
- }
- close(fd);
- out:
- return 0;
-}
-
-void del_configfs_node(int nodeid)
-{
- char path[PATH_MAX];
- int rv;
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/%d", COMMS_DIR, nodeid);
-
- log_debug("del_configfs_node rmdir \"%s\"", path);
-
- rv = rmdir(path);
- if (rv)
- log_error("%s: rmdir failed: %d", path, errno);
-}
-
-static int set_configfs_protocol(int proto)
-{
- char path[PATH_MAX];
- char buf[32];
- int fd, rv;
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/protocol", CLUSTER_DIR);
-
- fd = open(path, O_WRONLY);
- if (fd < 0) {
- log_error("%s: open failed: %d", path, errno);
- return fd;
- }
-
- memset(buf, 0, sizeof(buf));
- snprintf(buf, 32, "%d", proto);
-
- rv = do_write(fd, buf, strlen(buf));
- if (rv < 0) {
- log_error("%s: write failed: %d", path, errno);
- return rv;
- }
- close(fd);
- log_debug("set protocol %d", proto);
- return 0;
-}
-
-static int set_configfs_timewarn(int cs)
-{
- char path[PATH_MAX];
- char buf[32];
- int fd, rv;
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/timewarn_cs", CLUSTER_DIR);
-
- fd = open(path, O_WRONLY);
- if (fd < 0) {
- log_error("%s: open failed: %d", path, errno);
- return fd;
- }
-
- memset(buf, 0, sizeof(buf));
- snprintf(buf, 32, "%d", cs);
-
- rv = do_write(fd, buf, strlen(buf));
- if (rv < 0) {
- log_error("%s: write failed: %d", path, errno);
- return rv;
- }
- close(fd);
- log_debug("set timewarn_cs %d", cs);
- return 0;
-}
-
-static int set_configfs_debug(int val)
-{
- char path[PATH_MAX];
- char buf[32];
- int fd, rv;
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "%s/log_debug", CLUSTER_DIR);
-
- fd = open(path, O_WRONLY);
- if (fd < 0) {
- log_error("%s: open failed: %d", path, errno);
- return fd;
- }
-
- memset(buf, 0, sizeof(buf));
- snprintf(buf, 32, "%d", val);
-
- rv = do_write(fd, buf, strlen(buf));
- if (rv < 0) {
- log_error("%s: write failed: %d", path, errno);
- return rv;
- }
- close(fd);
- log_debug("set log_debug %d", val);
- return 0;
-}
-
-#define NET_RMEM_DEFAULT 4194304
-#define NET_RMEM_MAX 4194304
-
-static int set_proc_rmem(void)
-{
- char path[PATH_MAX];
- char buf[32];
- int fd, rv;
-
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "/proc/sys/net/core/rmem_default");
-
- fd = open(path, O_RDWR);
- if (fd < 0) {
- log_error("%s: open failed: %d", path, errno);
- return fd;
- }
-
- memset(buf, 0, sizeof(buf));
-
- rv = read(fd, buf, sizeof(buf));
- if (rv < 0) {
- log_error("%s: read failed: %d", path, errno);
- close(fd);
- return rv;
- }
-
- if (atoi(buf) >= NET_RMEM_DEFAULT) {
- close(fd);
- goto next;
- }
-
- memset(buf, 0, sizeof(buf));
- snprintf(buf, 32, "%d", NET_RMEM_DEFAULT);
-
- rv = do_write(fd, buf, strlen(buf));
- if (rv < 0) {
- log_error("%s: write failed: %d", path, errno);
- close(fd);
- return rv;
- }
-
- close(fd);
- log_debug("set %s %s", path, buf);
-
- next:
- memset(path, 0, PATH_MAX);
- snprintf(path, PATH_MAX, "/proc/sys/net/core/rmem_max");
-
- fd = open(path, O_RDWR);
- if (fd < 0) {
- log_error("%s: open failed: %d", path, errno);
- return fd;
- }
-
- memset(buf, 0, sizeof(buf));
-
- rv = read(fd, buf, sizeof(buf));
- if (rv < 0) {
- log_error("%s: read failed: %d", path, errno);
- close(fd);
- return rv;
- }
-
- if (atoi(buf) >= NET_RMEM_MAX) {
- close(fd);
- goto out;
- }
-
- memset(buf, 0, sizeof(buf));
- snprintf(buf, 32, "%d", NET_RMEM_MAX);
-
- rv = do_write(fd, buf, strlen(buf));
- if (rv < 0) {
- log_error("%s: write failed: %d", path, errno);
- close(fd);
- return rv;
- }
-
- close(fd);
- log_debug("set %s %s", path, buf);
- out:
- return 0;
-}
-
-void clear_configfs(void)
-{
- clear_configfs_comms();
- clear_configfs_spaces();
- rmdir("/sys/kernel/config/dlm/cluster");
-}
-
-int setup_configfs(void)
-{
- int rv;
-
- clear_configfs();
-
- rv = add_configfs_base();
- if (rv < 0)
- return rv;
-
- /* add configfs entries for existing nodes */
- update_cluster();
-
- /* the kernel has its own defaults for these values which we
- don't want to change unless these have been set; -1 means
- they have not been set on command line or config file */
-
- if (cfgk_debug != -1)
- set_configfs_debug(cfgk_debug);
- if (cfgk_timewarn != -1)
- set_configfs_timewarn(cfgk_timewarn);
-
- if (cfgk_protocol == PROTO_DETECT) {
- rv = detect_protocol();
- if (rv == PROTO_TCP || rv == PROTO_SCTP)
- cfgk_protocol = rv;
- }
-
- if (cfgk_protocol == PROTO_TCP || cfgk_protocol == PROTO_SCTP)
- set_configfs_protocol(cfgk_protocol);
-
- if (cfgk_protocol == PROTO_SCTP)
- set_proc_rmem();
-
- return 0;
-}
-
-static void find_minors(void)
-{
- FILE *fl;
- char name[256];
- uint32_t number;
- int found = 0;
- int c;
-
- control_minor = 0;
- monitor_minor = 0;
- plock_minor = 0;
- old_plock_minor = 0;
-
- if (!(fl = fopen("/proc/misc", "r"))) {
- log_error("/proc/misc fopen failed: %s", strerror(errno));
- return;
- }
-
- while (!feof(fl)) {
- if (fscanf(fl, "%d %255s\n", &number, &name[0]) == 2) {
-
- if (!strcmp(name, "dlm-control")) {
- control_minor = number;
- found++;
- } else if (!strcmp(name, "dlm-monitor")) {
- monitor_minor = number;
- found++;
- } else if (!strcmp(name, "dlm_plock")) {
- plock_minor = number;
- found++;
- } else if (!strcmp(name, "lock_dlm_plock")) {
- old_plock_minor = number;
- found++;
- }
-
- } else do {
- c = fgetc(fl);
- } while (c != EOF && c != '\n');
-
- if (found == 3)
- break;
- }
- fclose(fl);
-
- if (!found)
- log_error("Is dlm missing from kernel? No misc devices found.");
-}
-
-static int find_udev_device(const char *path, uint32_t minor)
-{
- struct stat st;
- int i;
-
- for (i = 0; i < 10; i++) {
- if (stat(path, &st) == 0 && minor(st.st_rdev) == minor)
- return 0;
- sleep(1);
- }
-
- log_error("cannot find device %s with minor %d", path, minor);
- return -1;
-}
-
-int setup_misc_devices(void)
-{
- int rv;
-
- find_minors();
-
- if (control_minor) {
- rv = find_udev_device("/dev/misc/dlm-control", control_minor);
- if (rv < 0)
- return rv;
- log_debug("found /dev/misc/dlm-control minor %u",
- control_minor);
- }
-
- if (monitor_minor) {
- rv = find_udev_device("/dev/misc/dlm-monitor", monitor_minor);
- if (rv < 0)
- return rv;
- log_debug("found /dev/misc/dlm-monitor minor %u",
- monitor_minor);
- }
-
- if (plock_minor) {
- rv = find_udev_device("/dev/misc/dlm_plock", plock_minor);
- if (rv < 0)
- return rv;
- log_debug("found /dev/misc/dlm_plock minor %u",
- plock_minor);
- }
-
- if (!plock_minor && old_plock_minor) {
- rv = find_udev_device("/dev/misc/lock_dlm_plock",
- old_plock_minor);
- if (rv < 0)
- return rv;
- log_debug("found /dev/misc/lock_dlm_plock minor %u",
- old_plock_minor);
- }
-
- return 0;
-}
-
diff --git a/group/dlm_controld/config.c b/group/dlm_controld/config.c
deleted file mode 100644
index 40b0c87..0000000
--- a/group/dlm_controld/config.c
+++ /dev/null
@@ -1,93 +0,0 @@
-#include "dlm_daemon.h"
-#include <libxml/tree.h>
-
-/* TODO:
- <dlm>
- <lockspace name="foo" nodir="1">
- <master nodeid="1" weight="2"/>
- <master nodeid="2" weight="3"/>
- </lockspace>
- </dlm>
-*/
-
-int get_weight(int nodeid, char *lockspace)
-{
- /* default weight is 1 */
- return 1;
-}
-
-static void proto_val(char *str, int *val)
-{
- if (!strncasecmp(str, "tcp", 3))
- *val = PROTO_TCP;
- else if (!strncasecmp(str, "sctp", 4))
- *val = PROTO_SCTP;
- else if (!strncasecmp(str, "detect", 6))
- *val = PROTO_DETECT;
- else {
- log_error("invalid protocol value %s", str);
- }
-}
-
-static void set_val(xmlNode *root, const char *name, int *opt, int *val)
-{
- xmlChar *str;
-
- str = xmlGetProp(root, BAD_CAST name);
- if (str && !(*opt)) {
- *val = atoi((char *)str);
- log_debug("config %s = %d", name, *val);
- }
-}
-
-void setup_config(int update)
-{
- xmlDoc *doc;
- xmlNode *root;
- xmlChar *str;
-
- if (!path_exists(DLM_CONFIG_FILE))
- return;
-
- doc = xmlParseFile(DLM_CONFIG_FILE);
- if (!doc) {
- log_error("xml parse error %d %s", errno, DLM_CONFIG_FILE);
- return;
- }
-
- root = xmlDocGetRootElement(doc);
- if (!root) {
- log_error("xml root error %d %s", errno, DLM_CONFIG_FILE);
- xmlFreeDoc(doc);
- return;
- }
-
- if (update)
- goto do_update;
-
- /* These config values are set from dlm.conf only if they haven't
- already been set on the command line. */
-
- str = xmlGetProp(root, BAD_CAST "protocol");
- if (str && !optk_protocol) {
- proto_val((char *)str, &cfgk_protocol);
- log_debug("config protocol = %d", cfgk_protocol);
- }
-
- set_val(root, "log_debug", &optk_debug, &cfgk_debug);
- set_val(root, "timewarn", &optk_timewarn, &cfgk_timewarn);
- set_val(root, "enable_fencing", &optd_enable_fencing, &cfgd_enable_fencing);
- set_val(root, "enable_quorum", &optd_enable_quorum, &cfgd_enable_quorum);
- set_val(root, "enable_plock", &optd_enable_plock, &cfgd_enable_plock);
- set_val(root, "plock_ownership", &optd_plock_ownership, &cfgd_plock_ownership);
- do_update:
- /* The following can be changed while running */
- set_val(root, "plock_debug", &optd_plock_debug, &cfgd_plock_debug);
- set_val(root, "plock_rate_limit", &optd_plock_rate_limit, &cfgd_plock_rate_limit);
- set_val(root, "drop_resources_time", &optd_drop_resources_time, &cfgd_drop_resources_time);
- set_val(root, "drop_resources_count", &optd_drop_resources_count, &cfgd_drop_resources_count);
- set_val(root, "drop_resources_age", &optd_drop_resources_age, &cfgd_drop_resources_age);
-
- xmlFreeDoc(doc);
-}
-
diff --git a/group/dlm_controld/config.h b/group/dlm_controld/config.h
deleted file mode 100644
index ab37eed..0000000
--- a/group/dlm_controld/config.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef __CONFIG_DOT_H__
-#define __CONFIG_DOT_H__
-
-/* the kernel has default values for debug, timewarn and protocol;
- we only change them if new values are given on command line or in ccs */
-
-#define DEFAULT_DEBUG_LOGFILE 0
-#define DEFAULT_ENABLE_FENCING 1
-#define DEFAULT_ENABLE_QUORUM 0
-#define DEFAULT_ENABLE_PLOCK 1
-#define DEFAULT_PLOCK_DEBUG 0
-#define DEFAULT_PLOCK_RATE_LIMIT 0
-#define DEFAULT_PLOCK_OWNERSHIP 0
-#define DEFAULT_DROP_RESOURCES_TIME 10000 /* 10 sec */
-#define DEFAULT_DROP_RESOURCES_COUNT 10
-#define DEFAULT_DROP_RESOURCES_AGE 10000 /* 10 sec */
-
-extern int optk_debug;
-extern int optk_timewarn;
-extern int optk_protocol;
-extern int optd_debug_logfile;
-extern int optd_enable_fencing;
-extern int optd_enable_quorum;
-extern int optd_enable_plock;
-extern int optd_plock_debug;
-extern int optd_plock_rate_limit;
-extern int optd_plock_ownership;
-extern int optd_drop_resources_time;
-extern int optd_drop_resources_count;
-extern int optd_drop_resources_age;
-
-extern int cfgk_debug;
-extern int cfgk_timewarn;
-extern int cfgk_protocol;
-extern int cfgd_debug_logfile;
-extern int cfgd_enable_fencing;
-extern int cfgd_enable_quorum;
-extern int cfgd_enable_plock;
-extern int cfgd_plock_debug;
-extern int cfgd_plock_rate_limit;
-extern int cfgd_plock_ownership;
-extern int cfgd_drop_resources_time;
-extern int cfgd_drop_resources_count;
-extern int cfgd_drop_resources_age;
-
-#endif
-
diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
deleted file mode 100644
index b3c1454..0000000
--- a/group/dlm_controld/cpg.c
+++ /dev/null
@@ -1,2606 +0,0 @@
-#include "dlm_daemon.h"
-
-struct protocol_version {
- uint16_t major;
- uint16_t minor;
- uint16_t patch;
- uint16_t flags;
-};
-
-struct protocol {
- union {
- struct protocol_version dm_ver;
- uint16_t daemon_max[4];
- };
- union {
- struct protocol_version km_ver;
- uint16_t kernel_max[4];
- };
- union {
- struct protocol_version dr_ver;
- uint16_t daemon_run[4];
- };
- union {
- struct protocol_version kr_ver;
- uint16_t kernel_run[4];
- };
-};
-
-struct member {
- struct list_head list;
- int nodeid;
- int start; /* 1 if we received a start message for this change */
- int added; /* 1 if added by this change */
- int failed; /* 1 if failed in this change */
- int disallowed;
- uint32_t start_flags;
-};
-
-struct node {
- struct list_head list;
- int nodeid;
- int check_fencing;
- int check_quorum;
- int check_fs;
- int fs_notified;
- uint64_t add_time;
- uint64_t fail_time;
- uint64_t fence_time; /* for debug */
- uint64_t cluster_add_time;
- uint64_t cluster_remove_time;
- uint32_t fence_queries; /* for debug */
- uint32_t added_seq; /* for queries */
- uint32_t removed_seq; /* for queries */
- int failed_reason; /* for queries */
-
- struct protocol proto;
-};
-
-/* One of these change structs is created for every confchg a cpg gets. */
-
-#define CGST_WAIT_CONDITIONS 1
-#define CGST_WAIT_MESSAGES 2
-
-struct change {
- struct list_head list;
- struct list_head members;
- struct list_head removed; /* nodes removed by this change */
- int member_count;
- int joined_count;
- int remove_count;
- int failed_count;
- int state;
- int we_joined;
- uint32_t seq; /* used as a reference for debugging, and for queries */
- uint32_t combined_seq; /* for queries */
- uint64_t create_time;
-};
-
-struct ls_info {
- uint32_t ls_info_size;
- uint32_t id_info_size;
- uint32_t id_info_count;
-
- uint32_t started_count;
-
- int member_count;
- int joined_count;
- int remove_count;
- int failed_count;
-};
-
-struct id_info {
- int nodeid;
-};
-
-int message_flow_control_on;
-static cpg_handle_t cpg_handle_daemon;
-static int cpg_fd_daemon;
-static struct protocol our_protocol;
-static struct list_head daemon_nodes;
-static struct cpg_address daemon_member[MAX_NODES];
-static int daemon_member_count;
-
-static void log_config(const struct cpg_name *group_name,
- const struct cpg_address *member_list,
- size_t member_list_entries,
- const struct cpg_address *left_list,
- size_t left_list_entries,
- const struct cpg_address *joined_list,
- size_t joined_list_entries)
-{
- char m_buf[128];
- char j_buf[32];
- char l_buf[32];
- size_t i, len, pos;
- int ret;
-
- memset(m_buf, 0, sizeof(m_buf));
- memset(j_buf, 0, sizeof(j_buf));
- memset(l_buf, 0, sizeof(l_buf));
-
- len = sizeof(m_buf);
- pos = 0;
- for (i = 0; i < member_list_entries; i++) {
- ret = snprintf(m_buf + pos, len - pos, " %d",
- member_list[i].nodeid);
- if (ret >= len - pos)
- break;
- pos += ret;
- }
-
- len = sizeof(j_buf);
- pos = 0;
- for (i = 0; i < joined_list_entries; i++) {
- ret = snprintf(j_buf + pos, len - pos, " %d",
- joined_list[i].nodeid);
- if (ret >= len - pos)
- break;
- pos += ret;
- }
-
- len = sizeof(l_buf);
- pos = 0;
- for (i = 0; i < left_list_entries; i++) {
- ret = snprintf(l_buf + pos, len - pos, " %d",
- left_list[i].nodeid);
- if (ret >= len - pos)
- break;
- pos += ret;
- }
-
- log_debug("%s conf %zu %zu %zu memb%s join%s left%s", group_name->value,
- member_list_entries, joined_list_entries, left_list_entries,
- m_buf, j_buf, l_buf);
-}
-
-static void ls_info_in(struct ls_info *li)
-{
- li->ls_info_size = le32_to_cpu(li->ls_info_size);
- li->id_info_size = le32_to_cpu(li->id_info_size);
- li->id_info_count = le32_to_cpu(li->id_info_count);
- li->started_count = le32_to_cpu(li->started_count);
- li->member_count = le32_to_cpu(li->member_count);
- li->joined_count = le32_to_cpu(li->joined_count);
- li->remove_count = le32_to_cpu(li->remove_count);
- li->failed_count = le32_to_cpu(li->failed_count);
-}
-
-static void id_info_in(struct id_info *id)
-{
- id->nodeid = le32_to_cpu(id->nodeid);
-}
-
-static void ids_in(struct ls_info *li, struct id_info *ids)
-{
- struct id_info *id;
- int i;
-
- id = ids;
- for (i = 0; i < li->id_info_count; i++) {
- id_info_in(id);
- id = (struct id_info *)((char *)id + li->id_info_size);
- }
-}
-
-const char *msg_name(int type)
-{
- switch (type) {
- case DLM_MSG_PROTOCOL:
- return "protocol";
- case DLM_MSG_START:
- return "start";
- case DLM_MSG_PLOCK:
- return "plock";
- case DLM_MSG_PLOCK_OWN:
- return "plock_own";
- case DLM_MSG_PLOCK_DROP:
- return "plock_drop";
- case DLM_MSG_PLOCK_SYNC_LOCK:
- return "plock_sync_lock";
- case DLM_MSG_PLOCK_SYNC_WAITER:
- return "plock_sync_waiter";
- case DLM_MSG_PLOCKS_DATA:
- return "plocks_data";
- case DLM_MSG_PLOCKS_DONE:
- return "plocks_done";
- case DLM_MSG_DEADLK_CYCLE_START:
- return "deadlk_cycle_start";
- case DLM_MSG_DEADLK_CYCLE_END:
- return "deadlk_cycle_end";
- case DLM_MSG_DEADLK_CHECKPOINT_READY:
- return "deadlk_checkpoint_ready";
- case DLM_MSG_DEADLK_CANCEL_LOCK:
- return "deadlk_cancel_lock";
- default:
- return "unknown";
- }
-}
-
-static int _send_message(cpg_handle_t h, void *buf, int len, int type)
-{
- struct iovec iov;
- cpg_error_t error;
- int retries = 0;
-
- iov.iov_base = buf;
- iov.iov_len = len;
-
- retry:
- error = cpg_mcast_joined(h, CPG_TYPE_AGREED, &iov, 1);
- if (error == CPG_ERR_TRY_AGAIN) {
- retries++;
- usleep(1000);
- if (!(retries % 100))
- log_error("cpg_mcast_joined retry %d %s",
- retries, msg_name(type));
- goto retry;
- }
- if (error != CPG_OK) {
- log_error("cpg_mcast_joined error %d handle %llx %s",
- error, (unsigned long long)h, msg_name(type));
- return -1;
- }
-
- if (retries)
- log_debug("cpg_mcast_joined retried %d %s",
- retries, msg_name(type));
-
- return 0;
-}
-
-/* header fields caller needs to set: type, to_nodeid, flags, msgdata */
-
-void dlm_send_message(struct lockspace *ls, char *buf, int len)
-{
- struct dlm_header *hd = (struct dlm_header *) buf;
- int type = hd->type;
-
- hd->version[0] = cpu_to_le16(our_protocol.daemon_run[0]);
- hd->version[1] = cpu_to_le16(our_protocol.daemon_run[1]);
- hd->version[2] = cpu_to_le16(our_protocol.daemon_run[2]);
- hd->type = cpu_to_le16(hd->type);
- hd->nodeid = cpu_to_le32(our_nodeid);
- hd->to_nodeid = cpu_to_le32(hd->to_nodeid);
- hd->global_id = cpu_to_le32(ls->global_id);
- hd->flags = cpu_to_le32(hd->flags);
- hd->msgdata = cpu_to_le32(hd->msgdata);
- hd->msgdata2 = cpu_to_le32(hd->msgdata2);
-
- _send_message(ls->cpg_handle, buf, len, type);
-}
-
-static struct member *find_memb(struct change *cg, int nodeid)
-{
- struct member *memb;
-
- list_for_each_entry(memb, &cg->members, list) {
- if (memb->nodeid == nodeid)
- return memb;
- }
- return NULL;
-}
-
-static struct lockspace *find_ls_handle(cpg_handle_t h)
-{
- struct lockspace *ls;
-
- list_for_each_entry(ls, &lockspaces, list) {
- if (ls->cpg_handle == h)
- return ls;
- }
- return NULL;
-}
-
-static struct lockspace *find_ls_ci(int ci)
-{
- struct lockspace *ls;
-
- list_for_each_entry(ls, &lockspaces, list) {
- if (ls->cpg_client == ci)
- return ls;
- }
- return NULL;
-}
-
-static void free_cg(struct change *cg)
-{
- struct member *memb, *safe;
-
- list_for_each_entry_safe(memb, safe, &cg->members, list) {
- list_del(&memb->list);
- free(memb);
- }
- list_for_each_entry_safe(memb, safe, &cg->removed, list) {
- list_del(&memb->list);
- free(memb);
- }
- free(cg);
-}
-
-static void free_ls(struct lockspace *ls)
-{
- struct change *cg, *cg_safe;
- struct node *node, *node_safe;
-
- list_for_each_entry_safe(cg, cg_safe, &ls->changes, list) {
- list_del(&cg->list);
- free_cg(cg);
- }
-
- if (ls->started_change)
- free_cg(ls->started_change);
-
- list_for_each_entry_safe(node, node_safe, &ls->node_history, list) {
- list_del(&node->list);
- free(node);
- }
-
- free(ls);
-}
-
-
-/* Problem scenario:
- nodes A,B,C are in fence domain
- node C has gfs foo mounted
- node C fails
- nodes A,B begin fencing C (slow, not completed)
- node B mounts gfs foo
-
- We may end up having gfs foo mounted and being used on B before
- C has been fenced. C could wake up corrupt fs.
-
- So, we need to prevent any new gfs mounts while there are any
- outstanding, incomplete fencing operations.
-
- We also need to check that the specific failed nodes we know about have
- been fenced (since fenced may not even have been notified that the node
- has failed yet).
-
- So, check that:
- 1. has fenced fenced the node since we saw it fail?
- 2. fenced has no outstanding fencing ops
-
- For 1:
- - node X fails
- - we see node X fail and X has non-zero add_time,
- set check_fencing and record the fail time
- - wait for X to be removed from all dlm cpg's (probably not necessary)
- - check that the fencing time is later than the recorded time above
-
- Tracking fencing state when there are spurious partitions/merges...
-
- from a spurious leave/join of node X, a lockspace will see:
- - node X is a lockspace member
- - node X fails, may be waiting for all cpgs to see failure or for fencing to
- complete
- - node X joins the lockspace - we want to process the change as usual, but
- don't want to disrupt the code waiting for the fencing, and we want to
- continue running properly once the remerged node is properly reset
-
- ls->node_history
- when we see a node not in this list, add entry for it with zero add_time
- record the time we get a good start message from the node, add_time
- clear add_time if the node leaves
- if node fails with non-zero add_time, set check_fencing
- when a node is fenced, clear add_time and clear check_fencing
- if a node remerges after this, no good start message, no new add_time set
- if a node fails with zero add_time, it doesn't need fencing
- if a node remerges before it's been fenced, no good start message, no new
- add_time set
-*/
-
-static struct node *get_node_history(struct lockspace *ls, int nodeid)
-{
- struct node *node;
-
- list_for_each_entry(node, &ls->node_history, list) {
- if (node->nodeid == nodeid)
- return node;
- }
- return NULL;
-}
-
-static void node_history_init(struct lockspace *ls, int nodeid,
- struct change *cg)
-{
- struct node *node;
-
- node = get_node_history(ls, nodeid);
- if (node)
- goto out;
-
- node = malloc(sizeof(struct node));
- if (!node)
- return;
- memset(node, 0, sizeof(struct node));
-
- node->nodeid = nodeid;
- node->add_time = 0;
- list_add_tail(&node->list, &ls->node_history);
- out:
- if (cg)
- node->added_seq = cg->seq; /* for queries */
-}
-
-void node_history_cluster_add(int nodeid)
-{
- struct lockspace *ls;
- struct node *node;
-
- list_for_each_entry(ls, &lockspaces, list) {
- node_history_init(ls, nodeid, NULL);
-
- node = get_node_history(ls, nodeid);
- if (!node) {
- log_error("node_history_cluster_add no nodeid %d",
- nodeid);
- return;
- }
-
- node->cluster_add_time = time(NULL);
- }
-}
-
-void node_history_cluster_remove(int nodeid)
-{
- struct lockspace *ls;
- struct node *node;
-
- list_for_each_entry(ls, &lockspaces, list) {
- node = get_node_history(ls, nodeid);
- if (!node) {
- log_error("node_history_cluster_remove no nodeid %d",
- nodeid);
- return;
- }
-
- node->cluster_remove_time = time(NULL);
- }
-}
-
-static void node_history_start(struct lockspace *ls, int nodeid)
-{
- struct node *node;
-
- node = get_node_history(ls, nodeid);
- if (!node) {
- log_error("node_history_start no nodeid %d", nodeid);
- return;
- }
-
- node->add_time = time(NULL);
-}
-
-static void node_history_left(struct lockspace *ls, int nodeid,
- struct change *cg)
-{
- struct node *node;
-
- node = get_node_history(ls, nodeid);
- if (!node) {
- log_error("node_history_left no nodeid %d", nodeid);
- return;
- }
-
- node->add_time = 0;
- node->removed_seq = cg->seq; /* for queries */
-}
-
-static void node_history_fail(struct lockspace *ls, int nodeid,
- struct change *cg, int reason)
-{
- struct node *node;
-
- node = get_node_history(ls, nodeid);
- if (!node) {
- log_error("node_history_fail no nodeid %d", nodeid);
- return;
- }
-
- if (cfgd_enable_fencing && node->add_time) {
- node->check_fencing = 1;
- node->fence_time = 0;
- node->fence_queries = 0;
- node->fail_time = time(NULL);
- }
-
- /* fenced will take care of making sure the quorum value
- is adjusted for all the failures */
-
- if (cfgd_enable_quorum && !cfgd_enable_fencing)
- node->check_quorum = 1;
-
- if (ls->fs_registered) {
- log_group(ls, "check_fs nodeid %d set", nodeid);
- node->check_fs = 1;
- }
-
- node->removed_seq = cg->seq; /* for queries */
- node->failed_reason = reason; /* for queries */
-}
-
-static int check_fencing_done(struct lockspace *ls)
-{
- struct node *node;
- uint64_t last_fenced_time;
- int in_progress, wait_count = 0;
- int rv;
-
- if (!cfgd_enable_fencing) {
- log_group(ls, "check_fencing disabled");
- return 1;
- }
-
- list_for_each_entry(node, &ls->node_history, list) {
- if (!node->check_fencing)
- continue;
-
- /* check with fenced to see if the node has been
- fenced since node->add_time */
-
- rv = fence_node_time(node->nodeid, &last_fenced_time);
- if (rv < 0)
- log_error("fenced_node_info error %d", rv);
-
- /* need >= not just > because in at least one case
- we've seen fenced_time within the same second as
- fail_time: with external fencing, e.g. fence_node */
-
- if (last_fenced_time >= node->fail_time) {
- log_group(ls, "check_fencing %d done "
- "add %llu fail %llu last %llu",
- node->nodeid,
- (unsigned long long)node->add_time,
- (unsigned long long)node->fail_time,
- (unsigned long long)last_fenced_time);
- node->check_fencing = 0;
- node->add_time = 0;
- node->fence_time = last_fenced_time;
- } else {
- if (!node->fence_queries ||
- node->fence_time != last_fenced_time) {
- log_group(ls, "check_fencing %d wait "
- "add %llu fail %llu last %llu",
- node->nodeid,
- (unsigned long long)node->add_time,
- (unsigned long long)node->fail_time,
- (unsigned long long)last_fenced_time);
- node->fence_queries++;
- node->fence_time = last_fenced_time;
- }
- wait_count++;
- }
- }
-
- if (wait_count)
- return 0;
-
- /* now check if there are any outstanding fencing ops (for nodes
- we may not have seen in any lockspace), and return 0 if there
- are any */
-
- rv = fence_in_progress(&in_progress);
- if (rv < 0) {
- log_error("fenced_domain_info error %d", rv);
- return 0;
- }
-
- if (in_progress)
- return 0;
-
- log_group(ls, "check_fencing done");
- return 1;
-}
-
-static int check_quorum_done(struct lockspace *ls)
-{
- struct node *node;
- int wait_count = 0;
-
- if (!cfgd_enable_quorum) {
- log_group(ls, "check_quorum disabled");
- return 1;
- }
-
- /* wait for quorum system (cman) to see all the same nodes failed, so
- we know that cluster_quorate is adjusted for the same failures we've
- seen (see comment in fenced about the assumption here) */
-
- list_for_each_entry(node, &ls->node_history, list) {
- if (!node->check_quorum)
- continue;
-
- if (!is_cluster_member(node->nodeid)) {
- node->check_quorum = 0;
- } else {
- log_group(ls, "check_quorum nodeid %d is_cluster_member",
- node->nodeid);
- wait_count++;
- }
- }
-
- if (wait_count)
- return 0;
-
- if (!cluster_quorate) {
- log_group(ls, "check_quorum not quorate");
- return 0;
- }
-
- log_group(ls, "check_quorum done");
- return 1;
-}
-
-/* wait for local fs_controld to ack each failed node */
-
-static int check_fs_done(struct lockspace *ls)
-{
- struct node *node;
- int wait_count = 0;
-
- /* no corresponding fs for this lockspace */
- if (!ls->fs_registered) {
- log_group(ls, "check_fs none registered");
- return 1;
- }
-
- list_for_each_entry(node, &ls->node_history, list) {
- if (!node->check_fs)
- continue;
-
- if (node->fs_notified) {
- log_group(ls, "check_fs nodeid %d clear", node->nodeid);
- node->check_fs = 0;
- node->fs_notified = 0;
- } else {
- log_group(ls, "check_fs nodeid %d needs fs notify",
- node->nodeid);
- wait_count++;
- }
- }
-
- if (wait_count)
- return 0;
-
- log_group(ls, "check_fs done");
- return 1;
-}
-
-static int member_ids[MAX_NODES];
-static int member_count;
-static int renew_ids[MAX_NODES];
-static int renew_count;
-
-static void format_member_ids(struct lockspace *ls)
-{
- struct change *cg = list_first_entry(&ls->changes, struct change, list);
- struct member *memb;
-
- memset(member_ids, 0, sizeof(member_ids));
- member_count = 0;
-
- list_for_each_entry(memb, &cg->members, list)
- member_ids[member_count++] = memb->nodeid;
-}
-
-/* list of nodeids that have left and rejoined since last start_kernel;
- is any member of startcg in the left list of any other cg's?
- (if it is, then it presumably must be flagged added in another) */
-
-static void format_renew_ids(struct lockspace *ls)
-{
- struct change *cg, *startcg;
- struct member *memb, *leftmemb;
-
- startcg = list_first_entry(&ls->changes, struct change, list);
-
- memset(renew_ids, 0, sizeof(renew_ids));
- renew_count = 0;
-
- list_for_each_entry(memb, &startcg->members, list) {
- list_for_each_entry(cg, &ls->changes, list) {
- if (cg == startcg)
- continue;
- list_for_each_entry(leftmemb, &cg->removed, list) {
- if (memb->nodeid == leftmemb->nodeid) {
- renew_ids[renew_count++] = memb->nodeid;
- }
- }
- }
- }
-
-}
-
-static void start_kernel(struct lockspace *ls)
-{
- struct change *cg = list_first_entry(&ls->changes, struct change, list);
-
- if (!ls->kernel_stopped) {
- log_error("start_kernel cg %u not stopped", cg->seq);
- return;
- }
-
- log_group(ls, "start_kernel cg %u member_count %d",
- cg->seq, cg->member_count);
-
- /* needs to happen before setting control which starts recovery */
- if (ls->joining)
- set_sysfs_id(ls->name, ls->global_id);
-
- format_member_ids(ls);
- format_renew_ids(ls);
- set_configfs_members(ls->name, member_count, member_ids,
- renew_count, renew_ids);
- set_sysfs_control(ls->name, 1);
- ls->kernel_stopped = 0;
-
- if (ls->joining) {
- set_sysfs_event_done(ls->name, 0);
- ls->joining = 0;
- }
-}
-
-static void stop_kernel(struct lockspace *ls, uint32_t seq)
-{
- if (!ls->kernel_stopped) {
- log_group(ls, "stop_kernel cg %u", seq);
- set_sysfs_control(ls->name, 0);
- ls->kernel_stopped = 1;
- }
-}
-
-/* the first condition is that the local lockspace is stopped which we
- don't need to check for because stop_kernel(), which is synchronous,
- was done when the change was created */
-
-static int wait_conditions_done(struct lockspace *ls)
-{
- /* the fencing/quorum/fs conditions need to account for all the changes
- that have occured since the last change applied to dlm-kernel, not
- just the latest change */
-
- if (!check_fencing_done(ls)) {
- poll_fencing++;
- return 0;
- }
-
- /* fencing waits for quorum, so we don't need to check quorum for any
- reasons related to safety or protection, so enable_quorum defaults
- to 0. This does mean that lockspaces (and cluster fs's) can be
- started/enabled in an inquorate cluster if there are no outstanding
- fencing operations. Some users or apps may want lockspaces/fs's to
- only be enabled in a quorate cluster; enable_quorum can be set to 1
- to get that behavior. The main advantage of not waiting for quorum
- here is to allow lockspaces to be shut down (and cluster fs's
- unmounted) in an inquorate cluster. */
-
- if (!check_quorum_done(ls)) {
- poll_quorum++;
- return 0;
- }
-
- if (!check_fs_done(ls)) {
- poll_fs++;
- return 0;
- }
-
- return 1;
-}
-
-static int wait_messages_done(struct lockspace *ls)
-{
- struct change *cg = list_first_entry(&ls->changes, struct change, list);
- struct member *memb;
- int need = 0, total = 0;
-
- list_for_each_entry(memb, &cg->members, list) {
- if (!memb->start)
- need++;
- total++;
- }
-
- if (need) {
- log_group(ls, "wait_messages cg %u need %d of %d",
- cg->seq, need, total);
- return 0;
- }
-
- log_group(ls, "wait_messages cg %u got all %d", cg->seq, total);
- return 1;
-}
-
-static void cleanup_changes(struct lockspace *ls)
-{
- struct change *cg = list_first_entry(&ls->changes, struct change, list);
- struct change *safe;
-
- list_del(&cg->list);
- if (ls->started_change)
- free_cg(ls->started_change);
- ls->started_change = cg;
-
- ls->started_count++;
- if (!ls->started_count)
- ls->started_count++;
-
- cg->combined_seq = cg->seq; /* for queries */
-
- list_for_each_entry_safe(cg, safe, &ls->changes, list) {
- ls->started_change->combined_seq = cg->seq; /* for queries */
- list_del(&cg->list);
- free_cg(cg);
- }
-}
-
-/* There's a stream of confchg and messages. At one of these
- messages, the low node needs to store plocks and new nodes
- need to begin saving plock messages. A second message is
- needed to say that the plocks are ready to be read.
-
- When the last start message is recvd for a change, the low node
- stores plocks and the new nodes begin saving messages. When the
- store is done, low node sends plocks_stored message. When
- new nodes recv this, they read the plocks and their saved messages.
- plocks_stored message should identify a specific change, like start
- messages do; if it doesn't match ls->started_change, then it's ignored.
-
- If a confchg adding a new node arrives after plocks are stored but
- before plocks_stored msg recvd, then the message is ignored. The low
- node will send another plocks_stored message for the latest change
- (although it may be able to reuse the ckpt if no plock state has changed).
-*/
-
-static void set_plock_data_node(struct lockspace *ls)
-{
- struct change *cg = list_first_entry(&ls->changes, struct change, list);
- struct member *memb;
- int low = 0;
-
- list_for_each_entry(memb, &cg->members, list) {
- if (!(memb->start_flags & DLM_MFLG_HAVEPLOCK))
- continue;
-
- if (!low || memb->nodeid < low)
- low = memb->nodeid;
- }
-
- log_dlock(ls, "set_plock_data_node from %d to %d",
- ls->plock_data_node, low);
-
- ls->plock_data_node = low;
-}
-
-static struct id_info *get_id_struct(struct id_info *ids, int count, int size,
- int nodeid)
-{
- struct id_info *id = ids;
- int i;
-
- for (i = 0; i < count; i++) {
- if (id->nodeid == nodeid)
- return id;
- id = (struct id_info *)((char *)id + size);
- }
- return NULL;
-}
-
-/* do the change details in the message match the details of the given change */
-
-static int match_change(struct lockspace *ls, struct change *cg,
- struct dlm_header *hd, struct ls_info *li,
- struct id_info *ids)
-{
- struct id_info *id;
- struct member *memb;
- struct node *node;
- uint32_t seq = hd->msgdata;
- int i, members_mismatch;
-
- /* We can ignore messages if we're not in the list of members.
- The one known time this will happen is after we've joined
- the cpg, we can get messages for changes prior to the change
- in which we're added. */
-
- id = get_id_struct(ids, li->id_info_count, li->id_info_size,our_nodeid);
-
- if (!id) {
- log_group(ls, "match_change %d:%u skip %u we are not in members",
- hd->nodeid, seq, cg->seq);
- return 0;
- }
-
- memb = find_memb(cg, hd->nodeid);
- if (!memb) {
- log_group(ls, "match_change %d:%u skip %u sender not member",
- hd->nodeid, seq, cg->seq);
- return 0;
- }
-
- if (memb->start_flags & DLM_MFLG_NACK) {
- log_group(ls, "match_change %d:%u skip %u is nacked",
- hd->nodeid, seq, cg->seq);
- return 0;
- }
-
- if (memb->start && hd->type == DLM_MSG_START) {
- log_group(ls, "match_change %d:%u skip %u already start",
- hd->nodeid, seq, cg->seq);
- return 0;
- }
-
- /* a node's start can't match a change if the node joined the cluster
- more recently than the change was created */
-
- node = get_node_history(ls, hd->nodeid);
- if (!node) {
- log_group(ls, "match_change %d:%u skip cg %u no node history",
- hd->nodeid, seq, cg->seq);
- return 0;
- }
-
- if (node->cluster_add_time > cg->create_time) {
- log_group(ls, "match_change %d:%u skip cg %u created %llu "
- "cluster add %llu", hd->nodeid, seq, cg->seq,
- (unsigned long long)cg->create_time,
- (unsigned long long)node->cluster_add_time);
- return 0;
- }
-
- /* verify this is the right change by matching the counts
- and the nodeids of the current members */
-
- if (li->member_count != cg->member_count ||
- li->joined_count != cg->joined_count ||
- li->remove_count != cg->remove_count ||
- li->failed_count != cg->failed_count) {
- log_group(ls, "match_change %d:%u skip %u expect counts "
- "%d %d %d %d", hd->nodeid, seq, cg->seq,
- cg->member_count, cg->joined_count,
- cg->remove_count, cg->failed_count);
- return 0;
- }
-
- members_mismatch = 0;
- id = ids;
-
- for (i = 0; i < li->id_info_count; i++) {
- memb = find_memb(cg, id->nodeid);
- if (!memb) {
- log_group(ls, "match_change %d:%u skip %u no memb %d",
- hd->nodeid, seq, cg->seq, id->nodeid);
- members_mismatch = 1;
- break;
- }
- id = (struct id_info *)((char *)id + li->id_info_size);
- }
-
- if (members_mismatch)
- return 0;
-
- log_group(ls, "match_change %d:%u matches cg %u", hd->nodeid, seq,
- cg->seq);
- return 1;
-}
-
-/* Unfortunately, there's no really simple way to match a message with the
- specific change that it was sent for. We hope that by passing all the
- details of the change in the message, we will be able to uniquely match the
- it to the correct change. */
-
-/* A start message will usually be for the first (current) change on our list.
- In some cases it will be for a non-current change, and we can ignore it:
-
- 1. A,B,C get confchg1 adding C
- 2. C sends start for confchg1
- 3. A,B,C get confchg2 adding D
- 4. A,B,C,D recv start from C for confchg1 - ignored
- 5. C,D send start for confchg2
- 6. A,B send start for confchg2
- 7. A,B,C,D recv all start messages for confchg2, and start kernel
-
- In step 4, how do the nodes know whether the start message from C is
- for confchg1 or confchg2? Hopefully by comparing the counts and members. */
-
-static struct change *find_change(struct lockspace *ls, struct dlm_header *hd,
- struct ls_info *li, struct id_info *ids)
-{
- struct change *cg;
-
- list_for_each_entry_reverse(cg, &ls->changes, list) {
- if (!match_change(ls, cg, hd, li, ids))
- continue;
- return cg;
- }
-
- log_group(ls, "find_change %d:%u no match", hd->nodeid, hd->msgdata);
- return NULL;
-}
-
-static int is_added(struct lockspace *ls, int nodeid)
-{
- struct change *cg;
- struct member *memb;
-
- list_for_each_entry(cg, &ls->changes, list) {
- memb = find_memb(cg, nodeid);
- if (memb && memb->added)
- return 1;
- }
- return 0;
-}
-
-static void receive_start(struct lockspace *ls, struct dlm_header *hd, int len)
-{
- struct change *cg;
- struct member *memb;
- struct ls_info *li;
- struct id_info *ids;
- uint32_t seq = hd->msgdata;
- int added;
-
- log_group(ls, "receive_start %d:%u len %d", hd->nodeid, seq, len);
-
- li = (struct ls_info *)((char *)hd + sizeof(struct dlm_header));
- ids = (struct id_info *)((char *)li + sizeof(struct ls_info));
-
- ls_info_in(li);
- ids_in(li, ids);
-
- cg = find_change(ls, hd, li, ids);
- if (!cg)
- return;
-
- memb = find_memb(cg, hd->nodeid);
- if (!memb) {
- /* this should never happen since match_change checks it */
- log_error("receive_start no member %d", hd->nodeid);
- return;
- }
-
- memb->start_flags = hd->flags;
-
- added = is_added(ls, hd->nodeid);
-
- if (added && li->started_count && ls->started_count) {
- log_error("receive_start %d:%u add node with started_count %u",
- hd->nodeid, seq, li->started_count);
-
- /* see comment in fence/fenced/cpg.c */
- memb->disallowed = 1;
- return;
- }
-
- if (memb->start_flags & DLM_MFLG_NACK) {
- log_group(ls, "receive_start %d:%u is NACK", hd->nodeid, seq);
- return;
- }
-
- node_history_start(ls, hd->nodeid);
- memb->start = 1;
-}
-
-static void receive_plocks_done(struct lockspace *ls, struct dlm_header *hd,
- int len)
-{
- struct ls_info *li;
- struct id_info *ids;
-
- log_dlock(ls, "receive_plocks_done %d:%u flags %x plocks_data %u need %d save %d",
- hd->nodeid, hd->msgdata, hd->flags, hd->msgdata2,
- ls->need_plocks, ls->save_plocks);
-
- if (!ls->need_plocks)
- return;
-
- if (ls->need_plocks && !ls->save_plocks)
- return;
-
- if (!ls->started_change) {
- /* don't think this should happen */
- log_elock(ls, "receive_plocks_done %d:%u no started_change",
- hd->nodeid, hd->msgdata);
- return;
- }
-
- li = (struct ls_info *)((char *)hd + sizeof(struct dlm_header));
- ids = (struct id_info *)((char *)li + sizeof(struct ls_info));
- ls_info_in(li);
- ids_in(li, ids);
-
- if (!match_change(ls, ls->started_change, hd, li, ids)) {
- /* don't think this should happen */
- log_elock(ls, "receive_plocks_done %d:%u no match_change",
- hd->nodeid, hd->msgdata);
-
- /* remove/free anything we've saved from
- receive_plocks_data messages that weren't for us */
- clear_plocks_data(ls);
- return;
- }
-
- if (ls->recv_plocks_data_count != hd->msgdata2) {
- log_elock(ls, "receive_plocks_done plocks_data %u recv %u",
- hd->msgdata2, ls->recv_plocks_data_count);
- }
-
- process_saved_plocks(ls);
- ls->need_plocks = 0;
- ls->save_plocks = 0;
-
- log_dlock(ls, "receive_plocks_done %d:%u plocks_data_count %u",
- hd->nodeid, hd->msgdata, ls->recv_plocks_data_count);
-}
-
-static void send_info(struct lockspace *ls, struct change *cg, int type,
- uint32_t flags, uint32_t msgdata2)
-{
- struct dlm_header *hd;
- struct ls_info *li;
- struct id_info *id;
- struct member *memb;
- char *buf;
- int len, id_count;
-
- id_count = cg->member_count;
-
- len = sizeof(struct dlm_header) + sizeof(struct ls_info) +
- id_count * sizeof(struct id_info);
-
- buf = malloc(len);
- if (!buf) {
- log_error("send_info len %d no mem", len);
- return;
- }
- memset(buf, 0, len);
-
- hd = (struct dlm_header *)buf;
- li = (struct ls_info *)(buf + sizeof(*hd));
- id = (struct id_info *)(buf + sizeof(*hd) + sizeof(*li));
-
- /* fill in header (dlm_send_message handles part of header) */
-
- hd->type = type;
- hd->msgdata = cg->seq;
- hd->flags = flags;
- hd->msgdata2 = msgdata2;
-
- if (ls->joining)
- hd->flags |= DLM_MFLG_JOINING;
- if (!ls->need_plocks)
- hd->flags |= DLM_MFLG_HAVEPLOCK;
-
- /* fill in ls_info */
-
- li->ls_info_size = cpu_to_le32(sizeof(struct ls_info));
- li->id_info_size = cpu_to_le32(sizeof(struct id_info));
- li->id_info_count = cpu_to_le32(id_count);
- li->started_count = cpu_to_le32(ls->started_count);
- li->member_count = cpu_to_le32(cg->member_count);
- li->joined_count = cpu_to_le32(cg->joined_count);
- li->remove_count = cpu_to_le32(cg->remove_count);
- li->failed_count = cpu_to_le32(cg->failed_count);
-
- /* fill in id_info entries */
-
- list_for_each_entry(memb, &cg->members, list) {
- id->nodeid = cpu_to_le32(memb->nodeid);
- id++;
- }
-
- dlm_send_message(ls, buf, len);
-
- free(buf);
-}
-
-static void send_start(struct lockspace *ls, struct change *cg)
-{
- log_group(ls, "send_start %d:%u counts %u %d %d %d %d",
- our_nodeid, cg->seq, ls->started_count,
- cg->member_count, cg->joined_count, cg->remove_count,
- cg->failed_count);
-
- send_info(ls, cg, DLM_MSG_START, 0, 0);
-}
-
-static void send_plocks_done(struct lockspace *ls, struct change *cg, uint32_t plocks_data)
-{
- log_dlock(ls, "send_plocks_done %d:%u counts %u %d %d %d %d plocks_data %u",
- our_nodeid, cg->seq, ls->started_count,
- cg->member_count, cg->joined_count, cg->remove_count,
- cg->failed_count, plocks_data);
-
- send_info(ls, cg, DLM_MSG_PLOCKS_DONE, 0, plocks_data);
-}
-
-static int same_members(struct change *cg1, struct change *cg2)
-{
- struct member *memb;
-
- list_for_each_entry(memb, &cg1->members, list) {
- if (!find_memb(cg2, memb->nodeid))
- return 0;
- }
- return 1;
-}
-
-static void send_nacks(struct lockspace *ls, struct change *startcg)
-{
- struct change *cg;
-
- list_for_each_entry(cg, &ls->changes, list) {
- if (cg->seq < startcg->seq &&
- cg->member_count == startcg->member_count &&
- cg->joined_count == startcg->joined_count &&
- cg->remove_count == startcg->remove_count &&
- cg->failed_count == startcg->failed_count &&
- same_members(cg, startcg)) {
- log_group(ls, "send nack old cg %u new cg %u",
- cg->seq, startcg->seq);
- send_info(ls, cg, DLM_MSG_START, DLM_MFLG_NACK, 0);
- }
- }
-}
-
-static int nodes_added(struct lockspace *ls)
-{
- struct change *cg;
-
- list_for_each_entry(cg, &ls->changes, list) {
- if (cg->joined_count)
- return 1;
- }
- return 0;
-}
-
-static void prepare_plocks(struct lockspace *ls)
-{
- struct change *cg = list_first_entry(&ls->changes, struct change, list);
- struct member *memb;
- uint32_t plocks_data;
-
- if (!cfgd_enable_plock || ls->disable_plock)
- return;
-
- log_dlock(ls, "prepare_plocks");
-
- /* if we're the only node in the lockspace, then we are the data_node
- and we don't need plocks */
-
- if (cg->member_count == 1) {
- list_for_each_entry(memb, &cg->members, list) {
- if (memb->nodeid != our_nodeid) {
- log_elock(ls, "prepare_plocks other member %d",
- memb->nodeid);
- }
- }
- ls->plock_data_node = our_nodeid;
- ls->need_plocks = 0;
- return;
- }
-
- /* the low node that indicated it had plock state in its last
- start message is the data_node */
-
- set_plock_data_node(ls);
-
- /* there is no node with plock state, so there's no syncing to do */
-
- if (!ls->plock_data_node) {
- ls->need_plocks = 0;
- ls->save_plocks = 0;
- return;
- }
-
- /* We save all plock messages received after our own confchg and
- apply them after we receive the plocks_done message from the
- data_node. */
-
- if (ls->need_plocks) {
- log_dlock(ls, "save_plocks start");
- ls->save_plocks = 1;
- return;
- }
-
- if (ls->plock_data_node != our_nodeid)
- return;
-
- if (nodes_added(ls))
- send_all_plocks_data(ls, cg->seq, &plocks_data);
-
- send_plocks_done(ls, cg, plocks_data);
-}
-
-static void apply_changes(struct lockspace *ls)
-{
- struct change *cg;
-
- if (list_empty(&ls->changes))
- return;
- cg = list_first_entry(&ls->changes, struct change, list);
-
- switch (cg->state) {
-
- case CGST_WAIT_CONDITIONS:
- if (wait_conditions_done(ls)) {
- send_nacks(ls, cg);
- send_start(ls, cg);
- cg->state = CGST_WAIT_MESSAGES;
- }
- break;
-
- case CGST_WAIT_MESSAGES:
- if (wait_messages_done(ls)) {
- start_kernel(ls);
- prepare_plocks(ls);
- cleanup_changes(ls);
- }
- break;
-
- default:
- log_error("apply_changes invalid state %d", cg->state);
- }
-}
-
-void process_lockspace_changes(void)
-{
- struct lockspace *ls, *safe;
-
- poll_fencing = 0;
- poll_quorum = 0;
- poll_fs = 0;
-
- list_for_each_entry_safe(ls, safe, &lockspaces, list) {
- if (!list_empty(&ls->changes))
- apply_changes(ls);
- }
-}
-
-static int add_change(struct lockspace *ls,
- const struct cpg_address *member_list,
- size_t member_list_entries,
- const struct cpg_address *left_list,
- size_t left_list_entries,
- const struct cpg_address *joined_list,
- size_t joined_list_entries,
- struct change **cg_out)
-{
- struct change *cg;
- struct member *memb;
- int i, error;
-
- cg = malloc(sizeof(struct change));
- if (!cg)
- goto fail_nomem;
- memset(cg, 0, sizeof(struct change));
- INIT_LIST_HEAD(&cg->members);
- INIT_LIST_HEAD(&cg->removed);
- cg->state = CGST_WAIT_CONDITIONS;
- cg->create_time = time(NULL);
- cg->seq = ++ls->change_seq;
- if (!cg->seq)
- cg->seq = ++ls->change_seq;
-
- cg->member_count = member_list_entries;
- cg->joined_count = joined_list_entries;
- cg->remove_count = left_list_entries;
-
- for (i = 0; i < member_list_entries; i++) {
- memb = malloc(sizeof(struct member));
- if (!memb)
- goto fail_nomem;
- memset(memb, 0, sizeof(struct member));
- memb->nodeid = member_list[i].nodeid;
- list_add_tail(&memb->list, &cg->members);
- }
-
- for (i = 0; i < left_list_entries; i++) {
- memb = malloc(sizeof(struct member));
- if (!memb)
- goto fail_nomem;
- memset(memb, 0, sizeof(struct member));
- memb->nodeid = left_list[i].nodeid;
- if (left_list[i].reason == CPG_REASON_NODEDOWN ||
- left_list[i].reason == CPG_REASON_PROCDOWN) {
- memb->failed = 1;
- cg->failed_count++;
- }
- list_add_tail(&memb->list, &cg->removed);
-
- if (memb->failed)
- node_history_fail(ls, memb->nodeid, cg,
- left_list[i].reason);
- else
- node_history_left(ls, memb->nodeid, cg);
-
- log_group(ls, "add_change cg %u remove nodeid %d reason %d",
- cg->seq, memb->nodeid, left_list[i].reason);
-
- if (left_list[i].reason == CPG_REASON_PROCDOWN)
- kick_node_from_cluster(memb->nodeid);
- }
-
- for (i = 0; i < joined_list_entries; i++) {
- memb = find_memb(cg, joined_list[i].nodeid);
- if (!memb) {
- log_error("no member %d", joined_list[i].nodeid);
- error = -ENOENT;
- goto fail;
- }
- memb->added = 1;
-
- if (memb->nodeid == our_nodeid)
- cg->we_joined = 1;
- else
- node_history_init(ls, memb->nodeid, cg);
-
- log_group(ls, "add_change cg %u joined nodeid %d", cg->seq,
- memb->nodeid);
- }
-
- if (cg->we_joined) {
- log_group(ls, "add_change cg %u we joined", cg->seq);
- list_for_each_entry(memb, &cg->members, list)
- node_history_init(ls, memb->nodeid, cg);
- }
-
- log_group(ls, "add_change cg %u counts member %d joined %d remove %d "
- "failed %d", cg->seq, cg->member_count, cg->joined_count,
- cg->remove_count, cg->failed_count);
-
- list_add(&cg->list, &ls->changes);
- *cg_out = cg;
- return 0;
-
- fail_nomem:
- log_error("no memory");
- error = -ENOMEM;
- fail:
- free_cg(cg);
- return error;
-}
-
-static int we_left(const struct cpg_address *left_list,
- size_t left_list_entries)
-{
- int i;
-
- for (i = 0; i < left_list_entries; i++) {
- if (left_list[i].nodeid == our_nodeid)
- return 1;
- }
- return 0;
-}
-
-static void confchg_cb(cpg_handle_t handle,
- const struct cpg_name *group_name,
- const struct cpg_address *member_list,
- size_t member_list_entries,
- const struct cpg_address *left_list,
- size_t left_list_entries,
- const struct cpg_address *joined_list,
- size_t joined_list_entries)
-{
- struct lockspace *ls;
- struct change *cg;
- struct member *memb;
- int rv;
-
- log_config(group_name, member_list, member_list_entries,
- left_list, left_list_entries,
- joined_list, joined_list_entries);
-
- ls = find_ls_handle(handle);
- if (!ls) {
- log_error("confchg_cb no lockspace for cpg %s",
- group_name->value);
- return;
- }
-
- if (ls->leaving && we_left(left_list, left_list_entries)) {
- /* we called cpg_leave(), and this should be the final
- cpg callback we receive */
- log_group(ls, "confchg for our leave");
- stop_kernel(ls, 0);
- set_configfs_members(ls->name, 0, NULL, 0, NULL);
- set_sysfs_event_done(ls->name, 0);
- cpg_finalize(ls->cpg_handle);
- client_dead(ls->cpg_client);
- purge_plocks(ls, our_nodeid, 1);
- list_del(&ls->list);
- free_ls(ls);
- return;
- }
-
- rv = add_change(ls, member_list, member_list_entries,
- left_list, left_list_entries,
- joined_list, joined_list_entries, &cg);
- if (rv)
- return;
-
- stop_kernel(ls, cg->seq);
-
- list_for_each_entry(memb, &cg->removed, list)
- purge_plocks(ls, memb->nodeid, 0);
-
- apply_changes(ls);
-
-#if 0
- deadlk_confchg(ls, member_list, member_list_entries,
- left_list, left_list_entries,
- joined_list, joined_list_entries);
-#endif
-}
-
-static void dlm_header_in(struct dlm_header *hd)
-{
- hd->version[0] = le16_to_cpu(hd->version[0]);
- hd->version[1] = le16_to_cpu(hd->version[1]);
- hd->version[2] = le16_to_cpu(hd->version[2]);
- hd->type = le16_to_cpu(hd->type);
- hd->nodeid = le32_to_cpu(hd->nodeid);
- hd->to_nodeid = le32_to_cpu(hd->to_nodeid);
- hd->global_id = le32_to_cpu(hd->global_id);
- hd->flags = le32_to_cpu(hd->flags);
- hd->msgdata = le32_to_cpu(hd->msgdata);
- hd->msgdata2 = le32_to_cpu(hd->msgdata2);
-}
-
-/* after our join confchg, we want to ignore plock messages (see need_plocks
- checks below) until the point in time where the ckpt_node saves plock
- state (final start message received); at this time we want to shift from
- ignoring plock messages to saving plock messages to apply on top of the
- plock state that we read. */
-
-static void deliver_cb(cpg_handle_t handle,
- const struct cpg_name *group_name,
- uint32_t nodeid, uint32_t pid,
- void *data, size_t len)
-{
- struct lockspace *ls;
- struct dlm_header *hd;
- int ignore_plock;
-
- ls = find_ls_handle(handle);
- if (!ls) {
- log_error("deliver_cb no ls for cpg %s", group_name->value);
- return;
- }
-
- if (len < sizeof(*hd)) {
- log_error("deliver_cb short message %zd", len);
- return;
- }
-
- hd = (struct dlm_header *)data;
- dlm_header_in(hd);
-
- if (hd->version[0] != our_protocol.daemon_run[0] ||
- hd->version[1] != our_protocol.daemon_run[1]) {
- log_error("reject message from %d version %u.%u.%u vs %u.%u.%u",
- nodeid, hd->version[0], hd->version[1],
- hd->version[2], our_protocol.daemon_run[0],
- our_protocol.daemon_run[1],
- our_protocol.daemon_run[2]);
- return;
- }
-
- if (hd->nodeid != nodeid) {
- log_error("bad msg nodeid %d %d", hd->nodeid, nodeid);
- return;
- }
-
- ignore_plock = 0;
-
- switch (hd->type) {
- case DLM_MSG_START:
- receive_start(ls, hd, len);
- break;
-
- case DLM_MSG_PLOCK:
- if (ls->disable_plock)
- break;
- if (ls->need_plocks && !ls->save_plocks) {
- ignore_plock = 1;
- break;
- }
- if (cfgd_enable_plock)
- receive_plock(ls, hd, len);
- else
- log_error("msg %d nodeid %d enable_plock %d",
- hd->type, nodeid, cfgd_enable_plock);
- break;
-
- case DLM_MSG_PLOCK_OWN:
- if (ls->disable_plock)
- break;
- if (ls->need_plocks && !ls->save_plocks) {
- ignore_plock = 1;
- break;
- }
- if (cfgd_enable_plock && cfgd_plock_ownership)
- receive_own(ls, hd, len);
- else
- log_error("msg %d nodeid %d enable_plock %d owner %d",
- hd->type, nodeid, cfgd_enable_plock,
- cfgd_plock_ownership);
- break;
-
- case DLM_MSG_PLOCK_DROP:
- if (ls->disable_plock)
- break;
- if (ls->need_plocks && !ls->save_plocks) {
- ignore_plock = 1;
- break;
- }
- if (cfgd_enable_plock && cfgd_plock_ownership)
- receive_drop(ls, hd, len);
- else
- log_error("msg %d nodeid %d enable_plock %d owner %d",
- hd->type, nodeid, cfgd_enable_plock,
- cfgd_plock_ownership);
- break;
-
- case DLM_MSG_PLOCK_SYNC_LOCK:
- case DLM_MSG_PLOCK_SYNC_WAITER:
- if (ls->disable_plock)
- break;
- if (ls->need_plocks && !ls->save_plocks) {
- ignore_plock = 1;
- break;
- }
- if (cfgd_enable_plock && cfgd_plock_ownership)
- receive_sync(ls, hd, len);
- else
- log_error("msg %d nodeid %d enable_plock %d owner %d",
- hd->type, nodeid, cfgd_enable_plock,
- cfgd_plock_ownership);
- break;
-
- case DLM_MSG_PLOCKS_DATA:
- if (ls->disable_plock)
- break;
- if (cfgd_enable_plock)
- receive_plocks_data(ls, hd, len);
- else
- log_error("msg %d nodeid %d enable_plock %d",
- hd->type, nodeid, cfgd_enable_plock);
- break;
-
- case DLM_MSG_PLOCKS_DONE:
- if (ls->disable_plock)
- break;
- if (cfgd_enable_plock)
- receive_plocks_done(ls, hd, len);
- else
- log_error("msg %d nodeid %d enable_plock %d",
- hd->type, nodeid, cfgd_enable_plock);
- break;
-
-#if 0
- case DLM_MSG_DEADLK_CYCLE_START:
- if (cfgd_enable_deadlk)
- receive_cycle_start(ls, hd, len);
- else
- log_error("msg %d nodeid %d enable_deadlk %d",
- hd->type, nodeid, cfgd_enable_deadlk);
- break;
-
- case DLM_MSG_DEADLK_CYCLE_END:
- if (cfgd_enable_deadlk)
- receive_cycle_end(ls, hd, len);
- else
- log_error("msg %d nodeid %d enable_deadlk %d",
- hd->type, nodeid, cfgd_enable_deadlk);
- break;
-
- case DLM_MSG_DEADLK_CHECKPOINT_READY:
- if (cfgd_enable_deadlk)
- receive_checkpoint_ready(ls, hd, len);
- else
- log_error("msg %d nodeid %d enable_deadlk %d",
- hd->type, nodeid, cfgd_enable_deadlk);
- break;
-
- case DLM_MSG_DEADLK_CANCEL_LOCK:
- if (cfgd_enable_deadlk)
- receive_cancel_lock(ls, hd, len);
- else
- log_error("msg %d nodeid %d enable_deadlk %d",
- hd->type, nodeid, cfgd_enable_deadlk);
- break;
-#endif
-
- default:
- log_error("unknown msg type %d", hd->type);
- }
-
- if (ignore_plock)
- log_plock(ls, "msg %s nodeid %d need_plock ignore",
- msg_name(hd->type), nodeid);
-
- apply_changes(ls);
-}
-
-static cpg_callbacks_t cpg_callbacks = {
- .cpg_deliver_fn = deliver_cb,
- .cpg_confchg_fn = confchg_cb,
-};
-
-void update_flow_control_status(void)
-{
- cpg_flow_control_state_t flow_control_state;
- cpg_error_t error;
-
- error = cpg_flow_control_state_get(cpg_handle_daemon,
- &flow_control_state);
- if (error != CPG_OK) {
- log_error("cpg_flow_control_state_get %d", error);
- return;
- }
-
- if (flow_control_state == CPG_FLOW_CONTROL_ENABLED) {
- if (message_flow_control_on == 0) {
- log_debug("flow control on");
- }
- message_flow_control_on = 1;
- } else {
- if (message_flow_control_on) {
- log_debug("flow control off");
- }
- message_flow_control_on = 0;
- }
-}
-
-static void process_cpg_lockspace(int ci)
-{
- struct lockspace *ls;
- cpg_error_t error;
-
- ls = find_ls_ci(ci);
- if (!ls) {
- log_error("process_lockspace_cpg no lockspace for ci %d", ci);
- return;
- }
-
- error = cpg_dispatch(ls->cpg_handle, CPG_DISPATCH_ALL);
- if (error != CPG_OK) {
- log_error("cpg_dispatch error %d", error);
- return;
- }
-
- update_flow_control_status();
-}
-
-/* received an "online" uevent from dlm-kernel */
-
-int dlm_join_lockspace(struct lockspace *ls)
-{
- cpg_error_t error;
- cpg_handle_t h;
- struct cpg_name name;
- int i = 0, fd, ci, rv;
- int unused;
-
- rv = fence_in_progress(&unused);
- if (cfgd_enable_fencing && rv < 0) {
- log_error("dlm_join_lockspace no fence domain");
- rv = -1;
- goto fail_free;
- }
-
- error = cpg_initialize(&h, &cpg_callbacks);
- if (error != CPG_OK) {
- log_error("cpg_initialize error %d", error);
- rv = -1;
- goto fail_free;
- }
-
- cpg_fd_get(h, &fd);
-
- ci = client_add(fd, process_cpg_lockspace, NULL);
-
- list_add(&ls->list, &lockspaces);
-
- ls->cpg_handle = h;
- ls->cpg_client = ci;
- ls->cpg_fd = fd;
- ls->kernel_stopped = 1;
- ls->need_plocks = 1;
- ls->joining = 1;
-
- memset(&name, 0, sizeof(name));
- sprintf(name.value, "dlm:ls:%s", ls->name);
- name.length = strlen(name.value) + 1;
-
- /* TODO: allow global_id to be set in cluster.conf? */
- ls->global_id = cpgname_to_crc(name.value, name.length);
-
- retry:
- error = cpg_join(h, &name);
- if (error == CPG_ERR_TRY_AGAIN) {
- sleep(1);
- if (!(++i % 10))
- log_error("cpg_join error retrying");
- goto retry;
- }
- if (error != CPG_OK) {
- log_error("cpg_join error %d", error);
- cpg_finalize(h);
- rv = -1;
- goto fail;
- }
-
- return 0;
-
- fail:
- list_del(&ls->list);
- client_dead(ci);
- cpg_finalize(h);
- fail_free:
- set_sysfs_event_done(ls->name, rv);
- free_ls(ls);
- return rv;
-}
-
-/* received an "offline" uevent from dlm-kernel */
-
-int dlm_leave_lockspace(struct lockspace *ls)
-{
- cpg_error_t error;
- struct cpg_name name;
- int i = 0;
-
- ls->leaving = 1;
-
- memset(&name, 0, sizeof(name));
- sprintf(name.value, "dlm:ls:%s", ls->name);
- name.length = strlen(name.value) + 1;
-
- retry:
- error = cpg_leave(ls->cpg_handle, &name);
- if (error == CPG_ERR_TRY_AGAIN) {
- sleep(1);
- if (!(++i % 10))
- log_error("cpg_leave error retrying");
- goto retry;
- }
- if (error != CPG_OK)
- log_error("cpg_leave error %d", error);
-
- return 0;
-}
-
-static struct node *get_node_daemon(int nodeid)
-{
- struct node *node;
-
- list_for_each_entry(node, &daemon_nodes, list) {
- if (node->nodeid == nodeid)
- return node;
- }
- return NULL;
-}
-
-static void add_node_daemon(int nodeid)
-{
- struct node *node;
-
- if (get_node_daemon(nodeid))
- return;
-
- node = malloc(sizeof(struct node));
- if (!node) {
- log_error("add_node_daemon no mem");
- return;
- }
- memset(node, 0, sizeof(struct node));
- node->nodeid = nodeid;
- list_add_tail(&node->list, &daemon_nodes);
-}
-
-static void pv_in(struct protocol_version *pv)
-{
- pv->major = le16_to_cpu(pv->major);
- pv->minor = le16_to_cpu(pv->minor);
- pv->patch = le16_to_cpu(pv->patch);
- pv->flags = le16_to_cpu(pv->flags);
-}
-
-static void pv_out(struct protocol_version *pv)
-{
- pv->major = cpu_to_le16(pv->major);
- pv->minor = cpu_to_le16(pv->minor);
- pv->patch = cpu_to_le16(pv->patch);
- pv->flags = cpu_to_le16(pv->flags);
-}
-
-static void protocol_in(struct protocol *proto)
-{
- pv_in(&proto->dm_ver);
- pv_in(&proto->km_ver);
- pv_in(&proto->dr_ver);
- pv_in(&proto->kr_ver);
-}
-
-static void protocol_out(struct protocol *proto)
-{
- pv_out(&proto->dm_ver);
- pv_out(&proto->km_ver);
- pv_out(&proto->dr_ver);
- pv_out(&proto->kr_ver);
-}
-
-/* go through member list saved in last confchg, see if we have received a
- proto message from each */
-
-static int all_protocol_messages(void)
-{
- struct node *node;
- int i;
-
- if (!daemon_member_count)
- return 0;
-
- for (i = 0; i < daemon_member_count; i++) {
- node = get_node_daemon(daemon_member[i].nodeid);
- if (!node) {
- log_error("all_protocol_messages no node %d",
- daemon_member[i].nodeid);
- return 0;
- }
-
- if (!node->proto.daemon_max[0])
- return 0;
- }
- return 1;
-}
-
-static int pick_min_protocol(struct protocol *proto)
-{
- uint16_t mind[4];
- uint16_t mink[4];
- struct node *node;
- int i;
-
- memset(&mind, 0, sizeof(mind));
- memset(&mink, 0, sizeof(mink));
-
- /* first choose the minimum major */
-
- for (i = 0; i < daemon_member_count; i++) {
- node = get_node_daemon(daemon_member[i].nodeid);
- if (!node) {
- log_error("pick_min_protocol no node %d",
- daemon_member[i].nodeid);
- return -1;
- }
-
- if (!mind[0] || node->proto.daemon_max[0] < mind[0])
- mind[0] = node->proto.daemon_max[0];
-
- if (!mink[0] || node->proto.kernel_max[0] < mink[0])
- mink[0] = node->proto.kernel_max[0];
- }
-
- if (!mind[0] || !mink[0]) {
- log_error("pick_min_protocol zero major number");
- return -1;
- }
-
- /* second pick the minimum minor with the chosen major */
-
- for (i = 0; i < daemon_member_count; i++) {
- node = get_node_daemon(daemon_member[i].nodeid);
- if (!node)
- continue;
-
- if (mind[0] == node->proto.daemon_max[0]) {
- if (!mind[1] || node->proto.daemon_max[1] < mind[1])
- mind[1] = node->proto.daemon_max[1];
- }
-
- if (mink[0] == node->proto.kernel_max[0]) {
- if (!mink[1] || node->proto.kernel_max[1] < mink[1])
- mink[1] = node->proto.kernel_max[1];
- }
- }
-
- if (!mind[1] || !mink[1]) {
- log_error("pick_min_protocol zero minor number");
- return -1;
- }
-
- /* third pick the minimum patch with the chosen major.minor */
-
- for (i = 0; i < daemon_member_count; i++) {
- node = get_node_daemon(daemon_member[i].nodeid);
- if (!node)
- continue;
-
- if (mind[0] == node->proto.daemon_max[0] &&
- mind[1] == node->proto.daemon_max[1]) {
- if (!mind[2] || node->proto.daemon_max[2] < mind[2])
- mind[2] = node->proto.daemon_max[2];
- }
-
- if (mink[0] == node->proto.kernel_max[0] &&
- mink[1] == node->proto.kernel_max[1]) {
- if (!mink[2] || node->proto.kernel_max[2] < mink[2])
- mink[2] = node->proto.kernel_max[2];
- }
- }
-
- if (!mind[2] || !mink[2]) {
- log_error("pick_min_protocol zero patch number");
- return -1;
- }
-
- memcpy(&proto->daemon_run, &mind, sizeof(mind));
- memcpy(&proto->kernel_run, &mink, sizeof(mink));
- return 0;
-}
-
-static void receive_protocol(struct dlm_header *hd, int len)
-{
- struct protocol *p;
- struct node *node;
-
- p = (struct protocol *)((char *)hd + sizeof(struct dlm_header));
- protocol_in(p);
-
- if (len < sizeof(struct dlm_header) + sizeof(struct protocol)) {
- log_error("receive_protocol invalid len %d from %d",
- len, hd->nodeid);
- return;
- }
-
- /* zero is an invalid version value */
-
- if (!p->daemon_max[0] || !p->daemon_max[1] || !p->daemon_max[2] ||
- !p->kernel_max[0] || !p->kernel_max[1] || !p->kernel_max[2]) {
- log_error("receive_protocol invalid max value from %d "
- "daemon %u.%u.%u kernel %u.%u.%u", hd->nodeid,
- p->daemon_max[0], p->daemon_max[1], p->daemon_max[2],
- p->kernel_max[0], p->kernel_max[1], p->kernel_max[2]);
- return;
- }
-
- /* the run values will be zero until a version is set, after
- which none of the run values can be zero */
-
- if (p->daemon_run[0] && (!p->daemon_run[1] || !p->daemon_run[2] ||
- !p->kernel_run[0] || !p->kernel_run[1] || !p->kernel_run[2])) {
- log_error("receive_protocol invalid run value from %d "
- "daemon %u.%u.%u kernel %u.%u.%u", hd->nodeid,
- p->daemon_run[0], p->daemon_run[1], p->daemon_run[2],
- p->kernel_run[0], p->kernel_run[1], p->kernel_run[2]);
- return;
- }
-
- /* if we have zero run values, and this msg has non-zero run values,
- then adopt them as ours; otherwise save this proto message */
-
- if (our_protocol.daemon_run[0])
- return;
-
- if (p->daemon_run[0]) {
- memcpy(&our_protocol.daemon_run, &p->daemon_run,
- sizeof(struct protocol_version));
- memcpy(&our_protocol.kernel_run, &p->kernel_run,
- sizeof(struct protocol_version));
- log_debug("run protocol from nodeid %d", hd->nodeid);
- return;
- }
-
- /* save this node's proto so we can tell when we've got all, and
- use it to select a minimum protocol from all */
-
- node = get_node_daemon(hd->nodeid);
- if (!node) {
- log_error("receive_protocol no node %d", hd->nodeid);
- return;
- }
- memcpy(&node->proto, p, sizeof(struct protocol));
-}
-
-static void send_protocol(struct protocol *proto)
-{
- struct dlm_header *hd;
- struct protocol *pr;
- char *buf;
- int len;
-
- len = sizeof(struct dlm_header) + sizeof(struct protocol);
- buf = malloc(len);
- if (!buf) {
- log_error("send_protocol no mem %d", len);
- return;
- }
- memset(buf, 0, len);
-
- hd = (struct dlm_header *)buf;
- pr = (struct protocol *)(buf + sizeof(*hd));
-
- hd->type = cpu_to_le16(DLM_MSG_PROTOCOL);
- hd->nodeid = cpu_to_le32(our_nodeid);
-
- memcpy(pr, proto, sizeof(struct protocol));
- protocol_out(pr);
-
- _send_message(cpg_handle_daemon, buf, len, DLM_MSG_PROTOCOL);
-}
-
-int set_protocol(void)
-{
- struct protocol proto;
- struct pollfd pollfd;
- int sent_proposal = 0;
- int rv;
-
- memset(&pollfd, 0, sizeof(pollfd));
- pollfd.fd = cpg_fd_daemon;
- pollfd.events = POLLIN;
-
- while (1) {
- if (our_protocol.daemon_run[0])
- break;
-
- if (!sent_proposal && all_protocol_messages()) {
- /* propose a protocol; look through info from all
- nodes and pick the min for both daemon and kernel,
- and propose that */
-
- sent_proposal = 1;
-
- /* copy our max values */
- memcpy(&proto, &our_protocol, sizeof(struct protocol));
-
- rv = pick_min_protocol(&proto);
- if (rv < 0)
- return rv;
-
- log_debug("set_protocol member_count %d propose "
- "daemon %u.%u.%u kernel %u.%u.%u",
- daemon_member_count,
- proto.daemon_run[0], proto.daemon_run[1],
- proto.daemon_run[2], proto.kernel_run[0],
- proto.kernel_run[1], proto.kernel_run[2]);
-
- send_protocol(&proto);
- }
-
- /* only process messages/events from daemon cpg until protocol
- is established */
-
- rv = poll(&pollfd, 1, -1);
- if (rv == -1 && errno == EINTR) {
- if (daemon_quit)
- return -1;
- continue;
- }
- if (rv < 0) {
- log_error("set_protocol poll errno %d", errno);
- return -1;
- }
-
- if (pollfd.revents & POLLIN)
- process_cpg_daemon(0);
- if (pollfd.revents & (POLLERR | POLLHUP | POLLNVAL)) {
- log_error("set_protocol poll revents %u",
- pollfd.revents);
- return -1;
- }
- }
-
- if (our_protocol.daemon_run[0] != our_protocol.daemon_max[0] ||
- our_protocol.daemon_run[1] > our_protocol.daemon_max[1]) {
- log_error("incompatible daemon protocol run %u.%u.%u max %u.%u.%u",
- our_protocol.daemon_run[0],
- our_protocol.daemon_run[1],
- our_protocol.daemon_run[2],
- our_protocol.daemon_max[0],
- our_protocol.daemon_max[1],
- our_protocol.daemon_max[2]);
- return -1;
- }
-
- if (our_protocol.kernel_run[0] != our_protocol.kernel_max[0] ||
- our_protocol.kernel_run[1] > our_protocol.kernel_max[1]) {
- log_error("incompatible kernel protocol run %u.%u.%u max %u.%u.%u",
- our_protocol.kernel_run[0],
- our_protocol.kernel_run[1],
- our_protocol.kernel_run[2],
- our_protocol.kernel_max[0],
- our_protocol.kernel_max[1],
- our_protocol.kernel_max[2]);
- return -1;
- }
-
- log_debug("daemon run %u.%u.%u max %u.%u.%u "
- "kernel run %u.%u.%u max %u.%u.%u",
- our_protocol.daemon_run[0],
- our_protocol.daemon_run[1],
- our_protocol.daemon_run[2],
- our_protocol.daemon_max[0],
- our_protocol.daemon_max[1],
- our_protocol.daemon_max[2],
- our_protocol.kernel_run[0],
- our_protocol.kernel_run[1],
- our_protocol.kernel_run[2],
- our_protocol.kernel_max[0],
- our_protocol.kernel_max[1],
- our_protocol.kernel_max[2]);
-
- send_protocol(&our_protocol);
- return 0;
-}
-
-static void deliver_cb_daemon(cpg_handle_t handle,
- const struct cpg_name *group_name,
- uint32_t nodeid, uint32_t pid,
- void *data, size_t len)
-{
- struct dlm_header *hd;
-
- if (len < sizeof(*hd)) {
- log_error("deliver_cb short message %zd", len);
- return;
- }
-
- hd = (struct dlm_header *)data;
- dlm_header_in(hd);
-
- switch (hd->type) {
- case DLM_MSG_PROTOCOL:
- receive_protocol(hd, len);
- break;
- default:
- log_error("deliver_cb_daemon unknown msg type %d", hd->type);
- }
-}
-
-static void confchg_cb_daemon(cpg_handle_t handle,
- const struct cpg_name *group_name,
- const struct cpg_address *member_list,
- size_t member_list_entries,
- const struct cpg_address *left_list,
- size_t left_list_entries,
- const struct cpg_address *joined_list,
- size_t joined_list_entries)
-{
- int i;
-
- log_config(group_name, member_list, member_list_entries,
- left_list, left_list_entries,
- joined_list, joined_list_entries);
-
- if (joined_list_entries)
- send_protocol(&our_protocol);
-
- memset(&daemon_member, 0, sizeof(daemon_member));
- daemon_member_count = member_list_entries;
-
- for (i = 0; i < member_list_entries; i++) {
- daemon_member[i] = member_list[i];
- add_node_daemon(member_list[i].nodeid);
- }
-}
-
-static cpg_callbacks_t cpg_callbacks_daemon = {
- .cpg_deliver_fn = deliver_cb_daemon,
- .cpg_confchg_fn = confchg_cb_daemon,
-};
-
-void process_cpg_daemon(int ci)
-{
- cpg_error_t error;
-
- error = cpg_dispatch(cpg_handle_daemon, CPG_DISPATCH_ALL);
- if (error != CPG_OK)
- log_error("daemon cpg_dispatch error %d", error);
-}
-
-int setup_cpg_daemon(void)
-{
- cpg_error_t error;
- struct cpg_name name;
- int i = 0;
-
- INIT_LIST_HEAD(&daemon_nodes);
-
- memset(&our_protocol, 0, sizeof(our_protocol));
- our_protocol.daemon_max[0] = 2;
- our_protocol.daemon_max[1] = 1;
- our_protocol.daemon_max[2] = 1;
- our_protocol.kernel_max[0] = 1;
- our_protocol.kernel_max[1] = 1;
- our_protocol.kernel_max[2] = 1;
-
- error = cpg_initialize(&cpg_handle_daemon, &cpg_callbacks_daemon);
- if (error != CPG_OK) {
- log_error("daemon cpg_initialize error %d", error);
- return -1;
- }
-
- cpg_fd_get(cpg_handle_daemon, &cpg_fd_daemon);
-
- memset(&name, 0, sizeof(name));
- sprintf(name.value, "dlm:controld");
- name.length = strlen(name.value) + 1;
-
- retry:
- error = cpg_join(cpg_handle_daemon, &name);
- if (error == CPG_ERR_TRY_AGAIN) {
- sleep(1);
- if (!(++i % 10))
- log_error("daemon cpg_join error retrying");
- goto retry;
- }
- if (error != CPG_OK) {
- log_error("daemon cpg_join error %d", error);
- goto fail;
- }
-
- log_debug("setup_cpg_daemon %d", cpg_fd_daemon);
- return cpg_fd_daemon;
-
- fail:
- cpg_finalize(cpg_handle_daemon);
- return -1;
-}
-
-void close_cpg_daemon(void)
-{
- struct lockspace *ls;
- cpg_error_t error;
- struct cpg_name name;
- int i = 0;
-
- if (!cpg_handle_daemon)
- return;
- if (cluster_down)
- goto fin;
-
- memset(&name, 0, sizeof(name));
- sprintf(name.value, "dlm:controld");
- name.length = strlen(name.value) + 1;
-
- retry:
- error = cpg_leave(cpg_handle_daemon, &name);
- if (error == CPG_ERR_TRY_AGAIN) {
- sleep(1);
- if (!(++i % 10))
- log_error("daemon cpg_leave error retrying");
- goto retry;
- }
- if (error != CPG_OK)
- log_error("daemon cpg_leave error %d", error);
- fin:
- list_for_each_entry(ls, &lockspaces, list) {
- if (ls->cpg_handle)
- cpg_finalize(ls->cpg_handle);
- }
- cpg_finalize(cpg_handle_daemon);
-}
-
-/* fs_controld has seen nodedown for nodeid; it's now ok for dlm to do
- recovery for the failed node */
-
-int set_fs_notified(struct lockspace *ls, int nodeid)
-{
- struct node *node;
-
- /* this shouldn't happen */
- node = get_node_history(ls, nodeid);
- if (!node) {
- log_error("set_fs_notified no nodeid %d", nodeid);
- return -ESRCH;
- }
-
- if (!find_memb(ls->started_change, nodeid)) {
- log_group(ls, "set_fs_notified %d not in ls", nodeid);
- return 0;
- }
-
- /* this can happen, we haven't seen a nodedown for this node yet,
- but we should soon */
- if (!node->check_fs) {
- log_group(ls, "set_fs_notified %d zero check_fs", nodeid);
- return -EAGAIN;
- }
-
- log_group(ls, "set_fs_notified nodeid %d", nodeid);
- node->fs_notified = 1;
- return 0;
-}
-
-int set_lockspace_info(struct lockspace *ls, struct dlmc_lockspace *lockspace)
-{
- struct change *cg, *last = NULL;
-
- strncpy(lockspace->name, ls->name, DLM_LOCKSPACE_LEN);
- lockspace->global_id = ls->global_id;
-
- if (ls->joining)
- lockspace->flags |= DLMC_LF_JOINING;
- if (ls->leaving)
- lockspace->flags |= DLMC_LF_LEAVING;
- if (ls->kernel_stopped)
- lockspace->flags |= DLMC_LF_KERNEL_STOPPED;
- if (ls->fs_registered)
- lockspace->flags |= DLMC_LF_FS_REGISTERED;
- if (ls->need_plocks)
- lockspace->flags |= DLMC_LF_NEED_PLOCKS;
- if (ls->save_plocks)
- lockspace->flags |= DLMC_LF_SAVE_PLOCKS;
-
- if (!ls->started_change)
- goto next;
-
- cg = ls->started_change;
-
- lockspace->cg_prev.member_count = cg->member_count;
- lockspace->cg_prev.joined_count = cg->joined_count;
- lockspace->cg_prev.remove_count = cg->remove_count;
- lockspace->cg_prev.failed_count = cg->failed_count;
- lockspace->cg_prev.combined_seq = cg->combined_seq;
- lockspace->cg_prev.seq = cg->seq;
-
- next:
- if (list_empty(&ls->changes))
- goto out;
-
- list_for_each_entry(cg, &ls->changes, list)
- last = cg;
-
- cg = list_first_entry(&ls->changes, struct change, list);
-
- lockspace->cg_next.member_count = cg->member_count;
- lockspace->cg_next.joined_count = cg->joined_count;
- lockspace->cg_next.remove_count = cg->remove_count;
- lockspace->cg_next.failed_count = cg->failed_count;
- lockspace->cg_next.combined_seq = last->seq;
- lockspace->cg_next.seq = cg->seq;
-
- if (cg->state == CGST_WAIT_CONDITIONS)
- lockspace->cg_next.wait_condition = 4;
- if (poll_fencing)
- lockspace->cg_next.wait_condition = 1;
- else if (poll_quorum)
- lockspace->cg_next.wait_condition = 2;
- else if (poll_fs)
- lockspace->cg_next.wait_condition = 3;
-
- if (cg->state == CGST_WAIT_MESSAGES)
- lockspace->cg_next.wait_messages = 1;
- out:
- return 0;
-}
-
-static int _set_node_info(struct lockspace *ls, struct change *cg, int nodeid,
- struct dlmc_node *node)
-{
- struct member *m = NULL;
- struct node *n;
-
- node->nodeid = nodeid;
-
- if (cg)
- m = find_memb(cg, nodeid);
- if (!m)
- goto history;
-
- node->flags |= DLMC_NF_MEMBER;
-
- if (m->start)
- node->flags |= DLMC_NF_START;
- if (m->disallowed)
- node->flags |= DLMC_NF_DISALLOWED;
-
- history:
- n = get_node_history(ls, nodeid);
- if (!n)
- goto out;
-
- if (n->check_fencing)
- node->flags |= DLMC_NF_CHECK_FENCING;
- if (n->check_quorum)
- node->flags |= DLMC_NF_CHECK_QUORUM;
- if (n->check_fs)
- node->flags |= DLMC_NF_CHECK_FS;
-
- node->added_seq = n->added_seq;
- node->removed_seq = n->removed_seq;
- node->failed_reason = n->failed_reason;
- out:
- return 0;
-}
-
-int set_node_info(struct lockspace *ls, int nodeid, struct dlmc_node *node)
-{
- struct change *cg;
-
- if (!list_empty(&ls->changes)) {
- cg = list_first_entry(&ls->changes, struct change, list);
- return _set_node_info(ls, cg, nodeid, node);
- }
-
- return _set_node_info(ls, ls->started_change, nodeid, node);
-}
-
-int set_lockspaces(int *count, struct dlmc_lockspace **lss_out)
-{
- struct lockspace *ls;
- struct dlmc_lockspace *lss, *lsp;
- int ls_count = 0;
-
- list_for_each_entry(ls, &lockspaces, list)
- ls_count++;
-
- lss = malloc(ls_count * sizeof(struct dlmc_lockspace));
- if (!lss)
- return -ENOMEM;
- memset(lss, 0, ls_count * sizeof(struct dlmc_lockspace));
-
- lsp = lss;
- list_for_each_entry(ls, &lockspaces, list) {
- set_lockspace_info(ls, lsp++);
- }
-
- *count = ls_count;
- *lss_out = lss;
- return 0;
-}
-
-int set_lockspace_nodes(struct lockspace *ls, int option, int *node_count,
- struct dlmc_node **nodes_out)
-{
- struct change *cg;
- struct node *n;
- struct dlmc_node *nodes = NULL, *nodep;
- struct member *memb;
- int count = 0;
-
- if (option == DLMC_NODES_ALL) {
- if (!list_empty(&ls->changes))
- cg = list_first_entry(&ls->changes, struct change,list);
- else
- cg = ls->started_change;
-
- list_for_each_entry(n, &ls->node_history, list)
- count++;
-
- } else if (option == DLMC_NODES_MEMBERS) {
- if (!ls->started_change)
- goto out;
- cg = ls->started_change;
- count = cg->member_count;
-
- } else if (option == DLMC_NODES_NEXT) {
- if (list_empty(&ls->changes))
- goto out;
- cg = list_first_entry(&ls->changes, struct change, list);
- count = cg->member_count;
- } else
- goto out;
-
- nodes = malloc(count * sizeof(struct dlmc_node));
- if (!nodes)
- return -ENOMEM;
- memset(nodes, 0, count * sizeof(struct dlmc_node));
- nodep = nodes;
-
- if (option == DLMC_NODES_ALL) {
- list_for_each_entry(n, &ls->node_history, list)
- _set_node_info(ls, cg, n->nodeid, nodep++);
- } else {
- list_for_each_entry(memb, &cg->members, list)
- _set_node_info(ls, cg, memb->nodeid, nodep++);
- }
- out:
- *node_count = count;
- *nodes_out = nodes;
- return 0;
-}
-
diff --git a/group/dlm_controld/crc.c b/group/dlm_controld/crc.c
deleted file mode 100644
index ff8c1d3..0000000
--- a/group/dlm_controld/crc.c
+++ /dev/null
@@ -1,72 +0,0 @@
-#include "dlm_daemon.h"
-
-static const uint32_t crc_32_tab[] = {
- 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
- 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
- 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
- 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
- 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
- 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
- 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
- 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
- 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
- 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
- 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
- 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
- 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
- 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
- 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
- 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
- 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
- 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
- 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
- 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
- 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
- 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
- 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
- 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
- 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
- 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
- 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
- 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
- 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
- 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
- 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
- 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
-};
-
-/**
- *
- * Copied from:
- *
- * gfs2_disk_hash - hash an array of data
- * @data: the data to be hashed
- * @len: the length of data to be hashed
- *
- * This function must produce the same results as the one in the kernel:
- * crc32_le(0xFFFFFFFF, data, len) ^ 0xFFFFFFFF
- *
- * Take some data and convert it to a 32-bit hash.
- *
- * The hash function is a 32-bit CRC of the data. The algorithm uses
- * the crc_32_tab table above.
- *
- * This may not be the fastest hash function, but it does a fair bit better
- * at providing uniform results than the others I've looked at. That's
- * really important for efficient directories.
- *
- * Returns: the hash
- */
-
-uint32_t cpgname_to_crc(const char *data, int len)
-{
- uint32_t hash = 0xFFFFFFFF;
-
- for (; len--; data++)
- hash = crc_32_tab[(hash ^ *data) & 0xFF] ^ (hash >> 8);
-
- hash = ~hash;
-
- return hash;
-}
-
diff --git a/group/dlm_controld/deadlock.c b/group/dlm_controld/deadlock.c
deleted file mode 100644
index bd1d68c..0000000
--- a/group/dlm_controld/deadlock.c
+++ /dev/null
@@ -1,1550 +0,0 @@
-#include "dlm_daemon.h"
-#include "config.h"
-#include "libdlm.h"
-
-static SaCkptHandleT global_ckpt_h;
-static SaCkptCallbacksT callbacks = { 0, 0 };
-static SaVersionT version = { 'B', 1, 1 };
-static char section_buf[10 * 1024 * 1024]; /* 10MB of pack_lock's enough? */
-static uint32_t section_len;
-static uint32_t section_max;
-
-struct node {
- struct list_head list;
- int nodeid;
- int checkpoint_ready; /* we've read its ckpt */
- int in_cycle; /* participating in cycle */
-};
-
-enum {
- LOCAL_COPY = 1,
- MASTER_COPY = 2,
-};
-
-/* from linux/fs/dlm/dlm_internal.h */
-#define DLM_LKSTS_WAITING 1
-#define DLM_LKSTS_GRANTED 2
-#define DLM_LKSTS_CONVERT 3
-
-struct pack_lock {
- uint64_t xid;
- uint32_t id;
- int nodeid;
- uint32_t remid;
- int ownpid;
- uint32_t exflags;
- uint32_t flags;
- int8_t status;
- int8_t grmode;
- int8_t rqmode;
- int8_t copy;
-};
-
-struct dlm_rsb {
- struct list_head list;
- struct list_head locks;
- char name[DLM_RESNAME_MAXLEN];
- int len;
-};
-
-/* information is saved in the lkb, and lkb->lock, from the perspective of the
- local or master copy, not the process copy */
-
-struct dlm_lkb {
- struct list_head list; /* r->locks */
- struct pack_lock lock; /* data from debugfs/checkpoint */
- int home; /* node where the lock owner lives*/
- struct dlm_rsb *rsb; /* lock is on resource */
- struct trans *trans; /* lock owned by this transaction */
- struct list_head trans_list; /* tr->locks */
- struct trans *waitfor_trans; /* the trans that's holding the
- lock that's blocking us */
-};
-
-/* waitfor pointers alloc'ed 4 at at time */
-#define TR_NALLOC 4
-
-struct trans {
- struct list_head list;
- struct list_head locks;
- uint64_t xid;
- int others_waiting_on_us; /* count of trans's
- pointing to us in
- waitfor */
- int waitfor_alloc;
- int waitfor_count; /* count of in-use
- waitfor slots and
- num of trans's we're
- waiting on */
- struct trans **waitfor; /* waitfor_alloc trans
- pointers */
-};
-
-static const int __dlm_compat_matrix[8][8] = {
- /* UN NL CR CW PR PW EX PD */
- {1, 1, 1, 1, 1, 1, 1, 0}, /* UN */
- {1, 1, 1, 1, 1, 1, 1, 0}, /* NL */
- {1, 1, 1, 1, 1, 1, 0, 0}, /* CR */
- {1, 1, 1, 1, 0, 0, 0, 0}, /* CW */
- {1, 1, 1, 0, 1, 0, 0, 0}, /* PR */
- {1, 1, 1, 0, 0, 0, 0, 0}, /* PW */
- {1, 1, 0, 0, 0, 0, 0, 0}, /* EX */
- {0, 0, 0, 0, 0, 0, 0, 0} /* PD */
-};
-
-static inline int dlm_modes_compat(int mode1, int mode2)
-{
- return __dlm_compat_matrix[mode1 + 1][mode2 + 1];
-}
-
-static const char *status_str(int lksts)
-{
- switch (lksts) {
- case DLM_LKSTS_WAITING:
- return "W";
- case DLM_LKSTS_GRANTED:
- return "G";
- case DLM_LKSTS_CONVERT:
- return "C";
- }
- return "?";
-}
-
-static void free_resources(struct lockspace *ls)
-{
- struct dlm_rsb *r, *r_safe;
- struct dlm_lkb *lkb, *lkb_safe;
-
- list_for_each_entry_safe(r, r_safe, &ls->resources, list) {
- list_for_each_entry_safe(lkb, lkb_safe, &r->locks, list) {
- list_del(&lkb->list);
- if (!list_empty(&lkb->trans_list))
- list_del(&lkb->trans_list);
- free(lkb);
- }
- list_del(&r->list);
- free(r);
- }
-}
-
-static void free_transactions(struct lockspace *ls)
-{
- struct trans *tr, *tr_safe;
-
- list_for_each_entry_safe(tr, tr_safe, &ls->transactions, list) {
- list_del(&tr->list);
- if (tr->waitfor)
- free(tr->waitfor);
- free(tr);
- }
-}
-
-static void disable_deadlock(void)
-{
- log_error("FIXME: deadlock detection disabled");
-}
-
-void setup_deadlock(void)
-{
- SaAisErrorT rv;
-
- if (!cfgd_enable_deadlk)
- return;
-
- rv = saCkptInitialize(&global_ckpt_h, &callbacks, &version);
- if (rv != SA_AIS_OK)
- log_error("ckpt init error %d", rv);
-}
-
-static struct dlm_rsb *get_resource(struct lockspace *ls, char *name, int len)
-{
- struct dlm_rsb *r;
-
- list_for_each_entry(r, &ls->resources, list) {
- if (r->len == len && !strncmp(r->name, name, len))
- return r;
- }
-
- r = malloc(sizeof(struct dlm_rsb));
- if (!r) {
- log_error("get_resource: no memory");
- disable_deadlock();
- return NULL;
- }
- memset(r, 0, sizeof(struct dlm_rsb));
- memcpy(r->name, name, len);
- r->len = len;
- INIT_LIST_HEAD(&r->locks);
- list_add(&r->list, &ls->resources);
- return r;
-}
-
-static struct dlm_lkb *create_lkb(void)
-{
- struct dlm_lkb *lkb;
-
- lkb = malloc(sizeof(struct dlm_lkb));
- if (!lkb) {
- log_error("create_lkb: no memory");
- disable_deadlock();
- } else {
- memset(lkb, 0, sizeof(struct dlm_lkb));
- INIT_LIST_HEAD(&lkb->list);
- INIT_LIST_HEAD(&lkb->trans_list);
- }
- return lkb;
-}
-
-static void add_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb)
-{
- list_add(&lkb->list, &r->locks);
- lkb->rsb = r;
-}
-
-/* from linux/fs/dlm/dlm_internal.h */
-#define IFL_MSTCPY 0x00010000
-
-/* called on a lock that's just been read from debugfs */
-
-static void set_copy(struct pack_lock *lock)
-{
- uint32_t id, remid;
-
- if (!lock->nodeid)
- lock->copy = LOCAL_COPY;
- else if (lock->flags & IFL_MSTCPY)
- lock->copy = MASTER_COPY;
- else {
- /* process copy lock is converted to a partial master copy
- lock that will be combined with the real master copy */
- lock->copy = MASTER_COPY;
- id = lock->id;
- remid = lock->remid;
- lock->id = remid;
- lock->remid = id;
- lock->nodeid = our_nodeid;
- }
-}
-
-/* xid is always zero in the real master copy, xid should always be non-zero
- in the partial master copy (what was a process copy) */
-/* TODO: confirm or enforce that the partial will always have non-zero xid */
-
-static int partial_master_copy(struct pack_lock *lock)
-{
- return (lock->xid != 0);
-}
-
-static struct dlm_lkb *get_lkb(struct dlm_rsb *r, struct pack_lock *lock)
-{
- struct dlm_lkb *lkb;
-
- if (lock->copy != MASTER_COPY)
- goto out;
-
- list_for_each_entry(lkb, &r->locks, list) {
- if (lkb->lock.nodeid == lock->nodeid &&
- lkb->lock.id == lock->id)
- return lkb;
- }
- out:
- return create_lkb();
-}
-
-static struct dlm_lkb *add_lock(struct lockspace *ls, struct dlm_rsb *r,
- int from_nodeid, struct pack_lock *lock)
-{
- struct dlm_lkb *lkb;
-
- lkb = get_lkb(r, lock);
- if (!lkb)
- return NULL;
-
- switch (lock->copy) {
- case LOCAL_COPY:
- lkb->lock.xid = lock->xid;
- lkb->lock.nodeid = lock->nodeid;
- lkb->lock.id = lock->id;
- lkb->lock.remid = lock->remid;
- lkb->lock.ownpid = lock->ownpid;
- lkb->lock.exflags = lock->exflags;
- lkb->lock.flags = lock->flags;
- lkb->lock.status = lock->status;
- lkb->lock.grmode = lock->grmode;
- lkb->lock.rqmode = lock->rqmode;
- lkb->lock.copy = LOCAL_COPY;
- lkb->home = from_nodeid;
-
- log_group(ls, "add %s local nodeid %d id %x remid %x xid %llx",
- r->name, lock->nodeid, lock->id, lock->remid,
- (unsigned long long)lock->xid);
- break;
-
- case MASTER_COPY:
- if (partial_master_copy(lock)) {
- lkb->lock.xid = lock->xid;
- lkb->lock.nodeid = lock->nodeid;
- lkb->lock.id = lock->id;
- lkb->lock.remid = lock->remid;
- lkb->lock.copy = MASTER_COPY;
- } else {
- /* only set xid from partial master copy above */
- lkb->lock.nodeid = lock->nodeid;
- lkb->lock.id = lock->id;
- lkb->lock.remid = lock->remid;
- lkb->lock.copy = MASTER_COPY;
- /* set other fields from real master copy */
- lkb->lock.ownpid = lock->ownpid;
- lkb->lock.exflags = lock->exflags;
- lkb->lock.flags = lock->flags;
- lkb->lock.status = lock->status;
- lkb->lock.grmode = lock->grmode;
- lkb->lock.rqmode = lock->rqmode;
- }
- lkb->home = lock->nodeid;
-
- log_group(ls, "add %s master nodeid %d id %x remid %x xid %llx",
- r->name, lock->nodeid, lock->id, lock->remid,
- (unsigned long long)lock->xid);
- break;
- }
-
- if (list_empty(&lkb->list))
- add_lkb(r, lkb);
- return lkb;
-}
-
-static void parse_r_name(char *line, char *name)
-{
- char *p;
- int i = 0;
- int begin = 0;
-
- for (p = line; ; p++) {
- if (*p == '"') {
- if (begin)
- break;
- begin = 1;
- continue;
- }
- if (begin)
- name[i++] = *p;
- }
-}
-
-#define LOCK_LINE_MAX 1024
-
-static int read_debugfs_locks(struct lockspace *ls)
-{
- FILE *file;
- char path[PATH_MAX];
- char line[LOCK_LINE_MAX];
- struct dlm_rsb *r;
- struct pack_lock lock;
- char r_name[65];
- unsigned long long xid;
- unsigned int waiting;
- int r_nodeid;
- int r_len;
- int rv;
-
- snprintf(path, PATH_MAX, "/sys/kernel/debug/dlm/%s_locks", ls->name);
-
- file = fopen(path, "r");
- if (!file)
- return -1;
-
- /* skip the header on the first line */
- if (!fgets(line, LOCK_LINE_MAX, file)) {
- log_error("Unable to read %s: %d", path, errno);
- goto out;
- }
-
- while (fgets(line, LOCK_LINE_MAX, file)) {
- memset(&lock, 0, sizeof(struct pack_lock));
-
- rv = sscanf(line, "%x %d %x %u %llu %x %x %hhd %hhd %hhd %u %d %d",
- &lock.id,
- &lock.nodeid,
- &lock.remid,
- &lock.ownpid,
- &xid,
- &lock.exflags,
- &lock.flags,
- &lock.status,
- &lock.grmode,
- &lock.rqmode,
- &waiting,
- &r_nodeid,
- &r_len);
-
- lock.xid = xid; /* hack to avoid warning */
-
- if (rv != 13) {
- log_error("invalid debugfs line %d: %s", rv, line);
- goto out;
- }
-
- memset(r_name, 0, sizeof(r_name));
- parse_r_name(line, r_name);
-
- r = get_resource(ls, r_name, r_len);
- if (!r)
- break;
-
- set_copy(&lock);
- add_lock(ls, r, our_nodeid, &lock);
- }
- out:
- fclose(file);
- return 0;
-}
-
-static int read_checkpoint_locks(struct lockspace *ls, int from_nodeid,
- char *numbuf, int buflen)
-{
- struct dlm_rsb *r;
- struct pack_lock *lock;
- int count = section_len / sizeof(struct pack_lock);
- int i;
-
- r = get_resource(ls, numbuf, buflen - 1);
- if (!r)
- return -1;
-
- lock = (struct pack_lock *) &section_buf;
-
- for (i = 0; i < count; i++) {
- lock->xid = le64_to_cpu(lock->xid);
- lock->id = le32_to_cpu(lock->id);
- lock->nodeid = le32_to_cpu(lock->nodeid);
- lock->remid = le32_to_cpu(lock->remid);
- lock->ownpid = le32_to_cpu(lock->ownpid);
- lock->exflags = le32_to_cpu(lock->exflags);
- lock->flags = le32_to_cpu(lock->flags);
-
- add_lock(ls, r, from_nodeid, lock);
- lock++;
- }
- return 0;
-}
-
-static int pack_lkb_list(struct list_head *q, struct pack_lock **lockp)
-{
- struct dlm_lkb *lkb;
- struct pack_lock *lock = *lockp;
- int count = 0;
-
- list_for_each_entry(lkb, q, list) {
- if (count + 1 > section_max) {
- log_error("too many locks %d for ckpt buf", count);
- break;
- }
-
- lock->xid = cpu_to_le64(lkb->lock.xid);
- lock->id = cpu_to_le32(lkb->lock.id);
- lock->nodeid = cpu_to_le32(lkb->lock.nodeid);
- lock->remid = cpu_to_le32(lkb->lock.remid);
- lock->ownpid = cpu_to_le32(lkb->lock.ownpid);
- lock->exflags = cpu_to_le32(lkb->lock.exflags);
- lock->flags = cpu_to_le32(lkb->lock.flags);
- lock->status = lkb->lock.status;
- lock->grmode = lkb->lock.grmode;
- lock->rqmode = lkb->lock.rqmode;
- lock->copy = lkb->lock.copy;
-
- lock++;
- count++;
- }
- return count;
-}
-
-static void pack_section_buf(struct lockspace *ls, struct dlm_rsb *r)
-{
- struct pack_lock *lock;
- int count;
-
- memset(&section_buf, 0, sizeof(section_buf));
- section_max = sizeof(section_buf) / sizeof(struct pack_lock);
-
- lock = (struct pack_lock *) &section_buf;
-
- count = pack_lkb_list(&r->locks, &lock);
-
- section_len = count * sizeof(struct pack_lock);
-}
-
-static int _unlink_checkpoint(struct lockspace *ls, SaNameT *name)
-{
- SaCkptCheckpointHandleT h;
- SaCkptCheckpointDescriptorT s;
- SaAisErrorT rv;
- int ret = 0;
- int retries;
-
- h = (SaCkptCheckpointHandleT) ls->deadlk_ckpt_handle;
- log_group(ls, "unlink ckpt %llx", (unsigned long long)h);
-
- retries = 0;
- unlink_retry:
- rv = saCkptCheckpointUnlink(global_ckpt_h, name);
- if (rv == SA_AIS_ERR_TRY_AGAIN) {
- log_group(ls, "unlink ckpt retry");
- sleep(1);
- if (retries++ < 10)
- goto unlink_retry;
- }
- if (rv == SA_AIS_OK)
- goto out_close;
- if (!h)
- goto out;
-
- log_error("unlink ckpt error %d %s", rv, ls->name);
- ret = -1;
-
- retries = 0;
- status_retry:
- rv = saCkptCheckpointStatusGet(h, &s);
- if (rv == SA_AIS_ERR_TRY_AGAIN) {
- log_group(ls, "unlink ckpt status retry");
- sleep(1);
- if (retries++ < 10)
- goto status_retry;
- }
- if (rv != SA_AIS_OK) {
- log_error("unlink ckpt status error %d %s", rv, ls->name);
- goto out_close;
- }
-
- log_group(ls, "unlink ckpt status: size %llu, max sections %u, "
- "max section size %llu, section count %u, mem %u",
- (unsigned long long)s.checkpointCreationAttributes.checkpointSize,
- s.checkpointCreationAttributes.maxSections,
- (unsigned long long)s.checkpointCreationAttributes.maxSectionSize,
- s.numberOfSections, s.memoryUsed);
-
- out_close:
- retries = 0;
- close_retry:
- rv = saCkptCheckpointClose(h);
- if (rv == SA_AIS_ERR_TRY_AGAIN) {
- log_group(ls, "unlink ckpt close retry");
- sleep(1);
- if (retries++ < 10)
- goto close_retry;
- }
- if (rv != SA_AIS_OK) {
- log_error("unlink ckpt %llx close err %d %s",
- (unsigned long long)h, rv, ls->name);
- }
- out:
- ls->deadlk_ckpt_handle = 0;
- return ret;
-}
-
-static int unlink_checkpoint(struct lockspace *ls)
-{
- SaNameT name;
- int len;
-
- len = snprintf((char *)name.value, SA_MAX_NAME_LENGTH, "dlmdeadlk.%s.%d",
- ls->name, our_nodeid);
- name.length = len;
-
- return _unlink_checkpoint(ls, &name);
-}
-
-static void read_checkpoint(struct lockspace *ls, int nodeid)
-{
- SaCkptCheckpointHandleT h;
- SaCkptSectionIterationHandleT itr;
- SaCkptSectionDescriptorT desc;
- SaCkptIOVectorElementT iov;
- SaNameT name;
- SaAisErrorT rv;
- char buf[DLM_RESNAME_MAXLEN];
- int len;
- int retries;
-
- if (nodeid == our_nodeid)
- return;
-
- log_group(ls, "read_checkpoint %d", nodeid);
-
- len = snprintf((char *)name.value, SA_MAX_NAME_LENGTH, "dlmdeadlk.%s.%d",
- ls->name, nodeid);
- name.length = len;
-
- retries = 0;
- open_retry:
- rv = saCkptCheckpointOpen(global_ckpt_h, &name, NULL,
- SA_CKPT_CHECKPOINT_READ, 0, &h);
- if (rv == SA_AIS_ERR_TRY_AGAIN) {
- log_group(ls, "read_checkpoint: %d ckpt open retry", nodeid);
- sleep(1);
- if (retries++ < 10)
- goto open_retry;
- }
- if (rv != SA_AIS_OK) {
- log_error("read_checkpoint: %d ckpt open error %d", nodeid, rv);
- return;
- }
-
- retries = 0;
- init_retry:
- rv = saCkptSectionIterationInitialize(h, SA_CKPT_SECTIONS_ANY, 0, &itr);
- if (rv == SA_AIS_ERR_TRY_AGAIN) {
- log_group(ls, "read_checkpoint: ckpt iterinit retry");
- sleep(1);
- if (retries++ < 10)
- goto init_retry;
- }
- if (rv != SA_AIS_OK) {
- log_error("read_checkpoint: %d ckpt iterinit error %d", nodeid, rv);
- goto out;
- }
-
- while (1) {
- retries = 0;
- next_retry:
- rv = saCkptSectionIterationNext(itr, &desc);
- if (rv == SA_AIS_ERR_NO_SECTIONS)
- break;
- if (rv == SA_AIS_ERR_TRY_AGAIN) {
- log_group(ls, "read_checkpoint: ckpt iternext retry");
- sleep(1);
- if (retries++ < 10)
- goto next_retry;
- }
- if (rv != SA_AIS_OK) {
- log_error("read_checkpoint: %d ckpt iternext error %d",
- nodeid, rv);
- goto out_it;
- }
-
- if (!desc.sectionSize)
- continue;
-
- iov.sectionId = desc.sectionId;
- iov.dataBuffer = &section_buf;
- iov.dataSize = desc.sectionSize;
- iov.dataOffset = 0;
-
- memset(&buf, 0, sizeof(buf));
- snprintf(buf, sizeof(buf), "%s", desc.sectionId.id);
-
- log_group(ls, "read_checkpoint: section size %llu id %u \"%s\"",
- (unsigned long long)iov.dataSize,
- iov.sectionId.idLen, buf);
-
- retries = 0;
- read_retry:
- rv = saCkptCheckpointRead(h, &iov, 1, NULL);
- if (rv == SA_AIS_ERR_TRY_AGAIN) {
- log_group(ls, "read_checkpoint: ckpt read retry");
- sleep(1);
- if (retries++ < 10)
- goto read_retry;
- }
- if (rv != SA_AIS_OK) {
- log_error("read_checkpoint: %d ckpt read error %d",
- nodeid, rv);
- goto out_it;
- }
-
- section_len = iov.readSize;
-
- if (!section_len)
- continue;
-
- if (section_len % sizeof(struct pack_lock)) {
- log_error("read_checkpoint: %d bad section len %d",
- nodeid, section_len);
- continue;
- }
-
- read_checkpoint_locks(ls, nodeid, (char *)desc.sectionId.id,
- desc.sectionId.idLen);
- }
-
- out_it:
- saCkptSectionIterationFinalize(itr);
- retries = 0;
- out:
- rv = saCkptCheckpointClose(h);
- if (rv == SA_AIS_ERR_TRY_AGAIN) {
- log_group(ls, "read_checkpoint: unlink ckpt close retry");
- sleep(1);
- if (retries++ < 10)
- goto out;
- }
- if (rv != SA_AIS_OK)
- log_error("read_checkpoint: %d close error %d", nodeid, rv);
-}
-
-static void write_checkpoint(struct lockspace *ls)
-{
- SaCkptCheckpointCreationAttributesT attr;
- SaCkptCheckpointHandleT h;
- SaCkptSectionIdT section_id;
- SaCkptSectionCreationAttributesT section_attr;
- SaCkptCheckpointOpenFlagsT flags;
- SaNameT name;
- SaAisErrorT rv;
- char buf[DLM_RESNAME_MAXLEN];
- struct dlm_rsb *r;
- struct dlm_lkb *lkb;
- int r_count, lock_count, total_size, section_size, max_section_size;
- int len;
-
- len = snprintf((char *)name.value, SA_MAX_NAME_LENGTH, "dlmdeadlk.%s.%d",
- ls->name, our_nodeid);
- name.length = len;
-
- /* unlink an old checkpoint before we create a new one */
- if (ls->deadlk_ckpt_handle) {
- log_error("write_checkpoint: old ckpt");
- if (_unlink_checkpoint(ls, &name))
- return;
- }
-
- /* loop through all locks to figure out sizes to set in
- the attr fields */
-
- r_count = 0;
- lock_count = 0;
- total_size = 0;
- max_section_size = 0;
-
- list_for_each_entry(r, &ls->resources, list) {
- r_count++;
- section_size = 0;
- list_for_each_entry(lkb, &r->locks, list) {
- section_size += sizeof(struct pack_lock);
- lock_count++;
- }
- total_size += section_size;
- if (section_size > max_section_size)
- max_section_size = section_size;
- }
-
- log_group(ls, "write_checkpoint: r_count %d, lock_count %d",
- r_count, lock_count);
-
- log_group(ls, "write_checkpoint: total %d bytes, max_section %d bytes",
- total_size, max_section_size);
-
- attr.creationFlags = SA_CKPT_WR_ALL_REPLICAS;
- attr.checkpointSize = total_size;
- attr.retentionDuration = SA_TIME_MAX;
- attr.maxSections = r_count + 1; /* don't know why we need +1 */
- attr.maxSectionSize = max_section_size;
- attr.maxSectionIdSize = DLM_RESNAME_MAXLEN;
-
- flags = SA_CKPT_CHECKPOINT_READ |
- SA_CKPT_CHECKPOINT_WRITE |
- SA_CKPT_CHECKPOINT_CREATE;
-
- open_retry:
- rv = saCkptCheckpointOpen(global_ckpt_h, &name, &attr, flags, 0, &h);
- if (rv == SA_AIS_ERR_TRY_AGAIN) {
- log_group(ls, "write_checkpoint: ckpt open retry");
- sleep(1);
- goto open_retry;
- }
- if (rv == SA_AIS_ERR_EXIST) {
- log_group(ls, "write_checkpoint: ckpt already exists");
- return;
- }
- if (rv != SA_AIS_OK) {
- log_group(ls, "write_checkpoint: ckpt open error %d", rv);
- return;
- }
-
- log_group(ls, "write_checkpoint: open ckpt handle %llx",
- (unsigned long long)h);
- ls->deadlk_ckpt_handle = (uint64_t) h;
-
- list_for_each_entry(r, &ls->resources, list) {
- memset(buf, 0, sizeof(buf));
- len = snprintf(buf, sizeof(buf), "%s", r->name);
-
- section_id.id = (void *)buf;
- section_id.idLen = len + 1;
- section_attr.sectionId = &section_id;
- section_attr.expirationTime = SA_TIME_END;
-
- pack_section_buf(ls, r);
-
- log_group(ls, "write_checkpoint: section size %u id %u \"%s\"",
- section_len, section_id.idLen, buf);
-
- create_retry:
- rv = saCkptSectionCreate(h, &section_attr, &section_buf,
- section_len);
- if (rv == SA_AIS_ERR_TRY_AGAIN) {
- log_group(ls, "write_checkpoint: ckpt create retry");
- sleep(1);
- goto create_retry;
- }
- if (rv == SA_AIS_ERR_EXIST) {
- /* this shouldn't happen in general */
- log_error("write_checkpoint: clearing old ckpt");
- saCkptCheckpointClose(h);
- _unlink_checkpoint(ls, &name);
- goto open_retry;
- }
- if (rv != SA_AIS_OK) {
- log_error("write_checkpoint: section create %d", rv);
- break;
- }
- }
-}
-
-static void send_message(struct lockspace *ls, int type,
- uint32_t to_nodeid, uint32_t msgdata)
-{
- struct dlm_header *hd;
- int len;
- char *buf;
-
- len = sizeof(struct dlm_header);
- buf = malloc(len);
- if (!buf) {
- log_error("send_message: no memory");
- disable_deadlock();
- return;
- }
- memset(buf, 0, len);
-
- hd = (struct dlm_header *)buf;
- hd->type = type;
- hd->to_nodeid = to_nodeid;
- hd->msgdata = msgdata;
-
- dlm_send_message(ls, buf, len);
-
- free(buf);
-}
-
-static void send_checkpoint_ready(struct lockspace *ls)
-{
- log_group(ls, "send_checkpoint_ready");
- send_message(ls, DLM_MSG_DEADLK_CHECKPOINT_READY, 0, 0);
-}
-
-void send_cycle_start(struct lockspace *ls)
-{
- log_group(ls, "send_cycle_start");
- send_message(ls, DLM_MSG_DEADLK_CYCLE_START, 0, 0);
-}
-
-static void send_cycle_end(struct lockspace *ls)
-{
- log_group(ls, "send_cycle_end");
- send_message(ls, DLM_MSG_DEADLK_CYCLE_END, 0, 0);
-}
-
-static void send_cancel_lock(struct lockspace *ls, struct trans *tr,
- struct dlm_lkb *lkb)
-{
- int to_nodeid;
- uint32_t lkid;
-
- if (!lkb->lock.nodeid)
- lkid = lkb->lock.id;
- else
- lkid = lkb->lock.remid;
- to_nodeid = lkb->home;
-
- log_group(ls, "send_cancel_lock to nodeid %d rsb %s id %x xid %llx",
- to_nodeid, lkb->rsb->name, lkid,
- (unsigned long long)lkb->lock.xid);
-
- send_message(ls, DLM_MSG_DEADLK_CANCEL_LOCK, to_nodeid, lkid);
-}
-
-static void dump_resources(struct lockspace *ls)
-{
- struct dlm_rsb *r;
- struct dlm_lkb *lkb;
-
- log_group(ls, "Resource dump:");
-
- list_for_each_entry(r, &ls->resources, list) {
- log_group(ls, "\"%s\" len %d", r->name, r->len);
- list_for_each_entry(lkb, &r->locks, list) {
- log_group(ls, " %s: nodeid %d id %08x remid %08x gr %s rq %s pid %u xid %llx",
- status_str(lkb->lock.status),
- lkb->lock.nodeid,
- lkb->lock.id,
- lkb->lock.remid,
- dlm_mode_str(lkb->lock.grmode),
- dlm_mode_str(lkb->lock.rqmode),
- lkb->lock.ownpid,
- (unsigned long long)lkb->lock.xid);
- }
- }
-}
-
-static void find_deadlock(struct lockspace *ls);
-
-static void run_deadlock(struct lockspace *ls)
-{
- struct node *node;
- int not_ready = 0;
- int low = -1;
-
- if (ls->all_checkpoints_ready)
- log_group(ls, "WARNING: run_deadlock all_checkpoints_ready");
-
- list_for_each_entry(node, &ls->deadlk_nodes, list) {
- if (!node->in_cycle)
- continue;
- if (!node->checkpoint_ready)
- not_ready++;
-
- log_group(ls, "nodeid %d checkpoint_ready = %d",
- node->nodeid, node->checkpoint_ready);
- }
- if (not_ready)
- return;
-
- ls->all_checkpoints_ready = 1;
-
- list_for_each_entry(node, &ls->deadlk_nodes, list) {
- if (!node->in_cycle)
- continue;
- if (node->nodeid < low || low == -1)
- low = node->nodeid;
- }
- ls->deadlk_low_nodeid = low;
-
- if (low == our_nodeid)
- find_deadlock(ls);
- else
- log_group(ls, "defer resolution to low nodeid %d", low);
-}
-
-void receive_checkpoint_ready(struct lockspace *ls, struct dlm_header *hd,
- int len)
-{
- struct node *node;
- int nodeid = hd->nodeid;
-
- log_group(ls, "receive_checkpoint_ready from %d", nodeid);
-
- read_checkpoint(ls, nodeid);
-
- list_for_each_entry(node, &ls->deadlk_nodes, list) {
- if (node->nodeid == nodeid) {
- node->checkpoint_ready = 1;
- break;
- }
- }
-
- run_deadlock(ls);
-}
-
-void receive_cycle_start(struct lockspace *ls, struct dlm_header *hd, int len)
-{
- struct node *node;
- int nodeid = hd->nodeid;
- int rv;
-
- log_group(ls, "receive_cycle_start from %d", nodeid);
-
- if (ls->cycle_running) {
- log_group(ls, "cycle already running");
- return;
- }
- ls->cycle_running = 1;
- gettimeofday(&ls->cycle_start_time, NULL);
-
- list_for_each_entry(node, &ls->deadlk_nodes, list)
- node->in_cycle = 1;
-
- rv = read_debugfs_locks(ls);
- if (rv < 0) {
- log_error("can't read dlm debugfs file: %s", strerror(errno));
- return;
- }
-
- write_checkpoint(ls);
- send_checkpoint_ready(ls);
-}
-
-static uint64_t dt_usec(struct timeval *start, struct timeval *stop)
-{
- uint64_t dt;
-
- dt = stop->tv_sec - start->tv_sec;
- dt *= 1000000;
- dt += stop->tv_usec - start->tv_usec;
- return dt;
-}
-
-/* TODO: nodes added during a cycle - what will they do with messages
- they recv from other nodes running the cycle? */
-
-void receive_cycle_end(struct lockspace *ls, struct dlm_header *hd, int len)
-{
- struct node *node;
- int nodeid = hd->nodeid;
- uint64_t usec;
-
- if (!ls->cycle_running) {
- log_error("receive_cycle_end %s from %d: no cycle running",
- ls->name, nodeid);
- return;
- }
-
- gettimeofday(&ls->cycle_end_time, NULL);
- usec = dt_usec(&ls->cycle_start_time, &ls->cycle_end_time);
- log_group(ls, "receive_cycle_end: from %d cycle time %.2f s",
- nodeid, usec * 1.e-6);
-
- ls->cycle_running = 0;
- ls->all_checkpoints_ready = 0;
-
- list_for_each_entry(node, &ls->deadlk_nodes, list)
- node->checkpoint_ready = 0;
-
- free_resources(ls);
- free_transactions(ls);
- unlink_checkpoint(ls);
-}
-
-void receive_cancel_lock(struct lockspace *ls, struct dlm_header *hd, int len)
-{
- dlm_lshandle_t h;
- int nodeid = hd->nodeid;
- uint32_t lkid = hd->msgdata;
- int rv;
-
- if (nodeid != our_nodeid)
- return;
-
- h = dlm_open_lockspace(ls->name);
- if (!h) {
- log_error("deadlock cancel %x from %d can't open lockspace %s",
- lkid, nodeid, ls->name);
- return;
- }
-
- log_group(ls, "receive_cancel_lock %x from %d", lkid, nodeid);
-
- rv = dlm_ls_deadlock_cancel(h, lkid, 0);
- if (rv < 0) {
- log_error("deadlock cancel %x from %x lib cancel errno %d",
- lkid, nodeid, errno);
- }
-
- dlm_close_lockspace(h);
-}
-
-static void node_joined(struct lockspace *ls, int nodeid)
-{
- struct node *node;
-
- node = malloc(sizeof(struct node));
- if (!node) {
- log_error("node_joined: no memory");
- disable_deadlock();
- return;
- }
- memset(node, 0, sizeof(struct node));
- node->nodeid = nodeid;
- list_add_tail(&node->list, &ls->deadlk_nodes);
- log_group(ls, "node %d joined deadlock cpg", nodeid);
-}
-
-static void node_left(struct lockspace *ls, int nodeid, int reason)
-{
- struct node *node, *safe;
-
- list_for_each_entry_safe(node, safe, &ls->deadlk_nodes, list) {
- if (node->nodeid != nodeid)
- continue;
-
- list_del(&node->list);
- free(node);
- log_group(ls, "node %d left deadlock cpg", nodeid);
- }
-}
-
-static void purge_locks(struct lockspace *ls, int nodeid);
-
-void deadlk_confchg(struct lockspace *ls,
- const struct cpg_address *member_list,
- size_t member_list_entries,
- const struct cpg_address *left_list,
- size_t left_list_entries,
- const struct cpg_address *joined_list,
- size_t joined_list_entries)
-{
- int i;
-
- if (!cfgd_enable_deadlk)
- return;
-
- if (!ls->deadlk_confchg_init) {
- ls->deadlk_confchg_init = 1;
- for (i = 0; i < member_list_entries; i++)
- node_joined(ls, member_list[i].nodeid);
- return;
- }
-
- /* nodes added during a cycle won't have node->in_cycle set so they
- won't be included in any of the cycle processing */
-
- for (i = 0; i < joined_list_entries; i++)
- node_joined(ls, joined_list[i].nodeid);
-
- for (i = 0; i < left_list_entries; i++)
- node_left(ls, left_list[i].nodeid, left_list[i].reason);
-
- if (!ls->cycle_running)
- return;
-
- if (!left_list_entries)
- return;
-
- if (!ls->all_checkpoints_ready) {
- run_deadlock(ls);
- return;
- }
-
- for (i = 0; i < left_list_entries; i++)
- purge_locks(ls, left_list[i].nodeid);
-
- for (i = 0; i < left_list_entries; i++) {
- if (left_list[i].nodeid != ls->deadlk_low_nodeid)
- continue;
- /* this will set a new low node which will call find_deadlock */
- run_deadlock(ls);
- break;
- }
-}
-
-/* would we ever call this after we've created the transaction lists?
- I don't think so; I think it can only be called between reading
- checkpoints */
-
-static void purge_locks(struct lockspace *ls, int nodeid)
-{
- struct dlm_rsb *r;
- struct dlm_lkb *lkb, *safe;
-
- list_for_each_entry(r, &ls->resources, list) {
- list_for_each_entry_safe(lkb, safe, &r->locks, list) {
- if (lkb->home == nodeid) {
- list_del(&lkb->list);
- if (list_empty(&lkb->trans_list))
- free(lkb);
- else
- log_group(ls, "purge %d %x on trans",
- nodeid, lkb->lock.id);
- }
- }
- }
-}
-
-static void add_lkb_trans(struct trans *tr, struct dlm_lkb *lkb)
-{
- list_add(&lkb->trans_list, &tr->locks);
- lkb->trans = tr;
-}
-
-static struct trans *get_trans(struct lockspace *ls, uint64_t xid)
-{
- struct trans *tr;
-
- list_for_each_entry(tr, &ls->transactions, list) {
- if (tr->xid == xid)
- return tr;
- }
-
- tr = malloc(sizeof(struct trans));
- if (!tr) {
- log_error("get_trans: no memory");
- disable_deadlock();
- return NULL;
- }
- memset(tr, 0, sizeof(struct trans));
- tr->xid = xid;
- tr->waitfor = NULL;
- tr->waitfor_alloc = 0;
- tr->waitfor_count = 0;
- INIT_LIST_HEAD(&tr->locks);
- list_add(&tr->list, &ls->transactions);
- return tr;
-}
-
-/* for each rsb, for each lock, find/create trans, add lkb to the trans list */
-
-static void create_trans_list(struct lockspace *ls)
-{
- struct dlm_rsb *r;
- struct dlm_lkb *lkb;
- struct trans *tr;
- int r_count = 0, lkb_count = 0;
-
- list_for_each_entry(r, &ls->resources, list) {
- r_count++;
- list_for_each_entry(lkb, &r->locks, list) {
- lkb_count++;
- tr = get_trans(ls, lkb->lock.xid);
- if (!tr)
- goto out;
- add_lkb_trans(tr, lkb);
- }
- }
- out:
- log_group(ls, "create_trans_list: r_count %d lkb_count %d",
- r_count, lkb_count);
-}
-
-static int locks_compat(struct dlm_lkb *waiting_lkb,
- struct dlm_lkb *granted_lkb)
-{
- if (waiting_lkb == granted_lkb) {
- log_debug("waiting and granted same lock");
- return 0;
- }
-
- if (waiting_lkb->trans->xid == granted_lkb->trans->xid) {
- log_debug("waiting and granted same trans %llx",
- (unsigned long long)waiting_lkb->trans->xid);
- return 0;
- }
-
- return dlm_modes_compat(granted_lkb->lock.grmode,
- waiting_lkb->lock.rqmode);
-}
-
-static int in_waitfor(struct trans *tr, struct trans *add_tr)
-{
- int i;
-
- for (i = 0; i < tr->waitfor_alloc; i++) {
- if (!tr->waitfor[i])
- continue;
- if (tr->waitfor[i] == add_tr)
- return 1;
- }
- return 0;
-}
-
-static void add_waitfor(struct lockspace *ls, struct dlm_lkb *waiting_lkb,
- struct dlm_lkb *granted_lkb)
-{
- struct trans *tr = waiting_lkb->trans;
- int i;
-
- if (locks_compat(waiting_lkb, granted_lkb))
- return;
-
- /* this shouldn't happen AFAIK */
- if (tr == granted_lkb->trans) {
- log_group(ls, "trans %llx waiting on self",
- (unsigned long long)tr->xid);
- return;
- }
-
- /* don't add the same trans to the waitfor list multiple times */
- if (tr->waitfor_count && in_waitfor(tr, granted_lkb->trans)) {
- log_group(ls, "trans %llx already waiting for trans %llx, "
- "waiting %x %s, granted %x %s",
- (unsigned long long)waiting_lkb->trans->xid,
- (unsigned long long)granted_lkb->trans->xid,
- waiting_lkb->lock.id, waiting_lkb->rsb->name,
- granted_lkb->lock.id, granted_lkb->rsb->name);
- return;
- }
-
- if (tr->waitfor_count == tr->waitfor_alloc) {
- struct trans **old_waitfor = tr->waitfor;
- tr->waitfor_alloc += TR_NALLOC;
- tr->waitfor = malloc(tr->waitfor_alloc * sizeof(tr));
- if (!tr->waitfor) {
- log_error("add_waitfor no mem %u", tr->waitfor_alloc);
- return;
- }
- memset(tr->waitfor, 0, tr->waitfor_alloc * sizeof(tr));
-
- /* copy then free old set of pointers */
- for (i = 0; i < tr->waitfor_count; i++)
- tr->waitfor[i] = old_waitfor[i];
- if (old_waitfor)
- free(old_waitfor);
- }
-
- tr->waitfor[tr->waitfor_count++] = granted_lkb->trans;
- granted_lkb->trans->others_waiting_on_us++;
- waiting_lkb->waitfor_trans = granted_lkb->trans;
-}
-
-/* for each trans, for each waiting lock, go to rsb of the lock,
- find granted locks on that rsb, then find the trans the
- granted lock belongs to, add that trans to our waitfor list */
-
-static void create_waitfor_graph(struct lockspace *ls)
-{
- struct dlm_lkb *waiting_lkb, *granted_lkb;
- struct dlm_rsb *r;
- struct trans *tr;
- int depend_count = 0;
-
- list_for_each_entry(tr, &ls->transactions, list) {
- list_for_each_entry(waiting_lkb, &tr->locks, trans_list) {
- if (waiting_lkb->lock.status == DLM_LKSTS_GRANTED)
- continue;
- /* waiting_lkb status is CONVERT or WAITING */
-
- r = waiting_lkb->rsb;
-
- list_for_each_entry(granted_lkb, &r->locks, list) {
- if (granted_lkb->lock.status==DLM_LKSTS_WAITING)
- continue;
- /* granted_lkb status is GRANTED or CONVERT */
- add_waitfor(ls, waiting_lkb, granted_lkb);
- depend_count++;
- }
- }
- }
-
- log_group(ls, "create_waitfor_graph: depend_count %d", depend_count);
-}
-
-/* Assume a transaction that's not waiting on any locks will complete, release
- all the locks it currently holds, and exit. Other transactions that were
- blocked waiting on the removed transaction's now-released locks may now be
- unblocked, complete, release all held locks and exit. Repeat this until
- no more transactions can be removed. If there are transactions remaining,
- then they are deadlocked. */
-
-static void remove_waitfor(struct trans *tr, struct trans *remove_tr)
-{
- int i;
-
- for (i = 0; i < tr->waitfor_alloc; i++) {
- if (!tr->waitfor_count)
- break;
-
- if (!tr->waitfor[i])
- continue;
-
- if (tr->waitfor[i] == remove_tr) {
- tr->waitfor[i] = NULL;
- tr->waitfor_count--;
- remove_tr->others_waiting_on_us--;
- }
- }
-}
-
-/* remove_tr is not waiting for anything, assume it completes and goes away
- and remove it from any other transaction's waitfor list */
-
-static void remove_trans(struct lockspace *ls, struct trans *remove_tr)
-{
- struct trans *tr;
-
- list_for_each_entry(tr, &ls->transactions, list) {
- if (tr == remove_tr)
- continue;
- if (!remove_tr->others_waiting_on_us)
- break;
- remove_waitfor(tr, remove_tr);
- }
-
- if (remove_tr->others_waiting_on_us)
- log_group(ls, "trans %llx removed others waiting %d",
- (unsigned long long)remove_tr->xid,
- remove_tr->others_waiting_on_us);
-}
-
-static int reduce_waitfor_graph(struct lockspace *ls)
-{
- struct trans *tr, *safe;
- int blocked = 0;
- int removed = 0;
-
- list_for_each_entry_safe(tr, safe, &ls->transactions, list) {
- if (tr->waitfor_count) {
- blocked++;
- continue;
- }
- remove_trans(ls, tr);
- list_del(&tr->list);
- if (tr->waitfor)
- free(tr->waitfor);
- free(tr);
- removed++;
- }
-
- log_group(ls, "reduce_waitfor_graph: %d blocked, %d removed",
- blocked, removed);
- return removed;
-}
-
-static void reduce_waitfor_graph_loop(struct lockspace *ls)
-{
- int removed;
-
- while (1) {
- removed = reduce_waitfor_graph(ls);
- if (!removed)
- break;
- }
-}
-
-static struct trans *find_trans_to_cancel(struct lockspace *ls)
-{
- struct trans *tr;
-
- list_for_each_entry(tr, &ls->transactions, list) {
- if (!tr->others_waiting_on_us)
- continue;
- return tr;
- }
- return NULL;
-}
-
-static void cancel_trans(struct lockspace *ls)
-{
- struct trans *tr;
- struct dlm_lkb *lkb;
- int removed;
-
- tr = find_trans_to_cancel(ls);
- if (!tr) {
- log_group(ls, "cancel_trans: no trans found");
- return;
- }
-
- list_for_each_entry(lkb, &tr->locks, trans_list) {
- if (lkb->lock.status == DLM_LKSTS_GRANTED)
- continue;
- send_cancel_lock(ls, tr, lkb);
-
- /* When this canceled trans has multiple locks all blocked by
- locks held by one other trans, that other trans is only
- added to tr->waitfor once, and only one of these waiting
- locks will have waitfor_trans set. So, the lkb with
- non-null waitfor_trans was the first one responsible
- for adding waitfor_trans to tr->waitfor.
-
- We could potentially forget about keeping track of lkb->
- waitfor_trans, forget about calling remove_waitfor()
- here and just set tr->waitfor_count = 0 after this loop.
- The loss would be that waitfor_trans->others_waiting_on_us
- would not get decremented. */
-
- if (lkb->waitfor_trans)
- remove_waitfor(tr, lkb->waitfor_trans);
- }
-
- /* this shouldn't happen, if it does something's not working right */
- if (tr->waitfor_count) {
- log_group(ls, "cancel_trans: %llx non-zero waitfor_count %d",
- (unsigned long long)tr->xid, tr->waitfor_count);
- }
-
- /* this should now remove the canceled trans since it now has a zero
- waitfor_count */
- removed = reduce_waitfor_graph(ls);
-
- if (!removed)
- log_group(ls, "canceled trans not removed from graph");
-
- /* now call reduce_waitfor_graph() in another loop and it
- should completely reduce */
-}
-
-static void dump_trans(struct lockspace *ls, struct trans *tr)
-{
- struct dlm_lkb *lkb;
- struct trans *wf;
- int i;
-
- log_group(ls, "trans xid %llx waitfor_count %d others_waiting_on_us %d",
- (unsigned long long)tr->xid, tr->waitfor_count,
- tr->others_waiting_on_us);
-
- log_group(ls, "locks:");
-
- list_for_each_entry(lkb, &tr->locks, trans_list) {
- log_group(ls, " %s: id %08x gr %s rq %s pid %u:%u \"%s\"",
- status_str(lkb->lock.status),
- lkb->lock.id,
- dlm_mode_str(lkb->lock.grmode),
- dlm_mode_str(lkb->lock.rqmode),
- lkb->home,
- lkb->lock.ownpid,
- lkb->rsb->name);
- }
-
- if (!tr->waitfor_count)
- return;
-
- log_group(ls, "waitfor:");
-
- for (i = 0; i < tr->waitfor_alloc; i++) {
- if (!tr->waitfor[i])
- continue;
- wf = tr->waitfor[i];
- log_group(ls, " xid %llx", (unsigned long long)wf->xid);
- }
-}
-
-static void dump_all_trans(struct lockspace *ls)
-{
- struct trans *tr;
-
- log_group(ls, "Transaction dump:");
-
- list_for_each_entry(tr, &ls->transactions, list)
- dump_trans(ls, tr);
-}
-
-static void find_deadlock(struct lockspace *ls)
-{
- if (list_empty(&ls->resources)) {
- log_group(ls, "no deadlock: no resources");
- goto out;
- }
-
- if (!list_empty(&ls->transactions)) {
- log_group(ls, "transactions list should be empty");
- goto out;
- }
-
- dump_resources(ls);
- create_trans_list(ls);
- create_waitfor_graph(ls);
- dump_all_trans(ls);
- reduce_waitfor_graph_loop(ls);
-
- if (list_empty(&ls->transactions)) {
- log_group(ls, "no deadlock: all transactions reduced");
- goto out;
- }
-
- log_group(ls, "found deadlock");
- dump_all_trans(ls);
-
- cancel_trans(ls);
- reduce_waitfor_graph_loop(ls);
-
- if (list_empty(&ls->transactions)) {
- log_group(ls, "resolved deadlock with cancel");
- goto out;
- }
-
- log_error("deadlock resolution failed");
- dump_all_trans(ls);
- out:
- send_cycle_end(ls);
-}
-
diff --git a/group/dlm_controld/dlm_controld.h b/group/dlm_controld/dlm_controld.h
deleted file mode 100644
index 73e4ecc..0000000
--- a/group/dlm_controld/dlm_controld.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef __DLM_CONTROLD_DOT_H__
-#define __DLM_CONTROLD_DOT_H__
-
-/* This defines the interface between dlm_controld and libdlmcontrol, and
- should only be used by libdlmcontrol. */
-
-#define DLMC_SOCK_PATH "dlmc_sock"
-#define DLMC_QUERY_SOCK_PATH "dlmc_query_sock"
-
-#define DLMC_MAGIC 0xD13CD13C
-#define DLMC_VERSION 0x00010001
-
-#define DLMC_CMD_DUMP_DEBUG 1
-#define DLMC_CMD_DUMP_PLOCKS 2
-#define DLMC_CMD_LOCKSPACE_INFO 3
-#define DLMC_CMD_NODE_INFO 4
-#define DLMC_CMD_LOCKSPACES 5
-#define DLMC_CMD_LOCKSPACE_NODES 6
-#define DLMC_CMD_FS_REGISTER 7
-#define DLMC_CMD_FS_UNREGISTER 8
-#define DLMC_CMD_FS_NOTIFIED 9
-#define DLMC_CMD_DEADLOCK_CHECK 10
-#define DLMC_CMD_DUMP_LOG_PLOCK 11
-
-struct dlmc_header {
- unsigned int magic;
- unsigned int version;
- unsigned int command;
- unsigned int option;
- unsigned int len;
- int data; /* embedded command-specific data, for convenience */
- int unused1;
- int unsued2;
- char name[DLM_LOCKSPACE_LEN]; /* no terminating null space */
-};
-
-#endif
-
diff --git a/group/dlm_controld/dlm_daemon.h b/group/dlm_controld/dlm_daemon.h
deleted file mode 100644
index 0c89f67..0000000
--- a/group/dlm_controld/dlm_daemon.h
+++ /dev/null
@@ -1,326 +0,0 @@
-#ifndef __DLM_DAEMON_DOT_H__
-#define __DLM_DAEMON_DOT_H__
-
-#include <sys/types.h>
-#include <asm/types.h>
-#include <sys/uio.h>
-#include <netinet/in.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/ioctl.h>
-#include <sys/stat.h>
-#include <sys/utsname.h>
-#include <sys/poll.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <net/if.h>
-#include <stdio.h>
-#include <errno.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdarg.h>
-#include <fcntl.h>
-#include <netdb.h>
-#include <limits.h>
-#include <unistd.h>
-#include <time.h>
-#include <syslog.h>
-#include <sched.h>
-#include <signal.h>
-#include <sys/time.h>
-#include <dirent.h>
-
-#include <corosync/cpg.h>
-#include <liblogthread.h>
-
-#include <linux/dlmconstants.h>
-#include "libdlmcontrol.h"
-#include "dlm_controld.h"
-#include "config.h"
-#include "list.h"
-#include "rbtree.h"
-#include "linux_endian.h"
-
-/* TODO: cleanup */
-#define CLUSTERVARLIB "/var/lib/cluster"
-#define CLUSTERVARRUN "/var/run/cluster"
-#define LOGDIR "/var/log/cluster"
-#define VERSION "master"
-#define SYSLOGFACILITY LOG_LOCAL4
-#define SYSLOGLEVEL LOG_INFO
-#define REDHAT_COPYRIGHT "Copyright (C) Red Hat, Inc. 2004-2011 All rights reserved."
-#define LOCKFILE_NAME CLUSTERVARRUN "/dlm_controld.pid"
-#define DAEMON_NAME "dlm_controld"
-#define DEFAULT_LOG_MODE LOG_MODE_OUTPUT_FILE|LOG_MODE_OUTPUT_SYSLOG
-#define DEFAULT_SYSLOG_FACILITY SYSLOGFACILITY
-#define DEFAULT_SYSLOG_PRIORITY SYSLOGLEVEL
-#define DEFAULT_LOGFILE_PRIORITY LOG_INFO /* ? */
-#define DEFAULT_LOGFILE LOGDIR "/" DAEMON_NAME ".log"
-#define DLM_CONFIG_FILE "/etc/dlm.conf"
-
-
-/* DLM_LOCKSPACE_LEN: maximum lockspace name length, from linux/dlmconstants.h.
- Copied in libdlm.h so apps don't need to include the kernel header.
- The libcpg limit is larger at CPG_MAX_NAME_LENGTH 128. Our cpg name includes
- a "dlm:" prefix before the lockspace name. */
-
-/* Maximum members of a ls, should match CPG_MEMBERS_MAX in corosync/cpg.h.
- There are no max defines in dlm-kernel for lockspace members. */
-
-#define MAX_NODES 128
-
-/* Maximum number of IP addresses per node, when using SCTP and multi-ring in
-   corosync.  In dlm-kernel this is DLM_MAX_ADDR_COUNT, currently 3. */
-
-#define MAX_NODE_ADDRESSES 4
-
-/* Max string length printed on a line, for debugging/dump output. */
-
-#define MAXLINE 256
-
-/* cfgk_protocol */
-
-#define PROTO_TCP 0
-#define PROTO_SCTP 1
-#define PROTO_DETECT 2
-
-extern int daemon_debug_opt;
-extern int daemon_quit;
-extern int cluster_down;
-extern int poll_fencing;
-extern int poll_quorum;
-extern int poll_fs;
-extern int poll_ignore_plock;
-extern int poll_drop_plock;
-extern int plock_fd;
-extern int plock_ci;
-extern struct list_head lockspaces;
-extern int cluster_quorate;
-extern uint32_t cluster_ringid_seq;
-extern int our_nodeid;
-extern uint32_t control_minor;
-extern uint32_t monitor_minor;
-extern uint32_t plock_minor;
-extern uint32_t old_plock_minor;
-
-#define LOG_DUMP_SIZE DLMC_DUMP_SIZE
-
-#define LOG_PLOCK 0x00010000
-
-void log_level(char *name_in, uint32_t level_in, const char *fmt, ...);
-
-#define log_error(fmt, args...) log_level(NULL, LOG_ERR, fmt, ##args)
-#define log_debug(fmt, args...) log_level(NULL, LOG_DEBUG, fmt, ##args)
-#define log_group(ls, fmt, args...) log_level((ls)->name, LOG_DEBUG, fmt, ##args)
-
-#define log_plock(ls, fmt, args...) log_level((ls)->name, LOG_PLOCK, fmt, ##args)
-#define log_dlock(ls, fmt, args...) log_level((ls)->name, LOG_PLOCK|LOG_DEBUG, fmt, ##args)
-#define log_elock(ls, fmt, args...) log_level((ls)->name, LOG_PLOCK|LOG_ERR, fmt, ##args)
-
-/* dlm_header types */
-enum {
- DLM_MSG_PROTOCOL = 1,
- DLM_MSG_START,
- DLM_MSG_PLOCK,
- DLM_MSG_PLOCK_OWN,
- DLM_MSG_PLOCK_DROP,
- DLM_MSG_PLOCK_SYNC_LOCK,
- DLM_MSG_PLOCK_SYNC_WAITER,
- DLM_MSG_PLOCKS_DONE,
- DLM_MSG_PLOCKS_DATA,
- DLM_MSG_DEADLK_CYCLE_START,
- DLM_MSG_DEADLK_CYCLE_END,
- DLM_MSG_DEADLK_CHECKPOINT_READY,
- DLM_MSG_DEADLK_CANCEL_LOCK
-};
-
-/* dlm_header flags */
-#define DLM_MFLG_JOINING 1 /* accompanies start, we are joining */
-#define DLM_MFLG_HAVEPLOCK 2 /* accompanies start, we have plock state */
-#define DLM_MFLG_NACK 4 /* accompanies start, prevent wrong match when
- two outstanding changes are the same */
-#define DLM_MFLG_PLOCK_SIG 8 /* msgdata2 is a plock signature */
-
-/* Header of a dlm_controld message; type holds one of the DLM_MSG_ values
-   and flags a combination of the DLM_MFLG_ bits defined above. */
-struct dlm_header {
- uint16_t version[3];
- uint16_t type; /* DLM_MSG_ */
- uint32_t nodeid; /* sender */
- uint32_t to_nodeid; /* recipient, 0 for all */
- uint32_t global_id; /* global unique id for this lockspace */
- uint32_t flags; /* DLM_MFLG_ */
- uint32_t msgdata; /* in-header payload depends on MSG type; lkid
- for deadlock, seq for lockspace membership */
- uint32_t msgdata2; /* second MSG-specific data */
- uint64_t pad;
-};
-
-/* State kept for one lockspace.  Instances are linked through `list` on the
-   global `lockspaces` list and are looked up by `name` or `global_id`. */
-struct lockspace {
- struct list_head list;
- char name[DLM_LOCKSPACE_LEN+1]; /* one extra byte so the name can be terminated */
- uint32_t global_id;
-
- /* lockspace membership stuff */
-
- cpg_handle_t cpg_handle;
- int cpg_client;
- int cpg_fd;
- int joining;
- int leaving;
- int kernel_stopped;
- int fs_registered;
- uint32_t change_seq;
- uint32_t started_count;
- struct change *started_change;
- struct list_head changes;
- struct list_head node_history;
-
- /* plock stuff */
-
- int plock_data_node;
- int need_plocks;
- int save_plocks;
- int disable_plock;
- uint32_t recv_plocks_data_count;
- uint32_t associated_mg_id;
- struct list_head saved_messages;
- struct list_head plock_resources;
- struct rb_root plock_resources_root;
- time_t last_checkpoint_time;
- time_t last_plock_time;
- struct timeval drop_resources_last;
-
-#if 0
- /* deadlock stuff */
- /* compiled out: these members do not exist in the built struct */
-
- int deadlk_low_nodeid;
- struct list_head deadlk_nodes;
- uint64_t deadlk_ckpt_handle;
- int deadlk_confchg_init;
- struct list_head transactions;
- struct list_head resources;
- struct timeval cycle_start_time;
- struct timeval cycle_end_time;
- struct timeval last_send_cycle_start;
- int cycle_running;
- int all_checkpoints_ready;
-#endif
-};
-
-/* action.c */
-void set_associated_id(uint32_t mg_id);
-int set_sysfs_control(char *name, int val);
-int set_sysfs_event_done(char *name, int val);
-int set_sysfs_id(char *name, uint32_t id);
-int set_configfs_members(char *name, int new_count, int *new_members,
- int renew_count, int *renew_members);
-int add_configfs_node(int nodeid, char *addr, int addrlen, int local);
-void del_configfs_node(int nodeid);
-void clear_configfs(void);
-int setup_configfs(void);
-int check_uncontrolled_lockspaces(void);
-int setup_misc_devices(void);
-int path_exists(const char *path);
-
-/* config.c */
-void setup_config(int update);
-int get_weight(int nodeid, char *lockspace);
-
-/* cpg.c */
-int setup_cpg_daemon(void);
-void close_cpg_daemon(void);
-void process_cpg_daemon(int ci);
-int set_protocol(void);
-void process_lockspace_changes(void);
-void dlm_send_message(struct lockspace *ls, char *buf, int len);
-int dlm_join_lockspace(struct lockspace *ls);
-int dlm_leave_lockspace(struct lockspace *ls);
-const char *msg_name(int type);
-void update_flow_control_status(void);
-void node_history_cluster_add(int nodeid);
-void node_history_cluster_remove(int nodeid);
-int set_node_info(struct lockspace *ls, int nodeid, struct dlmc_node *node);
-int set_lockspace_info(struct lockspace *ls, struct dlmc_lockspace *lockspace);
-int set_lockspaces(int *count, struct dlmc_lockspace **lss_out);
-int set_lockspace_nodes(struct lockspace *ls, int option, int *node_count,
- struct dlmc_node **nodes_out);
-int set_fs_notified(struct lockspace *ls, int nodeid);
-
-/* deadlock.c */
-void setup_deadlock(void);
-void send_cycle_start(struct lockspace *ls);
-void receive_checkpoint_ready(struct lockspace *ls, struct dlm_header *hd,
- int len);
-void receive_cycle_start(struct lockspace *ls, struct dlm_header *hd, int len);
-void receive_cycle_end(struct lockspace *ls, struct dlm_header *hd, int len);
-void receive_cancel_lock(struct lockspace *ls, struct dlm_header *hd, int len);
-void deadlk_confchg(struct lockspace *ls,
- const struct cpg_address *member_list,
- size_t member_list_entries,
- const struct cpg_address *left_list,
- size_t left_list_entries,
- const struct cpg_address *joined_list,
- size_t joined_list_entries);
-
-/* main.c */
-int do_read(int fd, void *buf, size_t count);
-int do_write(int fd, void *buf, size_t count);
-void client_dead(int ci);
-int client_add(int fd, void (*workfn)(int ci), void (*deadfn)(int ci));
-int client_fd(int ci);
-void client_ignore(int ci, int fd);
-void client_back(int ci, int fd);
-struct lockspace *find_ls(char *name);
-struct lockspace *find_ls_id(uint32_t id);
-const char *dlm_mode_str(int mode);
-void cluster_dead(int ci);
-
-/* member_cman.c */
-int setup_cluster(void);
-void close_cluster(void);
-void process_cluster(int ci);
-void update_cluster(void);
-int is_cluster_member(uint32_t nodeid);
-int setup_cluster_cfg(void);
-void close_cluster_cfg(void);
-void process_cluster_cfg(int ci);
-void kick_node_from_cluster(int nodeid);
-int fence_node_time(int nodeid, uint64_t *last_fenced_time);
-int fence_in_progress(int *count);
-
-/* netlink.c */
-int setup_netlink(void);
-void process_netlink(int ci);
-
-/* plock.c */
-int setup_plocks(void);
-void close_plocks(void);
-void process_plocks(int ci);
-void drop_resources_all(void);
-int limit_plocks(void);
-void receive_plock(struct lockspace *ls, struct dlm_header *hd, int len);
-void receive_own(struct lockspace *ls, struct dlm_header *hd, int len);
-void receive_sync(struct lockspace *ls, struct dlm_header *hd, int len);
-void receive_drop(struct lockspace *ls, struct dlm_header *hd, int len);
-void process_saved_plocks(struct lockspace *ls);
-void purge_plocks(struct lockspace *ls, int nodeid, int unmount);
-int copy_plock_state(struct lockspace *ls, char *buf, int *len_out);
-
-void send_all_plocks_data(struct lockspace *ls, uint32_t seq, uint32_t *plocks_data);
-void receive_plocks_data(struct lockspace *ls, struct dlm_header *hd, int len);
-void clear_plocks_data(struct lockspace *ls);
-
-/* logging.c */
-
-void init_logging(void);
-void setup_logging(void);
-void close_logging(void);
-void copy_log_dump(char *buf, int *len);
-void copy_log_dump_plock(char *buf, int *len);
-
-/* crc.c */
-uint32_t cpgname_to_crc(const char *data, int len);
-
-#endif
-
diff --git a/group/dlm_controld/logging.c b/group/dlm_controld/logging.c
deleted file mode 100644
index e6e82b7..0000000
--- a/group/dlm_controld/logging.c
+++ /dev/null
@@ -1,173 +0,0 @@
-#include "dlm_daemon.h"
-
-static int log_mode;
-static int syslog_facility;
-static int syslog_priority;
-static int logfile_priority;
-static char logfile[PATH_MAX];
-
-/* Load the compiled-in logging defaults into the file-scope settings and
-   hand them to logt_init(). */
-void init_logging(void)
-{
-	strcpy(logfile, DEFAULT_LOGFILE);
-	log_mode = DEFAULT_LOG_MODE;
-	syslog_facility = DEFAULT_SYSLOG_FACILITY;
-	syslog_priority = DEFAULT_SYSLOG_PRIORITY;
-
-	/* logfile_priority is the only one of these settings that can be
-	   controlled from the command line or an environment variable */
-	if (cfgd_debug_logfile)
-		logfile_priority = LOG_DEBUG;
-	else
-		logfile_priority = DEFAULT_LOGFILE_PRIORITY;
-
-	log_debug("logging mode %d syslog f %d p %d logfile p %d %s",
-		  log_mode, syslog_facility, syslog_priority,
-		  logfile_priority, logfile);
-
-	logt_init(DAEMON_NAME, log_mode, syslog_facility, syslog_priority,
-		  logfile_priority, logfile);
-}
-
-/* Log the current logging settings and pass them to logt_conf(). */
-void setup_logging(void)
-{
-	/* TODO: the settings are not re-read here; the disabled call was:
-	ccs_read_logging(ccs_handle, DAEMON_NAME,
-			 &cfgd_debug_logfile, &log_mode,
-			 &syslog_facility, &syslog_priority,
-			 &logfile_priority, logfile);
-	*/
-
-	log_debug("logging mode %d syslog f %d p %d logfile p %d %s",
-		  log_mode, syslog_facility, syslog_priority,
-		  logfile_priority, logfile);
-
-	logt_conf(DAEMON_NAME, log_mode, syslog_facility, syslog_priority,
-		  logfile_priority, logfile);
-}
-
-/* Shut logging down by calling logt_exit(). */
-void close_logging(void)
-{
-	logt_exit();
-}
-
-#define NAME_ID_SIZE 32
-#define LOG_STR_LEN 512
-static char log_str[LOG_STR_LEN];
-
-static char log_dump[LOG_DUMP_SIZE];
-static unsigned int log_point;
-static unsigned int log_wrap;
-
-static char log_dump_plock[LOG_DUMP_SIZE];
-static unsigned int log_point_plock;
-static unsigned int log_wrap_plock;
-
-/* Copy the contents of the circular buffer log_buf into buf in oldest-first
-   order and store the number of bytes copied in *len.  *point is the next
-   write offset and *wrap is nonzero once the buffer has wrapped. */
-static void log_copy(char *buf, int *len, char *log_buf,
-		     unsigned int *point, unsigned int *wrap)
-{
-	unsigned int pos = *point;
-	int tail;
-
-	if (*wrap) {
-		/* oldest data starts at pos and runs to the end of the
-		   buffer, the newest data is in front of pos */
-		tail = LOG_DUMP_SIZE - pos;
-		memcpy(buf, log_buf + pos, tail);
-		if (pos)
-			memcpy(buf + tail, log_buf, pos);
-		*len = LOG_DUMP_SIZE;
-		return;
-	}
-
-	if (!pos) {
-		/* nothing has been logged yet */
-		*len = 0;
-		return;
-	}
-
-	/* not wrapped: everything except the last byte written */
-	memcpy(buf, log_buf, pos - 1);
-	*len = pos - 1;
-}
-
-/* Copy the general debug log buffer into buf; *len receives the byte count. */
-void copy_log_dump(char *buf, int *len)
-{
-	log_copy(buf, len, log_dump, &log_point, &log_wrap);
-}
-
-/* Copy the plock log buffer into buf; *len receives the byte count. */
-void copy_log_dump_plock(char *buf, int *len)
-{
-	log_copy(buf, len, log_dump_plock, &log_point_plock,
-		 &log_wrap_plock);
-}
-
-/* Append the first len bytes of log_str to the circular buffer log_buf.
-   *point is the write offset and *wrap is set to 1 whenever the offset
-   runs off the end and restarts at 0.  level is not used. */
-static void log_save_str(int level, int len, char *log_buf,
-			 unsigned int *point, unsigned int *wrap)
-{
-	unsigned int pos = *point;
-	unsigned int wrapped = *wrap;
-	int n;
-
-	if (len < LOG_DUMP_SIZE - pos) {
-		/* the string fits in front of the end of the buffer */
-		memcpy(log_buf + pos, log_str, len);
-		pos += len;
-
-		if (pos == LOG_DUMP_SIZE) {
-			pos = 0;
-			wrapped = 1;
-		}
-	} else {
-		/* copy byte by byte so the offset can wrap mid-string */
-		for (n = 0; n < len; n++) {
-			log_buf[pos++] = log_str[n];
-
-			if (pos == LOG_DUMP_SIZE) {
-				pos = 0;
-				wrapped = 1;
-			}
-		}
-	}
-
-	*point = pos;
-	*wrap = wrapped;
-}
-
-/* Format one log line into the file-static log_str buffer as
-   "<time> <name> <message>\n" and deliver it.  The low 16 bits of level_in
-   are the level handed to logt_print(); the high 16 bits carry extra flags,
-   of which LOG_PLOCK selects the plock dump buffer.  name_in may be NULL. */
-void log_level(char *name_in, uint32_t level_in, const char *fmt, ...)
-{
- va_list ap;
- char name[NAME_ID_SIZE + 1];
- uint32_t level = level_in & 0x0000FFFF;
- uint32_t extra = level_in & 0xFFFF0000;
- int ret, pos = 0;
- int len = LOG_STR_LEN - 2; /* keep room for the trailing "\n\0" */
- int plock = extra & LOG_PLOCK;
-
- memset(name, 0, sizeof(name));
-
- if (name_in)
- snprintf(name, NAME_ID_SIZE, "%s ", name_in);
-
- ret = snprintf(log_str + pos, len - pos, "%llu %s",
- (unsigned long long)time(NULL), name);
-
- pos += ret;
-
- va_start(ap, fmt);
- ret = vsnprintf(log_str + pos, len - pos, fmt, ap);
- va_end(ap);
-
- /* on truncation vsnprintf returns the untruncated length; clamp pos */
- if (ret >= len - pos)
- pos = len - 1;
- else
- pos += ret;
-
- log_str[pos++] = '\n';
- log_str[pos++] = '\0';
-
- /* pos - 1 saves the line with its newline but without the terminator */
- if (level)
- log_save_str(level, pos - 1, log_dump, &log_point, &log_wrap);
- if (plock)
- log_save_str(level, pos - 1, log_dump_plock, &log_point_plock, &log_wrap_plock);
- if (level)
- logt_print(level, "%s", log_str);
-
- if (!daemon_debug_opt)
- return;
-
- /* with daemon_debug_opt set, also echo the line to stderr */
- if (level || (plock && cfgd_plock_debug))
- fprintf(stderr, "%s", log_str);
-}
-
diff --git a/group/dlm_controld/main.c b/group/dlm_controld/main.c
deleted file mode 100644
index b0d9b03..0000000
--- a/group/dlm_controld/main.c
+++ /dev/null
@@ -1,1274 +0,0 @@
-#include "dlm_daemon.h"
-#include <pthread.h>
-#include <linux/netlink.h>
-#include <linux/genetlink.h>
-#include <linux/dlm_netlink.h>
-
-#define CLIENT_NALLOC 32
-static int client_maxi;
-static int client_size = 0;
-static struct client *client = NULL;
-static struct pollfd *pollfd = NULL;
-static pthread_t query_thread;
-static pthread_mutex_t query_mutex;
-static struct list_head fs_register_list;
-static int kernel_monitor_fd;
-
-/* One slot of the client[] array; the slot at index i is paired with
-   pollfd[i].  fd is -1 while the slot is unused. */
-struct client {
- int fd;
- void *workfn; /* void (*)(int ci), called when the fd is readable */
- void *deadfn; /* void (*)(int ci), called on POLLERR/POLLHUP/POLLNVAL */
- struct lockspace *ls;
-};
-
-/* Read exactly count bytes from fd into buf, restarting after EINTR.
-   Returns 0 once everything has been read and -1 on end-of-file or on
-   any other read error. */
-int do_read(int fd, void *buf, size_t count)
-{
-	char *dst = buf;
-	int done = 0;
-	int n;
-
-	while (done < count) {
-		n = read(fd, dst + done, count - done);
-		if (n > 0) {
-			done += n;
-			continue;
-		}
-		if (n == -1 && errno == EINTR)
-			continue;
-		/* n == 0 is end-of-file, anything else is a read error */
-		return -1;
-	}
-	return 0;
-}
-
-/* Write all count bytes of buf to fd, restarting after EINTR and continuing
-   after short writes.  Returns 0 on success; on any other write error the
-   error is logged and the negative write() result is returned. */
-int do_write(int fd, void *buf, size_t count)
-{
-	int sent = 0;
-	int n;
-
-	for (;;) {
-		n = write(fd, (char *)buf + sent, count);
-		if (n == -1 && errno == EINTR)
-			continue;
-		if (n < 0) {
-			log_error("write errno %d", errno);
-			return n;
-		}
-		if (n == count)
-			return 0;
-
-		/* short write: advance past what was accepted and go on */
-		count -= n;
-		sent += n;
-	}
-}
-
-/* Grow the parallel client[] and pollfd[] arrays by CLIENT_NALLOC slots and
-   mark the new slots unused (fd -1).
-
-   The function has no way to report failure and its caller retries until a
-   free slot exists, so an allocation failure is logged and the daemon
-   exits.  Previously the failure was logged and the loop below then wrote
-   through a NULL pointer. */
-static void client_alloc(void)
-{
-	size_t new_size = (size_t)client_size + CLIENT_NALLOC;
-	struct client *new_client;
-	struct pollfd *new_pollfd;
-	int i;
-
-	/* realloc(NULL, n) acts like malloc(n), so one path covers both the
-	   first allocation and later growth.  The results go through
-	   temporaries so the old arrays are not lost if realloc fails. */
-	new_client = realloc(client, new_size * sizeof(struct client));
-	if (new_client)
-		client = new_client;
-
-	new_pollfd = realloc(pollfd, new_size * sizeof(struct pollfd));
-	if (new_pollfd)
-		pollfd = new_pollfd;
-	else
-		log_error("can't alloc for pollfd");
-
-	if (!new_client || !new_pollfd) {
-		log_error("can't alloc for client array");
-		exit(EXIT_FAILURE);
-	}
-
-	for (i = client_size; i < client_size + CLIENT_NALLOC; i++) {
-		client[i].workfn = NULL;
-		client[i].deadfn = NULL;
-		client[i].fd = -1;
-		pollfd[i].fd = -1;
-		pollfd[i].revents = 0;
-	}
-	client_size += CLIENT_NALLOC;
-}
-
-/* Default dead handler: close the client's fd and release its slot so that
-   client_add() can reuse it.  deadfn is left as it was. */
-void client_dead(int ci)
-{
-	struct client *c = &client[ci];
-
-	close(c->fd);
-	c->fd = -1;
-	c->workfn = NULL;
-	pollfd[ci].fd = -1;
-}
-
-/* Register fd in the first unused client slot, polling it for POLLIN, and
-   return the slot index.  workfn is stored as the work handler; deadfn
-   defaults to client_dead() when NULL.  The arrays are grown whenever no
-   slot is free. */
-int client_add(int fd, void (*workfn)(int ci), void (*deadfn)(int ci))
-{
-	int slot;
-
-	if (!client)
-		client_alloc();
-
-	for (;;) {
-		for (slot = 0; slot < client_size; slot++) {
-			if (client[slot].fd != -1)
-				continue;
-
-			client[slot].workfn = workfn;
-			client[slot].deadfn = deadfn ? deadfn : client_dead;
-			client[slot].fd = fd;
-			pollfd[slot].fd = fd;
-			pollfd[slot].events = POLLIN;
-			if (slot > client_maxi)
-				client_maxi = slot;
-			return slot;
-		}
-
-		/* every slot is taken: add more and scan again */
-		client_alloc();
-	}
-}
-
-/* Return the fd stored in client slot ci. */
-int client_fd(int ci)
-{
-	const struct client *c = &client[ci];
-
-	return c->fd;
-}
-
-/* Stop polling slot ci without releasing it; client[ci] is left untouched.
-   The fd argument is not used. */
-void client_ignore(int ci, int fd)
-{
-	struct pollfd *pfd = &pollfd[ci];
-
-	pfd->events = 0;
-	pfd->fd = -1;
-}
-
-/* Resume polling slot ci for POLLIN on fd (the counterpart of
-   client_ignore()). */
-void client_back(int ci, int fd)
-{
-	struct pollfd *pfd = &pollfd[ci];
-
-	pfd->events = POLLIN;
-	pfd->fd = fd;
-}
-
-/* Signal handler: request shutdown by setting daemon_quit.  The signal
-   number is not examined. */
-static void sigterm_handler(int sig)
-{
-	daemon_quit = 1;
-}
-
-/* Allocate a zero-filled lockspace called name (at most DLM_LOCKSPACE_LEN
-   bytes of it are kept) with its list heads and plock tree initialized.
-   Returns NULL when the allocation fails. */
-static struct lockspace *create_ls(char *name)
-{
-	struct lockspace *ls = calloc(1, sizeof(struct lockspace));
-
-	if (!ls)
-		return NULL;
-
-	/* name[] has DLM_LOCKSPACE_LEN+1 zeroed bytes, so the copy always
-	   ends up terminated */
-	strncpy(ls->name, name, DLM_LOCKSPACE_LEN);
-
-	ls->plock_resources_root = RB_ROOT;
-	INIT_LIST_HEAD(&ls->plock_resources);
-	INIT_LIST_HEAD(&ls->saved_messages);
-	INIT_LIST_HEAD(&ls->node_history);
-	INIT_LIST_HEAD(&ls->changes);
-#if 0
-	INIT_LIST_HEAD(&ls->deadlk_nodes);
-	INIT_LIST_HEAD(&ls->transactions);
-	INIT_LIST_HEAD(&ls->resources);
-#endif
-	return ls;
-}
-
-/* Return the lockspace on the global list whose name equals name exactly,
-   or NULL when there is none.  name must be a terminated string. */
-struct lockspace *find_ls(char *name)
-{
-	struct lockspace *ls;
-
-	list_for_each_entry(ls, &lockspaces, list) {
-		/* same length and same bytes is plain string equality */
-		if (strcmp(ls->name, name) == 0)
-			return ls;
-	}
-	return NULL;
-}
-
-/* Return the lockspace on the global list whose global_id is id, or NULL
-   when there is none. */
-struct lockspace *find_ls_id(uint32_t id)
-{
-	struct lockspace *ls;
-
-	list_for_each_entry(ls, &lockspaces, list) {
-		if (ls->global_id != id)
-			continue;
-		return ls;
-	}
-	return NULL;
-}
-
-/* One entry of fs_register_list: a name registered through
-   fs_register_add(). */
-struct fs_reg {
- struct list_head list;
- char name[DLM_LOCKSPACE_LEN+1];
-};
-
-/* Return 1 if name is present on fs_register_list, 0 otherwise. */
-static int fs_register_check(char *name)
-{
-	struct fs_reg *fs;
-
-	list_for_each_entry(fs, &fs_register_list, list) {
-		if (strcmp(name, fs->name) == 0)
-			return 1;
-	}
-	return 0;
-}
-
-/* Add name to fs_register_list.  Returns 0 on success, -EALREADY if the
-   name is already registered and -ENOMEM if no entry can be allocated. */
-static int fs_register_add(char *name)
-{
-	struct fs_reg *fs;
-
-	if (fs_register_check(name))
-		return -EALREADY;
-
-	/* Zero the entry: strncpy() writes no terminator when name is
-	   DLM_LOCKSPACE_LEN bytes or longer, and fs->name is later read
-	   with strcmp(), so the extra byte of name[] must already be 0. */
-	fs = calloc(1, sizeof(*fs));
-	if (!fs)
-		return -ENOMEM;
-	strncpy(fs->name, name, DLM_LOCKSPACE_LEN);
-	list_add(&fs->list, &fs_register_list);
-	return 0;
-}
-
-/* Remove and free the fs_register_list entry called name, if there is one. */
-static void fs_register_del(char *name)
-{
-	struct fs_reg *fs;
-
-	list_for_each_entry(fs, &fs_register_list, list) {
-		if (strcmp(name, fs->name) != 0)
-			continue;
-
-		/* leave the loop right away: fs is gone after free() */
-		list_del(&fs->list);
-		free(fs);
-		return;
-	}
-}
-
-#define MAXARGS 8
-
-/* Split buf in place at each sep character, storing the start of every
-   piece in argv[] (argv[0] is buf itself) and the number of pieces in
-   *argc.  Splitting stops after the separator that ends piece number
-   `want`, or when MAXARGS pieces have been found, or when no separator is
-   left.  Returns the text following the last separator consumed for
-   `want`; otherwise the address just past the terminator of the final
-   piece. */
-static char *get_args(char *buf, int *argc, char **argv, char sep, int want)
-{
-	char *cur = buf;
-	char *rest = NULL;
-	char *hit;
-	int n = 1;
-
-	argv[0] = buf;
-
-	while (n < MAXARGS) {
-		hit = strchr(cur, sep);
-		if (!hit)
-			break;
-		*hit = '\0';
-
-		if (n == want) {
-			rest = hit + 1;
-			break;
-		}
-
-		argv[n] = hit + 1;
-		cur = hit + 1;
-		n++;
-	}
-	*argc = n;
-
-	/* we ended by hitting \0, return the point following that */
-	if (!rest)
-		rest = strchr(cur, '\0') + 1;
-
-	return rest;
-}
-
-/* Return the two-letter name of a DLM_LOCK_ mode, or "??" for any other
-   value. */
-const char *dlm_mode_str(int mode)
-{
-	const char *str = "??";
-
-	switch (mode) {
-	case DLM_LOCK_IV: str = "IV"; break;
-	case DLM_LOCK_NL: str = "NL"; break;
-	case DLM_LOCK_CR: str = "CR"; break;
-	case DLM_LOCK_CW: str = "CW"; break;
-	case DLM_LOCK_PR: str = "PR"; break;
-	case DLM_LOCK_PW: str = "PW"; break;
-	case DLM_LOCK_EX: str = "EX"; break;
-	}
-	return str;
-}
-
-/* recv "online" (join) and "offline" (leave) messages from dlm via uevents
-
-   The message is split at '/' into four pieces: the action ("online@" or
-   "offline@"), an ignored piece, the subsystem (must be "dlm") and the
-   lockspace name.  Messages that do not split into exactly four pieces are
-   logged and dropped. */
-
-static void process_uevent(int ci)
-{
-	struct lockspace *ls;
-	char buf[MAXLINE];
-	char *argv[MAXARGS], *act, *sys;
-	int rv, argc = 0;
-
-	memset(buf, 0, sizeof(buf));
-	memset(argv, 0, sizeof(char *) * MAXARGS);
-
-	/* receive at most sizeof(buf) - 1 bytes so the zeroed last byte
-	   always terminates the string searched below */
-	do {
-		rv = recv(client[ci].fd, buf, sizeof(buf) - 1, 0);
-	} while (rv < 0 && errno == EINTR);
-
-	if (rv < 0) {
-		if (errno != EAGAIN)
-			log_error("uevent recv error %d errno %d", rv, errno);
-		return;
-	}
-
-	if (!strstr(buf, "dlm"))
-		return;
-
-	log_debug("uevent: %s", buf);
-
-	get_args(buf, &argc, argv, '/', 4);
-	if (argc != 4) {
-		/* argv[2] and argv[3] may be NULL here; do not go on */
-		log_error("uevent message has %d args", argc);
-		return;
-	}
-	act = argv[0];
-	sys = argv[2];
-
-	if (strcmp(sys, "dlm"))
-		return;
-
-	log_debug("kernel: %s %s", act, argv[3]);
-
-	rv = 0;
-
-	if (!strcmp(act, "online@")) {
-		ls = find_ls(argv[3]);
-		if (ls) {
-			rv = -EEXIST;
-			goto out;
-		}
-
-		ls = create_ls(argv[3]);
-		if (!ls) {
-			rv = -ENOMEM;
-			goto out;
-		}
-
-		if (fs_register_check(ls->name))
-			ls->fs_registered = 1;
-
-		rv = dlm_join_lockspace(ls);
-		if (rv) {
-			/* ls already freed */
-			goto out;
-		}
-
-	} else if (!strcmp(act, "offline@")) {
-		ls = find_ls(argv[3]);
-		if (!ls) {
-			rv = -ENOENT;
-			goto out;
-		}
-
-		dlm_leave_lockspace(ls);
-	}
-out:
-	if (rv < 0)
-		log_error("process_uevent %s error %d errno %d",
-			  act, rv, errno);
-}
-
-/* Open a NETLINK_KOBJECT_UEVENT datagram socket bound to this process's pid
-   and multicast group mask 1.  Returns the socket, or the negative result
-   of the failing call. */
-static int setup_uevent(void)
-{
-	struct sockaddr_nl snl;
-	int sock, rv;
-
-	sock = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
-	if (sock < 0) {
-		log_error("uevent netlink socket");
-		return sock;
-	}
-
-	memset(&snl, 0, sizeof(snl));
-	snl.nl_groups = 1;
-	snl.nl_pid = getpid();
-	snl.nl_family = AF_NETLINK;
-
-	rv = bind(sock, (struct sockaddr *) &snl, sizeof(snl));
-	if (rv >= 0)
-		return sock;
-
-	log_error("uevent bind error %d errno %d", rv, errno);
-	close(sock);
-	return rv;
-}
-
-/* Fill *h for a reply: zero it, then set magic, version, command, data
-   (from result), len (header size plus extra_len) and, when name is not
-   NULL, up to DLM_LOCKSPACE_LEN bytes of name. */
-static void init_header(struct dlmc_header *h, int cmd, char *name, int result,
-			int extra_len)
-{
-	memset(h, 0, sizeof(*h));
-
-	if (name)
-		strncpy(h->name, name, DLM_LOCKSPACE_LEN);
-
-	h->data = result;
-	h->command = cmd;
-	h->len = sizeof(*h) + extra_len;
-	h->version = DLMC_VERSION;
-	h->magic = DLMC_MAGIC;
-}
-
-static char copy_buf[LOG_DUMP_SIZE];
-
-/* Send the debug log to fd: a DLMC_CMD_DUMP_DEBUG header followed by the
-   log bytes, if there are any.  send() results are not checked. */
-static void query_dump_debug(int fd)
-{
-	struct dlmc_header hdr;
-	int nbytes = 0;
-
-	copy_log_dump(copy_buf, &nbytes);
-	init_header(&hdr, DLMC_CMD_DUMP_DEBUG, NULL, 0, nbytes);
-
-	send(fd, &hdr, sizeof(hdr), MSG_NOSIGNAL);
-	if (nbytes)
-		send(fd, copy_buf, nbytes, MSG_NOSIGNAL);
-}
-
-/* Send the plock log to fd: a header followed by the log bytes, if there
-   are any.  send() results are not checked.
-   NOTE(review): the header carries DLMC_CMD_DUMP_DEBUG, not
-   DLMC_CMD_DUMP_LOG_PLOCK; confirm against the client before changing. */
-static void query_dump_log_plock(int fd)
-{
-	struct dlmc_header hdr;
-	int nbytes = 0;
-
-	copy_log_dump_plock(copy_buf, &nbytes);
-	init_header(&hdr, DLMC_CMD_DUMP_DEBUG, NULL, 0, nbytes);
-
-	send(fd, &hdr, sizeof(hdr), MSG_NOSIGNAL);
-	if (nbytes)
-		send(fd, copy_buf, nbytes, MSG_NOSIGNAL);
-}
-
-/* Send the plock state of lockspace name to fd: a DLMC_CMD_DUMP_PLOCKS
-   header whose data is the copy_plock_state() result (-ENOENT when the
-   lockspace is unknown), followed by the state bytes, if there are any. */
-static void query_dump_plocks(int fd, char *name)
-{
-	struct dlmc_header hdr;
-	struct lockspace *ls;
-	int nbytes = 0;
-	int result;
-
-	ls = find_ls(name);
-	if (ls)
-		result = copy_plock_state(ls, copy_buf, &nbytes);
-	else
-		result = -ENOENT;
-
-	init_header(&hdr, DLMC_CMD_DUMP_PLOCKS, name, result, nbytes);
-	send(fd, &hdr, sizeof(hdr), MSG_NOSIGNAL);
-
-	if (nbytes)
-		send(fd, copy_buf, nbytes, MSG_NOSIGNAL);
-}
-
-/* Build one buffer holding a reply header followed by buflen bytes of buf
-   and send it to the client with a single do_write() call.  The reply is
-   silently dropped when the buffer cannot be allocated. */
-
-static void do_reply(int fd, int cmd, char *name, int result, int option,
-		     char *buf, int buflen)
-{
-	int total = sizeof(struct dlmc_header) + buflen;
-	char *msg = malloc(total);
-	struct dlmc_header *hdr = (struct dlmc_header *)msg;
-
-	if (!msg)
-		return;
-	memset(msg, 0, total);
-
-	init_header(hdr, cmd, name, result, buflen);
-	hdr->option = option;
-
-	/* the payload goes directly behind the header */
-	if (buf && buflen)
-		memcpy(hdr + 1, buf, buflen);
-
-	do_write(fd, msg, total);
-
-	free(msg);
-}
-
-/* Reply on fd with a struct dlmc_lockspace describing lockspace name.  The
-   reply's result is the set_lockspace_info() return value, or -ENOENT when
-   the lockspace is unknown, in which case the struct is all zeroes. */
-static void query_lockspace_info(int fd, char *name)
-{
-	struct lockspace *ls;
-	struct dlmc_lockspace lockspace;
-	int rv;
-
-	/* Clear the reply before anything can jump to the send: it is sent
-	   even on failure, and must not carry stale stack contents. */
-	memset(&lockspace, 0, sizeof(lockspace));
-
-	ls = find_ls(name);
-	if (ls)
-		rv = set_lockspace_info(ls, &lockspace);
-	else
-		rv = -ENOENT;
-
-	do_reply(fd, DLMC_CMD_LOCKSPACE_INFO, name, rv, 0,
-		 (char *)&lockspace, sizeof(lockspace));
-}
-
-/* Reply on fd with a struct dlmc_node describing node nodeid of lockspace
-   name.  The reply's result is the set_node_info() return value, or
-   -ENOENT when the lockspace is unknown, in which case the struct is all
-   zeroes. */
-static void query_node_info(int fd, char *name, int nodeid)
-{
-	struct lockspace *ls;
-	struct dlmc_node node;
-	int rv;
-
-	/* Clear the reply before anything can jump to the send: it is sent
-	   even on failure, and must not carry stale stack contents. */
-	memset(&node, 0, sizeof(node));
-
-	ls = find_ls(name);
-	if (ls)
-		rv = set_node_info(ls, nodeid, &node);
-	else
-		rv = -ENOENT;
-
-	do_reply(fd, DLMC_CMD_NODE_INFO, name, rv, 0,
-		 (char *)&node, sizeof(node));
-}
-
-/* Reply on fd with up to max struct dlmc_lockspace entries obtained from
-   set_lockspaces().  The reply's result is the number of entries, -E2BIG
-   when more than max exist (max are sent), or the negative
-   set_lockspaces() error (nothing is sent after the header). */
-static void query_lockspaces(int fd, int max)
-{
-	int ls_count = 0;
-	struct dlmc_lockspace *lss = NULL;
-	int rv, result;
-
-	/* max comes from the client; a negative value would otherwise become
-	   a negative entry count and reply length below */
-	if (max < 0)
-		max = 0;
-
-	rv = set_lockspaces(&ls_count, &lss);
-	if (rv < 0) {
-		result = rv;
-		ls_count = 0;
-	} else if (ls_count > max) {
-		result = -E2BIG;
-		ls_count = max;
-	} else {
-		result = ls_count;
-	}
-
-	do_reply(fd, DLMC_CMD_LOCKSPACES, NULL, result, 0,
-		 (char *)lss, ls_count * sizeof(struct dlmc_lockspace));
-
-	free(lss);
-}
-
-/* Reply on fd with up to max struct dlmc_node entries for lockspace name,
-   obtained from set_lockspace_nodes() with the given option.  The reply's
-   result is the number of entries, -E2BIG when more than max exist (max
-   are sent), -ENOENT when the lockspace is unknown, or the negative
-   set_lockspace_nodes() error. */
-static void query_lockspace_nodes(int fd, char *name, int option, int max)
-{
-	struct lockspace *ls;
-	int node_count = 0;
-	struct dlmc_node *nodes = NULL;
-	int rv, result;
-
-	/* max comes from the client; a negative value would otherwise become
-	   a negative entry count and reply length below */
-	if (max < 0)
-		max = 0;
-
-	ls = find_ls(name);
-	if (!ls) {
-		result = -ENOENT;
-		node_count = 0;
-		goto out;
-	}
-
-	rv = set_lockspace_nodes(ls, option, &node_count, &nodes);
-	if (rv < 0) {
-		result = rv;
-		node_count = 0;
-		goto out;
-	}
-
-	/* node_count is the number of structs copied/returned; the caller's
-	   max may be less than that, in which case we copy as many as they
-	   asked for and return -E2BIG */
-
-	if (node_count > max) {
-		result = -E2BIG;
-		node_count = max;
-	} else {
-		result = node_count;
-	}
-out:
-	do_reply(fd, DLMC_CMD_LOCKSPACE_NODES, name, result, 0,
-		 (char *)nodes, node_count * sizeof(struct dlmc_node));
-
-	free(nodes);
-}
-
-/* Work function for an accepted client connection: read one dlmc_header
-   (plus any extra bytes its len announces), check magic and major version,
-   and carry out the FS_REGISTER, FS_UNREGISTER or FS_NOTIFIED command.
-   Malformed or unknown requests are logged and ignored. */
-static void process_connection(int ci)
-{
-	struct dlmc_header h;
-	char name[DLM_LOCKSPACE_LEN + 1];
-	char *extra = NULL;
-	int rv, extra_len;
-	struct lockspace *ls;
-
-	rv = do_read(client[ci].fd, &h, sizeof(h));
-	if (rv < 0) {
-		log_debug("connection %d read error %d", ci, rv);
-		goto out;
-	}
-
-	if (h.magic != DLMC_MAGIC) {
-		log_debug("connection %d magic error %x", ci, h.magic);
-		goto out;
-	}
-
-	if ((h.version & 0xFFFF0000) != (DLMC_VERSION & 0xFFFF0000)) {
-		log_debug("connection %d version error %x", ci, h.version);
-		goto out;
-	}
-
-	/* h.name reserves no byte for a terminator, but the lookups below
-	   treat the name as a C string; work on a terminated copy. */
-	memcpy(name, h.name, DLM_LOCKSPACE_LEN);
-	name[DLM_LOCKSPACE_LEN] = '\0';
-
-	/* NOTE(review): h.len is client-controlled and not bounded here, and
-	   the extra bytes are read but not used by any command below. */
-	if (h.len > sizeof(h)) {
-		extra_len = h.len - sizeof(h);
-		extra = malloc(extra_len);
-		if (!extra) {
-			log_error("process_connection no mem %d", extra_len);
-			goto out;
-		}
-		memset(extra, 0, extra_len);
-
-		rv = do_read(client[ci].fd, extra, extra_len);
-		if (rv < 0) {
-			log_debug("connection %d extra read error %d", ci, rv);
-			goto out;
-		}
-	}
-
-	switch (h.command) {
-	case DLMC_CMD_FS_REGISTER:
-		rv = fs_register_add(name);
-		ls = find_ls(name);
-		if (ls)
-			ls->fs_registered = 1;
-		do_reply(client[ci].fd, DLMC_CMD_FS_REGISTER, name, rv, 0,
-			 NULL, 0);
-		break;
-
-	case DLMC_CMD_FS_UNREGISTER:
-		/* no reply is sent for this command */
-		fs_register_del(name);
-		ls = find_ls(name);
-		if (ls)
-			ls->fs_registered = 0;
-		break;
-
-	case DLMC_CMD_FS_NOTIFIED:
-		ls = find_ls(name);
-		if (ls)
-			rv = set_fs_notified(ls, h.data);
-		else
-			rv = -ENOENT;
-		/* pass back the nodeid provided by caller in option field */
-		do_reply(client[ci].fd, DLMC_CMD_FS_NOTIFIED, name, rv,
-			 h.data, NULL, 0);
-		break;
-
-#if 0
-	case DLMC_CMD_DEADLOCK_CHECK:
-		ls = find_ls(name);
-		if (ls)
-			send_cycle_start(ls);
-		client_dead(ci);
-		break;
-#endif
-	default:
-		log_error("process_connection %d unknown command %d",
-			  ci, h.command);
-	}
-out:
-	free(extra);
-}
-
-/* Work function for the listening socket: accept one connection and
-   register it with process_connection() as its work function. */
-static void process_listener(int ci)
-{
-	int conn_fd = accept(client[ci].fd, NULL, NULL);
-	int slot;
-
-	if (conn_fd < 0) {
-		log_error("process_listener: accept error %d %d",
-			  conn_fd, errno);
-		return;
-	}
-
-	slot = client_add(conn_fd, process_connection, NULL);
-
-	log_debug("client connection %d fd %d", slot, conn_fd);
-}
-
-/* Create a listening AF_LOCAL stream socket named sock_path.  The name is
-   stored after a leading '\0' byte in sun_path, which on Linux selects the
-   abstract socket namespace (see unix(7)).  Returns the socket, or a
-   negative value on failure. */
-static int setup_listener(const char *sock_path)
-{
-	struct sockaddr_un addr;
-	socklen_t addrlen;
-	size_t path_len = strlen(sock_path);
-	int rv, s;
-
-	/* sun_path must hold the leading '\0', the name and its terminator */
-	if (path_len + 2 > sizeof(addr.sun_path)) {
-		log_error("socket path too long %s", sock_path);
-		return -1;
-	}
-
-	/* we listen for new client connections on socket s */
-
-	s = socket(AF_LOCAL, SOCK_STREAM, 0);
-	if (s < 0) {
-		log_error("socket error %d %d", s, errno);
-		return s;
-	}
-
-	memset(&addr, 0, sizeof(addr));
-	addr.sun_family = AF_LOCAL;
-	memcpy(&addr.sun_path[1], sock_path, path_len + 1);
-	addrlen = sizeof(sa_family_t) + path_len + 1;
-
-	rv = bind(s, (struct sockaddr *) &addr, addrlen);
-	if (rv < 0) {
-		log_error("bind error %d %d", rv, errno);
-		close(s);
-		return rv;
-	}
-
-	rv = listen(s, 5);
-	if (rv < 0) {
-		log_error("listen error %d %d", rv, errno);
-		close(s);
-		return rv;
-	}
-	return s;
-}
-
-/* Take query_mutex, which the main loop and the query thread both hold
-   while they work. */
-static void query_lock(void)
-{
-	pthread_mutex_lock(&query_mutex);
-}
-
-/* Release query_mutex taken by query_lock(). */
-static void query_unlock(void)
-{
-	pthread_mutex_unlock(&query_mutex);
-}
-
-/* This is a thread, so we have to be careful, don't call log_ functions.
-   We need a thread to process queries because the main thread may block
-   for long periods when writing to sysfs to stop dlm-kernel (and maybe
-   other places).
-
-   Each accepted connection carries exactly one request: its header is
-   read and checked, the query is answered while holding query_mutex, and
-   the connection is closed.  The thread ends when the listener cannot be
-   set up or accept() fails. */
-
-static void *process_queries(void *arg)
-{
-	struct dlmc_header h;
-	char name[DLM_LOCKSPACE_LEN + 1];
-	int s, f, rv;
-
-	rv = setup_listener(DLMC_QUERY_SOCK_PATH);
-	if (rv < 0)
-		return NULL;
-
-	s = rv;
-
-	for (;;) {
-		f = accept(s, NULL, NULL);
-		if (f < 0)
-			return NULL;
-
-		rv = do_read(f, &h, sizeof(h));
-		if (rv < 0 || h.magic != DLMC_MAGIC ||
-		    (h.version & 0xFFFF0000) != (DLMC_VERSION & 0xFFFF0000)) {
-			close(f);
-			continue;
-		}
-
-		/* h.name reserves no byte for a terminator, but the query
-		   functions treat the name as a C string; pass a terminated
-		   copy. */
-		memcpy(name, h.name, DLM_LOCKSPACE_LEN);
-		name[DLM_LOCKSPACE_LEN] = '\0';
-
-		query_lock();
-
-		switch (h.command) {
-		case DLMC_CMD_DUMP_DEBUG:
-			query_dump_debug(f);
-			break;
-		case DLMC_CMD_DUMP_LOG_PLOCK:
-			query_dump_log_plock(f);
-			break;
-		case DLMC_CMD_DUMP_PLOCKS:
-			query_dump_plocks(f, name);
-			break;
-		case DLMC_CMD_LOCKSPACE_INFO:
-			query_lockspace_info(f, name);
-			break;
-		case DLMC_CMD_NODE_INFO:
-			query_node_info(f, name, h.data);
-			break;
-		case DLMC_CMD_LOCKSPACES:
-			query_lockspaces(f, h.data);
-			break;
-		case DLMC_CMD_LOCKSPACE_NODES:
-			query_lockspace_nodes(f, name, h.option, h.data);
-			break;
-		default:
-			break;
-		}
-		query_unlock();
-
-		close(f);
-	}
-}
-
-/* Initialize query_mutex and start the query thread.  Returns 0 on success
-   or a negative error number when the thread cannot be created. */
-static int setup_queries(void)
-{
-	int rv;
-
-	pthread_mutex_init(&query_mutex, NULL);
-
-	/* pthread_create() returns 0 or a positive error number (it does not
-	   return -1 or set errno), so test for nonzero and negate it to match
-	   the "rv < 0" checks used by the caller. */
-	rv = pthread_create(&query_thread, NULL, process_queries, NULL);
-	if (rv) {
-		log_error("can't create query thread");
-		return -rv;
-	}
-	return 0;
-}
-
-/* The dlm in kernels before 2.6.28 does not have the monitor device, in
-   which case monitor_minor is 0 and nothing is opened.  Otherwise we keep
-   this fd open as long as we're running: if we exit/terminate while
-   lockspaces exist in the kernel, the kernel will detect a close on this
-   fd and stop the lockspaces. */
-
-static void setup_monitor(void)
-{
-	if (monitor_minor) {
-		kernel_monitor_fd = open("/dev/misc/dlm-monitor", O_RDONLY);
-		log_debug("/dev/misc/dlm-monitor fd %d", kernel_monitor_fd);
-	}
-}
-
-/* Dead handler for the cluster connections: flag the cluster as down and
-   request shutdown.  The error is logged only on the first call. */
-void cluster_dead(int ci)
-{
-	int first = !cluster_down;
-
-	if (first)
-		log_error("cluster is down, exiting");
-	cluster_down = 1;
-	daemon_quit = 1;
-}
-
-/* Daemon main loop: set up every event source in order, registering each
-   fd with client_add(), then poll and dispatch until shutdown.  Any setup
-   step that returns a negative value jumps to the cleanup at `out`. */
-static void loop(void)
-{
- struct lockspace *ls;
- int poll_timeout = -1;
- int rv, i;
- void (*workfn) (int ci);
- void (*deadfn) (int ci);
-
- rv = setup_queries();
- if (rv < 0)
- goto out;
-
- rv = setup_listener(DLMC_SOCK_PATH);
- if (rv < 0)
- goto out;
- client_add(rv, process_listener, NULL);
-
- rv = setup_cluster_cfg();
- if (rv < 0)
- goto out;
- if (rv > 0)
- client_add(rv, process_cluster_cfg, cluster_dead);
-
- rv = setup_cluster();
- if (rv < 0)
- goto out;
- client_add(rv, process_cluster, cluster_dead);
-
- setup_config(0);
-
- setup_logging();
-
- rv = check_uncontrolled_lockspaces();
- if (rv < 0)
- goto out;
-
- rv = setup_misc_devices();
- if (rv < 0)
- goto out;
-
- setup_monitor();
-
- rv = setup_configfs(); /* calls update_cluster() */
- if (rv < 0)
- goto out;
-
- rv = setup_uevent();
- if (rv < 0)
- goto out;
- client_add(rv, process_uevent, NULL);
-
- rv = setup_cpg_daemon();
- if (rv < 0)
- goto out;
- client_add(rv, process_cpg_daemon, cluster_dead);
-
- rv = set_protocol();
- if (rv < 0)
- goto out;
-
-#if 0
- if (cfgd_enable_deadlk) {
- rv = setup_netlink();
- if (rv < 0)
- goto out;
- client_add(rv, process_netlink, NULL);
-
- setup_deadlock();
- }
-#endif
-
- rv = setup_plocks();
- if (rv < 0)
- goto out;
- plock_fd = rv;
- plock_ci = client_add(rv, process_plocks, NULL);
-
- for (;;) {
- rv = poll(pollfd, client_maxi + 1, poll_timeout);
- /* NOTE(review): this branch runs for any EINTR, so "shutdown
- ignored" is also logged when daemon_quit was not set */
- if (rv == -1 && errno == EINTR) {
- if (daemon_quit && list_empty(&lockspaces))
- goto out;
- log_error("shutdown ignored, active lockspaces");
- daemon_quit = 0;
- continue;
- }
- if (rv < 0) {
- log_error("poll errno %d", errno);
- goto out;
- }
-
- /* handlers run under query_mutex, which the query thread also takes */
- query_lock();
-
- for (i = 0; i <= client_maxi; i++) {
- if (client[i].fd < 0)
- continue;
- if (pollfd[i].revents & POLLIN) {
- workfn = client[i].workfn;
- workfn(i);
- }
- if (pollfd[i].revents & (POLLERR | POLLHUP | POLLNVAL)) {
- deadfn = client[i].deadfn;
- deadfn(i);
- }
- }
- query_unlock();
-
- /* set by a dead handler such as cluster_dead() */
- if (daemon_quit)
- break;
-
- query_lock();
-
- /* block in poll indefinitely unless one of the flags below
- asks for a retry in 1000 ms */
- poll_timeout = -1;
-
- if (poll_fencing || poll_quorum || poll_fs) {
- process_lockspace_changes();
- poll_timeout = 1000;
- }
-
- if (poll_ignore_plock) {
- if (!limit_plocks()) {
- poll_ignore_plock = 0;
- client_back(plock_ci, plock_fd);
- }
- poll_timeout = 1000;
- }
-
- if (poll_drop_plock) {
- drop_resources_all();
- if (poll_drop_plock)
- poll_timeout = 1000;
- }
-
- query_unlock();
- }
- out:
- log_debug("shutdown");
- close_plocks();
- close_cpg_daemon();
- clear_configfs();
- close_logging();
- close_cluster();
- close_cluster_cfg();
-
- /* NOTE(review): these log_error calls come after close_logging() */
- list_for_each_entry(ls, &lockspaces, list)
- log_error("abandoned lockspace %s", ls->name);
-}
-
-static void lockfile(void)
-{
- int fd, error;
- struct flock lock;
- char buf[33];
-
- memset(buf, 0, 33);
-
- fd = open(LOCKFILE_NAME, O_CREAT|O_WRONLY,
- S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
- if (fd < 0) {
- fprintf(stderr, "cannot open/create lock file %s\n",
- LOCKFILE_NAME);
- exit(EXIT_FAILURE);
- }
-
- lock.l_type = F_WRLCK;
- lock.l_start = 0;
- lock.l_whence = SEEK_SET;
- lock.l_len = 0;
-
- error = fcntl(fd, F_SETLK, &lock);
- if (error) {
- fprintf(stderr, "dlm_controld is already running\n");
- exit(EXIT_FAILURE);
- }
-
- error = ftruncate(fd, 0);
- if (error) {
- fprintf(stderr, "cannot clear lock file %s\n", LOCKFILE_NAME);
- exit(EXIT_FAILURE);
- }
-
- sprintf(buf, "%d\n", getpid());
-
- error = write(fd, buf, strlen(buf));
- if (error <= 0) {
- fprintf(stderr, "cannot write lock file %s\n", LOCKFILE_NAME);
- exit(EXIT_FAILURE);
- }
-}
-
-static void print_usage(void)
-{
- printf("Usage:\n");
- printf("\n");
- printf("dlm_controld [options]\n");
- printf("\n");
- printf("Options:\n");
- printf("\n");
- printf(" -D Enable debugging to stderr and don't fork\n");
- printf(" -L Enable debugging to log file\n");
- printf(" -K Enable kernel dlm debugging messages\n");
- printf(" -r <num> dlm kernel lowcomms protocol, 0 tcp, 1 sctp, 2 detect\n");
- printf(" 2 selects tcp if corosync rrp_mode is \"none\", otherwise sctp\n");
- printf(" Default is 2\n");
- printf(" -f <num> Enable (1) or disable (0) fencing recovery dependency\n");
- printf(" Default is %d\n", DEFAULT_ENABLE_FENCING);
- printf(" -q <num> Enable (1) or disable (0) quorum recovery dependency\n");
- printf(" Default is %d\n", DEFAULT_ENABLE_QUORUM);
-#if 0
- printf(" -d <num> Enable (1) or disable (0) deadlock detection code\n");
- printf(" Default is %d\n", DEFAULT_ENABLE_DEADLK);
-#endif
- printf(" -p <num> Enable (1) or disable (0) plock code for cluster fs\n");
- printf(" Default is %d\n", DEFAULT_ENABLE_PLOCK);
- printf(" -P Enable plock debugging\n");
- printf(" -l <limit> Limit the rate of plock operations\n");
- printf(" Default is %d, set to 0 for no limit\n", DEFAULT_PLOCK_RATE_LIMIT);
- printf(" -o <n> Enable (1) or disable (0) plock ownership\n");
- printf(" Default is %d\n", DEFAULT_PLOCK_OWNERSHIP);
- printf(" -t <ms> plock ownership drop resources time (milliseconds)\n");
- printf(" Default is %u\n", DEFAULT_DROP_RESOURCES_TIME);
- printf(" -c <num> plock ownership drop resources count\n");
- printf(" Default is %u\n", DEFAULT_DROP_RESOURCES_COUNT);
- printf(" -a <ms> plock ownership drop resources age (milliseconds)\n");
- printf(" Default is %u\n", DEFAULT_DROP_RESOURCES_AGE);
- printf(" -h Print this help, then exit\n");
- printf(" -V Print program version information, then exit\n");
-}
-
-#define OPTION_STRING "LDKf:q:p:Pl:o:t:c:a:hVr:"
-
-static void read_arguments(int argc, char **argv)
-{
- int cont = 1;
- int optchar;
-
- while (cont) {
- optchar = getopt(argc, argv, OPTION_STRING);
-
- switch (optchar) {
- case 'D':
- daemon_debug_opt = 1;
- break;
-
- case 'L':
- optd_debug_logfile = 1;
- cfgd_debug_logfile = 1;
- break;
-
- case 'K':
- optk_debug = 1;
- cfgk_debug = 1;
- break;
-
- case 'r':
- optk_protocol = 1;
- cfgk_protocol = atoi(optarg);
- break;
-
- case 'f':
- optd_enable_fencing = 1;
- cfgd_enable_fencing = atoi(optarg);
- break;
-
- case 'q':
- optd_enable_quorum = 1;
- cfgd_enable_quorum = atoi(optarg);
- break;
-
- case 'p':
- optd_enable_plock = 1;
- cfgd_enable_plock = atoi(optarg);
- break;
-
- case 'P':
- optd_plock_debug = 1;
- cfgd_plock_debug = 1;
- break;
-
- case 'l':
- optd_plock_rate_limit = 1;
- cfgd_plock_rate_limit = atoi(optarg);
- break;
-
- case 'o':
- optd_plock_ownership = 1;
- cfgd_plock_ownership = atoi(optarg);
- break;
-
- case 't':
- optd_drop_resources_time = 1;
- cfgd_drop_resources_time = atoi(optarg);
- break;
-
- case 'c':
- optd_drop_resources_count = 1;
- cfgd_drop_resources_count = atoi(optarg);
- break;
-
- case 'a':
- optd_drop_resources_age = 1;
- cfgd_drop_resources_age = atoi(optarg);
- break;
-
- case 'h':
- print_usage();
- exit(EXIT_SUCCESS);
- break;
-
- case 'V':
- printf("dlm_controld %s (built %s %s)\n",
- VERSION, __DATE__, __TIME__);
- printf("%s\n", REDHAT_COPYRIGHT);
- exit(EXIT_SUCCESS);
- break;
-
- case ':':
- case '?':
- fprintf(stderr, "Please use '-h' for usage.\n");
- exit(EXIT_FAILURE);
- break;
-
- case EOF:
- cont = 0;
- break;
-
- default:
- fprintf(stderr, "unknown option: %c\n", optchar);
- exit(EXIT_FAILURE);
- break;
- };
- }
-
- if (getenv("DLM_CONTROLD_DEBUG")) {
- optd_debug_logfile = 1;
- cfgd_debug_logfile = 1;
- }
-}
-
-static void set_scheduler(void)
-{
- struct sched_param sched_param;
- int rv;
-
- rv = sched_get_priority_max(SCHED_RR);
- if (rv != -1) {
- sched_param.sched_priority = rv;
- rv = sched_setscheduler(0, SCHED_RR, &sched_param);
- if (rv == -1)
- log_error("could not set SCHED_RR priority %d err %d",
- sched_param.sched_priority, errno);
- } else {
- log_error("could not get maximum scheduler priority err %d",
- errno);
- }
-}
-
-int main(int argc, char **argv)
-{
- INIT_LIST_HEAD(&lockspaces);
- INIT_LIST_HEAD(&fs_register_list);
-
- read_arguments(argc, argv);
-
- if (!daemon_debug_opt) {
- if (daemon(0, 0) < 0) {
- perror("daemon error");
- exit(EXIT_FAILURE);
- }
- }
- lockfile();
- init_logging();
- log_level(NULL, LOG_INFO, "dlm_controld %s started", VERSION);
- signal(SIGTERM, sigterm_handler);
- set_scheduler();
-
- loop();
-
- return 0;
-}
-
-int daemon_debug_opt;
-int daemon_quit;
-int cluster_down;
-int poll_fencing;
-int poll_quorum;
-int poll_fs;
-int poll_ignore_plock;
-int poll_drop_plock;
-int plock_fd;
-int plock_ci;
-struct list_head lockspaces;
-int cluster_quorate;
-int our_nodeid;
-uint32_t cluster_ringid_seq;
-uint32_t control_minor;
-uint32_t monitor_minor;
-uint32_t plock_minor;
-uint32_t old_plock_minor;
-
-/* was a config value set on command line?, 0 or 1.
- optk is a kernel option, optd is a daemon option */
-
-int optk_debug;
-int optk_timewarn;
-int optk_protocol;
-int optd_debug_logfile;
-int optd_enable_fencing;
-int optd_enable_quorum;
-int optd_enable_plock;
-int optd_plock_debug;
-int optd_plock_rate_limit;
-int optd_plock_ownership;
-int optd_drop_resources_time;
-int optd_drop_resources_count;
-int optd_drop_resources_age;
-
-/* actual config value from command line, cluster.conf, or default.
- cfgk is a kernel config value, cfgd is a daemon config value */
-
-int cfgk_debug = -1;
-int cfgk_timewarn = -1;
-int cfgk_protocol = PROTO_DETECT;
-int cfgd_debug_logfile = DEFAULT_DEBUG_LOGFILE;
-int cfgd_enable_fencing = DEFAULT_ENABLE_FENCING;
-int cfgd_enable_quorum = DEFAULT_ENABLE_QUORUM;
-int cfgd_enable_plock = DEFAULT_ENABLE_PLOCK;
-int cfgd_plock_debug = DEFAULT_PLOCK_DEBUG;
-int cfgd_plock_rate_limit = DEFAULT_PLOCK_RATE_LIMIT;
-int cfgd_plock_ownership = DEFAULT_PLOCK_OWNERSHIP;
-int cfgd_drop_resources_time = DEFAULT_DROP_RESOURCES_TIME;
-int cfgd_drop_resources_count = DEFAULT_DROP_RESOURCES_COUNT;
-int cfgd_drop_resources_age = DEFAULT_DROP_RESOURCES_AGE;
-
diff --git a/group/dlm_controld/member_cman.c b/group/dlm_controld/member_cman.c
deleted file mode 100644
index 909c249..0000000
--- a/group/dlm_controld/member_cman.c
+++ /dev/null
@@ -1,267 +0,0 @@
-#include "dlm_daemon.h"
-#include <corosync/corotypes.h>
-#include <corosync/cfg.h>
-#include <corosync/quorum.h>
-#include "libfenced.h"
-
-static corosync_cfg_handle_t ch;
-static quorum_handle_t qh;
-static uint32_t old_nodes[MAX_NODES];
-static int old_node_count;
-static uint32_t quorum_nodes[MAX_NODES];
-static int quorum_node_count;
-
-static int is_member(uint32_t *node_list, int count, uint32_t nodeid)
-{
- int i;
-
- for (i = 0; i < count; i++) {
- if (node_list[i] == nodeid)
- return 1;
- }
- return 0;
-}
-
-static int is_old_member(uint32_t nodeid)
-{
- return is_member(old_nodes, old_node_count, nodeid);
-}
-
-int is_cluster_member(uint32_t nodeid)
-{
- return is_member(quorum_nodes, quorum_node_count, nodeid);
-}
-
-static void quorum_callback(quorum_handle_t h, uint32_t quorate,
- uint64_t ring_seq, uint32_t node_list_entries,
- uint32_t *node_list)
-{
- corosync_cfg_node_address_t addrs[MAX_NODE_ADDRESSES];
- corosync_cfg_node_address_t *addrptr = addrs;
- cs_error_t err;
- int i, j, num_addrs;
-
-
- cluster_quorate = quorate;
- cluster_ringid_seq = (uint32_t)ring_seq;
-
- log_debug("cluster quorum %u seq %u nodes %u",
- cluster_quorate, cluster_ringid_seq, node_list_entries);
-
- old_node_count = quorum_node_count;
- memcpy(&old_nodes, &quorum_nodes, sizeof(old_nodes));
-
- quorum_node_count = 0;
- memset(&quorum_nodes, 0, sizeof(quorum_nodes));
-
- for (i = 0; i < node_list_entries; i++)
- quorum_nodes[quorum_node_count++] = node_list[i];
-
- for (i = 0; i < old_node_count; i++) {
- if (!is_cluster_member(old_nodes[i])) {
- log_debug("cluster node %u removed seq %u",
- old_nodes[i], cluster_ringid_seq);
- node_history_cluster_remove(old_nodes[i]);
- del_configfs_node(old_nodes[i]);
- }
- }
-
- for (i = 0; i < quorum_node_count; i++) {
- if (!is_old_member(quorum_nodes[i])) {
- log_debug("cluster node %u added seq %u",
- quorum_nodes[i], cluster_ringid_seq);
- node_history_cluster_add(quorum_nodes[i]);
-
- err = corosync_cfg_get_node_addrs(ch, quorum_nodes[i],
- MAX_NODE_ADDRESSES,
- &num_addrs, addrs);
- if (err != CS_OK) {
- log_error("corosync_cfg_get_node_addrs failed "
- "nodeid %u", quorum_nodes[i]);
- continue;
- }
-
- for (j = 0; j < num_addrs; j++) {
- add_configfs_node(quorum_nodes[i],
- addrptr[j].address,
- addrptr[j].address_length,
- (quorum_nodes[i] ==
- our_nodeid));
- }
- }
- }
-}
-
-static quorum_callbacks_t quorum_callbacks =
-{
- .quorum_notify_fn = quorum_callback,
-};
-
-void process_cluster(int ci)
-{
- cs_error_t err;
-
- err = quorum_dispatch(qh, CS_DISPATCH_ALL);
- if (err != CS_OK)
- cluster_dead(0);
-}
-
-/* Force re-read of quorum nodes */
-void update_cluster(void)
-{
- cs_error_t err;
-
- err = quorum_dispatch(qh, CS_DISPATCH_ONE);
- if (err != CS_OK)
- cluster_dead(0);
-}
-
-int setup_cluster(void)
-{
- cs_error_t err;
- int fd;
-
- err = quorum_initialize(&qh, &quorum_callbacks);
- if (err != CS_OK) {
- log_error("quorum init error %d", err);
- return -1;
- }
-
- err = quorum_fd_get(qh, &fd);
- if (err != CS_OK) {
- log_error("quorum fd_get error %d", err);
- goto fail;
- }
-
- err = quorum_trackstart(qh, CS_TRACK_CHANGES);
- if (err != CS_OK) {
- log_error("quorum trackstart error %d", err);
- goto fail;
- }
-
- old_node_count = 0;
- memset(&old_nodes, 0, sizeof(old_nodes));
- quorum_node_count = 0;
- memset(&quorum_nodes, 0, sizeof(quorum_nodes));
-
- return fd;
- fail:
- quorum_finalize(qh);
- return -1;
-}
-
-void close_cluster(void)
-{
- quorum_trackstop(qh);
- quorum_finalize(qh);
-}
-
-void kick_node_from_cluster(int nodeid)
-{
- if (!nodeid) {
- log_error("telling corosync to shut down cluster locally");
- corosync_cfg_try_shutdown(ch,
- COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
- } else {
- log_error("telling corosync to remove nodeid %d from cluster",
- nodeid);
- corosync_cfg_kill_node(ch, nodeid, "dlm_controld");
- }
-}
-
-static void shutdown_callback(corosync_cfg_handle_t h,
- corosync_cfg_shutdown_flags_t flags)
-{
- if (flags & COROSYNC_CFG_SHUTDOWN_FLAG_REQUEST) {
- if (list_empty(&lockspaces))
- corosync_cfg_replyto_shutdown(ch,
- COROSYNC_CFG_SHUTDOWN_FLAG_YES);
- else {
- log_debug("no to corosync shutdown");
- corosync_cfg_replyto_shutdown(ch,
- COROSYNC_CFG_SHUTDOWN_FLAG_NO);
- }
- }
-}
-
-static corosync_cfg_callbacks_t cfg_callbacks =
-{
- .corosync_cfg_shutdown_callback = shutdown_callback,
- .corosync_cfg_state_track_callback = NULL,
-};
-
-void process_cluster_cfg(int ci)
-{
- cs_error_t err;
-
- err = corosync_cfg_dispatch(ch, CS_DISPATCH_ALL);
- if (err != CS_OK)
- cluster_dead(0);
-}
-
-int setup_cluster_cfg(void)
-{
- cs_error_t err;
- unsigned int nodeid;
- int fd;
-
- err = corosync_cfg_initialize(&ch, &cfg_callbacks);
- if (err != CS_OK) {
- log_error("corosync cfg init error %d", err);
- return -1;
- }
-
- err = corosync_cfg_fd_get(ch, &fd);
- if (err != CS_OK) {
- log_error("corosync cfg fd_get error %d", err);
- corosync_cfg_finalize(ch);
- return -1;
- }
-
- err = corosync_cfg_local_get(ch, &nodeid);
- if (err != CS_OK) {
- log_error("corosync cfg local_get error %d", err);
- corosync_cfg_finalize(ch);
- return -1;
- }
- our_nodeid = nodeid;
- log_debug("our_nodeid %d", our_nodeid);
-
- return fd;
-}
-
-void close_cluster_cfg(void)
-{
- corosync_cfg_finalize(ch);
-}
-
-int fence_node_time(int nodeid, uint64_t *last_fenced_time)
-{
- struct fenced_node nodeinfo;
- int rv;
-
- memset(&nodeinfo, 0, sizeof(nodeinfo));
-
- rv = fenced_node_info(nodeid, &nodeinfo);
- if (rv < 0)
- return rv;
-
- *last_fenced_time = nodeinfo.last_fenced_time;
- return 0;
-}
-
-int fence_in_progress(int *count)
-{
- struct fenced_domain domain;
- int rv;
-
- memset(&domain, 0, sizeof(domain));
-
- rv = fenced_domain_info(&domain);
- if (rv < 0)
- return rv;
-
- *count = domain.victim_count;
- return 0;
-}
-
diff --git a/group/dlm_controld/netlink.c b/group/dlm_controld/netlink.c
deleted file mode 100644
index 63122f7..0000000
--- a/group/dlm_controld/netlink.c
+++ /dev/null
@@ -1,225 +0,0 @@
-#include "dlm_daemon.h"
-#include "config.h"
-#include <linux/dlm.h>
-#include <linux/netlink.h>
-#include <linux/genetlink.h>
-#include <linux/dlm_netlink.h>
-
-#define DEADLOCK_CHECK_SECS 10
-
-/* FIXME: look into using libnl/libnetlink */
-
-#define GENLMSG_DATA(glh) ((void *)((char *)NLMSG_DATA(glh) + GENL_HDRLEN))
-#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
-#define NLA_DATA(na) ((void *)((char*)(na) + NLA_HDRLEN))
-#define NLA_PAYLOAD(len) (len - NLA_HDRLEN)
-
-/* Maximum size of response requested or message sent */
-#define MAX_MSG_SIZE 1024
-
-struct msgtemplate {
- struct nlmsghdr n;
- struct genlmsghdr g;
- char buf[MAX_MSG_SIZE];
-};
-
-static int send_genetlink_cmd(int sd, uint16_t nlmsg_type, uint32_t nlmsg_pid,
- uint8_t genl_cmd, uint16_t nla_type,
- void *nla_data, int nla_len)
-{
- struct nlattr *na;
- struct sockaddr_nl nladdr;
- int r, buflen;
- char *buf;
-
- struct msgtemplate msg;
-
- msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
- msg.n.nlmsg_type = nlmsg_type;
- msg.n.nlmsg_flags = NLM_F_REQUEST;
- msg.n.nlmsg_seq = 0;
- msg.n.nlmsg_pid = nlmsg_pid;
- msg.g.cmd = genl_cmd;
- msg.g.version = 0x1;
- na = (struct nlattr *) GENLMSG_DATA(&msg);
- na->nla_type = nla_type;
- na->nla_len = nla_len + 1 + NLA_HDRLEN;
- if (nla_data)
- memcpy(NLA_DATA(na), nla_data, nla_len);
- msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
-
- buf = (char *) &msg;
- buflen = msg.n.nlmsg_len ;
- memset(&nladdr, 0, sizeof(nladdr));
- nladdr.nl_family = AF_NETLINK;
- while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
- sizeof(nladdr))) < buflen) {
- if (r > 0) {
- buf += r;
- buflen -= r;
- } else if (errno != EAGAIN)
- return -1;
- }
- return 0;
-}
-
-/*
- * Probe the controller in genetlink to find the family id
- * for the DLM family
- */
-static int get_family_id(int sd)
-{
- char genl_name[100];
- struct {
- struct nlmsghdr n;
- struct genlmsghdr g;
- char buf[256];
- } ans;
-
- int id = 0, rc;
- struct nlattr *na;
- int rep_len;
-
- strcpy(genl_name, DLM_GENL_NAME);
- rc = send_genetlink_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
- CTRL_ATTR_FAMILY_NAME, (void *)genl_name,
- strlen(DLM_GENL_NAME)+1);
-
- rep_len = recv(sd, &ans, sizeof(ans), 0);
- if (ans.n.nlmsg_type == NLMSG_ERROR ||
- (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len))
- return 0;
-
- na = (struct nlattr *) GENLMSG_DATA(&ans);
- na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
- if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
- id = *(uint16_t *) NLA_DATA(na);
- }
- return id;
-}
-
-/* genetlink messages are timewarnings used as part of deadlock detection */
-
-int setup_netlink(void)
-{
- struct sockaddr_nl snl;
- int s, rv;
- uint16_t id;
-
- s = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
- if (s < 0) {
- log_error("generic netlink socket");
- return s;
- }
-
- memset(&snl, 0, sizeof(snl));
- snl.nl_family = AF_NETLINK;
-
- rv = bind(s, (struct sockaddr *) &snl, sizeof(snl));
- if (rv < 0) {
- log_error("gen netlink bind error %d errno %d", rv, errno);
- close(s);
- return rv;
- }
-
- id = get_family_id(s);
- if (!id) {
- log_error("Error getting family id, errno %d", errno);
- close(s);
- return -1;
- }
-
- rv = send_genetlink_cmd(s, id, getpid(), DLM_CMD_HELLO, 0, NULL, 0);
- if (rv < 0) {
- log_error("error sending hello cmd, errno %d", errno);
- close(s);
- return -1;
- }
-
- return s;
-}
-
-static void process_timewarn(struct dlm_lock_data *data)
-{
- struct lockspace *ls;
- struct timeval now;
- unsigned int sec;
-
- ls = find_ls_id(data->lockspace_id);
- if (!ls)
- return;
-
- data->resource_name[data->resource_namelen] = '\0';
-
- log_group(ls, "timewarn: lkid %x pid %d name %s",
- data->id, data->ownpid, data->resource_name);
-
- /* Problem: we don't want to get a timewarn, assume it's resolved
- by the current cycle, but in fact it's from a deadlock that
- formed after the checkpoints for the current cycle. Then we'd
- have to hope for another warning (that may not come) to trigger
- a new cycle to catch the deadlock. If our last cycle ckpt
- was say N (~5?) sec before we receive the timewarn, then we
- can be confident that the cycle included the lock in question.
- Otherwise, we're not sure if the warning is for a new deadlock
- that's formed since our last cycle ckpt (unless it's a long
- enough time since the last cycle that we're confident it *is*
- a new deadlock). When there is a deadlock, I suspect it will
- be common to receive warnings before, during, and possibly
- after the cycle that resolves it. Wonder if we should record
- timewarns and match them with deadlock cycles so we can tell
- which timewarns are addressed by a given cycle and which aren't. */
-
-
- gettimeofday(&now, NULL);
-
- /* don't send a new start until at least SECS after the last
- we sent, and at least SECS after the last completed cycle */
-
- sec = now.tv_sec - ls->last_send_cycle_start.tv_sec;
-
- if (sec < DEADLOCK_CHECK_SECS) {
- log_group(ls, "skip send: recent send cycle %d sec", sec);
- return;
- }
-
- sec = now.tv_sec - ls->cycle_end_time.tv_sec;
-
- if (sec < DEADLOCK_CHECK_SECS) {
- log_group(ls, "skip send: recent cycle end %d sec", sec);
- return;
- }
-
- gettimeofday(&ls->last_send_cycle_start, NULL);
-
- if (cfgd_enable_deadlk)
- send_cycle_start(ls);
-}
-
-void process_netlink(int ci)
-{
- struct msgtemplate msg;
- struct nlattr *na;
- int len;
- int fd;
-
- fd = client_fd(ci);
-
- len = recv(fd, &msg, sizeof(msg), 0);
-
- if (len < 0) {
- log_error("nonfatal netlink error: errno %d", errno);
- return;
- }
-
- if (msg.n.nlmsg_type == NLMSG_ERROR || !NLMSG_OK((&msg.n), len)) {
- struct nlmsgerr *err = NLMSG_DATA(&msg);
- log_error("fatal netlink error: errno %d", err->error);
- return;
- }
-
- na = (struct nlattr *) GENLMSG_DATA(&msg);
-
- process_timewarn((struct dlm_lock_data *) NLA_DATA(na));
-}
-
diff --git a/group/dlm_controld/plock.c b/group/dlm_controld/plock.c
deleted file mode 100644
index c38b998..0000000
--- a/group/dlm_controld/plock.c
+++ /dev/null
@@ -1,2197 +0,0 @@
-#include "dlm_daemon.h"
-#include <linux/dlm_plock.h>
-
-/* FIXME: remove this once everyone is using the version of
- * dlm_plock.h which defines it */
-
-#ifndef DLM_PLOCK_FL_CLOSE
-#warning DLM_PLOCK_FL_CLOSE undefined. Enabling build workaround.
-#define DLM_PLOCK_FL_CLOSE 1
-#define DLM_PLOCK_BUILD_WORKAROUND 1
-#endif
-
-static uint32_t plock_read_count;
-static uint32_t plock_recv_count;
-static uint32_t plock_rate_delays;
-static struct timeval plock_read_time;
-static struct timeval plock_recv_time;
-static struct timeval plock_rate_last;
-
-static int plock_device_fd = -1;
-static int need_fsid_translation = 0;
-
-extern int message_flow_control_on;
-
-#define RD_CONTINUE 0x00000001
-
-struct resource_data {
- uint64_t number;
- int owner;
- uint32_t lock_count;
- uint32_t flags;
- uint32_t pad;
-};
-
-struct plock_data {
- uint64_t start;
- uint64_t end;
- uint64_t owner;
- uint32_t pid;
- uint32_t nodeid;
- uint8_t ex;
- uint8_t waiter;
- uint16_t pad1;
- uint32_t pad;
-};
-
-#define R_GOT_UNOWN 0x00000001 /* have received owner=0 message */
-#define R_SEND_UNOWN 0x00000002 /* have sent owner=0 message */
-#define R_SEND_OWN 0x00000004 /* have sent owner=our_nodeid message */
-#define R_PURGE_UNOWN 0x00000008 /* set owner=0 in purge */
-#define R_SEND_DROP 0x00000010
-
-struct resource {
- struct list_head list; /* list of resources */
- uint64_t number;
- int owner; /* nodeid or 0 for unowned */
- uint32_t flags;
- struct timeval last_access;
- struct list_head locks; /* one lock for each range */
- struct list_head waiters;
- struct list_head pending; /* discovering r owner */
- struct rb_node rb_node;
-};
-
-#define P_SYNCING 0x00000001 /* plock has been sent as part of sync but not
- yet received */
-
-struct posix_lock {
- struct list_head list; /* resource locks or waiters list */
- uint32_t pid;
- uint64_t owner;
- uint64_t start;
- uint64_t end;
- int ex;
- int nodeid;
- uint32_t flags;
-};
-
-struct lock_waiter {
- struct list_head list;
- uint32_t flags;
- struct dlm_plock_info info;
-};
-
-struct save_msg {
- struct list_head list;
- int nodeid;
- int len;
- int type;
- char buf[0];
-};
-
-
-static void send_own(struct lockspace *ls, struct resource *r, int owner);
-static void save_pending_plock(struct lockspace *ls, struct resource *r,
- struct dlm_plock_info *in);
-
-
-static int got_unown(struct resource *r)
-{
- return !!(r->flags & R_GOT_UNOWN);
-}
-
-static void info_bswap_out(struct dlm_plock_info *i)
-{
- i->version[0] = cpu_to_le32(i->version[0]);
- i->version[1] = cpu_to_le32(i->version[1]);
- i->version[2] = cpu_to_le32(i->version[2]);
- i->pid = cpu_to_le32(i->pid);
- i->nodeid = cpu_to_le32(i->nodeid);
- i->rv = cpu_to_le32(i->rv);
- i->fsid = cpu_to_le32(i->fsid);
- i->number = cpu_to_le64(i->number);
- i->start = cpu_to_le64(i->start);
- i->end = cpu_to_le64(i->end);
- i->owner = cpu_to_le64(i->owner);
-}
-
-static void info_bswap_in(struct dlm_plock_info *i)
-{
- i->version[0] = le32_to_cpu(i->version[0]);
- i->version[1] = le32_to_cpu(i->version[1]);
- i->version[2] = le32_to_cpu(i->version[2]);
- i->pid = le32_to_cpu(i->pid);
- i->nodeid = le32_to_cpu(i->nodeid);
- i->rv = le32_to_cpu(i->rv);
- i->fsid = le32_to_cpu(i->fsid);
- i->number = le64_to_cpu(i->number);
- i->start = le64_to_cpu(i->start);
- i->end = le64_to_cpu(i->end);
- i->owner = le64_to_cpu(i->owner);
-}
-
-static const char *op_str(int optype)
-{
- switch (optype) {
- case DLM_PLOCK_OP_LOCK:
- return "LK";
- case DLM_PLOCK_OP_UNLOCK:
- return "UN";
- case DLM_PLOCK_OP_GET:
- return "GET";
- default:
- return "??";
- }
-}
-
-static const char *ex_str(int optype, int ex)
-{
- if (optype == DLM_PLOCK_OP_UNLOCK || optype == DLM_PLOCK_OP_GET)
- return "-";
- if (ex)
- return "WR";
- else
- return "RD";
-}
-
-/*
- * In kernels before 2.6.26, plocks came from gfs2's lock_dlm module.
- * Reading plocks from there as well should allow us to use cluster3
- * on old (RHEL5) kernels. In this case, the fsid we read in plock_info
- * structs is the mountgroup id, which we need to translate to the ls id.
- */
-
-int setup_plocks(void)
-{
- plock_read_count = 0;
- plock_recv_count = 0;
- plock_rate_delays = 0;
- gettimeofday(&plock_read_time, NULL);
- gettimeofday(&plock_recv_time, NULL);
- gettimeofday(&plock_rate_last, NULL);
-
- if (plock_minor) {
- plock_device_fd = open("/dev/misc/dlm_plock", O_RDWR);
- } else if (old_plock_minor) {
- log_debug("setup_plocks using old lock_dlm interface");
- need_fsid_translation = 1;
- plock_device_fd = open("/dev/misc/lock_dlm_plock", O_RDWR);
- }
-
- if (plock_device_fd < 0) {
- log_error("Failure to open plock device: %s", strerror(errno));
- return -1;
- }
-
- log_debug("plocks %d", plock_device_fd);
- log_debug("plock cpg message size: %u bytes",
- (unsigned int) (sizeof(struct dlm_header) +
- sizeof(struct dlm_plock_info)));
-
- return plock_device_fd;
-}
-
-void close_plocks(void)
-{
- if (plock_device_fd > 0)
- close(plock_device_fd);
-}
-
-static uint32_t mg_to_ls_id(uint32_t fsid)
-{
- struct lockspace *ls;
- int do_set = 1;
-
- retry:
- list_for_each_entry(ls, &lockspaces, list) {
- if (ls->associated_mg_id == fsid)
- return ls->global_id;
- }
-
- if (do_set) {
- do_set = 0;
- set_associated_id(fsid);
- goto retry;
- }
-
- return fsid;
-}
-
-/* FIXME: unify these two */
-
-static unsigned long time_diff_ms(struct timeval *begin, struct timeval *end)
-{
- struct timeval result;
- timersub(end, begin, &result);
- return (result.tv_sec * 1000) + (result.tv_usec / 1000);
-}
-
-static uint64_t dt_usec(struct timeval *start, struct timeval *stop)
-{
- uint64_t dt;
-
- dt = stop->tv_sec - start->tv_sec;
- dt *= 1000000;
- dt += stop->tv_usec - start->tv_usec;
- return dt;
-}
-
-static struct resource * rb_search_plock_resource(struct lockspace *ls, uint64_t number)
-{
- struct rb_node *n = ls->plock_resources_root.rb_node;
- struct resource *r;
-
- while (n) {
- r = rb_entry(n, struct resource, rb_node);
- if (number < r->number)
- n = n->rb_left;
- else if (number > r->number)
- n = n->rb_right;
- else
- return r;
- }
- return NULL;
-}
-
-static void rb_insert_plock_resource(struct lockspace *ls, struct resource *r)
-{
- struct resource *entry;
- struct rb_node **p;
- struct rb_node *parent = NULL;
-
- p = &ls->plock_resources_root.rb_node;
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct resource, rb_node);
- if (r->number < entry->number)
- p = &parent->rb_left;
- else if (r->number > entry->number)
- p = &parent->rb_right;
- else
- return;
- }
- rb_link_node(&r->rb_node, parent, p);
- rb_insert_color(&r->rb_node, &ls->plock_resources_root);
-}
-
-static void rb_del_plock_resource(struct lockspace *ls, struct resource *r)
-{
- if (!RB_EMPTY_NODE(&r->rb_node)) {
- rb_erase(&r->rb_node, &ls->plock_resources_root);
- RB_CLEAR_NODE(&r->rb_node);
- }
-}
-
-static struct resource *search_resource(struct lockspace *ls, uint64_t number)
-{
- struct resource *r;
-
- list_for_each_entry(r, &ls->plock_resources, list) {
- if (r->number == number)
- return r;
- }
- return NULL;
-}
-
-static int find_resource(struct lockspace *ls, uint64_t number, int create,
- struct resource **r_out)
-{
- struct resource *r = NULL;
- int rv = 0;
-
- r = rb_search_plock_resource(ls, number);
- if (r)
- goto out;
-
- if (create == 0) {
- rv = -ENOENT;
- goto out;
- }
-
- r = malloc(sizeof(struct resource));
- if (!r) {
- log_elock(ls, "find_resource no memory %d", errno);
- rv = -ENOMEM;
- goto out;
- }
-
- memset(r, 0, sizeof(struct resource));
- r->number = number;
- INIT_LIST_HEAD(&r->locks);
- INIT_LIST_HEAD(&r->waiters);
- INIT_LIST_HEAD(&r->pending);
-
- if (cfgd_plock_ownership)
- r->owner = -1;
- else
- r->owner = 0;
-
- list_add_tail(&r->list, &ls->plock_resources);
- rb_insert_plock_resource(ls, r);
- out:
- if (r)
- gettimeofday(&r->last_access, NULL);
- *r_out = r;
- return rv;
-}
-
-static void put_resource(struct lockspace *ls, struct resource *r)
-{
- /* with ownership, resources are only freed via drop messages */
- if (cfgd_plock_ownership)
- return;
-
- if (list_empty(&r->locks) && list_empty(&r->waiters)) {
- rb_del_plock_resource(ls, r);
- list_del(&r->list);
- free(r);
- }
-}
-
-static inline int ranges_overlap(uint64_t start1, uint64_t end1,
- uint64_t start2, uint64_t end2)
-{
- if (end1 < start2 || start1 > end2)
- return 0;
- return 1;
-}
-
-/**
- * overlap_type - returns a value based on the type of overlap
- * @s1 - start of new lock range
- * @e1 - end of new lock range
- * @s2 - start of existing lock range
- * @e2 - end of existing lock range
- *
- */
-
-static int overlap_type(uint64_t s1, uint64_t e1, uint64_t s2, uint64_t e2)
-{
- int ret;
-
- /*
- * ---r1---
- * ---r2---
- */
-
- if (s1 == s2 && e1 == e2)
- ret = 0;
-
- /*
- * --r1--
- * ---r2---
- */
-
- else if (s1 == s2 && e1 < e2)
- ret = 1;
-
- /*
- * --r1--
- * ---r2---
- */
-
- else if (s1 > s2 && e1 == e2)
- ret = 1;
-
- /*
- * --r1--
- * ---r2---
- */
-
- else if (s1 > s2 && e1 < e2)
- ret = 2;
-
- /*
- * ---r1--- or ---r1--- or ---r1---
- * --r2-- --r2-- --r2--
- */
-
- else if (s1 <= s2 && e1 >= e2)
- ret = 3;
-
- /*
- * ---r1---
- * ---r2---
- */
-
- else if (s1 > s2 && e1 > e2)
- ret = 4;
-
- /*
- * ---r1---
- * ---r2---
- */
-
- else if (s1 < s2 && e1 < e2)
- ret = 4;
-
- else
- ret = -1;
-
- return ret;
-}
-
-/* shrink the range start2:end2 by the partially overlapping start:end */
-
-static int shrink_range2(uint64_t *start2, uint64_t *end2,
- uint64_t start, uint64_t end)
-{
- int error = 0;
-
- if (*start2 < start)
- *end2 = start - 1;
- else if (*end2 > end)
- *start2 = end + 1;
- else
- error = -1;
- return error;
-}
-
-static int shrink_range(struct posix_lock *po, uint64_t start, uint64_t end)
-{
- return shrink_range2(&po->start, &po->end, start, end);
-}
-
-static int is_conflict(struct resource *r, struct dlm_plock_info *in, int get)
-{
- struct posix_lock *po;
-
- list_for_each_entry(po, &r->locks, list) {
- if (po->nodeid == in->nodeid && po->owner == in->owner)
- continue;
- if (!ranges_overlap(po->start, po->end, in->start, in->end))
- continue;
-
- if (in->ex || po->ex) {
- if (get) {
- in->ex = po->ex;
- in->pid = po->pid;
- in->start = po->start;
- in->end = po->end;
- }
- return 1;
- }
- }
- return 0;
-}
-
-static int add_lock(struct resource *r, uint32_t nodeid, uint64_t owner,
- uint32_t pid, int ex, uint64_t start, uint64_t end)
-{
- struct posix_lock *po;
-
- po = malloc(sizeof(struct posix_lock));
- if (!po)
- return -ENOMEM;
- memset(po, 0, sizeof(struct posix_lock));
-
- po->start = start;
- po->end = end;
- po->nodeid = nodeid;
- po->owner = owner;
- po->pid = pid;
- po->ex = ex;
- list_add_tail(&po->list, &r->locks);
-
- return 0;
-}
-
-/* RN within RE (and starts or ends on RE boundary)
- 1. add new lock for non-overlap area of RE, orig mode
- 2. convert RE to RN range and mode */
-
-static int lock_case1(struct posix_lock *po, struct resource *r,
- struct dlm_plock_info *in)
-{
- uint64_t start2, end2;
- int rv;
-
- /* non-overlapping area start2:end2 */
- start2 = po->start;
- end2 = po->end;
- rv = shrink_range2(&start2, &end2, in->start, in->end);
- if (rv)
- goto out;
-
- po->start = in->start;
- po->end = in->end;
- po->ex = in->ex;
-
- rv = add_lock(r, in->nodeid, in->owner, in->pid, !in->ex, start2, end2);
- out:
- return rv;
-}
-
-/* RN within RE (RE overlaps RN on both sides)
- 1. add new lock for front fragment, orig mode
- 2. add new lock for back fragment, orig mode
- 3. convert RE to RN range and mode */
-
-static int lock_case2(struct posix_lock *po, struct resource *r,
- struct dlm_plock_info *in)
-
-{
- int rv;
-
- rv = add_lock(r, in->nodeid, in->owner, in->pid,
- !in->ex, po->start, in->start - 1);
- if (rv)
- goto out;
-
- rv = add_lock(r, in->nodeid, in->owner, in->pid,
- !in->ex, in->end + 1, po->end);
- if (rv)
- goto out;
-
- po->start = in->start;
- po->end = in->end;
- po->ex = in->ex;
- out:
- return rv;
-}
-
-static int lock_internal(struct lockspace *ls, struct resource *r,
- struct dlm_plock_info *in)
-{
- struct posix_lock *po, *safe;
- int rv = 0;
-
- list_for_each_entry_safe(po, safe, &r->locks, list) {
- if (po->nodeid != in->nodeid || po->owner != in->owner)
- continue;
- if (!ranges_overlap(po->start, po->end, in->start, in->end))
- continue;
-
- /* existing range (RE) overlaps new range (RN) */
-
- switch(overlap_type(in->start, in->end, po->start, po->end)) {
-
- case 0:
- if (po->ex == in->ex)
- goto out;
-
- /* ranges the same - just update the existing lock */
- po->ex = in->ex;
- goto out;
-
- case 1:
- if (po->ex == in->ex)
- goto out;
-
- rv = lock_case1(po, r, in);
- goto out;
-
- case 2:
- if (po->ex == in->ex)
- goto out;
-
- rv = lock_case2(po, r, in);
- goto out;
-
- case 3:
- list_del(&po->list);
- free(po);
- break;
-
- case 4:
- if (po->start < in->start)
- po->end = in->start - 1;
- else
- po->start = in->end + 1;
- break;
-
- default:
- rv = -1;
- goto out;
- }
- }
-
- rv = add_lock(r, in->nodeid, in->owner, in->pid,
- in->ex, in->start, in->end);
- out:
- return rv;
-
-}
-
-static int unlock_internal(struct lockspace *ls, struct resource *r,
- struct dlm_plock_info *in)
-{
- struct posix_lock *po, *safe;
- int rv = 0;
-
- list_for_each_entry_safe(po, safe, &r->locks, list) {
- if (po->nodeid != in->nodeid || po->owner != in->owner)
- continue;
- if (!ranges_overlap(po->start, po->end, in->start, in->end))
- continue;
-
- /* existing range (RE) overlaps new range (RN) */
-
- switch (overlap_type(in->start, in->end, po->start, po->end)) {
-
- case 0:
- /* ranges the same - just remove the existing lock */
-
- list_del(&po->list);
- free(po);
- goto out;
-
- case 1:
- /* RN within RE and starts or ends on RE boundary -
- * shrink and update RE */
-
- rv = shrink_range(po, in->start, in->end);
- goto out;
-
- case 2:
- /* RN within RE - shrink and update RE to be front
- * fragment, and add a new lock for back fragment */
-
- rv = add_lock(r, in->nodeid, in->owner, in->pid,
- po->ex, in->end + 1, po->end);
- po->end = in->start - 1;
- goto out;
-
- case 3:
- /* RE within RN - remove RE, then continue checking
- * because RN could cover other locks */
-
- list_del(&po->list);
- free(po);
- continue;
-
- case 4:
- /* front of RE in RN, or end of RE in RN - shrink and
- * update RE, then continue because RN could cover
- * other locks */
-
- rv = shrink_range(po, in->start, in->end);
- continue;
-
- default:
- rv = -1;
- goto out;
- }
- }
- out:
- return rv;
-}
-
-static void clear_waiters(struct lockspace *ls, struct resource *r,
- struct dlm_plock_info *in)
-{
- struct lock_waiter *w, *safe;
-
- list_for_each_entry_safe(w, safe, &r->waiters, list) {
- if (w->info.nodeid != in->nodeid || w->info.owner != in->owner)
- continue;
-
- list_del(&w->list);
-
- log_elock(ls, "clear waiter %llx %llx-%llx %d/%u/%llx",
- (unsigned long long)in->number,
- (unsigned long long)in->start,
- (unsigned long long)in->end,
- in->nodeid, in->pid,
- (unsigned long long)in->owner);
- free(w);
- }
-}
-
-static int add_waiter(struct lockspace *ls, struct resource *r,
- struct dlm_plock_info *in)
-
-{
- struct lock_waiter *w;
-
- w = malloc(sizeof(struct lock_waiter));
- if (!w)
- return -ENOMEM;
- memcpy(&w->info, in, sizeof(struct dlm_plock_info));
- list_add_tail(&w->list, &r->waiters);
- return 0;
-}
-
-static void write_result(struct lockspace *ls, struct dlm_plock_info *in,
- int rv)
-{
- if (need_fsid_translation)
- in->fsid = ls->associated_mg_id;
-
- in->rv = rv;
- write(plock_device_fd, in, sizeof(struct dlm_plock_info));
-}
-
-static void do_waiters(struct lockspace *ls, struct resource *r)
-{
- struct lock_waiter *w, *safe;
- struct dlm_plock_info *in;
- int rv;
-
- list_for_each_entry_safe(w, safe, &r->waiters, list) {
- in = &w->info;
-
- if (is_conflict(r, in, 0))
- continue;
-
- list_del(&w->list);
-
- /*
- log_group(ls, "take waiter %llx %llx-%llx %d/%u/%llx",
- in->number, in->start, in->end,
- in->nodeid, in->pid, in->owner);
- */
-
- rv = lock_internal(ls, r, in);
-
- if (in->nodeid == our_nodeid)
- write_result(ls, in, rv);
-
- free(w);
- }
-}
-
-static void do_lock(struct lockspace *ls, struct dlm_plock_info *in,
- struct resource *r)
-{
- int rv;
-
- if (is_conflict(r, in, 0)) {
- if (!in->wait)
- rv = -EAGAIN;
- else {
- rv = add_waiter(ls, r, in);
- if (rv)
- goto out;
- rv = -EINPROGRESS;
- }
- } else
- rv = lock_internal(ls, r, in);
-
- out:
- if (in->nodeid == our_nodeid && rv != -EINPROGRESS)
- write_result(ls, in, rv);
-
- do_waiters(ls, r);
- put_resource(ls, r);
-}
-
-static void do_unlock(struct lockspace *ls, struct dlm_plock_info *in,
- struct resource *r)
-{
- int rv;
-
- rv = unlock_internal(ls, r, in);
-
-#ifdef DLM_PLOCK_BUILD_WORKAROUND
- if (in->pad & DLM_PLOCK_FL_CLOSE) {
-#else
- if (in->flags & DLM_PLOCK_FL_CLOSE) {
-#endif
- clear_waiters(ls, r, in);
- /* no replies for unlock-close ops */
- goto skip_result;
- }
-
- if (in->nodeid == our_nodeid)
- write_result(ls, in, rv);
-
- skip_result:
- do_waiters(ls, r);
- put_resource(ls, r);
-}
-
-/* we don't even get to this function if the getlk isn't from us */
-
-static void do_get(struct lockspace *ls, struct dlm_plock_info *in,
- struct resource *r)
-{
- int rv;
-
- if (is_conflict(r, in, 1))
- rv = 1;
- else
- rv = 0;
-
- write_result(ls, in, rv);
- put_resource(ls, r);
-}
-
-static void save_message(struct lockspace *ls, struct dlm_header *hd, int len,
- int from, int type)
-{
- struct save_msg *sm;
-
- sm = malloc(sizeof(struct save_msg) + len);
- if (!sm)
- return;
- memset(sm, 0, sizeof(struct save_msg) + len);
-
- memcpy(&sm->buf, hd, len);
- sm->type = type;
- sm->len = len;
- sm->nodeid = from;
-
- log_plock(ls, "save %s from %d len %d", msg_name(type), from, len);
-
- list_add_tail(&sm->list, &ls->saved_messages);
-}
-
-static void __receive_plock(struct lockspace *ls, struct dlm_plock_info *in,
- int from, struct resource *r)
-{
- switch (in->optype) {
- case DLM_PLOCK_OP_LOCK:
- ls->last_plock_time = time(NULL);
- do_lock(ls, in, r);
- break;
- case DLM_PLOCK_OP_UNLOCK:
- ls->last_plock_time = time(NULL);
- do_unlock(ls, in, r);
- break;
- case DLM_PLOCK_OP_GET:
- do_get(ls, in, r);
- break;
- default:
- log_elock(ls, "receive_plock error from %d optype %d",
- from, in->optype);
- if (from == our_nodeid)
- write_result(ls, in, -EINVAL);
- }
-}
-
-/* When ls members receive our options message (for our mount), one of them
- saves all plock state received to that point in a checkpoint and then sends
- us our journals message. We know to retrieve the plock state from the
- checkpoint when we receive our journals message. Any plocks messages that
- arrive between seeing our options message and our journals message needs to
- be saved and processed after we synchronize our plock state from the
- checkpoint. Any plock message received while we're mounting but before we
- set save_plocks (when we see our options message) can be ignored because it
- should be reflected in the checkpointed state. */
-
-static void _receive_plock(struct lockspace *ls, struct dlm_header *hd, int len)
-{
- struct dlm_plock_info info;
- struct resource *r = NULL;
- struct timeval now;
- uint64_t usec;
- int from = hd->nodeid;
- int rv, create;
-
- memcpy(&info, (char *)hd + sizeof(struct dlm_header), sizeof(info));
- info_bswap_in(&info);
-
- log_plock(ls, "receive plock %llx %s %s %llx-%llx %d/%u/%llx w %d",
- (unsigned long long)info.number,
- op_str(info.optype),
- ex_str(info.optype, info.ex),
- (unsigned long long)info.start, (unsigned long long)info.end,
- info.nodeid, info.pid, (unsigned long long)info.owner,
- info.wait);
-
- plock_recv_count++;
- if (!(plock_recv_count % 1000)) {
- gettimeofday(&now, NULL);
- usec = dt_usec(&plock_recv_time, &now);
- log_plock(ls, "plock_recv_count %u time %.3f s",
- plock_recv_count, usec * 1.e-6);
- plock_recv_time = now;
- }
-
- if (info.optype == DLM_PLOCK_OP_GET && from != our_nodeid)
- return;
-
- if (from != hd->nodeid || from != info.nodeid) {
- log_elock(ls, "receive_plock error from %d header %d info %d",
- from, hd->nodeid, info.nodeid);
- return;
- }
-
- create = !cfgd_plock_ownership;
-
- rv = find_resource(ls, info.number, create, &r);
-
- if (rv && cfgd_plock_ownership) {
- /* There must have been a race with a drop, so we need to
- ignore this plock op which will be resent. If we're the one
- who sent the plock, we need to send_own() and put it on the
- pending list to resend once the owner is established. */
-
- log_plock(ls, "receive_plock from %d no r %llx", from,
- (unsigned long long)info.number);
-
- if (from != our_nodeid)
- return;
-
- rv = find_resource(ls, info.number, 1, &r);
- if (rv)
- return;
- send_own(ls, r, our_nodeid);
- save_pending_plock(ls, r, &info);
- return;
- }
- if (rv) {
- /* r not found, rv is -ENOENT, this shouldn't happen because
- process_plocks() creates a resource for every op */
-
- log_elock(ls, "receive_plock error from %d no r %llx %d",
- from, (unsigned long long)info.number, rv);
- return;
- }
-
- /* The owner should almost always be 0 here, but other owners may
- be possible given odd combinations of races with drop. Odd races to
- worry about (some seem pretty improbable):
-
- - A sends drop, B sends plock, receive drop, receive plock.
- This is addressed above.
-
- - A sends drop, B sends plock, receive drop, B reads plock
- and sends own, receive plock, on B we find owner of -1.
-
- - A sends drop, B sends two plocks, receive drop, receive plocks.
- Receiving the first plock is the previous case, receiving the
- second plock will find r with owner of -1.
-
- - A sends drop, B sends two plocks, receive drop, C sends own,
- receive plock, B sends own, receive own (C), receive plock,
- receive own (B).
-
- Haven't tried to cook up a scenario that would lead to the
- last case below; receiving a plock from ourself and finding
- we're the owner of r. */
-
- if (!r->owner) {
- __receive_plock(ls, &info, from, r);
-
- } else if (r->owner == -1) {
- log_plock(ls, "receive_plock from %d r %llx owner %d", from,
- (unsigned long long)info.number, r->owner);
-
- if (from == our_nodeid)
- save_pending_plock(ls, r, &info);
-
- } else if (r->owner != our_nodeid) {
- log_plock(ls, "receive_plock from %d r %llx owner %d", from,
- (unsigned long long)info.number, r->owner);
-
- if (from == our_nodeid)
- save_pending_plock(ls, r, &info);
-
- } else if (r->owner == our_nodeid) {
- log_plock(ls, "receive_plock from %d r %llx owner %d", from,
- (unsigned long long)info.number, r->owner);
-
- if (from == our_nodeid)
- __receive_plock(ls, &info, from, r);
- }
-}
-
-void receive_plock(struct lockspace *ls, struct dlm_header *hd, int len)
-{
- if (ls->save_plocks) {
- save_message(ls, hd, len, hd->nodeid, DLM_MSG_PLOCK);
- return;
- }
-
- _receive_plock(ls, hd, len);
-}
-
-static int send_struct_info(struct lockspace *ls, struct dlm_plock_info *in,
- int msg_type)
-{
- struct dlm_header *hd;
- int rv = 0, len;
- char *buf;
-
- len = sizeof(struct dlm_header) + sizeof(struct dlm_plock_info);
- buf = malloc(len);
- if (!buf) {
- rv = -ENOMEM;
- goto out;
- }
- memset(buf, 0, len);
-
- info_bswap_out(in);
-
- hd = (struct dlm_header *)buf;
- hd->type = msg_type;
-
- memcpy(buf + sizeof(struct dlm_header), in, sizeof(*in));
-
- dlm_send_message(ls, buf, len);
-
- free(buf);
- out:
- if (rv)
- log_elock(ls, "send_struct_info error %d", rv);
- return rv;
-}
-
-static void send_plock(struct lockspace *ls, struct resource *r,
- struct dlm_plock_info *in)
-{
- send_struct_info(ls, in, DLM_MSG_PLOCK);
-}
-
-static void send_own(struct lockspace *ls, struct resource *r, int owner)
-{
- struct dlm_plock_info info;
-
- /* if we've already sent an own message for this resource,
- (pending list is not empty), then we shouldn't send another */
-
- if (!list_empty(&r->pending)) {
- log_plock(ls, "send_own %llx already pending",
- (unsigned long long)r->number);
- return;
- }
-
- if (!owner)
- r->flags |= R_SEND_UNOWN;
- else
- r->flags |= R_SEND_OWN;
-
- memset(&info, 0, sizeof(info));
- info.number = r->number;
- info.nodeid = owner;
-
- send_struct_info(ls, &info, DLM_MSG_PLOCK_OWN);
-}
-
-static void send_syncs(struct lockspace *ls, struct resource *r)
-{
- struct dlm_plock_info info;
- struct posix_lock *po;
- struct lock_waiter *w;
- int rv;
-
- list_for_each_entry(po, &r->locks, list) {
- memset(&info, 0, sizeof(info));
- info.number = r->number;
- info.start = po->start;
- info.end = po->end;
- info.nodeid = po->nodeid;
- info.owner = po->owner;
- info.pid = po->pid;
- info.ex = po->ex;
-
- rv = send_struct_info(ls, &info, DLM_MSG_PLOCK_SYNC_LOCK);
- if (rv)
- goto out;
-
- po->flags |= P_SYNCING;
- }
-
- list_for_each_entry(w, &r->waiters, list) {
- memcpy(&info, &w->info, sizeof(info));
-
- rv = send_struct_info(ls, &info, DLM_MSG_PLOCK_SYNC_WAITER);
- if (rv)
- goto out;
-
- w->flags |= P_SYNCING;
- }
- out:
- return;
-}
-
-static void send_drop(struct lockspace *ls, struct resource *r)
-{
- struct dlm_plock_info info;
-
- memset(&info, 0, sizeof(info));
- info.number = r->number;
- r->flags |= R_SEND_DROP;
-
- send_struct_info(ls, &info, DLM_MSG_PLOCK_DROP);
-}
-
-/* plock op can't be handled until we know the owner value of the resource,
- so the op is saved on the pending list until the r owner is established */
-
-static void save_pending_plock(struct lockspace *ls, struct resource *r,
- struct dlm_plock_info *in)
-{
- struct lock_waiter *w;
-
- w = malloc(sizeof(struct lock_waiter));
- if (!w) {
- log_elock(ls, "save_pending_plock no mem");
- return;
- }
- memcpy(&w->info, in, sizeof(struct dlm_plock_info));
- list_add_tail(&w->list, &r->pending);
-}
-
-/* plock ops are on pending list waiting for ownership to be established.
- owner has now become us, so add these plocks to r */
-
-static void add_pending_plocks(struct lockspace *ls, struct resource *r)
-{
- struct lock_waiter *w, *safe;
-
- list_for_each_entry_safe(w, safe, &r->pending, list) {
- __receive_plock(ls, &w->info, our_nodeid, r);
- list_del(&w->list);
- free(w);
- }
-}
-
-/* plock ops are on pending list waiting for ownership to be established.
- owner has now become 0, so send these plocks to everyone */
-
-static void send_pending_plocks(struct lockspace *ls, struct resource *r)
-{
- struct lock_waiter *w, *safe;
-
- list_for_each_entry_safe(w, safe, &r->pending, list) {
- send_plock(ls, r, &w->info);
- list_del(&w->list);
- free(w);
- }
-}
-
-static void _receive_own(struct lockspace *ls, struct dlm_header *hd, int len)
-{
- struct dlm_plock_info info;
- struct resource *r;
- int should_not_happen = 0;
- int from = hd->nodeid;
- int rv;
-
- ls->last_plock_time = time(NULL);
-
- memcpy(&info, (char *)hd + sizeof(struct dlm_header), sizeof(info));
- info_bswap_in(&info);
-
- log_plock(ls, "receive_own %llx from %u owner %u",
- (unsigned long long)info.number, hd->nodeid, info.nodeid);
-
- rv = find_resource(ls, info.number, 1, &r);
- if (rv)
- return;
-
- if (from == our_nodeid) {
- /*
- * received our own own message
- */
-
- if (info.nodeid == 0) {
- /* we are setting owner to 0 */
-
- if (r->owner == our_nodeid) {
- /* we set owner to 0 when we relinquish
- ownership */
- should_not_happen = 1;
- } else if (r->owner == 0) {
- /* this happens when we relinquish ownership */
- r->flags |= R_GOT_UNOWN;
- } else {
- should_not_happen = 1;
- }
-
- } else if (info.nodeid == our_nodeid) {
- /* we are setting owner to ourself */
-
- if (r->owner == -1) {
- /* we have gained ownership */
- r->owner = our_nodeid;
- add_pending_plocks(ls, r);
- } else if (r->owner == our_nodeid) {
- should_not_happen = 1;
- } else if (r->owner == 0) {
- send_pending_plocks(ls, r);
- } else {
- /* resource is owned by other node;
- they should set owner to 0 shortly */
- }
-
- } else {
- /* we should only ever set owner to 0 or ourself */
- should_not_happen = 1;
- }
- } else {
- /*
- * received own message from another node
- */
-
- if (info.nodeid == 0) {
- /* other node is setting owner to 0 */
-
- if (r->owner == -1) {
- /* we should have a record of the owner before
- it relinquishes */
- should_not_happen = 1;
- } else if (r->owner == our_nodeid) {
- /* only the owner should relinquish */
- should_not_happen = 1;
- } else if (r->owner == 0) {
- should_not_happen = 1;
- } else {
- r->owner = 0;
- r->flags |= R_GOT_UNOWN;
- send_pending_plocks(ls, r);
- }
-
- } else if (info.nodeid == from) {
- /* other node is setting owner to itself */
-
- if (r->owner == -1) {
- /* normal path for a node becoming owner */
- r->owner = from;
- } else if (r->owner == our_nodeid) {
- /* we relinquish our ownership: sync our local
- plocks to everyone, then set owner to 0 */
- send_syncs(ls, r);
- send_own(ls, r, 0);
- /* we need to set owner to 0 here because
- local ops may arrive before we receive
- our send_own message and can't be added
- locally */
- r->owner = 0;
- } else if (r->owner == 0) {
- /* can happen because we set owner to 0 before
- we receive our send_own sent just above */
- } else {
- /* do nothing, current owner should be
- relinquishing its ownership */
- }
-
- } else if (info.nodeid == our_nodeid) {
- /* no one else should try to set the owner to us */
- should_not_happen = 1;
- } else {
- /* a node should only ever set owner to 0 or itself */
- should_not_happen = 1;
- }
- }
-
- if (should_not_happen) {
- log_elock(ls, "receive_own error from %u %llx "
- "info nodeid %d r owner %d",
- from, (unsigned long long)r->number,
- info.nodeid, r->owner);
- }
-}
-
-void receive_own(struct lockspace *ls, struct dlm_header *hd, int len)
-{
- if (ls->save_plocks) {
- save_message(ls, hd, len, hd->nodeid, DLM_MSG_PLOCK_OWN);
- return;
- }
-
- _receive_own(ls, hd, len);
-}
-
-static void clear_syncing_flag(struct lockspace *ls, struct resource *r,
- struct dlm_plock_info *in)
-{
- struct posix_lock *po;
- struct lock_waiter *w;
-
- list_for_each_entry(po, &r->locks, list) {
- if ((po->flags & P_SYNCING) &&
- in->start == po->start &&
- in->end == po->end &&
- in->nodeid == po->nodeid &&
- in->owner == po->owner &&
- in->pid == po->pid &&
- in->ex == po->ex) {
- po->flags &= ~P_SYNCING;
- return;
- }
- }
-
- list_for_each_entry(w, &r->waiters, list) {
- if ((w->flags & P_SYNCING) &&
- in->start == w->info.start &&
- in->end == w->info.end &&
- in->nodeid == w->info.nodeid &&
- in->owner == w->info.owner &&
- in->pid == w->info.pid &&
- in->ex == w->info.ex) {
- w->flags &= ~P_SYNCING;
- return;
- }
- }
-
- log_elock(ls, "clear_syncing error %llx no match %s %llx-%llx %d/%u/%llx",
- (unsigned long long)r->number,
- in->ex ? "WR" : "RD",
- (unsigned long long)in->start,
- (unsigned long long)in->end,
- in->nodeid, in->pid,
- (unsigned long long)in->owner);
-}
-
-static void _receive_sync(struct lockspace *ls, struct dlm_header *hd, int len)
-{
- struct dlm_plock_info info;
- struct resource *r;
- int from = hd->nodeid;
- int rv;
-
- ls->last_plock_time = time(NULL);
-
- memcpy(&info, (char *)hd + sizeof(struct dlm_header), sizeof(info));
- info_bswap_in(&info);
-
- log_plock(ls, "receive sync %llx from %u %s %llx-%llx %d/%u/%llx",
- (unsigned long long)info.number, from, info.ex ? "WR" : "RD",
- (unsigned long long)info.start, (unsigned long long)info.end,
- info.nodeid, info.pid, (unsigned long long)info.owner);
-
- rv = find_resource(ls, info.number, 0, &r);
- if (rv) {
- log_elock(ls, "receive_sync error no r %llx from %d",
- info.number, from);
- return;
- }
-
- if (from == our_nodeid) {
- /* this plock now in sync on all nodes */
- clear_syncing_flag(ls, r, &info);
- return;
- }
-
- if (hd->type == DLM_MSG_PLOCK_SYNC_LOCK)
- add_lock(r, info.nodeid, info.owner, info.pid, info.ex,
- info.start, info.end);
- else if (hd->type == DLM_MSG_PLOCK_SYNC_WAITER)
- add_waiter(ls, r, &info);
-}
-
-void receive_sync(struct lockspace *ls, struct dlm_header *hd, int len)
-{
- if (ls->save_plocks) {
- save_message(ls, hd, len, hd->nodeid, hd->type);
- return;
- }
-
- _receive_sync(ls, hd, len);
-}
-
-static void _receive_drop(struct lockspace *ls, struct dlm_header *hd, int len)
-{
- struct dlm_plock_info info;
- struct resource *r;
- int from = hd->nodeid;
- int rv;
-
- ls->last_plock_time = time(NULL);
-
- memcpy(&info, (char *)hd + sizeof(struct dlm_header), sizeof(info));
- info_bswap_in(&info);
-
- log_plock(ls, "receive_drop %llx from %u",
- (unsigned long long)info.number, from);
-
- rv = find_resource(ls, info.number, 0, &r);
- if (rv) {
- /* we'll find no r if two nodes sent drop at once */
- log_plock(ls, "receive_drop from %d no r %llx", from,
- (unsigned long long)info.number);
- return;
- }
-
- if (r->owner != 0) {
- /* - A sent drop, B sent drop, receive drop A, C sent own,
- receive drop B (this warning on C, owner -1)
- - A sent drop, B sent drop, receive drop A, A sent own,
- receive own A, receive drop B (this warning on all,
- owner A) */
- log_plock(ls, "receive_drop from %d r %llx owner %d", from,
- (unsigned long long)r->number, r->owner);
- return;
- }
-
- if (!list_empty(&r->pending)) {
- /* shouldn't happen */
- log_elock(ls, "receive_drop error from %d r %llx pending op",
- from, (unsigned long long)r->number);
- return;
- }
-
- /* the decision to drop or not must be based on things that are
- guaranteed to be the same on all nodes */
-
- if (list_empty(&r->locks) && list_empty(&r->waiters)) {
- rb_del_plock_resource(ls, r);
- list_del(&r->list);
- free(r);
- } else {
- /* A sent drop, B sent a plock, receive plock, receive drop */
- log_plock(ls, "receive_drop from %d r %llx in use", from,
- (unsigned long long)r->number);
- }
-}
-
-void receive_drop(struct lockspace *ls, struct dlm_header *hd, int len)
-{
- if (ls->save_plocks) {
- save_message(ls, hd, len, hd->nodeid, DLM_MSG_PLOCK_DROP);
- return;
- }
-
- _receive_drop(ls, hd, len);
-}
-
-/* We only drop resources from the unowned state to simplify things.
- If we want to drop a resource we own, we unown/relinquish it first. */
-
-/* FIXME: in the transition from owner = us, to owner = 0, to drop;
- we want the second period to be shorter than the first */
-
-static int drop_resources(struct lockspace *ls)
-{
- struct resource *r;
- struct timeval now;
- int count = 0;
-
- if (!cfgd_plock_ownership)
- return 0;
-
- if (list_empty(&ls->plock_resources))
- return 0;
-
- gettimeofday(&now, NULL);
-
- if (time_diff_ms(&ls->drop_resources_last, &now) <
- cfgd_drop_resources_time)
- return 1;
-
- ls->drop_resources_last = now;
-
- /* try to drop the oldest, unused resources */
-
- list_for_each_entry_reverse(r, &ls->plock_resources, list) {
- if (count >= cfgd_drop_resources_count)
- break;
- if (r->owner && r->owner != our_nodeid)
- continue;
- if (time_diff_ms(&r->last_access, &now) <
- cfgd_drop_resources_age)
- continue;
-
- if (list_empty(&r->locks) && list_empty(&r->waiters)) {
- if (r->owner == our_nodeid) {
- send_own(ls, r, 0);
- r->owner = 0;
- } else if (r->owner == 0 && got_unown(r)) {
- send_drop(ls, r);
- }
-
- count++;
- }
- }
-
- return 1;
-}
-
-void drop_resources_all(void)
-{
- struct lockspace *ls;
- int rv = 0;
-
- poll_drop_plock = 0;
-
- list_for_each_entry(ls, &lockspaces, list) {
- rv = drop_resources(ls);
- if (rv)
- poll_drop_plock = 1;
- }
-}
-
-int limit_plocks(void)
-{
- struct timeval now;
-
- /* Don't send more messages while the cpg message queue is backed up */
-
- if (message_flow_control_on) {
- update_flow_control_status();
- if (message_flow_control_on)
- return 1;
- }
-
- if (!cfgd_plock_rate_limit || !plock_read_count)
- return 0;
-
- gettimeofday(&now, NULL);
-
- /* Every time a plock op is read from the kernel, we increment
- plock_read_count. After every cfgd_plock_rate_limit (N) reads,
- we check the time it's taken to do those N; if the time is less than
- a second, then we delay reading any more until a second is up.
- This way we read a max of N ops from the kernel every second. */
-
- if (!(plock_read_count % cfgd_plock_rate_limit)) {
- if (time_diff_ms(&plock_rate_last, &now) < 1000) {
- plock_rate_delays++;
- return 2;
- }
- plock_rate_last = now;
- plock_read_count++;
- }
- return 0;
-}
-
-void process_plocks(int ci)
-{
- struct lockspace *ls;
- struct resource *r;
- struct dlm_plock_info info;
- struct timeval now;
- uint64_t usec;
- int create, rv;
-
- if (limit_plocks()) {
- poll_ignore_plock = 1;
- client_ignore(plock_ci, plock_fd);
- return;
- }
-
- gettimeofday(&now, NULL);
-
- memset(&info, 0, sizeof(info));
-
- rv = do_read(plock_device_fd, &info, sizeof(info));
- if (rv < 0) {
- log_debug("process_plocks: read error %d fd %d\n",
- errno, plock_device_fd);
- return;
- }
-
- /* kernel doesn't set the nodeid field */
- info.nodeid = our_nodeid;
-
- if (!cfgd_enable_plock) {
- rv = -ENOSYS;
- goto fail;
- }
-
- if (need_fsid_translation)
- info.fsid = mg_to_ls_id(info.fsid);
-
- ls = find_ls_id(info.fsid);
- if (!ls) {
- log_plock(ls, "process_plocks: no ls id %x", info.fsid);
- rv = -EEXIST;
- goto fail;
- }
-
- if (ls->disable_plock) {
- rv = -ENOSYS;
- goto fail;
- }
-
- log_plock(ls, "read plock %llx %s %s %llx-%llx %d/%u/%llx w %d",
- (unsigned long long)info.number,
- op_str(info.optype),
- ex_str(info.optype, info.ex),
- (unsigned long long)info.start, (unsigned long long)info.end,
- info.nodeid, info.pid, (unsigned long long)info.owner,
- info.wait);
-
- /* report plock rate and any delays since the last report */
- plock_read_count++;
- if (!(plock_read_count % 1000)) {
- usec = dt_usec(&plock_read_time, &now) ;
- log_plock(ls, "plock_read_count %u time %.3f s delays %u",
- plock_read_count, usec * 1.e-6, plock_rate_delays);
- plock_read_time = now;
- plock_rate_delays = 0;
- }
-
- create = (info.optype == DLM_PLOCK_OP_UNLOCK) ? 0 : 1;
-
- rv = find_resource(ls, info.number, create, &r);
- if (rv)
- goto fail;
-
- if (r->owner == 0) {
- /* plock state replicated on all nodes */
- send_plock(ls, r, &info);
-
- } else if (r->owner == our_nodeid) {
- /* we are the owner of r, so our plocks are local */
- __receive_plock(ls, &info, our_nodeid, r);
-
- } else {
- /* r owner is -1: r is new, try to become the owner;
- r owner > 0: tell other owner to give up ownership;
- both done with a message trying to set owner to ourself */
- send_own(ls, r, our_nodeid);
- save_pending_plock(ls, r, &info);
- }
-
- if (cfgd_plock_ownership && !list_empty(&ls->plock_resources))
- poll_drop_plock = 1;
- return;
-
- fail:
-#ifdef DLM_PLOCK_BUILD_WORKAROUND
- if (!(info.pad & DLM_PLOCK_FL_CLOSE)) {
-#else
- if (!(info.flags & DLM_PLOCK_FL_CLOSE)) {
-#endif
- info.rv = rv;
- rv = write(plock_device_fd, &info, sizeof(info));
- }
-}
-
-void process_saved_plocks(struct lockspace *ls)
-{
- struct save_msg *sm, *sm2;
- struct dlm_header *hd;
- int count = 0;
-
- log_dlock(ls, "process_saved_plocks begin");
-
- if (list_empty(&ls->saved_messages))
- goto out;
-
- list_for_each_entry_safe(sm, sm2, &ls->saved_messages, list) {
- hd = (struct dlm_header *)sm->buf;
-
- switch (sm->type) {
- case DLM_MSG_PLOCK:
- _receive_plock(ls, hd, sm->len);
- break;
- case DLM_MSG_PLOCK_OWN:
- _receive_own(ls, hd, sm->len);
- break;
- case DLM_MSG_PLOCK_DROP:
- _receive_drop(ls, hd, sm->len);
- break;
- case DLM_MSG_PLOCK_SYNC_LOCK:
- case DLM_MSG_PLOCK_SYNC_WAITER:
- _receive_sync(ls, hd, sm->len);
- break;
- default:
- continue;
- }
-
- list_del(&sm->list);
- free(sm);
- count++;
- }
- out:
- log_dlock(ls, "process_saved_plocks %d done", count);
-}
-
-/* locks still marked SYNCING should not go into the ckpt; the new node
- will get those locks by receiving PLOCK_SYNC messages */
-
-#define MAX_SEND_SIZE 1024 /* 1024 holds 24 plock_data */
-
-static char send_buf[MAX_SEND_SIZE];
-
-static int pack_send_buf(struct lockspace *ls, struct resource *r, int owner,
- int full, int *count_out, void **last)
-{
- struct resource_data *rd;
- struct plock_data *pp;
- struct posix_lock *po;
- struct lock_waiter *w;
- int count = 0;
- int find = 0;
- int len;
-
- /* N.B. owner not always equal to r->owner */
- rd = (struct resource_data *)(send_buf + sizeof(struct dlm_header));
- rd->number = cpu_to_le64(r->number);
- rd->owner = cpu_to_le32(owner);
-
- if (full) {
- rd->flags = RD_CONTINUE;
- find = 1;
- }
-
- /* plocks not replicated for owned resources */
- if (cfgd_plock_ownership && (owner == our_nodeid))
- goto done;
-
- len = sizeof(struct dlm_header) + sizeof(struct resource_data);
-
- pp = (struct plock_data *)(send_buf + sizeof(struct dlm_header) + sizeof(struct resource_data));
-
- list_for_each_entry(po, &r->locks, list) {
- if (find && *last != po)
- continue;
- find = 0;
-
- if (po->flags & P_SYNCING)
- continue;
-
- if (len + sizeof(struct plock_data) > sizeof(send_buf)) {
- *last = po;
- goto full;
- }
- len += sizeof(struct plock_data);
-
- pp->start = cpu_to_le64(po->start);
- pp->end = cpu_to_le64(po->end);
- pp->owner = cpu_to_le64(po->owner);
- pp->pid = cpu_to_le32(po->pid);
- pp->nodeid = cpu_to_le32(po->nodeid);
- pp->ex = po->ex;
- pp->waiter = 0;
- pp++;
- count++;
- }
-
- list_for_each_entry(w, &r->waiters, list) {
- if (find && *last != w)
- continue;
- find = 0;
-
- if (w->flags & P_SYNCING)
- continue;
-
- if (len + sizeof(struct plock_data) > sizeof(send_buf)) {
- *last = w;
- goto full;
- }
- len += sizeof(struct plock_data);
-
- pp->start = cpu_to_le64(w->info.start);
- pp->end = cpu_to_le64(w->info.end);
- pp->owner = cpu_to_le64(w->info.owner);
- pp->pid = cpu_to_le32(w->info.pid);
- pp->nodeid = cpu_to_le32(w->info.nodeid);
- pp->ex = w->info.ex;
- pp->waiter = 1;
- pp++;
- count++;
- }
- done:
- rd->lock_count = cpu_to_le32(count);
- *count_out = count;
- *last = NULL;
- return 0;
-
- full:
- rd->lock_count = cpu_to_le32(count);
- *count_out = count;
- return 1;
-}
-
-/* Copy all plock state into a checkpoint so new node can retrieve it. The
- node creating the ckpt for the mounter needs to be the same node that's
- sending the mounter its journals message (i.e. the low nodeid). The new
- mounter knows the ckpt is ready to read only after it gets its journals
- message.
-
- If the mounter is becoming the new low nodeid in the group, the node doing
- the store closes the ckpt and the new node unlinks the ckpt after reading
- it. The ckpt should then disappear and the new node can create a new ckpt
- for the next mounter. */
-
-static int send_plocks_data(struct lockspace *ls, uint32_t seq, char *buf, int len)
-{
- struct dlm_header *hd;
-
- hd = (struct dlm_header *)buf;
- hd->type = DLM_MSG_PLOCKS_DATA;
- hd->msgdata = seq;
-
- dlm_send_message(ls, buf, len);
-
- return 0;
-}
-
-void send_all_plocks_data(struct lockspace *ls, uint32_t seq, uint32_t *plocks_data)
-{
- struct resource *r;
- void *last;
- int owner, count, len, full;
- uint32_t send_count = 0;
-
- if (!cfgd_enable_plock || ls->disable_plock)
- return;
-
- log_dlock(ls, "send_all_plocks_data %d:%u", our_nodeid, seq);
-
- /* - If r owner is -1, ckpt nothing.
- - If r owner is us, ckpt owner of us and no plocks.
- - If r owner is other, ckpt that owner and any plocks we have on r
- (they've just been synced but owner=0 msg not recved yet).
- - If r owner is 0 and !got_unown, then we've just unowned r;
- ckpt owner of us and any plocks that don't have SYNCING set
- (plocks with SYNCING will be handled by our sync messages).
- - If r owner is 0 and got_unown, then ckpt owner 0 and all plocks;
- (there should be no SYNCING plocks) */
-
- list_for_each_entry(r, &ls->plock_resources, list) {
- if (!cfgd_plock_ownership)
- owner = 0;
- else if (r->owner == -1)
- continue;
- else if (r->owner == our_nodeid)
- owner = our_nodeid;
- else if (r->owner)
- owner = r->owner;
- else if (!r->owner && !got_unown(r))
- owner = our_nodeid;
- else if (!r->owner)
- owner = 0;
- else {
- log_elock(ls, "send_all_plocks_data error owner %d r %llx",
- r->owner, (unsigned long long)r->number);
- continue;
- }
-
- memset(&send_buf, 0, sizeof(send_buf));
- count = 0;
- full = 0;
- last = NULL;
-
- do {
- full = pack_send_buf(ls, r, owner, full, &count, &last);
-
- len = sizeof(struct dlm_header) +
- sizeof(struct resource_data) +
- sizeof(struct plock_data) * count;
-
- log_plock(ls, "send_plocks_data %d:%u n %llu o %d locks %d len %d",
- our_nodeid, seq, (unsigned long long)r->number, r->owner,
- count, len);
-
- send_plocks_data(ls, seq, send_buf, len);
-
- send_count++;
-
- } while (full);
- }
-
- *plocks_data = send_count;
-
- log_dlock(ls, "send_all_plocks_data %d:%u %u done",
- our_nodeid, seq, send_count);
-}
-
-static void free_r_lists(struct resource *r)
-{
- struct posix_lock *po, *po2;
- struct lock_waiter *w, *w2;
-
- list_for_each_entry_safe(po, po2, &r->locks, list) {
- list_del(&po->list);
- free(po);
- }
-
- list_for_each_entry_safe(w, w2, &r->waiters, list) {
- list_del(&w->list);
- free(w);
- }
-}
-
-void receive_plocks_data(struct lockspace *ls, struct dlm_header *hd, int len)
-{
- struct resource_data *rd;
- struct plock_data *pp;
- struct posix_lock *po;
- struct lock_waiter *w;
- struct resource *r;
- uint64_t num;
- uint32_t count;
- uint32_t flags;
- int owner;
- int i;
-
- if (!cfgd_enable_plock || ls->disable_plock)
- return;
-
- if (!ls->need_plocks)
- return;
-
- if (!ls->save_plocks)
- return;
-
- ls->recv_plocks_data_count++;
-
- if (len < sizeof(struct dlm_header) + sizeof(struct resource_data)) {
- log_elock(ls, "recv_plocks_data %d:%u bad len %d",
- hd->nodeid, hd->msgdata, len);
- return;
- }
-
- rd = (struct resource_data *)((char *)hd + sizeof(struct dlm_header));
- num = le64_to_cpu(rd->number);
- owner = le32_to_cpu(rd->owner);
- count = le32_to_cpu(rd->lock_count);
- flags = le32_to_cpu(rd->flags);
-
- if (flags & RD_CONTINUE) {
- r = search_resource(ls, num);
- if (!r) {
- log_elock(ls, "recv_plocks_data %d:%u n %llu not found",
- hd->nodeid, hd->msgdata, (unsigned long long)num);
- return;
- }
- log_plock(ls, "recv_plocks_data %d:%u n %llu continue",
- hd->nodeid, hd->msgdata, (unsigned long long)num);
- goto unpack;
- }
-
- r = malloc(sizeof(struct resource));
- if (!r) {
- log_elock(ls, "recv_plocks_data %d:%u n %llu no mem",
- hd->nodeid, hd->msgdata, (unsigned long long)num);
- return;
- }
- memset(r, 0, sizeof(struct resource));
- INIT_LIST_HEAD(&r->locks);
- INIT_LIST_HEAD(&r->waiters);
- INIT_LIST_HEAD(&r->pending);
-
- if (!cfgd_plock_ownership) {
- if (owner) {
- log_elock(ls, "recv_plocks_data %d:%u n %llu bad owner %d",
- hd->nodeid, hd->msgdata, (unsigned long long)num,
- owner);
- goto fail_free;
- }
- } else {
- if (!owner)
- r->flags |= R_GOT_UNOWN;
-
- /* no locks should be included for owned resources */
-
- if (owner && count) {
- log_elock(ls, "recv_plocks_data %d:%u n %llu o %d bad count %u",
- (unsigned long long)num, owner, count);
- goto fail_free;
- }
- }
-
- r->number = num;
- r->owner = owner;
-
- unpack:
- if (len < sizeof(struct dlm_header) +
- sizeof(struct resource_data) +
- sizeof(struct plock_data) * count) {
- log_elock(ls, "recv_plocks_data %d:%u count %u bad len %d",
- hd->nodeid, hd->msgdata, count, len);
- goto fail_free;
- }
-
- pp = (struct plock_data *)((char *)rd + sizeof(struct resource_data));
-
- for (i = 0; i < count; i++) {
- if (!pp->waiter) {
- po = malloc(sizeof(struct posix_lock));
- if (!po)
- goto fail_free;
- po->start = le64_to_cpu(pp->start);
- po->end = le64_to_cpu(pp->end);
- po->owner = le64_to_cpu(pp->owner);
- po->pid = le32_to_cpu(pp->pid);
- po->nodeid = le32_to_cpu(pp->nodeid);
- po->ex = pp->ex;
- list_add_tail(&po->list, &r->locks);
- } else {
- w = malloc(sizeof(struct lock_waiter));
- if (!w)
- goto fail_free;
- w->info.start = le64_to_cpu(pp->start);
- w->info.end = le64_to_cpu(pp->end);
- w->info.owner = le64_to_cpu(pp->owner);
- w->info.pid = le32_to_cpu(pp->pid);
- w->info.nodeid = le32_to_cpu(pp->nodeid);
- w->info.ex = pp->ex;
- list_add_tail(&w->list, &r->waiters);
- }
- pp++;
- }
-
- log_plock(ls, "recv_plocks_data %d:%u n %llu o %d locks %d len %d",
- hd->nodeid, hd->msgdata, (unsigned long long)r->number,
- r->owner, count, len);
-
- if (!(flags & RD_CONTINUE)) {
- list_add_tail(&r->list, &ls->plock_resources);
- rb_insert_plock_resource(ls, r);
- }
- return;
-
- fail_free:
- if (!(flags & RD_CONTINUE)) {
- free_r_lists(r);
- free(r);
- }
- return;
-}
-
-void clear_plocks_data(struct lockspace *ls)
-{
- struct resource *r, *r2;
- uint32_t count = 0;
-
- if (!cfgd_enable_plock || ls->disable_plock)
- return;
-
- list_for_each_entry_safe(r, r2, &ls->plock_resources, list) {
- free_r_lists(r);
- rb_del_plock_resource(ls, r);
- list_del(&r->list);
- free(r);
- count++;
- }
-
- log_dlock(ls, "clear_plocks_data done %u recv_plocks_data_count %u",
- count, ls->recv_plocks_data_count);
-
- ls->recv_plocks_data_count = 0;
-}
-
-/* Called when a node has failed, or we're unmounting. For a node failure, we
- need to call this when the cpg confchg arrives so that we're guaranteed all
- nodes do this in the same sequence wrt other messages. */
-
-void purge_plocks(struct lockspace *ls, int nodeid, int unmount)
-{
- struct posix_lock *po, *po2;
- struct lock_waiter *w, *w2;
- struct resource *r, *r2;
- int purged = 0;
-
- if (!cfgd_enable_plock || ls->disable_plock)
- return;
-
- list_for_each_entry_safe(r, r2, &ls->plock_resources, list) {
- list_for_each_entry_safe(po, po2, &r->locks, list) {
- if (po->nodeid == nodeid || unmount) {
- list_del(&po->list);
- free(po);
- purged++;
- }
- }
-
- list_for_each_entry_safe(w, w2, &r->waiters, list) {
- if (w->info.nodeid == nodeid || unmount) {
- list_del(&w->list);
- free(w);
- purged++;
- }
- }
-
- /* TODO: haven't thought carefully about how this transition
- to owner 0 might interact with other owner messages in
- progress. */
-
- if (r->owner == nodeid) {
- r->owner = 0;
- r->flags |= R_GOT_UNOWN;
- r->flags |= R_PURGE_UNOWN;
- send_pending_plocks(ls, r);
- }
-
- if (!list_empty(&r->waiters))
- do_waiters(ls, r);
-
- if (!cfgd_plock_ownership &&
- list_empty(&r->locks) && list_empty(&r->waiters)) {
- rb_del_plock_resource(ls, r);
- list_del(&r->list);
- free(r);
- }
- }
-
- if (purged)
- ls->last_plock_time = time(NULL);
-
- log_dlock(ls, "purged %d plocks for %d", purged, nodeid);
-}
-
-int copy_plock_state(struct lockspace *ls, char *buf, int *len_out)
-{
- struct posix_lock *po;
- struct lock_waiter *w;
- struct resource *r;
- struct timeval now;
- int rv = 0;
- int len = DLMC_DUMP_SIZE, pos = 0, ret;
-
- gettimeofday(&now, NULL);
-
- list_for_each_entry(r, &ls->plock_resources, list) {
-
- if (list_empty(&r->locks) &&
- list_empty(&r->waiters) &&
- list_empty(&r->pending)) {
- ret = snprintf(buf + pos, len - pos,
- "%llu rown %d unused_ms %llu\n",
- (unsigned long long)r->number, r->owner,
- (unsigned long long)time_diff_ms(&r->last_access,
- &now));
- if (ret >= len - pos) {
- rv = -ENOSPC;
- goto out;
- }
- pos += ret;
- continue;
- }
-
- list_for_each_entry(po, &r->locks, list) {
- ret = snprintf(buf + pos, len - pos,
- "%llu %s %llu-%llu nodeid %d pid %u owner %llx rown %d\n",
- (unsigned long long)r->number,
- po->ex ? "WR" : "RD",
- (unsigned long long)po->start,
- (unsigned long long)po->end,
- po->nodeid, po->pid,
- (unsigned long long)po->owner, r->owner);
-
- if (ret >= len - pos) {
- rv = -ENOSPC;
- goto out;
- }
- pos += ret;
- }
-
- list_for_each_entry(w, &r->waiters, list) {
- ret = snprintf(buf + pos, len - pos,
- "%llu %s %llu-%llu nodeid %d pid %u owner %llx rown %d WAITING\n",
- (unsigned long long)r->number,
- w->info.ex ? "WR" : "RD",
- (unsigned long long)w->info.start,
- (unsigned long long)w->info.end,
- w->info.nodeid, w->info.pid,
- (unsigned long long)w->info.owner, r->owner);
-
- if (ret >= len - pos) {
- rv = -ENOSPC;
- goto out;
- }
- pos += ret;
- }
-
- list_for_each_entry(w, &r->pending, list) {
- ret = snprintf(buf + pos, len - pos,
- "%llu %s %llu-%llu nodeid %d pid %u owner %llx rown %d PENDING\n",
- (unsigned long long)r->number,
- w->info.ex ? "WR" : "RD",
- (unsigned long long)w->info.start,
- (unsigned long long)w->info.end,
- w->info.nodeid, w->info.pid,
- (unsigned long long)w->info.owner, r->owner);
-
- if (ret >= len - pos) {
- rv = -ENOSPC;
- goto out;
- }
- pos += ret;
- }
- }
- out:
- *len_out = pos;
- return rv;
-}
-
diff --git a/group/dlm_controld/rbtree.c b/group/dlm_controld/rbtree.c
deleted file mode 100644
index 430ccc1..0000000
--- a/group/dlm_controld/rbtree.c
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- Red Black Trees
- (C) 1999 Andrea Arcangeli <andrea(a)suse.de>
- (C) 2002 David Woodhouse <dwmw2(a)infradead.org>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
- linux/lib/rbtree.c
-*/
-
-#include "rbtree.h"
-
-static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
-{
- struct rb_node *right = node->rb_right;
- struct rb_node *parent = rb_parent(node);
-
- if ((node->rb_right = right->rb_left))
- rb_set_parent(right->rb_left, node);
- right->rb_left = node;
-
- rb_set_parent(right, parent);
-
- if (parent)
- {
- if (node == parent->rb_left)
- parent->rb_left = right;
- else
- parent->rb_right = right;
- }
- else
- root->rb_node = right;
- rb_set_parent(node, right);
-}
-
-static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
-{
- struct rb_node *left = node->rb_left;
- struct rb_node *parent = rb_parent(node);
-
- if ((node->rb_left = left->rb_right))
- rb_set_parent(left->rb_right, node);
- left->rb_right = node;
-
- rb_set_parent(left, parent);
-
- if (parent)
- {
- if (node == parent->rb_right)
- parent->rb_right = left;
- else
- parent->rb_left = left;
- }
- else
- root->rb_node = left;
- rb_set_parent(node, left);
-}
-
-void rb_insert_color(struct rb_node *node, struct rb_root *root)
-{
- struct rb_node *parent, *gparent;
-
- while ((parent = rb_parent(node)) && rb_is_red(parent))
- {
- gparent = rb_parent(parent);
-
- if (parent == gparent->rb_left)
- {
- {
- register struct rb_node *uncle = gparent->rb_right;
- if (uncle && rb_is_red(uncle))
- {
- rb_set_black(uncle);
- rb_set_black(parent);
- rb_set_red(gparent);
- node = gparent;
- continue;
- }
- }
-
- if (parent->rb_right == node)
- {
- register struct rb_node *tmp;
- __rb_rotate_left(parent, root);
- tmp = parent;
- parent = node;
- node = tmp;
- }
-
- rb_set_black(parent);
- rb_set_red(gparent);
- __rb_rotate_right(gparent, root);
- } else {
- {
- register struct rb_node *uncle = gparent->rb_left;
- if (uncle && rb_is_red(uncle))
- {
- rb_set_black(uncle);
- rb_set_black(parent);
- rb_set_red(gparent);
- node = gparent;
- continue;
- }
- }
-
- if (parent->rb_left == node)
- {
- register struct rb_node *tmp;
- __rb_rotate_right(parent, root);
- tmp = parent;
- parent = node;
- node = tmp;
- }
-
- rb_set_black(parent);
- rb_set_red(gparent);
- __rb_rotate_left(gparent, root);
- }
- }
-
- rb_set_black(root->rb_node);
-}
-
-static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
- struct rb_root *root)
-{
- struct rb_node *other;
-
- while ((!node || rb_is_black(node)) && node != root->rb_node)
- {
- if (parent->rb_left == node)
- {
- other = parent->rb_right;
- if (rb_is_red(other))
- {
- rb_set_black(other);
- rb_set_red(parent);
- __rb_rotate_left(parent, root);
- other = parent->rb_right;
- }
- if ((!other->rb_left || rb_is_black(other->rb_left)) &&
- (!other->rb_right || rb_is_black(other->rb_right)))
- {
- rb_set_red(other);
- node = parent;
- parent = rb_parent(node);
- }
- else
- {
- if (!other->rb_right || rb_is_black(other->rb_right))
- {
- rb_set_black(other->rb_left);
- rb_set_red(other);
- __rb_rotate_right(other, root);
- other = parent->rb_right;
- }
- rb_set_color(other, rb_color(parent));
- rb_set_black(parent);
- rb_set_black(other->rb_right);
- __rb_rotate_left(parent, root);
- node = root->rb_node;
- break;
- }
- }
- else
- {
- other = parent->rb_left;
- if (rb_is_red(other))
- {
- rb_set_black(other);
- rb_set_red(parent);
- __rb_rotate_right(parent, root);
- other = parent->rb_left;
- }
- if ((!other->rb_left || rb_is_black(other->rb_left)) &&
- (!other->rb_right || rb_is_black(other->rb_right)))
- {
- rb_set_red(other);
- node = parent;
- parent = rb_parent(node);
- }
- else
- {
- if (!other->rb_left || rb_is_black(other->rb_left))
- {
- rb_set_black(other->rb_right);
- rb_set_red(other);
- __rb_rotate_left(other, root);
- other = parent->rb_left;
- }
- rb_set_color(other, rb_color(parent));
- rb_set_black(parent);
- rb_set_black(other->rb_left);
- __rb_rotate_right(parent, root);
- node = root->rb_node;
- break;
- }
- }
- }
- if (node)
- rb_set_black(node);
-}
-
-void rb_erase(struct rb_node *node, struct rb_root *root)
-{
- struct rb_node *child, *parent;
- int color;
-
- if (!node->rb_left)
- child = node->rb_right;
- else if (!node->rb_right)
- child = node->rb_left;
- else
- {
- struct rb_node *old = node, *left;
-
- node = node->rb_right;
- while ((left = node->rb_left) != NULL)
- node = left;
-
- if (rb_parent(old)) {
- if (rb_parent(old)->rb_left == old)
- rb_parent(old)->rb_left = node;
- else
- rb_parent(old)->rb_right = node;
- } else
- root->rb_node = node;
-
- child = node->rb_right;
- parent = rb_parent(node);
- color = rb_color(node);
-
- if (parent == old) {
- parent = node;
- } else {
- if (child)
- rb_set_parent(child, parent);
- parent->rb_left = child;
-
- node->rb_right = old->rb_right;
- rb_set_parent(old->rb_right, node);
- }
-
- node->rb_parent_color = old->rb_parent_color;
- node->rb_left = old->rb_left;
- rb_set_parent(old->rb_left, node);
-
- goto color;
- }
-
- parent = rb_parent(node);
- color = rb_color(node);
-
- if (child)
- rb_set_parent(child, parent);
- if (parent)
- {
- if (parent->rb_left == node)
- parent->rb_left = child;
- else
- parent->rb_right = child;
- }
- else
- root->rb_node = child;
-
- color:
- if (color == RB_BLACK)
- __rb_erase_color(child, parent, root);
-}
-
-/*
- * This function returns the first node (in sort order) of the tree.
- */
-struct rb_node *rb_first(const struct rb_root *root)
-{
- struct rb_node *n;
-
- n = root->rb_node;
- if (!n)
- return NULL;
- while (n->rb_left)
- n = n->rb_left;
- return n;
-}
-
-struct rb_node *rb_last(const struct rb_root *root)
-{
- struct rb_node *n;
-
- n = root->rb_node;
- if (!n)
- return NULL;
- while (n->rb_right)
- n = n->rb_right;
- return n;
-}
-
-struct rb_node *rb_next(const struct rb_node *node)
-{
- struct rb_node *parent;
-
- if (rb_parent(node) == node)
- return NULL;
-
- /* If we have a right-hand child, go down and then left as far
- as we can. */
- if (node->rb_right) {
- node = node->rb_right;
- while (node->rb_left)
- node=node->rb_left;
- return (struct rb_node *)node;
- }
-
- /* No right-hand children. Everything down and left is
- smaller than us, so any 'next' node must be in the general
- direction of our parent. Go up the tree; any time the
- ancestor is a right-hand child of its parent, keep going
- up. First time it's a left-hand child of its parent, said
- parent is our 'next' node. */
- while ((parent = rb_parent(node)) && node == parent->rb_right)
- node = parent;
-
- return parent;
-}
-
-struct rb_node *rb_prev(const struct rb_node *node)
-{
- struct rb_node *parent;
-
- if (rb_parent(node) == node)
- return NULL;
-
- /* If we have a left-hand child, go down and then right as far
- as we can. */
- if (node->rb_left) {
- node = node->rb_left;
- while (node->rb_right)
- node=node->rb_right;
- return (struct rb_node *)node;
- }
-
- /* No left-hand children. Go up till we find an ancestor which
- is a right-hand child of its parent */
- while ((parent = rb_parent(node)) && node == parent->rb_left)
- node = parent;
-
- return parent;
-}
-
-void rb_replace_node(struct rb_node *victim, struct rb_node *new,
- struct rb_root *root)
-{
- struct rb_node *parent = rb_parent(victim);
-
- /* Set the surrounding nodes to point to the replacement */
- if (parent) {
- if (victim == parent->rb_left)
- parent->rb_left = new;
- else
- parent->rb_right = new;
- } else {
- root->rb_node = new;
- }
- if (victim->rb_left)
- rb_set_parent(victim->rb_left, new);
- if (victim->rb_right)
- rb_set_parent(victim->rb_right, new);
-
- /* Copy the pointers/colour from the victim to the replacement */
- *new = *victim;
-}
diff --git a/group/include/linux_endian.h b/group/include/linux_endian.h
deleted file mode 100644
index 43089d2..0000000
--- a/group/include/linux_endian.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef __LINUX_ENDIAN_DOT_H__
-#define __LINUX_ENDIAN_DOT_H__
-
-
-#include <endian.h>
-#include <byteswap.h>
-
-
-/* I'm not sure which versions of alpha glibc/gcc are broken,
- so fix all of them. */
-#ifdef __alpha__
-#undef bswap_64
-static __inline__ unsigned long bswap_64(unsigned long x)
-{
- unsigned int h = x >> 32;
- unsigned int l = x;
-
- h = bswap_32(h);
- l = bswap_32(l);
-
- return ((unsigned long)l << 32) | h;
-}
-#endif /* __alpha__ */
-
-
-#if __BYTE_ORDER == __BIG_ENDIAN
-
-#define be16_to_cpu(x) (x)
-#define be32_to_cpu(x) (x)
-#define be64_to_cpu(x) (x)
-
-#define cpu_to_be16(x) (x)
-#define cpu_to_be32(x) (x)
-#define cpu_to_be64(x) (x)
-
-#define le16_to_cpu(x) (bswap_16((x)))
-#define le32_to_cpu(x) (bswap_32((x)))
-#define le64_to_cpu(x) (bswap_64((x)))
-
-#define cpu_to_le16(x) (bswap_16((x)))
-#define cpu_to_le32(x) (bswap_32((x)))
-#define cpu_to_le64(x) (bswap_64((x)))
-
-#endif /* __BYTE_ORDER == __BIG_ENDIAN */
-
-
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-
-#define be16_to_cpu(x) (bswap_16((x)))
-#define be32_to_cpu(x) (bswap_32((x)))
-#define be64_to_cpu(x) (bswap_64((x)))
-
-#define cpu_to_be16(x) (bswap_16((x)))
-#define cpu_to_be32(x) (bswap_32((x)))
-#define cpu_to_be64(x) (bswap_64((x)))
-
-#define le16_to_cpu(x) (x)
-#define le32_to_cpu(x) (x)
-#define le64_to_cpu(x) (x)
-
-#define cpu_to_le16(x) (x)
-#define cpu_to_le32(x) (x)
-#define cpu_to_le64(x) (x)
-
-#endif /* __BYTE_ORDER == __LITTLE_ENDIAN */
-
-
-#endif /* __LINUX_ENDIAN_DOT_H__ */
diff --git a/group/include/list.h b/group/include/list.h
deleted file mode 100644
index 8100cbc..0000000
--- a/group/include/list.h
+++ /dev/null
@@ -1,336 +0,0 @@
-/* Copied from include/linux/list.h */
-
-#ifndef _LINUX_LIST_H
-#define _LINUX_LIST_H
-
-/**
- * container_of - cast a member of a structure out to the containing structure
- *
- * @ptr: the pointer to the member.
- * @type: the type of the container struct this is embedded in.
- * @member: the name of the member within the struct.
- *
- */
-#define container_of(ptr, type, member) ({ \
- const typeof( ((type *)0)->member ) *__mptr = (ptr); \
- (type *)( (char *)__mptr - offsetof(type,member) );})
-
-
-/*
- * These are non-NULL pointers that will result in page faults
- * under normal circumstances, used to verify that nobody uses
- * non-initialized list entries.
- */
-#define LIST_POISON1 ((void *) 0x00100100)
-#define LIST_POISON2 ((void *) 0x00200200)
-
-/*
- * Simple doubly linked list implementation.
- *
- * Some of the internal functions ("__xxx") are useful when
- * manipulating whole lists rather than single entries, as
- * sometimes we already know the next/prev entries and we can
- * generate better code by using them directly rather than
- * using the generic single-entry routines.
- */
-
-struct list_head {
- struct list_head *next, *prev;
-};
-
-#define LIST_HEAD_INIT(name) { &(name), &(name) }
-
-#define LIST_HEAD(name) \
- struct list_head name = LIST_HEAD_INIT(name)
-
-#define INIT_LIST_HEAD(ptr) do { \
- (ptr)->next = (ptr); (ptr)->prev = (ptr); \
-} while (0)
-
-/*
- * Insert a new entry between two known consecutive entries.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-static inline void __list_add(struct list_head *new,
- struct list_head *prev,
- struct list_head *next)
-{
- next->prev = new;
- new->next = next;
- new->prev = prev;
- prev->next = new;
-}
-
-/**
- * list_add - add a new entry
- * @new: new entry to be added
- * @head: list head to add it after
- *
- * Insert a new entry after the specified head.
- * This is good for implementing stacks.
- */
-static inline void list_add(struct list_head *new, struct list_head *head)
-{
- __list_add(new, head, head->next);
-}
-
-/**
- * list_add_tail - add a new entry
- * @new: new entry to be added
- * @head: list head to add it before
- *
- * Insert a new entry before the specified head.
- * This is useful for implementing queues.
- */
-static inline void list_add_tail(struct list_head *new, struct list_head *head)
-{
- __list_add(new, head->prev, head);
-}
-
-/*
- * Delete a list entry by making the prev/next entries
- * point to each other.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-static inline void __list_del(struct list_head * prev, struct list_head * next)
-{
- next->prev = prev;
- prev->next = next;
-}
-
-/**
- * list_del - deletes entry from list.
- * @entry: the element to delete from the list.
- * Note: list_empty on entry does not return true after this, the entry is
- * in an undefined state.
- */
-static inline void list_del(struct list_head *entry)
-{
- __list_del(entry->prev, entry->next);
- entry->next = LIST_POISON1;
- entry->prev = LIST_POISON2;
-}
-
-/**
- * list_del_init - deletes entry from list and reinitialize it.
- * @entry: the element to delete from the list.
- */
-static inline void list_del_init(struct list_head *entry)
-{
- __list_del(entry->prev, entry->next);
- INIT_LIST_HEAD(entry);
-}
-
-/**
- * list_move - delete from one list and add as another's head
- * @list: the entry to move
- * @head: the head that will precede our entry
- */
-static inline void list_move(struct list_head *list, struct list_head *head)
-{
- __list_del(list->prev, list->next);
- list_add(list, head);
-}
-
-/**
- * list_move_tail - delete from one list and add as another's tail
- * @list: the entry to move
- * @head: the head that will follow our entry
- */
-static inline void list_move_tail(struct list_head *list,
- struct list_head *head)
-{
- __list_del(list->prev, list->next);
- list_add_tail(list, head);
-}
-
-/**
- * list_empty - tests whether a list is empty
- * @head: the list to test.
- */
-static inline int list_empty(const struct list_head *head)
-{
- return head->next == head;
-}
-
-/**
- * list_empty_careful - tests whether a list is
- * empty _and_ checks that no other CPU might be
- * in the process of still modifying either member
- *
- * NOTE: using list_empty_careful() without synchronization
- * can only be safe if the only activity that can happen
- * to the list entry is list_del_init(). Eg. it cannot be used
- * if another CPU could re-list_add() it.
- *
- * @head: the list to test.
- */
-static inline int list_empty_careful(const struct list_head *head)
-{
- struct list_head *next = head->next;
- return (next == head) && (next == head->prev);
-}
-
-static inline void __list_splice(struct list_head *list,
- struct list_head *head)
-{
- struct list_head *first = list->next;
- struct list_head *last = list->prev;
- struct list_head *at = head->next;
-
- first->prev = head;
- head->next = first;
-
- last->next = at;
- at->prev = last;
-}
-
-/**
- * list_splice - join two lists
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- */
-static inline void list_splice(struct list_head *list, struct list_head *head)
-{
- if (!list_empty(list))
- __list_splice(list, head);
-}
-
-/**
- * list_splice_init - join two lists and reinitialise the emptied list.
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- *
- * The list at @list is reinitialised
- */
-static inline void list_splice_init(struct list_head *list,
- struct list_head *head)
-{
- if (!list_empty(list)) {
- __list_splice(list, head);
- INIT_LIST_HEAD(list);
- }
-}
-
-/**
- * list_entry - get the struct for this entry
- * @ptr: the &struct list_head pointer.
- * @type: the type of the struct this is embedded in.
- * @member: the name of the list_struct within the struct.
- */
-#define list_entry(ptr, type, member) \
- container_of(ptr, type, member)
-
-/**
- * list_first_entry - get the first element from a list
- * @ptr: the list head to take the element from.
- * @type: the type of the struct this is embedded in.
- * @member: the name of the list_struct within the struct.
- *
- * Note, that list is expected to be not empty.
- */
-#define list_first_entry(ptr, type, member) \
- list_entry((ptr)->next, type, member)
-
-/**
- * list_for_each - iterate over a list
- * @pos: the &struct list_head to use as a loop counter.
- * @head: the head for your list.
- */
-#define list_for_each(pos, head) \
- for (pos = (head)->next; pos != (head); pos = pos->next)
-
-/**
- * __list_for_each - iterate over a list
- * @pos: the &struct list_head to use as a loop counter.
- * @head: the head for your list.
- *
- * This variant differs from list_for_each() in that it's the
- * simplest possible list iteration code, no prefetching is done.
- * Use this for code that knows the list to be very short (empty
- * or 1 entry) most of the time.
- */
-#define __list_for_each(pos, head) \
- for (pos = (head)->next; pos != (head); pos = pos->next)
-
-/**
- * list_for_each_prev - iterate over a list backwards
- * @pos: the &struct list_head to use as a loop counter.
- * @head: the head for your list.
- */
-#define list_for_each_prev(pos, head) \
- for (pos = (head)->prev; pos != (head); pos = pos->prev)
-
-/**
- * list_for_each_safe - iterate over a list safe against removal of list entry
- * @pos: the &struct list_head to use as a loop counter.
- * @n: another &struct list_head to use as temporary storage
- * @head: the head for your list.
- */
-#define list_for_each_safe(pos, n, head) \
- for (pos = (head)->next, n = pos->next; pos != (head); \
- pos = n, n = pos->next)
-
-/**
- * list_for_each_entry - iterate over list of given type
- * @pos: the type * to use as a loop counter.
- * @head: the head for your list.
- * @member: the name of the list_struct within the struct.
- */
-#define list_for_each_entry(pos, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member); \
- &pos->member != (head); \
- pos = list_entry(pos->member.next, typeof(*pos), member))
-
-/**
- * list_for_each_entry_reverse - iterate backwards over list of given type.
- * @pos: the type * to use as a loop counter.
- * @head: the head for your list.
- * @member: the name of the list_struct within the struct.
- */
-#define list_for_each_entry_reverse(pos, head, member) \
- for (pos = list_entry((head)->prev, typeof(*pos), member); \
- &pos->member != (head); \
- pos = list_entry(pos->member.prev, typeof(*pos), member))
-
-/**
- * list_prepare_entry - prepare a pos entry for use as a start point in
- * list_for_each_entry_continue
- * @pos: the type * to use as a start point
- * @head: the head of the list
- * @member: the name of the list_struct within the struct.
- */
-#define list_prepare_entry(pos, head, member) \
- ((pos) ? : list_entry(head, typeof(*pos), member))
-
-/**
- * list_for_each_entry_continue - iterate over list of given type
- * continuing after existing point
- * @pos: the type * to use as a loop counter.
- * @head: the head for your list.
- * @member: the name of the list_struct within the struct.
- */
-#define list_for_each_entry_continue(pos, head, member) \
- for (pos = list_entry(pos->member.next, typeof(*pos), member); \
- &pos->member != (head); \
- pos = list_entry(pos->member.next, typeof(*pos), member))
-
-/**
- * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @pos: the type * to use as a loop counter.
- * @n: another type * to use as temporary storage
- * @head: the head for your list.
- * @member: the name of the list_struct within the struct.
- */
-#define list_for_each_entry_safe(pos, n, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member), \
- n = list_entry(pos->member.next, typeof(*pos), member); \
- &pos->member != (head); \
- pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-
-#endif
diff --git a/group/include/rbtree.h b/group/include/rbtree.h
deleted file mode 100644
index e2b2409..0000000
--- a/group/include/rbtree.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- Red Black Trees
- (C) 1999 Andrea Arcangeli <andrea(a)suse.de>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
- linux/include/linux/rbtree.h
-
- To use rbtrees you'll have to implement your own insert and search cores.
- This will avoid us to use callbacks and to drop drammatically performances.
- I know it's not the cleaner way, but in C (not in C++) to get
- performances and genericity...
-
- Some example of insert and search follows here. The search is a plain
- normal search over an ordered tree. The insert instead must be implemented
- int two steps: as first thing the code must insert the element in
- order as a red leaf in the tree, then the support library function
- rb_insert_color() must be called. Such function will do the
- not trivial work to rebalance the rbtree if necessary.
-
------------------------------------------------------------------------
-static inline struct page * rb_search_page_cache(struct inode * inode,
- unsigned long offset)
-{
- struct rb_node * n = inode->i_rb_page_cache.rb_node;
- struct page * page;
-
- while (n)
- {
- page = rb_entry(n, struct page, rb_page_cache);
-
- if (offset < page->offset)
- n = n->rb_left;
- else if (offset > page->offset)
- n = n->rb_right;
- else
- return page;
- }
- return NULL;
-}
-
-static inline struct page * __rb_insert_page_cache(struct inode * inode,
- unsigned long offset,
- struct rb_node * node)
-{
- struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
- struct rb_node * parent = NULL;
- struct page * page;
-
- while (*p)
- {
- parent = *p;
- page = rb_entry(parent, struct page, rb_page_cache);
-
- if (offset < page->offset)
- p = &(*p)->rb_left;
- else if (offset > page->offset)
- p = &(*p)->rb_right;
- else
- return page;
- }
-
- rb_link_node(node, parent, p);
-
- return NULL;
-}
-
-static inline struct page * rb_insert_page_cache(struct inode * inode,
- unsigned long offset,
- struct rb_node * node)
-{
- struct page * ret;
- if ((ret = __rb_insert_page_cache(inode, offset, node)))
- goto out;
- rb_insert_color(node, &inode->i_rb_page_cache);
- out:
- return ret;
-}
------------------------------------------------------------------------
-*/
-
-#ifndef _LINUX_RBTREE_H
-#define _LINUX_RBTREE_H
-
-#include <linux/stddef.h>
-
-struct rb_node
-{
- unsigned long rb_parent_color;
-#define RB_RED 0
-#define RB_BLACK 1
- struct rb_node *rb_right;
- struct rb_node *rb_left;
-} __attribute__((aligned(sizeof(long))));
- /* The alignment might seem pointless, but allegedly CRIS needs it */
-
-struct rb_root
-{
- struct rb_node *rb_node;
-};
-
-
-#define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3))
-#define rb_color(r) ((r)->rb_parent_color & 1)
-#define rb_is_red(r) (!rb_color(r))
-#define rb_is_black(r) rb_color(r)
-#define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0)
-#define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0)
-
-static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
-{
- rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p;
-}
-static inline void rb_set_color(struct rb_node *rb, int color)
-{
- rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
-}
-
-#define RB_ROOT (struct rb_root) { NULL, }
-#define rb_entry(ptr, type, member) container_of(ptr, type, member)
-
-#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
-#define RB_EMPTY_NODE(node) (rb_parent(node) == node)
-#define RB_CLEAR_NODE(node) (rb_set_parent(node, node))
-
-extern void rb_insert_color(struct rb_node *, struct rb_root *);
-extern void rb_erase(struct rb_node *, struct rb_root *);
-
-/* Find logical next and previous nodes in a tree */
-extern struct rb_node *rb_next(const struct rb_node *);
-extern struct rb_node *rb_prev(const struct rb_node *);
-extern struct rb_node *rb_first(const struct rb_root *);
-extern struct rb_node *rb_last(const struct rb_root *);
-
-/* Fast replacement of a single node without remove/rebalance/add/rebalance */
-extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
- struct rb_root *root);
-
-static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
- struct rb_node ** rb_link)
-{
- node->rb_parent_color = (unsigned long )parent;
- node->rb_left = node->rb_right = NULL;
-
- *rb_link = node;
-}
-
-#endif /* _LINUX_RBTREE_H */
diff --git a/group/man/dlm_controld.8 b/group/man/dlm_controld.8
deleted file mode 100644
index 7100f0e..0000000
--- a/group/man/dlm_controld.8
+++ /dev/null
@@ -1,313 +0,0 @@
-.TH DLM_CONTROLD 8 2009-01-18 cluster cluster
-
-.SH NAME
-dlm_controld \- daemon that configures dlm according to cluster events
-
-.SH SYNOPSIS
-.B dlm_controld
-[OPTIONS]
-
-.SH DESCRIPTION
-The dlm lives in the kernel, and the cluster infrastructure (corosync
-membership and group management) lives in user space. The dlm in the
-kernel needs to adjust/recover for certain cluster events. It's the job
-of dlm_controld to receive these events and reconfigure the kernel dlm as
-needed. dlm_controld controls and configures the dlm through sysfs and
-configfs files that are considered dlm-internal interfaces.
-
-The cman init script usually starts the dlm_controld daemon.
-
-.SH OPTIONS
-Command line options override a corresponding setting in cluster.conf.
-
-.TP
-.B \-D
-Enable debugging to stderr and don't fork.
-.br
-See also
-.B dlm_tool dump
-in
-.BR dlm_tool (8).
-
-.TP
-.B \-L
-Enable debugging to log file.
-.br
-See also
-.B logging
-in
-.BR cluster.conf (5).
-
-.TP
-.B \-K
-Enable kernel dlm debugging messages.
-.br
-See also
-.B log_debug
-below.
-
-.TP
-.BI \-r " num"
-dlm kernel lowcomms protocol, 0 tcp, 1 sctp, 2 detect.
-2 selects tcp if corosync rrp_mode is "none", otherwise sctp.
-.br
-Default 2.
-
-.TP
-.BI \-g " num"
-groupd compatibility mode, 0 off, 1 on.
-.br
-Default 0.
-
-.TP
-.BI \-f " num"
-Enable (1) or disable (0) fencing recovery dependency.
-.br
-Default 1.
-
-.TP
-.BI \-q " num"
-Enable (1) or disable (0) quorum recovery dependency.
-.br
-Default 0.
-
-.TP
-.BI \-d " num"
-Enable (1) or disable (0) deadlock detection code.
-.br
-Default 0.
-
-.TP
-.BI \-p " num"
-Enable (1) or disable (0) plock code for cluster fs.
-.br
-Default 1.
-
-.TP
-.BI \-l " num"
-Limit the rate of plock operations, 0 for no limit.
-.br
-Default 0.
-
-.TP
-.BI \-o " num"
-Enable (1) or disable (0) plock ownership.
-.br
-Default 1.
-
-.TP
-.BI \-t " ms"
-Plock ownership drop resources time (milliseconds).
-.br
-Default 10000.
-
-.TP
-.BI \-c " num"
-Plock ownership drop resources count.
-.br
-Default 10.
-
-.TP
-.BI \-a " ms"
-Plock ownership drop resources age (milliseconds).
-.br
-Default 10000.
-
-.TP
-.B \-P
-Enable plock debugging messages (can produce excessive output).
-
-.TP
-.B \-h
-Print a help message describing available options, then exit.
-
-.TP
-.B \-V
-Print program version information, then exit.
-
-
-.SH FILES
-.BR cluster.conf (5)
-is usually located at /etc/cluster/cluster.conf. It is not read directly.
-Other cluster components load the contents into memory, and the values are
-accessed through the libccs library.
-
-Configuration options for dlm (kernel) and dlm_controld are added to the
-<dlm /> section of cluster.conf, within the top level <cluster> section.
-
-.SS Kernel options
-
-.TP
-.B protocol
-The network
-.B protocol
-can be set to tcp, sctp or detect which selects tcp or sctp based on
-the corosync rrp_mode configuration (redundant ring protocol).
-The rrp_mode "none" results in tcp. Default detect.
-
-<dlm protocol="detect"/>
-
-.TP
-.B timewarn
-After waiting
-.B timewarn
-centiseconds, the dlm will emit a warning via netlink. This only applies
-to lockspaces created with the DLM_LSFL_TIMEWARN flag, and is used for
-deadlock detection. Default 500 (5 seconds).
-
-<dlm timewarn="500"/>
-
-.TP
-.B log_debug
-DLM kernel debug messages can be enabled by setting
-.B log_debug
-to 1. Default 0.
-
-<dlm log_debug="0"/>
-
-.TP
-.B clusternode/weight
-The lock directory
-.B weight
-can be specified on the clusternode lines. Weights would usually be
-used in the lock server configurations shown below instead.
-
-<clusternode name="node01" nodeid="1" weight="1"/>
-
-.SS Daemon options
-
-.TP
-.B enable_fencing
-See command line description.
-
-<dlm enable_fencing="1"/>
-
-.TP
-.B enable_quorum
-See command line description.
-
-<dlm enable_quorum="0"/>
-
-.TP
-.B enable_deadlk
-See command line description.
-
-<dlm enable_deadlk="0"/>
-
-.TP
-.B enable_plock
-See command line description.
-
-<dlm enable_plock="1"/>
-
-.TP
-.B plock_rate_limit
-See command line description.
-
-<dlm plock_rate_limit="0"/>
-
-.TP
-.B plock_ownership
-See command line description.
-
-<dlm plock_ownership="1"/>
-
-.TP
-.B drop_resources_time
-See command line description.
-
-<dlm drop_resources_time="10000"/>
-
-.TP
-.B drop_resources_count
-See command line description.
-
-<dlm drop_resources_count="10"/>
-
-.TP
-.B drop_resources_age
-See command line description.
-
-<dlm drop_resources_age="10000"/>
-
-.TP
-.B plock_debug
-Enable (1) or disable (0) plock debugging messages (can produce excessive
-output). Default 0.
-
-<dlm plock_debug="0"/>
-
-
-.SS Disabling resource directory
-
-Lockspaces usually use a resource directory to keep track of which node is
-the master of each resource. The dlm can operate without the resource
-directory, though, by statically assigning the master of a resource using
-a hash of the resource name. To enable, set the per-lockspace
-.B nodir
-option to 1.
-
-.nf
-<dlm>
- <lockspace name="foo" nodir="1"/>
-</dlm>
-.fi
-
-.SS Lock-server configuration
-
-The nodir setting can be combined with node weights to create a
-configuration where select node(s) are the master of all resources/locks.
-These
-.B master
-nodes can be viewed as "lock servers" for the other nodes.
-
-.nf
-<dlm>
- <lockspace name="foo" nodir="1">
- <master name="node01"/>
- </lockspace>
-</dlm>
-
-or,
-
-<dlm>
- <lockspace name="foo" nodir="1">
- <master name="node01"/>
- <master name="node02"/>
- </lockspace>
-</dlm>
-.fi
-
-Lock management will be partitioned among the available masters. There
-can be any number of masters defined. The designated master nodes will
-master all resources/locks (according to the resource name hash). When no
-masters are members of the lockspace, then the nodes revert to the common
-fully-distributed configuration. Recovery is faster, with little
-disruption, when a non-master node joins/leaves.
-
-There is no special mode in the dlm for this lock server configuration,
-it's just a natural consequence of combining the "nodir" option with node
-weights. When a lockspace has master nodes defined, the master has a
-default weight of 1 and all non-master nodes have weight of 0. An explicit
-non-zero
-.B weight
-can also be assigned to master nodes, e.g.
-
-.nf
-<dlm>
- <lockspace name="foo" nodir="1">
- <master name="node01" weight="2"/>
- <master name="node02" weight="1"/>
- </lockspace>
-</dlm>
-.fi
-
-In which case node01 will master 2/3 of the total resources and node02 will
-master the other 1/3.
-
-.SH SEE ALSO
-.BR dlm_tool (8),
-.BR fenced (8),
-.BR cman (5),
-.BR cluster.conf (5)
-
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=2b5e4f87ca…
Commit: 2b5e4f87ca394113fd54395d5b5b3ab1e410dbb2
Parent: 591a71f29677257ae49ce6d10a4bd1da19d73233
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Fri Sep 30 13:19:43 2011 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Fri Sep 30 13:19:43 2011 -0500
dlm: clear out old stuff and build system
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
Makefile.am | 28 -
autogen.sh | 4 -
configure.ac | 270 ------
dlm/Makefile.am | 3 -
dlm/doc/Makefile.am | 6 -
dlm/doc/dlm_tool.txt | 167 ----
dlm/doc/example.c | 52 --
dlm/doc/libdlm.txt | 533 ------------
dlm/doc/user-dlm-overview.txt | 325 --------
dlm/libdlm/Makefile.am | 28 -
dlm/libdlm/libdlm.pc.in | 11 -
dlm/libdlm/libdlm_lt.pc.in | 11 -
dlm/libdlmcontrol/Makefile.am | 17 -
dlm/libdlmcontrol/libdlmcontrol.pc.in | 11 -
dlm/man/Makefile.am | 23 -
dlm/tests/Makefile.am | 3 -
dlm/tests/usertest/Makefile.am | 18 -
dlm/tests/usertest/alternate-lvb.c | 167 ----
dlm/tests/usertest/asttest.c | 283 -------
dlm/tests/usertest/dlmtest.c | 291 -------
dlm/tests/usertest/dlmtest2.c | 1469 ---------------------------------
dlm/tests/usertest/flood.c | 170 ----
dlm/tests/usertest/joinleave.c | 64 --
dlm/tests/usertest/lstest.c | 328 --------
dlm/tests/usertest/lvb.c | 246 ------
dlm/tests/usertest/pingtest.c | 345 --------
dlm/tests/usertest/sublocks.c | 180 ----
dlm/tests/usertest/threads.c | 311 -------
dlm/tool/Makefile.am | 12 -
doc/COPYING.applications | 339 --------
doc/COPYING.libraries | 510 ------------
doc/COPYRIGHT | 42 -
doc/Makefile.am | 10 -
doc/README.licence | 33 -
doc/gfs2.txt | 45 -
doc/journaling.txt | 155 ----
doc/min-gfs.txt | 159 ----
doc/usage.txt | 177 ----
group/Makefile.am | 3 -
group/dlm_controld/Makefile.am | 33 -
group/include/Makefile.am | 3 -
group/man/Makefile.am | 3 -
42 files changed, 0 insertions(+), 6888 deletions(-)
diff --git a/Makefile.am b/Makefile.am
deleted file mode 100644
index 5bbbda4..0000000
--- a/Makefile.am
+++ /dev/null
@@ -1,28 +0,0 @@
-EXTRA_DIST = autogen.sh
-
-AUTOMAKE_OPTIONS = foreign
-
-MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure depcomp \
- config.guess config.sub missing install-sh \
- autoheader automake autoconf libtool libtoolize \
- ltmain.sh compile make/clusterautoconfig.h.in \
- make/clusterautoconfig.h.in~
-
-noinst_HEADERS = make/copyright.cf
-
-ACLOCAL_AMFLAGS = -I m4
-
-SUBDIRS = dlm group doc
-
-install-exec-local:
- $(INSTALL) -d $(DESTDIR)/$(LOGDIR)
- $(INSTALL) -d $(DESTDIR)/$(CLUSTERVARRUN)
- $(INSTALL) -d $(DESTDIR)/$(CLUSTERVARLIB)
-
-uninstall-local:
- rmdir $(DESTDIR)/$(LOGDIR) || :;
- rmdir $(DESTDIR)/$(CLUSTERVARRUN) || :;
- rmdir $(DESTDIR)/$(CLUSTERVARLIB) || :;
-
-maintainer-clean-local:
- rm -rf m4
diff --git a/autogen.sh b/autogen.sh
deleted file mode 100755
index 3c5e1d9..0000000
--- a/autogen.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# Run this to generate all the initial makefiles, etc.
-mkdir -p m4
-autoreconf -i -v && echo Now run ./configure and make
diff --git a/configure.ac b/configure.ac
deleted file mode 100644
index 170074a..0000000
--- a/configure.ac
+++ /dev/null
@@ -1,270 +0,0 @@
-
-# Process this file with autoconf to produce a configure script.
-
-AC_PREREQ([2.63])
-AC_INIT([dlm], [master], [linux-cluster(a)redhat.com])
-AM_INIT_AUTOMAKE([-Wno-portability])
-LT_PREREQ([2.2.6])
-LT_INIT
-
-AC_CONFIG_MACRO_DIR([m4])
-AC_CONFIG_SRCDIR([dlm/libdlm/libdlm.c])
-AC_CONFIG_HEADERS([make/clusterautoconfig.h])
-
-AC_CANONICAL_HOST
-AC_PROG_LIBTOOL
-
-AC_LANG([C])
-
-# Sanitize path
-
-if test "$prefix" = "NONE"; then
- prefix="/usr"
- if test "$localstatedir" = "\${prefix}/var"; then
- localstatedir="/var"
- fi
- if test "$sysconfdir" = "\${prefix}/etc"; then
- sysconfdir="/etc"
- fi
- if test "$libdir" = "\${exec_prefix}/lib"; then
- if test -e /usr/lib64; then
- libdir="/usr/lib64"
- else
- libdir="/usr/lib"
- fi
- fi
-fi
-
-case $exec_prefix in
- NONE) exec_prefix=$prefix;;
- prefix) exec_prefix=$prefix;;
-esac
-
-# Checks for programs.
-
-# check stolen from gnulib/m4/gnu-make.m4
-if ! ${MAKE-make} --version /cannot/make/this >/dev/null 2>&1; then
- AC_MSG_ERROR([you don't seem to have GNU make; it is required])
-fi
-
-AC_PROG_CC
-AM_PROG_CC_C_O
-AC_PROG_LN_S
-AC_PROG_INSTALL
-AC_PROG_MAKE_SET
-AC_PROG_CXX
-AC_PROG_RANLIB
-
-## local helper functions
-
-# this function checks if CC support options passed as
-# args. Global CFLAGS are ignored during this test.
-cc_supports_flag() {
- local CFLAGS="$@"
- AC_MSG_CHECKING([whether $CC supports "$@"])
- AC_COMPILE_IFELSE([int main(){return 0;}] ,
- [RC=0; AC_MSG_RESULT([yes])],
- [RC=1; AC_MSG_RESULT([no])])
- return $RC
-}
-
-# this function tests if a library has a certain function
-# by using AC_CHECK_LIB but restores the original LIBS global
-# envvar. This is required to avoid libtool to link everything
-# with everything.
-check_lib_no_libs() {
- AC_CHECK_LIB([$1], [$2],,
- [AC_MSG_ERROR([Unable to find $1 library])])
- LIBS=$ac_check_lib_save_LIBS
-}
-
-# check kernel headers path
-uname="$(uname -r)"
-if test -d "/lib/modules/$uname/source" -o -L "/lib/modules/$uname/source"; then
- DEFAULT_KERNEL_DIR="/lib/modules/$uname/source"
-elif test -d "/lib/modules/$uname/build" -o -L "/lib/modules/$uname -r/build"; then
- DEFAULT_KERNEL_DIR="/lib/modules/$uname/build"
-else
- DEFAULT_KERNEL_DIR="/usr/src/linux"
-fi
-
-# local options
-AC_ARG_ENABLE([debug],
- [ --enable-debug enable debug build. ],
- [ default="no" ])
-
-AC_ARG_WITH([syslogfacility],
- [ --syslogfacility=FACILITY
- cluster default syslog facility. ],
- [ SYSLOGFACILITY="$withval" ],
- [ SYSLOGFACILITY="LOG_LOCAL4" ])
-
-AC_ARG_WITH([sysloglevel],
- [ --sysloglevel=LEVEL
- cluster default syslog level. ],
- [ SYSLOGLEVEL="$withval" ],
- [ SYSLOGLEVEL="LOG_INFO" ])
-
-AC_ARG_WITH([kernel],
- [ --with-kernel=path path to kernel source. ],
- [ KERNEL_DIR="$withval" ],
- [ KERNEL_DIR="$DEFAULT_KERNEL_DIR" ])
-
-KERNEL_CPPFLAGS="-I$KERNEL_DIR/include"
-
-PKG_CHECK_MODULES([corosync],[corosync])
-PKG_CHECK_MODULES([cpg],[libcpg])
-PKG_CHECK_MODULES([logt],[liblogthread])
-PKG_CHECK_MODULES([ccs],[libccs])
-PKG_CHECK_MODULES([cfg],[libcfg])
-PKG_CHECK_MODULES([confdb],[libconfdb])
-PKG_CHECK_MODULES([quorum],[libquorum])
-PKG_CHECK_MODULES([fenced],[libfenced])
-
-# external libs (no pkgconfig)
-check_lib_no_libs pthread pthread_mutex_lock
-
-# Checks for header files.
-
-AC_CHECK_HEADERS([arpa/inet.h fcntl.h inttypes.h limits.h netdb.h netinet/in.h stddef.h stdint.h stdlib.h string.h sys/file.h sys/ioctl.h sys/param.h sys/socket.h sys/time.h syslog.h unistd.h])
-
-BACKUP_CPPFLAGS="$CPPFLAGS"
-CPPFLAGS="$KERNEL_CPPFLAGS"
-
-# check kernel headers
-AC_CHECK_HEADERS([linux/major.h linux/types.h linux/dlmconstants.h])
-AC_CHECK_HEADERS([linux/dlm.h linux/dlm_device.h])
-# 2.6.26 adds dlm_plock.h that's our checking barrier, instead
-# of implementing some insane kernel version checks
-# At least on 2.6.27.21-170.2.56.fc10.x86_64, one must include
-# <linux/types.h> to avoid syntax errors regarding __u32.
-AC_CHECK_HEADERS([linux/dlm_plock.h],,
- [AC_MSG_ERROR([Unable to find dlm kernel headers.
-Make sure to install kernel headers 2.6.26 (or higher) or use --with-kernel=path
-configure option to point to the kernel source.])],
- [#include <linux/types.h>])
-
-
-CPPFLAGS="$BACKUP_CPPFLAGS"
-
-# Checks for typedefs, structures, and compiler characteristics.
-AC_C_INLINE
-AC_TYPE_INT8_T
-AC_TYPE_MODE_T
-AC_TYPE_SIZE_T
-AC_CHECK_MEMBERS([struct stat.st_rdev])
-AC_TYPE_UINT16_T
-AC_TYPE_UINT32_T
-AC_TYPE_UINT64_T
-AC_TYPE_UINT8_T
-
-# Checks for library functions.
-AC_FUNC_ERROR_AT_LINE
-AC_FUNC_FORK
-AC_FUNC_LSTAT_FOLLOWS_SLASHED_SYMLINK
-AC_FUNC_MALLOC
-AC_HEADER_MAJOR
-AC_FUNC_REALLOC
-AC_CHECK_FUNCS([ftruncate gettimeofday memset mkdir rmdir socket strchr strdup strerror strncasecmp strstr])
-
-## random vars
-
-LOGDIR=${localstatedir}/log/cluster
-CLUSTERVARRUN=${localstatedir}/run/cluster
-CLUSTERVARLIB=${localstatedir}/lib/cluster
-
-## do subst
-
-AC_SUBST([LOGDIR])
-AC_DEFINE_UNQUOTED([LOGDIR], "$(eval echo ${LOGDIR})",
- [Default logging directory])
-
-AC_SUBST([CLUSTERVARRUN])
-AC_DEFINE_UNQUOTED([CLUSTERVARRUN], "$(eval echo ${CLUSTERVARRUN})",
- [Default cluster var/run directory])
-
-AC_SUBST([CLUSTERVARLIB])
-
-AC_DEFINE_UNQUOTED([SYSLOGFACILITY], $(eval echo ${SYSLOGFACILITY}),
- [Default syslog facility])
-
-AC_DEFINE_UNQUOTED([SYSLOGLEVEL], $(eval echo ${SYSLOGLEVEL}),
- [Default syslog level])
-
-## *FLAGS handling
-
-ENV_CFLAGS="$CFLAGS"
-ENV_CPPFLAGS="$CPPFLAGS"
-ENV_LDFLAGS="$LDFLAGS"
-
-# debug build stuff
-if test "x${enable_debug}" = xyes; then
- AC_DEFINE_UNQUOTED([DEBUG], [1], [Compiling Debugging code])
- OPT_CFLAGS="-O0"
-else
- OPT_CFLAGS="-O2"
-fi
-
-# gdb flags
-if test "x${GCC}" = xyes; then
- GDB_FLAGS="-ggdb3"
-else
- GDB_FLAGS="-g"
-fi
-
-# extra warnings
-EXTRA_WARNINGS=""
-
-WARNLIST="
- all
- shadow
- missing-prototypes
- missing-declarations
- strict-prototypes
- declaration-after-statement
- pointer-arith
- write-strings
- cast-align
- bad-function-cast
- missing-format-attribute
- format=2
- format-security
- format-nonliteral
- no-long-long
- unsigned-char
- gnu89-inline
- no-strict-aliasing
- "
-
-for j in $WARNLIST; do
- if cc_supports_flag -W$j; then
- EXTRA_WARNINGS="$EXTRA_WARNINGS -W$j";
- fi
-done
-
-CFLAGS="$ENV_CFLAGS $OPT_CFLAGS $GDB_FLAGS \
- $EXTRA_WARNINGS $WERROR_CFLAGS"
-CPPFLAGS="$KERNEL_CPPFLAGS -I\$(top_builddir)/make -I\$(top_srcdir)/make \
- -I. $ENV_CPPFLAGS"
-LDFLAGS="$ENV_LDFLAGS"
-
-AC_CONFIG_FILES([Makefile
- dlm/Makefile
- dlm/libdlm/Makefile
- dlm/libdlm/libdlm.pc
- dlm/libdlm/libdlm_lt.pc
- dlm/libdlmcontrol/Makefile
- dlm/libdlmcontrol/libdlmcontrol.pc
- dlm/tool/Makefile
- dlm/tests/Makefile
- dlm/tests/usertest/Makefile
- dlm/man/Makefile
- dlm/doc/Makefile
- doc/Makefile
- group/Makefile
- group/dlm_controld/Makefile
- group/include/Makefile
- group/man/Makefile
- ])
-
-AC_OUTPUT
diff --git a/dlm/Makefile.am b/dlm/Makefile.am
deleted file mode 100644
index 454f628..0000000
--- a/dlm/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-MAINTAINERCLEANFILES = Makefile.in
-
-SUBDIRS = libdlm libdlmcontrol tool man doc tests
diff --git a/dlm/doc/Makefile.am b/dlm/doc/Makefile.am
deleted file mode 100644
index 40a1c79..0000000
--- a/dlm/doc/Makefile.am
+++ /dev/null
@@ -1,6 +0,0 @@
-MAINTAINERCLEANFILES = Makefile.in
-
-dist_doc_DATA = dlm_tool.txt \
- example.c \
- libdlm.txt \
- user-dlm-overview.txt
diff --git a/dlm/doc/dlm_tool.txt b/dlm/doc/dlm_tool.txt
deleted file mode 100644
index d66cdb0..0000000
--- a/dlm/doc/dlm_tool.txt
+++ /dev/null
@@ -1,167 +0,0 @@
-
-The dlm is configured and controlled from user space through sysfs and a
-couple of ioctl's. A command line program, dlm_tool, can be used to do
-everything manually.
-
-Here are the dlm_tool config/control actions that will be used:
-
-set_local <nodeid> <ipaddr> [<weight>]
-set_node <nodeid> <ipaddr> [<weight>]
-stop <ls_name>
-terminate <ls_name>
-start <ls_name> <event_nr> <type> <nodeid>...
-get_done <ls_name>
-finish <ls_name> <event_nr>
-set_id <ls_name> <id>
-
-For testing and illustration, some actions have been added to dlm_tool to use
-the libdlm API.
-
-create <ls_name>
-release <ls_name>
-lock <ls_name> <res_name> <mode> [<flag>,...]
-unlock <ls_name> <lkid> [<flag>,...]
-convert <ls_name> <lkid> <mode> [<flag>,...]
-
-So, dlm_tool is standing in for what would usually be two different entities.
-The first set of config/control actions would usually be performed by a system
-daemon associated with a cluster membership manager. The second set of libdlm
-actions would usually be performed by an application that wants to use the dlm
-for synchronization.
-
-
-Example
-
-1. There are three machines that we want to use the dlm:
-
-nodea -- 10.0.0.1
-nodeb -- 10.0.0.2
-nodec -- 10.0.0.3
-
-
-2. We'll pick arbitrary integer node ID's for these machines:
-
-nodea -- 1
-nodeb -- 2
-nodec -- 3
-
-
-3. On each node we first need to tell the dlm what the local IP address
-and nodeid are:
-
-nodea> dlm_tool set_local 1 10.0.0.1
-nodeb> dlm_tool set_local 2 10.0.0.2
-nodec> dlm_tool set_local 3 10.0.0.3
-
-
-4. On all nodes we need to set up the nodeid to IP address mappings:
-
-all> dlm_tool set_node 1 10.0.0.1
-all> dlm_tool set_node 2 10.0.0.2
-all> dlm_tool set_node 3 10.0.0.3
-
-
-5. All dlm locking happens within a lockspace; we need to create a test
-lockspace for all the nodes to use. This step would usually be an application
-that wants to use the dlm and creates a lockspace to use.
-
-all> dlm_tool create test
-
-
-6. The lockspace needs to be "started" on all the nodes. The <event_nr>
-should begin at 1 and be incremented for each consecutive start that's done on
-the dlm. The <type> field isn't used by the dlm and can be 0. Finally, a
-list of nodeid's using the lockspace is given.
-
-all> dlm_tool start test 1 0 1 2 3
-
-
-7. The dlm will now start up on all three nodes. Whenever it starts it needs
-to do recovery. Once recovery is done, the event_nr used for the start (1
-above) will be shown as the dlm_tool get_done output. You need to wait for
-this on all nodes (i.e. for all nodes to complete recovery) before moving on
-to the next step.
-
-all> dlm_tool get_done test
-done event_nr 1
-
-
-8. The lockspace finally needs to know that recovery is finished on all nodes.
-The event_nr used for the start is used here.
-
-all> dlm_tool finish test 1
-
-
-9. The lockspace can now be used by the application for locking, or by
-dlm_tool using the libdlm actions above.
-
-all> dlm_tool lock/unlock/convert ...
-
-
-10. Say that nodea fails. Nodeb and nodec need to remove nodea from the
-lockspace and do recovery. The first step is to suspend the dlm operation on
-the remaining nodes:
-
-nodeb,nodec> dlm_tool stop test
-
-
-11. The lockspace then needs to be started again with the new set of lockspace
-members and an incremented event_nr.
-
-nodeb,nodec> dlm_tool start test 2 0 2 3
-
-
-12. We wait for recovery to complete on nodeb and nodec.
-
-nodeb,nodec> dlm_tool get_done test
-done event_nr 2
-
-
-13. Tell the lockspace that recovery is finished on both nodes.
-
-nodeb,nodec> dlm_tool finish test 2
-
-
-14. Nodea comes back and wants to use the dlm again.
-
-nodea> dlm_tool create test
-
-
-15. To add nodea back into the lockspace, first suspend lockspace operations
-on nodeb and nodec.
-
-nodeb,nodec> dlm_tool stop test
-
-
-16. Start the lockspace on all the nodes with an incremented event_nr
-(event_nr can go back to 1 again for nodea).
-
-nodeb,nodec> dlm_tool start test 3 0 1 2 3
-nodea> dlm_tool start test 1 0 1 2 3
-
-
-17. Wait for all nodes to complete recovery.
-
-nodeb,nodec> dlm_tool get_done test
-done event_nr 3
-
-nodea> dlm_tool get_done test
-done event_nr 1
-
-
-18. Tell the lockspace that recovery is finished everywhere.
-
-nodeb,nodec> dlm_tool finish test 3
-nodea> dlm_tool finish test 1
-
-
-
-Notes:
-
-- When you use more than one lockspace on the nodes, you need to use
- dlm_tool set_id on all nodes to assign each lockspace a unique
- integer id. This is done between the create and the first start.
-
-- A node can leave a lockspace using dlm_tool release (the opposite of
- dlm_tool create).
-
diff --git a/dlm/doc/example.c b/dlm/doc/example.c
deleted file mode 100644
index 9f3ea11..0000000
--- a/dlm/doc/example.c
+++ /dev/null
@@ -1,52 +0,0 @@
-#include <stdlib.h>
-#include <unistd.h>
-#include <stdint.h>
-#include <errno.h>
-#include <string.h>
-#include <stdio.h>
-#include <signal.h>
-#include <libdlm.h>
-
-/*
- * Simple libdlm locking demo
- *
- * Daniel Phillips, phillips(a)redhat.com
- *
- */
-
-#define error(string, args...) do { printf(string, ##args); exit(1); } while (0)
-
-void my_ast(void *arg)
-{
- printf("ast got arg %p\n", arg);
-}
-
-int main(void)
-{
- int fd, child;
- struct dlm_lksb lksb;
-
- if ((fd = dlm_get_fd()) < 0)
- error("dlm error %i, %s\n", errno, strerror(errno));
-
- switch (child = fork()) {
- case -1:
- error("fork error %i, %s\n", errno, strerror(errno));
- case 0:
- while (1)
- dlm_dispatch(fd);
- }
-
- if (dlm_lock(LKM_PWMODE, &lksb, LKF_NOQUEUE, "foo", 3,
- 0, my_ast, (void *)&fd, NULL, NULL) < 0)
- error("dlm error %i, %s\n", errno, strerror(errno));
- sleep(1);
-
- if (dlm_unlock(lksb.sb_lkid, 0, &lksb, NULL) < 0)
- error("dlm error %i, %s\n", errno, strerror(errno));
- sleep(1);
-
- kill(child, SIGTERM);
- return 0;
-}
-
diff --git a/dlm/doc/libdlm.txt b/dlm/doc/libdlm.txt
deleted file mode 100644
index 18d44f9..0000000
--- a/dlm/doc/libdlm.txt
+++ /dev/null
@@ -1,533 +0,0 @@
-User-space interface to DLM
----------------------------
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <stdint.h>
-#include <libdlm.h>
-
-cc -D_REENTRANT prog.c -ldlm -lpthread
-
-cc prog.c -ldlm_lt
-
-
-There are basically two interfaces to libdlm. The first is the "dead simple"
-one that has limited functionality and assumes that the application is linked
-with pthreads. The second is the full-featured DLM interface that looks
-identical to the kernel interface.
-
-See CVS dlm/tests/usertest for examples of use of both these APIs.
-
-The simple one
---------------
-This provides two API calls, lock_resource() and unlock_resource(). Both of
-these calls block until the lock operation has completed - using a worker
-thread to deal with the callbacks that come from the kernel.
-
-int lock_resource(const char *resource, int mode, int flags, int *lockid);
-
- This function locks a named (NUL-terminated) resource and returns the
- lockid if successful. The mode may be any of
-
- LKM_NLMODE LKM_CRMODE LKM_CWMODE LKM_PRMODE LKM_PWMODE LKM_EXMODE
-
- Flags may be any combination of
-
- LKF_NOQUEUE - Don't wait if the lock cannot be granted immediately,
- will return EAGAIN if this is so.
-
- LKF_CONVERT - Convert lock to new mode. *lockid must be valid,
- resource name is ignored.
-
- LKF_QUECVT - Add conversion to the back of the convert queue - only
- valid for some convert operations
-
- LKF_PERSISTENT - Don't automatically unlock this lock when the process
- exits (must be root).
-
-
-int unlock_resource(int lockid);
-
- Unlocks the resource.
-
-
-
-The complicated one
--------------------
-This interface is identical to the kernel interface with the exception of
-the lockspace argument. All userland locks sit in the same lockspace by default.
-
-libdlm can be used in pthread or non-pthread applications. For pthread
-applications simply call the following function before doing any lock
-operations. If you're using pthreads, remember to define _REENTRANT at the
-top of the program or using -D_REENTRANT on the compile line.
-
-int dlm_pthread_init()
-
- Creates a thread to receive all lock ASTs. The AST callback function
- for lock operations will be called in the context of this thread.
- If there is a potential for local resource access conflicts you must
- provide your own pthread-based locking in the AST routine.
-
-
-int dlm_pthread_cleanup()
-
- Cleans up the default lockspace threads after use. Normally you
- don't need to call this, but if the locking code is in a
- dynamically loadable shared library this will probably be necessary.
-
-
-For non-pthread based applications the DLM provides a file descriptor
-that the program can feed into poll/select. If activity is detected
-on that FD then a dispatch function should be called:
-
-int dlm_get_fd()
-
- Returns a file-descriptor for the DLM suitable for passing in to
- poll() or select().
-
-int dlm_dispatch(int fd)
-
- Reads from the DLM and calls any AST routines that may be needed.
- This routine runs in the context of the caller so no extra locking
- is needed to protect local resources.
-
-
-int dlm_lock(uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent,
- void (*astaddr) (void *astarg),
- void *astarg,
- void (*bastaddr) (void *astarg),
- struct dlm_range *range);
-
-
-mode lock mode:
- LKM_NLMODE NULL Lock
- LKM_CRMODE Concurrent read
- LKM_CWMODE Concurrent write
- LKM_PRMODE Protected read
- LKM_PWMODE Protected write
- LKM_EXMODE Exclusive
-
-flags LKF_NOQUEUE Don't queue the lock. If it cannot be
- granted return -EAGAIN
- LKF_CONVERT Convert an existing lock
- LKF_VALBLK Lock has a value block
- LKF_QUECVT Put conversion to the back of the queue
- LKF_EXPEDITE Grant a NL lock immediately regardless of
- other locks on the conversion queue
- LKF_PERSISTENT Specifies a lock that will
- not be unlocked when the process exits.
-
-lksb Lock status block.
- This structure contains the returned lock ID, the actual
- status of the lock operation (all lock ops are asynchronous)
- and the value block if LKF_VALBLK is set.
-
-name Name of the lock. Can be binary, max 64 bytes. Ignored for lock
- conversions.
-
-namelen Length of the above name. Ignored for lock conversions.
-
-parent ID of parent lock or NULL if this is a top-level lock
-
-ast Address of AST routine to be called when the lock operation
- completes. The final completion status of the lock will be
- in the lksb. The AST routine must not be NULL.
-
-astargs Argument to pass to the AST routine (most people pass the lksb
- in here but it can be anything you like.)
-
-bast Blocking AST routine. address of a function to call if this
- lock is blocking another. The function will be called with
- astargs.
-
-range an optional structure of two uint64_t that indicate the range
- of the lock. Locks with overlapping ranges will be granted only
- if the lock modes are compatible. Locks with non-overlapping
- ranges (on the same resource) do not conflict. A lock with no
- range is assumed to have a range encompassing the largest
- possible range, i.e. 0-0xFFFFFFFFFFFFFFFF. Note that it is more
- efficient to specify no range than to specify the full range
- above.
-
-
-dlm_lock operations are asynchronous. If the call to dlm_lock returns an error
-then the operation has failed and the AST routine will not be called. If
-dlm_lock returns 0 it is still possible that the lock operation will fail. The
-AST routine will be called when the locking is complete or has failed and the
-status is returned in the lksb.
-
-For conversion operations the name and namelen are ignored and the lock ID in
-the LKSB is used to identify the lock.
-
-If a lock value block is specified then in general, a grant or a conversion to
-an equal-level or higher-level lock mode reads the lock value from the resource
-into the caller's lock value block. When a lock conversion from EX or PW
-to an equal-level or lower-level lock mode occurs, the contents of
-the caller's lock value block are written into the resource.
-
-If the AST routines or parameter are passed to a conversion operation then they
-will overwrite those values that were passed to a previous dlm_lock call.
-
-int dlm_lock_wait(uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- const void *name,
- unsigned int namelen,
- uint32_t parent,
- void *bastarg,
- void (*bastaddr) (void *bastarg),
- struct dlm_range *range);
-
-
-As above except that the call will block until the lock is
-granted or has failed. The return from the function is
-the final status of the lock request (ie that was returned
-in the lksb after the AST routine was called).
-
-
-
-int dlm_unlock(uint32_t lkid,
- uint32_t flags,
- struct dlm_lksb *lksb,
- void *astarg)
-
-lkid Lock ID as returned in the lksb
-
-flags flags affecting the unlock operation:
- LKF_CANCEL CANCEL a pending lock or conversion.
- This returns the lock to its
- previously granted mode (in case of a
- conversion) or unlocks it (in case of a
- waiting lock).
-
- LKF_IVVALBLK Invalidate value block
-
-lksb LKSB to return status and value block information.
-
-astarg New parameter to be passed to the completion AST.
- The completion AST routine is the
- last completion AST routine specified in a dlm_lock call.
- If dlm_lock_wait() was the last routine to issue a lock,
- dlm_unlock_wait() must be used to release the lock. If dlm_lock()
- was the last routine to issue a lock then either dlm_unlock()
- or dlm_unlock_wait() may be called.
-
-Unlocks are also asynchronous. The AST routine is called when the resource is
-successfully unlocked (see below).
-
-
-Extra status returns to the completion AST (apart from those already
-defined in errno.h)
-
-ECANCEL
- A lock conversion was successfully cancelled
-
-EUNLOCK
- An Unlock operation completed successfully
-
-EDEADLOCK
- The lock operation is causing a deadlock and has been cancelled. If this
- was a conversion then the lock is reverted to its previously granted state.
- If it was a new lock then it has not been granted.
- (NB Only conversion deadlocks are currently detected)
-
-int dlm_unlock_wait(uint32_t lkid,
- uint32_t flags,
- struct dlm_lksb *lksb)
-
-As above but returns when the unlock operation is complete either because it
-finished or because an error was detected. In the case where
-the unlock operation was successful no error is returned.
-
-The return value of the function call is the status of issuing
-the unlock operation. The status of the unlock operation itself
-is in the lock status block. Both of these must be checked to
-verify that the unlock has completed successfully.
-
-Lock Query Operations
----------------------
-int dlm_query(struct dlm_lksb *lksb,
- int query,
- struct dlm_queryinfo *qinfo,
- void (*ast_routine) (void *astarg),
- void *astarg);
-
-The operation is asynchronous; the ultimate status and data will be returned into the
-dlm_queryinfo structure, which should be checked when the ast_routine is
-called. The lksb must contain a valid lock ID in sb_lkid, which is used to
-identify the resource to be queried; status will be returned in sb_status.
-As with the locking calls, an AST routine will be called when the query completes
-if the call itself returns 0.
-
-int dlm_query_wait(struct dlm_lksb *lksb,
- int query,
- struct dlm_queryinfo *qinfo)
-
-Same as dlm_query() except that it waits for the operation to complete.
-When the operation is complete the status will be in the lksb. Both
-the return value from the function call and the condition code in the
-lksb must be evaluated.
-
-If the provided lock list is too short to hold all the locks, then sb_status
-in the lksb will contain -E2BIG but the list will be filled in as far as possible.
-Either gqi_lockinfo or gqi_resinfo may be NULL if that information is not required.
-
-/* Structures passed into and out of the query */
-
-struct gdlm_lockinfo
-{
- int lki_lkid; /* Lock ID on originating node */
- int lki_parent;
- int lki_node; /* Originating node (not master) */
- int lki_ownpid; /* Owner pid on the originating node */
- uint8 lki_state; /* Queue the lock is on */
- int8 lki_grmode; /* Granted mode */
- int8 lki_rqmode; /* Requested mode */
- struct dlm_range lki_grrange; /* Granted range, if applicable */
- struct dlm_range lki_rqrange; /* Requested range, if applicable */
-};
-
-struct gdlm_resinfo
-{
- int rsi_length;
- int rsi_grantcount; /* No. of nodes on grant queue */
- int rsi_convcount; /* No. of nodes on convert queue */
- int rsi_waitcount; /* No. of nodes on wait queue */
- int rsi_masternode; /* Master for this resource */
- char rsi_name[DLM_RESNAME_MAXLEN]; /* Resource name */
- char rsi_valblk[DLM_LVB_LEN]; /* Master's LVB contents, if applicable */
-};
-
-struct dlm_queryinfo
-{
- struct dlm_resinfo *gqi_resinfo; /* Points to a single resinfo struct */
- struct dlm_lockinfo *gqi_lockinfo; /* This points to an array of structs */
- int gqi_locksize; /* input */
- int gqi_lockcount; /* output */
-};
-
-The query is made up of several blocks of bits as follows:
-
- 9 8 6 5 3 0
-+----------------+---+-------+---+-------+-----------+
-| reserved | Q | query | F | queue | lock mode |
-+----------------+---+-------+---+-------+-----------+
-
-lock mode is a normal DLM lock mode or DLM_LOCK_THIS
-to use the mode of the lock in sb_lkid.
-
-queue is a bitmap of
- DLM_QUERY_QUEUE_WAIT
- DLM_QUERY_QUEUE_CONVERT
- DLM_QUERY_QUEUE_GRANT
-
-or one of the two shorthands:
- DLM_QUERY_QUEUE_GRANTED (for CONVERT+GRANT)
- DLM_QUERY_QUEUE_ALL (for all queues)
-
- F is a flag DLM_QUERY_LOCAL
-which specifies that a remote access should not
-happen. Only lock information that can
-be gleaned from the local node will be returned so
-be aware that it may not be complete.
-
-The query is one of the following:
- DLM_QUERY_LOCKS_HIGHER
- DLM_QUERY_LOCKS_LOWER
- DLM_QUERY_LOCKS_EQUAL
- DLM_QUERY_LOCKS_BLOCKING
- DLM_QUERY_LOCKS_NOTBLOCK
- DLM_QUERY_LOCKS_ALL
-
-which specifies which locks to look for by mode,
-either the lockmode is lower, equal or higher
-to the mode at the bottom of the query.
-DLM_QUERY_LOCKS_ALL will return all locks on the
-resource.
-
-DLM_QUERY_LOCKS_BLOCKING returns only locks
-that are blocking the current lock. The lock
-must not be waiting for grant or conversion
-for this to be a valid query, the other flags
-are ignored.
-
-DLM_QUERY_LOCKS_NOTBLOCK returns only locks
-that are granted but NOT blocking the current lock.
-
-Q specifies which lock queue to compare. By default
-the granted queue is used. If the flag
-DLM_QUERY_RQMODE is set then the requested mode
-will be used instead.
-
-
-The "normal" way to call dlm_query is to put the
-address of the dlm_queryinfo struct into
-lksb.sb_lvbptr and pass the lksb as the AST param,
-that way all the information is available to you
-in the AST routine.
-
-Lockspace Operations
---------------------
-The DLM allows locks to be partitioned into "lockspaces", and these can be
-manipulated by userspace calls. It is possible (though not recommended) for
-an application to have multiple lockspaces open at one time.
-
-All the above calls work on the "default" lockspace, which should be
-fine for most users. The calls below allow you to isolate your
-application from all others running in the cluster. Remember, lockspaces
-are a cluster-wide resource, so if you create a lockspace called "myls" it
-will share locks with a lockspace called "myls" on all nodes.
-
-
-dlm_lshandle_t dlm_create_lockspace(const char *name, mode_t mode);
-
- This creates a lockspace called <name> and the mode of the file
- used to access it will be <mode> (subject to umask as usual).
- The lockspace must not already exist on this node; if it does, -1
- will be returned and errno will be set to EEXIST. If you really
- want to use this lockspace you can then use dlm_open_lockspace()
- below. The name is the name of a misc device that will be created
- in /dev/misc.
-
- On success a handle to the lockspace is returned, which can be used
- to pass into subsequent dlm_ls_lock/unlock calls. Make no assumptions
- as to the content of this handle as its content may change in future.
-
- The caller must have CAP_SYSADMIN privileges to do this operation.
-
-
-int dlm_release_lockspace(const char *name, dlm_lshandle_t lockspace, int force)
-
- Deletes a lockspace. If the lockspace still has active locks then -1 will be
- returned and errno set to EBUSY. Both the lockspace handle /and/ the name
- must be specified. This call also closes the lockspace and stops the thread
- associated with the lockspace, if any.
-
- Note that other nodes in the cluster may still have locks open on this
- lockspace.
- This call only removes the lockspace from the current node.
-
- If the force flag is set then the lockspace will be removed even if another
- user on this node has active locks in it. Existing users will NOT
- be notified if you do this, so be careful.
-
-
-dlm_lshandle_t dlm_open_lockspace(const char *name)
-
- Opens an already existing lockspace and returns a handle to it.
-
-
-int dlm_close_lockspace(dlm_lshandle_t lockspace)
-
- Close the lockspace. Any locks held by this process will be freed.
- If a thread is associated with this lockspace then it will be stopped.
-
-
-int dlm_ls_get_fd(dlm_lshandle_t lockspace)
-
- Returns the file descriptor associated with the lockspace so that the
- user can use it as input to poll/select.
-
-
-int dlm_ls_pthread_init(dlm_lshandle_t lockspace)
-
- Initialise threaded environment for this lockspace, similar
- to dlm_pthread_init() above.
-
-
-int dlm_ls_lock(dlm_lshandle_t lockspace,
- int mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- void *name,
- unsigned int namelen,
- uint32_t parent,
- void (*ast) (void *astarg),
- void *astarg,
- void (*bast) (void *astarg),
- struct dlm_range *range)
-
- Same as dlm_lock() above but takes a lockspace argument.
-
-int dlm_ls_lock_wait(dlm_lshandle_t lockspace,
- int mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- void *name,
- unsigned int namelen,
- uint32_t parent,
- void *bastarg,
- void (*bast) (void *bastarg),
- struct dlm_range *range)
-
- Same as dlm_lock_wait() above but takes a lockspace argument.
-
-
-int dlm_ls_unlock(dlm_lshandle_t lockspace,
- uint32_t lkid,
- uint32_t flags,
- struct dlm_lksb *lksb,
- void *astarg)
-
-
- Same as dlm_unlock above but takes a lockspace argument.
-
-int dlm_ls_unlock_wait(dlm_lshandle_t lockspace,
- uint32_t lkid,
- uint32_t flags,
- struct dlm_lksb *lksb)
-
-
- Same as dlm_unlock_wait above but takes a lockspace argument.
-
-
-int dlm_ls_query(dlm_lshandle_t lockspace,
- struct dlm_lksb *lksb,
- int query,
- struct dlm_queryinfo *qinfo,
- void (*ast_routine) (void *astarg),
- void *astarg);
-
- Same as dlm_query above but takes a lockspace argument.
-
-int dlm_ls_query_wait(dlm_lshandle_t lockspace,
- struct dlm_lksb *lksb,
- int query,
- struct dlm_queryinfo *qinfo)
-
- Same as dlm_query_wait above but takes a lockspace argument.
-
-
-One further point about lockspace operations is that there is no locking
-on the creating/destruction of lockspaces in the library so it is up to the
-application to only call dlm_*_lockspace when it is sure that
-no other locking operations are likely to be happening within that process.
-
-Libraries
----------
-There are two DLM libraries, one that uses pthreads (libdlm) to deliver ASTs
-and a light one that doesn't (libdlm_lt).
-
-The "light" library contains only the following calls.
-
-- dlm_lock
-- dlm_unlock
-- dlm_query
-- dlm_get_fd
-- dlm_dispatch
-- dlm_ls_lock
-- dlm_ls_unlock
-- dlm_ls_query
-- dlm_ls_get_fd
-- dlm_create_lockspace
-- dlm_open_lockspace
-- dlm_release_lockspace
-- dlm_close_lockspace
-
-Note that libdlm (the pthreads one) also contains the non-threaded calls
-so you can choose at runtime if you need to.
diff --git a/dlm/doc/user-dlm-overview.txt b/dlm/doc/user-dlm-overview.txt
deleted file mode 100644
index bda3aea..0000000
--- a/dlm/doc/user-dlm-overview.txt
+++ /dev/null
@@ -1,325 +0,0 @@
-
-There are five ways to request a dlm lock (and five corresponding ways to
-unlock).
-
-- lock_resource
-- dlm_lock
-- dlm_ls_lock
-- dlm_lock_wait
-- dlm_ls_lock_wait
-
-- unlock_resource
-- dlm_unlock
-- dlm_ls_unlock
-- dlm_unlock_wait
-- dlm_ls_unlock_wait
-
-There is also a set of "administrative" functions that are used along with
-some of the lock/unlock requests. Which are used depends on which locking
-method is used or whether the application is threaded.
-
-- dlm_pthread_init
-- dlm_ls_pthread_init
-- dlm_pthread_cleanup
-- dlm_get_fd
-- dlm_ls_get_fd
-- dlm_dispatch
-- dlm_create_lockspace
-- dlm_open_lockspace
-- dlm_release_lockspace
-- dlm_close_lockspace
-
-
-Overview of lock request methods
---------------------------------
-
-- synchronous, default lockspace
- use dlm_pthread_init/dlm_pthread_cleanup if app is threaded
- use dlm_get_fd/dlm_dispatch if app is not threaded
- use unlock_resource to unlock
-
-int lock_resource(
- const char *resource,
- uint32_t mode,
- uint32_t flags,
- uint32_t *lkid);
-
-
-- asynchronous, default lockspace
- use dlm_pthread_init/dlm_pthread_cleanup if app is threaded
- use dlm_get_fd/dlm_dispatch if app is not threaded
- use dlm_unlock/dlm_unlock_wait to unlock
-
-int dlm_lock(
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- void *name,
- unsigned int namelen,
- uint32_t parent,
- void (*ast) (void *astarg),
- void *astarg,
- void (*bast) (void *astarg),
- struct dlm_range *range);
-
-
-- synchronous, default lockspace
- use dlm_pthread_init/dlm_pthread_cleanup if app is threaded
- use dlm_get_fd/dlm_dispatch if app is not threaded
- use dlm_unlock/dlm_unlock_wait to unlock
-
-int dlm_lock_wait(
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- void *name,
- unsigned int namelen,
- uint32_t parent,
- void *bastarg,
- void (*bast) (void *bastarg),
- struct dlm_range *range);
-
-
-- asynchronous, any lockspace
- use dlm_ls_pthread_init/dlm_pthread_cleanup if app is threaded
- use dlm_ls_get_fd/dlm_dispatch if app is not threaded
- use dlm_create_lockspace/dlm_open_lockspace to start
- use dlm_release_lockspace/dlm_close_lockspace to finish
- use dlm_ls_unlock/dlm_ls_unlock_wait to unlock
-
-int dlm_ls_lock(
- dlm_lshandle_t lockspace,
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- void *name,
- unsigned int namelen,
- uint32_t parent,
- void (*ast) (void *astarg),
- void *astarg,
- void (*bast) (void *astarg),
- struct dlm_range *range);
-
-
-- synchronous, any lockspace
- use dlm_ls_pthread_init/dlm_pthread_cleanup if app is threaded
- use dlm_ls_get_fd/dlm_dispatch if app is not threaded
- use dlm_create_lockspace/dlm_open_lockspace to start
- use dlm_release_lockspace/dlm_close_lockspace to finish
- use dlm_ls_unlock/dlm_ls_unlock_wait to unlock
-
-int dlm_ls_lock_wait(
- dlm_lshandle_t lockspace,
- uint32_t mode,
- struct dlm_lksb *lksb,
- uint32_t flags,
- void *name,
- unsigned int namelen,
- uint32_t parent,
- void *bastarg,
- void (*bast) (void *bastarg),
- struct dlm_range *range);
-
-
-
-Corresponding unlock requests
------------------------------
-
-int unlock_resource(
- uint32_t lkid);
-
-int dlm_unlock(
- uint32_t lkid,
- uint32_t flags,
- struct dlm_lksb *lksb,
- void *astarg);
-
-int dlm_unlock_wait(
- uint32_t lkid,
- uint32_t flags,
- struct dlm_lksb *lksb);
-
-int dlm_ls_unlock(
- dlm_lshandle_t lockspace,
- uint32_t lkid,
- uint32_t flags,
- struct dlm_lksb *lksb,
- void *astarg);
-
-int dlm_ls_unlock_wait(
- dlm_lshandle_t lockspace,
- uint32_t lkid,
- uint32_t flags,
- struct dlm_lksb *lksb);
-
-
-
-Common to all of the above
---------------------------
-
-#define DLM_RESNAME_MAXLEN (64)
-#define DLM_LVB_LEN (32)
-
-#define LKM_NLMODE 0 /* null lock */
-#define LKM_CRMODE 1 /* concurrent read */
-#define LKM_CWMODE 2 /* concurrent write */
-#define LKM_PRMODE 3 /* protected read */
-#define LKM_PWMODE 4 /* protected write */
-#define LKM_EXMODE 5 /* exclusive */
-
-#define LKF_NOQUEUE (0x00000001)
-#define LKF_CANCEL (0x00000002)
-#define LKF_CONVERT (0x00000004)
-#define LKF_VALBLK (0x00000008)
-#define LKF_QUECVT (0x00000010)
-#define LKF_IVVALBLK (0x00000020)
-#define LKF_CONVDEADLK (0x00000040)
-#define LKF_PERSISTENT (0x00000080)
-#define LKF_NODLCKWT (0x00000100)
-#define LKF_NODLCKBLK (0x00000200)
-#define LKF_EXPEDITE (0x00000400)
-#define LKF_NOQUEUEBAST (0x00000800)
-#define LKF_HEADQUE (0x00001000)
-#define LKF_NOORDER (0x00002000)
-
-#define ECANCEL (0x10001)
-#define EUNLOCK (0x10002)
-#define EINPROG (0x10003)
-
-struct dlm_lksb {
- int sb_status;
- uint32_t sb_lkid;
- char sb_flags;
- char *sb_lvbptr;
-};
-
-struct dlm_range {
- uint64_t ra_start;
- uint64_t ra_end;
-};
-
-
-
-
-Overview of administrative functions
-------------------------------------
-
-- dlm_pthread_init
-- dlm_ls_pthread_init
-- dlm_pthread_cleanup
-- dlm_get_fd
-- dlm_ls_get_fd
-- dlm_dispatch
-- dlm_create_lockspace
-- dlm_open_lockspace
-- dlm_release_lockspace
-- dlm_close_lockspace
-
-
-typedef void * dlm_lshandle_t;
-
-dlm_lshandle_t dlm_create_lockspace(const char *name, mode_t mode);
-
-int dlm_release_lockspace(const char *name, dlm_lshandle_t ls, int force);
-
-dlm_lshandle_t dlm_open_lockspace(const char *name);
-
-int dlm_close_lockspace(dlm_lshandle_t ls);
-
-int dlm_pthread_init();
-
-int dlm_ls_pthread_init(dlm_lshandle_t lockspace);
-
-int dlm_pthread_cleanup();
-
-int dlm_get_fd();
-
-int dlm_ls_get_fd(dlm_lshandle_t ls);
-
-int dlm_dispatch(int fd);
-
-
-
-Query functions
----------------
-
-Query functions follow the same pattern as the lock and unlock functions.
-
-int dlm_query(
- struct dlm_lksb *lksb,
- int query,
- struct dlm_queryinfo *qinfo,
- void (*astaddr) (void *astarg),
- void *astarg);
-
-int dlm_query_wait(
- struct dlm_lksb *lksb,
- int query,
- struct dlm_queryinfo *qinfo);
-
-int dlm_ls_query(
- dlm_lshandle_t lockspace,
- struct dlm_lksb *lksb,
- int query,
- struct dlm_queryinfo *qinfo,
- void (*astaddr) (void *astarg),
- void *astarg);
-
-int dlm_ls_query_wait(
- dlm_lshandle_t lockspace,
- struct dlm_lksb *lksb,
- int query,
- struct dlm_queryinfo *qinfo);
-
-#define DLM_LOCK_THIS 0x0007
-#define DLM_QUERY_MODE_MASK 0x0007
-
-#define DLM_QUERY_QUEUE_WAIT 0x0008
-#define DLM_QUERY_QUEUE_CONVERT 0x0010
-#define DLM_QUERY_QUEUE_GRANT 0x0020
-#define DLM_QUERY_QUEUE_GRANTED 0x0030
-#define DLM_QUERY_QUEUE_ALL 0x0038
-
-#define DLM_QUERY_LOCKS_HIGHER 0x0100
-#define DLM_QUERY_LOCKS_LOWER 0x0200
-#define DLM_QUERY_LOCKS_EQUAL 0x0300
-#define DLM_QUERY_LOCKS_BLOCKING 0x0400
-#define DLM_QUERY_LOCKS_NOTBLOCK 0x0500
-#define DLM_QUERY_LOCKS_ALL 0x0600
-#define DLM_QUERY_MASK 0x0F00
-
-#define DLM_QUERY_GRMODE 0x0000
-#define DLM_QUERY_RQMODE 0x1000
-
-struct dlm_lockinfo {
- int lki_lkid;
- int lki_mstlkid;
- int lki_parent;
- int lki_node;
- int lki_ownpid;
- uint8_t lki_state;
- uint8_t lki_grmode;
- uint8_t lki_rqmode;
- struct dlm_range lki_grrange;
- struct dlm_range lki_rqrange;
-};
-
-struct dlm_resinfo {
- int rsi_length;
- int rsi_grantcount;
- int rsi_convcount;
- int rsi_waitcount;
- int rsi_masternode;
- char rsi_name[DLM_RESNAME_MAXLEN];
- char rsi_valblk[DLM_LVB_LEN];
-};
-
-struct dlm_queryinfo {
- struct dlm_resinfo *gqi_resinfo;
- struct dlm_lockinfo *gqi_lockinfo;
- int gqi_locksize;
- int gqi_lockcount;
-};
-
-
-
diff --git a/dlm/libdlm/Makefile.am b/dlm/libdlm/Makefile.am
deleted file mode 100644
index 7236ee9..0000000
--- a/dlm/libdlm/Makefile.am
+++ /dev/null
@@ -1,28 +0,0 @@
-MAINTAINERCLEANFILES = Makefile.in
-
-libversion = 3:0:0
-
-include_HEADERS = libdlm.h
-
-noinst_HEADERS = libdlm_internal.h
-
-pkgconfigdir = $(libdir)/pkgconfig
-
-pkgconfig_DATA = libdlm.pc libdlm_lt.pc
-
-udevrulesdir = $(sysconfdir)/udev/rules.d
-
-udevrules_DATA = $(srcdir)/51-dlm.rules
-
-lib_LTLIBRARIES = libdlm.la libdlm_lt.la
-
-libdlm_la_SOURCES = libdlm.c
-
-libdlm_lt_la_SOURCES = libdlm.c
-
-libdlm_la_CPPFLAGS = -D_REENTRANT
-
-libdlm_la_LDFLAGS = -lpthread \
- -version-info $(libversion)
-
-libdlm_lt_la_LDFLAGS = -version-info $(libversion)
diff --git a/dlm/libdlm/libdlm.pc.in b/dlm/libdlm/libdlm.pc.in
deleted file mode 100644
index 917b261..0000000
--- a/dlm/libdlm/libdlm.pc.in
+++ /dev/null
@@ -1,11 +0,0 @@
-prefix=@prefix@
-exec_prefix=${prefix}
-libdir=@libdir@
-includedir=${prefix}/include
-
-Name: libdlm
-Version: @VERSION@
-Description: Cluster Distributed Lock Manager library
-Requires:
-Libs: -L${libdir} -ldlm -lpthread
-Cflags: -I${includedir}
diff --git a/dlm/libdlm/libdlm_lt.pc.in b/dlm/libdlm/libdlm_lt.pc.in
deleted file mode 100644
index 502cb00..0000000
--- a/dlm/libdlm/libdlm_lt.pc.in
+++ /dev/null
@@ -1,11 +0,0 @@
-prefix=@prefix@
-exec_prefix=${prefix}
-libdir=@libdir@
-includedir=${prefix}/include
-
-Name: libdlm_lt
-Version: @VERSION@
-Description: Cluster Distributed Lock Manager non-threaded library
-Requires:
-Libs: -L${libdir} -ldlm_lt
-Cflags: -I${includedir}
diff --git a/dlm/libdlmcontrol/Makefile.am b/dlm/libdlmcontrol/Makefile.am
deleted file mode 100644
index 3e3f48f..0000000
--- a/dlm/libdlmcontrol/Makefile.am
+++ /dev/null
@@ -1,17 +0,0 @@
-MAINTAINERCLEANFILES = Makefile.in
-
-libversion = 3:0:0
-
-include_HEADERS = libdlmcontrol.h
-
-pkgconfigdir = $(libdir)/pkgconfig
-
-pkgconfig_DATA = libdlmcontrol.pc
-
-lib_LTLIBRARIES = libdlmcontrol.la
-
-libdlmcontrol_la_SOURCES = main.c
-
-libdlmcontrol_la_CPPFLAGS = -I${top_srcdir}/group/dlm_controld
-
-libdlmcontrol_la_LDFLAGS = -version-info $(libversion)
diff --git a/dlm/libdlmcontrol/libdlmcontrol.pc.in b/dlm/libdlmcontrol/libdlmcontrol.pc.in
deleted file mode 100644
index 4273ee4..0000000
--- a/dlm/libdlmcontrol/libdlmcontrol.pc.in
+++ /dev/null
@@ -1,11 +0,0 @@
-prefix=@prefix@
-exec_prefix=${prefix}
-libdir=@libdir@
-includedir=${prefix}/include
-
-Name: libdlmcontrol
-Version: @VERSION@
-Description: DLM control library
-Requires:
-Libs: -L${libdir} -ldlmcontrol
-Cflags: -I${includedir}
diff --git a/dlm/man/Makefile.am b/dlm/man/Makefile.am
deleted file mode 100644
index 8502496..0000000
--- a/dlm/man/Makefile.am
+++ /dev/null
@@ -1,23 +0,0 @@
-MAINTAINERCLEANFILES = Makefile.in
-
-dist_man_MANS = dlm_cleanup.3 \
- dlm_close_lockspace.3 \
- dlm_create_lockspace.3 \
- dlm_dispatch.3 \
- dlm_get_fd.3 \
- dlm_lock.3 \
- dlm_lock_wait.3 \
- dlm_ls_lock.3 \
- dlm_ls_lockx.3 \
- dlm_ls_lock_wait.3 \
- dlm_ls_pthread_init.3 \
- dlm_ls_unlock.3 \
- dlm_ls_unlock_wait.3 \
- dlm_new_lockspace.3 \
- dlm_open_lockspace.3 \
- dlm_pthread_init.3 \
- dlm_release_lockspace.3 \
- dlm_unlock.3 \
- dlm_unlock_wait.3 \
- libdlm.3 \
- dlm_tool.8
diff --git a/dlm/tests/Makefile.am b/dlm/tests/Makefile.am
deleted file mode 100644
index 78fd9bb..0000000
--- a/dlm/tests/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-MAINTAINERCLEANFILES = Makefile.in
-
-SUBDIRS = usertest
diff --git a/dlm/tests/usertest/Makefile.am b/dlm/tests/usertest/Makefile.am
deleted file mode 100644
index 5b30f1c..0000000
--- a/dlm/tests/usertest/Makefile.am
+++ /dev/null
@@ -1,18 +0,0 @@
-MAINTAINERCLEANFILES = Makefile.in
-
-noinst_PROGRAMS = dlmtest asttest lstest pingtest lvb \
- dlmtest2 flood alternate-lvb joinleave threads
-
-AM_CPPFLAGS = -D_REENTRANT \
- -I$(top_srcdir)/dlm/libdlm
-
-dlmtest_LDADD = $(top_builddir)/dlm/libdlm/libdlm.la
-asttest_LDADD = $(top_builddir)/dlm/libdlm/libdlm.la
-lstest_LDADD = $(top_builddir)/dlm/libdlm/libdlm.la
-pingtest_LDADD = $(top_builddir)/dlm/libdlm/libdlm.la
-lvb_LDADD = $(top_builddir)/dlm/libdlm/libdlm.la
-dlmtest2_LDADD = $(top_builddir)/dlm/libdlm/libdlm.la
-flood_LDADD = $(top_builddir)/dlm/libdlm/libdlm.la
-alternate_lvb_LDADD = $(top_builddir)/dlm/libdlm/libdlm.la
-joinleave_LDADD = $(top_builddir)/dlm/libdlm/libdlm.la
-threads_LDADD = $(top_builddir)/dlm/libdlm/libdlm.la
diff --git a/dlm/tests/usertest/alternate-lvb.c b/dlm/tests/usertest/alternate-lvb.c
deleted file mode 100644
index 6e4df0b..0000000
--- a/dlm/tests/usertest/alternate-lvb.c
+++ /dev/null
@@ -1,167 +0,0 @@
-#include "clusterautoconfig.h"
-
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <string.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <time.h>
-#include <sys/time.h>
-#include <syslog.h>
-#include <asm/types.h>
-#include <sys/socket.h>
-#include <sys/poll.h>
-#include <sys/un.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/errno.h>
-
-#include "libdlm.h"
-
-#define die(fmt, args...) \
-do \
-{ \
- fprintf(stderr, "%s: ", prog_name); \
- fprintf(stderr, fmt, ##args); \
- exit(EXIT_FAILURE); \
-} \
-while (0)
-
-static char *prog_name;
-static dlm_lshandle_t *dh;
-static int verbose;
-
-static struct dlm_lksb lksb;
-static char lvb[32];
-
-int main(int argc, char *argv[])
-{
- unsigned long long offset;
- unsigned long long num, last_num = 0;
- unsigned int id, clients, sleeptime = 0;
- unsigned long long skip = 0;
- char *name;
- int rv;
-
- prog_name = argv[0];
- verbose = 0;
-
- if (argc < 5)
- die("name offset id clients [sleep]\n");
-
- name = argv[1];
- offset = atoll(argv[2]);
- id = atoi(argv[3]);
- clients = atoi(argv[4]);
-
- if (argc > 5)
- sleeptime = atoi(argv[5]);
-
- printf("Joining \"alternate\" lockspace...\n");
-
- dh = dlm_create_lockspace("alternate", 0600);
- if (!dh) {
- printf("dlm_create_lockspace error %p %d\n",dh, errno);
- return -ENOTCONN;
- }
-
- rv = dlm_ls_pthread_init(dh);
- if (rv < 0) {
- printf("dlm_ls_pthread_init error %d %d\n", rv, errno);
- dlm_release_lockspace("alternate", dh, 1);
- return rv;
- }
-
- memset(&lksb, 0, sizeof(lksb));
- memset(&lvb, 0, sizeof(lvb));
- lksb.sb_lvbptr = lvb;
-
- if (verbose)
- printf("request NL\n");
-
- rv = dlm_ls_lock_wait(dh, LKM_NLMODE, &lksb, LKF_VALBLK,
- name, strlen(name), 0, NULL, NULL, NULL);
-
- while (1) {
- if (verbose)
- printf("convert NL->PR\n");
-
- rv = dlm_ls_lock_wait(dh, LKM_PRMODE, &lksb,
- LKF_VALBLK | LKF_CONVERT,
- name, strlen(name),
- 0, NULL, NULL, NULL);
- if (rv)
- printf("lock1 error: %d %d\n", rv, lksb.sb_status);
-
- memcpy(&num, &lvb, sizeof(num));
-
- if (verbose)
- printf("read lvb %llu\n", num);
-
- /* it's our turn */
- if (num % clients == id) {
- if (last_num && last_num + clients != num + 1)
- die("bad: num %llu last_num %llu\n",
- num, last_num);
-
- if (verbose)
- printf("convert PR->EX\n");
-
- rv = dlm_ls_lock_wait(dh, LKM_EXMODE, &lksb,
- LKF_VALBLK | LKF_CONVERT,
- name, strlen(name),
- 0, NULL, NULL, NULL);
- if (rv)
- printf("lock2 error: %d %d\n", rv,
- lksb.sb_status);
-
- memcpy(&num, &lvb, sizeof(num));
- if (num % clients != id)
- die("bad2: num %llu\n", num);
-
- num++;
-
- memcpy(&lvb, &num, sizeof(num));
- printf("%llu %llu\n", num, skip);
-
- if (verbose)
- printf("convert EX->NL\n");
-
- rv = dlm_ls_lock_wait(dh, LKM_NLMODE, &lksb,
- LKF_VALBLK | LKF_CONVERT,
- name, strlen(name),
- 0, NULL, NULL, NULL);
- if (rv)
- printf("lock3 error: %d %d\n", rv,
- lksb.sb_status);
-
- last_num = num;
- skip = 0;
- } else {
- skip++;
-
- if (verbose)
- printf("convert PR->NL, skip %llu\n", skip);
-
- rv = dlm_ls_lock_wait(dh, LKM_NLMODE, &lksb,
- LKF_VALBLK | LKF_CONVERT,
- name, strlen(name),
- 0, NULL, NULL, NULL);
- if (rv)
- printf("lock4 error: %d %d\n", rv,
- lksb.sb_status);
- }
-
- if (sleeptime)
- usleep(sleeptime);
- }
-
- dlm_ls_unlock_wait(dh, lksb.sb_lkid, 0, &lksb);
- dlm_release_lockspace("alternate", dh, 1);
-
- exit(EXIT_SUCCESS);
-}
-
diff --git a/dlm/tests/usertest/asttest.c b/dlm/tests/usertest/asttest.c
deleted file mode 100644
index 4b07f6d..0000000
--- a/dlm/tests/usertest/asttest.c
+++ /dev/null
@@ -1,283 +0,0 @@
-/* Test program for userland DLM interface w/ AST */
-/* NOTE: This is not much of a program and it fails in all
- sorts of ways. But it /does/ illustrate the full dlm_lock
- call and ASTs. It doesn /not/ show how you should use the
- FD parts fo the API!
-*/
-
-#include "clusterautoconfig.h"
-
-#ifdef _REENTRANT
-#include <pthread.h>
-#endif
-#include <sys/types.h>
-#include <sys/uio.h>
-#include <sys/poll.h>
-#include <netinet/in.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <net/if.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <fcntl.h>
-#include <netdb.h>
-#include <limits.h>
-#include <unistd.h>
-#include <errno.h>
-#include <getopt.h>
-
-#include "libdlm.h"
-
-static struct dlm_lksb lksb;
-static int use_threads = 0;
-static int quiet = 0;
-
-/* Used by the pthread test */
-static pthread_cond_t cond;
-static pthread_mutex_t mutex;
-
-/* Used by the poll() test */
-static int ast_called = 0;
-
-static int modetonum(char *modestr)
-{
- int mode = LKM_EXMODE;
-
- if (strncasecmp(modestr, "NL", 2) == 0) mode = LKM_NLMODE;
- if (strncasecmp(modestr, "CR", 2) == 0) mode = LKM_CRMODE;
- if (strncasecmp(modestr, "CW", 2) == 0) mode = LKM_CWMODE;
- if (strncasecmp(modestr, "PR", 2) == 0) mode = LKM_PRMODE;
- if (strncasecmp(modestr, "PW", 2) == 0) mode = LKM_PWMODE;
- if (strncasecmp(modestr, "EX", 2) == 0) mode = LKM_EXMODE;
-
- return mode;
-}
-
-static const char *numtomode(int mode)
-{
- switch (mode)
- {
- case LKM_NLMODE: return "NL";
- case LKM_CRMODE: return "CR";
- case LKM_CWMODE: return "CW";
- case LKM_PRMODE: return "PR";
- case LKM_PWMODE: return "PW";
- case LKM_EXMODE: return "EX";
- default: return "??";
- }
-}
-
-static void usage(char *prog, FILE *file)
-{
- fprintf(file, "Usage:\n");
- fprintf(file, "%s [mcnpquhV] <lockname>\n", prog);
- fprintf(file, "\n");
- fprintf(file, " -V Show version of dlmtest\n");
- fprintf(file, " -h Show this help information\n");
- fprintf(file, " -m <mode> lock mode (default EX)\n");
- fprintf(file, " -c <mode> mode to convert to (default none)\n");
- fprintf(file, " -n don't block\n");
- fprintf(file, " -p Use pthreads\n");
- fprintf(file, " -u Don't unlock\n");
- fprintf(file, " -C Crash after lock\n");
- fprintf(file, " -q Quiet\n");
- fprintf(file, " -u Don't unlock explicitly\n");
- fprintf(file, "\n");
-
-}
-
-static void ast_routine(void *arg)
-{
- struct dlm_lksb *alksb = arg;
-
- if (!quiet)
- printf("ast called, status = %d, lkid=%x\n", alksb->sb_status, alksb->sb_lkid);
-
- /* Wake the main thread */
- if (use_threads)
- {
- pthread_mutex_lock(&mutex);
- pthread_cond_signal(&cond);
- pthread_mutex_unlock(&mutex);
- }
- else
- {
- ast_called = 1;
- }
-}
-
-static void bast_routine(void *arg)
-{
- struct dlm_lksb *blksb = arg;
-
- if (!quiet)
- printf("\nblocking ast called, status = %d, lkid=%x\n", blksb->sb_status, blksb->sb_lkid);
-}
-
-/* Using poll(2) to wait for and dispatch ASTs */
-static int poll_for_ast(void)
-{
- struct pollfd pfd;
-
- pfd.fd = dlm_get_fd();
- pfd.events = POLLIN;
- while (!ast_called)
- {
- if (poll(&pfd, 1, 0) < 0)
- {
- perror("poll");
- return -1;
- }
- dlm_dispatch(pfd.fd);
- }
- ast_called = 0;
- return 0;
-}
-
-int main(int argc, char *argv[])
-{
- const char *resource = "LOCK-NAME";
- int flags = 0;
- int delay = 0;
- int status;
- int mode = LKM_EXMODE;
- int convmode = -1;
- int do_unlock = 1;
- int do_crash = 0;
- signed char opt;
-
- /* Deal with command-line arguments */
- opterr = 0;
- optind = 0;
- while ((opt=getopt(argc,argv,"?m:nqupc:d:CvV")) != EOF)
- {
- switch(opt)
- {
- case 'h':
- usage(argv[0], stdout);
- exit(0);
-
- case '?':
- usage(argv[0], stderr);
- exit(0);
-
- case 'm':
- mode = modetonum(optarg);
- break;
-
- case 'c':
- convmode = modetonum(optarg);
- break;
-
- case 'p':
- use_threads++;
- break;
-
- case 'n':
- flags |= LKF_NOQUEUE;
- break;
-
- case 'q':
- quiet = 1;
- break;
-
- case 'u':
- do_unlock = 0;
- break;
-
- case 'C':
- do_crash = 1;
- break;
-
- case 'd':
- delay = atoi(optarg);
- break;
-
- case 'V':
- printf("\nasttest version 0.1\n\n");
- exit(1);
- break;
- }
- }
-
- if (argv[optind])
- resource = argv[optind];
-
- if (!quiet)
- fprintf(stderr, "locking %s %s %s...", resource,
- numtomode(mode),
- (flags&LKF_NOQUEUE?"(NOQUEUE)":""));
-
- fflush(stderr);
-
- if (use_threads)
- {
- pthread_cond_init(&cond, NULL);
- pthread_mutex_init(&mutex, NULL);
- pthread_mutex_lock(&mutex);
-
- dlm_pthread_init();
- }
-
- status = dlm_lock(mode,
- &lksb,
- flags,
- resource,
- strlen(resource),
- 0, // Parent,
- ast_routine,
- &lksb,
- bast_routine,
- NULL); // Range
- if (status == -1)
- {
- if (!quiet) fprintf(stderr, "\n");
- perror("lock");
-
- return -1;
- }
- printf("(lkid=%x)", lksb.sb_lkid);
-
- if (do_crash)
- *(int *)0 = 0xdeadbeef;
-
- /* Wait */
- if (use_threads)
- pthread_cond_wait(&cond, &mutex);
- else
- poll_for_ast();
-
- if (delay)
- sleep(delay);
-
- if (!quiet)
- {
- fprintf(stderr, "unlocking %s...", resource);
- fflush(stderr);
- }
-
- if (do_unlock)
- {
- status = dlm_unlock(lksb.sb_lkid,
- 0, // flags
- &lksb,
- &lksb); // AST args
- if (status == -1)
- {
- if (!quiet) fprintf(stderr, "\n");
- perror("unlock");
- return -1;
- }
-
- /* Wait */
- if (use_threads)
- pthread_cond_wait(&cond, &mutex);
- else
- poll_for_ast();
- }
-
- return 0;
-}
-
diff --git a/dlm/tests/usertest/dlmtest.c b/dlm/tests/usertest/dlmtest.c
deleted file mode 100644
index 02f8f07..0000000
--- a/dlm/tests/usertest/dlmtest.c
+++ /dev/null
@@ -1,291 +0,0 @@
-/* Test program for userland DLM interface */
-
-#include "clusterautoconfig.h"
-
-#include <sys/types.h>
-#include <sys/uio.h>
-#include <netinet/in.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <net/if.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <fcntl.h>
-#include <netdb.h>
-#include <limits.h>
-#include <unistd.h>
-#include <errno.h>
-#include <getopt.h>
-
-#include "libdlm.h"
-
-static int modetonum(char *modestr)
-{
- int mode = LKM_EXMODE;
-
- if (strncasecmp(modestr, "NL", 2) == 0) mode = LKM_NLMODE;
- if (strncasecmp(modestr, "CR", 2) == 0) mode = LKM_CRMODE;
- if (strncasecmp(modestr, "CW", 2) == 0) mode = LKM_CWMODE;
- if (strncasecmp(modestr, "PR", 2) == 0) mode = LKM_PRMODE;
- if (strncasecmp(modestr, "PW", 2) == 0) mode = LKM_PWMODE;
- if (strncasecmp(modestr, "EX", 2) == 0) mode = LKM_EXMODE;
-
- return mode;
-}
-
-static const char *numtomode(int mode)
-{
- switch (mode)
- {
- case LKM_NLMODE: return "NL";
- case LKM_CRMODE: return "CR";
- case LKM_CWMODE: return "CW";
- case LKM_PRMODE: return "PR";
- case LKM_PWMODE: return "PW";
- case LKM_EXMODE: return "EX";
- default: return "??";
- }
-}
-
-static void usage(char *prog, FILE *file)
-{
- fprintf(file, "Usage:\n");
- fprintf(file, "%s [hmcnQpequdV] <lockname>\n", prog);
- fprintf(file, "\n");
- fprintf(file, " -V Show version of dlmtest\n");
- fprintf(file, " -h Show this help information\n");
- fprintf(file, " -m <mode> lock mode (default EX)\n");
- fprintf(file, " -c <mode> mode to convert to (default none)\n");
- fprintf(file, " -n don't block\n");
- fprintf(file, " -Q query the lock\n");
- fprintf(file, " -p Persistent lock\n");
- fprintf(file, " -e Expedite conversion\n");
- fprintf(file, " -q Quiet\n");
- fprintf(file, " -u Don't unlock explicitly\n");
- fprintf(file, " -d <secs> Time to hold the lock for\n");
- fprintf(file, "\n");
-
-}
-
-#ifdef QUERY
-static void query_ast_routine(void *arg)
-{
- struct dlm_lksb *lksb = arg;
- struct dlm_queryinfo *qi = (struct dlm_queryinfo *)lksb->sb_lvbptr;
- int i;
-
- qi->gqi_resinfo->rsi_name[qi->gqi_resinfo->rsi_length] = '\0';
- /* Dump resource info */
- printf("lockinfo: status = %d\n", lksb->sb_status);
- printf("lockinfo: resource = '%s'\n", qi->gqi_resinfo->rsi_name);
- printf("lockinfo: grantcount = %d\n", qi->gqi_resinfo->rsi_grantcount);
- printf("lockinfo: convcount = %d\n", qi->gqi_resinfo->rsi_convcount);
- printf("lockinfo: waitcount = %d\n", qi->gqi_resinfo->rsi_waitcount);
- printf("lockinfo: masternode = %d\n", qi->gqi_resinfo->rsi_masternode);
-
- /* Dump all the locks */
- for (i = 0; i < qi->gqi_lockcount; i++)
- {
- struct dlm_lockinfo *li = &qi->gqi_lockinfo[i];
-
- printf("lockinfo: lock: lkid = %x\n", li->lki_lkid);
- printf("lockinfo: lock: master lkid = %x\n", li->lki_mstlkid);
- printf("lockinfo: lock: parent lkid = %x\n", li->lki_parent);
- printf("lockinfo: lock: node = %d\n", li->lki_node);
- printf("lockinfo: lock: pid = %d\n", li->lki_ownpid);
- printf("lockinfo: lock: state = %d\n", li->lki_state);
- printf("lockinfo: lock: grmode = %d\n", li->lki_grmode);
- printf("lockinfo: lock: rqmode = %d\n", li->lki_rqmode);
- printf("\n");
- }
-
- if (qi->gqi_lockinfo)
- free(qi->gqi_lockinfo);
-}
-
-static struct dlm_queryinfo qinfo;
-static struct dlm_resinfo resinfo;
-#define MAX_QUERY_LOCKS 10
-
-
-static int query_lock(int lockid)
-{
- int status;
-struct dlm_lksb tmplksb;
- lksb.sb_lkid = lockid;
- qinfo.gqi_resinfo = &resinfo;
- qinfo.gqi_lockinfo = malloc(sizeof(struct dlm_lockinfo) * MAX_QUERY_LOCKS);
- qinfo.gqi_locksize = MAX_QUERY_LOCKS;
- lksb.sb_lvbptr = (char *)&qinfo;
-
- status = dlm_query(&tmplksb,
- DLM_QUERY_QUEUE_ALL | DLM_QUERY_LOCKS_ALL,
- &qinfo,
- query_ast_routine,
- &tmplksb);
- if (status)
- perror("Query failed");
- else
- sleep(1); /* Just to allow the result to come back. There isn't
- a synchronous version of this call */
- return status;
-}
-#endif
-
-
-int main(int argc, char *argv[])
-{
- const char *resource = "LOCK-NAME";
- int flags = 0;
- int status;
- int delay = 5;
- int mode = LKM_EXMODE;
- int convmode = -1;
- int lockid;
- int quiet = 0;
- int do_unlock = 1;
- int do_query = 0;
- int do_expedite = 0;
- signed char opt;
-
- /* Deal with command-line arguments */
- opterr = 0;
- optind = 0;
- while ((opt=getopt(argc,argv,"?m:nquQepd:c:vV")) != EOF)
- {
- switch(opt)
- {
- case 'h':
- usage(argv[0], stdout);
- exit(0);
-
- case '?':
- usage(argv[0], stderr);
- exit(0);
-
- case 'm':
- mode = modetonum(optarg);
- break;
-
- case 'c':
- convmode = modetonum(optarg);
- break;
-
- case 'e':
- do_expedite = 1;
- break;
-
- case 'p':
- flags |= LKF_PERSISTENT;
- break;
-
- case 'n':
- flags |= LKF_NOQUEUE;
- break;
-
- case 'd':
- delay = atoi(optarg);
- break;
-
- case 'q':
- quiet = 1;
- break;
-
- case 'u':
- do_unlock = 0;
- break;
-
- case 'Q':
- do_query = 1;
- break;
-
- case 'V':
- printf("\ndlmtest version 0.3\n\n");
- exit(1);
- break;
- }
- }
-
- if (argv[optind])
- resource = argv[optind];
-
- if (!quiet)
- fprintf(stderr, "locking %s %s %s...", resource,
- numtomode(mode),
- (flags&LKF_NOQUEUE?"(NOQUEUE)":""));
-
- fflush(stderr);
-
- status = lock_resource(resource, mode, flags, &lockid);
- if (status == -1)
- {
- if (!quiet) fprintf(stderr, "\n");
- perror("lock");
-
- return -1;
- }
- if (lockid == 0)
- {
- fprintf(stderr, "error: got lockid of zero\n");
- return 0;
- }
-
- if (!quiet) fprintf(stderr, "done (lkid = %x)\n", lockid);
-
- if (!do_unlock) return 0;
-
-#ifdef QUERY
- if (do_query) query_lock(lockid);
-#endif
-
- sleep(delay);
-
- if (convmode != -1)
- {
- if (do_expedite)
- flags |= LKF_EXPEDITE;
-
- if (!quiet)
- {
- fprintf(stderr, "converting %s to %s...", resource, numtomode(convmode));
- fflush(stderr);
- }
-
- status = lock_resource(resource, convmode, flags | LKF_CONVERT, &lockid);
- if (status == -1)
- {
- if (!quiet) fprintf(stderr, "\n");
- perror("convert");
- return -1;
- }
- if (!quiet) fprintf(stderr, "done\n");
- }
-
- sleep(delay);
-
- if (!quiet)
- {
- fprintf(stderr, "unlocking %s...", resource);
- fflush(stderr);
- }
-
- status = unlock_resource(lockid);
- if (status == -1)
- {
- if (!quiet) fprintf(stderr, "\n");
- perror("unlock");
- return -1;
- }
-
- if (!quiet) fprintf(stderr, "done\n");
-
- /* For some reason, calling this IMMEDIATELY before
- exitting, causes a thread hang. either don't call it at
- all or do something in afterwards before calling exit
- */
- dlm_pthread_cleanup();
- return 0;
-}
-
diff --git a/dlm/tests/usertest/dlmtest2.c b/dlm/tests/usertest/dlmtest2.c
deleted file mode 100644
index 9d3afd9..0000000
--- a/dlm/tests/usertest/dlmtest2.c
+++ /dev/null
@@ -1,1469 +0,0 @@
-#include "clusterautoconfig.h"
-
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <string.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <time.h>
-#include <signal.h>
-#include <syslog.h>
-#include <sys/time.h>
-#include <asm/types.h>
-#include <sys/socket.h>
-#include <sys/poll.h>
-#include <sys/un.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/errno.h>
-
-#include "libdlm.h"
-
-#define MAX_CLIENTS 4
-#define MAX_LOCKS 16
-#define MAX_RESOURCES 16
-
-static dlm_lshandle_t *dh;
-static int libdlm_fd;
-static int timewarn = 0;
-static uint64_t timeout = 0;
-static int noqueue = 1;
-static int persistent = 0;
-static int ignore_bast = 0;
-static int quiet = 1;
-static int verbose = 0;
-static int bast_cb;
-static int maxn = MAX_LOCKS;
-static int maxr = MAX_RESOURCES;
-static int iterations;
-static int minhold = 0;
-static int stress_stop = 0;
-static int stress_delay = 0;
-static int stress_lock_only = 0;
-static int openclose_ls = 0;
-static uint64_t our_xid;
-static char cmd[32];
-static int opt_cmd = 0;
-
-static unsigned int sts_eunlock, sts_ecancel, sts_etimedout, sts_edeadlk, sts_eagain, sts_other, sts_zero;
-static unsigned int bast_unlock, bast_skip;
-
-
-#define log_print(fmt, args...) \
-do { \
- if (!quiet) \
- printf(fmt , ##args); \
-} while (0)
-
-#define log_op(fmt, args...) \
-do { \
- if (!quiet) \
- printf(fmt , ##args); \
-} while (0)
-
-#define log_ast(fmt, args...) \
-do { \
- if (verbose) \
- printf(fmt , ##args); \
-} while (0)
-
-#define log_bast(fmt, args...) \
-do { \
- if (verbose > 1) \
- printf(fmt , ##args); \
-} while (0)
-
-#define log_verbose(fmt, args...) \
-do { \
- if (verbose > 2) \
- printf(fmt , ##args); \
-} while (0)
-
-struct client {
- int fd;
- char type[32];
-};
-
-static int client_size = MAX_CLIENTS;
-static struct client client[MAX_CLIENTS];
-static struct pollfd pollfd[MAX_CLIENTS];
-
-enum {
- Op_lock = 1,
- Op_unlock,
- Op_unlockf,
- Op_cancel,
-};
-
-struct lk {
- int id;
- int rqmode;
- int grmode;
- int wait_ast;
- int lastop;
- int last_status;
- int bast;
- int minhold;
- struct dlm_lksb lksb;
- struct timeval begin;
- struct timeval acquired;
-};
-
-struct lk *locks;
-
-static void unlock(int i);
-static void unlockf(int i);
-
-
-static int rand_int(int a, int b)
-{
- return a + (int) (((float)(b - a + 1)) * random() / (RAND_MAX+1.0));
-}
-
-static const char *status_str(int status)
-{
- static char sts_str[8];
-
- switch (status) {
- case 0:
- return "0 ";
- case EUNLOCK:
- return "EUNLOCK";
- case ECANCEL:
- return "ECANCEL";
- case EAGAIN:
- return "EAGAIN ";
- case EBUSY:
- return "EBUSY ";
- case ETIMEDOUT:
- return "ETIMEDO";
- case EDEADLK:
- return "EDEADLK";
- default:
- snprintf(sts_str, 8, "%8x", status);
- return sts_str;
- }
-}
-
-static const char *op_str(int op)
-{
- switch (op) {
- case Op_lock:
- return "lock";
- case Op_unlock:
- return "unlock";
- case Op_unlockf:
- return "unlockf";
- case Op_cancel:
- return "cancel";
- default:
- return "unknown";
- }
-}
-
-static struct lk *get_lock(int i)
-{
- if (i < 0)
- return NULL;
- if (i >= maxn)
- return NULL;
- return &locks[i];
-}
-
-static int all_unlocks_done(void)
-{
- struct lk *lk;
- int i;
-
- for (i = 0; i < maxn; i++) {
- lk = get_lock(i);
- if (lk->grmode == -1 && !lk->wait_ast)
- continue;
- return 0;
- }
- return 1;
-}
-
-static void dump(void)
-{
- struct timeval now;
- struct lk *lk;
- int i;
-
- gettimeofday(&now, NULL);
-
- for (i = 0; i < maxn; i++) {
- lk = get_lock(i);
- printf("x %2d lkid %08x gr %2d rq %2d wait_ast %d last op %s \t%s %us\n",
- i,
- lk->lksb.sb_lkid,
- lk->grmode,
- lk->rqmode,
- lk->wait_ast,
- op_str(lk->lastop),
- status_str(lk->last_status),
- lk->wait_ast ? (unsigned int)(now.tv_sec - lk->begin.tv_sec) : 0);
- }
-}
-
-static void bastfn(void *arg)
-{
- struct lk *lk = arg;
- lk->bast = 1;
- bast_cb = 1;
-}
-
-static void do_bast(struct lk *lk)
-{
- int skip = 0;
-
- if (lk->lastop == Op_unlock || lk->lastop == Op_unlockf) {
- skip = 1;
- }
- if (!lk->lksb.sb_lkid) {
- skip = 1;
- }
-
- if (skip) {
- bast_skip++;
- log_bast(" bast: skip %3d\t%x\n", lk->id, lk->lksb.sb_lkid);
- } else {
- bast_unlock++;
- log_bast(" bast: unlockf %3d\t%x\n", lk->id, lk->lksb.sb_lkid);
- unlockf(lk->id);
- }
- lk->bast = 0;
-}
-
-static void do_bast_unlocks(void)
-{
- struct lk *lk;
- int i;
-
- for (i = 0; i < maxn; i++) {
- lk = get_lock(i);
- if (lk->bast)
- do_bast(lk);
- }
- bast_cb = 0;
-}
-
-static void process_libdlm(void)
-{
- dlm_dispatch(libdlm_fd);
- if (bast_cb && !ignore_bast)
- do_bast_unlocks();
-}
-
-static void astfn(void *arg)
-{
- struct lk *lk = arg;
- int i = lk->id;
-
- if (!lk->wait_ast) {
- printf(" ast: %s %3d\t%x: !wait_ast gr %d rq %d last op %s %s\n",
- status_str(lk->lksb.sb_status), i, lk->lksb.sb_lkid,
- lk->grmode, lk->rqmode,
- op_str(lk->lastop), status_str(lk->last_status));
- }
-
- log_ast(" ast: %s %3d\t%x\n",
- status_str(lk->lksb.sb_status), i, lk->lksb.sb_lkid);
-
- lk->last_status = lk->lksb.sb_status;
-
- if (lk->lksb.sb_status == EUNLOCK) {
- sts_eunlock++;
- memset(&lk->lksb, 0, sizeof(struct dlm_lksb));
- lk->grmode = -1;
- lk->wait_ast = 0;
-
- } else if (lk->lksb.sb_status == ECANCEL) {
- sts_ecancel++;
- if (lk->grmode == -1) {
- memset(&lk->lksb, 0, sizeof(struct dlm_lksb));
- lk->wait_ast = 0;
- } else {
- if (lk->lastop != Op_unlock && lk->lastop != Op_unlockf)
- lk->wait_ast = 0;
- }
-
- } else if (lk->lksb.sb_status == ETIMEDOUT) {
- sts_etimedout++;
- if (lk->grmode == -1) {
- memset(&lk->lksb, 0, sizeof(struct dlm_lksb));
- lk->wait_ast = 0;
- } else {
- if (lk->lastop != Op_unlock && lk->lastop != Op_unlockf)
- lk->wait_ast = 0;
- }
-
- } else if (lk->lksb.sb_status == EDEADLK) {
- sts_edeadlk++;
- if (lk->grmode == -1) {
- memset(&lk->lksb, 0, sizeof(struct dlm_lksb));
- lk->wait_ast = 0;
- } else {
- if (lk->lastop != Op_unlock && lk->lastop != Op_unlockf)
- lk->wait_ast = 0;
- }
-
- } else if (lk->lksb.sb_status == EAGAIN) {
- sts_eagain++;
- if (lk->grmode == -1) {
- memset(&lk->lksb, 0, sizeof(struct dlm_lksb));
- lk->wait_ast = 0;
- } else {
- if (lk->lastop != Op_unlockf)
- lk->wait_ast = 0;
- }
-
- } else {
- if (lk->lksb.sb_status != 0) {
- sts_other++;
- printf("BAD ast: %d %3d\t%x: gr %d rq %d last op %s %s\n",
- lk->lksb.sb_status, i, lk->lksb.sb_lkid,
- lk->grmode, lk->rqmode, op_str(lk->lastop),
- status_str(lk->last_status));
- stress_stop = 1;
- return;
- }
-
- sts_zero++;
-
- if (lk->lastop != Op_unlockf)
- lk->wait_ast = 0;
-
- lk->grmode = lk->rqmode;
-
- if (minhold) {
- gettimeofday(&lk->acquired, NULL);
- lk->minhold = minhold;
- }
- }
-
- lk->rqmode = -1;
-}
-
-/* EBUSY from dlm_ls_lockx() is expected sometimes, e.g. lock, cancel, lock;
- the first lock is successful and the app gets the status back,
- and issues the second lock before the reply for the overlapping
- cancel (which did nothing) has been received in the dlm. */
-
-static void lock(int i, int mode)
-{
- char name[DLM_RESNAME_MAXLEN];
- struct lk *lk;
- int flags = 0;
- int rv;
- uint64_t *timeout_arg = NULL;
-
- lk = get_lock(i);
- if (!lk)
- return;
-
- if (noqueue)
- flags |= LKF_NOQUEUE;
- if (persistent)
- flags |= LKF_PERSISTENT;
- if (timeout) {
- flags |= LKF_TIMEOUT;
- timeout_arg = &timeout;
- }
-
- if (lk->lksb.sb_lkid)
- flags |= LKF_CONVERT;
-
- memset(name, 0, sizeof(name));
- snprintf(name, sizeof(name), "test%d", (i % maxr));
-
- log_verbose("lock: %d grmode %d rqmode %d flags %x lkid %x %s\n",
- i, lk->grmode, mode, flags, lk->lksb.sb_lkid, name);
-
-#if 0
- rv = dlm_ls_lock(dh, mode, &lk->lksb, flags, name, strlen(name), 0,
- astfn, (void *) lk, bastfn, NULL);
-#else
- rv = dlm_ls_lockx(dh, mode, &lk->lksb, flags, name, strlen(name), 0,
- astfn, (void *) lk, bastfn, &our_xid, timeout_arg);
-#endif
- if (!rv) {
- lk->wait_ast = 1;
- lk->rqmode = mode;
- gettimeofday(&lk->begin, NULL);
- } else if (rv == -1 && errno == EBUSY) {
- printf(" : lock %3d\t%x: EBUSY gr %d rq %d wait_ast %d\n",
- i, lk->lksb.sb_lkid, lk->grmode, lk->rqmode, lk->wait_ast);
- } else {
- printf(" : lock %3d\t%x: errno %d rv %d gr %d rq %d wait_ast %d\n",
- i, lk->lksb.sb_lkid, errno, rv, lk->grmode, lk->rqmode, lk->wait_ast);
- stress_stop = 1;
- }
-
- log_verbose("lock: %d rv %d sb_lkid %x sb_status %x\n",
- i, rv, lk->lksb.sb_lkid, lk->lksb.sb_status);
-
- lk->lastop = Op_lock;
-}
-
-static void lock_sync(int i, int mode)
-{
- char name[DLM_RESNAME_MAXLEN];
- int flags = 0;
- int rv;
- struct lk *lk;
-
- lk = get_lock(i);
- if (!lk)
- return;
-
- if (noqueue)
- flags |= LKF_NOQUEUE;
- if (persistent)
- flags |= LKF_PERSISTENT;
-
- if (lk->lksb.sb_lkid)
- flags |= LKF_CONVERT;
-
- memset(name, 0, sizeof(name));
- snprintf(name, sizeof(name), "test%d", (i % maxr));
-
- log_verbose("lock_sync: %d rqmode %d flags %x lkid %x %s\n",
- i, mode, flags, lk->lksb.sb_lkid, name);
-
- rv = dlm_ls_lock_wait(dh, mode, &lk->lksb, flags,
- name, strlen(name), 0, (void *) lk,
- bastfn, NULL);
-
- log_verbose("lock_sync: %d rv %d sb_lkid %x sb_status %x\n",
- i, rv, lk->lksb.sb_lkid, lk->lksb.sb_status);
-
- if (!rv) {
- lk->grmode = mode;
- lk->rqmode = -1;
- } else if (rv == EAGAIN) {
- if (lk->grmode == -1)
- memset(&lk->lksb, 0, sizeof(struct dlm_lksb));
- } else {
- printf("unknown rv %d\n", rv);
- exit(-1);
- }
-}
-
-static void lock_all(int mode)
-{
- int i;
-
- for (i = 0; i < maxn; i++)
- lock(i, mode);
-}
-
-static const char *uflags(uint32_t flags)
-{
- if (flags == LKF_FORCEUNLOCK)
- return "FORCEUNLOCK";
- if (flags == LKF_CANCEL)
- return "CANCEL";
- return "0";
-}
-
-/* ENOENT is expected from dlm_ls_unlock() sometimes because we'll
- try to do an unlockf during an outstanding op that will free
- the lock itself */
-
-static void _unlock(int i, uint32_t flags)
-{
- struct lk *lk;
- uint32_t lkid;
- int rv;
-
- lk = get_lock(i);
- if (!lk)
- return;
-
- lkid = lk->lksb.sb_lkid;
- if (!lkid)
- return;
-
- log_verbose("unlock: %d lkid %x flags %x\n", i, lkid, flags);
-
- rv = dlm_ls_unlock(dh, lkid, flags, &lk->lksb, lk);
- if (!rv) {
- lk->wait_ast = 1;
- gettimeofday(&lk->begin, NULL);
- } else if (rv == -1 && errno == EBUSY) {
- printf(" : unlock %3d\t%x: EBUSY flags %s gr %d rq %d wait_ast %d\n",
- i, lk->lksb.sb_lkid, uflags(flags), lk->grmode, lk->rqmode, lk->wait_ast);
- } else if (rv == -1 && errno == ENOENT) {
- printf(" : unlock %3d\t%x: ENOENT flags %s gr %d rq %d wait_ast %d\n",
- i, lk->lksb.sb_lkid, uflags(flags), lk->grmode, lk->rqmode, lk->wait_ast);
- } else {
- printf(" : unlock %3d\t%x: errno %d flags %s rv %d gr %d rq %d wait_ast %d\n",
- i, lk->lksb.sb_lkid, errno, uflags(flags), rv, lk->grmode, lk->rqmode, lk->wait_ast);
- }
-}
-
-static void unlock(int i)
-{
- struct lk *lk = get_lock(i);
-
- if (minhold) {
- struct timeval now;
-
- if (lk->wait_ast)
- return;
-
- gettimeofday(&now, NULL);
- if (lk->acquired.tv_sec + lk->minhold > now.tv_sec) {
- printf(" : unlock %3d\t%x: gr %d rq %d held %u of %u s\n",
- i, lk->lksb.sb_lkid, lk->grmode, lk->rqmode,
- (unsigned int)(now.tv_sec - lk->acquired.tv_sec), lk->minhold);
- return;
- }
- }
-
- _unlock(i, 0);
- lk->rqmode = -1;
- lk->lastop = Op_unlock;
-}
-
-static void unlockf(int i)
-{
- struct lk *lk = get_lock(i);
-
- if (minhold) {
- struct timeval now;
-
- if (lk->wait_ast)
- return;
-
- gettimeofday(&now, NULL);
- if (lk->acquired.tv_sec + lk->minhold > now.tv_sec) {
- printf(" : unlockf %3d\t%x: gr %d rq %d held %u of %u s\n",
- i, lk->lksb.sb_lkid, lk->grmode, lk->rqmode,
- (unsigned int)(now.tv_sec - lk->acquired.tv_sec), lk->minhold);
- return;
- }
- }
-
- _unlock(i, LKF_FORCEUNLOCK);
- lk->rqmode = -1;
- lk->lastop = Op_unlockf;
-}
-
-static void cancel(int i)
-{
- struct lk *lk = get_lock(i);
- _unlock(i, LKF_CANCEL);
- lk->lastop = Op_cancel;
-}
-
-static void canceld(int i, uint32_t lkid)
-{
- int rv;
-
- rv = dlm_ls_deadlock_cancel(dh, lkid, 0);
-
- printf("canceld %x: %d %d\n", lkid, rv, errno);
-}
-
-static void unlock_sync(int i)
-{
- uint32_t lkid;
- int rv;
- struct lk *lk;
-
- lk = get_lock(i);
- if (!lk)
- return;
-
- lkid = lk->lksb.sb_lkid;
- if (!lkid) {
- log_print("unlock %d skip zero lkid\n", i);
- return;
- }
-
- log_verbose("unlock_sync: %d lkid %x\n", i, lkid);
-
- rv = dlm_ls_unlock_wait(dh, lkid, 0, &lk->lksb);
-
- log_verbose("unlock_sync: %d rv %d sb_status %x\n", i, rv,
- lk->lksb.sb_status);
-
- memset(&lk->lksb, 0, sizeof(struct dlm_lksb));
- lk->grmode = -1;
- lk->rqmode = -1;
-}
-
-static void unlock_all(void)
-{
- struct lk *lk;
- int i;
-
- for (i = 0; i < maxn; i++) {
- lk = get_lock(i);
- unlock(i);
- }
-}
-
-static void purge(int nodeid, int pid)
-{
- struct lk *lk;
- int i, rv;
-
- rv = dlm_ls_purge(dh, nodeid, pid);
- if (rv) {
- printf("dlm_ls_purge %d %d error %d\n", nodeid, pid, rv);
- return;
- }
-
- for (i = 0; i < maxn; i++) {
- lk = get_lock(i);
- memset(&lk->lksb, 0, sizeof(struct dlm_lksb));
- lk->grmode = -1;
- lk->wait_ast = 0;
- }
-}
-
-static void purgetest(int nodeid, int pid)
-{
- struct lk *lk;
- int i, mid = maxn / 2;
-
- printf("lock %d to %d\n", 0, mid-1);
- for (i = 0; i < mid; i++)
- lock(i, 3);
-
- while (1) {
- process_libdlm();
- for (i = 0; i < mid; i++) {
- lk = get_lock(i);
- if (!lk->wait_ast)
- continue;
- break;
- }
- if (i == mid)
- break;
- }
-
- for (i = mid; i < maxn; i++)
- lock(i, 3);
- for (i = 0; i < mid; i++)
- unlock(i);
- /* usleep(10000); */
- purge(nodeid, pid);
-}
-
-static void tstress_unlocks(void)
-{
- struct lk *lk;
- struct timeval now;
- int i;
-
- for (i = 0; i < maxn; i++) {
- lk = get_lock(i);
- if (!lk)
- continue;
- if (lk->wait_ast)
- continue;
- if (lk->grmode < 0)
- continue;
-
- /* if we've held the lock for minhold seconds, then unlock */
-
- gettimeofday(&now, NULL);
-
- if (now.tv_sec >= lk->acquired.tv_sec + minhold) {
- printf(" : unlock %3d\t%x: gr %d rq %d held %u of %u s\n",
- i, lk->lksb.sb_lkid, lk->grmode, lk->rqmode,
- (unsigned int)(now.tv_sec - lk->acquired.tv_sec), minhold);
-
- _unlock(i, 0);
- lk->rqmode = -1;
- lk->lastop = Op_unlock;
- }
-
- }
-}
-
-static void tstress(int num)
-{
- unsigned int n, skips, lock_ops, unlock_ops, unlockf_ops, cancel_ops;
- int i;
- struct lk *lk;
-
- n = skips = lock_ops = unlock_ops = unlockf_ops = cancel_ops = 0;
- sts_eunlock = sts_ecancel = sts_etimedout = sts_edeadlk = sts_eagain = sts_other = sts_zero = 0;
- bast_unlock = bast_skip = 0;
-
- noqueue = 0;
- ignore_bast = 1;
- quiet = 0;
-
- if (!timeout)
- timeout = 4;
- if (!minhold)
- minhold = 5;
-
- while (!stress_stop) {
- if (stress_delay)
- usleep(stress_delay);
-
- process_libdlm();
-
- tstress_unlocks();
-
- if (++n == num) {
- if (all_unlocks_done())
-