Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=c361fd8d3d7b10... Commit: c361fd8d3d7b1074107f0800fdae79820574b81d Parent: 0cf2544edfa377c47b0e904ecdce00194590593e Author: Steven Whitehouse swhiteho@redhat.com AuthorDate: Mon Feb 18 17:06:58 2013 +0000 Committer: Bob Peterson rpeterso@redhat.com CommitterDate: Fri May 17 14:29:20 2013 -0500
fsck: Speed up reading of dir leaf blocks
This patch adds readahead for directory leaf blocks. It gives me a speedup of only around one second on my test filesystem; however, that filesystem has only one directory with a reasonable number of files in it, so that is actually pretty good going for such a small filesystem.
Because the dir hash table is now read in a single sweep, the number of calls to read dir hash table blocks is reduced considerably.
The patch takes all the valid leaf block pointers, sorts them into disk block order, and then issues readahead requests for the blocks in that order, so that they are read in well before they are needed.
rhbz#902920 --- gfs2/fsck/metawalk.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 files changed, 80 insertions(+), 6 deletions(-)
diff --git a/gfs2/fsck/metawalk.c b/gfs2/fsck/metawalk.c index fb461ae..ce80738 100644 --- a/gfs2/fsck/metawalk.c +++ b/gfs2/fsck/metawalk.c @@ -7,6 +7,7 @@ #include <unistd.h> #include <libintl.h> #include <ctype.h> +#include <fcntl.h> #define _(String) gettext(String)
#include "libgfs2.h" @@ -640,24 +641,87 @@ out_copy_old_leaf: return 1; }
+static uint64_t *get_dir_hash(struct gfs2_inode *ip) +{ + unsigned hsize = (1 << ip->i_di.di_depth) * sizeof(uint64_t); + int ret; + uint64_t *tbl = malloc(hsize); + + if (tbl == NULL) + return NULL; + + ret = gfs2_readi(ip, tbl, 0, hsize); + if (ret != hsize) { + free(tbl); + return NULL; + } + + return tbl; +} + +static int u64cmp(const void *p1, const void *p2) +{ + uint64_t a = *(uint64_t *)p1; + uint64_t b = *(uint64_t *)p2; + + if (a > b) + return 1; + if (b < b) + return -1; + + return 0; +} + +static void dir_leaf_reada(struct gfs2_inode *ip, uint64_t *tbl, unsigned hsize) +{ + uint64_t *t = alloca(hsize * sizeof(uint64_t)); + uint64_t leaf_no; + struct gfs2_sbd *sdp = ip->i_sbd; + unsigned n = 0; + unsigned i; + + for (i = 0; i < hsize; i++) { + leaf_no = be64_to_cpu(tbl[i]); + if (valid_block(ip->i_sbd, leaf_no)) + t[n++] = leaf_no * sdp->bsize; + } + qsort(t, n, sizeof(uint64_t), u64cmp); + for (i = 0; i < n; i++) + posix_fadvise(sdp->device_fd, t[i], sdp->bsize, POSIX_FADV_WILLNEED); +} + /* Checks exhash directory entries */ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass) { int error; struct gfs2_leaf leaf, oldleaf; + unsigned hsize = (1 << ip->i_di.di_depth); uint64_t leaf_no, old_leaf, bad_leaf = -1; uint64_t first_ok_leaf; struct gfs2_buffer_head *lbh; int lindex; struct gfs2_sbd *sdp = ip->i_sbd; int ref_count = 0, old_was_dup; + uint64_t *tbl; + + tbl = get_dir_hash(ip); + if (tbl == NULL) { + perror("get_dir_hash"); + return -1; + } + + /* Turn off system readahead */ + posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_RANDOM); + + /* Readahead */ + dir_leaf_reada(ip, tbl, hsize);
/* Find the first valid leaf pointer in range and use it as our "old" leaf. That way, bad blocks at the beginning will be overwritten with the first valid leaf. */ first_ok_leaf = leaf_no = -1; - for (lindex = 0; lindex < (1 << ip->i_di.di_depth); lindex++) { - gfs2_get_leaf_nr(ip, lindex, &leaf_no); + for (lindex = 0; lindex < hsize; lindex++) { + leaf_no = be64_to_cpu(tbl[lindex]); if (valid_block(ip->i_sbd, leaf_no)) { lbh = bread(sdp, leaf_no); /* Make sure it's really a valid leaf block. */ @@ -674,19 +738,22 @@ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass) "blocks\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); + free(tbl); + posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL); return 1; } old_leaf = -1; memset(&oldleaf, 0, sizeof(oldleaf)); old_was_dup = 0; - for (lindex = 0; lindex < (1 << ip->i_di.di_depth); lindex++) { + for (lindex = 0; lindex < hsize; lindex++) { if (fsck_abort) break; - gfs2_get_leaf_nr(ip, lindex, &leaf_no); + leaf_no = be64_to_cpu(tbl[lindex]);
/* GFS has multiple indirect pointers to the same leaf * until those extra pointers are needed, so skip the dups */ if (leaf_no == bad_leaf) { + tbl[lindex] = cpu_to_be64(old_leaf); gfs2_put_leaf_nr(ip, lindex, old_leaf); ref_count++; continue; @@ -696,8 +763,11 @@ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass) }
do { - if (fsck_abort) + if (fsck_abort) { + free(tbl); + posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL); return 0; + } /* If the old leaf was a duplicate referenced by a previous dinode, we can't check the number of pointers because the number of pointers may be for @@ -708,8 +778,10 @@ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass) &ref_count, &lindex, &oldleaf); - if (error) + if (error) { + free(tbl); return error; + } } error = check_leaf(ip, lindex, pass, &ref_count, &leaf_no, old_leaf, &bad_leaf, @@ -724,6 +796,8 @@ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass) (unsigned long long)leaf_no); } while (1); /* while we have chained leaf blocks */ } /* for every leaf block */ + free(tbl); + posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL); return 0; }
cluster-commits@lists.stg.fedorahosted.org