Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=de4... Commit: de4850b3764331085a4657c57fd518b3586d5b6c Parent: fd0a7e2b1ba68ef3e96fa10b54b1049cf963202e Author: Bob Peterson bob@ganesha.peterson AuthorDate: Mon Jan 25 09:03:16 2010 -0600 Committer: Bob Peterson rpeterso@redhat.com CommitterDate: Tue Jan 26 14:39:31 2010 -0600
fsck.gfs2: Check for massive amounts of pointer corruption
Sometimes, due to faulty hardware or whatever, a whole bunch of random nonsense is written into a block. If that block happens to be an indirect list of pointers, pass1 may not find the corruption for a long time. This happens when the corruption starts, for example, at offset 0x200, or if the corruption just happens to look like valid pointers for a while, like low blocks that correspond to system inodes, rgrps, or journals. If pass1 marks a whole bunch of pointers as valid, then later decides the whole inode is corrupt, it becomes a major pain to undo what it has done. For example, if it had found one of the "bad" pointers to be the statfs file's dinode and marked that as a duplicate reference, it's a pain to undo that once it becomes apparent that there's too much damage to recover.
This patch introduces a block range check function that pass1 can use to traverse the metadata tree initially, just checking for lots of damage to pointers. If there are a lot of damaged metadata pointers it's better to just mark the dinode as free space and let pass5 clean up any blocks that it referenced. If a bridge has too many damaged rungs to cross, it's better to find that out first rather than to cross half-way and have to tip-toe back to the start.
rhbz#455300 --- gfs2/fsck/fsck.h | 2 + gfs2/fsck/pass1.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 122 insertions(+), 8 deletions(-)
diff --git a/gfs2/fsck/fsck.h b/gfs2/fsck/fsck.h index 5948210..30eb223 100644 --- a/gfs2/fsck/fsck.h +++ b/gfs2/fsck/fsck.h @@ -23,6 +23,8 @@ #define FSCK_CANCELED 32 /* Aborted with a signal or ^C */ #define FSCK_LIBRARY 128 /* Shared library error */
+#define BAD_POINTER_TOLERANCE 10 /* How many bad pointers is too many? */ + struct inode_info { struct osi_node node; diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c index 04938db..4382ad9 100644 --- a/gfs2/fsck/pass1.c +++ b/gfs2/fsck/pass1.c @@ -554,6 +554,97 @@ static int clear_leaf(struct gfs2_inode *ip, uint64_t block, return 0; }
+/** + * Check for massive amounts of pointer corruption. If the block has + * lots of out-of-range pointers, we can't trust any of the pointers. + * For example, a stray pointer with a value of 0x1d might be + * corruption/nonsense, and if so, we don't want to delete an + * important file (like master or the root directory) because of it. + * We need to check for a large number of bad pointers BEFORE we start + * messing with them because we don't want to mark a block as a + * duplicate (for example) until we know if the pointers in general can + * be trusted. Thus it needs to be in a separate loop. + */ +static int rangecheck_block(struct gfs2_inode *ip, uint64_t block, + struct gfs2_buffer_head **bh, + const char *btype, void *private) +{ + long *bad_pointers = (long *)private; + uint8_t q; + + if (gfs2_check_range(ip->i_sbd, block) != 0) { + (*bad_pointers)++; + log_debug( _("Bad %s block pointer (out of range #%ld) " + "found in inode %lld (0x%llx).\n"), btype, + *bad_pointers, + (unsigned long long)ip->i_di.di_num.no_addr, + (unsigned long long)ip->i_di.di_num.no_addr); + if ((*bad_pointers) <= BAD_POINTER_TOLERANCE) + return ENOENT; + else + return -ENOENT; /* Exits check_metatree quicker */ + } + /* See how many duplicate blocks it has */ + q = block_type(block); + if (q != gfs2_block_free) { + (*bad_pointers)++; + log_debug( _("Duplicated %s block pointer (violation #%ld) " + "found in inode %lld (0x%llx).\n"), btype, + *bad_pointers, + (unsigned long long)ip->i_di.di_num.no_addr, + (unsigned long long)ip->i_di.di_num.no_addr); + if ((*bad_pointers) <= BAD_POINTER_TOLERANCE) + return ENOENT; + else + return -ENOENT; /* Exits check_metatree quicker */ + } + return 0; +} + +static int rangecheck_metadata(struct gfs2_inode *ip, uint64_t block, + struct gfs2_buffer_head **bh, void *private) +{ + return rangecheck_block(ip, block, bh, _("metadata"), private); +} + +static int rangecheck_leaf(struct gfs2_inode *ip, uint64_t block, + struct gfs2_buffer_head *bh, 
void *private) +{ + return rangecheck_block(ip, block, &bh, _("leaf"), private); +} + +static int rangecheck_data(struct gfs2_inode *ip, uint64_t block, + void *private) +{ + return rangecheck_block(ip, block, NULL, _("data"), private); +} + +static int rangecheck_eattr_indir(struct gfs2_inode *ip, uint64_t block, + uint64_t parent, + struct gfs2_buffer_head **bh, void *private) +{ + return rangecheck_block(ip, block, NULL, + _("indirect extended attribute"), + private); +} + +static int rangecheck_eattr_leaf(struct gfs2_inode *ip, uint64_t block, + uint64_t parent, struct gfs2_buffer_head **bh, + void *private) +{ + return rangecheck_block(ip, block, NULL, _("extended attribute"), + private); +} + +struct metawalk_fxns rangecheck_fxns = { + .private = NULL, + .check_metalist = rangecheck_metadata, + .check_data = rangecheck_data, + .check_leaf = rangecheck_leaf, + .check_eattr_indir = rangecheck_eattr_indir, + .check_eattr_leaf = rangecheck_eattr_leaf, +}; + static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh, uint64_t block) { @@ -562,10 +653,16 @@ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh, int error; struct block_count bc = {0}; struct metawalk_fxns invalidate_metatree = {0}; + long bad_pointers;
- invalidate_metatree.check_metalist = clear_metalist; - invalidate_metatree.check_data = clear_data; - invalidate_metatree.check_leaf = clear_leaf; + q = block_type(block); + if(q != gfs2_block_free) { + log_err( _("Found duplicate block referenced as an inode at " + "#%" PRIu64 " (0x%" PRIx64 ")\n"), block, block); + gfs2_dup_set(block); + fsck_inode_put(&ip); + return 0; + }
ip = fsck_inode_get(sdp, bh); if (ip->i_di.di_num.no_addr != block) { @@ -584,11 +681,22 @@ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh, " (0x%" PRIx64 ") not fixed\n"), block, block); }
- q = block_type(block); - if(q != gfs2_block_free) { - log_err( _("Found duplicate block referenced as an inode at " - "#%" PRIu64 " (0x%" PRIx64 ")\n"), block, block); - gfs2_dup_set(block); + bad_pointers = 0L; + + /* First, check the metadata for massive amounts of pointer corruption. + Such corruption can only lead us to ruin trying to clean it up, + so it's better to check it up front and delete the inode if + there is corruption. */ + rangecheck_fxns.private = &bad_pointers; + error = check_metatree(ip, &rangecheck_fxns); + if (bad_pointers > BAD_POINTER_TOLERANCE) { + log_err( _("Error: inode %llu (0x%llx) has more than " + "%d bad pointers.\n"), + (unsigned long long)ip->i_di.di_num.no_addr, + (unsigned long long)ip->i_di.di_num.no_addr, + BAD_POINTER_TOLERANCE); + fsck_blockmap_set(ip, ip->i_di.di_num.no_addr, + _("badly corrupt"), gfs2_block_free); fsck_inode_put(&ip); return 0; } @@ -703,6 +811,10 @@ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh, "errors; invalidating.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); + invalidate_metatree.check_metalist = clear_metalist; + invalidate_metatree.check_data = clear_data; + invalidate_metatree.check_leaf = clear_leaf; + /* FIXME: Must set all leaves invalid as well */ check_metatree(ip, &invalidate_metatree); fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
cluster-commits@lists.stg.fedorahosted.org