/* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions * of the GNU General Public License version 2. */ #include <linux/module.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> #include <linux/gfs2_ondisk.h> #include <linux/crc32.h> #include "gfs2.h" #include "incore.h" #include "bmap.h" #include "glock.h" #include "glops.h" #include "lops.h" #include "meta_io.h" #include "recovery.h" #include "super.h" #include "util.h" #include "dir.h" struct workqueue_struct *gfs_recovery_wq; int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, struct buffer_head **bh) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_glock *gl = ip->i_gl; int new = 0; u64 dblock; u32 extlen; int error; error = gfs2_extent_map(&ip->i_inode, blk, &new, &dblock, &extlen); if (error) return error; if (!dblock) { gfs2_consist_inode(ip); return -EIO; } *bh = gfs2_meta_ra(gl, dblock, extlen); return error; } int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) { struct list_head *head = &sdp->sd_revoke_list; struct gfs2_revoke_replay *rr; int found = 0; list_for_each_entry(rr, head, rr_list) { if (rr->rr_blkno == blkno) { found = 1; break; } } if (found) { rr->rr_where = where; return 0; } rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_NOFS); if (!rr) return -ENOMEM; rr->rr_blkno = blkno; rr->rr_where = where; list_add(&rr->rr_list, head); return 1; } int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) { struct gfs2_revoke_replay *rr; int wrap, a, b, revoke; int found = 0; list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) { if (rr->rr_blkno == blkno) { found = 1; break; } } if (!found) return 0; wrap = (rr->rr_where < sdp->sd_replay_tail); a = (sdp->sd_replay_tail < where); b = (where < rr->rr_where); revoke = (wrap) ? (a || b) : (a && b); return revoke; } void gfs2_revoke_clean(struct gfs2_sbd *sdp) { struct list_head *head = &sdp->sd_revoke_list; struct gfs2_revoke_replay *rr; while (!list_empty(head)) { rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list); list_del(&rr->rr_list); kfree(rr); } } static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf) { const struct gfs2_log_header *str = buf; if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) || str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH)) return 1; lh->lh_sequence = be64_to_cpu(str->lh_sequence); lh->lh_flags = be32_to_cpu(str->lh_flags); lh->lh_tail = be32_to_cpu(str->lh_tail); lh->lh_blkno = be32_to_cpu(str->lh_blkno); lh->lh_hash = be32_to_cpu(str->lh_hash); return 0; } /** * get_log_header - read the log header for a given segment * @jd: the journal * @blk: the block to look at * @lh: the log header to return * * Read the log header for a given segement in a given journal. Do a few * sanity checks on it. * * Returns: 0 on success, * 1 if the header was invalid or incomplete, * errno on error */ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk, struct gfs2_log_header_host *head) { struct buffer_head *bh; struct gfs2_log_header_host uninitialized_var(lh); const u32 nothing = 0; u32 hash; int error; error = gfs2_replay_read_block(jd, blk, &bh); if (error) return error; hash = crc32_le((u32)~0, bh->b_data, sizeof(struct gfs2_log_header) - sizeof(u32)); hash = crc32_le(hash, (unsigned char const *)¬hing, sizeof(nothing)); hash ^= (u32)~0; error = gfs2_log_header_in(&lh, bh->b_data); brelse(bh); if (error || lh.lh_blkno != blk || lh.lh_hash != hash) return 1; *head = lh; return 0; } /** * find_good_lh - find a good log header * @jd: the journal * @blk: the segment to start searching from * @lh: the log header to fill in * @forward: if true search forward in the log, else search backward * * Call get_log_header() to get a log header for a segment, but if the * segment is bad, either scan forward or backward until we find a good one. * * Returns: errno */ static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk, struct gfs2_log_header_host *head) { unsigned int orig_blk = *blk; int error; for (;;) { error = get_log_header(jd, *blk, head); if (error <= 0) return error; if (++*blk == jd->jd_blocks) *blk = 0; if (*blk == orig_blk) { gfs2_consist_inode(GFS2_I(jd->jd_inode)); return -EIO; } } } /** * jhead_scan - make sure we've found the head of the log * @jd: the journal * @head: this is filled in with the log descriptor of the head * * At this point, seg and lh should be either the head of the log or just * before. Scan forward until we find the head. * * Returns: errno */ static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) { unsigned int blk = head->lh_blkno; struct gfs2_log_header_host lh; int error; for (;;) { if (++blk == jd->jd_blocks) blk = 0; error = get_log_header(jd, blk, &lh); if (error < 0) return error; if (error == 1) continue; if (lh.lh_sequence == head->lh_sequence) { gfs2_consist_inode(GFS2_I(jd->jd_inode)); return -EIO; } if (lh.lh_sequence < head->lh_sequence) break; *head = lh; } return 0; } /** * gfs2_find_jhead - find the head of a log * @jd: the journal * @head: the log descriptor for the head of the log is returned here * * Do a binary search of a journal and find the valid log entry with the * highest sequence number. (i.e. the log head) * * Returns: errno */ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) { struct gfs2_log_header_host lh_1, lh_m; u32 blk_1, blk_2, blk_m; int error; blk_1 = 0; blk_2 = jd->jd_blocks - 1; for (;;) { blk_m = (blk_1 + blk_2) / 2; error = find_good_lh(jd, &blk_1, &lh_1); if (error) return error; error = find_good_lh(jd, &blk_m, &lh_m); if (error) return error; if (blk_1 == blk_m || blk_m == blk_2) break; if (lh_1.lh_sequence <= lh_m.lh_sequence) blk_1 = blk_m; else blk_2 = blk_m; } error = jhead_scan(jd, &lh_1); if (error) return error; *head = lh_1; return error; } /** * foreach_descriptor - go through the active part of the log * @jd: the journal * @start: the first log header in the active region * @end: the last log header (don't process the contents of this entry)) * * Call a given function once for every log descriptor in the active * portion of the log. * * Returns: errno */ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start, unsigned int end, int pass) { struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct buffer_head *bh; struct gfs2_log_descriptor *ld; int error = 0; u32 length; __be64 *ptr; unsigned int offset = sizeof(struct gfs2_log_descriptor); offset += sizeof(__be64) - 1; offset &= ~(sizeof(__be64) - 1); while (start != end) { error = gfs2_replay_read_block(jd, start, &bh); if (error) return error; if (gfs2_meta_check(sdp, bh)) { brelse(bh); return -EIO; } ld = (struct gfs2_log_descriptor *)bh->b_data; length = be32_to_cpu(ld->ld_length); if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) { struct gfs2_log_header_host lh; error = get_log_header(jd, start, &lh); if (!error) { gfs2_replay_incr_blk(sdp, &start); brelse(bh); continue; } if (error == 1) { gfs2_consist_inode(GFS2_I(jd->jd_inode)); error = -EIO; } brelse(bh); return error; } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) { brelse(bh); return -EIO; } ptr = (__be64 *)(bh->b_data + offset); error = lops_scan_elements(jd, start, ld, ptr, pass); if (error) { brelse(bh); return error; } while (length--) gfs2_replay_incr_blk(sdp, &start); brelse(bh); } return 0; } /** * clean_journal - mark a dirty journal as being clean * @sdp: the filesystem * @jd: the journal * @gl: the journal's glock * @head: the head journal to start from * * Returns: errno */ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); unsigned int lblock; struct gfs2_log_header *lh; u32 hash; struct buffer_head *bh; int error; struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; lblock = head->lh_blkno; gfs2_replay_incr_blk(sdp, &lblock); bh_map.b_size = 1 << ip->i_inode.i_blkbits; error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0); if (error) return error; if (!bh_map.b_blocknr) { gfs2_consist_inode(ip); return -EIO; } bh = sb_getblk(sdp->sd_vfs, bh_map.b_blocknr); lock_buffer(bh); memset(bh->b_data, 0, bh->b_size); set_buffer_uptodate(bh); clear_buffer_dirty(bh); unlock_buffer(bh); lh = (struct gfs2_log_header *)bh->b_data; memset(lh, 0, sizeof(struct gfs2_log_header)); lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); lh->lh_header.__pad0 = cpu_to_be64(0); lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1); lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT); lh->lh_blkno = cpu_to_be32(lblock); hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header)); lh->lh_hash = cpu_to_be32(hash); set_buffer_dirty(bh); if (sync_dirty_buffer(bh)) gfs2_io_error_bh(sdp, bh); brelse(bh); return error; } static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, unsigned int message) { char env_jid[20]; char env_status[20]; char *envp[] = { env_jid, env_status, NULL }; struct lm_lockstruct *ls = &sdp->sd_lockstruct; ls->ls_recover_jid_done = jid; ls->ls_recover_jid_status = message; sprintf(env_jid, "JID=%d", jid); sprintf(env_status, "RECOVERY=%s", message == LM_RD_SUCCESS ? "Done" : "Failed"); kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); } void gfs2_recover_func(struct work_struct *work) { struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct gfs2_log_header_host head; struct gfs2_holder j_gh, ji_gh, t_gh; unsigned long t; int ro = 0; unsigned int pass; int error; if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", jd->jd_jid); /* Acquire the journal lock so we can do recovery */ error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE, &j_gh); switch (error) { case 0: break; case GLR_TRYFAILED: fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid); error = 0; default: goto fail; }; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh); if (error) goto fail_gunlock_j; } else { fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid); } fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid); error = gfs2_jdesc_check(jd); if (error) goto fail_gunlock_ji; error = gfs2_find_jhead(jd, &head); if (error) goto fail_gunlock_ji; if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n", jd->jd_jid); t = jiffies; /* Acquire a shared hold on the transaction lock */ error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, LM_FLAG_NOEXP | LM_FLAG_PRIORITY | GL_NOCACHE, &t_gh); if (error) goto fail_gunlock_ji; if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) { if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) ro = 1; } else { if (sdp->sd_vfs->s_flags & MS_RDONLY) { /* check if device itself is read-only */ ro = bdev_read_only(sdp->sd_vfs->s_bdev); if (!ro) { fs_info(sdp, "recovery required on " "read-only filesystem.\n"); fs_info(sdp, "write access will be " "enabled during recovery.\n"); } } } if (ro) { fs_warn(sdp, "jid=%u: Can't replay: read-only block " "device\n", jd->jd_jid); error = -EROFS; goto fail_gunlock_tr; } fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid); for (pass = 0; pass < 2; pass++) { lops_before_scan(jd, &head, pass); error = foreach_descriptor(jd, head.lh_tail, head.lh_blkno, pass); lops_after_scan(jd, error, pass); if (error) goto fail_gunlock_tr; } error = clean_journal(jd, &head); if (error) goto fail_gunlock_tr; gfs2_glock_dq_uninit(&t_gh); t = DIV_ROUND_UP(jiffies - t, HZ); fs_info(sdp, "jid=%u: Journal replayed in %lus\n", jd->jd_jid, t); } if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) gfs2_glock_dq_uninit(&ji_gh); gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS); if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) gfs2_glock_dq_uninit(&j_gh); fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); goto done; fail_gunlock_tr: gfs2_glock_dq_uninit(&t_gh); fail_gunlock_ji: if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { gfs2_glock_dq_uninit(&ji_gh); fail_gunlock_j: gfs2_glock_dq_uninit(&j_gh); } fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done"); fail: gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); done: clear_bit(JDF_RECOVERY, &jd->jd_flags); smp_mb__after_clear_bit(); wake_up_bit(&jd->jd_flags, JDF_RECOVERY); } static int gfs2_recovery_wait(void *word) { schedule(); return 0; } int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait) { int rv; if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags)) return -EBUSY; /* we have JDF_RECOVERY, queue should always succeed */ rv = queue_work(gfs_recovery_wq, &jd->jd_work); BUG_ON(!rv); if (wait) wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE); return 0; }