diff options
Diffstat (limited to 'fs/gfs2')
-rw-r--r-- | fs/gfs2/Makefile | 6 | ||||
-rw-r--r-- | fs/gfs2/acl.c | 7 | ||||
-rw-r--r-- | fs/gfs2/aops.c | 14 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 24 | ||||
-rw-r--r-- | fs/gfs2/dentry.c | 2 | ||||
-rw-r--r-- | fs/gfs2/dir.c | 199 | ||||
-rw-r--r-- | fs/gfs2/dir.h | 4 | ||||
-rw-r--r-- | fs/gfs2/export.c | 10 | ||||
-rw-r--r-- | fs/gfs2/file.c | 181 | ||||
-rw-r--r-- | fs/gfs2/glock.c | 463 | ||||
-rw-r--r-- | fs/gfs2/glock.h | 38 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 203 | ||||
-rw-r--r-- | fs/gfs2/glops.h | 2 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 15 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 1567 | ||||
-rw-r--r-- | fs/gfs2/inode.h | 11 | ||||
-rw-r--r-- | fs/gfs2/lock_dlm.c | 14 | ||||
-rw-r--r-- | fs/gfs2/log.c | 228 | ||||
-rw-r--r-- | fs/gfs2/log.h | 2 | ||||
-rw-r--r-- | fs/gfs2/lops.c | 55 | ||||
-rw-r--r-- | fs/gfs2/main.c | 20 | ||||
-rw-r--r-- | fs/gfs2/meta_io.c | 7 | ||||
-rw-r--r-- | fs/gfs2/meta_io.h | 2 | ||||
-rw-r--r-- | fs/gfs2/ops_fstype.c | 45 | ||||
-rw-r--r-- | fs/gfs2/ops_inode.c | 1344 | ||||
-rw-r--r-- | fs/gfs2/quota.c | 26 | ||||
-rw-r--r-- | fs/gfs2/quota.h | 4 | ||||
-rw-r--r-- | fs/gfs2/rgrp.c | 62 | ||||
-rw-r--r-- | fs/gfs2/rgrp.h | 2 | ||||
-rw-r--r-- | fs/gfs2/super.c | 154 | ||||
-rw-r--r-- | fs/gfs2/sys.c | 6 | ||||
-rw-r--r-- | fs/gfs2/trace_gfs2.h | 38 |
32 files changed, 2333 insertions, 2422 deletions
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index 21f7e46da4c..86128202384 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -1,9 +1,9 @@ -EXTRA_CFLAGS := -I$(src) +ccflags-y := -I$(src) obj-$(CONFIG_GFS2_FS) += gfs2.o gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \ - glops.o inode.o log.o lops.o main.o meta_io.o \ + glops.o log.o lops.o main.o meta_io.o \ aops.o dentry.o export.o file.o \ - ops_fstype.o ops_inode.o quota.o \ + ops_fstype.o inode.o quota.o \ recovery.o rgrp.o super.o sys.o trans.o util.o gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 7118f1a780a..cbc07155b1a 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -80,8 +80,11 @@ int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags) struct posix_acl *acl; int error; - if (flags & IPERM_FLAG_RCU) - return -ECHILD; + if (flags & IPERM_FLAG_RCU) { + if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) + return -ECHILD; + return -EAGAIN; + } acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); if (IS_ERR(acl)) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 4f36f8832b9..802ac5eeba2 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -695,6 +695,7 @@ out: if (error == 0) return 0; + unlock_page(page); page_cache_release(page); gfs2_trans_end(sdp); @@ -883,8 +884,8 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, } brelse(dibh); - gfs2_trans_end(sdp); failed: + gfs2_trans_end(sdp); if (al) { gfs2_inplace_release(ip); gfs2_quota_unlock(ip); @@ -1075,8 +1076,8 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) bd = bh->b_private; if (bd && bd->bd_ail) goto cannot_release; - gfs2_assert_warn(sdp, !buffer_pinned(bh)); - gfs2_assert_warn(sdp, !buffer_dirty(bh)); + if (buffer_pinned(bh) || buffer_dirty(bh)) + goto not_possible; bh = bh->b_this_page; } while(bh != head); gfs2_log_unlock(sdp); @@ -1106,6 +1107,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) } while (bh != head); return try_to_free_buffers(page); + +not_possible: /* Should never happen */ + WARN_ON(buffer_dirty(bh)); + WARN_ON(buffer_pinned(bh)); cannot_release: gfs2_log_unlock(sdp); return 0; @@ -1116,7 +1121,6 @@ static const struct address_space_operations gfs2_writeback_aops = { .writepages = gfs2_writeback_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, - .sync_page = block_sync_page, .write_begin = gfs2_write_begin, .write_end = gfs2_write_end, .bmap = gfs2_bmap, @@ -1132,7 +1136,6 @@ static const struct address_space_operations gfs2_ordered_aops = { .writepage = gfs2_ordered_writepage, .readpage = gfs2_readpage, .readpages = gfs2_readpages, - .sync_page = block_sync_page, .write_begin = gfs2_write_begin, .write_end = gfs2_write_end, .set_page_dirty = gfs2_set_page_dirty, @@ -1150,7 +1153,6 @@ static const struct address_space_operations gfs2_jdata_aops = { .writepages = gfs2_jdata_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, - .sync_page = block_sync_page, .write_begin = gfs2_write_begin, .write_end = gfs2_write_end, .set_page_dirty = gfs2_set_page_dirty, diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 3c4039d5eef..e65493a8ac0 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -21,6 +21,7 @@ #include "meta_io.h" #include "quota.h" #include "rgrp.h" +#include "super.h" #include "trans.h" #include "dir.h" #include "util.h" @@ -757,7 +758,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrp_list rlist; u64 bn, bstart; - u32 blen; + u32 blen, btotal; __be64 *p; unsigned int rg_blocks = 0; int metadata; @@ -779,6 +780,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, metadata = (height != ip->i_height - 1); if (metadata) revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; + else if (ip->i_depth) + revokes = sdp->sd_inptrs; if (ip != GFS2_I(sdp->sd_rindex)) error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); @@ -839,6 +842,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, bstart = 0; blen = 0; + btotal = 0; for (p = top; p < bottom; p++) { if (!*p) @@ -851,9 +855,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, else { if (bstart) { if (metadata) - gfs2_free_meta(ip, bstart, blen); + __gfs2_free_meta(ip, bstart, blen); else - gfs2_free_data(ip, bstart, blen); + __gfs2_free_data(ip, bstart, blen); + + btotal += blen; } bstart = bn; @@ -865,11 +871,17 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, } if (bstart) { if (metadata) - gfs2_free_meta(ip, bstart, blen); + __gfs2_free_meta(ip, bstart, blen); else - gfs2_free_data(ip, bstart, blen); + __gfs2_free_data(ip, bstart, blen); + + btotal += blen; } + gfs2_statfs_change(sdp, 0, +btotal, 0); + gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, + ip->i_inode.i_gid); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; gfs2_dinode_out(ip, dibh->b_data); @@ -1126,7 +1138,7 @@ void gfs2_trim_blocks(struct inode *inode) * earlier versions of GFS2 have a bug in the stuffed file reading * code which will result in a buffer overrun if the size is larger * than the max stuffed file size. In order to prevent this from - * occuring, such files are unstuffed, but in other cases we can + * occurring, such files are unstuffed, but in other cases we can * just update the inode size directly. * * Returns: 0 on success, or -ve on error diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 4a456338b87..0da8da2c991 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) int error; int had_lock = 0; - if (nd->flags & LOOKUP_RCU) + if (nd && nd->flags & LOOKUP_RCU) return -ECHILD; parent = dget_parent(dentry); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 5c356d09c32..091ee477953 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -82,12 +82,9 @@ struct qstr gfs2_qdot __read_mostly; struct qstr gfs2_qdotdot __read_mostly; -typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len, - u64 leaf_no, void *data); typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, const struct qstr *name, void *opaque); - int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, struct buffer_head **bhp) { @@ -1506,7 +1503,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name) inode = gfs2_inode_lookup(dir->i_sb, be16_to_cpu(dent->de_type), be64_to_cpu(dent->de_inum.no_addr), - be64_to_cpu(dent->de_inum.no_formal_ino)); + be64_to_cpu(dent->de_inum.no_formal_ino), 0); brelse(bh); return inode; } @@ -1600,7 +1597,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) */ int gfs2_dir_add(struct inode *inode, const struct qstr *name, - const struct gfs2_inode *nip, unsigned type) + const struct gfs2_inode *nip) { struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *bh; @@ -1616,7 +1613,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, return PTR_ERR(dent); dent = gfs2_init_dirent(inode, dent, name, bh); gfs2_inum_out(nip, dent); - dent->de_type = cpu_to_be16(type); + dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); if (ip->i_diskflags & GFS2_DIF_EXHASH) { leaf = (struct gfs2_leaf *)bh->b_data; be16_add_cpu(&leaf->lf_entries, 1); @@ -1628,6 +1625,8 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, gfs2_trans_add_bh(ip->i_gl, bh, 1); ip->i_entries++; ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; + if (S_ISDIR(nip->i_inode.i_mode)) + inc_nlink(&ip->i_inode); gfs2_dinode_out(ip, bh->b_data); brelse(bh); error = 0; @@ -1672,8 +1671,9 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, * Returns: 0 on success, error code on failure */ -int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) +int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) { + const struct qstr *name = &dentry->d_name; struct gfs2_dirent *dent, *prev = NULL; struct buffer_head *bh; int error; @@ -1714,6 +1714,8 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) gfs2_trans_add_bh(dip->i_gl, bh, 1); dip->i_entries--; dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; + if (S_ISDIR(dentry->d_inode->i_mode)) + drop_nlink(&dip->i_inode); gfs2_dinode_out(dip, bh->b_data); brelse(bh); mark_inode_dirty(&dip->i_inode); @@ -1768,94 +1770,20 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, } /** - * foreach_leaf - call a function for each leaf in a directory - * @dip: the directory - * @lc: the function to call for each each - * @data: private data to pass to it - * - * Returns: errno - */ - -static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) -{ - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct buffer_head *bh; - struct gfs2_leaf *leaf; - u32 hsize, len; - u32 ht_offset, lp_offset, ht_offset_cur = -1; - u32 index = 0; - __be64 *lp; - u64 leaf_no; - int error = 0; - - hsize = 1 << dip->i_depth; - if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) { - gfs2_consist_inode(dip); - return -EIO; - } - - lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); - if (!lp) - return -ENOMEM; - - while (index < hsize) { - lp_offset = index & (sdp->sd_hash_ptrs - 1); - ht_offset = index - lp_offset; - - if (ht_offset_cur != ht_offset) { - error = gfs2_dir_read_data(dip, (char *)lp, - ht_offset * sizeof(__be64), - sdp->sd_hash_bsize, 1); - if (error != sdp->sd_hash_bsize) { - if (error >= 0) - error = -EIO; - goto out; - } - ht_offset_cur = ht_offset; - } - - leaf_no = be64_to_cpu(lp[lp_offset]); - if (leaf_no) { - error = get_leaf(dip, leaf_no, &bh); - if (error) - goto out; - leaf = (struct gfs2_leaf *)bh->b_data; - len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth)); - brelse(bh); - - error = lc(dip, index, len, leaf_no, data); - if (error) - goto out; - - index = (index & ~(len - 1)) + len; - } else - index++; - } - - if (index != hsize) { - gfs2_consist_inode(dip); - error = -EIO; - } - -out: - kfree(lp); - - return error; -} - -/** * leaf_dealloc - Deallocate a directory leaf * @dip: the directory * @index: the hash table offset in the directory * @len: the number of pointers to this leaf * @leaf_no: the leaf number - * @data: not used + * @leaf_bh: buffer_head for the starting leaf + * last_dealloc: 1 if this is the final dealloc for the leaf, else 0 * * Returns: errno */ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, - u64 leaf_no, void *data) + u64 leaf_no, struct buffer_head *leaf_bh, + int last_dealloc) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_leaf *tmp_leaf; @@ -1887,14 +1815,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, goto out_qs; /* Count the number of leaves */ + bh = leaf_bh; for (blk = leaf_no; blk; blk = nblk) { - error = get_leaf(dip, blk, &bh); - if (error) - goto out_rlist; + if (blk != leaf_no) { + error = get_leaf(dip, blk, &bh); + if (error) + goto out_rlist; + } tmp_leaf = (struct gfs2_leaf *)bh->b_data; nblk = be64_to_cpu(tmp_leaf->lf_next); - brelse(bh); + if (blk != leaf_no) + brelse(bh); gfs2_rlist_add(sdp, &rlist, blk); l_blocks++; @@ -1918,13 +1850,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, if (error) goto out_rg_gunlock; + bh = leaf_bh; + for (blk = leaf_no; blk; blk = nblk) { - error = get_leaf(dip, blk, &bh); - if (error) - goto out_end_trans; + if (blk != leaf_no) { + error = get_leaf(dip, blk, &bh); + if (error) + goto out_end_trans; + } tmp_leaf = (struct gfs2_leaf *)bh->b_data; nblk = be64_to_cpu(tmp_leaf->lf_next); - brelse(bh); + if (blk != leaf_no) + brelse(bh); gfs2_free_meta(dip, blk, 1); gfs2_add_inode_blocks(&dip->i_inode, -1); @@ -1942,6 +1879,10 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, goto out_end_trans; gfs2_trans_add_bh(dip->i_gl, dibh, 1); + /* On the last dealloc, make this a regular file in case we crash. + (We don't want to free these blocks a second time.) */ + if (last_dealloc) + dip->i_inode.i_mode = S_IFREG; gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); @@ -1975,29 +1916,67 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct buffer_head *bh; - int error; + struct gfs2_leaf *leaf; + u32 hsize, len; + u32 ht_offset, lp_offset, ht_offset_cur = -1; + u32 index = 0, next_index; + __be64 *lp; + u64 leaf_no; + int error = 0, last; - /* Dealloc on-disk leaves to FREEMETA state */ - error = foreach_leaf(dip, leaf_dealloc, NULL); - if (error) - return error; + hsize = 1 << dip->i_depth; + if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) { + gfs2_consist_inode(dip); + return -EIO; + } - /* Make this a regular file in case we crash. - (We don't want to free these blocks a second time.) */ + lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); + if (!lp) + return -ENOMEM; - error = gfs2_trans_begin(sdp, RES_DINODE, 0); - if (error) - return error; + while (index < hsize) { + lp_offset = index & (sdp->sd_hash_ptrs - 1); + ht_offset = index - lp_offset; - error = gfs2_meta_inode_buffer(dip, &bh); - if (!error) { - gfs2_trans_add_bh(dip->i_gl, bh, 1); - ((struct gfs2_dinode *)bh->b_data)->di_mode = - cpu_to_be32(S_IFREG); - brelse(bh); + if (ht_offset_cur != ht_offset) { + error = gfs2_dir_read_data(dip, (char *)lp, + ht_offset * sizeof(__be64), + sdp->sd_hash_bsize, 1); + if (error != sdp->sd_hash_bsize) { + if (error >= 0) + error = -EIO; + goto out; + } + ht_offset_cur = ht_offset; + } + + leaf_no = be64_to_cpu(lp[lp_offset]); + if (leaf_no) { + error = get_leaf(dip, leaf_no, &bh); + if (error) + goto out; + leaf = (struct gfs2_leaf *)bh->b_data; + len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth)); + + next_index = (index & ~(len - 1)) + len; + last = ((next_index >= hsize) ? 1 : 0); + error = leaf_dealloc(dip, index, len, leaf_no, bh, + last); + brelse(bh); + if (error) + goto out; + index = next_index; + } else + index++; } - gfs2_trans_end(sdp); + if (index != hsize) { + gfs2_consist_inode(dip); + error = -EIO; + } + +out: + kfree(lp); return error; } diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index a98f644bd3d..e686af11bec 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -22,8 +22,8 @@ extern struct inode *gfs2_dir_search(struct inode *dir, extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, const struct gfs2_inode *ip); extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, - const struct gfs2_inode *ip, unsigned int type); -extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); + const struct gfs2_inode *ip); +extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, filldir_t filldir); extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 9023db8184f..fe9945f2ff7 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -36,9 +36,13 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, struct super_block *sb = inode->i_sb; struct gfs2_inode *ip = GFS2_I(inode); - if (*len < GFS2_SMALL_FH_SIZE || - (connectable && *len < GFS2_LARGE_FH_SIZE)) + if (connectable && (*len < GFS2_LARGE_FH_SIZE)) { + *len = GFS2_LARGE_FH_SIZE; return 255; + } else if (*len < GFS2_SMALL_FH_SIZE) { + *len = GFS2_SMALL_FH_SIZE; + return 255; + } fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); @@ -135,7 +139,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, struct gfs2_sbd *sdp = sb->s_fs_info; struct inode *inode; - inode = gfs2_ilookup(sb, inum->no_addr); + inode = gfs2_ilookup(sb, inum->no_addr, 0); if (inode) { if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { iput(inode); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 7cfdcb91336..a9f5cbe45cd 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -221,7 +221,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) goto out_drop_write; error = -EACCES; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) goto out; error = 0; @@ -448,15 +448,20 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) { struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - if (!(file->f_flags & O_NOATIME)) { + if (!(file->f_flags & O_NOATIME) && + !IS_NOATIME(&ip->i_inode)) { struct gfs2_holder i_gh; int error; - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); error = gfs2_glock_nq(&i_gh); - file_accessed(file); - if (error == 0) - gfs2_glock_dq_uninit(&i_gh); + if (error == 0) { + file_accessed(file); + gfs2_glock_dq(&i_gh); + } + gfs2_holder_uninit(&i_gh); + if (error) + return error; } vma->vm_ops = &gfs2_vm_ops; vma->vm_flags |= VM_CAN_NONLINEAR; @@ -540,18 +545,10 @@ static int gfs2_close(struct inode *inode, struct file *file) /** * gfs2_fsync - sync the dirty data for a file (across the cluster) * @file: the file that points to the dentry (we ignore this) - * @dentry: the dentry that points to the inode to sync - * - * The VFS will flush "normal" data for us. We only need to worry - * about metadata here. For journaled data, we just do a log flush - * as we can't avoid it. Otherwise we can just bale out if datasync - * is set. For stuffed inodes we must flush the log in order to - * ensure that all data is on disk. + * @datasync: set if we can ignore timestamp changes * - * The call to write_inode_now() is there to write back metadata and - * the inode itself. It does also try and write the data, but thats - * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite() - * for us. + * The VFS will flush data for us. We only need to worry + * about metadata here. * * Returns: errno */ @@ -560,22 +557,20 @@ static int gfs2_fsync(struct file *file, int datasync) { struct inode *inode = file->f_mapping->host; int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); - int ret = 0; - - if (gfs2_is_jdata(GFS2_I(inode))) { - gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); - return 0; - } + struct gfs2_inode *ip = GFS2_I(inode); + int ret; - if (sync_state != 0) { - if (!datasync) - ret = write_inode_now(inode, 0); + if (datasync) + sync_state &= ~I_DIRTY_SYNC; - if (gfs2_is_stuffed(GFS2_I(inode))) - gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); + if (sync_state) { + ret = sync_inode_metadata(inode, 1); + if (ret) + return ret; + gfs2_ail_flush(ip->i_gl); } - return ret; + return 0; } /** @@ -612,66 +607,110 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, return generic_file_aio_write(iocb, iov, nr_segs, pos); } -static void empty_write_end(struct page *page, unsigned from, - unsigned to) +static int empty_write_end(struct page *page, unsigned from, + unsigned to, int mode) { - struct gfs2_inode *ip = GFS2_I(page->mapping->host); + struct inode *inode = page->mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + struct buffer_head *bh; + unsigned offset, blksize = 1 << inode->i_blkbits; + pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT; - page_zero_new_buffers(page, from, to); - flush_dcache_page(page); + zero_user(page, from, to-from); mark_page_accessed(page); - if (!gfs2_is_writeback(ip)) - gfs2_page_add_databufs(ip, page, from, to); + if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) { + if (!gfs2_is_writeback(ip)) + gfs2_page_add_databufs(ip, page, from, to); + + block_commit_write(page, from, to); + return 0; + } + + offset = 0; + bh = page_buffers(page); + while (offset < to) { + if (offset >= from) { + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + clear_buffer_new(bh); + write_dirty_buffer(bh, WRITE); + } + offset += blksize; + bh = bh->b_this_page; + } - block_commit_write(page, from, to); + offset = 0; + bh = page_buffers(page); + while (offset < to) { + if (offset >= from) { + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + return -EIO; + } + offset += blksize; + bh = bh->b_this_page; + } + return 0; } -static int write_empty_blocks(struct page *page, unsigned from, unsigned to) +static int needs_empty_write(sector_t block, struct inode *inode) { - unsigned start, end, next; - struct buffer_head *bh, *head; int error; + struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; - if (!page_has_buffers(page)) { - error = __block_write_begin(page, from, to - from, gfs2_block_map); - if (unlikely(error)) - return error; + bh_map.b_size = 1 << inode->i_blkbits; + error = gfs2_block_map(inode, block, &bh_map, 0); + if (unlikely(error)) + return error; + return !buffer_mapped(&bh_map); +} - empty_write_end(page, from, to); - return 0; - } +static int write_empty_blocks(struct page *page, unsigned from, unsigned to, + int mode) +{ + struct inode *inode = page->mapping->host; + unsigned start, end, next, blksize; + sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + int ret; - bh = head = page_buffers(page); + blksize = 1 << inode->i_blkbits; next = end = 0; while (next < from) { - next += bh->b_size; - bh = bh->b_this_page; + next += blksize; + block++; } start = next; do { - next += bh->b_size; - if (buffer_mapped(bh)) { + next += blksize; + ret = needs_empty_write(block, inode); + if (unlikely(ret < 0)) + return ret; + if (ret == 0) { if (end) { - error = __block_write_begin(page, start, end - start, - gfs2_block_map); - if (unlikely(error)) - return error; - empty_write_end(page, start, end); + ret = __block_write_begin(page, start, end - start, + gfs2_block_map); + if (unlikely(ret)) + return ret; + ret = empty_write_end(page, start, end, mode); + if (unlikely(ret)) + return ret; end = 0; } start = next; } else end = next; - bh = bh->b_this_page; + block++; } while (next < to); if (end) { - error = __block_write_begin(page, start, end - start, gfs2_block_map); - if (unlikely(error)) - return error; - empty_write_end(page, start, end); + ret = __block_write_begin(page, start, end - start, gfs2_block_map); + if (unlikely(ret)) + return ret; + ret = empty_write_end(page, start, end, mode); + if (unlikely(ret)) + return ret; } return 0; @@ -720,7 +759,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, if (curr == end) to = end_offset; - error = write_empty_blocks(page, from, to); + error = write_empty_blocks(page, from, to, mode); if (!error && offset + to > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE)) { i_size_write(inode, offset + to); @@ -777,6 +816,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t bytes, max_bytes; struct gfs2_alloc *al; int error; + loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; next = (next + 1) << sdp->sd_sb.sb_bsize_shift; @@ -784,13 +824,15 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (mode & ~FALLOC_FL_KEEP_SIZE) return -EOPNOTSUPP; - offset = (offset >> sdp->sd_sb.sb_bsize_shift) << - sdp->sd_sb.sb_bsize_shift; + offset &= bsize_mask; len = next - offset; bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; if (!bytes) bytes = UINT_MAX; + bytes &= bsize_mask; + if (bytes == 0) + bytes = sdp->sd_sb.sb_bsize; gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); error = gfs2_glock_nq(&ip->i_gh); @@ -821,6 +863,9 @@ retry: if (error) { if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { bytes >>= 1; + bytes &= bsize_mask; + if (bytes == 0) + bytes = sdp->sd_sb.sb_bsize; goto retry; } goto out_qunlock; @@ -976,8 +1021,10 @@ static void do_unflock(struct file *file, struct file_lock *fl) mutex_lock(&fp->f_fl_mutex); flock_lock_file_wait(file, fl); - if (fl_gh->gh_gl) - gfs2_glock_dq_uninit(fl_gh); + if (fl_gh->gh_gl) { + gfs2_glock_dq_wait(fl_gh); + gfs2_holder_uninit(fl_gh); + } mutex_unlock(&fp->f_fl_mutex); } diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 08a8beb152e..2792a790e50 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -26,6 +26,9 @@ #include <linux/freezer.h> #include <linux/workqueue.h> #include <linux/jiffies.h> +#include <linux/rcupdate.h> +#include <linux/rculist_bl.h> +#include <linux/bit_spinlock.h> #include "gfs2.h" #include "incore.h" @@ -41,10 +44,6 @@ #define CREATE_TRACE_POINTS #include "trace_gfs2.h" -struct gfs2_gl_hash_bucket { - struct hlist_head hb_list; -}; - struct gfs2_glock_iter { int hash; /* hash bucket index */ struct gfs2_sbd *sdp; /* incore superblock */ @@ -54,7 +53,6 @@ struct gfs2_glock_iter { typedef void (*glock_examiner) (struct gfs2_glock * gl); -static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); @@ -70,57 +68,9 @@ static DEFINE_SPINLOCK(lru_lock); #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) #define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) -static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE]; +static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE]; static struct dentry *gfs2_root; -/* - * Despite what you might think, the numbers below are not arbitrary :-) - * They are taken from the ipv4 routing hash code, which is well tested - * and thus should be nearly optimal. Later on we might tweek the numbers - * but for now this should be fine. - * - * The reason for putting the locks in a separate array from the list heads - * is that we can have fewer locks than list heads and save memory. We use - * the same hash function for both, but with a different hash mask. - */ -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ - defined(CONFIG_PROVE_LOCKING) - -#ifdef CONFIG_LOCKDEP -# define GL_HASH_LOCK_SZ 256 -#else -# if NR_CPUS >= 32 -# define GL_HASH_LOCK_SZ 4096 -# elif NR_CPUS >= 16 -# define GL_HASH_LOCK_SZ 2048 -# elif NR_CPUS >= 8 -# define GL_HASH_LOCK_SZ 1024 -# elif NR_CPUS >= 4 -# define GL_HASH_LOCK_SZ 512 -# else -# define GL_HASH_LOCK_SZ 256 -# endif -#endif - -/* We never want more locks than chains */ -#if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ -# undef GL_HASH_LOCK_SZ -# define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE -#endif - -static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ]; - -static inline rwlock_t *gl_lock_addr(unsigned int x) -{ - return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)]; -} -#else /* not SMP, so no spinlocks required */ -static inline rwlock_t *gl_lock_addr(unsigned int x) -{ - return NULL; -} -#endif - /** * gl_hash() - Turn glock number into hash bucket number * @lock: The glock number @@ -141,25 +91,33 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp, return h; } -/** - * glock_free() - Perform a few checks and then release struct gfs2_glock - * @gl: The glock to release - * - * Also calls lock module to release its internal structure for this glock. - * - */ +static inline void spin_lock_bucket(unsigned int hash) +{ + hlist_bl_lock(&gl_hash_table[hash]); +} + +static inline void spin_unlock_bucket(unsigned int hash) +{ + hlist_bl_unlock(&gl_hash_table[hash]); +} + +static void gfs2_glock_dealloc(struct rcu_head *rcu) +{ + struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); + + if (gl->gl_ops->go_flags & GLOF_ASPACE) + kmem_cache_free(gfs2_glock_aspace_cachep, gl); + else + kmem_cache_free(gfs2_glock_cachep, gl); +} -static void glock_free(struct gfs2_glock *gl) +void gfs2_glock_free(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; - struct address_space *mapping = gfs2_glock2aspace(gl); - struct kmem_cache *cachep = gfs2_glock_cachep; - GLOCK_BUG_ON(gl, mapping && mapping->nrpages); - trace_gfs2_glock_put(gl); - if (mapping) - cachep = gfs2_glock_aspace_cachep; - sdp->sd_lockstruct.ls_ops->lm_put_lock(cachep, gl); + call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); } /** @@ -194,28 +152,47 @@ static int demote_ok(const struct gfs2_glock *gl) return 1; } -/** - * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list - * @gl: the glock - * - */ -static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) +void gfs2_glock_add_to_lru(struct gfs2_glock *gl) { - int may_reclaim; - may_reclaim = (demote_ok(gl) && - (atomic_read(&gl->gl_ref) == 1 || - (gl->gl_name.ln_type == LM_TYPE_INODE && - atomic_read(&gl->gl_ref) <= 2))); spin_lock(&lru_lock); - if (list_empty(&gl->gl_lru) && may_reclaim) { - list_add_tail(&gl->gl_lru, &lru_list); + + if (!list_empty(&gl->gl_lru)) + list_del_init(&gl->gl_lru); + else atomic_inc(&lru_count); + + list_add_tail(&gl->gl_lru, &lru_list); + set_bit(GLF_LRU, &gl->gl_flags); + spin_unlock(&lru_lock); +} + +static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) +{ + spin_lock(&lru_lock); + if (!list_empty(&gl->gl_lru)) { + list_del_init(&gl->gl_lru); + atomic_dec(&lru_count); + clear_bit(GLF_LRU, &gl->gl_flags); } spin_unlock(&lru_lock); } /** + * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list + * @gl: the glock + * + * If the glock is demotable, then we add it (or move it) to the end + * of the glock LRU list. + */ + +static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) +{ + if (demote_ok(gl)) + gfs2_glock_add_to_lru(gl); +} + +/** * gfs2_glock_put_nolock() - Decrement reference count on glock * @gl: The glock to put * @@ -227,7 +204,6 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl) { if (atomic_dec_and_test(&gl->gl_ref)) GLOCK_BUG_ON(gl, 1); - gfs2_glock_schedule_for_reclaim(gl); } /** @@ -236,30 +212,21 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl) * */ -int gfs2_glock_put(struct gfs2_glock *gl) +void gfs2_glock_put(struct gfs2_glock *gl) { - int rv = 0; + struct gfs2_sbd *sdp = gl->gl_sbd; + struct address_space *mapping = gfs2_glock2aspace(gl); - write_lock(gl_lock_addr(gl->gl_hash)); - if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { - hlist_del(&gl->gl_list); - if (!list_empty(&gl->gl_lru)) { - list_del_init(&gl->gl_lru); - atomic_dec(&lru_count); - } - spin_unlock(&lru_lock); - write_unlock(gl_lock_addr(gl->gl_hash)); + if (atomic_dec_and_test(&gl->gl_ref)) { + spin_lock_bucket(gl->gl_hash); + hlist_bl_del_rcu(&gl->gl_list); + spin_unlock_bucket(gl->gl_hash); + gfs2_glock_remove_from_lru(gl); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); - glock_free(gl); - rv = 1; - goto out; + GLOCK_BUG_ON(gl, mapping && mapping->nrpages); + trace_gfs2_glock_put(gl); + sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); } - spin_lock(&gl->gl_spin); - gfs2_glock_schedule_for_reclaim(gl); - spin_unlock(&gl->gl_spin); - write_unlock(gl_lock_addr(gl->gl_hash)); -out: - return rv; } /** @@ -275,17 +242,15 @@ static struct gfs2_glock *search_bucket(unsigned int hash, const struct lm_lockname *name) { struct gfs2_glock *gl; - struct hlist_node *h; + struct hlist_bl_node *h; - hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) { + hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) { if (!lm_name_equal(&gl->gl_name, name)) continue; if (gl->gl_sbd != sdp) continue; - - atomic_inc(&gl->gl_ref); - - return gl; + if (atomic_inc_not_zero(&gl->gl_ref)) + return gl; } return NULL; @@ -576,11 +541,6 @@ __acquires(&gl->gl_spin) clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); gfs2_glock_hold(gl); - if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED || - gl->gl_state == LM_ST_DEFERRED) && - !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) - lck_flags |= LM_FLAG_TRY_1CB; - if (sdp->sd_lockstruct.ls_ops->lm_lock) { /* lock_dlm */ ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); @@ -682,7 +642,7 @@ static void delete_work_func(struct work_struct *work) /* Note: Unsafe to dereference ip as we don't hold right refs/locks */ if (ip) - inode = gfs2_ilookup(sdp->sd_vfs, no_addr); + inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1); else inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); if (inode && !IS_ERR(inode)) { @@ -743,10 +703,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, struct gfs2_glock *gl, *tmp; unsigned int hash = gl_hash(sdp, &name); struct address_space *mapping; + struct kmem_cache *cachep; - read_lock(gl_lock_addr(hash)); + rcu_read_lock(); gl = search_bucket(hash, sdp, &name); - read_unlock(gl_lock_addr(hash)); + rcu_read_unlock(); *glp = gl; if (gl) @@ -755,9 +716,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, return -ENOENT; if (glops->go_flags & GLOF_ASPACE) - gl = kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_KERNEL); + cachep = gfs2_glock_aspace_cachep; else - gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL); + cachep = gfs2_glock_cachep; + gl = kmem_cache_alloc(cachep, GFP_KERNEL); if (!gl) return -ENOMEM; @@ -790,15 +752,16 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, mapping->writeback_index = 0; } - write_lock(gl_lock_addr(hash)); + spin_lock_bucket(hash); tmp = search_bucket(hash, sdp, &name); if (tmp) { - write_unlock(gl_lock_addr(hash)); - glock_free(gl); + spin_unlock_bucket(hash); + kmem_cache_free(cachep, gl); + atomic_dec(&sdp->sd_glock_disposal); gl = tmp; } else { - hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list); - write_unlock(gl_lock_addr(hash)); + hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]); + spin_unlock_bucket(hash); } *glp = gl; @@ -1007,13 +970,13 @@ fail: insert_pt = &gh2->gh_list; } set_bit(GLF_QUEUED, &gl->gl_flags); + trace_gfs2_glock_queue(gh, 1); if (likely(insert_pt == NULL)) { list_add_tail(&gh->gh_list, &gl->gl_holders); if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) goto do_cancel; return; } - trace_gfs2_glock_queue(gh, 1); list_add_tail(&gh->gh_list, insert_pt); do_cancel: gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); @@ -1056,6 +1019,9 @@ int gfs2_glock_nq(struct gfs2_holder *gh) if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) return -EIO; + if (test_bit(GLF_LRU, &gl->gl_flags)) + gfs2_glock_remove_from_lru(gl); + spin_lock(&gl->gl_spin); add_to_queue(gh); if ((LM_FLAG_NOEXP & gh->gh_flags) && @@ -1113,6 +1079,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh) !test_bit(GLF_DEMOTE, &gl->gl_flags)) fast_path = 1; } + if (!test_bit(GLF_LFLUSH, &gl->gl_flags)) + __gfs2_glock_schedule_for_reclaim(gl); trace_gfs2_glock_queue(gh, 0); spin_unlock(&gl->gl_spin); if (likely(fast_path)) @@ -1151,7 +1119,7 @@ void gfs2_glock_dq_uninit(struct gfs2_holder *gh) * @number: the lock number * @glops: the glock operations for the type of glock * @state: the state to acquire the glock in - * @flags: modifier flags for the aquisition + * @flags: modifier flags for the acquisition * @gh: the struct gfs2_holder * * Returns: errno @@ -1276,10 +1244,8 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs) void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) { - unsigned int x; - - for (x = 0; x < num_gh; x++) - gfs2_glock_dq(&ghs[x]); + while (num_gh--) + gfs2_glock_dq(&ghs[num_gh]); } /** @@ -1291,10 +1257,8 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) { - unsigned int x; - - for (x = 0; x < num_gh; x++) - gfs2_glock_dq_uninit(&ghs[x]); + while (num_gh--) + gfs2_glock_dq_uninit(&ghs[num_gh]); } void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) @@ -1382,11 +1346,14 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) } -static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int gfs2_shrink_glock_memory(struct shrinker *shrink, + struct shrink_control *sc) { struct gfs2_glock *gl; int may_demote; int nr_skipped = 0; + int nr = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; LIST_HEAD(skipped); if (nr == 0) @@ -1399,6 +1366,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m while(nr && !list_empty(&lru_list)) { gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); list_del_init(&gl->gl_lru); + clear_bit(GLF_LRU, &gl->gl_flags); atomic_dec(&lru_count); /* Test for being demotable */ @@ -1421,6 +1389,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m } nr_skipped++; list_add(&gl->gl_lru, &skipped); + set_bit(GLF_LRU, &gl->gl_flags); } list_splice(&skipped, &lru_list); atomic_add(nr_skipped, &lru_count); @@ -1440,42 +1409,30 @@ static struct shrinker glock_shrinker = { * @sdp: the filesystem * @bucket: the bucket * - * Returns: 1 if the bucket has entries */ -static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp, +static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp, unsigned int hash) { - struct gfs2_glock *gl, *prev = NULL; - int has_entries = 0; - struct hlist_head *head = &gl_hash_table[hash].hb_list; + struct gfs2_glock *gl; + struct hlist_bl_head *head = &gl_hash_table[hash]; + struct hlist_bl_node *pos; - read_lock(gl_lock_addr(hash)); - /* Can't use hlist_for_each_entry - don't want prefetch here */ - if (hlist_empty(head)) - goto out; - gl = list_entry(head->first, struct gfs2_glock, gl_list); - while(1) { - if (!sdp || gl->gl_sbd == sdp) { - gfs2_glock_hold(gl); - read_unlock(gl_lock_addr(hash)); - if (prev) - gfs2_glock_put(prev); - prev = gl; + rcu_read_lock(); + hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) { + if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref)) examiner(gl); - has_entries = 1; - read_lock(gl_lock_addr(hash)); - } - if (gl->gl_list.next == NULL) - break; - gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list); } -out: - read_unlock(gl_lock_addr(hash)); - if (prev) - gfs2_glock_put(prev); + rcu_read_unlock(); cond_resched(); - return has_entries; +} + +static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) +{ + unsigned x; + + for (x = 0; x < GFS2_GL_HASH_SIZE; x++) + examine_bucket(examiner, sdp, x); } @@ -1505,12 +1462,7 @@ static void thaw_glock(struct gfs2_glock *gl) static void clear_glock(struct gfs2_glock *gl) { - spin_lock(&lru_lock); - if (!list_empty(&gl->gl_lru)) { - list_del_init(&gl->gl_lru); - atomic_dec(&lru_count); - } - spin_unlock(&lru_lock); + gfs2_glock_remove_from_lru(gl); spin_lock(&gl->gl_spin); if (gl->gl_state != LM_ST_UNLOCKED) @@ -1529,10 +1481,21 @@ static void clear_glock(struct gfs2_glock *gl) void gfs2_glock_thaw(struct gfs2_sbd *sdp) { - unsigned x; + glock_hash_walk(thaw_glock, sdp); +} - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) - examine_bucket(thaw_glock, sdp, x); +static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) +{ + int ret; + spin_lock(&gl->gl_spin); + ret = __dump_glock(seq, gl); + spin_unlock(&gl->gl_spin); + return ret; +} + +static void dump_glock_func(struct gfs2_glock *gl) +{ + dump_glock(NULL, gl); } /** @@ -1545,13 +1508,10 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp) void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) { - unsigned int x; - - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) - examine_bucket(clear_glock, sdp, x); + glock_hash_walk(clear_glock, sdp); flush_workqueue(glock_workqueue); wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); - gfs2_dump_lockstate(sdp); + glock_hash_walk(dump_glock_func, sdp); } void gfs2_glock_finish_truncate(struct gfs2_inode *ip) @@ -1637,9 +1597,11 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) return 0; } -static const char *gflags2str(char *buf, const unsigned long *gflags) +static const char *gflags2str(char *buf, const struct gfs2_glock *gl) { + const unsigned long *gflags = &gl->gl_flags; char *p = buf; + if (test_bit(GLF_LOCK, gflags)) *p++ = 'l'; if (test_bit(GLF_DEMOTE, gflags)) @@ -1662,6 +1624,10 @@ static const char *gflags2str(char *buf, const unsigned long *gflags) *p++ = 'F'; if (test_bit(GLF_QUEUED, gflags)) *p++ = 'q'; + if (test_bit(GLF_LRU, gflags)) + *p++ = 'L'; + if (gl->gl_object) + *p++ = 'o'; *p = 0; return buf; } @@ -1696,14 +1662,15 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) dtime *= 1000000/HZ; /* demote time in uSec */ if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) dtime = 0; - gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n", + gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n", state2str(gl->gl_state), gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, - gflags2str(gflags_buf, &gl->gl_flags), + gflags2str(gflags_buf, gl), state2str(gl->gl_target), state2str(gl->gl_demote_state), dtime, atomic_read(&gl->gl_ail_count), + atomic_read(&gl->gl_revokes), atomic_read(&gl->gl_ref)); list_for_each_entry(gh, &gl->gl_holders, gh_list) { @@ -1717,73 +1684,22 @@ out: return error; } -static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) -{ - int ret; - spin_lock(&gl->gl_spin); - ret = __dump_glock(seq, gl); - spin_unlock(&gl->gl_spin); - return ret; -} -/** - * gfs2_dump_lockstate - print out the current lockstate - * @sdp: the filesystem - * @ub: the buffer to copy the information into - * - * If @ub is NULL, dump the lockstate to the console. - * - */ - -static int gfs2_dump_lockstate(struct gfs2_sbd *sdp) -{ - struct gfs2_glock *gl; - struct hlist_node *h; - unsigned int x; - int error = 0; - - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) { - - read_lock(gl_lock_addr(x)); - - hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) { - if (gl->gl_sbd != sdp) - continue; - - error = dump_glock(NULL, gl); - if (error) - break; - } - - read_unlock(gl_lock_addr(x)); - - if (error) - break; - } - - - return error; -} int __init gfs2_glock_init(void) { unsigned i; for(i = 0; i < GFS2_GL_HASH_SIZE; i++) { - INIT_HLIST_HEAD(&gl_hash_table[i].hb_list); + INIT_HLIST_BL_HEAD(&gl_hash_table[i]); } -#ifdef GL_HASH_LOCK_SZ - for(i = 0; i < GL_HASH_LOCK_SZ; i++) { - rwlock_init(&gl_hash_locks[i]); - } -#endif glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | - WQ_HIGHPRI | WQ_FREEZEABLE, 0); + WQ_HIGHPRI | WQ_FREEZABLE, 0); if (IS_ERR(glock_workqueue)) return PTR_ERR(glock_workqueue); gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", - WQ_MEM_RECLAIM | WQ_FREEZEABLE, + WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); if (IS_ERR(gfs2_delete_workqueue)) { destroy_workqueue(glock_workqueue); @@ -1802,62 +1718,54 @@ void gfs2_glock_exit(void) destroy_workqueue(gfs2_delete_workqueue); } +static inline struct gfs2_glock *glock_hash_chain(unsigned hash) +{ + return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]), + struct gfs2_glock, gl_list); +} + +static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl) +{ + return hlist_bl_entry(rcu_dereference(gl->gl_list.next), + struct gfs2_glock, gl_list); +} + static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) { struct gfs2_glock *gl; -restart: - read_lock(gl_lock_addr(gi->hash)); - gl = gi->gl; - if (gl) { - gi->gl = hlist_entry(gl->gl_list.next, - struct gfs2_glock, gl_list); - } else { - gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, - struct gfs2_glock, gl_list); - } - if (gi->gl) - gfs2_glock_hold(gi->gl); - read_unlock(gl_lock_addr(gi->hash)); - if (gl) - gfs2_glock_put(gl); - while (gi->gl == NULL) { - gi->hash++; - if (gi->hash >= GFS2_GL_HASH_SIZE) - return 1; - read_lock(gl_lock_addr(gi->hash)); - gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, - struct gfs2_glock, gl_list); - if (gi->gl) - gfs2_glock_hold(gi->gl); - read_unlock(gl_lock_addr(gi->hash)); - } - - if (gi->sdp != gi->gl->gl_sbd) - goto restart; + do { + gl = gi->gl; + if (gl) { + gi->gl = glock_hash_next(gl); + } else { + gi->gl = glock_hash_chain(gi->hash); + } + while (gi->gl == NULL) { + gi->hash++; + if (gi->hash >= GFS2_GL_HASH_SIZE) { + rcu_read_unlock(); + return 1; + } + gi->gl = glock_hash_chain(gi->hash); + } + /* Skip entries for other sb and dead entries */ + } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0); return 0; } -static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi) -{ - if (gi->gl) - gfs2_glock_put(gi->gl); - gi->gl = NULL; -} - static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) { struct gfs2_glock_iter *gi = seq->private; loff_t n = *pos; gi->hash = 0; + rcu_read_lock(); do { - if (gfs2_glock_iter_next(gi)) { - gfs2_glock_iter_free(gi); + if (gfs2_glock_iter_next(gi)) return NULL; - } } while (n--); return gi->gl; @@ -1870,10 +1778,8 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, (*pos)++; - if (gfs2_glock_iter_next(gi)) { - gfs2_glock_iter_free(gi); + if (gfs2_glock_iter_next(gi)) return NULL; - } return gi->gl; } @@ -1881,7 +1787,10 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) { struct gfs2_glock_iter *gi = seq->private; - gfs2_glock_iter_free(gi); + + if (gi->gl) + rcu_read_unlock(); + gi->gl = NULL; } static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 691851ceb61..6b2f757b928 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -118,7 +118,7 @@ struct lm_lockops { int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); void (*lm_unmount) (struct gfs2_sbd *sdp); void (*lm_withdraw) (struct gfs2_sbd *sdp); - void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); + void (*lm_put_lock) (struct gfs2_glock *gl); int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, unsigned int flags); void (*lm_cancel) (struct gfs2_glock *gl); @@ -174,7 +174,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, int create, struct gfs2_glock **glp); void gfs2_glock_hold(struct gfs2_glock *gl); void gfs2_glock_put_nolock(struct gfs2_glock *gl); -int gfs2_glock_put(struct gfs2_glock *gl); +void gfs2_glock_put(struct gfs2_glock *gl); void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, struct gfs2_holder *gh); void gfs2_holder_reinit(unsigned int state, unsigned flags, @@ -223,25 +223,21 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl, return error; } -/* Lock Value Block functions */ - -int gfs2_lvb_hold(struct gfs2_glock *gl); -void gfs2_lvb_unhold(struct gfs2_glock *gl); - -void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); -void gfs2_glock_complete(struct gfs2_glock *gl, int ret); -void gfs2_reclaim_glock(struct gfs2_sbd *sdp); -void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); -void gfs2_glock_finish_truncate(struct gfs2_inode *ip); -void gfs2_glock_thaw(struct gfs2_sbd *sdp); - -int __init gfs2_glock_init(void); -void gfs2_glock_exit(void); - -int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); -void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); -int gfs2_register_debugfs(void); -void gfs2_unregister_debugfs(void); +extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); +extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret); +extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); +extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip); +extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); +extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl); +extern void gfs2_glock_free(struct gfs2_glock *gl); + +extern int __init gfs2_glock_init(void); +extern void gfs2_glock_exit(void); + +extern int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); +extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); +extern int gfs2_register_debugfs(void); +extern void gfs2_unregister_debugfs(void); extern const struct lm_lockops gfs2_dlm_ops; diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 263561bf1a5..8ef70f46473 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -28,18 +28,45 @@ #include "trans.h" /** - * ail_empty_gl - remove all buffers for a given lock from the AIL + * __gfs2_ail_flush - remove all buffers for a given lock from the AIL * @gl: the glock * * None of the buffers should be dirty, locked, or pinned. */ -static void gfs2_ail_empty_gl(struct gfs2_glock *gl) +static void __gfs2_ail_flush(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; struct list_head *head = &gl->gl_ail_list; struct gfs2_bufdata *bd; struct buffer_head *bh; + + spin_lock(&sdp->sd_ail_lock); + while (!list_empty(head)) { + bd = list_entry(head->next, struct gfs2_bufdata, + bd_ail_gl_list); + bh = bd->bd_bh; + gfs2_remove_from_ail(bd); + spin_unlock(&sdp->sd_ail_lock); + + bd->bd_bh = NULL; + bh->b_private = NULL; + bd->bd_blkno = bh->b_blocknr; + gfs2_log_lock(sdp); + gfs2_assert_withdraw(sdp, !buffer_busy(bh)); + gfs2_trans_add_revoke(sdp, bd); + gfs2_log_unlock(sdp); + + spin_lock(&sdp->sd_ail_lock); + } + gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); + spin_unlock(&sdp->sd_ail_lock); +} + + +static void gfs2_ail_empty_gl(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; struct gfs2_trans tr; memset(&tr, 0, sizeof(tr)); @@ -56,25 +83,29 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) BUG_ON(current->journal_info); current->journal_info = &tr; - gfs2_log_lock(sdp); - while (!list_empty(head)) { - bd = list_entry(head->next, struct gfs2_bufdata, - bd_ail_gl_list); - bh = bd->bd_bh; - gfs2_remove_from_ail(bd); - bd->bd_bh = NULL; - bh->b_private = NULL; - bd->bd_blkno = bh->b_blocknr; - gfs2_assert_withdraw(sdp, !buffer_busy(bh)); - gfs2_trans_add_revoke(sdp, bd); - } - gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); - gfs2_log_unlock(sdp); + __gfs2_ail_flush(gl); gfs2_trans_end(sdp); gfs2_log_flush(sdp, NULL); } +void gfs2_ail_flush(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + unsigned int revokes = atomic_read(&gl->gl_ail_count); + int ret; + + if (!revokes) + return; + + ret = gfs2_trans_begin(sdp, 0, revokes); + if (ret) + return; + __gfs2_ail_flush(gl); + gfs2_trans_end(sdp); + gfs2_log_flush(sdp, NULL); +} + /** * rgrp_go_sync - sync out the metadata for this glock * @gl: the glock @@ -206,12 +237,134 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) static int inode_go_demote_ok(const struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_holder *gh; + if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) return 0; + + if (!list_empty(&gl->gl_holders)) { + gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); + if (gh->gh_list.next != &gl->gl_holders) + return 0; + } + return 1; } /** + * gfs2_set_nlink - Set the inode's link count based on on-disk info + * @inode: The inode in question + * @nlink: The link count + * + * If the link count has hit zero, it must never be raised, whatever the + * on-disk inode might say. When new struct inodes are created the link + * count is set to 1, so that we can safely use this test even when reading + * in on disk information for the first time. + */ + +static void gfs2_set_nlink(struct inode *inode, u32 nlink) +{ + /* + * We will need to review setting the nlink count here in the + * light of the forthcoming ro bind mount work. This is a reminder + * to do that. + */ + if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) { + if (nlink == 0) + clear_nlink(inode); + else + inode->i_nlink = nlink; + } +} + +static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) +{ + const struct gfs2_dinode *str = buf; + struct timespec atime; + u16 height, depth; + + if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) + goto corrupt; + ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); + ip->i_inode.i_mode = be32_to_cpu(str->di_mode); + ip->i_inode.i_rdev = 0; + switch (ip->i_inode.i_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), + be32_to_cpu(str->di_minor)); + break; + }; + + ip->i_inode.i_uid = be32_to_cpu(str->di_uid); + ip->i_inode.i_gid = be32_to_cpu(str->di_gid); + gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); + i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); + gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); + atime.tv_sec = be64_to_cpu(str->di_atime); + atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); + if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) + ip->i_inode.i_atime = atime; + ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); + ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); + ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); + ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); + + ip->i_goal = be64_to_cpu(str->di_goal_meta); + ip->i_generation = be64_to_cpu(str->di_generation); + + ip->i_diskflags = be32_to_cpu(str->di_flags); + gfs2_set_inode_flags(&ip->i_inode); + height = be16_to_cpu(str->di_height); + if (unlikely(height > GFS2_MAX_META_HEIGHT)) + goto corrupt; + ip->i_height = (u8)height; + + depth = be16_to_cpu(str->di_depth); + if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) + goto corrupt; + ip->i_depth = (u8)depth; + ip->i_entries = be32_to_cpu(str->di_entries); + + ip->i_eattr = be64_to_cpu(str->di_eattr); + if (S_ISREG(ip->i_inode.i_mode)) + gfs2_set_aops(&ip->i_inode); + + return 0; +corrupt: + gfs2_consist_inode(ip); + return -EIO; +} + +/** + * gfs2_inode_refresh - Refresh the incore copy of the dinode + * @ip: The GFS2 inode + * + * Returns: errno + */ + +int gfs2_inode_refresh(struct gfs2_inode *ip) +{ + struct buffer_head *dibh; + int error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) { + brelse(dibh); + return -EIO; + } + + error = gfs2_dinode_in(ip, dibh->b_data); + brelse(dibh); + clear_bit(GIF_INVALID, &ip->i_flags); + + return error; +} + +/** * inode_go_lock - operation done after an inode lock is locked by a process * @gl: the glock * @flags: @@ -272,19 +425,6 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) } /** - * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock - * @gl: the glock - * - * Returns: 1 if it's ok - */ - -static int rgrp_go_demote_ok(const struct gfs2_glock *gl) -{ - const struct address_space *mapping = (const struct address_space *)(gl + 1); - return !mapping->nrpages; -} - -/** * rgrp_go_lock - operation done after an rgrp lock is locked by * a first holder on this node. * @gl: the glock @@ -383,6 +523,10 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl) static void iopen_go_callback(struct gfs2_glock *gl) { struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object; + struct gfs2_sbd *sdp = gl->gl_sbd; + + if (sdp->sd_vfs->s_flags & MS_RDONLY) + return; if (gl->gl_demote_state == LM_ST_UNLOCKED && gl->gl_state == LM_ST_SHARED && ip) { @@ -410,7 +554,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = { const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_xmote_th = rgrp_go_sync, .go_inval = rgrp_go_inval, - .go_demote_ok = rgrp_go_demote_ok, .go_lock = rgrp_go_lock, .go_unlock = rgrp_go_unlock, .go_dump = gfs2_rgrp_dump, diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h index b3aa2e3210f..6fce409b5a5 100644 --- a/fs/gfs2/glops.h +++ b/fs/gfs2/glops.h @@ -23,4 +23,6 @@ extern const struct gfs2_glock_operations gfs2_quota_glops; extern const struct gfs2_glock_operations gfs2_journal_glops; extern const struct gfs2_glock_operations *gfs2_glops_list[]; +extern void gfs2_ail_flush(struct gfs2_glock *gl); + #endif /* __GLOPS_DOT_H__ */ diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index a79790c0627..0a064e91ac7 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -15,10 +15,11 @@ #include <linux/workqueue.h> #include <linux/dlm.h> #include <linux/buffer_head.h> +#include <linux/rcupdate.h> +#include <linux/rculist_bl.h> #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 -#define DIO_ALL 0x00000100 struct gfs2_log_operations; struct gfs2_log_element; @@ -198,10 +199,12 @@ enum { GLF_INITIAL = 10, GLF_FROZEN = 11, GLF_QUEUED = 12, + GLF_LRU = 13, + GLF_OBJECT = 14, /* Used only for tracing */ }; struct gfs2_glock { - struct hlist_node gl_list; + struct hlist_bl_node gl_list; unsigned long gl_flags; /* GLF_... */ struct lm_lockname gl_name; atomic_t gl_ref; @@ -232,8 +235,10 @@ struct gfs2_glock { struct list_head gl_ail_list; atomic_t gl_ail_count; + atomic_t gl_revokes; struct delayed_work gl_work; struct work_struct gl_delete; + struct rcu_head gl_rcu; }; #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ @@ -314,6 +319,7 @@ enum { QDF_USER = 0, QDF_CHANGE = 1, QDF_LOCKED = 2, + QDF_REFRESH = 3, }; struct gfs2_quota_data { @@ -370,8 +376,6 @@ struct gfs2_ail { unsigned int ai_first; struct list_head ai_ail1_list; struct list_head ai_ail2_list; - - u64 ai_sync_gen; }; struct gfs2_journal_extent { @@ -484,7 +488,6 @@ struct gfs2_sb_host { char sb_lockproto[GFS2_LOCKNAME_LEN]; char sb_locktable[GFS2_LOCKNAME_LEN]; - u8 sb_uuid[16]; }; /* @@ -647,9 +650,9 @@ struct gfs2_sbd { unsigned int sd_log_flush_head; u64 sd_log_flush_wrapped; + spinlock_t sd_ail_lock; struct list_head sd_ail1_list; struct list_head sd_ail2_list; - u64 sd_ail_sync_gen; /* Replay stuff */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 7aa7d4f8984..03e0c529063 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1,23 +1,25 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions * of the GNU General Public License version 2. */ -#include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> +#include <linux/namei.h> +#include <linux/mm.h> +#include <linux/xattr.h> #include <linux/posix_acl.h> -#include <linux/sort.h> #include <linux/gfs2_ondisk.h> #include <linux/crc32.h> +#include <linux/fiemap.h> #include <linux/security.h> -#include <linux/time.h> +#include <asm/uaccess.h> #include "gfs2.h" #include "incore.h" @@ -26,51 +28,70 @@ #include "dir.h" #include "xattr.h" #include "glock.h" -#include "glops.h" #include "inode.h" -#include "log.h" #include "meta_io.h" #include "quota.h" #include "rgrp.h" #include "trans.h" #include "util.h" +#include "super.h" +#include "glops.h" -struct gfs2_inum_range_host { - u64 ir_start; - u64 ir_length; +struct gfs2_skip_data { + u64 no_addr; + int skipped; + int non_block; }; static int iget_test(struct inode *inode, void *opaque) { struct gfs2_inode *ip = GFS2_I(inode); - u64 *no_addr = opaque; + struct gfs2_skip_data *data = opaque; - if (ip->i_no_addr == *no_addr) + if (ip->i_no_addr == data->no_addr) { + if (data->non_block && + inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { + data->skipped = 1; + return 0; + } return 1; - + } return 0; } static int iget_set(struct inode *inode, void *opaque) { struct gfs2_inode *ip = GFS2_I(inode); - u64 *no_addr = opaque; + struct gfs2_skip_data *data = opaque; - inode->i_ino = (unsigned long)*no_addr; - ip->i_no_addr = *no_addr; + if (data->skipped) + return -ENOENT; + inode->i_ino = (unsigned long)(data->no_addr); + ip->i_no_addr = data->no_addr; return 0; } -struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) +struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block) { unsigned long hash = (unsigned long)no_addr; - return ilookup5(sb, hash, iget_test, &no_addr); + struct gfs2_skip_data data; + + data.no_addr = no_addr; + data.skipped = 0; + data.non_block = non_block; + return ilookup5(sb, hash, iget_test, &data); } -static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) +static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr, + int non_block) { + struct gfs2_skip_data data; unsigned long hash = (unsigned long)no_addr; - return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); + + data.no_addr = no_addr; + data.skipped = 0; + data.non_block = non_block; + return iget5_locked(sb, hash, iget_test, iget_set, &data); } /** @@ -111,19 +132,20 @@ static void gfs2_set_iop(struct inode *inode) * @sb: The super block * @no_addr: The inode number * @type: The type of the inode + * non_block: Can we block on inodes that are being freed? * * Returns: A VFS inode, or an error */ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, - u64 no_addr, u64 no_formal_ino) + u64 no_addr, u64 no_formal_ino, int non_block) { struct inode *inode; struct gfs2_inode *ip; struct gfs2_glock *io_gl = NULL; int error; - inode = gfs2_iget(sb, no_addr); + inode = gfs2_iget(sb, no_addr, non_block); ip = GFS2_I(inode); if (!inode) @@ -185,11 +207,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, { struct super_block *sb = sdp->sd_vfs; struct gfs2_holder i_gh; - struct inode *inode; + struct inode *inode = NULL; int error; + /* Must not read in block until block type is verified */ error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, - LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + LM_ST_EXCLUSIVE, GL_SKIP, &i_gh); if (error) return ERR_PTR(error); @@ -197,7 +220,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, if (error) goto fail; - inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0); + inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1); if (IS_ERR(inode)) goto fail; @@ -222,203 +245,6 @@ fail_iput: goto fail; } -static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) -{ - const struct gfs2_dinode *str = buf; - struct timespec atime; - u16 height, depth; - - if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) - goto corrupt; - ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); - ip->i_inode.i_mode = be32_to_cpu(str->di_mode); - ip->i_inode.i_rdev = 0; - switch (ip->i_inode.i_mode & S_IFMT) { - case S_IFBLK: - case S_IFCHR: - ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), - be32_to_cpu(str->di_minor)); - break; - }; - - ip->i_inode.i_uid = be32_to_cpu(str->di_uid); - ip->i_inode.i_gid = be32_to_cpu(str->di_gid); - /* - * We will need to review setting the nlink count here in the - * light of the forthcoming ro bind mount work. This is a reminder - * to do that. - */ - ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); - i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); - gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); - atime.tv_sec = be64_to_cpu(str->di_atime); - atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); - if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) - ip->i_inode.i_atime = atime; - ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); - ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); - ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); - ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); - - ip->i_goal = be64_to_cpu(str->di_goal_meta); - ip->i_generation = be64_to_cpu(str->di_generation); - - ip->i_diskflags = be32_to_cpu(str->di_flags); - gfs2_set_inode_flags(&ip->i_inode); - height = be16_to_cpu(str->di_height); - if (unlikely(height > GFS2_MAX_META_HEIGHT)) - goto corrupt; - ip->i_height = (u8)height; - - depth = be16_to_cpu(str->di_depth); - if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) - goto corrupt; - ip->i_depth = (u8)depth; - ip->i_entries = be32_to_cpu(str->di_entries); - - ip->i_eattr = be64_to_cpu(str->di_eattr); - if (S_ISREG(ip->i_inode.i_mode)) - gfs2_set_aops(&ip->i_inode); - - return 0; -corrupt: - if (gfs2_consist_inode(ip)) - gfs2_dinode_print(ip); - return -EIO; -} - -/** - * gfs2_inode_refresh - Refresh the incore copy of the dinode - * @ip: The GFS2 inode - * - * Returns: errno - */ - -int gfs2_inode_refresh(struct gfs2_inode *ip) -{ - struct buffer_head *dibh; - int error; - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - return error; - - if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) { - brelse(dibh); - return -EIO; - } - - error = gfs2_dinode_in(ip, dibh->b_data); - brelse(dibh); - clear_bit(GIF_INVALID, &ip->i_flags); - - return error; -} - -int gfs2_dinode_dealloc(struct gfs2_inode *ip) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al; - struct gfs2_rgrpd *rgd; - int error; - - if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { - if (gfs2_consist_inode(ip)) - gfs2_dinode_print(ip); - return -EIO; - } - - al = gfs2_alloc_get(ip); - if (!al) - return -ENOMEM; - - error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); - if (error) - goto out; - - error = gfs2_rindex_hold(sdp, &al->al_ri_gh); - if (error) - goto out_qs; - - rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); - if (!rgd) { - gfs2_consist_inode(ip); - error = -EIO; - goto out_rindex_relse; - } - - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, - &al->al_rgd_gh); - if (error) - goto out_rindex_relse; - - error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1); - if (error) - goto out_rg_gunlock; - - set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); - set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags); - - gfs2_free_di(rgd, ip); - - gfs2_trans_end(sdp); - -out_rg_gunlock: - gfs2_glock_dq_uninit(&al->al_rgd_gh); -out_rindex_relse: - gfs2_glock_dq_uninit(&al->al_ri_gh); -out_qs: - gfs2_quota_unhold(ip); -out: - gfs2_alloc_put(ip); - return error; -} - -/** - * gfs2_change_nlink - Change nlink count on inode - * @ip: The GFS2 inode - * @diff: The change in the nlink count required - * - * Returns: errno - */ -int gfs2_change_nlink(struct gfs2_inode *ip, int diff) -{ - struct buffer_head *dibh; - u32 nlink; - int error; - - BUG_ON(diff != 1 && diff != -1); - nlink = ip->i_inode.i_nlink + diff; - - /* If we are reducing the nlink count, but the new value ends up being - bigger than the old one, we must have underflowed. */ - if (diff < 0 && nlink > ip->i_inode.i_nlink) { - if (gfs2_consist_inode(ip)) - gfs2_dinode_print(ip); - return -EIO; - } - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - return error; - - if (diff > 0) - inc_nlink(&ip->i_inode); - else - drop_nlink(&ip->i_inode); - - ip->i_inode.i_ctime = CURRENT_TIME; - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); - mark_inode_dirty(&ip->i_inode); - - if (ip->i_inode.i_nlink == 0) - gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ - - return error; -} struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) { @@ -517,7 +343,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, /* Don't create entries in an unlinked directory */ if (!dip->i_inode.i_nlink) - return -EPERM; + return -ENOENT; error = gfs2_dir_check(&dip->i_inode, name, NULL); switch (error) { @@ -587,21 +413,44 @@ out: return error; } +static void gfs2_init_dir(struct buffer_head *dibh, + const struct gfs2_inode *parent) +{ + struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; + struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1); + + gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent); + dent->de_inum = di->di_num; /* already GFS2 endian */ + dent->de_type = cpu_to_be16(DT_DIR); + + dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); + gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); + gfs2_inum_out(parent, dent); + dent->de_type = cpu_to_be16(DT_DIR); + +} + /** * init_dinode - Fill in a new dinode structure - * @dip: the directory this inode is being created in + * @dip: The directory this inode is being created in * @gl: The glock covering the new inode - * @inum: the inode number - * @mode: the file permissions - * @uid: - * @gid: + * @inum: The inode number + * @mode: The file permissions + * @uid: The uid of the new inode + * @gid: The gid of the new inode + * @generation: The generation number of the new inode + * @dev: The device number (if a device node) + * @symname: The symlink destination (if a symlink) + * @size: The inode size (ignored for directories) + * @bhp: The buffer head (returned to caller) * */ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, const struct gfs2_inum_host *inum, unsigned int mode, unsigned int uid, unsigned int gid, - const u64 *generation, dev_t dev, struct buffer_head **bhp) + const u64 *generation, dev_t dev, const char *symname, + unsigned size, struct buffer_head **bhp) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_dinode *di; @@ -620,7 +469,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, di->di_uid = cpu_to_be32(uid); di->di_gid = cpu_to_be32(gid); di->di_nlink = 0; - di->di_size = 0; + di->di_size = cpu_to_be64(size); di->di_blocks = cpu_to_be64(1); di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); di->di_major = cpu_to_be32(MAJOR(dev)); @@ -628,16 +477,6 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); di->di_generation = cpu_to_be64(*generation); di->di_flags = 0; - - if (S_ISREG(mode)) { - if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || - gfs2_tune_get(sdp, gt_new_files_jdata)) - di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); - } else if (S_ISDIR(mode)) { - di->di_flags |= cpu_to_be32(dip->i_diskflags & - GFS2_DIF_INHERIT_JDATA); - } - di->__pad1 = 0; di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); di->di_height = 0; @@ -651,7 +490,26 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); memset(&di->di_reserved, 0, sizeof(di->di_reserved)); - + + switch(mode & S_IFMT) { + case S_IFREG: + if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || + gfs2_tune_get(sdp, gt_new_files_jdata)) + di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); + break; + case S_IFDIR: + di->di_flags |= cpu_to_be32(dip->i_diskflags & + GFS2_DIF_INHERIT_JDATA); + di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); + di->di_size = cpu_to_be64(sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)); + di->di_entries = cpu_to_be32(2); + gfs2_init_dir(dibh, dip); + break; + case S_IFLNK: + memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size); + break; + } + set_buffer_uptodate(dibh); *bhp = dibh; @@ -659,7 +517,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, unsigned int mode, const struct gfs2_inum_host *inum, - const u64 *generation, dev_t dev, struct buffer_head **bhp) + const u64 *generation, dev_t dev, const char *symname, + unsigned int size, struct buffer_head **bhp) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); unsigned int uid, gid; @@ -681,7 +540,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, if (error) goto out_quota; - init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp); + init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, symname, size, bhp); gfs2_quota_change(dip, +1, uid, gid); gfs2_trans_end(sdp); @@ -735,14 +594,16 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, goto fail_quota_locks; } - error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode)); + error = gfs2_dir_add(&dip->i_inode, name, ip); if (error) goto fail_end_trans; error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto fail_end_trans; - ip->i_inode.i_nlink = 1; + inc_nlink(&ip->i_inode); + if (S_ISDIR(ip->i_inode.i_mode)) + inc_nlink(&ip->i_inode); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -763,14 +624,15 @@ fail: return error; } -static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) +static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, + const struct qstr *qstr) { int err; size_t len; void *value; char *name; - err = security_inode_init_security(&ip->i_inode, &dip->i_inode, + err = security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr, &name, &value, &len); if (err) { @@ -788,27 +650,25 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) } /** - * gfs2_createi - Create a new inode - * @ghs: An array of two holders - * @name: The name of the new file - * @mode: the permissions on the new inode + * gfs2_create_inode - Create a new inode + * @dir: The parent directory + * @dentry: The new dentry + * @mode: The permissions on the new inode + * @dev: For device nodes, this is the device number + * @symname: For symlinks, this is the link destination + * @size: The initial size of the inode (ignored for directories) * - * @ghs[0] is an initialized holder for the directory - * @ghs[1] is the holder for the inode lock - * - * If the return value is not NULL, the glocks on both the directory and the new - * file are held. A transaction has been started and an inplace reservation - * is held, as well. - * - * Returns: An inode + * Returns: 0 on success, or error code */ -struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, - unsigned int mode, dev_t dev) +static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, + unsigned int mode, dev_t dev, const char *symname, + unsigned int size) { + const struct qstr *name = &dentry->d_name; + struct gfs2_holder ghs[2]; struct inode *inode = NULL; - struct gfs2_inode *dip = ghs->gh_gl->gl_object; - struct inode *dir = &dip->i_inode; + struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; int error; @@ -816,10 +676,9 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, struct buffer_head *bh = NULL; if (!name->len || name->len > GFS2_FNAMESIZE) - return ERR_PTR(-ENAMETOOLONG); + return -ENAMETOOLONG; - gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); - error = gfs2_glock_nq(ghs); + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); if (error) goto fail; @@ -837,12 +696,12 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, if (error) goto fail_gunlock; - error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh); + error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, symname, size, &bh); if (error) goto fail_gunlock2; inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, - inum.no_formal_ino); + inum.no_formal_ino, 0); if (IS_ERR(inode)) goto fail_gunlock2; @@ -854,7 +713,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, if (error) goto fail_gunlock2; - error = gfs2_security_init(dip, GFS2_I(inode)); + error = gfs2_security_init(dip, GFS2_I(inode), name); if (error) goto fail_gunlock2; @@ -864,18 +723,852 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, if (bh) brelse(bh); - return inode; + + gfs2_trans_end(sdp); + if (dip->i_alloc->al_rgd) + gfs2_inplace_release(dip); + gfs2_quota_unlock(dip); + gfs2_alloc_put(dip); + gfs2_glock_dq_uninit_m(2, ghs); + mark_inode_dirty(inode); + d_instantiate(dentry, inode); + return 0; fail_gunlock2: gfs2_glock_dq_uninit(ghs + 1); if (inode && !IS_ERR(inode)) iput(inode); fail_gunlock: - gfs2_glock_dq(ghs); + gfs2_glock_dq_uninit(ghs); fail: if (bh) brelse(bh); - return ERR_PTR(error); + return error; +} + +/** + * gfs2_create - Create a file + * @dir: The directory in which to create the file + * @dentry: The dentry of the new file + * @mode: The mode of the new file + * + * Returns: errno + */ + +static int gfs2_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + struct inode *inode; + int ret; + + for (;;) { + ret = gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0); + if (ret != -EEXIST || (nd && (nd->flags & LOOKUP_EXCL))) + return ret; + + inode = gfs2_lookupi(dir, &dentry->d_name, 0); + if (inode) { + if (!IS_ERR(inode)) + break; + return PTR_ERR(inode); + } + } + + d_instantiate(dentry, inode); + return 0; +} + +/** + * gfs2_lookup - Look up a filename in a directory and return its inode + * @dir: The directory inode + * @dentry: The dentry of the new inode + * @nd: passed from Linux VFS, ignored by us + * + * Called by the VFS layer. Lock dir and call gfs2_lookupi() + * + * Returns: errno + */ + +static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode *inode = NULL; + + inode = gfs2_lookupi(dir, &dentry->d_name, 0); + if (inode && IS_ERR(inode)) + return ERR_CAST(inode); + + if (inode) { + struct gfs2_glock *gl = GFS2_I(inode)->i_gl; + struct gfs2_holder gh; + int error; + error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); + if (error) { + iput(inode); + return ERR_PTR(error); + } + gfs2_glock_dq_uninit(&gh); + return d_splice_alias(inode, dentry); + } + d_add(dentry, inode); + + return NULL; +} + +/** + * gfs2_link - Link to a file + * @old_dentry: The inode to link + * @dir: Add link to this directory + * @dentry: The name of the link + * + * Link the inode in "old_dentry" into the directory "dir" with the + * name in "dentry". + * + * Returns: errno + */ + +static int gfs2_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct gfs2_inode *dip = GFS2_I(dir); + struct gfs2_sbd *sdp = GFS2_SB(dir); + struct inode *inode = old_dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder ghs[2]; + struct buffer_head *dibh; + int alloc_required; + int error; + + if (S_ISDIR(inode->i_mode)) + return -EPERM; + + gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); + + error = gfs2_glock_nq(ghs); /* parent */ + if (error) + goto out_parent; + + error = gfs2_glock_nq(ghs + 1); /* child */ + if (error) + goto out_child; + + error = -ENOENT; + if (inode->i_nlink == 0) + goto out_gunlock; + + error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0); + if (error) + goto out_gunlock; + + error = gfs2_dir_check(dir, &dentry->d_name, NULL); + switch (error) { + case -ENOENT: + break; + case 0: + error = -EEXIST; + default: + goto out_gunlock; + } + + error = -EINVAL; + if (!dip->i_inode.i_nlink) + goto out_gunlock; + error = -EFBIG; + if (dip->i_entries == (u32)-1) + goto out_gunlock; + error = -EPERM; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto out_gunlock; + error = -EINVAL; + if (!ip->i_inode.i_nlink) + goto out_gunlock; + error = -EMLINK; + if (ip->i_inode.i_nlink == (u32)-1) + goto out_gunlock; + + alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name); + if (error < 0) + goto out_gunlock; + error = 0; + + if (alloc_required) { + struct gfs2_alloc *al = gfs2_alloc_get(dip); + if (!al) { + error = -ENOMEM; + goto out_gunlock; + } + + error = gfs2_quota_lock_check(dip); + if (error) + goto out_alloc; + + al->al_requested = sdp->sd_max_dirres; + + error = gfs2_inplace_reserve(dip); + if (error) + goto out_gunlock_q; + + error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + + gfs2_rg_blocks(al) + + 2 * RES_DINODE + RES_STATFS + + RES_QUOTA, 0); + if (error) + goto out_ipres; + } else { + error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0); + if (error) + goto out_ipres; + } + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out_end_trans; + + error = gfs2_dir_add(dir, &dentry->d_name, ip); + if (error) + goto out_brelse; + + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + inc_nlink(&ip->i_inode); + ip->i_inode.i_ctime = CURRENT_TIME; + gfs2_dinode_out(ip, dibh->b_data); + mark_inode_dirty(&ip->i_inode); + +out_brelse: + brelse(dibh); +out_end_trans: + gfs2_trans_end(sdp); +out_ipres: + if (alloc_required) + gfs2_inplace_release(dip); +out_gunlock_q: + if (alloc_required) + gfs2_quota_unlock(dip); +out_alloc: + if (alloc_required) + gfs2_alloc_put(dip); +out_gunlock: + gfs2_glock_dq(ghs + 1); +out_child: + gfs2_glock_dq(ghs); +out_parent: + gfs2_holder_uninit(ghs); + gfs2_holder_uninit(ghs + 1); + if (!error) { + ihold(inode); + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + } + return error; +} + +/* + * gfs2_unlink_ok - check to see that a inode is still in a directory + * @dip: the directory + * @name: the name of the file + * @ip: the inode + * + * Assumes that the lock on (at least) @dip is held. + * + * Returns: 0 if the parent/child relationship is correct, errno if it isn't + */ + +static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, + const struct gfs2_inode *ip) +{ + int error; + + if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) + return -EPERM; + + if ((dip->i_inode.i_mode & S_ISVTX) && + dip->i_inode.i_uid != current_fsuid() && + ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) + return -EPERM; + + if (IS_APPEND(&dip->i_inode)) + return -EPERM; + + error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0); + if (error) + return error; + + error = gfs2_dir_check(&dip->i_inode, name, ip); + if (error) + return error; + + return 0; +} + +/** + * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it + * @dip: The parent directory + * @name: The name of the entry in the parent directory + * @bh: The inode buffer for the inode to be removed + * @inode: The inode to be removed + * + * Called with all the locks and in a transaction. This will only be + * called for a directory after it has been checked to ensure it is empty. + * + * Returns: 0 on success, or an error + */ + +static int gfs2_unlink_inode(struct gfs2_inode *dip, + const struct dentry *dentry, + struct buffer_head *bh) +{ + struct inode *inode = dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + int error; + + error = gfs2_dir_del(dip, dentry); + if (error) + return error; + + ip->i_entries = 0; + inode->i_ctime = CURRENT_TIME; + if (S_ISDIR(inode->i_mode)) + clear_nlink(inode); + else + drop_nlink(inode); + gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_dinode_out(ip, bh->b_data); + mark_inode_dirty(inode); + if (inode->i_nlink == 0) + gfs2_unlink_di(inode); + return 0; +} + + +/** + * gfs2_unlink - Unlink an inode (this does rmdir as well) + * @dir: The inode of the directory containing the inode to unlink + * @dentry: The file itself + * + * This routine uses the type of the inode as a flag to figure out + * whether this is an unlink or an rmdir. + * + * Returns: errno + */ + +static int gfs2_unlink(struct inode *dir, struct dentry *dentry) +{ + struct gfs2_inode *dip = GFS2_I(dir); + struct gfs2_sbd *sdp = GFS2_SB(dir); + struct inode *inode = dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + struct buffer_head *bh; + struct gfs2_holder ghs[3]; + struct gfs2_rgrpd *rgd; + struct gfs2_holder ri_gh; + int error; + + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + return error; + + gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); + + rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); + gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); + + + error = gfs2_glock_nq(ghs); /* parent */ + if (error) + goto out_parent; + + error = gfs2_glock_nq(ghs + 1); /* child */ + if (error) + goto out_child; + + error = -ENOENT; + if (inode->i_nlink == 0) + goto out_rgrp; + + if (S_ISDIR(inode->i_mode)) { + error = -ENOTEMPTY; + if (ip->i_entries > 2 || inode->i_nlink > 2) + goto out_rgrp; + } + + error = gfs2_glock_nq(ghs + 2); /* rgrp */ + if (error) + goto out_rgrp; + + error = gfs2_unlink_ok(dip, &dentry->d_name, ip); + if (error) + goto out_gunlock; + + error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0); + if (error) + goto out_gunlock; + + error = gfs2_meta_inode_buffer(ip, &bh); + if (error) + goto out_end_trans; + + error = gfs2_unlink_inode(dip, dentry, bh); + brelse(bh); + +out_end_trans: + gfs2_trans_end(sdp); +out_gunlock: + gfs2_glock_dq(ghs + 2); +out_rgrp: + gfs2_holder_uninit(ghs + 2); + gfs2_glock_dq(ghs + 1); +out_child: + gfs2_holder_uninit(ghs + 1); + gfs2_glock_dq(ghs); +out_parent: + gfs2_holder_uninit(ghs); + gfs2_glock_dq_uninit(&ri_gh); + return error; +} + +/** + * gfs2_symlink - Create a symlink + * @dir: The directory to create the symlink in + * @dentry: The dentry to put the symlink in + * @symname: The thing which the link points to + * + * Returns: errno + */ + +static int gfs2_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct gfs2_sbd *sdp = GFS2_SB(dir); + unsigned int size; + + size = strlen(symname); + if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1) + return -ENAMETOOLONG; + + return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size); +} + +/** + * gfs2_mkdir - Make a directory + * @dir: The parent directory of the new one + * @dentry: The dentry of the new directory + * @mode: The mode of the new directory + * + * Returns: errno + */ + +static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0); +} + +/** + * gfs2_mknod - Make a special file + * @dir: The directory in which the special file will reside + * @dentry: The dentry of the special file + * @mode: The mode of the special file + * @dev: The device specification of the special file + * + */ + +static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, + dev_t dev) +{ + return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0); +} + +/* + * gfs2_ok_to_move - check if it's ok to move a directory to another directory + * @this: move this + * @to: to here + * + * Follow @to back to the root and make sure we don't encounter @this + * Assumes we already hold the rename lock. + * + * Returns: errno + */ + +static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) +{ + struct inode *dir = &to->i_inode; + struct super_block *sb = dir->i_sb; + struct inode *tmp; + int error = 0; + + igrab(dir); + + for (;;) { + if (dir == &this->i_inode) { + error = -EINVAL; + break; + } + if (dir == sb->s_root->d_inode) { + error = 0; + break; + } + + tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1); + if (IS_ERR(tmp)) { + error = PTR_ERR(tmp); + break; + } + + iput(dir); + dir = tmp; + } + + iput(dir); + + return error; +} + +/** + * gfs2_rename - Rename a file + * @odir: Parent directory of old file name + * @odentry: The old dentry of the file + * @ndir: Parent directory of new file name + * @ndentry: The new dentry of the file + * + * Returns: errno + */ + +static int gfs2_rename(struct inode *odir, struct dentry *odentry, + struct inode *ndir, struct dentry *ndentry) +{ + struct gfs2_inode *odip = GFS2_I(odir); + struct gfs2_inode *ndip = GFS2_I(ndir); + struct gfs2_inode *ip = GFS2_I(odentry->d_inode); + struct gfs2_inode *nip = NULL; + struct gfs2_sbd *sdp = GFS2_SB(odir); + struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh; + struct gfs2_rgrpd *nrgd; + unsigned int num_gh; + int dir_rename = 0; + int alloc_required = 0; + unsigned int x; + int error; + + if (ndentry->d_inode) { + nip = GFS2_I(ndentry->d_inode); + if (ip == nip) + return 0; + } + + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + return error; + + if (odip != ndip) { + error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, + 0, &r_gh); + if (error) + goto out; + + if (S_ISDIR(ip->i_inode.i_mode)) { + dir_rename = 1; + /* don't move a dirctory into it's subdir */ + error = gfs2_ok_to_move(ip, ndip); + if (error) + goto out_gunlock_r; + } + } + + num_gh = 1; + gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); + if (odip != ndip) { + gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); + num_gh++; + } + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); + num_gh++; + + if (nip) { + gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); + num_gh++; + /* grab the resource lock for unlink flag twiddling + * this is the case of the target file already existing + * so we unlink before doing the rename + */ + nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr); + if (nrgd) + gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); + } + + for (x = 0; x < num_gh; x++) { + error = gfs2_glock_nq(ghs + x); + if (error) + goto out_gunlock; + } + + error = -ENOENT; + if (ip->i_inode.i_nlink == 0) + goto out_gunlock; + + /* Check out the old directory */ + + error = gfs2_unlink_ok(odip, &odentry->d_name, ip); + if (error) + goto out_gunlock; + + /* Check out the new directory */ + + if (nip) { + error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip); + if (error) + goto out_gunlock; + + if (nip->i_inode.i_nlink == 0) { + error = -EAGAIN; + goto out_gunlock; + } + + if (S_ISDIR(nip->i_inode.i_mode)) { + if (nip->i_entries < 2) { + gfs2_consist_inode(nip); + error = -EIO; + goto out_gunlock; + } + if (nip->i_entries > 2) { + error = -ENOTEMPTY; + goto out_gunlock; + } + } + } else { + error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0); + if (error) + goto out_gunlock; + + error = gfs2_dir_check(ndir, &ndentry->d_name, NULL); + switch (error) { + case -ENOENT: + error = 0; + break; + case 0: + error = -EEXIST; + default: + goto out_gunlock; + }; + + if (odip != ndip) { + if (!ndip->i_inode.i_nlink) { + error = -ENOENT; + goto out_gunlock; + } + if (ndip->i_entries == (u32)-1) { + error = -EFBIG; + goto out_gunlock; + } + if (S_ISDIR(ip->i_inode.i_mode) && + ndip->i_inode.i_nlink == (u32)-1) { + error = -EMLINK; + goto out_gunlock; + } + } + } + + /* Check out the dir to be renamed */ + + if (dir_rename) { + error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0); + if (error) + goto out_gunlock; + } + + if (nip == NULL) + alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name); + error = alloc_required; + if (error < 0) + goto out_gunlock; + error = 0; + + if (alloc_required) { + struct gfs2_alloc *al = gfs2_alloc_get(ndip); + if (!al) { + error = -ENOMEM; + goto out_gunlock; + } + + error = gfs2_quota_lock_check(ndip); + if (error) + goto out_alloc; + + al->al_requested = sdp->sd_max_dirres; + + error = gfs2_inplace_reserve_ri(ndip); + if (error) + goto out_gunlock_q; + + error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + + gfs2_rg_blocks(al) + + 4 * RES_DINODE + 4 * RES_LEAF + + RES_STATFS + RES_QUOTA + 4, 0); + if (error) + goto out_ipreserv; + } else { + error = gfs2_trans_begin(sdp, 4 * RES_DINODE + + 5 * RES_LEAF + 4, 0); + if (error) + goto out_gunlock; + } + + /* Remove the target file, if it exists */ + + if (nip) { + struct buffer_head *bh; + error = gfs2_meta_inode_buffer(nip, &bh); + if (error) + goto out_end_trans; + error = gfs2_unlink_inode(ndip, ndentry, bh); + brelse(bh); + } + + if (dir_rename) { + error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR); + if (error) + goto out_end_trans; + } else { + struct buffer_head *dibh; + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out_end_trans; + ip->i_inode.i_ctime = CURRENT_TIME; + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); + brelse(dibh); + } + + error = gfs2_dir_del(odip, odentry); + if (error) + goto out_end_trans; + + error = gfs2_dir_add(ndir, &ndentry->d_name, ip); + if (error) + goto out_end_trans; + +out_end_trans: + gfs2_trans_end(sdp); +out_ipreserv: + if (alloc_required) + gfs2_inplace_release(ndip); +out_gunlock_q: + if (alloc_required) + gfs2_quota_unlock(ndip); +out_alloc: + if (alloc_required) + gfs2_alloc_put(ndip); +out_gunlock: + while (x--) { + gfs2_glock_dq(ghs + x); + gfs2_holder_uninit(ghs + x); + } +out_gunlock_r: + if (r_gh.gh_gl) + gfs2_glock_dq_uninit(&r_gh); +out: + gfs2_glock_dq_uninit(&ri_gh); + return error; +} + +/** + * gfs2_follow_link - Follow a symbolic link + * @dentry: The dentry of the link + * @nd: Data that we pass to vfs_follow_link() + * + * This can handle symlinks of any size. + * + * Returns: 0 on success or error code + */ + +static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct gfs2_inode *ip = GFS2_I(dentry->d_inode); + struct gfs2_holder i_gh; + struct buffer_head *dibh; + unsigned int size; + char *buf; + int error; + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); + error = gfs2_glock_nq(&i_gh); + if (error) { + gfs2_holder_uninit(&i_gh); + nd_set_link(nd, ERR_PTR(error)); + return NULL; + } + + size = (unsigned int)i_size_read(&ip->i_inode); + if (size == 0) { + gfs2_consist_inode(ip); + buf = ERR_PTR(-EIO); + goto out; + } + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) { + buf = ERR_PTR(error); + goto out; + } + + buf = kzalloc(size + 1, GFP_NOFS); + if (!buf) + buf = ERR_PTR(-ENOMEM); + else + memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size); + brelse(dibh); +out: + gfs2_glock_dq_uninit(&i_gh); + nd_set_link(nd, buf); + return NULL; +} + +static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p) +{ + char *s = nd_get_link(nd); + if (!IS_ERR(s)) + kfree(s); +} + +/** + * gfs2_permission - + * @inode: The inode + * @mask: The mask to be tested + * @flags: Indicates whether this is an RCU path walk or not + * + * This may be called from the VFS directly, or from within GFS2 with the + * inode locked, so we look to see if the glock is already locked and only + * lock the glock if its not already been done. + * + * Returns: errno + */ + +int gfs2_permission(struct inode *inode, int mask, unsigned int flags) +{ + struct gfs2_inode *ip; + struct gfs2_holder i_gh; + int error; + int unlock = 0; + + + ip = GFS2_I(inode); + if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return error; + unlock = 1; + } + + if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) + error = -EACCES; + else + error = generic_permission(inode, mask, flags, gfs2_check_acl); + if (unlock) + gfs2_glock_dq_uninit(&i_gh); + + return error; } static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) @@ -901,8 +1594,6 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) * @ip: * @attr: * - * Called with a reference on the vnode. - * * Returns: errno */ @@ -922,60 +1613,280 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) return error; } -void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) -{ - struct gfs2_dinode *str = buf; - - str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); - str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); - str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); - str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); - str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); - str->di_mode = cpu_to_be32(ip->i_inode.i_mode); - str->di_uid = cpu_to_be32(ip->i_inode.i_uid); - str->di_gid = cpu_to_be32(ip->i_inode.i_gid); - str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); - str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); - str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); - str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); - str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); - str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); - - str->di_goal_meta = cpu_to_be64(ip->i_goal); - str->di_goal_data = cpu_to_be64(ip->i_goal); - str->di_generation = cpu_to_be64(ip->i_generation); - - str->di_flags = cpu_to_be32(ip->i_diskflags); - str->di_height = cpu_to_be16(ip->i_height); - str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && - !(ip->i_diskflags & GFS2_DIF_EXHASH) ? - GFS2_FORMAT_DE : 0); - str->di_depth = cpu_to_be16(ip->i_depth); - str->di_entries = cpu_to_be32(ip->i_entries); - - str->di_eattr = cpu_to_be64(ip->i_eattr); - str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); - str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); - str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); -} - -void gfs2_dinode_print(const struct gfs2_inode *ip) -{ - printk(KERN_INFO " no_formal_ino = %llu\n", - (unsigned long long)ip->i_no_formal_ino); - printk(KERN_INFO " no_addr = %llu\n", - (unsigned long long)ip->i_no_addr); - printk(KERN_INFO " i_size = %llu\n", - (unsigned long long)i_size_read(&ip->i_inode)); - printk(KERN_INFO " blocks = %llu\n", - (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); - printk(KERN_INFO " i_goal = %llu\n", - (unsigned long long)ip->i_goal); - printk(KERN_INFO " i_diskflags = 0x%.8X\n", ip->i_diskflags); - printk(KERN_INFO " i_height = %u\n", ip->i_height); - printk(KERN_INFO " i_depth = %u\n", ip->i_depth); - printk(KERN_INFO " i_entries = %u\n", ip->i_entries); - printk(KERN_INFO " i_eattr = %llu\n", - (unsigned long long)ip->i_eattr); +static int setattr_chown(struct inode *inode, struct iattr *attr) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + u32 ouid, ogid, nuid, ngid; + int error; + + ouid = inode->i_uid; + ogid = inode->i_gid; + nuid = attr->ia_uid; + ngid = attr->ia_gid; + + if (!(attr->ia_valid & ATTR_UID) || ouid == nuid) + ouid = nuid = NO_QUOTA_CHANGE; + if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) + ogid = ngid = NO_QUOTA_CHANGE; + + if (!gfs2_alloc_get(ip)) + return -ENOMEM; + + error = gfs2_quota_lock(ip, nuid, ngid); + if (error) + goto out_alloc; + + if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { + error = gfs2_quota_check(ip, nuid, ngid); + if (error) + goto out_gunlock_q; + } + + error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0); + if (error) + goto out_gunlock_q; + + error = gfs2_setattr_simple(ip, attr); + if (error) + goto out_end_trans; + + if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { + u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); + gfs2_quota_change(ip, -blocks, ouid, ogid); + gfs2_quota_change(ip, blocks, nuid, ngid); + } + +out_end_trans: + gfs2_trans_end(sdp); +out_gunlock_q: + gfs2_quota_unlock(ip); +out_alloc: + gfs2_alloc_put(ip); + return error; } +/** + * gfs2_setattr - Change attributes on an inode + * @dentry: The dentry which is changing + * @attr: The structure describing the change + * + * The VFS layer wants to change one or more of an inodes attributes. Write + * that change out to disk. + * + * Returns: errno + */ + +static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder i_gh; + int error; + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + return error; + + error = -EPERM; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto out; + + error = inode_change_ok(inode, attr); + if (error) + goto out; + + if (attr->ia_valid & ATTR_SIZE) + error = gfs2_setattr_size(inode, attr->ia_size); + else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) + error = setattr_chown(inode, attr); + else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) + error = gfs2_acl_chmod(ip, attr); + else + error = gfs2_setattr_simple(ip, attr); + +out: + gfs2_glock_dq_uninit(&i_gh); + if (!error) + mark_inode_dirty(inode); + return error; +} + +/** + * gfs2_getattr - Read out an inode's attributes + * @mnt: The vfsmount the inode is being accessed from + * @dentry: The dentry to stat + * @stat: The inode's stats + * + * This may be called from the VFS directly, or from within GFS2 with the + * inode locked, so we look to see if the glock is already locked and only + * lock the glock if its not already been done. Note that its the NFS + * readdirplus operation which causes this to be called (from filldir) + * with the glock already held. + * + * Returns: errno + */ + +static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder gh; + int error; + int unlock = 0; + + if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); + if (error) + return error; + unlock = 1; + } + + generic_fillattr(inode, stat); + if (unlock) + gfs2_glock_dq_uninit(&gh); + + return 0; +} + +static int gfs2_setxattr(struct dentry *dentry, const char *name, + const void *data, size_t size, int flags) +{ + struct inode *inode = dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder gh; + int ret; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + ret = gfs2_glock_nq(&gh); + if (ret == 0) { + ret = generic_setxattr(dentry, name, data, size, flags); + gfs2_glock_dq(&gh); + } + gfs2_holder_uninit(&gh); + return ret; +} + +static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name, + void *data, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder gh; + int ret; + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); + ret = gfs2_glock_nq(&gh); + if (ret == 0) { + ret = generic_getxattr(dentry, name, data, size); + gfs2_glock_dq(&gh); + } + gfs2_holder_uninit(&gh); + return ret; +} + +static int gfs2_removexattr(struct dentry *dentry, const char *name) +{ + struct inode *inode = dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder gh; + int ret; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + ret = gfs2_glock_nq(&gh); + if (ret == 0) { + ret = generic_removexattr(dentry, name); + gfs2_glock_dq(&gh); + } + gfs2_holder_uninit(&gh); + return ret; +} + +static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder gh; + int ret; + + ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); + if (ret) + return ret; + + mutex_lock(&inode->i_mutex); + + ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); + if (ret) + goto out; + + if (gfs2_is_stuffed(ip)) { + u64 phys = ip->i_no_addr << inode->i_blkbits; + u64 size = i_size_read(inode); + u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED| + FIEMAP_EXTENT_DATA_INLINE; + phys += sizeof(struct gfs2_dinode); + phys += start; + if (start + len > size) + len = size - start; + if (start < size) + ret = fiemap_fill_next_extent(fieinfo, start, phys, + len, flags); + if (ret == 1) + ret = 0; + } else { + ret = __generic_block_fiemap(inode, fieinfo, start, len, + gfs2_block_map); + } + + gfs2_glock_dq_uninit(&gh); +out: + mutex_unlock(&inode->i_mutex); + return ret; +} + +const struct inode_operations gfs2_file_iops = { + .permission = gfs2_permission, + .setattr = gfs2_setattr, + .getattr = gfs2_getattr, + .setxattr = gfs2_setxattr, + .getxattr = gfs2_getxattr, + .listxattr = gfs2_listxattr, + .removexattr = gfs2_removexattr, + .fiemap = gfs2_fiemap, +}; + +const struct inode_operations gfs2_dir_iops = { + .create = gfs2_create, + .lookup = gfs2_lookup, + .link = gfs2_link, + .unlink = gfs2_unlink, + .symlink = gfs2_symlink, + .mkdir = gfs2_mkdir, + .rmdir = gfs2_unlink, + .mknod = gfs2_mknod, + .rename = gfs2_rename, + .permission = gfs2_permission, + .setattr = gfs2_setattr, + .getattr = gfs2_getattr, + .setxattr = gfs2_setxattr, + .getxattr = gfs2_getxattr, + .listxattr = gfs2_listxattr, + .removexattr = gfs2_removexattr, + .fiemap = gfs2_fiemap, +}; + +const struct inode_operations gfs2_symlink_iops = { + .readlink = generic_readlink, + .follow_link = gfs2_follow_link, + .put_link = gfs2_put_link, + .permission = gfs2_permission, + .setattr = gfs2_setattr, + .getattr = gfs2_getattr, + .setxattr = gfs2_setxattr, + .getxattr = gfs2_getxattr, + .listxattr = gfs2_listxattr, + .removexattr = gfs2_removexattr, + .fiemap = gfs2_fiemap, +}; + diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 3e00a66e7cb..31606076f70 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -97,26 +97,21 @@ err: } extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, - u64 no_addr, u64 no_formal_ino); + u64 no_addr, u64 no_formal_ino, + int non_block); extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, u64 *no_formal_ino, unsigned int blktype); -extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); +extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonblock); extern int gfs2_inode_refresh(struct gfs2_inode *ip); -extern int gfs2_dinode_dealloc(struct gfs2_inode *inode); -extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff); extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, int is_root); -extern struct inode *gfs2_createi(struct gfs2_holder *ghs, - const struct qstr *name, - unsigned int mode, dev_t dev); extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags); extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); -extern void gfs2_dinode_print(const struct gfs2_inode *ip); extern const struct inode_operations gfs2_file_iops; extern const struct inode_operations gfs2_dir_iops; diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 6e493aee28f..98c80d8c2a6 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -22,7 +22,6 @@ static void gdlm_ast(void *arg) { struct gfs2_glock *gl = arg; unsigned ret = gl->gl_state; - struct gfs2_sbd *sdp = gl->gl_sbd; BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); @@ -31,12 +30,7 @@ static void gdlm_ast(void *arg) switch (gl->gl_lksb.sb_status) { case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ - if (gl->gl_ops->go_flags & GLOF_ASPACE) - kmem_cache_free(gfs2_glock_aspace_cachep, gl); - else - kmem_cache_free(gfs2_glock_cachep, gl); - if (atomic_dec_and_test(&sdp->sd_glock_disposal)) - wake_up(&sdp->sd_glock_wait); + gfs2_glock_free(gl); return; case -DLM_ECANCEL: /* Cancel while getting lock */ ret |= LM_OUT_CANCELED; @@ -164,16 +158,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); } -static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) +static void gdlm_put_lock(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; if (gl->gl_lksb.sb_lkid == 0) { - kmem_cache_free(cachep, gl); - if (atomic_dec_and_test(&sdp->sd_glock_disposal)) - wake_up(&sdp->sd_glock_wait); + gfs2_glock_free(gl); return; } diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index eb01f3575e1..903115f2bb3 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -18,6 +18,7 @@ #include <linux/kthread.h> #include <linux/freezer.h> #include <linux/bio.h> +#include <linux/writeback.h> #include "gfs2.h" #include "incore.h" @@ -67,7 +68,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, * @mapping: The associated mapping (maybe NULL) * @bd: The gfs2_bufdata to remove * - * The log lock _must_ be held when calling this function + * The ail lock _must_ be held when calling this function * */ @@ -83,55 +84,97 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd) /** * gfs2_ail1_start_one - Start I/O on a part of the AIL * @sdp: the filesystem - * @tr: the part of the AIL + * @wbc: The writeback control structure + * @ai: The ail structure * */ -static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) -__releases(&sdp->sd_log_lock) -__acquires(&sdp->sd_log_lock) +static int gfs2_ail1_start_one(struct gfs2_sbd *sdp, + struct writeback_control *wbc, + struct gfs2_ail *ai) +__releases(&sdp->sd_ail_lock) +__acquires(&sdp->sd_ail_lock) { + struct gfs2_glock *gl = NULL; + struct address_space *mapping; struct gfs2_bufdata *bd, *s; struct buffer_head *bh; - int retry; - do { - retry = 0; + list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) { + bh = bd->bd_bh; - list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, - bd_ail_st_list) { - bh = bd->bd_bh; + gfs2_assert(sdp, bd->bd_ail == ai); - gfs2_assert(sdp, bd->bd_ail == ai); + if (!buffer_busy(bh)) { + if (!buffer_uptodate(bh)) + gfs2_io_error_bh(sdp, bh); + list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); + continue; + } - if (!buffer_busy(bh)) { - if (!buffer_uptodate(bh)) - gfs2_io_error_bh(sdp, bh); - list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); - continue; - } + if (!buffer_dirty(bh)) + continue; + if (gl == bd->bd_gl) + continue; + gl = bd->bd_gl; + list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); + mapping = bh->b_page->mapping; + if (!mapping) + continue; + spin_unlock(&sdp->sd_ail_lock); + generic_writepages(mapping, wbc); + spin_lock(&sdp->sd_ail_lock); + if (wbc->nr_to_write <= 0) + break; + return 1; + } - if (!buffer_dirty(bh)) - continue; + return 0; +} - list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); - get_bh(bh); - gfs2_log_unlock(sdp); - lock_buffer(bh); - if (test_clear_buffer_dirty(bh)) { - bh->b_end_io = end_buffer_write_sync; - submit_bh(WRITE_SYNC_PLUG, bh); - } else { - unlock_buffer(bh); - brelse(bh); - } - gfs2_log_lock(sdp); +/** + * gfs2_ail1_flush - start writeback of some ail1 entries + * @sdp: The super block + * @wbc: The writeback control structure + * + * Writes back some ail1 entries, according to the limits in the + * writeback control structure + */ - retry = 1; +void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) +{ + struct list_head *head = &sdp->sd_ail1_list; + struct gfs2_ail *ai; + + trace_gfs2_ail_flush(sdp, wbc, 1); + spin_lock(&sdp->sd_ail_lock); +restart: + list_for_each_entry_reverse(ai, head, ai_list) { + if (wbc->nr_to_write <= 0) break; - } - } while (retry); + if (gfs2_ail1_start_one(sdp, wbc, ai)) + goto restart; + } + spin_unlock(&sdp->sd_ail_lock); + trace_gfs2_ail_flush(sdp, wbc, 0); +} + +/** + * gfs2_ail1_start - start writeback of all ail1 entries + * @sdp: The superblock + */ + +static void gfs2_ail1_start(struct gfs2_sbd *sdp) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .nr_to_write = LONG_MAX, + .range_start = 0, + .range_end = LLONG_MAX, + }; + + return gfs2_ail1_flush(sdp, &wbc); } /** @@ -141,7 +184,7 @@ __acquires(&sdp->sd_log_lock) * */ -static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags) +static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) { struct gfs2_bufdata *bd, *s; struct buffer_head *bh; @@ -149,76 +192,63 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) { bh = bd->bd_bh; - gfs2_assert(sdp, bd->bd_ail == ai); - - if (buffer_busy(bh)) { - if (flags & DIO_ALL) - continue; - else - break; - } - + if (buffer_busy(bh)) + continue; if (!buffer_uptodate(bh)) gfs2_io_error_bh(sdp, bh); - list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); } - return list_empty(&ai->ai_ail1_list); } -static void gfs2_ail1_start(struct gfs2_sbd *sdp) -{ - struct list_head *head; - u64 sync_gen; - struct gfs2_ail *ai; - int done = 0; - - gfs2_log_lock(sdp); - head = &sdp->sd_ail1_list; - if (list_empty(head)) { - gfs2_log_unlock(sdp); - return; - } - sync_gen = sdp->sd_ail_sync_gen++; - - while(!done) { - done = 1; - list_for_each_entry_reverse(ai, head, ai_list) { - if (ai->ai_sync_gen >= sync_gen) - continue; - ai->ai_sync_gen = sync_gen; - gfs2_ail1_start_one(sdp, ai); /* This may drop log lock */ - done = 0; - break; - } - } - - gfs2_log_unlock(sdp); -} +/** + * gfs2_ail1_empty - Try to empty the ail1 lists + * @sdp: The superblock + * + * Tries to empty the ail1 lists, starting with the oldest first + */ -static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) +static int gfs2_ail1_empty(struct gfs2_sbd *sdp) { struct gfs2_ail *ai, *s; int ret; - gfs2_log_lock(sdp); - + spin_lock(&sdp->sd_ail_lock); list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { - if (gfs2_ail1_empty_one(sdp, ai, flags)) + gfs2_ail1_empty_one(sdp, ai); + if (list_empty(&ai->ai_ail1_list)) list_move(&ai->ai_list, &sdp->sd_ail2_list); - else if (!(flags & DIO_ALL)) + else break; } - ret = list_empty(&sdp->sd_ail1_list); - - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_ail_lock); return ret; } +static void gfs2_ail1_wait(struct gfs2_sbd *sdp) +{ + struct gfs2_ail *ai; + struct gfs2_bufdata *bd; + struct buffer_head *bh; + + spin_lock(&sdp->sd_ail_lock); + list_for_each_entry_reverse(ai, &sdp->sd_ail1_list, ai_list) { + list_for_each_entry(bd, &ai->ai_ail1_list, bd_ail_st_list) { + bh = bd->bd_bh; + if (!buffer_locked(bh)) + continue; + get_bh(bh); + spin_unlock(&sdp->sd_ail_lock); + wait_on_buffer(bh); + brelse(bh); + return; + } + } + spin_unlock(&sdp->sd_ail_lock); +} /** * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced @@ -247,7 +277,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) int wrap = (new_tail < old_tail); int a, b, rm; - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_ail_lock); list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) { a = (old_tail <= ai->ai_first); @@ -263,7 +293,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) kfree(ai); } - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_ail_lock); } /** @@ -421,7 +451,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp) struct gfs2_ail *ai; unsigned int tail; - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_ail_lock); if (list_empty(&sdp->sd_ail1_list)) { tail = sdp->sd_log_head; @@ -430,7 +460,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp) tail = ai->ai_first; } - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_ail_lock); return tail; } @@ -574,7 +604,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) set_buffer_uptodate(bh); clear_buffer_dirty(bh); - gfs2_ail1_empty(sdp, 0); + gfs2_ail1_empty(sdp); tail = current_tail(sdp); lh = (struct gfs2_log_header *)bh->b_data; @@ -647,7 +677,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) lock_buffer(bh); if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; - submit_bh(WRITE_SYNC_PLUG, bh); + submit_bh(WRITE_SYNC, bh); } else { unlock_buffer(bh); brelse(bh); @@ -743,10 +773,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) sdp->sd_log_commited_databuf = 0; sdp->sd_log_commited_revoke = 0; + spin_lock(&sdp->sd_ail_lock); if (!list_empty(&ai->ai_ail1_list)) { list_add(&ai->ai_list, &sdp->sd_ail1_list); ai = NULL; } + spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); trace_gfs2_log_flush(sdp, 0); up_write(&sdp->sd_log_flush_lock); @@ -867,9 +899,9 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp) gfs2_log_flush(sdp, NULL); for (;;) { gfs2_ail1_start(sdp); - if (gfs2_ail1_empty(sdp, DIO_ALL)) + gfs2_ail1_wait(sdp); + if (gfs2_ail1_empty(sdp)) break; - msleep(10); } } @@ -903,20 +935,20 @@ int gfs2_logd(void *data) preflush = atomic_read(&sdp->sd_log_pinned); if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { - gfs2_ail1_empty(sdp, DIO_ALL); + gfs2_ail1_empty(sdp); gfs2_log_flush(sdp, NULL); - gfs2_ail1_empty(sdp, DIO_ALL); } if (gfs2_ail_flush_reqd(sdp)) { gfs2_ail1_start(sdp); - io_schedule(); - gfs2_ail1_empty(sdp, 0); + gfs2_ail1_wait(sdp); + gfs2_ail1_empty(sdp); gfs2_log_flush(sdp, NULL); - gfs2_ail1_empty(sdp, DIO_ALL); } - wake_up(&sdp->sd_log_waitq); + if (!gfs2_ail_flush_reqd(sdp)) + wake_up(&sdp->sd_log_waitq); + t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; if (freezing(current)) refrigerator(); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 0d007f92023..ab0621698b7 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -12,6 +12,7 @@ #include <linux/list.h> #include <linux/spinlock.h> +#include <linux/writeback.h> #include "incore.h" /** @@ -59,6 +60,7 @@ extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); +extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc); extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index bf33f822058..05bbb124699 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -40,7 +40,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) { struct gfs2_bufdata *bd; - gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); + BUG_ON(!current->journal_info); clear_buffer_dirty(bh); if (test_set_buffer_pinned(bh)) @@ -51,8 +51,10 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) /* If this buffer is in the AIL and it has already been written * to in-place disk block, remove it from the AIL. */ + spin_lock(&sdp->sd_ail_lock); if (bd->bd_ail) list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); + spin_unlock(&sdp->sd_ail_lock); get_bh(bh); atomic_inc(&sdp->sd_log_pinned); trace_gfs2_pin(bd, 1); @@ -63,6 +65,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) * @sdp: the filesystem the buffer belongs to * @bh: The buffer to unpin * @ai: + * @flags: The inode dirty flags * */ @@ -71,16 +74,14 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, { struct gfs2_bufdata *bd = bh->b_private; - gfs2_assert_withdraw(sdp, buffer_uptodate(bh)); - - if (!buffer_pinned(bh)) - gfs2_assert_withdraw(sdp, 0); + BUG_ON(!buffer_uptodate(bh)); + BUG_ON(!buffer_pinned(bh)); lock_buffer(bh); mark_buffer_dirty(bh); clear_buffer_pinned(bh); - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_ail_lock); if (bd->bd_ail) { list_del(&bd->bd_ail_st_list); brelse(bh); @@ -91,9 +92,10 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, } bd->bd_ail = ai; list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); + spin_unlock(&sdp->sd_ail_lock); + clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); trace_gfs2_pin(bd, 0); - gfs2_log_unlock(sdp); unlock_buffer(bh); atomic_dec(&sdp->sd_log_pinned); } @@ -200,7 +202,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) } gfs2_log_unlock(sdp); - submit_bh(WRITE_SYNC_PLUG, bh); + submit_bh(WRITE_SYNC, bh); gfs2_log_lock(sdp); n = 0; @@ -210,7 +212,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) gfs2_log_unlock(sdp); lock_buffer(bd2->bd_bh); bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); - submit_bh(WRITE_SYNC_PLUG, bh); + submit_bh(WRITE_SYNC, bh); gfs2_log_lock(sdp); if (++n >= num) break; @@ -318,12 +320,16 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) { + struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); + struct gfs2_glock *gl = bd->bd_gl; struct gfs2_trans *tr; tr = current->journal_info; tr->tr_touched = 1; tr->tr_num_revoke++; sdp->sd_log_num_revoke++; + atomic_inc(&gl->gl_revokes); + set_bit(GLF_LFLUSH, &gl->gl_flags); list_add(&le->le_list, &sdp->sd_log_le_revoke); } @@ -346,13 +352,11 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke); offset = sizeof(struct gfs2_log_descriptor); - while (!list_empty(head)) { - bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); - list_del_init(&bd->bd_le.le_list); + list_for_each_entry(bd, head, bd_le.le_list) { sdp->sd_log_num_revoke--; if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { - submit_bh(WRITE_SYNC_PLUG, bh); + submit_bh(WRITE_SYNC, bh); bh = gfs2_log_get_buf(sdp); mh = (struct gfs2_meta_header *)bh->b_data; @@ -363,13 +367,27 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) } *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno); - kmem_cache_free(gfs2_bufdata_cachep, bd); - offset += sizeof(u64); } gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); - submit_bh(WRITE_SYNC_PLUG, bh); + submit_bh(WRITE_SYNC, bh); +} + +static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +{ + struct list_head *head = &sdp->sd_log_le_revoke; + struct gfs2_bufdata *bd; + struct gfs2_glock *gl; + + while (!list_empty(head)) { + bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); + list_del_init(&bd->bd_le.le_list); + gl = bd->bd_gl; + atomic_dec(&gl->gl_revokes); + clear_bit(GLF_LFLUSH, &gl->gl_flags); + kmem_cache_free(gfs2_bufdata_cachep, bd); + } } static void revoke_lo_before_scan(struct gfs2_jdesc *jd, @@ -571,7 +589,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, ptr = bh_log_ptr(bh); get_bh(bh); - submit_bh(WRITE_SYNC_PLUG, bh); + submit_bh(WRITE_SYNC, bh); gfs2_log_lock(sdp); while(!list_empty(list)) { bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list); @@ -597,7 +615,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, } else { bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh); } - submit_bh(WRITE_SYNC_PLUG, bh1); + submit_bh(WRITE_SYNC, bh1); gfs2_log_lock(sdp); ptr += 2; } @@ -745,6 +763,7 @@ const struct gfs2_log_operations gfs2_buf_lops = { const struct gfs2_log_operations gfs2_revoke_lops = { .lo_add = revoke_lo_add, .lo_before_commit = revoke_lo_before_commit, + .lo_after_commit = revoke_lo_after_commit, .lo_before_scan = revoke_lo_before_scan, .lo_scan_elements = revoke_lo_scan_elements, .lo_after_scan = revoke_lo_after_scan, diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index ebef7ab6e17..c2b34cd2abe 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -14,6 +14,8 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/gfs2_ondisk.h> +#include <linux/rcupdate.h> +#include <linux/rculist_bl.h> #include <asm/atomic.h> #include "gfs2.h" @@ -45,12 +47,13 @@ static void gfs2_init_glock_once(void *foo) { struct gfs2_glock *gl = foo; - INIT_HLIST_NODE(&gl->gl_list); + INIT_HLIST_BL_NODE(&gl->gl_list); spin_lock_init(&gl->gl_spin); INIT_LIST_HEAD(&gl->gl_holders); INIT_LIST_HEAD(&gl->gl_lru); INIT_LIST_HEAD(&gl->gl_ail_list); atomic_set(&gl->gl_ail_count, 0); + atomic_set(&gl->gl_revokes, 0); } static void gfs2_init_gl_aspace_once(void *foo) @@ -59,14 +62,7 @@ static void gfs2_init_gl_aspace_once(void *foo) struct address_space *mapping = (struct address_space *)(gl + 1); gfs2_init_glock_once(gl); - memset(mapping, 0, sizeof(*mapping)); - INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); - spin_lock_init(&mapping->tree_lock); - spin_lock_init(&mapping->i_mmap_lock); - INIT_LIST_HEAD(&mapping->private_list); - spin_lock_init(&mapping->private_lock); - INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); - INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); + address_space_init_once(mapping); } /** @@ -144,13 +140,13 @@ static int __init init_gfs2_fs(void) error = -ENOMEM; gfs_recovery_wq = alloc_workqueue("gfs_recovery", - WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0); + WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); if (!gfs_recovery_wq) goto fail_wq; gfs2_register_debugfs(); - printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__); + printk("GFS2 installed\n"); return 0; @@ -198,6 +194,8 @@ static void __exit exit_gfs2_fs(void) unregister_filesystem(&gfs2meta_fs_type); destroy_workqueue(gfs_recovery_wq); + rcu_barrier(); + kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); kmem_cache_destroy(gfs2_bufdata_cachep); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 939739c7b3f..747238cd9f9 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -31,13 +31,14 @@ #include "rgrp.h" #include "trans.h" #include "util.h" +#include "trace_gfs2.h" static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) { struct buffer_head *bh, *head; int nr_underway = 0; int write_op = REQ_META | - (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC_PLUG : WRITE); + (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); BUG_ON(!PageLocked(page)); BUG_ON(!page_has_buffers(page)); @@ -94,7 +95,6 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb const struct address_space_operations gfs2_meta_aops = { .writepage = gfs2_aspace_writepage, .releasepage = gfs2_releasepage, - .sync_page = block_sync_page, }; /** @@ -311,6 +311,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int struct gfs2_bufdata *bd = bh->b_private; if (test_clear_buffer_pinned(bh)) { + trace_gfs2_pin(bd, 0); atomic_dec(&sdp->sd_log_pinned); list_del_init(&bd->bd_le.le_list); if (meta) { @@ -326,6 +327,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int brelse(bh); } if (bd) { + spin_lock(&sdp->sd_ail_lock); if (bd->bd_ail) { gfs2_remove_from_ail(bd); bh->b_private = NULL; @@ -333,6 +335,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int bd->bd_blkno = bh->b_blocknr; gfs2_trans_add_revoke(sdp, bd); } + spin_unlock(&sdp->sd_ail_lock); } clear_buffer_dirty(bh); clear_buffer_uptodate(bh); diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index 6a1d9ba1641..22c52659313 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -77,8 +77,6 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen); #define buffer_busy(bh) \ ((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned))) -#define buffer_in_io(bh) \ -((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock))) #endif /* __DIO_DOT_H__ */ diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 777927ce6f7..8ac9ae189b5 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -99,6 +99,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_waitqueue_head(&sdp->sd_log_waitq); init_waitqueue_head(&sdp->sd_logd_waitq); + spin_lock_init(&sdp->sd_ail_lock); INIT_LIST_HEAD(&sdp->sd_ail1_list); INIT_LIST_HEAD(&sdp->sd_ail2_list); @@ -125,8 +126,10 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) * changed. */ -static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) +static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) { + struct gfs2_sb_host *sb = &sdp->sd_sb; + if (sb->sb_magic != GFS2_MAGIC || sb->sb_type != GFS2_METATYPE_SB) { if (!silent) @@ -156,8 +159,10 @@ static void end_bio_io_page(struct bio *bio, int error) unlock_page(page); } -static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) +static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf) { + struct gfs2_sb_host *sb = &sdp->sd_sb; + struct super_block *s = sdp->sd_vfs; const struct gfs2_sb *str = buf; sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); @@ -174,7 +179,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); - memcpy(sb->sb_uuid, str->sb_uuid, 16); + memcpy(s->s_uuid, str->sb_uuid, 16); } /** @@ -196,7 +201,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) * Returns: 0 on success or error */ -static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) +static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) { struct super_block *sb = sdp->sd_vfs; struct gfs2_sb *p; @@ -226,10 +231,10 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) return -EIO; } p = kmap(page); - gfs2_sb_in(&sdp->sd_sb, p); + gfs2_sb_in(sdp, p); kunmap(page); __free_page(page); - return 0; + return gfs2_check_sb(sdp, silent); } /** @@ -246,17 +251,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent) unsigned int x; int error; - error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); + error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent); if (error) { if (!silent) fs_err(sdp, "can't read superblock\n"); return error; } - error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); - if (error) - return error; - sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT; sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; @@ -339,14 +340,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent) /* Try to autodetect */ if (!proto[0] || !table[0]) { - error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); + error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent); if (error) return error; - error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); - if (error) - goto out; - if (!proto[0]) proto = sdp->sd_sb.sb_lockproto; if (!table[0]) @@ -363,7 +360,6 @@ static int init_names(struct gfs2_sbd *sdp, int silent) while ((table = strchr(table, '/'))) *table = '_'; -out: return error; } @@ -429,7 +425,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, struct dentry *dentry; struct inode *inode; - inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); + inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); if (IS_ERR(inode)) { fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); return PTR_ERR(inode); @@ -928,17 +924,9 @@ static const match_table_t nolock_tokens = { { Opt_err, NULL }, }; -static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) -{ - struct gfs2_sbd *sdp = gl->gl_sbd; - kmem_cache_free(cachep, gl); - if (atomic_dec_and_test(&sdp->sd_glock_disposal)) - wake_up(&sdp->sd_glock_wait); -} - static const struct lm_lockops nolock_ops = { .lm_proto_name = "lock_nolock", - .lm_put_lock = nolock_put_lock, + .lm_put_lock = gfs2_glock_free, .lm_tokens = &nolock_tokens, }; @@ -1126,8 +1114,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (sdp->sd_args.ar_statfs_quantum) { sdp->sd_tune.gt_statfs_slow = 0; sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum; - } - else { + } else { sdp->sd_tune.gt_statfs_slow = 1; sdp->sd_tune.gt_statfs_quantum = 30; } diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c deleted file mode 100644 index d8b26ac2e20..00000000000 --- a/fs/gfs2/ops_inode.c +++ /dev/null @@ -1,1344 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/completion.h> -#include <linux/buffer_head.h> -#include <linux/namei.h> -#include <linux/mm.h> -#include <linux/xattr.h> -#include <linux/posix_acl.h> -#include <linux/gfs2_ondisk.h> -#include <linux/crc32.h> -#include <linux/fiemap.h> -#include <asm/uaccess.h> - -#include "gfs2.h" -#include "incore.h" -#include "acl.h" -#include "bmap.h" -#include "dir.h" -#include "xattr.h" -#include "glock.h" -#include "inode.h" -#include "meta_io.h" -#include "quota.h" -#include "rgrp.h" -#include "trans.h" -#include "util.h" -#include "super.h" - -/** - * gfs2_create - Create a file - * @dir: The directory in which to create the file - * @dentry: The dentry of the new file - * @mode: The mode of the new file - * - * Returns: errno - */ - -static int gfs2_create(struct inode *dir, struct dentry *dentry, - int mode, struct nameidata *nd) -{ - struct gfs2_inode *dip = GFS2_I(dir); - struct gfs2_sbd *sdp = GFS2_SB(dir); - struct gfs2_holder ghs[2]; - struct inode *inode; - - gfs2_holder_init(dip->i_gl, 0, 0, ghs); - - for (;;) { - inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0); - if (!IS_ERR(inode)) { - gfs2_trans_end(sdp); - if (dip->i_alloc->al_rgd) - gfs2_inplace_release(dip); - gfs2_quota_unlock(dip); - gfs2_alloc_put(dip); - gfs2_glock_dq_uninit_m(2, ghs); - mark_inode_dirty(inode); - break; - } else if (PTR_ERR(inode) != -EEXIST || - (nd && nd->flags & LOOKUP_EXCL)) { - gfs2_holder_uninit(ghs); - return PTR_ERR(inode); - } - - inode = gfs2_lookupi(dir, &dentry->d_name, 0); - if (inode) { - if (!IS_ERR(inode)) { - gfs2_holder_uninit(ghs); - break; - } else { - gfs2_holder_uninit(ghs); - return PTR_ERR(inode); - } - } - } - - d_instantiate(dentry, inode); - - return 0; -} - -/** - * gfs2_lookup - Look up a filename in a directory and return its inode - * @dir: The directory inode - * @dentry: The dentry of the new inode - * @nd: passed from Linux VFS, ignored by us - * - * Called by the VFS layer. Lock dir and call gfs2_lookupi() - * - * Returns: errno - */ - -static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) -{ - struct inode *inode = NULL; - - inode = gfs2_lookupi(dir, &dentry->d_name, 0); - if (inode && IS_ERR(inode)) - return ERR_CAST(inode); - - if (inode) { - struct gfs2_glock *gl = GFS2_I(inode)->i_gl; - struct gfs2_holder gh; - int error; - error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); - if (error) { - iput(inode); - return ERR_PTR(error); - } - gfs2_glock_dq_uninit(&gh); - return d_splice_alias(inode, dentry); - } - d_add(dentry, inode); - - return NULL; -} - -/** - * gfs2_link - Link to a file - * @old_dentry: The inode to link - * @dir: Add link to this directory - * @dentry: The name of the link - * - * Link the inode in "old_dentry" into the directory "dir" with the - * name in "dentry". - * - * Returns: errno - */ - -static int gfs2_link(struct dentry *old_dentry, struct inode *dir, - struct dentry *dentry) -{ - struct gfs2_inode *dip = GFS2_I(dir); - struct gfs2_sbd *sdp = GFS2_SB(dir); - struct inode *inode = old_dentry->d_inode; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder ghs[2]; - int alloc_required; - int error; - - if (S_ISDIR(inode->i_mode)) - return -EPERM; - - gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); - - error = gfs2_glock_nq(ghs); /* parent */ - if (error) - goto out_parent; - - error = gfs2_glock_nq(ghs + 1); /* child */ - if (error) - goto out_child; - - error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0); - if (error) - goto out_gunlock; - - error = gfs2_dir_check(dir, &dentry->d_name, NULL); - switch (error) { - case -ENOENT: - break; - case 0: - error = -EEXIST; - default: - goto out_gunlock; - } - - error = -EINVAL; - if (!dip->i_inode.i_nlink) - goto out_gunlock; - error = -EFBIG; - if (dip->i_entries == (u32)-1) - goto out_gunlock; - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out_gunlock; - error = -EINVAL; - if (!ip->i_inode.i_nlink) - goto out_gunlock; - error = -EMLINK; - if (ip->i_inode.i_nlink == (u32)-1) - goto out_gunlock; - - alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name); - if (error < 0) - goto out_gunlock; - error = 0; - - if (alloc_required) { - struct gfs2_alloc *al = gfs2_alloc_get(dip); - if (!al) { - error = -ENOMEM; - goto out_gunlock; - } - - error = gfs2_quota_lock_check(dip); - if (error) - goto out_alloc; - - al->al_requested = sdp->sd_max_dirres; - - error = gfs2_inplace_reserve(dip); - if (error) - goto out_gunlock_q; - - error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(al) + - 2 * RES_DINODE + RES_STATFS + - RES_QUOTA, 0); - if (error) - goto out_ipres; - } else { - error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0); - if (error) - goto out_ipres; - } - - error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode)); - if (error) - goto out_end_trans; - - error = gfs2_change_nlink(ip, +1); - -out_end_trans: - gfs2_trans_end(sdp); -out_ipres: - if (alloc_required) - gfs2_inplace_release(dip); -out_gunlock_q: - if (alloc_required) - gfs2_quota_unlock(dip); -out_alloc: - if (alloc_required) - gfs2_alloc_put(dip); -out_gunlock: - gfs2_glock_dq(ghs + 1); -out_child: - gfs2_glock_dq(ghs); -out_parent: - gfs2_holder_uninit(ghs); - gfs2_holder_uninit(ghs + 1); - if (!error) { - ihold(inode); - d_instantiate(dentry, inode); - mark_inode_dirty(inode); - } - return error; -} - -/* - * gfs2_unlink_ok - check to see that a inode is still in a directory - * @dip: the directory - * @name: the name of the file - * @ip: the inode - * - * Assumes that the lock on (at least) @dip is held. - * - * Returns: 0 if the parent/child relationship is correct, errno if it isn't - */ - -static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, - const struct gfs2_inode *ip) -{ - int error; - - if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) - return -EPERM; - - if ((dip->i_inode.i_mode & S_ISVTX) && - dip->i_inode.i_uid != current_fsuid() && - ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) - return -EPERM; - - if (IS_APPEND(&dip->i_inode)) - return -EPERM; - - error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0); - if (error) - return error; - - error = gfs2_dir_check(&dip->i_inode, name, ip); - if (error) - return error; - - return 0; -} - -/** - * gfs2_unlink - Unlink a file - * @dir: The inode of the directory containing the file to unlink - * @dentry: The file itself - * - * Unlink a file. Call gfs2_unlinki() - * - * Returns: errno - */ - -static int gfs2_unlink(struct inode *dir, struct dentry *dentry) -{ - struct gfs2_inode *dip = GFS2_I(dir); - struct gfs2_sbd *sdp = GFS2_SB(dir); - struct gfs2_inode *ip = GFS2_I(dentry->d_inode); - struct gfs2_holder ghs[3]; - struct gfs2_rgrpd *rgd; - struct gfs2_holder ri_gh; - int error; - - error = gfs2_rindex_hold(sdp, &ri_gh); - if (error) - return error; - - gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); - - rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); - gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); - - - error = gfs2_glock_nq(ghs); /* parent */ - if (error) - goto out_parent; - - error = gfs2_glock_nq(ghs + 1); /* child */ - if (error) - goto out_child; - - error = gfs2_glock_nq(ghs + 2); /* rgrp */ - if (error) - goto out_rgrp; - - error = gfs2_unlink_ok(dip, &dentry->d_name, ip); - if (error) - goto out_gunlock; - - error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); - if (error) - goto out_gunlock; - - error = gfs2_dir_del(dip, &dentry->d_name); - if (error) - goto out_end_trans; - - error = gfs2_change_nlink(ip, -1); - -out_end_trans: - gfs2_trans_end(sdp); -out_gunlock: - gfs2_glock_dq(ghs + 2); -out_rgrp: - gfs2_holder_uninit(ghs + 2); - gfs2_glock_dq(ghs + 1); -out_child: - gfs2_holder_uninit(ghs + 1); - gfs2_glock_dq(ghs); -out_parent: - gfs2_holder_uninit(ghs); - gfs2_glock_dq_uninit(&ri_gh); - return error; -} - -/** - * gfs2_symlink - Create a symlink - * @dir: The directory to create the symlink in - * @dentry: The dentry to put the symlink in - * @symname: The thing which the link points to - * - * Returns: errno - */ - -static int gfs2_symlink(struct inode *dir, struct dentry *dentry, - const char *symname) -{ - struct gfs2_inode *dip = GFS2_I(dir), *ip; - struct gfs2_sbd *sdp = GFS2_SB(dir); - struct gfs2_holder ghs[2]; - struct inode *inode; - struct buffer_head *dibh; - int size; - int error; - - /* Must be stuffed with a null terminator for gfs2_follow_link() */ - size = strlen(symname); - if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1) - return -ENAMETOOLONG; - - gfs2_holder_init(dip->i_gl, 0, 0, ghs); - - inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0); - if (IS_ERR(inode)) { - gfs2_holder_uninit(ghs); - return PTR_ERR(inode); - } - - ip = ghs[1].gh_gl->gl_object; - - i_size_write(inode, size); - - error = gfs2_meta_inode_buffer(ip, &dibh); - - if (!gfs2_assert_withdraw(sdp, !error)) { - gfs2_dinode_out(ip, dibh->b_data); - memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, - size); - brelse(dibh); - } - - gfs2_trans_end(sdp); - if (dip->i_alloc->al_rgd) - gfs2_inplace_release(dip); - gfs2_quota_unlock(dip); - gfs2_alloc_put(dip); - - gfs2_glock_dq_uninit_m(2, ghs); - - d_instantiate(dentry, inode); - mark_inode_dirty(inode); - - return 0; -} - -/** - * gfs2_mkdir - Make a directory - * @dir: The parent directory of the new one - * @dentry: The dentry of the new directory - * @mode: The mode of the new directory - * - * Returns: errno - */ - -static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) -{ - struct gfs2_inode *dip = GFS2_I(dir), *ip; - struct gfs2_sbd *sdp = GFS2_SB(dir); - struct gfs2_holder ghs[2]; - struct inode *inode; - struct buffer_head *dibh; - int error; - - gfs2_holder_init(dip->i_gl, 0, 0, ghs); - - inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0); - if (IS_ERR(inode)) { - gfs2_holder_uninit(ghs); - return PTR_ERR(inode); - } - - ip = ghs[1].gh_gl->gl_object; - - ip->i_inode.i_nlink = 2; - i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)); - ip->i_diskflags |= GFS2_DIF_JDATA; - ip->i_entries = 2; - - error = gfs2_meta_inode_buffer(ip, &dibh); - - if (!gfs2_assert_withdraw(sdp, !error)) { - struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; - struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1); - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent); - dent->de_inum = di->di_num; /* already GFS2 endian */ - dent->de_type = cpu_to_be16(DT_DIR); - di->di_entries = cpu_to_be32(1); - - dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); - gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); - - gfs2_inum_out(dip, dent); - dent->de_type = cpu_to_be16(DT_DIR); - - gfs2_dinode_out(ip, di); - - brelse(dibh); - } - - error = gfs2_change_nlink(dip, +1); - gfs2_assert_withdraw(sdp, !error); /* dip already pinned */ - - gfs2_trans_end(sdp); - if (dip->i_alloc->al_rgd) - gfs2_inplace_release(dip); - gfs2_quota_unlock(dip); - gfs2_alloc_put(dip); - - gfs2_glock_dq_uninit_m(2, ghs); - - d_instantiate(dentry, inode); - mark_inode_dirty(inode); - - return 0; -} - -/** - * gfs2_rmdiri - Remove a directory - * @dip: The parent directory of the directory to be removed - * @name: The name of the directory to be removed - * @ip: The GFS2 inode of the directory to be removed - * - * Assumes Glocks on dip and ip are held - * - * Returns: errno - */ - -static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, - struct gfs2_inode *ip) -{ - int error; - - if (ip->i_entries != 2) { - if (gfs2_consist_inode(ip)) - gfs2_dinode_print(ip); - return -EIO; - } - - error = gfs2_dir_del(dip, name); - if (error) - return error; - - error = gfs2_change_nlink(dip, -1); - if (error) - return error; - - error = gfs2_dir_del(ip, &gfs2_qdot); - if (error) - return error; - - error = gfs2_dir_del(ip, &gfs2_qdotdot); - if (error) - return error; - - /* It looks odd, but it really should be done twice */ - error = gfs2_change_nlink(ip, -1); - if (error) - return error; - - error = gfs2_change_nlink(ip, -1); - if (error) - return error; - - return error; -} - -/** - * gfs2_rmdir - Remove a directory - * @dir: The parent directory of the directory to be removed - * @dentry: The dentry of the directory to remove - * - * Remove a directory. Call gfs2_rmdiri() - * - * Returns: errno - */ - -static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) -{ - struct gfs2_inode *dip = GFS2_I(dir); - struct gfs2_sbd *sdp = GFS2_SB(dir); - struct gfs2_inode *ip = GFS2_I(dentry->d_inode); - struct gfs2_holder ghs[3]; - struct gfs2_rgrpd *rgd; - struct gfs2_holder ri_gh; - int error; - - error = gfs2_rindex_hold(sdp, &ri_gh); - if (error) - return error; - gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); - - rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); - gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); - - error = gfs2_glock_nq(ghs); /* parent */ - if (error) - goto out_parent; - - error = gfs2_glock_nq(ghs + 1); /* child */ - if (error) - goto out_child; - - error = gfs2_glock_nq(ghs + 2); /* rgrp */ - if (error) - goto out_rgrp; - - error = gfs2_unlink_ok(dip, &dentry->d_name, ip); - if (error) - goto out_gunlock; - - if (ip->i_entries < 2) { - if (gfs2_consist_inode(ip)) - gfs2_dinode_print(ip); - error = -EIO; - goto out_gunlock; - } - if (ip->i_entries > 2) { - error = -ENOTEMPTY; - goto out_gunlock; - } - - error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF + RES_RG_BIT, 0); - if (error) - goto out_gunlock; - - error = gfs2_rmdiri(dip, &dentry->d_name, ip); - - gfs2_trans_end(sdp); - -out_gunlock: - gfs2_glock_dq(ghs + 2); -out_rgrp: - gfs2_holder_uninit(ghs + 2); - gfs2_glock_dq(ghs + 1); -out_child: - gfs2_holder_uninit(ghs + 1); - gfs2_glock_dq(ghs); -out_parent: - gfs2_holder_uninit(ghs); - gfs2_glock_dq_uninit(&ri_gh); - return error; -} - -/** - * gfs2_mknod - Make a special file - * @dir: The directory in which the special file will reside - * @dentry: The dentry of the special file - * @mode: The mode of the special file - * @rdev: The device specification of the special file - * - */ - -static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, - dev_t dev) -{ - struct gfs2_inode *dip = GFS2_I(dir); - struct gfs2_sbd *sdp = GFS2_SB(dir); - struct gfs2_holder ghs[2]; - struct inode *inode; - - gfs2_holder_init(dip->i_gl, 0, 0, ghs); - - inode = gfs2_createi(ghs, &dentry->d_name, mode, dev); - if (IS_ERR(inode)) { - gfs2_holder_uninit(ghs); - return PTR_ERR(inode); - } - - gfs2_trans_end(sdp); - if (dip->i_alloc->al_rgd) - gfs2_inplace_release(dip); - gfs2_quota_unlock(dip); - gfs2_alloc_put(dip); - - gfs2_glock_dq_uninit_m(2, ghs); - - d_instantiate(dentry, inode); - mark_inode_dirty(inode); - - return 0; -} - -/* - * gfs2_ok_to_move - check if it's ok to move a directory to another directory - * @this: move this - * @to: to here - * - * Follow @to back to the root and make sure we don't encounter @this - * Assumes we already hold the rename lock. - * - * Returns: errno - */ - -static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) -{ - struct inode *dir = &to->i_inode; - struct super_block *sb = dir->i_sb; - struct inode *tmp; - int error = 0; - - igrab(dir); - - for (;;) { - if (dir == &this->i_inode) { - error = -EINVAL; - break; - } - if (dir == sb->s_root->d_inode) { - error = 0; - break; - } - - tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1); - if (IS_ERR(tmp)) { - error = PTR_ERR(tmp); - break; - } - - iput(dir); - dir = tmp; - } - - iput(dir); - - return error; -} - -/** - * gfs2_rename - Rename a file - * @odir: Parent directory of old file name - * @odentry: The old dentry of the file - * @ndir: Parent directory of new file name - * @ndentry: The new dentry of the file - * - * Returns: errno - */ - -static int gfs2_rename(struct inode *odir, struct dentry *odentry, - struct inode *ndir, struct dentry *ndentry) -{ - struct gfs2_inode *odip = GFS2_I(odir); - struct gfs2_inode *ndip = GFS2_I(ndir); - struct gfs2_inode *ip = GFS2_I(odentry->d_inode); - struct gfs2_inode *nip = NULL; - struct gfs2_sbd *sdp = GFS2_SB(odir); - struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh; - struct gfs2_rgrpd *nrgd; - unsigned int num_gh; - int dir_rename = 0; - int alloc_required = 0; - unsigned int x; - int error; - - if (ndentry->d_inode) { - nip = GFS2_I(ndentry->d_inode); - if (ip == nip) - return 0; - } - - error = gfs2_rindex_hold(sdp, &ri_gh); - if (error) - return error; - - if (odip != ndip) { - error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, - 0, &r_gh); - if (error) - goto out; - - if (S_ISDIR(ip->i_inode.i_mode)) { - dir_rename = 1; - /* don't move a dirctory into it's subdir */ - error = gfs2_ok_to_move(ip, ndip); - if (error) - goto out_gunlock_r; - } - } - - num_gh = 1; - gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); - if (odip != ndip) { - gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); - num_gh++; - } - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); - num_gh++; - - if (nip) { - gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); - num_gh++; - /* grab the resource lock for unlink flag twiddling - * this is the case of the target file already existing - * so we unlink before doing the rename - */ - nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr); - if (nrgd) - gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); - } - - for (x = 0; x < num_gh; x++) { - error = gfs2_glock_nq(ghs + x); - if (error) - goto out_gunlock; - } - - /* Check out the old directory */ - - error = gfs2_unlink_ok(odip, &odentry->d_name, ip); - if (error) - goto out_gunlock; - - /* Check out the new directory */ - - if (nip) { - error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip); - if (error) - goto out_gunlock; - - if (S_ISDIR(nip->i_inode.i_mode)) { - if (nip->i_entries < 2) { - if (gfs2_consist_inode(nip)) - gfs2_dinode_print(nip); - error = -EIO; - goto out_gunlock; - } - if (nip->i_entries > 2) { - error = -ENOTEMPTY; - goto out_gunlock; - } - } - } else { - error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0); - if (error) - goto out_gunlock; - - error = gfs2_dir_check(ndir, &ndentry->d_name, NULL); - switch (error) { - case -ENOENT: - error = 0; - break; - case 0: - error = -EEXIST; - default: - goto out_gunlock; - }; - - if (odip != ndip) { - if (!ndip->i_inode.i_nlink) { - error = -EINVAL; - goto out_gunlock; - } - if (ndip->i_entries == (u32)-1) { - error = -EFBIG; - goto out_gunlock; - } - if (S_ISDIR(ip->i_inode.i_mode) && - ndip->i_inode.i_nlink == (u32)-1) { - error = -EMLINK; - goto out_gunlock; - } - } - } - - /* Check out the dir to be renamed */ - - if (dir_rename) { - error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0); - if (error) - goto out_gunlock; - } - - if (nip == NULL) - alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name); - error = alloc_required; - if (error < 0) - goto out_gunlock; - error = 0; - - if (alloc_required) { - struct gfs2_alloc *al = gfs2_alloc_get(ndip); - if (!al) { - error = -ENOMEM; - goto out_gunlock; - } - - error = gfs2_quota_lock_check(ndip); - if (error) - goto out_alloc; - - al->al_requested = sdp->sd_max_dirres; - - error = gfs2_inplace_reserve_ri(ndip); - if (error) - goto out_gunlock_q; - - error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(al) + - 4 * RES_DINODE + 4 * RES_LEAF + - RES_STATFS + RES_QUOTA + 4, 0); - if (error) - goto out_ipreserv; - } else { - error = gfs2_trans_begin(sdp, 4 * RES_DINODE + - 5 * RES_LEAF + 4, 0); - if (error) - goto out_gunlock; - } - - /* Remove the target file, if it exists */ - - if (nip) { - if (S_ISDIR(nip->i_inode.i_mode)) - error = gfs2_rmdiri(ndip, &ndentry->d_name, nip); - else { - error = gfs2_dir_del(ndip, &ndentry->d_name); - if (error) - goto out_end_trans; - error = gfs2_change_nlink(nip, -1); - } - if (error) - goto out_end_trans; - } - - if (dir_rename) { - error = gfs2_change_nlink(ndip, +1); - if (error) - goto out_end_trans; - error = gfs2_change_nlink(odip, -1); - if (error) - goto out_end_trans; - - error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR); - if (error) - goto out_end_trans; - } else { - struct buffer_head *dibh; - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - goto out_end_trans; - ip->i_inode.i_ctime = CURRENT_TIME; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); - } - - error = gfs2_dir_del(odip, &odentry->d_name); - if (error) - goto out_end_trans; - - error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode)); - if (error) - goto out_end_trans; - -out_end_trans: - gfs2_trans_end(sdp); -out_ipreserv: - if (alloc_required) - gfs2_inplace_release(ndip); -out_gunlock_q: - if (alloc_required) - gfs2_quota_unlock(ndip); -out_alloc: - if (alloc_required) - gfs2_alloc_put(ndip); -out_gunlock: - while (x--) { - gfs2_glock_dq(ghs + x); - gfs2_holder_uninit(ghs + x); - } -out_gunlock_r: - if (r_gh.gh_gl) - gfs2_glock_dq_uninit(&r_gh); -out: - gfs2_glock_dq_uninit(&ri_gh); - return error; -} - -/** - * gfs2_follow_link - Follow a symbolic link - * @dentry: The dentry of the link - * @nd: Data that we pass to vfs_follow_link() - * - * This can handle symlinks of any size. - * - * Returns: 0 on success or error code - */ - -static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct gfs2_inode *ip = GFS2_I(dentry->d_inode); - struct gfs2_holder i_gh; - struct buffer_head *dibh; - unsigned int x, size; - char *buf; - int error; - - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); - error = gfs2_glock_nq(&i_gh); - if (error) { - gfs2_holder_uninit(&i_gh); - nd_set_link(nd, ERR_PTR(error)); - return NULL; - } - - size = (unsigned int)i_size_read(&ip->i_inode); - if (size == 0) { - gfs2_consist_inode(ip); - buf = ERR_PTR(-EIO); - goto out; - } - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) { - buf = ERR_PTR(error); - goto out; - } - - x = size + 1; - buf = kmalloc(x, GFP_NOFS); - if (!buf) - buf = ERR_PTR(-ENOMEM); - else - memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), x); - brelse(dibh); -out: - gfs2_glock_dq_uninit(&i_gh); - nd_set_link(nd, buf); - return NULL; -} - -static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p) -{ - char *s = nd_get_link(nd); - if (!IS_ERR(s)) - kfree(s); -} - -/** - * gfs2_permission - - * @inode: - * @mask: - * @nd: passed from Linux VFS, ignored by us - * - * This may be called from the VFS directly, or from within GFS2 with the - * inode locked, so we look to see if the glock is already locked and only - * lock the glock if its not already been done. - * - * Returns: errno - */ - -int gfs2_permission(struct inode *inode, int mask, unsigned int flags) -{ - struct gfs2_inode *ip; - struct gfs2_holder i_gh; - int error; - int unlock = 0; - - if (flags & IPERM_FLAG_RCU) - return -ECHILD; - - ip = GFS2_I(inode); - if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); - if (error) - return error; - unlock = 1; - } - - if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) - error = -EACCES; - else - error = generic_permission(inode, mask, flags, gfs2_check_acl); - if (unlock) - gfs2_glock_dq_uninit(&i_gh); - - return error; -} - -static int setattr_chown(struct inode *inode, struct iattr *attr) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - u32 ouid, ogid, nuid, ngid; - int error; - - ouid = inode->i_uid; - ogid = inode->i_gid; - nuid = attr->ia_uid; - ngid = attr->ia_gid; - - if (!(attr->ia_valid & ATTR_UID) || ouid == nuid) - ouid = nuid = NO_QUOTA_CHANGE; - if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) - ogid = ngid = NO_QUOTA_CHANGE; - - if (!gfs2_alloc_get(ip)) - return -ENOMEM; - - error = gfs2_quota_lock(ip, nuid, ngid); - if (error) - goto out_alloc; - - if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { - error = gfs2_quota_check(ip, nuid, ngid); - if (error) - goto out_gunlock_q; - } - - error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0); - if (error) - goto out_gunlock_q; - - error = gfs2_setattr_simple(ip, attr); - if (error) - goto out_end_trans; - - if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { - u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); - gfs2_quota_change(ip, -blocks, ouid, ogid); - gfs2_quota_change(ip, blocks, nuid, ngid); - } - -out_end_trans: - gfs2_trans_end(sdp); -out_gunlock_q: - gfs2_quota_unlock(ip); -out_alloc: - gfs2_alloc_put(ip); - return error; -} - -/** - * gfs2_setattr - Change attributes on an inode - * @dentry: The dentry which is changing - * @attr: The structure describing the change - * - * The VFS layer wants to change one or more of an inodes attributes. Write - * that change out to disk. - * - * Returns: errno - */ - -static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) -{ - struct inode *inode = dentry->d_inode; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder i_gh; - int error; - - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); - if (error) - return error; - - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; - - error = inode_change_ok(inode, attr); - if (error) - goto out; - - if (attr->ia_valid & ATTR_SIZE) - error = gfs2_setattr_size(inode, attr->ia_size); - else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) - error = setattr_chown(inode, attr); - else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) - error = gfs2_acl_chmod(ip, attr); - else - error = gfs2_setattr_simple(ip, attr); - -out: - gfs2_glock_dq_uninit(&i_gh); - if (!error) - mark_inode_dirty(inode); - return error; -} - -/** - * gfs2_getattr - Read out an inode's attributes - * @mnt: The vfsmount the inode is being accessed from - * @dentry: The dentry to stat - * @stat: The inode's stats - * - * This may be called from the VFS directly, or from within GFS2 with the - * inode locked, so we look to see if the glock is already locked and only - * lock the glock if its not already been done. Note that its the NFS - * readdirplus operation which causes this to be called (from filldir) - * with the glock already held. - * - * Returns: errno - */ - -static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) -{ - struct inode *inode = dentry->d_inode; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder gh; - int error; - int unlock = 0; - - if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); - if (error) - return error; - unlock = 1; - } - - generic_fillattr(inode, stat); - if (unlock) - gfs2_glock_dq_uninit(&gh); - - return 0; -} - -static int gfs2_setxattr(struct dentry *dentry, const char *name, - const void *data, size_t size, int flags) -{ - struct inode *inode = dentry->d_inode; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder gh; - int ret; - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); - ret = gfs2_glock_nq(&gh); - if (ret == 0) { - ret = generic_setxattr(dentry, name, data, size, flags); - gfs2_glock_dq(&gh); - } - gfs2_holder_uninit(&gh); - return ret; -} - -static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name, - void *data, size_t size) -{ - struct inode *inode = dentry->d_inode; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder gh; - int ret; - - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); - ret = gfs2_glock_nq(&gh); - if (ret == 0) { - ret = generic_getxattr(dentry, name, data, size); - gfs2_glock_dq(&gh); - } - gfs2_holder_uninit(&gh); - return ret; -} - -static int gfs2_removexattr(struct dentry *dentry, const char *name) -{ - struct inode *inode = dentry->d_inode; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder gh; - int ret; - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); - ret = gfs2_glock_nq(&gh); - if (ret == 0) { - ret = generic_removexattr(dentry, name); - gfs2_glock_dq(&gh); - } - gfs2_holder_uninit(&gh); - return ret; -} - -static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - u64 start, u64 len) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder gh; - int ret; - - ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); - if (ret) - return ret; - - mutex_lock(&inode->i_mutex); - - ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); - if (ret) - goto out; - - if (gfs2_is_stuffed(ip)) { - u64 phys = ip->i_no_addr << inode->i_blkbits; - u64 size = i_size_read(inode); - u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED| - FIEMAP_EXTENT_DATA_INLINE; - phys += sizeof(struct gfs2_dinode); - phys += start; - if (start + len > size) - len = size - start; - if (start < size) - ret = fiemap_fill_next_extent(fieinfo, start, phys, - len, flags); - if (ret == 1) - ret = 0; - } else { - ret = __generic_block_fiemap(inode, fieinfo, start, len, - gfs2_block_map); - } - - gfs2_glock_dq_uninit(&gh); -out: - mutex_unlock(&inode->i_mutex); - return ret; -} - -const struct inode_operations gfs2_file_iops = { - .permission = gfs2_permission, - .setattr = gfs2_setattr, - .getattr = gfs2_getattr, - .setxattr = gfs2_setxattr, - .getxattr = gfs2_getxattr, - .listxattr = gfs2_listxattr, - .removexattr = gfs2_removexattr, - .fiemap = gfs2_fiemap, -}; - -const struct inode_operations gfs2_dir_iops = { - .create = gfs2_create, - .lookup = gfs2_lookup, - .link = gfs2_link, - .unlink = gfs2_unlink, - .symlink = gfs2_symlink, - .mkdir = gfs2_mkdir, - .rmdir = gfs2_rmdir, - .mknod = gfs2_mknod, - .rename = gfs2_rename, - .permission = gfs2_permission, - .setattr = gfs2_setattr, - .getattr = gfs2_getattr, - .setxattr = gfs2_setxattr, - .getxattr = gfs2_getxattr, - .listxattr = gfs2_listxattr, - .removexattr = gfs2_removexattr, - .fiemap = gfs2_fiemap, -}; - -const struct inode_operations gfs2_symlink_iops = { - .readlink = generic_readlink, - .follow_link = gfs2_follow_link, - .put_link = gfs2_put_link, - .permission = gfs2_permission, - .setattr = gfs2_setattr, - .getattr = gfs2_getattr, - .setxattr = gfs2_setxattr, - .getxattr = gfs2_getxattr, - .listxattr = gfs2_listxattr, - .removexattr = gfs2_removexattr, - .fiemap = gfs2_fiemap, -}; - diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a689901963d..42e8d23bc04 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -38,6 +38,7 @@ #include <linux/sched.h> #include <linux/slab.h> +#include <linux/mm.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> @@ -77,19 +78,20 @@ static LIST_HEAD(qd_lru_list); static atomic_t qd_lru_count = ATOMIC_INIT(0); static DEFINE_SPINLOCK(qd_lru_lock); -int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) { struct gfs2_quota_data *qd; struct gfs2_sbd *sdp; + int nr_to_scan = sc->nr_to_scan; - if (nr == 0) + if (nr_to_scan == 0) goto out; - if (!(gfp_mask & __GFP_FS)) + if (!(sc->gfp_mask & __GFP_FS)) return -1; spin_lock(&qd_lru_lock); - while (nr && !list_empty(&qd_lru_list)) { + while (nr_to_scan && !list_empty(&qd_lru_list)) { qd = list_entry(qd_lru_list.next, struct gfs2_quota_data, qd_reclaim); sdp = qd->qd_gl->gl_sbd; @@ -110,7 +112,7 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) spin_unlock(&qd_lru_lock); kmem_cache_free(gfs2_quotad_cachep, qd); spin_lock(&qd_lru_lock); - nr--; + nr_to_scan--; } spin_unlock(&qd_lru_lock); @@ -834,6 +836,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) goto out_end_trans; do_qc(qd, -qd->qd_change_sync); + set_bit(QDF_REFRESH, &qd->qd_flags); } error = 0; @@ -929,6 +932,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_alloc *al = ip->i_alloc; + struct gfs2_quota_data *qd; unsigned int x; int error = 0; @@ -942,7 +946,11 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) sort_qd, NULL); for (x = 0; x < al->al_qd_num; x++) { - error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]); + int force = NO_FORCE; + qd = al->al_qd[x]; + if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) + force = FORCE; + error = do_glock(qd, force, &al->al_qd_ghs[x]); if (error) break; } @@ -1587,6 +1595,8 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, offset = qd2offset(qd); alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota)); + if (gfs2_is_stuffed(ip)) + alloc_required = 1; if (alloc_required) { al = gfs2_alloc_get(ip); if (al == NULL) @@ -1600,7 +1610,9 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, blocks += gfs2_rg_blocks(al); } - error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); + /* Some quotas span block boundaries and can update two blocks, + adding an extra block to the transaction to handle such quotas */ + error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 2, 0); if (error) goto out_release; diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index e7d236ca48b..90bf1c302a9 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -12,6 +12,7 @@ struct gfs2_inode; struct gfs2_sbd; +struct shrink_control; #define NO_QUOTA_CHANGE ((u32)-1) @@ -51,7 +52,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; } -extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask); +extern int gfs2_shrink_qd_memory(struct shrinker *shrink, + struct shrink_control *sc); extern const struct quotactl_ops gfs2_quotactl_ops; #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 7293ea27020..9b780df3fd5 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -78,10 +78,11 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, unsigned char *buf2, unsigned int offset, - unsigned int buflen, u32 block, + struct gfs2_bitmap *bi, u32 block, unsigned char new_state) { unsigned char *byte1, *byte2, *end, cur_state; + unsigned int buflen = bi->bi_len; const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; byte1 = buf1 + offset + (block / GFS2_NBBY); @@ -92,6 +93,16 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; if (unlikely(!valid_change[new_state * 4 + cur_state])) { + printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, " + "new_state=%d\n", + (unsigned long long)block, cur_state, new_state); + printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n", + (unsigned long long)rgd->rd_addr, + (unsigned long)bi->bi_start); + printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n", + (unsigned long)bi->bi_offset, + (unsigned long)bi->bi_len); + dump_stack(); gfs2_consist_rgrpd(rgd); return; } @@ -381,6 +392,7 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp) if (gl) { gl->gl_object = NULL; + gfs2_glock_add_to_lru(gl); gfs2_glock_put(gl); } @@ -945,7 +957,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip /* rgblk_search can return a block < goal, so we need to keep it marching forward. */ no_addr = block + rgd->rd_data0; - goal++; + goal = max(block + 1, goal + 1); if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) continue; if (no_addr == skip) @@ -971,7 +983,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip found++; /* Limit reclaim to sensible number of tasks */ - if (found > 2*NR_CPUS) + if (found > NR_CPUS) return; } @@ -1365,7 +1377,7 @@ skip: gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, - bi->bi_len, blk, new_state); + bi, blk, new_state); goal = blk; while (*n < elen) { goal++; @@ -1375,7 +1387,7 @@ skip: GFS2_BLKST_FREE) break; gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, - bi->bi_len, goal, new_state); + bi, goal, new_state); (*n)++; } out: @@ -1432,7 +1444,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, } gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset, - bi->bi_len, buf_blk, new_state); + bi, buf_blk, new_state); } return rgd; @@ -1602,7 +1614,7 @@ rgrp_error: * */ -void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) +void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd; @@ -1618,6 +1630,24 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) gfs2_trans_add_rg(rgd); + /* Directories keep their data in the metadata address space */ + if (ip->i_depth) + gfs2_meta_wipe(ip, bstart, blen); +} + +/** + * gfs2_free_data - free a contiguous run of data block(s) + * @ip: the inode these blocks are being freed from + * @bstart: first block of a run of contiguous blocks + * @blen: the length of the block run + * + */ + +void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + + __gfs2_free_data(ip, bstart, blen); gfs2_statfs_change(sdp, 0, +blen, 0); gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); } @@ -1630,7 +1660,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) * */ -void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) +void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd; @@ -1645,10 +1675,24 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); gfs2_trans_add_rg(rgd); + gfs2_meta_wipe(ip, bstart, blen); +} + +/** + * gfs2_free_meta - free a contiguous run of data block(s) + * @ip: the inode these blocks are being freed from + * @bstart: first block of a run of contiguous blocks + * @blen: the length of the block run + * + */ + +void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + __gfs2_free_meta(ip, bstart, blen); gfs2_statfs_change(sdp, 0, +blen, 0); gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); - gfs2_meta_wipe(ip, bstart, blen); } void gfs2_unlink_di(struct inode *inode) diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 50c2bb04369..a80e3034ac4 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -52,7 +52,9 @@ extern int gfs2_ri_update(struct gfs2_inode *ip); extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); +extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); +extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); extern void gfs2_unlink_di(struct inode *inode); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ec73ed70bae..ed540e7018b 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -23,6 +23,7 @@ #include <linux/time.h> #include <linux/wait.h> #include <linux/writeback.h> +#include <linux/backing-dev.h> #include "gfs2.h" #include "incore.h" @@ -657,7 +658,7 @@ out: * @sdp: the file system * * This function flushes data and meta data for all machines by - * aquiring the transaction log exclusively. All journals are + * acquiring the transaction log exclusively. All journals are * ensured to be in a clean state as well. * * Returns: errno @@ -700,11 +701,47 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp) mutex_unlock(&sdp->sd_freeze_lock); } +void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) +{ + struct gfs2_dinode *str = buf; + + str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); + str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); + str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); + str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); + str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); + str->di_mode = cpu_to_be32(ip->i_inode.i_mode); + str->di_uid = cpu_to_be32(ip->i_inode.i_uid); + str->di_gid = cpu_to_be32(ip->i_inode.i_gid); + str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); + str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); + str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); + str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); + str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); + str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); + + str->di_goal_meta = cpu_to_be64(ip->i_goal); + str->di_goal_data = cpu_to_be64(ip->i_goal); + str->di_generation = cpu_to_be64(ip->i_generation); + + str->di_flags = cpu_to_be32(ip->i_diskflags); + str->di_height = cpu_to_be16(ip->i_height); + str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && + !(ip->i_diskflags & GFS2_DIF_EXHASH) ? + GFS2_FORMAT_DE : 0); + str->di_depth = cpu_to_be16(ip->i_depth); + str->di_entries = cpu_to_be32(ip->i_entries); + + str->di_eattr = cpu_to_be64(ip->i_eattr); + str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); + str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); + str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); +} /** * gfs2_write_inode - Make sure the inode is stable on the disk * @inode: The inode - * @sync: synchronous write flag + * @wbc: The writeback control structure * * Returns: errno */ @@ -713,15 +750,17 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); + struct backing_dev_info *bdi = metamapping->backing_dev_info; struct gfs2_holder gh; struct buffer_head *bh; struct timespec atime; struct gfs2_dinode *di; - int ret = 0; + int ret = -EAGAIN; - /* Check this is a "normal" inode, etc */ + /* Skip timestamp update, if this is from a memalloc */ if (current->flags & PF_MEMALLOC) - return 0; + goto do_flush; ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); if (ret) goto do_flush; @@ -745,6 +784,13 @@ do_unlock: do_flush: if (wbc->sync_mode == WB_SYNC_ALL) gfs2_log_flush(GFS2_SB(inode), ip->i_gl); + filemap_fdatawrite(metamapping); + if (bdi->dirty_exceeded) + gfs2_ail1_flush(sdp, wbc); + if (!ret && (wbc->sync_mode == WB_SYNC_ALL)) + ret = filemap_fdatawait(metamapping); + if (ret) + mark_inode_dirty_sync(inode); return ret; } @@ -874,8 +920,9 @@ restart: static int gfs2_sync_fs(struct super_block *sb, int wait) { - if (wait && sb->s_fs_info) - gfs2_log_flush(sb->s_fs_info, NULL); + struct gfs2_sbd *sdp = sb->s_fs_info; + if (wait && sdp) + gfs2_log_flush(sdp, NULL); return 0; } @@ -1308,6 +1355,78 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) return 0; } +static void gfs2_final_release_pages(struct gfs2_inode *ip) +{ + struct inode *inode = &ip->i_inode; + struct gfs2_glock *gl = ip->i_gl; + + truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0); + truncate_inode_pages(&inode->i_data, 0); + + if (atomic_read(&gl->gl_revokes) == 0) { + clear_bit(GLF_LFLUSH, &gl->gl_flags); + clear_bit(GLF_DIRTY, &gl->gl_flags); + } +} + +static int gfs2_dinode_dealloc(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct gfs2_alloc *al; + struct gfs2_rgrpd *rgd; + int error; + + if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { + gfs2_consist_inode(ip); + return -EIO; + } + + al = gfs2_alloc_get(ip); + if (!al) + return -ENOMEM; + + error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out; + + error = gfs2_rindex_hold(sdp, &al->al_ri_gh); + if (error) + goto out_qs; + + rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); + if (!rgd) { + gfs2_consist_inode(ip); + error = -EIO; + goto out_rindex_relse; + } + + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, + &al->al_rgd_gh); + if (error) + goto out_rindex_relse; + + error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, + sdp->sd_jdesc->jd_blocks); + if (error) + goto out_rg_gunlock; + + gfs2_free_di(rgd, ip); + + gfs2_final_release_pages(ip); + + gfs2_trans_end(sdp); + +out_rg_gunlock: + gfs2_glock_dq_uninit(&al->al_rgd_gh); +out_rindex_relse: + gfs2_glock_dq_uninit(&al->al_ri_gh); +out_qs: + gfs2_quota_unhold(ip); +out: + gfs2_alloc_put(ip); + return error; +} + /* * We have to (at the moment) hold the inodes main lock to cover * the gap between unlocking the shared lock on the iopen lock and @@ -1318,15 +1437,17 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) static void gfs2_evict_inode(struct inode *inode) { - struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; + struct super_block *sb = inode->i_sb; + struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int error; - if (inode->i_nlink) + if (inode->i_nlink || (sb->s_flags & MS_RDONLY)) goto out; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + /* Must not read inode block until block type has been verified */ + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh); if (unlikely(error)) { gfs2_glock_dq_uninit(&ip->i_iopen_gh); goto out; @@ -1336,6 +1457,12 @@ static void gfs2_evict_inode(struct inode *inode) if (error) goto out_truncate; + if (test_bit(GIF_INVALID, &ip->i_flags)) { + error = gfs2_inode_refresh(ip); + if (error) + goto out_truncate; + } + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_wait(&ip->i_iopen_gh); gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); @@ -1363,15 +1490,13 @@ static void gfs2_evict_inode(struct inode *inode) } error = gfs2_dinode_dealloc(ip); - if (error) - goto out_unlock; + goto out_unlock; out_truncate: error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); if (error) goto out_unlock; - /* Needs to be done before glock release & also in a transaction */ - truncate_inode_pages(&inode->i_data, 0); + gfs2_final_release_pages(ip); gfs2_trans_end(sdp); out_unlock: @@ -1386,6 +1511,7 @@ out: end_writeback(inode); ip->i_gl->gl_object = NULL; + gfs2_glock_add_to_lru(ip->i_gl); gfs2_glock_put(ip->i_gl); ip->i_gl = NULL; if (ip->i_iopen_gh.gh_gl) { diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 748ccb557c1..e20eab37bc8 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -81,7 +81,8 @@ static int gfs2_uuid_valid(const u8 *uuid) static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf) { - const u8 *uuid = sdp->sd_sb.sb_uuid; + struct super_block *s = sdp->sd_vfs; + const u8 *uuid = s->s_uuid; buf[0] = '\0'; if (!gfs2_uuid_valid(uuid)) return 0; @@ -616,7 +617,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj, struct kobj_uevent_env *env) { struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); - const u8 *uuid = sdp->sd_sb.sb_uuid; + struct super_block *s = sdp->sd_vfs; + const u8 *uuid = s->s_uuid; add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index cedb0bb96d9..5d07609ec57 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -10,6 +10,7 @@ #include <linux/buffer_head.h> #include <linux/dlmconstants.h> #include <linux/gfs2_ondisk.h> +#include <linux/writeback.h> #include "incore.h" #include "glock.h" @@ -40,7 +41,9 @@ {(1UL << GLF_REPLY_PENDING), "r" }, \ {(1UL << GLF_INITIAL), "I" }, \ {(1UL << GLF_FROZEN), "F" }, \ - {(1UL << GLF_QUEUED), "q" }) + {(1UL << GLF_QUEUED), "q" }, \ + {(1UL << GLF_LRU), "L" }, \ + {(1UL << GLF_OBJECT), "o" }) #ifndef NUMPTY #define NUMPTY @@ -94,7 +97,7 @@ TRACE_EVENT(gfs2_glock_state_change, __entry->new_state = glock_trace_state(new_state); __entry->tgt_state = glock_trace_state(gl->gl_target); __entry->dmt_state = glock_trace_state(gl->gl_demote_state); - __entry->flags = gl->gl_flags; + __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0); ), TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s", @@ -127,7 +130,7 @@ TRACE_EVENT(gfs2_glock_put, __entry->gltype = gl->gl_name.ln_type; __entry->glnum = gl->gl_name.ln_number; __entry->cur_state = glock_trace_state(gl->gl_state); - __entry->flags = gl->gl_flags; + __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0); ), TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s", @@ -161,7 +164,7 @@ TRACE_EVENT(gfs2_demote_rq, __entry->glnum = gl->gl_name.ln_number; __entry->cur_state = glock_trace_state(gl->gl_state); __entry->dmt_state = glock_trace_state(gl->gl_demote_state); - __entry->flags = gl->gl_flags; + __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0); ), TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s", @@ -318,6 +321,33 @@ TRACE_EVENT(gfs2_log_blocks, MINOR(__entry->dev), __entry->blocks) ); +/* Writing back the AIL */ +TRACE_EVENT(gfs2_ail_flush, + + TP_PROTO(const struct gfs2_sbd *sdp, const struct writeback_control *wbc, int start), + + TP_ARGS(sdp, wbc, start), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, start ) + __field( int, sync_mode ) + __field( long, nr_to_write ) + ), + + TP_fast_assign( + __entry->dev = sdp->sd_vfs->s_dev; + __entry->start = start; + __entry->sync_mode = wbc->sync_mode; + __entry->nr_to_write = wbc->nr_to_write; + ), + + TP_printk("%u,%u ail flush %s %s %ld", MAJOR(__entry->dev), + MINOR(__entry->dev), __entry->start ? "start" : "end", + __entry->sync_mode == WB_SYNC_ALL ? "all" : "none", + __entry->nr_to_write) +); + /* Section 3 - bmap * * Objectives: |