From f057f6cdf64175db1151b1f5d110e29904f119a1 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 12 Jan 2009 10:43:39 +0000 Subject: GFS2: Merge lock_dlm module into GFS2 This is the big patch that I've been working on for some time now. There are many reasons for wanting to make this change such as: o Reducing overhead by eliminating duplicated fields between structures o Simplifcation of the code (reduces the code size by a fair bit) o The locking interface is now the DLM interface itself as proposed some time ago. o Fewer lookups of glocks when processing replies from the DLM o Fewer memory allocations/deallocations for each glock o Scope to do further optimisations in the future (but this patch is more than big enough for now!) Please note that (a) this patch relates to the lock_dlm module and not the DLM itself, that is still a separate module; and (b) that we retain the ability to build GFS2 as a standalone single node filesystem with out requiring the DLM. This patch needs a lot of testing, hence my keeping it I restarted my -git tree after the last merge window. That way, this has the maximum exposure before its merged. This is (modulo a few minor bug fixes) the same patch that I've been posting on and off the the last three months and its passed a number of different tests so far. Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_file.c | 74 ++++++++++++++++-------------------------------------- 1 file changed, 22 insertions(+), 52 deletions(-) (limited to 'fs/gfs2/ops_file.c') diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 93fe41b67f9..99d726f1c7a 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -20,9 +20,10 @@ #include #include #include -#include #include #include +#include +#include #include "gfs2.h" #include "incore.h" @@ -560,57 +561,24 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync) return ret; } +#ifdef CONFIG_GFS2_FS_LOCKING_DLM + /** * gfs2_setlease - acquire/release a file lease * @file: the file pointer * @arg: lease type * @fl: file lock * + * We don't currently have a way to enforce a lease across the whole + * cluster; until we do, disable leases (by just returning -EINVAL), + * unless the administrator has requested purely local locking. + * * Returns: errno */ static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl) { - struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); - - /* - * We don't currently have a way to enforce a lease across the whole - * cluster; until we do, disable leases (by just returning -EINVAL), - * unless the administrator has requested purely local locking. - */ - if (!sdp->sd_args.ar_localflocks) - return -EINVAL; - return generic_setlease(file, arg, fl); -} - -static int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name, - struct file *file, struct file_lock *fl) -{ - int error = -EIO; - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - error = sdp->sd_lockstruct.ls_ops->lm_plock_get( - sdp->sd_lockstruct.ls_lockspace, name, file, fl); - return error; -} - -static int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name, - struct file *file, int cmd, struct file_lock *fl) -{ - int error = -EIO; - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - error = sdp->sd_lockstruct.ls_ops->lm_plock( - sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl); - return error; -} - -static int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name, - struct file *file, struct file_lock *fl) -{ - int error = -EIO; - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - error = sdp->sd_lockstruct.ls_ops->lm_punlock( - sdp->sd_lockstruct.ls_lockspace, name, file, fl); - return error; + return -EINVAL; } /** @@ -626,9 +594,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) { struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); - struct lm_lockname name = - { .ln_number = ip->i_no_addr, - .ln_type = LM_TYPE_PLOCK }; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; @@ -640,12 +606,14 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) cmd = F_SETLK; fl->fl_type = F_UNLCK; } + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + return -EIO; if (IS_GETLK(cmd)) - return gfs2_lm_plock_get(sdp, &name, file, fl); + return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); else if (fl->fl_type == F_UNLCK) - return gfs2_lm_punlock(sdp, &name, file, fl); + return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl); else - return gfs2_lm_plock(sdp, &name, file, cmd, fl); + return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl); } static int do_flock(struct file *file, int cmd, struct file_lock *fl) @@ -732,7 +700,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) } } -const struct file_operations gfs2_file_fops = { +const struct file_operations *gfs2_file_fops = &(const struct file_operations){ .llseek = gfs2_llseek, .read = do_sync_read, .aio_read = generic_file_aio_read, @@ -750,7 +718,7 @@ const struct file_operations gfs2_file_fops = { .setlease = gfs2_setlease, }; -const struct file_operations gfs2_dir_fops = { +const struct file_operations *gfs2_dir_fops = &(const struct file_operations){ .readdir = gfs2_readdir, .unlocked_ioctl = gfs2_ioctl, .open = gfs2_open, @@ -760,7 +728,9 @@ const struct file_operations gfs2_dir_fops = { .flock = gfs2_flock, }; -const struct file_operations gfs2_file_fops_nolock = { +#endif /* CONFIG_GFS2_FS_LOCKING_DLM */ + +const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operations){ .llseek = gfs2_llseek, .read = do_sync_read, .aio_read = generic_file_aio_read, @@ -773,10 +743,10 @@ const struct file_operations gfs2_file_fops_nolock = { .fsync = gfs2_fsync, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, - .setlease = gfs2_setlease, + .setlease = generic_setlease, }; -const struct file_operations gfs2_dir_fops_nolock = { +const struct file_operations *gfs2_dir_fops_nolock = &(const struct file_operations){ .readdir = gfs2_readdir, .unlocked_ioctl = gfs2_ioctl, .open = gfs2_open, -- cgit v1.2.3-70-g09d2 From 6bac243f0793499782267342eba852a8a6cc7ac4 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 9 Mar 2009 09:03:51 +0000 Subject: GFS2: Clean up of glops.c This cleans up a number of bits of code mostly based in glops.c. A couple of simple functions have been merged into the callers to make it more obvious what is going on, the mysterious raising of i_writecount around the truncate_inode_pages() call has been removed. The meta_go_* operations have been renamed rgrp_go_* since that is the only lock type that they are used with. The unused argument of gfs2_read_sb has been removed. Also a bug has been fixed where a check for the rindex inode was in the wrong callback. More comments are added, and the debugging code is improved too. Signed-off-by: Steven Whitehouse --- fs/gfs2/glops.c | 115 +++++++++++++++++++++++---------------------------- fs/gfs2/meta_io.c | 21 ---------- fs/gfs2/meta_io.h | 1 - fs/gfs2/ops_file.c | 3 +- fs/gfs2/ops_fstype.c | 6 +-- 5 files changed, 57 insertions(+), 89 deletions(-) (limited to 'fs/gfs2/ops_file.c') diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index f34bc7093dd..bf23a62aa92 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -76,29 +76,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) } /** - * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock - * @gl: the glock - * - */ - -static void gfs2_pte_inval(struct gfs2_glock *gl) -{ - struct gfs2_inode *ip; - struct inode *inode; - - ip = gl->gl_object; - inode = &ip->i_inode; - if (!ip || !S_ISREG(inode->i_mode)) - return; - - unmap_shared_mapping_range(inode->i_mapping, 0, 0); - if (test_bit(GIF_SW_PAGED, &ip->i_flags)) - set_bit(GLF_DIRTY, &gl->gl_flags); - -} - -/** - * meta_go_sync - sync out the metadata for this glock + * rgrp_go_sync - sync out the metadata for this glock * @gl: the glock * * Called when demoting or unlocking an EX glock. We must flush @@ -106,36 +84,42 @@ static void gfs2_pte_inval(struct gfs2_glock *gl) * not return to caller to demote/unlock the glock until I/O is complete. */ -static void meta_go_sync(struct gfs2_glock *gl) +static void rgrp_go_sync(struct gfs2_glock *gl) { - if (gl->gl_state != LM_ST_EXCLUSIVE) + struct address_space *metamapping = gl->gl_aspace->i_mapping; + int error; + + if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) return; + BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE); - if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) { - gfs2_log_flush(gl->gl_sbd, gl); - gfs2_meta_sync(gl); - gfs2_ail_empty_gl(gl); - } + gfs2_log_flush(gl->gl_sbd, gl); + filemap_fdatawrite(metamapping); + error = filemap_fdatawait(metamapping); + mapping_set_error(metamapping, error); + gfs2_ail_empty_gl(gl); } /** - * meta_go_inval - invalidate the metadata for this glock + * rgrp_go_inval - invalidate the metadata for this glock * @gl: the glock * @flags: * + * We never used LM_ST_DEFERRED with resource groups, so that we + * should always see the metadata flag set here. + * */ -static void meta_go_inval(struct gfs2_glock *gl, int flags) +static void rgrp_go_inval(struct gfs2_glock *gl, int flags) { - if (!(flags & DIO_METADATA)) - return; + struct address_space *mapping = gl->gl_aspace->i_mapping; - gfs2_meta_inval(gl); - if (gl->gl_object == GFS2_I(gl->gl_sbd->sd_rindex)) - gl->gl_sbd->sd_rindex_uptodate = 0; - else if (gl->gl_ops == &gfs2_rgrp_glops && gl->gl_object) { - struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object; + BUG_ON(!(flags & DIO_METADATA)); + gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); + truncate_inode_pages(mapping, 0); + if (gl->gl_object) { + struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object; rgd->rd_flags &= ~GFS2_RDF_UPTODATE; } } @@ -152,48 +136,54 @@ static void inode_go_sync(struct gfs2_glock *gl) struct address_space *metamapping = gl->gl_aspace->i_mapping; int error; - if (gl->gl_state != LM_ST_UNLOCKED) - gfs2_pte_inval(gl); - if (gl->gl_state != LM_ST_EXCLUSIVE) - return; - if (ip && !S_ISREG(ip->i_inode.i_mode)) ip = NULL; + if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) + unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); + if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) + return; - if (test_bit(GLF_DIRTY, &gl->gl_flags)) { - gfs2_log_flush(gl->gl_sbd, gl); - filemap_fdatawrite(metamapping); - if (ip) { - struct address_space *mapping = ip->i_inode.i_mapping; - filemap_fdatawrite(mapping); - error = filemap_fdatawait(mapping); - mapping_set_error(mapping, error); - } - error = filemap_fdatawait(metamapping); - mapping_set_error(metamapping, error); - clear_bit(GLF_DIRTY, &gl->gl_flags); - gfs2_ail_empty_gl(gl); + BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE); + + gfs2_log_flush(gl->gl_sbd, gl); + filemap_fdatawrite(metamapping); + if (ip) { + struct address_space *mapping = ip->i_inode.i_mapping; + filemap_fdatawrite(mapping); + error = filemap_fdatawait(mapping); + mapping_set_error(mapping, error); } + error = filemap_fdatawait(metamapping); + mapping_set_error(metamapping, error); + gfs2_ail_empty_gl(gl); } /** * inode_go_inval - prepare a inode glock to be released * @gl: the glock * @flags: + * + * Normally we invlidate everything, but if we are moving into + * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we + * can keep hold of the metadata, since it won't have changed. * */ static void inode_go_inval(struct gfs2_glock *gl, int flags) { struct gfs2_inode *ip = gl->gl_object; - int meta = (flags & DIO_METADATA); - if (meta) { - gfs2_meta_inval(gl); + gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); + + if (flags & DIO_METADATA) { + struct address_space *mapping = gl->gl_aspace->i_mapping; + truncate_inode_pages(mapping, 0); if (ip) set_bit(GIF_INVALID, &ip->i_flags); } + if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) + gl->gl_sbd->sd_rindex_uptodate = 0; if (ip && S_ISREG(ip->i_inode.i_mode)) truncate_inode_pages(ip->i_inode.i_mapping, 0); } @@ -395,7 +385,6 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl) } const struct gfs2_glock_operations gfs2_meta_glops = { - .go_xmote_th = meta_go_sync, .go_type = LM_TYPE_META, }; @@ -410,8 +399,8 @@ const struct gfs2_glock_operations gfs2_inode_glops = { }; const struct gfs2_glock_operations gfs2_rgrp_glops = { - .go_xmote_th = meta_go_sync, - .go_inval = meta_go_inval, + .go_xmote_th = rgrp_go_sync, + .go_inval = rgrp_go_inval, .go_demote_ok = rgrp_go_demote_ok, .go_lock = rgrp_go_lock, .go_unlock = rgrp_go_unlock, diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 870d65ae7ae..8d6f13256b2 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -88,27 +88,6 @@ void gfs2_aspace_put(struct inode *aspace) iput(aspace); } -/** - * gfs2_meta_inval - Invalidate all buffers associated with a glock - * @gl: the glock - * - */ - -void gfs2_meta_inval(struct gfs2_glock *gl) -{ - struct gfs2_sbd *sdp = gl->gl_sbd; - struct inode *aspace = gl->gl_aspace; - struct address_space *mapping = gl->gl_aspace->i_mapping; - - gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); - - atomic_inc(&aspace->i_writecount); - truncate_inode_pages(mapping, 0); - atomic_dec(&aspace->i_writecount); - - gfs2_assert_withdraw(sdp, !mapping->nrpages); -} - /** * gfs2_meta_sync - Sync all buffers associated with a glock * @gl: The glock diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index b1a5f3674d4..de270c2f9b6 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -40,7 +40,6 @@ static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh, struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp); void gfs2_aspace_put(struct inode *aspace); -void gfs2_meta_inval(struct gfs2_glock *gl); void gfs2_meta_sync(struct gfs2_glock *gl); struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 99d726f1c7a..48ec3d5e29e 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -355,7 +355,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) if (ret) goto out; - set_bit(GIF_SW_PAGED, &ip->i_flags); ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); if (ret || !alloc_required) goto out_unlock; @@ -396,6 +395,8 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) goto out_unlock_page; } ret = gfs2_allocate_page_backing(page); + if (!ret) + set_bit(GIF_SW_PAGED, &ip->i_flags); out_unlock_page: unlock_page(page); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 804ca7273a4..51883b3ad89 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -296,15 +296,15 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) __free_page(page); return 0; } + /** * gfs2_read_sb - Read super block * @sdp: The GFS2 superblock - * @gl: the glock for the superblock (assumed to be held) * @silent: Don't print message if mount fails * */ -static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) +static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent) { u32 hash_blocks, ind_blocks, leaf_blocks; u32 tmp_blocks; @@ -524,7 +524,7 @@ static int init_sb(struct gfs2_sbd *sdp, int silent) return ret; } - ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); + ret = gfs2_read_sb(sdp, silent); if (ret) { fs_err(sdp, "can't read superblock: %d\n", ret); goto out; -- cgit v1.2.3-70-g09d2 From 9c538837d844574787c95bd5665f684559fb7065 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 19 Mar 2009 13:15:44 +0000 Subject: Fix a minor bug in the previous patch The logic requires that we mark the glock dirty in page_mkwrite otherwise we might not flush correctly in the case that no allocation was required in the process of dirying the page. Also we need to set the shared write flag early for the same reason. Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/gfs2/ops_file.c') diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 48ec3d5e29e..3b9e8de3500 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -355,6 +355,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) if (ret) goto out; + set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); + set_bit(GIF_SW_PAGED, &ip->i_flags); + ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); if (ret || !alloc_required) goto out_unlock; @@ -395,8 +398,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) goto out_unlock_page; } ret = gfs2_allocate_page_backing(page); - if (!ret) - set_bit(GIF_SW_PAGED, &ip->i_flags); out_unlock_page: unlock_page(page); -- cgit v1.2.3-70-g09d2 From c2ec175c39f62949438354f603f4aa170846aabb Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 31 Mar 2009 15:23:21 -0700 Subject: mm: page_mkwrite change prototype to match fault Change the page_mkwrite prototype to take a struct vm_fault, and return VM_FAULT_xxx flags. There should be no functional change. This makes it possible to return much more detailed error information to the VM (and also can provide more information eg. virtual_address to the driver, which might be important in some special cases). This is required for a subsequent fix. And will also make it easier to merge page_mkwrite() with fault() in future. Signed-off-by: Nick Piggin Cc: Chris Mason Cc: Trond Myklebust Cc: Miklos Szeredi Cc: Steven Whitehouse Cc: Mark Fasheh Cc: Joel Becker Cc: Artem Bityutskiy Cc: Felix Blyakher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 2 +- drivers/video/fb_defio.c | 3 ++- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 5 ++++- fs/buffer.c | 6 +++++- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 5 ++++- fs/fuse/file.c | 3 ++- fs/gfs2/ops_file.c | 5 ++++- fs/nfs/file.c | 5 ++++- fs/ocfs2/mmap.c | 6 ++++-- fs/ubifs/file.c | 9 ++++++--- fs/xfs/linux-2.6/xfs_file.c | 4 ++-- include/linux/buffer_head.h | 2 +- include/linux/mm.h | 3 ++- mm/memory.c | 26 ++++++++++++++++++++++---- 16 files changed, 65 insertions(+), 23 deletions(-) (limited to 'fs/gfs2/ops_file.c') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 4e78ce67784..76efe5b71d7 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -505,7 +505,7 @@ prototypes: void (*open)(struct vm_area_struct*); void (*close)(struct vm_area_struct*); int (*fault)(struct vm_area_struct*, struct vm_fault *); - int (*page_mkwrite)(struct vm_area_struct *, struct page *); + int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *); int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); locking rules: diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c index 082026546ae..0a7a6679ee6 100644 --- a/drivers/video/fb_defio.c +++ b/drivers/video/fb_defio.c @@ -85,8 +85,9 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_fsync); /* vm_ops->page_mkwrite handler */ static int fb_deferred_io_mkwrite(struct vm_area_struct *vma, - struct page *page) + struct vm_fault *vmf) { + struct page *page = vmf->page; struct fb_info *info = vma->vm_private_data; struct fb_deferred_io *fbdefio = info->fbdefio; struct page *cur; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5e1d4e30e9d..7dd1b6d0bf3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2060,7 +2060,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index); -int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); +int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); void btrfs_put_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7d4f948bc22..ec5423790bb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4292,8 +4292,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. */ -int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) +int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = fdentry(vma->vm_file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; @@ -4362,6 +4363,8 @@ again: out_unlock: unlock_page(page); out: + if (ret) + ret = VM_FAULT_SIGBUS; return ret; } diff --git a/fs/buffer.c b/fs/buffer.c index 73abe6d8218..6d51a3da362 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2313,9 +2313,10 @@ int block_commit_write(struct page *page, unsigned from, unsigned to) * unlock the page. */ int -block_page_mkwrite(struct vm_area_struct *vma, struct page *page, +block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; unsigned long end; loff_t size; @@ -2340,6 +2341,9 @@ block_page_mkwrite(struct vm_area_struct *vma, struct page *page, ret = block_commit_write(page, 0, end); out_unlock: + if (ret) + ret = VM_FAULT_SIGBUS; + unlock_page(page); return ret; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6083bb38057..990c9400092 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1098,7 +1098,7 @@ extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from); -extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); +extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t ext4_get_reserved_space(struct inode *inode); /* ioctl.c */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 71d3ecd5db7..dd82ff39006 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5146,8 +5146,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) return !buffer_mapped(bh); } -int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) +int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; loff_t size; unsigned long len; int ret = -EINVAL; @@ -5199,6 +5200,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) goto out_unlock; ret = 0; out_unlock: + if (ret) + ret = VM_FAULT_SIGBUS; up_read(&inode->i_alloc_sem); return ret; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 821d10f719b..4e340fedf76 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1234,8 +1234,9 @@ static void fuse_vma_close(struct vm_area_struct *vma) * - sync(2) * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER */ -static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; /* * Don't use page->mapping as it may become NULL from a * concurrent truncate. diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 3b9e8de3500..70b9b854894 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -337,8 +337,9 @@ static int gfs2_allocate_page_backing(struct page *page) * blocks allocated on disk to back that page. */ -static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -412,6 +413,8 @@ out_unlock: gfs2_glock_dq(&gh); out: gfs2_holder_uninit(&gh); + if (ret) + ret = VM_FAULT_SIGBUS; return ret; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 90f292b520d..cec79392e4b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -451,8 +451,9 @@ const struct address_space_operations nfs_file_aops = { .launder_page = nfs_launder_page, }; -static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct file *filp = vma->vm_file; struct dentry *dentry = filp->f_path.dentry; unsigned pagelen; @@ -483,6 +484,8 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) ret = pagelen; out_unlock: unlock_page(page); + if (ret) + ret = VM_FAULT_SIGBUS; return ret; } diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index eea1d24713e..b606496b72e 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -154,8 +154,9 @@ out: return ret; } -static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct buffer_head *di_bh = NULL; sigset_t blocked, oldset; @@ -196,7 +197,8 @@ out: ret2 = ocfs2_vm_op_unblock_sigs(&oldset); if (ret2 < 0) mlog_errno(ret2); - + if (ret) + ret = VM_FAULT_SIGBUS; return ret; } diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 93b6de51f26..0ff89fe71e5 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1434,8 +1434,9 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) * mmap()d file has taken write protection fault and is being made * writable. UBIFS must ensure page is budgeted for. */ -static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct ubifs_info *c = inode->i_sb->s_fs_info; struct timespec now = ubifs_current_time(inode); @@ -1447,7 +1448,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); if (unlikely(c->ro_media)) - return -EROFS; + return VM_FAULT_SIGBUS; /* -EROFS */ /* * We have not locked @page so far so we may budget for changing the @@ -1480,7 +1481,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) if (err == -ENOSPC) ubifs_warn("out of space for mmapped file " "(inode number %lu)", inode->i_ino); - return err; + return VM_FAULT_SIGBUS; } lock_page(page); @@ -1520,6 +1521,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) out_unlock: unlock_page(page); ubifs_release_budget(c, &req); + if (err) + err = VM_FAULT_SIGBUS; return err; } diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index e14c4e3aea0..f4e25544157 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -234,9 +234,9 @@ xfs_file_mmap( STATIC int xfs_vm_page_mkwrite( struct vm_area_struct *vma, - struct page *page) + struct vm_fault *vmf) { - return block_page_mkwrite(vma, page, xfs_get_blocks); + return block_page_mkwrite(vma, vmf, xfs_get_blocks); } const struct file_operations xfs_file_operations = { diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index f19fd9045ea..3d7bcde2e33 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -216,7 +216,7 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); -int block_page_mkwrite(struct vm_area_struct *vma, struct page *page, +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); diff --git a/include/linux/mm.h b/include/linux/mm.h index 2223f8dfa56..aeabe953ba4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -135,6 +135,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ +#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ /* * This interface is used by x86 PAT code to identify a pfn mapping that is @@ -187,7 +188,7 @@ struct vm_operations_struct { /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ - int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page); + int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf); /* called by access_process_vm when get_user_pages() fails, typically * for use by special VMAs that can switch between memory and hardware diff --git a/mm/memory.c b/mm/memory.c index 5b4ad5e4f98..cf6873e91c6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1945,6 +1945,15 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, * get_user_pages(.write=1, .force=1). */ if (vma->vm_ops && vma->vm_ops->page_mkwrite) { + struct vm_fault vmf; + int tmp; + + vmf.virtual_address = (void __user *)(address & + PAGE_MASK); + vmf.pgoff = old_page->index; + vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; + vmf.page = old_page; + /* * Notify the address space that the page is about to * become writable so that it can prohibit this or wait @@ -1956,8 +1965,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, page_cache_get(old_page); pte_unmap_unlock(page_table, ptl); - if (vma->vm_ops->page_mkwrite(vma, old_page) < 0) + tmp = vma->vm_ops->page_mkwrite(vma, &vmf); + if (unlikely(tmp & + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { + ret = tmp; goto unwritable_page; + } /* * Since we dropped the lock we need to revalidate @@ -2106,7 +2119,7 @@ oom: unwritable_page: page_cache_release(old_page); - return VM_FAULT_SIGBUS; + return ret; } /* @@ -2648,9 +2661,14 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, * to become writable */ if (vma->vm_ops->page_mkwrite) { + int tmp; + unlock_page(page); - if (vma->vm_ops->page_mkwrite(vma, page) < 0) { - ret = VM_FAULT_SIGBUS; + vmf.flags |= FAULT_FLAG_MKWRITE; + tmp = vma->vm_ops->page_mkwrite(vma, &vmf); + if (unlikely(tmp & + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { + ret = tmp; anon = 1; /* no anon but release vmf.page */ goto out_unlocked; } -- cgit v1.2.3-70-g09d2